# Split the samples into train/test destinations and load the paired
# HOL-matrix and argument vectors, standardising each argument vector.
# NOTE(review): mid-function fragment — samplesize, min_sample_size,
# samples, holmatrix, arg_data, variance_control and mean_std come from
# the enclosing scope earlier in the file; verify there.
if samplesize > min_sample_size:
    # Indices routed to the training split; everything else goes to test.
    # NOTE(review): list(range(samplesize)) covers EVERY index, so the
    # 'test' branch below is unreachable — presumably a random sampling
    # call (e.g. random.sample) was intended here; confirm against intent.
    trainindices = list(range(samplesize))
    # load vectors into train and test collections
    for i in range(samplesize):
        if trainindices and trainindices[0] == i:
            dest = 'train'
            trainindices = trainindices[1:]
        else:
            dest = 'test'
        sample = samples[i]
        # samples are of form holmatrixrow, argstring, weight
        if sample[1] != '':
            # import of vectors
            hol_v = holmatrix[int(sample[0])]
            arg_v = arg_data.vector(sample[1])
            weight = float(sample[2])
            # Apply standard scaling: centre the vector and, when
            # variance_control is set, also divide by its std deviation.
            # (The original ran TWO StandardScaler passes — a centre-only
            # pass followed by a second pass that re-centred the already
            # zero-mean data; the first pass was redundant and is removed.
            # Combined effect is identical: centre once, optionally scale.)
            scaler = sklearn.preprocessing.StandardScaler(
                with_std=variance_control)
            arg_v = np.squeeze(scaler.fit_transform(arg_v[:, np.newaxis]))
            if variance_control:
                # Rescale by the reference std so vectors stay comparable
                # across samples — mean_std is computed elsewhere; confirm.
                arg_v = mean_std * arg_v
# Parse a "verb|rel|arg" formatted line: `verb` is the leading word before
# the first '|', `rel` the word between the pipes, and `arg` everything
# after the 'O|' or 'S|' marker.  When the verb|rel key changes, a new row
# is appended to verbarray.  The argument's vector is looked up via
# r.vector(arg); if it equals the `control` vector (presumably the
# out-of-vocabulary sentinel — confirm), the variants from t.transform(arg)
# are tried in order until one yields a non-control vector; otherwise the
# failure is logged to `log` and failedcount is incremented.
# NOTE(review): this line is a whitespace-mangled paste and is TRUNCATED
# mid-statement — the final `np.vstack(` has no closing argument list; the
# remainder lies outside this view.  Code left byte-identical.
verb = re.search(r'^(\w)+(?=\|)', line).group(0) rel = re.search(r'(?<=\|)(\w)+(?=\|)', line).group(0) arg = re.search(r'(?<=(O|S)\|).+$', line).group(0) key = verb + '|' + rel # check if this is a new row if oldkey != key: verbarray = np.vstack([ verbarray, np.array([key, np.array([np.array([0, ''], object)], object)], object) ]) verbindex += 1 oldkey = key arg_v = np.array(r.vector(arg)) if np.array_equal(arg_v, control): new_args = t.transform(arg) arg_str = '' for w in new_args: new_v = np.array(r.vector(w)) if not np.array_equal(new_v, control): arg_str = w break else: arg_str = arg if not arg_str: log.write(arg + '\t\t' + str(new_args) + '\n') failedcount += 1 else: verbarray[-1][1] = np.vstack(