def dataReceived(self, data):
    data = data.strip("\r\n")
    if data.startswith("vote"):
        self.factory.datastore.vote(data.split("|")[1])
    elif data.startswith("add"):
        args = data.split("|")
        self.factory.datastore.add(args[1], args[2])  # name, path
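# Hedged illustration of the pipe-delimited line protocol implied above
# (example payloads only, not taken from the original source):
#   vote|<name>
#   add|<name>|<path>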
def run(self, clf):
    X, y = data.split(self.data)
    skf_config = config.xvalidation
    self.skf = StratifiedShuffleSplit(y, **skf_config)
    self.X = X
    self.y = y
    report = Report(" | ".join([step[0] for step in clf.steps]))
    X, y = self.X, self.y
    y_true = []
    y_predicted = []
    for train_index, test_index in self.skf:
        X_train, y_train = X[train_index], y[train_index]
        X_test, y_test = X[test_index], y[test_index]
        y_true += list(y_test)
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)
        y_predicted += list(y_pred)
    report.summary = classification_report(y_true, y_predicted,
                                            target_names=("dead", "alive"))
    cm = confusion_matrix(y_true, y_predicted)
    report.confusion_matrix = cm
    report.confusion_matrix_norm = cm.astype("float") / cm.sum(axis=1)[:, np.newaxis]
    report.accuracy = accuracy_score(y_true, y_predicted)
    return report
def test_split():
    indices = np.array([2, 4, 8, 9])
    expected = np.concatenate((indices, indices + 10))
    actual = np.array(list(data.split(indices, 10, range(20))))
    assert len(actual) == len(expected)
    assert actual.dtype == expected.dtype
    assert all(actual == expected)
def main(file_size):
    """Generates the files needed to run tagWeighting.py (the locality script)."""
    data_funcs_by_size = {
        'small': data.get_small,
        'medium': data.get_medium,
        'large': data.get_large
    }
    all_data = data_funcs_by_size[file_size]()
    train, test = data.split(all_data, 0.8)
    metadata = []
    photo = []
    uid = []
    for img in train:
        pictureID = img['watchlink'].strip()
        tags = img['tags']
        lat, lon = img['latitude'], img['longitude']
        userID = img['userID']
        metadata.append('{0}\t0\t0\t{1}'.format(pictureID, ', '.join(tags)))
        photo.append('0\t0\t{0}\t{1}'.format(lon, lat))
        uid.append('0\t{0}'.format(userID))
    write_to_file(metadata, file_size + '_train_metadata')
    write_to_file(photo, file_size + '_train_photo')
    write_to_file(uid, file_size + '_train_uid')
def process(self, opType):
    if len(self.threads) == 0:
        self.responseLine.setText("No connection.")
        return
    # Get data input on the UI
    dt = self.vectorLine.text()
    if len(dt) == 0:
        self.responseLine.setText("No data to process.")
        return
    # Create sub-vectors
    dt = list(map(int, dt.split()))
    vectorOfSubvectors = self.vectorFragmentation(dt)
    resp = dt[0]
    if opType == "MAX":
        typeFunction = max
    elif opType == "MIN":
        typeFunction = min
    else:
        self.responseLine.setText("Type of function error")
        self.update()
        return
    for i in range(len(self.threads)):
        resp = typeFunction(
            resp,
            int(self.threads[i].sendData(
                opType + "#" + " ".join(str(x) for x in vectorOfSubvectors[i]))))
    self.responseLine.setText(opType + " VALUE: " + str(resp))
    self.update()
def save_layout_file(name, view, layout, controls):
    dir = java.io.File('layouts')
    make_layout_dir(dir)
    fn = 'layouts/%s.layout' % name
    # Check if file exists
    # - If it does, extract java layout information, otherwise just make a new file
    java_layout = ""
    if java.io.File(fn).exists():
        # Save a backup of the layout file
        fp = java.io.File(fn).getCanonicalPath()
        copyfile(fp, fp + '.bak')
        f = file(fn, 'r')
        data = f.read()
        for line in data.split('\n'):
            if line.startswith('#'):
                java_layout += '\n' + line
        f.close()
    f = file(fn, 'w')
    layout_text = ',\n '.join([repr(x) for x in layout])
    f.write('(%s,\n [%s],\n %s) %s' % (view, layout_text, controls, java_layout))
    f.close()
def load_saved_lf(filename='lf_data.dat'):
    lf_lines = open(filename, 'r').readlines()
    dic = {}
    for line in lf_lines:
        if line.startswith('#'):
            continue
        line = line.strip()
        filter = line.split(' ')[0]
        region = line.split(' ')[1]
        data = line[7:]
        bins = map(float, data.split(';')[0].split())
        hist = map(float, data.split(';')[1].split())
        if not filter in dic.keys():
            dic[filter] = {}
        if not region in dic[filter].keys():
            dic[filter][region] = {}
        # if not 'bins' in dic[filter][region].keys(): dic[filter][region]['bins'] = {}
        dic[filter][region] = {'bins': bins, 'hist': hist}
    return dic
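# Hedged note on the expected file layout (inferred from the slicing above, not
# from the original source): each non-comment line reads roughly
#   "<filter> <region> <bins ...>; <hist ...>"
# with the numeric payload starting at character 7 (data = line[7:]).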
def main():
    print("load csv into a pandas dataframe")
    dt = data_loader()
    data = dt.load_data()
    print(f"data has {data.shape}")
    data = dt.encode_target(data)
    print("preprocess data by removing outliers and encoding feature variables")
    data = dt.preprocess(data)
    # print(data.columns)
    print("scale data using standardscaler and encoding using pandas get_dummies")
    data = dt.scale_columns(data)
    print(f"data contains {data.columns}")
    sam = resample_data()
    data = sam.under_sample(data)
    print(data['y'].value_counts())
    s = split()
    data = s.train_test(data)
    print(data[0].shape)
    classifiers_cv(data[0], data[1], data[2], data[3])
def parse_to_dict(data):
    programs = {}
    for line in data.split('\n'):
        key, values = line.split(' <-> ')
        key = int(key)
        values = tuple(int(v) for v in values.split(','))
        programs[key] = values
    return programs
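# Hedged usage sketch (the "<id> <-> <id>, <id>, ..." input format is inferred from
# the parser above; the sample text is illustrative, not from the original source):
example = "0 <-> 2\n1 <-> 1\n2 <-> 0, 3, 4"
print(parse_to_dict(example))  # {0: (2,), 1: (1,), 2: (0, 3, 4)}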
def parse_text(data):
    parts = []
    for pipe in data.split('\n'):
        a, b = pipe.split('/')
        parts.append(Pipe(int(a), int(b)))
    return Parts(parts)
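# Hedged usage sketch; Pipe and Parts are not defined in the snippet above, so
# simple stand-ins are assumed here purely for illustration.
from collections import namedtuple
Pipe = namedtuple('Pipe', ['a', 'b'])
Parts = list
print(parse_text("0/2\n2/2\n2/3"))  # [Pipe(a=0, b=2), Pipe(a=2, b=2), Pipe(a=2, b=3)]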
def train_test_split(self, X, y, eval_size):
    if eval_size:
        X_train, X_valid, y_train, y_valid = data.split(
            X, y, test_size=eval_size)
    else:
        X_train, y_train = X, y
        X_valid, y_valid = X[len(X):], y[len(y):]
    return X_train, X_valid, y_train, y_valid
def compare_lenght(in_path):
    seq_dict = imgs.read_seqs(in_path)
    len_dict = get_len_dict(seq_dict)
    train, test = data.split(len_dict.keys())
    train, test = by_cat(train), by_cat(test)
    for cat_i in train.keys():
        train_i = np.mean([len_dict[name_i] for name_i in train[cat_i]])
        test_i = np.mean([len_dict[name_i] for name_i in test[cat_i]])
        print("%d,%.2f,%.2f" % (cat_i, test_i, train_i))
def test():
    clf = RandomForest
    X_train, y_train = data.split(data.train)
    X_test = data.extract_features(data.test)
    clf.fit(X_train, y_train)
    labels = clf.predict(X_test)
    pd.DataFrame({
        "PassengerId": np.array(data.test["PassengerId"]),
        "Survived": labels
    }).to_csv("submit.csv", index=False)
def person_model(in_path, out_path, n_epochs=100):
    seq_dict = imgs.read_seqs(in_path)
    train, test = data.split(seq_dict.keys())
    persons = [data.parse_name(name_i)[1] - 1 for name_i in train]
    persons = keras.utils.to_categorical(persons)
    X, y = to_dataset(train, seq_dict)
    n_cats, n_channels = y.shape[1], X.shape[-1]
    model = models.make_exp(n_cats, n_channels)
    model.summary()
    model.fit(X, y, epochs=n_epochs, batch_size=256)
    model.save(out_path)
def agum_template(raw_path, agum_path, agum, n_iters=10):
    raw_data = imgs.read_seqs(raw_path)
    train, test = data.split(raw_data.keys())
    train_data = {name_i: raw_data[name_i] for name_i in train}
    agum_dict = {}
    for name_i, seq_i in list(train_data.items()):
        agum_seq_i = agum(images=seq_i)
        for j in range(n_iters):
            new_name_i = name_i + '_' + str(j)
            print(new_name_i)
            agum_dict[new_name_i] = agum_seq_i
    new_dict = {**raw_data, **agum_dict}
    imgs.save_seqs(new_dict, agum_path)
def apply_physics_properties_to_shape(self, node, shape):
    shape.restitution = DEFAULT_RESTITUTION
    shape.density = DEFAULT_DENSITY
    shape.friction = DEFAULT_FRICTION
    data = node.getAttribute('physics_shape')
    if data:
        keyvalues = data.split(',')
        for keyvalue in keyvalues:
            key, value = keyvalue.split('=')
            value = self.cast_value(value)
            setattr(shape, key, value)
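# Hedged illustration of the attribute format implied by the parser above (the
# example values are assumptions, not from the original source): a node might carry
#   physics_shape="density=2.0,friction=0.4,restitution=0.1"
# and each key=value pair is cast via cast_value() and applied with setattr().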
def load_data(in_path, split=True):
    feat_dict = single.read_frame_feats(in_path)
    if split:
        train, test = data.split(feat_dict.keys())
        train, test = prepare_data(train, feat_dict), prepare_data(test, feat_dict)
        params = {
            'ts_len': train[0].shape[1],
            'n_feats': train[0].shape[2],
            'n_cats': train[1].shape[1]
        }
        return train, test, params
    else:
        names = list(feat_dict.keys())
        return prepare_data(names, feat_dict), names
def run(self, msg, user):
    c = httplib2.HTTPSConnectionWithTimeout("api.mrbesen.de")
    c.request("GET", "/pos.php")
    response = c.getresponse()
    if response.code == 200:
        data = response.read().decode().strip()
        time, loc, lastknowntime, lastknownloc = data.split("\n")
        if loc == 'unknown':
            print("unknown location")
            datestr = self.getTimeStr(time)
            # German: "So on <date> I was in <last known location>, but right now I'm on the move."
            return "Also am " + datestr + " war ich in " + lastknownloc + " aber grade bin ich Unterwegs."
        else:
            datestr = self.getTimeStr(time)
            # German: "I'm currently (<date>) in <location>."
            return "Bin gerade (" + datestr + ") in " + loc
    return None
def sim_model(in_path, out_path):
    full = img_dataset(in_path)
    train, test = data.split(full.keys())
    # train = {name_i: full[name_i] for name_i in train}
    X0, X1, y = [], [], []
    for i, name_i in enumerate(train):
        for name_j in train[i:]:
            y_k = int(name_i.split("_")[0] == name_j.split("_")[0])
            X0.append(full[name_i])
            X1.append(full[name_j])
            y.append(y_k)
    X = [np.array(X0), np.array(X1)]
    sim_metric, model = sim.frames.make_five(20, 1)
    sim_metric.fit(X, y, epochs=250, batch_size=100)
    if out_path:
        model.save(out_path)
def get_data():
    global times
    global tidalVolume
    global oldtidalVolume
    global peakPressure
    global oldpeakPressure
    global respirationRate
    global oldrespirationRate
    global oldTime
    global maxTime
    global corrupt
    global PEEP
    maxTime = 0
    startingTime = time.time()
    while corrupt and (int(time.time() - startingTime) <= 1):
        try:
            ser = serial.Serial('COM3', baudrate)
            corrupt = False
            while True:
                while ser.inWaiting() == 0:
                    pass
                value = ser.readline()
                try:
                    data = str(value.decode("utf-8"))
                    data = data.split(",")
                    dataTime = int(data[0])
                    signal1 = int(data[1]) - 500
                    update_level(dataTime, 0, signal1, 0)
                except:
                    pass
        except:
            pass
    if corrupt == True:
        currTime = 0.0
        while True:
            if currTime / 1000 not in data1.keys():
                currTime = 0.0
                maxTime = 0
            # pp = data1[currTime/1000]/30
            pp = 45
            rr = data2[currTime / 1000] / 12
            tv = data3[currTime / 1000] / 3
            update_level(currTime, pp, rr, tv)
            currTime += 10
            time.sleep(0.01)
def get_data(file_size):
    """Fetches training and test data.

    Args:
        file_size: 'small', 'medium', or 'large' indicating the size of the
            desired dataset

    Returns:
        (train_data, test_data) where train_data and test_data are lists of
        data points (each data point is a dict)
    """
    data_funcs_by_size = {
        'small': data.get_small,
        'medium': data.get_medium,
        'large': data.get_large
    }
    all_data = data_funcs_by_size[file_size]()
    train_data, test_data = data.split(all_data, 0.8)
    return train_data, test_data
def _ParseCookie(self, cookie):
    """Parses the cookie and returns NULL_COOKIE if it's invalid.

    Args:
        cookie: The text of the cookie.

    Returns:
        A map containing the values in the cookie.
    """
    try:
        (hashed, data) = cookie.split('|', 1)
        # global cookie_secret
        if hashed != str(hash(cookie_secret + data) & 0x7FFFFFF):
            return self.NULL_COOKIE
        values = data.split('|')
        return {
            COOKIE_UID: values[0],
            COOKIE_ADMIN: values[1] == 'admin',
            COOKIE_AUTHOR: values[2] == 'author',
        }
    except (IndexError, ValueError):
        return self.NULL_COOKIE
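# Hedged illustration of the cookie layout implied by the parser above (field
# names are assumptions, not from the original source):
#   "<hash>|<uid>|admin|author"
# where <hash> must equal str(hash(cookie_secret + "<uid>|admin|author") & 0x7FFFFFF).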
def load_params(train_prog, train_exe, place, logger, args=None):
    if not args.para_load_dir:
        return
    logger.info('loading para from {}'.format(args.para_load_dir))
    param_list = train_prog.block(0).all_parameters()
    param_name_list = [p.name for p in param_list]
    for data in listDir(args.para_load_dir):
        slot = int(data.split('_')[1].split('.')[0])
        with open(data, 'rb') as fin:
            if six.PY2:
                p_array = pickle.load(fin)
            else:
                p_array = pickle.load(fin, encoding='bytes')
        p_array = p_array.reshape((-1))
        offset = 0
        for name in name_dict:
            s = name_dict[name]
            if s == slot:
                card = 0
                # for scope in [train_exe.scope]:  # train_exe.executor.local_scopes():
                for scope in train_exe.executor.local_scopes():
                    tensor = scope.find_var(name).get_tensor()
                    shape = tensor.shape()
                    tensor_len = np.prod(shape)
                    new_array = p_array[offset:offset + tensor_len]
                    new_array = new_array.reshape(shape)
                    if args.use_gpu:
                        placex = fluid.CUDAPlace(card)
                    else:
                        placex = fluid.CPUPlace()
                    tensor.set(new_array.astype(np.float32), placex)
                    logger.info('card {} loaded {}[{}] from {}[{}:{}]'.format(
                        card, name, shape, data, offset, offset + tensor_len))
                    card = card + 1
                offset += tensor_len
def cmd_tio(self, msg, args, stdin):
    '''
    Run {prefix}tio [language] [code] to evaluate code in a given language on
    Try it online! (https://tio.run/). Specify additional sections on a separate
    line consisting of three hashes (###) followed by the section name, which can
    be any of: stdin (provide input), arg (provide any number of command line
    arguments), or stderr (specify this section to view stderr output in addition
    to stdout; you may also retroactively do this with {prefix}tio err).
    '''
    err = " (try `{}help tio` for more information)".format(self.prefix)
    if not args:
        return 'Basic usage: {}tio [lang] [code]'.format(self.prefix) + err
    if args == 'err':
        return self.tioerr
    lang, *rest = args.split(None, 1)
    rest = rest[0] if len(rest) else ''
    stdin = ''
    stderr = False
    args = []
    code, *parts = rest.split('\n###')
    for part in parts:
        name, data = part.split(None, 1) if '\n' in part or ' ' in part else (part, '')
        name = name.strip()
        if name == 'stdin':
            stdin = data
        elif name == 'stderr':
            stderr = True
        elif name == 'arg':
            args.append(data)
        else:
            return "Unknown section `{}`".format(name) + err
    try:
        data = requests.post(
            'https://tio.run/cgi-bin/run/api/',
            zlib.compress(bytes(
                'Vlang\u00001\u0000{}\u0000F.code.tio\u0000{}\u0000{}F.input.tio\u0000{}\u0000{}Vargs\u0000{}{}\u0000R'.format(
                    lang, len(bytes(code, 'utf-8')), code,
                    len(bytes(stdin, 'utf-8')), stdin,
                    len(args), (len(args) * '\u0000{}').format(*args)),
                'utf-8'), 9)[2:-4],
            timeout=5).content.decode('utf-8')
        data = data.split(data[:16])[1:]
        if len(data) == 1:
            return data[0]  # error
        dout, derr = [x.strip('\n') for x in data[:2]]
        self.tioerr = derr
        haserr = re.search(
            '\nReal time: \\d+\\.\\d+ s\nUser time: \\d+\\.\\d+ s\nSys\\. time: \\d+\\.\\d+ s\nCPU share: \\d+\\.\\d+ %\nExit code: \\d+$',
            data[1]).start() > 0
        return (dout + '\n--- stderr ---\n' + derr if stderr
                else dout + ('\n[stderr output - use {}tio err to view]'.format(self.prefix) if haserr else '')) or '[no output]'
    except requests.exceptions.ConnectionError:
        return '5 second timeout reached.'
# digits_session_2_dataset
# digits_session_3_dataset
# digits_session_4_dataset
#
# sequence_groups[i] contains data for class i
# 4-dimensional data structure: (class, sequence_num, timestep, channel_num)
sequence_groups = transform_data(data.digits_session_4_dataset())

# Split sequence_groups into training and validation data
#training_sequence_groups, validation_sequence_groups = data.split(sequence_groups, 1./6)

# Manually selecting different training and validation datasets
training_sequence_groups = transform_data(data.digits_session_1_dataset())
validation_sequence_groups = transform_data(data.digits_session_4_dataset())
validation_sequence_groups = data.split(validation_sequence_groups, 1 / 6.)[1]

# Pads or truncates each sequence to length
length = 600
training_sequence_groups = data.transform.pad_truncate(
    training_sequence_groups, length)
validation_sequence_groups = data.transform.pad_truncate(
    validation_sequence_groups, length)

# Format into sequences and labels
train_sequences, train_labels = data.get_inputs(training_sequence_groups)
val_sequences, val_labels = data.get_inputs(validation_sequence_groups)

# Calculate sample weights
class_weights = compute_class_weight('balanced', np.unique(train_labels),
                                     train_labels)
train_weights = class_weights[list(train_labels)]
#sequence_groups = data.combine([
#    data.process(10, ['data/data/2_subvocal_digits_9_trials.txt']),
#    data.process(10, ['data/data/3_subvocal_digits_11_trials.txt']),
#    data.process(10, ['data/data/4_subvocal_digits_10_trials.txt']),
#    data.process(10, ['data/data/5_subvocal_digits_10_trials.txt']),
#    data.process(10, ['data/data/6_subvocal_digits_10_trials.txt']),
#])

#sequence_groups = sequence_groups[:2]

sequence_groups = data.transform.default_transform(sequence_groups)

#sequence_groups = np.array(map(lambda x: x[30:], sequence_groups))

# Split into training and validation data
training_sequence_groups, validation_sequence_groups = data.split(sequence_groups, 1./6)

# Augment training data by varying positioning of padding/truncating
# Uncomment
#training_sequence_groups = data.transform.pad_extra(training_sequence_groups, length)
#training_sequence_groups = data.transform.augment_pad_truncate_intervals(training_sequence_groups, length, 10)
#training_sequence_groups = data.transform.augment_pad_truncate_intervals(training_sequence_groups, length, 100)
#training_sequence_groups = data.transform.augment_pad_truncate_intervals(training_sequence_groups, length, 50)
training_sequence_groups = data.transform.pad_truncate(training_sequence_groups, length)

#training_sequence_groups = data.transform.augment(training_sequence_groups,
#    [(data.transform.gaussian_filter, [3], {})],
#    include_original=True)
clf.fit(x_train, y_train)
# result = pd.DataFrame.from_dict(clf.cv_results_)
# with open(m[0]+'.csv','w') as f:
#     result.to_csv(f)
print('The parameters of the best ' + m[0] + ' are: ')
print(clf.best_params_)
y_pred = clf.predict(x_train)
print(classification_report(y_true=y_train, y_pred=y_pred))
y_test_pred = clf.predict(x_test)
# print(classification_report(y_true=y_test, y_pred=y_test_pred))
# df_test_y = pd.DataFrame(y_test_pred, columns=['Survived'])
df = pd.DataFrame(data.get_test_PassengerId()).join(
    pd.DataFrame(y_test_pred, columns=['Survived']))
print(df.head())
df.to_csv('./titanic_test_result_' + m[0] + '.csv', index=False)


import data, preprocess

if '__main__' == __name__:
    train_data = data.get_train_data()
    train_data = preprocess.fill_missing_data(train_data, sex_cat=True,
                                              embarked_one_hot=True)
    # train_data = preprocess.feature_selection(train_data)
    # train_data = preprocess.detect_outlier(train_data, drop=True)
    print(train_data.head())
    x_train, y_train = data.split(train_data)
    # print(y_train.values)
    x_test, y_test = data.get_test_x(), data.get_test_y()
    x_test = preprocess.fill_missing_data(x_test, is_train=False, sex_cat=True,
                                          embarked_one_hot=True)
    # poly = PolynomialFeatures(2, interaction_only=True)
    # x_train = poly.fit_transform(x_train.values)
    # x_test = poly.fit_transform(x_test.values)
    model_selection(x_train.values, y_train.values, x_test.values, y_test.values)
def split_step(preprocessed_data_path, train_pct):
    from data import split
    train_data_path, test_data_path = split(preprocessed_data_path, train_pct)
    return train_data_path, test_data_path
# Update internal cache of parameters
params['num_samples'] = num_samples
params['num_features'] = num_features
params['num_frames'] = num_frames
params['x_scaler'] = MinMaxScaler(feature_range=(-1, 1))
params['y_scaler'] = MinMaxScaler(feature_range=(-1, 1))
params['num_testcases'] = 2

X = empty((num_frames, num_timesteps, num_features))
Y = empty((num_frames, num_predictions))
for i in range(num_samples - num_timesteps):
    X[i] = non_stationary[i:i + num_timesteps, ]
    Y[i] = non_stationary[i + num_timesteps:i + num_timesteps + num_predictions, 0]
Y

X_scaled = np.array(
    [params['x_scaler'].fit_transform(X[i]) for i in range(X.shape[0])])
Y_scaled = params['y_scaler'].fit_transform(Y)
Y_scaled

X_train, Y_train, X_test, Y_test = data.split(X_scaled, Y_scaled,
                                              params['num_testcases'])
Y_test

y_unscaled = params['y_scaler'].inverse_transform(Y_test)
y_undiff = data.inverse_diff(
    y_unscaled, a[-(params['num_testcases'] + 1):, 0:1])[-params['num_testcases']:]
y_undiff

print(data.recover_Ytest(Y_test, a, params))
# hi
N_CLASSES = 10
HIDDEN_SIZE = 256
CONV_L2 = 5e-4
FC_L2 = 1e-3
DROPOUT = 0.5
LR = 0.1
BATCH_SIZE = 128
EPOCHS = 300
TEST_TIME_K = 5

x_train, y_train, x_test, y_test = data.load_numpy()
x_train, y_train, _, _ = data.split(x_train, y_train, PERCENTAGE)
N_TRAIN_BATCHES = int(x_train.shape[0] / BATCH_SIZE)
N_TEST_BATCHES = int(x_test.shape[0] / BATCH_SIZE)

model = learning.cnn_classifier(number_of_classes=N_CLASSES,
                                hidden_layer_size=HIDDEN_SIZE,
                                conv_l2=CONV_L2,
                                fc_l2=FC_L2,
                                drop_out=DROPOUT)
opt = SGD(learning_rate=LR, momentum=0.9)
model.compile(loss='categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'],
#     data.combine([
#         map(lambda x: x[:30], data.digits_session_dependence_1_dataset(channels=range(1, 8))),
#         map(lambda x: x[:30], data.digits_session_dependence_2_dataset(channels=range(1, 8))),
#         map(lambda x: x[:40], data.digits_session_dependence_3_dataset(channels=range(1, 8))),
#     ]))
#
#print map(len, sequence_groups)
#lens = map(len, data.get_inputs(sequence_groups)[0])
#print min(lens), np.mean(lens), max(lens)

# Split sequence_groups into training and validation data
#training_sequence_groups, validation_sequence_groups = data.split(sequence_groups, 1./6)

# Manually selecting different training and validation datasets
training_sequence_groups, validation_sequence_groups = data.split(
    data.digits_session_dependence_3_dataset(channels=range(1, 8)), 1. / 6)
training_sequence_groups = transform_data(
    data.combine([
#        map(lambda x: x[:30], data.digits_session_dependence_1_dataset(channels=range(1, 8))),
#        map(lambda x: x[:30], data.digits_session_dependence_2_dataset(channels=range(1, 8))),
#        map(lambda x: x[:40], data.digits_session_dependence_3_dataset(channels=range(1, 8))),
        training_sequence_groups,
    ]))
validation_sequence_groups = transform_data(
    data.combine([
#        map(lambda x: x[:30], data.digits_session_dependence_1_dataset(channels=range(1, 8))),
#        map(lambda x: x[:30], data.digits_session_dependence_2_dataset(channels=range(1, 8))),
#        map(lambda x: x[:40], data.digits_session_dependence_3_dataset(channels=range(1, 8))),
        validation_sequence_groups,
    ]))
import pandas as pd

from data import get_dataset, split
from detl.mydb import db_client
from svm import SVMClassifier, confusion_matrix, accuracy

with db_client().as_default():
    digits = get_dataset()
    X_train, X_test, y_train, y_test = split(digits)

    classifier = SVMClassifier()
    classifier.fit(X_train, y_train)

    pred = classifier.predict(X_test)
    print(confusion_matrix(y_test, pred).data)
    print('Accuracy', accuracy(y_test, pred).data)