# Loads each patient's raw training .mat files into MongoDB, one document per
# EEG channel, and shelves a small index frame of what was posted.
import collections
import copy

import pandas as pd
from pymongo import MongoClient

import load_raw_data
import shelve_api


def insert_patient(patient):
    client = MongoClient()
    db = client['gingivere']
    collection = db[patient]
    d = collections.defaultdict(list)
    for data in load_raw_data.walk_training_mats(patient):
        post_item = copy.deepcopy(data)
        channels = post_item['channels']
        del post_item['data']
        del post_item['channels']
        for i, item in enumerate(data['data']):
            post_item['data'] = item.tolist()
            post_item['channel'] = channels[i]
            name = "%02d_%s" % (i, data['file'])
            post_id = collection.insert(post_item)  # legacy pymongo insert; returns the new _id
            d['name'].append(name)
            d['_id'].append(str(post_id))
            d['state'].append(post_item['state'])
            d['channel'].append(channels[i])
            print("Just posted: " + name)
            del post_item['_id']  # insert() stamps _id onto the dict; drop it so the dict can be reused
    df = pd.DataFrame(d)
    shelve_api.insert(df, "labeled_" + patient)
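# shelve_api itself is not included in this snapshot. Below is a minimal sketch
# of an insert/load pair consistent with how it is called here (insert(obj, name)
# and load(name)), built on the stdlib shelve module. The shelf path is an
# assumption; this is not the repo's actual implementation.
import shelve

SHELF_PATH = "D:/gingivere/shelf"  # assumed location


def insert(obj, name):
    # pickle the object under the given key
    with shelve.open(SHELF_PATH) as db:
        db[name] = obj


def load(name):
    # fetch a previously shelved object by key
    with shelve.open(SHELF_PATH) as db:
        return db[name]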
# Async variant of the MongoDB loader: streams documents through Motor's old
# callback API under a Tornado IOLoop and shelves an index of what was posted.
from collections import defaultdict

import motor
import pandas as pd
import tornado.ioloop
from tornado import gen

import shelve_api


def insert_patient(patient):
    # ... head truncated in the source; it builds insert_item dicts and a
    # running count before the surviving tail below ...
    insert_item['_id'] = count
    count += 1
    yield insert_item


def shelve(result, error):
    if error:
        print('error getting user!', error)
    else:
        # the source formatted the name with an undefined i; the document's
        # sequential _id is the closest defined stand-in
        name = "%02d_%s" % (result['_id'], result['file'])
        d['name'].append(name)
        d['_id'].append(result['_id'])
        d['state'].append(result['state'])
        d['channel'].append(result['channel'])
        print("Just posted: " + name)


@gen.coroutine
def bulk_write():
    global d
    d = defaultdict(list)
    collection.insert((i for i in insert_patient('Dog_2')), callback=shelve)


if __name__ == "__main__":
    client = motor.MotorClient()
    db = motor.MotorDatabase(client, 'gingivere')
    collection = motor.MotorCollection(db, 'Dog_1')
    tornado.ioloop.IOLoop.current().run_sync(bulk_write)
    df = pd.DataFrame(d)
    shelve_api.insert(df, 'test_dog_1')
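# The callback API above was removed in Motor 2.0. A minimal sketch of the same
# bulk load on Motor's asyncio API; it reuses this file's insert_patient
# generator and is an equivalent, not the author's code.
import asyncio

from motor.motor_asyncio import AsyncIOMotorClient


async def bulk_write_asyncio():
    client = AsyncIOMotorClient()
    collection = client['gingivere']['Dog_1']
    # one round trip for the whole batch; each doc already carries its own _id
    result = await collection.insert_many(list(insert_patient('Dog_2')))
    print("Inserted %d documents" % len(result.inserted_ids))


# usage: asyncio.run(bulk_write_asyncio())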
# Converts each raw .mat file into an HDF5 store (one channels x samples frame
# per file) and returns per-file min/max values, shelved per patient for later
# scaling.
import multiprocessing
import sys
import time

import numpy as np
import pandas as pd
from joblib import Parallel, delayed

import load_raw_data as lrd
import shelve_api as sapi

print(__doc__)


def process_data(task):  # renamed from input, which shadows the builtin
    data = lrd.load_mat(*task)
    name = data['file'].split('.')[0]
    print(name)
    store = pd.HDFStore("D:/gingivere/data/%s.h5" % name)
    # pp = preprocessing.scale(data['data'])
    df = pd.DataFrame(data['data'], index=data['channels'])
    del data
    store['data'] = df
    store.close()
    return np.asarray([df.values.min(), df.values.max()])


if __name__ == '__main__':
    patients = ["Dog_1", "Dog_2", "Dog_3", "Dog_4", "Dog_5",
                "Patient_1", "Patient_2"]
    num_cores = multiprocessing.cpu_count()
    now = time.time()
    if len(sys.argv) >= 2:
        patient = sys.argv[1]
        # res = Parallel(n_jobs=num_cores)(delayed(process_data)(i) for i in lrd.walk_files(patient))
        res = []
        for i in lrd.walk_files(patient):
            res.append(process_data(i))
        sapi.insert(res, "%s_mm" % patient)
    else:
        for patient in patients:
            res = Parallel(n_jobs=num_cores)(delayed(process_data)(i)
                                             for i in lrd.walk_files(patient))
            sapi.insert(res, "%s_mm" % patient)
    print("Finished in", time.time() - now, "sec")
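# Reading one of those stores back. The path pattern is the one process_data
# writes; the file stem here is only illustrative.
import pandas as pd

with pd.HDFStore("D:/gingivere/data/Dog_1_interictal_segment_0001.h5") as store:
    df = store['data']  # channels x samples frame written by process_data
    print(df.shape, list(df.index))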
# Trains the raw-data classifier for Dog_2, then writes a prediction for every
# channel document in MongoDB and shelves the accumulated results.
from __future__ import print_function

from collections import defaultdict

import numpy as np

import mongo_select
import raw_data_clf
from tests import shelve_api

print("Training the CLF")
clf = raw_data_clf.RawClf('Dog_2')
clf.clear_data()
print()
print("Cleared the data")
print()

d = defaultdict(list)
for item in mongo_select.get_all('Dog_2'):
    data = np.array(item['data']).astype('float32')
    prediction = clf.predict(data)
    d['pred'].append(prediction)
    del item['data']  # keep the metadata, drop the bulky signal
    for key in item:
        d[key].append(item[key])
    print("Just predicted %s for %s" % (prediction[0], item['file']))

shelve_api.insert(d, 'preds')
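# mongo_select is not part of this snapshot. A plausible minimal get_all,
# assuming the one-document-per-channel schema the pymongo loader above
# creates; treat it as a sketch, not the repo's implementation.
from pymongo import MongoClient


def get_all(patient):
    # stream every channel document for the patient's collection
    client = MongoClient()
    for doc in client['gingivere'][patient].find():
        yield doc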
# Second preprocessing pass: walks the patient's HDF5 files and, using the
# shelved patient-wide (min, max), runs preprocess_data over them in parallel,
# shelving the returned lengths. Only the tail of preprocess_data survives here.
import multiprocessing
import os
import sys
import time

import numpy as np
from joblib import Parallel, delayed

import shelve_api as sapi


def preprocess_data(task):
    # ... head truncated in the source; task is a (file_path, (lo, hi)) tuple ...
    print(file)
    return len(X)


def get_min_max(patient):
    mm = sapi.load("%s_mm" % patient)
    return (np.min(mm), np.max(mm))


def walk_data(patient):
    path = "D:/gingivere/data/"
    for file in os.listdir(path):
        if patient in file:
            yield path + file


if __name__ == '__main__':
    patients = ["Dog_1", "Dog_2", "Dog_3", "Dog_4", "Dog_5",
                "Patient_1", "Patient_2"]
    # d_keys = ['data_length_sec', 'sampling_frequency', 'sequence', 'state', 'file']
    num_cores = multiprocessing.cpu_count()
    now = time.time()
    if len(sys.argv) >= 2:
        patient = sys.argv[1]
        r = get_min_max(patient)
        res = Parallel(n_jobs=num_cores)(delayed(preprocess_data)((i, r))
                                         for i in walk_data(patient))
        sapi.insert(res, "%s_len" % patient)
    else:
        for patient in patients:
            r = get_min_max(patient)
            res = Parallel(n_jobs=num_cores)(delayed(preprocess_data)((i, r))
                                             for i in walk_data(patient))
            sapi.insert(res, "%s_len" % patient)
    print("Finished in", time.time() - now, "sec")
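# With the head of preprocess_data lost, here is a hedged reconstruction that
# fits its call site: it takes (h5_path, (lo, hi)), rescales the stored frame
# by the patient-wide bounds, and returns len(X) as in the surviving tail. The
# scaling step is an assumption; only the inputs, the print, and the return
# come from the source.
import pandas as pd


def preprocess_data_sketch(task):
    file, (lo, hi) = task
    with pd.HDFStore(file) as store:
        X = store['data'].values
    X = (X - lo) / (hi - lo)  # assumed global min-max normalisation into [0, 1]
    print(file)
    return len(X)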
# Trains and shelves one classifier per patient from the preprocessed training
# data, loading files in parallel. Only the tail of train_clf survives here;
# the commented-out block rescaled predictions through a sigmoid to
# sanity-check AUC on the training set.
import multiprocessing
import sys
import time

from joblib import Parallel, delayed

import shelve_api as sapi


def train_clf(X, y):
    # ... head truncated in the source ...
    # print()
    #
    # y_true, y_pred = y, clf.predict(X)
    # print(roc_auc_score(y_true, y_pred))
    # y_pred = y_pred - y_pred.mean()
    # y_pred = y_pred / y_pred.std()
    # y_pred = [1 / (1 + math.pow(math.e, -.5 * p)) for p in y_pred]
    # print(roc_auc_score(y_true, y_pred))
    # print()
    clf.fit(X, y)
    return clf


if __name__ == '__main__':
    patients = ["Dog_1", "Dog_2", "Dog_3", "Dog_4", "Dog_5",
                "Patient_1", "Patient_2"]
    # d_keys = ['data_length_sec', 'sampling_frequency', 'sequence', 'state', 'file']
    num_cores = multiprocessing.cpu_count()
    now = time.time()
    if len(sys.argv) >= 2:
        patient = sys.argv[1]
        res = Parallel(n_jobs=num_cores)(delayed(load_training_data)(file)
                                         for file in walk_training_data(patient))
        X, y = sort_data(res)
        clf = train_clf(X, y)
        sapi.insert(clf, "%s_clf" % patient)
    else:
        for patient in patients:
            res = Parallel(n_jobs=num_cores)(delayed(load_training_data)(file)
                                             for file in walk_training_data(patient))
            X, y = sort_data(res)
            clf = train_clf(X, y)
            sapi.insert(clf, "%s_clf" % patient)
    print("Finished in", time.time() - now, "sec")
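# The commented-out block above standardises the raw predictions and squashes
# them through a logistic curve. The same idea vectorised with numpy; 'slope'
# generalises the hard-coded 0.5. Note the transform is monotone, so it cannot
# change roc_auc_score (which only ranks predictions); it just maps the scores
# into (0, 1).
import numpy as np


def calibrate(y_pred, slope=0.5):
    z = (y_pred - y_pred.mean()) / y_pred.std()  # zero mean, unit variance
    return 1.0 / (1.0 + np.exp(-slope * z))      # logistic squash into (0, 1)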