Example #1
import collections
import copy

import pandas as pd
from pymongo import MongoClient

import load_raw_data  # project helper that yields the raw .mat recordings
import shelve_api     # project helper that persists objects to a shelve (import paths assumed)


def insert_patient(patient):
    name_to_post_id = {}
    client = MongoClient()
    db = client['gingivere']
    collection = db[patient]

    d = collections.defaultdict(list)

    # Walk the raw training recordings and store one MongoDB document per channel.
    for data in load_raw_data.walk_training_mats(patient):
        post_item = copy.deepcopy(data)
        channels = post_item['channels']
        del post_item['data']
        del post_item['channels']
        for i, item in enumerate(data['data']):
            post_item['data'] = item.tolist()
            post_item['channel'] = channels[i]
            name = "%02d_%s" % (i, data['file'])
            post_id = collection.insert(post_item)
            d['name'].append(name)
            d['_id'].append(str(post_id))
            d['state'].append(post_item['state'])
            d['channel'].append(channels[i])
            print("Just posted: " + name)
            # pymongo adds an _id to post_item on insert; drop it so the next
            # channel gets a fresh id.
            del post_item['_id']

    # Shelve a small index DataFrame (name, _id, state, channel) for later lookups.
    df = pd.DataFrame(d)
    shelve_api.insert(df, "labeled_" + patient)
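
A minimal read-back sketch for the index shelved above. It assumes shelve_api.load() is the counterpart of shelve_api.insert() (the later examples call sapi.load("%s_mm" % patient)), so treat the call and key as an illustration rather than the project's confirmed API.

import shelve_api

# Hypothetical: reload the index DataFrame written by insert_patient().
labels = shelve_api.load("labeled_Dog_1")
print(labels[['name', 'state', 'channel']].head())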
Example #2
from collections import defaultdict

import numpy as np

import raw_data_clf
import mongo_select
from tests import shelve_api


print("Training the CLF")
clf = raw_data_clf.RawClf('Dog_2')
clf.clear_data()

print()
print("Cleared the data")
print()

d = defaultdict(list)

# Predict on every stored segment, collecting the prediction and the remaining
# metadata fields into parallel lists.
for item in mongo_select.get_all('Dog_2'):
    data = np.array(item['data']).astype('float32')
    prediction = clf.predict(data)
    d['pred'].append(prediction)
    del item['data']
    for key in item:
        d[key].append(item[key])
    print("Just predicted %s for %s" % (prediction[0], item['file']))


shelve_api.insert(d, 'preds')
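
The shelved 'preds' object is a plain dict of parallel lists; a DataFrame view (the other examples build one with pd.DataFrame(d)) makes it easier to inspect. A sketch, again assuming shelve_api.load() mirrors shelve_api.insert():

import pandas as pd

from tests import shelve_api

# Hypothetical: reload the shelved predictions and view them as a table.
preds = pd.DataFrame(shelve_api.load('preds'))
print(preds[['file', 'pred']].head())  # 'file' copied from the Mongo documents, 'pred' from clf.predict()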
Example #3
# (fragment) tail of an insert_patient() generator: it numbers each document with
# a running count and yields it for the bulk insert below.
            insert_item['_id'] = count
            count += 1
            yield insert_item

from collections import defaultdict

import motor
import pandas as pd
import tornado.ioloop
from tornado import gen

from tests import shelve_api  # import path assumed, as in the other examples


def shelve(result, error):
    if error:
        print('error getting user!', error)
    else:
        # NOTE: `i` (a channel index) is not defined in this fragment; in the
        # original module it is presumably available from an enclosing scope.
        name = "%02d_%s" % (i, result['file'])
        d['name'].append(name)
        d['_id'].append(result['_id'])
        d['state'].append(result['state'])
        d['channel'].append(result['channel'])
        print("Just posted: " + name)


@gen.coroutine
def bulk_write():
    global d
    d = defaultdict(list)
    # Bulk-insert every document yielded by insert_patient(); motor invokes
    # shelve() as the completion callback (legacy callback-style API).
    collection.insert((i for i in insert_patient('Dog_2')), callback=shelve)


if __name__ == "__main__":
    client = motor.MotorClient()
    db = motor.MotorDatabase(client, 'gingivere')
    collection = motor.MotorCollection(db, 'Dog_1')
    tornado.ioloop.IOLoop.current().run_sync(bulk_write)
    df = pd.DataFrame(d)
    shelve_api.insert(df, 'test_dog_1')
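
The callback style above is motor's legacy interface; a sketch of the same bulk insert in coroutine style (insert_many() being the non-deprecated counterpart of insert()), assuming the same module-level client and collection setup:

@gen.coroutine
def bulk_write_coro():
    # Materialise the documents and insert them in one batch.
    docs = list(insert_patient('Dog_2'))
    result = yield collection.insert_many(docs)
    print("inserted %d documents" % len(result.inserted_ids))

tornado.ioloop.IOLoop.current().run_sync(bulk_write_coro)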
Example #4
import multiprocessing
import os
import sys
import time

import numpy as np
from joblib import Parallel, delayed

import shelve_api as sapi  # import alias assumed


def get_min_max(patient):
    # Load the per-patient (min, max) values shelved as "<patient>_mm" by the
    # preprocessing step (see the process_data() examples below).
    mm = sapi.load("%s_mm" % patient)
    return (np.min(mm), np.max(mm))


def walk_data(patient):
    path = "D:/gingivere/data/"
    for file in os.listdir(path):
        if patient in file:
            yield path + file


if __name__ == '__main__':
    patients = [
        "Dog_1", "Dog_2", "Dog_3", "Dog_4", "Dog_5", "Patient_1", "Patient_2"
    ]
    # d_keys = ['data_length_sec', 'sampling_frequency', 'sequence', 'state', 'file']
    num_cores = multiprocessing.cpu_count()
    now = time.time()
    if len(sys.argv) >= 2:
        patient = sys.argv[1]
        r = get_min_max(patient)
        # preprocess_data() is defined elsewhere in the module; each job receives
        # a (file path, (min, max)) tuple.
        res = Parallel(n_jobs=num_cores)(delayed(preprocess_data)((i, r))
                                         for i in walk_data(patient))
        sapi.insert(res, "%s_len" % patient)
    else:
        for patient in patients:
            r = get_min_max(patient)
            res = Parallel(n_jobs=num_cores)(delayed(preprocess_data)((i, r))
                                             for i in walk_data(patient))
            sapi.insert(res, "%s_len" % patient)
    print("Finished in", time.time() - now, "sec")
Example #5
import multiprocessing
import sys
import time

import numpy as np
import pandas as pd
from joblib import Parallel, delayed

import load_raw_data as lrd  # import aliases assumed
import shelve_api as sapi

print(__doc__)


def process_data(input):
    data = lrd.load_mat(*input)
    name = data['file'].split('.')[0]
    print(name)
    # Write each recording to its own HDF5 file, one row per channel.
    store = pd.HDFStore("D:/gingivere/data/%s.h5" % name)
    # pp = preprocessing.scale(data['data'])
    df = pd.DataFrame(data['data'], index=data['channels'])
    del data
    store['data'] = df
    store.close()
    return np.asarray([df.values.min(), df.values.max()])

if __name__ == '__main__':
    patients = ["Dog_1", "Dog_2", "Dog_3", "Dog_4", "Dog_5", "Patient_1", "Patient_2"]
    num_cores = multiprocessing.cpu_count()
    now = time.time()
    if len(sys.argv) >= 2:
        patient = sys.argv[1]
        # res = Parallel(n_jobs=num_cores)(delayed(process_data)(i) for i in lrd.walk_files(patient))
        res = []
        for i in lrd.walk_files(patient):
            res.append(process_data(i))
        sapi.insert(res, "%s_mm" % patient)
    else:
        for patient in patients:
            res = Parallel(n_jobs=num_cores)(delayed(process_data)(i)
                                             for i in lrd.walk_files(patient))
            sapi.insert(res, "%s_mm" % patient)
    print("Finished in", time.time() - now, "sec")
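
A sketch of reading one of the stores written by process_data() back into pandas. The 'data' key and the D:/gingivere/data/ layout come from the function above; the file name here is purely illustrative:

import pandas as pd

# Hypothetical read-back of a single preprocessed recording.
df = pd.read_hdf("D:/gingivere/data/some_segment.h5", 'data')
print(df.shape)            # (n_channels, n_samples)
print(df.index.tolist())   # channel labels taken from data['channels']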
Example #6
def process_data(input):
    # Same function as in Example #5; the signature and load_mat call are
    # completed from that example.
    data = lrd.load_mat(*input)
    name = data['file'].split('.')[0]
    print(name)
    store = pd.HDFStore("D:/gingivere/data/%s.h5" % name)
    # pp = preprocessing.scale(data['data'])
    df = pd.DataFrame(data['data'], index=data['channels'])
    del data
    store['data'] = df
    store.close()
    return np.asarray([df.values.min(), df.values.max()])


if __name__ == '__main__':
    patients = [
        "Dog_1", "Dog_2", "Dog_3", "Dog_4", "Dog_5", "Patient_1", "Patient_2"
    ]
    num_cores = multiprocessing.cpu_count()
    now = time.time()
    if len(sys.argv) >= 2:
        patient = sys.argv[1]
        # res = Parallel(n_jobs=num_cores)(delayed(process_data)(i) for i in lrd.walk_files(patient))
        res = []
        for i in lrd.walk_files(patient):
            res.append(process_data(i))
        sapi.insert(res, "%s_mm" % patient)
    else:
        for patient in patients:
            res = Parallel(n_jobs=num_cores)(delayed(process_data)(i)
                                             for i in lrd.walk_files(patient))
            sapi.insert(res, "%s_mm" % patient)
    print("Finished in", time.time() - now, "sec")
Example #7
# (fragment) tail of train_clf(X, y): the cross-validation diagnostics below are
# commented out and only the final fit on the full training data remains.
        #     print(roc_auc_score(y_true, y_pred))
        #     print()
    clf.fit(X, y)
    return clf


if __name__ == '__main__':
    patients = [
        "Dog_1", "Dog_2", "Dog_3", "Dog_4", "Dog_5", "Patient_1", "Patient_2"
    ]
    # d_keys = ['data_length_sec', 'sampling_frequency', 'sequence', 'state', 'file']
    num_cores = multiprocessing.cpu_count()
    now = time.time()
    if len(sys.argv) >= 2:
        patient = sys.argv[1]
        # load_training_data(), walk_training_data() and sort_data() are defined
        # elsewhere in the module; file loading is parallelised across cores.
        res = Parallel(n_jobs=num_cores)(
            delayed(load_training_data)(file)
            for file in walk_training_data(patient))
        X, y = sort_data(res)
        clf = train_clf(X, y)
        sapi.insert(clf, "%s_clf" % patient)
    else:
        for patient in patients:
            res = Parallel(n_jobs=num_cores)(
                delayed(load_training_data)(file)
                for file in walk_training_data(patient))
            X, y = sort_data(res)
            clf = train_clf(X, y)
            sapi.insert(clf, "%s_clf" % patient)
    print("Finished in", time.time() - now, "sec")
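
A sketch of reloading the shelved classifier and reusing it on freshly loaded feature rows. It assumes sapi.load() is the counterpart of sapi.insert() (matching the sapi.load("%s_mm" % patient) call in the preprocessing examples) and reuses the helper functions from the snippet above:

# Hypothetical: reload one patient's classifier and predict on its segments.
num_cores = multiprocessing.cpu_count()
clf = sapi.load("Dog_1_clf")
res = Parallel(n_jobs=num_cores)(
    delayed(load_training_data)(file) for file in walk_training_data("Dog_1"))
X, y = sort_data(res)
print(clf.predict(X)[:10])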
Example #8
from __future__ import print_function

from collections import defaultdict

import numpy as np

import raw_data_clf
import mongo_select
from tests import shelve_api

print("Training the CLF")
clf = raw_data_clf.RawClf('Dog_2')
clf.clear_data()

print()
print("Cleared the data")
print()

d = defaultdict(list)

for item in mongo_select.get_all('Dog_2'):
    data = np.array(item['data']).astype('float32')
    prediction = clf.predict(data)
    d['pred'].append(prediction)
    del item['data']
    for key in item:
        d[key].append(item[key])
    print("Just predicted %s for %s" % (prediction[0], item['file']))

shelve_api.insert(d, 'preds')
Example #9
# (fragment) tail of preprocess_data(): it reports the file just handled and
# returns the length of the array X built above this point.
    print(file)
    return len(X)

def get_min_max(patient):
    mm = sapi.load("%s_mm" % patient)
    return (np.min(mm), np.max(mm))

def walk_data(patient):
    path = "D:/gingivere/data/"
    for file in os.listdir(path):
        if patient in file:
            yield path + file


if __name__ == '__main__':
    patients = ["Dog_1", "Dog_2", "Dog_3", "Dog_4", "Dog_5", "Patient_1", "Patient_2"]
    # d_keys = ['data_length_sec', 'sampling_frequency', 'sequence', 'state', 'file']
    num_cores = multiprocessing.cpu_count()
    now = time.time()
    if len(sys.argv) >= 2:
        patient = sys.argv[1]
        r = get_min_max(patient)
        res = Parallel(n_jobs=num_cores)(delayed(preprocess_data)((i, r)) for i in walk_data(patient))
        sapi.insert(res, "%s_len" % patient)
    else:
        for patient in patients:
            r = get_min_max(patient)
            res = Parallel(n_jobs=num_cores)(delayed(preprocess_data)((i, r))
                                             for i in walk_data(patient))
            sapi.insert(res, "%s_len" % patient)
    print("Finished in", time.time() - now, "sec")
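
preprocess_data() itself is not part of this listing; a plausible shape, inferred only from its call site (a (file path, (min, max)) tuple), from the tail fragment at the top of this example, and from the HDF5 layout written by process_data() in the earlier examples, might be:

import pandas as pd

# Hypothetical reconstruction of preprocess_data(); the real implementation is
# not shown in this listing.
def preprocess_data(args):
    file, (lo, hi) = args              # HDF5 path plus the per-patient (min, max)
    df = pd.read_hdf(file, 'data')     # written by process_data() as store['data']
    X = (df.values - lo) / (hi - lo)   # rescale with the patient-wide range
    print(file)
    return len(X)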
Example #10
# (fragment) tail of train_clf(X, y): the ROC-AUC diagnostics below are commented
# out and only the final fit on the full training data remains.
        #     print()
        #
        #     y_true, y_pred = y, clf.predict(X)
        #     print(roc_auc_score(y_true, y_pred))
            # y_pred = y_pred - y_pred.mean()
            # y_pred = y_pred/y_pred.std()
            # y_pred = [1/(1+math.pow(math.e, -.5*p)) for p in y_pred]
        #     print(roc_auc_score(y_true, y_pred))
        #     print()
    clf.fit(X, y)
    return clf

if __name__ == '__main__':
    patients = ["Dog_1", "Dog_2", "Dog_3", "Dog_4", "Dog_5", "Patient_1", "Patient_2"]
    # d_keys = ['data_length_sec', 'sampling_frequency', 'sequence', 'state', 'file']
    num_cores = multiprocessing.cpu_count()
    now = time.time()
    if len(sys.argv) >= 2:
        patient = sys.argv[1]
        res = Parallel(n_jobs=num_cores)(delayed(load_training_data)(file) for file in walk_training_data(patient))
        X, y = sort_data(res)
        clf = train_clf(X, y)
        sapi.insert(clf, "%s_clf" % patient)
    else:
        for patient in patients:
            res = Parallel(n_jobs=num_cores)(
                delayed(load_training_data)(file)
                for file in walk_training_data(patient))
            X, y = sort_data(res)
            clf = train_clf(X, y)
            sapi.insert(clf, "%s_clf" % patient)
    print("Finished in", time.time() - now, "sec")