def predict_for_patient(patient_id, pred):
    """Predict class probabilities for one patient's test recordings.

    Loads the patient's test data through ``iu.load_data_for_patient``,
    computes ``get_radon_features`` on index 4 of every entry of
    ``data['raw_spectrograms']`` and passes the stacked features to
    ``pred.predict_proba``.

    Args:
        patient_id: Identifier forwarded to ``iu.load_data_for_patient``;
            ``patient_id + 1`` is shown in the progress message.
        pred: Object exposing ``predict_proba`` (presumably an already
            fitted classifier -- confirm against the caller).

    Returns:
        A list of pairs obtained by zipping ``data['file_name']`` with the
        output of ``pred.predict_proba``.
    """
    # One pre-formatted argument prints the same text on Python 2 and 3.
    print("predicting  %s" % (patient_id + 1,))
    data = iu.load_data_for_patient(patient_id, dtype='test')
    # NOTE(review): train_svm in this file extracts features from index 3,
    # not 4 -- confirm the two are not meant to be used together.
    ch_idx = 4

    X = np.array(
        [get_radon_features(sp[ch_idx]) for sp in data['raw_spectrograms']])

    # list(...) keeps the Python 2 return type (a list) on Python 3 too.
    return list(zip(data['file_name'], pred.predict_proba(X)))
def predict_for_patient(patient_id, run_id):
    """Predict with a previously saved model for one patient's test data.

    Builds the input matrix with ``get_X_data`` from
    ``data['corr_channel_bands']``, replaces NaNs by zero, loads the model
    stored at ``get_model_file(run_id, patient_id)`` and returns column 1 of
    its ``predict_proba`` output for every test file.

    Args:
        patient_id: Identifier forwarded to ``iu.load_data_for_patient`` and
            ``get_model_file``; ``patient_id + 1`` is shown in the progress
            message.
        run_id: Run identifier forwarded to ``get_model_file``.

    Returns:
        A list of ``(file_name, value)`` pairs, where ``value`` comes from
        column 1 of the ``predict_proba`` output.
    """
    # One pre-formatted argument prints the same text on Python 2 and 3.
    print("predicting  %s" % (patient_id + 1,))
    data = iu.load_data_for_patient(patient_id, dtype='test')

    X = get_X_data(data['corr_channel_bands'])
    # NaN entries are zeroed in place before the model sees them.
    X[np.isnan(X)] = 0.

    model = load_model(get_model_file(run_id, patient_id))
    predictions = model.predict_proba(X)
    print(predictions.shape)

    # list(...) keeps the Python 2 return type (a list) on Python 3 too.
    return list(zip(data['file_name'], predictions[:, 1]))
# Example #3
# 0
def predict(patient_id):
    """Average test-set predictions over a K-fold / bagging ensemble.

    The patient's training data (after ``apply_safe_indexes``) is split into
    5 folds. For each fold, 5 fresh models from ``build_model`` are fitted on
    the fold's training part, using the held-out part as validation data and
    early stopping on ``val_loss``. Column 1 of every model's
    ``predict_proba`` on the test inputs is accumulated and finally divided
    by the number of fitted models (5 * 5).

    Args:
        patient_id: Identifier forwarded to ``ut.load_data_for_patient``.

    Returns:
        ``zip`` of the test ``file_name`` entries with the averaged
        predictions.
    """
    n_bags = 5
    n_folds = 5

    train_data = apply_safe_indexes(
        ut.load_data_for_patient(patient_id=patient_id, dtype='train'))
    test_data = ut.load_data_for_patient(patient_id=patient_id, dtype='test')
    test_X = generate_x_data(test_data)

    splitter = KFold(n_splits=n_folds)
    stopper = EarlyStopping(monitor='val_loss', patience=4)

    # One accumulator slot per test spectrogram.
    n_test = test_data['raw_spectrograms'].shape[0]
    pred = np.zeros((n_test, ))

    for fit_idx, holdout_idx in splitter.split(train_data['target']):
        fit_data = apply_indexes(train_data, fit_idx)
        holdout_data = apply_indexes(train_data, holdout_idx)

        for _ in range(n_bags):
            # A new model per bag; every one contributes equally to the sum.
            model = build_model()
            model.fit_generator(
                data_generator(fit_data),
                samples_per_epoch=fit_data['target'].shape[0],
                validation_data=generate_x_y_data(holdout_data),
                callbacks=[stopper],
                nb_epoch=10)

            pred += model.predict_proba(test_X)[:, 1]

    # Turn the running sum into the mean over all fitted models.
    pred /= float(n_bags * n_folds)

    return zip(test_data['file_name'], pred)
def train_svm(patient_id):
    """Fit a probabilistic SVM on radon features of one patient's data.

    The patient's data is restricted to the indexes returned by
    ``iu.get_safe_index``; ``get_radon_features`` is then computed on index 3
    of every entry of ``data['raw_spectrograms']`` and used as the feature
    matrix. Labels are ``data['target']`` converted to ``int``.

    Args:
        patient_id: Identifier forwarded to ``iu.load_data_for_patient``.

    Returns:
        The fitted ``svm.SVC(probability=True)`` classifier.
    """
    data = iu.load_data_for_patient(patient_id)
    safe_indexes = iu.get_safe_index(data['file_name'])
    data = apply_indexes(data, safe_indexes)

    ch_idx = 3
    X = np.array(
        [get_radon_features(sp[ch_idx]) for sp in data['raw_spectrograms']])

    # A real list: on Python 3 ``map`` would hand ``fit`` a lazy iterator
    # instead of a label sequence.
    Y = [int(label) for label in data['target']]

    clf = svm.SVC(probability=True)
    clf.fit(X, Y)

    return clf
def chilly_build(pid):
    """Build per-file prediction features from a patient's test data.

    Loads ``traditional.npy`` test data for ``pid``, normalises it with
    ``chilyfy_data`` and scales ``data['data']`` with the module-level
    ``scaler``. Row positions are then grouped by their ``mat_files`` entry;
    for each group the module-level ``model`` predicts on the group's rows
    and ``get_feat`` is applied to column 1 of the ``predict_proba`` output.

    Args:
        pid: Patient identifier forwarded to ``ut.load_data_for_patient``.

    Returns:
        Tuple ``(features, files)``: ``features`` is ``np.array`` of the
        ``get_feat`` results (one per group) and ``files`` holds the first
        ``file_name`` entry of each group.
    """
    data = ut.load_data_for_patient(patient_id=pid,
                                    file_name='traditional.npy',
                                    dtype='test')
    data = chilyfy_data(data)

    X = scaler.transform(data['data'])

    # NOTE(review): groupby only merges *consecutive* equal keys, so this
    # presumes rows of the same mat file are adjacent -- confirm.
    # NOTE(review): chilyfy_data treats 'target' as optional, yet it is
    # required here even though the data is loaded with dtype='test'.
    positions = np.arange(0, data['target'].shape[-1])
    idx = [
        # Plain int lists work as array indexes on Python 2 and 3 alike,
        # unlike the lazy ``map`` objects Python 3 would produce.
        [int(pos) for pos, _mat in group]
        for _key, group in groupby(zip(positions, data['mat_files']),
                                   key=lambda pair: pair[1])
    ]
    files = [data['file_name'][ix][0] for ix in idx]

    pred = [model.predict_proba(X[ix, :])[:, 1] for ix in idx]
    # Materialised so ``np.array`` receives a sequence, not an iterator.
    pfeats = [get_feat(p) for p in pred]

    return np.array(pfeats), files
def generate_model(run_id, patient_id, model_gen_func=build_model):
    """Train a model for one patient and save it to disk.

    The patient's data is restricted to the indexes returned by
    ``iu.get_safe_index``. Inputs come from ``get_X_data`` applied to
    ``data['corr_channel_bands']`` with NaNs replaced by zero; targets are
    one-hot encoded into 2 classes. 10% of the samples are held out by
    ``train_test_split`` and passed to ``fit_model`` together with the
    training part.

    Args:
        run_id: Run identifier forwarded to ``get_model_file``.
        patient_id: Identifier forwarded to ``iu.load_data_for_patient`` and
            ``get_model_file``.
        model_gen_func: Callable that receives the shape of one input sample
            and returns the model to fit. Defaults to ``build_model``.

    Returns:
        None. The fitted model is written to
        ``get_model_file(run_id, patient_id)`` via ``model.save``.
    """
    data = iu.load_data_for_patient(patient_id)
    safe_indexes = iu.get_safe_index(data['file_name'])
    data = apply_indexes(data, safe_indexes)

    X = get_X_data(data['corr_channel_bands'])
    # NaN entries are zeroed in place before training.
    X[np.isnan(X)] = 0.
    Y = np_utils.to_categorical(data['target'], 2)

    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.1)

    # Pre-formatted single arguments print identically on Python 2 and 3.
    print("Building model ... %s" % (patient_id,))
    model = model_gen_func(X_train[0].shape)

    print("Fitting model ... %s" % (patient_id,))
    fit_model(model, (X_train, Y_train, X_test, Y_test))
    model.save(get_model_file(run_id, patient_id))

def chilyfy_data(data):
    """Convert the list-valued entries of ``data`` to NumPy arrays in place.

    Adds a ``'file_name'`` entry holding the base name of every path in
    ``data['mat_files']``, then wraps ``'mat_files'``, ``'data'`` and, when
    present, ``'target'`` in ``np.array``.

    Args:
        data: Mapping with at least the keys ``'mat_files'`` and ``'data'``.

    Returns:
        The same ``data`` object, modified in place.
    """
    names = [basename(path) for path in data['mat_files']]
    data['file_name'] = np.array(names)

    # 'target' is optional; convert it only when the caller supplied one.
    array_keys = ['mat_files', 'data']
    if 'target' in data:
        array_keys.append('target')

    for key in array_keys:
        data[key] = np.array(data[key])

    return data


# Fit a MinMax-scaled SVM on one patient's "traditional" features and print
# its accuracy.
patient_id = 2

data = ut.apply_safe_indexes(
    chilyfy_data(
        ut.load_data_for_patient(patient_id, file_name='traditional.npy')))
X = data['data']
scaler = MinMaxScaler()
scaler.fit(X)

X = scaler.transform(X)

clf = SVC(verbose=True, probability=True, C=1e2)
clf.fit(X, data['target'])

# Predictions are made on the very samples the classifier was fitted on, so
# the score printed below is measured on the training data.
y_pred = clf.predict(X)

# Parenthesised single argument: valid on both Python 2 and Python 3.
print(accuracy_score(data['target'], y_pred))

# Sample positions arranged in rows of 30; ``reshape`` raises if the number
# of samples is not a multiple of 30.
indexes = np.arange(data['target'].shape[0]).reshape(-1, 30)
    # NOTE(review): these lines are indented like a function body, but no
    # enclosing ``def`` is visible at this point; they repeat the tail of
    # chilly_build. Confirm whether a function header was lost here.
    # Groups row positions by their ``mat_files`` entry (consecutive equal
    # entries only, as ``groupby`` does no sorting).
    idx = [
        map(int,
            np.array(list(v))[:, 0]) for _, v in groupby(zip(
                np.arange(0, data['target'].shape[-1]), data['mat_files']),
                                                         key=lambda x: x[1])
    ]
    # First file name of every group.
    files = [data['file_name'][ix][0] for ix in idx]

    # Column 1 of ``predict_proba`` for the rows of each group, then one
    # ``get_feat`` result per group.
    pred = [model.predict_proba(X[ix, :])[:, 1] for ix in idx]
    pfeats = map(get_feat, pred)

    return np.array(pfeats), files


# Load patient 2's "traditional" features, normalise the containers and keep
# only the safe indexes.
data = ut.apply_safe_indexes(
    chilyfy_data(
        ut.load_data_for_patient(patient_id=2, file_name='traditional.npy')))

# Filter all complete dropouts: keep rows whose columns from 192 onwards do
# not have a mean absolute value of exactly zero.
good_data_indx = np.where(np.abs(data['data'][:, 192:]).mean(axis=1) != 0)
data = ut.apply_indexes(data, good_data_indx)

# Scale the features with a scaler fitted on this same data and one-hot
# encode the targets.
X = data['data']
scaler = MinMaxScaler()
X = scaler.fit(X).transform(X)
Y = np_utils.to_categorical(data['target'])

wdata = ut.load_data(patient_id=0)
# Example #9
# 0
import utils as ut
import pandas as pd
import numpy as np

# For every recording of one patient, compute the pairwise correlations of
# its spectrogram at each time frame and store the result under
# ``data['corr_channel_bands']``.
patient_id = 0
data = ut.load_data_for_patient(patient_id)

# this is (1302, 16, 6, 60)
specs = data['raw_spectrograms']


# extract correlation for each time-frame
def extract_correlation(i, time_index):
    """Return the upper-triangle correlations of one time frame.

    Args:
        i: Index of the recording in the module-level ``specs`` array.
        time_index: Index along the last axis of that recording.

    Returns:
        1-D array holding the entries strictly above the diagonal of the
        correlation matrix between the first-axis entries of
        ``specs[i][:, :, time_index]``.
    """
    spec = specs[i]
    # Transposed so every first-axis entry becomes a DataFrame column, which
    # is what ``DataFrame.corr`` correlates pairwise.
    d = pd.DataFrame(data=spec[:, :, time_index].T).corr()
    # ``.values`` yields the same ndarray as ``as_matrix()``, which current
    # pandas releases no longer provide.
    return d.values[np.triu_indices(spec.shape[0], k=1)].ravel()


# Sizes are read from the data instead of being hard-coded; for the shape
# noted above they evaluate to 16 series, 120 pairs and 60 frames.
# (Axis 1 is presumably the channel axis, going by the output key name --
# confirm.)
n_channels = specs.shape[1]
n_frames = specs.shape[-1]
n_pairs = n_channels * (n_channels - 1) // 2

final_arr = np.zeros((specs.shape[0], n_pairs, n_frames))
for i in range(specs.shape[0]):
    # A real list so ``np.array`` gets a sequence on Python 3 as well.
    corr_mat = [extract_correlation(i, frame) for frame in range(n_frames)]
    # corr_mat is (frames, pairs); store it as (pairs, frames).
    final_arr[i, :, :] = np.array(corr_mat).T

data['corr_channel_bands'] = final_arr

ut.save_data_for_patient(patient_id, data)