def predict_for_patient(patient_id, pred): print "predicting ", patient_id + 1 data = iu.load_data_for_patient(patient_id, dtype='test') ch_idx = 4 X = np.array( [get_radon_features(sp[ch_idx]) for sp in data['raw_spectrograms']]) return zip(data['file_name'], pred.predict_proba(X))
def predict_for_patient(patient_id, run_id): print "predicting ", patient_id + 1 data = iu.load_data_for_patient(patient_id, dtype='test') # spec_data = iu.load_data_for_patient( # patient_id, dtype='test', file_name='spectrograms_4x4.npy') X = get_X_data(data['corr_channel_bands']) X[np.isnan(X)] = 0. model = load_model(get_model_file(run_id, patient_id)) predictions = model.predict_proba(X) print predictions.shape return zip(data['file_name'], predictions[:, 1])
def predict(patient_id):
    """Average test predictions over repeated fits on K-fold training splits.

    The patient's training data (after ``apply_safe_indexes``) is split into
    5 folds. For every fold, 5 fresh models from ``build_model`` are fitted
    on the fold's training part with early stopping on ``val_loss`` against
    the fold's validation part, and column 1 of each model's
    ``predict_proba`` on the test features is accumulated. The accumulated
    sum is divided by the number of fitted models (folds * bags).

    Returns ``zip(test file names, averaged predictions)``.
    """
    n_bags = 5
    n_folds = 5

    train_data = apply_safe_indexes(
        ut.load_data_for_patient(patient_id=patient_id, dtype='train'))
    test_data = ut.load_data_for_patient(patient_id=patient_id, dtype='test')
    test_X = generate_x_data(test_data)

    folds = KFold(n_splits=n_folds)
    stopper = EarlyStopping(monitor='val_loss', patience=4)

    # One accumulator slot per test recording.
    totals = np.zeros((test_data['raw_spectrograms'].shape[0], ))

    for fit_rows, val_rows in folds.split(train_data['target']):
        fit_data = apply_indexes(train_data, fit_rows)
        held_out = apply_indexes(train_data, val_rows)

        for _ in range(n_bags):
            model = build_model()
            batches = data_generator(fit_data)
            model.fit_generator(
                batches,
                samples_per_epoch=fit_data['target'].shape[0],
                validation_data=generate_x_y_data(held_out),
                callbacks=[stopper],
                nb_epoch=10)
            totals += model.predict_proba(test_X)[:, 1]

    # Turn the running sum into a mean over all fitted models.
    totals /= 1. * n_bags * n_folds
    return zip(test_data['file_name'], totals)
def train_svm(patient_id):
    """Fit a probability-enabled SVC on radon features of one patient.

    Loads the patient's data, keeps only the rows selected by
    ``iu.get_safe_index`` and computes ``get_radon_features`` on one
    spectrogram channel of every remaining recording. Targets are converted
    to ``int`` before fitting.

    Returns the fitted ``svm.SVC(probability=True)`` instance.
    """
    loaded = iu.load_data_for_patient(patient_id)
    keep = iu.get_safe_index(loaded['file_name'])
    data = apply_indexes(loaded, keep)

    ch_idx = 3
    rows = []
    for sp in data['raw_spectrograms']:
        rows.append(get_radon_features(sp[ch_idx]))
    X = np.array(rows)
    Y = [int(label) for label in data['target']]

    clf = svm.SVC(probability=True)
    clf.fit(X, Y)
    return clf
def chilly_build(pid):
    """Build one feature entry per source file from a patient's test data.

    Loads the patient's 'traditional.npy' test data, normalises it with
    ``chilyfy_data`` and scales ``data['data']`` with the module-level
    ``scaler``. Consecutive rows sharing the same ``data['mat_files']`` entry
    form one group; each group is scored with the module-level ``model`` and
    column 1 of its ``predict_proba`` output is reduced with ``get_feat``.

    Args:
        pid: patient identifier forwarded to ``ut.load_data_for_patient``.

    Returns:
        ``(features, files)`` where ``features`` is ``np.array`` of the
        ``get_feat`` results (one per group) and ``files`` is a list with
        the first ``data['file_name']`` entry of each group.
    """
    data = ut.load_data_for_patient(
        patient_id=pid, file_name='traditional.npy', dtype='test')
    data = chilyfy_data(data)
    X = scaler.transform(data['data'])

    # Row positions are taken from enumerate() over mat_files rather than
    # from data['target']: chilyfy_data treats 'target' as optional, so test
    # data should not need it merely to count rows. This also avoids the
    # int -> str -> int round trip through a mixed (index, name) array.
    # groupby only merges *adjacent* equal keys.
    # NOTE(review): assumes rows of one mat file are contiguous -- confirm.
    idx = [
        [row for row, _name in rows]
        for _key, rows in groupby(
            enumerate(data['mat_files']), key=lambda pair: pair[1])
    ]

    files = [data['file_name'][ix][0] for ix in idx]
    pred = [model.predict_proba(X[ix, :])[:, 1] for ix in idx]
    pfeats = [get_feat(p) for p in pred]
    return np.array(pfeats), files
def generate_model(run_id, patient_id, model_gen_func=build_model): data = iu.load_data_for_patient(patient_id) safe_indexes = iu.get_safe_index(data['file_name']) data = apply_indexes(data, safe_indexes) # spec_4x4 = iu.load_data_for_patient( # patient_id, file_name='spectrograms_4x4.npy')[safe_indexes] # X = np.array([ # convert_to_image_format(spec) # for spec in data['raw_spectrograms'] # ]) X = get_X_data(data['corr_channel_bands']) X[np.isnan(X)] = 0. Y = np_utils.to_categorical(data['target'], 2) X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.1) print "Building model ...", patient_id model = model_gen_func(X_train[0].shape) print "Fitting model ...", patient_id fit_model(model, (X_train, Y_train, X_test, Y_test)) model.save(get_model_file(run_id, patient_id))
def chilyfy_data(data): data['file_name'] = np.array([basename(fn) for fn in data['mat_files']]) data['mat_files'] = np.array(data['mat_files']) data['data'] = np.array(data['data']) if 'target' in data: data['target'] = np.array(data['target']) return data patient_id = 2 data = ut.apply_safe_indexes( chilyfy_data( ut.load_data_for_patient(patient_id, file_name='traditional.npy'))) X = data['data'] scaler = MinMaxScaler() scaler.fit(X) X = scaler.transform(X) clf = SVC(verbose=True, probability=True, C=1e2) clf.fit(X, data['target']) y_pred = clf.predict(X) print accuracy_score(data['target'], y_pred) indexes = np.arange(data['target'].shape[0]).reshape(-1, 30)
# NOTE(review): the statements down to the `return` are the tail of a function
# whose `def` line is not visible in this source; they are kept indented as a
# function body. They group row indices by data['mat_files'] entry, score
# every group with `model` and reduce column 1 of each predict_proba output
# with get_feat.
    idx = [
        # groupby yields (index, mat_file) pairs per run of equal mat_file;
        # column 0 of the resulting array holds the row indices.
        map(int, np.array(list(v))[:, 0])
        for _, v in groupby(zip(
            np.arange(0, data['target'].shape[-1]), data['mat_files']),
            key=lambda x: x[1])
    ]
    # First file name of each group.
    files = [data['file_name'][ix][0] for ix in idx]
    pred = [model.predict_proba(X[ix, :])[:, 1] for ix in idx]
    pfeats = map(get_feat, pred)
    return np.array(pfeats), files


# --- Script: prepare scaled features and one-hot targets for patient 2 ------
data = ut.load_data_for_patient(patient_id=2, file_name='traditional.npy')
data = ut.apply_safe_indexes(chilyfy_data(data))
# filter all complete dropouts
# (keeps the rows whose mean absolute value over columns 192 onward is
# non-zero)
good_data_indx = np.where(np.mean(np.abs(data['data'][:, 192:]), axis=1) != 0)
data = ut.apply_indexes(data, good_data_indx)
X = data['data']
# Scale the features with a MinMaxScaler fitted on this same data.
scaler = MinMaxScaler()
scaler.fit(X)
X = scaler.transform(X)
Y = np_utils.to_categorical(data['target'])
# NOTE(review): wdata is not used in the visible code -- confirm it is needed.
wdata = ut.load_data(patient_id=0)
import utils as ut
import pandas as pd
import numpy as np

# Script: compute, for every recording and every time frame, the pairwise
# correlations between the rows of axis 1 of the spectrogram, and store them
# under data['corr_channel_bands'] before saving the patient data again.

patient_id = 0
data = ut.load_data_for_patient(patient_id)
# this is (1302, 16, 6, 60)
# NOTE(review): presumably (recordings, channels, bands, time frames) --
# confirm the meaning of axes 1 and 2.
specs = data['raw_spectrograms']

# Sizes are derived from the array instead of being hard-coded (16, 120, 60),
# so the script keeps working for other channel / frame counts.
n_recordings = specs.shape[0]
n_channels = specs.shape[1]
n_time_frames = specs.shape[3]
# Number of entries strictly above the diagonal of an n x n matrix.
n_pairs = n_channels * (n_channels - 1) // 2


# extract correlation for each time-frame
def extract_correlation(i, time_index):
    """Return the upper-triangle correlations of recording ``i`` at one frame.

    Takes ``specs[i][:, :, time_index]``, transposes it so that axis 0 of the
    spectrogram becomes the DataFrame columns, computes their pairwise
    correlation matrix with pandas and returns the entries strictly above
    the diagonal as a flat array.
    """
    spec = specs[i]
    d = pd.DataFrame(data=spec[:, :, time_index].T).corr()
    # `.values` replaces DataFrame.as_matrix(), which newer pandas releases
    # no longer provide; both return the same ndarray here.
    return d.values[np.triu_indices(d.shape[0], k=1)].ravel()


final_arr = np.zeros((n_recordings, n_pairs, n_time_frames))
for i in range(n_recordings):
    # A list (not a lazy map object) so np.array always gets a sequence.
    corr_mat = [extract_correlation(i, t) for t in range(n_time_frames)]
    # Rows of corr_mat are time frames; store as (pairs, time frames).
    final_arr[i, :, :] = np.array(corr_mat).T

data['corr_channel_bands'] = final_arr
ut.save_data_for_patient(patient_id, data)