def test_data_ann_rnn(feats, target, groups, ann, rnn):
    """
    Take two ready-trained models (ann+rnn),
    test on the input data and return acc+f1.
    """
    if target.ndim == 2: target = np.argmax(target, 1)
    cnn_pred = ann.predict_classes(feats, 1024, verbose=0)
    cnn_acc = accuracy_score(target, cnn_pred)
    cnn_f1 = f1_score(target, cnn_pred, average='macro')

    seqlen = rnn.input_shape[1]
    features_seq, target_seq, groups_seq = tools.to_sequences(
        feats, target, seqlen=seqlen, groups=groups)
    # note: the RNN is scored against targets rolled by 4 positions;
    # the unshifted target_seq is returned in the tuple below
    new_targ_seq = np.roll(target_seq, 4)
    rnn_pred = rnn.predict_classes(features_seq, 1024, verbose=0)
    rnn_acc = accuracy_score(new_targ_seq, rnn_pred)
    rnn_f1 = f1_score(new_targ_seq, rnn_pred, average='macro')
    confmat = confusion_matrix(new_targ_seq, rnn_pred)
    return [cnn_acc, cnn_f1, rnn_acc, rnn_f1, confmat,
            (rnn_pred, target_seq, groups_seq)]
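
# Hedged usage sketch (not part of the original source): how test_data_ann_rnn
# might be called with a trained feature ANN and its LSTM companion; the model
# paths are hypothetical placeholders.
#
#   ann = keras.models.load_model('ann_weights.h5')   # hypothetical path
#   rnn = keras.models.load_model('rnn_weights.h5')   # hypothetical path
#   res = test_data_ann_rnn(feats, target, groups, ann, rnn)
#   ann_acc, ann_f1, rnn_acc, rnn_f1, confmat, preds = res
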
def feat_rnn(c=0):
    # relies on module-level data/target/groups/plot;
    # z-score each feature set per modality, then concatenate
    feats_eeg = scipy.stats.zscore(tools.feat_eeg(data[:, :, 0]))
    feats_emg = scipy.stats.zscore(tools.feat_emg(data[:, :, 1]))
    feats_eog = scipy.stats.zscore(tools.feat_eog(data[:, :, 2]))
    feats_all = np.hstack([feats_eeg, feats_eog, feats_emg])
    feats_seq, targ_seq, groups_seq = tools.to_sequences(
        feats_all, target, groups=groups, seqlen=6, tolist=False)
    r = cv(feats_seq, targ_seq, groups_seq, models.pure_rnn_do,
           name='feat-rnn-all', stop_after=15, counter=c, plot=plot)
    with open('edfxresults_recurrent_feat.pkl', 'wb') as f:
        pickle.dump(r, f)
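
# Illustrative sketch, an assumption rather than the repo implementation:
# tools.to_sequences appears to slice per-epoch feature rows into consecutive
# sequences of length seqlen so the RNN sees temporal context. A minimal
# sliding-window analogue in plain numpy:
#
#   feats = np.random.rand(20, 8)    # 20 epochs, 8 features each
#   seqlen = 6
#   windows = np.stack([feats[j:j + seqlen]
#                       for j in range(len(feats) - seqlen + 1)])
#   print(windows.shape)             # (15, 6, 8)
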
def test_data_cnn_rnn(data, target, groups, cnn, rnn, layername='fc1',
                      cropsize=2800, verbose=1, only_lstm=False):
    """
    Take two ready-trained models (cnn+rnn),
    test on the input data and return acc+f1.
    """
    if target.ndim == 2: target = np.argmax(target, 1)
    if cropsize != 0:
        # center-crop each epoch to the input length expected by the CNN
        diff = (data.shape[1] - cropsize) // 2
        data = data[:, diff:-diff:, :]
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        if not only_lstm:
            cnn_pred = cnn.predict_classes(data, 1024, verbose=0)
        else:
            cnn_pred = target
        features = get_activations(cnn, data, layername, verbose=verbose)
        cnn_acc = accuracy_score(target, cnn_pred)
        cnn_f1 = f1_score(target, cnn_pred, average='macro')
        seqlen = rnn.input_shape[1]
        features_seq, target_seq, groups_seq = tools.to_sequences(
            features, target, seqlen=seqlen, groups=groups)
        # note: the RNN is scored against targets rolled by 4 positions;
        # the unshifted target_seq is returned in the tuple below
        new_targ_seq = np.roll(target_seq, 4)
        rnn_pred = rnn.predict_classes(features_seq, 1024, verbose=0)
        rnn_acc = accuracy_score(new_targ_seq, rnn_pred)
        rnn_f1 = f1_score(new_targ_seq, rnn_pred, average='macro')
        confmat = confusion_matrix(new_targ_seq, rnn_pred)
    return [cnn_acc, cnn_f1, rnn_acc, rnn_f1, confmat,
            (rnn_pred, target_seq, groups_seq)]
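
# Worked example of the center-crop above: assuming 3000-sample epochs
# (e.g. 30 s at 100 Hz) and cropsize=2800, 100 samples are trimmed from
# each side:
#
#   x = np.zeros((4, 3000, 3))             # (epochs, samples, channels)
#   diff = (x.shape[1] - 2800) // 2        # 100
#   x = x[:, diff:-diff, :]
#   print(x.shape)                         # (4, 2800, 3)
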
def cv(data, targets, groups, modfun, rnn=False, trans_tuple=None, epochs=250,
       folds=5, batch_size=256, val_batch_size=0, stop_after=0, name='',
       counter=0, plot=False, balanced=False, cropsize=0):
    """
    Cross-validation routine for a CNN, optionally with an RNN trained on
    features extracted from a layer of the base model.
    :param rnn: dict describing the RNN stage, with keys 'model', 'layers',
                'seqlen', 'batch_size', 'epochs', 'stop_after' and 'balanced'
    :param stop_after: stop after x epochs without f1-improvement. 0 for no stopping
    :param plot: True for plotting intermediate results and loss
    :param counter: prefix for saving files. can be any number.
    :param balanced: True if the generator should supply class-balanced batches
    :param cropsize: size that is randomly cropped for training (data augmentation)
    :param ...: all others should be self-explanatory
    :returns results: dictionary with all RNN results
    """
    if val_batch_size == 0: val_batch_size = batch_size
    input_shape = list((np.array(data[0])).shape)  # train_data.shape
    if cropsize != 0: input_shape[0] = cropsize
    n_classes = targets.shape[1]
    if type(modfun) == str:
        # a path was given instead of a model function: load saved weights per fold
        wpath = modfun
        modfun = False
    gcv = GroupKFold(folds)
    dict_id = (modfun.__name__ + name) if modfun else ('cnn' + '_' + name)
    results = {dict_id: []}
    if rnn:
        for lname in rnn['layers']:
            results[name + '_' + lname] = []

    for i, idxs in enumerate(gcv.split(groups, groups, groups)):
        K.clear_session()
        print('-----------------------------')
        print('Starting fold {}: {}-{} at {}'.format(
            i + 1, modfun.__name__ if modfun else 'cnn', name, time.ctime()))
        train_idx, test_idx = idxs
        # split an inner validation set off the training subjects
        sub_cv = GroupKFold(folds)
        train_sub_idx, val_idx = sub_cv.split(
            groups[train_idx], groups[train_idx], groups[train_idx]).__next__()
        val_idx = train_idx[val_idx]
        train_idx = train_idx[train_sub_idx]

        train_data = [data[i] for i in train_idx]
        train_target = targets[train_idx]
        train_groups = groups[train_idx]
        val_data = [data[i] for i in val_idx]
        val_target = targets[val_idx]
        val_groups = groups[val_idx]
        test_data = [data[i] for i in test_idx]
        test_target = targets[test_idx]
        test_groups = groups[test_idx]

        if modfun:
            model = modfun(input_shape, n_classes)
        else:
            fold = os.listdir(wpath)[i]
            model = keras.models.load_model(os.path.join(wpath, fold))
        modelname = model.name
        lname = modelname

        g_train = generator(train_data, train_target, batch_size * 2, val=True,
                            cropsize=cropsize)
        g_val = generator(val_data, val_target, batch_size * 2, val=True,
                          cropsize=cropsize)
        g_test = generator(test_data, test_target, batch_size * 2, val=True,
                           cropsize=cropsize)
        if balanced:
            g = generator_balanced(train_data, train_target, batch_size,
                                   cropsize=cropsize)
            cb = Checkpoint_balanced(g_val, g, g_train, verbose=1,
                                     counter=counter, groups=val_groups,
                                     epochs_to_stop=stop_after, plot=plot,
                                     name='{}, {}, fold: {}'.format(name, lname, i))
        else:
            g = generator(train_data, train_target, batch_size, random=True,
                          cropsize=cropsize)
            cb = Checkpoint_balanced(g_val, verbose=1, counter=counter,
                                     groups=val_groups,
                                     epochs_to_stop=stop_after, plot=plot,
                                     name='{}, {}, fold: {}'.format(name, lname, i))
        if modfun:
            model.fit_generator(g, g.n_batches, epochs=epochs, callbacks=[cb],
                                max_queue_size=1, verbose=0)
        y_pred = model.predict_generator(g_test, g_test.n_batches,
                                         max_queue_size=1)
        y_true = g_test.Y
        val_acc = cb.best_acc
        val_f1 = cb.best_f1
        test_acc = accuracy_score(np.argmax(y_true, 1), np.argmax(y_pred, 1))
        test_f1 = f1_score(np.argmax(y_true, 1), np.argmax(y_pred, 1),
                           average="macro")
        confmat = confusion_matrix(np.argmax(y_true, 1), np.argmax(y_pred, 1))

        if plot:
            plt.subplot(2, 3, 5)
            plt.cla()
            tools.plot_results_per_patient(y_pred, y_true, test_groups, fname='')
            plt.title('Test Cases')
            plt.subplot(2, 3, 6)
            plt.cla()
            tools.plot_confusion_matrix('', confmat,
                                        ['W', 'S1', 'S2', 'SWS', 'REM'],
                                        cbar=False)
            plt.title('Test conf. Acc: {:.1f} F1: {:.1f}'.format(
                test_acc * 100, test_f1 * 100))
            plt.show()
            plt.pause(0.0001)

        results[dict_id].append(
            [cb.best_acc, cb.best_f1, test_acc, test_f1, confmat])
        if modfun:  # only save if we trained the model in this run
            try:
                model.save(os.path.join(
                    '.', 'weights',
                    str(counter) + name + model.name + '_' + str(i)
                    + "_{:.3f}-{:.3f}".format(test_acc, test_f1)))
            except Exception as error:
                print("Got an error while saving model: {}".format(error))
        print('ANN results: val acc/f1: {:.5f}/{:.5f}, test acc/f1: {:.5f}/{:.5f}'
              .format(cb.best_acc, cb.best_f1, test_acc, test_f1))

        if trans_tuple is not None:
            # evaluate transfer performance of the CNN on an additional dataset
            trans_data, trans_target, trans_groups = trans_tuple
            g_trans = generator(trans_data, trans_target, batch_size * 2,
                                val=True, cropsize=cropsize)
            y_trans = model.predict_generator(g_trans, g_trans.n_batches,
                                              max_queue_size=1)
            t_trans = g_trans.Y
            trans_acc = accuracy_score(np.argmax(t_trans, 1),
                                       np.argmax(y_trans, 1))
            trans_f1 = f1_score(np.argmax(t_trans, 1), np.argmax(y_trans, 1),
                                average="macro")
            print('Transfer ANN results: acc/f1: {:.5f}/{:.5f}'.format(
                trans_acc, trans_f1))

        if rnn:
            rnn_modelfun = rnn['model']
            layernames = rnn['layers']
            seq = rnn['seqlen']
            rnn_bs = rnn['batch_size']
            rnn_epochs = rnn['epochs']
            stopafter_rnn = rnn['stop_after']
            for lname in layernames:
                # extract layer activations for all splits in one pass
                extracted = get_activations(model,
                                            train_data + val_data + test_data,
                                            lname, batch_size * 2,
                                            cropsize=cropsize)
                train_data_extracted = extracted[0:len(train_data)]
                val_data_extracted = extracted[len(train_data):
                                               len(train_data) + len(val_data)]
                test_data_extracted = extracted[len(train_data) + len(val_data):]
                assert (len(train_data) == len(train_data_extracted)) and \
                       (len(test_data) == len(test_data_extracted)) and \
                       (len(val_data) == len(val_data_extracted))
                train_data_seq, train_target_seq, train_groups_seq = \
                    tools.to_sequences(train_data_extracted, train_target,
                                       groups=train_groups, seqlen=seq)
                val_data_seq, val_target_seq, val_groups_seq = \
                    tools.to_sequences(val_data_extracted, val_target,
                                       groups=val_groups, seqlen=seq)
                test_data_seq, test_target_seq, test_groups_seq = \
                    tools.to_sequences(test_data_extracted, test_target,
                                       groups=test_groups, seqlen=seq)
                rnn_shape = list((np.array(train_data_seq[0])).shape)
                neurons = 100
                print('Starting RNN model with input from layer {}: {} at {}'
                      .format(lname, rnn_shape, time.ctime()))
                rnn_model = rnn_modelfun(rnn_shape, n_classes, layers=2,
                                         neurons=neurons, dropout=0.3)
                g_val = generator(val_data_seq, val_target_seq, rnn_bs * 2,
                                  val=True)
                g_test = generator(test_data_seq, test_target_seq, rnn_bs * 2,
                                   val=True)
                g_train = generator(train_data_seq, train_target_seq,
                                    batch_size * 2, val=True)
                if rnn['balanced']:
                    g = generator_balanced(train_data_seq, train_target_seq,
                                           rnn_bs)
                    cb = Checkpoint_balanced(g_val, g, g_train, verbose=1,
                                             counter=counter,
                                             groups=val_groups_seq,
                                             epochs_to_stop=stopafter_rnn,
                                             plot=plot,
                                             name='{}, {}, fold: {}'.format(
                                                 name, lname, i))
                else:
                    g = generator(train_data_seq, train_target_seq, rnn_bs)
                    cb = Checkpoint_balanced(g_val, verbose=1, counter=counter,
                                             groups=val_groups_seq,
                                             epochs_to_stop=stopafter_rnn,
                                             plot=plot,
                                             name='{}, {}, fold: {}'.format(
                                                 name, lname, i))
                rnn_model.fit_generator(g, g.n_batches, epochs=rnn_epochs,
                                        verbose=0, callbacks=[cb],
                                        max_queue_size=1)
                y_pred = rnn_model.predict_generator(g_test, g_test.n_batches,
                                                     max_queue_size=1)
                y_true = g_test.Y
                val_acc = cb.best_acc
                val_f1 = cb.best_f1
                test_acc = accuracy_score(np.argmax(y_true, 1),
                                          np.argmax(y_pred, 1))
                test_f1 = f1_score(np.argmax(y_true, 1), np.argmax(y_pred, 1),
                                   average="macro")
                confmat = confusion_matrix(np.argmax(y_true, 1),
                                           np.argmax(y_pred, 1))
                try:
                    rnn_model.save(os.path.join(
                        '.', 'weights',
                        str(counter) + name + lname + '_' + str(i)
                        + "_{:.3f}-{:.3f}".format(test_acc, test_f1)))
                except Exception as error:
                    print("Got an error while saving model: {}".format(error))
                if plot:
                    plt.subplot(2, 3, 5)
                    plt.cla()
                    tools.plot_results_per_patient(y_pred, y_true,
                                                   test_groups_seq, fname='')
                    plt.title('Test Cases')
                    plt.subplot(2, 3, 6)
                    plt.cla()
                    tools.plot_confusion_matrix('', confmat,
                                                ['W', 'S1', 'S2', 'SWS', 'REM'],
                                                cbar=False)
                    plt.title('Test conf. Acc: {:.1f} F1: {:.1f}'.format(
                        test_acc * 100, test_f1 * 100))
                    plt.show()
                    plt.pause(0.0001)
                results[name + '_' + lname].append(
                    [val_acc, val_f1, test_acc, test_f1, confmat])
                print('fold {}: val acc/f1: {:.5f}/{:.5f}, test acc/f1: {:.5f}/{:.5f}'
                      .format(i, cb.best_acc, cb.best_f1, test_acc, test_f1))

                if trans_tuple is not None:
                    # evaluate transfer performance of the LSTM on an additional dataset
                    trans_data, trans_target, trans_groups = trans_tuple
                    extracted = get_activations(model, trans_data, lname,
                                                batch_size * 2,
                                                cropsize=cropsize)
                    trans_data, trans_target, trans_groups = tools.to_sequences(
                        extracted, trans_target, groups=trans_groups, seqlen=seq)
                    g_trans = generator(trans_data, trans_target,
                                        batch_size * 2, val=True, cropsize=0)
                    y_trans = rnn_model.predict_generator(g_trans,
                                                          g_trans.n_batches,
                                                          max_queue_size=1)
                    t_trans = g_trans.Y
                    trans_acc = accuracy_score(np.argmax(t_trans, 1),
                                               np.argmax(y_trans, 1))
                    trans_f1 = f1_score(np.argmax(t_trans, 1),
                                        np.argmax(y_trans, 1), average="macro")
                    print('Transfer LSTM results: acc/f1: {:.5f}/{:.5f}'.format(
                        trans_acc, trans_f1))

        save_dict = {'1 Number': counter,
                     '2 Time': time.ctime(),
                     '3 CV': '{}/{}.'.format(i + 1, folds),
                     '5 Model': lname,
                     '100 Comment': name,
                     '10 Epochs': epochs,
                     '11 Val acc': '{:.2f}'.format(val_acc * 100),
                     '12 Val f1': '{:.2f}'.format(val_f1 * 100),
                     '13 Test acc': '{:.2f}'.format(test_acc * 100),
                     '14 Test f1': '{:.2f}'.format(test_f1 * 100),
                     'Test Conf': str(confmat).replace('\n', '')}
        tools.save_results(save_dict=save_dict)
        try:
            with open('{}_{}_results.pkl'.format(counter, dict_id), 'wb') as f:
                pickle.dump(results, f)
        except Exception as e:
            print("Error while saving results: ", e)
        sys.stdout.flush()
    return results
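
# Hedged usage sketch (assembled from the keys cv() reads above, not taken
# from the original source): cross-validating a CNN and an LSTM trained on
# 'fc1' activations. All values are illustrative.
#
#   rnn_cfg = {'model': models.pure_rnn_do, 'layers': ['fc1'], 'seqlen': 6,
#              'batch_size': 512, 'epochs': 250, 'stop_after': 15,
#              'balanced': False}
#   results = cv(data, targets, groups, models.cnn3adam_filter_l2,
#                rnn=rnn_cfg, name='cnn-lstm', stop_after=15, cropsize=2800)
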
def train_models_feat(data, targets, groups, batch_size=512, epochs=250,
                      epochs_to_stop=15):
    """
    Trains an ANN and an RNN model on the features of the given data
    with a 20% validation set and returns the two models.
    """
    input_shape = list((np.array(data[0])).shape)  # train_data.shape
    n_classes = targets.shape[1]
    train_idx, val_idx = GroupKFold(5).split(groups, groups, groups).__next__()
    train_data = [data[i] for i in train_idx]
    train_target = targets[train_idx]
    train_groups = groups[train_idx]
    val_data = [data[i] for i in val_idx]
    val_target = targets[val_idx]
    val_groups = groups[val_idx]

    model = models.ann(input_shape, n_classes)
    g_train = generator(train_data, train_target, batch_size, val=False)
    g_val = generator(val_data, val_target, batch_size, val=True)
    cb = Checkpoint_balanced(g_val, verbose=1, groups=val_groups,
                             epochs_to_stop=epochs_to_stop, plot=True,
                             name='{}, {}'.format(model.name, 'testing'))
    model.fit_generator(g_train, g_train.n_batches, epochs=epochs,
                        callbacks=[cb], max_queue_size=1, verbose=0)
    val_acc = cb.best_acc
    val_f1 = cb.best_f1
    print('ANN Val acc: {:.1f}, Val F1: {:.1f}'.format(val_acc * 100,
                                                       val_f1 * 100))

    # LSTM training
    train_idx, val_idx = GroupKFold(5).split(groups, groups, groups).__next__()
    train_data = np.array([data[i] for i in train_idx])
    train_target = targets[train_idx]
    train_groups = groups[train_idx]
    val_data = np.array([data[i] for i in val_idx])
    val_target = targets[val_idx]
    val_groups = groups[val_idx]
    train_data_seq, train_target_seq, train_groups_seq = tools.to_sequences(
        train_data, train_target, groups=train_groups, seqlen=6)
    val_data_seq, val_target_seq, val_groups_seq = tools.to_sequences(
        val_data, val_target, groups=val_groups, seqlen=6)
    input_shape = list((np.array(train_data_seq[0])).shape)  # train_data.shape
    print(input_shape)
    rnn_model = models.pure_rnn_do(input_shape, n_classes)
    g_train = generator(train_data_seq, train_target_seq, batch_size, val=False)
    g_val = generator(val_data_seq, val_target_seq, batch_size, val=True)
    cb = Checkpoint_balanced(g_val, verbose=1, groups=val_groups_seq,
                             epochs_to_stop=epochs_to_stop, plot=True,
                             name='{}, {}'.format(rnn_model.name, 'testing'))
    rnn_model.fit_generator(g_train, g_train.n_batches, epochs=epochs,
                            callbacks=[cb], max_queue_size=1, verbose=0)
    val_acc = cb.best_acc
    val_f1 = cb.best_f1
    print('LSTM Val acc: {:.1f}, Val F1: {:.1f}'.format(val_acc * 100,
                                                        val_f1 * 100))
    return model, rnn_model
def train_models(data, targets, groups, model=None, cropsize=2800,
                 batch_size=512, epochs=250, epochs_to_stop=15,
                 rnn_epochs_to_stop=15):
    """
    Trains a cnn3adam_filter_l2 model with an LSTM on top on the given data
    with a 20% validation set and returns the two models.
    """
    input_shape = list((np.array(data[0])).shape)  # train_data.shape
    input_shape[0] = cropsize
    n_classes = targets.shape[1]
    train_idx, val_idx = GroupKFold(5).split(groups, groups, groups).__next__()
    train_data = [data[i] for i in train_idx]
    train_target = targets[train_idx]
    train_groups = groups[train_idx]
    val_data = [data[i] for i in val_idx]
    val_target = targets[val_idx]
    val_groups = groups[val_idx]

    model = models.cnn3adam_filter_l2(input_shape, n_classes) \
        if model is None else model(input_shape, n_classes)
    g_train = generator(train_data, train_target, batch_size, val=False,
                        cropsize=cropsize)
    g_val = generator(val_data, val_target, batch_size, val=True,
                      cropsize=cropsize)
    cb = Checkpoint_balanced(g_val, verbose=1, groups=val_groups,
                             epochs_to_stop=epochs_to_stop, plot=True,
                             name='{}, {}'.format(model.name, 'testing'))
    model.fit_generator(g_train, g_train.n_batches, epochs=epochs,
                        callbacks=[cb], max_queue_size=1, verbose=0)
    val_acc = cb.best_acc
    val_f1 = cb.best_f1
    print('CNN Val acc: {:.1f}, Val F1: {:.1f}'.format(val_acc * 100,
                                                       val_f1 * 100))

    # LSTM training on extracted 'fc1' activations
    rnn_modelfun = models.pure_rnn_do
    lname = 'fc1'
    seq = 6
    rnn_epochs = epochs
    stopafter_rnn = rnn_epochs_to_stop
    features = get_activations(model, train_data + val_data, lname,
                               batch_size * 2, cropsize=cropsize)
    train_data_extracted = features[0:len(train_data)]
    val_data_extracted = features[len(train_data):]
    assert (len(train_data) == len(train_data_extracted)) \
        and (len(val_data) == len(val_data_extracted))
    train_data_seq, train_target_seq, train_groups_seq = tools.to_sequences(
        train_data_extracted, train_target, groups=train_groups, seqlen=seq)
    val_data_seq, val_target_seq, val_groups_seq = tools.to_sequences(
        val_data_extracted, val_target, groups=val_groups, seqlen=seq)
    rnn_shape = list((np.array(train_data_seq[0])).shape)
    neurons = int(np.sqrt(rnn_shape[-1]) * 4)
    rnn_model = rnn_modelfun(rnn_shape, n_classes, layers=2, neurons=neurons,
                             dropout=0.3)
    print('Starting RNN model {} with input from layer fc1: {} at {}'.format(
        rnn_model.name, rnn_shape, time.ctime()))
    g_train = generator(train_data_seq, train_target_seq, batch_size, val=False)
    g_val = generator(val_data_seq, val_target_seq, batch_size, val=True)
    cb = Checkpoint_balanced(g_val, verbose=1, groups=val_groups_seq,
                             epochs_to_stop=stopafter_rnn, plot=True,
                             name='{}, {}'.format(rnn_model.name, 'fc1'))
    rnn_model.fit_generator(g_train, g_train.n_batches, epochs=rnn_epochs,
                            verbose=0, callbacks=[cb], max_queue_size=1)
    val_acc = cb.best_acc
    val_f1 = cb.best_f1
    print('LSTM Val acc: {:.1f}, Val F1: {:.1f}'.format(val_acc * 100,
                                                        val_f1 * 100))
    return model, rnn_model
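
# Hedged usage sketch: training the CNN+LSTM pair and scoring it on held-out
# data with test_data_cnn_rnn (both defined above); test_data, test_target
# and test_groups are hypothetical held-out arrays.
#
#   cnn, lstm = train_models(data, targets, groups)
#   scores = test_data_cnn_rnn(test_data, test_target, test_groups,
#                              cnn, lstm, cropsize=2800, verbose=0)
#   cnn_acc, cnn_f1, rnn_acc, rnn_f1, confmat, preds = scores
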
    target = np.delete(target, idx)
    target = keras.utils.to_categorical(target)
    return data, target, groups


data, target, groups = load_data(dataset)

#%%
batch_size = 256
epochs = 250
name = dataset
model = models.pure_rnn_do

data = tools.get_all_features(data)
feats = scipy.stats.zscore(data)
feats_seq, target_seq, groups_seq = tools.to_sequences(feats, target,
                                                       groups=groups, seqlen=6)
results = keras_utils.cv(feats_seq, target_seq, groups_seq, model, name=name,
                         epochs=epochs, folds=5, batch_size=batch_size,
                         counter=counter, plot=plot, stop_after=15,
                         balanced=False)
with open('results_dataset_feat_{}'.format(dataset), 'wb') as f:
def predict_rnn(self, features, modelpath=None, batch_size=256):
    """ Predicts class probabilities with the RNN, optionally loading weights first """
    if modelpath is not None:
        self.load_rnn_model(modelpath)
    feat_seq = tools.to_sequences(features, seqlen=self.rnn.input_shape[1],
                                  tolist=False)
    preds = self.rnn.predict(feat_seq, batch_size=batch_size)
    return preds
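
# Hedged usage sketch: predict_rnn as it might be called on an instance of the
# surrounding class (the class itself is outside this excerpt; `scorer` and
# the weight path are hypothetical).
#
#   preds = scorer.predict_rnn(features, modelpath='rnn_weights.h5')
#   stages = np.argmax(preds, 1)   # most likely sleep stage per sequence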