def bpic(lstmsize, lstmdropout, lstmoptim):
    """Score one LSTM configuration on the BPIChallenge dynamic data.

    Each argument is indexed with ``[0]`` before use, and the first element
    of ``lstmsize`` is additionally multiplied by 10.  The train and test
    parts of the dynamic features and labels are concatenated and split into
    5 contiguous folds; an ``LSTMClassifier`` is trained on four folds and
    evaluated on the remaining one.

    Returns:
        The negated mean of the per-fold values returned by
        ``LSTMClassifier.test`` (presumably accuracy, judging by the
        original comment -- confirm against the classifier).
    """
    # Unwrap the single-element hyper-parameter containers.
    hidden_size = lstmsize[0] * 10
    dropout = lstmdropout[0]
    optimizer = lstmoptim[0]
    n_epochs = 50
    batch_size = 64
    n_folds = 5

    # Load the dataset (the static features are not used by this objective).
    dynamic_train = np.load('/storage/hpc_anna/GMiC/Data/BPIChallenge/f1/preprocessed/train_dynamic.npy')
    dynamic_val = np.load('/storage/hpc_anna/GMiC/Data/BPIChallenge/f1/preprocessed/test_dynamic.npy')
    labels_train = np.load('/storage/hpc_anna/GMiC/Data/BPIChallenge/f1/preprocessed/train_labels.npy')
    labels_val = np.load('/storage/hpc_anna/GMiC/Data/BPIChallenge/f1/preprocessed/test_labels.npy')

    # Merge train and test so that cross-validation runs over every sample.
    dynamic_all = np.concatenate((dynamic_train, dynamic_val), axis=0)
    labels_all = np.concatenate((labels_train, labels_val), axis=0)
    n_samples = dynamic_all.shape[0]

    # k-fold cross validation
    fold_scores = []
    for held_out in np.array_split(range(n_samples), n_folds):
        # Everything outside the held-out fold is used for training.
        fit_idx = list(set(range(n_samples)) - set(held_out))

        classifier = LSTMClassifier(hidden_size, dropout, optimizer,
                                    n_epochs, batch_size)
        pos_model, neg_model = classifier.train(dynamic_all[fit_idx],
                                                labels_all[fit_idx])
        fold_score = classifier.test(pos_model, neg_model,
                                     dynamic_all[held_out],
                                     labels_all[held_out])
        fold_scores.append(fold_score)

    mean_score = np.mean(fold_scores)
    print('Result: %.4f' % mean_score)
    # Negated so that a minimiser maximises the score.
    return -mean_score
def function(lstmsize, lstmdropout, lstmoptim):
    """Score one LSTM configuration on the ECoGmixed training data.

    Each argument is indexed with ``[0]`` before use, and the first element
    of ``lstmsize`` is additionally multiplied by 10.  The training set is
    split into 5 contiguous folds; an ``LSTMClassifier`` is trained on four
    folds and evaluated on the remaining one.

    Returns:
        The negated mean of the per-fold values returned by
        ``LSTMClassifier.test`` (presumably accuracy -- confirm against the
        classifier).
    """
    # Unwrap the single-element hyper-parameter containers.
    hidden_size = lstmsize[0] * 10
    dropout = lstmdropout[0]
    optimizer = lstmoptim[0]
    n_epochs = 50
    batch_size = 64
    nfolds = 5

    print("Reading data...")
    dynamic_train = np.load('/storage/hpc_anna/GMiC/Data/ECoGmixed/preprocessed/train_data.npy')
    labels_train = np.load('/storage/hpc_anna/GMiC/Data/ECoGmixed/preprocessed/train_labels.npy')
    nsamples = dynamic_train.shape[0]

    # k-fold cross validation; the full index range is built once, not per fold.
    all_idx = np.arange(nsamples)
    scores = []
    for fold_no, val_idx in enumerate(np.array_split(all_idx, nfolds), 1):
        # setdiff1d returns the complement sorted, so the order of the
        # training samples no longer depends on set iteration order.
        train_idx = np.setdiff1d(all_idx, val_idx)
        # Single-argument print(...) behaves the same under Python 2 and 3.
        print("Current fold is %d / %d" % (fold_no, nfolds))

        # LSTM on dynamic features (8)
        lstmcl = LSTMClassifier(hidden_size, dropout, optimizer,
                                n_epochs, batch_size)
        model_pos, model_neg = lstmcl.train(dynamic_train[train_idx],
                                            labels_train[train_idx])
        scores.append(lstmcl.test(model_pos, model_neg,
                                  dynamic_train[val_idx],
                                  labels_train[val_idx]))

    mean_score = np.mean(scores)
    print('Result: %.4f' % mean_score)
    # Negated so that a minimiser maximises the score.
    return -mean_score
def function(lstmsize, lstmdropout, lstmoptim):
    """Cross-validate an LSTM configuration on the ECoGmixed training data.

    Args:
        lstmsize: indexable; ``lstmsize[0] * 10`` is passed to
            ``LSTMClassifier`` as its first argument.
        lstmdropout: indexable; element 0 is passed as the second argument.
        lstmoptim: indexable; element 0 is passed as the third argument.

    Returns:
        The negated mean over 5 folds of the value returned by
        ``LSTMClassifier.test`` (presumably accuracy -- confirm against the
        classifier).
    """
    lstmsize = lstmsize[0] * 10
    lstmdropout = lstmdropout[0]
    lstmoptim = lstmoptim[0]
    lstmnepochs = 50
    lstmbatchsize = 64
    nfolds = 5

    print("Reading data...")
    dynamic_train = np.load(
        '/storage/hpc_anna/GMiC/Data/ECoGmixed/preprocessed/train_data.npy')
    labels_train = np.load(
        '/storage/hpc_anna/GMiC/Data/ECoGmixed/preprocessed/train_labels.npy')
    nsamples = dynamic_train.shape[0]

    # k-fold cross validation
    val_idx_list = np.array_split(range(nsamples), nfolds)
    # Loop-invariant: build the full index set once instead of once per fold.
    every_idx = set(range(nsamples))
    scores = []
    for fid, val_idx in enumerate(val_idx_list):
        # sorted() pins the training order; a bare list(set) leaves it to
        # the set's internal iteration order.
        train_idx = sorted(every_idx - set(val_idx))
        # Single-argument print(...) is valid in both Python 2 and Python 3.
        print("Current fold is %d / %d" % (fid + 1, nfolds))

        # LSTM on dynamic features (8)
        lstmcl = LSTMClassifier(lstmsize, lstmdropout, lstmoptim,
                                lstmnepochs, lstmbatchsize)
        model_pos, model_neg = lstmcl.train(dynamic_train[train_idx],
                                            labels_train[train_idx])
        scores.append(
            lstmcl.test(model_pos, model_neg, dynamic_train[val_idx],
                        labels_train[val_idx]))

    result = np.mean(scores)
    print('Result: %.4f' % result)
    # Negated so that a minimiser maximises the score.
    return -result
def modelopt(lstmsize, lstmdropout, lstmoptim, nestimators):
    """Score a hybrid "static features + LSTM ratio" random forest (ECoGmixed).

    Stage 1 runs a 5-fold pass in which an ``LSTMClassifier`` is trained on
    four folds and ``pos_neg_ratios`` is computed for the held-out fold, so
    every sample gets a ratio from a model that did not see it.  Stage 2
    appends that ratio as one extra column to the static features and
    cross-validates a ``RandomForestClassifier`` on the enriched table.

    Args:
        lstmsize: indexable; ``lstmsize[0] * 10`` is the first
            ``LSTMClassifier`` argument.
        lstmdropout: indexable; element 0 is the second argument.
        lstmoptim: indexable; element 0 is the third argument.
        nestimators: indexable; ``nestimators[0] * 100`` is the forest's
            ``n_estimators``.

    Returns:
        The negated mean of ``RandomForestClassifier.score`` over the folds.
    """
    # Unwrap the single-element hyper-parameter containers.
    lstm_units = lstmsize[0] * 10
    lstm_dropout = lstmdropout[0]
    lstm_optimizer = lstmoptim[0]
    lstm_epochs = 50
    lstm_batch = 64
    n_trees = nestimators[0] * 100
    nfolds = 5
    # Formatted explicitly so the output matches the space-separated
    # Python 2 ``print a, b, c, d`` and also works under Python 3.
    print("%s %s %s %s" % (lstm_units, lstm_dropout, lstm_optimizer, n_trees))

    # Load the dataset
    static_train = np.load("/storage/hpc_anna/GMiC/Data/ECoGmixed/fourier/train_data.npy")
    dynamic_train = np.load("/storage/hpc_anna/GMiC/Data/ECoGmixed/preprocessed/train_data.npy")
    static_val = np.load("/storage/hpc_anna/GMiC/Data/ECoGmixed/fourier/test_data.npy")
    dynamic_val = np.load("/storage/hpc_anna/GMiC/Data/ECoGmixed/preprocessed/test_data.npy")
    labels_train = np.load("/storage/hpc_anna/GMiC/Data/ECoGmixed/preprocessed/train_labels.npy")
    labels_val = np.load("/storage/hpc_anna/GMiC/Data/ECoGmixed/preprocessed/test_labels.npy")

    # Merge train and test
    static_all = np.concatenate((static_train, static_val), axis=0)
    dynamic_all = np.concatenate((dynamic_train, dynamic_val), axis=0)
    labels_all = np.concatenate((labels_train, labels_val), axis=0)
    nsamples = static_all.shape[0]

    # Both stages use the same contiguous split, so compute it once.
    all_idx = np.arange(nsamples)
    folds = np.array_split(all_idx, nfolds)

    # (1.) out-of-fold LSTM ratios for every sample
    ratios_all_lstm = np.empty(len(labels_all))
    for enrich_idx in folds:
        # Sorted complement of the held-out fold.
        train_idx = np.setdiff1d(all_idx, enrich_idx)

        lstmcl = LSTMClassifier(lstm_units, lstm_dropout, lstm_optimizer,
                                lstm_epochs, lstm_batch)
        model_pos, model_neg = lstmcl.train(dynamic_all[train_idx],
                                            labels_all[train_idx])
        ratios_all_lstm[enrich_idx] = lstmcl.pos_neg_ratios(
            model_pos, model_neg, dynamic_all[enrich_idx])

    # Dataset for hybrid learning.  The ratio becomes an (nsamples, 1) column
    # of a plain ndarray; np.matrix is avoided because it would turn the whole
    # feature table into a matrix object.
    enriched_by_lstm = np.concatenate(
        (static_all, ratios_all_lstm.reshape(-1, 1)), axis=1)

    # (2.) k-fold cross validation to obtain accuracy
    scores = []
    for val_idx in folds:
        train_idx = np.setdiff1d(all_idx, val_idx)

        # Hybrid on features enriched by LSTM (3)
        rf = RandomForestClassifier(n_estimators=n_trees)
        rf.fit(enriched_by_lstm[train_idx], labels_all[train_idx])
        scores.append(rf.score(enriched_by_lstm[val_idx], labels_all[val_idx]))

    mean_score = np.mean(scores)
    print("Result: %.4f" % mean_score)
    # Negated so that a minimiser maximises the score.
    return -mean_score
def bpic(lstmsize, lstmdropout, lstmoptim):
    """Evaluate an LSTM hyper-parameter setting on BPIChallenge with 5-fold CV.

    The three arguments are indexable; element 0 of each is used, with the
    size additionally scaled by 10.  The train and test portions of the
    dynamic data are merged before the folds are formed.

    Returns:
        Minus the mean of the per-fold results of ``LSTMClassifier.test``.
    """
    size = lstmsize[0] * 10
    dropout = lstmdropout[0]
    optim = lstmoptim[0]
    nepochs, batchsize, nfolds = 50, 64, 5

    # Load the dataset (only the dynamic part and the labels are needed here).
    dynamic_train = np.load(
        '/storage/hpc_anna/GMiC/Data/BPIChallenge/f1/preprocessed/train_dynamic.npy'
    )
    dynamic_val = np.load(
        '/storage/hpc_anna/GMiC/Data/BPIChallenge/f1/preprocessed/test_dynamic.npy'
    )
    labels_train = np.load(
        '/storage/hpc_anna/GMiC/Data/BPIChallenge/f1/preprocessed/train_labels.npy'
    )
    labels_val = np.load(
        '/storage/hpc_anna/GMiC/Data/BPIChallenge/f1/preprocessed/test_labels.npy'
    )

    # Merge train and test
    sequences = np.concatenate((dynamic_train, dynamic_val), axis=0)
    targets = np.concatenate((labels_train, labels_val), axis=0)
    total = sequences.shape[0]

    def run_fold(test_idx):
        """Train on everything outside ``test_idx`` and score on ``test_idx``."""
        rest_idx = list(set(range(total)) - set(test_idx))
        clf = LSTMClassifier(size, dropout, optim, nepochs, batchsize)
        pos, neg = clf.train(sequences[rest_idx], targets[rest_idx])
        return clf.test(pos, neg, sequences[test_idx], targets[test_idx])

    # k-fold cross validation, one score per contiguous fold
    scores = [run_fold(part) for part in np.array_split(range(total), nfolds)]

    average = np.mean(scores)
    print('Result: %.4f' % average)
    return -average
train_half = train_nsamples / 2 trainA_static = train_static[:train_half] trainB_static = train_static[train_half:] trainA_dynamic = train_dynamic[:train_half] trainB_dynamic = train_dynamic[train_half:] trainA_labels = train_labels[:train_half] trainB_labels = train_labels[train_half:] # # Train enrichment models on trainA # print 'Training enrichment models...' # train LSTM activations extractor lstmcl = LSTMClassifier(lstmsize, lstmdropout, lstmoptim, lstmnepochs, lstmbatchsize) model_pos, model_neg = lstmcl.train(trainA_dynamic, trainA_labels) trainB_activations_pos = lstmcl.activations(model_pos, trainB_dynamic) trainB_activations_neg = lstmcl.activations(model_neg, trainB_dynamic) trainB_activations = np.concatenate((trainB_activations_pos[:, 499, :], trainB_activations_neg[:, 499, :]), axis=1) test_activations_pos = lstmcl.activations(model_pos, test_dynamic) test_activations_neg = lstmcl.activations(model_neg, test_dynamic) test_activations = np.concatenate((test_activations_pos[:, 499, :], test_activations_neg[:, 499, :]), axis=1) # # Prepare combined datasets for the future experiments # # datasets for hybrid learning
# Train enrichment models on trainA # print 'Training enrichment models...' # extract predictions using RF on static rf = RandomForestClassifier(n_estimators=nestimators) rf.fit(trainA_static, trainA_labels) predictions_trainB_rf = rf.predict_log_proba(trainB_static) predictions_trainB_rf[predictions_trainB_rf == -inf] = np.min( predictions_trainB_rf[predictions_trainB_rf != -inf]) predictions_test_rf = rf.predict_log_proba(test_static) predictions_test_rf[predictions_test_rf == -inf] = np.min( predictions_test_rf[predictions_test_rf != -inf]) # extract predictions using LSTM on dynamic lstmcl = LSTMClassifier(lstmsize, lstmdropout, lstmoptim, lstmnepochs, lstmbatchsize) model_pos, model_neg = lstmcl.train(trainA_dynamic, trainA_labels) mse_pos, mse_neg = lstmcl.predict_mse(model_pos, model_neg, trainB_dynamic) predictions_trainB_lstm = np.vstack((mse_pos, mse_neg)).T ratios_trainB_lstm = lstmcl.pos_neg_ratios(model_pos, model_neg, trainB_dynamic) mse_pos, mse_neg = lstmcl.predict_mse(model_pos, model_neg, test_dynamic) predictions_test_lstm = np.vstack((mse_pos, mse_neg)).T ratios_test_lstm = lstmcl.pos_neg_ratios(model_pos, model_neg, test_dynamic) # # Prepare combined datasets for the future experiments # # datasets for ensemble learning trainB_predictions_combined_rf_lstm = np.concatenate(
labels_all = np.concatenate((labels_train, labels_val), axis=0) nsamples = static_all.shape[0] # prepare where to store the ratios activations_all = np.empty((len(labels_all), lstmsize * 2)) # split indices into folds enrich_idx_list = np.array_split(range(nsamples), nfolds) # CV for feature enrichment for fid, enrich_idx in enumerate(enrich_idx_list): print "Current fold is %d / %d" % (fid + 1, nfolds) train_idx = list(set(range(nsamples)) - set(enrich_idx)) # train models for enrichment lstmcl = LSTMClassifier(lstmsize, lstmdropout, lstmoptim, lstmnepochs, lstmbatchsize) model_pos, model_neg = lstmcl.train(dynamic_all[train_idx], labels_all[train_idx]) # extract activations activations_pos = lstmcl.activations(model_pos, dynamic_all[enrich_idx]) activations_neg = lstmcl.activations(model_neg, dynamic_all[enrich_idx]) activations_all[enrich_idx] = np.concatenate((activations_pos[:, -1, :], activations_neg[:, -1, :]), axis=1) # dataset for hybrid learning enriched_by_lstm = np.concatenate((static_all, activations_all), axis=1) print static_all.shape print enriched_by_lstm.shape # CV for accuracy estimation val_idx_list = np.array_split(range(nsamples), nfolds) scores = []
rf = RandomForestClassifier(n_estimators=nestimators) rf.fit(static_all[train_idx], labels_all[train_idx]) predictions_all_rf[predict_idx] = rf.predict_log_proba(static_all[predict_idx]) predictions_all_rf[predictions_all_rf == -inf] = np.min(predictions_all_rf[predictions_all_rf != -inf]) # extract predictions using HMM on dynamic print " Extracting predictions on dynamic data with HMM..." hmmcl = HMMClassifier() model_pos, model_neg = hmmcl.train(nhmmstates, nhmmiter, hmmcovtype, dynamic_all[train_idx], labels_all[train_idx]) predictions_all_hmm[predict_idx] = hmmcl.predict_log_proba(model_pos, model_neg, dynamic_all[predict_idx]) ratios_all_hmm[predict_idx] = hmmcl.pos_neg_ratios(model_pos, model_neg, dynamic_all[predict_idx]) # extract predictions using LSTM on dynamic print " Extracting predictions on dynamic data with LSTM..." lstmcl = LSTMClassifier(lstmsize, lstmdropout, lstmoptim, lstmnepochs) model_pos, model_neg = lstmcl.train(dynamic_all[train_idx], labels_all[train_idx]) mse_pos, mse_neg = lstmcl.predict_mse(model_pos, model_neg, dynamic_all[predict_idx]) predictions_all_lstm[predict_idx] = np.vstack((mse_pos, mse_neg)).T ratios_all_lstm[predict_idx] = lstmcl.pos_neg_ratios(model_pos, model_neg, dynamic_all[predict_idx]) # # Prepare combined datasets for the future experiments # # datasets for ensemble learning predictions_combined_rf_hmm = np.concatenate((predictions_all_rf, predictions_all_hmm), axis=1) predictions_combined_rf_lstm = np.concatenate((predictions_all_rf, predictions_all_lstm), axis=1) # datasets for hybrid learning
# general parameters lstm_nepochs = 20 # load the dataset print "Loading the dataset.." static_train = np.load("/storage/hpc_anna/GMiC/Data/ECoGmixed/fourier/train_data.npy") dynamic_train = np.load("/storage/hpc_anna/GMiC/Data/ECoGmixed/preprocessed/train_data.npy") static_val = np.load("/storage/hpc_anna/GMiC/Data/ECoGmixed/fourier/test_data.npy") dynamic_val = np.load("/storage/hpc_anna/GMiC/Data/ECoGmixed/preprocessed/test_data.npy") labels_train = np.load("/storage/hpc_anna/GMiC/Data/ECoGmixed/preprocessed/train_labels.npy") labels_val = np.load("/storage/hpc_anna/GMiC/Data/ECoGmixed/preprocessed/test_labels.npy") # transform static features into "fake" sequences dynamized_static_train = np.zeros((static_train.shape[0], static_train.shape[1], dynamic_train.shape[2])) for i in range(static_train.shape[0]): dynamized_static_train[i, :, :] = np.tile(static_train[i, :], (dynamic_train.shape[2], 1)).T dynamized_static_val = np.zeros((static_val.shape[0], static_val.shape[1], dynamic_val.shape[2])) for i in range(static_val.shape[0]): dynamized_static_val[i, :, :] = np.tile(static_val[i, :], (dynamic_val.shape[2], 1)).T # meld dynamized static and dynamic features together all_train = np.concatenate((dynamized_static_train, dynamic_train), axis=1) all_val = np.concatenate((dynamized_static_val, dynamic_val), axis=1) # dynamic data with LSTM lstmcl = LSTMClassifier(2000, 0.5, "adagrad", lstm_nepochs) model_pos, model_neg = lstmcl.train(all_train, labels_train) print "LSTM with dynamized static and dynamic features on validation set: %.4f" % lstmcl.test( model_pos, model_neg, all_val, labels_val )
print " Extracting predictions on static data with RF..." rf = RandomForestClassifier(n_estimators=nestimators) rf.fit(static_all[train_idx], labels_all[train_idx]) predictions_all_rf[predict_idx] = rf.predict_log_proba(static_all[predict_idx]) predictions_all_rf[predictions_all_rf == -inf] = np.min(predictions_all_rf[predictions_all_rf != -inf]) # extract predictions using HMM on dynamic #print " Extracting predictions on dynamic data with HMM..." #hmmcl = HMMClassifier() #model_pos, model_neg = hmmcl.train(nhmmstates, nhmmiter, hmmcovtype, dynamic_all[train_idx], labels_all[train_idx]) #predictions_all_hmm[predict_idx] = hmmcl.predict_log_proba(model_pos, model_neg, dynamic_all[predict_idx]) #ratios_all_hmm[predict_idx] = hmmcl.pos_neg_ratios(model_pos, model_neg, dynamic_all[predict_idx]) # extract predictions using LSTM on dynamic print " Extracting predictions on dynamic data with LSTM..." lstmcl = LSTMClassifier(lstmsize, lstmdropout, lstmoptim, lstmnepochs, lstmbatchsize) model_pos, model_neg = lstmcl.train(dynamic_all[train_idx], labels_all[train_idx]) mse_pos, mse_neg = lstmcl.predict_mse(model_pos, model_neg, dynamic_all[predict_idx]) predictions_all_lstm[predict_idx] = np.vstack((mse_pos, mse_neg)).T ratios_all_lstm[predict_idx] = lstmcl.pos_neg_ratios(model_pos, model_neg, dynamic_all[predict_idx]) # # Prepare combined datasets for the future experiments # # datasets for ensemble learning #predictions_combined_rf_hmm = np.concatenate((predictions_all_rf, ratios_all_hmm.reshape((ratios_all_hmm.shape[0], 1))), axis=1) predictions_combined_rf_lstm = np.concatenate((predictions_all_rf, ratios_all_lstm.reshape((ratios_all_lstm.shape[0], 1))), axis=1) # datasets for hybrid learning
dynamic_train = np.load( '/storage/hpc_anna/GMiC/Data/ECoGmixed/preprocessed/train_data.npy') static_val = np.load( '/storage/hpc_anna/GMiC/Data/ECoGmixed/fourier/test_data.npy') dynamic_val = np.load( '/storage/hpc_anna/GMiC/Data/ECoGmixed/preprocessed/test_data.npy') labels_train = np.load( '/storage/hpc_anna/GMiC/Data/ECoGmixed/preprocessed/train_labels.npy') labels_val = np.load( '/storage/hpc_anna/GMiC/Data/ECoGmixed/preprocessed/test_labels.npy') nsamples = dynamic_train.shape[0] # split the data into training and test train_idx = np.random.choice(range(0, nsamples), size=np.round(nsamples * 0.7, 0), replace=False) test_idx = list(set(range(0, nsamples)) - set(train_idx)) # train the model and report performance print 'Training the model...' lstmcl = LSTMClassifier(lstmsize, lstmdropout, lstmoptim, lstmnepochs, lstmbatch, validation_split=0.3) model_pos, model_neg = lstmcl.train(dynamic_train[train_idx], labels_train[train_idx]) print 'Generative LSTM classifier on dynamic features: %.4f' % lstmcl.test( model_pos, model_neg, dynamic_train[test_idx], labels_train[test_idx])
(len(labels_all), g_lstmsize * 2)) # LSTM size in generative case activations_discriminative = np.empty( (len(labels_all), 100)) # FC layer in disciminative case predict_idx_list = np.array_split(range(nsamples), nfolds) for fid, predict_idx in enumerate(predict_idx_list): print "Current fold is %d" % fid train_idx = list(set(range(nsamples)) - set(predict_idx)) # # Generative LSTM # print " Extracting ratios and activations from generative LSTM..." # train the models lstmcl = LSTMClassifier(g_lstmsize, g_lstmdropout, g_lstmoptim, g_lstmnepochs, g_lstmbatch) model_pos, model_neg = lstmcl.train(dynamic_all[train_idx], labels_all[train_idx]) # extract ratios mse_pos, mse_neg = lstmcl.predict_mse(model_pos, model_neg, dynamic_all[predict_idx]) ratios_generative[predict_idx] = lstmcl.pos_neg_ratios( model_pos, model_neg, dynamic_all[predict_idx]) # extract activations activations_pos = lstmcl.activations(model_pos, dynamic_all[predict_idx]) activations_neg = lstmcl.activations(model_neg, dynamic_all[predict_idx]) activations_generative[predict_idx] = np.concatenate( (activations_pos[:, -1, :], activations_neg[:, -1, :]), axis=1)
# dynamic data with HMM hmmcl = HMMClassifier() model_pos, model_neg = hmmcl.train(3, 10, dynamic_train, labels_train) print "HMM with dynamic features on validation set: %.4f" % hmmcl.test(model_pos, model_neg, dynamic_val, labels_val) # dynamic data with RF print "Training RF on the dynamic dataset..." dynamic_as_static_train = dynamic_train.reshape((dynamic_train.shape[0], dynamic_train.shape[1] * dynamic_train.shape[2])) dynamic_as_static_val = dynamic_val.reshape((dynamic_val.shape[0], dynamic_val.shape[1] * dynamic_val.shape[2])) rf = RandomForestClassifier(n_estimators=rf_estimators, n_jobs=-1) rf.fit(dynamic_as_static_train, labels_train) print "RF with dynamic features on validation set: %.4f" % rf.score(dynamic_as_static_val, labels_val) # dynamic data with LSTM lstmcl = LSTMClassifier(2000, 0.5, 'adagrad', lstm_nepochs) model_pos, model_neg = lstmcl.train(dynamic_train, labels_train) print "LSTM with dynamic features on validation set: %.4f" % lstmcl.test(model_pos, model_neg, dynamic_val, labels_val) # joint models print "" print "Splitting data in two halves..." fh_idx = np.random.choice(range(0, dynamic_train.shape[0]), size=np.round(dynamic_train.shape[0] * 0.5, 0), replace=False) sh_idx = list(set(range(0, dynamic_train.shape[0])) - set(fh_idx)) fh_data = dynamic_train[fh_idx, :, :] fh_labels = labels_train[fh_idx] sh_data = dynamic_train[sh_idx, :, :] sh_labels = labels_train[sh_idx] # RF+HMM print "Evaluating RF+HMM model:"
labels_all = np.concatenate((labels_train, labels_val), axis=0) nsamples = static_all.shape[0] # prepare where to store the ratios ratios_all_lstm = np.empty(len(labels_all)) # split indices into folds enrich_idx_list = np.array_split(range(nsamples), nfolds) # CV for feature enrichment for fid, enrich_idx in enumerate(enrich_idx_list): print "Current fold is %d / %d" % (fid + 1, nfolds) train_idx = list(set(range(nsamples)) - set(enrich_idx)) # extract ratios using LSTM on dynamic lstmcl = LSTMClassifier(lstmsize, lstmdropout, lstmoptim, lstmnepochs, lstmbatchsize) model_pos, model_neg = lstmcl.train(dynamic_all[train_idx], labels_all[train_idx]) ratios_all_lstm[enrich_idx] = lstmcl.pos_neg_ratios(model_pos, model_neg, dynamic_all[enrich_idx]) # dataset for hybrid learning enriched_by_lstm = np.concatenate((static_all, np.matrix(ratios_all_lstm).T), axis=1) # CV for accuracy estimation val_idx_list = np.array_split(range(nsamples), nfolds) scores = [] for fid, val_idx in enumerate(val_idx_list): print "Current fold is %d / %d" % (fid + 1, nfolds) train_idx = list(set(range(nsamples)) - set(val_idx)) # Hybrid on features enriched by HMM (3) rf = RandomForestClassifier(n_estimators=nestimators)
activations_generative = np.empty((len(labels_all), g_lstmsize * 2)) # LSTM size in generative case activations_discriminative = np.empty((len(labels_all), 100)) # FC layer in disciminative case predict_idx_list = np.array_split(range(nsamples), nfolds) for fid, predict_idx in enumerate(predict_idx_list): print "Current fold is %d" % fid train_idx = list(set(range(nsamples)) - set(predict_idx)) # # Generative LSTM # print " Extracting ratios and activations from generative LSTM..." # train the models lstmcl = LSTMClassifier(g_lstmsize, g_lstmdropout, g_lstmoptim, g_lstmnepochs, g_lstmbatch) model_pos, model_neg = lstmcl.train(dynamic_all[train_idx], labels_all[train_idx]) # extract ratios mse_pos, mse_neg = lstmcl.predict_mse(model_pos, model_neg, dynamic_all[predict_idx]) ratios_generative[predict_idx] = lstmcl.pos_neg_ratios(model_pos, model_neg, dynamic_all[predict_idx]) # extract activations activations_pos = lstmcl.activations(model_pos, dynamic_all[predict_idx]) activations_neg = lstmcl.activations(model_neg, dynamic_all[predict_idx]) activations_generative[predict_idx] = np.concatenate((activations_pos[:, -1, :], activations_neg[:, -1, :]), axis=1) # # Discriminative LSTM #
# # k-fold CV for performance estimation # val_idx_list = np.array_split(range(nsamples), nfolds) scores = {1: [], 2: [], 3: [], 4: [], 5: [], 6: [], 7: [], 8: [], 9: [], 10: [], 11: []} for fid, val_idx in enumerate(val_idx_list): print "Current fold is %d / %d" % (fid + 1, nfolds) train_idx = list(set(range(nsamples)) - set(val_idx)) # RF on dynamic features (6) rf = RandomForestClassifier(n_estimators=nestimators) rf.fit(dynamic_as_static[train_idx], labels_all[train_idx]) scores[6].append(rf.score(dynamic_as_static[val_idx], labels_all[val_idx])) # HMM on dynamic features (7) hmmcl = HMMClassifier() model_pos, model_neg = hmmcl.train(nhmmstates, nhmmiter, hmmcovtype, dynamic_all[train_idx], labels_all[train_idx]) acc, auc = hmmcl.test(model_pos, model_neg, dynamic_all[val_idx], labels_all[val_idx]) scores[7].append(acc) # LSTM on dynamic features (8) lstmcl = LSTMClassifier(lstmsize, lstmdropout, lstmoptim, lstmnepochs, lstmbatchsize) model_pos, model_neg = lstmcl.train(dynamic_all[train_idx], labels_all[train_idx]) scores[8].append(lstmcl.test(model_pos, model_neg, dynamic_all[val_idx], labels_all[val_idx])) print "===> (6) RF on dynamic (spatialized) features: %.4f (+/- %.4f) %s" % (np.mean(scores[6]), np.std(scores[6]), scores[6]) print "===> (7) HMM on dynamic features: %.4f (+/- %.4f) %s" % (np.mean(scores[7]), np.std(scores[7]), scores[7]) print "===> (8) LSTM on dynamic features: %.4f (+/- %.4f) %s" % (np.mean(scores[8]), np.std(scores[8]), scores[8])
rf.fit(trainA_static, trainA_labels) predictions_trainB_rf = rf.predict_log_proba(trainB_static) predictions_trainB_rf[predictions_trainB_rf == -inf] = np.min(predictions_trainB_rf[predictions_trainB_rf != -inf]) predictions_test_rf = rf.predict_log_proba(test_static) predictions_test_rf[predictions_test_rf == -inf] = np.min(predictions_test_rf[predictions_test_rf != -inf]) # extract predictions using HMM on dynamic hmmcl = HMMClassifier() model_pos, model_neg = hmmcl.train(nhmmstates, nhmmiter, hmmcovtype, trainA_dynamic, trainA_labels) predictions_trainB_hmm = hmmcl.predict_log_proba(model_pos, model_neg, trainB_dynamic) ratios_trainB_hmm = hmmcl.pos_neg_ratios(model_pos, model_neg, trainB_dynamic) predictions_test_hmm = hmmcl.predict_log_proba(model_pos, model_neg, test_dynamic) ratios_test_hmm = hmmcl.pos_neg_ratios(model_pos, model_neg, test_dynamic) # extract predictions using LSTM on dynamic lstmcl = LSTMClassifier(lstmsize, lstmdropout, lstmoptim, lstmnepochs, lstmbatchsize) model_pos, model_neg = lstmcl.train(trainA_dynamic, trainA_labels) mse_pos, mse_neg = lstmcl.predict_mse(model_pos, model_neg, trainB_dynamic) predictions_trainB_lstm = np.vstack((mse_pos, mse_neg)).T ratios_trainB_lstm = lstmcl.pos_neg_ratios(model_pos, model_neg, trainB_dynamic) mse_pos, mse_neg = lstmcl.predict_mse(model_pos, model_neg, test_dynamic) predictions_test_lstm = np.vstack((mse_pos, mse_neg)).T ratios_test_lstm = lstmcl.pos_neg_ratios(model_pos, model_neg, test_dynamic) # # Prepare combined datasets for the future experiments # # datasets for ensemble learning trainB_predictions_combined_rf_hmm = np.concatenate((predictions_trainB_rf, ratios_trainB_hmm.reshape((ratios_trainB_hmm.shape[0], 1))), axis=1)
def bpic(lstmsize, lstmdropout, lstmoptim, nestimators):
    """Score a hybrid "static features + LSTM ratio" random forest (BPIChallenge).

    Stage 1 computes, for each of 5 contiguous folds, ``pos_neg_ratios`` from
    an ``LSTMClassifier`` trained on the other four folds.  Stage 2 appends
    those ratios as one extra column to the static features and
    cross-validates a ``RandomForestClassifier`` on the enriched table.

    Args:
        lstmsize: indexable; ``lstmsize[0] * 10`` is the first
            ``LSTMClassifier`` argument.
        lstmdropout: indexable; element 0 is the second argument.
        lstmoptim: indexable; element 0 is the third argument.
        nestimators: indexable; ``nestimators[0] * 100`` is the forest's
            ``n_estimators``.

    Returns:
        The negated mean of ``RandomForestClassifier.score`` over the folds.
    """
    lstmsize = lstmsize[0] * 10
    lstmdropout = lstmdropout[0]
    lstmoptim = lstmoptim[0]
    lstmnepochs = 50
    lstmbatchsize = 256
    nestimators = nestimators[0] * 100
    nfolds = 5
    # Explicit formatting reproduces the space-separated Python 2
    # ``print a, b, c, d`` output and stays valid under Python 3.
    print('%s %s %s %s' % (lstmsize, lstmdropout, lstmoptim, nestimators))

    # Load the dataset
    static_train = np.load(
        '/storage/hpc_anna/GMiC/Data/BPIChallenge/f1/preprocessed/train_static.npy'
    )
    dynamic_train = np.load(
        '/storage/hpc_anna/GMiC/Data/BPIChallenge/f1/preprocessed/train_dynamic.npy'
    )
    static_val = np.load(
        '/storage/hpc_anna/GMiC/Data/BPIChallenge/f1/preprocessed/test_static.npy'
    )
    dynamic_val = np.load(
        '/storage/hpc_anna/GMiC/Data/BPIChallenge/f1/preprocessed/test_dynamic.npy'
    )
    labels_train = np.load(
        '/storage/hpc_anna/GMiC/Data/BPIChallenge/f1/preprocessed/train_labels.npy'
    )
    labels_val = np.load(
        '/storage/hpc_anna/GMiC/Data/BPIChallenge/f1/preprocessed/test_labels.npy'
    )

    # Merge train and test
    static_all = np.concatenate((static_train, static_val), axis=0)
    dynamic_all = np.concatenate((dynamic_train, dynamic_val), axis=0)
    labels_all = np.concatenate((labels_train, labels_val), axis=0)
    nsamples = static_all.shape[0]

    # The same contiguous split serves both stages; build it once.
    sample_idx = np.arange(nsamples)
    fold_idx_list = np.array_split(sample_idx, nfolds)

    # (1.) out-of-fold LSTM ratios
    ratios_all_lstm = np.empty(len(labels_all))
    for enrich_idx in fold_idx_list:
        # Sorted complement of the held-out fold.
        train_idx = np.setdiff1d(sample_idx, enrich_idx)

        # extract predictions using LSTM on dynamic
        lstmcl = LSTMClassifier(lstmsize, lstmdropout, lstmoptim,
                                lstmnepochs, lstmbatchsize)
        model_pos, model_neg = lstmcl.train(dynamic_all[train_idx],
                                            labels_all[train_idx])
        ratios_all_lstm[enrich_idx] = lstmcl.pos_neg_ratios(
            model_pos, model_neg, dynamic_all[enrich_idx])

    # Dataset for hybrid learning: append the ratios as an (nsamples, 1)
    # ndarray column.  np.matrix is avoided so the feature table stays a
    # regular ndarray.
    ratio_column = ratios_all_lstm.reshape((ratios_all_lstm.shape[0], 1))
    enriched_by_lstm = np.concatenate((static_all, ratio_column), axis=1)

    # (2.) k-fold cross validation to obtain accuracy
    scores = []
    for val_idx in fold_idx_list:
        train_idx = np.setdiff1d(sample_idx, val_idx)

        # Hybrid on features enriched by LSTM (3)
        rf = RandomForestClassifier(n_estimators=nestimators)
        rf.fit(enriched_by_lstm[train_idx], labels_all[train_idx])
        scores.append(rf.score(enriched_by_lstm[val_idx], labels_all[val_idx]))

    result = np.mean(scores)
    print('Result: %.4f' % result)
    # Negated so that a minimiser maximises the score.
    return -result
rf = RandomForestClassifier(n_estimators=nestimators) rf.fit(static_all[train_idx], labels_all[train_idx]) predictions_all_rf[predict_idx] = rf.predict_log_proba(static_all[predict_idx]) predictions_all_rf[predictions_all_rf == -inf] = np.min(predictions_all_rf[predictions_all_rf != -inf]) # extract predictions using HMM on dynamic print " Extracting predictions on dynamic data with HMM..." hmmcl = HMMClassifier() model_pos, model_neg = hmmcl.train(nhmmstates, nhmmiter, hmmcovtype, dynamic_all[train_idx], labels_all[train_idx]) predictions_all_hmm[predict_idx] = hmmcl.predict_log_proba(model_pos, model_neg, dynamic_all[predict_idx]) ratios_all_hmm[predict_idx] = hmmcl.pos_neg_ratios(model_pos, model_neg, dynamic_all[predict_idx]) # extract predictions using LSTM on dynamic print " Extracting predictions on dynamic data with LSTM..." lstmcl = LSTMClassifier(lstmsize, lstmdropout, lstmoptim, lstmnepochs, lstmbatchsize) model_pos, model_neg = lstmcl.train(dynamic_all[train_idx], labels_all[train_idx]) mse_pos, mse_neg = lstmcl.predict_mse(model_pos, model_neg, dynamic_all[predict_idx]) predictions_all_lstm[predict_idx] = np.vstack((mse_pos, mse_neg)).T ratios_all_lstm[predict_idx] = lstmcl.pos_neg_ratios(model_pos, model_neg, dynamic_all[predict_idx]) # # Prepare combined datasets for the future experiments # # datasets for ensemble learning predictions_combined_rf_hmm = np.concatenate((predictions_all_rf, predictions_all_hmm), axis=1) predictions_combined_rf_lstm = np.concatenate((predictions_all_rf, predictions_all_lstm), axis=1) # datasets for hybrid learning
labels_train = np.load( '/storage/hpc_anna/GMiC/Data/ECoGmixed/preprocessed/train_labels.npy') labels_val = np.load( '/storage/hpc_anna/GMiC/Data/ECoGmixed/preprocessed/test_labels.npy') nsamples = dynamic_train.shape[0] # split indices into folds val_idx_list = np.array_split(range(nsamples), nfolds) # run CV scores = [] for fid, val_idx in enumerate(val_idx_list): print "Current fold is %d/%d" % (fid + 1, nfolds) train_idx = list(set(range(nsamples)) - set(val_idx)) # train the model and report performance lstmcl = LSTMClassifier(lstmsize, lstmdropout, lstmoptim, lstmnepochs, lstmbatch, validation_split=0.3) model_pos, model_neg = lstmcl.train(dynamic_train[train_idx], labels_train[train_idx]) scores.append( lstmcl.test(model_pos, model_neg, dynamic_train[val_idx], labels_train[val_idx])) print 'Generative LSTM classifier on dynamic features: %.4f (+- %.4f) %s' % ( np.mean(scores), np.std(scores), scores)
lstmoptim = 'adadelta' lstmnepochs = 20 lstmbatch = 64 # load the dataset print 'Loading the dataset..' static_train = np.load('/storage/hpc_anna/GMiC/Data/ECoGmixed/fourier/train_data.npy') dynamic_train = np.load('/storage/hpc_anna/GMiC/Data/ECoGmixed/preprocessed/train_data.npy') static_val = np.load('/storage/hpc_anna/GMiC/Data/ECoGmixed/fourier/test_data.npy') dynamic_val = np.load('/storage/hpc_anna/GMiC/Data/ECoGmixed/preprocessed/test_data.npy') labels_train = np.load('/storage/hpc_anna/GMiC/Data/ECoGmixed/preprocessed/train_labels.npy') labels_val = np.load('/storage/hpc_anna/GMiC/Data/ECoGmixed/preprocessed/test_labels.npy') nsamples = dynamic_train.shape[0] # split indices into folds val_idx_list = np.array_split(range(nsamples), nfolds) # run CV scores = [] for fid, val_idx in enumerate(val_idx_list): print "Current fold is %d/%d" % (fid + 1, nfolds) train_idx = list(set(range(nsamples)) - set(val_idx)) # train the model and report performance lstmcl = LSTMClassifier(lstmsize, lstmdropout, lstmoptim, lstmnepochs, lstmbatch, validation_split=0.3) model_pos, model_neg = lstmcl.train(dynamic_train[train_idx], labels_train[train_idx]) scores.append(lstmcl.test(model_pos, model_neg, dynamic_train[val_idx], labels_train[val_idx])) print 'Generative LSTM classifier on dynamic features: %.4f (+- %.4f) %s' % (np.mean(scores), np.std(scores), scores)
static_val = np.load( '/storage/hpc_anna/GMiC/Data/syn_lstm_wins/test_static.npy') dynamic_val = np.load( '/storage/hpc_anna/GMiC/Data/syn_lstm_wins/test_dynamic.npy') labels_train = np.load( '/storage/hpc_anna/GMiC/Data/syn_lstm_wins/train_labels.npy') labels_val = np.load( '/storage/hpc_anna/GMiC/Data/syn_lstm_wins/test_labels.npy') # transform static features into "fake" sequences dynamized_static_train = np.zeros( (static_train.shape[0], static_train.shape[1], dynamic_train.shape[2])) for i in range(static_train.shape[0]): dynamized_static_train[i, :, :] = np.tile(static_train[i, :], (dynamic_train.shape[2], 1)).T dynamized_static_val = np.zeros( (static_val.shape[0], static_val.shape[1], dynamic_val.shape[2])) for i in range(static_val.shape[0]): dynamized_static_val[i, :, :] = np.tile(static_val[i, :], (dynamic_val.shape[2], 1)).T # meld dynamized static and dynamic features together all_train = np.concatenate((dynamized_static_train, dynamic_train), axis=1) all_val = np.concatenate((dynamized_static_val, dynamic_val), axis=1) # dynamic data with LSTM lstmcl = LSTMClassifier(2000, 0.5, 'adagrad', lstm_nepochs) model_pos, model_neg = lstmcl.train(all_train, labels_train) print "LSTM with dynamized static and dynamic features on validation set: %.4f" % lstmcl.test( model_pos, model_neg, all_val, labels_val)
import numpy as np
from LSTM.lstm_classifier import LSTMClassifier

# parameters passed positionally to LSTMClassifier below
lstmsize = 512
lstmdropout = 0.0
lstmoptim = 'rmsprop'
lstmnepochs = 50
lstmbatch = 32

# Load the dataset.  Only dynamic_train and labels_train are read by the
# split/train/test steps below; the other arrays are loaded but not used in
# this script.
print('Loading the dataset..')
static_train = np.load('/storage/hpc_anna/GMiC/Data/syn_lstm_wins/train_static.npy')
dynamic_train = np.load('/storage/hpc_anna/GMiC/Data/syn_lstm_wins/train_dynamic.npy')
static_val = np.load('/storage/hpc_anna/GMiC/Data/syn_lstm_wins/test_static.npy')
dynamic_val = np.load('/storage/hpc_anna/GMiC/Data/syn_lstm_wins/test_dynamic.npy')
labels_train = np.load('/storage/hpc_anna/GMiC/Data/syn_lstm_wins/train_labels.npy')
labels_val = np.load('/storage/hpc_anna/GMiC/Data/syn_lstm_wins/test_labels.npy')
nsamples = dynamic_train.shape[0]

# Split the training samples 70/30 into a random training part and a test part.
# np.round() returns a NumPy float, but np.random.choice() needs an integer
# `size`, so the rounded value is converted explicitly.
ntrain = int(np.round(nsamples * 0.7, 0))
train_idx = np.random.choice(range(0, nsamples), size=ntrain, replace=False)
test_idx = list(set(range(0, nsamples)) - set(train_idx))

# Train the model and report performance on the held-out 30%.
# train() returns a (model_pos, model_neg) pair which test() consumes.
print('Training the model...')
lstmcl = LSTMClassifier(lstmsize, lstmdropout, lstmoptim, lstmnepochs, lstmbatch)
model_pos, model_neg = lstmcl.train(dynamic_train[train_idx], labels_train[train_idx])
print('Generative LSTM classifier on dynamic features: %.4f'
      % lstmcl.test(model_pos, model_neg, dynamic_train[test_idx], labels_train[test_idx]))
# NOTE(review): static_train, static_val, dynamic_train, dynamic_val and
# labels_train are read below but are loaded before this fragment (not visible
# here).
labels_val = np.load('/storage/hpc_anna/GMiC/Data/syn_multisame/test_labels.npy')

#
# Sanity Checks
#
print("Expected performance of a lonely model is 0.75, of the joint model 1.0")

# a) static data classification
rf = RandomForestClassifier(n_estimators=100)
rf.fit(static_train, labels_train)
print("Random Forest with static features on validation set: %.4f"
      % rf.score(static_val, labels_val))

# b) dynamic data classification
# train() returns a (model_pos, model_neg) pair which test() consumes.
lstmcl = LSTMClassifier(2000, 0.5, 'adagrad', 20)
model_pos, model_neg = lstmcl.train(dynamic_train, labels_train)
print("LSTM with dynamic features on validation set: %.4f"
      % lstmcl.test(model_pos, model_neg, dynamic_val, labels_val))

#
# Evaluating Joint Model
#
print("")
print("Evaluating joint model:")
print("Splitting data in two halves...")

# Draw a random half of the training sample indices (first half); the second
# half is everything that was not drawn.
# np.round() returns a NumPy float, but np.random.choice() needs an integer
# `size`, so the rounded value is converted explicitly.
fh_size = int(np.round(dynamic_train.shape[0] * 0.5, 0))
fh_idx = np.random.choice(range(0, dynamic_train.shape[0]), size=fh_size, replace=False)
sh_idx = list(set(range(0, dynamic_train.shape[0])) - set(fh_idx))
fh_data = dynamic_train[fh_idx, :, :]
#

# NOTE(review): train_nsamples, train_static, train_dynamic, train_labels,
# test_dynamic, seqlen and the lstm* parameters are defined before this
# fragment (not visible here).
# Floor division keeps train_half an integer so it stays a valid slice bound
# even when "/" means true division (Python 3 or `from __future__ import
# division`).
train_half = train_nsamples // 2
trainA_static = train_static[:train_half]
trainB_static = train_static[train_half:]
trainA_dynamic = train_dynamic[:train_half]
trainB_dynamic = train_dynamic[train_half:]
trainA_labels = train_labels[:train_half]
trainB_labels = train_labels[train_half:]

#
# Train enrichment models on trainA
#
print('Training enrichment models...')

# Train the LSTM activations extractor on the first half (trainA).
# train() returns a (model_pos, model_neg) pair.
lstmcl = LSTMClassifier(lstmsize, lstmdropout, lstmoptim, lstmnepochs, lstmbatchsize)
model_pos, model_neg = lstmcl.train(trainA_dynamic, trainA_labels)

# Activations of both models on the second half (trainB); only the slice at
# index seqlen - 1 on axis 1 is kept, and the two slices are joined side by
# side.
trainB_activations_pos = lstmcl.activations(model_pos, trainB_dynamic)
trainB_activations_neg = lstmcl.activations(model_neg, trainB_dynamic)
trainB_activations = np.concatenate(
    (trainB_activations_pos[:, seqlen - 1, :],
     trainB_activations_neg[:, seqlen - 1, :]),
    axis=1)

# Same extraction for the test data.
test_activations_pos = lstmcl.activations(model_pos, test_dynamic)
test_activations_neg = lstmcl.activations(model_neg, test_dynamic)
test_activations = np.concatenate(
    (test_activations_pos[:, seqlen - 1, :],
     test_activations_neg[:, seqlen - 1, :]),
    axis=1)

#
# Enrichment cross-validation: for every fold, models are trained on the
# complement of the fold and the fold's own entries of the ratio arrays are
# filled in.
# NOTE(review): predict_idx_list, ratios_all_hmm, ratios_all_lstm, static_all,
# dynamic_all, labels_all and the hmm*/lstm* parameters are defined before this
# fragment (not visible here).
for fid, predict_idx in enumerate(predict_idx_list):
    print("Enrichment fold %d / %d" % (fid + 1, nfolds))
    train_idx = list(set(range(nsamples)) - set(predict_idx))

    # HMM on dynamic data: store pos/neg ratios for the samples of this fold.
    hmmcl = HMMClassifier()
    model_pos, model_neg = hmmcl.train(
        nhmmstates,
        nhmmiter,
        hmmcovtype,
        dynamic_all[train_idx],
        labels_all[train_idx],
    )
    ratios_all_hmm[predict_idx] = hmmcl.pos_neg_ratios(
        model_pos, model_neg, dynamic_all[predict_idx])

    # LSTM on dynamic data: store pos/neg ratios for the samples of this fold.
    lstmcl = LSTMClassifier(lstmsize, lstmdropout, lstmoptim, lstmnepochs, lstmbatchsize)
    model_pos, model_neg = lstmcl.train(
        dynamic_all[train_idx],
        labels_all[train_idx],
    )
    # NOTE(review): mse_pos / mse_neg are not read anywhere in this fragment.
    mse_pos, mse_neg = lstmcl.predict_mse(
        model_pos, model_neg, dynamic_all[predict_idx])
    ratios_all_lstm[predict_idx] = lstmcl.pos_neg_ratios(
        model_pos, model_neg, dynamic_all[predict_idx])

# Datasets for hybrid learning: the static features with the ratio vector
# appended as one extra column.
enriched_by_hmm = np.concatenate(
    (static_all, np.matrix(ratios_all_hmm).T), axis=1)
enriched_by_lstm = np.concatenate(
    (static_all, np.matrix(ratios_all_lstm).T), axis=1)

# Index folds for the k-fold CV used for performance estimation.
val_idx_list = np.array_split(range(nsamples), nfolds)
# NOTE(review): this fragment begins in the middle of a statement whose start
# lies outside the visible text; the dangling tail is kept verbatim here:
#     predictions_all_rf[predictions_all_rf != -inf])
# NOTE(review): the statements down to the "Prepare combined datasets" banner
# read train_idx / predict_idx and presumably sit inside a per-fold loop whose
# header is not visible -- confirm the indentation against the full script.

# HMM on dynamic data: store log-probabilities and pos/neg ratios for the
# samples selected by predict_idx.
print(" Extracting predictions on dynamic data with HMM...")
hmmcl = HMMClassifier()
model_pos, model_neg = hmmcl.train(
    nhmmstates,
    nhmmiter,
    hmmcovtype,
    dynamic_all[train_idx],
    labels_all[train_idx],
)
predictions_all_hmm[predict_idx] = hmmcl.predict_log_proba(
    model_pos, model_neg, dynamic_all[predict_idx])
ratios_all_hmm[predict_idx] = hmmcl.pos_neg_ratios(
    model_pos, model_neg, dynamic_all[predict_idx])

# LSTM on dynamic data: the two predict_mse() outputs are stacked as two
# columns; pos/neg ratios are stored as well.
print(" Extracting predictions on dynamic data with LSTM...")
lstmcl = LSTMClassifier(lstmsize, lstmdropout, lstmoptim, lstmnepochs, lstmbatchsize)
model_pos, model_neg = lstmcl.train(
    dynamic_all[train_idx],
    labels_all[train_idx],
)
mse_pos, mse_neg = lstmcl.predict_mse(
    model_pos, model_neg, dynamic_all[predict_idx])
predictions_all_lstm[predict_idx] = np.vstack((mse_pos, mse_neg)).T
ratios_all_lstm[predict_idx] = lstmcl.pos_neg_ratios(
    model_pos, model_neg, dynamic_all[predict_idx])

#
# Prepare combined datasets for the future experiments
#

# Datasets for ensemble learning: RF and HMM predictions side by side.
predictions_combined_rf_hmm = np.concatenate(
    (predictions_all_rf, predictions_all_hmm), axis=1)
# NOTE(review): nsamples, nfolds, nestimators, dynamic_as_static, dynamic_all,
# labels_all and the hmm*/lstm* parameters are defined before this fragment
# (not visible here).
val_idx_list = np.array_split(range(nsamples), nfolds)

# One list of per-fold scores per experiment id (1..11); only ids 6, 7 and 8
# are filled in this fragment.
scores = {experiment_id: [] for experiment_id in range(1, 12)}

for fid, val_idx in enumerate(val_idx_list):
    print("Current fold is %d / %d" % (fid + 1, nfolds))
    train_idx = list(set(range(nsamples)) - set(val_idx))

    # RF on dynamic features (6)
    rf = RandomForestClassifier(n_estimators=nestimators)
    rf.fit(dynamic_as_static[train_idx], labels_all[train_idx])
    rf_score = rf.score(dynamic_as_static[val_idx], labels_all[val_idx])
    scores[6].append(rf_score)

    # HMM on dynamic features (7); test() returns an (acc, auc) pair of which
    # only acc is recorded.
    hmmcl = HMMClassifier()
    model_pos, model_neg = hmmcl.train(
        nhmmstates,
        nhmmiter,
        hmmcovtype,
        dynamic_all[train_idx],
        labels_all[train_idx],
    )
    acc, auc = hmmcl.test(
        model_pos, model_neg, dynamic_all[val_idx], labels_all[val_idx])
    scores[7].append(acc)

    # LSTM on dynamic features (8)
    lstmcl = LSTMClassifier(lstmsize, lstmdropout, lstmoptim, lstmnepochs, lstmbatchsize)
    model_pos, model_neg = lstmcl.train(
        dynamic_all[train_idx],
        labels_all[train_idx],
    )
    lstm_score = lstmcl.test(
        model_pos, model_neg, dynamic_all[val_idx], labels_all[val_idx])
    scores[8].append(lstm_score)

# Mean, standard deviation and raw per-fold scores for each experiment.
print("===> (6) RF on dynamic (spatialized) features: %.4f (+/- %.4f) %s"
      % (np.mean(scores[6]), np.std(scores[6]), scores[6]))
print("===> (7) HMM on dynamic features: %.4f (+/- %.4f) %s"
      % (np.mean(scores[7]), np.std(scores[7]), scores[7]))
print("===> (8) LSTM on dynamic features: %.4f (+/- %.4f) %s"
      % (np.mean(scores[8]), np.std(scores[8]), scores[8]))
# NOTE(review): labels_train, labels_val, static_all, dynamic_all, nfolds and
# the lstm* parameters are defined before this fragment (not visible here).
labels_all = np.concatenate((labels_train, labels_val), axis=0)
nsamples = static_all.shape[0]

# Storage for the extracted activations: one row per label, lstmsize columns
# for each of the two models.
activations_all = np.empty((len(labels_all), lstmsize * 2))

# Partition the sample indices into nfolds consecutive folds.
enrich_idx_list = np.array_split(range(nsamples), nfolds)

# CV for feature enrichment: models are trained on the complement of each fold
# and the fold's rows of activations_all are filled in.
for fid, enrich_idx in enumerate(enrich_idx_list):
    print("Enrichment fold %d / %d" % (fid + 1, nfolds))
    train_idx = list(set(range(nsamples)) - set(enrich_idx))

    # Train the models used for enrichment; train() returns a
    # (model_pos, model_neg) pair.
    lstmcl = LSTMClassifier(lstmsize, lstmdropout, lstmoptim, lstmnepochs, lstmbatchsize)
    model_pos, model_neg = lstmcl.train(
        dynamic_all[train_idx],
        labels_all[train_idx],
    )

    # Extract activations of both models for this fold; only the last entry on
    # axis 1 is kept and the two slices are joined side by side.
    activations_pos = lstmcl.activations(model_pos, dynamic_all[enrich_idx])
    activations_neg = lstmcl.activations(model_neg, dynamic_all[enrich_idx])
    activations_all[enrich_idx] = np.concatenate(
        (activations_pos[:, -1, :], activations_neg[:, -1, :]),
        axis=1)

# Dataset for hybrid learning: static features followed by the activations.
enriched_by_lstm = np.concatenate((static_all, activations_all), axis=1)
print(static_all.shape)
print(enriched_by_lstm.shape)

# CV for accuracy estimation
val_idx_list = np.array_split(range(nsamples), nfolds)
scores = []
# Prediction-extraction cross-validation: for every fold, models are trained on
# the complement of the fold and the fold's own entries of the prediction and
# ratio arrays are filled in.
# NOTE(review): predict_idx_list, predictions_all_rf, predictions_all_lstm,
# ratios_all_lstm, static_all, dynamic_all, labels_all, inf, nestimators and
# the lstm* parameters are defined before this fragment (not visible here).
for fid, predict_idx in enumerate(predict_idx_list):
    print("Current fold is %d" % fid)
    train_idx = list(set(range(nsamples)) - set(predict_idx))

    # RF on static data: store the log-probabilities for this fold.
    print(" Extracting predictions on static data with RF...")
    rf = RandomForestClassifier(n_estimators=nestimators)
    rf.fit(static_all[train_idx], labels_all[train_idx])
    predictions_all_rf[predict_idx] = rf.predict_log_proba(static_all[predict_idx])

    # Replace every -inf entry of the whole array by the smallest entry that
    # is not -inf.
    neg_inf_mask = predictions_all_rf == -inf
    predictions_all_rf[neg_inf_mask] = np.min(predictions_all_rf[~neg_inf_mask])

    # LSTM on dynamic data: the two predict_mse() outputs are stacked as two
    # columns; pos/neg ratios are stored as well.
    print(" Extracting predictions on dynamic data with LSTM...")
    lstmcl = LSTMClassifier(lstmsize, lstmdropout, lstmoptim, lstmnepochs, lstmbatchsize)
    model_pos, model_neg = lstmcl.train(
        dynamic_all[train_idx],
        labels_all[train_idx],
    )
    mse_pos, mse_neg = lstmcl.predict_mse(
        model_pos, model_neg, dynamic_all[predict_idx])
    predictions_all_lstm[predict_idx] = np.vstack((mse_pos, mse_neg)).T
    ratios_all_lstm[predict_idx] = lstmcl.pos_neg_ratios(
        model_pos, model_neg, dynamic_all[predict_idx])

#
# Prepare combined datasets for the future experiments
#

# Datasets for ensemble learning: RF and LSTM predictions side by side.
predictions_combined_rf_lstm = np.concatenate(
    (predictions_all_rf, predictions_all_lstm), axis=1)

# Datasets for hybrid learning: the static features with the LSTM ratio vector
# appended as one extra column.
enriched_by_lstm = np.concatenate(
    (static_all, np.matrix(ratios_all_lstm).T), axis=1)
# NOTE(review): rf, inf, the trainA_*/trainB_*/test_* arrays and the
# hmm*/lstm* parameters are defined before this fragment (not visible here).

# RF on static data: fit on trainA, then take log-probabilities for trainB and
# for the test data.  In each result every -inf entry is replaced by the
# smallest entry that is not -inf.
rf.fit(trainA_static, trainA_labels)

predictions_trainB_rf = rf.predict_log_proba(trainB_static)
trainB_neg_inf = predictions_trainB_rf == -inf
predictions_trainB_rf[trainB_neg_inf] = np.min(predictions_trainB_rf[~trainB_neg_inf])

predictions_test_rf = rf.predict_log_proba(test_static)
test_neg_inf = predictions_test_rf == -inf
predictions_test_rf[test_neg_inf] = np.min(predictions_test_rf[~test_neg_inf])

# HMM on dynamic data: fit on trainA, then extract log-probabilities and
# pos/neg ratios for trainB and for the test data.
hmmcl = HMMClassifier()
model_pos, model_neg = hmmcl.train(
    nhmmstates, nhmmiter, hmmcovtype, trainA_dynamic, trainA_labels)
predictions_trainB_hmm = hmmcl.predict_log_proba(model_pos, model_neg, trainB_dynamic)
ratios_trainB_hmm = hmmcl.pos_neg_ratios(model_pos, model_neg, trainB_dynamic)
predictions_test_hmm = hmmcl.predict_log_proba(model_pos, model_neg, test_dynamic)
ratios_test_hmm = hmmcl.pos_neg_ratios(model_pos, model_neg, test_dynamic)

# LSTM on dynamic data: fit on trainA.
lstmcl = LSTMClassifier(lstmsize, lstmdropout, lstmoptim, lstmnepochs, lstmbatchsize)
model_pos, model_neg = lstmcl.train(trainA_dynamic, trainA_labels)

# trainB: the two predict_mse() outputs stacked as two columns, the pos/neg
# ratios, and the activations of both models (last entry on axis 1 only)
# joined side by side.
mse_pos, mse_neg = lstmcl.predict_mse(model_pos, model_neg, trainB_dynamic)
predictions_trainB_lstm = np.vstack((mse_pos, mse_neg)).T
ratios_trainB_lstm = lstmcl.pos_neg_ratios(model_pos, model_neg, trainB_dynamic)
trainB_activations_pos = lstmcl.activations(model_pos, trainB_dynamic)
trainB_activations_neg = lstmcl.activations(model_neg, trainB_dynamic)
trainB_activations = np.concatenate(
    (trainB_activations_pos[:, -1, :], trainB_activations_neg[:, -1, :]),
    axis=1)

# test data: same three kinds of features as for trainB.
mse_pos, mse_neg = lstmcl.predict_mse(model_pos, model_neg, test_dynamic)
predictions_test_lstm = np.vstack((mse_pos, mse_neg)).T
ratios_test_lstm = lstmcl.pos_neg_ratios(model_pos, model_neg, test_dynamic)
test_activations_pos = lstmcl.activations(model_pos, test_dynamic)
test_activations_neg = lstmcl.activations(model_neg, test_dynamic)
test_activations = np.concatenate(
    (test_activations_pos[:, -1, :], test_activations_neg[:, -1, :]),
    axis=1)