def bpic(nhmmstates, nhmmiter):
    """Cross-validate an HMM classifier on the BPI Challenge dynamic features.

    The train and test splits stored on disk are merged and re-split into
    ``nfolds`` contiguous folds. For every fold a fresh ``HMMClassifier`` is
    trained on the remaining samples and evaluated on the fold itself.

    Args:
        nhmmstates: Indexable whose first element is used as the number of
            HMM states.
        nhmmiter: Indexable whose first element, multiplied by 10, is used as
            the number of HMM training iterations.

    Returns:
        The negated mean of the per-fold results of ``HMMClassifier.test``
        (negated presumably so that a minimising optimiser maximises the
        score -- confirm against the caller).
    """
    nhmmstates = nhmmstates[0]
    nhmmiter = nhmmiter[0] * 10
    nfolds = 5
    hmmcovtype = 'full'
    # Single-argument print(...) emits the same text on Python 2 and 3.
    print('%s %s' % (nhmmstates, nhmmiter))

    # Load the dataset
    data_dir = '/storage/hpc_anna/GMiC/Data/BPIChallenge/f1/preprocessed/'
    static_train = np.load(data_dir + 'train_static.npy')
    dynamic_train = np.load(data_dir + 'train_dynamic.npy')
    static_val = np.load(data_dir + 'test_static.npy')
    dynamic_val = np.load(data_dir + 'test_dynamic.npy')
    labels_train = np.load(data_dir + 'train_labels.npy')
    labels_val = np.load(data_dir + 'test_labels.npy')

    # Merge train and test; the static features are only needed here to
    # determine the number of samples.
    static_all = np.concatenate((static_train, static_val), axis=0)
    dynamic_all = np.concatenate((dynamic_train, dynamic_val), axis=0)
    labels_all = np.concatenate((labels_train, labels_val), axis=0)
    nsamples = static_all.shape[0]

    # k-fold cross validation to obtain accuracy
    all_idx = np.arange(nsamples)
    val_idx_list = np.array_split(all_idx, nfolds)
    scores = []
    for val_idx in val_idx_list:
        # Sorted complement of the validation fold; unlike a set difference
        # its ordering is guaranteed, so training input is reproducible.
        train_idx = np.setdiff1d(all_idx, val_idx)

        # extract predictions using HMM on dynamic
        hmmcl = HMMClassifier()
        model_pos, model_neg = hmmcl.train(nhmmstates, nhmmiter, hmmcovtype,
                                           dynamic_all[train_idx],
                                           labels_all[train_idx])
        # NOTE(review): elsewhere in this file test() is unpacked as
        # (acc, auc); if it returns a pair here too, the mean below averages
        # accuracy and AUC together -- confirm.
        scores.append(hmmcl.test(model_pos, model_neg,
                                 dynamic_all[val_idx], labels_all[val_idx]))

    mean_score = np.mean(scores)
    print('Result: %.4f' % mean_score)
    return -mean_score
def modelopt(nhmmstates, nhmmiter):
    """Cross-validate an HMM classifier on the ECoGmixed training data.

    Only the preprocessed training sequences and their labels are loaded.
    They are split into ``nfolds`` contiguous folds, and for every fold a
    fresh ``HMMClassifier`` is trained on the remaining samples and evaluated
    on the fold itself.

    Args:
        nhmmstates: Indexable whose first element is used as the number of
            HMM states.
        nhmmiter: Indexable whose first element, multiplied by 10, is used as
            the number of HMM training iterations.

    Returns:
        The negated mean of the per-fold results of ``HMMClassifier.test``
        (negated presumably so that a minimising optimiser maximises the
        score -- confirm against the caller).
    """
    nhmmstates = nhmmstates[0]
    nhmmiter = nhmmiter[0] * 10
    nfolds = 5
    hmmcovtype = 'full'
    # Single-argument print(...) emits the same text on Python 2 and 3.
    print('%s %s' % (nhmmstates, nhmmiter))

    # Load the dataset
    dynamic_train = np.load(
        '/storage/hpc_anna/GMiC/Data/ECoGmixed/preprocessed/train_data.npy')
    labels_train = np.load(
        '/storage/hpc_anna/GMiC/Data/ECoGmixed/preprocessed/train_labels.npy')
    nsamples = dynamic_train.shape[0]

    # k-fold cross validation to obtain accuracy
    all_idx = np.arange(nsamples)
    val_idx_list = np.array_split(all_idx, nfolds)
    scores = []
    for fid, val_idx in enumerate(val_idx_list):
        # Sorted complement of the validation fold; unlike a set difference
        # its ordering is guaranteed, so training input is reproducible.
        train_idx = np.setdiff1d(all_idx, val_idx)
        print("Current fold is %d / %d" % (fid + 1, nfolds))

        # extract predictions using HMM on dynamic
        hmmcl = HMMClassifier()
        model_pos, model_neg = hmmcl.train(nhmmstates, nhmmiter, hmmcovtype,
                                           dynamic_train[train_idx],
                                           labels_train[train_idx])
        # NOTE(review): elsewhere in this file test() is unpacked as
        # (acc, auc); if it returns a pair here too, the mean below averages
        # accuracy and AUC together -- confirm.
        scores.append(hmmcl.test(model_pos, model_neg,
                                 dynamic_train[val_idx],
                                 labels_train[val_idx]))

    mean_score = np.mean(scores)
    print('Result: %.4f' % mean_score)
    return -mean_score
#
# Prepare combined datasets for the future experiments
#

# Dataset to check how generative models perform if provided with static
# features along with dynamic ones. Every static feature is repeated along the
# last axis of the dynamic data, so that
# static_as_dynamic[i, j, t] == static_all[i, j] for every t.
static_as_dynamic = np.zeros((static_all.shape[0], static_all.shape[1],
                              dynamic_all.shape[2]))
# One broadcast assignment replaces the per-sample np.tile loop.
static_as_dynamic[:] = static_all[:, :, np.newaxis]

# The constant pseudo-sequences get a small uniform jitter before being
# stacked under the real dynamic features along axis 1 (presumably to avoid
# zero-variance features in the HMM -- confirm).
dynamic_and_static_as_dynamic = np.concatenate(
    (dynamic_all,
     static_as_dynamic + np.random.uniform(-0.0001, 0.0001,
                                           static_as_dynamic.shape)),
    axis=1)

#
# k-fold CV for performance estimation
#
val_idx_list = np.array_split(range(nsamples), nfolds)
# One list of per-fold scores for each experiment id 1..11.
scores = {experiment: [] for experiment in range(1, 12)}
for fid, val_idx in enumerate(val_idx_list):
    print("Current fold is %d / %d" % (fid + 1, nfolds))
    # Sorted complement of the validation fold; unlike a set difference its
    # ordering is guaranteed, so training input is reproducible.
    train_idx = np.setdiff1d(np.arange(nsamples), val_idx)

    # HMM on dynamic and static (turned into fake sequences) (9)
    hmmcl = HMMClassifier()
    model_pos, model_neg = hmmcl.train(
        nhmmstates, nhmmiter, hmmcovtype,
        dynamic_and_static_as_dynamic[train_idx], labels_all[train_idx])
    acc, auc = hmmcl.test(model_pos, model_neg,
                          dynamic_and_static_as_dynamic[val_idx],
                          labels_all[val_idx])
    # Only the first returned value is recorded; auc is not used here.
    scores[9].append(acc)

print("===> (9) HMM on dynamic and static features: %.4f (+/- %.4f) %s" % (
    np.mean(scores[9]), np.std(scores[9]), scores[9]))
def bpic(nhmmstates, nestimators, nhmmiter):
    """Cross-validate a random forest on static features enriched by an HMM.

    Two passes over the same ``nfolds`` contiguous folds are made on the
    merged BPI Challenge data:

    1. For every fold an ``HMMClassifier`` is trained on the dynamic features
       of the remaining samples, and ``pos_neg_ratios`` of the held-out
       samples are stored, so each sample's ratio comes from a model that was
       not trained on it.
    2. The ratios are appended to the static features as one extra column and
       a ``RandomForestClassifier`` is cross-validated on the result.

    Args:
        nhmmstates: Indexable whose first element is used as the number of
            HMM states.
        nestimators: Indexable whose first element, multiplied by 100, is
            used as ``n_estimators`` of the random forest.
        nhmmiter: Indexable whose first element, multiplied by 10, is used as
            the number of HMM training iterations.

    Returns:
        The negated mean of the per-fold ``RandomForestClassifier.score``
        values (negated presumably so that a minimising optimiser maximises
        the score -- confirm against the caller).
    """
    nhmmstates = nhmmstates[0]
    nestimators = nestimators[0] * 100
    nhmmiter = nhmmiter[0] * 10
    nfolds = 5
    hmmcovtype = 'full'
    # Single-argument print(...) emits the same text on Python 2 and 3.
    print('%s %s %s' % (nhmmstates, nestimators, nhmmiter))

    # Load the dataset
    data_dir = '/storage/hpc_anna/GMiC/Data/BPIChallenge/f1/preprocessed/'
    static_train = np.load(data_dir + 'train_static.npy')
    dynamic_train = np.load(data_dir + 'train_dynamic.npy')
    static_val = np.load(data_dir + 'test_static.npy')
    dynamic_val = np.load(data_dir + 'test_dynamic.npy')
    labels_train = np.load(data_dir + 'train_labels.npy')
    labels_val = np.load(data_dir + 'test_labels.npy')

    # Merge train and test
    static_all = np.concatenate((static_train, static_val), axis=0)
    dynamic_all = np.concatenate((dynamic_train, dynamic_val), axis=0)
    labels_all = np.concatenate((labels_train, labels_val), axis=0)
    nsamples = static_all.shape[0]
    all_idx = np.arange(nsamples)

    # prepare where to store the ratios
    ratios_all_hmm = np.empty(len(labels_all))

    # split indices into folds
    enrich_idx_list = np.array_split(all_idx, nfolds)

    # (1.) run CV to obtain out-of-fold HMM ratios for every sample
    for enrich_idx in enrich_idx_list:
        # Sorted complement of the held-out fold; unlike a set difference its
        # ordering is guaranteed, so training input is reproducible.
        train_idx = np.setdiff1d(all_idx, enrich_idx)

        # extract predictions using HMM on dynamic
        hmmcl = HMMClassifier()
        model_pos, model_neg = hmmcl.train(nhmmstates, nhmmiter, hmmcovtype,
                                           dynamic_all[train_idx],
                                           labels_all[train_idx])
        ratios_all_hmm[enrich_idx] = hmmcl.pos_neg_ratios(
            model_pos, model_neg, dynamic_all[enrich_idx])

    # dataset for hybrid learning: static features plus one ratio column.
    # A plain ndarray column is used instead of np.matrix so that the
    # enriched table stays an ordinary ndarray.
    enriched_by_hmm = np.concatenate(
        (static_all, ratios_all_hmm[:, np.newaxis]), axis=1)

    # (2.) k-fold cross validation to obtain accuracy
    val_idx_list = np.array_split(all_idx, nfolds)
    scores = []
    for val_idx in val_idx_list:
        train_idx = np.setdiff1d(all_idx, val_idx)

        # Hybrid on features enriched by HMM (3)
        rf = RandomForestClassifier(n_estimators=nestimators)
        rf.fit(enriched_by_hmm[train_idx], labels_all[train_idx])
        scores.append(rf.score(enriched_by_hmm[val_idx],
                               labels_all[val_idx]))

    mean_score = np.mean(scores)
    print('Result: %.4f' % mean_score)
    return -mean_score