# Per-subject experiment driver: loads preprocessed train/test feature sets for
# three subjects using the feature groups listed in `feat_select` (defined
# elsewhere in this file — not visible in this chunk).
d_data_test = dict()  # accumulator for test-set results; populated later, presumably below this chunk
settings = Settings()
print settings  # Settings presumably defines __str__/__repr__ for a config dump — Python 2 print statement
for i in range(0, 3):
    # Subjects are 1-indexed (1..3); `i` also indexes into feat_select.
    nsubject = i + 1
    K = [settings.kfoldCV]  # NOTE(review): wrapped in a list here, unlike the scalar K in the sibling script — confirm intentional
    R = settings.repeatCVouter
    # XTRAIN, ytrain, aFeatNames, aFiles_tr, plabels, data_q = load_features('train', nsubject, feat_select)
    # XTEST, ytest, aFeatNames_ts, dummy4, dummy5, dummy3 = load_features('test', nsubject, feat_select)
    d_tr, d_ts = load_features_and_preprocess(nsubject, feat_select[i], settings=settings)
    # Unpack the 7-element results: features, labels, feature names, file names,
    # group/patient labels, data quality, and NaN indices (presumed meanings from
    # the variable names — helper is project-local; verify against its docstring).
    XTRAIN, ytrain, aFeatNames_tr, aFiles_tr, plabels_tr, data_q_tr, ind_nan_tr = d_tr[0], d_tr[1], d_tr[2], d_tr[3], \
        d_tr[4], d_tr[5], d_tr[6]
    XTEST, ytest, aFeatNames_ts, aFiles_ts, plabels_ts, data_q_ts, ind_nan_ts = d_ts[0], d_ts[1], d_ts[2], d_ts[3], \
        d_ts[4], d_ts[5], d_ts[6]
    # XTRAIN, ytrain, aFeatNames_tr, aFiles_tr, plabels_tr, data_q_tr = load_features('train', nsubject, feat_select[i])
    # XTEST, ytest, aFeatNames_ts, aFiles_ts, plabels_ts, data_q_ts = load_features('test', nsubject, feat_select[i])
    # pp.fit(XTRAIN, XTEST, drop_nan=True)
    #
    # print '####### Subject: ', nsubject
    # print '-- Original dataset'
    # print XTRAIN.shape
    # print ytrain.shape
    #
# Classifier-comparison driver: loads the full feature matrix for one subject
# (`nsubject` and `feat_select_unique` are defined outside this chunk) and
# iterates over a list of classifiers `aclf`, selecting per-classifier feature
# groups from the combined matrix.
settings = Settings()
print settings  # config dump — Python 2 print statement
K = settings.kfoldCV
R = settings.repeatCV
# settings.remove_covariate_shift = False
nr_bins = 25  # histogram bin count, presumably for probability calibration — confirm downstream use
# Probability-calibration algorithm: hard-coded to 'rank' here, overriding the
# settings default (alternatives kept as commented-out options).
# prob_calib_alg = settings.prob_calib_alg
# prob_calib_alg = None
prob_calib_alg = 'rank'
# prob_calib_alg = 'median_centered'
d_tr, d_ts = load_features_and_preprocess(nsubject, feat_select_unique, settings=settings, verbose=False)
# Unpack the 7-element results (names suggest: features, labels, feature names,
# file names, group labels, data quality, NaN indices — project helper; verify).
XTRAIN_ALL, ytrain, aFeatNames_tr_all, aFiles_tr, plabels_tr, data_q_tr, ind_nan_tr = d_tr[0], d_tr[1], d_tr[2], d_tr[3], \
    d_tr[4], d_tr[5], d_tr[6]
XTEST_ALL, ytest, aFeatNames_ts_all, aFiles_ts, plabels_ts, data_q_ts, ind_nan_ts = d_ts[0], d_ts[1], d_ts[2], d_ts[3], \
    d_ts[4], d_ts[5], d_ts[6]
# One column of out-of-fold (or training) predictions per classifier, plus one
# AUC per classifier.
y_all_clf = np.zeros((XTRAIN_ALL.shape[0], len(aclf)))
auc_all = np.zeros((len(aclf), 1))
for i, clf in enumerate(aclf):
    print XTRAIN_ALL.shape
    # Restrict the combined matrix to this classifier's feature group.
    # NOTE(review): this call is truncated — its remaining arguments and the
    # rest of the loop body continue beyond this chunk.
    XTRAIN, aFeatNames_tr, dummy3 = select_feature_group(
        XTRAIN_ALL, aFeatNames_tr_all,
    # Tail of a function whose `def` lies above this chunk — only its final
    # return is visible here.
    return vals


# Build a long-format summary table of per-electrode feature counts: for each
# feature group in `afeat_select` (defined outside this chunk), count total vs.
# removed features on each of 16 electrodes for subject 2.
# index=range(0, 200) pre-allocates rows; 200 must be >= len(afeat_select) * 16
# for the .loc writes below to stay within the pre-built index.
df = pd.DataFrame(columns=['feat_group', 'electrode', 'total', 'removed'], index=range(0, 200))
# print df
index = -1  # running row index into df; incremented before first use
nsubject = 2  # hard-coded subject under analysis
for sfeat in afeat_select:
    # Only the feature-name list (element 2) of the training tuple is needed.
    d_tr, dummy1 = load_features_and_preprocess(nsubject, [sfeat], settings=settings, verbose=False)
    feat_names = d_tr[2]
    feat_names_removed = load_removed_features(nsubject, [sfeat])
    # feat_names_removed += load_removed_features(nsubject, ['stat_spectral_sp_entropy_mfj_corr'])
    # Per-electrode counts; indexing nr_total[iel, 0] below implies a 2-D
    # (16, 1)-like array is returned — confirm against get_nr_feat_electrode.
    nr_total = get_nr_feat_electrode(feat_names)
    nr_removed = get_nr_feat_electrode(feat_names_removed)
    for iel in range(0, 16):
        index += 1
        # Electrodes are reported 1-based (iel + 1) in the table.
        df['feat_group'].loc[index] = sfeat
        df['total'].loc[index] = nr_total[iel, 0]
        df['electrode'].loc[index] = iel + 1
        df['removed'].loc[index] = nr_removed[iel, 0]
# print("TRAIN:", train_index, "TEST:", test_index) # # print X[train_index] # # print X[test_index] # # X_train, X_test = X[train_index], X[test_index] # # y_train, y_test = y[train_index], y[test_index] from utils import load_features_and_preprocess from spp_ut_settings import Settings # from sklearn.utils import shuffle settings = Settings() settings.remove_outliers = False settings.standardize = False settings.drop_nan = False d_tr, d_ts = load_features_and_preprocess(3, ['stat'], settings, verbose=True) XTRAIN, ytrain, aFeatNames_tr, aFiles_tr, plabels_tr, data_q_tr, ind_nan_tr = d_tr[0], d_tr[1], d_tr[2], d_tr[3], \ d_tr[4], d_tr[5], d_tr[6] ytrain = ytrain.ravel() XTRAIN, ytrain, plabels_tr = insert_pathol_to_normal_random_keep_order(XTRAIN, ytrain, plabels_tr) tscv = TimeSeriesSplitGroupSafe(n_splits=100) p = np.unique(plabels_tr) for train_index, test_index in tscv.split(XTRAIN, ytrain, plabels_tr): # pass print("TRAIN:", train_index, "TEST:", test_index) print("Groups TRAIN:", plabels_tr[train_index], "Groups TEST:", plabels_tr[test_index])