def classif_psd(state, elec, freq, n_jobs=-1):
    """Classify one (state, electrode, frequency) PSD file and save the result.

    Group and label vectors are rebuilt from ``info_data.csv``: every subject
    contributes the same number of trials (the global minimum found in the
    ``STATE_LIST`` columns), the first half of the trials is labelled 0 and
    the second half 1.

    Parameters
    ----------
    state, elec, freq
        Used to build the name of the input ``.mat`` file and of the result
        file written under ``SAVE_PATH / "results"``.
    n_jobs : int, default -1
        Forwarded to ``classification``.
    """
    trial_counts = pd.read_csv(SAVE_PATH.parent / "info_data.csv")[STATE_LIST]
    n_trials = trial_counts.min().min()
    n_subs = len(trial_counts) - 1
    n_total = n_trials * n_subs

    # One group id per subject, repeated once for each of its trials.
    groups = [subject for subject in range(n_subs) for _ in range(n_trials)]
    labels = [int(trial >= n_total / 2) for trial in range(n_total)]

    print(state, elec, freq)

    name_template = "_{}_{}_{}_{}_{:.2f}.mat"
    data_file_name = NAME + name_template.format(
        state, freq, elec, WINDOW, OVERLAP
    )
    data_file_path = SAVE_PATH / data_file_name
    save_file_path = SAVE_PATH / "results" / (PREFIX + data_file_name)

    features = prepare_data(
        loadmat(data_file_path), n_trials=n_trials, random_state=666
    )
    # Single-feature design matrix: one column, one row per trial.
    features = np.array(features).reshape(len(features), 1)

    results = classification(
        LDA(solver=SOLVER),
        StratifiedLeave2GroupsOut(),
        features,
        labels,
        groups,
        N_PERM,
        n_jobs=n_jobs,
    )
    savemat(save_file_path, results)
def main(state, freq):
    """Run backward channel selection for one (state, frequency) pair.

    Builds the label/group vectors according to the ``FULL_TRIAL`` /
    ``SUBSAMPLE`` flags, loads the matching data file, runs
    ``backward_selection`` with a ``TSclassifier`` wrapping LDA, and saves the
    best channel combination and its score under ``SAVE_PATH / "results"``.

    Nothing is computed when the result file already exists. When the input
    data file is missing, a message is printed and the function returns.

    Parameters
    ----------
    state, freq
        Used to build the input and result file names.
    """
    print(state, freq)
    if FULL_TRIAL:
        labels = np.concatenate((np.ones(18), np.zeros(18)))
        groups = range(36)
    elif SUBSAMPLE:
        info_data = pd.read_csv(SAVE_PATH.parent / "info_data.csv")[STATE_LIST]
        n_trials = info_data.min().min()
        n_subs = len(info_data) - 1
        # One group id per subject, repeated for each of its trials.
        groups = [i for i in range(n_subs) for _ in range(n_trials)]
        n_total = n_trials * n_subs
        labels = [0 if i < n_total / 2 else 1 for i in range(n_total)]
    else:
        labels = loadmat(LABEL_PATH / (state + "_labels.mat"))["y"].ravel()
        labels, groups = create_groups(labels)

    file_name = NAME + "_{}_{}_{}_{:.2f}.mat".format(
        state, freq, WINDOW, OVERLAP)
    file_path = SAVE_PATH / "results" / (PREFIX + file_name)
    if file_path.isfile():
        # Results already computed for this configuration.
        return

    data_file_path = SAVE_PATH / file_name
    if not data_file_path.isfile():
        # Fixed: the attribute is ``name``; ``.NAME`` raised AttributeError.
        print(data_file_path.name + " Not found")
        return

    random_seed = 0
    data = loadmat(data_file_path)
    if FULL_TRIAL:
        data = data["data"]
    elif SUBSAMPLE:
        data = prepare_data(data, n_trials=n_trials, random_state=random_seed)
    else:
        data = prepare_data(data)

    sl2go = StratifiedLeave2GroupsOut()
    clf = TSclassifier(clf=LDA())

    best_combin, best_score = backward_selection(
        clf, data, labels, sl2go, groups)

    best_names = CHANNEL_NAMES[best_combin]
    final_save = {
        "best_combin_index": best_combin,
        "best_combin": best_names,
        "score": best_score,
    }
    savemat(file_path, final_save)

    print(f"Best combin: {best_names}, score: {best_score}")
def main(state, elec):
    """Classify one (state, electrode) pair using all non-Gamma PSD bands.

    For every key of ``FREQ_DICT`` that does not start with ``'Gamma'`` the
    matching PSD file is loaded, flattened into a single column and stacked
    horizontally, giving one feature column per frequency band. The stacked
    matrix is classified with LDA and the result is saved under
    ``SAVE_PATH / 'results'``.

    Nothing is computed when the result file already exists. Missing PSD
    files are reported and skipped; when no PSD file could be loaded at all
    the function returns without classifying.

    Parameters
    ----------
    state, elec
        Used to build the label, PSD and result file names.
    """
    labels = loadmat(LABEL_PATH / (state + '_labels.mat'))['y'].ravel()
    labels, groups = create_groups(labels)
    final_data = None

    print(state, elec)
    results_file_path = SAVE_PATH / 'results' / \
        'perm_PSDM_{}_{}_{}_{:.2f}_NoGamma.mat'.format(
            state, elec, WINDOW, OVERLAP)
    if path(results_file_path).isfile():
        return

    for key in FREQ_DICT:
        if key.startswith('Gamma'):
            continue

        data_file_path = SAVE_PATH / \
            'PSD_{}_{}_{}_{}_{:.2f}.mat'.format(
                state, key, elec, WINDOW, OVERLAP)

        if not path(data_file_path).isfile():
            print(path(data_file_path).name + ' Not found')
            # Adjacent literals instead of a backslash continuation inside
            # the string, which embedded source indentation in the message.
            print('please run "computePSD.py" and '
                  '"group_PSD_per_subjects.py" before '
                  'running this script')
            continue

        temp = loadmat(data_file_path)['data'].ravel()
        # Same ordering as the previous prepend-in-a-loop: the last sub-matrix
        # comes first. Done in one concatenate instead of one per sub-matrix.
        # NOTE(review): this reverses the order of the loaded sub-matrices;
        # confirm it matches the order of `labels` / `groups`.
        data = np.concatenate([submat.ravel() for submat in temp[::-1]])
        data = data.reshape(len(data), 1)
        final_data = data if final_data is None \
            else np.hstack((final_data, data))

    if final_data is None:
        # Fixed: previously `None` was handed to the classifier.
        print('No PSD data loaded for {} {}, skipping classification'.format(
            state, elec))
        return

    sl2go = StratifiedLeave2GroupsOut()
    clf = LDA()
    save = classification(clf, sl2go, final_data, labels, groups, n_jobs=-1)

    savemat(results_file_path, save)
X = None # print('\nloading PSD for %s frequencies' % key) if sleep_state == "NREM": for n in ["S1", "S2", "SWS"]: X = ( load_data(save_path, n, key, elec, window, overlap) if X is None else X.vstack(load_data(n, key, elec, window, overlap)) ) else: X = load_data( save_path, sleep_state, key, elec, window, overlap ) t3 = time() # print('Classification...') sl2go = StratifiedLeave2GroupsOut() clf = LDA() scores = [] pvalue = 0 good_score = cross_val_score( cv=sl2go, estimator=clf, X=X, y=y, groups=groups, n_jobs=-1 ).mean() for perm in range(n_permutations): clf = LDA() perm_set = permutation(len(y)) y_perm = y[perm_set] groups_perm = groups[perm_set] scores.append( cross_val_score( cv=sl2go, estimator=clf,
# Nested cross-validation of an RBF SVC, one PSD file per frequency band.
# For each key of FREQ_DICT the outer StratifiedLeave2GroupsOut split provides
# a train/validation partition; hyper-parameters are searched on the train
# part with an inner StratifiedLeave2GroupsOut and scored on the validation
# part.
for freq in FREQ_DICT:
    print(STATE, ELEC, freq)
    data_file_name = "PSD_{}_{}_{}_{}_{:.2f}.mat".format(
        STATE, freq, ELEC, WINDOW, OVERLAP
    )
    save_file_name = PREFIX + data_file_name
    data_file_path = SAVE_PATH / data_file_name
    save_file_path = SAVE_PATH / "results" / save_file_name
    # Skip bands whose result file already exists.
    if not save_file_path.isfile():
        data = loadmat(data_file_path)
        data = prepare_data(data)
        # Single-feature design matrix: one column, one row per sample.
        data = np.array(data).reshape(len(data), 1)
        cross_val = StratifiedLeave2GroupsOut()
        # Per-outer-fold accumulators.
        # NOTE(review): "best_params" is not appended to within this span.
        save = {"score": [], "cv_results": [], "best_score": [], "best_params": []}
        for train_index, test_index in cross_val.split(data, LABELS, GROUPS):
            # LABELS and GROUPS are indexed with index arrays here, so they
            # must support that kind of indexing (e.g. numpy arrays).
            train_set, validation_set = data[train_index], data[test_index]
            train_labs, validation_labs = LABELS[train_index], LABELS[test_index]
            train_groups, validation_groups = GROUPS[train_index], GROUPS[test_index]
            nested_cv = StratifiedLeave2GroupsOut()
            clf = SVC(kernel="rbf")
            # Candidate values 1e-3 ... 1e2 for both C and gamma.
            parameters = {"C": np.logspace(-3, 2, 6), "gamma": np.logspace(-3, 2, 6)}
            # NOTE(review): n_iter=1 -- presumably a single sampled parameter
            # setting per outer fold; confirm this is intended.
            random_search = RS(clf, parameters, n_iter=1, n_jobs=-1, cv=nested_cv)
            random_search.fit(X=train_set, y=train_labs, groups=train_groups)
            score = random_search.score(validation_set, validation_labs)
            save["score"].append(score)
            save["cv_results"].append(random_search.cv_results_)
            save["best_score"].append(random_search.best_score_)
def classif_cov(state):
    """Classify the data of one state, with resumable repetitions.

    Label/group vectors are built according to the ``FULL_TRIAL`` /
    ``SUBSAMPLE`` flags. The classification is repeated for
    ``i in range(n_rep, N_BOOTSTRAPS)``, where ``n_rep`` is read from an
    existing result file (0 when there is none), and the accumulated results
    are written to the result file after every repetition so an interrupted
    run can be resumed.

    When ``REDUCED`` is set, repetition ``i`` drops row ``i`` and column ``i``
    from every matrix before classification. When ``BOOTSTRAP`` is set, the
    accumulated ``auc_score`` / ``acc_score`` are replaced by their mean
    before the final save.

    Parameters
    ----------
    state
        Used to build the label, data and result file names.
    """
    print(state)
    if FULL_TRIAL:
        labels = np.concatenate((np.ones(18), np.zeros(18)))
        groups = range(36)
    elif SUBSAMPLE:
        info_data = pd.read_csv(SAVE_PATH.parent / "info_data.csv")[STATE_LIST]
        n_trials = info_data.min().min()
        n_subs = len(info_data) - 1
        # One group id per subject, repeated for each of its trials.
        groups = [i for i in range(n_subs) for _ in range(n_trials)]
        n_total = n_trials * n_subs
        labels = [0 if i < n_total / 2 else 1 for i in range(n_total)]
    else:
        labels = loadmat(LABEL_PATH / (state + "_labels.mat"))["y"].ravel()
        labels, groups = create_groups(labels)

    file_name = NAME + "_{}.mat".format(state)
    file_path = SAVE_PATH / "results" / (PREFIX + file_name)

    if not file_path.isfile():
        n_rep = 0
    else:
        # Resume from the checkpoint written by a previous run.
        final_save = proper_loadmat(file_path)
        n_rep = final_save["n_rep"]

    print("starting from i={}".format(n_rep))

    data_file_path = SAVE_PATH / file_name
    if not data_file_path.isfile():
        print(data_file_path.name + " Not found")
        return

    data_og = loadmat(data_file_path)
    # Stays None when the checkpoint already covers every repetition.
    save = None
    for i in range(n_rep, N_BOOTSTRAPS):
        if FULL_TRIAL:
            data = data_og["data"]
        elif SUBSAMPLE:
            data = prepare_data(data_og, n_trials=n_trials, random_state=i)
        else:
            data = prepare_data(data_og)

        if REDUCED:
            # Remove row i and column i from every matrix.
            data = np.asarray(
                [np.delete(np.delete(submat, i, 0), i, 1) for submat in data]
            )

        if FULL_TRIAL:
            crossval = SSS(9)
        else:
            crossval = StratifiedLeave2GroupsOut()
        clf = TSclassifier(clf=LDA())
        save = classification(
            clf, crossval, data, labels, groups, N_PERM, n_jobs=-1
        )

        print(save["acc_score"])
        if i == 0:
            final_save = save
        elif BOOTSTRAP or REDUCED:
            for key, value in save.items():
                final_save[key] += value

        # Checkpoint after every repetition.
        final_save["n_rep"] = i + 1
        savemat(file_path, final_save)

    final_save["n_rep"] = N_BOOTSTRAPS
    if BOOTSTRAP:
        final_save["auc_score"] = np.mean(final_save["auc_score"])
        final_save["acc_score"] = np.mean(final_save["acc_score"])
    savemat(file_path, final_save)

    if save is None:
        # No repetition ran in this call, so there is nothing to summarise
        # (previously this path raised NameError on `save`).
        return

    # Fixed: the format string had one more placeholder than arguments,
    # which raised TypeError.
    print(
        "accuracy for %s : %0.2f (+/- %0.2f)"
        % (state, np.mean(save["acc_score"]), np.std(save["acc"]))
    )
    if PERM:
        print("pval = {}".format(save["acc_pvalue"]))
def main(state):
    """Classify the data of one state over ``N_BOOTSTRAPS`` repetitions.

    Label/group vectors are built according to the ``FULL_TRIAL`` /
    ``SUBSAMPLE`` flags. Each repetition reloads the data file, prepares it
    (with ``random_state=i`` when subsampling) and classifies it with a
    ``TSclassifier`` wrapping LDA. The per-repetition results are accumulated
    key by key with ``+`` and saved once under ``SAVE_PATH / "results"``.

    Nothing is computed when the result file already exists. When the input
    data file is missing, a message is printed.

    Parameters
    ----------
    state
        Used to build the label, data and result file names.
    """
    print(state)
    if FULL_TRIAL:
        labels = np.concatenate((np.ones(18), np.zeros(18)))
        groups = range(36)
    elif SUBSAMPLE:
        info_data = pd.read_csv(SAVE_PATH.parent / "info_data.csv")[STATE_LIST]
        ##### FOR A TEST #####
        info_data = info_data["SWS"]
        ##### FOR A TEST #####
        n_trials = info_data.min().min()
        n_subs = len(info_data) - 1
        # Fixed: groups must be subject-major (all trials of subject 0, then
        # all trials of subject 1, ...). The previous trial-major ordering
        # cycled through the subjects, so with the half/half labels below
        # every group contained both classes.
        groups = [i for i in range(n_subs) for _ in range(n_trials)]
        n_total = n_trials * n_subs
        labels = [0 if i < n_total / 2 else 1 for i in range(n_total)]
    else:
        labels = loadmat(LABEL_PATH / (state + "_labels.mat"))["y"].ravel()
        labels, groups = create_groups(labels)

    file_name = prefix + name + "n153_{}.mat".format(state)
    save_file_path = SAVE_PATH / "results" / file_name
    if save_file_path.isfile():
        # Results already computed for this state.
        return

    data_file_path = SAVE_PATH / (name + "_{}.mat".format(state))
    if not data_file_path.isfile():
        print(data_file_path.name + " Not found")
        return

    final_save = None
    for i in range(N_BOOTSTRAPS):
        data = loadmat(data_file_path)
        if FULL_TRIAL:
            data = data["data"]
        elif SUBSAMPLE:
            data = prepare_data(data, n_trials=n_trials, random_state=i)
        else:
            data = prepare_data(data)

        sl2go = StratifiedLeave2GroupsOut()
        clf = TSclassifier(clf=LDA())
        save = classification(
            clf, sl2go, data, labels, groups, N_PERM, n_jobs=-1
        )
        # Wrapped in lists so that `+` below appends one entry per repetition.
        save["acc_bootstrap"] = [save["acc_score"]]
        save["auc_bootstrap"] = [save["auc_score"]]
        if final_save is None:
            final_save = save
        else:
            for key in final_save:
                final_save[key] = final_save[key] + save[key]

    savemat(save_file_path, final_save)

    # Summary of the last repetition only.
    print(
        "accuracy for %s : %0.2f (+/- %0.2f)"
        % (state, save["acc_score"], np.std(save["acc"]))
    )
def main(elec):
    """Exhaustive frequency selection with optional permutation testing.

    The PSD files of every frequency in ``FREQS`` are loaded for ``STATE`` and
    ``elec`` and stacked per subject. Subjects are split with a leave-two-
    groups-out scheme: on each split the feature selector is fitted on the
    training subjects (with its own nested leave-two-groups-out) and an LDA
    restricted to the selected features is scored on the held-out subjects.

    When ``PERM`` is set, each split additionally refits/scores the LDA
    ``N_PERM`` times on trial-level shuffled labels and derives a p-value
    with ``compute_pval``.

    Nothing is computed when the result file already exists.
    """
    final_data = None

    print(STATE, elec)
    results_name = "EFS_NoGamma_{}_{}_{}_{:.2f}.mat".format(
        STATE, elec, WINDOW, OVERLAP)
    results_file_path = SAVE_PATH / "results" / results_name
    if results_file_path.isfile():
        return

    for freq in FREQS:
        psd_name = "PSD_{}_{}_{}_{}_{:.2f}.mat".format(
            STATE, freq, elec, WINDOW, OVERLAP)
        data_file_path = SAVE_PATH / psd_name
        band = loadmat(data_file_path)["data"].ravel()
        if final_data is None:
            final_data = band
            continue
        # Append this band to every subject's accumulated matrix, in place.
        for idx in range(len(final_data)):
            final_data[idx] = np.concatenate(
                (final_data[idx], band[idx]), axis=0)

    final_data = np.array([np.transpose(mat) for mat in final_data])

    lil_labels = np.asarray([0] * 18 + [1] * 18)
    lil_groups = list(range(36))
    sl2go = StratifiedLeave2GroupsOut()

    best_freqs = []
    pvalues, pscores = [], []
    test_scores, best_scores = [], []

    splits = sl2go.split(final_data, lil_labels, lil_groups)
    for train_subjects, test_subjects in splits:
        x_train = final_data[train_subjects]
        x_test = final_data[test_subjects]
        y_train = lil_labels[train_subjects]
        y_test = lil_labels[test_subjects]

        # Expand the per-subject label to one label per trial.
        y_train = [
            [label] * len(trials) for trials, label in zip(x_train, y_train)
        ]
        y_train, groups = create_groups(y_train)
        x_train = np.concatenate(x_train[:], axis=0)

        selector = EFS(
            estimator=LDA(),
            max_features=5,
            cv=StratifiedLeave2GroupsOut(),
            n_jobs=-1,
        ).fit(x_train, y_train, groups)
        best_idx = selector.best_idx_
        best_freqs.append(list(FREQS[list(best_idx)]))
        best_scores.append(selector.best_score_)

        test_clf = LDA()
        test_clf.fit(x_train[:, best_idx], y_train)

        y_test = [
            [label] * len(trials) for trials, label in zip(x_test, y_test)
        ]
        y_test, _ = create_groups(y_test)
        x_test = np.concatenate(x_test[:], axis=0)
        test_score = test_clf.score(x_test[:, best_idx], y_test)
        test_scores.append(test_score)

        if not PERM:
            continue

        pscores_cv = []
        for _ in range(N_PERM):
            # Labels are reshuffled cumulatively, train first then test.
            y_train = np.random.permutation(y_train)
            y_test = np.random.permutation(y_test)

            perm_clf = LDA()
            perm_clf.fit(x_train[:, best_idx], y_train)
            pscores_cv.append(perm_clf.score(x_test[:, best_idx], y_test))

        pvalues.append(compute_pval(test_score, pscores_cv))
        pscores.append(pscores_cv)

    results = {
        "score": np.mean(test_scores),
        "train_scores": best_scores,
        "test_scores": test_scores,
        "freqs": best_freqs,
        "pvalue": pvalues,
        "pscores": pscores,
    }
    savemat(results_file_path, results)
def main(state, elec):
    """Add permutation scores and p-values to an existing EFS result file.

    The result file ``EFS_NoGamma_<state>_<elec>_...mat`` is loaded to get,
    for every leave-two-groups-out split, the selected frequencies and the
    test score. The same splits are regenerated; on each split an LDA
    restricted to the selected frequencies is fitted and scored ``N_PERM``
    times on trial-level shuffled labels, and a p-value is derived with
    ``compute_pval`` from that split's permutation scores.

    The loaded results, extended with ``"pvalue"`` (one value per split) and
    ``"pscores"`` (one list of permutation scores per split), are written
    back to the same file.

    Parameters
    ----------
    state, elec
        Used to build the result file name and forwarded to ``load_data``.
    """
    file_name = f"EFS_NoGamma_{state}_{elec}_{WINDOW}_{OVERLAP:.2f}.mat"
    print(file_name)
    file_path = RESULT_PATH / file_name
    # Kept under its own name: it was previously overwritten by the feature
    # data below, so the final update and save targeted the wrong object.
    results = loadmat(file_path)

    lil_labels = np.asarray([0] * 18 + [1] * 18)
    lil_groups = list(range(36))
    sl2go = StratifiedLeave2GroupsOut()

    best_freqs = list(results["freqs"].ravel())
    scores = list(results["test_scores"].ravel())

    data = load_data(state, elec)
    pscores = []
    pvalues = []
    splits = sl2go.split(data, lil_labels, lil_groups)
    for fold, (train_subjects, test_subjects) in enumerate(splits):
        x_feature, x_classif = data[train_subjects], data[test_subjects]
        y_feature = lil_labels[train_subjects]
        y_classif = lil_labels[test_subjects]

        # Expand the per-subject label to one label per trial; shape[1] is
        # used as the trial count of each subject matrix.
        y_feature = [
            np.array([label] * x_feature[i].shape[1])
            for i, label in enumerate(y_feature)
        ]
        y_feature, _ = create_groups(y_feature)
        y_classif = [
            np.array([label] * x_classif[i].shape[1])
            for i, label in enumerate(y_classif)
        ]
        y_classif, _ = create_groups(y_classif)

        print(best_freqs[fold])
        best_idx = [
            FREQS.index(value.strip().capitalize())
            for value in best_freqs[fold]
        ]
        x_classif = np.concatenate(x_classif[:], axis=1).T
        x_feature = np.concatenate(x_feature[:], axis=1).T

        # Fixed: permutation scores are collected per split, so each p-value
        # is computed against its own split's null distribution instead of
        # the scores pooled from all previous splits.
        pscores_cv = []
        for _ in range(N_PERM):
            y_feature = np.random.permutation(y_feature)
            y_classif = np.random.permutation(y_classif)

            clf = LDA()
            clf.fit(x_feature[:, best_idx], y_feature)
            pscores_cv.append(clf.score(x_classif[:, best_idx], y_classif))

        pvalues.append(compute_pval(scores[fold], pscores_cv))
        pscores.append(pscores_cv)

    results["pvalue"] = pvalues
    results["pscores"] = pscores

    savemat(file_path, results)