Example #1
def main(state, freq):
    """Where the magic happens"""
    print(state, freq)
    if FULL_TRIAL:
        labels = np.concatenate((np.ones(18), np.zeros(18)))
        groups = range(36)
    elif SUBSAMPLE:
        info_data = pd.read_csv(SAVE_PATH.parent / "info_data.csv")[STATE_LIST]
        n_trials = info_data.min().min()
        n_subs = len(info_data) - 1
        groups = [i for i in range(n_subs) for _ in range(n_trials)]
        n_total = n_trials * n_subs
        labels = [0 if i < n_total / 2 else 1 for i in range(n_total)]
    else:
        labels = loadmat(LABEL_PATH / state + "_labels.mat")["y"].ravel()
        labels, groups = create_groups(labels)

    file_path = (SAVE_PATH / "results" / PREFIX + NAME +
                 "_{}_{}_{}_{:.2f}.mat".format(state, freq, WINDOW, OVERLAP))

    if not file_path.isfile():
        file_name = NAME + "_{}_{}_{}_{:.2f}.mat".format(
            state, freq, WINDOW, OVERLAP)
        data_file_path = SAVE_PATH / file_name

        if data_file_path.isfile():
            final_save = {}

            random_seed = 0
            data = loadmat(data_file_path)
            if FULL_TRIAL:
                data = data["data"]
            elif SUBSAMPLE:
                data = prepare_data(data,
                                    n_trials=n_trials,
                                    random_state=random_seed)
            else:
                data = prepare_data(data)

            sl2go = StratifiedLeave2GroupsOut()
            lda = LDA()
            clf = TSclassifier(clf=lda)
            best_combin, best_score = backward_selection(
                clf, data, labels, sl2go, groups)

            final_save = {
                "best_combin_index": best_combin,
                "best_combin": CHANNEL_NAMES[best_combin],
                "score": best_score,
            }
            savemat(file_path, final_save)

            print(
                f"Best combin: {CHANNEL_NAMES[best_combin]}, score: {best_score}"
            )

        else:
            print(data_file_path.name + " Not found")
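Every example on this page leans on utils.create_groups, whose source is not
shown. Below is a minimal sketch consistent with how prepare_data calls it in
Example #2 (one list of labels per subject in, a flat label vector plus a
subject-index group vector out). The real helper is evidently more general,
since it also accepts the flat label vectors loaded from .mat files, so treat
this purely as an illustrative assumption:

import numpy as np

def create_groups(labels):
    # Hypothetical sketch: flatten per-subject label lists and build a
    # parallel `groups` vector mapping every trial to its subject index.
    groups = np.concatenate(
        [[i] * len(sub_labels) for i, sub_labels in enumerate(labels)])
    return np.concatenate(labels), groups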
Example #2
def prepare_data(dico,
                 labels=None,
                 rm_outl=None,
                 key="data",
                 n_trials=None,
                 random_state=0):
    """Stack per-subject trial matrices into one array, optionally removing
    outliers and subsampling trials; returns (data, labels, groups)."""
    data = dico[key].ravel()
    data = np.asarray([sub.squeeze() for sub in data])
    final_data = None
    if rm_outl is not None:
        data = np.asarray([rm_outliers(sub, rm_outl) for sub in data])

    sizes = [len(sub) for sub in data]
    if n_trials is not None:
        n_sub_min = min(sizes)
        if n_trials > n_sub_min:
            print("can't take {} trials, will take the minimum amount "
                  "{} instead".format(n_trials, n_sub_min))
            n_trials = n_sub_min

        labels = np.asarray([[lab] * n_trials for lab in labels])
    elif labels is not None:
        labels = np.asarray(
            [[labels[i]] * size for i, size in enumerate(sizes)])
    else:
        raise Exception("Error: either specify a number of trials and the " +
                        "labels will be generated or give the original labels")
    labels, groups = create_groups(labels)

    for submat in data:
        if submat.shape[0] == 1:
            submat = submat.ravel()
        if n_trials is not None:
            rng = np.random.RandomState(random_state)
            index = rng.choice(len(submat), n_trials, replace=False)
            prep_submat = submat[index]
        else:
            prep_submat = submat

        final_data = (prep_submat if final_data is None else np.concatenate(
            (prep_submat, final_data)))

    return np.asarray(final_data), labels, groups
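A usage sketch for prepare_data, under stated assumptions: the loaded .mat
dict holds one cell of trials per subject under the "data" key, and the file
name and dreamer_labels below are hypothetical placeholders.

from scipy.io import loadmat

dico = loadmat("PSD_S2_Delta_1000_0.00.mat")  # hypothetical file name
dreamer_labels = [0] * 18 + [1] * 18          # one label per subject
# Subsample 20 trials per subject, reproducibly:
data, labels, groups = prepare_data(
    dico, labels=dreamer_labels, n_trials=20, random_state=0)
# Or keep every trial and expand the per-subject labels instead:
data, labels, groups = prepare_data(dico, labels=dreamer_labels)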
Example #3
def main(state, elec):
    labels = loadmat(LABEL_PATH / state + '_labels.mat')['y'].ravel()
    labels, groups = create_groups(labels)
    final_data = None

    print(state, elec)
    results_file_path = SAVE_PATH / 'results' /\
        'perm_PSDM_{}_{}_{}_{:.2f}_NoGamma.mat'.format(
            state, elec, WINDOW, OVERLAP)

    if not path(results_file_path).isfile():
        # print('\nloading PSD for {} frequencies'.format(key))

        for key in FREQ_DICT:
            if not key.startswith('Gamma'):
                data_file_path = SAVE_PATH /\
                        'PSD_{}_{}_{}_{}_{:.2f}.mat'.format(
                            state, key, elec, WINDOW, OVERLAP)

                if path(data_file_path).isfile():
                    temp = loadmat(data_file_path)['data'].ravel()
                    data = temp[0].ravel()
                    for submat in temp[1:]:
                        data = np.concatenate((submat.ravel(), data))
                    data = data.reshape(len(data), 1)
                    final_data = data if final_data is None\
                        else np.hstack((final_data, data))
                    del temp
                else:
                    print(path(data_file_path).name + ' Not found')
                    print('please run "computePSD.py" and '
                          '"group_PSD_per_subjects.py" before '
                          'running this script')

        # print('classification...')
        sl2go = StratifiedLeave2GroupsOut()
        clf = LDA()
        save = classification(clf,
                              sl2go,
                              final_data,
                              labels,
                              groups,
                              n_jobs=-1)

        savemat(results_file_path, save)
Example #4
                                                clipnorm=args.clip_norm)
        elif args.optimizer == 'rmsprop':
            optimizer = tf.keras.optimizers.RMSprop(learning_rate=args.lr,
                                                    momentum=0.9,
                                                    clipnorm=args.clip_norm)
        else:
            optimizer = tf.keras.optimizers.Adam(learning_rate=args.lr,
                                                 clipnorm=args.clip_norm)
        metrics = [
            tf.keras.metrics.Accuracy(),
            tf.keras.metrics.Precision(top_k=5)
        ]

    history = {'train': {}, 'val': {}}
    groups = utils.create_groups(
        ram_percent=args.memory_ratio,
        seq_length=args.nb_frames,
        version=args.dataset_version)  # create groups of videos to train
    for epoch in range(args.epochs // args.epochs_per_group):
        epoch_history = {'train': [], 'val': []}

        # loop over the groups
        for videos, size, train in groups:
            X = np.empty(shape=(size, args.nb_frames, 256, 256, 3),
                         dtype='uint8')
            Y = None

            i = 0
            start = time.time()
            for v in videos:  # load training/validation data
                print('\r   Loading actions: {}/{}     '.format(i, X.shape[0]),
                      end='')
    sleep_list = ["AWA", "NREM", "Rem"]
    save_path = save_path / "PSD"
    classifier = "LDA"

    print("\nClassification of dreamers vs non dreamers")
    print("features : PSD")
    print("parameters : window = %i overlap = %0.2f" % (window, overlap))
    print("Classifier : " + classifier)

    for sleep_state in sleep_list:
        print("\nProcessing state %s" % sleep_state)
        for elec in range(n_elec):
            print("electrode : %i/%i" % (elec, n_elec))
            t1 = time()
            y = loadmat(label_path / sleep_state + "_labels.mat")["y"].ravel()
            y, groups = create_groups(y)

            for key in freq_dict:

                results_file_path = (
                    save_path
                    / "results"
                    / "perm_PSD_%s_%s_%i_%i_%0.2f.mat"
                    % (sleep_state, key, elec, window, overlap)
                )
                # results_file_path = save_path / 'results' / 'perm_PSD_EOG_%s_%s_%i_%i_%0.2f.mat' % (sleep_state, key, elec, window, overlap) # EOG
                if not path(results_file_path).isfile():
                    X = None
                    # print('\nloading PSD for %s frequencies' % key)
                    if sleep_state == "NREM":
                        for n in ["S1", "S2", "SWS"]:
"""
import numpy as np
import sys
from scipy.io import savemat, loadmat
from sklearn.svm import SVC
from sklearn.model_selection import RandomizedSearchCV as RS
from utils import StratifiedLeave2GroupsOut, create_groups, prepare_data
from params import SAVE_PATH, LABEL_PATH, WINDOW, OVERLAP, FREQ_DICT

SAVE_PATH = SAVE_PATH / "psd"
PREFIX = "classif_svm_"
N_PERM = None
STATE, ELEC = sys.argv[1], sys.argv[2][:-1]

LABELS = loadmat(LABEL_PATH / STATE + "_labels.mat")["y"].ravel()
LABELS, GROUPS = create_groups(LABELS)

for freq in FREQ_DICT:
    print(STATE, ELEC, freq)

    data_file_name = "PSD_{}_{}_{}_{}_{:.2f}.mat".format(
        STATE, freq, ELEC, WINDOW, OVERLAP
    )
    save_file_name = PREFIX + data_file_name
    data_file_path = SAVE_PATH / data_file_name
    save_file_path = SAVE_PATH / "results" / save_file_name

    if not save_file_path.isfile():
        data = loadmat(data_file_path)
        data = prepare_data(data)
        data = np.array(data).reshape(len(data), 1)
Example #7
def classif_cov(state):
    """Where the magic happens"""
    print(state)
    if FULL_TRIAL:
        labels = np.concatenate((np.ones(18), np.zeros(18)))
        groups = range(36)
    elif SUBSAMPLE:
        info_data = pd.read_csv(SAVE_PATH.parent / "info_data.csv")[STATE_LIST]
        n_trials = info_data.min().min()
        n_subs = len(info_data) - 1
        groups = [i for i in range(n_subs) for _ in range(n_trials)]
        n_total = n_trials * n_subs
        labels = [0 if i < n_total / 2 else 1 for i in range(n_total)]
    else:
        labels = loadmat(LABEL_PATH / state + "_labels.mat")["y"].ravel()
        labels, groups = create_groups(labels)

    file_path = SAVE_PATH / "results" / PREFIX + NAME + "_{}.mat".format(state)
    if not file_path.isfile():
        n_rep = 0
    else:
        final_save = proper_loadmat(file_path)
        n_rep = final_save["n_rep"]
    print("starting from i={}".format(n_rep))

    file_name = NAME + "_{}.mat".format(state)
    data_file_path = SAVE_PATH / file_name

    if data_file_path.isfile():
        data_og = loadmat(data_file_path)
        for i in range(n_rep, N_BOOTSTRAPS):
            if FULL_TRIAL:
                data = data_og["data"]
            elif SUBSAMPLE:
                data = prepare_data(data_og, n_trials=n_trials, random_state=i)
            else:
                data = prepare_data(data_og)

            if REDUCED:
                reduced_data = []
                for submat in data:
                    temp_a = np.delete(submat, i, 0)
                    temp_b = np.delete(temp_a, i, 1)
                    reduced_data.append(temp_b)
                data = np.asarray(reduced_data)

            if FULL_TRIAL:
                crossval = SSS(9)
            else:
                crossval = StratifiedLeave2GroupsOut()
            lda = LDA()
            clf = TSclassifier(clf=lda)
            save = classification(clf,
                                  crossval,
                                  data,
                                  labels,
                                  groups,
                                  N_PERM,
                                  n_jobs=-1)

            print(save["acc_score"])
            if i == 0:
                final_save = save
            elif BOOTSTRAP or REDUCED:
                for key, value in save.items():
                    final_save[key] += value

            final_save["n_rep"] = i + 1
            savemat(file_path, final_save)

        final_save["n_rep"] = N_BOOTSTRAPS
        if BOOTSTRAP:
            final_save["auc_score"] = np.mean(final_save["auc_score"])
            final_save["acc_score"] = np.mean(final_save["acc_score"])
        savemat(file_path, final_save)

        print("accuracy for %s %s : %0.2f (+/- %0.2f)" %
              (state, np.mean(save["acc_score"]), np.std(save["acc"])))
        if PERM:
            print("pval = {}".format(save["acc_pvalue"]))

    else:
        print(data_file_path.name + " Not found")
Example #8
def main(state):
    """Where the magic happens"""
    print(state)
    if FULL_TRIAL:
        labels = np.concatenate((np.ones(18), np.zeros(18)))
        groups = range(36)
    elif SUBSAMPLE:
        info_data = pd.read_csv(SAVE_PATH.parent / "info_data.csv")[STATE_LIST]
        ##### FOR A TEST #####
        info_data = info_data["SWS"]
        ##### FOR A TEST #####
        N_TRIALS = info_data.min().min()
        N_SUBS = len(info_data) - 1
        # One block of N_TRIALS entries per subject, matching the layout
        # produced by prepare_data (cf. Examples #1 and #7).
        groups = [i for i in range(N_SUBS) for _ in range(N_TRIALS)]
        N_TOTAL = N_TRIALS * N_SUBS
        labels = [0 if i < N_TOTAL / 2 else 1 for i in range(N_TOTAL)]
    else:
        labels = loadmat(LABEL_PATH / state + "_labels.mat")["y"].ravel()
        labels, groups = create_groups(labels)

    file_name = prefix + name + "n153_{}.mat".format(state)

    save_file_path = SAVE_PATH / "results" / file_name

    if not save_file_path.isfile():
        data_file_path = SAVE_PATH / name + "_{}.mat".format(state)

        if data_file_path.isfile():
            final_save = None

            for i in range(N_BOOTSTRAPS):
                data = loadmat(data_file_path)
                if FULL_TRIAL:
                    data = data["data"]
                elif SUBSAMPLE:
                    data = prepare_data(data,
                                        n_trials=N_TRIALS,
                                        random_state=i)
                else:
                    data = prepare_data(data)

                sl2go = StratifiedLeave2GroupsOut()
                lda = LDA()
                clf = TSclassifier(clf=lda)
                save = classification(clf,
                                      sl2go,
                                      data,
                                      labels,
                                      groups,
                                      N_PERM,
                                      n_jobs=-1)
                save["acc_bootstrap"] = [save["acc_score"]]
                save["auc_bootstrap"] = [save["auc_score"]]
                if final_save is None:
                    final_save = save
                else:
                    for key, value in final_save.items():
                        final_save[key] = final_save[key] + save[key]

            savemat(save_file_path, final_save)

            print("accuracy for %s : %0.2f (+/- %0.2f)" %
                  (state, save["acc_score"], np.std(save["acc"])))

        else:
            print(data_file_path.name + " Not found")
Example #9
    print("Press 2 for average daily {}".format(feature))
    print("Press 3 for {} within a population".format(feature))


if __name__ == "__main__":
    os.system("clear")
    command = int(input("Press 1 for cases or 2 for deaths:\t"))
    if command == 1 or command == 2:
        feature = ""
        if command == 1:
            feature = "cases"
        elif command == 2:
            feature = "deaths"
        menu(feature)
        analysis_menu(feature)
        command = int(input())
        if command == 1:
            mongodb.display_countries()
            a = mongodb.dates_cases_totals(utils.create_groups(), feature)
            utils.plot_graphs("Number of Daily {}".format(feature), a)
        elif command == 2:
            mongodb.display_countries()
            a = mongodb.dates_cases_averages(utils.create_groups(), feature)
            utils.plot_graphs("Number of Average Daily {}".format(feature), a)
        elif command == 3:
            greater_than = int(input("You want a population greater than:\t"))
            less_than = int(input("You want a population less than:\t"))
            countries = mongodb.aggregate_population_countries(
                greater_than, less_than, feature)
            utils.plot_graphs("Population", countries)
Example #10
def main(state, elec):
    """feature selection and permutations.

    For each separation of subjects with leave 2 subjects out, we train on the
    big set (feature selection) and test on the two remaining subjects.
    for each permutation, we just permute the labels at the trial level (we
    could use permutations at the subject level, but we wouldn't get as many
    permutations)
    """
    final_data = None

    print(state, elec)
    results_file_path = (
        SAVE_PATH
        / "results"
        / "EFS_NoGamma_{}_{}_{}_{:.2f}.mat".format(state, elec, WINDOW, OVERLAP)
    )
    if not results_file_path.isfile():
        for freq in FREQS:
            data_file_path = SAVE_PATH / "psd_{}_{}_{}_{}_{:.2f}.mat".format(
                state, freq, elec, WINDOW, OVERLAP
            )

            data = loadmat(data_file_path)["data"].ravel()
            if final_data is None:
                final_data = data
            else:
                for i, submat in enumerate(final_data):
                    final_data[i] = np.concatenate((submat, data[i]), axis=0)

        final_data = np.array(list(map(np.transpose, final_data)))

        lil_labels = [0] * 18 + [1] * 18
        lil_labels = np.asarray(lil_labels)
        lil_groups = list(range(36))
        sl2go = StratifiedShuffleGroupSplit(2)

        best_freqs = []
        pvalues, pscores = [], []
        test_scores, best_scores = [], []
        for train_subjects, test_subjects in sl2go.split(
            final_data, lil_labels, lil_groups
        ):

            x_train, x_test = final_data[train_subjects], final_data[test_subjects]
            y_train, y_test = lil_labels[train_subjects], lil_labels[test_subjects]

            y_train = [[label] * len(x_train[i]) for i, label in enumerate(y_train)]
            y_train, groups = create_groups(y_train)
            x_train = np.concatenate(x_train[:], axis=0)

            nested_sl2go = StratifiedShuffleGroupSplit(2)
            clf = LDA()
            f_select = EFS(
                estimator=clf,
                max_features=x_train.shape[-1],
                cv=nested_sl2go,
                n_jobs=-1,
            )

            # Pass groups by keyword so it is not swallowed by another
            # positional parameter of EFS.fit.
            f_select = f_select.fit(x_train, y_train, groups=groups)

            best_idx = f_select.best_idx_
            best_freqs.append(list(FREQS[list(best_idx)]))
            best_scores.append(f_select.best_score_)

            test_clf = LDA()
            test_clf.fit(x_train[:, best_idx], y_train)
            y_test = [[label] * len(x_test[i]) for i, label in enumerate(y_test)]
            y_test, groups = create_groups(y_test)
            x_test = np.concatenate(x_test[:], axis=0)
            test_score = test_clf.score(x_test[:, best_idx], y_test)
            test_scores.append(test_score)

            if PERM:
                pscores_cv = []
                for _ in range(N_PERM):
                    y_train = np.random.permutation(y_train)
                    y_test = np.random.permutation(y_test)

                    clf = LDA()
                    clf.fit(x_train[:, best_idx], y_train)
                    pscore = clf.score(x_test[:, best_idx], y_test)
                    pscores_cv.append(pscore)

                pvalue = compute_pval(test_score, pscores_cv)
                pvalues.append(pvalue)
                pscores.append(pscores_cv)

        score = np.mean(test_scores)
        data = {
            "score": score,
            "train_scores": best_scores,
            "test_scores": test_scores,
            "freqs": best_freqs,
            "pvalue": pvalues,
            "pscores": pscores,
        }

        savemat(results_file_path, data)
Example #11
def main(state, elec):
    """Permutations.

    For each leave-2-subjects-out split, we train on the larger set and
    test on the two held-out subjects.
    For each permutation, we permute the labels at the trial level (we
    could permute at the subject level instead, but that would yield far
    fewer distinct permutations).
    """
    file_name = f"EFS_NoGamma_{state}_{elec}_{WINDOW}_{OVERLAP:.2f}.mat"
    print(file_name)
    file_path = RESULT_PATH / file_name
    # Keep the results dict separate: `data` is reused below for the PSDs.
    results = loadmat(file_path)

    lil_labels = [0] * 18 + [1] * 18
    lil_labels = np.asarray(lil_labels)
    lil_groups = list(range(36))
    sl2go = StratifiedLeave2GroupsOut()

    best_freqs = list(data["freqs"].ravel())
    scores = list(data["test_scores"].ravel())

    data = load_data(state, elec)
    pscores = []
    pvalues = []
    for i, (train_subjects, test_subjects) in enumerate(
            sl2go.split(data, lil_labels, lil_groups)):
        x_feature, x_classif = data[train_subjects], data[test_subjects]
        y_feature = lil_labels[train_subjects]
        y_classif = lil_labels[test_subjects]

        y_feature = [
            np.array([label] * x_feature[i].shape[1])
            for i, label in enumerate(y_feature)
        ]
        y_feature, _ = create_groups(y_feature)
        y_classif = [
            np.array([label] * x_classif[i].shape[1])
            for i, label in enumerate(y_classif)
        ]
        y_classif, _ = create_groups(y_classif)

        print(best_freqs[i])
        best_idx = [
            FREQS.index(value.strip().capitalize()) for value in best_freqs[i]
        ]
        x_classif = np.concatenate(x_classif[:], axis=1).T
        x_feature = np.concatenate(x_feature[:], axis=1).T

        for _ in range(N_PERM):
            y_feature = np.random.permutation(y_feature)
            y_classif = np.random.permutation(y_classif)

            clf = LDA()
            clf.fit(x_feature[:, best_idx], y_feature)
            pscore = clf.score(x_classif[:, best_idx], y_classif)
            pscores.append(pscore)

        score = scores[i]
        pvalue = compute_pval(score, pscores)
        pvalues.append(pvalue)

    data["pvalue"] = pvalues
    data["pscores"] = pscores

    savemat(file_path, results)