Beispiel #1
0
def load_agg_data(subject, word, experiment, voice, proc, rep_set):
    """Load last-word-aligned sentence data plus per-trial word/voice labels.

    Parameters
    ----------
    subject : subject identifier, forwarded to the loader.
    word : label key; if 'full' is in the name every trial is kept,
        otherwise only long (>5 word) sentences are retained.
    experiment : experiment name used to look up stimuli.
    voice : voice condition(s), forwarded to the loader.
    proc : preprocessing label, forwarded to the loader.
    rep_set : collection of repetition indices to keep.

    Returns
    -------
    tuple of (data, labels, voice_labels, time)
    """
    all_data, _, sen_ints, time, _ = load_data.load_sentence_data_v2(
        subject=subject,
        align_to='last',
        voice=voice,
        experiment=experiment,
        proc=proc,
        num_instances=1,
        reps_filter=lambda nr: [rep in rep_set for rep in range(nr)],
        sensor_type=None,
        is_region_sorted=False,
        tmin=TMIN[word],
        tmax=TMAX[word])
    # Rescale the signal (presumably T -> pT) — TODO confirm units.
    all_data *= 1e12
    stimuli_voice = list(load_data.read_stimuli(experiment))
    labels = []
    voice_labels = []
    if 'full' not in word:
        # Only long (>5 word) sentences survive the loop below, i.e. half
        # the trials.  Floor division keeps the shape an int under Python 3
        # (plain `/` would yield a float and np.ones would raise).
        data = np.ones(
            (all_data.shape[0] // 2, all_data.shape[1], all_data.shape[2]))
    else:
        data = all_data
    i_data = 0
    for i_sen_int, sen_int in enumerate(sen_ints):
        word_list = stimuli_voice[sen_int]['stimulus'].split()
        curr_voice = stimuli_voice[sen_int]['voice']
        if 'full' in word:
            labels.append(word_list[WORD_COLS[curr_voice][word]])
            voice_labels.append(curr_voice)
        elif len(word_list) > 5:
            # Compact long-sentence trials into the smaller output array.
            data[i_data, :, :] = all_data[i_sen_int, :, :]
            labels.append(word_list[WORD_COLS[curr_voice][word]])
            voice_labels.append(curr_voice)
            i_data += 1
    print(labels)
    print(voice_labels)
    return data, labels, voice_labels, time
Beispiel #2
0
def run_tgm_exp(experiment,
                subject,
                sen_type,
                word,
                win_len,
                overlap,
                fold,
                isPerm=False,
                alg='lr-l1',
                adj=None,
                doTimeAvg=False,
                doTestAvg=True,
                num_instances=1,
                reps_to_use=10,
                proc=load_data.DEFAULT_PROC,
                random_state_perm=1,
                force=False):
    """Run one LOSO fold of a diagonal TGM decoding experiment and save it.

    The result file name encodes every relevant parameter; the job is
    skipped when that file (or its legacy-named equivalent) already exists
    and ``force`` is False.
    """
    warnings.filterwarnings(action='ignore')

    # Make sure the per-experiment and per-subject output directories exist.
    top_dir = TOP_DIR.format(exp=experiment)
    if not os.path.exists(top_dir):
        os.mkdir(top_dir)
    save_dir = SAVE_DIR.format(top_dir=top_dir, sub=subject)
    if not os.path.exists(save_dir):
        os.mkdir(save_dir)

    # Current and legacy file names are built from the same parameter set;
    # only the template differs.
    name_parts = dict(dir=save_dir,
                      sub=subject,
                      sen_type=sen_type,
                      word=word,
                      win_len=win_len,
                      ov=overlap,
                      perm=bool_to_str(isPerm),
                      alg=alg,
                      adj=adj,
                      avgTm=bool_to_str(doTimeAvg),
                      avgTst=bool_to_str(doTestAvg),
                      inst=num_instances,
                      rep=reps_to_use,
                      rsP=random_state_perm,
                      fold=fold)
    fname = SAVE_FILE.format(**name_parts)
    old_fname = OLD_FILE.format(**name_parts)

    print(force)
    if (os.path.isfile(fname + '.npz')
            or os.path.isfile(old_fname + '.npz')) and not force:
        print('Job already completed. Skipping Job.')
        print(fname)
        return

    meg_data, labels, sen_ints, timestamps, _sensor_regions = load_data.load_sentence_data_v2(
        subject=subject,
        align_to=word,
        voice=sen_type,
        experiment=experiment,
        proc=proc,
        num_instances=num_instances,
        reps_filter=reps_to_use,
        sensor_type=None,
        is_region_sorted=False,
        tmin=None,
        tmax=None)

    # 500 Hz sampling: convert the covered time span into a sample count.
    total_win = int((timestamps.max() - timestamps.min()) * 500)
    if win_len < 0:
        win_len = total_win - overlap
    win_starts = range(0, total_win - win_len, overlap)

    # Optionally permute labels (reproducibly) for a null distribution.
    if isPerm:
        random.seed(random_state_perm)
        random.shuffle(labels)

    l_ints, cv_membership, tgm_acc, tgm_pred = models.lr_diag_loso_fold(
        meg_data,
        labels,
        win_starts,
        win_len,
        sen_ints,
        fold,
        penalty=alg[3:],
        adj=adj,
        doTimeAvg=doTimeAvg,
        doTestAvg=doTestAvg)

    np.savez_compressed(fname,
                        l_ints=l_ints,
                        cv_membership=cv_membership,
                        tgm_acc=tgm_acc,
                        tgm_pred=tgm_pred,
                        win_starts=win_starts,
                        time=timestamps,
                        proc=proc)
Beispiel #3
0
def run_tgm_exp(experiment,
                subject,
                sen_type,
                word,
                win_len,
                overlap,
                isPerm=False,
                alg='lr-l1',
                adj=None,
                doTimeAvg=False,
                doTestAvg=True,
                num_instances=1,
                reps_to_use=10,
                proc=load_data.DEFAULT_PROC,
                random_state_perm=1,
                force=False,
                mode='acc'):
    """Run a LOSO TGM experiment, saving accuracies or coefficients.

    With ``mode == 'acc'`` leave-one-sentence-out TGM accuracies and
    predictions are computed; any other mode saves classifier coefficients
    and regularization values instead. An existing result file is not
    recomputed unless ``force`` is set.
    """
    warnings.filterwarnings(action='ignore')

    # Ensure the experiment- and subject-level output directories exist.
    top_dir = TOP_DIR.format(exp=experiment)
    if not os.path.exists(top_dir):
        os.mkdir(top_dir)
    save_dir = SAVE_DIR.format(top_dir=top_dir, sub=subject)
    if not os.path.exists(save_dir):
        os.mkdir(save_dir)

    # The output file name encodes every parameter affecting the result.
    fname = SAVE_FILE.format(dir=save_dir,
                             sub=subject,
                             sen_type=sen_type,
                             word=word,
                             win_len=win_len,
                             ov=overlap,
                             perm=bool_to_str(isPerm),
                             alg=alg,
                             adj=adj,
                             avgTm=bool_to_str(doTimeAvg),
                             avgTst=bool_to_str(doTestAvg),
                             inst=num_instances,
                             rep=reps_to_use,
                             rsP=random_state_perm,
                             mode=mode)

    print(force)
    if os.path.isfile(fname + '.npz') and not force:
        print('Job already completed. Skipping Job.')
        print(fname)
        return

    meg_data, labels, sen_ints, timestamps, _sensor_regions = load_data.load_sentence_data_v2(
        subject=subject,
        align_to=word,
        voice=sen_type,
        experiment=experiment,
        proc=proc,
        num_instances=num_instances,
        reps_filter=reps_to_use,
        sensor_type=None,
        is_region_sorted=False,
        tmin=None,
        tmax=None)

    # 500 Hz sampling: convert the covered time span to a sample count.
    total_win = int((timestamps.max() - timestamps.min()) * 500)
    if win_len < 0:
        win_len = total_win - overlap
    win_starts = range(0, total_win - win_len, overlap)

    # Optionally permute labels (reproducibly) for a null distribution.
    if isPerm:
        random.seed(random_state_perm)
        random.shuffle(labels)

    if mode == 'acc':
        l_ints, cv_membership, tgm_acc, tgm_pred = models.lr_tgm_loso(
            meg_data,
            labels,
            win_starts,
            win_len,
            sen_ints,
            penalty=alg[3:],
            adj=adj,
            doTimeAvg=doTimeAvg,
            doTestAvg=doTestAvg)
        results = dict(l_ints=l_ints,
                       cv_membership=cv_membership,
                       tgm_acc=tgm_acc,
                       tgm_pred=tgm_pred)
    else:
        l_ints, coef, Cs = models.lr_tgm_coef(meg_data,
                                              labels,
                                              win_starts,
                                              win_len,
                                              penalty=alg[3:],
                                              adj=adj,
                                              doTimeAvg=doTimeAvg)
        results = dict(l_ints=l_ints, coef=coef, Cs=Cs)

    results.update(win_starts=win_starts, time=timestamps, proc=proc)
    np.savez_compressed(fname, **results)
Beispiel #4
0
def run_tgm_exp(experiment,
                subject,
                word,
                win_len,
                overlap,
                dist='cosine',
                doTimeAvg=False,
                proc=load_data.DEFAULT_PROC,
                force=False):
    """Compute windowed RDMs over last-word-aligned data and save them.

    Loads both voices, derives word/voice labels from the stimuli, slides a
    window over time, and stores one representational dissimilarity matrix
    (``pdist``/``squareform`` with metric ``dist``) per window in an .npz
    file. Skips the job when the output already exists and not ``force``.
    """
    # Save Directory
    top_dir = TOP_DIR.format(exp=experiment)
    if not os.path.exists(top_dir):
        os.mkdir(top_dir)
    save_dir = SAVE_DIR.format(top_dir=top_dir, sub=subject)
    if not os.path.exists(save_dir):
        os.mkdir(save_dir)

    fname = SAVE_FILE.format(dir=save_dir,
                             sub=subject,
                             word=word,
                             win_len=win_len,
                             ov=overlap,
                             dist=dist,
                             avgTm=bool_to_str(doTimeAvg))

    print(force)
    if os.path.isfile(fname + '.npz') and not force:
        print('Job already completed. Skipping Job.')
        print(fname)
        return

    voice = ['active', 'passive']
    num_instances = 1

    all_data, _, sen_ints, time, _ = load_data.load_sentence_data_v2(
        subject=subject,
        align_to='last',
        voice=voice,
        experiment=experiment,
        proc=proc,
        num_instances=num_instances,
        reps_filter=None,
        sensor_type=None,
        is_region_sorted=False,
        tmin=TMIN[word],
        tmax=TMAX[word])
    # Rescale the signal (presumably T -> pT) — TODO confirm units.
    all_data *= 1e12
    stimuli_voice = list(load_data.read_stimuli(experiment))
    labels = []
    voice_labels = []
    if word != 'eos-full':
        # Only long (>5 word) sentences survive the loop below, i.e. half
        # the trials.  Floor division keeps the shape an int under Python 3
        # (plain `/` would yield a float and np.ones would raise).
        data = np.ones(
            (all_data.shape[0] // 2, all_data.shape[1], all_data.shape[2]))
    else:
        data = all_data
    i_data = 0
    for i_sen_int, sen_int in enumerate(sen_ints):
        word_list = stimuli_voice[sen_int]['stimulus'].split()
        curr_voice = stimuli_voice[sen_int]['voice']
        if word == 'eos-full':
            # Label with the second-to-last stimulus word.
            labels.append(word_list[-2])
            voice_labels.append(curr_voice)
        elif len(word_list) > 5:
            data[i_data, :, :] = all_data[i_sen_int, :, :]
            labels.append(word_list[WORD_COLS[curr_voice][word]])
            voice_labels.append(curr_voice)
            i_data += 1
    print(labels)
    print(voice_labels)
    tmin = time.min()
    tmax = time.max()

    # 500 Hz sampling: convert the time span into a sample count.
    total_win = int((tmax - tmin) * 500)

    if win_len < 0:
        win_len = total_win - overlap

    win_starts = range(0, total_win - win_len, overlap)
    n_time = data.shape[2]
    # One boolean time mask per window.  `range` (not Py2-only `xrange`)
    # behaves identically here and keeps the code Python 3 compatible.
    windows = [
        np.array([w_s <= i < w_s + win_len for i in range(n_time)])
        for w_s in win_starts
    ]
    n_w = len(windows)

    RDM = []
    for wi in range(n_w):
        time_to_use = windows[wi]
        data_to_use = data[:, :, time_to_use]
        if doTimeAvg:
            data_to_use = np.mean(data_to_use, axis=2)
        else:
            # Flatten sensors x time into one feature vector per trial.
            data_to_use = np.reshape(data_to_use, (data_to_use.shape[0], -1))
        curr_RDM = squareform(pdist(data_to_use, metric=dist))
        if np.any(np.isnan(curr_RDM)):
            # Debug dump: locate the trial pair whose distance came out NaN.
            print('Data state:')
            print(np.any(np.isinf(data_to_use)))
            print(np.any(np.isnan(data_to_use)))
            print(np.min(data_to_use))
            print(np.min(np.abs(data_to_use)))
            meow = pdist(data_to_use, metric=my_cosine)
            nan_els = np.unravel_index(np.where(np.isnan(meow)),
                                       curr_RDM.shape)
            # Fix: print as a function call (the original Py2-only `print`
            # statement breaks under Python 3); output is unchanged in Py2.
            print('My cosine:')
            print(my_cosine(data_to_use[nan_els[0][0][0], :],
                            data_to_use[nan_els[1][0][0], :]))
        RDM.append(curr_RDM[None, ...])

    RDM = np.concatenate(RDM, axis=0)
    np.savez_compressed(fname,
                        RDM=RDM,
                        labels=labels,
                        voice_labels=voice_labels,
                        win_starts=win_starts,
                        time=time,
                        proc=proc)
Beispiel #5
0
def run_tgm_exp(experiment,
                subject,
                sen_type,
                word,
                win_len,
                overlap,
                fold,
                isPerm = False,
                alg='lr-l1',
                adj=None,
                doTimeAvg=False,
                doTestAvg=True,
                num_instances=1,
                proc=load_data.DEFAULT_PROC,
                random_state_perm=1,
                force=False):
    """Run one LOSO fold of an end-of-sentence TGM decoding experiment.

    Loads last-word-aligned data for ``sen_type`` ('pooled' uses both
    voices), derives per-trial labels from the stimulus text according to
    ``word`` ('propid', 'voice', 'senlen', 'agent'/'patient', or a word
    column), drops trials without a valid label, optionally permutes the
    labels, and saves the fold's TGM results to an .npz file.  The job is
    skipped when the output file already exists and ``force`` is False.
    """
    warnings.filterwarnings(action='ignore')
    # Save Directory
    top_dir = TOP_DIR.format(exp=experiment)
    if not os.path.exists(top_dir):
        os.mkdir(top_dir)
    save_dir = SAVE_DIR.format(top_dir=top_dir, sub=subject)
    if not os.path.exists(save_dir):
        os.mkdir(save_dir)

    # Output file name encodes every parameter that affects the result.
    fname = SAVE_FILE.format(dir=save_dir,
                             sub=subject,
                             sen_type=sen_type,
                             word=word,
                             win_len=win_len,
                             ov=overlap,
                             perm=bool_to_str(isPerm),
                             alg=alg,
                             adj=adj,
                             avgTm=bool_to_str(doTimeAvg),
                             avgTst=bool_to_str(doTestAvg),
                             inst=num_instances,
                             rsP=random_state_perm,
                             fold=fold)

    print(force)
    if os.path.isfile(fname + '.npz') and not force:
        print('Job already completed. Skipping Job.')
        print(fname)
        return

    # 'pooled' decodes across both voice conditions at once.
    if sen_type == 'pooled':
        voice = ['active', 'passive']
    else:
        voice = [sen_type]

    data, _, sen_ints, time, _ = load_data.load_sentence_data_v2(subject=subject,
                                                                 align_to='last',
                                                                 voice=voice,
                                                                 experiment=experiment,
                                                                 proc=proc,
                                                                 num_instances=num_instances,
                                                                 reps_filter=None,
                                                                 sensor_type=None,
                                                                 is_region_sorted=False,
                                                                 tmin=0.0,
                                                                 tmax=TMAX[experiment])

    stimuli_voice = list(load_data.read_stimuli(experiment))
    # print(stimuli_voice)
    if word == 'propid':
        # Proposition identity: label each trial by its (agent, verb,
        # patient) triple; np.unique(..., return_inverse=True) maps each
        # distinct triple to an integer label.
        all_words = [stimuli_voice[sen_int]['stimulus'].split() for sen_int in sen_ints]
        all_voices = [stimuli_voice[sen_int]['voice'] for sen_int in sen_ints]
        content_words = []
        valid_inds = []
        for i_word_list, word_list in enumerate(all_words):
            curr_voice = all_voices[i_word_list]
            if experiment == 'PassAct3':
                # PassAct3 has short sentences without a full proposition;
                # keep only long (>5 word) ones.
                if len(word_list) > 5:
                    valid_inds.append(i_word_list)
                    content_words.append([word_list[WORD_COLS[curr_voice]['agent']], word_list[WORD_COLS[curr_voice]['verb']],
                                          word_list[WORD_COLS[curr_voice]['patient']]])
            else:
                valid_inds.append(i_word_list)
                content_words.append(
                    [word_list[WORD_COLS[curr_voice]['agent']], word_list[WORD_COLS[curr_voice]['verb']],
                     word_list[WORD_COLS[curr_voice]['patient']]])
        uni_content, labels = np.unique(np.array(content_words), axis=0, return_inverse=True)
        print(uni_content)
    else:
        labels = []
        valid_inds = []
        for i_sen_int, sen_int in enumerate(sen_ints):
            word_list = stimuli_voice[sen_int]['stimulus'].split()
            curr_voice = stimuli_voice[sen_int]['voice']
            if word == 'voice':
                # Decode active vs passive.
                labels.append(curr_voice)
                valid_inds.append(i_sen_int)
            elif word == 'senlen':
                # Decode sentence length (long vs short).
                if len(word_list) > 5:
                    labels.append('long')
                else:
                    labels.append('short')
                valid_inds.append(i_sen_int)
            elif word == 'agent' or word == 'patient':
                # In PassAct3 only long sentences have both roles.
                if experiment == 'PassAct3':
                    if len(word_list) > 5:
                        valid_inds.append(i_sen_int)
                        labels.append(word_list[WORD_COLS[curr_voice][word]])
                else:
                    labels.append(word_list[WORD_COLS[curr_voice][word]])
                    valid_inds.append(i_sen_int)
            else:
                # Any other key labels by the corresponding stimulus word.
                labels.append(word_list[WORD_COLS[curr_voice][word]])
                valid_inds.append(i_sen_int)

    # Drop trials without a valid label, keeping data and sen_ints aligned
    # with the labels built above.
    valid_inds = np.array(valid_inds)
    data = data[valid_inds, ...]
    sen_ints = [sen for i_sen, sen in enumerate(sen_ints) if i_sen in valid_inds]


    # print(labels)
    tmin = time.min()
    tmax = time.max()

    # 500 Hz sampling: convert the covered time span to a sample count.
    total_win = int((tmax - tmin) * 500)

    # win_len < 0 means "one single window spanning the whole epoch".
    if win_len < 0:
        win_len = total_win - overlap

    win_starts = range(0, total_win - win_len, overlap)

    # Optional label permutation (reproducible) for a null distribution.
    if isPerm:
        random.seed(random_state_perm)
        random.shuffle(labels)


    l_ints, cv_membership, tgm_acc, tgm_pred = models.lr_tgm_loso_fold(data,
                                                                      labels,
                                                                      win_starts,
                                                                      win_len,
                                                                      sen_ints,
                                                                       fold,
                                                                      penalty=alg[3:],
                                                                      adj=adj,
                                                                      doTimeAvg=doTimeAvg,
                                                                      doTestAvg=doTestAvg)
    np.savez_compressed(fname,
                        l_ints=l_ints,
                        cv_membership=cv_membership,
                        tgm_acc=tgm_acc,
                        tgm_pred=tgm_pred,
                        win_starts=win_starts,
                        time=time,
                        proc=proc)
Beispiel #6
0
def run_tgm_exp(experiment,
                sen_type,
                word,
                win_len,
                overlap,
                fold,
                isPerm=False,
                alg='lr-l1',
                adj=None,
                doTimeAvg=False,
                doTestAvg=True,
                num_instances=1,
                proc=load_data.DEFAULT_PROC,
                random_state_perm=1,
                force=False):
    """Run one fold of a multi-subject LOSO TGM experiment aligned to noun1.

    Loads noun1-aligned data for every valid subject, keeps only trials
    with a label for ``word`` (noun2 exists only in long sentences), checks
    that all subjects agree on trial order, times, and labels, and saves
    the fold's results to an .npz file named after the parameter set.
    """
    warnings.filterwarnings(action='ignore')

    # Experiment-level output directory.
    top_dir = TOP_DIR.format(exp=experiment)
    if not os.path.exists(top_dir):
        os.mkdir(top_dir)

    fname = SAVE_FILE.format(dir=top_dir,
                             sen_type=sen_type,
                             word=word,
                             win_len=win_len,
                             ov=overlap,
                             perm=bool_to_str(isPerm),
                             alg=alg,
                             adj=adj,
                             avgTm=bool_to_str(doTimeAvg),
                             avgTst=bool_to_str(doTestAvg),
                             inst=num_instances,
                             rsP=random_state_perm,
                             fold=fold)

    print(force)
    if os.path.isfile(fname + '.npz') and not force:
        print('Job already completed. Skipping Job.')
        print(fname)
        return

    stimuli_voice = list(load_data.read_stimuli(experiment))

    data_list = []
    sen_ints = []
    time = []
    labels = []
    for i_sub, subject in enumerate(VALID_SUBS[experiment]):
        sub_data, _, sub_sen_ints, sub_time, _ = load_data.load_sentence_data_v2(
            subject=subject,
            align_to='noun1',
            voice=sen_type,
            experiment=experiment,
            proc=proc,
            num_instances=num_instances,
            reps_filter=lambda x: [i for i in range(x) if i < 10],
            sensor_type=None,
            is_region_sorted=False,
            tmin=TIME_LIMITS[experiment][sen_type]['noun1']['tmin'],
            tmax=TIME_LIMITS[experiment][sen_type]['noun1']['tmax'])

        # Keep only trials that have a label for this word; noun2 exists
        # only in long (>5 word) sentences.
        keep_inds = []
        sub_labels = []
        kept_sen_ints = []
        for idx, sen_int in enumerate(sub_sen_ints):
            tokens = stimuli_voice[sen_int]['stimulus'].split()
            if word == 'noun2' and len(tokens) <= 5:
                continue
            sub_labels.append(tokens[WORD_COLS[sen_type][word]])
            keep_inds.append(idx)
            kept_sen_ints.append(sen_int)

        keep_inds = np.array(keep_inds)
        print(sub_labels)
        data_list.append(sub_data[keep_inds, ...])
        print(data_list[i_sub].shape)

        if i_sub == 0:
            # The first subject defines the reference ordering.
            sen_ints = kept_sen_ints
            time = sub_time
            labels = sub_labels
        else:
            # Every later subject must match it exactly.
            assert np.all(sen_ints == kept_sen_ints)
            assert np.all(time == sub_time)
            assert np.all(np.array(labels) == np.array(sub_labels))

    # 500 Hz sampling: convert the covered time span to a sample count.
    total_win = int((time.max() - time.min()) * 500)
    if win_len < 0:
        win_len = total_win - overlap
    win_starts = range(0, total_win - win_len, overlap)

    print(sen_ints)
    print(labels)
    print(data_list[0].shape)

    # Optionally permute labels (reproducibly) for a null distribution.
    if isPerm:
        random.seed(random_state_perm)
        random.shuffle(labels)

    l_ints, cv_membership, tgm_acc, tgm_pred = models.lr_tgm_loso_multisub_fold(
        data_list,
        labels,
        win_starts,
        win_len,
        sen_ints,
        fold,
        penalty=alg[3:],
        adj=adj,
        doTimeAvg=doTimeAvg,
        doTestAvg=doTestAvg)

    np.savez_compressed(fname,
                        l_ints=l_ints,
                        cv_membership=cv_membership,
                        tgm_acc=tgm_acc,
                        tgm_pred=tgm_pred,
                        win_starts=win_starts,
                        time=time,
                        proc=proc)
Beispiel #7
0
def run_tgm_exp(experiment,
                sen_type,
                word,
                win_len,
                overlap,
                alg='lr-l1',
                adj=None,
                doTimeAvg=False,
                num_instances=1,
                proc=load_data.DEFAULT_PROC,
                force=False):
    """Compute multi-subject TGM classifier coefficients and Haufe maps.

    Loads last-word-aligned data for every valid subject, derives labels
    from the stimuli according to ``word`` (same scheme as the other
    experiment runners: 'propid', 'voice', 'senlen', 'agent'/'patient', or
    a word column), filters all subjects to trials with valid labels, and
    saves coefficients, regularization values, and Haufe activation maps.
    """
    warnings.filterwarnings(action='ignore')
    # Save Directory
    top_dir = TOP_DIR.format(exp=experiment)
    if not os.path.exists(top_dir):
        os.mkdir(top_dir)

    fname = SAVE_FILE.format(dir=top_dir,
                             sen_type=sen_type,
                             word=word,
                             win_len=win_len,
                             ov=overlap,
                             alg=alg,
                             adj=adj,
                             avgTm=bool_to_str(doTimeAvg),
                             inst=num_instances)

    print(force)
    if os.path.isfile(fname + '.npz') and not force:
        print('Job already completed. Skipping Job.')
        print(fname)
        return

    # 'pooled' decodes across both voice conditions at once.
    if sen_type == 'pooled':
        voice = ['active', 'passive']
    else:
        voice = [sen_type]

    data_list = []
    sen_ints = []
    time = []
    for i_sub, subject in enumerate(VALID_SUBS[experiment]):
        data, _, sen_ints_sub, time_sub, _ = load_data.load_sentence_data_v2(subject=subject,
                                                                             align_to='last',
                                                                             voice=voice,
                                                                             experiment=experiment,
                                                                             proc=proc,
                                                                             num_instances=num_instances,
                                                                             reps_filter=None,
                                                                             sensor_type=None,
                                                                             is_region_sorted=False,
                                                                             tmin=0.0,
                                                                             tmax=TMAX[experiment])
        data_list.append(data)
        if i_sub == 0:
            # The first subject defines the reference trial order and times.
            sen_ints = sen_ints_sub
            time = time_sub
        else:
            assert np.all(sen_ints == sen_ints_sub)
            # Fix: the original compared `time == time`, a tautology that
            # never checked anything; compare against this subject's times
            # (mirrors the sibling multi-subject runner in this module).
            assert np.all(time == time_sub)

    stimuli_voice = list(load_data.read_stimuli(experiment))
    # print(stimuli_voice)
    if word == 'propid':
        # Proposition identity: label each trial by its (agent, verb,
        # patient) triple; np.unique maps each triple to an integer label.
        all_words = [stimuli_voice[sen_int]['stimulus'].split() for sen_int in sen_ints]
        all_voices = [stimuli_voice[sen_int]['voice'] for sen_int in sen_ints]
        content_words = []
        valid_inds = []
        for i_word_list, word_list in enumerate(all_words):
            curr_voice = all_voices[i_word_list]
            if experiment == 'PassAct3':
                # PassAct3 has short sentences without a full proposition;
                # keep only long (>5 word) ones.
                if len(word_list) > 5:
                    valid_inds.append(i_word_list)
                    content_words.append([word_list[WORD_COLS[curr_voice]['agent']], word_list[WORD_COLS[curr_voice]['verb']],
                                          word_list[WORD_COLS[curr_voice]['patient']]])
            else:
                valid_inds.append(i_word_list)
                content_words.append(
                    [word_list[WORD_COLS[curr_voice]['agent']], word_list[WORD_COLS[curr_voice]['verb']],
                     word_list[WORD_COLS[curr_voice]['patient']]])
        uni_content, labels = np.unique(np.array(content_words), axis=0, return_inverse=True)
        print(uni_content)
    else:
        labels = []
        valid_inds = []
        for i_sen_int, sen_int in enumerate(sen_ints):
            word_list = stimuli_voice[sen_int]['stimulus'].split()
            curr_voice = stimuli_voice[sen_int]['voice']
            if word == 'voice':
                labels.append(curr_voice)
                valid_inds.append(i_sen_int)
            elif word == 'senlen':
                if len(word_list) > 5:
                    labels.append('long')
                else:
                    labels.append('short')
                valid_inds.append(i_sen_int)
            elif word == 'agent' or word == 'patient':
                # In PassAct3 only long sentences have both roles.
                if experiment == 'PassAct3':
                    if len(word_list) > 5:
                        valid_inds.append(i_sen_int)
                        labels.append(word_list[WORD_COLS[curr_voice][word]])
                else:
                    labels.append(word_list[WORD_COLS[curr_voice][word]])
                    valid_inds.append(i_sen_int)
            else:
                labels.append(word_list[WORD_COLS[curr_voice][word]])
                valid_inds.append(i_sen_int)

    # Keep only validly-labeled trials, for every subject.
    valid_inds = np.array(valid_inds)
    data_list = [data[valid_inds, ...] for data in data_list]

    # print(labels)
    tmin = time.min()
    tmax = time.max()

    # 500 Hz sampling: convert the covered time span to a sample count.
    total_win = int((tmax - tmin) * 500)

    if win_len < 0:
        win_len = total_win - overlap

    win_starts = range(0, total_win - win_len, overlap)

    coef, Cs, haufe_maps = models.lr_tgm_loso_multisub_coef(data_list,
                                                            labels,
                                                            win_starts,
                                                            win_len,
                                                            penalty=alg[3:],
                                                            adj=adj,
                                                            doTimeAvg=doTimeAvg)
    print('Saving {}'.format(fname))
    np.savez_compressed(fname,
                        coef=coef,
                        Cs=Cs,
                        haufe_maps=haufe_maps,
                        win_starts=win_starts,
                        time=time,
                        proc=proc)
Beispiel #8
0
            win_starts,
            win_len,
            feature_select=doFeatSelect,
            doZscore=doZscore,
            doAvg=doTimeAvg,
            ddof=1)


if __name__ == '__main__':

    # Quick manual check of the v2 loader: end-of-sentence data for subject
    # 'B' in krns2, both voices, 0.5-1.0 s window, one instance per trial.
    data, _, sen_ints, time, _ = load_data.load_sentence_data_v2(
        subject='B',
        align_to='last',
        voice=['active', 'passive'],
        experiment='krns2',
        proc=load_data.DEFAULT_PROC,
        num_instances=1,
        reps_filter=None,
        sensor_type=None,
        is_region_sorted=False,
        tmin=0.5,
        tmax=1.0)

    # Label each trial with the voice of its sentence stimulus.
    stimuli_voice = list(load_data.read_stimuli('krns2'))
    labels = []
    for i_sen_int, sen_int in enumerate(sen_ints):
        curr_voice = stimuli_voice[sen_int]['voice']
        labels.append(curr_voice)

    print(labels)
    # Epoch time bounds in seconds.  NOTE(review): this excerpt appears
    # truncated — tmin/tmax are presumably used further down; verify
    # against the full script.
    tmin = time.min()
    tmax = time.max()
Beispiel #9
0
    inds_to_plot = np.logical_and(time_old >= (tmin + 0.5), time_old <=
                                  (tmax + 0.5))
    print(inds_to_plot.shape)
    print(time_old.shape)
    print(data_old.shape)
    data_old = data_old[:, :, inds_to_plot]
    time_old = time_old[inds_to_plot]

    # subject, align_to, voice, experiment, proc, num_instances, reps_filter,
    # sensor_type = None, is_region_sorted = True):
    data_new, labels_new, indices_in_master_experiment_stimuli, time_new, sensor_regions = load_data_new.load_sentence_data_v2(
        subject='A',
        align_to='noun1',
        voice=('active', 'passive'),
        experiment='PassAct3',
        proc=None,
        num_instances=2,
        reps_filter=None,
        sensor_type=None,
        is_region_sorted=False,
        tmin=tmin,
        tmax=tmax)

    print(data_new.shape)
    print(labels_new)
    print(indices_in_master_experiment_stimuli)

    def num_stimulus_words(stimuli_dict_):
        return len([
            w for w in load_data_new.punctuation_regex.sub(
                '', stimuli_dict_['stimulus']).strip().split()
            if len(w.strip()) > 0
Beispiel #10
0
def _load_window(subject, voice, experiment, num_instances, align_to, tmin,
                 tmax):
    """Load one epoch window aligned to *align_to* for the given subject.

    Thin wrapper around load_data.load_sentence_data_v2 with the options this
    experiment always uses (default proc, no rep filtering, unsorted sensors).
    """
    return load_data.load_sentence_data_v2(
        subject=subject,
        align_to=align_to,
        voice=voice,
        experiment=experiment,
        proc=load_data.DEFAULT_PROC,
        num_instances=num_instances,
        reps_filter=None,
        sensor_type=None,
        is_region_sorted=False,
        tmin=tmin,
        tmax=tmax)


def _word_labels(stimuli_voice, sen_ints, word):
    """Return the stimulus token at column *word* for each sentence id."""
    labels = []
    for sen_int in sen_ints:
        word_list = stimuli_voice[sen_int]['stimulus'].split()
        curr_voice = stimuli_voice[sen_int]['voice']
        labels.append(word_list[WORD_COLS[curr_voice][word]])
    return labels


def run_tgm_exp(subject,
                sen_type,
                analysis,
                win_len,
                overlap,
                isPerm=False,
                alg='lr-l1',
                adj=None,
                doTimeAvg=False,
                doTestAvg=True,
                num_instances=1,
                random_state_perm=1,
                force=False,
                mode='acc'):
    """Run a temporal-generalization (TGM) decoding experiment on determiner
    and noun epochs of the krns2 experiment.

    Depending on *analysis*, the classifier is trained on det1/noun1/det2/noun2
    windows (default), on det1 only ('det-type-first'), or on det1+det2
    ('det-type-*'); 'the-dog' / 'a-dog' further restrict to a two-word
    contrast. Results are written to a compressed .npz file; an existing file
    short-circuits the run unless *force* is set.

    Args:
        subject: subject identifier used for loading and for the save path.
        sen_type: 'pooled' (both voices) or a single voice name.
        analysis: which label set / contrast to decode (see above).
        win_len, overlap: sliding-window length and step, in samples.
        isPerm: if True, shuffle labels (permutation baseline) with
            *random_state_perm* as the seed.
        alg: classifier spec; the penalty is taken from alg[3:] (e.g. 'l1').
        adj, doTimeAvg, doTestAvg: passed through to the model routines.
        num_instances: instances per sentence requested from the loader.
        mode: 'acc' saves LOSO accuracies/predictions; anything else saves
            the fitted coefficients instead.
    """
    warnings.filterwarnings(action='ignore')
    # Save directory
    if not os.path.exists(TOP_DIR):
        os.mkdir(TOP_DIR)
    save_dir = SAVE_DIR.format(top_dir=TOP_DIR, sub=subject)
    if not os.path.exists(save_dir):
        os.mkdir(save_dir)

    fname = SAVE_FILE.format(dir=save_dir,
                             sub=subject,
                             sen_type=sen_type,
                             analysis=analysis,
                             win_len=win_len,
                             ov=overlap,
                             perm=bool_to_str(isPerm),
                             alg=alg,
                             adj=adj,
                             avgTm=bool_to_str(doTimeAvg),
                             avgTst=bool_to_str(doTestAvg),
                             inst=num_instances,
                             rsP=random_state_perm,
                             mode=mode)

    print(force)
    if os.path.isfile(fname + '.npz') and not force:
        print('Job already completed. Skipping Job.')
        print(fname)
        return

    # 'pooled' analyses use both voices; otherwise restrict to the given one.
    if sen_type == 'pooled':
        voice = ['active', 'passive']
    else:
        voice = sen_type
    experiment = 'krns2'

    # Four 0.5 s windows: det1 / noun1 (noun1-aligned), det2 / noun2
    # (noun2-aligned). Only the last load's time axis is kept for saving.
    data_det1, _, sen_ints_det1, _, _ = _load_window(
        subject, voice, experiment, num_instances, 'noun1', -0.5, 0.0)
    data_n1, _, sen_ints_n1, _, _ = _load_window(
        subject, voice, experiment, num_instances, 'noun1', 0.0, 0.5)
    data_det2, _, sen_ints_det2, _, _ = _load_window(
        subject, voice, experiment, num_instances, 'noun2', -0.5, 0.0)
    data_n2, _, sen_ints_n2, time, _ = _load_window(
        subject, voice, experiment, num_instances, 'noun2', 0.0, 0.5)

    stimuli_voice = list(load_data.read_stimuli(experiment))
    # det1 labels are included in every analysis variant.
    labels = _word_labels(stimuli_voice, sen_ints_det1, 'det1')

    if 'det-type' not in analysis:
        labels += _word_labels(stimuli_voice, sen_ints_n1, 'noun1')
        labels += _word_labels(stimuli_voice, sen_ints_det2, 'det2')
        labels += _word_labels(stimuli_voice, sen_ints_n2, 'noun2')
        sen_ints = np.concatenate(
            [sen_ints_det1, sen_ints_n1, sen_ints_det2, sen_ints_n2], axis=0)
        data = np.concatenate([data_det1, data_n1, data_det2, data_n2], axis=0)
    elif analysis == 'det-type-first':
        sen_ints = np.array(sen_ints_det1)
        data = data_det1
    else:
        labels += _word_labels(stimuli_voice, sen_ints_det2, 'det2')
        sen_ints = np.concatenate([sen_ints_det1, sen_ints_det2], axis=0)
        data = np.concatenate([data_det1, data_det2], axis=0)

    # Optionally restrict to a two-class contrast ('the' vs 'dog', etc.).
    inds_to_keep = np.ones((len(labels), ), dtype=bool)
    if analysis == 'the-dog':
        for i_label, label in enumerate(labels):
            if label != 'the' and label != 'dog':
                inds_to_keep[i_label] = False
    elif analysis == 'a-dog':
        for i_label, label in enumerate(labels):
            if label != 'a' and label != 'dog':
                inds_to_keep[i_label] = False

    data = data[inds_to_keep, :, :]
    sen_ints = sen_ints[inds_to_keep]
    new_labels = [label for i_label, label in enumerate(labels)
                  if inds_to_keep[i_label]]

    print(np.unique(np.array(new_labels)))
    total_win = data.shape[-1]
    win_starts = range(0, total_win - win_len, overlap)

    if isPerm:
        # BUG FIX: shuffle the labels actually used downstream. Previously
        # ``labels`` was shuffled *after* ``new_labels`` had been derived,
        # so the permutation baseline silently had no effect.
        random.seed(random_state_perm)
        random.shuffle(new_labels)

    if mode == 'acc':
        l_ints, cv_membership, tgm_acc, tgm_pred = models.lr_tgm_loso(
            data,
            new_labels,
            win_starts,
            win_len,
            sen_ints,
            penalty=alg[3:],
            adj=adj,
            doTimeAvg=doTimeAvg,
            doTestAvg=doTestAvg)

        np.savez_compressed(fname,
                            l_ints=l_ints,
                            cv_membership=cv_membership,
                            tgm_acc=tgm_acc,
                            tgm_pred=tgm_pred,
                            win_starts=win_starts,
                            time=time)
    else:
        # BUG FIX: pass the filtered ``new_labels`` — the unfiltered
        # ``labels`` list has a different length than ``data`` whenever the
        # 'the-dog' / 'a-dog' analyses dropped trials above.
        l_ints, coef, Cs = models.lr_tgm_coef(data,
                                              new_labels,
                                              win_starts,
                                              win_len,
                                              penalty=alg[3:],
                                              adj=adj,
                                              doTimeAvg=doTimeAvg)
        np.savez_compressed(fname,
                            l_ints=l_ints,
                            coef=coef,
                            Cs=Cs,
                            win_starts=win_starts,
                            time=time)
Beispiel #11
0
import matplotlib
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt
from syntax_vs_semantics import load_data
import numpy as np

if __name__ == '__main__':
    # Show the trial-averaged sensor-by-time image for every combination of
    # sentence voice and alignment word; all figures appear on one show().
    for voice_cond in ['active', 'passive']:
        for align_word in ['noun1', 'verb', 'last']:
            epochs, labels, sen_ints, time, sensor_regions = load_data.load_sentence_data_v2(
                subject='B',
                align_to=align_word,
                voice=voice_cond,
                experiment='krns2',
                proc=load_data.DEFAULT_PROC,
                num_instances=1,
                reps_filter=None,
                sensor_type=None,
                is_region_sorted=False,
                tmin=0.0,
                tmax=3.0)
            # Average across trials, drop the singleton axis.
            avg_image = np.squeeze(np.mean(epochs, axis=0))

            fig, ax = plt.subplots()
            im = ax.imshow(avg_image, interpolation='nearest', aspect='auto')
            ax.set_title('{} {}'.format(voice_cond, align_word))
    plt.show()
def run_tgm_exp(experiment,
                sen_type,
                word,
                win_len,
                overlap,
                alg='lr-l1',
                adj=None,
                doTimeAvg=False,
                num_instances=1,
                proc=load_data.DEFAULT_PROC,
                force=False):
    """Fit multi-subject leave-one-subject-out TGM coefficient maps.

    Loads the same word-aligned epoch window for every valid subject of
    *experiment*, verifies that sentence ids, time axes and labels agree
    across subjects, then saves the fitted coefficients, regularization
    strengths (Cs) and Haufe activation maps to a compressed .npz file.
    Skips the run if the output file already exists and *force* is False.
    """
    warnings.filterwarnings(action='ignore')

    # Make sure the experiment-level output directory exists.
    top_dir = TOP_DIR.format(exp=experiment)
    if not os.path.exists(top_dir):
        os.mkdir(top_dir)

    fname = SAVE_FILE.format(dir=top_dir,
                             sen_type=sen_type,
                             word=word,
                             win_len=win_len,
                             ov=overlap,
                             alg=alg,
                             adj=adj,
                             avgTm=bool_to_str(doTimeAvg),
                             inst=num_instances)

    print(force)
    if os.path.isfile(fname + '.npz') and not force:
        print('Job already completed. Skipping Job.')
        print(fname)
        return

    per_subject_data = []
    sen_ints = []
    time = []
    labels = []
    for sub_idx, subject in enumerate(VALID_SUBS[experiment]):
        sub_data, sub_labels, sub_sen_ints, sub_time, _ = load_data.load_sentence_data_v2(
            subject=subject,
            align_to=word,
            voice=sen_type,
            experiment=experiment,
            proc=proc,
            num_instances=num_instances,
            reps_filter=lambda n_reps: [r for r in range(n_reps) if r < 10],
            sensor_type=None,
            is_region_sorted=False,
            tmin=TIME_LIMITS[experiment][sen_type][word]['tmin'],
            tmax=TIME_LIMITS[experiment][sen_type][word]['tmax'])
        per_subject_data.append(sub_data)
        if sub_idx == 0:
            # The first subject defines the reference ordering.
            sen_ints = sub_sen_ints
            time = sub_time
            labels = sub_labels
        else:
            # Every other subject must match it exactly.
            assert np.all(sen_ints == sub_sen_ints)
            assert np.all(time == sub_time)
            assert np.all(np.array(labels) == np.array(sub_labels))

    # Epoch length in samples, assuming 500 samples per second of epoch
    # (NOTE(review): confirm against the loader's actual sampling rate).
    total_win = int((time.max() - time.min()) * 500)

    # A negative win_len means "one window covering the whole epoch".
    if win_len < 0:
        win_len = total_win - overlap

    win_starts = range(0, total_win - win_len, overlap)

    coef, Cs, haufe_maps = models.lr_tgm_loso_multisub_coef(
        per_subject_data,
        labels,
        win_starts,
        win_len,
        penalty=alg[3:],
        adj=adj,
        doTimeAvg=doTimeAvg)
    print('Saving {}'.format(fname))
    np.savez_compressed(fname,
                        coef=coef,
                        Cs=Cs,
                        haufe_maps=haufe_maps,
                        win_starts=win_starts,
                        time=time,
                        proc=proc)
Beispiel #13
0
def run_tgm_exp(experiment,
                subject,
                win_len,
                overlap,
                fold,
                isPerm=False,
                alg='lr-l1',
                adj=None,
                doTimeAvg=False,
                doTestAvg=True,
                num_instances=1,
                proc=load_data.DEFAULT_PROC,
                random_state_perm=1,
                force=False):
    """Run one cross-decoding TGM fold between noun1 and noun2 epochs.

    Loads a 0-0.5 s noun1-aligned window and a 0-1.0 s noun2-aligned window
    (both voices pooled), maps the loaded labels through NEW_LABELS, and runs
    a single leave-one-sentence-out cross-TGM fold, optionally with permuted
    labels. Accuracies, predictions, window starts and time axes are written
    to a compressed .npz; an existing file skips the run unless *force*.
    """
    warnings.filterwarnings(action='ignore')

    # Create the per-experiment / per-subject output directories if needed.
    top_dir = TOP_DIR.format(exp=experiment)
    if not os.path.exists(top_dir):
        os.mkdir(top_dir)
    save_dir = SAVE_DIR.format(top_dir=top_dir, sub=subject)
    if not os.path.exists(save_dir):
        os.mkdir(save_dir)

    fname = SAVE_FILE.format(dir=save_dir,
                             sub=subject,
                             win_len=win_len,
                             ov=overlap,
                             perm=bool_to_str(isPerm),
                             alg=alg,
                             adj=adj,
                             avgTm=bool_to_str(doTimeAvg),
                             avgTst=bool_to_str(doTestAvg),
                             inst=num_instances,
                             rsP=random_state_perm,
                             fold=fold)

    if os.path.isfile(fname + '.npz') and not force:
        print('Job already completed. Skipping Job.')
        print(fname)
        return

    def load_window(align_to, tmax):
        # One epoch window aligned to the given word, both voices pooled,
        # keeping at most the first 10 repetitions of each stimulus.
        return load_data.load_sentence_data_v2(
            subject=subject,
            align_to=align_to,
            voice=['active', 'passive'],
            experiment=experiment,
            proc=proc,
            num_instances=num_instances,
            reps_filter=lambda x: [i for i in range(x) if i < 10],
            sensor_type=None,
            is_region_sorted=False,
            tmin=0.0,
            tmax=tmax)

    n1_data, raw_labels, n1_sen_ints, n1_time, _ = load_window('noun1', 0.5)
    n1_labels = [NEW_LABELS[experiment][lab] for lab in raw_labels]

    n2_data, raw_labels, n2_sen_ints, n2_time, _ = load_window('noun2', 1.0)
    n2_labels = [NEW_LABELS[experiment][lab] for lab in raw_labels]

    def window_starts(t):
        # Window count assumes 500 samples per second of epoch
        # (NOTE(review): confirm against the loader's sampling rate).
        total = int((t.max() - t.min()) * 500)
        return range(0, total - win_len, overlap)

    n1_win_starts = window_starts(n1_time)
    n2_win_starts = window_starts(n2_time)

    if isPerm:
        # Permutation baseline: shuffle both label lists deterministically
        # (n1 first, then n2, under a single seeded RNG stream).
        random.seed(random_state_perm)
        random.shuffle(n1_labels)
        random.shuffle(n2_labels)

    l_ints, cv_membership, tgm_acc, tgm_pred = models.lr_cross_tgm_loso_fold(
        data_list=[n1_data, n2_data],
        labels_list=[n1_labels, n2_labels],
        win_starts_list=[n1_win_starts, n2_win_starts],
        win_len=win_len,
        sen_ints_list=[n1_sen_ints, n2_sen_ints],
        fold=fold,
        penalty=alg[3:],
        adj=adj,
        doTimeAvg=doTimeAvg,
        doTestAvg=doTestAvg)

    np.savez_compressed(fname,
                        l_ints=l_ints,
                        cv_membership=cv_membership,
                        tgm_acc=tgm_acc,
                        tgm_pred=tgm_pred,
                        n1_win_starts=n1_win_starts,
                        n2_win_starts=n2_win_starts,
                        n1_time=n1_time,
                        n2_time=n2_time,
                        proc=proc)
Beispiel #14
0
def run_tgm_exp(experiment,
                subject,
                word,
                win_len,
                overlap,
                isPerm=False,
                alg='lr-l1',
                adj=None,
                doTimeAvg=False,
                doTestAvg=True,
                num_instances=1,
                proc=load_data.DEFAULT_PROC,
                random_state_perm=1,
                force=False):
    """Decode sentence voice (active vs passive) with a TGM classifier.

    For word == 'voice', epochs are the 0.5-1.0 s window after the last word
    (both voices pooled in one load). Otherwise, verb-aligned windows are
    loaded separately per voice (post-verb for active, pre-verb for passive)
    and concatenated. Labels are the sentences' voice field. The classifier
    family is chosen from *alg* ('lr*', 'svm*', or GNB otherwise); results
    are saved to a compressed .npz keyed by all the run parameters. An
    existing output file short-circuits the run unless *force* is set.
    """
    warnings.filterwarnings(action='ignore')
    # Save Directory
    top_dir = TOP_DIR.format(exp=experiment)
    if not os.path.exists(top_dir):
        os.mkdir(top_dir)
    save_dir = SAVE_DIR.format(top_dir=top_dir, sub=subject)
    if not os.path.exists(save_dir):
        os.mkdir(save_dir)

    fname = SAVE_FILE.format(dir=save_dir,
                             sub=subject,
                             word=word,
                             win_len=win_len,
                             ov=overlap,
                             perm=bool_to_str(isPerm),
                             alg=alg,
                             adj=adj,
                             avgTm=bool_to_str(doTimeAvg),
                             avgTst=bool_to_str(doTestAvg),
                             inst=num_instances,
                             rsP=random_state_perm)

    print(force)
    if os.path.isfile(fname + '.npz') and not force:
        print('Job already completed. Skipping Job.')
        print(fname)
        return

    if word == 'voice':
        # Single pooled load: last-word-aligned window, both voices together.
        data, _, sen_ints, time, _ = load_data.load_sentence_data_v2(
            subject=subject,
            align_to='last',
            voice=['active', 'passive'],
            experiment=experiment,
            proc=proc,
            num_instances=num_instances,
            reps_filter=None,
            sensor_type=None,
            is_region_sorted=False,
            tmin=0.5,
            tmax=1.0)
    else:
        # Verb-aligned windows, loaded per voice with different time ranges:
        # 0.0-0.5 s (post-verb) for active, -0.5-0.0 s (pre-verb) for passive.
        data_act, _, sen_ints_act, time, _ = load_data.load_sentence_data_v2(
            subject=subject,
            align_to='verb',
            voice=['active'],
            experiment=experiment,
            proc=proc,
            num_instances=num_instances,
            reps_filter=None,
            sensor_type=None,
            is_region_sorted=False,
            tmin=0.0,
            tmax=0.5)
        data_pass, _, sen_ints_pass, _, _ = load_data.load_sentence_data_v2(
            subject=subject,
            align_to='verb',
            voice=['passive'],
            experiment=experiment,
            proc=proc,
            num_instances=num_instances,
            reps_filter=None,
            sensor_type=None,
            is_region_sorted=False,
            tmin=-0.5,
            tmax=0.0)

        # NOTE(review): only the active load's time axis is kept and saved;
        # the passive window spans a different absolute range — confirm this
        # is the intended convention for downstream plotting.
        data = np.concatenate([data_act, data_pass], axis=0)
        sen_ints = np.concatenate([sen_ints_act, sen_ints_pass], axis=0)

    # Labels are each sentence's voice ('active' / 'passive').
    stimuli_voice = list(load_data.read_stimuli(experiment))
    labels = []
    for i_sen_int, sen_int in enumerate(sen_ints):
        curr_voice = stimuli_voice[sen_int]['voice']
        labels.append(curr_voice)

    print(labels)
    tmin = time.min()
    tmax = time.max()

    # Epoch length in samples, assuming 500 samples per second
    # (NOTE(review): confirm against the loader's sampling rate).
    total_win = int((tmax - tmin) * 500)

    # A negative win_len means "one window covering the whole epoch".
    if win_len < 0:
        win_len = total_win - overlap

    win_starts = range(0, total_win - win_len, overlap)

    if isPerm:
        # Permutation baseline: shuffle labels deterministically.
        random.seed(random_state_perm)
        random.shuffle(labels)

    if 'lr' in alg:
        # Logistic regression, leave-one-sentence-out.
        l_ints, cv_membership, tgm_acc, tgm_pred = models.lr_tgm_loso(
            data,
            labels,
            win_starts,
            win_len,
            sen_ints,
            penalty=str_to_none(alg[3:]),
            adj=adj,
            doTimeAvg=doTimeAvg,
            doTestAvg=doTestAvg)
    elif 'svm' in alg:
        # Linear SVM, leave-one-sentence-out.
        l_ints, cv_membership, tgm_acc, tgm_pred = models.svc_tgm_loso(
            data,
            labels,
            win_starts,
            win_len,
            sen_ints,
            sub_rs=1,
            penalty=alg[4:],
            adj=adj,
            doTimeAvg=doTimeAvg,
            doTestAvg=doTestAvg,
            ddof=1,
            C=None)
    else:
        # Gaussian naive Bayes: adj/alg strings select z-scoring and
        # feature selection; accuracy is derived from the raw predictions.
        if adj == 'zscore':
            doZscore = True
        else:
            doZscore = False
        if 'None' in alg:
            doFeatSelect = False
        else:
            doFeatSelect = True
        tgm_pred, l_ints, cv_membership, feature_masks, num_feat_selected = models.nb_tgm_loso(
            data,
            labels,
            sen_ints,
            1,
            win_starts,
            win_len,
            feature_select=doFeatSelect,
            doZscore=doZscore,
            doAvg=doTimeAvg,
            ddof=1)
        tgm_acc = tgm_from_preds_GNB(tgm_pred, l_ints, cv_membership)

    print('Max Acc: %.2f' % np.max(np.mean(tgm_acc, axis=0)))
    np.savez_compressed(fname,
                        l_ints=l_ints,
                        cv_membership=cv_membership,
                        tgm_acc=tgm_acc,
                        tgm_pred=tgm_pred,
                        win_starts=win_starts,
                        time=time,
                        proc=proc)
Beispiel #15
0
            inst_grid = AxesGrid(inst_fig,
                                 111,
                                 nrows_ncols=(len(inst_list), 1),
                                 axes_pad=0.7,
                                 cbar_mode='single',
                                 cbar_location='right',
                                 cbar_pad=0.5,
                                 cbar_size='2%',
                                 share_all=True)
            for i_inst, num_instances in enumerate(inst_list):
                data, labels, sen_ints, time, sensor_regions = load_data.load_sentence_data_v2(
                    subject=subject,
                    align_to='noun1',
                    voice=[sen_type],
                    experiment=experiment,
                    proc=proc,
                    num_instances=num_instances,
                    reps_filter=lambda x: [i for i in range(x) if i < 10],
                    sensor_type=None,
                    is_region_sorted=False,
                    tmin=-1.0,
                    tmax=4.5)
                # time_to_plot = range(180, 254)
                # data = data[:, :, time_to_plot]
                # time = time[time_to_plot]
                if num_instances == 1:
                    data = np.squeeze(data[sen_list[0], :, :])
                else:
                    data = np.squeeze(data[sen_id, :, :])

                data_to_plot = data[sorted_inds, ::2]
                print(np.max(data_to_plot))
                                       ni=num_instances,
                                       tmin=tmin,
                                       tmax=tmax,
                                       i_sensor=i_sensor,
                                       voice=args.voice)

    if os.path.isfile(result_fname) and not str_to_bool(args.force):
        print('Job already completed. Skipping Job.')
        print(result_fname)
    else:
        data, labels, sen_ints, time, sensor_regions = load_data.load_sentence_data_v2(
            subject=sub,
            align_to='last',
            voice=voice,
            experiment=exp,
            proc=proc,
            num_instances=num_instances,
            reps_filter=None,
            sensor_type=None,
            is_region_sorted=False,
            tmin=tmin,
            tmax=tmax)
        data = data * 1e12
        sen_ints = np.array(sen_ints)

        if i_sensor < 0:
            do_transpose = False
            i_sensor = np.ones((data.shape[1], ), dtype=bool)
        else:
            do_transpose = True

        sen0_data = data[sen_ints == sen0, ...]