def load_bow(experiment, distance='euclidean'):
    """Build a bag-of-words model RDM over the stimuli of ``experiment``.

    Each stimulus is represented by the mean of the GloVe vectors of its
    content words; the pairwise distances between those sentence vectors
    are returned as a square matrix.

    Args:
        experiment: Experiment identifier passed to
            ``load_data.read_stimuli``.
        distance: Metric name (or callable) forwarded to
            ``scipy.spatial.distance.pdist``.

    Returns:
        Square distance matrix with one row/column per stimulus, in the
        order the stimuli are yielded by ``load_data.read_stimuli``.
    """
    # NOTE(review): the loaded object is assumed to wrap a dict with a
    # 'glove' entry mapping words to vectors -- confirm against the file.
    embeddings_file = '/share/volume0/RNNG/semantic_models/embeddings_dict.npz'
    glove = np.load(embeddings_file).item()['glove']

    per_sentence = []
    for entry in load_data.read_stimuli(experiment):
        voice = entry['voice']
        tokens = entry['stimulus'].split()
        # Keep only the content-word positions that exist in this sentence;
        # the leading axis added by [None, ...] lets the vectors be stacked.
        word_vecs = [
            glove[unicode(tokens[pos])][None, ...]
            for pos in CONTENT_WORDS[voice]
            if pos < len(tokens)
        ]
        stacked = np.concatenate(word_vecs, axis=0)
        per_sentence.append(np.mean(stacked, axis=0)[None, ...])

    sentence_matrix = np.concatenate(per_sentence, axis=0)
    return squareform(pdist(sentence_matrix, metric=distance))
Ejemplo n.º 2
0
def load_agg_data(subject, word, experiment, voice, proc, rep_set):
    """Load sentence-aligned data and the word/voice label of each trial.

    When ``word`` contains ``'full'`` every trial is kept. Otherwise only
    trials whose stimulus has more than five whitespace-separated tokens
    are kept.

    Args:
        subject: Subject identifier forwarded to the loader.
        word: Key into ``TMIN``/``TMAX`` and into ``WORD_COLS[voice]``.
        experiment: Experiment identifier.
        voice: Voice selection forwarded to the loader.
        proc: Preprocessing identifier forwarded to the loader.
        rep_set: Container of repetition indices to keep; the filter handed
            to the loader marks repetition ``rep`` as kept when
            ``rep in rep_set``.

    Returns:
        Tuple ``(data, labels, voice_labels, time)`` where ``data`` holds one
        row per kept trial and ``labels`` / ``voice_labels`` are lists
        aligned with those rows.
    """
    all_data, _, sen_ints, time, _ = load_data.load_sentence_data_v2(
        subject=subject,
        align_to='last',
        voice=voice,
        experiment=experiment,
        proc=proc,
        num_instances=1,
        reps_filter=lambda nr: [rep in rep_set for rep in range(nr)],
        sensor_type=None,
        is_region_sorted=False,
        tmin=TMIN[word],
        tmax=TMAX[word])
    # NOTE(review): rescaling by 1e12 is presumably a unit conversion --
    # confirm against the loader's output units.
    all_data *= 1e12
    stimuli_voice = list(load_data.read_stimuli(experiment))

    keep_all = 'full' in word
    labels = []
    voice_labels = []
    kept_rows = []
    for i_sen_int, sen_int in enumerate(sen_ints):
        word_list = stimuli_voice[sen_int]['stimulus'].split()
        curr_voice = stimuli_voice[sen_int]['voice']
        if not keep_all and len(word_list) <= 5:
            continue
        labels.append(word_list[WORD_COLS[curr_voice][word]])
        voice_labels.append(curr_voice)
        kept_rows.append(i_sen_int)

    if keep_all:
        data = all_data
    else:
        # Select the kept trials by index instead of filling a buffer of
        # size ``shape[0] / 2``: that size is a float under Python 3 (which
        # np.ones rejects) and silently assumed exactly half of the trials
        # are long sentences. float64 matches the old np.ones buffer.
        data = all_data[kept_rows].astype(np.float64)

    print(labels)
    print(voice_labels)
    return data, labels, voice_labels, time
Ejemplo n.º 3
0
def run_tgm_exp(experiment,
                subject,
                word,
                win_len,
                overlap,
                dist='cosine',
                doTimeAvg=False,
                proc=load_data.DEFAULT_PROC,
                force=False):
    """Compute one pairwise-distance matrix (RDM) per time window and save them.

    Args:
        experiment: Experiment identifier (used for paths and data loading).
        subject: Subject identifier.
        word: Key into ``TMIN``/``TMAX``; ``'eos-full'`` keeps every trial and
            labels it with the second-to-last token, any other value keeps
            only stimuli with more than five tokens and labels them via
            ``WORD_COLS``.
        win_len: Window length in samples; a negative value selects
            ``total_win - overlap``.
        overlap: Step between consecutive window starts, in samples.
        dist: Metric forwarded to ``scipy.spatial.distance.pdist``.
        doTimeAvg: If true, average each window over time before computing
            distances; otherwise flatten the remaining axes per trial.
        proc: Preprocessing identifier forwarded to the loader.
        force: Recompute even if the output file already exists.

    Returns:
        None. Results are written with ``np.savez_compressed`` to the path
        built from ``SAVE_FILE``.
    """
    # Save Directory
    top_dir = TOP_DIR.format(exp=experiment)
    if not os.path.exists(top_dir):
        os.mkdir(top_dir)
    save_dir = SAVE_DIR.format(top_dir=top_dir, sub=subject)
    if not os.path.exists(save_dir):
        os.mkdir(save_dir)

    fname = SAVE_FILE.format(dir=save_dir,
                             sub=subject,
                             word=word,
                             win_len=win_len,
                             ov=overlap,
                             dist=dist,
                             avgTm=bool_to_str(doTimeAvg))

    print(force)
    if os.path.isfile(fname + '.npz') and not force:
        print('Job already completed. Skipping Job.')
        print(fname)
        return

    voice = ['active', 'passive']
    num_instances = 1

    all_data, _, sen_ints, time, _ = load_data.load_sentence_data_v2(
        subject=subject,
        align_to='last',
        voice=voice,
        experiment=experiment,
        proc=proc,
        num_instances=num_instances,
        reps_filter=None,
        sensor_type=None,
        is_region_sorted=False,
        tmin=TMIN[word],
        tmax=TMAX[word])
    # NOTE(review): rescaling by 1e12 is presumably a unit conversion --
    # confirm against the loader's output units.
    all_data *= 1e12
    stimuli_voice = list(load_data.read_stimuli(experiment))

    keep_all = word == 'eos-full'
    labels = []
    voice_labels = []
    kept_rows = []
    for i_sen_int, sen_int in enumerate(sen_ints):
        word_list = stimuli_voice[sen_int]['stimulus'].split()
        curr_voice = stimuli_voice[sen_int]['voice']
        if keep_all:
            labels.append(word_list[-2])
        elif len(word_list) > 5:
            labels.append(word_list[WORD_COLS[curr_voice][word]])
        else:
            continue
        voice_labels.append(curr_voice)
        kept_rows.append(i_sen_int)

    if keep_all:
        data = all_data
    else:
        # Select kept trials by index rather than filling a buffer of size
        # ``shape[0] / 2``: that is a float under Python 3 and assumed that
        # exactly half of the trials are long sentences. float64 matches
        # the dtype of the old np.ones buffer.
        data = all_data[kept_rows].astype(np.float64)

    print(labels)
    print(voice_labels)
    tmin = time.min()
    tmax = time.max()

    # NOTE(review): 500 is presumably the sampling rate in Hz -- confirm.
    total_win = int((tmax - tmin) * 500)

    if win_len < 0:
        win_len = total_win - overlap

    win_starts = range(0, total_win - win_len, overlap)
    n_time = data.shape[2]
    sample_idx = np.arange(n_time)

    RDM = []
    for w_s in win_starts:
        # Boolean mask over the last axis selecting [w_s, w_s + win_len).
        time_to_use = (sample_idx >= w_s) & (sample_idx < w_s + win_len)
        data_to_use = data[:, :, time_to_use]
        if doTimeAvg:
            data_to_use = np.mean(data_to_use, axis=2)
        else:
            data_to_use = np.reshape(data_to_use, (data_to_use.shape[0], -1))
        curr_RDM = squareform(pdist(data_to_use, metric=dist))
        if np.any(np.isnan(curr_RDM)):
            # Diagnostics for NaN distances.
            print('Data state:')
            print(np.any(np.isinf(data_to_use)))
            print(np.any(np.isnan(data_to_use)))
            print(np.min(data_to_use))
            print(np.min(np.abs(data_to_use)))
            # Locate the offending pair directly in the square matrix; the
            # previous code unravelled condensed pdist indices against the
            # square shape, which points at the wrong rows.
            nan_rows, nan_cols = np.where(np.isnan(curr_RDM))
            print('My cosine:')
            print(my_cosine(data_to_use[nan_rows[0], :],
                            data_to_use[nan_cols[0], :]))
        RDM.append(curr_RDM[None, ...])

    RDM = np.concatenate(RDM, axis=0)
    np.savez_compressed(fname,
                        RDM=RDM,
                        labels=labels,
                        voice_labels=voice_labels,
                        win_starts=win_starts,
                        time=time,
                        proc=proc)
Ejemplo n.º 4
0
def run_tgm_exp(experiment,
                subject,
                sen_type,
                word,
                win_len,
                overlap,
                fold,
                isPerm=False,
                alg='lr-l1',
                adj=None,
                doTimeAvg=False,
                doTestAvg=True,
                num_instances=1,
                proc=load_data.DEFAULT_PROC,
                random_state_perm=1,
                force=False):
    """Run one fold of the single-subject windowed classification and save it.

    Loads the subject's sentence data, derives one label per trial according
    to ``word``, optionally permutes the labels, and hands everything to
    ``models.lr_tgm_loso_fold``. The outputs are written with
    ``np.savez_compressed``; nothing is returned.

    ``word`` selects the labelling scheme:
      * ``'propid'``  - index of the unique (agent, verb, patient) triple;
      * ``'voice'``   - the stimulus voice;
      * ``'senlen'``  - ``'long'`` if the stimulus has more than five tokens,
        else ``'short'``;
      * anything else - the token at ``WORD_COLS[voice][word]``.
    For experiment ``'PassAct3'``, stimuli with five or fewer tokens are
    dropped for ``'propid'``, ``'agent'`` and ``'patient'``.

    ``alg[3:]`` is forwarded as the ``penalty`` argument of the model call.
    If the output file already exists and ``force`` is false the function
    returns early.
    """
    warnings.filterwarnings(action='ignore')

    # Output locations.
    top_dir = TOP_DIR.format(exp=experiment)
    if not os.path.exists(top_dir):
        os.mkdir(top_dir)
    save_dir = SAVE_DIR.format(top_dir=top_dir, sub=subject)
    if not os.path.exists(save_dir):
        os.mkdir(save_dir)

    fname = SAVE_FILE.format(
        dir=save_dir,
        sub=subject,
        sen_type=sen_type,
        word=word,
        win_len=win_len,
        ov=overlap,
        perm=bool_to_str(isPerm),
        alg=alg,
        adj=adj,
        avgTm=bool_to_str(doTimeAvg),
        avgTst=bool_to_str(doTestAvg),
        inst=num_instances,
        rsP=random_state_perm,
        fold=fold)

    print(force)
    if os.path.isfile(fname + '.npz') and not force:
        print('Job already completed. Skipping Job.')
        print(fname)
        return

    voice = ['active', 'passive'] if sen_type == 'pooled' else [sen_type]

    data, _, sen_ints, time, _ = load_data.load_sentence_data_v2(
        subject=subject,
        align_to='last',
        voice=voice,
        experiment=experiment,
        proc=proc,
        num_instances=num_instances,
        reps_filter=None,
        sensor_type=None,
        is_region_sorted=False,
        tmin=0.0,
        tmax=TMAX[experiment])

    stimuli_voice = list(load_data.read_stimuli(experiment))
    drop_short = experiment == 'PassAct3'

    valid_inds = []
    if word == 'propid':
        content_words = []
        for row, sen_int in enumerate(sen_ints):
            tokens = stimuli_voice[sen_int]['stimulus'].split()
            curr_voice = stimuli_voice[sen_int]['voice']
            if drop_short and len(tokens) <= 5:
                continue
            cols = WORD_COLS[curr_voice]
            valid_inds.append(row)
            content_words.append([tokens[cols['agent']],
                                  tokens[cols['verb']],
                                  tokens[cols['patient']]])
        # The inverse indices act as integer class labels, one per triple.
        uni_content, labels = np.unique(np.array(content_words),
                                        axis=0,
                                        return_inverse=True)
        print(uni_content)
    else:
        labels = []
        for row, sen_int in enumerate(sen_ints):
            tokens = stimuli_voice[sen_int]['stimulus'].split()
            curr_voice = stimuli_voice[sen_int]['voice']
            if word == 'voice':
                label = curr_voice
            elif word == 'senlen':
                label = 'long' if len(tokens) > 5 else 'short'
            else:
                is_role = word in ('agent', 'patient')
                if is_role and drop_short and len(tokens) <= 5:
                    continue
                label = tokens[WORD_COLS[curr_voice][word]]
            labels.append(label)
            valid_inds.append(row)

    # Restrict the data and the sentence ids to the labelled trials.
    valid_inds = np.array(valid_inds)
    data = data[valid_inds, ...]
    sen_ints = [sen for row, sen in enumerate(sen_ints) if row in valid_inds]

    t_first = time.min()
    t_last = time.max()
    # NOTE(review): 500 is presumably the sampling rate in Hz -- confirm.
    total_win = int((t_last - t_first) * 500)

    if win_len < 0:
        win_len = total_win - overlap

    win_starts = range(0, total_win - win_len, overlap)

    if isPerm:
        random.seed(random_state_perm)
        random.shuffle(labels)

    l_ints, cv_membership, tgm_acc, tgm_pred = models.lr_tgm_loso_fold(
        data,
        labels,
        win_starts,
        win_len,
        sen_ints,
        fold,
        penalty=alg[3:],
        adj=adj,
        doTimeAvg=doTimeAvg,
        doTestAvg=doTestAvg)

    np.savez_compressed(fname,
                        l_ints=l_ints,
                        cv_membership=cv_membership,
                        tgm_acc=tgm_acc,
                        tgm_pred=tgm_pred,
                        win_starts=win_starts,
                        time=time,
                        proc=proc)
Ejemplo n.º 5
0
def run_tgm_exp(experiment,
                sen_type,
                word,
                win_len,
                overlap,
                fold,
                isPerm=False,
                alg='lr-l1',
                adj=None,
                doTimeAvg=False,
                doTestAvg=True,
                num_instances=1,
                proc=load_data.DEFAULT_PROC,
                random_state_perm=1,
                force=False):
    """Run one fold of the multi-subject windowed classification and save it.

    Data are loaded for every subject in ``VALID_SUBS[experiment]``, aligned
    to ``'noun1'``. Each trial is labelled with the token at
    ``WORD_COLS[sen_type][word]``; when ``word`` is ``'noun2'`` stimuli with
    five or fewer tokens are dropped. All subjects must yield the same
    sentence ids, time vector and labels (checked with assertions). The
    per-subject arrays are passed to ``models.lr_tgm_loso_multisub_fold`` and
    its outputs are written with ``np.savez_compressed``; nothing is
    returned.

    ``alg[3:]`` is forwarded as the ``penalty`` argument of the model call.
    If the output file already exists and ``force`` is false the function
    returns early.
    """
    warnings.filterwarnings(action='ignore')

    # Output location.
    top_dir = TOP_DIR.format(exp=experiment)
    if not os.path.exists(top_dir):
        os.mkdir(top_dir)

    fname = SAVE_FILE.format(
        dir=top_dir,
        sen_type=sen_type,
        word=word,
        win_len=win_len,
        ov=overlap,
        perm=bool_to_str(isPerm),
        alg=alg,
        adj=adj,
        avgTm=bool_to_str(doTimeAvg),
        avgTst=bool_to_str(doTestAvg),
        inst=num_instances,
        rsP=random_state_perm,
        fold=fold)

    print(force)
    if os.path.isfile(fname + '.npz') and not force:
        print('Job already completed. Skipping Job.')
        print(fname)
        return

    stimuli_voice = list(load_data.read_stimuli(experiment))
    limits = TIME_LIMITS[experiment][sen_type]['noun1']

    data_list = []
    sen_ints = []
    time = []
    labels = []
    for i_sub, subject in enumerate(VALID_SUBS[experiment]):
        # The repetition filter returns the repetition indices below 10.
        data, _, sen_ints_sub, time_sub, _ = load_data.load_sentence_data_v2(
            subject=subject,
            align_to='noun1',
            voice=sen_type,
            experiment=experiment,
            proc=proc,
            num_instances=num_instances,
            reps_filter=lambda x: [i for i in range(x) if i < 10],
            sensor_type=None,
            is_region_sorted=False,
            tmin=limits['tmin'],
            tmax=limits['tmax'])

        keep_rows = []
        labels_sub = []
        kept_sen_ints = []
        for row, sen_int in enumerate(sen_ints_sub):
            tokens = stimuli_voice[sen_int]['stimulus'].split()
            if word == 'noun2' and len(tokens) <= 5:
                # Short stimuli are skipped for the 'noun2' analysis.
                continue
            labels_sub.append(tokens[WORD_COLS[sen_type][word]])
            keep_rows.append(row)
            kept_sen_ints.append(sen_int)

        keep_rows = np.array(keep_rows)
        print(labels_sub)
        data_list.append(data[keep_rows, ...])
        print(data_list[i_sub].shape)

        if i_sub == 0:
            # The first subject defines the reference ordering.
            sen_ints = kept_sen_ints
            time = time_sub
            labels = labels_sub
        else:
            assert np.all(sen_ints == kept_sen_ints)
            assert np.all(time == time_sub)
            assert np.all(np.array(labels) == np.array(labels_sub))

    t_first = time.min()
    t_last = time.max()
    # NOTE(review): 500 is presumably the sampling rate in Hz -- confirm.
    total_win = int((t_last - t_first) * 500)

    if win_len < 0:
        win_len = total_win - overlap

    win_starts = range(0, total_win - win_len, overlap)
    print(sen_ints)
    print(labels)
    print(data_list[0].shape)

    if isPerm:
        random.seed(random_state_perm)
        random.shuffle(labels)

    l_ints, cv_membership, tgm_acc, tgm_pred = (
        models.lr_tgm_loso_multisub_fold(data_list,
                                         labels,
                                         win_starts,
                                         win_len,
                                         sen_ints,
                                         fold,
                                         penalty=alg[3:],
                                         adj=adj,
                                         doTimeAvg=doTimeAvg,
                                         doTestAvg=doTestAvg))

    np.savez_compressed(fname,
                        l_ints=l_ints,
                        cv_membership=cv_membership,
                        tgm_acc=tgm_acc,
                        tgm_pred=tgm_pred,
                        win_starts=win_starts,
                        time=time,
                        proc=proc)
Ejemplo n.º 6
0
def run_tgm_exp(experiment,
                sen_type,
                word,
                win_len,
                overlap,
                alg='lr-l1',
                adj=None,
                doTimeAvg=False,
                num_instances=1,
                proc=load_data.DEFAULT_PROC,
                force=False):
    """Fit the multi-subject windowed model and save its coefficients.

    Data are loaded for every subject in ``VALID_SUBS[experiment]``; all
    subjects must share the same sentence ids and time vector (checked with
    assertions). One label per trial is derived according to ``word``:
      * ``'propid'``  - index of the unique (agent, verb, patient) triple;
      * ``'voice'``   - the stimulus voice;
      * ``'senlen'``  - ``'long'`` if the stimulus has more than five tokens,
        else ``'short'``;
      * anything else - the token at ``WORD_COLS[voice][word]``.
    For experiment ``'PassAct3'``, stimuli with five or fewer tokens are
    dropped for ``'propid'``, ``'agent'`` and ``'patient'``.

    The outputs of ``models.lr_tgm_loso_multisub_coef`` are written with
    ``np.savez_compressed``; nothing is returned. ``alg[3:]`` is forwarded as
    the ``penalty`` argument. If the output file already exists and
    ``force`` is false the function returns early.
    """
    warnings.filterwarnings(action='ignore')
    # Save Directory
    top_dir = TOP_DIR.format(exp=experiment)
    if not os.path.exists(top_dir):
        os.mkdir(top_dir)

    fname = SAVE_FILE.format(dir=top_dir,
                             sen_type=sen_type,
                             word=word,
                             win_len=win_len,
                             ov=overlap,
                             alg=alg,
                             adj=adj,
                             avgTm=bool_to_str(doTimeAvg),
                             inst=num_instances)

    print(force)
    if os.path.isfile(fname + '.npz') and not force:
        print('Job already completed. Skipping Job.')
        print(fname)
        return

    if sen_type == 'pooled':
        voice = ['active', 'passive']
    else:
        voice = [sen_type]

    data_list = []
    sen_ints = []
    time = []
    for i_sub, subject in enumerate(VALID_SUBS[experiment]):
        data, _, sen_ints_sub, time_sub, _ = load_data.load_sentence_data_v2(
            subject=subject,
            align_to='last',
            voice=voice,
            experiment=experiment,
            proc=proc,
            num_instances=num_instances,
            reps_filter=None,
            sensor_type=None,
            is_region_sorted=False,
            tmin=0.0,
            tmax=TMAX[experiment])
        data_list.append(data)
        if i_sub == 0:
            # The first subject defines the reference ordering.
            sen_ints = sen_ints_sub
            time = time_sub
        else:
            assert np.all(sen_ints == sen_ints_sub)
            # Compare against this subject's time vector; the previous
            # ``time == time`` check was always true and verified nothing.
            assert np.all(time == time_sub)

    stimuli_voice = list(load_data.read_stimuli(experiment))
    drop_short = experiment == 'PassAct3'

    valid_inds = []
    if word == 'propid':
        content_words = []
        for i_sen_int, sen_int in enumerate(sen_ints):
            word_list = stimuli_voice[sen_int]['stimulus'].split()
            curr_voice = stimuli_voice[sen_int]['voice']
            if drop_short and len(word_list) <= 5:
                continue
            cols = WORD_COLS[curr_voice]
            valid_inds.append(i_sen_int)
            content_words.append([word_list[cols['agent']],
                                  word_list[cols['verb']],
                                  word_list[cols['patient']]])
        # The inverse indices act as integer class labels, one per triple.
        uni_content, labels = np.unique(np.array(content_words),
                                        axis=0,
                                        return_inverse=True)
        print(uni_content)
    else:
        labels = []
        for i_sen_int, sen_int in enumerate(sen_ints):
            word_list = stimuli_voice[sen_int]['stimulus'].split()
            curr_voice = stimuli_voice[sen_int]['voice']
            if word == 'voice':
                label = curr_voice
            elif word == 'senlen':
                label = 'long' if len(word_list) > 5 else 'short'
            else:
                is_role = word == 'agent' or word == 'patient'
                if is_role and drop_short and len(word_list) <= 5:
                    continue
                label = word_list[WORD_COLS[curr_voice][word]]
            labels.append(label)
            valid_inds.append(i_sen_int)

    # Restrict every subject's data to the labelled trials.
    valid_inds = np.array(valid_inds)
    data_list = [data[valid_inds, ...] for data in data_list]

    tmin = time.min()
    tmax = time.max()

    # NOTE(review): 500 is presumably the sampling rate in Hz -- confirm.
    total_win = int((tmax - tmin) * 500)

    if win_len < 0:
        win_len = total_win - overlap

    win_starts = range(0, total_win - win_len, overlap)

    coef, Cs, haufe_maps = models.lr_tgm_loso_multisub_coef(
        data_list,
        labels,
        win_starts,
        win_len,
        penalty=alg[3:],
        adj=adj,
        doTimeAvg=doTimeAvg)
    print('Saving {}'.format(fname))
    np.savez_compressed(fname,
                        coef=coef,
                        Cs=Cs,
                        haufe_maps=haufe_maps,
                        win_starts=win_starts,
                        time=time,
                        proc=proc)
Ejemplo n.º 7
0
# Script entry point: loads one hard-coded dataset (subject 'B', experiment
# 'krns2') and prepares voice labels plus window parameters.
if __name__ == '__main__':

    # Load both voices, aligned to 'last', restricted to tmin=0.5..tmax=1.0.
    data, _, sen_ints, time, _ = load_data.load_sentence_data_v2(
        subject='B',
        align_to='last',
        voice=['active', 'passive'],
        experiment='krns2',
        proc=load_data.DEFAULT_PROC,
        num_instances=1,
        reps_filter=None,
        sensor_type=None,
        is_region_sorted=False,
        tmin=0.5,
        tmax=1.0)

    # One voice label per loaded trial, looked up via its sentence id.
    stimuli_voice = list(load_data.read_stimuli('krns2'))
    labels = []
    for i_sen_int, sen_int in enumerate(sen_ints):
        curr_voice = stimuli_voice[sen_int]['voice']
        labels.append(curr_voice)

    print(labels)
    tmin = time.min()
    tmax = time.max()

    # NOTE(review): 500 is presumably the sampling rate in Hz -- confirm.
    # total_win and win_len are not used again within this visible fragment.
    total_win = int((tmax - tmin) * 500)
    win_len = 25
    overlap = 12

    # Window starts use a hard-coded upper bound of 24 rather than
    # total_win - win_len.
    win_starts = range(0, 24, overlap)
Ejemplo n.º 8
0
        is_region_sorted=False,
        tmin=tmin,
        tmax=tmax)

    print(data_new.shape)
    print(labels_new)
    print(indices_in_master_experiment_stimuli)

    def num_stimulus_words(stimuli_dict_):
        """Count the words of a stimulus once punctuation is stripped.

        Matches of ``load_data_new.punctuation_regex`` are removed from
        ``stimuli_dict_['stimulus']``; the remaining text is split on
        whitespace and the non-empty tokens are counted.
        """
        without_punct = load_data_new.punctuation_regex.sub(
            '', stimuli_dict_['stimulus'])
        tokens = without_punct.strip().split()
        return sum(1 for token in tokens if token.strip())

    stimuli_annotations = list(load_data_new.read_stimuli('PassAct3'))
    assert (len(stimuli_annotations) == data_new.shape[0])
    indicator_long = np.array(
        [num_stimulus_words(s) >= 4 for s in stimuli_annotations])
    indicator_active = np.array(
        [s['voice'] == 'active' for s in stimuli_annotations])

    is_long_new = np.logical_and(indicator_active, indicator_long)
    time_new = np.squeeze(time_new)
    # inds_to_plot = np.logical_and(time_new >= (tmin), time_new <= (tmax))
    # data_new = data_new[:, :, inds_to_plot]
    # time_new = time_new[inds_to_plot]
    print(time_old[:10])
    print(time_new[:10])

    min_time = np.min([data_new.shape[-1], data_old.shape[-1]])
Ejemplo n.º 9
0
def run_tgm_exp(subject,
                sen_type,
                analysis,
                win_len,
                overlap,
                isPerm=False,
                alg='lr-l1',
                adj=None,
                doTimeAvg=False,
                doTestAvg=True,
                num_instances=1,
                random_state_perm=1,
                force=False,
                mode='acc'):
    """Run the determiner/noun windowed classification for one subject.

    Four epochs of the ``'krns2'`` experiment are loaded: the 0.5 s before
    and after ``'noun1'`` and the 0.5 s before and after ``'noun2'``. Trials
    are labelled with the ``det1`` / ``noun1`` / ``det2`` / ``noun2`` tokens
    of their stimulus.

    ``analysis`` controls which epochs are used:
      * containing ``'det-type'`` and equal to ``'det-type-first'``: only the
        first-determiner epoch;
      * containing ``'det-type'`` otherwise: both determiner epochs;
      * anything else: all four epochs, and for ``'the-dog'`` / ``'a-dog'``
        only trials labelled with those two words are kept.

    With ``mode == 'acc'`` the data go to ``models.lr_tgm_loso``; otherwise
    to ``models.lr_tgm_coef``. Results are written with
    ``np.savez_compressed``; nothing is returned. ``alg[3:]`` is forwarded
    as the ``penalty`` argument. If the output file already exists and
    ``force`` is false the function returns early.
    """
    warnings.filterwarnings(action='ignore')
    # Save Directory
    if not os.path.exists(TOP_DIR):
        os.mkdir(TOP_DIR)
    save_dir = SAVE_DIR.format(top_dir=TOP_DIR, sub=subject)
    if not os.path.exists(save_dir):
        os.mkdir(save_dir)

    fname = SAVE_FILE.format(dir=save_dir,
                             sub=subject,
                             sen_type=sen_type,
                             analysis=analysis,
                             win_len=win_len,
                             ov=overlap,
                             perm=bool_to_str(isPerm),
                             alg=alg,
                             adj=adj,
                             avgTm=bool_to_str(doTimeAvg),
                             avgTst=bool_to_str(doTestAvg),
                             inst=num_instances,
                             rsP=random_state_perm,
                             mode=mode)

    print(force)
    if os.path.isfile(fname + '.npz') and not force:
        print('Job already completed. Skipping Job.')
        print(fname)
        return

    if sen_type == 'pooled':
        voice = ['active', 'passive']
    else:
        voice = sen_type
    experiment = 'krns2'

    def load_epoch(align_to, tmin, tmax):
        # Load one epoch; only the alignment and time limits vary.
        return load_data.load_sentence_data_v2(
            subject=subject,
            align_to=align_to,
            voice=voice,
            experiment=experiment,
            proc=load_data.DEFAULT_PROC,
            num_instances=num_instances,
            reps_filter=None,
            sensor_type=None,
            is_region_sorted=False,
            tmin=tmin,
            tmax=tmax)

    data_det1, _, sen_ints_det1, _, _ = load_epoch('noun1', -0.5, 0.0)
    data_n1, _, sen_ints_n1, _, _ = load_epoch('noun1', 0.0, 0.5)
    data_det2, _, sen_ints_det2, _, _ = load_epoch('noun2', -0.5, 0.0)
    # The saved time vector is the one of this last epoch.
    data_n2, _, sen_ints_n2, time, _ = load_epoch('noun2', 0.0, 0.5)

    stimuli_voice = list(load_data.read_stimuli(experiment))

    def words_for(epoch_sen_ints, column):
        # Token found in column `column` of each listed sentence.
        found = []
        for sen_int in epoch_sen_ints:
            word_list = stimuli_voice[sen_int]['stimulus'].split()
            curr_voice = stimuli_voice[sen_int]['voice']
            found.append(word_list[WORD_COLS[curr_voice][column]])
        return found

    labels = words_for(sen_ints_det1, 'det1')

    if 'det-type' not in analysis:
        labels.extend(words_for(sen_ints_n1, 'noun1'))
        labels.extend(words_for(sen_ints_det2, 'det2'))
        labels.extend(words_for(sen_ints_n2, 'noun2'))

        sen_ints = np.concatenate(
            [sen_ints_det1, sen_ints_n1, sen_ints_det2, sen_ints_n2], axis=0)
        data = np.concatenate([data_det1, data_n1, data_det2, data_n2], axis=0)
    elif analysis == 'det-type-first':
        sen_ints = np.array(sen_ints_det1)
        data = data_det1
    else:
        labels.extend(words_for(sen_ints_det2, 'det2'))

        sen_ints = np.concatenate([sen_ints_det1, sen_ints_det2], axis=0)
        data = np.concatenate([data_det1, data_det2], axis=0)

    # Optionally restrict to the two words named by the analysis.
    inds_to_keep = np.ones((len(labels), ), dtype=bool)
    wanted = {'the-dog': ('the', 'dog'), 'a-dog': ('a', 'dog')}.get(analysis)
    if wanted is not None:
        for i_label, label in enumerate(labels):
            if label not in wanted:
                inds_to_keep[i_label] = False

    data = data[inds_to_keep, :, :]
    sen_ints = sen_ints[inds_to_keep]
    new_labels = [
        label for label, keep in zip(labels, inds_to_keep) if keep
    ]

    print(np.unique(np.array(new_labels)))
    total_win = data.shape[-1]
    win_starts = range(0, total_win - win_len, overlap)

    if isPerm:
        # Shuffle the labels that are actually handed to the model. The old
        # code shuffled the unfiltered list after `new_labels` had been
        # built, so the permutation never reached the 'acc' analysis.
        random.seed(random_state_perm)
        random.shuffle(new_labels)

    if mode == 'acc':
        l_ints, cv_membership, tgm_acc, tgm_pred = models.lr_tgm_loso(
            data,
            new_labels,
            win_starts,
            win_len,
            sen_ints,
            penalty=alg[3:],
            adj=adj,
            doTimeAvg=doTimeAvg,
            doTestAvg=doTestAvg)

        np.savez_compressed(fname,
                            l_ints=l_ints,
                            cv_membership=cv_membership,
                            tgm_acc=tgm_acc,
                            tgm_pred=tgm_pred,
                            win_starts=win_starts,
                            time=time)
    else:
        # Use the filtered labels so they stay aligned with the filtered
        # data (the unfiltered list has a different length for the
        # 'the-dog' / 'a-dog' analyses).
        l_ints, coef, Cs = models.lr_tgm_coef(data,
                                              new_labels,
                                              win_starts,
                                              win_len,
                                              penalty=alg[3:],
                                              adj=adj,
                                              doTimeAvg=doTimeAvg)
        np.savez_compressed(fname,
                            l_ints=l_ints,
                            coef=coef,
                            Cs=Cs,
                            win_starts=win_starts,
                            time=time)
Ejemplo n.º 10
0
def run_tgm_exp(experiment,
                subject,
                word,
                win_len,
                overlap,
                isPerm=False,
                alg='lr-l1',
                adj=None,
                doTimeAvg=False,
                doTestAvg=True,
                num_instances=1,
                proc=load_data.DEFAULT_PROC,
                random_state_perm=1,
                force=False):
    """Run one voice-decoding TGM job and save the results to an .npz file.

    The sentence voice (as reported by ``load_data.read_stimuli``) is used as
    the class label for every loaded sentence. The classifier family is
    picked from ``alg`` and the outputs are written with
    ``np.savez_compressed`` to a path built from ``TOP_DIR``, ``SAVE_DIR``
    and ``SAVE_FILE``.

    Args:
        experiment: Experiment identifier; forwarded to the data loaders and
            used in the output path.
        subject: Subject identifier; forwarded to the data loaders and used
            in the output path.
        word: If ``'voice'``, active and passive sentences are loaded
            together, aligned to ``'last'`` over 0.5-1.0. Otherwise active
            sentences (0.0-0.5) and passive sentences (-0.5-0.0) are loaded
            separately, aligned to ``'verb'``, and concatenated along axis 0.
        win_len: Window length passed to the model. A negative value is
            replaced by ``total_win - overlap``, which yields a single window
            starting at 0.
        overlap: Used as the stride of ``range`` when generating window
            starts (i.e. the step between consecutive windows).
        isPerm: If true, labels are shuffled (permutation run) after seeding
            ``random`` with ``random_state_perm``.
        alg: Algorithm string. Contains ``'lr'`` -> ``models.lr_tgm_loso``
            with penalty ``str_to_none(alg[3:])``; contains ``'svm'`` ->
            ``models.svc_tgm_loso`` with penalty ``alg[4:]``; anything else
            -> ``models.nb_tgm_loso`` (feature selection is disabled when
            ``'None'`` appears in ``alg``).
        adj: Forwarded to the lr/svm models; for the fallback model only
            ``'zscore'`` is recognised and enables z-scoring.
        doTimeAvg: Forwarded to the models (as ``doAvg`` for the fallback).
        doTestAvg: Forwarded to the lr/svm models.
        num_instances: Forwarded to the data loaders and used in the path.
        proc: Forwarded to the data loaders and stored in the output file.
        random_state_perm: Seed used for the permutation shuffle.
        force: If true, recompute even when the output file already exists.

    Returns:
        None. Returns early, without computing anything, when the output file
        exists and ``force`` is false.

    Raises:
        OSError: If an output directory cannot be created.
    """
    warnings.filterwarnings(action='ignore')

    # Save Directory
    top_dir = TOP_DIR.format(exp=experiment)
    save_dir = SAVE_DIR.format(top_dir=top_dir, sub=subject)
    for out_dir in (top_dir, save_dir):
        # Create-then-check instead of check-then-create: several jobs for
        # the same experiment/subject may start at once, and an
        # exists()/mkdir() pair lets the slower one crash with EEXIST.
        try:
            os.mkdir(out_dir)
        except OSError:
            if not os.path.isdir(out_dir):
                raise

    fname = SAVE_FILE.format(dir=save_dir,
                             sub=subject,
                             word=word,
                             win_len=win_len,
                             ov=overlap,
                             perm=bool_to_str(isPerm),
                             alg=alg,
                             adj=adj,
                             avgTm=bool_to_str(doTimeAvg),
                             avgTst=bool_to_str(doTestAvg),
                             inst=num_instances,
                             rsP=random_state_perm)

    print(force)
    # np.savez_compressed appends '.npz', so that is the file to look for.
    if os.path.isfile(fname + '.npz') and not force:
        print('Job already completed. Skipping Job.')
        print(fname)
        return

    # Loader arguments shared by every call below.
    common_kwargs = dict(subject=subject,
                         experiment=experiment,
                         proc=proc,
                         num_instances=num_instances,
                         reps_filter=None,
                         sensor_type=None,
                         is_region_sorted=False)

    if word == 'voice':
        data, _, sen_ints, time, _ = load_data.load_sentence_data_v2(
            align_to='last',
            voice=['active', 'passive'],
            tmin=0.5,
            tmax=1.0,
            **common_kwargs)
    else:
        # Active and passive sentences use different windows around the verb,
        # so they are loaded separately and stacked. The time axis is taken
        # from the active load.
        data_act, _, sen_ints_act, time, _ = load_data.load_sentence_data_v2(
            align_to='verb',
            voice=['active'],
            tmin=0.0,
            tmax=0.5,
            **common_kwargs)
        data_pass, _, sen_ints_pass, _, _ = load_data.load_sentence_data_v2(
            align_to='verb',
            voice=['passive'],
            tmin=-0.5,
            tmax=0.0,
            **common_kwargs)

        data = np.concatenate([data_act, data_pass], axis=0)
        sen_ints = np.concatenate([sen_ints_act, sen_ints_pass], axis=0)

    # One label per loaded sentence: the voice of the corresponding stimulus.
    stimuli_voice = list(load_data.read_stimuli(experiment))
    labels = [stimuli_voice[sen_int]['voice'] for sen_int in sen_ints]

    print(labels)
    tmin = time.min()
    tmax = time.max()

    # NOTE(review): 500 is presumably the sampling rate in Hz -- confirm.
    total_win = int((tmax - tmin) * 500)

    if win_len < 0:
        win_len = total_win - overlap

    win_starts = range(0, total_win - win_len, overlap)

    if isPerm:
        random.seed(random_state_perm)
        random.shuffle(labels)

    if 'lr' in alg:
        l_ints, cv_membership, tgm_acc, tgm_pred = models.lr_tgm_loso(
            data,
            labels,
            win_starts,
            win_len,
            sen_ints,
            penalty=str_to_none(alg[3:]),
            adj=adj,
            doTimeAvg=doTimeAvg,
            doTestAvg=doTestAvg)
    elif 'svm' in alg:
        l_ints, cv_membership, tgm_acc, tgm_pred = models.svc_tgm_loso(
            data,
            labels,
            win_starts,
            win_len,
            sen_ints,
            sub_rs=1,
            penalty=alg[4:],
            adj=adj,
            doTimeAvg=doTimeAvg,
            doTestAvg=doTestAvg,
            ddof=1,
            C=None)
    else:
        doZscore = adj == 'zscore'
        doFeatSelect = 'None' not in alg
        # feature_masks and num_feat_selected are returned but not saved.
        (tgm_pred, l_ints, cv_membership, feature_masks,
         num_feat_selected) = models.nb_tgm_loso(
             data,
             labels,
             sen_ints,
             1,
             win_starts,
             win_len,
             feature_select=doFeatSelect,
             doZscore=doZscore,
             doAvg=doTimeAvg,
             ddof=1)
        tgm_acc = tgm_from_preds_GNB(tgm_pred, l_ints, cv_membership)

    print('Max Acc: %.2f' % np.max(np.mean(tgm_acc, axis=0)))
    np.savez_compressed(fname,
                        l_ints=l_ints,
                        cv_membership=cv_membership,
                        tgm_acc=tgm_acc,
                        tgm_pred=tgm_pred,
                        win_starts=win_starts,
                        time=time,
                        proc=proc)