Ejemplo n.º 1
0
def run_tgm_exp(data,
                labels,
                sen_ints,
                win_len,
                alg,
                doTimeAvg=False,
                doTestAvg=False):
    """Fit the TGM model selected by ``alg`` on already-loaded data.

    The classifier is chosen by substring match on ``alg``:

    * contains ``'lr'``  -> ``models.lr_tgm_loso`` with
      ``penalty=str_to_none(alg[3:])``
    * contains ``'svm'`` -> ``models.svc_tgm_loso`` with ``penalty=alg[4:]``
    * anything else      -> ``models.nb_tgm_loso``

    Args:
        data: Passed through unchanged to the selected ``models`` routine.
        labels: Passed through unchanged to the selected ``models`` routine.
        sen_ints: Passed through unchanged to the selected ``models`` routine.
        win_len: Window length forwarded to the model.
        alg: Algorithm name; also decides the adjustment (see below).
        doTimeAvg: Forwarded as ``doTimeAvg`` (``doAvg`` for the NB model).
        doTestAvg: Forwarded as ``doTestAvg`` (unused by the NB model).

    Returns:
        None. The model outputs are unpacked into locals but neither
        returned nor saved.

    NOTE(review): ``win_starts`` is not a parameter or a local here, so it
    must exist at module level or the call raises ``NameError`` -- confirm.
    NOTE(review): results are discarded; this snippet may be incomplete.
    """
    import models

    # Any algorithm whose name contains 'l2' is run with z-score adjustment.
    adj = 'zscore' if 'l2' in alg else None

    if 'lr' in alg:
        l_ints, cv_membership, tgm_acc, tgm_pred = models.lr_tgm_loso(
            data,
            labels,
            win_starts,
            win_len,
            sen_ints,
            penalty=str_to_none(alg[3:]),
            adj=adj,
            doTimeAvg=doTimeAvg,
            doTestAvg=doTestAvg)
        return

    if 'svm' in alg:
        l_ints, cv_membership, tgm_acc, tgm_pred = models.svc_tgm_loso(
            data,
            labels,
            win_starts,
            win_len,
            sen_ints,
            sub_rs=1,
            penalty=alg[4:],
            adj=adj,
            doTimeAvg=doTimeAvg,
            doTestAvg=doTestAvg,
            ddof=1,
            C=None)
        return

    # Fallback: the NB model takes boolean switches instead of adj/penalty.
    use_zscore = adj == 'zscore'
    select_features = 'None' not in alg
    (tgm_pred, l_ints, cv_membership, feature_masks,
     num_feat_selected) = models.nb_tgm_loso(
         data,
         labels,
         sen_ints,
         1,
         win_starts,
         win_len,
         feature_select=select_features,
         doZscore=use_zscore,
         doAvg=doTimeAvg,
         ddof=1)
Ejemplo n.º 2
0
def run_tgm_exp(experiment,
                subject,
                sen_type,
                word,
                win_len,
                overlap,
                isPerm=False,
                alg='lr-l1',
                adj=None,
                doTimeAvg=False,
                doTestAvg=True,
                num_instances=1,
                reps_to_use=10,
                proc=load_data.DEFAULT_PROC,
                random_state_perm=1,
                force=False,
                mode='acc'):
    """Run one logistic-regression TGM job and save the result as ``.npz``.

    The output file name is formatted from the arguments via ``SAVE_FILE``.
    If that file already exists and ``force`` is false, the job is skipped.

    Args:
        experiment: Formatted into ``TOP_DIR`` and passed to the data loader.
        subject: Formatted into the save path and passed to the data loader.
        sen_type: Passed to the loader as ``voice``.
        word: Passed to the loader as ``align_to``.
        win_len: Window length forwarded to the model. A negative value is
            replaced by ``total_win - overlap``.
        overlap: Step between consecutive window starts (the ``range`` step).
        isPerm: If true, shuffle ``labels`` in place after seeding ``random``
            with ``random_state_perm``.
        alg: Algorithm name; ``alg[3:]`` is used as the penalty.
        adj: Forwarded to the model as ``adj``.
        doTimeAvg: Forwarded to the model.
        doTestAvg: Forwarded to ``models.lr_tgm_loso`` only.
        num_instances: Passed to the loader.
        reps_to_use: Passed to the loader as ``reps_filter``.
        proc: Passed to the loader and stored in the output file.
        random_state_perm: Seed used when ``isPerm`` is true.
        force: Recompute even when the output file already exists.
        mode: ``'acc'`` runs ``models.lr_tgm_loso``; any other value runs
            ``models.lr_tgm_coef``.

    Returns:
        None. Results are written with ``np.savez_compressed``.
    """
    warnings.filterwarnings(action='ignore')

    # Save directory. makedirs(exist_ok=True) avoids the exists()/mkdir()
    # race when several jobs start at once, and also creates missing parents.
    top_dir = TOP_DIR.format(exp=experiment)
    os.makedirs(top_dir, exist_ok=True)
    save_dir = SAVE_DIR.format(top_dir=top_dir, sub=subject)
    os.makedirs(save_dir, exist_ok=True)

    fname = SAVE_FILE.format(dir=save_dir,
                             sub=subject,
                             sen_type=sen_type,
                             word=word,
                             win_len=win_len,
                             ov=overlap,
                             perm=bool_to_str(isPerm),
                             alg=alg,
                             adj=adj,
                             avgTm=bool_to_str(doTimeAvg),
                             avgTst=bool_to_str(doTestAvg),
                             inst=num_instances,
                             rep=reps_to_use,
                             rsP=random_state_perm,
                             mode=mode)

    print(force)
    # np.savez_compressed appends '.npz', so check for that name.
    if os.path.isfile(fname + '.npz') and not force:
        print('Job already completed. Skipping Job.')
        print(fname)
        return

    data, labels, sen_ints, time, sensor_regions = load_data.load_sentence_data_v2(
        subject=subject,
        align_to=word,
        voice=sen_type,
        experiment=experiment,
        proc=proc,
        num_instances=num_instances,
        reps_filter=reps_to_use,
        sensor_type=None,
        is_region_sorted=False,
        tmin=None,
        tmax=None)

    tmin = time.min()
    tmax = time.max()

    # NOTE(review): 500 is presumably the sampling rate in Hz, which would
    # make total_win a sample count -- confirm against the loader.
    total_win = int((tmax - tmin) * 500)

    if win_len < 0:
        win_len = total_win - overlap

    win_starts = range(0, total_win - win_len, overlap)

    if isPerm:
        # Seeded so a given random_state_perm always gives the same shuffle.
        random.seed(random_state_perm)
        random.shuffle(labels)

    penalty = alg[3:]
    if mode == 'acc':
        l_ints, cv_membership, tgm_acc, tgm_pred = models.lr_tgm_loso(
            data,
            labels,
            win_starts,
            win_len,
            sen_ints,
            penalty=penalty,
            adj=adj,
            doTimeAvg=doTimeAvg,
            doTestAvg=doTestAvg)
        np.savez_compressed(fname,
                            l_ints=l_ints,
                            cv_membership=cv_membership,
                            tgm_acc=tgm_acc,
                            tgm_pred=tgm_pred,
                            win_starts=win_starts,
                            time=time,
                            proc=proc)
    else:
        l_ints, coef, Cs = models.lr_tgm_coef(data,
                                              labels,
                                              win_starts,
                                              win_len,
                                              penalty=penalty,
                                              adj=adj,
                                              doTimeAvg=doTimeAvg)
        np.savez_compressed(fname,
                            l_ints=l_ints,
                            coef=coef,
                            Cs=Cs,
                            win_starts=win_starts,
                            time=time,
                            proc=proc)
Ejemplo n.º 3
0
def run_tgm_exp(subject,
                sen_type,
                analysis,
                win_len,
                overlap,
                isPerm=False,
                alg='lr-l1',
                adj=None,
                doTimeAvg=False,
                doTestAvg=True,
                num_instances=1,
                random_state_perm=1,
                force=False,
                mode='acc'):
    """Run one word-identity TGM job on 'krns2' and save it as ``.npz``.

    Four half-second epochs are loaded (before/after 'noun1' and before/after
    'noun2'); they are referred to here as det1, noun1, det2 and noun2. Which
    epochs are pooled, and which labels are kept, depends on ``analysis``:

    * no ``'det-type'`` in ``analysis``: all four epochs are concatenated.
    * ``'det-type-first'``: only the det1 epoch is used.
    * any other value containing ``'det-type'``: det1 and det2 are used.
    * ``'the-dog'`` / ``'a-dog'``: additionally keep only samples whose label
      is one of those two words.

    Args:
        subject: Formatted into the save path and passed to the data loader.
        sen_type: ``'pooled'`` loads ``['active', 'passive']``; any other
            value is passed to the loader as ``voice`` unchanged.
        analysis: Selects epochs and label filtering as described above.
        win_len: Window length forwarded to the model.
        overlap: Step between consecutive window starts (the ``range`` step).
        isPerm: If true, shuffle the labels used for training after seeding
            ``random`` with ``random_state_perm``.
        alg: Algorithm name; ``alg[3:]`` is used as the penalty.
        adj: Forwarded to the model as ``adj``.
        doTimeAvg: Forwarded to the model.
        doTestAvg: Forwarded to ``models.lr_tgm_loso`` only.
        num_instances: Passed to the loader.
        random_state_perm: Seed used when ``isPerm`` is true.
        force: Recompute even when the output file already exists.
        mode: ``'acc'`` runs ``models.lr_tgm_loso``; any other value runs
            ``models.lr_tgm_coef``.

    Returns:
        None. Results are written with ``np.savez_compressed``.
    """
    warnings.filterwarnings(action='ignore')

    # Save directory. makedirs(exist_ok=True) avoids the exists()/mkdir()
    # race when several jobs start at once, and also creates missing parents.
    os.makedirs(TOP_DIR, exist_ok=True)
    save_dir = SAVE_DIR.format(top_dir=TOP_DIR, sub=subject)
    os.makedirs(save_dir, exist_ok=True)

    fname = SAVE_FILE.format(dir=save_dir,
                             sub=subject,
                             sen_type=sen_type,
                             analysis=analysis,
                             win_len=win_len,
                             ov=overlap,
                             perm=bool_to_str(isPerm),
                             alg=alg,
                             adj=adj,
                             avgTm=bool_to_str(doTimeAvg),
                             avgTst=bool_to_str(doTestAvg),
                             inst=num_instances,
                             rsP=random_state_perm,
                             mode=mode)

    print(force)
    # np.savez_compressed appends '.npz', so check for that name.
    if os.path.isfile(fname + '.npz') and not force:
        print('Job already completed. Skipping Job.')
        print(fname)
        return

    if sen_type == 'pooled':
        voice = ['active', 'passive']
    else:
        voice = sen_type
    experiment = 'krns2'

    data_det1, _, sen_ints_det1, _, _ = load_data.load_sentence_data_v2(
        subject=subject,
        align_to='noun1',
        voice=voice,
        experiment=experiment,
        proc=load_data.DEFAULT_PROC,
        num_instances=num_instances,
        reps_filter=None,
        sensor_type=None,
        is_region_sorted=False,
        tmin=-0.5,
        tmax=0.0)

    data_n1, _, sen_ints_n1, _, _ = load_data.load_sentence_data_v2(
        subject=subject,
        align_to='noun1',
        voice=voice,
        experiment=experiment,
        proc=load_data.DEFAULT_PROC,
        num_instances=num_instances,
        reps_filter=None,
        sensor_type=None,
        is_region_sorted=False,
        tmin=0.0,
        tmax=0.5)

    data_det2, _, sen_ints_det2, _, _ = load_data.load_sentence_data_v2(
        subject=subject,
        align_to='noun2',
        voice=voice,
        experiment=experiment,
        proc=load_data.DEFAULT_PROC,
        num_instances=num_instances,
        reps_filter=None,
        sensor_type=None,
        is_region_sorted=False,
        tmin=-0.5,
        tmax=0.0)

    # `time` is taken from this last load and is what gets saved.
    data_n2, _, sen_ints_n2, time, _ = load_data.load_sentence_data_v2(
        subject=subject,
        align_to='noun2',
        voice=voice,
        experiment=experiment,
        proc=load_data.DEFAULT_PROC,
        num_instances=num_instances,
        reps_filter=None,
        sensor_type=None,
        is_region_sorted=False,
        tmin=0.0,
        tmax=0.5)

    stimuli_voice = list(load_data.read_stimuli(experiment))

    def words_at(ints, position):
        # For each sentence id, pick the word of its stimulus at the column
        # WORD_COLS assigns to `position` for that sentence's voice.
        found = []
        for sen_int in ints:
            word_list = stimuli_voice[sen_int]['stimulus'].split()
            curr_voice = stimuli_voice[sen_int]['voice']
            found.append(word_list[WORD_COLS[curr_voice][position]])
        return found

    labels = words_at(sen_ints_det1, 'det1')
    if 'det-type' not in analysis:
        labels += words_at(sen_ints_n1, 'noun1')
        labels += words_at(sen_ints_det2, 'det2')
        labels += words_at(sen_ints_n2, 'noun2')

        sen_ints = np.concatenate(
            [sen_ints_det1, sen_ints_n1, sen_ints_det2, sen_ints_n2], axis=0)
        data = np.concatenate([data_det1, data_n1, data_det2, data_n2], axis=0)
    elif analysis == 'det-type-first':
        sen_ints = np.array(sen_ints_det1)
        data = data_det1
    else:
        labels += words_at(sen_ints_det2, 'det2')

        sen_ints = np.concatenate([sen_ints_det1, sen_ints_det2], axis=0)
        data = np.concatenate([data_det1, data_det2], axis=0)

    # Two-word analyses keep only the samples labelled with those words.
    keep_words = {'the-dog': ('the', 'dog'), 'a-dog': ('a', 'dog')}.get(analysis)
    inds_to_keep = np.ones((len(labels), ), dtype=bool)
    if keep_words is not None:
        for i_label, label in enumerate(labels):
            inds_to_keep[i_label] = label in keep_words

    data = data[inds_to_keep, :, :]
    sen_ints = sen_ints[inds_to_keep]
    new_labels = [
        label for label, keep in zip(labels, inds_to_keep) if keep
    ]

    print(np.unique(np.array(new_labels)))
    total_win = data.shape[-1]
    win_starts = range(0, total_win - win_len, overlap)

    if isPerm:
        # Shuffle the filtered labels, i.e. the ones actually fed to the
        # model. Shuffling the unfiltered `labels` list (as before) left
        # `new_labels` untouched, so the permutation had no effect.
        random.seed(random_state_perm)
        random.shuffle(new_labels)

    penalty = alg[3:]
    if mode == 'acc':
        l_ints, cv_membership, tgm_acc, tgm_pred = models.lr_tgm_loso(
            data,
            new_labels,
            win_starts,
            win_len,
            sen_ints,
            penalty=penalty,
            adj=adj,
            doTimeAvg=doTimeAvg,
            doTestAvg=doTestAvg)

        np.savez_compressed(fname,
                            l_ints=l_ints,
                            cv_membership=cv_membership,
                            tgm_acc=tgm_acc,
                            tgm_pred=tgm_pred,
                            win_starts=win_starts,
                            time=time)
    else:
        # Use the filtered labels here too so they stay aligned with `data`
        # (the unfiltered list has a different length after filtering).
        l_ints, coef, Cs = models.lr_tgm_coef(data,
                                              new_labels,
                                              win_starts,
                                              win_len,
                                              penalty=penalty,
                                              adj=adj,
                                              doTimeAvg=doTimeAvg)
        np.savez_compressed(fname,
                            l_ints=l_ints,
                            coef=coef,
                            Cs=Cs,
                            win_starts=win_starts,
                            time=time)
Ejemplo n.º 4
0
def run_tgm_exp(experiment,
                subject,
                sen_type,
                word,
                win_len,
                overlap,
                isPerm=False,
                alg='lr-l1',
                adj=None,
                doTimeAvg=False,
                doTestAvg=True,
                num_instances=1,
                proc=load_data.DEFAULT_PROC,
                random_state_perm=1,
                force=False,
                mode='acc'):
    """Run one end-of-sentence TGM job and save the result as ``.npz``.

    Data are loaded aligned to ``'last'`` from 0.0 to ``TMAX[experiment]``.
    The label of each sentence is derived from its stimulus text according
    to ``word``:

    * ``'propid'``: index of the unique (agent, verb, patient) triple.
    * ``'voice'``: the sentence's voice.
    * ``'senlen'``: ``'long'`` if the sentence has more than 5 words,
      otherwise ``'short'``.
    * anything else: the word at ``WORD_COLS[voice][word]``.

    For ``experiment == 'PassAct3'`` and ``word`` in ``'propid'``,
    ``'agent'`` or ``'patient'``, sentences with 5 words or fewer are
    dropped from the data, labels and sentence ids.

    Args:
        experiment: Formatted into ``TOP_DIR``, passed to the loader and
            used to index ``TMAX``.
        subject: Formatted into the save path and passed to the data loader.
        sen_type: ``'pooled'`` loads ``['active', 'passive']``; any other
            value is loaded as ``[sen_type]``.
        word: Labelling scheme, see above.
        win_len: Window length forwarded to the model. A negative value is
            replaced by ``total_win - overlap``.
        overlap: Step between consecutive window starts (the ``range`` step).
        isPerm: If true, shuffle ``labels`` in place after seeding ``random``
            with ``random_state_perm``.
        alg: Algorithm name; ``alg[3:]`` is used as the penalty.
        adj: Forwarded to the model as ``adj``.
        doTimeAvg: Forwarded to the model.
        doTestAvg: Forwarded to ``models.lr_tgm_loso`` only.
        num_instances: Passed to the loader.
        proc: Passed to the loader and stored in the output file.
        random_state_perm: Seed used when ``isPerm`` is true.
        force: Recompute even when the output file already exists.
        mode: ``'acc'`` runs ``models.lr_tgm_loso``; any other value runs
            ``models.lr_tgm_coef``.

    Returns:
        None. Results are written with ``np.savez_compressed``.
    """
    warnings.filterwarnings(action='ignore')

    # Save directory. makedirs(exist_ok=True) avoids the exists()/mkdir()
    # race when several jobs start at once, and also creates missing parents.
    top_dir = TOP_DIR.format(exp=experiment)
    os.makedirs(top_dir, exist_ok=True)
    save_dir = SAVE_DIR.format(top_dir=top_dir, sub=subject)
    os.makedirs(save_dir, exist_ok=True)

    fname = SAVE_FILE.format(dir=save_dir,
                             sub=subject,
                             sen_type=sen_type,
                             word=word,
                             win_len=win_len,
                             ov=overlap,
                             perm=bool_to_str(isPerm),
                             alg=alg,
                             adj=adj,
                             avgTm=bool_to_str(doTimeAvg),
                             avgTst=bool_to_str(doTestAvg),
                             inst=num_instances,
                             rsP=random_state_perm,
                             mode=mode)

    print(force)
    # np.savez_compressed appends '.npz', so check for that name.
    if os.path.isfile(fname + '.npz') and not force:
        print('Job already completed. Skipping Job.')
        print(fname)
        return

    if sen_type == 'pooled':
        voice = ['active', 'passive']
    else:
        voice = [sen_type]

    data, _, sen_ints, time, _ = load_data.load_sentence_data_v2(
        subject=subject,
        align_to='last',
        voice=voice,
        experiment=experiment,
        proc=proc,
        num_instances=num_instances,
        reps_filter=None,
        sensor_type=None,
        is_region_sorted=False,
        tmin=0.0,
        tmax=TMAX[experiment])

    stimuli_voice = list(load_data.read_stimuli(experiment))

    # NOTE(review): presumably short PassAct3 sentences lack an agent or
    # patient, which is why they are skipped for those labels -- confirm.
    drop_short = experiment == 'PassAct3'
    valid_inds = []
    if word == 'propid':
        content_words = []
        for i_sen_int, sen_int in enumerate(sen_ints):
            word_list = stimuli_voice[sen_int]['stimulus'].split()
            curr_voice = stimuli_voice[sen_int]['voice']
            if drop_short and len(word_list) <= 5:
                continue
            cols = WORD_COLS[curr_voice]
            valid_inds.append(i_sen_int)
            content_words.append([
                word_list[cols['agent']],
                word_list[cols['verb']],
                word_list[cols['patient']]
            ])
        # The inverse indices of the unique triples serve as class labels.
        uni_content, labels = np.unique(np.array(content_words),
                                        axis=0,
                                        return_inverse=True)
        print(uni_content)
    else:
        labels = []
        for i_sen_int, sen_int in enumerate(sen_ints):
            word_list = stimuli_voice[sen_int]['stimulus'].split()
            curr_voice = stimuli_voice[sen_int]['voice']
            if word == 'voice':
                label = curr_voice
            elif word == 'senlen':
                label = 'long' if len(word_list) > 5 else 'short'
            elif (word in ('agent', 'patient') and drop_short
                  and len(word_list) <= 5):
                continue
            else:
                label = word_list[WORD_COLS[curr_voice][word]]
            labels.append(label)
            valid_inds.append(i_sen_int)

    # Set lookup keeps the sentence-id filter linear in the number of trials.
    kept = set(valid_inds)
    sen_ints = [
        sen for i_sen, sen in enumerate(sen_ints) if i_sen in kept
    ]
    # dtype=int keeps an empty selection usable as an index array
    # (np.array([]) defaults to float, which cannot index `data`).
    valid_inds = np.array(valid_inds, dtype=int)
    data = data[valid_inds, ...]

    print(valid_inds)
    print(data.shape)
    print(sen_ints)
    print(labels)

    tmin = time.min()
    tmax = time.max()

    # NOTE(review): 500 is presumably the sampling rate in Hz, which would
    # make total_win a sample count -- confirm against the loader.
    total_win = int((tmax - tmin) * 500)

    if win_len < 0:
        win_len = total_win - overlap

    win_starts = range(0, total_win - win_len, overlap)

    if isPerm:
        # Seeded so a given random_state_perm always gives the same shuffle.
        random.seed(random_state_perm)
        random.shuffle(labels)

    penalty = alg[3:]
    if mode == 'acc':
        l_ints, cv_membership, tgm_acc, tgm_pred = models.lr_tgm_loso(
            data,
            labels,
            win_starts,
            win_len,
            sen_ints,
            penalty=penalty,
            adj=adj,
            doTimeAvg=doTimeAvg,
            doTestAvg=doTestAvg)
        np.savez_compressed(fname,
                            l_ints=l_ints,
                            cv_membership=cv_membership,
                            tgm_acc=tgm_acc,
                            tgm_pred=tgm_pred,
                            win_starts=win_starts,
                            time=time,
                            proc=proc)
    else:
        l_ints, coef, Cs = models.lr_tgm_coef(data,
                                              labels,
                                              win_starts,
                                              win_len,
                                              penalty=penalty,
                                              adj=adj,
                                              doTimeAvg=doTimeAvg)
        np.savez_compressed(fname,
                            l_ints=l_ints,
                            coef=coef,
                            Cs=Cs,
                            win_starts=win_starts,
                            time=time,
                            proc=proc)
Ejemplo n.º 5
0
def run_tgm_exp(experiment,
                subject,
                word,
                win_len,
                overlap,
                isPerm=False,
                alg='lr-l1',
                adj=None,
                doTimeAvg=False,
                doTestAvg=True,
                num_instances=1,
                proc=load_data.DEFAULT_PROC,
                random_state_perm=1,
                force=False):
    """Run one voice-decoding TGM job and save the result as ``.npz``.

    The label of every trial is the voice of its sentence. ``word`` only
    selects which data are loaded:

    * ``'voice'``: both voices, aligned to ``'last'``, from 0.5 to 1.0.
    * anything else: active sentences aligned to ``'verb'`` from 0.0 to 0.5
      and passive sentences aligned to ``'verb'`` from -0.5 to 0.0,
      concatenated along axis 0. ``time`` is taken from the active load.

    The classifier is chosen by substring match on ``alg``: ``'lr'`` uses
    ``models.lr_tgm_loso``, ``'svm'`` uses ``models.svc_tgm_loso``, and
    anything else uses ``models.nb_tgm_loso`` followed by
    ``tgm_from_preds_GNB``.

    Args:
        experiment: Formatted into ``TOP_DIR`` and passed to the loader.
        subject: Formatted into the save path and passed to the data loader.
        word: Data selection, see above.
        win_len: Window length forwarded to the model. A negative value is
            replaced by ``total_win - overlap``.
        overlap: Step between consecutive window starts (the ``range`` step).
        isPerm: If true, shuffle ``labels`` in place after seeding ``random``
            with ``random_state_perm``.
        alg: Algorithm name; its tail (``alg[3:]`` for lr, ``alg[4:]`` for
            svm) is used as the penalty.
        adj: Forwarded as ``adj``; for the NB model only ``'zscore'`` has an
            effect.
        doTimeAvg: Forwarded to the model (as ``doAvg`` for the NB model).
        doTestAvg: Forwarded to the lr and svm models.
        num_instances: Passed to the loader.
        proc: Passed to the loader and stored in the output file.
        random_state_perm: Seed used when ``isPerm`` is true.
        force: Recompute even when the output file already exists.

    Returns:
        None. Results are written with ``np.savez_compressed``.
    """
    warnings.filterwarnings(action='ignore')

    # Save directory. makedirs(exist_ok=True) avoids the exists()/mkdir()
    # race when several jobs start at once, and also creates missing parents.
    top_dir = TOP_DIR.format(exp=experiment)
    os.makedirs(top_dir, exist_ok=True)
    save_dir = SAVE_DIR.format(top_dir=top_dir, sub=subject)
    os.makedirs(save_dir, exist_ok=True)

    fname = SAVE_FILE.format(dir=save_dir,
                             sub=subject,
                             word=word,
                             win_len=win_len,
                             ov=overlap,
                             perm=bool_to_str(isPerm),
                             alg=alg,
                             adj=adj,
                             avgTm=bool_to_str(doTimeAvg),
                             avgTst=bool_to_str(doTestAvg),
                             inst=num_instances,
                             rsP=random_state_perm)

    print(force)
    # np.savez_compressed appends '.npz', so check for that name.
    if os.path.isfile(fname + '.npz') and not force:
        print('Job already completed. Skipping Job.')
        print(fname)
        return

    if word == 'voice':
        data, _, sen_ints, time, _ = load_data.load_sentence_data_v2(
            subject=subject,
            align_to='last',
            voice=['active', 'passive'],
            experiment=experiment,
            proc=proc,
            num_instances=num_instances,
            reps_filter=None,
            sensor_type=None,
            is_region_sorted=False,
            tmin=0.5,
            tmax=1.0)
    else:
        data_act, _, sen_ints_act, time, _ = load_data.load_sentence_data_v2(
            subject=subject,
            align_to='verb',
            voice=['active'],
            experiment=experiment,
            proc=proc,
            num_instances=num_instances,
            reps_filter=None,
            sensor_type=None,
            is_region_sorted=False,
            tmin=0.0,
            tmax=0.5)
        data_pass, _, sen_ints_pass, _, _ = load_data.load_sentence_data_v2(
            subject=subject,
            align_to='verb',
            voice=['passive'],
            experiment=experiment,
            proc=proc,
            num_instances=num_instances,
            reps_filter=None,
            sensor_type=None,
            is_region_sorted=False,
            tmin=-0.5,
            tmax=0.0)

        data = np.concatenate([data_act, data_pass], axis=0)
        sen_ints = np.concatenate([sen_ints_act, sen_ints_pass], axis=0)

    stimuli_voice = list(load_data.read_stimuli(experiment))
    labels = [stimuli_voice[sen_int]['voice'] for sen_int in sen_ints]

    print(labels)
    tmin = time.min()
    tmax = time.max()

    # NOTE(review): 500 is presumably the sampling rate in Hz, which would
    # make total_win a sample count -- confirm against the loader.
    total_win = int((tmax - tmin) * 500)

    if win_len < 0:
        win_len = total_win - overlap

    win_starts = range(0, total_win - win_len, overlap)

    if isPerm:
        # Seeded so a given random_state_perm always gives the same shuffle.
        random.seed(random_state_perm)
        random.shuffle(labels)

    if 'lr' in alg:
        l_ints, cv_membership, tgm_acc, tgm_pred = models.lr_tgm_loso(
            data,
            labels,
            win_starts,
            win_len,
            sen_ints,
            penalty=str_to_none(alg[3:]),
            adj=adj,
            doTimeAvg=doTimeAvg,
            doTestAvg=doTestAvg)
    elif 'svm' in alg:
        l_ints, cv_membership, tgm_acc, tgm_pred = models.svc_tgm_loso(
            data,
            labels,
            win_starts,
            win_len,
            sen_ints,
            sub_rs=1,
            penalty=alg[4:],
            adj=adj,
            doTimeAvg=doTimeAvg,
            doTestAvg=doTestAvg,
            ddof=1,
            C=None)
    else:
        # The NB model returns predictions only; the feature masks and
        # selected-feature counts it also returns are not used here.
        tgm_pred, l_ints, cv_membership, _, _ = models.nb_tgm_loso(
            data,
            labels,
            sen_ints,
            1,
            win_starts,
            win_len,
            feature_select='None' not in alg,
            doZscore=adj == 'zscore',
            doAvg=doTimeAvg,
            ddof=1)
        tgm_acc = tgm_from_preds_GNB(tgm_pred, l_ints, cv_membership)

    print('Max Acc: %.2f' % np.max(np.mean(tgm_acc, axis=0)))
    np.savez_compressed(fname,
                        l_ints=l_ints,
                        cv_membership=cv_membership,
                        tgm_acc=tgm_acc,
                        tgm_pred=tgm_pred,
                        win_starts=win_starts,
                        time=time,
                        proc=proc)