def run_tgm_exp(data, labels, sen_ints, win_len, alg, doTimeAvg=False, doTestAvg=False,
                win_starts=None):
    """Run a temporal-generalization-matrix (TGM) decoding experiment on
    preloaded data, dispatching on the algorithm string.

    Parameters
    ----------
    data : array-like
        Trial data; the last axis is assumed to be time — TODO confirm.
    labels : sequence
        One label per trial.
    sen_ints : sequence
        Sentence identifiers used for leave-one-sentence-out CV.
    win_len : int
        Window length in samples.
    alg : str
        Algorithm spec: contains 'lr' for logistic regression, 'svm' for an
        SVC, anything else falls through to the naive-Bayes model. 'l2' in
        the string enables z-score adjustment; the penalty is parsed from a
        fixed offset ('lr-...' -> alg[3:], 'svm-...' -> alg[4:]).
    doTimeAvg, doTestAvg : bool
        Passed through to the model routines.
    win_starts : iterable of int, optional
        Window start indices. BUG FIX: the original body used ``win_starts``
        without ever defining it (NameError on every call). It is now an
        explicit keyword argument; when omitted, non-overlapping windows
        spanning the time axis are used (sibling variants compute
        ``range(0, total_win - win_len, overlap)``).

    Returns
    -------
    tuple
        The model outputs. BUG FIX: the original computed results and
        discarded them (implicitly returned None); they are now returned.
        lr/svm branches return (l_ints, cv_membership, tgm_acc, tgm_pred);
        the nb branch returns (tgm_pred, l_ints, cv_membership,
        feature_masks, num_feat_selected).
    """
    import models
    if win_starts is None:
        # Default to non-overlapping windows over the time (last) axis.
        win_starts = range(0, data.shape[-1] - win_len, win_len)
    # 'l2' algorithms z-score the features; others get no adjustment.
    adj = 'zscore' if 'l2' in alg else None
    if 'lr' in alg:
        l_ints, cv_membership, tgm_acc, tgm_pred = models.lr_tgm_loso(
            data, labels, win_starts, win_len, sen_ints,
            penalty=str_to_none(alg[3:]), adj=adj,
            doTimeAvg=doTimeAvg, doTestAvg=doTestAvg)
        return l_ints, cv_membership, tgm_acc, tgm_pred
    elif 'svm' in alg:
        l_ints, cv_membership, tgm_acc, tgm_pred = models.svc_tgm_loso(
            data, labels, win_starts, win_len, sen_ints,
            sub_rs=1, penalty=alg[4:], adj=adj,
            doTimeAvg=doTimeAvg, doTestAvg=doTestAvg, ddof=1, C=None)
        return l_ints, cv_membership, tgm_acc, tgm_pred
    else:
        # Naive-Bayes path: translate the adj/alg conventions into the
        # boolean flags nb_tgm_loso expects.
        doZscore = adj == 'zscore'
        doFeatSelect = 'None' not in alg
        tgm_pred, l_ints, cv_membership, feature_masks, num_feat_selected = models.nb_tgm_loso(
            data, labels, sen_ints, 1, win_starts, win_len,
            feature_select=doFeatSelect, doZscore=doZscore,
            doAvg=doTimeAvg, ddof=1)
        return tgm_pred, l_ints, cv_membership, feature_masks, num_feat_selected
def run_tgm_exp(experiment, subject, sen_type, word, win_len, overlap, isPerm=False,
                alg='lr-l1', adj=None, doTimeAvg=False, doTestAvg=True, num_instances=1,
                reps_to_use=10, proc=load_data.DEFAULT_PROC, random_state_perm=1,
                force=False, mode='acc'):
    """Run one subject's TGM decoding experiment and save results to disk.

    Loads sentence data aligned to ``word``, builds sliding windows, fits a
    leave-one-sentence-out logistic-regression TGM (mode 'acc') or extracts
    model coefficients (any other mode), and writes a compressed .npz file.
    Skips the job entirely if the output file already exists and ``force``
    is False. Returns None; the result is the saved file.
    """
    warnings.filterwarnings(action='ignore')
    # Save Directory: <TOP_DIR>/<subject>/, created on demand.
    top_dir = TOP_DIR.format(exp=experiment)
    if not os.path.exists(top_dir):
        os.mkdir(top_dir)
    save_dir = SAVE_DIR.format(top_dir=top_dir, sub=subject)
    if not os.path.exists(save_dir):
        os.mkdir(save_dir)
    # Output filename encodes every parameter that affects the result.
    fname = SAVE_FILE.format(dir=save_dir, sub=subject, sen_type=sen_type, word=word,
                             win_len=win_len, ov=overlap, perm=bool_to_str(isPerm),
                             alg=alg, adj=adj, avgTm=bool_to_str(doTimeAvg),
                             avgTst=bool_to_str(doTestAvg), inst=num_instances,
                             rep=reps_to_use, rsP=random_state_perm, mode=mode)
    print(force)
    if os.path.isfile(fname + '.npz') and not force:
        print('Job already completed. Skipping Job.')
        print(fname)
        return
    data, labels, sen_ints, time, sensor_regions = load_data.load_sentence_data_v2(
        subject=subject,
        align_to=word,
        voice=sen_type,
        experiment=experiment,
        proc=proc,
        num_instances=num_instances,
        reps_filter=reps_to_use,
        sensor_type=None,
        is_region_sorted=False,
        tmin=None,
        tmax=None)
    # NOTE: legacy per-experiment loaders (load_PassAct3_matlab /
    # load_sentence_data) were superseded by load_sentence_data_v2 above.
    tmin = time.min()
    tmax = time.max()
    # Convert the time span to a sample count; assumes 500 Hz sampling —
    # TODO confirm against the acquisition rate.
    total_win = int((tmax - tmin) * 500)
    # win_len < 0 is a sentinel meaning "one window covering (almost) the
    # whole epoch".
    if win_len < 0:
        win_len = total_win - overlap
    win_starts = range(0, total_win - win_len, overlap)
    # NOTE: legacy label/sen_int derivation from the raw label matrix was
    # superseded by the sen_ints returned from load_sentence_data_v2.
    if isPerm:
        # Permutation test: shuffle labels reproducibly.
        random.seed(random_state_perm)
        random.shuffle(labels)
    if mode == 'acc':
        # Leave-one-sentence-out TGM accuracy. Penalty parsed from the alg
        # string (assumes an 'lr-' prefix, e.g. 'lr-l1' -> 'l1').
        l_ints, cv_membership, tgm_acc, tgm_pred = models.lr_tgm_loso(
            data, labels, win_starts, win_len, sen_ints,
            penalty=alg[3:],
            adj=adj,
            doTimeAvg=doTimeAvg,
            doTestAvg=doTestAvg)
        np.savez_compressed(fname,
                            l_ints=l_ints,
                            cv_membership=cv_membership,
                            tgm_acc=tgm_acc,
                            tgm_pred=tgm_pred,
                            win_starts=win_starts,
                            time=time,
                            proc=proc)
    else:
        # Coefficient extraction (no cross-validation accuracy).
        l_ints, coef, Cs = models.lr_tgm_coef(data, labels, win_starts, win_len,
                                              penalty=alg[3:],
                                              adj=adj,
                                              doTimeAvg=doTimeAvg)
        np.savez_compressed(fname,
                            l_ints=l_ints,
                            coef=coef,
                            Cs=Cs,
                            win_starts=win_starts,
                            time=time,
                            proc=proc)
def run_tgm_exp(subject, sen_type, analysis, win_len, overlap, isPerm=False,
                alg='lr-l1', adj=None, doTimeAvg=False, doTestAvg=True,
                num_instances=1, random_state_perm=1, force=False, mode='acc'):
    """Run a determiner-decoding TGM experiment for one subject and save to disk.

    Loads four 0.5 s epochs (before/after noun1 and noun2) from the 'krns2'
    experiment, builds word labels from the stimulus text per ``analysis``
    ('det-type*' variants restrict which epochs contribute; 'the-dog'/'a-dog'
    further filter trials to two label classes), fits a leave-one-sentence-out
    logistic-regression TGM (mode 'acc') or extracts coefficients (otherwise),
    and writes a compressed .npz file. Skips the job if the output exists and
    ``force`` is False.

    BUG FIXES relative to the original:
    * the permutation test shuffled the unfiltered ``labels`` list *after*
      ``new_labels`` had been built, so ``isPerm`` had no effect in 'acc'
      mode — it now shuffles ``new_labels``;
    * the coefficient branch passed the unfiltered ``labels``, whose length
      need not match the filtered ``data`` — it now uses ``new_labels``.
    """
    warnings.filterwarnings(action='ignore')
    # Save Directory, created on demand.
    if not os.path.exists(TOP_DIR):
        os.mkdir(TOP_DIR)
    save_dir = SAVE_DIR.format(top_dir=TOP_DIR, sub=subject)
    if not os.path.exists(save_dir):
        os.mkdir(save_dir)
    # Output filename encodes every parameter that affects the result.
    fname = SAVE_FILE.format(dir=save_dir, sub=subject, sen_type=sen_type,
                             analysis=analysis, win_len=win_len, ov=overlap,
                             perm=bool_to_str(isPerm), alg=alg, adj=adj,
                             avgTm=bool_to_str(doTimeAvg), avgTst=bool_to_str(doTestAvg),
                             inst=num_instances, rsP=random_state_perm, mode=mode)
    print(force)
    if os.path.isfile(fname + '.npz') and not force:
        print('Job already completed. Skipping Job.')
        print(fname)
        return
    if sen_type == 'pooled':
        voice = ['active', 'passive']
    else:
        # NOTE(review): sibling variants wrap this in a list ([sen_type]);
        # presumably load_sentence_data_v2 accepts both — confirm.
        voice = sen_type
    experiment = 'krns2'
    # Four epochs: 0.5 s before/after noun1 onset, 0.5 s before/after noun2
    # onset. The pre-noun windows carry the determiner.
    data_det1, _, sen_ints_det1, _, _ = load_data.load_sentence_data_v2(
        subject=subject, align_to='noun1', voice=voice, experiment=experiment,
        proc=load_data.DEFAULT_PROC, num_instances=num_instances, reps_filter=None,
        sensor_type=None, is_region_sorted=False, tmin=-0.5, tmax=0.0)
    data_n1, _, sen_ints_n1, _, _ = load_data.load_sentence_data_v2(
        subject=subject, align_to='noun1', voice=voice, experiment=experiment,
        proc=load_data.DEFAULT_PROC, num_instances=num_instances, reps_filter=None,
        sensor_type=None, is_region_sorted=False, tmin=0.0, tmax=0.5)
    data_det2, _, sen_ints_det2, _, _ = load_data.load_sentence_data_v2(
        subject=subject, align_to='noun2', voice=voice, experiment=experiment,
        proc=load_data.DEFAULT_PROC, num_instances=num_instances, reps_filter=None,
        sensor_type=None, is_region_sorted=False, tmin=-0.5, tmax=0.0)
    # Only this load keeps the time vector; all four epochs span 0.5 s.
    data_n2, _, sen_ints_n2, time, _ = load_data.load_sentence_data_v2(
        subject=subject, align_to='noun2', voice=voice, experiment=experiment,
        proc=load_data.DEFAULT_PROC, num_instances=num_instances, reps_filter=None,
        sensor_type=None, is_region_sorted=False, tmin=0.0, tmax=0.5)
    stimuli_voice = list(load_data.read_stimuli(experiment))
    # Labels are the actual words from the stimulus text, looked up by the
    # voice-specific word-column table. det1 labels are always included.
    labels = []
    for sen_int in sen_ints_det1:
        word_list = stimuli_voice[sen_int]['stimulus'].split()
        curr_voice = stimuli_voice[sen_int]['voice']
        labels.append(word_list[WORD_COLS[curr_voice]['det1']])
    if 'det-type' not in analysis:
        # Full analysis: decode across all four word positions.
        for sen_int in sen_ints_n1:
            word_list = stimuli_voice[sen_int]['stimulus'].split()
            curr_voice = stimuli_voice[sen_int]['voice']
            labels.append(word_list[WORD_COLS[curr_voice]['noun1']])
        for sen_int in sen_ints_det2:
            word_list = stimuli_voice[sen_int]['stimulus'].split()
            curr_voice = stimuli_voice[sen_int]['voice']
            labels.append(word_list[WORD_COLS[curr_voice]['det2']])
        for sen_int in sen_ints_n2:
            word_list = stimuli_voice[sen_int]['stimulus'].split()
            curr_voice = stimuli_voice[sen_int]['voice']
            labels.append(word_list[WORD_COLS[curr_voice]['noun2']])
        sen_ints = np.concatenate(
            [sen_ints_det1, sen_ints_n1, sen_ints_det2, sen_ints_n2], axis=0)
        data = np.concatenate([data_det1, data_n1, data_det2, data_n2], axis=0)
    elif analysis == 'det-type-first':
        # First determiner only.
        sen_ints = np.array(sen_ints_det1)
        data = data_det1
    else:
        # Other 'det-type' analyses: both determiner epochs.
        for sen_int in sen_ints_det2:
            word_list = stimuli_voice[sen_int]['stimulus'].split()
            curr_voice = stimuli_voice[sen_int]['voice']
            labels.append(word_list[WORD_COLS[curr_voice]['det2']])
        sen_ints = np.concatenate([sen_ints_det1, sen_ints_det2], axis=0)
        data = np.concatenate([data_det1, data_det2], axis=0)
    # Optional two-class filters: keep only 'the'/'dog' or 'a'/'dog' trials.
    inds_to_keep = np.ones((len(labels), ), dtype=bool)
    if analysis == 'the-dog':
        for i_label, label in enumerate(labels):
            if label != 'the' and label != 'dog':
                inds_to_keep[i_label] = False
    elif analysis == 'a-dog':
        for i_label, label in enumerate(labels):
            if label != 'a' and label != 'dog':
                inds_to_keep[i_label] = False
    data = data[inds_to_keep, :, :]
    sen_ints = sen_ints[inds_to_keep]
    new_labels = [
        labels[i_label] for i_label, _ in enumerate(labels)
        if inds_to_keep[i_label]
    ]
    print(np.unique(np.array(new_labels)))
    total_win = data.shape[-1]
    win_starts = range(0, total_win - win_len, overlap)
    if isPerm:
        random.seed(random_state_perm)
        # BUG FIX: shuffle the filtered labels that are actually used below;
        # the original shuffled the stale unfiltered list.
        random.shuffle(new_labels)
    if mode == 'acc':
        # Penalty parsed from the alg string (assumes 'lr-' prefix).
        l_ints, cv_membership, tgm_acc, tgm_pred = models.lr_tgm_loso(
            data, new_labels, win_starts, win_len, sen_ints,
            penalty=alg[3:], adj=adj, doTimeAvg=doTimeAvg, doTestAvg=doTestAvg)
        np.savez_compressed(fname,
                            l_ints=l_ints,
                            cv_membership=cv_membership,
                            tgm_acc=tgm_acc,
                            tgm_pred=tgm_pred,
                            win_starts=win_starts,
                            time=time)
    else:
        # BUG FIX: use new_labels so the label list matches the filtered data.
        l_ints, coef, Cs = models.lr_tgm_coef(data, new_labels, win_starts, win_len,
                                              penalty=alg[3:], adj=adj,
                                              doTimeAvg=doTimeAvg)
        np.savez_compressed(fname,
                            l_ints=l_ints,
                            coef=coef,
                            Cs=Cs,
                            win_starts=win_starts,
                            time=time)
def run_tgm_exp(experiment, subject, sen_type, word, win_len, overlap, isPerm=False,
                alg='lr-l1', adj=None, doTimeAvg=False, doTestAvg=True, num_instances=1,
                proc=load_data.DEFAULT_PROC, random_state_perm=1, force=False, mode='acc'):
    """Run a sentence-level TGM decoding experiment for one subject.

    Decodes a sentence property chosen by ``word`` — 'propid' (unique
    agent/verb/patient proposition), 'voice', 'senlen' (long vs. short), or a
    word position ('agent', 'patient', ...) — from data aligned to the last
    word, then saves TGM accuracy ('acc' mode) or coefficients to an .npz
    file. Skips the job if the output exists and ``force`` is False.
    """
    warnings.filterwarnings(action='ignore')
    # Save Directory, created on demand.
    top_dir = TOP_DIR.format(exp=experiment)
    if not os.path.exists(top_dir):
        os.mkdir(top_dir)
    save_dir = SAVE_DIR.format(top_dir=top_dir, sub=subject)
    if not os.path.exists(save_dir):
        os.mkdir(save_dir)
    # Output filename encodes every parameter that affects the result.
    fname = SAVE_FILE.format(dir=save_dir, sub=subject, sen_type=sen_type, word=word,
                             win_len=win_len, ov=overlap, perm=bool_to_str(isPerm),
                             alg=alg, adj=adj, avgTm=bool_to_str(doTimeAvg),
                             avgTst=bool_to_str(doTestAvg), inst=num_instances,
                             rsP=random_state_perm, mode=mode)
    print(force)
    if os.path.isfile(fname + '.npz') and not force:
        print('Job already completed. Skipping Job.')
        print(fname)
        return
    if sen_type == 'pooled':
        voice = ['active', 'passive']
    else:
        voice = [sen_type]
    # Epoch aligned to the last word, from onset to the experiment-specific
    # end time.
    data, _, sen_ints, time, _ = load_data.load_sentence_data_v2(
        subject=subject,
        align_to='last',
        voice=voice,
        experiment=experiment,
        proc=proc,
        num_instances=num_instances,
        reps_filter=None,
        sensor_type=None,
        is_region_sorted=False,
        tmin=0.0,
        tmax=TMAX[experiment])
    stimuli_voice = list(load_data.read_stimuli(experiment))
    if word == 'propid':
        # Label = unique (agent, verb, patient) proposition id.
        all_words = [
            stimuli_voice[sen_int]['stimulus'].split() for sen_int in sen_ints
        ]
        all_voices = [stimuli_voice[sen_int]['voice'] for sen_int in sen_ints]
        content_words = []
        valid_inds = []
        for i_word_list, word_list in enumerate(all_words):
            curr_voice = all_voices[i_word_list]
            if experiment == 'PassAct3':
                # PassAct3: only long (>5-word) sentences have all three
                # content words — TODO confirm this is the intent.
                if len(word_list) > 5:
                    valid_inds.append(i_word_list)
                    content_words.append([
                        word_list[WORD_COLS[curr_voice]['agent']],
                        word_list[WORD_COLS[curr_voice]['verb']],
                        word_list[WORD_COLS[curr_voice]['patient']]
                    ])
            else:
                valid_inds.append(i_word_list)
                content_words.append([
                    word_list[WORD_COLS[curr_voice]['agent']],
                    word_list[WORD_COLS[curr_voice]['verb']],
                    word_list[WORD_COLS[curr_voice]['patient']]
                ])
        # return_inverse maps each sentence to its proposition's integer id.
        uni_content, labels = np.unique(np.array(content_words),
                                        axis=0,
                                        return_inverse=True)
        print(uni_content)
    else:
        labels = []
        valid_inds = []
        for i_sen_int, sen_int in enumerate(sen_ints):
            word_list = stimuli_voice[sen_int]['stimulus'].split()
            curr_voice = stimuli_voice[sen_int]['voice']
            if word == 'voice':
                labels.append(curr_voice)
                valid_inds.append(i_sen_int)
            elif word == 'senlen':
                # Sentence length: >5 words is 'long'.
                if len(word_list) > 5:
                    labels.append('long')
                else:
                    labels.append('short')
                valid_inds.append(i_sen_int)
            elif word == 'agent' or word == 'patient':
                if experiment == 'PassAct3':
                    # Short PassAct3 sentences lack the role word, so only
                    # long ones are valid.
                    if len(word_list) > 5:
                        valid_inds.append(i_sen_int)
                        labels.append(word_list[WORD_COLS[curr_voice][word]])
                else:
                    labels.append(word_list[WORD_COLS[curr_voice][word]])
                    valid_inds.append(i_sen_int)
            else:
                labels.append(word_list[WORD_COLS[curr_voice][word]])
                valid_inds.append(i_sen_int)
    # Keep only trials with a defined label.
    valid_inds = np.array(valid_inds)
    data = data[valid_inds, ...]
    sen_ints = [
        sen for i_sen, sen in enumerate(sen_ints) if i_sen in valid_inds
    ]
    print(valid_inds)
    print(data.shape)
    print(sen_ints)
    print(labels)
    tmin = time.min()
    tmax = time.max()
    # Sample count; assumes 500 Hz sampling — TODO confirm.
    total_win = int((tmax - tmin) * 500)
    # win_len < 0 is a sentinel for one near-whole-epoch window.
    if win_len < 0:
        win_len = total_win - overlap
    win_starts = range(0, total_win - win_len, overlap)
    if isPerm:
        # Permutation test: shuffle labels reproducibly.
        random.seed(random_state_perm)
        random.shuffle(labels)
    if mode == 'acc':
        # Penalty parsed from the alg string (assumes 'lr-' prefix).
        l_ints, cv_membership, tgm_acc, tgm_pred = models.lr_tgm_loso(
            data, labels, win_starts, win_len, sen_ints,
            penalty=alg[3:],
            adj=adj,
            doTimeAvg=doTimeAvg,
            doTestAvg=doTestAvg)
        np.savez_compressed(fname,
                            l_ints=l_ints,
                            cv_membership=cv_membership,
                            tgm_acc=tgm_acc,
                            tgm_pred=tgm_pred,
                            win_starts=win_starts,
                            time=time,
                            proc=proc)
    else:
        # Coefficient extraction (no cross-validation accuracy).
        l_ints, coef, Cs = models.lr_tgm_coef(data, labels, win_starts, win_len,
                                              penalty=alg[3:],
                                              adj=adj,
                                              doTimeAvg=doTimeAvg)
        np.savez_compressed(fname,
                            l_ints=l_ints,
                            coef=coef,
                            Cs=Cs,
                            win_starts=win_starts,
                            time=time,
                            proc=proc)
def run_tgm_exp(experiment, subject, word, win_len, overlap, isPerm=False, alg='lr-l1',
                adj=None, doTimeAvg=False, doTestAvg=True, num_instances=1,
                proc=load_data.DEFAULT_PROC, random_state_perm=1, force=False):
    """Run a voice-decoding TGM experiment for one subject and save to disk.

    For ``word == 'voice'`` decodes from a post-sentence window (0.5–1.0 s
    after the last word); otherwise decodes from verb-aligned windows
    (post-verb for active, pre-verb for passive sentences). Labels are the
    sentence voice ('active'/'passive'). Dispatches on ``alg`` to logistic
    regression, SVM, or naive Bayes, then saves a compressed .npz. Skips the
    job if the output exists and ``force`` is False.
    """
    warnings.filterwarnings(action='ignore')
    # Save Directory, created on demand.
    top_dir = TOP_DIR.format(exp=experiment)
    if not os.path.exists(top_dir):
        os.mkdir(top_dir)
    save_dir = SAVE_DIR.format(top_dir=top_dir, sub=subject)
    if not os.path.exists(save_dir):
        os.mkdir(save_dir)
    # Output filename encodes every parameter that affects the result.
    fname = SAVE_FILE.format(dir=save_dir, sub=subject, word=word, win_len=win_len,
                             ov=overlap, perm=bool_to_str(isPerm), alg=alg, adj=adj,
                             avgTm=bool_to_str(doTimeAvg), avgTst=bool_to_str(doTestAvg),
                             inst=num_instances, rsP=random_state_perm)
    print(force)
    if os.path.isfile(fname + '.npz') and not force:
        print('Job already completed. Skipping Job.')
        print(fname)
        return
    if word == 'voice':
        # Post-sentence window, both voices pooled in one load.
        data, _, sen_ints, time, _ = load_data.load_sentence_data_v2(
            subject=subject,
            align_to='last',
            voice=['active', 'passive'],
            experiment=experiment,
            proc=proc,
            num_instances=num_instances,
            reps_filter=None,
            sensor_type=None,
            is_region_sorted=False,
            tmin=0.5,
            tmax=1.0)
    else:
        # Verb-aligned: 0.0–0.5 s after the verb for active sentences,
        # -0.5–0.0 s before it for passive, then concatenated.
        data_act, _, sen_ints_act, time, _ = load_data.load_sentence_data_v2(
            subject=subject,
            align_to='verb',
            voice=['active'],
            experiment=experiment,
            proc=proc,
            num_instances=num_instances,
            reps_filter=None,
            sensor_type=None,
            is_region_sorted=False,
            tmin=0.0,
            tmax=0.5)
        data_pass, _, sen_ints_pass, _, _ = load_data.load_sentence_data_v2(
            subject=subject,
            align_to='verb',
            voice=['passive'],
            experiment=experiment,
            proc=proc,
            num_instances=num_instances,
            reps_filter=None,
            sensor_type=None,
            is_region_sorted=False,
            tmin=-0.5,
            tmax=0.0)
        data = np.concatenate([data_act, data_pass], axis=0)
        sen_ints = np.concatenate([sen_ints_act, sen_ints_pass], axis=0)
    stimuli_voice = list(load_data.read_stimuli(experiment))
    # Label each trial with its sentence's voice.
    labels = []
    for i_sen_int, sen_int in enumerate(sen_ints):
        curr_voice = stimuli_voice[sen_int]['voice']
        labels.append(curr_voice)
    print(labels)
    tmin = time.min()
    tmax = time.max()
    # Sample count; assumes 500 Hz sampling — TODO confirm.
    total_win = int((tmax - tmin) * 500)
    # win_len < 0 is a sentinel for one near-whole-epoch window.
    if win_len < 0:
        win_len = total_win - overlap
    win_starts = range(0, total_win - win_len, overlap)
    if isPerm:
        # Permutation test: shuffle labels reproducibly.
        random.seed(random_state_perm)
        random.shuffle(labels)
    if 'lr' in alg:
        # Logistic regression; penalty parsed from alg (e.g. 'lr-l1' -> 'l1').
        l_ints, cv_membership, tgm_acc, tgm_pred = models.lr_tgm_loso(
            data, labels, win_starts, win_len, sen_ints,
            penalty=str_to_none(alg[3:]),
            adj=adj,
            doTimeAvg=doTimeAvg,
            doTestAvg=doTestAvg)
    elif 'svm' in alg:
        # SVM; penalty parsed from alg (e.g. 'svm-l2' -> 'l2').
        l_ints, cv_membership, tgm_acc, tgm_pred = models.svc_tgm_loso(
            data, labels, win_starts, win_len, sen_ints,
            sub_rs=1,
            penalty=alg[4:],
            adj=adj,
            doTimeAvg=doTimeAvg,
            doTestAvg=doTestAvg,
            ddof=1,
            C=None)
    else:
        # Naive-Bayes fallback; translate adj/alg conventions into flags.
        if adj == 'zscore':
            doZscore = True
        else:
            doZscore = False
        # 'None' in the alg string disables feature selection.
        if 'None' in alg:
            doFeatSelect = False
        else:
            doFeatSelect = True
        tgm_pred, l_ints, cv_membership, feature_masks, num_feat_selected = models.nb_tgm_loso(
            data, labels, sen_ints, 1, win_starts, win_len,
            feature_select=doFeatSelect,
            doZscore=doZscore,
            doAvg=doTimeAvg,
            ddof=1)
        # GNB path returns predictions only; derive the accuracy TGM.
        tgm_acc = tgm_from_preds_GNB(tgm_pred, l_ints, cv_membership)
    print('Max Acc: %.2f' % np.max(np.mean(tgm_acc, axis=0)))
    np.savez_compressed(fname,
                        l_ints=l_ints,
                        cv_membership=cv_membership,
                        tgm_acc=tgm_acc,
                        tgm_pred=tgm_pred,
                        win_starts=win_starts,
                        time=time,
                        proc=proc)