# Shared imports for the scripts excerpted below. This is a Python 2
# codebase (note unicode() and xrange() further down). Module-level
# constants such as CONTENT_WORDS, TMIN, TMAX, WORD_COLS, VALID_SUBS,
# TIME_LIMITS, and the TOP_DIR/SAVE_DIR/SAVE_FILE path templates are
# defined elsewhere in the original scripts, as are the helpers in
# the models module.
import os
import random
import warnings

import numpy as np
from scipy.spatial.distance import pdist, squareform

import load_data
import models


def load_bow(experiment, distance='euclidean'):
    # GloVe vectors for the stimulus vocabulary, stored as a pickled dict.
    model_embeddings = np.load(
        '/share/volume0/RNNG/semantic_models/embeddings_dict.npz').item()['glove']
    sentence_vectors = list()
    for stimulus_dict in load_data.read_stimuli(experiment):
        voice = stimulus_dict['voice']
        stimulus = stimulus_dict['stimulus'].split()
        curr_vectors = list()
        for w in CONTENT_WORDS[voice]:
            if w < len(stimulus):
                curr_vectors.append(
                    model_embeddings[unicode(stimulus[w])][None, ...])
        # Bag-of-words sentence vector: mean of the content-word embeddings.
        sentence_vectors.append(
            np.mean(np.concatenate(curr_vectors, axis=0), axis=0)[None, ...])
    sentence_vectors = np.concatenate(sentence_vectors, axis=0)
    model_rdm = squareform(pdist(sentence_vectors, metric=distance))
    return model_rdm
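# Hypothetical usage sketch (not from the source): compare the bag-of-words
# model RDM against the per-window neural RDMs that the RDM-building
# run_tgm_exp below saves, via Spearman correlation of the condensed upper
# triangles. The .npz path is a placeholder.
from scipy.stats import spearmanr

model_rdm = load_bow('PassAct3', distance='cosine')
neural = np.load('/path/to/neural_rdm.npz')
for wi, brain_rdm in enumerate(neural['RDM']):
    rho, _ = spearmanr(squareform(model_rdm), squareform(brain_rdm))
    print('window {}: rho = {:.3f}'.format(wi, rho))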
def load_agg_data(subject, word, experiment, voice, proc, rep_set):
    all_data, _, sen_ints, time, _ = load_data.load_sentence_data_v2(
        subject=subject,
        align_to='last',
        voice=voice,
        experiment=experiment,
        proc=proc,
        num_instances=1,
        reps_filter=lambda nr: [rep in rep_set for rep in range(nr)],
        sensor_type=None,
        is_region_sorted=False,
        tmin=TMIN[word],
        tmax=TMAX[word])
    all_data *= 1e12  # scale MEG signals (~1e-12 T) up to order one

    stimuli_voice = list(load_data.read_stimuli(experiment))
    labels = []
    voice_labels = []
    if 'full' not in word:
        # Only the long sentences are kept, i.e. half the trials.
        data = np.ones(
            (all_data.shape[0] // 2, all_data.shape[1], all_data.shape[2]))
    else:
        data = all_data
    i_data = 0
    for i_sen_int, sen_int in enumerate(sen_ints):
        word_list = stimuli_voice[sen_int]['stimulus'].split()
        curr_voice = stimuli_voice[sen_int]['voice']
        if 'full' in word:
            labels.append(word_list[WORD_COLS[curr_voice][word]])
            voice_labels.append(curr_voice)
        elif len(word_list) > 5:
            data[i_data, :, :] = all_data[i_sen_int, :, :]
            labels.append(word_list[WORD_COLS[curr_voice][word]])
            voice_labels.append(curr_voice)
            i_data += 1
    print(labels)
    print(voice_labels)
    return data, labels, voice_labels, time
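# Hypothetical call (argument values illustrative, and 'verb' is assumed to
# be a valid TMIN/TMAX key): average repetitions 0-4 of the verb epoch for
# subject B of krns2.
data, labels, voice_labels, time = load_agg_data(
    subject='B', word='verb', experiment='krns2',
    voice=['active', 'passive'], proc=load_data.DEFAULT_PROC,
    rep_set=set(range(5)))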
def run_tgm_exp(experiment,
                subject,
                word,
                win_len,
                overlap,
                dist='cosine',
                doTimeAvg=False,
                proc=load_data.DEFAULT_PROC,
                force=False):
    # Save Directory
    top_dir = TOP_DIR.format(exp=experiment)
    if not os.path.exists(top_dir):
        os.mkdir(top_dir)
    save_dir = SAVE_DIR.format(top_dir=top_dir, sub=subject)
    if not os.path.exists(save_dir):
        os.mkdir(save_dir)
    fname = SAVE_FILE.format(dir=save_dir,
                             sub=subject,
                             word=word,
                             win_len=win_len,
                             ov=overlap,
                             dist=dist,
                             avgTm=bool_to_str(doTimeAvg))
    print(force)
    if os.path.isfile(fname + '.npz') and not force:
        print('Job already completed. Skipping Job.')
        print(fname)
        return

    voice = ['active', 'passive']
    num_instances = 1
    all_data, _, sen_ints, time, _ = load_data.load_sentence_data_v2(
        subject=subject,
        align_to='last',
        voice=voice,
        experiment=experiment,
        proc=proc,
        num_instances=num_instances,
        reps_filter=None,
        sensor_type=None,
        is_region_sorted=False,
        tmin=TMIN[word],
        tmax=TMAX[word])
    all_data *= 1e12  # scale MEG signals (~1e-12 T) up to order one

    stimuli_voice = list(load_data.read_stimuli(experiment))
    labels = []
    voice_labels = []
    if word != 'eos-full':
        # Only the long sentences are kept, i.e. half the trials.
        data = np.ones(
            (all_data.shape[0] // 2, all_data.shape[1], all_data.shape[2]))
    else:
        data = all_data
    i_data = 0
    for i_sen_int, sen_int in enumerate(sen_ints):
        word_list = stimuli_voice[sen_int]['stimulus'].split()
        curr_voice = stimuli_voice[sen_int]['voice']
        if word == 'eos-full':
            labels.append(word_list[-2])
            voice_labels.append(curr_voice)
        elif len(word_list) > 5:
            data[i_data, :, :] = all_data[i_sen_int, :, :]
            labels.append(word_list[WORD_COLS[curr_voice][word]])
            voice_labels.append(curr_voice)
            i_data += 1
    print(labels)
    print(voice_labels)

    tmin = time.min()
    tmax = time.max()
    total_win = int((tmax - tmin) * 500)
    if win_len < 0:
        win_len = total_win - overlap
    win_starts = range(0, total_win - win_len, overlap)

    n_time = data.shape[2]
    # Boolean mask over time samples for each sliding window.
    windows = [np.array([w_s <= i < w_s + win_len for i in xrange(n_time)])
               for w_s in win_starts]
    n_w = len(windows)

    RDM = []
    for wi in xrange(n_w):
        time_to_use = windows[wi]
        data_to_use = data[:, :, time_to_use]
        if doTimeAvg:
            data_to_use = np.mean(data_to_use, axis=2)
        else:
            data_to_use = np.reshape(data_to_use, (data_to_use.shape[0], -1))
        curr_RDM = squareform(pdist(data_to_use, metric=dist))
        if np.any(np.isnan(curr_RDM)):
            # Diagnostics for NaNs in the distance matrix.
            print('Data state:')
            print(np.any(np.isinf(data_to_use)))
            print(np.any(np.isnan(data_to_use)))
            print(np.min(data_to_use))
            print(np.min(np.abs(data_to_use)))
            meow = pdist(data_to_use, metric=my_cosine)
            nan_els = np.unravel_index(np.where(np.isnan(meow)),
                                       curr_RDM.shape)
            # print(nan_els)
            print('My cosine:')
            print(my_cosine(data_to_use[nan_els[0][0][0], :],
                            data_to_use[nan_els[1][0][0], :]))
        RDM.append(curr_RDM[None, ...])
    RDM = np.concatenate(RDM, axis=0)

    np.savez_compressed(fname,
                        RDM=RDM,
                        labels=labels,
                        voice_labels=voice_labels,
                        win_starts=win_starts,
                        time=time,
                        proc=proc)
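# my_cosine above is defined elsewhere in the original script; a plausible
# minimal sketch is a cosine distance that guards against the zero-norm
# vectors that make scipy's implementation return NaN. The guard and its
# epsilon are assumptions, not the repo's code.
def my_cosine(u, v, eps=1e-12):
    norm_u = np.linalg.norm(u)
    norm_v = np.linalg.norm(v)
    if norm_u < eps or norm_v < eps:
        # Treat comparisons with an all-zero window as maximally dissimilar.
        return 1.0
    return 1.0 - np.dot(u, v) / (norm_u * norm_v)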
def run_tgm_exp(experiment,
                subject,
                sen_type,
                word,
                win_len,
                overlap,
                fold,
                isPerm=False,
                alg='lr-l1',
                adj=None,
                doTimeAvg=False,
                doTestAvg=True,
                num_instances=1,
                proc=load_data.DEFAULT_PROC,
                random_state_perm=1,
                force=False):
    warnings.filterwarnings(action='ignore')
    # Save Directory
    top_dir = TOP_DIR.format(exp=experiment)
    if not os.path.exists(top_dir):
        os.mkdir(top_dir)
    save_dir = SAVE_DIR.format(top_dir=top_dir, sub=subject)
    if not os.path.exists(save_dir):
        os.mkdir(save_dir)
    fname = SAVE_FILE.format(dir=save_dir,
                             sub=subject,
                             sen_type=sen_type,
                             word=word,
                             win_len=win_len,
                             ov=overlap,
                             perm=bool_to_str(isPerm),
                             alg=alg,
                             adj=adj,
                             avgTm=bool_to_str(doTimeAvg),
                             avgTst=bool_to_str(doTestAvg),
                             inst=num_instances,
                             rsP=random_state_perm,
                             fold=fold)
    print(force)
    if os.path.isfile(fname + '.npz') and not force:
        print('Job already completed. Skipping Job.')
        print(fname)
        return

    if sen_type == 'pooled':
        voice = ['active', 'passive']
    else:
        voice = [sen_type]

    data, _, sen_ints, time, _ = load_data.load_sentence_data_v2(
        subject=subject,
        align_to='last',
        voice=voice,
        experiment=experiment,
        proc=proc,
        num_instances=num_instances,
        reps_filter=None,
        sensor_type=None,
        is_region_sorted=False,
        tmin=0.0,
        tmax=TMAX[experiment])

    stimuli_voice = list(load_data.read_stimuli(experiment))
    # print(stimuli_voice)
    if word == 'propid':
        # Label each sentence by its (agent, verb, patient) proposition.
        all_words = [stimuli_voice[sen_int]['stimulus'].split()
                     for sen_int in sen_ints]
        all_voices = [stimuli_voice[sen_int]['voice'] for sen_int in sen_ints]
        content_words = []
        valid_inds = []
        for i_word_list, word_list in enumerate(all_words):
            curr_voice = all_voices[i_word_list]
            if experiment == 'PassAct3' and len(word_list) <= 5:
                continue  # PassAct3: keep only the long sentences
            valid_inds.append(i_word_list)
            content_words.append([word_list[WORD_COLS[curr_voice]['agent']],
                                  word_list[WORD_COLS[curr_voice]['verb']],
                                  word_list[WORD_COLS[curr_voice]['patient']]])
        uni_content, labels = np.unique(np.array(content_words), axis=0,
                                        return_inverse=True)
        print(uni_content)
    else:
        labels = []
        valid_inds = []
        for i_sen_int, sen_int in enumerate(sen_ints):
            word_list = stimuli_voice[sen_int]['stimulus'].split()
            curr_voice = stimuli_voice[sen_int]['voice']
            if word == 'voice':
                labels.append(curr_voice)
                valid_inds.append(i_sen_int)
            elif word == 'senlen':
                labels.append('long' if len(word_list) > 5 else 'short')
                valid_inds.append(i_sen_int)
            elif word in ('agent', 'patient'):
                if experiment == 'PassAct3' and len(word_list) <= 5:
                    continue  # agent/patient only defined for long sentences
                labels.append(word_list[WORD_COLS[curr_voice][word]])
                valid_inds.append(i_sen_int)
            else:
                labels.append(word_list[WORD_COLS[curr_voice][word]])
                valid_inds.append(i_sen_int)

    valid_inds = np.array(valid_inds)
    data = data[valid_inds, ...]
    sen_ints = [sen for i_sen, sen in enumerate(sen_ints)
                if i_sen in valid_inds]
    # print(labels)

    tmin = time.min()
    tmax = time.max()
    total_win = int((tmax - tmin) * 500)
    if win_len < 0:
        win_len = total_win - overlap
    win_starts = range(0, total_win - win_len, overlap)

    if isPerm:
        random.seed(random_state_perm)
        random.shuffle(labels)

    l_ints, cv_membership, tgm_acc, tgm_pred = models.lr_tgm_loso_fold(
        data,
        labels,
        win_starts,
        win_len,
        sen_ints,
        fold,
        penalty=alg[3:],
        adj=adj,
        doTimeAvg=doTimeAvg,
        doTestAvg=doTestAvg)

    np.savez_compressed(fname,
                        l_ints=l_ints,
                        cv_membership=cv_membership,
                        tgm_acc=tgm_acc,
                        tgm_pred=tgm_pred,
                        win_starts=win_starts,
                        time=time,
                        proc=proc)
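# Hypothetical driver (argument values illustrative, not from the source):
# one job per held-out fold, as a cluster array task might submit them.
if __name__ == '__main__':
    for fold in range(16):
        run_tgm_exp(experiment='PassAct3', subject='B', sen_type='pooled',
                    word='verb', win_len=50, overlap=5, fold=fold,
                    alg='lr-l1', doTimeAvg=True)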
def run_tgm_exp(experiment,
                sen_type,
                word,
                win_len,
                overlap,
                fold,
                isPerm=False,
                alg='lr-l1',
                adj=None,
                doTimeAvg=False,
                doTestAvg=True,
                num_instances=1,
                proc=load_data.DEFAULT_PROC,
                random_state_perm=1,
                force=False):
    warnings.filterwarnings(action='ignore')
    # Save Directory
    top_dir = TOP_DIR.format(exp=experiment)
    if not os.path.exists(top_dir):
        os.mkdir(top_dir)
    fname = SAVE_FILE.format(dir=top_dir,
                             sen_type=sen_type,
                             word=word,
                             win_len=win_len,
                             ov=overlap,
                             perm=bool_to_str(isPerm),
                             alg=alg,
                             adj=adj,
                             avgTm=bool_to_str(doTimeAvg),
                             avgTst=bool_to_str(doTestAvg),
                             inst=num_instances,
                             rsP=random_state_perm,
                             fold=fold)
    print(force)
    if os.path.isfile(fname + '.npz') and not force:
        print('Job already completed. Skipping Job.')
        print(fname)
        return

    stimuli_voice = list(load_data.read_stimuli(experiment))
    data_list = []
    sen_ints = []
    time = []
    labels = []
    for i_sub, subject in enumerate(VALID_SUBS[experiment]):
        data, _, sen_ints_sub, time_sub, sensor_regions = \
            load_data.load_sentence_data_v2(
                subject=subject,
                align_to='noun1',
                voice=sen_type,
                experiment=experiment,
                proc=proc,
                num_instances=num_instances,
                reps_filter=lambda x: [i for i in range(x) if i < 10],
                sensor_type=None,
                is_region_sorted=False,
                tmin=TIME_LIMITS[experiment][sen_type]['noun1']['tmin'],
                tmax=TIME_LIMITS[experiment][sen_type]['noun1']['tmax'])
        # print(labels_sub)
        # print(data.shape)

        valid_inds = []
        labels_sub = []
        new_sen_ints_sub = []
        for i_sen_int, sen_int in enumerate(sen_ints_sub):
            word_list = stimuli_voice[sen_int]['stimulus'].split()
            if word == 'noun2' and len(word_list) <= 5:
                continue  # noun2 exists only in the long sentences
            labels_sub.append(word_list[WORD_COLS[sen_type][word]])
            valid_inds.append(i_sen_int)
            new_sen_ints_sub.append(sen_int)
        valid_inds = np.array(valid_inds)
        print(labels_sub)

        data_list.append(data[valid_inds, ...])
        print(data_list[i_sub].shape)
        if i_sub == 0:
            sen_ints = new_sen_ints_sub
            time = time_sub
            labels = labels_sub
        else:
            # Every subject must see the same stimuli in the same order.
            assert np.all(sen_ints == new_sen_ints_sub)
            assert np.all(time == time_sub)
            assert np.all(np.array(labels) == np.array(labels_sub))

    tmin = time.min()
    tmax = time.max()
    total_win = int((tmax - tmin) * 500)
    if win_len < 0:
        win_len = total_win - overlap
    win_starts = range(0, total_win - win_len, overlap)
    # print(win_starts)
    print(sen_ints)
    print(labels)
    print(data_list[0].shape)

    if isPerm:
        random.seed(random_state_perm)
        random.shuffle(labels)

    l_ints, cv_membership, tgm_acc, tgm_pred = \
        models.lr_tgm_loso_multisub_fold(data_list,
                                         labels,
                                         win_starts,
                                         win_len,
                                         sen_ints,
                                         fold,
                                         penalty=alg[3:],
                                         adj=adj,
                                         doTimeAvg=doTimeAvg,
                                         doTestAvg=doTestAvg)

    np.savez_compressed(fname,
                        l_ints=l_ints,
                        cv_membership=cv_membership,
                        tgm_acc=tgm_acc,
                        tgm_pred=tgm_pred,
                        win_starts=win_starts,
                        time=time,
                        proc=proc)
def run_tgm_exp(experiment,
                sen_type,
                word,
                win_len,
                overlap,
                alg='lr-l1',
                adj=None,
                doTimeAvg=False,
                num_instances=1,
                proc=load_data.DEFAULT_PROC,
                force=False):
    warnings.filterwarnings(action='ignore')
    # Save Directory
    top_dir = TOP_DIR.format(exp=experiment)
    if not os.path.exists(top_dir):
        os.mkdir(top_dir)
    fname = SAVE_FILE.format(dir=top_dir,
                             sen_type=sen_type,
                             word=word,
                             win_len=win_len,
                             ov=overlap,
                             alg=alg,
                             adj=adj,
                             avgTm=bool_to_str(doTimeAvg),
                             inst=num_instances)
    print(force)
    if os.path.isfile(fname + '.npz') and not force:
        print('Job already completed. Skipping Job.')
        print(fname)
        return

    if sen_type == 'pooled':
        voice = ['active', 'passive']
    else:
        voice = [sen_type]

    data_list = []
    sen_ints = []
    time = []
    for i_sub, subject in enumerate(VALID_SUBS[experiment]):
        data, _, sen_ints_sub, time_sub, _ = load_data.load_sentence_data_v2(
            subject=subject,
            align_to='last',
            voice=voice,
            experiment=experiment,
            proc=proc,
            num_instances=num_instances,
            reps_filter=None,
            sensor_type=None,
            is_region_sorted=False,
            tmin=0.0,
            tmax=TMAX[experiment])
        data_list.append(data)
        if i_sub == 0:
            sen_ints = sen_ints_sub
            time = time_sub
        else:
            # Every subject must see the same stimuli on the same time axis.
            assert np.all(sen_ints == sen_ints_sub)
            assert np.all(time == time_sub)

    stimuli_voice = list(load_data.read_stimuli(experiment))
    # print(stimuli_voice)
    if word == 'propid':
        # Label each sentence by its (agent, verb, patient) proposition.
        all_words = [stimuli_voice[sen_int]['stimulus'].split()
                     for sen_int in sen_ints]
        all_voices = [stimuli_voice[sen_int]['voice'] for sen_int in sen_ints]
        content_words = []
        valid_inds = []
        for i_word_list, word_list in enumerate(all_words):
            curr_voice = all_voices[i_word_list]
            if experiment == 'PassAct3' and len(word_list) <= 5:
                continue  # PassAct3: keep only the long sentences
            valid_inds.append(i_word_list)
            content_words.append([word_list[WORD_COLS[curr_voice]['agent']],
                                  word_list[WORD_COLS[curr_voice]['verb']],
                                  word_list[WORD_COLS[curr_voice]['patient']]])
        uni_content, labels = np.unique(np.array(content_words), axis=0,
                                        return_inverse=True)
        print(uni_content)
    else:
        labels = []
        valid_inds = []
        for i_sen_int, sen_int in enumerate(sen_ints):
            word_list = stimuli_voice[sen_int]['stimulus'].split()
            curr_voice = stimuli_voice[sen_int]['voice']
            if word == 'voice':
                labels.append(curr_voice)
                valid_inds.append(i_sen_int)
            elif word == 'senlen':
                labels.append('long' if len(word_list) > 5 else 'short')
                valid_inds.append(i_sen_int)
            elif word in ('agent', 'patient'):
                if experiment == 'PassAct3' and len(word_list) <= 5:
                    continue  # agent/patient only defined for long sentences
                labels.append(word_list[WORD_COLS[curr_voice][word]])
                valid_inds.append(i_sen_int)
            else:
                labels.append(word_list[WORD_COLS[curr_voice][word]])
                valid_inds.append(i_sen_int)

    valid_inds = np.array(valid_inds)
    data_list = [data[valid_inds, ...] for data in data_list]
    # print(labels)

    tmin = time.min()
    tmax = time.max()
    total_win = int((tmax - tmin) * 500)
    if win_len < 0:
        win_len = total_win - overlap
    win_starts = range(0, total_win - win_len, overlap)

    coef, Cs, haufe_maps = models.lr_tgm_loso_multisub_coef(
        data_list,
        labels,
        win_starts,
        win_len,
        penalty=alg[3:],
        adj=adj,
        doTimeAvg=doTimeAvg)

    print('Saving {}'.format(fname))
    np.savez_compressed(fname,
                        coef=coef,
                        Cs=Cs,
                        haufe_maps=haufe_maps,
                        win_starts=win_starts,
                        time=time,
                        proc=proc)
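# models.lr_tgm_loso_multisub_coef is defined elsewhere in the repo; the
# haufe_maps it returns are presumably activation patterns in the sense of
# Haufe et al. (2014). A minimal sketch of that transform, assuming the
# standard formulation (this is not the repo's implementation):
def haufe_transform(X, W):
    # X: (n_trials, n_features) window data; W: (n_features, n_targets)
    # decoder weights. Returns activation patterns A = cov(X) W cov(XW)^+.
    X_c = X - X.mean(axis=0)
    scores = np.dot(X_c, W)
    cov_x = np.cov(X_c, rowvar=False)
    cov_s = np.atleast_2d(np.cov(scores, rowvar=False))
    return np.dot(np.dot(cov_x, W), np.linalg.pinv(cov_s))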
if __name__ == '__main__':
    data, _, sen_ints, time, _ = load_data.load_sentence_data_v2(
        subject='B',
        align_to='last',
        voice=['active', 'passive'],
        experiment='krns2',
        proc=load_data.DEFAULT_PROC,
        num_instances=1,
        reps_filter=None,
        sensor_type=None,
        is_region_sorted=False,
        tmin=0.5,
        tmax=1.0)

    stimuli_voice = list(load_data.read_stimuli('krns2'))
    labels = []
    for sen_int in sen_ints:
        labels.append(stimuli_voice[sen_int]['voice'])
    print(labels)

    tmin = time.min()
    tmax = time.max()
    total_win = int((tmax - tmin) * 500)
    win_len = 25
    overlap = 12
    win_starts = range(0, 24, overlap)  # just the first two windows, as a quick check
        is_region_sorted=False,
        tmin=tmin,
        tmax=tmax)
    print(data_new.shape)
    print(labels_new)
    print(indices_in_master_experiment_stimuli)

    def num_stimulus_words(stimuli_dict_):
        # Count the words left after stripping punctuation from the stimulus.
        return len([w for w in load_data_new.punctuation_regex.sub(
            '', stimuli_dict_['stimulus']).strip().split()
            if len(w.strip()) > 0])

    stimuli_annotations = list(load_data_new.read_stimuli('PassAct3'))
    assert len(stimuli_annotations) == data_new.shape[0]
    indicator_long = np.array(
        [num_stimulus_words(s) >= 4 for s in stimuli_annotations])
    indicator_active = np.array(
        [s['voice'] == 'active' for s in stimuli_annotations])
    is_long_new = np.logical_and(indicator_active, indicator_long)

    time_new = np.squeeze(time_new)
    # inds_to_plot = np.logical_and(time_new >= tmin, time_new <= tmax)
    # data_new = data_new[:, :, inds_to_plot]
    # time_new = time_new[inds_to_plot]
    print(time_old[:10])
    print(time_new[:10])
    min_time = np.min([data_new.shape[-1], data_old.shape[-1]])
def run_tgm_exp(subject,
                sen_type,
                analysis,
                win_len,
                overlap,
                isPerm=False,
                alg='lr-l1',
                adj=None,
                doTimeAvg=False,
                doTestAvg=True,
                num_instances=1,
                random_state_perm=1,
                force=False,
                mode='acc'):
    warnings.filterwarnings(action='ignore')
    # Save Directory
    if not os.path.exists(TOP_DIR):
        os.mkdir(TOP_DIR)
    save_dir = SAVE_DIR.format(top_dir=TOP_DIR, sub=subject)
    if not os.path.exists(save_dir):
        os.mkdir(save_dir)
    fname = SAVE_FILE.format(dir=save_dir,
                             sub=subject,
                             sen_type=sen_type,
                             analysis=analysis,
                             win_len=win_len,
                             ov=overlap,
                             perm=bool_to_str(isPerm),
                             alg=alg,
                             adj=adj,
                             avgTm=bool_to_str(doTimeAvg),
                             avgTst=bool_to_str(doTestAvg),
                             inst=num_instances,
                             rsP=random_state_perm,
                             mode=mode)
    print(force)
    if os.path.isfile(fname + '.npz') and not force:
        print('Job already completed. Skipping Job.')
        print(fname)
        return

    if sen_type == 'pooled':
        voice = ['active', 'passive']
    else:
        voice = sen_type
    experiment = 'krns2'

    # Four 0.5 s epochs: before and after the first noun (first determiner
    # and noun) and before and after the second noun (second determiner and
    # noun).
    data_det1, _, sen_ints_det1, _, _ = load_data.load_sentence_data_v2(
        subject=subject, align_to='noun1', voice=voice, experiment=experiment,
        proc=load_data.DEFAULT_PROC, num_instances=num_instances,
        reps_filter=None, sensor_type=None, is_region_sorted=False,
        tmin=-0.5, tmax=0.0)
    data_n1, _, sen_ints_n1, _, _ = load_data.load_sentence_data_v2(
        subject=subject, align_to='noun1', voice=voice, experiment=experiment,
        proc=load_data.DEFAULT_PROC, num_instances=num_instances,
        reps_filter=None, sensor_type=None, is_region_sorted=False,
        tmin=0.0, tmax=0.5)
    data_det2, _, sen_ints_det2, _, _ = load_data.load_sentence_data_v2(
        subject=subject, align_to='noun2', voice=voice, experiment=experiment,
        proc=load_data.DEFAULT_PROC, num_instances=num_instances,
        reps_filter=None, sensor_type=None, is_region_sorted=False,
        tmin=-0.5, tmax=0.0)
    data_n2, _, sen_ints_n2, time, _ = load_data.load_sentence_data_v2(
        subject=subject, align_to='noun2', voice=voice, experiment=experiment,
        proc=load_data.DEFAULT_PROC, num_instances=num_instances,
        reps_filter=None, sensor_type=None, is_region_sorted=False,
        tmin=0.0, tmax=0.5)

    stimuli_voice = list(load_data.read_stimuli(experiment))
    labels = []
    for sen_int in sen_ints_det1:
        word_list = stimuli_voice[sen_int]['stimulus'].split()
        curr_voice = stimuli_voice[sen_int]['voice']
        labels.append(word_list[WORD_COLS[curr_voice]['det1']])
    if 'det-type' not in analysis:
        for sen_int in sen_ints_n1:
            word_list = stimuli_voice[sen_int]['stimulus'].split()
            curr_voice = stimuli_voice[sen_int]['voice']
            labels.append(word_list[WORD_COLS[curr_voice]['noun1']])
        for sen_int in sen_ints_det2:
            word_list = stimuli_voice[sen_int]['stimulus'].split()
            curr_voice = stimuli_voice[sen_int]['voice']
            labels.append(word_list[WORD_COLS[curr_voice]['det2']])
        for sen_int in sen_ints_n2:
            word_list = stimuli_voice[sen_int]['stimulus'].split()
            curr_voice = stimuli_voice[sen_int]['voice']
            labels.append(word_list[WORD_COLS[curr_voice]['noun2']])
        sen_ints = np.concatenate(
            [sen_ints_det1, sen_ints_n1, sen_ints_det2, sen_ints_n2], axis=0)
        data = np.concatenate([data_det1, data_n1, data_det2, data_n2],
                              axis=0)
    elif analysis == 'det-type-first':
        sen_ints = np.array(sen_ints_det1)
        data = data_det1
    else:
        for sen_int in sen_ints_det2:
            word_list = stimuli_voice[sen_int]['stimulus'].split()
            curr_voice = stimuli_voice[sen_int]['voice']
            labels.append(word_list[WORD_COLS[curr_voice]['det2']])
        sen_ints = np.concatenate([sen_ints_det1, sen_ints_det2], axis=0)
        data = np.concatenate([data_det1, data_det2], axis=0)

    # Optionally restrict to a two-class contrast.
    inds_to_keep = np.ones((len(labels),), dtype=bool)
    if analysis == 'the-dog':
        for i_label, label in enumerate(labels):
            if label != 'the' and label != 'dog':
                inds_to_keep[i_label] = False
    elif analysis == 'a-dog':
        for i_label, label in enumerate(labels):
            if label != 'a' and label != 'dog':
                inds_to_keep[i_label] = False

    data = data[inds_to_keep, :, :]
    sen_ints = sen_ints[inds_to_keep]
    new_labels = [labels[i_label] for i_label, _ in enumerate(labels)
                  if inds_to_keep[i_label]]
    print(np.unique(np.array(new_labels)))

    total_win = data.shape[-1]
    win_starts = range(0, total_win - win_len, overlap)

    if isPerm:
        random.seed(random_state_perm)
        # Permute the filtered labels; these are the ones passed to the model.
        random.shuffle(new_labels)

    if mode == 'acc':
        l_ints, cv_membership, tgm_acc, tgm_pred = models.lr_tgm_loso(
            data,
            new_labels,
            win_starts,
            win_len,
            sen_ints,
            penalty=alg[3:],
            adj=adj,
            doTimeAvg=doTimeAvg,
            doTestAvg=doTestAvg)
        np.savez_compressed(fname,
                            l_ints=l_ints,
                            cv_membership=cv_membership,
                            tgm_acc=tgm_acc,
                            tgm_pred=tgm_pred,
                            win_starts=win_starts,
                            time=time)
    else:
        # Use the filtered labels here too, to match the filtered data.
        l_ints, coef, Cs = models.lr_tgm_coef(data,
                                              new_labels,
                                              win_starts,
                                              win_len,
                                              penalty=alg[3:],
                                              adj=adj,
                                              doTimeAvg=doTimeAvg)
        np.savez_compressed(fname,
                            l_ints=l_ints,
                            coef=coef,
                            Cs=Cs,
                            win_starts=win_starts,
                            time=time)
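# Hypothetical invocation (values illustrative): decode 'the' vs. 'dog'
# presentations for one subject with 25-sample windows.
run_tgm_exp(subject='B', sen_type='pooled', analysis='the-dog',
            win_len=25, overlap=12, mode='acc')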
def run_tgm_exp(experiment,
                subject,
                word,
                win_len,
                overlap,
                isPerm=False,
                alg='lr-l1',
                adj=None,
                doTimeAvg=False,
                doTestAvg=True,
                num_instances=1,
                proc=load_data.DEFAULT_PROC,
                random_state_perm=1,
                force=False):
    warnings.filterwarnings(action='ignore')
    # Save Directory
    top_dir = TOP_DIR.format(exp=experiment)
    if not os.path.exists(top_dir):
        os.mkdir(top_dir)
    save_dir = SAVE_DIR.format(top_dir=top_dir, sub=subject)
    if not os.path.exists(save_dir):
        os.mkdir(save_dir)
    fname = SAVE_FILE.format(dir=save_dir,
                             sub=subject,
                             word=word,
                             win_len=win_len,
                             ov=overlap,
                             perm=bool_to_str(isPerm),
                             alg=alg,
                             adj=adj,
                             avgTm=bool_to_str(doTimeAvg),
                             avgTst=bool_to_str(doTestAvg),
                             inst=num_instances,
                             rsP=random_state_perm)
    print(force)
    if os.path.isfile(fname + '.npz') and not force:
        print('Job already completed. Skipping Job.')
        print(fname)
        return

    if word == 'voice':
        data, _, sen_ints, time, _ = load_data.load_sentence_data_v2(
            subject=subject, align_to='last', voice=['active', 'passive'],
            experiment=experiment, proc=proc, num_instances=num_instances,
            reps_filter=None, sensor_type=None, is_region_sorted=False,
            tmin=0.5, tmax=1.0)
    else:
        # Align active sentences to the 0.5 s after verb onset and passive
        # sentences to the 0.5 s before it, then pool the two.
        data_act, _, sen_ints_act, time, _ = load_data.load_sentence_data_v2(
            subject=subject, align_to='verb', voice=['active'],
            experiment=experiment, proc=proc, num_instances=num_instances,
            reps_filter=None, sensor_type=None, is_region_sorted=False,
            tmin=0.0, tmax=0.5)
        data_pass, _, sen_ints_pass, _, _ = load_data.load_sentence_data_v2(
            subject=subject, align_to='verb', voice=['passive'],
            experiment=experiment, proc=proc, num_instances=num_instances,
            reps_filter=None, sensor_type=None, is_region_sorted=False,
            tmin=-0.5, tmax=0.0)
        data = np.concatenate([data_act, data_pass], axis=0)
        sen_ints = np.concatenate([sen_ints_act, sen_ints_pass], axis=0)

    stimuli_voice = list(load_data.read_stimuli(experiment))
    labels = []
    for sen_int in sen_ints:
        labels.append(stimuli_voice[sen_int]['voice'])
    print(labels)

    tmin = time.min()
    tmax = time.max()
    total_win = int((tmax - tmin) * 500)
    if win_len < 0:
        win_len = total_win - overlap
    win_starts = range(0, total_win - win_len, overlap)

    if isPerm:
        random.seed(random_state_perm)
        random.shuffle(labels)

    if 'lr' in alg:
        l_ints, cv_membership, tgm_acc, tgm_pred = models.lr_tgm_loso(
            data,
            labels,
            win_starts,
            win_len,
            sen_ints,
            penalty=str_to_none(alg[3:]),
            adj=adj,
            doTimeAvg=doTimeAvg,
            doTestAvg=doTestAvg)
    elif 'svm' in alg:
        l_ints, cv_membership, tgm_acc, tgm_pred = models.svc_tgm_loso(
            data,
            labels,
            win_starts,
            win_len,
            sen_ints,
            sub_rs=1,
            penalty=alg[4:],
            adj=adj,
            doTimeAvg=doTimeAvg,
            doTestAvg=doTestAvg,
            ddof=1,
            C=None)
    else:
        doZscore = adj == 'zscore'
        doFeatSelect = 'None' not in alg
        tgm_pred, l_ints, cv_membership, feature_masks, num_feat_selected = \
            models.nb_tgm_loso(data,
                               labels,
                               sen_ints,
                               1,
                               win_starts,
                               win_len,
                               feature_select=doFeatSelect,
                               doZscore=doZscore,
                               doAvg=doTimeAvg,
                               ddof=1)
        tgm_acc = tgm_from_preds_GNB(tgm_pred, l_ints, cv_membership)

    print('Max Acc: %.2f' % np.max(np.mean(tgm_acc, axis=0)))
    np.savez_compressed(fname,
                        l_ints=l_ints,
                        cv_membership=cv_membership,
                        tgm_acc=tgm_acc,
                        tgm_pred=tgm_pred,
                        win_starts=win_starts,
                        time=time,
                        proc=proc)
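# Hypothetical invocation (values drawn from the smoke test above):
# single-subject voice decoding at the end of the sentence.
run_tgm_exp(experiment='krns2', subject='B', word='voice',
            win_len=25, overlap=12, alg='lr-l1', num_instances=1)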