import argparse
import glob
import os

import numpy as np
import scipy.io as spio
from numpy.linalg import svd

# NOTE: voxel_scores is a project function defined elsewhere in the
# repository; sketches of the other missing helpers are provided below.


def read_data_e2(data_dir):
    main_dir = glob.glob(data_dir + '/*/*')
    print(main_dir)
    for fl in main_dir:
        participant = fl.strip().split("/")[-2]
        exp = fl.strip().split("/")[-3]
        print(fl.split('/')[-1])
        if 'example' in fl.split('/')[-1]:
            ff = spio.loadmat(fl, squeeze_me=True)
            ff_2 = spio.loadmat(fl, squeeze_me=False)  # meta kept unsqueezed
            disc_pr()
            sents = ff['keySentences']
            part_topic_id = ff['labelsPassageForEachSentence']
            topic_id = ff['labelsPassageCategory']
            topics = ff['keyPassageCategory']
            part_of_topics = ff['keyPassages']
            vxl = ff['examples']
            mtd = ff_2['meta']
            # Each passage category covers 4 passages, so repeat every
            # category label 4 times to align it with the passage ids.
            topic_id = [x for x in topic_id for _ in range(4)]
            data_dict = {}
            for idx, el in enumerate(part_topic_id):
                # (sentence, passage (e.g. 'Apple'), category (e.g. 'Fruit')) -> voxels
                data_dict[(sents[idx], part_of_topics[el - 1],
                           topics[topic_id[idx] - 1])] = vxl[idx]
            out_base = ('../data_processed/' + exp + '_proc/' + participant
                        + '/' + fl.strip().split("/")[-1])
            save_pickle(data_dict, out_base)
            save_pickle(mtd, out_base + '_meta')
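# save_pickle, load_pickle, mkdir_p, and disc_pr are project helpers whose
# originals are not shown in this file; the minimal sketches below capture
# what the code above assumes they do (the repository's real versions may
# differ).

import pickle


def mkdir_p(path):
    # Equivalent of `mkdir -p`: create the directory tree, tolerate existing.
    os.makedirs(path, exist_ok=True)


def save_pickle(obj, path):
    # Create the target directory first, since the processed-data tree
    # ('../data_processed/...') may not exist yet.
    mkdir_p(os.path.dirname(path))
    with open(path, 'wb') as f:
        pickle.dump(obj, f)


def load_pickle(path):
    with open(path, 'rb') as f:
        return pickle.load(f)


def disc_pr():
    # Print a separator line; used throughout for debugging output.
    print('-' * 80)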
def load_exp1(data_dir):
    w2vec_dict = load_pickle('./stimuli/word2vec.pkl')
    exp_id = int((data_dir.split('/')[-2]).split('_')[0][-1])
    assert exp_id == 1
    fld = data_dir  # run one participant at a time
    data_files = sorted(glob.glob(fld + '/*'))
    # Pair consecutive files: each data pickle is followed by its meta pickle.
    dt_fls_grouped = [tuple(data_files[i:i + 2])
                      for i in range(0, len(data_files), 2)]
    print(fld)
    disc_pr()
    # Each file pair covers the wordcloud, picture, and sentence
    # presentation cases.
    for data_group in dt_fls_grouped:
        data_dict, metadata = load_data_meta(data_group)
        # Look up the word2vec embedding for every stimulus word.
        word_dict = {word: w2vec_dict[word] for word in data_dict}
        yield data_group[0], data_dict, word_dict, metadata
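# load_data_meta is defined elsewhere in the repository; the version below is
# a minimal sketch of the contract load_exp1 relies on, assuming the
# (data_file, meta_file) pickle pairs written by the read_data_* functions.
# It is a hypothetical reconstruction, not the repository's implementation.
def load_data_meta(data_group):
    data_file, meta_file = data_group
    data_dict = load_pickle(data_file)  # stimulus -> voxel activations
    metadata = load_pickle(meta_file)   # the scan 'meta' struct
    return data_dict, metadata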
def read_data_e3(data_dir):
    main_dir = glob.glob(data_dir + '/*/*')
    for fl in main_dir:
        print("Participant id is: ", fl.strip().split('/')[-2])
        participant = fl.strip().split("/")[-2]
        exp = fl.strip().split("/")[-3]
        if 'example' in fl.split('/')[-1]:
            ff = spio.loadmat(fl, squeeze_me=True)
            ff_v2 = spio.loadmat(fl, squeeze_me=True)
            disc_pr()
            sents = ff['keySentences']
            vxl = ff['examples']
            mtd = ff_v2['meta']
            sen_lbl = ff['labelsPassageForEachSentence'].tolist()
            # Sort the unique passage ids: labelsPassageCategory is ordered by
            # passage id, and plain set() iteration order is not guaranteed.
            uniq_lbls = sorted(set(sen_lbl))
            zipped = list(zip(uniq_lbls, ff['labelsPassageCategory'].tolist()))
            freq = [sen_lbl.count(key) for key in uniq_lbls]
            # Repeat each (passage_id, category_id) pair once per sentence of
            # that passage, so labels line up with the rows of 'examples'.
            final_list_lbls = []
            for idx, el in enumerate(zipped):
                final_list_lbls.extend([el] * freq[idx])
            print(len(final_list_lbls))
            for i in range(len(final_list_lbls)):
                passage_id, category_id = final_list_lbls[i]
                final_list_lbls[i] = (sents[i], passage_id,
                                      ff['keyPassageCategory'][category_id - 1])
            print(final_list_lbls)
            data_dict = {}
            for i, key in enumerate(final_list_lbls):
                data_dict[key] = vxl[i]
            out_base = ('../data_processed/' + exp + '_proc/' + participant
                        + '/' + fl.strip().split("/")[-1])
            save_pickle(data_dict, out_base)
            save_pickle(mtd, out_base + '_meta')
            disc_pr()
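# load_exp23 is called from the entry point below, but its body is not shown
# in this file. The dispatch here is an assumption based on the function
# names, not the repository's actual implementation: route an exp2/exp3 data
# directory to the matching read_data_e* preprocessor.
def load_exp23(data_dir):
    exp_id = int((data_dir.split('/')[-2]).split('_')[0][-1])
    if exp_id == 2:
        read_data_e2(data_dir)
    elif exp_id == 3:
        read_data_e3(data_dir)
    else:
        raise ValueError('load_exp23 expects an exp2/exp3 data directory')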
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--data_dir', dest="data_dir", required=True)
    args = parser.parse_args()
    print(args.data_dir)
    # The experiment id is encoded in the folder name, e.g. '.../exp1_data/'.
    exp = int((args.data_dir.split('/')[-2]).split('_')[0][-1])
    assert exp in (1, 2, 3)
    assert 'exp' in args.data_dir.split('/')[-2]
    if exp == 1:
        data_gen = load_exp1(args.data_dir)
        disc_pr()
        # load_exp1 is a generator: iterate to process one file pair at a time.
        for x in data_gen:
            print(x[0])
            out_file = os.path.join('./', 'voxels_scores',
                                    '{}.npy'.format(x[0].split('.')[0]))
            out_dir = '/'.join(out_file.split('/')[:-1])
            mkdir_p(out_dir)
            vscores = voxel_scores(x[1], x[2], x[3])
            np.save(out_file, vscores)
    elif exp == 2 or exp == 3:
        load_exp23(args.data_dir)
    else:
        raise ValueError("Illegal experiment id; select from {1, 2, 3}.")
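# Example invocation (the script name and path are illustrative, not taken
# from the repository):
#
#     python preprocess.py -i ./data/exp1_data/M01
#
# exp1 writes per-file voxel scores under ./voxels_scores/; exp2/exp3 write
# processed pickles under ../data_processed/.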
def regression_decoder(train_data, train_targets):
    '''
    :param train_data: #examples x #voxels matrix
    :param train_targets: #examples x #dimensions matrix
    :return: weights - a (#voxels + 1) x #dimensions weight matrix, where
             column i holds #voxels weights plus an intercept (last row) for
             predicting target dimension i
             reg_dim - a 1 x #dimensions vector with the regularization
             parameter chosen for each dimension

    Uses an efficient implementation of leave-one-out cross-validation within
    the training set to pick a (possibly different) optimal value of the
    regularization parameter for each semantic dimension of the target vector.

    Uses kernel ridge regression with a linear kernel. This allows the full
    brain to be used as input features, because it avoids inverting the large
    #voxels x #voxels matrix.
    '''
    # Append a column of ones to the data for the bias term.
    h_x = np.ones((train_data.shape[0], train_data.shape[1] + 1))
    h_x[:, :-1] = train_data
    train_data = h_x

    emb_dim = train_targets.shape[1]
    examples = train_data.shape[0]
    assert train_data.shape[0] == train_targets.shape[0], \
        'Data and targets must have the same number of examples'

    params = [1, .5, 5, 0.1, 10, 0.01, 100, 0.001, 1000, 0.0001, 10000,
              0.00001, 100000, 0.000001, 1000000]

    cv_err = np.zeros((len(params), emb_dim))
    K = np.matmul(train_data, train_data.T)
    # NumPy's svd returns Vh (the transpose of MATLAB's V): K = U @ D @ Vh.
    U, s, Vh = svd(K)
    D = np.eye(U.shape[1], Vh.shape[0]) * s

    for idx, reg_param in enumerate(params):
        dlambda = D + reg_param * np.eye(D.shape[0], D.shape[1])
        dlambda_inv = np.diag(1 / np.diag(dlambda))
        # (K + lambda*I)^-1 reconstructed from the SVD of K.
        klambda_inv = np.matmul(np.matmul(Vh.T, dlambda_inv), U.T)
        K_p = np.matmul(train_data.T, klambda_inv)
        S = np.matmul(train_data, K_p)  # hat matrix
        weights = np.matmul(K_p, train_targets)
        # Leave-one-out residuals: scale each residual by 1 / (1 - leverage).
        Snorm = np.tile((1 - np.diag(S)).reshape(-1, 1), (1, emb_dim))
        Y_diff = train_targets - np.matmul(train_data, weights)
        Y_diff = Y_diff / Snorm
        # Per-dimension CV error: elementwise squares summed over examples.
        cv_err[idx, :] = (1 / examples) * np.sum(Y_diff * Y_diff, axis=0)

    # Refit each dimension with its best regularization parameter.
    minerridx = cv_err.argmin(axis=0)
    reg_dim = np.zeros((1, emb_dim))
    for i in range(emb_dim):
        reg_param = params[minerridx[i]]
        reg_dim[0, i] = reg_param
        dlambda = D + reg_param * np.eye(D.shape[0], D.shape[1])
        dlambda_inv = np.diag(1 / np.diag(dlambda))
        klambda_inv = np.matmul(np.matmul(Vh.T, dlambda_inv), U.T)
        weights[:, i] = np.matmul(np.matmul(train_data.T, klambda_inv),
                                  train_targets[:, i])
    return weights, reg_dim
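# Minimal usage sketch for regression_decoder with synthetic data; the sizes
# and names below are illustrative only. The function is not called
# automatically: invoke _demo_regression_decoder() manually to check shapes.
def _demo_regression_decoder():
    rng = np.random.default_rng(0)
    n_examples, n_voxels, n_dims = 40, 200, 10
    brain = rng.standard_normal((n_examples, n_voxels))      # voxel activations
    embeddings = rng.standard_normal((n_examples, n_dims))   # semantic targets
    weights, reg_dim = regression_decoder(brain, embeddings)
    # weights: (n_voxels + 1) x n_dims, last row is the intercept;
    # reg_dim: 1 x n_dims chosen regularization per dimension.
    print(weights.shape, reg_dim.shape)
    # Predict embeddings for the training data (append the bias column).
    h_x = np.hstack([brain, np.ones((n_examples, 1))])
    preds = np.matmul(h_x, weights)
    print(preds.shape)  # n_examples x n_dims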