def prepare_data(fMRI_file, subject, type="glove", mode="none", select=True):
    """Load per-word brain activations and word embeddings for one subject.

    Args:
        fMRI_file: path to a CSV of per-trial voxel activations (rows = trials).
        subject: subject id, used to name the cached stable-voxel file.
        type: embedding source — one of 'glove', 'word2vec', 'fasttext',
            'lexvec', 'non-distributional' (loaded via WordEmbeddingLayer),
            or 'experimental', 'deps', 'F25' (via get_word_representation).
            NOTE(review): shadows the builtin `type`; name kept so existing
            keyword callers keep working.
        mode: 'limited' restricts the word set to
            ../data/experimental_wordList.csv; anything else uses all words.
        select: when True, use (and cache on disk) the 500 most stable voxels.

    Returns:
        Tuple of (words, word_representations, mean_activations) where
        mean_activations is restricted to the selected voxel columns.
    """
    brain_activations_1 = genfromtxt(fMRI_file, delimiter=',')
    # Center each voxel across trials, then squash into (-1, 1).
    brain_activations = brain_activations_1 - np.mean(brain_activations_1, axis=0)
    brain_activations = np.tanh(brain_activations)

    # One stimulus word per trial row; words repeat across trials.
    with open('../data/words', 'r') as f:
        words_1 = list(csv.reader(f))
    words = [w[0] for w in words_1]
    word_set = list(set(words))
    print("number of words: %d " % len(word_set))

    # Default: keep every voxel column.
    selected = np.arange(len(brain_activations_1[0]))
    if select:  # fixed: was the non-idiomatic `select == True`
        selected_file_name = "general_selected_500_" + subject + ".npy"
        if not os.path.isfile(selected_file_name):
            selected = select_stable_voxels(brain_activations_1, word_set, words,
                                            number_of_trials=6,
                                            size_of_selection=500)
            np.save(selected_file_name, selected)
        # Always reload from cache so fresh and cached runs behave identically.
        selected = np.load(selected_file_name)

    mean_Activations = []
    if mode == 'limited':
        with open('../data/experimental_wordList.csv', 'r') as f:
            word_set = [w[0] for w in list(csv.reader(f))]
    words = np.asarray(words)
    # Average the trial activations belonging to each distinct word.
    for word in word_set:
        indices = np.where(words == word)[0]
        mean_Activations.append(np.mean(brain_activations[indices, :], axis=0))
    words = word_set

    # Dispatch table replaces five duplicated if/elif branches that differed
    # only in the embedding file path.
    embedding_files = {
        'glove': "../data/neuro_words_glove_6B_300d",  # neuro_words
        'word2vec': "../data/neuro_words_word2vec",
        'fasttext': "../data/neuro_words_fasttext",
        'lexvec': "../data/neuro_words_lexvec",
        'non-distributional': "../data/neuro_words_cnd",
    }
    if type in embedding_files:
        wem = WordEmbeddingLayer()
        wem.load_filtered_embedding(embedding_files[type])
        embedded_words = wem.embed_words(words)
    elif type in ('experimental', 'deps', 'F25'):
        embedding_dic, embedded_words = get_word_representation(type=type,
                                                                words=word_set)

    word_representations = np.asarray(embedded_words, np.float32)
    return words, np.asarray(word_representations), np.asarray(
        mean_Activations)[:, selected]
# NOTE(review): fragment — the enclosing function's `def` line is not visible
# in this chunk. Names such as `selected_file_name`, `brain_activations_1`,
# `brain_activations`, `word_set`, `words`, and `pdp` are bound outside this
# view; comments below describe only what the visible statements do.
if not os.path.isfile(selected_file_name):
    # Compute the stable-voxel subset (6 trials per word, keep 500 voxels)
    # and cache it to disk so later runs skip this step.
    selected = select_stable_voxels(brain_activations_1, word_set, words, number_of_trials=6, size_of_selection=500)
    np.save(selected_file_name, selected)
# Reload from the cache file whether or not it was just written.
selected = np.load(selected_file_name)
mean_Activations = []
words = np.asarray(words)
# Average the trial rows belonging to each distinct word.
for word in word_set:
    indices = np.where(words == word)[0]
    mean_Activations.append(np.mean(brain_activations[indices, :], axis=0))
mean_Activations = np.asarray(mean_Activations)
# Plot pairwise distances over the selected voxel columns only.
pdp.compute_and_plot_dists(mean_Activations[:, selected], word_set)
words = word_set
wem = WordEmbeddingLayer()
wem.load_filtered_embedding("../data/neuro_words")
embedded_words = wem.embed_words(word_set)
word_representations = embedded_words
# Compare brain-space distances against embedding-space distances.
pdp.compute_and_plot_diff_dists(mean_Activations[:, selected], word_representations, word_set)
# NOTE(review): fragment — the enclosing function/script header is not visible
# in this chunk. `lrm`, `y_train`, `x_train`, `x_all`, `ExpSetup`,
# `tf_LRModel_reversed`, and `cKDTree` are bound outside this view.
lrm.load_model("../glove_all.model")
# Hyper-parameters for the reversed (brain -> embedding?) model —
# presumably mirrors the forward model's setup; confirm against caller.
expSetup_reversed = ExpSetup(learning_rate=0.01, batch_size=29, number_of_epochs=2000)
lrm_reversed = tf_LRModel_reversed.LRModel(y_train.shape[1], x_train.shape[1], learning_rate=expSetup_reversed.learning_rate, hidden_dim=x_train.shape[1], training_steps=expSetup_reversed.number_of_epochs, batch_size=expSetup_reversed.batch_size)
lrm_reversed.load_model("../glove_reversed_all_3.model")
wem = WordEmbeddingLayer()
# Load the full (unfiltered) 300-d GloVe vocabulary from the raw text file.
wem.load_embeddings_from_glove_file(filename="../data/glove.6B/glove.6B.300d.txt", filter=[], dim=300)
#wem.load_filtered_embedding("../data/glove_all_6B_300d")  # neuro_words
all_words = [w for w in wem.word2vec.keys()]
all_embedded_words = wem.embed_words(all_words[:len(all_words)])
all_embedded_words = np.asarray(all_embedded_words)
print(all_embedded_words.shape)
# KD-trees for nearest-neighbour lookup: one over the whole embedding
# vocabulary, one over the experiment's own inputs (`x_all`).
word_tree = cKDTree(all_embedded_words)
word_tree_cheat = cKDTree(x_all)
coords = []
# Voxel coordinate rows; kept as raw CSV string lists.
with open('../data/coords', 'r') as f:
    reader = csv.reader(f)
    coords = list(reader)