def prepare_data(fMRI_file,
                 subject,
                 type="glove",
                 mode="none",
                 select=True):
    """Load fMRI activations and paired word representations for one subject.

    Parameters
    ----------
    fMRI_file : str
        Path to a CSV of brain activations, one row per trial, one column
        per voxel.
    subject : str
        Subject identifier; used to name the cached voxel-selection file.
    type : str
        Representation name: 'glove', 'word2vec', 'fasttext', 'lexvec',
        'experimental', 'deps', 'F25' or 'non-distributional'.
        (Parameter name shadows the builtin but is kept for caller
        compatibility.)
    mode : str
        If 'limited', restrict the word set to the experimental word list.
    select : bool
        If True, use (computing and caching if necessary) a stable-voxel
        selection of 500 voxels; otherwise keep every voxel.

    Returns
    -------
    tuple
        (words, word_representations, mean_activations) where
        mean_activations is restricted to the selected voxel columns.

    Raises
    ------
    ValueError
        If `type` is not a known representation name (the original code
        fell through and died later with a NameError).
    """
    raw_activations = genfromtxt(fMRI_file, delimiter=',')
    # Center each voxel column, then squash through tanh.
    brain_activations = np.tanh(raw_activations -
                                np.mean(raw_activations, axis=0))

    # One word label per activation row; word_set is the deduplicated list.
    with open('../data/words', 'r') as f:
        words = [row[0] for row in csv.reader(f)]
    word_set = list(set(words))
    print("number of words: %d " % len(word_set))

    # Default: keep every voxel column.
    selected = np.arange(raw_activations.shape[1])
    if select:
        selected_file_name = "general_selected_500_" + subject + ".npy"
        if not os.path.isfile(selected_file_name):
            # Compute and cache the 500 most stable voxels across trials.
            selected = select_stable_voxels(raw_activations,
                                            word_set,
                                            words,
                                            number_of_trials=6,
                                            size_of_selection=500)
            np.save(selected_file_name, selected)
        # Both branches read back from the cache file.
        selected = np.load(selected_file_name)

    if mode == 'limited':
        # Restrict averaging to the experimental word list.
        with open('../data/experimental_wordList.csv', 'r') as f:
            word_set = [row[0] for row in csv.reader(f)]

    # Average the activation rows belonging to each word.
    labels = np.asarray(words)
    mean_activations = [
        np.mean(brain_activations[np.where(labels == word)[0], :], axis=0)
        for word in word_set
    ]

    words = word_set
    embedded_words = _embed_word_set(type, words, word_set)

    word_representations = np.asarray(embedded_words, np.float32)
    return words, np.asarray(word_representations), np.asarray(
        mean_activations)[:, selected]


def _embed_word_set(type, words, word_set):
    """Return embeddings for `words` according to representation `type`."""
    # Representations backed by pre-filtered embedding files on disk.
    filtered_embedding_files = {
        'glove': "../data/neuro_words_glove_6B_300d",  #neuro_words
        'word2vec': "../data/neuro_words_word2vec",
        'fasttext': "../data/neuro_words_fasttext",
        'lexvec': "../data/neuro_words_lexvec",
        'non-distributional': "../data/neuro_words_cnd",
    }
    if type in filtered_embedding_files:
        wem = WordEmbeddingLayer()
        wem.load_filtered_embedding(filtered_embedding_files[type])
        return wem.embed_words(words)
    if type in ('experimental', 'deps', 'F25'):
        # get_word_representation also returns a dict, unused here.
        _, embedded_words = get_word_representation(type=type, words=word_set)
        return embedded_words
    # Fail loudly on unknown names instead of a later NameError.
    raise ValueError("unknown representation type: %r" % (type,))
    # NOTE(review): this fragment appears to be the tail of a DIFFERENT
    # function -- it dedents below prepare_data's body and reads names
    # (selected_file_name, brain_activations_1, brain_activations, word_set,
    # words) that are not bound at this indentation level. Recover the
    # missing function header from the original file before running.
    if not os.path.isfile(selected_file_name):
        # Compute and cache the stable-voxel selection (500 voxels).
        selected = select_stable_voxels(brain_activations_1,
                                        word_set,
                                        words,
                                        number_of_trials=6,
                                        size_of_selection=500)
        np.save(selected_file_name, selected)

    # Both the cached and freshly-computed cases read back from disk.
    selected = np.load(selected_file_name)

    mean_Activations = []

    # Average the activation rows belonging to each word label.
    words = np.asarray(words)
    for word in word_set:
        indices = np.where(words == word)[0]
        mean_Activations.append(np.mean(brain_activations[indices, :], axis=0))

    mean_Activations = np.asarray(mean_Activations)

    # Plot distance distributions over the selected voxel columns.
    pdp.compute_and_plot_dists(mean_Activations[:, selected], word_set)

    words = word_set
    wem = WordEmbeddingLayer()
    wem.load_filtered_embedding("../data/neuro_words")

    embedded_words = wem.embed_words(word_set)
    word_representations = embedded_words

    # Compare activation-space distances against embedding-space distances.
    pdp.compute_and_plot_diff_dists(mean_Activations[:, selected],
                                    word_representations, word_set)
# Example #3
# 0
# NOTE(review): `lrm`, `x_train`, `y_train` and `x_all` are not defined in
# this chunk -- presumably created earlier in the original script; confirm.
lrm.load_model("../glove_all.model")

# Hyper-parameters for the reversed regression model.
expSetup_reversed = ExpSetup(learning_rate=0.01,
                             batch_size=29,
                             number_of_epochs=2000)

# Reversed model: input/output dimensions are swapped relative to the
# forward model (y_train's width in, x_train's width out).
lrm_reversed = tf_LRModel_reversed.LRModel(
    y_train.shape[1],
    x_train.shape[1],
    learning_rate=expSetup_reversed.learning_rate,
    hidden_dim=x_train.shape[1],
    training_steps=expSetup_reversed.number_of_epochs,
    batch_size=expSetup_reversed.batch_size)
lrm_reversed.load_model("../glove_reversed_all_3.model")

# Load the full 300-d GloVe vocabulary (empty filter -> no filtering).
wem = WordEmbeddingLayer()
wem.load_embeddings_from_glove_file(
    filename="../data/glove.6B/glove.6B.300d.txt", filter=[], dim=300)
#wem.load_filtered_embedding("../data/glove_all_6B_300d")  # neuro_words

# Embed the whole vocabulary and index it in a KD-tree so nearest
# neighbours can be found in embedding space.
all_words = [w for w in wem.word2vec.keys()]
all_embedded_words = wem.embed_words(all_words[:len(all_words)])
all_embedded_words = np.asarray(all_embedded_words)
print(all_embedded_words.shape)
word_tree = cKDTree(all_embedded_words)
# Second tree over x_all -- presumably the training inputs; verify.
word_tree_cheat = cKDTree(x_all)

# Voxel coordinates, one CSV row per voxel (no header assumed -- confirm).
coords = []
with open('../data/coords', 'r') as f:
    reader = csv.reader(f)
    coords = list(reader)