Ejemplo n.º 1
0
    def test_create_dictionary(self):
        """Check ``create_dictionary`` against hand-written expected mappings.

        The same three-sentence corpus is passed every time; only the second
        positional argument varies (2, 3, ``None``, 1) and the third is always
        ``False``.
        """
        # NOTE(review): judging by the expected results, the second argument
        # looks like a cap on the number of entries (None = no cap) -- confirm
        # against create_dictionary itself.
        corpus = [["foo", "bar", "baz"], ["foo", "bar"], ["foo"]]

        # (second argument, expected mapping), listed in the order they are run.
        cases = (
            (2, {"foo": 0, "bar": 1}),
            (3, {"foo": 0, "bar": 1, "baz": 2}),
            (None, {"foo": 0, "bar": 1, "baz": 2}),
            (1, {"foo": 0}),
        )

        for limit, wanted in cases:
            self.assertEqual(wanted, create_dictionary(corpus, limit, False))
Ejemplo n.º 2
0
    def get_word_embedding(self, path_to_vec, orthonormalized=True):
        """Collect vocabulary and word-vector parameters over all data splits.

        The ``'X'`` entries of the ``'train'``, ``'dev'`` and ``'test'`` splits
        of ``self.data`` are concatenated and handed to
        ``data.create_dictionary`` (with ``threshold=0``); the resulting
        ``word2id`` is then used to load vectors from ``path_to_vec`` and to
        build the per-word phase table.

        Args:
            path_to_vec: Forwarded unchanged to ``data.get_wordvec``.
            orthonormalized: Forwarded unchanged to ``data.get_wordvec``.

        Returns:
            A dict with the keys ``'word2id'``, ``'word_vec'``, ``'wvec_dim'``,
            ``'word_complex_phase'`` and ``'id2word'``.
        """
        corpus = (
            self.data['train']['X']
            + self.data['dev']['X']
            + self.data['test']['X']
        )

        id2word, word2id = data.create_dictionary(corpus, threshold=0)
        vectors = data.get_wordvec(
            path_to_vec, word2id, orthonormalized=orthonormalized
        )

        # The dimension is read off whichever entry iteration yields first.
        first_key = next(iter(vectors))
        dim = len(vectors[first_key])

        # Holds the value of theta for each word.
        phases = data.set_wordphase(word2id)

        return {
            'word2id': word2id,
            'word_vec': vectors,
            'wvec_dim': dim,
            'word_complex_phase': phases,
            'id2word': id2word,
        }
Ejemplo n.º 3
0
def prepare(params, samples):
    """Attach vocabulary and word vectors for ``samples`` to ``params``.

    Sets ``params.word2id`` (second element of the pair returned by
    ``data.create_dictionary``), ``params.word_vec`` (loaded from
    ``PATH_TO_VEC``) and ``params.wvec_dim`` (fixed at 300). Returns ``None``.
    """
    # The first element of the returned pair is not needed here.
    _unused, word2id = data.create_dictionary(samples)
    params.word2id = word2id
    params.word_vec = data.get_wordvec(PATH_TO_VEC, params.word2id)
    # NOTE(review): 300 is hard-coded; presumably it must match the vectors
    # stored at PATH_TO_VEC -- confirm.
    params.wvec_dim = 300
Ejemplo n.º 4
0
def prepare(params, samples):
    """Attach vocabulary and word vectors for ``samples`` to ``params``.

    Sets ``params.word2id`` (second element of the pair returned by
    ``data.create_dictionary``) and ``params.word_vec`` (loaded from
    ``PATH_TO_GLOVE``). Returns ``None``.
    """
    # The first element of the returned pair is not needed here.
    _unused, word2id = data.create_dictionary(samples)
    params.word2id = word2id
    params.word_vec = data.get_wordvec(PATH_TO_GLOVE, params.word2id)
Ejemplo n.º 5
0
def prepare(params, samples):
    """Attach vocabulary and word vectors for ``samples`` to ``params``.

    Sets ``params.word2id`` (second element of the pair returned by
    ``data.create_dictionary``), ``params.word_vec`` (loaded from
    ``PATH_TO_VEC``) and ``params.wvec_dim`` (taken from ``emb_size``).
    Returns ``None``.
    """
    # The first element of the returned pair is not needed here.
    _unused, word2id = data.create_dictionary(samples)
    params.word2id = word2id
    params.word_vec = data.get_wordvec(PATH_TO_VEC, params.word2id)
    # Make sure this matches the embedding size.
    params.wvec_dim = emb_size