예제 #1
0
    def get_word_embedding(self, path_to_vec, orthonormalized=True):
        """Build vocabulary and embedding lookups over every data split.

        The ``'X'`` entries of the train, dev and test splits in
        ``self.data`` are concatenated and handed to the ``data`` helpers.

        Args:
            path_to_vec: Forwarded to ``data.get_wordvec`` as the location
                of the word vectors.
            orthonormalized: Forwarded unchanged to ``data.get_wordvec``.

        Returns:
            A dict with the keys ``'word2id'``, ``'word_vec'``,
            ``'wvec_dim'``, ``'word_complex_phase'`` and ``'id2word'``.
        """
        train_x = self.data['train']['X']
        dev_x = self.data['dev']['X']
        test_x = self.data['test']['X']
        all_samples = train_x + dev_x + test_x

        # threshold=0 is passed explicitly to the dictionary builder.
        id2word, word2id = data.create_dictionary(all_samples, threshold=0)
        word_vec = data.get_wordvec(
            path_to_vec, word2id, orthonormalized=orthonormalized
        )

        # The dimensionality is read off the first vector in the lookup.
        first_word = next(iter(word_vec))
        wvec_dim = len(word_vec[first_word])

        # Holds the value of theta (the phase) for each word.
        word_complex_phase = data.set_wordphase(word2id)

        return {
            'word2id': word2id,
            'word_vec': word_vec,
            'wvec_dim': wvec_dim,
            'word_complex_phase': word_complex_phase,
            'id2word': id2word,
        }
예제 #2
0
def prepare(params, samples):
    """Attach vocabulary and word-vector lookups for ``samples`` to ``params``.

    Sets ``params.word2id``, ``params.word_vec`` and ``params.wvec_dim``
    in place; nothing is returned.
    """
    # Only the second element of the dictionary pair is kept.
    _unused, word2id = data.create_dictionary(samples)
    params.word2id = word2id
    params.word_vec = data.get_wordvec(PATH_TO_VEC, params.word2id)
    # NOTE(review): hard-coded; presumably the dimensionality of the vectors
    # stored at PATH_TO_VEC -- confirm before switching embedding files.
    params.wvec_dim = 300
예제 #3
0
파일: bow.py 프로젝트: jayden11/SentEval
def prepare(params, samples):
    """Attach vocabulary and word-vector lookups for ``samples`` to ``params``.

    Sets ``params.word2id`` and ``params.word_vec`` in place; nothing is
    returned.
    """
    # Only the second element of the dictionary pair is kept.
    _unused, word2id = data.create_dictionary(samples)
    params.word2id = word2id
    params.word_vec = data.get_wordvec(PATH_TO_GLOVE, params.word2id)
예제 #4
0
def prepare(params, samples):
    """Attach vocabulary and word-vector lookups for ``samples`` to ``params``.

    Sets ``params.word2id``, ``params.word_vec`` and ``params.wvec_dim``
    in place; nothing is returned.
    """
    # Only the second element of the dictionary pair is kept.
    _unused, word2id = data.create_dictionary(samples)
    params.word2id = word2id
    params.word_vec = data.get_wordvec(PATH_TO_VEC, params.word2id)
    # emb_size comes from module scope; make sure it matches the
    # embedding size of the loaded vectors.
    params.wvec_dim = emb_size