def get_word_embedding(self, path_to_vec, orthonormalized=True):
    samples = self.data['train']['X'] + self.data['dev']['X'] + \
              self.data['test']['X']
    id2word, word2id = data.create_dictionary(samples, threshold=0)
    word_vec = data.get_wordvec(path_to_vec, word2id,
                                orthonormalized=orthonormalized)
    wvec_dim = len(word_vec[next(iter(word_vec))])
    # stores the value of theta (the complex phase) for each word
    word_complex_phase = data.set_wordphase(word2id)
    params = {'word2id': word2id, 'word_vec': word_vec, 'wvec_dim': wvec_dim,
              'word_complex_phase': word_complex_phase, 'id2word': id2word}
    return params
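# Illustrative usage sketch (an assumption, not part of the original code):
# one way the returned params could be consumed is to form a complex-valued
# word embedding as amplitude * exp(i * theta). The keying of
# `word_complex_phase` by word, and the scalar phase per word, are assumed
# here for illustration only.
import numpy as np

def lookup_complex_embedding(params, word):
    # Hypothetical helper, for illustration only.
    amplitude = np.asarray(params['word_vec'][word])    # real-valued vector of length wvec_dim
    theta = params['word_complex_phase'][word]          # scalar phase for this word (assumed keying)
    return amplitude * np.exp(1j * theta)               # complex vector of length wvec_dim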
def prepare(params, samples):
    _, params.word2id = data.create_dictionary(samples)
    params.word_vec = data.get_wordvec(PATH_TO_VEC, params.word2id)
    params.wvec_dim = 300
    return
def prepare(params, samples):
    _, params.word2id = data.create_dictionary(samples)
    params.word_vec = data.get_wordvec(PATH_TO_GLOVE, params.word2id)
    return
def prepare(params, samples):
    _, params.word2id = data.create_dictionary(samples)
    params.word_vec = data.get_wordvec(PATH_TO_VEC, params.word2id)
    # Make sure this matches the embedding size
    params.wvec_dim = emb_size
    return
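# Illustrative sketch (an assumption, not part of the original code): in the
# usual SentEval pattern, a `prepare` function like the ones above is paired
# with a `batcher` that averages the loaded word vectors into a single
# sentence embedding of size params.wvec_dim.
import numpy as np

def batcher(params, batch):
    embeddings = []
    for sent in batch:
        # Average the vectors of in-vocabulary words; fall back to a zero
        # vector when no word in the sentence has an embedding.
        vecs = [params.word_vec[w] for w in sent if w in params.word_vec]
        if vecs:
            embeddings.append(np.mean(vecs, axis=0))
        else:
            embeddings.append(np.zeros(params.wvec_dim))
    return np.vstack(embeddings)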