def get_lex_repr(self, word):
    """
    Get representation for lexical feature

    When ``self.embed_lex`` is falsy, returns an n-hot vector with one slot
    per entry of ``self.dictionary_values``; the slots of the values that
    ``is_in_dict`` reports for ``word`` are set to 1.0.

    Otherwise returns the concatenation of one ``self.lembeds`` vector per
    entry of ``self.dictionary_values``: the entry's own vector when the
    lexicon lists it for ``word``, the ``UNK`` vector in every other case
    (including words for which the lookup returns nothing).

    :param word: word to look up in ``self.dictionary``
    :return: numpy array
    """
    # The lookup does not depend on the property being visited, so it is
    # done once per call instead of once per property.
    # NOTE(review): assumes is_in_dict is side-effect free -- confirm.
    values = is_in_dict(word, self.dictionary)

    if not self.embed_lex:
        # n-hot representation
        n_hot = np.zeros(len(self.dictionary_values))
        if values:
            for v in values:
                n_hot[self.dictionary_values.index(v)] = 1.0
        return n_hot

    lex_feats = []
    for prop in self.dictionary_values:
        # Unknown word and "property not listed for this word" both fall
        # back to the UNK embedding.
        key = prop if values and prop in values else UNK
        lex_feats.append(self.lembeds[self.l2i[key]].npvalue())
    return np.concatenate(lex_feats)
def save_wordlex_map(self, out_filename):
    """
    save final word-to-lexicon-embedding map to file

    Writes one line per word: the word followed by one space-separated token
    per entry of ``self.dictionary_values`` -- the entry itself when the
    lexicon lists it for the word, ``UNK`` otherwise.

    :param out_filename: filename prefix; ".wlmap.emb" is appended to it
    """
    # construct reverse mapping using word embeddings
    i2wl = {self.w2i[w]: w for w in self.w2i.keys()}

    # The context manager closes the file even if a lookup or write raises.
    with open(out_filename + ".wlmap.emb", "w") as out:
        for word in i2wl.values():
            # One lexicon lookup per word; the result does not depend on
            # the property being visited.
            # NOTE(review): assumes is_in_dict is side-effect free -- confirm.
            values = is_in_dict(word, self.dictionary)
            lex_feats = [
                prop if values and prop in values else UNK
                for prop in self.dictionary_values
            ]
            out.write("{} {}\n".format(word, " ".join([str(x) for x in lex_feats])))
def save_wordlex_map(self, out_filename):
    """
    save final word-to-lexicon-embedding map to file

    Writes one line per word: the word followed by one space-separated token
    per entry of ``self.dictionary_values`` -- the entry itself when the
    lexicon lists it for the word, ``UNK`` otherwise.

    :param out_filename: filename prefix; ".wlmap.emb" is appended to it
    """
    # construct reverse mapping using word embeddings
    i2wl = {self.w2i[w]: w for w in self.w2i.keys()}

    # The context manager closes the file even if a lookup or write raises.
    with open(out_filename + ".wlmap.emb", "w") as out:
        for word in i2wl.values():
            # One lexicon lookup per word; the result does not depend on
            # the property being visited.
            # NOTE(review): assumes is_in_dict is side-effect free -- confirm.
            values = is_in_dict(word, self.dictionary)
            lex_feats = [
                prop if values and prop in values else UNK
                for prop in self.dictionary_values
            ]
            out.write("{} {}\n".format(word, " ".join([str(x) for x in lex_feats])))
def get_lex_repr(self, word):
    """
    Get representation for lexical feature

    When ``self.embed_lex`` is falsy, returns an n-hot vector with one slot
    per entry of ``self.dictionary_values``; the slots of the values that
    ``is_in_dict`` reports for ``word`` are set to 1.0.

    Otherwise returns the concatenation of one ``self.lembeds`` vector per
    entry of ``self.dictionary_values``: the entry's own vector when the
    lexicon lists it for ``word``, the ``UNK`` vector in every other case
    (including words for which the lookup returns nothing).

    :param word: word to look up in ``self.dictionary``
    :return: numpy array
    """
    # The lookup does not depend on the property being visited, so it is
    # done once per call instead of once per property.
    # NOTE(review): assumes is_in_dict is side-effect free -- confirm.
    values = is_in_dict(word, self.dictionary)

    if not self.embed_lex:
        # n-hot representation
        n_hot = np.zeros(len(self.dictionary_values))
        if values:
            for v in values:
                n_hot[self.dictionary_values.index(v)] = 1.0
        return n_hot

    lex_feats = []
    for prop in self.dictionary_values:
        # Unknown word and "property not listed for this word" both fall
        # back to the UNK embedding.
        key = prop if values and prop in values else UNK
        lex_feats.append(self.lembeds[self.l2i[key]].npvalue())
    return np.concatenate(lex_feats)