def extract_information(self, train_instances):
    """Gather lemma sequences from the training set and derive IDF state.

    Each instance is asked for both of its sentences through
    ``get_word(type='lemma', stopwords=self.stopwords, lower=True)``.
    The two sequences are collected in order (first, then second) and the
    full collection is passed to ``utils.idf_calculator``.

    Side effects:
        Sets ``self.idf_weight`` to the value returned by
        ``utils.idf_calculator`` and ``self.vocab`` to
        ``utils.word2index(self.idf_weight)``.

    Args:
        train_instances: iterable of objects exposing ``get_word``.
    """
    corpus = []
    for instance in train_instances:
        first, second = instance.get_word(
            type='lemma', stopwords=self.stopwords, lower=True)
        corpus.extend((first, second))

    self.idf_weight = utils.idf_calculator(corpus)
    self.vocab = utils.word2index(self.idf_weight)
def extract_information(self, train_instances):
    """Gather dependency pairs from the training set and derive IDF state.

    For both sentences of every instance, each entry returned by
    ``get_dependency()`` is reduced to the tuple ``(entry[1], entry[2])``.
    The resulting per-sentence lists are passed to
    ``utils.idf_calculator``.

    Side effects:
        Sets ``self.idf_weight`` to the value returned by
        ``utils.idf_calculator`` and ``self.vocab`` to
        ``utils.word2index(self.idf_weight)``.

    Args:
        train_instances: iterable of objects exposing ``get_dependency``.
    """
    corpus = []
    for instance in train_instances:
        deps_first, deps_second = instance.get_dependency()
        for deps in (deps_first, deps_second):
            # Keep only positions 1 and 2 of each dependency entry.
            corpus.append([(entry[1], entry[2]) for entry in deps])

    self.idf_weight = utils.idf_calculator(corpus)
    self.vocab = utils.word2index(self.idf_weight)
def extract(self, train_instance):
    """Build concatenated unigram features for the two candidate warrants.

    Each warrant is extended with the reason and the claim, then vectorized
    with ``utils.vectorize`` against ``self.unigram_dict``.

    Side effects:
        Reassigns ``self.vocab`` from ``self.unigram_dict`` on every call.

    Args:
        train_instance: object exposing ``get_six(type='word')``, which
            yields six values (warrant0, warrant1, reason, claim, title,
            info); the last two are not used here.

    Returns:
        A tuple ``(features, infos)`` where ``features`` is the first
        warrant's vector followed by the second's (joined with ``+``) and
        ``infos`` is ``[len(self.unigram_dict), 'unigram']``.
    """
    warrant0, warrant1, reason, claim, _title, _info = train_instance.get_six(
        type='word')

    # Build both augmented sequences before touching instance state.
    augmented = [warrant + reason + claim for warrant in (warrant0, warrant1)]

    self.vocab = utils.word2index(self.unigram_dict)
    vec0, vec1 = (
        utils.vectorize(tokens, self.unigram_dict, self.vocab)
        for tokens in augmented
    )

    infos = [len(self.unigram_dict), 'unigram']
    return vec0 + vec1, infos
def extract(self, train_instance):
    """Compute IDF-based sentence-pair features using the 'global_idf' dict.

    Args:
        train_instance: object exposing ``get_word``; called with
            ``type='lemma', stopwords=True, lower=True``.

    Returns:
        The ``(features, infos)`` pair produced by
        ``utils.sentence_vectorize_features(..., convey='idf')``.
    """
    # NOTE(review): the dictionary is loaded and indexed on every call;
    # presumably DictLoader caches internally -- confirm before hoisting.
    loader = dict_utils.DictLoader()
    global_idf = loader.load_dict('global_idf')
    index = utils.word2index(global_idf)

    lemmas_a, lemmas_b = train_instance.get_word(
        type='lemma', stopwords=True, lower=True)

    features, infos = utils.sentence_vectorize_features(
        lemmas_a, lemmas_b, global_idf, index, convey='idf')
    return features, infos
def extract_information(self, train_instances):
    """Derive IDF state and load word embeddings for the training vocabulary.

    Both sentences of each instance are fetched via ``get_word`` using the
    extractor's ``word_type``, ``stopwords`` and ``lower`` settings, and
    the collected sequences are passed to ``utils.idf_calculator``.

    Side effects:
        Sets ``self.idf_weight``; sets ``self.vocab`` from
        ``utils.word2index`` and then replaces it, together with
        ``self.embeddings``, by the pair returned from
        ``utils.load_word_embedding(self.vocab, self.emb_file)``.

    Args:
        train_instances: iterable of objects exposing ``get_word``.
    """
    corpus = []
    for instance in train_instances:
        words_first, words_second = instance.get_word(
            type=self.word_type, stopwords=self.stopwords, lower=self.lower)
        corpus += [words_first, words_second]

    self.idf_weight = utils.idf_calculator(corpus)

    # The preliminary vocab is stored first so it remains available on the
    # instance even if loading the embedding file fails.
    self.vocab = utils.word2index(self.idf_weight)
    loaded = utils.load_word_embedding(self.vocab, self.emb_file)
    self.vocab, self.embeddings = loaded
def extract_information(self, train_instances):
    """Derive IDF state and embeddings from words tagged ``'n'``.

    POS-tagged sentences are fetched with ``get_pos_tag(stopwords=False)``;
    only words whose tag equals ``'n'`` are kept. The filtered sequences
    are passed to ``utils.idf_calculator``.

    Side effects:
        Sets ``self.idf_weight`` to the computed weights, and sets
        ``self.vocab`` / ``self.embeddings`` to the pair returned by
        ``utils.load_word_embedding`` for the indexed vocabulary and
        ``self.emb_file``.

    Args:
        train_instances: iterable of objects exposing ``get_pos_tag``.
    """
    def only_tagged_n(tagged):
        # Keep the word of every (word, tag) pair whose tag is exactly 'n'.
        return [word for word, tag in tagged if tag == 'n']

    corpus = []
    for instance in train_instances:
        tagged_first, tagged_second = instance.get_pos_tag(stopwords=False)
        corpus += [only_tagged_n(tagged_first), only_tagged_n(tagged_second)]

    weights = utils.idf_calculator(corpus)
    index = utils.word2index(weights)

    self.idf_weight = weights
    self.vocab, self.embeddings = utils.load_word_embedding(
        index, self.emb_file)