def extract_information(self, train_instances):
        """Gather lemma sequences from the training set and derive IDF data.

        Each instance contributes the two sequences returned by
        ``get_word(type='lemma', lower=True)``; stopword handling follows
        ``self.stopwords``.

        Side effects:
            ``self.idf_weight`` is set to the result of
            ``utils.idf_calculator`` over all collected sequences, and
            ``self.vocab`` to ``utils.word2index`` of that result.
        """
        corpus = []
        for instance in train_instances:
            first, second = instance.get_word(
                type='lemma', stopwords=self.stopwords, lower=True)
            # Both sequences of a pair go into the same flat corpus.
            corpus += [first, second]

        self.idf_weight = utils.idf_calculator(corpus)
        self.vocab = utils.word2index(self.idf_weight)
 def extract_information(self, train_instances):
     """Gather dependency pairs from the training set and derive IDF data.

     Every dependency entry returned by ``get_dependency()`` is reduced to
     the tuple of its elements at index 1 and 2 (presumably the two linked
     tokens -- confirm against ``get_dependency``).

     Side effects:
         ``self.idf_weight`` is set to the result of
         ``utils.idf_calculator`` over the reduced sequences, and
         ``self.vocab`` to ``utils.word2index`` of that result.
     """
     corpus = []
     for instance in train_instances:
         deps_a, deps_b = instance.get_dependency()
         for deps in (deps_a, deps_b):
             # Drop element 0 of each entry; keep only the (1, 2) pair.
             corpus.append([(entry[1], entry[2]) for entry in deps])
     self.idf_weight = utils.idf_calculator(corpus)
     self.vocab = utils.word2index(self.idf_weight)
    def extract(self, train_instance):
        """Vectorize both warrant candidates against ``self.unigram_dict``.

        Each warrant is concatenated with the reason and the claim, then
        passed to ``utils.vectorize``. The two results are combined with
        ``+`` and returned together with an info list.

        Side effects:
            ``self.vocab`` is rebuilt from ``self.unigram_dict`` on every
            call.

        Returns:
            A pair ``(features, infos)`` where ``infos`` is
            ``[len(self.unigram_dict), 'unigram']``.
        """
        # The title and extra info are part of the six-tuple but unused here.
        warrant0, warrant1, reason, claim, _title, _info = \
            train_instance.get_six(type='word')

        candidates = [
            warrant + reason + claim for warrant in (warrant0, warrant1)
        ]

        self.vocab = utils.word2index(self.unigram_dict)
        first_vec, second_vec = (
            utils.vectorize(candidate, self.unigram_dict, self.vocab)
            for candidate in candidates
        )
        infos = [len(self.unigram_dict), 'unigram']
        return first_vec + second_vec, infos
 def extract(self, train_instance):
     """Build sentence-pair features weighted by the 'global_idf' dictionary.

     The dictionary is loaded through ``dict_utils.DictLoader`` on each
     call, indexed with ``utils.word2index``, and handed to
     ``utils.sentence_vectorize_features`` (``convey='idf'``) together with
     the instance's two lowercased lemma sequences (stopwords=True).

     Returns:
         The ``(features, infos)`` pair produced by
         ``utils.sentence_vectorize_features``.
     """
     loader = dict_utils.DictLoader()
     global_idf = loader.load_dict('global_idf')
     word_index = utils.word2index(global_idf)

     first, second = train_instance.get_word(
         type='lemma', stopwords=True, lower=True)

     vectors, meta = utils.sentence_vectorize_features(
         first, second, global_idf, word_index, convey='idf')
     return vectors, meta
 def extract_information(self, train_instances):
     """Gather word sequences, derive IDF data, and load word embeddings.

     Token retrieval is configured by ``self.word_type``,
     ``self.stopwords`` and ``self.lower``.

     Side effects:
         ``self.idf_weight`` is set to the result of
         ``utils.idf_calculator`` over all collected sequences.
         ``self.vocab`` is first set to ``utils.word2index`` of that
         result, then replaced -- along with ``self.embeddings`` -- by what
         ``utils.load_word_embedding`` returns for ``self.emb_file``.
     """
     corpus = []
     for instance in train_instances:
         first, second = instance.get_word(type=self.word_type,
                                           stopwords=self.stopwords,
                                           lower=self.lower)
         corpus += [first, second]

     self.idf_weight = utils.idf_calculator(corpus)
     self.vocab = utils.word2index(self.idf_weight)
     # The loader returns its own vocabulary, which supersedes the one above.
     loaded = utils.load_word_embedding(self.vocab, self.emb_file)
     self.vocab, self.embeddings = loaded
Exemple #6
0
    def extract_information(self, train_instances):
        """Gather words tagged 'n', derive IDF data, and load embeddings.

        Only words whose POS tag equals ``'n'`` (presumably nouns -- confirm
        against the tagger's tag set) are kept from each of the two tagged
        sequences returned by ``get_pos_tag(stopwords=False)``.

        Side effects:
            ``self.idf_weight`` is set to the result of
            ``utils.idf_calculator`` over the filtered sequences;
            ``self.vocab`` and ``self.embeddings`` are set from
            ``utils.load_word_embedding`` using ``self.emb_file``.
        """
        def keep_tag_n(tagged):
            # Keep the word of every (word, tag) pair whose tag is 'n'.
            return [word for word, tag in tagged if tag == 'n']

        corpus = []
        for instance in train_instances:
            tagged_a, tagged_b = instance.get_pos_tag(stopwords=False)
            corpus.append(keep_tag_n(tagged_a))
            corpus.append(keep_tag_n(tagged_b))

        weights = utils.idf_calculator(corpus)
        base_vocab = utils.word2index(weights)
        self.idf_weight = weights
        loaded = utils.load_word_embedding(base_vocab, self.emb_file)
        self.vocab, self.embeddings = loaded