예제 #1
0
 def extract(self, train_instance):
     """Vectorize the POS-level word sequences of a sentence pair.

     The two sequences come from ``train_instance.get_word`` called with
     ``type='pos'`` and ``stopwords=True``. They are handed, together with
     ``self.idf_weight``, to ``utils.sentence_vectorize_features`` with
     ``convey='count'``.

     Returns:
         tuple: ``(features, infos)`` exactly as produced by the helper.
     """
     word_pair = train_instance.get_word(type='pos', stopwords=True)
     first_pos, second_pos = word_pair

     vectors, details = utils.sentence_vectorize_features(
         first_pos,
         second_pos,
         self.idf_weight,
         convey='count',
     )
     return vectors, details
예제 #2
0
 def extract(self, train_instance):
     """Vectorize the lower-cased lemma sequences of a sentence pair.

     Words are requested from ``train_instance.get_word`` with
     ``type='lemma'``, ``lower=True`` and the instance's own ``stopwords``
     setting. The resulting pair is passed with ``self.idf_weight`` to
     ``utils.sentence_vectorize_features``.

     Returns:
         tuple: ``(features, infos)`` exactly as produced by the helper.
     """
     fetch_words = train_instance.get_word
     lemmas_a, lemmas_b = fetch_words(
         type='lemma', stopwords=self.stopwords, lower=True)

     vectors, details = utils.sentence_vectorize_features(
         lemmas_a,
         lemmas_b,
         self.idf_weight,
     )
     return vectors, details
예제 #3
0
    def extract(self, train_instance):
        """Vectorize a sentence pair's lemmas using the ``global_idf`` dictionary.

        The weight table is obtained via
        ``dict_utils.DictLoader().load_dict('global_idf')``, and the words via
        ``train_instance.get_word`` with ``type='lemma'``, ``stopwords=True``
        and ``lower=True``. Both are passed to
        ``utils.sentence_vectorize_features``.

        Returns:
            tuple: ``(features, infos)`` exactly as produced by the helper.
        """
        # NOTE(review): the dictionary is requested on every call; whether
        # DictLoader caches the result is not visible from here -- confirm.
        loader = dict_utils.DictLoader()
        global_idf = loader.load_dict('global_idf')

        lemmas_a, lemmas_b = train_instance.get_word(
            type='lemma', stopwords=True, lower=True)

        vectors, details = utils.sentence_vectorize_features(
            lemmas_a, lemmas_b, global_idf)
        return vectors, details
    def extract(self, train_instance):
        """Collect match and vectorized features over a pair's dependency data.

        The pair returned by ``train_instance.get_dependency()`` is fed first
        to ``utils.sentence_match_features`` and then to
        ``utils.sentence_vectorize_features`` (with ``self.idf_weight`` and
        ``convey=self.convey``). The feature parts from both helpers are
        concatenated in that order; the helpers' own info values are discarded.

        Returns:
            tuple: ``(features, infos)`` where ``features`` is the combined
            list and ``infos`` is ``[dep_sa, dep_sb]``, the two dependency
            inputs themselves.
        """
        dep_first, dep_second = train_instance.get_dependency()
        combined = []

        # Match-based part; the helper's info value is not used.
        match_part, _ = utils.sentence_match_features(dep_first, dep_second)
        combined.extend(match_part)

        # Vectorized part, appended after the match-based part.
        vector_part, _ = utils.sentence_vectorize_features(
            dep_first,
            dep_second,
            self.idf_weight,
            convey=self.convey,
        )
        combined.extend(vector_part)

        return combined, [dep_first, dep_second]
    def extract(self, train_instance):
        """Collect sequence, match and vectorized features for a sentence pair.

        The pair returned by ``train_instance.get_preprocess()`` is passed, in
        order, to ``utils.sentence_sequence_features``,
        ``utils.sentence_match_features`` and
        ``utils.sentence_vectorize_features``. The last call uses weights from
        ``utils.idf_calculator([sa, sb])`` and ``convey='count'``. The feature
        parts are concatenated in that order; the helpers' own info values are
        discarded.

        Returns:
            tuple: ``(features, infos)`` where ``features`` is the combined
            list and ``infos`` is ``[sa, sb]``, the two preprocessed inputs.
        """
        sa, sb = train_instance.get_preprocess()

        features = []

        feature, _ = utils.sentence_sequence_features(sa, sb)
        features += feature

        feature, _ = utils.sentence_match_features(sa, sb)
        features += feature

        # Weights are derived from just this sentence pair, not a global table.
        bow = utils.idf_calculator([sa, sb])
        feature, _ = utils.sentence_vectorize_features(sa,
                                                       sb,
                                                       bow,
                                                       convey='count')
        features += feature

        infos = [sa, sb]
        return features, infos