def extract(self, train_instance):
        """Build match and vectorized features from the instance's dependency pair.

        Args:
            train_instance: Object exposing ``get_dependency()``, which must
                return exactly two values (one per sentence).

        Returns:
            A ``(features, infos)`` tuple. ``features`` is a list holding the
            feature values from ``utils.sentence_match_features`` followed by
            those from ``utils.sentence_vectorize_features`` (called with
            ``self.idf_weight`` and ``convey=self.convey``). ``infos`` is the
            list of the two dependency inputs.
        """
        first_dep, second_dep = train_instance.get_dependency()

        combined = []

        # Each helper returns a (feature, info) pair; only the feature part
        # is kept, the info part is discarded.
        match_part, _ = utils.sentence_match_features(first_dep, second_dep)
        combined.extend(match_part)

        vector_part, _ = utils.sentence_vectorize_features(
            first_dep,
            second_dep,
            self.idf_weight,
            convey=self.convey,
        )
        combined.extend(vector_part)

        return combined, [first_dep, second_dep]
    def extract(self, train_instance):
        """Build sequence, match and vectorized features from the preprocessed pair.

        Args:
            train_instance: Object exposing ``get_preprocess()``, which must
                return exactly two values (one per sentence).

        Returns:
            A ``(features, infos)`` tuple. ``features`` is a list holding, in
            order, the feature values from ``utils.sentence_sequence_features``,
            ``utils.sentence_match_features`` and
            ``utils.sentence_vectorize_features``. ``infos`` is the list
            ``[sa, sb]`` of the two preprocessed inputs.
        """
        sa, sb = train_instance.get_preprocess()
        # NOTE: a disabled alternative input was
        # train_instance.get_word(type='lemma', stopwords=True, lower=True).

        features = []

        # Each helper returns a (feature, info) pair; only the feature part
        # is used here.
        feature, _ = utils.sentence_sequence_features(sa, sb)
        features += feature

        feature, _ = utils.sentence_match_features(sa, sb)
        features += feature

        # The weights are computed from just this instance's two sentences
        # and passed as the third argument to the vectorizer.
        bow = utils.idf_calculator([sa, sb])
        feature, _ = utils.sentence_vectorize_features(sa,
                                                       sb,
                                                       bow,
                                                       convey='count')
        features += feature

        infos = [sa, sb]
        return features, infos
Beispiel #3
0
 def extract(self, train_instance):
     """Return match features computed over the instance's POS sequences.

     Args:
         train_instance: Object exposing ``get_word(type=..., stopwords=...)``;
             called with ``type='pos'`` and ``stopwords=True`` and expected
             to return exactly two values (one per sentence).

     Returns:
         The two values produced by ``utils.sentence_match_features`` for the
         POS pair, repacked as a ``(features, infos)`` tuple.
     """
     pos_pair = train_instance.get_word(type='pos', stopwords=True)
     first_pos, second_pos = pos_pair

     # Unpack and repack so the result is always a 2-tuple.
     match_features, match_infos = utils.sentence_match_features(first_pos,
                                                                 second_pos)
     return match_features, match_infos