예제 #1
0
    def test_single(self):
        """Run all feature extractors over each test sentence and vectorize the result.

        For every item in ``self.testdata`` a ``FeatureExtractor`` is built
        with the verb ``"report"``, each extractor is run in a fixed order,
        the collected features are logged at debug level and then passed
        through a ``DictVectorizer``.

        NOTE(review): the method ends with an unconditional ``raise Exception``,
        so it always fails -- presumably to make the test runner display the
        captured debug logging. Confirm before relying on this test's outcome.
        """
        for sample in self.testdata:
            extractor = FeatureExtractor(sample, "report")
            extractor.ngrams(n=5)
            # The remaining extractors take no arguments; keep this exact order.
            for step in ("chunk", "dependency", "ne", "bcv", "srl"):
                getattr(extractor, step)()

            logging.debug(pformat(zip(extractor.SUF, extractor.POS)))
            logging.debug(pformat(extractor.features))

            # Vectorize the features and densify; the array itself is not inspected.
            vectorizer = DictVectorizer(sparse=True)
            dense = vectorizer.fit_transform(extractor.features).toarray()

        raise Exception
예제 #2
0
 def _get_features(self, v="", v_corpus=None, cls2id=None, domain="src"):
     """Collect feature dicts and labels for every sentence of one verb.

     Args:
         v: The verb; used as the string label and as the key into ``cls2id``.
         v_corpus: Iterable of sentences for ``v``. When empty or ``None``,
             a single ``self.nullfeature`` entry is returned instead.
         cls2id: Mapping from verb to integer class id. It is indexed
             unconditionally, so it must contain ``v`` (a missing key or the
             ``None`` default raises).
         domain: Passed through to ``proc_easyadapt`` as ``domain``.

     Returns:
         A 3-tuple ``(features, string_labels, int_labels)`` of parallel lists.

     Raises:
         Any exception other than ``ValueError`` raised while processing a
         sentence is re-raised after ``v`` has been printed.
     """
     # (feature type, extractor method, keyword args) in the order the
     # extractors must run. "errorprob" and "topic" are accepted feature
     # types with no extractor implemented, so they are absent here.
     steps = (
         ("chunk", "chunk", {}),
         ("3gram", "ngrams", {"n": 3}),
         ("5gram", "ngrams", {"n": 5}),
         ("7gram", "ngrams", {"n": 7}),
         ("dep", "dependency", {}),
         ("srl", "srl", {}),
         ("ne", "ne", {}),
     )
     features = []
     labels_int = []
     labels_str = []
     label_id = cls2id[v]

     if not v_corpus:
         # No sentences for this verb: emit one placeholder example.
         features.append(self.nullfeature)
         labels_int.append(label_id)
         labels_str.append(v)
         return features, labels_str, labels_int

     for sentence in v_corpus:
         try:
             fe = FeatureExtractor(sentence, verb=v)
             for feature_type, method_name, kwargs in steps:
                 if feature_type in self.featuretypes:
                     getattr(fe, method_name)(**kwargs)
             augmented = proc_easyadapt(fe.features, domain=domain)
             features.append(augmented)
             labels_int.append(label_id)
             labels_str.append(v)
         except ValueError:
             # Best effort: a sentence that raises ValueError is skipped.
             logging.debug(pformat("CaseMaker feature extraction: couldn't find the verb"))
         except:
             # Deliberately bare: show which verb failed, then propagate
             # whatever was raised. print(v) is valid on Python 2 and 3.
             print(v)
             raise
     return features, labels_str, labels_int