def test_single(self):
    """Run the full extractor pipeline over ``self.testdata`` and log the output.

    For every item in ``self.testdata`` a ``FeatureExtractor`` is built with the
    verb ``"report"``; the 5-gram, chunk, dependency, ne, bcv and srl
    extractors are run in that order; the zipped ``SUF``/``POS`` attributes and
    the collected ``features`` are logged at debug level; and the features are
    pushed through a sparse ``DictVectorizer`` and densified.

    The vectorised array is not inspected: this only checks that the pipeline
    runs without raising.
    """
    for sample in self.testdata:
        extractor = FeatureExtractor(sample, "report")

        # Same extractor order as always: later steps may depend on earlier ones.
        extractor.ngrams(n=5)
        extractor.chunk()
        extractor.dependency()
        extractor.ne()
        extractor.bcv()
        extractor.srl()

        suf_pos_pairs = zip(extractor.SUF, extractor.POS)
        logging.debug(pformat(suf_pos_pairs))
        logging.debug(pformat(extractor.features))

        # Result intentionally discarded; vectorising must simply not fail.
        vectorizer = DictVectorizer(sparse=True)
        vectorizer.fit_transform(extractor.features).toarray()

    # NOTE(review): this unconditional raise makes the test fail on every run.
    # Presumably a leftover used to make the test runner display the captured
    # debug log -- confirm with the author before removing it.
    raise Exception
def _get_features(self, v="", v_corpus=None, cls2id=None, domain="src"):
    """Build feature entries and parallel label lists for the verb ``v``.

    Each element of ``v_corpus`` is handed to ``FeatureExtractor`` together
    with ``v``.  The extractors enabled in ``self.featuretypes`` are run in a
    fixed order, and the resulting ``fe.features`` is passed through
    ``proc_easyadapt`` with ``domain`` before being collected.  Elements for
    which extraction raises ``ValueError`` are skipped and reported at debug
    level.  When ``v_corpus`` is empty or ``None``, a single
    ``self.nullfeature`` entry labelled ``v`` is returned instead.

    Args:
        v: The verb; used as the string label and as the key into ``cls2id``.
        v_corpus: Iterable of items accepted by ``FeatureExtractor``, or a
            falsy value to request the null-feature fallback.
        cls2id: Mapping from verb to integer class id.  Effectively required:
            the ``None`` default cannot be subscripted.
        domain: Forwarded unchanged to ``proc_easyadapt``.

    Returns:
        A tuple ``(features, labels_str, labels_int)`` of three parallel lists.

    Raises:
        KeyError: If ``v`` is not a key of ``cls2id``.
        TypeError: If ``cls2id`` is left as ``None``.
        Exception: Any non-``ValueError`` failure during extraction is logged
            with its traceback and re-raised unchanged.
    """
    label_id = cls2id[v]
    features = []
    labels_str = []
    labels_int = []

    if not v_corpus:
        # Nothing to extract from: emit the placeholder entry for this verb.
        features.append(self.nullfeature)
        labels_str.append(v)
        labels_int.append(label_id)
        return features, labels_str, labels_int

    # (feature type, FeatureExtractor method name, keyword arguments), listed
    # in the order the extractors must run.  The "errorprob" and "topic"
    # feature types are recognised by callers but have no extractor yet, so
    # they are deliberately absent here.
    steps = (
        ("chunk", "chunk", {}),
        ("3gram", "ngrams", {"n": 3}),
        ("5gram", "ngrams", {"n": 5}),
        ("7gram", "ngrams", {"n": 7}),
        ("dep", "dependency", {}),
        ("srl", "srl", {}),
        ("ne", "ne", {}),
    )
    # Resolve the enabled steps once instead of re-testing for every sentence.
    enabled_steps = [
        (method_name, kwargs)
        for feature_type, method_name, kwargs in steps
        if feature_type in self.featuretypes
    ]

    for sentence in v_corpus:
        try:
            fe = FeatureExtractor(sentence, verb=v)
            for method_name, kwargs in enabled_steps:
                getattr(fe, method_name)(**kwargs)
            augmented = proc_easyadapt(fe.features, domain=domain)
        except ValueError:
            # Best effort: skip this sentence and keep going.
            logging.debug(pformat("CaseMaker feature extraction: couldn't find the verb"))
            continue
        except Exception:
            # Record which verb was being processed, then let the error through.
            logging.exception("Feature extraction failed for verb %r", v)
            raise

        features.append(augmented)
        labels_str.append(v)
        labels_int.append(label_id)

    return features, labels_str, labels_int