def _get_features_tgt(self, v_corpus=None, cls2id=None, domain="tgt"):
    """Extract feature dicts and labels for every sentence in *v_corpus*.

    Each corpus entry is read through the keys ``"label_corr"``,
    ``"parsed_corr"`` and ``"vidx_corr"``.  The extractors enabled in
    ``self.featuretypes`` are run on a ``SentenceFeatures`` object and the
    collected features are passed through ``proc_easyadapt``.

    Extraction is best effort: a sentence whose extraction fails, or that
    yields no features, is logged and skipped, so the returned lists may be
    shorter than *v_corpus*.  The three lists are always appended to
    together and therefore stay aligned.

    Args:
        v_corpus: iterable of per-sentence dicts; ``None`` is treated as an
            empty corpus.
        cls2id: mapping from label string to class id.  The lookup happens
            outside the guarded section, so an unknown label raises
            ``KeyError``.
        domain: forwarded to ``proc_easyadapt``.

    Returns:
        A tuple ``(features, labels_str, labels_int)`` of parallel lists.
    """
    features = []
    labels_int = []
    labels_str = []
    if v_corpus is None:
        return features, labels_str, labels_int

    for sdic in v_corpus:
        label = sdic["label_corr"]
        label_id = cls2id[label]
        try:
            fe = SentenceFeatures(
                sdic["parsed_corr"], verb=label, v_idx=sdic["vidx_corr"]
            )
            enabled = self.featuretypes
            if "chunk" in enabled:
                fe.chunk()
            for n in (3, 5, 7):
                if "%dgram" % n in enabled:
                    fe.ngrams(n=n)
            # NOTE(review): this method keys dependency features on "dep";
            # confirm that this is the spelling callers put in featuretypes.
            if "dep" in enabled:
                fe.dependency()
            if "srl" in enabled:
                fe.srl()
            if "ne" in enabled:
                fe.ne()
            # "errorprob" and "topic" are accepted in featuretypes but no
            # extractor is run for them here.
            augmented = proc_easyadapt(fe.features, domain=domain)
        except ValueError:
            logging.debug(pformat("CaseMaker feature extraction: couldn't find the verb"))
            continue
        except Exception:
            # Keep going on a bad sentence, but record the traceback instead
            # of swallowing every exception (including KeyboardInterrupt).
            logging.exception(
                "CaseMaker feature extraction failed for verb %r", label
            )
            continue

        # Explicit validation instead of ``assert``: it survives ``python -O``
        # and does not reject the legitimate class id 0.
        if not augmented or label_id is None or not label:
            logging.debug(
                "CaseMaker feature extraction: skipping verb %r (no usable features or label)",
                label,
            )
            continue

        features.append(augmented)
        labels_int.append(label_id)
        labels_str.append(label)

    return features, labels_str, labels_int
def get_features(tags=None, v="", v_idx=None, features=None):
    """Build the target-domain feature dict for a single sentence.

    A ``SentenceFeatures`` object is created from *tags*, *v* and *v_idx*,
    the extractors named in *features* are run on it, and the collected
    features are passed through ``proc_easyadapt`` with ``domain="tgt"``.

    Args:
        tags: tags handed to ``SentenceFeatures``; ``None`` means a fresh
            empty list (the default is no longer a list shared across calls).
        v: verb handed to ``SentenceFeatures``.
        v_idx: verb index handed to ``SentenceFeatures``.
        features: container of enabled feature names.  Recognised names are
            ``"chunk"``, ``"3gram"``, ``"5gram"``, ``"7gram"``,
            ``"dependency"``, ``"ne"``, ``"srl"``, ``"topic"`` and
            ``"errorprob"``; ``None`` enables nothing.

    Returns:
        Whatever ``proc_easyadapt`` returns for the collected features.
    """
    if tags is None:
        tags = []
    enabled = () if features is None else features

    fe = SentenceFeatures(tags=tags, verb=v, v_idx=v_idx)
    if "chunk" in enabled:
        fe.chunk()
    for n in (3, 5, 7):
        if "%dgram" % n in enabled:
            fe.ngrams(n=n)
    if "dependency" in enabled:
        fe.dependency()
    if "ne" in enabled:
        fe.ne()
    if "srl" in enabled:
        fe.srl()
    if "topic" in enabled:
        fe.topic()
    if "errorprob" in enabled:
        fe.ep()
    return proc_easyadapt(fe.features, domain="tgt")