def test_single2(self):
    # Locate the SENNA installation; the environment variable is read but
    # then overridden with a hard-coded local path.
    sp = unicode(os.environ["SENNAPATH"])
    sp = u"/Users/tuxedocat/Research/tools/senna/"
    parser = SennaParser(sp)
    # Read the raw sentences, one per line, dropping empty lines.
    txt = open("/Users/tuxedocat/Documents/workspace/Nyanco/sandbox/recognize100.txt").read().split("\n")
    txt = [s for s in txt if not s == ""]
    # testdata = open("/Users/tuxedocat/Documents/workspace/precure/src/test/sennatags.txt").read()
    testdata = []
    for s in txt:
        testdata.append(parser.parseSentence(s))
    for t in testdata:
        # Extract 5-gram and chunk features around the verb "recognize".
        fe = SentenceFeatures(t, "recognize")
        fe.ngrams(n=5)
        fe.chunk()
        # fe.dependency()
        # fe.ne()
        # fe.bcv()
        # fe.srl()
        logging.debug(pformat((fe.SUF, fe.CHK, fe.NER)))
        logging.debug(" ".join(fe.SUF))
        logging.debug(fe.v_idx)
        logging.debug(pformat(fe.features))

        # Vectorize this sentence's feature dict on its own.
        vec = DictVectorizer(sparse=True)
        array_f = vec.fit_transform(fe.features).toarray()
        # logging.debug(pformat(array_f))

    # Fail deliberately so the test runner shows the logged debug output.
    raise Exception
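
The examples on this page all funnel their feature dicts through sklearn's DictVectorizer. As a point of reference, here is a minimal, self-contained sketch of that pattern with made-up feature names (the actual keys produced by SentenceFeatures are not shown on this page):

from sklearn.feature_extraction import DictVectorizer

# Two illustrative feature dicts of the kind SentenceFeatures.features holds:
# string values become one-hot indicator columns, numeric values stay numeric.
feature_dicts = [
    {"w-1": "to", "chunk": "VP", "len": 12},
    {"w-1": "can", "chunk": "NP", "len": 7},
]

vec = DictVectorizer(sparse=True)
X = vec.fit_transform(feature_dicts)  # scipy sparse matrix, shape (2, n_features)

print(vec.feature_names_)  # e.g. ['chunk=NP', 'chunk=VP', 'len', 'w-1=can', 'w-1=to']
print(X.toarray())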
Example #3
def test_with_offset(self):
    # Each blank-line-separated block in the file is one parsed document.
    self.testdata = [
        doc.split("\n")
        for doc in open(self.testpath_off).read().split("\n\n") if doc
    ]
    fv = []
    # print pformat(self.testdata)
    for t in self.testdata:
        fe = SentenceFeatures(t)
        fe.length()
        fe.bow()
        logging.debug(pformat(zip(fe.SUF, fe.POS)))
        logging.debug(pformat(fe.OFFSET))
        logging.debug(pformat(fe.features))
        fv.append(fe.features)
    # Vectorize the collected feature dicts in one pass.
    vec = DictVectorizer(sparse=True)
    array_f = vec.fit_transform(fv).toarray()
    logging.debug(pformat(array_f))
    # Fail deliberately so the test runner shows the logged debug output.
    raise Exception
Example #4
def _get_features_tgt(self, v_corpus=None, cls2id=None, domain="tgt"):
    _flist = []
    _labellist_int = []
    _labellist_str = []
    for sid, sdic in enumerate(v_corpus):
        v = sdic["label_corr"]
        _labelid = cls2id[v]
        try:
            fe = SentenceFeatures(sdic["parsed_corr"], verb=v, v_idx=sdic["vidx_corr"])
            # Extract only the feature types requested for this run.
            if "chunk" in self.featuretypes:
                fe.chunk()
            if "3gram" in self.featuretypes:
                fe.ngrams(n=3)
            if "5gram" in self.featuretypes:
                fe.ngrams(n=5)
            if "7gram" in self.featuretypes:
                fe.ngrams(n=7)
            if "dep" in self.featuretypes:
                fe.dependency()
            if "srl" in self.featuretypes:
                fe.srl()
            if "ne" in self.featuretypes:
                fe.ne()
            if "errorprob" in self.featuretypes:
                pass
            if "topic" in self.featuretypes:
                pass
            # Augment the feature dict for the given domain via proc_easyadapt.
            augf = proc_easyadapt(fe.features, domain=domain)
            assert augf and _labelid and v
            _flist.append(augf)
            _labellist_int.append(_labelid)
            _labellist_str.append(v)
        except ValueError:
            logging.debug(pformat("CaseMaker feature extraction: couldn't find the verb"))
        except:
            print v
    # else:
    # _flist.append(self.nullfeature)
    # _labellist_int.append(_labelid)
    # _labellist_str.append(v)
    return _flist, _labellist_str, _labellist_int
Example #5
def get_features(tags=[], v="", v_idx=None, features=[]):
    # Build features for one tagged sentence, extract only the requested
    # feature types, and return the target-domain (augmented) feature dict.
    fe = SentenceFeatures(tags=tags, verb=v, v_idx=v_idx)
    if "chunk" in features:
        fe.chunk()
    if "3gram" in features:
        fe.ngrams(n=3)
    if "5gram" in features:
        fe.ngrams(n=5)
    if "7gram" in features:
        fe.ngrams(n=7)
    if "dependency" in features:
        fe.dependency()
    if "ne" in features:
        fe.ne()
    if "srl" in features:
        fe.srl()
    if "topic" in features:
        fe.topic()
    if "errorprob" in features:
        fe.ep()
    # print pformat(fe.features)
    return proc_easyadapt(fe.features, domain="tgt")
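
proc_easyadapt itself is not shown in any of these examples. Judging from its name and the domain argument, it presumably performs Daumé-style "frustratingly easy" domain adaptation, copying each feature into a shared namespace plus a domain-specific one. The helper below is only a guess at that behaviour; the function name, key prefixes, and key format are assumptions, not the project's actual code:

def proc_easyadapt_sketch(features, domain="tgt"):
    # Hypothetical EasyAdapt augmentation: each feature is duplicated into a
    # "general" copy (shared across domains) and a domain-specific copy.
    augmented = {}
    for name, value in features.items():
        augmented["general_" + name] = value
        augmented[domain + "_" + name] = value
    return augmented

# The same sentence features augmented for the source and the target domain
# only overlap on the "general_*" keys, which is what lets a single model
# share evidence across domains while keeping domain-specific weights.
f = {"w-1=to": 1, "chunk=VP": 1}
print(proc_easyadapt_sketch(f, domain="src"))
print(proc_easyadapt_sketch(f, domain="tgt"))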