コード例 #1
0
    def numEmoji(self):
        """Export symbol and emoji feature columns for ``self.test`` to CSV.

        Every item of ``self.test`` is wrapped in ``features.RawSent``.  The
        wrapped items are handed to ``features.numSymbols`` and
        ``features.countEmoji`` (both called with ``normalize=True``), and the
        two results become the ``numsymbols`` and ``numemoji`` columns of a
        DataFrame that is written to the relative path ``symbol&emoji.csv``
        with UTF-8 encoding.
        """
        table = pd.DataFrame()
        sentences = [features.RawSent(item) for item in self.test]

        # Columns are filled one at a time, in the same order they appear
        # in the output file.
        symbol_col = features.numSymbols(sentences, normalize=True)
        table["numsymbols"] = symbol_col
        emoji_col = features.countEmoji(sentences, normalize=True)
        table["numemoji"] = emoji_col

        table.to_csv("symbol&emoji.csv", encoding="utf-8")
コード例 #2
0
 def fShallow(self):
     """Register the shallow surface features computed over ``self.test``.

     Every item of ``self.test`` is wrapped in ``features.RawSent``; each
     extractor below is then called on the wrapped items and its result is
     stored through ``self._add_feature`` under the paired key.
     """
     sentences = [features.RawSent(item) for item in self.test]
     scaled = True  # forwarded positionally as the extractor's 2nd argument

     # (feature key, extractor, extra positional arguments), in the order
     # the features are registered.
     plan = (
         ("avgwordlen", features.avgWordLen, ()),
         ("sentlen", features.sentLen, ()),
         ("numsymbols", features.numSymbols, (scaled,)),
         ("numcapltrs", features.numCapLetters, (scaled,)),
         ("numnumbers", features.numNumbers, (scaled,)),
     )
     for column, extractor, extra in plan:
         self._add_feature(column, extractor(sentences, *extra))
コード例 #3
0
 def fShallow(self):
     """Register the shallow features computed over ``self.test``.

     The ``rawsent`` attribute of every item in ``self.test`` is collected,
     each extractor from ``features`` is run on that list, and the results
     are stored through ``self._add_feature``.  Extractors in the first
     table contribute one feature each; extractors in the second table
     return several value sequences that are paired, in order, with the
     listed keys.
     """
     sentences = [item.rawsent for item in self.test]
     scaled = True  # forwarded positionally as the extractor's 2nd argument

     # (feature key, extractor, extra positional arguments)
     single_valued = (
         ("sentlen", features.sentLen, ()),
         ("numnumbers", features.numNumbers, (scaled,)),
         ("numcapltrs", features.numCapLetters, (scaled,)),
         ("numsymbols", features.numSymbols, (scaled,)),
         ("avgwordlen", features.avgWordLen, ()),
         ("numconns", features.numConnectives, ()),
         ("fracstopwords", features.fracStopwords, ()),
     )
     for column, extractor, extra in single_valued:
         self._add_feature(column, extractor(sentences, *extra))

     # (feature keys, extractor returning one value sequence per key)
     multi_valued = (
         (("mpqageninq-subj", "mpqageninq-polarity"), features.mpqaGenInqInfo),
         (("mrc-fami", "mrc-img"), features.mrcInfo),
         (("idf-min", "idf-max", "idf-avg"), features.idf),
     )
     for columns, extractor in multi_valued:
         # zip stops at the shorter side, so surplus keys or value
         # sequences are dropped without error.
         for column, values in zip(columns, extractor(sentences)):
             self._add_feature(column, values)