def numEmoji(self):
    """Dump symbol and emoji feature columns for ``self.test`` to a CSV file.

    Every item of ``self.test`` is wrapped with ``features.RawSent``. The
    wrapped records are passed to ``features.numSymbols`` and
    ``features.countEmoji`` (both called with ``normalize=True``) and the
    results become the ``"numsymbols"`` and ``"numemoji"`` columns of a
    fresh DataFrame, which is written to the relative path
    ``symbol&emoji.csv`` using UTF-8 encoding.

    Returns:
        None. The only visible effect is the CSV file.
    """
    wrapped = [features.RawSent(item) for item in self.test]

    table = pd.DataFrame()
    table["numsymbols"] = features.numSymbols(wrapped, normalize=True)
    table["numemoji"] = features.countEmoji(wrapped, normalize=True)

    # The output name is a fixed relative path, so the file lands wherever
    # the process is currently running.
    table.to_csv("symbol&emoji.csv", encoding="utf-8")
def fShallow(self):
    """Register the shallow features computed over ``self.test``.

    Every item of ``self.test`` is wrapped with ``features.RawSent``. Each
    extractor listed below is then called on the wrapped records and its
    result is handed to ``self._add_feature`` under the paired name.
    Extractors flagged with ``True`` additionally receive the normalize
    flag (always ``True``) as their second positional argument.

    Returns:
        None. Results are delivered through ``self._add_feature``.
    """
    wrapped = [features.RawSent(item) for item in self.test]
    normalize = True

    # (feature name, extractor, whether the extractor takes the normalize flag)
    # Listed in the order the features are registered.
    plan = (
        ("avgwordlen", features.avgWordLen, False),
        ("sentlen", features.sentLen, False),
        ("numsymbols", features.numSymbols, True),
        ("numcapltrs", features.numCapLetters, True),
        ("numnumbers", features.numNumbers, True),
    )

    for label, extractor, takes_normalize in plan:
        if takes_normalize:
            values = extractor(wrapped, normalize)
        else:
            values = extractor(wrapped)
        self._add_feature(label, values)
def fShallow(self):
    """Register shallow and lexicon-based features computed over ``self.test``.

    The ``rawsent`` attribute of every item in ``self.test`` is collected
    into a list. Two kinds of extractors from ``features`` are then run on
    that list:

    * single-valued extractors, whose result is registered under one name
      (some of them also receive the normalize flag, always ``True``, as
      their second positional argument);
    * multi-valued extractors, whose result is iterated and paired
      positionally with a fixed tuple of names via ``zip``; each pair is
      registered separately.

    Every result is delivered through ``self._add_feature``.

    Returns:
        None.
    """
    sentences = [item.rawsent for item in self.test]
    normalize = True

    # (feature name, extractor, whether the extractor takes the normalize flag)
    # Listed in the order the features are registered.
    single_valued = (
        ("sentlen", features.sentLen, False),
        ("numnumbers", features.numNumbers, True),
        ("numcapltrs", features.numCapLetters, True),
        ("numsymbols", features.numSymbols, True),
        ("avgwordlen", features.avgWordLen, False),
        ("numconns", features.numConnectives, False),
        ("fracstopwords", features.fracStopwords, False),
    )
    for label, extractor, takes_normalize in single_valued:
        if takes_normalize:
            values = extractor(sentences, normalize)
        else:
            values = extractor(sentences)
        self._add_feature(label, values)

    # (extractor, names assigned positionally to the sequences it yields)
    multi_valued = (
        (features.mpqaGenInqInfo, ("mpqageninq-subj", "mpqageninq-polarity")),
        (features.mrcInfo, ("mrc-fami", "mrc-img")),
        (features.idf, ("idf-min", "idf-max", "idf-avg")),
    )
    for extractor, labels in multi_valued:
        # zip stops at the shorter side, so surplus names or surplus
        # result sequences are silently ignored.
        for label, values in zip(labels, extractor(sentences)):
            self._add_feature(label, values)