コード例 #1
0
 def loadSentences(self, identifier, sentlist):
     """Replace the loaded test instances with the given sentences.

     ``sentlist`` should be a list of tokenized sentence strings and
     ``identifier`` a string serving as the header of that list.  Every
     sentence is wrapped with ``features.RawSent`` and stored in
     ``self.test`` as an ``Instance`` whose id is
     ``identifier + "." + <position>`` and whose second argument is ``0``.
     ``self.fileid`` is set to ``identifier``.
     """
     self.test = []
     self.fileid = identifier
     position = 0
     for sentence in sentlist:
         instance_id = identifier + "." + str(position)
         raw_sentence = features.RawSent(sentence)
         self.test.append(Instance(instance_id, 0, raw_sentence))
         position += 1
コード例 #2
0
    def numEmoji(self):
        """Write symbol and emoji counts of the test set to a CSV file.

        Each entry of ``self.test`` is passed through ``features.RawSent``;
        the results of ``features.numSymbols`` and ``features.countEmoji``
        (both called with ``normalize=True``) become the ``numsymbols`` and
        ``numemoji`` columns of a DataFrame that is saved, UTF-8 encoded,
        under the relative path ``symbol&emoji.csv``.
        """
        raw_sentences = []
        for inst in self.test:
            raw_sentences.append(features.RawSent(inst))

        table = pd.DataFrame()
        table["numsymbols"] = features.numSymbols(raw_sentences,
                                                  normalize=True)
        table["numemoji"] = features.countEmoji(raw_sentences,
                                                normalize=True)
        table.to_csv("symbol&emoji.csv", encoding="utf-8")
コード例 #3
0
 def fShallow(self):
     """Compute the shallow surface features and hand them to ``_add_feature``.

     Five features are added, in this order: ``avgwordlen`` and ``sentlen``
     (computed without a normalization flag), then ``numsymbols``,
     ``numcapltrs`` and ``numnumbers`` (each computed with the flag set to
     ``True``).  All of them are derived from ``features.RawSent`` applied
     to every entry of ``self.test``.
     """
     sentences = []
     for inst in self.test:
         sentences.append(features.RawSent(inst))
     use_normalized = True

     avg_word_len = features.avgWordLen(sentences)
     self._add_feature("avgwordlen", avg_word_len)

     sent_len = features.sentLen(sentences)
     self._add_feature("sentlen", sent_len)

     symbol_counts = features.numSymbols(sentences, use_normalized)
     self._add_feature("numsymbols", symbol_counts)

     capital_counts = features.numCapLetters(sentences, use_normalized)
     self._add_feature("numcapltrs", capital_counts)

     number_counts = features.numNumbers(sentences, use_normalized)
     self._add_feature("numnumbers", number_counts)
コード例 #4
0
 def NE_Concrete(self):
     """Add the named-entity and ``Concrete`` columns as features.

     Runs ``features.NE_Concrete_Emo`` on the ``features.RawSent`` form of
     every entry in ``self.test`` and passes each selected column of the
     result (looked up with ``.loc[:, name]``) to ``self._add_feature``
     under the column's own name.
     """
     raw_sentences = []
     for inst in self.test:
         raw_sentences.append(features.RawSent(inst))
     tag_table = features.NE_Concrete_Emo(raw_sentences)

     # Only these columns of the result are kept as features.
     wanted_columns = (
         'ORGANIZATION',
         "PERCENT",
         'PERSON',
         'DATE',
         'MONEY',
         'TIME',
         'LOCATION',
         'Concrete',
     )
     for column in wanted_columns:
         column_values = tag_table.loc[:, column]
         self._add_feature(column, column_values)
コード例 #5
0
 def loadFromFile(self, filename):
     """Load test instances from a text file, one sentence per line.

     Resets ``self.test`` and sets ``self.fileid`` to the base name of
     ``filename``.  Lines that are empty after stripping whitespace are
     skipped.  Every remaining (stripped) line is wrapped with
     ``features.RawSent`` and appended to ``self.test`` as an ``Instance``
     whose id is ``<fileid>.<n>``, where ``n`` counts only the kept lines,
     starting at 0, and whose second argument is ``0``.

     NOTE(review): the file is opened with the platform default encoding;
     confirm that this is intended for non-ASCII input.
     """
     self.test = []
     self.fileid = os.path.basename(filename)
     # The ``with`` block closes the file on exit, so no explicit close()
     # call is needed afterwards.
     with open(filename) as handle:
         stripped_lines = (line.strip() for line in handle)
         non_blank = (text for text in stripped_lines if text)
         # Enumerating the filtered stream numbers only the kept lines.
         for index, text in enumerate(non_blank):
             instance_id = self.fileid + "." + str(index)
             self.test.append(
                 Instance(instance_id, 0, features.RawSent(text)))
コード例 #6
0
 def transEmotionFeature(self):
     """Add the emoji count plus the ``Negative``/``Positive`` features.

     ``self.transformEmoji()`` is called first.  The two emotion columns
     are then read from ``NE_Concrete_Emo.csv`` (relative path); if reading
     that file raises ``IOError`` they are computed instead by running
     ``features.NE_Concrete_Emo`` on the ``features.RawSent`` form of every
     entry in ``self.test``.  Each column is passed to
     ``self._add_feature`` under its own name.
     """
     self.transformEmoji()
     try:
         # Guard only the read: an IOError raised while registering the
         # features must not be mistaken for a missing cache file.
         emotions = pd.read_csv("NE_Concrete_Emo.csv")
     except IOError:
         sentences = [features.RawSent(r) for r in self.test]
         emotions = features.NE_Concrete_Emo(sentences)

     for column in ("Negative", "Positive"):
         self._add_feature(column, emotions.loc[:, column])
コード例 #7
0
    def fNeuralVec(self):
        """Add the 100 ``word_embed-<i>`` columns to ``self.featurestest``.

        When ``word_embed-0`` is already a key of ``self.featurestest`` the
        method returns immediately without touching the sentences.
        Otherwise ``features.word_2_weights`` is called with the
        ``features.RawSent`` form of every entry in ``self.test`` and
        ``self.embeddings``; column ``i`` of ``self.featurestest`` receives
        element ``i`` of each row of the result.
        """
        keys = ["word_embed-" + str(i) for i in range(100)]

        if keys[0] in self.featurestest:
            # Already generated: skip the sentence extraction as well.
            return

        sentlst = [features.RawSent(r) for r in self.test]
        embeddingList = features.word_2_weights(sentlst, self.embeddings)
        for fid, fname in enumerate(keys):
            # One value per sentence: component ``fid`` of its row.
            self.featurestest[fname] = [row[fid] for row in embeddingList]

        print("Successfully generate word_embdding features")
コード例 #8
0
    def fBrownCluster_100(self):
        """Add the 100 ``brnclst_100-<i>`` columns to ``self.featurestest``.

        When ``brnclst_100-0`` is already a key of ``self.featurestest`` the
        method returns immediately without touching the sentences or
        loading the clusters.  Otherwise the pair returned by
        ``utils.readMetaOptimizeBrownCluster_100()`` is loaded, its first
        element is stored on ``self.brnclst``, and
        ``features.brownCluster`` is called with the ``features.RawSent``
        form of every entry in ``self.test``; column ``i`` of
        ``self.featurestest`` receives element ``i`` of each row of the
        result.
        """
        keys = ["brnclst_100-" + str(i) for i in range(100)]

        if keys[0] in self.featurestest:
            # Already generated: skip the sentence extraction as well.
            return

        sentlst = [features.RawSent(r) for r in self.test]

        print("Start initialize Browncluster ....")
        brownClus, cluster_2_index = (
            utils.readMetaOptimizeBrownCluster_100())
        print("finished generating brownClusterlist !")

        self.brnclst = brownClus

        brownClusterList = features.brownCluster(sentlst, brownClus,
                                                 cluster_2_index, 100)
        for fid, fname in enumerate(keys):
            # One value per sentence: component ``fid`` of its row.
            self.featurestest[fname] = [row[fid] for row in brownClusterList]
コード例 #9
0
 def transformEmoji(self):
     """Add the ``numemoji`` feature for the test set.

     The value comes from ``features.countEmoji`` called with
     ``normalize=True`` on the ``features.RawSent`` form of every entry in
     ``self.test`` and is passed to ``self._add_feature``.
     """
     raw_sentences = []
     for inst in self.test:
         raw_sentences.append(features.RawSent(inst))
     emoji_counts = features.countEmoji(raw_sentences, normalize=True)
     self._add_feature("numemoji", emoji_counts)
コード例 #10
0
 def fPostag(self):
     """Add the selected part-of-speech tag columns as features.

     Runs ``features.extractPOS`` on the ``features.RawSent`` form of every
     entry in ``self.test`` and passes each selected column of the result
     (looked up with ``.loc[:, tag]``) to ``self._add_feature`` under the
     tag's own name.
     """
     raw_sentences = []
     for inst in self.test:
         raw_sentences.append(features.RawSent(inst))
     tag_table = features.extractPOS(raw_sentences)

     # Only these tag columns of the result are kept as features.
     wanted_tags = ('DT', 'NN', "VB", 'JJ', 'IN', '.', 'PRP', 'NNP', 'WP')
     for tag in wanted_tags:
         tag_column = tag_table.loc[:, tag]
         self._add_feature(tag, tag_column)