def _grade_(self, gender, text):
    """Score ``text`` against the three weight tables and record the result.

    A frequency list for ``text`` is obtained from ``Words.wordFreqList`` and
    handed to ``self.convert2Dict`` (presumably this fills ``self.textDict``,
    which is cleared first and read afterwards -- confirm against
    ``convert2Dict``). For each of ``self.wordDict``, ``self.sufDict`` and
    ``self.symDict`` the score is the sum of ``weight * frequency`` over the
    keys that table shares with ``self.textDict``.

    Side effects: appends a ``TextScore`` to ``self.scoreList`` (emptied
    first when ``self.tag == CLEAR_SCORE_EACH_TIME``) and prints one summary
    line.

    Args:
        gender: Label stored with the scores. It is formatted with ``%d`` in
            the summary line, so it must be numeric.
        text: Passed to ``Words.wordFreqList`` together with ``WORDS_STR``.
    """
    self.textDict.clear()
    if self.tag == CLEAR_SCORE_EACH_TIME:
        # Emptied in place so other references to the list see the reset.
        # The original cleared it twice in a row; once is sufficient.
        del self.scoreList[:]

    freqList = Words().wordFreqList(WORDS_STR, text)
    self.convert2Dict(freqList)

    textDict = self.textDict
    textKeys = set(textDict)

    def weightedScore(weights):
        # Sum weight * frequency over the keys present in both tables.
        total = 0
        for key in textKeys & set(weights):
            total += weights[key] * textDict[key]
        return total

    wordScore = weightedScore(self.wordDict)
    suffixScore = weightedScore(self.sufDict)
    symbolScore = weightedScore(self.symDict)

    self.scoreList.append(TextScore(gender, wordScore, suffixScore, symbolScore))
    # Single-argument parenthesised form prints the same on Python 2 and 3.
    print("gender: %d, wordScore: %f, suffixScore: %f, symbolScore: %f" % (gender, wordScore, suffixScore, symbolScore))
def analyzeFreq(file):
    """Print the word-frequency list computed for ``file``, then save.

    Builds a ``Words`` object, asks it for the frequency list of ``file``
    (with ``WORDS_FILE`` as the first argument), prints the number of entries
    and the list itself between ``printLine()`` separators, and finally calls
    ``save()`` on the ``Words`` object.

    Args:
        file: Forwarded unchanged to ``Words.wordFreqList``.
    """
    counter = Words()
    freqList = counter.wordFreqList(WORDS_FILE, file)

    printLine()
    # The label already ends in a space and the original print statement
    # inserted another one between its two items; "%s %s" reproduces that.
    print("%s %s" % ("the amount of unique words is ", len(freqList)))
    printLine()
    print(freqList)

    counter.save()
def classify(self, gender, text):
    """Score ``text`` against the three weight tables and store the result.

    A ``Words`` object is built from ``text``; its frequency list is handed
    to ``self.convert2Dict``. For each of ``self.wordDict``, ``self.sufDict``
    and ``self.symDict`` the score is the sum of ``weight * frequency`` over
    the keys that table shares with ``self.textDict``. A ``TextScore`` with
    the three scores is appended to ``self.ScoreList``.

    Note that ``self.textDict`` is not cleared here; whether
    ``convert2Dict`` resets it is not visible from this method.

    Args:
        gender: Label stored with the scores in the ``TextScore``.
        text: Text handed to the ``Words`` constructor.
    """
    freqList = Words(text).wordFreqList()
    self.convert2Dict(freqList)

    frequencies = self.textDict
    textKeys = set(frequencies)

    def weightedScore(weights):
        # Sum weight * frequency over the keys present in both tables.
        total = 0
        for key in textKeys & set(weights):
            total += weights[key] * frequencies[key]
        return total

    wordScore = weightedScore(self.wordDict)
    suffixScore = weightedScore(self.sufDict)
    symbolScore = weightedScore(self.symDict)

    # NOTE(review): `_grade_` appends to `self.scoreList` (lower-case "s");
    # confirm that `ScoreList` is really the attribute intended here.
    self.ScoreList.append(TextScore(gender, wordScore, suffixScore, symbolScore))