def score_sentiment(self, term_subset, parser):
    """Score the sentiment of one term over its subset of articles.

    Two scores are computed:

    * a bag-of-words (BOW) total: for every word of every sentence returned
      by ``self.get_sentences(article, term)``, ``word[0]`` is looked up in
      ``self.sentimentDict`` and the matching values are summed as ints;
    * a parse-based score: each sentence is handed to
      ``parser.parse_sentence``; non-zero results of
      ``get_sentiment_score`` are folded into a running pairwise average
      (``new = (old + score) / 2``), so later sentences weigh more than
      earlier ones.

    The outcome and two timings are printed and passed to
    ``sentimentSentenceLog`` for logging.

    Args:
        term_subset: ``(term, article_ids)`` pair; every id is used as a
            key into ``self.articleDict``.
        parser: object whose ``parse_sentence(sentence)`` returns either
            ``None`` or a result exposing
            ``get_sentiment_score(sentiment_dict, term)``.

    Returns:
        ``(bowscore, sentimentscore)``. ``bowscore`` is the display string
        ``"<average per sentence> (<raw total>)"`` (``"0 (0)"`` when no
        sentence was found); ``sentimentscore`` is the running average, or
        ``0`` when no sentence produced a non-zero score.
    """
    start_time = time.time()
    term, article_ids = term_subset
    print(">>SENTIMENTSCORE: Scoring sentiment for '%s'." % term)

    # Extract the term's sentences from every article and accumulate the
    # bag-of-words total while doing so.
    sentences = []
    bow_total = 0
    for article_id in article_ids:
        article = self.articleDict[article_id]
        for sentence in self.get_sentences(article, term):
            for word in sentence:
                # presumably each word is a (token, tag) pair - TODO confirm
                if word[0] in self.sentimentDict:
                    bow_total += int(self.sentimentDict[word[0]])
            sentences.append(sentence)
    extraction_done = time.time()
    print(">>SENTIMENTSCORE: Found %s sentences." % len(sentences))

    # Parse the sentences and fold their scores together.
    parse_cap = 500
    parsed_count = 0   # sentences the parser accepted
    scored_count = 0   # sentences with a non-zero score (logging purposes)
    sentiment_score = 0
    for sentence in sentences:
        # Stop once more than `parse_cap` sentences have parsed, i.e. up to
        # parse_cap + 1 sentences are scored. The check sits before the
        # parse so no parser work is wasted after the cap is reached.
        if parsed_count > parse_cap:
            break
        tree = parser.parse_sentence(sentence)
        if tree is None:
            continue
        parsed_count += 1
        print("scount: %s" % (self.print_sentence(sentence),))
        score = tree.get_sentiment_score(self.sentimentDict, term)
        if score == 0:
            continue
        scored_count += 1
        if sentiment_score == 0:
            # First contribution (or the average landed exactly on zero).
            sentiment_score = score
        else:
            # 2.0 keeps the average exact; with int scores Python 2's `/`
            # would floor the result.
            sentiment_score = (sentiment_score + score) / 2.0

    # Build the display strings for the BOW score and the scored ratio.
    total = len(sentences)
    if total == 0:
        bowscore = "0 (0)"
        sentences_count = "0.0% (0/0)"
    else:
        bowscore = "%s (%s)" % (round(bow_total / float(total), 3), bow_total)
        sentences_count = "%s%% (%s/%s)" % (
            round((scored_count / float(total)) * 100, 2),
            scored_count,
            total,
        )

    # "%s %s" reproduces the single separating space of `print a, b`.
    print("%s %s" % (">>SENTIMENTSCORE: BOW SCORE IS: ", bowscore))
    print("%s %s" % (">>SENTIMENTSCORE: Final score is", sentiment_score))
    print("")

    # Timings for the log: extraction phase and parsing phase, in seconds.
    parse_time = round(time.time() - extraction_done, 3)
    subset_time = round(extraction_done - start_time, 3)
    sentimentSentenceLog(term, sentences_count, sentiment_score, bowscore,
                         self.inputfiles, subset_time, parse_time)
    return (bowscore, sentiment_score)
def score_sentiment(self, term_subset):
    """Score the sentiment of one term over its subset of articles.

    A bag-of-words (BOW) total is accumulated by looking up ``word[0]`` of
    every word of every sentence from ``self.get_sentences(article, term)``
    in ``self.sentimentdict``. Each sentence is then parsed with a fresh
    ``SyntacticParser``; non-zero results of ``get_sentiment_score`` are
    folded into a running pairwise average (``new = (old + score) / 2``).
    The outcome and two timings are printed and handed to
    ``sentimentSentenceLog``.

    Args:
        term_subset: ``(term, article_ids)`` pair; every id is used as a
            key into ``self.articleDict``.

    Returns:
        Nothing in the code visible here; the scores are only printed and
        logged.
    """
    start_time = time.time()
    term, article_ids = term_subset
    print(">>SENTIMENTSCORE: Scoring sentiment for '%s'." % term)

    # Collect the term's sentences and the bag-of-words total.
    sentences = []
    bow_total = 0
    for article_id in article_ids:
        article = self.articleDict[article_id]
        for sentence in self.get_sentences(article, term):
            for word in sentence:
                # presumably each word is a [token, tag] pair - TODO confirm
                if word[0] in self.sentimentdict:
                    bow_total += int(self.sentimentdict[word[0]])
            sentences.append(sentence)
    extraction_done = time.time()
    print(">>SENTIMENTSCORE: Found %s sentences." % len(sentences))

    # Parse every sentence and fold the non-zero scores together.
    parser = SyntacticParser()
    scored_count = 0
    sentiment_score = 0
    for sentence in sentences:
        tree = parser.parse_sentence(sentence)
        if tree is None:
            continue
        score = tree.get_sentiment_score(self.sentimentdict, term)
        if score == 0:
            continue
        scored_count += 1
        if sentiment_score == 0:
            # First contribution (or the average landed exactly on zero).
            sentiment_score = score
        else:
            # 2.0 keeps the average exact; with int scores Python 2's `/`
            # would floor the result.
            sentiment_score = (sentiment_score + score) / 2.0

    # Build the display strings for the BOW score and the scored ratio.
    total = len(sentences)
    if total == 0:
        bowscore = "0 (0)"
        sentences_count = "0.0% (0/0)"
    else:
        bowscore = "%s (%s)" % (round(bow_total / float(total), 3), bow_total)
        sentences_count = "%s%% (%s/%s)" % (
            round((scored_count / float(total)) * 100, 2),
            scored_count,
            total,
        )

    # "%s %s" reproduces the single separating space of `print a, b`.
    # NOTE(review): the source text of the next literal was wrapped in the
    # middle of the string; confirm no trailing "\n" was intended.
    print("%s %s" % (">>SENTIMENTSCORE: BOW SCORE IS: ", bowscore))
    print("%s %s" % (">>SENTIMENTSCORE: Final score is", sentiment_score))
    print("")

    # Timings for the log: extraction phase and parsing phase, in seconds.
    parse_time = round(time.time() - extraction_done, 3)
    subset_time = round(extraction_done - start_time, 3)
    sentimentSentenceLog(term, sentences_count, sentiment_score, bowscore,
                         self.inputfiles, subset_time, parse_time)