def get_sentence_sentiment(self, text):
    """Return the accumulated polarity of ``text`` for each parse tree.

    ``text`` is split on whitespace. Every token is normalised with
    ``parse_text.reformat_word`` and then looked up in each entry of
    ``self.parse_tree`` through ``self.search_parse_tree``; the values
    returned by those lookups are summed per tree.

    Returns a list with one total per entry of ``self.parse_tree``, in the
    same order. A text without any tokens yields a list of zeros.
    """
    polarity = [0] * len(self.parse_tree)
    for raw_word in text.split():
        # Normalise once per token; the result is reused for every tree.
        word = parse_text.reformat_word(raw_word)
        for i, tree in enumerate(self.parse_tree):
            polarity[i] += self.search_parse_tree(tree, word)
    return polarity
def get_word_freq(self, filename, start_date, end_date, pos=None, neg=None):
    """Write a word-frequency csv for a date range and score word lists.

    Returns a tuple (pos_score, neg_score): the summed frequency counts of
    the words in pos and of the words in neg, respectively. A list that is
    None contributes a score of 0.

    pos should be a list of positive words and neg should be a list of
    negative words. start_date and end_date should be datetime objects,
    while filename should be a string such as 'this_file.csv' naming the
    file to write. The csv gets a 'Word', 'Frequency Count' header followed
    by one row per non-empty word seen in the parsed_text of rows whose
    datetime lies between start_date and end_date.
    """
    start = start_date.strftime('%Y-%m-%d %H:%M:%S')
    end = end_date.strftime('%Y-%m-%d %H:%M:%S')
    # Interpolate the formatted timestamps rather than the raw datetime
    # objects: str(datetime) appends fractional seconds / a UTC offset when
    # present, which would change the bounds of the range comparison.
    cmd = ('select parsed_text from twitterdb where '
           'datetime between "%s" and "%s"') % (start, end)

    freq_dic = {}
    for row in self.db.execute(cmd):
        for word in row[0].split():
            word = parse_text.reformat_word(word)
            freq_dic[word] = freq_dic.get(word, 0) + 1

    ptree = parse_tree.ParseTree()
    # NOTE: binary mode plus utf-8 encoded byte strings is the Python 2 csv
    # convention this method is written for. The with-block guarantees the
    # file is flushed and closed even if writing a row fails.
    with open(filename, 'wb') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(['Word', 'Frequency Count'])
        for (word, count) in freq_dic.iteritems():
            word = word.encode('utf-8')
            # Normalisation can leave an empty token; skip those.
            if word != '':
                writer.writerow([word, count])
                ptree.insert(word, count)

    def total_count(words):
        # Sum the stored counts of the given words; lookups that return
        # None are skipped, and a missing word list scores 0.
        if words is None:
            return 0
        total = 0
        for word in words:
            count = ptree.find_value(word)
            if count is not None:
                total += count
        return total

    return (total_count(pos), total_count(neg))