def train(self):
    """Interactively label tweets and accumulate per-word sentiment weights.

    For every search term in ``self.trainingSet`` ten tweets are requested
    from ``TwitterQuery.search``. Each tweet is printed and the user is
    asked to rate it from -3 to 3. The tweet text is lower-cased, the
    search term is removed from it, and every remaining whitespace-separated
    word has the magnitude of the rating added to its entry in
    ``self.negativeWords`` (rating < 0) or ``self.positiveWords``
    (rating > 0). A rating of 0 contributes nothing.

    Side effects: mutates ``tweet.content`` of the fetched tweets, updates
    both word dictionaries, and calls ``self.writeFiles()``.
    """
    # Works on Python 2 (raw_input) and Python 3 (input). On Python 2 the
    # builtin ``input`` evaluates what is typed, so raw_input is preferred.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    for trainer in self.trainingSet:
        # The tweet text is lower-cased below, so the term has to be
        # lower-cased too or a capitalised term would never be stripped.
        term = trainer.lower()

        # tweetList holds tweet objects exposing printTweet() and .content
        tweetList = TwitterQuery.search(trainer, results = 10)

        for tweet in tweetList:
            # Show the tweet and keep asking until a usable rating is given,
            # so that one typo does not abort the whole labelling session.
            tweet.printTweet()
            while True:
                answer = read_line('Tweet Sentiment: -3 to 3: ')
                try:
                    rated_sentiment = int(answer)
                except ValueError:
                    print('Please enter a whole number from -3 to 3.')
                    continue
                if -3 <= rated_sentiment <= 3:
                    break
                print('Please enter a whole number from -3 to 3.')

            tweet.content = tweet.content.lower().replace(term, '')

            if rated_sentiment == 0:
                # Neutral tweets carry no signal for either dictionary.
                continue

            if rated_sentiment < 0:
                weights = self.negativeWords
            else:
                weights = self.positiveWords
            magnitude = abs(rated_sentiment)
            for word in tweet.content.split():
                weights[word] = weights.get(word, 0) + magnitude

    # NOTE(review): the original indentation was lost; this assumes the
    # results are written once, after every training term was processed.
    self.writeFiles()
def analyzeSentiment(self, tweetCount, time_frame = 0):
    """Return one net sentiment score per candidate.

    The word weights in ``self.positiveWords`` and ``self.negativeWords``
    are first normalized in place so that each dictionary sums to 1. Then,
    for every entry of ``self.candidates``, tweets are fetched with
    ``TwitterQuery.search`` and each one is scored by
    ``self.tweetSentiment``; item 0 of that result is added to the
    candidate's positive total and item 1 to its negative total.

    Args:
        tweetCount: value passed to ``TwitterQuery.search`` as ``results``.
        time_frame: 0 (default) or any other falsy value searches without
            a time restriction; otherwise a pair whose two items are passed
            to the search as ``mintime`` and ``maxtime``.

    Returns:
        A list of ``positive - negative`` totals, one per candidate, in the
        order of ``self.candidates``. The same value is printed for each
        candidate.
    """
    # Normalize both weight tables in place.
    for weights in (self.positiveWords, self.negativeWords):
        total = sum(float(value) for value in weights.values())
        if total == 0:
            # Empty or all-zero table: nothing to scale, and dividing
            # would raise ZeroDivisionError.
            continue
        for word in weights:
            weights[word] = float(weights[word]) / total

    candidateSentimentLevels = []
    for candidate in self.candidates:
        if not time_frame:
            tweetList = TwitterQuery.search(candidate, results = tweetCount)
        else:
            tweetList = TwitterQuery.search(candidate, results = tweetCount,
                                            mintime = time_frame[0],
                                            maxtime = time_frame[1])

        # Totals start from zero for every candidate; otherwise each score
        # would also contain the totals of all candidates processed before.
        positiveSentiment = 0
        negativeSentiment = 0
        for tweet in tweetList:
            tweet_sentiment = self.tweetSentiment(tweet)
            positiveSentiment += tweet_sentiment[0]
            negativeSentiment += tweet_sentiment[1]

        netSentiment = positiveSentiment - negativeSentiment
        # Single parenthesised argument prints identically on Python 2 and 3.
        print('The sentiment for ' + candidate + ' is ' + str(netSentiment))
        candidateSentimentLevels.append(netSentiment)

    return candidateSentimentLevels
# query = 'santorum' GenericWordsList = ['a', 'about', 'after', 'all', 'and', 'any', 'an', 'are', 'as', 'at', 'been', 'before', 'be', 'but', 'by', 'can', 'could', 'did', 'down', 'do', 'first', 'for', 'from', 'good', 'great', 'had', 'has', 'have', 'her', 'he', 'him', 'his', 'if', 'into', 'in', 'is', 'its', 'it', 'I', 'know', 'like', 'little', 'made', 'man', 'may', 'men', 'me', 'more', 'Mr', 'much', 'must', 'my', 'not', 'now', 'no', 'of', 'on', 'one', 'only', 'or', 'other', 'our', 'out', 'over', 'said', 'see', 'she', 'should', 'some', 'so', 'such', 'than', 'that', 'the', 'their', 'them', 'then', 'there', 'these', 'they', 'this', 'time', 'to', 'two', 'upon', 'up', 'us', 'very', 'was', 'were', 'we', 'what', 'when', 'which', 'who', 'will', 'with', 'would', 'you', 'your', query] GenericWords = {} for GenericWord in GenericWordsList: GenericWords[GenericWord] = 1 WordOccurrences = {} for i in range(10): TweetList = TwitterQuery.search(query,100,i+1) tweetcount = 0 for tweet in TweetList: #Filter out non alphanumerics and overly generic words, and strip out connected punctuation unfiltered_words = tweet.content.lower().split() words = [] for word in unfiltered_words: temp = word.strip('[]{},.<>/?!$%^&*()_-=+|\\;:\'\"') if(temp.isalnum() and not GenericWords.has_key(temp)): words.append(temp) #Delete duplicates for word in words: for i in range(words.count(word)-1): words.remove(word) #count occurrences for word in words:
self.name = word self.CoWords = {} self.count = 1 def printWord(self): print "\n" + self.name + ": count= " + str(self.count) + "\nWords Occurring with this word:" for CoWord, num in self.CoWords.iteritems(): if num / self.count > 0.50: print CoWord + ": " + str(num) # WordOccurrences = {} TweetList = TwitterQuery.search("santorum", 100) for tweet in TweetList: words = tweet.content.lower().split() for word in words: if WordOccurrences.has_key(word): WordOccurrences[word].count += 1 CurrentWord = WordOccurrences[word] else: CurrentWord = Word(word) WordOccurrences[word] = CurrentWord for OtherWord in words: if OtherWord != CurrentWord.name: if CurrentWord.CoWords.has_key(OtherWord): CurrentWord.CoWords[OtherWord] += 1 else: