def train(self):
     """Interactively rate sample tweets and accumulate per-word sentiment weights.

     For every search term in ``self.trainingSet`` a batch of tweets is
     requested from ``TwitterQuery.search`` (``results=10``).  Each tweet is
     printed and the operator is prompted for an integer rating (the prompt
     asks for -3 to 3).  The tweet text is lower-cased, the search term is
     removed from it, and the magnitude of the rating is added to the entry
     of every whitespace-separated word in ``self.negativeWords`` (rating
     below zero) or ``self.positiveWords`` (rating above zero).  A rating of
     zero leaves both dictionaries unchanged.

     Side effects: ``tweet.content`` is overwritten with the cleaned text,
     and ``self.writeFiles()`` is called once after all terms are processed.

     Raises:
         ValueError: if the operator's answer cannot be parsed by ``int``.
     """
     for trainer in self.trainingSet:
         # Each result is used as an object exposing printTweet() and .content.
         tweetList = TwitterQuery.search(trainer, results = 10)

         # The text is lower-cased before the term is stripped out, so the
         # term must be lower-cased as well or a mixed-case term never matches.
         search_term = trainer.lower()

         for tweet in tweetList:
             # Show the tweet and ask the operator how positive/negative it is.
             tweet.printTweet()
             rated_sentiment = int(raw_input('Tweet Sentiment: -3 to 3: '))
             tweet.content = tweet.content.lower().replace(search_term, '')

             # Pick the dictionary matching the sign of the rating.
             if rated_sentiment < 0:
                 word_scores = self.negativeWords
             elif rated_sentiment > 0:
                 word_scores = self.positiveWords
             else:
                 # Neutral rating: nothing to record for this tweet.
                 continue

             weight = abs(rated_sentiment)
             for word in tweet.content.split():
                 word_scores[word] = word_scores.get(word, 0) + weight

     self.writeFiles()
    def analyzeSentiment(self, tweetCount, time_frame = 0):
        """Compute a net sentiment score for every candidate.

        ``self.positiveWords`` and ``self.negativeWords`` are first normalized
        in place so that the values of each dictionary sum to 1.  Then, for
        every entry of ``self.candidates``, tweets are requested from
        ``TwitterQuery.search`` and each one is scored with
        ``self.tweetSentiment``; element 0 of that result is accumulated as
        the positive total and element 1 as the negative total.  The net
        score (positive minus negative) is printed and collected.

        Args:
            tweetCount: forwarded to ``TwitterQuery.search`` as ``results``.
            time_frame: ``0`` (the default) to search without a time
                restriction; otherwise an indexable pair whose items are
                forwarded as ``mintime`` and ``maxtime``.

        Returns:
            A list holding one net score per candidate, in the order of
            ``self.candidates``.
        """
        def normalize(weights):
            # Coerce with float() for both dictionaries so that integer (or
            # numeric-string) values are summed and divided consistently.
            total = sum(float(value) for value in weights.values())
            if not total:
                # Empty or all-zero table: scaling would divide by zero.
                return
            for key in weights:
                weights[key] = float(weights[key]) / total

        normalize(self.positiveWords)
        normalize(self.negativeWords)

        candidateSentimentLevels = []
        for candidate in self.candidates:
            if time_frame == 0:
                tweetList = TwitterQuery.search(candidate, results = tweetCount)
            else:
                tweetList = TwitterQuery.search(candidate, results = tweetCount, mintime = time_frame[0], maxtime = time_frame[1])

            # Totals restart for each candidate; otherwise a candidate's score
            # would also contain the totals of every candidate before it.
            positiveSentiment = 0
            negativeSentiment = 0
            for tweet in tweetList:
                tweet_sentiment = self.tweetSentiment(tweet)
                positiveSentiment += tweet_sentiment[0]
                negativeSentiment += tweet_sentiment[1]

            netSentiment = positiveSentiment - negativeSentiment
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print('The sentiment for ' + candidate + ' is ' + str(netSentiment))
            candidateSentimentLevels.append(netSentiment)

        return candidateSentimentLevels
#
# Search term for the tweets being analysed; it is appended to the generic
# word list below so that it is filtered out like any other generic word.
query = 'santorum'
# Words considered too generic to be worth counting.
GenericWordsList = ['a', 'about', 'after', 'all', 'and', 'any', 'an', 'are', 'as', 'at', 'been', 'before', 
  'be', 'but', 'by', 'can', 'could', 'did', 'down', 'do', 'first', 'for', 'from', 'good', 'great', 'had', 
  'has', 'have', 'her', 'he', 'him', 'his', 'if', 'into', 'in', 'is', 'its', 'it', 'I', 'know', 'like', 
  'little', 'made', 'man', 'may', 'men', 'me', 'more', 'Mr', 'much', 'must', 'my', 'not', 'now', 'no', 'of', 
  'on', 'one', 'only', 'or', 'other', 'our', 'out', 'over', 'said', 'see', 'she', 'should', 'some', 'so', 
  'such', 'than', 'that', 'the', 'their', 'them', 'then', 'there', 'these', 'they', 'this', 'time', 'to', 
  'two', 'upon', 'up', 'us', 'very', 'was', 'were', 'we', 'what', 'when', 'which', 'who', 'will', 'with', 
  'would', 'you', 'your', query]
# Membership table keyed by generic word (the value 1 is just a marker).
GenericWords = {}
for GenericWord in GenericWordsList:
    GenericWords[GenericWord] = 1
    # Tweet words are lower-cased before they are looked up here, so entries
    # written with capitals ('I', 'Mr') also need a lower-case key to match.
    GenericWords[GenericWord.lower()] = 1
WordOccurrences = {}
for i in range(10):
    TweetList = TwitterQuery.search(query,100,i+1)
    tweetcount = 0
    for tweet in TweetList:
    #Filter out non alphanumerics and overly generic words, and strip out connected punctuation
        unfiltered_words = tweet.content.lower().split()
        words = []
        for word in unfiltered_words:
            temp = word.strip('[]{},.<>/?!$%^&*()_-=+|\\;:\'\"')
            if(temp.isalnum() and not GenericWords.has_key(temp)):
                words.append(temp)
    #Delete duplicates
        for word in words:
            for i in range(words.count(word)-1):
                words.remove(word)
    #count occurrences
        for word in words:
Ejemplo n.º 4
0
# Date range handed to the tweet query
fromdate = '2017-01-01'
todate = '2020-01-31'

# Search term handed to the tweet query
search_term = 'anti social behaviour'

# Other search terms noted here (previously a bare list expression with no
# effect; presumably swapped into search_term by hand -- TODO confirm):
# "Knife Crime", "Anti Social Behaviour", "Sexual Offence", "Shoplifting", "Robbery"

# First year of the loop below
year = 2017
formated_tweets = []

# set the loop for running through the years until 2019
while year <= 2019 :
    twitter_querier = tq.TwitterApiConstruct()

    #loop through users tweets
    #list_of_tweets = twitter_querier.get_user_timeLine_new('syptweet', fromdate, todate, 10)
    
    try:
        list_of_tweets = twitter_querier.query_tweets('20km', search_term, fromdate, todate, 'Sheffield, United Kingdom', 100)

        if list_of_tweets == []:
            break
        
        #loop through each tweet
        for tweet in list_of_tweets:
            #create a time object from the tweet date
            datetime_object = datetime.strptime(tweet.formatted_date, '%a %b %d %H:%M:%S %z %Y')
        self.name = word
        self.CoWords = {}
        self.count = 1

    def printWord(self):
        """Print this word, its count, and its frequent co-occurring words.

        A header line with ``self.name`` and ``self.count`` is printed first.
        Each entry of ``self.CoWords`` is then printed as ``word: num`` when
        its number exceeds half of ``self.count``.
        """
        print("\n" + self.name + ": count= " + str(self.count) + "\nWords Occurring with this word:")
        for CoWord, num in self.CoWords.items():
            # float() forces true division: with two ints Python 2 floors the
            # ratio, so the 0.50 threshold would only pass when num >= count.
            if float(num) / self.count > 0.50:
                print(CoWord + ": " + str(num))


#

WordOccurrences = {}

TweetList = TwitterQuery.search("santorum", 100)

for tweet in TweetList:
    words = tweet.content.lower().split()
    for word in words:
        if WordOccurrences.has_key(word):
            WordOccurrences[word].count += 1
            CurrentWord = WordOccurrences[word]
        else:
            CurrentWord = Word(word)
            WordOccurrences[word] = CurrentWord
        for OtherWord in words:
            if OtherWord != CurrentWord.name:
                if CurrentWord.CoWords.has_key(OtherWord):
                    CurrentWord.CoWords[OtherWord] += 1
                else: