def processTweets(targetsFile,sentiTokensFile,exceptSentiTokens,multiWordsFile,tweets):
    """
    Processes a list of tweets and infers the polarity of each one.

    For every tweet the rules score, the clues score and the naive
    sentiToken score are summed; the sign of the sum becomes
    tweet.polarity (1, -1 or 0) and the three info strings are joined
    with ";" into tweet.metadata.

    A tweet with polarity 0 whose metadata carries no sentiTokens (empty
    or missing "sentiTokens:...;" segment) is rejected instead of being
    returned. Rejected tweets are handed to logClassifiedTweets with the
    path "./rejectedTweets.csv".

    targetsFile -> path to the targets (politicians) list file
    sentiTokensFile -> path to the sentiTokens list file
    exceptSentiTokens -> path to the list of sentiTokens that cannot lose
                         their accents without causing ambiguity,
                         for ex: más -> mas
    multiWordsFile -> passed to MultiWordHandler (presumably the path to
                      the multiword list file -- confirm against that class)
    tweets -> list of tweets; each one must expose .id and is updated in
              place with .polarity and .metadata

    Returns the list of tweets that were not rejected, in input order.

    Side effects: prints progress to stdout (one line per tweet with its
    processing time) and stores the loaded resources through putInCache.
    """

    print("Loading resources...\nTargets: " + targetsFile)

    # NOTE: the cache lookups below are currently disabled, so every resource
    # is always reloaded from file and then written back to the cache.
    targets = None  # getFromCache(WIN_PERSONS_CACHE)

    if targets is not None:
        print("Target list found on cache!")
    else:
        targets = Persons.loadPoliticians(targetsFile)
        putInCache(targets, WIN_PERSONS_CACHE)

    print("SentiTokens: " + sentiTokensFile + "\nExceptTokens: " + exceptSentiTokens)

    sentiTokens = None  # getFromCache(WIN_SENTI_CACHE)

    if sentiTokens is not None:
        print("SentiTokens found on cache!")
    else:
        sentiTokens = SentiTokens.loadSentiTokens(sentiTokensFile,exceptSentiTokens)
        putInCache(sentiTokens, WIN_SENTI_CACHE)

    print("Multiword Tokenizer: " + multiWordsFile)

    multiWordTokenizer = None  # getFromCache(WIN_MULTIWORD_CACHE)

    if multiWordTokenizer is not None:
        print("Multiword Tokenizer found on cache")
    else:
        # The tokenizer is only built and cached here; it is not used by the
        # scoring loop below.
        multiWordTokenizer = MultiWordHandler(multiWordsFile)
        multiWordTokenizer.addMultiWords(Persons.getMultiWords(targets))
        multiWordTokenizer.addMultiWords(SentiTokens.getMultiWords(sentiTokens))
        putInCache(multiWordTokenizer, WIN_MULTIWORD_CACHE)

    print("Inferring polarity...")

    naive = Naive(targets,sentiTokens)
    rules = Rules(targets,sentiTokens)

    # Captures whatever sits between "sentiTokens:" and the next ";" in the
    # metadata string. Compiled once because it is the same for every tweet.
    sentiTokensPattern = re.compile(u'(\\W|^)sentiTokens:(.*?);(\\W|$)')

    analyzedTweets = []
    rejectedTweets = []

    for tweet in tweets:

        t0 = datetime.now()

        rulesScore,rulesInfo = rules.getRulesScore(tweet,True)
        cluesScore,clueInfo = rules.getCluesScore(tweet,True)
        sentiScore,sentiInfo = naive.getSentiScore(tweet,True)

        tweetScore = int(sentiScore) + int(rulesScore) + int(cluesScore)

        if tweetScore > 0:
            tweet.polarity = 1
        elif tweetScore < 0:
            tweet.polarity = -1
        else:
            tweet.polarity = 0

        tweet.metadata = sentiInfo+";"+clueInfo+";"+rulesInfo

        if tweet.polarity == 0:
            match = sentiTokensPattern.search(tweet.metadata)

            # A missing segment is treated like an empty token list rather
            # than crashing on match.group() of None.
            foundTokens = match.group(2).strip(' ') if match else ''

            if foundTokens:
                analyzedTweets.append(tweet)
            else:
                rejectedTweets.append(tweet)
        else:
            analyzedTweets.append(tweet)

        t1 = datetime.now()

        print(tweet.id + " ("+ str(t1-t0) + ")")

    logClassifiedTweets(rejectedTweets, "./rejectedTweets.csv")

    return analyzedTweets