# -*- coding: utf-8 -*-
# NOTE: `Normalizator` and `Patterns` are project-local helpers whose import
# path is not shown in this section; `classifier` and `extract_features` are
# used below and defined further down (see the training sketch at the end).
import settings_local
from django.utils.encoding import smart_str


def testTweets():
    """Fetch recent tweets matching USERTRACK and print each one colorized
    by the classifier's verdict: red for "reclamo" (complaint), blue otherwise."""
    for i in range(1, 8):
        statuses = api.GetSearch(term=settings_local.USERTRACK,
                                 lang="es", page=i, per_page=100)
        for s in statuses:
            t = Normalizator.normalize(s.text)
            if not t.isspace():
                if classifier.classify(extract_features(t.split())) == "reclamo":
                    print "\033[91m" + t + "\033[0m"  # red: classified as a complaint
                else:
                    print "\033[94m" + t + "\033[0m"  # blue: anything else
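# The functions in this file rely on a module-level `api` (an authenticated
# python-twitter client) and `db` (a pymongo database). A minimal sketch of
# that setup, assuming credentials live in settings_local; the attribute
# names and the 'tweets_db' database name are placeholders, not confirmed
# by the source.
import twitter
import pymongo

api = twitter.Api(consumer_key=settings_local.CONSUMER_KEY,
                  consumer_secret=settings_local.CONSUMER_SECRET,
                  access_token_key=settings_local.ACCESS_TOKEN_KEY,
                  access_token_secret=settings_local.ACCESS_TOKEN_SECRET)
db = pymongo.MongoClient()['tweets_db']  # placeholder database name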
def saveTweetsOf(screen_name, category):
    """Interactively label search results and store them in MongoDB.

    Note: both parameters are effectively unused -- `screen_name` is
    overwritten with each status' author, and the category is chosen
    interactively for every tweet."""
    for i in range(1, 4):
        statuses = api.GetSearch(term=settings_local.USERTRACK,
                                 lang='es', page=i, per_page=100)
        for s in statuses:
            if not s.text.startswith('RT'):  # skip retweets
                screen_name = s.user.screen_name
                t = Normalizator.normalize(s.text)
                # Did normalization collapse duplicated letters (emphasis)?
                em = Normalizator.usedPattern(Patterns.DUPLICATED_LETTERS)
                print Normalizator.usedPattern(Patterns.SPECIALCHARS_AND_MENTIONS)
                print smart_str(s.text.lower())  # show the raw tweet before asking
                if not t.isspace():
                    # "Is it a complaint?" -- expects a CATEGORIES index
                    selected = raw_input('Es un reclamo? ')
                    tweets = db[settings_local.CATEGORIES[int(selected)]]
                    tweet = {'normalized_tweet': t,
                             'tweet': s.text,
                             'screen_name': screen_name,
                             'emphatized': em}
                    tweets.insert(tweet)
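# Hypothetical usage (interactive; both arguments are placeholders -- see
# the note in the docstring above):
#
#   >>> saveTweetsOf('some_user', 'reclamo')
#   ... prints each non-retweet, asks "Es un reclamo?" and expects the
#   index of a settings_local.CATEGORIES entry (e.g. 0 or 1).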
# Build the labelled training corpus out of every Mongo category collection.
allTweets = []
for c in settings_local.CATEGORIES:
    statuses = db[c].find()
    for s in statuses:
        t = Normalizator.normalize(s["normalized_tweet"])
        if not t.isspace():
            allTweets.append((t, c))

# Lowercase every token and drop the ones shorter than three characters.
tweets = []
for (words, sentiment) in allTweets:
    words_filtered = [e.lower() for e in words.split() if len(e) >= 3]
    tweets.append((words_filtered, sentiment))


def get_words_in_tweets(tweets):
    """Flatten the (words, sentiment) pairs into a single list of words."""
    all_words = []
    for (words, sentiment) in tweets:
        all_words.extend(words)
    return all_words
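# `classifier` and `extract_features`, used by testTweets() above, are never
# defined in this section. A minimal sketch of the usual NLTK completion of
# this pipeline, assuming a Naive Bayes classifier over word-presence
# features; treat it as an assumption, not the project's confirmed code.
import nltk


def get_word_features(wordlist):
    # In the NLTK releases of this era, FreqDist.keys() returns the
    # vocabulary sorted by decreasing frequency.
    return nltk.FreqDist(wordlist).keys()


word_features = get_word_features(get_words_in_tweets(tweets))


def extract_features(document):
    """Map a tokenized tweet to {contains(word): bool} features."""
    document_words = set(document)
    features = {}
    for word in word_features:
        features['contains(%s)' % word] = (word in document_words)
    return features


training_set = nltk.classify.apply_features(extract_features, tweets)
classifier = nltk.NaiveBayesClassifier.train(training_set)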