def addTeachedTweets(self, tweets):
    """Store manually labelled tweets and update the word index.

    ``tweets`` is iterated for its keys (tweet ids) and indexed by them to
    obtain each tweet's label.  For every id that has no document in
    ``self.tweetsColl`` yet, the tweet is fetched through
    ``TwitterStuff.getTweet``, its label is saved under the ``"happy"``
    key, and every token of at least four characters (the parsed text
    plus ``"@" + screen_name`` of the author) is passed to
    ``self.saveWord`` together with the label.

    Ids that are already stored, or whose tweet cannot be fetched
    (``getTweet`` returns ``None``), are skipped.
    """
    # NOTE(review): this file contains a second definition of
    # addTeachedTweets right after this one; if both live in the same
    # class, the later definition is the one that takes effect.
    for rawId in tweets:
        numericId = int(rawId)

        alreadyStored = self.tweetsColl.find_one({"_id": numericId})
        if alreadyStored is not None:
            continue

        fetched = TwitterStuff.getTweet(rawId)
        if fetched is None:
            continue

        label = tweets[rawId]
        tokens = Parser().parseTweet(fetched['text'])
        # The author is indexed as a token too, prefixed with "@".
        tokens.append("@" + fetched['user']['screen_name'])

        # NOTE(review): if tweetsColl is a PyMongo collection, save() is
        # deprecated since PyMongo 3 and removed in PyMongo 4 - confirm
        # the driver version before upgrading.
        self.tweetsColl.save({"_id": numericId, "happy": label})

        # Tokens shorter than four characters are not indexed.
        for token in (t for t in tokens if not len(t) < 4):
            self.saveWord(token, label)
def addTeachedTweets(self, tweets):
    """Add manually classified tweets to the database and index their words.

    For each tweet id in ``tweets`` (the value stored under that id is the
    tweet's label):

    * skip it if ``self.tweetsColl`` already has a document with that
      integer ``_id``;
    * fetch it with ``TwitterStuff.getTweet`` and skip it if the result
      is ``None``;
    * save ``{"_id": <int id>, "happy": <label>}`` to ``self.tweetsColl``;
    * call ``self.saveWord(word, label)`` for every parsed word, and for
      ``"@" + screen_name`` of the author, that is at least four
      characters long.
    """
    # NOTE(review): an earlier definition of addTeachedTweets with the
    # same logic precedes this one in the file; if both are in the same
    # class, this later definition shadows it.
    for tweetKey in tweets:
        dbId = int(tweetKey)
        if self.tweetsColl.find_one({"_id": dbId}) is not None:
            # Already taught - nothing to do for this tweet.
            continue

        tweetData = TwitterStuff.getTweet(tweetKey)
        if tweetData is None:
            continue

        mark = tweets[tweetKey]
        author = "@" + tweetData['user']['screen_name']
        wordList = Parser().parseTweet(tweetData['text'])
        wordList.append(author)

        self.tweetsColl.save({
            "_id": dbId,
            "happy": mark
        })

        for entry in wordList:
            # Only words of four or more characters are indexed.
            if len(entry) >= 4:
                self.saveWord(entry, mark)
def predict(self, text, user):
    """Predict the most likely state of a tweet.

    The text is tokenised with ``Parser().parseTweet`` and ``user`` is
    appended as one more token.  Every token that has a document in
    ``self.wordsColl`` contributes that document's per-state values to the
    totals, weighted by the document's ``"number"`` divided by the largest
    ``"number"`` among all matched documents.

    Args:
        text: Tweet text to classify.
        user: Author token; it is looked up in ``wordsColl`` exactly as
            given.

    Returns:
        The state with the highest total: ``"happy"``, ``"neutral"``,
        ``"negative"`` or ``"spam"``.  Ties - including the case where no
        token is known and all totals are 0 - go to the state that comes
        first in that order.  An empty string is returned only if no
        total is greater than -1.
    """
    # Ordered tuple rather than a set: iteration order decides ties, and
    # set order of strings is arbitrary (and varies between runs when
    # hash randomization is enabled).
    states = ("happy", "neutral", "negative", "spam")

    words = Parser().parseTweet(text)
    # NOTE(review): addTeachedTweets indexes authors as "@" + screen_name;
    # presumably callers pass `user` already in that form - confirm.
    words.append(user)

    explains = []
    for word in words:
        explain = self.wordsColl.find_one({"_id": word})
        if explain is not None:
            explains.append(explain)

    # Order by "number", largest first.  The key= form works on both
    # Python 2 and Python 3; passing a cmp function positionally is
    # Python 2 only.  Sorting ascending and then reversing keeps the same
    # order among equal numbers as the original implementation.
    explains.sort(key=lambda item: item["number"])
    explains.reverse()

    res = dict.fromkeys(states, 0)
    if explains:
        # Largest "number"; every weight is relative to it.
        # NOTE(review): assumes this value is non-zero - a zero here
        # raises ZeroDivisionError, as it did before.
        topNumber = float(explains[0]["number"])
        for explain in explains:
            n = explain["number"] / topNumber
            for state in states:
                res[state] += explain[state] * n

    best = ""
    bestRate = -1
    for state in states:
        # Strict comparison: the first state reaching a total keeps it.
        if res[state] > bestRate:
            bestRate = res[state]
            best = state
    return best