def infer(self, params):
    """Update user distributions from the tweet stream, then label users.

    Phase 1 passes every dict item yielded by ``self.tweets.stream()`` to
    ``self.update_user_distributions(tweet, params)``.

    Phase 2 walks ``self.users.iter()``. For each user whose
    ``'location_point'`` is ``None`` and who has a non-empty entry in
    ``self.user_distributions``, it stores
    ``Util.str_to_tuple(self.predict(distribution, params))`` in
    ``user['location_point']``.

    Args:
        params: Passed through unchanged to ``update_user_distributions``
            and ``predict``; this method does not inspect it.

    Returns:
        None. The result is written onto the user records yielded by
        ``self.users.iter()``.
        NOTE(review): whether that write is persisted depends on what
        ``self.users.iter()`` yields -- confirm against the user store.
    """
    # Phase 1: user distribution updating.
    for tweet in self.tweets.stream():
        # Skip anything that is not a dict; isinstance (rather than an exact
        # type comparison) also accepts dict subclasses.
        if isinstance(tweet, dict):
            self.update_user_distributions(tweet, params)

    # Phase 2: location prediction using the user distributions.
    for user in self.users.iter():
        if user['location_point'] is not None:
            # Already labeled; never overwrite a known location.
            continue

        user_id = user['id']
        if user_id not in self.user_distributions:
            continue
        distribution = self.user_distributions[user_id]
        if len(distribution) == 0:
            # Nothing observed for this user, so there is nothing to predict.
            continue

        inferred_city = self.predict(distribution, params)
        user['location_point'] = Util.str_to_tuple(inferred_city)
def infer_one(self, user_id, params=None):
    """Infer a location for one user from the nouns in their tweets.

    Nouns are extracted from every tweet returned by
    ``self.tweets.get(user_id)`` with ``Util.get_nouns(text, params['lang'])``
    and counted. Each city that appears under one of those nouns in
    ``self.model['pwc']`` is scored, starting from ``self.model['pc'][city]``
    and combining ``self.model['pwc'][word][city]`` once per occurrence of
    the word. The combination is done as a sum of logarithms so that long
    tweet histories do not underflow to zero.

    Args:
        user_id: Key handed to ``self.tweets.get``.
        params: Mapping providing ``'lang'`` for noun extraction. When
            omitted, a module-level ``params`` is used if one exists, which
            is the name the previous implementation read implicitly.

    Returns:
        ``Util.str_to_tuple(city)`` for the best-scoring city, or ``None``
        when no city can be scored (no tweets, no nouns known to the model,
        or every candidate has a non-positive probability).

    Raises:
        ValueError: If ``params`` is not supplied and no module-level
            ``params`` is available.
    """
    # Function-scope import: the file's import block is not visible here.
    import math

    if params is None:
        # Backward compatibility with the implicit global lookup.
        params = globals().get('params')
    if params is None:
        raise ValueError("infer_one() requires params with a 'lang' entry")
    lang = params['lang']

    tweets = self.tweets.get(user_id)
    if tweets is None:
        tweets = ()

    # Noun -> number of occurrences across the user's tweets.
    user_words = {}
    for tweet in tweets:
        for word in Util.get_nouns(tweet['text'], lang):
            user_words[word] = user_words.get(word, 0) + 1

    prior = self.model['pc']
    likelihood = self.model['pwc']
    eliminated = float('-inf')

    def _safe_log(probability):
        # A non-positive probability rules the city out, mirroring a raw
        # product collapsing to zero.
        return math.log(probability) if probability > 0 else eliminated

    # Only words this user actually wrote contribute; iterating the whole
    # model vocabulary would give every user the same answer.
    # NOTE(review): cities that lack an entry for a word are not penalised
    # for it (no smoothing is visible here) -- confirm this is intended.
    log_scores = {}
    for word, count in user_words.items():
        if word not in likelihood:
            continue
        per_city = likelihood[word]
        for city in per_city:
            if city not in log_scores:
                log_scores[city] = _safe_log(prior[city])
            log_scores[city] += count * _safe_log(per_city[city])

    # Strict '>' keeps the first city on ties and never picks an
    # eliminated one.
    best_city = None
    best_score = eliminated
    for city, score in log_scores.items():
        if score > best_score:
            best_city, best_score = city, score

    if best_city is None:
        return None
    return Util.str_to_tuple(best_city)