def country_code(self):
    """Return the country segmentation code ("C" + country code) for this
    tweet's GeoJSON coordinates, or None when the tweet has no usable geo
    data or the coordinates don't resolve to a known country.
    """
    geotags = self.original_data.get("coordinates")
    if not geotags:
        return None
    coords = geotags.get("coordinates")
    if not coords:
        return None
    country = Geo.get_country(coords)
    # Prefix with "C" to form the segmentation key used elsewhere.
    return "C" + country if country else None
def _get_streaming_args(self):
    """Build and return the (credentials, payload) pair for this daemon's
    streaming connection.

    Twitter caps each stream at 400 track keywords, so the budget is split
    evenly: up to 200 hashtags and 200 user mentions. Trackwords already
    claimed by other daemons are skipped; allocation is serialized across
    daemon processes with a file lock so two daemons can't pick the same
    words concurrently.
    """
    self._credentials = settings.Twitter['accounts'][self._daemon_number]
    segs = ["C" + code for code in Geo.country_codes()]
    # Serialize trackword allocation across concurrent daemon processes.
    lock = FileLock('/tmp/trackwords')
    with lock:
        all_hashtags = self._get_all_entities("hashtag", segs)
        all_usermentions = self._get_all_entities("user_mention", segs)
        used_hashtags, used_usermentions = map(set, self._get_used_trackwords())
        # We're only allowed to track 400 keywords: 200 of each kind.
        # (idiom fix: `x not in s` instead of `not x in s`)
        hashtags = [ht for ht in all_hashtags if ht not in used_hashtags][:200]
        usermentions = [um for um in all_usermentions if um not in used_usermentions][:200]
        self._set_used_trackwords(hashtags, usermentions)
        self._payload = {'track': hashtags + usermentions}
        return self._credentials, self._payload
def cache_top_tweets():
    """Aggregate, per country, the top hashtags and user mentions (each with
    their top tweets) and write the result to the Redis cache under the
    country segmentation key ("C" + country code).
    """
    # Initialize stores.
    ts = int(time.time())
    countstore = CountStore()
    tweetstore = TweetStore()
    cache = RedisCache(namespace=settings.TopTweetsCache["namespace"])
    countries = Geo.country_codes()
    # Memoize per-entity tweet lookups: the same entity can rank in
    # several countries within one run.
    top_tweets_cache = {}
    for country in countries:
        print("*************")
        print(country)
        print("*************")
        top_tweets = {}
        country_segmentation = "C" + country
        for entitytype in ["hashtag", "user_mention"]:
            top_tweets[entitytype] = []
            top_entities = countstore.get_top(entitytype, country_segmentation,
                                              settings.Aggregation['top_entities'], ts)
            for entity, count in top_entities:
                data = {"text": entity, "count": count, "tweets": []}
                tweets = top_tweets_cache.get((entitytype, entity, ts))
                if not tweets:
                    print("fetching tweets for " + str((entitytype, entity, ts)))
                    # BUG FIX: the original reused the name `segmentation`
                    # here, clobbering the country key, so cache.put() below
                    # stored under the last entity's key instead of the
                    # country's. Use a distinct name for the entity key.
                    entity_segmentation = ":".join([entitytype, entity])
                    tweets = countstore.get_top("tweet", entity_segmentation,
                                                settings.Aggregation['top_tweets'], ts)
                    # Resolve tweet ids to stored tweet objects (list, as in
                    # the original Py2 map()).
                    tweets = [(tweetstore.get(tweet_id), tweet_count)
                              for tweet_id, tweet_count in tweets]
                    top_tweets_cache[(entitytype, entity, ts)] = tweets
                for tweet, tweet_count in tweets:
                    data["tweets"].append({"tweet": tweet.data, "count": tweet_count})
                top_tweets[entitytype].append(data)
        # Store under the country segmentation key.
        cache.put(country_segmentation, top_tweets)
total_points = (steps_x + 1)*(steps_y + 1) c = 0 added = 0 boundaries = get_country_boundaries(countries) for i_x in range(steps_x + 1): for i_y in range(steps_y + 1): point = (min_x + i_x*step, min_y + i_y*step) #Check if point belongs to projection try: proj(point[0], point[1], inverse=True, errcheck=True) #check point is in a country found = locate(point, countries, boundaries, projected=True) if found: lookup[json.dumps(point)] = found added += 1 except RuntimeError: pass c += 1 if not c % 10000: print "processed %s of %s (%s added) in %s" % (c, total_points, added, round(time.time() - start, 1)) return lookup if __name__ == "__main__": countries = Geo.countries() lookup = generate_lookup(countries) datapath = os.path.join(settings.DATA_FOLDER, "country-lookup.json") g = open(datapath, 'w') g.write(json.dumps(lookup)) g.close()