Example 1
def country_code(self):
    #Return the tweet's country segmentation key, e.g. "CUS", or None
    #if the tweet carries no usable coordinates.
    geotags = self.original_data.get("coordinates")
    if geotags:
        coords = geotags.get("coordinates")
        if coords:
            country = Geo.get_country(coords)
            if country:
                return "C" + country
    return None

def _get_streaming_args(self):
    self._credentials = settings.Twitter['accounts'][self._daemon_number]
    segs = ["C" + code for code in Geo.country_codes()]
    #We're only allowed to track 400 keywords, so take at most 200 fresh
    #hashtags and 200 fresh user mentions not already claimed by another daemon.
    lock = FileLock('/tmp/trackwords')
    with lock:
        all_hashtags = self._get_all_entities("hashtag", segs)
        all_usermentions = self._get_all_entities("user_mention", segs)
        used_hashtags, used_usermentions = map(set, self._get_used_trackwords())
        hashtags = [ht for ht in all_hashtags if ht not in used_hashtags][:200]
        usermentions = [um for um in all_usermentions if um not in used_usermentions][:200]
        self._set_used_trackwords(hashtags, usermentions)
        self._payload = {'track': hashtags + usermentions}
    return self._credentials, self._payload
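Together these two methods support a country-segmented Twitter streaming setup: country_code maps a tweet's coordinates to a "C" + country key, and _get_streaming_args assembles per-daemon credentials plus a track payload of up to 400 unused keywords. A minimal sketch of how a daemon might consume them; run_daemon and connect_and_stream are illustrative names, not part of the original codebase:

def run_daemon(daemon):
    #Hypothetical driver: fetch credentials and trackwords, then stream.
    credentials, payload = daemon._get_streaming_args()
    #payload['track'] holds at most 400 keywords (200 hashtags +
    #200 user mentions), matching the 400-keyword limit noted above.
    connect_and_stream(credentials, track=payload['track'])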
Example 3
def cache_top_tweets():
    #initialize stores
    ts = int(time.time())
    countstore = CountStore()
    tweetstore = TweetStore()
    cache = RedisCache(namespace=settings.TopTweetsCache["namespace"])
    countries = Geo.country_codes()
    top_tweets_cache = {}
    for country in countries:
        print "*************"
        print country
        print "*************"

        top_tweets = {}
        segmentation = "C" + country
        for entitytype in ["hashtag", "user_mention"]:
            top_tweets[entitytype] = []
            top_entities = countstore.get_top(entitytype,
                                              segmentation,
                                              settings.Aggregation['top_entities'],
                                              ts)
            for entity, count in top_entities:
                data = {"text": entity, "count": count, "tweets": []}
                tweets = top_tweets_cache.get((entitytype, entity, ts))
                if not tweets:
                    print "fetching tweets for " + str((entitytype, entity, ts))
                    #Use a separate variable here: reusing `segmentation`
                    #would clobber the country key needed by cache.put below.
                    entity_segmentation = ":".join([entitytype, entity])
                    tweets = countstore.get_top("tweet",
                                                entity_segmentation,
                                                settings.Aggregation['top_tweets'],
                                                ts)
                    #Resolve tweet ids to full tweet objects.
                    tweets = [(tweetstore.get(tweet_id), tweet_count)
                              for tweet_id, tweet_count in tweets]
                    top_tweets_cache[(entitytype, entity, ts)] = tweets
                for tweet, tweet_count in tweets:
                    data["tweets"].append({"tweet": tweet.data, "count": tweet_count})
                top_tweets[entitytype].append(data)
        cache.put(segmentation, top_tweets)
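Each country's payload is written under its "C" + country key, so a consumer (a web endpoint, say) needs only one cache read per country. A minimal read-side sketch, assuming RedisCache exposes a get symmetric to the put above (that method name is an assumption, not confirmed by this listing):

#Hedged sketch: RedisCache.get is assumed to mirror the put() used above.
cache = RedisCache(namespace=settings.TopTweetsCache["namespace"])
top_tweets = cache.get("CUS")  #payload cached for the US
if top_tweets:
    for entry in top_tweets["hashtag"]:
        print entry["text"], entry["count"], len(entry["tweets"])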
def generate_lookup(countries):
    #NOTE: the grid setup is not part of this listing; min_x, min_y, step,
    #steps_x, steps_y, the projection `proj`, the `lookup` dict and the
    #timing `start` are defined in the omitted preamble.
    total_points = (steps_x + 1) * (steps_y + 1)
    c = 0
    added = 0
    boundaries = get_country_boundaries(countries)
    for i_x in range(steps_x + 1):
        for i_y in range(steps_y + 1):
            point = (min_x + i_x * step, min_y + i_y * step)
            #Check if the point belongs to the projection; with
            #errcheck=True, pyproj raises RuntimeError if it does not.
            try:
                proj(point[0], point[1], inverse=True, errcheck=True)
                #Check whether the point falls inside a country.
                found = locate(point, countries, boundaries, projected=True)
                if found:
                    lookup[json.dumps(point)] = found
                    added += 1
            except RuntimeError:
                pass
            c += 1
            if not c % 10000:
                print "processed %s of %s (%s added) in %ss" % (c, total_points, added, round(time.time() - start, 1))
    return lookup

if __name__ == "__main__":
    countries = Geo.countries()
    lookup = generate_lookup(countries)
    datapath = os.path.join(settings.DATA_FOLDER, "country-lookup.json")
    with open(datapath, 'w') as g:
        g.write(json.dumps(lookup))
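The generated file is a JSON map from a serialized projected point to a country code, so a query at runtime is just a dict read, provided the query point is snapped onto the same grid first. A minimal sketch of the read side; the snapping parameters are assumptions, since the original grid setup is not shown in this listing:

import json
import os

#Hedged sketch: load the generated lookup and query a projected point.
#Snapping an arbitrary point requires the same min_x/min_y/step values
#used by generate_lookup; those are not available in this listing.
datapath = os.path.join(settings.DATA_FOLDER, "country-lookup.json")
with open(datapath) as f:
    lookup = json.load(f)

def country_at(point):
    #Points were stored keyed by json.dumps((x, y)), so the query key
    #must match that serialization exactly.
    return lookup.get(json.dumps(point))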