def search(twitter_id, params, object_type='', reply_to=None, queue=None,
           extra_headers=None, high_priority=False):
    """Twitter Search"""
    try:
        q = params.get('q', u'')
        key_prefix = high_priority and u'amqp.twitter.priosearch' or u'amqp.twitter.search'
        key = shorten_key(u'%s:%s:%s:%s' % (key_prefix, object_type, twitter_id, q))
        tstamp = cache.cache.get(key, None)
        if tstamp:
            _log.debug("Skipping already queued Twitter search for %s %s", object_type, twitter_id)
            return  # this twitter search is already in the queue; don't requeue it
        headers = {'twitter_id': twitter_id, 'object_type': object_type}
        if extra_headers:
            headers.update(extra_headers)
        headers['cache_key'] = key
        correlation_id = uuid4().hex
        ds = DataStorage()
        ds[correlation_id] = {'timestamp': time.time()}
        msg = aq.Message(
            params,
            content_type='application/json',
            delivery_mode=_DELIVERY_MODE,
            correlation_id=correlation_id,
            timestamp=datetime.now(),
            application_headers=headers,
            reply_to=reply_to
        )
        q = queue or SearchQ(bind_queue=False)
        routing_key = high_priority and 'twitter.priosearch.search' or 'twitter.search.search'
        cache.cache.set(key, int(time.time()), 3600 * 5)
        q.send(msg, q.exchange_name, routing_key)
        ds.close()
        _log.debug("Twitter search initiated for twitter_id %s. High prio: %s",
                   twitter_id, high_priority)
    except Exception, e:
        _log.error("Could not initiate Twitter search for twitter_id %s. High prio: %s",
                   twitter_id, high_priority)
        _log.exception(e)
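# A minimal usage sketch (hypothetical values; assumes a SearchQ consumer is
# running elsewhere). A caller would typically enqueue a high-priority search
# for an event hashtag like this. The 5-hour cache entry set above acts as the
# dedupe guard: a second call with the same (object_type, twitter_id, q)
# triple is dropped until the worker clears `cache_key` or the entry expires.
def _example_enqueue_search():
    search(
        twitter_id=12345,  # hypothetical Twitter user id
        params={'q': u'#sxsw'},
        object_type='event',
        reply_to='twitter/twitter.search.process_results',  # illustrative route
        high_priority=True,
    )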
def _get_connected_friends(api_key, secret, fb_session_key, fb_uid):
    key = shorten_key(u'fb_friends:%s' % fb_uid)
    all_fb_friends = cache.cache.get(key, None)
    if all_fb_friends is None:
        fb_connected_friends = set([unicode(id) for id in
                                    get_connected_friends(api_key, secret, fb_session_key, fb_uid)])
        all_fb_friends = set(UserProfile.objects.active().filter(
            fb_userid__in=list(fb_connected_friends)).values_list('fb_userid', flat=True))
        cache.cache.set(key, all_fb_friends, 120)
    return all_fb_friends
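# Usage sketch (argument values are illustrative; the setting names are
# assumptions, not from this module). The 120-second cache above means
# repeated calls within two minutes hit memcached instead of the Facebook
# API and the UserProfile table.
def _example_connected_friends():
    friends = _get_connected_friends(
        api_key=settings.FACEBOOK_API_KEY,          # assumed setting name
        secret=settings.FACEBOOK_SECRET_KEY,        # assumed setting name
        fb_session_key='session-key-from-request',  # illustrative only
        fb_uid='1234567890',
    )
    return len(friends)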
def do_foursquare_venues(dt=None):
    """Collect foursquare checkin stats for each venue that has events happening today."""
    from fsq import FoursquareApi
    from rdutils.mathutils import farey
    from event.models import Event, Venue, FoursquareTrend
    etz = tz.tzstr('EST5EDT')  # Eastern timezone
    headers = {'User-Agent': getattr(settings, 'FOURSQUARE_USER_AGENT', settings.USER_AGENT)}
    fsq = FoursquareApi(headers)
    dt = dt or date.today()
    # Active venues
    events = Event.objects.filter(is_approved=True, event_date=dt).order_by(
        'event_date', 'event_start_time', 'pk')
    venue_ids = set()
    venues = []
    for ev in events:
        if ev.venue.pk not in venue_ids:
            venues.append(ev.venue)
            venue_ids.add(ev.venue.pk)
    previous_venues = Venue.objects.exclude(pk__in=list(venue_ids)).filter(fsq_checkins__gt=0)
    previous_venues.update(fsq_checkins=0, fsq_ratio=u'', fsq_mf=0, fsq_fm=0, fsq_m=0, fsq_f=0)
    # Get and populate FSQ ids of active venues where fsq_id was not previously obtained
    for v in venues:
        if not v.fsq_id and v.geo_loc and ',' in v.geo_loc:
            keyx = shorten_key(u"venue_id_4sq:%s" % v.pk)
            vx = cache.cache.get(keyx, None)
            if vx is None:
                lat, lng = v.geo_loc.split(',')
                venue_name = v.name.encode("utf-8", "ignore")
                _log.debug("Getting 4sq venue ID for %s", venue_name)
                sleep(.25)
                vx = fsq.get_venues(geolat=lat, geolong=lng, q=venue_name, l=1)
                try:
                    vid = vx['groups'][0]['venues'][0]['id']
                    v.fsq_id = unicode(vid)
                    v.save()
                except Exception, e:
                    vid = 0
                    _log.debug("FSQ ID for venue %s could not be obtained: %s\n%s", v.pk, e, vx)
                cache.cache.set(keyx, vid, 7200)  # cache for 2 hours
    # Get and populate checkin for each venue
    num = 0
    err = 0
    api_cache = {}  # save results of calls to a venue during this run so that we don't call for the same venue twice
    for v in venues:
        if v.fsq_id:
            _log.debug("Fetching details for 4sq venue %s (our venue %s)", v.fsq_id, v.pk)
            key = shorten_key(u"venue_4sq:%s" % v.pk)
            whos_here = cache.cache.get(key, [])
            fv = api_cache.get(v.fsq_id, None)
            if fv:
                _log.debug("Using cached 4sq venue call")
            try:
                if not fv:
                    sleep(.5)
                fv = fv or fsq.get_venue_detail(vid=v.fsq_id,
                                                username=settings.FOURSQUARE_USERNAME,
                                                password=settings.FOURSQUARE_PASSWORD)
            except (URLError, AttributeError):
                sleep(30)  # try once more in 30 seconds
                fv = fsq.get_venue_detail(vid=v.fsq_id,
                                          username=settings.FOURSQUARE_USERNAME,
                                          password=settings.FOURSQUARE_PASSWORD)
            api_cache[v.fsq_id] = fv  # cache it
            if 'ratelimited' in fv:
                _log.error("We've been ratelimited by FSQ")
                err += 1
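# This collector is written to run once per day (for example, from a
# cron-driven management command). A minimal invocation sketch; the explicit
# date argument is only needed when backfilling a past day:
def _example_run_foursquare_collector():
    from datetime import date, timedelta
    do_foursquare_venues()  # collect stats for today's venues
    do_foursquare_venues(date.today() - timedelta(days=1))  # hypothetical backfill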
def get_asins(artist, force=False):
    """Get AWS ASIN ID list for artist name"""
    cache_key = shorten_key('artist:asin:%s' % artist)
    value = cache.cache.get(cache_key, None)
    if value is None or force:
        params = dict(
            SearchIndex='MP3Downloads',
            Sort='-releasedate',
            Keywords='"%s"' % artist,
        )
        xml = get_ecs_response(params)
        value = xml and get_asins_from_xml(xml, artist) or []
        cache.cache.set(cache_key, value, 24 * 3600)  # cache for 24 hours
    return value
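# Usage sketch: `force=True` bypasses the 24-hour cache and re-queries the
# Amazon ECS MP3Downloads index. The artist name is illustrative only.
def _example_get_asins():
    asins = get_asins(u'The National')               # served from cache if warm
    fresh = get_asins(u'The National', force=True)   # always re-fetches
    return asins, fresh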
def build_recommended_events(user_profile_id, queue=None):
    """Refresh recommended events for the given user"""
    try:
        kx = uuid4().hex
        msg = aq.Message(
            '',
            delivery_mode=_DELIVERY_MODE,
            correlation_id=shorten_key('rec_events:%s:%s' % (user_profile_id, kx)),
            application_headers={'user_profile_id': user_profile_id},
        )
        q = queue or EventsQ(bind_queue=False)
        q.send(msg, q.exchange_name, 'signal.events.build_recommended_events')
        _log.debug("Recommended events builder initiated for user %s", user_profile_id)
    except Exception, e:
        _log.debug("Could not build recommended events for user %s", user_profile_id)
        _log.exception(e)
def recommend_event_to_friends(user_profile_id, event_id, queue=None):
    """Recommend an event to user's friends"""
    try:
        kx = uuid4().hex
        msg = aq.Message(
            '',
            delivery_mode=_DELIVERY_MODE,
            correlation_id=shorten_key('rec_event:%s:%s:%s' % (user_profile_id, event_id, kx)),
            application_headers={'user_profile_id': user_profile_id, 'event_id': event_id},
        )
        q = queue or EventsQ(bind_queue=False)
        q.send(msg, q.exchange_name, 'signal.events.recommend_event_to_friends')
        _log.debug("Recommending event %s to user %s's friends", event_id, user_profile_id)
    except Exception, e:
        _log.debug("Could not recommend event %s to user %s's friends", event_id, user_profile_id)
        _log.exception(e)
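# Both producers above publish to 'signal.events.*' routing keys, so they are
# natural candidates for Django signal handlers. A hedged wiring sketch; the
# receiver, its field names, and the Attendee model are assumptions, not part
# of this module:
#
# from django.db.models.signals import post_save
#
# def _on_attendee_saved(sender, instance, created, **kwargs):
#     if created:
#         recommend_event_to_friends(instance.attendee_profile_id, instance.event_id)
#         build_recommended_events(instance.attendee_profile_id)
#
# post_save.connect(_on_attendee_saved, sender=Attendee)  # Attendee is hypothetical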
def refresh_friends(twitter_id, queue=None, extra_headers=None, access_token=''):
    """Refresh friends of the given twitter user_id.

    The AMQP chain of functions is:
        1. twitter/twitter.get.get_followers (replies to: 2 with next reply to: 3)
        2. twitter/twitter.get.get_followees (replies to: 3 with next reply to: 4)
        3. twitter/twitter.compute.get_friends (replies to: 4 with next reply to: None)
        4. twitter/twitter.compute.save_friends (replies to: final_reply_to/None with next reply to: None)

    """
    try:
        kx = u""  # uuid4().hex
        correlation_id = shorten_key('twitter_refresh_friends:%s:%s' % (twitter_id, kx))
        ds = DataStorage()
        val = ds[correlation_id]
        if val is not None:
            if (time.time() - val['timestamp']) > THRESHOLD_SECONDS:
                del ds[correlation_id]  # clear out zombie data
            else:
                _log.debug("Another worker is refreshing friends for twitter_id %s", twitter_id)
                return
        ds[correlation_id] = {'timestamp': time.time()}
        app_headers = {
            'twitter_id': twitter_id,
            'access_token': access_token,
        }
        if extra_headers:
            app_headers.update(extra_headers)
        msg = aq.Message(
            '',
            delivery_mode=_DELIVERY_MODE,
            correlation_id=correlation_id,
            application_headers=app_headers,
            reply_to='twitter/twitter.get.get_followees'
        )
        q = queue or GetQ(bind_queue=False)
        q.send(msg, q.exchange_name, 'twitter.get.get_followers')
        ds.close()
        _log.debug("Friend refresh initiated for twitter_id %s", twitter_id)
    except Exception, e:
        _log.debug("Could not refresh friends for twitter_id %s", twitter_id)
        _log.exception(e)
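# The docstring's four-step chain is driven entirely by reply_to headers: this
# producer publishes to 'twitter.get.get_followers' with
# reply_to='twitter/twitter.get.get_followees', and each worker forwards to
# the next hop. A usage sketch with illustrative values. Note the deliberately
# empty kx above: it makes the correlation_id stable per twitter_id, so the
# DataStorage entry doubles as a cross-worker lock.
def _example_refresh_friends():
    refresh_friends(
        twitter_id=12345,                      # hypothetical Twitter user id
        access_token='oauth-token-goes-here',  # illustrative only
        # 'final_reply_to' is named in the docstring; passing it as a header
        # here is an assumption about how step 4 picks up its reply route.
        extra_headers={'final_reply_to': None},
    )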
def get_tiny_url(url):
    tiny_link = None
    d = u''
    params = u''
    try:
        key = shorten_key(u'tiny-%s' % url)
        tiny_link = cache.get(key, None)
        if tiny_link is None:
            tiny_link = get_or_none(KeyValue.objects, key=url, category='tinyurl')
            if tiny_link:
                tiny_link = tiny_link.value
        if tiny_link is None:
            tiny_url_api = getattr(settings, 'TINY_URL_API', 'http://tinyurl.com/api-create.php')
            if "bit.ly" in tiny_url_api:
                params = {
                    'longUrl': url,
                    'login': settings.BITLY_API_LOGIN,
                    'apiKey': settings.BITLY_API_KEY,
                    'version': getattr(settings, 'BITLY_API_VERSION', '2.0.1'),
                    'format': 'json',
                }
            else:
                params = {'url': url}
            data = urllib.urlencode(params)
            tiny_link = urllib2.urlopen(tiny_url_api, data).read().strip()
            if "bit.ly" in tiny_url_api:
                d = json.loads(tiny_link)
                if not d:
                    return url
                tiny_link = d['results'][url]['shortUrl']
            if len(tiny_link) > 25:
                return url
            KeyValue.objects.get_or_create(key=url, category='tinyurl',
                                           defaults=dict(value=tiny_link))
        cache.set(key, tiny_link, 3600 * 12)
        return tiny_link
    except Exception, e:
        _log.exception(e)
        _log.warning("Failed to get short URL for %s\nwith error: %s\nd=%s\nparams=%s",
                     url, e, d, params)
        return tiny_link or url
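# Usage sketch: the function degrades gracefully. On any failure it returns
# the original URL (or whatever partial result it had), so callers can use
# the return value unconditionally. The URL below is illustrative.
def _example_tiny_url():
    short = get_tiny_url('http://riotvine.com/some/very/long/event/url/')
    return short  # the bit.ly/tinyurl link, or the original URL on failure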
def list_events(request, location=None, template='event/list.html', queryset=None,
                extra_context=None, hide_top3=False):
    location = location or request.location
    request.location = location  # stick user to this location going forward
    request.location_name = settings.LOCATION_DATA.get(location, settings.EMPTY_LOCATION_DATA)[3]
    if queryset is None:
        today, td = today_timedelta_by_location(location)
        queryset = Event.objects.active_by_location(
            location=location, attending_user=request.user_profile,
            today=today).order_by('event_date', 'event_start_time', 'pk')
        # queryset = queryset.exclude(ext_event_source='mlb2010', location='user-entered')
        queryset = queryset.exclude(location='user-entered')  # don't show user entered events on city pages
    ctx = {
        'title': 'Upcoming Events',
        'events': queryset,
        'location_name': Event.LOCATIONMAP.get(location, 'Boston, MA'),
    }
    if not hide_top3:
        userid = request.user.is_authenticated() and request.user.pk or 0
        key = shorten_key(u"popular-ourpick-destination:%s:%s" % (location, userid))
        px = cache.cache.get(key, None)
        if px is None:
            popular = Event.objects.active_by_location(
                location=location,
                attending_user=request.user_profile,
                event_date__lte=datetime.today() + timedelta(days=7),
            ).order_by('-tweet_count')[:1]
            ctx['popular'] = get_or_none(popular)
            ctx['ourpick'] = get_or_none(Event.objects.active_by_location(
                location=location, attending_user=request.user_profile,
                is_homepage_worthy=True).order_by('event_date', 'event_start_time', 'pk')[:1])
            ctx['destination'] = get_or_none(Event.objects.active_by_location(
                location='destination',
                attending_user=request.user_profile).order_by('-destination_timestamp')[:1])
            px = (ctx['popular'], ctx['ourpick'], ctx['destination'])
            cache.cache.set(key, px, 600)
        else:
            ctx['popular'], ctx['ourpick'], ctx['destination'] = px
    if extra_context:
        ctx.update(extra_context)
    other_cities = sorted(settings.LOCATION_DATA.keys())
    other_cities.remove(location)
    city_list_html = [
        ('<a href="http://%s.%s%s%s">%s</a>' % (
            settings.LOCATION_SUBDOMAIN_REVERSE_MAP[loc],
            settings.DISPLAY_SITE_DOMAIN,
            _SUBDOMAIN_PORT,
            reverse("list_events"),
            settings.LOCATION_DATA.get(loc)[3]))
        for loc in other_cities
    ]
    ctx['other_cities'] = ', '.join(city_list_html)
    if request.mobile:
        template = 'mobile/event/city.html'
    if request.user.is_authenticated():
        # Get Friends' Favorites count
        ctx['num_ff'] = get_friends_favorites_count(request.user_profile, location)
        ctx['needs_fbml'] = True
    return render_view(request, template, ctx)
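# A hedged URLconf sketch for wiring this view, in the Django 1.x style this
# codebase targets. The "list_events" name is grounded (the view reverses it
# above); the module path and the second pattern are assumptions:
#
# from django.conf.urls.defaults import patterns, url
#
# urlpatterns = patterns('',
#     url(r'^events/$', 'event.views.list_events', name='list_events'),
#     url(r'^events/(?P<location>[-\w]+)/$', 'event.views.list_events',
#         name='list_events_by_location'),  # hypothetical named pattern
# )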
def search(params, rpp=100, lang='all', count=0):
    params_q, urlx, object_id, object_type = None, None, None, None
    try:
        object_type = params.pop('object_type', 'no_type')
        object_id = params.pop('object_id', 'no_id')
        # api_count
        api_count = cache.get('twitter_search_count', 0)
        api_count = api_count % 1000000  # reset after every million searches
        cache.set('twitter_search_count', api_count + 1, 300000)
        if 'lang' not in params:
            params['lang'] = lang
        if 'rpp' not in params:
            params['rpp'] = rpp
        if not params.get('q', False):  # nothing to search
            return []
        k = params.items()
        k.sort()
        params_q = urlencode(params)
        cache_key = shorten_key(u'%s' % k)
        tweets = cache.get(cache_key, None)
        if tweets is None:
            _log.debug("Searching Twitter: %s", params_q)
            urlx = TWITTER_SEARCH_URL % params_q
            req = urllib2.Request(urlx)
            req.add_header("Content-type", "application/x-www-form-urlencoded")
            req.add_header('User-Agent', settings.USER_AGENT)
            req.add_header('Referer', 'http://riotvine.com/')
            resp = urllib2.urlopen(req)
            ret = resp.read()
            resp.close()
            results = json.loads(ret)
            # Remove blacklisted users
            tweets = filter(NotInBlackList(), results.get('results', []))
            cache.set(cache_key, tweets, 180)
            if api_count % 25 == 0:
                # back off for a few seconds after every 25 searches
                zzz = 5
                if api_count % 100 == 0:
                    zzz += 10
                if api_count % 400 == 0:
                    zzz += 10
            else:
                zzz = settings.TWITTER_SEARCH_SLEEP
            time.sleep(zzz)  # search throttle
        return tweets
    except urllib2.HTTPError, h:
        # e.g.: raise HTTPError(req.get_full_url(), code, msg, hdrs, fp)
        msg = h.info()  # httplib.HTTPMessage
        try:
            retry_after = int(msg.get('Retry-After', 0))
        except:
            retry_after = 0
        if h.code != 403:  # Skip bad search warning here; it's logged later down below
            if h.code not in (502, 503, 404, 420):
                _log.exception(h)
                _log.warn("Search count: %s\n%s\n\n%s\n\nRetry count: %s\n\n%s\n%s - %s",
                          api_count, params, unicode(msg), count, urlx, object_type, object_id)
        if h.code in (503, 502, 420):
            # grab retry-after header (we are being rate-limited)
            # 503 or 420 - we are being rate limited
            # 502 - bad gateway - Twitter overloaded
            if h.code in (503, 420):  # reset twitter search count on rate limitation
                cache.set('twitter_search_count', 0, 300000)  # reset count at throttle
            if not retry_after:
                retry_after = 10
            throttle = retry_after * 1.1
            time.sleep(throttle)
            retry_after = 0
            _log.debug("%s \n\nThrottled by %s seconds [status: %s]\n%s - %s",
                       params, throttle, h.code, object_type, object_id)
            if count < settings.TWITTER_SEARCH_RETRY_TIMES:
                p = dict(object_type=object_type, object_id=object_id)
                p.update(params)
                return search(p, rpp=rpp, lang=lang, count=count + 1)  # retry
        elif h.code == 403:
            _log.warn("Bad search\n%s\n%s\n%s - %s", params, urlx, object_type, object_id)
            retry_after = 5
        elif h.code == 404:
            pass  # skip and move on
        else:
            retry_after = 10
        if retry_after:
            throttle = retry_after * 1.1
            time.sleep(throttle)
            retry_after = 0
        return []
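# Usage sketch: callers pass the search term plus optional tracking keys
# ('object_type'/'object_id'), which are popped off before the HTTP request is
# built and used only for error logging. Values below are illustrative.
def _example_twitter_search():
    tweets = search({
        'q': u'#sxsw',
        'object_type': 'event',
        'object_id': '42',
    }, rpp=50)
    return tweets  # list of tweet dicts, blacklisted authors removed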