Example #1
0
def search(twitter_id, params, object_type='', reply_to=None, queue=None, extra_headers=None, high_priority=False):
    """Twitter Search"""
    try:
        q = params.get('q', u'')
        key_prefix = high_priority and u'amqp.twitter.priosearch' or u'amqp.twitter.search'
        key = shorten_key(u'%s:%s:%s:%s' % (key_prefix, object_type, twitter_id, q))
        tstamp = cache.cache.get(key, None)
        if tstamp:
            _log.debug("Skipping already queued Twitter search for %s %s", object_type, twitter_id)
            return # this twitter search is already in the queue; don't requeue it
        headers = {'twitter_id':twitter_id, 'object_type':object_type}
        if extra_headers:
            headers.update(extra_headers)
        headers['cache_key'] = key
        correlation_id = uuid4().hex
        ds = DataStorage()
        ds[correlation_id] = {'timestamp':time.time()}
        msg = aq.Message(
            params,
            content_type='application/json',
            delivery_mode=_DELIVERY_MODE,
            correlation_id=correlation_id,
            timestamp=datetime.now(),
            application_headers=headers,
            reply_to=reply_to
        )
        q = queue or SearchQ(bind_queue=False)
        routing_key = high_priority and 'twitter.priosearch.search' or 'twitter.search.search'
        cache.cache.set(key, int(time.time()), 3600*5)
        q.send(msg, q.exchange_name, routing_key)
        ds.close()
        _log.debug("Twitter search initiated for twitter_id %s. High prio: %s", twitter_id, high_priority)
    except Exception, e:
        _log.error("Could not initiate Twitter search for twitter_id %s. High prio: %s", twitter_id, high_priority)
        _log.exception(e)
Example #2
0
def _get_connected_friends(api_key, secret, fb_session_key, fb_uid):
    """Return the set of this user's FB friends who are active site users.

    The result is cached for 120 seconds under a per-user key.
    """
    cache_key = shorten_key(u'fb_friends:%s' % fb_uid)
    friends = cache.cache.get(cache_key, None)
    if friends is None:
        connected = get_connected_friends(api_key, secret, fb_session_key, fb_uid)
        fb_ids = set()
        for fid in connected:
            fb_ids.add(unicode(fid))
        qs = UserProfile.objects.active().filter(fb_userid__in=list(fb_ids))
        friends = set(qs.values_list('fb_userid', flat=True))
        cache.cache.set(cache_key, friends, 120)
    return friends
Example #3
0
def _get_connected_friends(api_key, secret, fb_session_key, fb_uid):
    """Look up which of the user's Facebook friends are active users here.

    Results are cached for two minutes under a per-user key.
    """
    key = shorten_key(u'fb_friends:%s' % fb_uid)
    result = cache.cache.get(key, None)
    if result is not None:
        return result
    raw_ids = get_connected_friends(api_key, secret, fb_session_key, fb_uid)
    fb_connected = set(unicode(i) for i in raw_ids)
    matched = UserProfile.objects.active().filter(
        fb_userid__in=list(fb_connected))
    result = set(matched.values_list('fb_userid', flat=True))
    cache.cache.set(key, result, 120)
    return result
Example #4
0
def do_foursquare_venues(dt=None):
    """Collect foursquare checkin stats for each venue that has events happening today.

    ``dt`` defaults to today's date.  Venues with previously recorded
    checkins but no event on ``dt`` get their foursquare counters reset;
    venues with events but no known foursquare id get one looked up from
    the foursquare API (and cached for two hours, even on failure).
    """
    from fsq import FoursquareApi
    from rdutils.mathutils import farey
    from event.models import Event, Venue, FoursquareTrend

    etz = tz.tzstr('EST5EDT')  # Eastern timezone
    headers = {
        'User-Agent':
        getattr(settings, 'FOURSQUARE_USER_AGENT', settings.USER_AGENT)
    }
    fsq = FoursquareApi(headers)
    dt = dt or date.today()
    # Active venues
    events = Event.objects.filter(is_approved=True, event_date=dt).order_by(
        'event_date', 'event_start_time', 'pk')
    venue_ids = set()
    venues = []
    # Deduplicate venues while preserving event ordering.
    for ev in events:
        if ev.venue.pk not in venue_ids:
            venues.append(ev.venue)
            venue_ids.add(ev.venue.pk)
    # Zero out stats for venues that had checkins before but have no event today.
    previous_venues = Venue.objects.exclude(pk__in=list(venue_ids)).filter(
        fsq_checkins__gt=0)
    previous_venues.update(fsq_checkins=0,
                           fsq_ratio=u'',
                           fsq_mf=0,
                           fsq_fm=0,
                           fsq_m=0,
                           fsq_f=0)
    # Get and populate FSQ ids of active venues where fsq_id was not previously obtained
    for v in venues:
        if not v.fsq_id and v.geo_loc and ',' in v.geo_loc:
            keyx = shorten_key(u"venue_id_4sq:%s" % v.pk)
            vx = cache.cache.get(keyx, None)
            if vx is None:
                lat, lng = v.geo_loc.split(',')
                venue_name = v.name.encode("utf-8", "ignore")
                _log.debug("Getting 4sq venue ID for %s", venue_name)
                sleep(.25)  # throttle calls to the foursquare API
                vx = fsq.get_venues(geolat=lat, geolong=lng, q=venue_name, l=1)
                try:
                    vid = vx['groups'][0]['venues'][0]['id']
                    v.fsq_id = unicode(vid)
                    v.save()
                except Exception, e:
                    vid = 0  # sentinel: lookup failed; cached below so we don't retry for 2 hours
                    _log.debug(
                        "FSQ ID for venue %s could not be obtained: %s\n%s",
                        v.pk, e, vx)
                cache.cache.set(keyx, vid, 7200)  # cache for 2 hours
Example #5
0
def get_asins(artist, force=False):
    """Get AWS ASIN ID list for artist name.

    Results are cached for 24 hours; pass ``force=True`` to bypass the cache.
    """
    cache_key = shorten_key('artist:asin:%s' % artist)
    asins = cache.cache.get(cache_key, None)
    if force or asins is None:
        query = dict(
            SearchIndex='MP3Downloads',
            Sort='-releasedate',
            Keywords='"%s"' % artist,
        )
        response = get_ecs_response(query)
        asins = (response and get_asins_from_xml(response, artist)) or []
        cache.cache.set(cache_key, asins, 24 * 3600)  # cache for 24 hours
    return asins
Example #6
0
def build_recommended_events(user_profile_id, queue=None):
    """Refresh recommended events for the given user"""
    try:
        kx = uuid4().hex
        msg = aq.Message(
            '',
            delivery_mode=_DELIVERY_MODE,
            correlation_id=shorten_key('rec_events:%s:%s' % (user_profile_id, kx)),
            application_headers={'user_profile_id':user_profile_id},
        )
        q = queue or EventsQ(bind_queue=False)
        q.send(msg, q.exchange_name, 'signal.events.build_recommended_events')
        _log.debug("Recommended events builder initiated for user %s", user_profile_id)
    except Exception, e:
        _log.debug("Could not build recommended events for user %s", user_profile_id)
        _log.exception(e)
Example #7
0
def recommend_event_to_friends(user_profile_id, event_id, queue=None):
    """Recommend an event to user's friends"""
    try:
        kx = uuid4().hex
        msg = aq.Message(
            '',
            delivery_mode=_DELIVERY_MODE,
            correlation_id=shorten_key('rec_event:%s:%s:%s' % (user_profile_id, event_id, kx)),
            application_headers={'user_profile_id':user_profile_id, 'event_id':event_id},
        )
        q = queue or EventsQ(bind_queue=False)
        q.send(msg, q.exchange_name, 'signal.events.recommend_event_to_friends')
        _log.debug("Recommending event %s to user %s's friends", event_id, user_profile_id)
    except Exception, e:
        _log.debug("Could not recommend event %s to user %s's friends", event_id, user_profile_id)
        _log.exception(e)
Example #8
0
def refresh_friends(twitter_id,
                    queue=None,
                    extra_headers=None,
                    access_token=''):
    """Refresh friends of the given twitter user_id.

    The AMQP chain of functions is:

        1. twitter/twitter.get.get_followers (replies to: 2 with next reply to: 3)
        2. twitter/twitter.get.get_followees (replies to: 3 with next reply to: 4)
        3. twitter/twitter.compute.get_friends (replies to: 4 with next reply to: None)
        4. twitter/twitter.compute.save_friends (replies to: final_reply_to/None with next reply to: None)

    """
    try:
        kx = u""  # uuid4().hex
        correlation_id = shorten_key('twitter_refresh_friends:%s:%s' %
                                     (twitter_id, kx))
        ds = DataStorage()
        val = ds[correlation_id]
        if val is not None:
            if (time.time() - val['timestamp']) > THRESHOLD_SECONDS:
                del ds[correlation_id]  # clear out zombie data
            else:
                _log.debug(
                    "Another worker is refreshing friends for twitter_id %s",
                    twitter_id)
                return
        ds[correlation_id] = {'timestamp': time.time()}
        app_headers = {
            'twitter_id': twitter_id,
            'access_token': access_token,
        }
        if extra_headers:
            app_headers.update(extra_headers)
        msg = aq.Message('',
                         delivery_mode=_DELIVERY_MODE,
                         correlation_id=correlation_id,
                         application_headers=app_headers,
                         reply_to='twitter/twitter.get.get_followees')
        q = queue or GetQ(bind_queue=False)
        q.send(msg, q.exchange_name, 'twitter.get.get_followers')
        ds.close()
        _log.debug("Friend refresh initiated for twitter_id %s", twitter_id)
    except Exception, e:
        _log.debug("Could not refresh friends for twitter_id %s", twitter_id)
        _log.exception(e)
Example #9
0
def get_tiny_url(url):
    """Return a shortened URL for *url*, or *url* itself on failure.

    Lookup order: local cache (12h TTL), then the KeyValue table, then the
    remote shortener API (bit.ly JSON when TINY_URL_API points there,
    otherwise a tinyurl-style endpoint returning the short URL as text).
    """
    tiny_link = None
    d = u''       # bit.ly JSON response, kept around for error logging
    params = u''  # request params, kept around for error logging
    try:
        key = shorten_key(u'tiny-%s' % url)
        tiny_link = cache.get(key, None)
        if tiny_link is None:
            # Fall back to the persistent KeyValue store.
            tiny_link = get_or_none(KeyValue.objects,
                                    key=url,
                                    category='tinyurl')
            if tiny_link:
                tiny_link = tiny_link.value
            if tiny_link is None:
                tiny_url_api = getattr(settings, 'TINY_URL_API',
                                       'http://tinyurl.com/api-create.php')
                if "bit.ly" in tiny_url_api:
                    params = {
                        'longUrl': url,
                        'login': settings.BITLY_API_LOGIN,
                        'apiKey': settings.BITLY_API_KEY,
                        'version': getattr(settings, 'BITLY_API_VERSION',
                                           '2.0.1'),
                        'format': 'json',
                    }
                else:
                    params = {'url': url}
                data = urllib.urlencode(params)
                tiny_link = urllib2.urlopen(tiny_url_api, data).read().strip()
                if "bit.ly" in tiny_url_api:
                    # bit.ly wraps the short URL in a JSON envelope.
                    d = json.loads(tiny_link)
                    if not d:
                        return url
                    tiny_link = d['results'][url]['shortUrl']
                if len(tiny_link) > 25:
                    # Implausibly long response; treat as a failed shortening.
                    return url
                KeyValue.objects.get_or_create(key=url,
                                               category='tinyurl',
                                               defaults=dict(value=tiny_link))
            cache.set(key, tiny_link, 3600 * 12)
        return tiny_link
    except Exception, e:
        _log.exception(e)
        _log.warning(
            "Failed to get short URL for %s\nwith error: %s\nd=%s\nparams=%s",
            url, e, d, params)
        return tiny_link or url
Example #10
0
def list_events(request, location=None, template='event/list.html', queryset=None, extra_context=None, hide_top3=False):
    """Render the upcoming-events listing for a location.

    Sticks the resolved location onto the request, builds the default
    queryset when none is supplied (excluding user-entered events), and
    unless ``hide_top3`` adds the cached popular/our-pick/destination
    picks plus cross-city navigation links to the context.
    """
    location = location or request.location
    request.location = location # stick user to this location going forward
    request.location_name = settings.LOCATION_DATA.get(location, settings.EMPTY_LOCATION_DATA)[3]
    if queryset is None:
        today, td = today_timedelta_by_location(location)
        queryset = Event.objects.active_by_location(location=location, attending_user=request.user_profile, today=today).order_by('event_date', 'event_start_time', 'pk')
        # queryset = queryset.exclude(ext_event_source='mlb2010', location='user-entered')
        queryset = queryset.exclude(location='user-entered') # don't show user entered events on city pages
    ctx = {
        'title':'Upcoming Events',
        'events':queryset,
        'location_name':Event.LOCATIONMAP.get(location, 'Boston, MA'),
    }
    if not hide_top3:
        # Top-3 picks are cached per (location, user) for 10 minutes.
        userid = request.user.is_authenticated() and request.user.pk or 0
        key = shorten_key(u"popular-ourpick-destination:%s:%s" % (location, userid))
        px = cache.cache.get(key, None)
        if px is None:
            # 'popular': most-tweeted event within the next 7 days.
            popular = Event.objects.active_by_location(
                location=location,
                attending_user=request.user_profile,
                event_date__lte=datetime.today() + timedelta(days=7),
            ).order_by('-tweet_count')[:1]
            ctx['popular'] = get_or_none(popular)
            ctx['ourpick'] = get_or_none(Event.objects.active_by_location(location=location, attending_user=request.user_profile, is_homepage_worthy=True).order_by('event_date', 'event_start_time', 'pk')[:1])
            ctx['destination'] = get_or_none(Event.objects.active_by_location(location='destination', attending_user=request.user_profile).order_by('-destination_timestamp')[:1])
            px = (ctx['popular'], ctx['ourpick'], ctx['destination'])
            cache.cache.set(key, px, 600)
        else:
            ctx['popular'], ctx['ourpick'], ctx['destination'] = px
    if extra_context:
        ctx.update(extra_context)
    # Build links to every other city's event listing.
    other_cities = sorted(settings.LOCATION_DATA.keys())
    other_cities.remove(location)
    city_list_html = [
        ('<a href="http://%s.%s%s%s">%s</a>' % (settings.LOCATION_SUBDOMAIN_REVERSE_MAP[loc], settings.DISPLAY_SITE_DOMAIN,  _SUBDOMAIN_PORT, reverse("list_events"), settings.LOCATION_DATA.get(loc)[3])) for loc in other_cities
    ]
    ctx['other_cities'] = ', '.join(city_list_html)
    if request.mobile:
        template = 'mobile/event/city.html'
    if request.user.is_authenticated():
        # Get Friends' Favorites count
        ctx['num_ff'] = get_friends_favorites_count(request.user_profile, location)
    ctx['needs_fbml'] = True
    return render_view(request, template, ctx)
Example #11
0
def search(twitter_id,
           params,
           object_type='',
           reply_to=None,
           queue=None,
           extra_headers=None,
           high_priority=False):
    """Twitter Search"""
    try:
        q = params.get('q', u'')
        key_prefix = high_priority and u'amqp.twitter.priosearch' or u'amqp.twitter.search'
        key = shorten_key(u'%s:%s:%s:%s' %
                          (key_prefix, object_type, twitter_id, q))
        tstamp = cache.cache.get(key, None)
        if tstamp:
            _log.debug("Skipping already queued Twitter search for %s %s",
                       object_type, twitter_id)
            return  # this twitter search is already in the queue; don't requeue it
        headers = {'twitter_id': twitter_id, 'object_type': object_type}
        if extra_headers:
            headers.update(extra_headers)
        headers['cache_key'] = key
        correlation_id = uuid4().hex
        ds = DataStorage()
        ds[correlation_id] = {'timestamp': time.time()}
        msg = aq.Message(params,
                         content_type='application/json',
                         delivery_mode=_DELIVERY_MODE,
                         correlation_id=correlation_id,
                         timestamp=datetime.now(),
                         application_headers=headers,
                         reply_to=reply_to)
        q = queue or SearchQ(bind_queue=False)
        routing_key = high_priority and 'twitter.priosearch.search' or 'twitter.search.search'
        cache.cache.set(key, int(time.time()), 3600 * 5)
        q.send(msg, q.exchange_name, routing_key)
        ds.close()
        _log.debug("Twitter search initiated for twitter_id %s. High prio: %s",
                   twitter_id, high_priority)
    except Exception, e:
        _log.error(
            "Could not initiate Twitter search for twitter_id %s. High prio: %s",
            twitter_id, high_priority)
        _log.exception(e)
Example #12
0
def refresh_friends(twitter_id, queue=None, extra_headers=None, access_token=''):
    """Refresh friends of the given twitter user_id.

    The AMQP chain of functions is:

        1. twitter/twitter.get.get_followers (replies to: 2 with next reply to: 3)
        2. twitter/twitter.get.get_followees (replies to: 3 with next reply to: 4)
        3. twitter/twitter.compute.get_friends (replies to: 4 with next reply to: None)
        4. twitter/twitter.compute.save_friends (replies to: final_reply_to/None with next reply to: None)

    """
    try:
        kx = u"" # uuid4().hex
        correlation_id = shorten_key('twitter_refresh_friends:%s:%s' % (twitter_id, kx))
        ds = DataStorage()
        val = ds[correlation_id]
        if val is not None:
            if (time.time() - val['timestamp']) > THRESHOLD_SECONDS:
                del ds[correlation_id] # clear out zombie data
            else:
                _log.debug("Another worker is refreshing friends for twitter_id %s", twitter_id)
                return
        ds[correlation_id] = {'timestamp':time.time()}
        app_headers = {
            'twitter_id':twitter_id,
            'access_token':access_token,
        }
        if extra_headers:
            app_headers.update(extra_headers)
        msg = aq.Message(
            '',
            delivery_mode=_DELIVERY_MODE,
            correlation_id=correlation_id,
            application_headers=app_headers,
            reply_to='twitter/twitter.get.get_followees'
        )
        q = queue or GetQ(bind_queue=False)
        q.send(msg, q.exchange_name, 'twitter.get.get_followers')
        ds.close()
        _log.debug("Friend refresh initiated for twitter_id %s", twitter_id)
    except Exception, e:
        _log.debug("Could not refresh friends for twitter_id %s", twitter_id)
        _log.exception(e)
Example #13
0
def build_recommended_events(user_profile_id, queue=None):
    """Refresh recommended events for the given user"""
    try:
        kx = uuid4().hex
        msg = aq.Message(
            '',
            delivery_mode=_DELIVERY_MODE,
            correlation_id=shorten_key('rec_events:%s:%s' %
                                       (user_profile_id, kx)),
            application_headers={'user_profile_id': user_profile_id},
        )
        q = queue or EventsQ(bind_queue=False)
        q.send(msg, q.exchange_name, 'signal.events.build_recommended_events')
        _log.debug("Recommended events builder initiated for user %s",
                   user_profile_id)
    except Exception, e:
        _log.debug("Could not build recommended events for user %s",
                   user_profile_id)
        _log.exception(e)
Example #14
0
def do_foursquare_venues(dt=None):
    """Collect foursquare checkin stats for each venue that has events happening today."""
    from fsq import FoursquareApi
    from rdutils.mathutils import farey
    from event.models import Event, Venue, FoursquareTrend
    
    etz = tz.tzstr('EST5EDT') # Eastern timezone
    headers = {
        'User-Agent': getattr(settings, 'FOURSQUARE_USER_AGENT', settings.USER_AGENT)
    }
    fsq = FoursquareApi(headers)
    dt = dt or date.today()
    # Active venues
    events = Event.objects.filter(is_approved=True, event_date=dt).order_by('event_date', 'event_start_time', 'pk')
    venue_ids = set()
    venues = []
    for ev in events:
        if ev.venue.pk not in venue_ids:
            venues.append(ev.venue)
            venue_ids.add(ev.venue.pk)
    previous_venues = Venue.objects.exclude(pk__in=list(venue_ids)).filter(fsq_checkins__gt=0)
    previous_venues.update(fsq_checkins=0, fsq_ratio=u'', fsq_mf=0, fsq_fm=0, fsq_m=0, fsq_f=0)
    # Get and populate FSQ ids of active venues where fsq_id was not previously obtained
    for v in venues:
        if not v.fsq_id and v.geo_loc and ',' in v.geo_loc:            
            keyx = shorten_key(u"venue_id_4sq:%s" % v.pk)            
            vx = cache.cache.get(keyx, None)
            if vx is None:                
                lat, lng = v.geo_loc.split(',')            
                venue_name = v.name.encode("utf-8", "ignore")                
                _log.debug("Getting 4sq venue ID for %s", venue_name)       
                sleep(.25)         
                vx = fsq.get_venues(geolat=lat, geolong=lng, q=venue_name, l=1)                
                try:
                    vid = vx['groups'][0]['venues'][0]['id']
                    v.fsq_id = unicode(vid)
                    v.save()
                except Exception, e:
                    vid = 0
                    _log.debug("FSQ ID for venue %s could not be obtained: %s\n%s", v.pk, e, vx)
                cache.cache.set(keyx, vid, 7200) # cache for 2 hours
Example #15
0
def get_tiny_url(url):
    """Return a shortened URL for *url*, or *url* itself on failure.

    Lookup order: local cache (12h TTL), then the KeyValue table, then the
    remote shortener API (bit.ly JSON when TINY_URL_API points there,
    otherwise a tinyurl-style endpoint returning the short URL as text).
    """
    tiny_link = None
    d = u""       # bit.ly JSON response, kept around for error logging
    params = u""  # request params, kept around for error logging
    try:
        key = shorten_key(u"tiny-%s" % url)
        tiny_link = cache.get(key, None)
        if tiny_link is None:
            # Fall back to the persistent KeyValue store.
            tiny_link = get_or_none(KeyValue.objects, key=url, category="tinyurl")
            if tiny_link:
                tiny_link = tiny_link.value
            if tiny_link is None:
                tiny_url_api = getattr(settings, "TINY_URL_API", "http://tinyurl.com/api-create.php")
                if "bit.ly" in tiny_url_api:
                    params = {
                        "longUrl": url,
                        "login": settings.BITLY_API_LOGIN,
                        "apiKey": settings.BITLY_API_KEY,
                        "version": getattr(settings, "BITLY_API_VERSION", "2.0.1"),
                        "format": "json",
                    }
                else:
                    params = {"url": url}
                data = urllib.urlencode(params)
                tiny_link = urllib2.urlopen(tiny_url_api, data).read().strip()
                if "bit.ly" in tiny_url_api:
                    # bit.ly wraps the short URL in a JSON envelope.
                    d = json.loads(tiny_link)
                    if not d:
                        return url
                    tiny_link = d["results"][url]["shortUrl"]
                if len(tiny_link) > 25:
                    # Implausibly long response; treat as a failed shortening.
                    return url
                KeyValue.objects.get_or_create(key=url, category="tinyurl", defaults=dict(value=tiny_link))
            cache.set(key, tiny_link, 3600 * 12)
        return tiny_link
    except Exception, e:
        _log.exception(e)
        _log.warning("Failed to get short URL for %s\nwith error: %s\nd=%s\nparams=%s", url, e, d, params)
        return tiny_link or url
Example #16
0
def recommend_event_to_friends(user_profile_id, event_id, queue=None):
    """Recommend an event to user's friends"""
    try:
        kx = uuid4().hex
        msg = aq.Message(
            '',
            delivery_mode=_DELIVERY_MODE,
            correlation_id=shorten_key('rec_event:%s:%s:%s' %
                                       (user_profile_id, event_id, kx)),
            application_headers={
                'user_profile_id': user_profile_id,
                'event_id': event_id
            },
        )
        q = queue or EventsQ(bind_queue=False)
        q.send(msg, q.exchange_name,
               'signal.events.recommend_event_to_friends')
        _log.debug("Recommending event %s to user %s's friends", event_id,
                   user_profile_id)
    except Exception, e:
        _log.debug("Could not recommend event %s to user %s's friends",
                   event_id, user_profile_id)
        _log.exception(e)
Example #17
0
             except Exception, e:
                 vid = 0
                 _log.debug(
                     "FSQ ID for venue %s could not be obtained: %s\n%s",
                     v.pk, e, vx)
             cache.cache.set(keyx, vid, 7200)  # cache for 2 hours
 # Get and populate checkin for each venue
 num = 0
 err = 0
 api_cache = {
 }  # save results of calls to a venue during this run so that we don't call for the same venue twice
 for v in venues:
     if v.fsq_id:
         _log.debug("Fetching details for 4sq venue %s (our venue %s)",
                    v.fsq_id, v.pk)
         key = shorten_key(u"venue_4sq:%s" % v.pk)
         whos_here = cache.cache.get(key, [])
         fv = api_cache.get(v.fsq_id, None)
         if fv:
             _log.debug("Using cached 4sq venue call")
         try:
             if not fv:
                 sleep(.5)
             fv = fv or fsq.get_venue_detail(
                 vid=v.fsq_id,
                 username=settings.FOURSQUARE_USERNAME,
                 password=settings.FOURSQUARE_PASSWORD)
         except (URLError, AttributeError):
             sleep(30)  # try once more in 30 seconds
             fv = fsq.get_venue_detail(
                 vid=v.fsq_id,
Example #18
0
def search(params, rpp=100, lang='all', count=0):
    params_q, urlx, object_id, object_type = None, None, None, None
    try:
        object_type = params.pop('object_type', 'no_type')
        object_id = params.pop('object_id', 'no_id')
        # api_count
        api_count = cache.get('twitter_search_count', 0)
        api_count = api_count % 1000000  # reset after every million searches
        cache.set('twitter_search_count', api_count + 1, 300000)
        if not 'lang' in params:
            params['lang'] = lang
        if not 'rpp' in params:
            params['rpp'] = rpp
        if not params.get('q', False):
            # nothing to search
            return []
        k = params.items()
        k.sort()
        params_q = urlencode(params)
        cache_key = shorten_key(u'%s' % k)
        tweets = cache.get(cache_key, None)
        if tweets is None:
            _log.debug("Searching Twitter: %s", params_q)
            urlx = TWITTER_SEARCH_URL % params_q
            req = urllib2.Request(urlx)
            req.add_header("Content-type", "application/x-www-form-urlencoded")
            req.add_header('User-Agent', settings.USER_AGENT)
            req.add_header('Referer', 'http://riotvine.com/')
            resp = urllib2.urlopen(req)
            ret = resp.read()
            resp.close()
            results = json.loads(ret)
            # Remove blacklisted users
            tweets = filter(NotInBlackList(), results.get('results', []))
            cache.set(cache_key, tweets, 180)
            if api_count % 25 == 0:
                # back off for a few seconds after every 25 searches
                zzz = 5
                if api_count % 100 == 0:
                    zzz += 10
                if api_count % 400 == 0:
                    zzz += 10
            else:
                zzz = settings.TWITTER_SEARCH_SLEEP
            time.sleep(zzz)  # search throttle
        return tweets
    except urllib2.HTTPError, h:
        # e.g.: raise HTTPError(req.get_full_url(), code, msg, hdrs, fp)
        msg = h.info()  # httplib.HttpMessage
        try:
            retry_after = int(msg.get('Retry-After', 0))
        except:
            retry_after = 0
        if h.code != 403:  # Skip bad search warning here; it's logged later down below
            if h.code not in (502, 503, 404, 420):
                _log.exception(h)
                _log.warn(
                    "Search count: %s\n%s\n\n%s\n\nRetry count: %s\n\n%s\n%s - %s",
                    api_count, params, unicode(msg), count, urlx, object_type,
                    object_id)
        if h.code in (
                503, 502,
                420):  # grab retry-after header (we are being rate-limited)
            # 503 or 420 - we are being rate limited
            # 502 - bad gateway - Twitter overloaded
            if h.code in (
                    503, 420):  # reset twitter search count on rate limitation
                cache.set('twitter_search_count', 0,
                          300000)  # reset count at throttle
            if not retry_after:
                retry_after = 10
            throttle = retry_after * 1.1
            time.sleep(throttle)
            retry_after = 0
            _log.debug("%s \n\nThrottled by %s seconds [status: %s]\n%s - %s",
                       params, throttle, h.code, object_type, object_id)
            if count < settings.TWITTER_SEARCH_RETRY_TIMES:
                p = dict(object_type=object_type, object_id=object_id)
                p.update(params)
                return search(p, rpp=rpp, lang=lang, count=count + 1)  # retry
        elif h.code == 403:
            _log.warn("Bad search\n%s\n%s\n%s - %s", params, urlx, object_type,
                      object_id)
            retry_after = 5
        elif h.code == 404:
            pass  # skip and move on
        else:
            retry_after = 10
        if retry_after:
            throttle = retry_after * 1.1
            time.sleep(throttle)
            retry_after = 0
        return []
Example #19
0
             try:
                 vid = vx['groups'][0]['venues'][0]['id']
                 v.fsq_id = unicode(vid)
                 v.save()
             except Exception, e:
                 vid = 0
                 _log.debug("FSQ ID for venue %s could not be obtained: %s\n%s", v.pk, e, vx)
             cache.cache.set(keyx, vid, 7200) # cache for 2 hours
 # Get and populate checkin for each venue
 num = 0
 err = 0
 api_cache = {} # save results of calls to a venue during this run so that we don't call for the same venue twice
 for v in venues:
     if v.fsq_id:            
         _log.debug("Fetching details for 4sq venue %s (our venue %s)", v.fsq_id, v.pk)
         key = shorten_key(u"venue_4sq:%s" % v.pk)
         whos_here = cache.cache.get(key, [])            
         fv = api_cache.get(v.fsq_id, None)
         if fv:
             _log.debug("Using cached 4sq venue call")
         try:
             if not fv:
                 sleep(.5)
             fv = fv or fsq.get_venue_detail(vid=v.fsq_id, username=settings.FOURSQUARE_USERNAME, password=settings.FOURSQUARE_PASSWORD)            
         except (URLError, AttributeError):
             sleep(30) # try once more in 30 seconds
             fv = fsq.get_venue_detail(vid=v.fsq_id, username=settings.FOURSQUARE_USERNAME, password=settings.FOURSQUARE_PASSWORD)            
         api_cache[v.fsq_id] = fv # cache it
         if 'ratelimited' in fv:
             _log.error("We've been ratelimited by FSQ")
             err += 1
Example #20
0
def search(params, rpp=100, lang='all', count=0):
    params_q, urlx, object_id, object_type = None, None, None, None
    try:
        object_type = params.pop('object_type', 'no_type')
        object_id = params.pop('object_id', 'no_id')
        # api_count
        api_count = cache.get('twitter_search_count', 0)
        api_count = api_count % 1000000 # reset after every million searches
        cache.set('twitter_search_count', api_count+1, 300000)
        if not 'lang' in params:
            params['lang'] = lang
        if not 'rpp' in params:
            params['rpp'] = rpp
        if not params.get('q', False):
            # nothing to search
            return []       
        k = params.items()
        k.sort()
        params_q = urlencode(params)
        cache_key = shorten_key(u'%s' % k)
        tweets = cache.get(cache_key, None)
        if tweets is None:
            _log.debug("Searching Twitter: %s", params_q)
            urlx = TWITTER_SEARCH_URL % params_q
            req = urllib2.Request(urlx)
            req.add_header("Content-type", "application/x-www-form-urlencoded")
            req.add_header('User-Agent', settings.USER_AGENT)
            req.add_header('Referer', 'http://riotvine.com/')
            resp = urllib2.urlopen(req)
            ret = resp.read()
            resp.close()
            results = json.loads(ret)
            # Remove blacklisted users
            tweets = filter(NotInBlackList(), results.get('results', []))
            cache.set(cache_key, tweets, 180)
            if api_count % 25 == 0:
                # back off for a few seconds after every 25 searches
                zzz = 5
                if api_count % 100 == 0:
                    zzz += 10
                if api_count % 400 == 0:
                    zzz += 10
            else:
                zzz = settings.TWITTER_SEARCH_SLEEP
            time.sleep(zzz) # search throttle
        return tweets
    except urllib2.HTTPError, h: 
        # e.g.: raise HTTPError(req.get_full_url(), code, msg, hdrs, fp)
        msg = h.info() # httplib.HttpMessage
        try:
            retry_after = int(msg.get('Retry-After', 0))
        except:
            retry_after = 0
        if h.code != 403: # Skip bad search warning here; it's logged later down below
            if h.code not in (502, 503, 404, 420):
                _log.exception(h)
                _log.warn("Search count: %s\n%s\n\n%s\n\nRetry count: %s\n\n%s\n%s - %s", api_count, params, unicode(msg), count,  urlx, object_type, object_id)
        if h.code in (503, 502, 420): # grab retry-after header (we are being rate-limited)
            # 503 or 420 - we are being rate limited
            # 502 - bad gateway - Twitter overloaded
            if h.code in (503, 420): # reset twitter search count on rate limitation
                cache.set('twitter_search_count', 0, 300000) # reset count at throttle
            if not retry_after:
                retry_after = 10
            throttle = retry_after * 1.1
            time.sleep(throttle)
            retry_after = 0
            _log.debug("%s \n\nThrottled by %s seconds [status: %s]\n%s - %s", params, throttle, h.code, object_type, object_id)
            if count < settings.TWITTER_SEARCH_RETRY_TIMES:
                p = dict(object_type=object_type, object_id=object_id)
                p.update(params)
                return search(p, rpp=rpp, lang=lang, count=count+1) # retry
        elif h.code == 403:
            _log.warn("Bad search\n%s\n%s\n%s - %s", params, urlx, object_type, object_id)
            retry_after = 5
        elif h.code == 404:
            pass # skip and move on
        else:
            retry_after = 10
        if retry_after:
            throttle = retry_after * 1.1
            time.sleep(throttle)
            retry_after = 0
        return []