Example #1
0
def index_feed_polling(url, context, timeout=15, request_info=None):
    """
    poll the feed at the url given and index it immediately on 
    the calling thread. 
    """
    if request_info is None:
        request_info = {}

    feed = RemoteFeed.get_by_url(url, context)
    if feed is None:
        feed = RemoteFeed.create_from_url(url, context)

    if check_request_approved(feed, request_info, context) == False:
        log.warn("Rejected index request for %s" % url)
        return

    reschedule = not request_info.get('skip_reschedule', False)
    http_cache = context.config.get('http', {}).get('cache', None)

    # fetch
    http = Http(cache=http_cache, timeout=timeout)
    http.force_exception_to_status_code = True
    response, content = http.request(url, 'GET')

    updated_docs = []
    if response.fromcache:
        feed.record_update_info(success=True, updates=0, method=METHOD_POLL)
    elif response.status != 200:
        feed.record_update_info(success=False, updates=0, 
                           reason=response.reason, method=METHOD_POLL)
    else:
        # 200 status code, not from cache, do update...
        feed.update_from_feed(content, method=METHOD_POLL)

    # compute the next time to check...
    next_interval = compute_next_fetch_interval(feed.update_history)
    log.debug("next update interval for %s = %s" % (feed.url, next_interval))
    feed.next_poll_time = datetime.utcnow() + next_interval
    feed.poll_in_progress = False
    feed.save()

    log.info("Updated feed %s success: %s, %d new items" % 
      (feed.url, feed.update_history[0].success, feed.update_history[0].updates))

    # whee... request at the next time !
    if reschedule:
        message_id = 'periodic_index_%s' % RemoteFeed.id_for_url(feed.url)
        schedule_feed_index(feed.url, feed.next_poll_time, context, message_id=message_id)

    run_post_index_hooks(feed, context)