Example #1
0
def test_disabled_unsubscribes(ctx):
    """
    tests that if pubsub is disabled for a 
    feed, it becomes unsubscribed from it's 
    hub.
    """
    from eventlet import sleep, spawn
    from melkman.db import RemoteFeed
    from melkman.fetch.pubsubhubbub import WSGISubClient, callback_url_for 
    from melkman.fetch.pubsubhubbub import hubbub_sub, update_pubsub_state
    
    w = WSGISubClient(ctx)
    client = spawn(w.run)

    hub = FakeHub()
    hub_proc = spawn(hub.run)    
    hub_url = 'http://localhost:%d/' % hub.port

    feed_url = 'http://example.org/feeds/99'
    rf = RemoteFeed.create_from_url(feed_url, ctx)
    rf.feed_info = {'links': [{'rel': 'self', 'href': feed_url},
                              {'rel': 'hub', 'href': hub_url}]}
    rf.save()
    
    # subscribe to the feed on the hub
    cb = callback_url_for(feed_url, ctx)
    assert not hub.is_verified(cb, feed_url)
    r, c = hubbub_sub(rf, ctx)
    assert r.status == 202, 'Expected 202, got %d' % r.status
    sleep(.5)
    assert hub.is_verified(cb, feed_url)
    
    # disable pubsub for the feed
    rf = RemoteFeed.get_by_url(feed_url, ctx)
    assert rf.hub_info.enabled == True and rf.hub_info.subscribed == True
    rf.hub_info.enabled = False
    rf.save()
    
    # trigger an update
    update_pubsub_state(rf, ctx)
    
    # check that it is now unsubscribed.
    sleep(.5)
    assert not hub.is_verified(cb, feed_url)
    rf = RemoteFeed.get_by_url(feed_url, ctx)
    assert rf.hub_info.enabled == False and rf.hub_info.subscribed == False
    
    client.kill()
    client.wait()
    hub_proc.kill()
    hub_proc.wait()
Example #2
0
def test_get_or_immediate_create_by_url(ctx):
    """
    test that get_or_immediate_create_by_url retrieves existing feeds by url
    if they in fact exist, and creates them if they don't.
    """
    from datetime import datetime, timedelta
    from eventlet import sleep
    from melkman.db.remotefeed import RemoteFeed, get_or_immediate_create_by_url

    feed_url = 'http://example.org/1'
    # make sure it doesn't exist yet
    feed = RemoteFeed.get_by_url(feed_url, ctx)
    assert feed is None

    # this should result in its immediate creation
    creationdt = datetime.utcnow()
    feed = get_or_immediate_create_by_url(feed_url, ctx)
    assert feed is not None
    assert feed.last_modification_date - creationdt < timedelta(seconds=1)

    sleep(1)
    
    # this should retrieve the existing feed, not create a new one
    now = datetime.utcnow()
    samefeed = get_or_immediate_create_by_url(feed_url, ctx)
    assert samefeed.id == feed.id
    assert samefeed.last_modification_date == feed.last_modification_date
    assert now - samefeed.last_modification_date >= timedelta(seconds=1)
Example #3
0
    def _validate_sub_request(self, req):
        mode = req.GET.get('hub.mode', None)
        topic = req.GET.get('hub.topic', None)
        verify_token = req.GET.get('hub.verify_token', None)
        url = _determine_feed_url(req)

        if topic is None or mode is None or verify_token is None or url is None:
            return False

        rf = RemoteFeed.get_by_url(url, self.context)
        if rf is None:
            # for feeds we don't know about, confirm unsubscribes
            # and reject subscribes.
            return mode == 'unsubscribe'

        if topic != topic_url_for(rf):
            log.warn("hub sent mismatched feed / topic: (%s, %s)" % (topic, topic_url_for(rf)))
            return False

        if mode == 'subscribe':
            if (rf.hub_info.enabled and
                rf.hub_info.verify_token == verify_token):
                
                try:
                    lease_time = int(req.GET.get('hub.lease_seconds', DEFAULT_LEASE))
                except:
                    lease_time = DEFAULT_LEASE
                     
                lease_time = min(lease_time, DEFAULT_LEASE)
                next_sub_time = datetime.utcnow() + timedelta(seconds=lease_time/2)
                
                # mark the feed as subscribed only when we have recieved 
                # a proper subscription verification from the hub.
                rf.hub_info.subscribed = True
                rf.hub_info.next_sub_time = next_sub_time
                rf.save()
                return True
            else:
                return False

        elif mode == 'unsubscribe':
            ps = rf.hub_info
            if ps.enabled and ps.subscribed and verify_token == ps.verify_token:
                """
                deny any valid unsubscribe requests for enabled feeds
                that we believe should be subscribed.
                """
                return False
            # anything else we approve, invalid, disabled, unsubscribed.
            return True


        else:
            log.warn("hub sent unknown sub mode: %s" % mode)
            return False
Example #4
0
def test_auto_sub(ctx):
    # tests autosubscription when feeds are indexed 
    # with <link rel="hub" /> entries. 
    from datetime import datetime
    from eventlet import sleep, spawn
    from melkman.db import RemoteFeed
    from melkman.fetch import push_feed_index
    from melkman.fetch.pubsubhubbub import WSGISubClient, callback_url_for
    from melkman.fetch.worker import run_feed_indexer

    
    w = WSGISubClient(ctx)
    client = spawn(w.run)
    indexer = spawn(run_feed_indexer, ctx)

    hub = FakeHub()
    hub_proc = spawn(hub.run)    
    hub_url = 'http://localhost:%d/' % hub.port

    feed_url = 'http://www.example.org/feeds/12'

    content = """<?xml version="1.0" encoding="utf-8"?>
      <feed xmlns="http://www.w3.org/2005/Atom">
      <id>%s</id>
      <title>Blah</title>
      <link rel="self" href="%s"/>
      <link rel="hub" href="%s" />
      <updated>%s</updated>
      <author>
        <name>Joop Doderer</name>
      </author>
      </feed>
    """ %  (feed_url, feed_url, hub_url,
            rfc3339_date(datetime.utcnow()))

    # push content in...
    push_feed_index(feed_url, content, ctx)
    sleep(.5)
    
    # check for automatic subscription...
    cb = callback_url_for(feed_url, ctx)
    assert hub.is_verified(cb, feed_url)

    rf = RemoteFeed.get_by_url(feed_url, ctx)
    assert rf.hub_info.enabled
    assert rf.hub_info.hub_url == hub_url

    client.kill()
    client.wait()
    indexer.kill()
    indexer.wait()
    hub_proc.kill()
    hub_proc.wait()
Example #5
0
def index_feed_polling(url, context, timeout=15, request_info=None):
    """
    poll the feed at the url given and index it immediately on 
    the calling thread. 
    """
    if request_info is None:
        request_info = {}

    feed = RemoteFeed.get_by_url(url, context)
    if feed is None:
        feed = RemoteFeed.create_from_url(url, context)

    if check_request_approved(feed, request_info, context) == False:
        log.warn("Rejected index request for %s" % url)
        return

    reschedule = not request_info.get('skip_reschedule', False)
    http_cache = context.config.get('http', {}).get('cache', None)

    # fetch
    http = Http(cache=http_cache, timeout=timeout)
    http.force_exception_to_status_code = True
    response, content = http.request(url, 'GET')

    updated_docs = []
    if response.fromcache:
        feed.record_update_info(success=True, updates=0, method=METHOD_POLL)
    elif response.status != 200:
        feed.record_update_info(success=False, updates=0, 
                           reason=response.reason, method=METHOD_POLL)
    else:
        # 200 status code, not from cache, do update...
        feed.update_from_feed(content, method=METHOD_POLL)

    # compute the next time to check...
    next_interval = compute_next_fetch_interval(feed.update_history)
    log.debug("next update interval for %s = %s" % (feed.url, next_interval))
    feed.next_poll_time = datetime.utcnow() + next_interval
    feed.poll_in_progress = False
    feed.save()

    log.info("Updated feed %s success: %s, %d new items" % 
      (feed.url, feed.update_history[0].success, feed.update_history[0].updates))

    # whee... request at the next time !
    if reschedule:
        message_id = 'periodic_index_%s' % RemoteFeed.id_for_url(feed.url)
        schedule_feed_index(feed.url, feed.next_poll_time, context, message_id=message_id)

    run_post_index_hooks(feed, context)
Example #6
0
def test_hub_lease_renew(ctx):
    """
    tests that we resubscribe with a hub 
    within the hub specified lease window.
    """
    from eventlet import sleep, spawn
    from melkman.db import RemoteFeed
    from melkman.fetch.pubsubhubbub import WSGISubClient, callback_url_for 
    from melkman.fetch.pubsubhubbub import hubbub_sub, update_pubsub_state

    w = WSGISubClient(ctx)
    client = spawn(w.run)

    # create a hub with a very short lease time.
    hub = FakeHub(lease_seconds=2)
    hub_proc = spawn(hub.run)    
    hub_url = 'http://localhost:%d/' % hub.port

    feed_url = 'http://example.org/feeds/99'
    rf = RemoteFeed.create_from_url(feed_url, ctx)
    rf.feed_info = {'links': [{'rel': 'self', 'href': feed_url},
                              {'rel': 'hub', 'href': hub_url}]}
    rf.save()
    
    # subscribe to the feed on the hub
    cb = callback_url_for(feed_url, ctx)
    assert not hub.is_verified(cb, feed_url)
    r, c = hubbub_sub(rf, ctx)
    assert r.status == 202, 'Expected 202, got %d' % r.status
    sleep(.5)
    assert hub.is_verified(cb, feed_url)
    rf = RemoteFeed.get_by_url(feed_url, ctx)

    assert hub.renewals(cb, feed_url) == 0
    
    sleep(2)
    
    update_pubsub_state(rf, ctx)
    
    # make sure we triggered a lease renewal
    assert hub.renewals(cb, feed_url) == 1

    client.kill()
    client.wait()
    hub_proc.kill()
    hub_proc.wait()
Example #7
0
def index_feed_push(url, content, context, request_info=None):
    if request_info is None:
        request_info = {}

    feed = RemoteFeed.get_by_url(url, context)

    updated_docs = []
    if feed is None:
        feed = RemoteFeed.create_from_url(url, context)

    if check_request_approved(feed, request_info, context) == False:
        log.warn("Rejected index request for %s" % url)
        return 


    # 200 status code, not from cache, do update...
    feed.update_from_feed(content, method=METHOD_PUSH)
    feed.save()

    log.info("Updated feed %s success: %s, %d new items" % 
      (feed.url, feed.update_history[0].success, feed.update_history[0].updates))

    run_post_index_hooks(feed, context)
Example #8
0
def test_hub_lease_renew_failover(ctx):
    """
    tests that if we fail to renew a lease with a hub 
    we will failover to a different hub if one is available.
    """
    from eventlet import sleep, spawn
    from melkman.db import RemoteFeed
    from melkman.fetch.pubsubhubbub import WSGISubClient, callback_url_for 
    from melkman.fetch.pubsubhubbub import hubbub_sub, update_pubsub_state


    w = WSGISubClient(ctx)
    client = spawn(w.run)

    # create a hub with a very short lease time.
    hub = FakeHub(lease_seconds=2)
    hub_proc = spawn(hub.run)    
    hub_url = 'http://localhost:%d/' % hub.port

    hub2 = FakeHub(port=9298)
    hub2_proc = spawn(hub2.run)
    hub2_url = 'http://localhost:%d/' % hub2.port

    feed_url = 'http://example.org/feeds/99'
    rf = RemoteFeed.create_from_url(feed_url, ctx)
    rf.feed_info = {'links': [{'rel': 'self', 'href': feed_url},
                              {'rel': 'hub', 'href': hub_url}, 
                              {'rel': 'hub', 'href': hub2_url}]}
    rf.save()

    # subscribe to the feed on the hub
    cb = callback_url_for(feed_url, ctx)
    assert not hub.is_verified(cb, feed_url)
    r, c = hubbub_sub(rf, ctx)
    assert r.status == 202, 'Expected 202, got %d' % r.status
    sleep(.5)
    
    rf = RemoteFeed.get_by_url(feed_url, ctx)
    assert rf.hub_info.subscribed == True
    assert rf.hub_info.hub_url == hub_url
    assert hub.is_verified(cb, feed_url)
    assert not hub2.is_verified(cb, feed_url)

    assert hub.renewals(cb, feed_url) == 0
    sleep(2)
    
    # kill the first hub so that when we update, 
    # the renewal will fail...
    hub_proc.kill()

    # when this update is triggered, renewal should fail and 
    # we should instead subscribe to the alternate hub.
    update_pubsub_state(rf, ctx)
    
    assert hub.renewals(cb, feed_url) == 0
    rf = RemoteFeed.get_by_url(feed_url, ctx)    
    assert rf.hub_info.subscribed == True
    assert rf.hub_info.hub_url == hub2_url
    assert hub2.is_verified(cb, feed_url)

    client.kill()
    client.wait()
    hub2_proc.kill()
    hub2_proc.wait()
Example #9
0
def test_hub_invalidation_resub(ctx):
    """
    tests that if a currently subscribed hub is 
    no longer listed, we subscribe to a different
    hub if any are listed.
    """
    from eventlet import sleep, spawn
    from melkman.db import RemoteFeed
    from melkman.fetch.pubsubhubbub import WSGISubClient, callback_url_for 
    from melkman.fetch.pubsubhubbub import hubbub_sub, update_pubsub_state

    w = WSGISubClient(ctx)
    client = spawn(w.run)

    # create two hubs
    hub = FakeHub()
    hub_proc = spawn(hub.run)    
    hub_url = 'http://localhost:%d/' % hub.port

    hub2 = FakeHub(port=9298)
    hub2_proc = spawn(hub2.run)    
    hub2_url = 'http://localhost:%d/' % hub2.port


    feed_url = 'http://example.org/feeds/99'
    rf = RemoteFeed.create_from_url(feed_url, ctx)
    rf.feed_info = {'links': [{'rel': 'self', 'href': feed_url},
                              {'rel': 'hub', 'href': hub_url}]}
    rf.save()
    
    # subscribe to the feed on the hub
    cb = callback_url_for(feed_url, ctx)
    assert not hub.is_verified(cb, feed_url)
    r, c = hubbub_sub(rf, ctx)
    assert r.status == 202, 'Expected 202, got %d' % r.status
    sleep(.5)
    assert hub.is_verified(cb, feed_url)

    # remove the hub from the list of hubs, but replace it with another
    rf = RemoteFeed.get_by_url(feed_url, ctx)
    rf.feed_info = {'links': [{'rel': 'self', 'href': feed_url}, 
                              {'rel': 'hub', 'href': hub2_url}]}
    rf.save()

    # trigger an update
    update_pubsub_state(rf, ctx)
    
    # check that it is now unsubscribed from the original hub, and 
    # is now subscribed to the new hub.
    sleep(2)
    assert not hub.is_verified(cb, feed_url)
    assert hub2.is_verified(cb, feed_url)
    
    rf = RemoteFeed.get_by_url(feed_url, ctx)
    assert rf.hub_info.enabled == True and rf.hub_info.subscribed == True
    
    client.kill()
    client.wait()
    hub_proc.kill()
    hub_proc.wait()
    hub2_proc.kill()
    hub2_proc.wait()
Example #10
0
def test_push_index_digest(ctx):
    from melk.util.nonce import nonce_str
    from melkman.db.remotefeed import RemoteFeed
    from melkman.fetch import push_feed_index
    from melkman.fetch.worker import run_feed_indexer
    from eventlet import sleep, spawn
    from melkman.fetch.pubsubhubbub import psh_digest


    # start a feed indexer
    indexer = spawn(run_feed_indexer, ctx)

    url = 'http://www.example.com/feeds/2'
    rf = RemoteFeed.create_from_url(url, ctx)
    rf.hub_info.enabled = True
    rf.hub_info.subscribed = True
    rf.save()

    secret = nonce_str()

    content = random_atom_feed(url, 10)
    ids = melk_ids_in(content, url)

    correct_digest = 'sha1=%s' % psh_digest(content, secret)
    wrong_digest = 'wrong digest'

    #
    # no hub secret is specified on the feed
    #
    push_feed_index(url, content, ctx, digest=wrong_digest, from_hub=True)
    sleep(.5)
    rf = RemoteFeed.get_by_url(url, ctx)
    for iid in ids:
        assert iid not in rf.entries
    push_feed_index(url, content, ctx, digest=None, from_hub=True)
    sleep(.5)
    rf = RemoteFeed.get_by_url(url, ctx)
    for iid in ids:
        assert iid not in rf.entries
    # even the correct digest fails as no digest has been set 
    push_feed_index(url, content, ctx, digest=correct_digest, from_hub=True)
    sleep(.5)
    rf = RemoteFeed.get_by_url(url, ctx)
    for iid in ids:
        assert iid not in rf.entries

    #
    # now set the hub secret
    #
    rf.hub_info.secret = secret
    rf.save()

    push_feed_index(url, content, ctx, digest=wrong_digest, from_hub=True)
    sleep(.5)
    rf = RemoteFeed.get_by_url(url, ctx)
    for iid in ids:
        assert iid not in rf.entries
    push_feed_index(url, content, ctx, digest=None, from_hub=True)
    sleep(.5)
    rf = RemoteFeed.get_by_url(url, ctx)
    for iid in ids:
        assert iid not in rf.entries

    # finally, the correct digest should work now...
    push_feed_index(url, content, ctx, digest=correct_digest, from_hub=True)
    sleep(.5)
    rf = RemoteFeed.get_by_url(url, ctx)
    for iid in ids:
        assert iid in rf.entries

    indexer.kill()
    indexer.wait()
Example #11
0
def test_sub_to_hub(ctx):
    """
    test make_sub_request and make_unsub_request
    """
    
    from httplib2 import Http
    from eventlet import sleep, spawn
    from melk.util.nonce import nonce_str
    import traceback

    from melkman.db import RemoteFeed
    from melkman.fetch.worker import run_feed_indexer
    from melkman.fetch.pubsubhubbub import WSGISubClient
    from melkman.fetch.pubsubhubbub import callback_url_for
    from melkman.fetch.pubsubhubbub import hubbub_sub
    from melkman.fetch.pubsubhubbub import hubbub_unsub
    from melkman.fetch.pubsubhubbub import psh_digest
    
    import logging
    logging.basicConfig(level=logging.WARN)
    


    w = WSGISubClient(ctx)
    client = spawn(w.run)
    indexer = spawn(run_feed_indexer, ctx)

    hub = FakeHub()
    hub_proc = spawn(hub.run)
    
    hub_url = 'http://localhost:%d/' % hub.port
    
    feed_url = 'http://example.org/feeds/99'
    rf = RemoteFeed.create_from_url(feed_url, ctx)
    rf.feed_info = {'links': [{'rel': 'self', 'href': feed_url},
                              {'rel': 'hub', 'href': hub_url}]}
    rf.save()
    
    cb = callback_url_for(feed_url, ctx)
    
    # subscribe to the hub
    assert not hub.is_verified(cb, feed_url)
    r, c = hubbub_sub(rf, ctx)
    assert r.status == 202, 'Expected 202, got %d' % r.status
    sleep(.5)
    assert hub.is_verified(cb, feed_url)
    secret = hub.secret_for(cb, feed_url)
    
    http = Http()

    # simulate hub posting to callback URL
    content = random_atom_feed(feed_url, 10, link=feed_url, hub_urls=[hub_url])
    digest = 'sha1=%s' % psh_digest(content, secret)
    r, c = http.request(cb, 'POST', body=content, headers={'X-Hub-Signature': digest})
    assert r.status == 200, 'Expected 200, got %d' % r.status
    sleep(0.5)
    
    # since we are subscribed, new items should be in the feed now
    rf = RemoteFeed.get_by_url(feed_url, ctx)
    assert len(rf.entries) == 10
    for iid in melk_ids_in(content, feed_url):
        assert iid in rf.entries
    
    # unsubscribe from hub
    r, c = hubbub_unsub(rf, ctx)
    assert r.status == 202, 'Expected 202, got %d' % r.status
    sleep(.5)
    assert not hub.is_verified(cb, feed_url)
    
    # simulate another POST to the callback URL 
    # this time it should fail (we are not subscribed)
    content = random_atom_feed(feed_url, 10, link=feed_url, hub_urls=[hub_url])
    digest = "sha1=%s" % psh_digest(content, secret)
    r, c = http.request(cb, 'POST', body=content, headers={'X-Hub-Signature': digest})
    assert r.status == 200, 'Expected 200, got %d' % r.status
    sleep(0.5)
    # items should be the same as before (not subscribed)
    rf = RemoteFeed.get_by_url(feed_url, ctx)
    assert len(rf.entries) == 10
    for iid in melk_ids_in(content, feed_url):
        assert not iid in rf.entries
        
    client.kill()
    client.wait()
    indexer.kill()
    indexer.wait()
    hub_proc.kill()
    hub_proc.wait()
Example #12
0
def test_sub_push(ctx):
    from httplib2 import Http
    from eventlet import sleep, spawn
    from melk.util.nonce import nonce_str
    from melkman.db import RemoteFeed
    from melkman.fetch.worker import run_feed_indexer
    from melkman.fetch.pubsubhubbub import WSGISubClient, callback_url_for, psh_digest
    
    import logging
    logging.basicConfig(level=logging.WARN)
    
    
    w = WSGISubClient(ctx)
    client = spawn(w.run)
    indexer = spawn(run_feed_indexer, ctx)
    
    http = Http()
    url = 'http://example.org/feed/0'
    content = random_atom_feed(url, 10)
    secret = nonce_str()
    
    digest = 'sha1=%s' % psh_digest(content, secret)
    cb = callback_url_for(url, ctx)
    
    assert RemoteFeed.get_by_url(url, ctx) == None
    
    # try posting something that is not subscribed
    r, c = http.request(cb, 'POST', body=content, headers={'X-Hub-Signature': digest})
    assert r.status == 200, 'Expected 200, got %d' % r.status
    sleep(1)
    # nothing should happen...
    assert RemoteFeed.get(url, ctx) == None

    # set up the feed, but don't subscribe
    rf = RemoteFeed.create_from_url(url, ctx)
    rf.save()
    r, c = http.request(cb, 'POST', body=content, headers={'X-Hub-Signature': digest})
    assert r.status == 200, 'Expected 200, got %d' % r.status
    sleep(1)
    # nothing should happen...
    rf = RemoteFeed.get_by_url(url, ctx)
    assert len(rf.entries) == 0
    
    # now set it up 
    rf.hub_info.enabled = True
    rf.hub_info.subscribed = True
    rf.hub_info.secret = secret
    rf.save()

    # try with wrong digest...
    r, c = http.request(cb, 'POST', body=content, headers={'X-Hub-Signature': 'wrong'})
    assert r.status == 200, 'Expected 200, got %d' % r.status
    sleep(0.5)
    # nothing should happen...
    rf = RemoteFeed.get_by_url(url, ctx)
    assert len(rf.entries) == 0
    
    # try with no digest
    r, c = http.request(cb, 'POST', body=content)
    assert r.status == 200, 'Expected 200, got %d' % r.status
    sleep(0.5)
    # nothing should happen...
    rf = RemoteFeed.get_by_url(url, ctx)
    assert len(rf.entries) == 0
    
    # finally, try with correct digest
    r, c = http.request(cb, 'POST', body=content, headers={'X-Hub-Signature': digest})
    assert r.status == 200, 'Expected 200, got %d' % r.status
    sleep(0.5)
    # nothing should happen...
    rf = RemoteFeed.get_by_url(url, ctx)
    assert len(rf.entries) == 10
    for iid in melk_ids_in(content, url):
        assert iid in rf.entries 
    
    client.kill()
    client.wait()
    indexer.kill()
    indexer.wait()