def test_disabled_unsubscribes(ctx):
    """
    tests that if pubsub is disabled for a feed, it becomes
    unsubscribed from its hub.
    """
    from eventlet import sleep, spawn
    from melkman.db import RemoteFeed
    from melkman.fetch.pubsubhubbub import WSGISubClient, callback_url_for
    from melkman.fetch.pubsubhubbub import hubbub_sub, update_pubsub_state

    w = WSGISubClient(ctx)
    client = spawn(w.run)

    hub = FakeHub()
    hub_proc = spawn(hub.run)

    # everything below runs under try/finally so that a failing
    # assertion still tears down the spawned greenthreads.
    try:
        hub_url = 'http://localhost:%d/' % hub.port

        feed_url = 'http://example.org/feeds/99'
        rf = RemoteFeed.create_from_url(feed_url, ctx)
        rf.feed_info = {'links': [{'rel': 'self', 'href': feed_url},
                                  {'rel': 'hub', 'href': hub_url}]}
        rf.save()

        # subscribe to the feed on the hub
        cb = callback_url_for(feed_url, ctx)
        assert not hub.is_verified(cb, feed_url)
        r, c = hubbub_sub(rf, ctx)
        assert r.status == 202, 'Expected 202, got %d' % r.status
        # give the hub time to verify the subscription
        sleep(.5)
        assert hub.is_verified(cb, feed_url)

        # disable pubsub for the feed
        rf = RemoteFeed.get_by_url(feed_url, ctx)
        assert rf.hub_info.enabled == True and rf.hub_info.subscribed == True
        rf.hub_info.enabled = False
        rf.save()

        # trigger an update
        update_pubsub_state(rf, ctx)

        # check that it is now unsubscribed.
        sleep(.5)
        assert not hub.is_verified(cb, feed_url)
        rf = RemoteFeed.get_by_url(feed_url, ctx)
        assert rf.hub_info.enabled == False and rf.hub_info.subscribed == False
    finally:
        client.kill()
        client.wait()
        hub_proc.kill()
        hub_proc.wait()
def test_get_or_immediate_create_by_url(ctx):
    """
    checks that get_or_immediate_create_by_url creates a feed for a
    url that is not stored yet, and hands back the stored feed
    (untouched) for a url that already is.
    """
    from datetime import datetime, timedelta
    from eventlet import sleep
    from melkman.db.remotefeed import RemoteFeed, get_or_immediate_create_by_url

    one_second = timedelta(seconds=1)
    feed_url = 'http://example.org/1'

    # precondition: nothing is stored under this url
    assert RemoteFeed.get_by_url(feed_url, ctx) is None

    # first call: the feed has to be created right away
    before_create = datetime.utcnow()
    created = get_or_immediate_create_by_url(feed_url, ctx)
    assert created is not None
    assert created.last_modification_date - before_create < one_second

    sleep(1)

    # second call: the very same feed comes back, and its modification
    # date shows that it was not written again.
    before_fetch = datetime.utcnow()
    fetched = get_or_immediate_create_by_url(feed_url, ctx)
    assert fetched.id == created.id
    assert fetched.last_modification_date == created.last_modification_date
    assert before_fetch - fetched.last_modification_date >= one_second
def _validate_sub_request(self, req):
    """
    Decide whether a hub's subscribe / unsubscribe verification
    request should be confirmed.

    :param req: the incoming verification request. ``hub.mode``,
        ``hub.topic``, ``hub.verify_token`` and, for subscribes,
        ``hub.lease_seconds`` are read from ``req.GET``.
    :returns: True if the request should be confirmed, False if it
        should be rejected.

    Side effect: when a subscribe is confirmed, the feed is marked as
    subscribed, its next subscription time is set to half the
    (capped) lease from now, and the feed is saved.
    """
    mode = req.GET.get('hub.mode', None)
    topic = req.GET.get('hub.topic', None)
    verify_token = req.GET.get('hub.verify_token', None)
    url = _determine_feed_url(req)

    if topic is None or mode is None or verify_token is None or url is None:
        return False

    rf = RemoteFeed.get_by_url(url, self.context)
    if rf is None:
        # for feeds we don't know about, confirm unsubscribes
        # and reject subscribes.
        return mode == 'unsubscribe'

    if topic != topic_url_for(rf):
        log.warn("hub sent mismatched feed / topic: (%s, %s)" % (topic, topic_url_for(rf)))
        return False

    if mode == 'subscribe':
        if not (rf.hub_info.enabled and
                rf.hub_info.verify_token == verify_token):
            return False

        # an absent or malformed lease falls back to the default; only
        # conversion errors are caught so that nothing else (including
        # greenthread kills) is swallowed here.
        try:
            lease_time = int(req.GET.get('hub.lease_seconds', DEFAULT_LEASE))
        except (TypeError, ValueError):
            lease_time = DEFAULT_LEASE
        # never honor a lease longer than our own default
        lease_time = min(lease_time, DEFAULT_LEASE)
        next_sub_time = datetime.utcnow() + timedelta(seconds=lease_time/2)

        # mark the feed as subscribed only when we have received
        # a proper subscription verification from the hub.
        rf.hub_info.subscribed = True
        rf.hub_info.next_sub_time = next_sub_time
        rf.save()
        return True

    elif mode == 'unsubscribe':
        ps = rf.hub_info
        if ps.enabled and ps.subscribed and verify_token == ps.verify_token:
            # deny any valid unsubscribe requests for enabled feeds
            # that we believe should be subscribed.
            return False
        # anything else we approve, invalid, disabled, unsubscribed.
        return True

    else:
        log.warn("hub sent unknown sub mode: %s" % mode)
        return False
def test_auto_sub(ctx):
    """
    tests autosubscription when feeds are indexed
    with <link rel="hub" /> entries.
    """
    from datetime import datetime
    from eventlet import sleep, spawn
    from melkman.db import RemoteFeed
    from melkman.fetch import push_feed_index
    from melkman.fetch.pubsubhubbub import WSGISubClient, callback_url_for
    from melkman.fetch.worker import run_feed_indexer

    w = WSGISubClient(ctx)
    client = spawn(w.run)
    indexer = spawn(run_feed_indexer, ctx)

    hub = FakeHub()
    hub_proc = spawn(hub.run)

    # everything below runs under try/finally so that a failing
    # assertion still tears down the spawned greenthreads.
    try:
        hub_url = 'http://localhost:%d/' % hub.port

        feed_url = 'http://www.example.org/feeds/12'
        # minimal atom feed advertising itself and the fake hub
        content = (
            '<?xml version="1.0" encoding="utf-8"?> '
            '<feed xmlns="http://www.w3.org/2005/Atom"> '
            '<id>%s</id> '
            '<title>Blah</title> '
            '<link rel="self" href="%s"/> '
            '<link rel="hub" href="%s" /> '
            '<updated>%s</updated> '
            '<author> <name>Joop Doderer</name> </author> '
            '</feed> '
        ) % (feed_url, feed_url, hub_url, rfc3339_date(datetime.utcnow()))

        # push content in...
        push_feed_index(feed_url, content, ctx)
        sleep(.5)

        # check for automatic subscription...
        cb = callback_url_for(feed_url, ctx)
        assert hub.is_verified(cb, feed_url)
        rf = RemoteFeed.get_by_url(feed_url, ctx)
        assert rf.hub_info.enabled
        assert rf.hub_info.hub_url == hub_url
    finally:
        client.kill()
        client.wait()
        indexer.kill()
        indexer.wait()
        hub_proc.kill()
        hub_proc.wait()
def index_feed_polling(url, context, timeout=15, request_info=None):
    """
    poll the feed at the url given and index it immediately on
    the calling thread.

    :param url: url of the feed to fetch.
    :param context: used to look up or create the feed, to read the
        ``http`` / ``cache`` configuration, and handed on to the
        approval check, scheduling and post-index hooks.
    :param timeout: timeout handed to the ``Http`` client.
    :param request_info: optional dict describing the request. It is
        passed to ``check_request_approved``; a true ``skip_reschedule``
        entry suppresses scheduling of the next poll.
    :returns: None. A rejected request returns before anything is
        fetched or saved.
    """
    if request_info is None:
        request_info = {}

    feed = RemoteFeed.get_by_url(url, context)
    if feed is None:
        feed = RemoteFeed.create_from_url(url, context)

    # explicit comparison: only a result equal to False rejects the
    # request, a None result does not.
    if check_request_approved(feed, request_info, context) == False:
        log.warn("Rejected index request for %s" % url)
        return

    reschedule = not request_info.get('skip_reschedule', False)
    http_cache = context.config.get('http', {}).get('cache', None)

    # fetch
    http = Http(cache=http_cache, timeout=timeout)
    # NOTE(review): presumably makes request failures surface as a
    # non-200 status (handled below) instead of raising -- confirm.
    http.force_exception_to_status_code = True
    response, content = http.request(url, 'GET')

    if response.fromcache:
        # served from cache: a successful check with nothing new
        feed.record_update_info(success=True, updates=0, method=METHOD_POLL)
    elif response.status != 200:
        feed.record_update_info(success=False, updates=0,
                                reason=response.reason, method=METHOD_POLL)
    else:
        # 200 status code, not from cache, do update...
        feed.update_from_feed(content, method=METHOD_POLL)

    # compute the next time to check...
    next_interval = compute_next_fetch_interval(feed.update_history)
    log.debug("next update interval for %s = %s" % (feed.url, next_interval))
    feed.next_poll_time = datetime.utcnow() + next_interval
    feed.poll_in_progress = False
    feed.save()

    log.info("Updated feed %s success: %s, %d new items" %
             (feed.url, feed.update_history[0].success, feed.update_history[0].updates))

    # whee... request at the next time !
    if reschedule:
        message_id = 'periodic_index_%s' % RemoteFeed.id_for_url(feed.url)
        schedule_feed_index(feed.url, feed.next_poll_time, context, message_id=message_id)

    run_post_index_hooks(feed, context)
def test_hub_lease_renew(ctx):
    """
    tests that we resubscribe with a hub
    within the hub specified lease window.
    """
    from eventlet import sleep, spawn
    from melkman.db import RemoteFeed
    from melkman.fetch.pubsubhubbub import WSGISubClient, callback_url_for
    from melkman.fetch.pubsubhubbub import hubbub_sub, update_pubsub_state

    w = WSGISubClient(ctx)
    client = spawn(w.run)

    # create a hub with a very short lease time.
    hub = FakeHub(lease_seconds=2)
    hub_proc = spawn(hub.run)

    # everything below runs under try/finally so that a failing
    # assertion still tears down the spawned greenthreads.
    try:
        hub_url = 'http://localhost:%d/' % hub.port

        feed_url = 'http://example.org/feeds/99'
        rf = RemoteFeed.create_from_url(feed_url, ctx)
        rf.feed_info = {'links': [{'rel': 'self', 'href': feed_url},
                                  {'rel': 'hub', 'href': hub_url}]}
        rf.save()

        # subscribe to the feed on the hub
        cb = callback_url_for(feed_url, ctx)
        assert not hub.is_verified(cb, feed_url)
        r, c = hubbub_sub(rf, ctx)
        assert r.status == 202, 'Expected 202, got %d' % r.status
        sleep(.5)
        assert hub.is_verified(cb, feed_url)

        rf = RemoteFeed.get_by_url(feed_url, ctx)
        assert hub.renewals(cb, feed_url) == 0

        # wait for as long as the hub's (2 second) lease before updating
        sleep(2)
        update_pubsub_state(rf, ctx)

        # make sure we triggered a lease renewal
        assert hub.renewals(cb, feed_url) == 1
    finally:
        client.kill()
        client.wait()
        hub_proc.kill()
        hub_proc.wait()
def index_feed_push(url, content, context, request_info=None):
    """
    index feed content that was pushed to us (rather than polled)
    immediately on the calling thread.

    :param url: url of the feed the content belongs to.
    :param content: the pushed feed document.
    :param context: used to look up or create the feed and handed on
        to the approval check and post-index hooks.
    :param request_info: optional dict describing the request; it is
        passed to ``check_request_approved``.
    :returns: None. A rejected request returns before the feed is
        updated or saved.
    """
    if request_info is None:
        request_info = {}

    feed = RemoteFeed.get_by_url(url, context)
    if feed is None:
        feed = RemoteFeed.create_from_url(url, context)

    # explicit comparison: only a result equal to False rejects the
    # request, a None result does not.
    if check_request_approved(feed, request_info, context) == False:
        log.warn("Rejected index request for %s" % url)
        return

    # the content was handed to us directly, so there is nothing to
    # fetch -- update the feed from it right away.
    feed.update_from_feed(content, method=METHOD_PUSH)
    feed.save()

    log.info("Updated feed %s success: %s, %d new items" %
             (feed.url, feed.update_history[0].success, feed.update_history[0].updates))

    run_post_index_hooks(feed, context)
def test_hub_lease_renew_failover(ctx):
    """
    tests that if we fail to renew a lease with a hub
    we will failover to a different hub if one is available.
    """
    from eventlet import sleep, spawn
    from melkman.db import RemoteFeed
    from melkman.fetch.pubsubhubbub import WSGISubClient, callback_url_for
    from melkman.fetch.pubsubhubbub import hubbub_sub, update_pubsub_state

    w = WSGISubClient(ctx)
    client = spawn(w.run)

    # create a hub with a very short lease time.
    hub = FakeHub(lease_seconds=2)
    hub_proc = spawn(hub.run)
    hub_url = 'http://localhost:%d/' % hub.port

    # ... and a second hub to fail over to
    hub2 = FakeHub(port=9298)
    hub2_proc = spawn(hub2.run)
    hub2_url = 'http://localhost:%d/' % hub2.port

    # everything below runs under try/finally so that a failing
    # assertion still tears down the spawned greenthreads.
    try:
        feed_url = 'http://example.org/feeds/99'
        rf = RemoteFeed.create_from_url(feed_url, ctx)
        rf.feed_info = {'links': [{'rel': 'self', 'href': feed_url},
                                  {'rel': 'hub', 'href': hub_url},
                                  {'rel': 'hub', 'href': hub2_url}]}
        rf.save()

        # subscribe to the feed on the hub
        cb = callback_url_for(feed_url, ctx)
        assert not hub.is_verified(cb, feed_url)
        r, c = hubbub_sub(rf, ctx)
        assert r.status == 202, 'Expected 202, got %d' % r.status
        sleep(.5)

        # we should be subscribed to the first hub only
        rf = RemoteFeed.get_by_url(feed_url, ctx)
        assert rf.hub_info.subscribed == True
        assert rf.hub_info.hub_url == hub_url
        assert hub.is_verified(cb, feed_url)
        assert not hub2.is_verified(cb, feed_url)
        assert hub.renewals(cb, feed_url) == 0

        sleep(2)

        # kill the first hub so that when we update,
        # the renewal will fail...
        hub_proc.kill()

        # when this update is triggered, renewal should fail and
        # we should instead subscribe to the alternate hub.
        # NOTE(review): unlike the other hub tests there is no sleep
        # between the update and the checks below -- confirm that the
        # second hub verifies before update_pubsub_state returns.
        update_pubsub_state(rf, ctx)
        assert hub.renewals(cb, feed_url) == 0

        rf = RemoteFeed.get_by_url(feed_url, ctx)
        assert rf.hub_info.subscribed == True
        assert rf.hub_info.hub_url == hub2_url
        assert hub2.is_verified(cb, feed_url)
    finally:
        client.kill()
        client.wait()
        # the first hub is normally killed mid-test; this only matters
        # when the test failed before reaching that point.
        if not hub_proc.dead:
            hub_proc.kill()
        hub2_proc.kill()
        hub2_proc.wait()
def test_hub_invalidation_resub(ctx):
    """
    tests that if a currently subscribed hub is no longer listed,
    we subscribe to a different hub if any are listed.
    """
    from eventlet import sleep, spawn
    from melkman.db import RemoteFeed
    from melkman.fetch.pubsubhubbub import WSGISubClient, callback_url_for
    from melkman.fetch.pubsubhubbub import hubbub_sub, update_pubsub_state

    w = WSGISubClient(ctx)
    client = spawn(w.run)

    # create two hubs
    hub = FakeHub()
    hub_proc = spawn(hub.run)
    hub_url = 'http://localhost:%d/' % hub.port

    hub2 = FakeHub(port=9298)
    hub2_proc = spawn(hub2.run)
    hub2_url = 'http://localhost:%d/' % hub2.port

    # everything below runs under try/finally so that a failing
    # assertion still tears down the spawned greenthreads.
    try:
        feed_url = 'http://example.org/feeds/99'
        rf = RemoteFeed.create_from_url(feed_url, ctx)
        rf.feed_info = {'links': [{'rel': 'self', 'href': feed_url},
                                  {'rel': 'hub', 'href': hub_url}]}
        rf.save()

        # subscribe to the feed on the hub
        cb = callback_url_for(feed_url, ctx)
        assert not hub.is_verified(cb, feed_url)
        r, c = hubbub_sub(rf, ctx)
        assert r.status == 202, 'Expected 202, got %d' % r.status
        sleep(.5)
        assert hub.is_verified(cb, feed_url)

        # remove the hub from the list of hubs, but replace it with another
        rf = RemoteFeed.get_by_url(feed_url, ctx)
        rf.feed_info = {'links': [{'rel': 'self', 'href': feed_url},
                                  {'rel': 'hub', 'href': hub2_url}]}
        rf.save()

        # trigger an update
        update_pubsub_state(rf, ctx)

        # check that it is now unsubscribed from the original hub, and
        # is now subscribed to the new hub.
        sleep(2)
        assert not hub.is_verified(cb, feed_url)
        assert hub2.is_verified(cb, feed_url)
        rf = RemoteFeed.get_by_url(feed_url, ctx)
        assert rf.hub_info.enabled == True and rf.hub_info.subscribed == True
    finally:
        client.kill()
        client.wait()
        hub_proc.kill()
        hub_proc.wait()
        hub2_proc.kill()
        hub2_proc.wait()
def test_push_index_digest(ctx):
    """
    tests that content pushed "from the hub" is only indexed when the
    feed has a hub secret and the push carries the matching digest.
    """
    from melk.util.nonce import nonce_str
    from melkman.db.remotefeed import RemoteFeed
    from melkman.fetch import push_feed_index
    from melkman.fetch.worker import run_feed_indexer
    from eventlet import sleep, spawn
    from melkman.fetch.pubsubhubbub import psh_digest

    # start a feed indexer
    indexer = spawn(run_feed_indexer, ctx)

    # everything below runs under try/finally so that a failing
    # assertion still tears down the indexer.
    try:
        url = 'http://www.example.com/feeds/2'
        rf = RemoteFeed.create_from_url(url, ctx)
        rf.hub_info.enabled = True
        rf.hub_info.subscribed = True
        rf.save()

        secret = nonce_str()

        content = random_atom_feed(url, 10)
        ids = melk_ids_in(content, url)

        correct_digest = 'sha1=%s' % psh_digest(content, secret)
        wrong_digest = 'wrong digest'

        def push_and_reload(digest):
            # push the content as if it came from the hub, give the
            # indexer time to handle it, and return the stored feed.
            push_feed_index(url, content, ctx, digest=digest, from_hub=True)
            sleep(.5)
            return RemoteFeed.get_by_url(url, ctx)

        #
        # no hub secret is specified on the feed: nothing is accepted,
        # not even the correct digest, as no secret has been set.
        #
        for digest in (wrong_digest, None, correct_digest):
            rf = push_and_reload(digest)
            for iid in ids:
                assert iid not in rf.entries

        #
        # now set the hub secret
        #
        rf.hub_info.secret = secret
        rf.save()

        # wrong or missing digests are still rejected
        for digest in (wrong_digest, None):
            rf = push_and_reload(digest)
            for iid in ids:
                assert iid not in rf.entries

        # finally, the correct digest should work now...
        rf = push_and_reload(correct_digest)
        for iid in ids:
            assert iid in rf.entries
    finally:
        indexer.kill()
        indexer.wait()
def test_sub_to_hub(ctx):
    """
    test hubbub_sub and hubbub_unsub against a fake hub: content posted
    to the callback is indexed while subscribed, and ignored once
    unsubscribed.
    """
    from httplib2 import Http
    from eventlet import sleep, spawn
    from melkman.db import RemoteFeed
    from melkman.fetch.worker import run_feed_indexer
    from melkman.fetch.pubsubhubbub import WSGISubClient
    from melkman.fetch.pubsubhubbub import callback_url_for
    from melkman.fetch.pubsubhubbub import hubbub_sub
    from melkman.fetch.pubsubhubbub import hubbub_unsub
    from melkman.fetch.pubsubhubbub import psh_digest

    import logging
    logging.basicConfig(level=logging.WARN)

    w = WSGISubClient(ctx)
    client = spawn(w.run)
    indexer = spawn(run_feed_indexer, ctx)

    hub = FakeHub()
    hub_proc = spawn(hub.run)

    # everything below runs under try/finally so that a failing
    # assertion still tears down the spawned greenthreads.
    try:
        hub_url = 'http://localhost:%d/' % hub.port

        feed_url = 'http://example.org/feeds/99'
        rf = RemoteFeed.create_from_url(feed_url, ctx)
        rf.feed_info = {'links': [{'rel': 'self', 'href': feed_url},
                                  {'rel': 'hub', 'href': hub_url}]}
        rf.save()

        cb = callback_url_for(feed_url, ctx)

        # subscribe to the hub
        assert not hub.is_verified(cb, feed_url)
        r, c = hubbub_sub(rf, ctx)
        assert r.status == 202, 'Expected 202, got %d' % r.status
        sleep(.5)
        assert hub.is_verified(cb, feed_url)
        secret = hub.secret_for(cb, feed_url)

        http = Http()

        # simulate hub posting to callback URL
        content = random_atom_feed(feed_url, 10, link=feed_url, hub_urls=[hub_url])
        digest = 'sha1=%s' % psh_digest(content, secret)
        r, c = http.request(cb, 'POST', body=content, headers={'X-Hub-Signature': digest})
        assert r.status == 200, 'Expected 200, got %d' % r.status
        sleep(0.5)

        # since we are subscribed, new items should be in the feed now
        rf = RemoteFeed.get_by_url(feed_url, ctx)
        assert len(rf.entries) == 10
        for iid in melk_ids_in(content, feed_url):
            assert iid in rf.entries

        # unsubscribe from hub
        r, c = hubbub_unsub(rf, ctx)
        assert r.status == 202, 'Expected 202, got %d' % r.status
        sleep(.5)
        assert not hub.is_verified(cb, feed_url)

        # simulate another POST to the callback URL. the callback
        # still answers 200, but this time the content should be
        # ignored (we are not subscribed).
        content = random_atom_feed(feed_url, 10, link=feed_url, hub_urls=[hub_url])
        digest = "sha1=%s" % psh_digest(content, secret)
        r, c = http.request(cb, 'POST', body=content, headers={'X-Hub-Signature': digest})
        assert r.status == 200, 'Expected 200, got %d' % r.status
        sleep(0.5)

        # items should be the same as before (not subscribed)
        rf = RemoteFeed.get_by_url(feed_url, ctx)
        assert len(rf.entries) == 10
        for iid in melk_ids_in(content, feed_url):
            assert iid not in rf.entries
    finally:
        client.kill()
        client.wait()
        indexer.kill()
        indexer.wait()
        hub_proc.kill()
        hub_proc.wait()
def test_sub_push(ctx):
    """
    tests that content POSTed to the callback url is only indexed for
    a feed that is enabled, subscribed and has a secret matching the
    digest sent along; the callback answers 200 in every case.
    """
    from httplib2 import Http
    from eventlet import sleep, spawn
    from melk.util.nonce import nonce_str
    from melkman.db import RemoteFeed
    from melkman.fetch.worker import run_feed_indexer
    from melkman.fetch.pubsubhubbub import WSGISubClient, callback_url_for, psh_digest

    import logging
    logging.basicConfig(level=logging.WARN)

    w = WSGISubClient(ctx)
    client = spawn(w.run)
    indexer = spawn(run_feed_indexer, ctx)

    # everything below runs under try/finally so that a failing
    # assertion still tears down the spawned greenthreads.
    try:
        http = Http()
        url = 'http://example.org/feed/0'
        content = random_atom_feed(url, 10)
        secret = nonce_str()

        digest = 'sha1=%s' % psh_digest(content, secret)
        cb = callback_url_for(url, ctx)

        assert RemoteFeed.get_by_url(url, ctx) is None

        # try posting something that is not subscribed
        r, c = http.request(cb, 'POST', body=content, headers={'X-Hub-Signature': digest})
        assert r.status == 200, 'Expected 200, got %d' % r.status
        sleep(1)
        # nothing should happen... look the feed up by url (not by
        # document id) so that this check can actually fail.
        assert RemoteFeed.get_by_url(url, ctx) is None

        # set up the feed, but don't subscribe
        rf = RemoteFeed.create_from_url(url, ctx)
        rf.save()
        r, c = http.request(cb, 'POST', body=content, headers={'X-Hub-Signature': digest})
        assert r.status == 200, 'Expected 200, got %d' % r.status
        sleep(1)
        # nothing should happen...
        rf = RemoteFeed.get_by_url(url, ctx)
        assert len(rf.entries) == 0

        # now set it up
        rf.hub_info.enabled = True
        rf.hub_info.subscribed = True
        rf.hub_info.secret = secret
        rf.save()

        # try with wrong digest...
        r, c = http.request(cb, 'POST', body=content, headers={'X-Hub-Signature': 'wrong'})
        assert r.status == 200, 'Expected 200, got %d' % r.status
        sleep(0.5)
        # nothing should happen...
        rf = RemoteFeed.get_by_url(url, ctx)
        assert len(rf.entries) == 0

        # try with no digest
        r, c = http.request(cb, 'POST', body=content)
        assert r.status == 200, 'Expected 200, got %d' % r.status
        sleep(0.5)
        # nothing should happen...
        rf = RemoteFeed.get_by_url(url, ctx)
        assert len(rf.entries) == 0

        # finally, try with correct digest
        r, c = http.request(cb, 'POST', body=content, headers={'X-Hub-Signature': digest})
        assert r.status == 200, 'Expected 200, got %d' % r.status
        sleep(0.5)
        # this time the pushed entries should have been indexed
        rf = RemoteFeed.get_by_url(url, ctx)
        assert len(rf.entries) == 10
        for iid in melk_ids_in(content, url):
            assert iid in rf.entries
    finally:
        client.kill()
        client.wait()
        indexer.kill()
        indexer.wait()