def test_disabled_unsubscribes(ctx):
    """
    tests that if pubsub is disabled for a feed, it becomes
    unsubscribed from its hub.
    """
    from eventlet import sleep, spawn
    from melkman.db import RemoteFeed
    from melkman.fetch.pubsubhubbub import WSGISubClient, callback_url_for
    from melkman.fetch.pubsubhubbub import hubbub_sub, update_pubsub_state

    w = WSGISubClient(ctx)
    client = spawn(w.run)

    hub = FakeHub()
    hub_proc = spawn(hub.run)
    hub_url = 'http://localhost:%d/' % hub.port

    feed_url = 'http://example.org/feeds/99'
    rf = RemoteFeed.create_from_url(feed_url, ctx)
    rf.feed_info = {'links': [{'rel': 'self', 'href': feed_url},
                              {'rel': 'hub', 'href': hub_url}]}
    rf.save()

    # subscribe to the feed on the hub
    cb = callback_url_for(feed_url, ctx)
    assert not hub.is_verified(cb, feed_url)
    r, c = hubbub_sub(rf, ctx)
    assert r.status == 202, 'Expected 202, got %d' % r.status
    sleep(.5)
    assert hub.is_verified(cb, feed_url)

    # disable pubsub for the feed
    rf = RemoteFeed.get_by_url(feed_url, ctx)
    assert rf.hub_info.enabled == True and rf.hub_info.subscribed == True
    rf.hub_info.enabled = False
    rf.save()

    # trigger an update
    update_pubsub_state(rf, ctx)

    # check that it is now unsubscribed.
    sleep(.5)
    assert not hub.is_verified(cb, feed_url)
    rf = RemoteFeed.get_by_url(feed_url, ctx)
    assert rf.hub_info.enabled == False and rf.hub_info.subscribed == False

    client.kill()
    client.wait()
    hub_proc.kill()
    hub_proc.wait()

def index_feed_polling(url, context, timeout=15, request_info=None):
    """
    poll the feed at the url given and index it immediately
    on the calling thread.
    """
    if request_info is None:
        request_info = {}

    feed = RemoteFeed.get_by_url(url, context)
    if feed is None:
        feed = RemoteFeed.create_from_url(url, context)

    if not check_request_approved(feed, request_info, context):
        log.warn("Rejected index request for %s" % url)
        return

    reschedule = not request_info.get('skip_reschedule', False)
    http_cache = context.config.get('http', {}).get('cache', None)

    # fetch
    http = Http(cache=http_cache, timeout=timeout)
    http.force_exception_to_status_code = True
    response, content = http.request(url, 'GET')

    if response.fromcache:
        feed.record_update_info(success=True, updates=0, method=METHOD_POLL)
    elif response.status != 200:
        feed.record_update_info(success=False, updates=0,
                                reason=response.reason, method=METHOD_POLL)
    else:
        # 200 status code, not from cache, do update...
        feed.update_from_feed(content, method=METHOD_POLL)

    # compute the next time to check...
    next_interval = compute_next_fetch_interval(feed.update_history)
    log.debug("next update interval for %s = %s" % (feed.url, next_interval))
    feed.next_poll_time = datetime.utcnow() + next_interval
    feed.poll_in_progress = False
    feed.save()

    log.info("Updated feed %s success: %s, %d new items" %
             (feed.url, feed.update_history[0].success,
              feed.update_history[0].updates))

    # whee... request at the next time!
    if reschedule:
        message_id = 'periodic_index_%s' % RemoteFeed.id_for_url(feed.url)
        schedule_feed_index(feed.url, feed.next_poll_time, context,
                            message_id=message_id)

    run_post_index_hooks(feed, context)

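# The helper below is an illustrative sketch, not part of the original
# module: it shows how the 'skip_reschedule' flag consumed above can be used
# to index a feed exactly once without queueing the next periodic fetch.
# The name index_feed_once is hypothetical.
def index_feed_once(url, context):
    # one-shot poll: same fetch/index path, but no reschedule afterwards
    index_feed_polling(url, context,
                       request_info={'skip_reschedule': True})
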
def test_delete(ctx):
    from melkman.db import RemoteFeed, NewsItem, NewsItemRef

    feed_url = 'http://example.org/feeds/1'
    dummy_feed = random_atom_feed(feed_url, 25)
    items = melk_ids_in(dummy_feed, feed_url)

    rf = RemoteFeed.create_from_url(feed_url, ctx)
    rf.update_from_feed(dummy_feed, 'test')
    rf.save()

    bucket_id = rf.id
    ref_ids = []
    assert bucket_id in ctx.db
    for iid in items:
        assert iid in rf.entries
        ref_id = NewsItemRef.dbid(bucket_id, iid)
        ref_ids.append(ref_id)
        assert ref_id in ctx.db
        # a news item was created too...
        assert iid in ctx.db

    # now destroy!
    rf.delete()

    assert not bucket_id in ctx.db
    for ref_id in ref_ids:
        assert not ref_id in ctx.db
    for iid in items:
        assert not iid in ctx.db

def test_get_or_immediate_create_by_url(ctx):
    """
    test that get_or_immediate_create_by_url retrieves existing feeds
    by url if they in fact exist, and creates them if they don't.
    """
    from datetime import datetime, timedelta
    from eventlet import sleep
    from melkman.db.remotefeed import RemoteFeed, get_or_immediate_create_by_url

    feed_url = 'http://example.org/1'

    # make sure it doesn't exist yet
    feed = RemoteFeed.get_by_url(feed_url, ctx)
    assert feed is None

    # this should result in its immediate creation
    creationdt = datetime.utcnow()
    feed = get_or_immediate_create_by_url(feed_url, ctx)
    assert feed is not None
    assert feed.last_modification_date - creationdt < timedelta(seconds=1)

    sleep(1)

    # this should retrieve the existing feed, not create a new one
    now = datetime.utcnow()
    samefeed = get_or_immediate_create_by_url(feed_url, ctx)
    assert samefeed.id == feed.id
    assert samefeed.last_modification_date == feed.last_modification_date
    assert now - samefeed.last_modification_date >= timedelta(seconds=1)

def test_hub_lease_renew(ctx):
    """
    tests that we resubscribe with a hub within the hub-specified
    lease window.
    """
    from eventlet import sleep, spawn
    from melkman.db import RemoteFeed
    from melkman.fetch.pubsubhubbub import WSGISubClient, callback_url_for
    from melkman.fetch.pubsubhubbub import hubbub_sub, update_pubsub_state

    w = WSGISubClient(ctx)
    client = spawn(w.run)

    # create a hub with a very short lease time.
    hub = FakeHub(lease_seconds=2)
    hub_proc = spawn(hub.run)
    hub_url = 'http://localhost:%d/' % hub.port

    feed_url = 'http://example.org/feeds/99'
    rf = RemoteFeed.create_from_url(feed_url, ctx)
    rf.feed_info = {'links': [{'rel': 'self', 'href': feed_url},
                              {'rel': 'hub', 'href': hub_url}]}
    rf.save()

    # subscribe to the feed on the hub
    cb = callback_url_for(feed_url, ctx)
    assert not hub.is_verified(cb, feed_url)
    r, c = hubbub_sub(rf, ctx)
    assert r.status == 202, 'Expected 202, got %d' % r.status
    sleep(.5)
    assert hub.is_verified(cb, feed_url)

    rf = RemoteFeed.get_by_url(feed_url, ctx)
    assert hub.renewals(cb, feed_url) == 0
    sleep(2)
    update_pubsub_state(rf, ctx)

    # make sure we triggered a lease renewal
    assert hub.renewals(cb, feed_url) == 1

    client.kill()
    client.wait()
    hub_proc.kill()
    hub_proc.wait()

def update_pubsub_state(feed, context):
    """
    perform any (un/re)subscription needed based on the state of the
    feed given and the currently listed hubs.
    """
    hubs = feed.find_hub_urls()
    ps = feed.hub_info

    # if pubsub is disabled for this feed
    if not ps.enabled:
        if ps.subscribed:
            try:
                hubbub_unsub(feed, context)
            except:
                log.warn("Error unsubscribing from hub %s for feed %s: %s" %
                         (ps.hub_url, feed.url, traceback.format_exc()))
        return

    # if the currently subscribed hub is no longer
    # listed in the feed, unsubscribe from it.
    if ps.subscribed and ps.hub_url not in hubs:
        try:
            hubbub_unsub(feed, context)
        except:
            log.warn("Error unsubscribing from current hub: %s" %
                     traceback.format_exc())
        feed = RemoteFeed.get(feed.id, context)  # refresh

    # if it is time to resubscribe to the current hub, try to
    # resubscribe
    elif ps.subscribed and datetime.utcnow() > ps.next_sub_time:
        log.info('resubscribe %s to hub %s' % (feed.url, feed.hub_info.hub_url))
        if not _sub_any(feed, [feed.hub_info.hub_url], context):
            log.warn("Failed to resubscribe to %s for feed %s." %
                     (ps.hub_url, feed.url))
            try:
                hubbub_unsub(feed, context)
            except:
                log.warn("Error unsubscribing from hub %s for feed %s: %s" %
                         (ps.hub_url, feed.url, traceback.format_exc()))
            feed = RemoteFeed.get(feed.id, context)  # refresh

    # if it is not subscribed, subscribe to the first hub
    # that works.
    if not ps.subscribed:
        _sub_any(feed, hubs, context)

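# Illustrative sketch, not part of the original module: callers mutate
# hub_info on a saved feed and then invoke update_pubsub_state to reconcile
# actual hub subscriptions with that state, as exercised in
# test_disabled_unsubscribes above. The name disable_pubsub is hypothetical.
def disable_pubsub(url, context):
    feed = RemoteFeed.get_by_url(url, context)
    if feed is None:
        return
    feed.hub_info.enabled = False
    feed.save()
    update_pubsub_state(feed, context)  # performs the actual unsubscribe
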
def _validate_sub_request(self, req):
    mode = req.GET.get('hub.mode', None)
    topic = req.GET.get('hub.topic', None)
    verify_token = req.GET.get('hub.verify_token', None)
    url = _determine_feed_url(req)

    if topic is None or mode is None or verify_token is None or url is None:
        return False

    rf = RemoteFeed.get_by_url(url, self.context)
    if rf is None:
        # for feeds we don't know about, confirm unsubscribes
        # and reject subscribes.
        return mode == 'unsubscribe'

    if topic != topic_url_for(rf):
        log.warn("hub sent mismatched feed / topic: (%s, %s)" %
                 (topic, topic_url_for(rf)))
        return False

    if mode == 'subscribe':
        if (rf.hub_info.enabled and
            rf.hub_info.verify_token == verify_token):
            try:
                lease_time = int(req.GET.get('hub.lease_seconds', DEFAULT_LEASE))
            except:
                lease_time = DEFAULT_LEASE
            lease_time = min(lease_time, DEFAULT_LEASE)
            next_sub_time = datetime.utcnow() + timedelta(seconds=lease_time/2)
            # mark the feed as subscribed only when we have received
            # a proper subscription verification from the hub.
            rf.hub_info.subscribed = True
            rf.hub_info.next_sub_time = next_sub_time
            rf.save()
            return True
        else:
            return False
    elif mode == 'unsubscribe':
        ps = rf.hub_info
        if ps.enabled and ps.subscribed and verify_token == ps.verify_token:
            # deny valid unsubscribe requests for enabled feeds
            # that we believe should be subscribed.
            return False
        # anything else we approve: invalid, disabled, or unsubscribed.
        return True
    else:
        log.warn("hub sent unknown sub mode: %s" % mode)
        return False

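# For reference, the verification GET that _validate_sub_request handles has
# the shape exercised in test_sub_verify below:
#
#   GET <callback_url>?hub.mode=subscribe
#        &hub.topic=<topic url>
#        &hub.challenge=<opaque nonce>
#        &hub.verify_token=<token issued at subscribe time>
#
# On approval the subscriber answers 200 and echoes hub.challenge in the
# response body; any other status tells the hub the action is rejected.
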
def test_item_trace_update(ctx):
    from melkman.db import NewsItem, RemoteFeed

    def _check_item(item, info):
        for k, v in info.items():
            if k != 'id':
                assert getattr(item, k) == v, \
                    "Key %s: Expected %s, got %s" % (k, v, getattr(item, k))

    def check_item(f, iid, info):
        _check_item(f.entries[iid], info)
        _check_item(NewsItem.get(iid, ctx), info)

    feed_url = 'http://example.org/feed'
    feed = RemoteFeed.create_from_url(feed_url, ctx)

    atom_id = 'http://example.org/articles.php?id=1'

    time1 = no_micro(datetime.utcnow())
    info1 = {'id': atom_id,
             'title': 'Title1',
             'author': 'author1',
             'link': 'http://example.org/link1',
             'summary': 'summary text 1',
             'timestamp': time1}
    feed_v1 = make_atom_feed(feed_url, [make_atom_entry(**info1)])
    feed.update_from_feed(feed_v1, method='test')
    feed.save()

    melk_id = melk_ids_in(feed_v1, feed_url)[0]
    check_item(feed, melk_id, info1)

    # change the info, but not the timestamp; should stay the same
    info2 = dict(info1)
    info2['title'] = 'Title 2'
    feed_v2 = make_atom_feed(feed_url, [make_atom_entry(**info2)])
    feed.update_from_feed(feed_v2, method='test')
    feed.save()
    # should still match info1 (no update)
    check_item(feed, melk_id, info1)

    # now update the timestamp along with other fields
    time3 = no_micro(time1 + timedelta(seconds=1))
    info3 = {'id': atom_id,
             'title': 'Title3',
             'author': 'author3',
             'link': 'http://example.org/link3',
             'summary': 'summary text 3',
             'timestamp': time3}
    feed_v3 = make_atom_feed(feed_url, [make_atom_entry(**info3)])
    feed.update_from_feed(feed_v3, method='test')
    feed.save()
    check_item(feed, melk_id, info3)

def test_auto_sub(ctx):
    # tests autosubscription when feeds are indexed
    # with <link rel="hub" /> entries.
    from datetime import datetime
    from eventlet import sleep, spawn
    from melkman.db import RemoteFeed
    from melkman.fetch import push_feed_index
    from melkman.fetch.pubsubhubbub import WSGISubClient, callback_url_for
    from melkman.fetch.worker import run_feed_indexer

    w = WSGISubClient(ctx)
    client = spawn(w.run)
    indexer = spawn(run_feed_indexer, ctx)

    hub = FakeHub()
    hub_proc = spawn(hub.run)
    hub_url = 'http://localhost:%d/' % hub.port

    feed_url = 'http://www.example.org/feeds/12'
    content = """<?xml version="1.0" encoding="utf-8"?>
    <feed xmlns="http://www.w3.org/2005/Atom">
        <id>%s</id>
        <title>Blah</title>
        <link rel="self" href="%s"/>
        <link rel="hub" href="%s" />
        <updated>%s</updated>
        <author>
            <name>Joop Doderer</name>
        </author>
    </feed>
    """ % (feed_url, feed_url, hub_url, rfc3339_date(datetime.utcnow()))

    # push content in...
    push_feed_index(feed_url, content, ctx)
    sleep(.5)

    # check for automatic subscription...
    cb = callback_url_for(feed_url, ctx)
    assert hub.is_verified(cb, feed_url)
    rf = RemoteFeed.get_by_url(feed_url, ctx)
    assert rf.hub_info.enabled
    assert rf.hub_info.hub_url == hub_url

    client.kill()
    client.wait()
    indexer.kill()
    indexer.wait()
    hub_proc.kill()
    hub_proc.wait()

def index_feed_push(url, content, context, request_info=None):
    if request_info is None:
        request_info = {}

    feed = RemoteFeed.get_by_url(url, context)
    if feed is None:
        feed = RemoteFeed.create_from_url(url, context)

    if not check_request_approved(feed, request_info, context):
        log.warn("Rejected index request for %s" % url)
        return

    # apply the pushed document...
    feed.update_from_feed(content, method=METHOD_PUSH)
    feed.save()

    log.info("Updated feed %s success: %s, %d new items" %
             (feed.url, feed.update_history[0].success,
              feed.update_history[0].updates))

    run_post_index_hooks(feed, context)

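# Note: the tests in this section drive pushes through
# push_feed_index(url, content, ctx, ...) (see test_auto_sub above and
# test_push_index_digest below), which appears to enqueue work for the feed
# indexer; index_feed_push is the worker-side handler that finally applies
# the pushed document. This division of labor is inferred from the tests,
# not stated in this section.
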
def test_max_history_len(ctx):
    from melkman.db.remotefeed import RemoteFeed, MAX_HISTORY

    feed_url = 'http://example.org/feeds/1'
    rf = RemoteFeed.create_from_url(feed_url, ctx)

    for i in range(5 * MAX_HISTORY):
        reason = 'update %d' % i
        rf.record_update_info(reason=reason)
        if i < MAX_HISTORY:
            assert len(rf.update_history) == i + 1
        else:
            assert len(rf.update_history) == MAX_HISTORY
        assert rf.update_history[0].reason == reason

def test_update_feed_partial_repeat(ctx):
    """
    test that indexing some of the same content twice only
    updates new things.
    """
    from melkman.db import RemoteFeed

    # create a document with a 10 entry feed
    feed_url = 'http://example.org/%s' % random_id()
    entries1 = dummy_atom_entries(10)
    entries2 = dummy_atom_entries(10)
    content = make_atom_feed(feed_url, entries1)

    # extract the ids from the document
    expect_ids = melk_ids_in(content, feed_url)
    assert len(expect_ids) == 10

    feed = RemoteFeed.create_from_url(feed_url, ctx)

    # update a remote feed with the content.
    updates = feed.update_from_feed(content, method='test')
    feed.save()

    # make sure all the items come back as new/updated
    assert updates == 10

    # make sure all the items are in the feed
    for iid in expect_ids:
        assert feed.has_news_item(iid)

    # add some additional entries
    content = make_atom_feed(feed_url, entries2 + entries1)
    expect_ids = set([x for x in melk_ids_in(content, feed_url)
                      if not x in expect_ids])
    assert len(expect_ids) == 10

    updates = feed.update_from_feed(content, method='test')
    # this list should be ready to push to the db with no problems
    feed.save()
    assert updates == 10

    # make sure all the items are in the feed
    for iid in expect_ids:
        assert feed.has_news_item(iid)

def test_update_feed_repeat_index(ctx):
    """
    test that indexing the same content twice has no effect
    """
    from melkman.db import RemoteFeed

    # create a document with a 10 entry feed
    feed_url = 'http://example.org/%s' % random_id()
    content = random_atom_feed(feed_url, 10)
    ids = melk_ids_in(content, feed_url)
    assert len(ids) == 10

    feed = RemoteFeed.create_from_url(feed_url, ctx)

    # update a remote feed with the content.
    updates = feed.update_from_feed(content, method='test')
    feed.save()
    assert updates == 10

    # make sure all the items are in the feed
    for iid in ids:
        assert feed.has_news_item(iid)

    # update again with identical content;
    # should have no effect.
    updates = feed.update_from_feed(content, method='test')
    assert updates == 0
    feed.save()

    # reload...
    feed.reload()

    # update again with identical content;
    # should have no effect.
    updates = feed.update_from_feed(content, method='test')
    assert updates == 0

def try_sub(tries):
    # nested retry helper: `context`, `feed` and `hub` are free variables
    # closed over from the enclosing subscribe routine. On a retry, reload
    # a fresh copy of the feed before issuing the subscribe request.
    if tries > 1:
        ff = RemoteFeed.load(context.db, feed.id)
    else:
        ff = feed
    return hubbub_sub(ff, context, hub_url=hub)

def test_sub_verify(ctx):
    from httplib2 import Http
    from eventlet import spawn
    from melk.util.nonce import nonce_str
    from melkman.db import RemoteFeed
    from melkman.fetch.pubsubhubbub import WSGISubClient, callback_url_for

    import logging
    logging.basicConfig(level=logging.WARN)

    w = WSGISubClient(ctx)
    client = spawn(w.run)

    http = Http()
    url = 'http://example.org/feed/0'
    challenge = nonce_str()
    verify_token = nonce_str()
    secret = nonce_str()

    cb = callback_url_for(url, ctx)
    cb += '?hub.mode=subscribe'
    cb += '&hub.topic=%s' % url
    cb += '&hub.challenge=%s' % challenge
    cb += '&hub.verify_token=%s' % verify_token

    # try verifying something that doesn't exist
    r, c = http.request(cb, 'GET')
    assert r.status == 404, 'Expected 404, got %d' % r.status

    # now create it
    rf = RemoteFeed.create_from_url(url, ctx)
    rf.feed_info = {"links": [{"rel": "self", "href": url}]}
    rf.save()

    # still should not verify
    r, c = http.request(cb, 'GET')
    assert r.status == 404, 'Expected 404, got %d' % r.status

    # now set appropriate fields on the feed object
    rf.hub_info.enabled = True
    rf.hub_info.verify_token = verify_token
    rf.hub_info.secret = secret
    rf.save()

    # now it should accept verification...
    for i in range(3):
        r, c = http.request(cb, 'GET')
        assert r.status == 200, 'Expected 200, got %d' % r.status
        assert c == challenge, 'expected %s, got %s' % (challenge, c)

    # create unsubscribe callback...
    cb = callback_url_for(url, ctx)
    cb += '?hub.mode=unsubscribe'
    cb += '&hub.topic=%s' % url
    cb += '&hub.challenge=%s' % challenge
    cb += '&hub.verify_token=%s' % verify_token

    # currently it should fail; we are not unsubscribed
    r, c = http.request(cb, 'GET')
    assert r.status == 404, 'Expected 404, got %d' % r.status

    # after disabling, the unsub verify should be okay
    rf.reload()
    rf.hub_info.enabled = False
    rf.save()

    r, c = http.request(cb, 'GET')
    assert r.status == 200, 'Expected 200, got %d' % r.status
    assert c == challenge, 'expected %s, got %s' % (challenge, c)

    # now destroy the feed entirely; an unsub request for
    # stuff that does not exist should also verify.
    del ctx.db[rf.id]
    r, c = http.request(cb, 'GET')
    assert r.status == 200, 'Expected 200, got %d' % r.status
    assert c == challenge, 'expected %s, got %s' % (challenge, c)

    client.kill()
    client.wait()

def test_hub_lease_renew_failover(ctx):
    """
    tests that if we fail to renew a lease with a hub, we fail over
    to a different hub if one is available.
    """
    from eventlet import sleep, spawn
    from melkman.db import RemoteFeed
    from melkman.fetch.pubsubhubbub import WSGISubClient, callback_url_for
    from melkman.fetch.pubsubhubbub import hubbub_sub, update_pubsub_state

    w = WSGISubClient(ctx)
    client = spawn(w.run)

    # create a hub with a very short lease time.
    hub = FakeHub(lease_seconds=2)
    hub_proc = spawn(hub.run)
    hub_url = 'http://localhost:%d/' % hub.port

    hub2 = FakeHub(port=9298)
    hub2_proc = spawn(hub2.run)
    hub2_url = 'http://localhost:%d/' % hub2.port

    feed_url = 'http://example.org/feeds/99'
    rf = RemoteFeed.create_from_url(feed_url, ctx)
    rf.feed_info = {'links': [{'rel': 'self', 'href': feed_url},
                              {'rel': 'hub', 'href': hub_url},
                              {'rel': 'hub', 'href': hub2_url}]}
    rf.save()

    # subscribe to the feed on the hub
    cb = callback_url_for(feed_url, ctx)
    assert not hub.is_verified(cb, feed_url)
    r, c = hubbub_sub(rf, ctx)
    assert r.status == 202, 'Expected 202, got %d' % r.status
    sleep(.5)

    rf = RemoteFeed.get_by_url(feed_url, ctx)
    assert rf.hub_info.subscribed == True
    assert rf.hub_info.hub_url == hub_url
    assert hub.is_verified(cb, feed_url)
    assert not hub2.is_verified(cb, feed_url)
    assert hub.renewals(cb, feed_url) == 0
    sleep(2)

    # kill the first hub so that when we update,
    # the renewal will fail...
    hub_proc.kill()

    # when this update is triggered, renewal should fail and
    # we should instead subscribe to the alternate hub.
    update_pubsub_state(rf, ctx)
    assert hub.renewals(cb, feed_url) == 0
    rf = RemoteFeed.get_by_url(feed_url, ctx)
    assert rf.hub_info.subscribed == True
    assert rf.hub_info.hub_url == hub2_url
    assert hub2.is_verified(cb, feed_url)

    client.kill()
    client.wait()
    hub2_proc.kill()
    hub2_proc.wait()

def test_hub_invalidation_resub(ctx):
    """
    tests that if a currently subscribed hub is no longer listed,
    we subscribe to a different hub if any are listed.
    """
    from eventlet import sleep, spawn
    from melkman.db import RemoteFeed
    from melkman.fetch.pubsubhubbub import WSGISubClient, callback_url_for
    from melkman.fetch.pubsubhubbub import hubbub_sub, update_pubsub_state

    w = WSGISubClient(ctx)
    client = spawn(w.run)

    # create two hubs
    hub = FakeHub()
    hub_proc = spawn(hub.run)
    hub_url = 'http://localhost:%d/' % hub.port

    hub2 = FakeHub(port=9298)
    hub2_proc = spawn(hub2.run)
    hub2_url = 'http://localhost:%d/' % hub2.port

    feed_url = 'http://example.org/feeds/99'
    rf = RemoteFeed.create_from_url(feed_url, ctx)
    rf.feed_info = {'links': [{'rel': 'self', 'href': feed_url},
                              {'rel': 'hub', 'href': hub_url}]}
    rf.save()

    # subscribe to the feed on the hub
    cb = callback_url_for(feed_url, ctx)
    assert not hub.is_verified(cb, feed_url)
    r, c = hubbub_sub(rf, ctx)
    assert r.status == 202, 'Expected 202, got %d' % r.status
    sleep(.5)
    assert hub.is_verified(cb, feed_url)

    # remove the hub from the list of hubs, but replace it with another
    rf = RemoteFeed.get_by_url(feed_url, ctx)
    rf.feed_info = {'links': [{'rel': 'self', 'href': feed_url},
                              {'rel': 'hub', 'href': hub2_url}]}
    rf.save()

    # trigger an update
    update_pubsub_state(rf, ctx)

    # check that it is now unsubscribed from the original hub, and
    # subscribed to the new hub.
    sleep(2)
    assert not hub.is_verified(cb, feed_url)
    assert hub2.is_verified(cb, feed_url)
    rf = RemoteFeed.get_by_url(feed_url, ctx)
    assert rf.hub_info.enabled == True and rf.hub_info.subscribed == True

    client.kill()
    client.wait()
    hub_proc.kill()
    hub_proc.wait()
    hub2_proc.kill()
    hub2_proc.wait()

def test_sub_push(ctx):
    from httplib2 import Http
    from eventlet import sleep, spawn
    from melk.util.nonce import nonce_str
    from melkman.db import RemoteFeed
    from melkman.fetch.worker import run_feed_indexer
    from melkman.fetch.pubsubhubbub import WSGISubClient, callback_url_for, psh_digest

    import logging
    logging.basicConfig(level=logging.WARN)

    w = WSGISubClient(ctx)
    client = spawn(w.run)
    indexer = spawn(run_feed_indexer, ctx)

    http = Http()
    url = 'http://example.org/feed/0'
    content = random_atom_feed(url, 10)
    secret = nonce_str()
    digest = 'sha1=%s' % psh_digest(content, secret)
    cb = callback_url_for(url, ctx)

    assert RemoteFeed.get_by_url(url, ctx) == None

    # try posting something that is not subscribed
    r, c = http.request(cb, 'POST', body=content,
                        headers={'X-Hub-Signature': digest})
    assert r.status == 200, 'Expected 200, got %d' % r.status
    sleep(1)
    # nothing should happen...
    assert RemoteFeed.get_by_url(url, ctx) == None

    # set up the feed, but don't subscribe
    rf = RemoteFeed.create_from_url(url, ctx)
    rf.save()

    r, c = http.request(cb, 'POST', body=content,
                        headers={'X-Hub-Signature': digest})
    assert r.status == 200, 'Expected 200, got %d' % r.status
    sleep(1)
    # nothing should happen...
    rf = RemoteFeed.get_by_url(url, ctx)
    assert len(rf.entries) == 0

    # now set it up
    rf.hub_info.enabled = True
    rf.hub_info.subscribed = True
    rf.hub_info.secret = secret
    rf.save()

    # try with the wrong digest...
    r, c = http.request(cb, 'POST', body=content,
                        headers={'X-Hub-Signature': 'wrong'})
    assert r.status == 200, 'Expected 200, got %d' % r.status
    sleep(0.5)
    # nothing should happen...
    rf = RemoteFeed.get_by_url(url, ctx)
    assert len(rf.entries) == 0

    # try with no digest
    r, c = http.request(cb, 'POST', body=content)
    assert r.status == 200, 'Expected 200, got %d' % r.status
    sleep(0.5)
    # nothing should happen...
    rf = RemoteFeed.get_by_url(url, ctx)
    assert len(rf.entries) == 0

    # finally, try with the correct digest
    r, c = http.request(cb, 'POST', body=content,
                        headers={'X-Hub-Signature': digest})
    assert r.status == 200, 'Expected 200, got %d' % r.status
    sleep(0.5)
    # this time the items should be indexed
    rf = RemoteFeed.get_by_url(url, ctx)
    assert len(rf.entries) == 10
    for iid in melk_ids_in(content, url):
        assert iid in rf.entries

    client.kill()
    client.wait()
    indexer.kill()
    indexer.wait()

def test_push_index_digest(ctx):
    from melk.util.nonce import nonce_str
    from melkman.db.remotefeed import RemoteFeed
    from melkman.fetch import push_feed_index
    from melkman.fetch.worker import run_feed_indexer
    from eventlet import sleep, spawn
    from melkman.fetch.pubsubhubbub import psh_digest

    # start a feed indexer
    indexer = spawn(run_feed_indexer, ctx)

    url = 'http://www.example.com/feeds/2'
    rf = RemoteFeed.create_from_url(url, ctx)
    rf.hub_info.enabled = True
    rf.hub_info.subscribed = True
    rf.save()

    secret = nonce_str()
    content = random_atom_feed(url, 10)
    ids = melk_ids_in(content, url)
    correct_digest = 'sha1=%s' % psh_digest(content, secret)
    wrong_digest = 'wrong digest'

    #
    # no hub secret is specified on the feed
    #
    push_feed_index(url, content, ctx, digest=wrong_digest, from_hub=True)
    sleep(.5)
    rf = RemoteFeed.get_by_url(url, ctx)
    for iid in ids:
        assert iid not in rf.entries

    push_feed_index(url, content, ctx, digest=None, from_hub=True)
    sleep(.5)
    rf = RemoteFeed.get_by_url(url, ctx)
    for iid in ids:
        assert iid not in rf.entries

    # even the correct digest fails, as no secret has been set
    push_feed_index(url, content, ctx, digest=correct_digest, from_hub=True)
    sleep(.5)
    rf = RemoteFeed.get_by_url(url, ctx)
    for iid in ids:
        assert iid not in rf.entries

    #
    # now set the hub secret
    #
    rf.hub_info.secret = secret
    rf.save()

    push_feed_index(url, content, ctx, digest=wrong_digest, from_hub=True)
    sleep(.5)
    rf = RemoteFeed.get_by_url(url, ctx)
    for iid in ids:
        assert iid not in rf.entries

    push_feed_index(url, content, ctx, digest=None, from_hub=True)
    sleep(.5)
    rf = RemoteFeed.get_by_url(url, ctx)
    for iid in ids:
        assert iid not in rf.entries

    # finally, the correct digest should work now...
    push_feed_index(url, content, ctx, digest=correct_digest, from_hub=True)
    sleep(.5)
    rf = RemoteFeed.get_by_url(url, ctx)
    for iid in ids:
        assert iid in rf.entries

    indexer.kill()
    indexer.wait()

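# psh_digest itself is not defined in this section; per the PubSubHubbub 0.x
# spec, X-Hub-Signature carries an HMAC-SHA1 hex digest of the request body
# keyed by the shared secret. The sketch below shows an assumed-equivalent
# computation (the name psh_digest_sketch is hypothetical, not the actual
# implementation; byte strings are assumed for content and secret).
import hashlib
import hmac

def psh_digest_sketch(content, secret):
    # hex HMAC-SHA1 of the pushed body under the subscription secret
    return hmac.new(secret, content, hashlib.sha1).hexdigest()
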
def test_sub_to_hub(ctx):
    """
    test make_sub_request and make_unsub_request
    via hubbub_sub / hubbub_unsub.
    """
    from httplib2 import Http
    from eventlet import sleep, spawn
    from melkman.db import RemoteFeed
    from melkman.fetch.worker import run_feed_indexer
    from melkman.fetch.pubsubhubbub import WSGISubClient
    from melkman.fetch.pubsubhubbub import callback_url_for
    from melkman.fetch.pubsubhubbub import hubbub_sub
    from melkman.fetch.pubsubhubbub import hubbub_unsub
    from melkman.fetch.pubsubhubbub import psh_digest

    import logging
    logging.basicConfig(level=logging.WARN)

    w = WSGISubClient(ctx)
    client = spawn(w.run)
    indexer = spawn(run_feed_indexer, ctx)

    hub = FakeHub()
    hub_proc = spawn(hub.run)
    hub_url = 'http://localhost:%d/' % hub.port

    feed_url = 'http://example.org/feeds/99'
    rf = RemoteFeed.create_from_url(feed_url, ctx)
    rf.feed_info = {'links': [{'rel': 'self', 'href': feed_url},
                              {'rel': 'hub', 'href': hub_url}]}
    rf.save()

    cb = callback_url_for(feed_url, ctx)

    # subscribe to the hub
    assert not hub.is_verified(cb, feed_url)
    r, c = hubbub_sub(rf, ctx)
    assert r.status == 202, 'Expected 202, got %d' % r.status
    sleep(.5)
    assert hub.is_verified(cb, feed_url)
    secret = hub.secret_for(cb, feed_url)

    http = Http()

    # simulate the hub posting to the callback URL
    content = random_atom_feed(feed_url, 10, link=feed_url, hub_urls=[hub_url])
    digest = 'sha1=%s' % psh_digest(content, secret)
    r, c = http.request(cb, 'POST', body=content,
                        headers={'X-Hub-Signature': digest})
    assert r.status == 200, 'Expected 200, got %d' % r.status
    sleep(0.5)

    # since we are subscribed, the new items should be in the feed now
    rf = RemoteFeed.get_by_url(feed_url, ctx)
    assert len(rf.entries) == 10
    for iid in melk_ids_in(content, feed_url):
        assert iid in rf.entries

    # unsubscribe from the hub
    r, c = hubbub_unsub(rf, ctx)
    assert r.status == 202, 'Expected 202, got %d' % r.status
    sleep(.5)
    assert not hub.is_verified(cb, feed_url)

    # simulate another POST to the callback URL;
    # this time it should fail (we are not subscribed)
    content = random_atom_feed(feed_url, 10, link=feed_url, hub_urls=[hub_url])
    digest = "sha1=%s" % psh_digest(content, secret)
    r, c = http.request(cb, 'POST', body=content,
                        headers={'X-Hub-Signature': digest})
    assert r.status == 200, 'Expected 200, got %d' % r.status
    sleep(0.5)

    # items should be the same as before (not subscribed)
    rf = RemoteFeed.get_by_url(feed_url, ctx)
    assert len(rf.entries) == 10
    for iid in melk_ids_in(content, feed_url):
        assert not iid in rf.entries

    client.kill()
    client.wait()
    indexer.kill()
    indexer.wait()
    hub_proc.kill()
    hub_proc.wait()