def test_multiple():
    """Test handling of multiple callbacks for a hook.

    Three callbacks are attached to the same hook; a plain trigger is
    expected to stop after the first one that returns a value, while
    ``all=True`` is expected to run every callback and return ``None``.
    """
    hooks.reset()

    # Keeping the counter as an attribute of a global (this very function)
    # avoids all kinds of scoping issues inside the nested callbacks.
    test_multiple.counter = 0

    def mkinc():
        # A factory is used because the same callback can't be added twice.
        def inc():
            test_multiple.counter += 1
            return 42
        return inc

    hooks.add_callback("alien_invasion", mkinc())
    hooks.add_callback("alien_invasion", mkinc())
    hooks.add_callback("alien_invasion", mkinc())

    # by default, the first successful callback returns
    test_multiple.counter = 0
    assert hooks.trigger("alien_invasion") == 42
    assert test_multiple.counter == 1

    # we can forcefully go through all callbacks (and get None back)
    test_multiple.counter = 0
    assert hooks.trigger("alien_invasion", all=True) is None
    assert test_multiple.counter == 3
def test_priority():
    """Check which of several competing callbacks gets to answer a hook."""
    hook = "alien_invasion"

    def returning(value):
        # Build a fresh zero-argument callback that answers with ``value``.
        return lambda: value

    # FIFO: when no priority is given, the callback added first answers.
    hooks.reset()
    for answer in (1, 2, 3):
        hooks.add_callback(hook, returning(answer))
    assert hooks.trigger(hook) == 1

    # With explicit priorities, the callback with the largest one answers,
    # independent of the order in which they were added.
    hooks.reset()
    for level in (10, 20, 5):
        hooks.add_callback(hook, returning("p%d" % level), priority=level)
    assert hooks.trigger(hook) == "p20"
def test_custom():
    """Exercise custom hooks, i.e. ones that must be registered before use."""
    custom = "i_love_you"

    # While the hook is unknown, neither adding a callback nor triggering
    # it is accepted.
    assert_raises(Exception, hooks.add_callback, custom, lambda: None)
    assert_raises(Exception, hooks.trigger, custom)

    # Once registered, the same two operations go through.
    hooks.register(custom)
    hooks.add_callback(custom, lambda: None)
    hooks.trigger(custom)

    # Registering an already known hook again must not raise.
    for _ in range(2):
        hooks.register(custom)
def on_after_parse(self, feed, data_dict):
    """Run the feed image hooks for the image announced by a parsed feed.

    Looks up ``data_dict.feed['image']['href']``; when there is no image
    or no href, nothing happens. Otherwise a ``RemoteImage`` is created for
    the url and passed through the ``feed_image``, ``update_feed_image``
    and ``feed_image_updated`` hooks. An ``ImageError`` raised along the
    way is logged and reported through ``feed_image_failed``.

    Always returns ``None``.
    """
    # determine the image url, and bail out early if it is missing
    image_href = None
    image_dict = data_dict.feed.get('image')
    if image_dict:
        image_href = image_dict.get('href')
    if not image_href:
        return

    image = RemoteImage(image_href)
    try:
        # HOOK: FEED_IMAGE
        # A true result means a callback asked us not to go any further.
        stop = hooks.trigger('feed_image', args=[feed, image_dict, image])
        if stop:
            return

        # HOOK: UPDATE_FEED_IMAGE
        # ``all=True`` runs every callback rather than just the first.
        hooks.trigger('update_feed_image',
                      args=[feed, image_dict, image], all=True)

        # HOOK: FEED_IMAGE_UPDATED
        hooks.trigger('feed_image_updated', args=[feed, image_dict, image])
    except ImageError as e:
        self.log.warning('Feed #%d: error handling image "%s" (%s)' %
                         (feed.id, image_href, e))
        # HOOK: FEED_IMAGE_FAILED
        hooks.trigger('feed_image_failed',
                      args=[feed, image_dict, image, e])
def on_process_item(self, feed, item, entry_dict, item_created):
    """Bring the stored enclosures of ``item`` in line with ``entry_dict``.

    Per the suggested protocol, we're using ``process_item``, since we
    don't want nor need to cause an update to the item, but instead
    require it to be flushed, so we can hook up enclosures to it.

    For items that already existed, stored enclosures whose href is no
    longer listed in the entry are removed. Every listed enclosure with an
    href is then looked up for this item; missing ones are created
    (``create_enclosure`` / ``new_enclosure`` hooks), existing ones are
    reported via ``found_enclosure``, and ``process_enclosure`` is
    triggered for both. An href that matches several stored rows is logged
    and skipped.
    """
    enclosures = entry_dict.get("enclosures", ())

    # check for deleted enclosures (don't bother with new items)
    if not item_created:
        available_hrefs = [e.get("href") for e in enclosures]
        for enclosure in item.enclosures:
            if enclosure.href not in available_hrefs:
                self.log.debug(
                    'Item #%d: enclosure #%d ("%s") no '
                    "longer exists - deleting."
                    % (item.id, enclosure.id, enclosure.href)
                )
                db.store.remove(enclosure)

    # add new enclosures
    for enclosure_dict in enclosures:
        href = enclosure_dict.get("href")
        if not href:
            self.log.debug("Item #%d: enclosure has no href "
                           "- skipping." % item.id)
            continue

        try:
            enclosure = db.get_one(
                db.store.find(
                    db.models.Enclosure,
                    db.models.Enclosure.href == href,
                    db.models.Enclosure.item_id == item.id
                )
            )
        except db.MultipleObjectsReturned:
            # Falling through here would leave ``enclosure`` either unbound
            # or still pointing at an object from a previous loop, so skip
            # this entry instead of processing the wrong enclosure.
            # TODO: provide a hook
            # TODO: test for this case
            self.log.warning(
                'Item #%d: multiple enclosures found for "%s" - skipping.'
                % (item.id, href)
            )
            continue

        if enclosure is None:
            # HOOK: CREATE_ENCLOSURE
            # A callback may supply the object; otherwise use the default.
            enclosure = hooks.trigger(
                "create_enclosure", args=[feed, item, enclosure_dict, href])
            if not enclosure:
                enclosure = db.models.Enclosure()
            enclosure.item = item
            enclosure.href = href
            db.store.add(enclosure)

            # HOOK: NEW_ENCLOSURE
            hooks.trigger("new_enclosure",
                          args=[feed, enclosure, enclosure_dict])
            enclosure_created = True
            self.log.debug("Item #%d: new enclosure: %s" % (item.id, href))
        else:
            # HOOK: FOUND_ENCLOSURE
            hooks.trigger("found_enclosure",
                          args=[feed, enclosure, enclosure_dict])
            enclosure_created = False

        # HOOK: PROCESS_ENCLOSURE
        hooks.trigger(
            "process_enclosure",
            args=[feed, enclosure, enclosure_dict, enclosure_created])
def test_reset():
    """Test that ``hooks.reset()`` drops both callbacks and custom hooks."""
    # reset() was already used throughout previous tests,
    # but for good measure, do it specifically.

    # callback is no longer registered after a reset
    hooks.add_callback("alien_invasion", lambda: 42)
    hooks.reset()
    assert hooks.trigger("alien_invasion") is None

    # custom hook is gone after a reset
    hooks.register("i_love_you")
    hooks.reset()
    assert_raises(Exception, hooks.add_callback, "i_love_you", lambda: None)
def _load_data(self):
    """Generator that fetches the image and hands out chunks as they arrive.

    Used internally whenever the image data has to be accessed. Because
    each chunk is yielded while the download is still in progress, a
    caller can start working with the data before all of it is there.
    """
    # TODO: store bigger files on disk?
    self._data = StringIO.StringIO()

    # NOTE(review): the buffer is written through ``self._data`` but queried
    # through ``self.data`` - presumably an accessor for the same buffer;
    # confirm against the class definition.
    chunk = self.request.read(self.chunk_size)
    while chunk:
        self._data.write(chunk)

        # HOOK: FEED_IMAGE_DOWNLOAD_CHUNK
        # Skip computing the position when the hook does not exist.
        if hooks.exists('feed_image_download_chunk'):
            position = self.data.tell()
            hooks.trigger('feed_image_download_chunk',
                          args=[self, position])

        yield chunk
        chunk = self.request.read(self.chunk_size)

    # an empty read ended the loop; rewind the buffer to its start
    self.data.seek(0)
def test_validity():
    """Verify how hook identifiers and duplicate callbacks are validated."""
    unknown, known = "worldpeace", "alien_invasion"

    # An identifier that is not a hook raises KeyError from each entry point.
    assert_raises(KeyError, hooks.add_callback, unknown, lambda: None)
    for entry_point in (hooks.trigger, hooks.any):
        assert_raises(KeyError, entry_point, unknown)

    # The very same function is rejected when added a second time.
    def callback():
        pass
    hooks.add_callback(known, callback)
    assert_raises(ValueError, hooks.add_callback, known, callback)

    # A valid identifier works, and the argument list reaches the callback.
    hooks.reset()
    hooks.add_callback(known, lambda x: x)
    assert hooks.trigger(known, [5]) == 5
def update_feed(feed, options=None):
    """Parse and update a single feed, as specified by the instance of
    the ``Feed`` model in ``feed``.

    This is the one, main, most important core function, at the
    epicenter of the package, providing different hooks to the rest
    of the world.

    ``options`` can contain any values, and addins may choose to act
    differently depending on what they find there. For example, this
    allows you to support a "full" and "light" mode, whereas performance
    heavy jobs like downloading a feed image are only processed when
    necessary in light mode, but will be forced in full mode. When
    omitted (or ``None``), an empty dict is used. A copy is stored on
    ``feed._options``, so the caller's object is never modified.

    Returns ``None``. The function returns early, without committing,
    when an addin stops processing via the ``before_parse`` or
    ``after_parse`` hooks, or when resolving a guid yields multiple
    database rows.
    """

    # instead of adding an additional argument every hook, pass
    # the option along via ``feed``.
    feed._options = options.copy() if options is not None else {}

    # HOOK: BEFORE_PARSE
    parser_args = {
        'agent': config.USER_AGENT,
        'handlers': list(config.URLLIB2_HANDLERS),
    }
    stop = hooks.trigger('before_parse', args=[feed, parser_args])
    if stop:
        log.info('Feed #%d skipped by addin' % feed.id)
        return

    # ACTION: PARSE FEED
    log.info('Updating feed #%d: %s' % (feed.id, feed.url))
    # It may be worth noting that FeedParser already IDNA-encodes by
    # itself, but expects the path/query etc. to already be quoted,
    # or it'll screw up the url badly.
    data_dict = feedparser.parse(asciify_url(feed.url), **parser_args)

    # HOOK: AFTER_PARSE
    stop = hooks.trigger('after_parse', args=[feed, data_dict])
    if stop:
        log.info('Feed #%d: Futher processing skipped by addin' % feed.id)
        return

    # The bozo feature of Universal Feed Parser allows it to parse feeds
    # that are not well-formed (http://feedparser.org/docs/bozo.html).
    # While very useful in many cases, it also means that just about
    # anything, from 404 to parking pages, will be represented as a
    # feed object with the bozo flag set (albeit without any useful
    # feed data, obviously - for example, the whole page content will
    # be inside the ``subtitle`` field).
    #
    # How do we differentiate between "valid" feed problems like a
    # missing closing tag, that could potentially be ignored while
    # still extracting useful content, and completely invalid data?
    # Simple, we don't. This will be the job of the error handling
    # addin, and should not be our care right now. Suffice to say
    # though that it is important for the addin to make sure that
    # those completely invalid feeds are skipped early so that e.g.
    # a previously valid feed title in the database is not overridden
    # with empty or clearly erroneous data.
    #
    # We will log the problem, though.
    if data_dict.bozo:
        # TODO: add a hook here
        log.warn('Feed #%d bozo: %s' % (feed.id, data_dict.bozo_exception))

    # ACTION: HANDLE ITEMS
    for entry_dict in data_dict.entries:

        # HOOK: ITEM
        stop = hooks.trigger('item', args=[feed, data_dict, entry_dict])
        if stop:
            log.debug('Feed #%d: Item was skipped by addin' % feed.id)
            continue

        # ACTION: DETERMINE GUID; HOOKS: GET_GUID, NEED_GUID
        #
        # Determine a unique id for the item; this is one of the
        # few fixed requirements that we have: we need a guid.
        # Addins can provide new ways to determine one, but if all
        # fails, we just can't handle the item.
        guid = hooks.trigger('get_guid', args=[feed, entry_dict])
        if not guid:
            guid = entry_dict.get('guid')
        if not guid:
            guid = hooks.trigger('need_guid', args=[feed, entry_dict])

        # HOOK: NO_GUID
        if not guid:
            hooks.trigger('no_guid', args=[feed, entry_dict])
            log.warn('Feed #%d: unable to determine item guid' % feed.id)
            continue
        else:
            log.debug('Feed #%d: determined item guid "%s"' % (feed.id, guid))

        # ACTION: RESOLVE GUID TO ITEM; HOOKS: GET_ITEM, NEED_ITEM
        #
        # XXX: we need more extensive testing here, with all the variants
        # of returning items, return false etc. involved.
        #
        # Note how each hook result is passed through get_one, since a
        # possible issue when resolving a guid is that for whatever
        # reason the database may contain multiple matching rows. This
        # is an error, and we handle it here for both our default query
        # as well as results delivered via a hook (the latter means
        # that addins don't have to care about this situation
        # themselves).
        try:
            item = db.get_one(
                hooks.trigger('get_item', args=[feed, entry_dict, guid]))
            if item is None:
                # does the item already exist for *this feed*?
                item = db.get_one(
                    db.store.find(db.models.Item,
                                  db.models.Item.feed == feed,
                                  db.models.Item.guid == guid))
            if item is None:
                item = db.get_one(
                    hooks.trigger('need_item', args=[feed, entry_dict, guid]))
        except db.MultipleObjectsReturned:
            # TODO: provide a hook
            # TODO: test for this case
            # NOTE(review): returning here abandons the remaining entries
            # and skips the final commit. Kept as-is since that may be
            # intentional - confirm whether ``continue`` was meant.
            log.warn('Feed #%d: multiple items found for guid "%s"' %
                     (feed.id, guid))
            return

        if not item:
            # HOOK: CREATE_ITEM
            # A callback may supply the object; otherwise use the default.
            item = hooks.trigger('create_item', args=[feed, entry_dict, guid])
            if not item:
                item = db.models.Item()
            item.feed = feed
            item.guid = guid
            db.store.add(item)

            # HOOK: NEW_ITEM
            #
            # Note how this happens before flushing(), so that any
            # changes made by this hook will go into the initial
            # INSERT query. If you need an existing primary key, use
            # the process_item hook instead.
            hooks.trigger('new_item', args=[feed, item, entry_dict])

            db.store.flush()
            log.info('Feed #%d: found new item (#%d)' % (feed.id, item.id))
            item_created = True
        else:
            # HOOK: FOUND_ITEM
            hooks.trigger('found_item', args=[feed, item, entry_dict])
            item_created = False

        # HOOK: PROCESS_ITEM
        hooks.trigger('process_item',
                      args=[feed, item, entry_dict, item_created])

        # flush once for each item
        db.store.flush()

    # commit once for each feed
    db.store.commit()