Example #1
0
def test_multiple():
    """Test handling of multiple callbacks for a hook.

    Three distinct callbacks are registered on the same hook; each one
    bumps a shared counter and returns ``42``. The test then checks
    both trigger modes: the default one is expected to stop after the
    first callback that returns a result, while ``all=True`` is
    expected to run every callback and return ``None``.
    """

    hooks.reset()
    # making this an attribute of a global avoids all kinds of scoping issues
    test_multiple.counter = 0

    def mkinc():  # can't use same callback twice
        def inc():
            test_multiple.counter += 1
            return 42

        return inc

    for _ in range(3):
        hooks.add_callback("alien_invasion", mkinc())

    # by default, the first successful callback returns
    test_multiple.counter = 0
    assert hooks.trigger("alien_invasion") == 42
    assert test_multiple.counter == 1

    # we can forcefully go through all callbacks (and get None back)
    test_multiple.counter = 0
    # identity check: we want the ``None`` singleton, not something
    # that merely compares equal to it
    assert hooks.trigger("alien_invasion", all=True) is None
    assert test_multiple.counter == 3
Example #2
0
def test_priority():
    """Check the order in which callbacks of a single hook are tried.

    Without explicit priorities the callback registered first is the
    one whose result is returned; with priorities, the callback with
    the highest ``priority`` value is expected to win.
    """

    def returning(value):
        # every call builds a fresh callable, so no callback object is
        # ever registered twice
        return lambda: value

    # fifo: without a priority, the callback added first is called first
    hooks.reset()
    for result in (1, 2, 3):
        hooks.add_callback("alien_invasion", returning(result))
    assert hooks.trigger("alien_invasion") == 1

    # but callback prioritization works as well
    hooks.reset()
    prioritized = (("p10", 10), ("p20", 20), ("p5", 5))
    for label, level in prioritized:
        hooks.add_callback("alien_invasion", returning(label), priority=level)
    assert hooks.trigger("alien_invasion") == "p20"
Example #3
0
def test_custom():
    """Test custom, non-default hooks.

    A hook name that was never registered must be rejected by both
    ``add_callback`` and ``trigger``; once registered it behaves like
    any other hook, and registering it again is harmless.
    """
    custom_hook = "i_love_you"

    # this fails, hook doesn't yet exist
    assert_raises(Exception, hooks.add_callback, custom_hook, lambda: None)
    assert_raises(Exception, hooks.trigger, custom_hook)

    # after we register the hook, it works
    hooks.register(custom_hook)
    hooks.add_callback(custom_hook, lambda: None)
    hooks.trigger(custom_hook)

    # registering the same hook multiple times is a no-op
    for _ in range(2):
        hooks.register(custom_hook)
Example #4
0
    def on_after_parse(self, feed, data_dict):
        """Run the feed image hooks for a freshly parsed feed.

        Looks up the image url in ``data_dict.feed['image']['href']``
        and returns immediately when there is none. Otherwise a
        ``RemoteImage`` is created for the url and handed, together
        with ``feed`` and the image dict, to these hooks in order:

        * ``feed_image`` - a truthy result stops all further handling.
        * ``update_feed_image`` - triggered with ``all=True``.
        * ``feed_image_updated``

        If an ``ImageError`` is raised while the hooks run, a warning
        is logged and ``feed_image_failed`` is triggered with the
        exception as an additional argument.

        Always returns ``None``.
        """

        # determine the image url, and bail out early if it is missing
        image_dict = data_dict.feed.get('image')
        image_href = image_dict.get('href') if image_dict else None
        if not image_href:
            return

        image = RemoteImage(image_href)
        hook_args = [feed, image_dict, image]

        try:
            # HOOK: FEED_IMAGE
            if hooks.trigger('feed_image', args=list(hook_args)):
                return

            # HOOK: UPDATE_FEED_IMAGE
            hooks.trigger('update_feed_image', args=list(hook_args), all=True)

            # HOOK: FEED_IMAGE_UPDATED
            hooks.trigger('feed_image_updated', args=list(hook_args))

        # ``as`` is valid on Python 2.6+ and Python 3; the older
        # ``except ImageError, e`` form is a syntax error on Python 3.
        except ImageError as e:
            self.log.warning('Feed #%d: error handling image "%s" (%s)' %
                (feed.id, image_href, e))

            # HOOK: FEED_IMAGE_FAILED
            hooks.trigger('feed_image_failed', args=hook_args + [e])
Example #5
0
    def on_process_item(self, feed, item, entry_dict, item_created):
        """
        Synchronize the stored enclosures of ``item`` with the ones
        listed under ``entry_dict["enclosures"]``.

        Per the suggested protocol, we're using ``process_item``, since we
        don't want nor need to cause an update to the item, but instead
        require it to be flushed, so we can hook up enclosures to it.

        For an item that already existed (``item_created`` is false),
        stored enclosures whose href is no longer part of the entry are
        removed. Then every enclosure of the entry is looked up by href
        and item id: a missing one is created (``create_enclosure`` may
        supply the object, followed by ``new_enclosure``), an existing
        one triggers ``found_enclosure``; both end with
        ``process_enclosure``. Enclosures without an href, or matching
        more than one stored row, are skipped.
        """

        enclosures = entry_dict.get("enclosures", ())

        # check for deleted enclosures (don't bother with new items)
        if not item_created:
            available_hrefs = set(e.get("href") for e in enclosures)
            for existing in item.enclosures:
                if existing.href not in available_hrefs:
                    self.log.debug(
                        'Item #%d: enclosure #%d ("%s") no '
                        "longer exists - deleting." % (item.id, existing.id, existing.href)
                    )
                    db.store.remove(existing)

        # add new enclosures
        for enclosure_dict in enclosures:
            href = enclosure_dict.get("href")
            if not href:
                self.log.debug("Item #%d: enclosure has no href - skipping." % item.id)
                continue

            try:
                enclosure = db.get_one(
                    db.store.find(
                        db.models.Enclosure,
                        db.models.Enclosure.href == href,
                        db.models.Enclosure.item_id == item.id,
                    )
                )
            except db.MultipleObjectsReturned:
                # Without a result there is no enclosure object to hand
                # to the hooks below; carrying on would either raise a
                # NameError or reuse the object of a previous iteration.
                # TODO: provide a hook
                # TODO: test for this case
                self.log.warning(
                    'Item #%d: multiple enclosures found for "%s" - skipping.' % (item.id, href)
                )
                continue

            if enclosure is None:
                # HOOK: CREATE_ENCLOSURE
                enclosure = hooks.trigger("create_enclosure", args=[feed, item, enclosure_dict, href])
                if not enclosure:
                    # no addin provided one, fall back to a plain model
                    enclosure = db.models.Enclosure()
                    enclosure.item = item
                    enclosure.href = href
                    db.store.add(enclosure)

                # HOOK: NEW_ENCLOSURE
                hooks.trigger("new_enclosure", args=[feed, enclosure, enclosure_dict])
                enclosure_created = True

                self.log.debug("Item #%d: new enclosure: %s" % (item.id, href))
            else:
                # HOOK: FOUND_ENCLOSURE
                hooks.trigger("found_enclosure", args=[feed, enclosure, enclosure_dict])
                enclosure_created = False

            # HOOK: PROCESS_ENCLOSURE
            hooks.trigger("process_enclosure", args=[feed, enclosure, enclosure_dict, enclosure_created])
Example #6
0
def test_reset():
    """Check that ``hooks.reset()`` drops callbacks and custom hooks."""
    # reset() was already used throughout previous tests,
    # but for good measure, do it specifically.

    # callback is no longer registered after a reset
    hooks.add_callback("alien_invasion", lambda: 42)
    hooks.reset()
    # identity check: with no callbacks left we expect the ``None``
    # singleton, not something that merely compares equal to it
    assert hooks.trigger("alien_invasion") is None

    # custom hook is gone after a reset
    hooks.register("i_love_you")
    hooks.reset()
    assert_raises(Exception, hooks.add_callback, "i_love_you", lambda: None)
Example #7
0
    def _load_data(self):
        """Generator that downloads the image and yields each chunk
        as soon as it has been read.

        Used internally whenever the image data is required. Chunks of
        ``self.chunk_size`` are read from ``self.request``, appended to
        the in-memory buffer ``self._data`` and then yielded, so a
        caller can start working with the data while the download is
        still running. After every chunk the
        ``feed_image_download_chunk`` hook is triggered, if it exists,
        with the image and the current buffer position.

        NOTE(review): the buffer is written through ``self._data`` but
        queried and rewound through ``self.data``; presumably a
        property returning the same object - confirm.
        """
        # TODO: store bigger files on disk?
        self._data = StringIO.StringIO()

        chunk = self.request.read(self.chunk_size)
        while chunk:
            self._data.write(chunk)
            # HOOK: FEED_IMAGE_DOWNLOAD_CHUNK
            if hooks.exists('feed_image_download_chunk'):
                position = self.data.tell()
                hooks.trigger('feed_image_download_chunk',
                              args=[self, position])
            yield chunk
            chunk = self.request.read(self.chunk_size)

        # rewind once the data has been loaded completely
        self.data.seek(0)
Example #8
0
def test_validity():
    """Check how the hook registry validates names and callbacks.

    Unknown hook names must raise ``KeyError``, adding the very same
    function twice must raise ``ValueError``, and a known hook passes
    its arguments through to the callback.
    """
    unknown_hook = "worldpeace"

    # invalid identifiers result in exceptions
    assert_raises(KeyError, hooks.add_callback, unknown_hook, lambda: None)
    assert_raises(KeyError, hooks.trigger, unknown_hook)
    assert_raises(KeyError, hooks.any, unknown_hook)

    # can't register the same function twice
    def duplicate():
        pass

    hooks.add_callback("alien_invasion", duplicate)
    assert_raises(ValueError, hooks.add_callback, "alien_invasion", duplicate)

    # valid identifiers work
    def identity(x):
        return x

    hooks.reset()
    hooks.add_callback("alien_invasion", identity)
    assert hooks.trigger("alien_invasion", [5]) == 5
Example #9
0
def update_feed(feed, options=None):
    """Parse and update a single feed, as specified by the instance
    of the ``Feed`` model in ``feed``.

    This is the one, main, most important core function, at the
    epicenter of the package, providing different hooks to the rest
    of the world.

    ``options`` can contain any values, and addins may choose to act
    differently depending on what they find there. For example, this
    allows you to support a "full" and "light" mode, whereas
    performance heavy jobs like downloading a feed image are only
    processed when necessary in light mode, but will be forced in
    full mode. A copy of ``options`` is stored as ``feed._options``;
    ``None`` (the default) is treated as an empty dict.

    Returns ``None``. If a ``before_parse`` or ``after_parse`` callback
    asks to stop, the function returns early without committing.
    Entries that are vetoed by the ``item`` hook, that have no guid, or
    whose guid resolves to more than one stored item are skipped; all
    other entries are flushed individually and committed once at the
    end.
    """

    # instead of adding an additional argument every hook, pass
    # the option along via ``feed``.
    feed._options = options.copy() if options is not None else {}

    # HOOK: BEFORE_PARSE
    parser_args = {
        'agent': config.USER_AGENT,
        'handlers': list(config.URLLIB2_HANDLERS),
    }
    stop = hooks.trigger('before_parse', args=[feed, parser_args])
    if stop:
        log.info('Feed #%d skipped by addin' % (feed.id))
        return

    # ACTION: PARSE FEED
    log.info('Updating feed #%d: %s' % (feed.id, feed.url))
    # It may be worth noting that FeedParser already IDNA-encodes by
    # itself, but expects the path/query etc. to already be quoted,
    # or it'll screw up the url badly.
    data_dict = feedparser.parse(asciify_url(feed.url), **parser_args)

    # HOOK: AFTER_PARSE
    stop = hooks.trigger('after_parse', args=[feed, data_dict])
    if stop:
        log.info('Feed #%d: Futher processing skipped by addin' % (feed.id))
        return

    # The bozo feature of Universal Feed Parser allows it to parse
    # feeds that are not well-formed
    # (http://feedparser.org/docs/bozo.html).
    # While very useful in many cases, it also means that just about
    # anything, from 404 to parking pages will be represented as a
    # feed object with the bozo flag set (but without any useful
    # feed data obviously - for example, the whole page content will
    # be inside the ``subtitle`` field).
    #
    # How do we differentiate between "valid" feed problems like a
    # missing closing tag, that could potentially be ignored while
    # still extracting useful content, and completely invalid data?
    # Simple, we don't. This will be the job of the error handling
    # addin, and should not be our care right now. Suffice to say
    # though that it is important for the addin to make sure that
    # those completely invalid feeds are skipped early so that e.g.
    # a previously valid feed title in the database is not overridden
    # with empty or clearly erroneous data.
    #
    # We will log the problem, though.
    if data_dict.bozo:
        # TODO: add a hook here
        log.warn('Feed #%d bozo: %s' % (feed.id, data_dict.bozo_exception))

    # ACTION: HANDLE ITEMS
    for entry_dict in data_dict.entries:

        # HOOK: ITEM
        stop = hooks.trigger('item', args=[feed, data_dict, entry_dict])
        if stop:
            log.debug('Feed #%d: Item was skipped by addin' % (feed.id))
            continue

        # ACTION: DETERMINE GUID; HOOKS: GET_GUID, NEED_GUID
        #
        # Determine a unique id for the item; this is one of the
        # few fixed requirements that we have: we need a guid.
        # Addins can provide new ways to determine one, but if all
        # fails, we just can't handle the item.
        guid = hooks.trigger('get_guid', args=[feed, entry_dict])
        if not guid:
            guid = entry_dict.get('guid')
        if not guid:
            guid = hooks.trigger('need_guid', args=[feed, entry_dict])

        # HOOK: NO_GUID
        if not guid:
            hooks.trigger('no_guid', args=[feed, entry_dict])
            log.warn('Feed #%d: unable to determine item guid' % (feed.id))
            continue
        else:
            log.debug('Feed #%d: determined item guid "%s"' % (feed.id, guid))

        # ACTION: RESOLVE GUID TO ITEM; HOOKS: GET_ITEM, NEED_ITEM
        #
        # XXX: we need more extensive testing here, with all the variants
        # of returning items, return false etc. involved.
        #
        # Note how each hook result is passed through get_one, since a
        # possible issue when resolving a guid is that for whatever
        # reason the database may contain multiple matching rows. This
        # is an error, and we handle it here for both our default query
        # as well as results delivered via a hook (the latter means
        # that addins don't have to care about this situation
        # themselves).
        try:
            item = db.get_one(hooks.trigger('get_item',
                                            args=[feed, entry_dict, guid]))
            if item is None:
                # does the item already exist for *this feed*?
                item = db.get_one((db.store.find(db.models.Item,
                                                 db.models.Item.feed==feed,
                                                 db.models.Item.guid==guid)))
            if item is None:
                item = db.get_one(hooks.trigger('need_item',
                                                args=[feed, entry_dict, guid]))
        except db.MultipleObjectsReturned:
            # Only this entry is ambiguous, so skip it like an entry
            # without a guid. Returning here would abandon the rest of
            # the feed and skip the commit of the items already flushed.
            # TODO: provide a hook
            # TODO: test for this case
            log.warn('Feed #%d: multiple items found for guid "%s" - '
                     'skipping item' % (feed.id, guid))
            continue

        if not item:
            # HOOK: CREATE_ITEM
            item = hooks.trigger('create_item', args=[feed, entry_dict, guid])
            if not item:
                item = db.models.Item()
                item.feed = feed
                item.guid = guid

            db.store.add(item)
            # HOOK: NEW_ITEM
            #
            # Note how this happens before flushing(), so that any
            # changes made by this hook will go into the initial
            # INSERT query. If you need an existing primary key, use
            # the process_item hook instead.
            hooks.trigger('new_item', args=[feed, item, entry_dict])

            db.store.flush()
            log.info('Feed #%d: found new item (#%d)' % (feed.id, item.id))
            item_created = True
        else:
            # HOOK: FOUND_ITEM
            hooks.trigger('found_item', args=[feed, item, entry_dict])
            item_created = False

        # HOOK: PROCESS_ITEM
        hooks.trigger('process_item', args=[feed, item, entry_dict, item_created])

        # flush once for each item
        db.store.flush()

    # commit once for each feed
    db.store.commit()