Python get_one Examples, feedplatform.db.get_one Python Examples

Example #1

0

Show file

File: daemons.py Project: miracle2k/feedplatform

        def handle(self):
            """Handle one request asking for a feed to be queued.

            Reads a single line from ``self.rfile``. An all-digit line is
            treated as the primary key of a ``Feed``; anything else is
            decoded as UTF-8 and matched against ``Feed.url``. The outcome
            is recorded in the local ``result`` status string:

            * ``200 Ok`` - the feed was put on ``self.server.queue``
            * ``304 Feed already in queue``
            * ``404 Feed not found``
            * ``507 Queue is full`` - no slot became free within
              ``self.server.queue_timeout``
            * ``500 <message>`` - any other exception

            NOTE(review): nothing in the visible part of this method sends
            ``result`` back to the client; presumably that happens right
            after this block - confirm against the full file.
            """
            result = '200 Ok'
            try:
                rstr = self.rfile.readline().strip()
                if rstr.isdigit():
                    feed = db.store.get(db.models.Feed, int(rstr))
                else:
                    feed = db.get_one(db.store.find(
                        db.models.Feed, db.models.Feed.url == \
                            unicode(rstr, 'utf8')))

                if not feed:
                    result = '404 Feed not found'
                else:
                    # We're accessing queue's internal ``deque`` object
                    # here, in order to be able to use "in". Since we're
                    # locking with the mutex, we should be perfectly safe
                    # (it might not even be necessary).
                    #
                    # The lock only covers the membership test: ``put()``
                    # acquires the same mutex itself, so it has to be
                    # called after the lock has been released again.
                    with self.server.queue.mutex:
                        exists = feed in self.server.queue.queue
                    if not exists:
                        try:
                            # ``timeout`` must be passed by keyword: the
                            # second positional parameter of ``put()`` is
                            # ``block``, so a positional timeout would
                            # make the call wait forever on a full queue
                            # and ``Queue.Full`` would never be raised.
                            self.server.queue.put(
                                feed, timeout=self.server.queue_timeout)
                        except Queue.Full:
                            result = '507 Queue is full'
                    else:
                        result = '304 Feed already in queue'
            except Exception as e:
                # Report any failure as a status string instead of letting
                # it propagate out of the handler.
                result = '500 %s' % e

Example #2

0

Show file

File: enclosures.py Project: miracle2k/feedplatform

    def on_process_item(self, feed, item, entry_dict, item_created):
        """Synchronize the stored enclosures of ``item`` with ``entry_dict``.

        Per the suggested protocol, we're using ``process_item``, since we
        don't want nor need to cause an update to the item, but instead
        require it to be flushed, so we can hook up enclosures to it.

        For an item that already existed, stored enclosures whose href is
        no longer listed in the entry are removed. Then, for every
        enclosure in the entry that has an href, the matching row is
        looked up; if there is none, one is created (``create_enclosure``
        and ``new_enclosure`` hooks), otherwise ``found_enclosure`` is
        triggered. ``process_enclosure`` is triggered in both cases.

        :param feed: the feed being processed; only passed on to hooks.
        :param item: the item the enclosures belong to.
        :param entry_dict: the parsed entry; its ``enclosures`` key is read.
        :param item_created: true if ``item`` was created during this run.
        """

        enclosures = entry_dict.get("enclosures", ())

        # check for deleted enclosures (don't bother with new items)
        if not item_created:
            available_hrefs = [e.get("href") for e in enclosures]
            # Deliberately not named ``enclosure``, so this loop can never
            # leak a stale object into the lookup loop below.
            for stored in item.enclosures:
                if stored.href not in available_hrefs:
                    self.log.debug(
                        'Item #%d: enclosure #%d ("%s") no '
                        "longer exists - deleting." % (item.id, stored.id, stored.href)
                    )
                    db.store.remove(stored)

        # add new enclosures
        for enclosure_dict in enclosures:
            href = enclosure_dict.get("href")
            if not href:
                self.log.debug("Item #%d: enclosure has no href " "- skipping." % item.id)
                continue

            try:
                enclosure = db.get_one(
                    db.store.find(
                        db.models.Enclosure, db.models.Enclosure.href == href, db.models.Enclosure.item_id == item.id
                    )
                )
            except db.MultipleObjectsReturned:
                # The lookup did not yield a usable object, so there is
                # nothing valid to pass to the hooks. Skip this enclosure
                # rather than falling through with ``enclosure`` unbound
                # or still pointing at an object from an earlier pass.
                # TODO: provide a hook
                # TODO: test for this case
                self.log.debug(
                    'Item #%d: multiple enclosures match "%s" - skipping.' % (item.id, href)
                )
                continue

            if enclosure is None:
                # HOOK: CREATE_ENCLOSURE
                enclosure = hooks.trigger("create_enclosure", args=[feed, item, enclosure_dict, href])
                if not enclosure:
                    # no addin supplied an object, fall back to the default
                    enclosure = db.models.Enclosure()
                    enclosure.item = item
                    enclosure.href = href
                    db.store.add(enclosure)

                # HOOK: NEW_ENCLOSURE
                hooks.trigger("new_enclosure", args=[feed, enclosure, enclosure_dict])
                enclosure_created = True

                self.log.debug("Item #%d: new enclosure: %s" % (item.id, href))
            else:
                # HOOK: FOUND_ENCLOSURE
                hooks.trigger("found_enclosure", args=[feed, enclosure, enclosure_dict])
                enclosure_created = False

            # HOOK: PROCESS_ENCLOSURE
            hooks.trigger("process_enclosure", args=[feed, enclosure, enclosure_dict, enclosure_created])

Example #3

0

Show file

File: parse.py Project: miracle2k/feedplatform

def update_feed(feed, options=None):
    """Parse and update a single feed, as specified by the instance
    of the ``Feed`` model in ``feed``.

    This is the one, main, most important core function, at the
    epicenter of the package, providing different hooks to the rest
    of the world.

    ``options`` can contain any values, and addins may choose to act
    differently depending on what they find there. For example, this
    allows you to support a "full" and "light" mode, whereas
    performance heavy jobs like downloading a feed image are only
    processed when necessary in light mode, but will be forced in
    full mode. A copy of ``options`` is stored on ``feed._options``;
    omitting it (or passing ``None``) results in an empty dict.

    Returns ``None``. The function returns early, without committing,
    if the ``before_parse`` or ``after_parse`` hook asks to stop.
    Individual items that are skipped (by the ``item`` hook, for lack
    of a guid, or because the guid matches multiple rows) do not stop
    the remaining items from being processed.
    """

    # instead of adding an additional argument every hook, pass
    # the option along via ``feed``.
    feed._options = options.copy() if options is not None else {}

    # HOOK: BEFORE_PARSE
    parser_args = {
        'agent': config.USER_AGENT,
        'handlers': list(config.URLLIB2_HANDLERS),
    }
    stop = hooks.trigger('before_parse', args=[feed, parser_args])
    if stop:
        log.info('Feed #%d skipped by addin' % (feed.id))
        return

    # ACTION: PARSE FEED
    log.info('Updating feed #%d: %s' % (feed.id, feed.url))
    # It may be worth noting that FeedParser already IDNA-encodes by
    # itself, but expects the path/query etc. to already be quoted,
    # or it'll screw up the url badly.
    data_dict = feedparser.parse(asciify_url(feed.url), **parser_args)

    # HOOK: AFTER_PARSE
    stop = hooks.trigger('after_parse', args=[feed, data_dict])
    if stop:
        log.info('Feed #%d: Futher processing skipped by addin' % (feed.id))
        return

    # The bozo feature of Universal Feed Parser allows it to parse
    # feeds that are not well-formed
    # (http://feedparser.org/docs/bozo.html).
    # While very useful in many cases, it also means that just about
    # anything, from 404 to parking pages will be represented as a
    # feed object with the bozo flag set (albeit without any useful
    # feed data obviously - for example, the whole page content will
    # be inside the ``subtitle`` field).
    #
    # How do we differentiate between "valid" feed problems like a
    # missing closing tag, that could potentially be ignored while
    # still extracting useful content, and completely invalid data?
    # Simple, we don't. This will be the job of the error handling
    # addin, and should not be our care right now. Suffice to say
    # though that it is important for the addin to make sure that
    # those completely invalid feeds are skipped early so that e.g.
    # a previously valid feed title in the database is not overridden
    # with empty or clearly erroneous data.
    #
    # We will log the problem, though.
    if data_dict.bozo:
        # TODO: add a hook here
        log.warn('Feed #%d bozo: %s' % (feed.id, data_dict.bozo_exception))

    # ACTION: HANDLE ITEMS
    for entry_dict in data_dict.entries:

        # HOOK: ITEM
        stop = hooks.trigger('item', args=[feed, data_dict, entry_dict])
        if stop:
            log.debug('Feed #%d: Item was skipped by addin' % (feed.id))
            continue

        # ACTION: DETERMINE GUID; HOOKS: GET_GUID, NEED_GUID
        #
        # Determine a unique id for the item; this is one of the
        # few fixed requirements that we have: we need a guid.
        # Addins can provide new ways to determine one, but if all
        # fails, we just can't handle the item.
        guid = hooks.trigger('get_guid', args=[feed, entry_dict])
        if not guid:
            guid = entry_dict.get('guid')
        if not guid:
            guid = hooks.trigger('need_guid', args=[feed, entry_dict])

        # HOOK: NO_GUID
        if not guid:
            hooks.trigger('no_guid', args=[feed, entry_dict])
            log.warn('Feed #%d: unable to determine item guid' % (feed.id))
            continue
        else:
            log.debug('Feed #%d: determined item guid "%s"' % (feed.id, guid))

        # ACTION: RESOLVE GUID TO ITEM; HOOKS: GET_ITEM, NEED_ITEM
        #
        # XXX: we need more extensive testing here, with all the variants
        # of returning items, return false etc. involved.
        #
        # Note how each hook result is passed through get_one, since a
        # possible issue when resolving a guid is that for whatever
        # reason the database may contain multiple matching rows. This
        # is an error, and we handle it here for both our default query
        # as well as results delivered via a hook (the latter means
        # that addins don't have to care about this situation
        # themselves).
        try:
            item = db.get_one(hooks.trigger('get_item',
                                            args=[feed, entry_dict, guid]))
            if item is None:
                # does the item already exist for *this feed*?
                item = db.get_one((db.store.find(db.models.Item,
                                                 db.models.Item.feed==feed,
                                                 db.models.Item.guid==guid)))
            if item is None:
                item = db.get_one(hooks.trigger('need_item',
                                                args=[feed, entry_dict, guid]))
        except db.MultipleObjectsReturned:
            # Only this item is ambiguous: skip it like the other
            # per-item failures above, instead of returning and thereby
            # dropping all remaining items and the final commit.
            # TODO: provide a hook
            # TODO: test for this case
            log.warn('Feed #%d: multiple items match guid "%s" - '
                     'skipping item' % (feed.id, guid))
            continue

        if not item:
            # HOOK: CREATE_ITEM
            item = hooks.trigger('create_item', args=[feed, entry_dict, guid])
            if not item:
                item = db.models.Item()
                item.feed = feed
                item.guid = guid

            db.store.add(item)
            # HOOK: NEW_ITEM
            #
            # Note how this happens before flushing(), so that any
            # changes made by this hook will go into the initial
            # INSERT query. If you need an existing primary key, use
            # the process_item hook instead.
            hooks.trigger('new_item', args=[feed, item, entry_dict])

            db.store.flush()
            log.info('Feed #%d: found new item (#%d)' % (feed.id, item.id))
            item_created = True
        else:
            # HOOK: FOUND_ITEM
            hooks.trigger('found_item', args=[feed, item, entry_dict])
            item_created = False

        # HOOK: PROCESS_ITEM
        hooks.trigger('process_item', args=[feed, item, entry_dict, item_created])

        # flush once for each item
        db.store.flush()

    # commit once for each feed
    db.store.commit()