Example #1
0
    def update(self, entry):
        """Copy the fields of a feedparser entry onto this item.

        Each key of ``entry`` is handled according to its kind:

        * keys named in ``IGNORE_KEYS`` (directly, or through their
          ``_parsed`` companion name) are skipped;
        * raw keys that have a ``<key>_parsed`` companion in the entry are
          skipped, as are all ``*_detail`` keys;
        * ``*_parsed`` values that are not None are stored with
          ``set_as_date`` under the key name minus the suffix;
        * ``source`` is stored as ``source_name`` and ``source_link``;
        * the values of the ``content`` parts are concatenated into a
          single string;
        * anything else is handed to ``set_as_string``; if that fails the
          problem is logged and the field is dropped.

        ``get_date("date")`` is called once all keys have been processed.
        """
        parsed_suffix = "_parsed"

        for field in entry.keys():
            # Fields the item has been told to ignore
            if (field in self.IGNORE_KEYS
                    or field + parsed_suffix in self.IGNORE_KEYS):
                continue

            # Unparsed date: the parsed companion is stored instead
            if entry.has_key(field + parsed_suffix):
                continue

            # Detail dictionaries are not kept
            if field.endswith("_detail"):
                continue

            # Parsed date: stored under the name without the suffix
            if field.endswith(parsed_suffix):
                parsed = entry[field]
                if parsed is not None:
                    self.set_as_date(field[:-len(parsed_suffix)], parsed)
                continue

            # Source: keep both its name and its link
            if field == "source":
                source = entry[field]
                if source.has_key("value"):
                    self.set_as_string(field + "_name", source.value)
                if source.has_key("url"):
                    self.set_as_string(field + "_link", source.url)
                continue

            # Content: glue the individual parts together
            if field == "content":
                pieces = [cache.utf8(part.value) for part in entry[field]]
                self.set_as_string(field, "".join(pieces))
                continue

            # Anything else is treated as a string; a value that cannot be
            # stored is logged and skipped rather than aborting the update
            try:
                self.set_as_string(field, entry[field])
            except KeyboardInterrupt:
                raise
            except:
                log.exception("Ignored '%s' of <%s>, unknown format", field,
                              self.id)

        # Make sure the date field exists
        self.get_date("date")
Example #2
0
    def update(self, entry):
        """Copy the fields of a feedparser entry onto this item.

        Each key of ``entry`` is handled according to its kind:

        * keys named in ``IGNORE_KEYS`` (directly, or through their
          ``_parsed`` companion name) and raw keys that have a
          ``<key>_parsed`` companion in the entry are skipped;
        * from ``*_detail`` values the non-empty ``name``, ``email`` and
          ``language`` sub-fields are kept (``language`` only when the
          channel has no language or a different one);
        * ``*_parsed`` values that are not None are stored with
          ``set_as_date`` under the key name minus the suffix;
        * ``source`` is stored as ``source_name`` and ``source_link``;
        * ``content`` parts are sanitized (``text/html``) or escaped
          (``text/plain``) in place and then concatenated;
        * remaining string values are sanitized or escaped according to
          the type given by their ``_detail`` companion, then stored with
          ``set_as_string``; a failure is logged and the field is dropped.

        Values that match none of the above are ignored.
        ``get_date("date")`` is called once all keys have been processed.
        """
        for field in entry.keys():
            # Fields the item has been told to ignore
            if field in self.IGNORE_KEYS or field + "_parsed" in self.IGNORE_KEYS:
                continue

            # Unparsed date: the parsed companion is stored instead
            if entry.has_key(field + "_parsed"):
                continue

            # Detail dictionary: keep name, email and language sub-fields
            if field.endswith("_detail"):
                detail = entry[field]
                for sub in ("name", "email"):
                    if detail.has_key(sub):
                        sub_value = getattr(detail, sub)
                        if sub_value:
                            self.set_as_string(field.replace("_detail", "_" + sub), sub_value)
                if detail.has_key("language"):
                    language = detail.language
                    # Only record a language that adds information beyond
                    # the channel's own language
                    if language and (
                        not self._channel.has_key("language")
                        or language != self._channel.language
                    ):
                        self.set_as_string(field.replace("_detail", "_language"), language)
                continue

            # Parsed date: stored under the name without the suffix
            if field.endswith("_parsed"):
                parsed = entry[field]
                if parsed is not None:
                    self.set_as_date(field[: -len("_parsed")], parsed)
                continue

            # Source: keep both its name and its link
            if field == "source":
                source = entry[field]
                if source.has_key("value"):
                    self.set_as_string(field + "_name", source.value)
                if source.has_key("url"):
                    self.set_as_string(field + "_link", source.url)
                continue

            # Content: clean each part, then glue the parts together
            if field == "content":
                pieces = []
                for part in entry[field]:
                    if part.type == "text/html":
                        part.value = sanitize.HTML(part.value)
                    elif part.type == "text/plain":
                        part.value = escape(part.value)
                    if part.has_key("language"):
                        language = part.language
                        if language and (
                            not self._channel.has_key("language")
                            or language != self._channel.language
                        ):
                            self.set_as_string(field + "_language", language)
                    pieces.append(cache.utf8(part.value))
                self.set_as_string(field, "".join(pieces))
                continue

            # Only plain string values are stored from here on
            if not isinstance(entry[field], (str, unicode)):
                continue

            try:
                detail_key = field + "_detail"
                if entry.has_key(detail_key) and entry[detail_key].has_key("type"):
                    content_type = entry[detail_key].type
                    if content_type == "text/html":
                        entry[field] = sanitize.HTML(entry[field])
                    elif content_type == "text/plain":
                        entry[field] = escape(entry[field])
                self.set_as_string(field, entry[field])
            except KeyboardInterrupt:
                raise
            except:
                log.exception("Ignored '%s' of <%s>, unknown format", field, self.id)

        # Make sure the date field exists
        self.get_date("date")
Example #3
0
    def update_entries(self, entries):
        """Update entries from the feed.

        This reads the entries supplied by feedparser and updates the
        cached information about them.  It's at this point we update
        the 'updated' timestamp and keep the old one in 'last_updated',
        these provide boundaries for acceptable entry times.

        If relevant_tags is set, only entries carrying at least one tag
        whose lower-cased term is found in self.tags() are processed; all
        other entries are skipped entirely.

        If this is the first time a feed has been updated then most of the
        items will be marked as hidden, according to Planet.new_feed_items.

        If the feed does not contain items which, according to the sort order,
        should be there; those items are assumed to have been expired from
        the feed or replaced and are removed from the cache.

        Nothing is changed when entries is empty.
        """
        if not len(entries):
            return

        self.last_updated = self.updated
        self.updated = time.gmtime()

        new_items = []
        feed_items = []
        for entry in entries:
            # Try really hard to find some kind of unique identifier
            if entry.has_key("id"):
                entry_id = cache.utf8(entry.id)
            elif entry.has_key("link"):
                entry_id = cache.utf8(entry.link)
            elif entry.has_key("title"):
                entry_id = self.url + "/" + md5.new(cache.utf8(entry.title)).hexdigest()
            elif entry.has_key("summary"):
                entry_id = self.url + "/" + md5.new(cache.utf8(entry.summary)).hexdigest()
            else:
                log.error("Unable to find or generate id, entry ignored")
                continue

            # Ignore posts based on tag filter if specified.  A filtered
            # entry must not reach the item handling below: previously the
            # hide-excess step still ran for it, re-marking the last
            # accepted item and logging the filtered entry's id as hidden.
            if self.relevant_tags:
                include_entry = False
                if entry.has_key("tags"):
                    for t in entry["tags"]:
                        if t["term"].lower() in self.tags():
                            include_entry = True
                            # One matching tag is enough
                            break
                if not include_entry:
                    continue

            # Create the item if necessary and update
            if self.has_item(entry_id):
                item = self._items[entry_id]
            else:
                item = NewsItem(self, entry_id)
                self._items[entry_id] = item
                new_items.append(item)

            item.update(entry)
            feed_items.append(entry_id)

            # Hide excess items the first time through
            if (
                self.last_updated is None
                and self._planet.new_feed_items
                and len(feed_items) > self._planet.new_feed_items
            ):
                item.hidden = "yes"
                log.debug("Marked <%s> as hidden (new feed)", entry_id)

        # Assign order numbers in reverse
        new_items.reverse()
        for item in new_items:
            item.order = self.next_order = str(int(self.next_order) + 1)

        # Check for expired or replaced items
        feed_count = len(feed_items)
        log.debug("Items in Feed: %d", feed_count)
        for item in self.items(sorted=1):
            if feed_count < 1:
                # Every item seen in the feed has been accounted for
                break
            elif item.id in feed_items:
                feed_count -= 1
            elif item._channel.url_status != "226":
                # NOTE(review): "226" is presumably HTTP 226 IM Used (a
                # delta response that lists only changed entries), in
                # which case absence does not imply expiry -- confirm.
                del (self._items[item.id])
                self._expired.append(item)
                log.debug("Removed expired or replaced item <%s>", item.id)
Example #4
0
    def update_entries(self, entries):
        """Refresh the cached items from the entries of a parsed feed.

        Nothing happens when ``entries`` is empty.  Otherwise the previous
        'updated' timestamp is kept in 'last_updated' and 'updated' is set
        to the current time; these provide boundaries for acceptable entry
        times.

        Every entry is given an identifier (its id, else its link, else a
        hash of its title or summary appended to the channel URL); entries
        for which none can be found are logged and skipped.  The matching
        item is created if needed and updated from the entry.

        On the first update of a feed, items beyond
        Planet.new_feed_items are marked as hidden.

        Cached items which, according to the sort order, should be in the
        feed but are not are assumed to have expired or been replaced and
        are removed from the cache.
        """
        if len(entries) == 0:
            return

        self.last_updated = self.updated
        self.updated = time.gmtime()

        created = []
        feed_items = []
        for entry in entries:
            # Pick the best available source for a unique identifier
            id_source = None
            for candidate in ("id", "link", "title", "summary"):
                if entry.has_key(candidate):
                    id_source = candidate
                    break
            if id_source is None:
                log.error("Unable to find or generate id, entry ignored")
                continue

            if id_source in ("id", "link"):
                entry_id = cache.utf8(getattr(entry, id_source))
            else:
                # No real identifier: derive one from a hash of the text
                entry_id = (self.url + "/" +
                            md5.new(cache.utf8(getattr(entry, id_source))).hexdigest())

            # Fetch the cached item, creating it when it is not known yet
            if not self.has_item(entry_id):
                item = NewsItem(self, entry_id)
                self._items[entry_id] = item
                created.append(item)
            else:
                item = self._items[entry_id]
            item.update(entry)
            feed_items.append(entry_id)

            # First update of this feed: hide everything past the limit
            if self.last_updated is None and self._planet.new_feed_items:
                if len(feed_items) > self._planet.new_feed_items:
                    item.hidden = "yes"
                    log.debug("Marked <%s> as hidden (new feed)", entry_id)

        # Number the new items starting from the last one in the feed
        for item in reversed(created):
            order = str(int(self.next_order) + 1)
            item.order = order
            self.next_order = order

        # Drop cached items that the feed no longer carries
        remaining = len(feed_items)
        log.debug("Items in Feed: %d", remaining)
        for cached in self.items(sorted=1):
            if remaining < 1:
                break
            if cached.id in feed_items:
                remaining -= 1
                continue
            del (self._items[cached.id])
            self._expired.append(cached)
            log.debug("Removed expired or replaced item <%s>", cached.id)
Example #5
0
    def update(self, entry):
        """Copy the fields of a feedparser entry onto this item.

        Keys named in ``IGNORE_KEYS`` (directly, or through their
        ``_parsed`` companion name) and raw keys that have a
        ``<key>_parsed`` companion in the entry are skipped.  Of the
        rest:

        * ``*_detail`` values contribute their non-empty ``name``,
          ``email`` and ``language`` sub-fields (``language`` only when
          the channel has no language or a different one);
        * ``*_parsed`` values that are not None are stored with
          ``set_as_date`` under the key name minus the suffix;
        * ``source`` is stored as ``source_name`` and ``source_link``;
        * ``content`` parts are sanitized (``text/html``) or escaped
          (``text/plain``) in place and then concatenated;
        * string values are sanitized or escaped according to the type
          given by their ``_detail`` companion, then stored with
          ``set_as_string``; a failure is logged and the field is dropped.

        Any other value is ignored.  ``get_date("date")`` is called once
        all keys have been processed.
        """
        for name in entry.keys():
            # Ignored fields, and unparsed dates whose parsed form exists
            if (name in self.IGNORE_KEYS
                    or name + "_parsed" in self.IGNORE_KEYS
                    or entry.has_key(name + "_parsed")):
                continue

            field = entry[name]

            if name.endswith("_detail"):
                # Retain the name, email and language sub-fields
                if field.has_key("name") and field.name:
                    self.set_as_string(name.replace("_detail", "_name"),
                                       field.name)
                if field.has_key("email") and field.email:
                    self.set_as_string(name.replace("_detail", "_email"),
                                       field.email)
                if field.has_key("language") and field.language:
                    # Only worth recording when it differs from the channel
                    if (not self._channel.has_key("language")
                            or field.language != self._channel.language):
                        self.set_as_string(
                            name.replace("_detail", "_language"),
                            field.language)

            elif name.endswith("_parsed"):
                # Date field, stored under the name without the suffix
                if field is not None:
                    self.set_as_date(name[:-len("_parsed")], field)

            elif name == "source":
                # Source field: save both url and value
                if field.has_key("value"):
                    self.set_as_string(name + "_name", field.value)
                if field.has_key("url"):
                    self.set_as_string(name + "_link", field.url)

            elif name == "content":
                # Content field: clean each part, then join the values
                chunks = []
                for part in field:
                    if part.type == "text/html":
                        part.value = sanitize.HTML(part.value)
                    elif part.type == "text/plain":
                        part.value = escape(part.value)
                    if part.has_key("language") and part.language:
                        if (not self._channel.has_key("language")
                                or part.language != self._channel.language):
                            self.set_as_string(name + "_language",
                                               part.language)
                    chunks.append(cache.utf8(part.value))
                self.set_as_string(name, "".join(chunks))

            elif isinstance(field, (str, unicode)):
                # String field: clean according to its detail type, if any
                try:
                    detail_key = name + "_detail"
                    if entry.has_key(detail_key):
                        detail = entry[detail_key]
                        if detail.has_key("type"):
                            if detail.type == "text/html":
                                entry[name] = sanitize.HTML(entry[name])
                            elif detail.type == "text/plain":
                                entry[name] = escape(entry[name])
                    self.set_as_string(name, entry[name])
                except KeyboardInterrupt:
                    raise
                except:
                    log.exception("Ignored '%s' of <%s>, unknown format", name,
                                  self.id)

        # Make sure the date field exists
        self.get_date("date")