def update(self, entry):
    """Copy each recognised field of the feedparser *entry* into this item.

    Date fields are stored via set_as_date, the source field is split
    into name/link, content blocks are concatenated, and everything
    else is cached as a plain string.  Finishes by ensuring a 'date'
    field exists.
    """
    parsed_suffix = "_parsed"
    for field in entry.keys():
        ignored = (field in self.IGNORE_KEYS
                   or field + parsed_suffix in self.IGNORE_KEYS)
        if ignored or entry.has_key(field + parsed_suffix):
            # Explicitly ignored, or the raw form of a date for which
            # feedparser also supplies a pre-parsed companion.
            continue
        if field.endswith("_detail"):
            # Detail sub-structures are not cached by this version.
            continue
        if field.endswith(parsed_suffix):
            # Pre-parsed date: store under the bare field name.
            if entry[field] is not None:
                self.set_as_date(field[:-len(parsed_suffix)], entry[field])
        elif field == "source":
            # Source field: keep both the human-readable name and the link.
            source = entry[field]
            if source.has_key("value"):
                self.set_as_string(field + "_name", source.value)
            if source.has_key("url"):
                self.set_as_string(field + "_link", source.url)
        elif field == "content":
            # Multiple content blocks are concatenated into one string.
            joined = "".join([cache.utf8(piece.value)
                              for piece in entry[field]])
            self.set_as_string(field, joined)
        else:
            # Anything else is treated as a plain string field; formats
            # we cannot store are logged and skipped (best-effort).
            try:
                self.set_as_string(field, entry[field])
            except KeyboardInterrupt:
                raise
            except:
                log.exception("Ignored '%s' of <%s>, unknown format",
                              field, self.id)

    # Generate the date field if we need to
    self.get_date("date")
def update(self, entry):
    """Update the item from the feedparser entry given.

    Walks every key of the feedparser entry and caches it on this
    item: dates go through set_as_date, author/publisher-style
    '*_detail' structures keep their name/email/language sub-fields,
    'source' is split into name/link, 'content' blocks are sanitized
    and concatenated, and remaining string fields are sanitized
    according to their '*_detail' type before being cached.

    NOTE(review): this method mutates the entry in place
    (entry[key] and content item.value are overwritten with the
    sanitized text) -- callers see the sanitized values afterwards.
    """
    for key in entry.keys():
        if key in self.IGNORE_KEYS or key + "_parsed" in self.IGNORE_KEYS:
            # Ignored fields
            pass
        elif entry.has_key(key + "_parsed"):
            # Ignore unparsed date fields
            pass
        elif key.endswith("_detail"):
            # retain name, email, and language sub-fields
            if entry[key].has_key("name") and entry[key].name:
                self.set_as_string(key.replace("_detail", "_name"),
                                   entry[key].name)
            if entry[key].has_key("email") and entry[key].email:
                self.set_as_string(key.replace("_detail", "_email"),
                                   entry[key].email)
            # Only keep the per-field language when it differs from the
            # channel-wide language (no point duplicating the default).
            if (
                entry[key].has_key("language")
                and entry[key].language
                and (not self._channel.has_key("language")
                     or entry[key].language != self._channel.language)
            ):
                self.set_as_string(key.replace("_detail", "_language"),
                                   entry[key].language)
        elif key.endswith("_parsed"):
            # Date fields: store under the bare name (strip "_parsed").
            if entry[key] is not None:
                self.set_as_date(key[: -len("_parsed")], entry[key])
        elif key == "source":
            # Source field: save both url and value
            if entry[key].has_key("value"):
                self.set_as_string(key + "_name", entry[key].value)
            if entry[key].has_key("url"):
                self.set_as_string(key + "_link", entry[key].url)
        elif key == "content":
            # Content field: concatenate the values
            value = ""
            for item in entry[key]:
                # Sanitize in place according to the block's MIME type;
                # the mutated value is what gets concatenated below.
                if item.type == "text/html":
                    item.value = sanitize.HTML(item.value)
                elif item.type == "text/plain":
                    item.value = escape(item.value)
                # Record a per-content language only when it differs
                # from the channel's language.
                if (
                    item.has_key("language")
                    and item.language
                    and (not self._channel.has_key("language")
                         or item.language != self._channel.language)
                ):
                    self.set_as_string(key + "_language", item.language)
                value += cache.utf8(item.value)
            self.set_as_string(key, value)
        elif isinstance(entry[key], (str, unicode)):
            # String fields: sanitize according to the companion
            # "*_detail" type (if any) before caching; unknown formats
            # are logged and skipped rather than aborting the update.
            try:
                detail = key + "_detail"
                if entry.has_key(detail):
                    if entry[detail].has_key("type"):
                        if entry[detail].type == "text/html":
                            entry[key] = sanitize.HTML(entry[key])
                        elif entry[detail].type == "text/plain":
                            entry[key] = escape(entry[key])
                self.set_as_string(key, entry[key])
            except KeyboardInterrupt:
                raise
            except:
                log.exception("Ignored '%s' of <%s>, unknown format",
                              key, self.id)

    # Generate the date field if we need to
    self.get_date("date")
def update_entries(self, entries):
    """Update entries from the feed.

    This reads the entries supplied by feedparser and updates the
    cached information about them.  It's at this point we update the
    'updated' timestamp and keep the old one in 'last_updated', these
    provide boundaries for acceptable entry times.

    If this is the first time a feed has been updated then most of the
    items will be marked as hidden, according to Planet.new_feed_items.

    If the feed does not contain items which, according to the sort
    order, should be there; those items are assumed to have been
    expired from the feed or replaced and are removed from the cache.
    """
    if not len(entries):
        return

    self.last_updated = self.updated
    self.updated = time.gmtime()

    # Hoisted out of the per-entry loop: the original called
    # self.tags() once per tag of every entry.  The wanted-tag set is
    # derived from the static relevant_tags configuration, so a single
    # call suffices (assumes self.tags() is side-effect free --
    # TODO confirm against its definition).
    if self.relevant_tags:
        wanted_tags = self.tags()
    else:
        wanted_tags = None

    new_items = []
    feed_items = []
    for entry in entries:
        # Try really hard to find some kind of unique identifier
        if entry.has_key("id"):
            entry_id = cache.utf8(entry.id)
        elif entry.has_key("link"):
            entry_id = cache.utf8(entry.link)
        elif entry.has_key("title"):
            entry_id = (self.url + "/"
                        + md5.new(cache.utf8(entry.title)).hexdigest())
        elif entry.has_key("summary"):
            entry_id = (self.url + "/"
                        + md5.new(cache.utf8(entry.summary)).hexdigest())
        else:
            log.error("Unable to find or generate id, entry ignored")
            continue

        # Ignore posts based on tag filter if specified
        include_entry = False
        if wanted_tags is not None and entry.has_key("tags"):
            for t in entry["tags"]:
                if t["term"].lower() in wanted_tags:
                    include_entry = True
                    break  # one matching tag is enough
        elif wanted_tags is None:
            include_entry = True

        # Create the item if necessary and update
        if include_entry:
            if self.has_item(entry_id):
                item = self._items[entry_id]
            else:
                item = NewsItem(self, entry_id)
                self._items[entry_id] = item
                new_items.append(item)

            item.update(entry)
            feed_items.append(entry_id)

            # Hide excess items the first time through
            if (
                self.last_updated is None
                and self._planet.new_feed_items
                and len(feed_items) > self._planet.new_feed_items
            ):
                item.hidden = "yes"
                log.debug("Marked <%s> as hidden (new feed)", entry_id)

    # Assign order numbers in reverse (oldest first)
    new_items.reverse()
    for item in new_items:
        item.order = self.next_order = str(int(self.next_order) + 1)

    # Check for expired or replaced items
    feed_count = len(feed_items)
    log.debug("Items in Feed: %d", feed_count)
    for item in self.items(sorted=1):
        if feed_count < 1:
            break
        elif item.id in feed_items:
            feed_count -= 1
        elif item._channel.url_status != "226":
            # 226 (IM Used) marks an RFC 3229 delta feed, which carries
            # only changed entries -- presumably nothing should expire
            # then; verify against the fetch code.
            del self._items[item.id]
            self._expired.append(item)
            log.debug("Removed expired or replaced item <%s>", item.id)
def update_entries(self, entries):
    """Refresh the cached items from a list of feedparser entries.

    Rolls 'updated' forward (keeping the previous value in
    'last_updated'), derives a stable identifier for every entry,
    creates or updates the corresponding NewsItem, hides the excess
    items of a brand-new feed per Planet.new_feed_items, assigns order
    numbers to new items, and finally expires cached items that the
    feed no longer carries.
    """
    if not len(entries):
        return

    self.last_updated = self.updated
    self.updated = time.gmtime()

    fresh = []
    seen_ids = []
    for entry in entries:
        # Derive a unique identifier, preferring explicit ids over
        # hashes of the entry text.
        if entry.has_key("id"):
            ident = cache.utf8(entry.id)
        elif entry.has_key("link"):
            ident = cache.utf8(entry.link)
        elif entry.has_key("title"):
            digest = md5.new(cache.utf8(entry.title)).hexdigest()
            ident = self.url + "/" + digest
        elif entry.has_key("summary"):
            digest = md5.new(cache.utf8(entry.summary)).hexdigest()
            ident = self.url + "/" + digest
        else:
            log.error("Unable to find or generate id, entry ignored")
            continue

        # Reuse the cached item when we have seen this id before.
        if self.has_item(ident):
            item = self._items[ident]
        else:
            item = NewsItem(self, ident)
            self._items[ident] = item
            fresh.append(item)

        item.update(entry)
        seen_ids.append(ident)

        # On a brand-new feed, hide everything beyond the configured
        # number of initial items.
        limit = self._planet.new_feed_items
        if self.last_updated is None and limit and len(seen_ids) > limit:
            item.hidden = "yes"
            log.debug("Marked <%s> as hidden (new feed)", ident)

    # Number the new items oldest-first.
    fresh.reverse()
    for item in fresh:
        item.order = self.next_order = str(int(self.next_order) + 1)

    # Anything cached but no longer present in the feed has expired or
    # been replaced; stop once every feed item has been accounted for.
    remaining = len(seen_ids)
    log.debug("Items in Feed: %d", remaining)
    for item in self.items(sorted=1):
        if remaining < 1:
            break
        if item.id in seen_ids:
            remaining -= 1
        else:
            del self._items[item.id]
            self._expired.append(item)
            log.debug("Removed expired or replaced item <%s>", item.id)
def update(self, entry):
    """Refresh this item's cached fields from the given feedparser entry.

    Dates, '*_detail' sub-fields, the source name/link, sanitized
    content blocks and plain string fields are all cached on the item;
    a 'date' field is generated at the end if one is still missing.
    """
    for key in entry.keys():
        skip = (key in self.IGNORE_KEYS
                or key + "_parsed" in self.IGNORE_KEYS)
        if skip:
            # Field is explicitly excluded from the cache.
            continue
        if entry.has_key(key + "_parsed"):
            # Skip the raw form when a parsed companion exists.
            continue

        if key.endswith("_detail"):
            # Keep only the name, email and language sub-fields.
            detail = entry[key]
            if detail.has_key('name') and detail.name:
                self.set_as_string(key.replace("_detail", "_name"),
                                   detail.name)
            if detail.has_key('email') and detail.email:
                self.set_as_string(key.replace("_detail", "_email"),
                                   detail.email)
            if detail.has_key('language') and detail.language:
                # Cache the language only when it differs from the
                # channel-wide default.
                if (not self._channel.has_key('language')
                        or detail.language != self._channel.language):
                    self.set_as_string(key.replace("_detail", "_language"),
                                       detail.language)
        elif key.endswith("_parsed"):
            # Pre-parsed date: store under the bare field name.
            if entry[key] is not None:
                self.set_as_date(key[:-len("_parsed")], entry[key])
        elif key == "source":
            # Source field: keep both the readable name and the link.
            if entry[key].has_key("value"):
                self.set_as_string(key + "_name", entry[key].value)
            if entry[key].has_key("url"):
                self.set_as_string(key + "_link", entry[key].url)
        elif key == "content":
            # Sanitize each content block in place, note any distinct
            # language, and join the pieces into one cached string.
            parts = []
            for block in entry[key]:
                if block.type == 'text/html':
                    block.value = sanitize.HTML(block.value)
                elif block.type == 'text/plain':
                    block.value = escape(block.value)
                if block.has_key('language') and block.language:
                    if (not self._channel.has_key('language')
                            or block.language != self._channel.language):
                        self.set_as_string(key + "_language",
                                           block.language)
                parts.append(cache.utf8(block.value))
            self.set_as_string(key, "".join(parts))
        elif isinstance(entry[key], (str, unicode)):
            # Plain string field, normalised via its companion
            # '*_detail' type before caching; unknown formats are
            # logged and skipped (best-effort update).
            try:
                detail_key = key + '_detail'
                if (entry.has_key(detail_key)
                        and entry[detail_key].has_key('type')):
                    mime = entry[detail_key].type
                    if mime == 'text/html':
                        entry[key] = sanitize.HTML(entry[key])
                    elif mime == 'text/plain':
                        entry[key] = escape(entry[key])
                self.set_as_string(key, entry[key])
            except KeyboardInterrupt:
                raise
            except:
                log.exception("Ignored '%s' of <%s>, unknown format",
                              key, self.id)

    # Generate the date field if we need to
    self.get_date("date")