def parse_post(self, entry):
    """Turn one feed entry element into a :class:`Post`.

    The comments of the entry are parsed as well and every registered
    extension gets a chance to postprocess the finished post.

    :param entry: the element of the feed entry to convert.
    :return: the new post, or `None` if one of the extensions raised
             :exc:`SkipItem` while postprocessing it.
    """
    # Dates come first.  An entry without a publication date counts as
    # published at the moment it was last updated.
    updated = parse_iso8601(entry.findtext(atom.updated))
    published_text = entry.findtext(atom.published)
    published = updated if published_text is None \
        else parse_iso8601(published_text)

    # Tags and categories are sorted out by `parse_categories`.  Per the
    # original note the extension callbacks are asked first and whatever
    # no extension claims is treated as a category.
    tags, categories = self.parse_categories(entry)

    # Only the `href` attribute of the link element is of interest; a
    # missing link element simply yields `None`.
    link_element = entry.find(atom.link)
    href = None if link_element is None \
        else link_element.attrib.get('href')

    title = _get_text_content(entry.findall(atom.title))
    author = self.parse_author(entry)
    content = _get_html_content(entry.findall(atom.content))
    uid = entry.findtext(atom.id)

    # XXX: the Post is prefixing the intro before the actual content.
    # This is the default Rezine behavior and makes sense for Rezine.
    # However nearly every blog works differently and treats summary
    # completely different from content.  We should think about that.
    # For now `None` is passed in that position (the sixth argument).
    post = Post(None, title, href, published, author, None, content,
                tags, categories, parser='html', updated=updated,
                uid=uid)
    post.element = entry

    # The comments are parsed once the post knows its source element.
    self.parse_comments(post)

    # Any extension may veto the post by raising `SkipItem`.
    for extension in self.extensions:
        try:
            extension.postprocess_post(post)
        except SkipItem:
            return None
    return post
def parse_comments(self, post):
    """Parse the comment elements found below ``post.element``.

    Each comment element is converted into a :class:`Comment`.  Once all
    comments are known, the comments that name a parent get their
    ``parent`` attribute set to the matching comment object.

    :param post: a post whose ``element`` attribute holds the source
                 element the comments are read from.
    :return: a list with all parsed comments.
    :raises KeyError: if a comment refers to a parent id that does not
                      belong to any comment of this post.
    """
    comments = {}
    # (comment, parent_id) pairs.  A list is used rather than a dict
    # keyed by the comment so this neither relies on `Comment` being
    # hashable nor on the Python 2 only `dict.iteritems`.
    unresolved_parents = []

    for element in post.element.findall(rezine.comment):
        author_element = element.find(rezine.author)
        dependency = author_element.attrib.get('dependency')
        if dependency is not None:
            # The author is a reference that `_get_author` resolves; no
            # separate email or URL is read in that case.
            author = self._get_author(dependency)
            email = www = None
        else:
            email = author_element.findtext(rezine.email)
            www = author_element.findtext(rezine.uri)
            author = author_element.findtext(rezine.name)

        body = _get_html_content(element.findall(rezine.content))
        comment = Comment(
            author, body, email, www,
            None,  # the parent is filled in below once all are parsed
            parse_iso8601(element.findtext(rezine.published)),
            element.findtext(rezine.submitter_ip),
            'html',
            _to_bool(element.findtext(rezine.is_pingback)),
            int(element.findtext(rezine.status)),
            element.findtext(rezine.blocked_msg),
            _parser_data(element.findtext(rezine.parser_data))
        )
        comments[int(element.attrib['id'])] = comment

        # A parent may be parsed after its child, so only the id is
        # remembered here and resolved after the loop.
        parent = element.findtext(rezine.parent)
        if parent:
            unresolved_parents.append((comment, int(parent)))

    for comment, parent_id in unresolved_parents:
        comment.parent = comments[parent_id]

    # Always hand out a real list, also where `dict.values` is a view.
    return list(comments.values())
def __init__(self, timestamp, level, location, module, message=None):
    """Create a new item.

    :param timestamp: the timestamp; it is run through `parse_iso8601`
                      and the result is stored as `timestamp`.
    :param level: the level; kept unchanged as `internal_level` and
                  run through `gettext` for the `level` attribute.
    :param location: stored unchanged as `location`.
    :param module: stored unchanged as `module`.
    :param message: optional first entry for `lines`.  If it is `None`
                    the item starts without any lines.
    """
    self.timestamp = parse_iso8601(timestamp)
    # `level` is the `gettext` result, `internal_level` the raw value.
    self.level = gettext(level)
    self.internal_level = level
    self.location = location
    self.module = module
    self.lines = [] if message is None else [message]