def parse_post(self, entry):
    """Build a `Post` from one feed `entry` element.

    :param entry: element of a single feed entry; it is queried with
                  `find`, `findall` and `findtext`.
    :return: the new `Post`, after `parse_comments` and the
             `postprocess_post` hook of every extension were called
             with it.
    """
    # parse the dates first.  An entry without a publication date is
    # treated as published at its update time.
    updated = parse_iso8601(entry.findtext(atom.updated))
    published = entry.findtext(atom.published)
    if published is not None:
        published = parse_iso8601(published)
    else:
        published = updated

    # figure out tags and categories by invoking the
    # callbacks on the extensions first.  If no extension
    # was able to figure out what to do with it, we treat it
    # as category.
    tags, categories = self.parse_categories(entry)

    link = entry.find(atom.link)
    if link is not None:
        link = link.attrib.get('href')

    # The parser name is stored in the entry's data payload.  Fall back
    # to 'html' when the payload is missing, carries no parser name, or
    # names a parser the application does not know.
    # NOTE(review): `_pickle` presumably unpickles feed-supplied data;
    # that is unsafe for untrusted feeds -- confirm and restrict.
    post_parser = 'html'
    data = entry.findall(textpress.data)
    if data:
        post_parser = _pickle(data[0].text).get('parser', 'html')
        if post_parser not in get_application().parsers:
            post_parser = 'html'

    post = Post(
        entry.findtext(textpress.slug),                 # slug
        _get_text_content(entry.findall(atom.title)),   # title
        link,                                           # link
        published,                                      # pub_date
        self.parse_author(entry),                       # author
        # XXX: the Post is prefixing the intro before the actual
        # content.  This is the default Zine behavior and makes sense
        # for Zine.  However nearly every blog works differently and
        # treats summary completely different from content.  We should
        # think about that.
        _get_html_content(entry.findall(atom.summary)), # intro
        _get_html_content(entry.findall(atom.content)), # body
        tags,                                           # tags
        categories,                                     # categories
        parser=post_parser,
        updated=updated,
        uid=entry.findtext(atom.id)
    )
    post.element = entry

    # Only 'page' and 'entry' are accepted; anything else (including a
    # missing element) becomes 'entry'.  The validated value is always
    # stored so that a valid 'page' is not dropped.
    content_type = entry.findtext(textpress.content_type)
    if content_type not in ('page', 'entry'):
        content_type = 'entry'
    post.content_type = content_type

    # now parse the comments for the post
    self.parse_comments(post)

    for extension in self.extensions:
        extension.postprocess_post(post)

    return post
def parse_post(self, entry):
    """Turn one feed `entry` element into a `Post`.

    :param entry: element of a single feed entry.
    :return: the post, after `parse_comments` and the extensions'
             `postprocess_post` hooks were called with it, or `None`
             as soon as one of the hooks raises `SkipItem`.
    """
    # dates come first; without a publication date the update time is
    # used for both.
    updated = parse_iso8601(entry.findtext(atom.updated))
    published_text = entry.findtext(atom.published)
    published = (updated if published_text is None
                 else parse_iso8601(published_text))

    # tags and categories are worked out by `parse_categories`, which
    # asks the extensions first; whatever no extension could classify
    # is treated as a category.
    tags, categories = self.parse_categories(entry)

    link_element = entry.find(atom.link)
    link = None if link_element is None else link_element.attrib.get('href')

    title = _get_text_content(entry.findall(atom.title))
    author = self.parse_author(entry)
    body = _get_html_content(entry.findall(atom.content))
    uid = entry.findtext(atom.id)

    # XXX: the Post is prefixing the intro before the actual
    # content.  This is the default Zine behavior and makes sense
    # for Zine.  However nearly every blog works differently and
    # treats summary completely different from content.  We should
    # think about that.  (That is why `None` is passed right before
    # the body below.)
    post = Post(None, title, link, published, author, None, body,
                tags, categories, parser='html', updated=updated, uid=uid)
    post.element = entry

    # the comments are read from `post.element`, so it has to be set
    # before this call
    self.parse_comments(post)

    # any extension may veto the post by raising SkipItem
    for extension in self.extensions:
        try:
            extension.postprocess_post(post)
        except SkipItem:
            return None

    return post
def parse_post(self, entry):
    """Create a `Post` for a single feed `entry` element.

    :param entry: element of a single feed entry.
    :return: the post once `parse_comments` and each extension's
             `postprocess_post` ran for it; `None` if an extension
             raised `SkipItem`.
    """
    # start with the dates; the publication date defaults to the
    # update date and is only replaced if the entry provides one.
    updated = parse_iso8601(entry.findtext(atom.updated))
    published = updated
    raw_published = entry.findtext(atom.published)
    if raw_published is not None:
        published = parse_iso8601(raw_published)

    # `parse_categories` invokes the extension callbacks first; what
    # no extension was able to handle is treated as category.
    tags, categories = self.parse_categories(entry)

    # keep only the href of the link element, if there is one
    link = entry.find(atom.link)
    link = link.attrib.get('href') if link is not None else None

    post_title = _get_text_content(entry.findall(atom.title))
    post_author = self.parse_author(entry)
    post_body = _get_html_content(entry.findall(atom.content))

    post = Post(
        None,
        post_title,
        link,
        published,
        post_author,
        # XXX: the Post is prefixing the intro before the actual
        # content.  This is the default Zine behavior and makes sense
        # for Zine.  However nearly every blog works differently and
        # treats summary completely different from content.  We should
        # think about that.
        None,
        post_body,
        tags,
        categories,
        parser='html',
        updated=updated,
        uid=entry.findtext(atom.id),
    )
    post.element = entry

    # now parse the comments for the post
    self.parse_comments(post)

    # give every extension a chance to adjust the post or to drop it
    for extension in self.extensions:
        try:
            extension.postprocess_post(post)
        except SkipItem:
            return None
    return post
def parse_comments(self, post):
    """Create a `Comment` for every comment element of a post.

    The comment elements are read from `post.element`.  Replies are
    linked to their parent comment in a second pass, once every
    comment has been registered under its id.

    :param post: a post whose `element` attribute holds the entry
                 element.
    :return: the values of the id -> comment mapping.
    """
    by_id = {}
    pending_parents = {}

    for node in post.element.findall(zine.comment):
        author_node = node.find(zine.author)
        dependency = author_node.attrib.get('dependency')
        if dependency is None:
            # name, email and uri are stored inline on the author element
            email = author_node.findtext(zine.email)
            www = author_node.findtext(zine.uri)
            author = author_node.findtext(zine.name)
        else:
            # the author element only references an author; resolve it
            author = self._get_author(dependency)
            email = www = None

        body = _get_html_content(node.findall(zine.content))
        pub_date = parse_iso8601(node.findtext(zine.published))
        submitter_ip = node.findtext(zine.submitter_ip)
        is_pingback = _to_bool(node.findtext(zine.is_pingback))
        status = int(node.findtext(zine.status))
        blocked_msg = node.findtext(zine.blocked_msg)
        parser_data = _parser_data(node.findtext(zine.parser_data))

        comment = Comment(author, body, email, www, None, pub_date,
                          submitter_ip, 'html', is_pingback, status,
                          blocked_msg, parser_data)
        by_id[int(node.attrib['id'])] = comment

        # remember the parent id; it is resolved after the loop
        parent_text = node.findtext(zine.parent)
        if parent_text:
            pending_parents[comment] = int(parent_text)

    for comment, parent_id in pending_parents.iteritems():
        comment.parent = by_id[parent_id]

    return by_id.values()
def __init__(self, timestamp, level, location, module, message=None):
    """Store the fields of one item.

    :param timestamp: passed through `parse_iso8601` and stored as
                      `timestamp`.
    :param level: kept unchanged as `internal_level`; the result of
                  `gettext(level)` is stored as `level`.
    :param location: stored as given.
    :param module: stored as given.
    :param message: optional first entry of `lines`; with `None` the
                    list starts out empty.
    """
    self.timestamp = parse_iso8601(timestamp)
    self.level = gettext(level)
    self.internal_level = level
    self.location = location
    self.module = module
    self.lines = [] if message is None else [message]
def parse_comments(self, post):
    """Create a `Comment` for every comment element of a post.

    The comment elements are read from `post.element`.  Replies are
    linked to their parent comment after all comments were created.

    :param post: a post whose `element` attribute holds the entry
                 element.
    :return: the values of the id -> comment mapping.
    :raises KeyError: if a comment names a parent id that no comment
                      of this post has.
    """
    comments = {}
    unresolved_parents = {}

    for element in post.element.findall(textpress.comment):
        author = element.find(textpress.author)
        dependency = author.attrib.get('dependency')
        if dependency is not None:
            # NOTE(review): the author element itself is handed over
            # here, not the value of its `dependency` attribute --
            # verify against what `_get_author` expects.
            author = self._get_author(author)
            email = www = None
        else:
            email = author.findtext(textpress.email)
            www = author.findtext(textpress.uri)
            author = author.findtext(textpress.name)

        # Body and parser come from the comment's data payload.  Start
        # from safe defaults so that a comment without a payload gets
        # an empty body and the 'html' parser, instead of an empty list
        # as body and an unbound (or stale, from the previous comment)
        # parser name.
        body = u''
        comment_parser = 'html'
        data = element.findall(textpress.data)
        if data:
            # NOTE(review): `_pickle` presumably unpickles feed-supplied
            # data; that is unsafe for untrusted feeds -- confirm and
            # restrict.
            pickled = _pickle(data[0].text)
            body = pickled.get('raw_body', u'')
            comment_parser = pickled.get('parser', 'html')
            if comment_parser not in get_application().parsers:
                comment_parser = 'html'

        comment = Comment(
            author,
            body,
            email,
            www,
            None,
            parse_iso8601(element.findtext(textpress.published)),
            element.findtext(textpress.submitter_ip),
            comment_parser,
            _to_bool(element.findtext(textpress.is_pingback)),
            int(element.findtext(textpress.status)),
            element.findtext(textpress.blocked_msg),
            _parser_data(element.findtext(textpress.parser_data))
        )
        comments[int(element.findtext(textpress.id))] = comment

        # A missing or empty parent element means "top-level comment".
        # (The former test `parent is not None or ''` let the empty
        # string through to int().)
        parent = element.findtext(textpress.parent)
        if parent:
            unresolved_parents[comment] = int(parent)

    # second pass: every comment is registered by id now
    for comment, parent_id in unresolved_parents.iteritems():
        comment.parent = comments[parent_id]

    return comments.values()