def parse_post(self, entry):
    """Build a ``Post`` from one feed entry of a TextPress dump.

    The published date falls back to the updated date when the entry
    carries no published element.  The parser name is read from the
    TextPress data blob of the entry and replaced by ``'html'`` when it
    is missing or not registered with the running application.

    :param entry: the element of the feed entry to convert.
    :return: the new ``Post``, after the comments were parsed and every
             extension had the chance to postprocess it.
    """
    # parse the dates first.
    updated = parse_iso8601(entry.findtext(atom.updated))
    published = entry.findtext(atom.published)
    if published is not None:
        published = parse_iso8601(published)
    else:
        published = updated

    # figure out tags and categories by invoking the
    # callbacks on the extensions first.  If no extension
    # was able to figure out what to do with it, we treat it
    # as category.
    tags, categories = self.parse_categories(entry)

    link = entry.find(atom.link)
    if link is not None:
        link = link.attrib.get('href')

    # An entry without a data element used to die with an IndexError;
    # it now simply keeps the default parser.
    # NOTE(review): `_pickle` presumably unpickles the blob -- if so,
    # only dumps from trusted sources should be imported.  Confirm.
    post_parser = 'html'
    data_elements = entry.findall(textpress.data)
    if data_elements:
        post_parser = _pickle(data_elements[0].text).get('parser', 'html')
        if post_parser not in get_application().parsers:
            post_parser = 'html'

    post = Post(
        entry.findtext(textpress.slug),                 # slug
        _get_text_content(entry.findall(atom.title)),   # title
        link,                                           # link
        published,                                      # pub_date
        self.parse_author(entry),                       # author
        # XXX: the Post is prefixing the intro before the actual
        # content.  This is the default Zine behavior and makes sense
        # for Zine.  However nearly every blog works differently and
        # treats summary completely different from content.  We should
        # think about that.
        _get_html_content(entry.findall(atom.summary)), # intro
        _get_html_content(entry.findall(atom.content)), # body
        tags,                                           # tags
        categories,                                     # categories
        parser=post_parser,
        updated=updated,
        uid=entry.findtext(atom.id)
    )
    post.element = entry

    # Only 'page' and 'entry' are accepted; anything else (including a
    # missing element) becomes 'entry'.  The validated value is always
    # stored on the post -- previously a valid 'page' was silently
    # dropped because only the fallback was ever assigned.
    content_type = entry.findtext(textpress.content_type)
    if content_type not in ('page', 'entry'):
        content_type = 'entry'
    post.content_type = content_type

    # now parse the comments for the post
    self.parse_comments(post)

    for extension in self.extensions:
        extension.postprocess_post(post)

    return post
Exemple #2
0
    def parse_post(self, entry):
        """Turn a single Atom entry element into a ``Post``.

        The post is created without slug and intro and always uses the
        ``'html'`` parser.  After construction the comments are parsed
        and every extension may postprocess the post.

        :param entry: the Atom entry element to convert.
        :return: the ``Post``, or `None` if an extension raised
                 ``SkipItem`` while postprocessing it.
        """
        # Dates first: an entry without a published date reuses the
        # updated date.
        updated = parse_iso8601(entry.findtext(atom.updated))
        published_text = entry.findtext(atom.published)
        if published_text is None:
            published = updated
        else:
            published = parse_iso8601(published_text)

        # Tag/category detection is delegated to parse_categories, which
        # asks the extensions first and treats whatever they do not claim
        # as a category.
        tags, categories = self.parse_categories(entry)

        link = None
        link_element = entry.find(atom.link)
        if link_element is not None:
            link = link_element.attrib.get('href')

        title = _get_text_content(entry.findall(atom.title))
        author = self.parse_author(entry)
        # XXX: the Post is prefixing the intro before the actual
        # content.  This is the default Zine behavior and makes sense
        # for Zine.  However nearly every blog works differently and
        # treats summary completely different from content.  We should
        # think about that.  Until then no intro is passed at all.
        body = _get_html_content(entry.findall(atom.content))
        uid = entry.findtext(atom.id)

        post = Post(None, title, link, published, author, None, body,
                    tags, categories, parser='html', updated=updated,
                    uid=uid)
        post.element = entry

        # The comments are read from the element attached above.
        self.parse_comments(post)

        for extension in self.extensions:
            try:
                extension.postprocess_post(post)
            except SkipItem:
                # an extension vetoed this post
                return None

        return post
Exemple #3
0
    def parse_post(self, entry):
        """Convert one Atom entry element into a ``Post`` object.

        Slug and intro are left empty (`None`) and the parser is fixed
        to ``'html'``.  Once the post exists its comments are parsed and
        the extensions are asked to postprocess it.

        :param entry: the Atom entry element.
        :return: the finished ``Post`` or `None` when one of the
                 extensions raised ``SkipItem``.
        """
        # the updated date doubles as published date if the entry does
        # not provide one.
        updated = parse_iso8601(entry.findtext(atom.updated))
        raw_published = entry.findtext(atom.published)
        published = (parse_iso8601(raw_published)
                     if raw_published is not None else updated)

        # parse_categories consults the extensions; what none of them
        # recognizes ends up as a category.
        tags, categories = self.parse_categories(entry)

        link_node = entry.find(atom.link)
        link = link_node.attrib.get('href') if link_node is not None else None

        post_title = _get_text_content(entry.findall(atom.title))
        post_author = self.parse_author(entry)
        # XXX: the Post is prefixing the intro before the actual
        # content.  This is the default Zine behavior and makes sense
        # for Zine.  However nearly every blog works differently and
        # treats summary completely different from content.  We should
        # think about that.
        post_body = _get_html_content(entry.findall(atom.content))
        post_uid = entry.findtext(atom.id)

        post = Post(
            None,           # slug
            post_title,
            link,
            published,
            post_author,
            None,           # intro
            post_body,
            tags,
            categories,
            parser='html',
            updated=updated,
            uid=post_uid)
        post.element = entry

        # now parse the comments for the post
        self.parse_comments(post)

        # the first extension raising SkipItem discards the post.
        try:
            for extension in self.extensions:
                extension.postprocess_post(post)
        except SkipItem:
            return None

        return post
Exemple #4
0
    def parse_comments(self, post):
        """Read the zine comment elements attached to the post's element.

        Every comment element found below ``post.element`` becomes a
        ``Comment`` using the ``'html'`` parser.  Parent references are
        collected during the first pass and resolved afterwards, once
        all comments are known by their id.

        :param post: a post whose ``element`` attribute holds the parsed
                     feed entry.
        :return: the values of the id -> comment mapping.
        """
        by_id = {}
        pending_parents = {}

        for node in post.element.findall(zine.comment):
            author_node = node.find(zine.author)
            dependency = author_node.attrib.get('dependency')
            if dependency is None:
                # author details are stored inline on the comment.
                email = author_node.findtext(zine.email)
                www = author_node.findtext(zine.uri)
                author = author_node.findtext(zine.name)
            else:
                # the author is a reference that _get_author resolves.
                author = self._get_author(dependency)
                email = www = None

            body = _get_html_content(node.findall(zine.content))
            published = parse_iso8601(node.findtext(zine.published))
            submitter_ip = node.findtext(zine.submitter_ip)
            is_pingback = _to_bool(node.findtext(zine.is_pingback))
            status = int(node.findtext(zine.status))
            blocked_msg = node.findtext(zine.blocked_msg)
            parser_data = _parser_data(node.findtext(zine.parser_data))

            comment = Comment(author, body, email, www, None, published,
                              submitter_ip, 'html', is_pingback, status,
                              blocked_msg, parser_data)
            by_id[int(node.attrib['id'])] = comment

            parent_id = node.findtext(zine.parent)
            if parent_id:
                pending_parents[comment] = int(parent_id)

        # second pass: by now every referenced parent should be known.
        for comment, parent_id in pending_parents.iteritems():
            comment.parent = by_id[parent_id]

        return by_id.values()
Exemple #5
0
    def parse_comments(self, post):
        """Create ``Comment`` objects for all zine comments of a post.

        The comment elements are looked up below ``post.element``.  A
        comment whose author element has a ``dependency`` attribute gets
        its author from ``self._get_author``; otherwise name, email and
        uri are read from the author element itself.  Parents are linked
        in a second step because a parent may appear after its child.

        :param post: the post whose ``element`` is searched for comments.
        :return: the collected comments (the values of the internal
                 mapping from comment id to comment).
        """
        seen = {}
        parent_ids = {}

        for comment_el in post.element.findall(zine.comment):
            author_el = comment_el.find(zine.author)
            dependency = author_el.attrib.get('dependency')
            if dependency is not None:
                author, email, www = self._get_author(dependency), None, None
            else:
                email = author_el.findtext(zine.email)
                www = author_el.findtext(zine.uri)
                author = author_el.findtext(zine.name)

            text = _get_html_content(comment_el.findall(zine.content))
            comment = Comment(
                author,
                text,
                email,
                www,
                None,
                parse_iso8601(comment_el.findtext(zine.published)),
                comment_el.findtext(zine.submitter_ip),
                'html',
                _to_bool(comment_el.findtext(zine.is_pingback)),
                int(comment_el.findtext(zine.status)),
                comment_el.findtext(zine.blocked_msg),
                _parser_data(comment_el.findtext(zine.parser_data)))
            comment_id = int(comment_el.attrib['id'])
            seen[comment_id] = comment

            raw_parent = comment_el.findtext(zine.parent)
            if raw_parent:
                # remember the reference, it is resolved below.
                parent_ids[comment] = int(raw_parent)

        for child, parent_id in parent_ids.iteritems():
            child.parent = seen[parent_id]

        return seen.values()
Exemple #6
0
 def __init__(self, timestamp, level, location, module, message=None):
     """Set up an item from its raw fields.

     :param timestamp: passed through `parse_iso8601` before storing.
     :param level: stored untouched as `internal_level`; the value
                   returned by `gettext` for it is stored as `level`.
     :param location: stored as is.
     :param module: stored as is.
     :param message: optional first entry of `lines`.
     """
     self.timestamp = parse_iso8601(timestamp)
     self.level = gettext(level)
     self.internal_level = level
     self.location = location
     self.module = module
     # `lines` starts empty unless an initial message was given.
     self.lines = [] if message is None else [message]
Exemple #7
0
 def __init__(self, timestamp, level, location, module, message=None):
     """Create the item.

     The timestamp is converted with `parse_iso8601`.  The level is
     kept twice: run through `gettext` as `level` and unchanged as
     `internal_level`.  `lines` is a list that holds `message` as its
     only element if a message other than `None` was passed.
     """
     self.timestamp = parse_iso8601(timestamp)
     self.level = gettext(level)
     self.internal_level = level
     self.module = module
     self.location = location
     initial_lines = []
     if message is not None:
         initial_lines.append(message)
     self.lines = initial_lines
    def parse_comments(self, post):
        """Parse the TextPress comments stored below the post's element.

        For each comment element a ``Comment`` is created.  Body and
        parser name come from the data blob of the comment; a comment
        without such a blob gets an empty body and the ``'html'``
        parser.  A parser that is not registered with the running
        application is replaced by ``'html'`` as well.  Parent
        references are resolved in a second pass over all comments.

        :param post: the post whose ``element`` contains the comments.
        :return: the values of the id -> comment mapping.
        :raises KeyError: if a comment references a parent id that is
                          not part of this post's comments.
        """
        comments = {}
        unresolved_parents = {}

        for element in post.element.findall(textpress.comment):
            author = element.find(textpress.author)
            dependency = author.attrib.get('dependency')
            if dependency is not None:
                # NOTE(review): the whole author element is handed to
                # _get_author here, not the dependency id -- confirm
                # that this is what _get_author expects.
                author = self._get_author(author)
                email = www = None
            else:
                email = author.findtext(textpress.email)
                www = author.findtext(textpress.uri)
                author = author.findtext(textpress.name)

            # Reset both values for every comment.  Without the reset a
            # comment lacking a data element either hit an unbound
            # `comment_parser` or inherited the parser of the previous
            # comment, and its body was an empty list, not a string.
            body = u''
            comment_parser = 'html'
            data_elements = element.findall(textpress.data)
            if data_elements:
                # NOTE(review): `_pickle` presumably unpickles the blob;
                # if so, only trusted dumps should be imported.
                pickled = _pickle(data_elements[0].text)
                body = pickled.get('raw_body', u'')

                comment_parser = pickled.get('parser', 'html')
                if comment_parser not in get_application().parsers:
                    comment_parser = 'html'

            comment = Comment(
                author, body, email, www, None,
                parse_iso8601(element.findtext(textpress.published)),
                element.findtext(textpress.submitter_ip), comment_parser,
                _to_bool(element.findtext(textpress.is_pingback)),
                int(element.findtext(textpress.status)),
                element.findtext(textpress.blocked_msg),
                _parser_data(element.findtext(textpress.parser_data))
            )
            comments[int(element.findtext(textpress.id))] = comment

            # `parent is not None or ''` only ever tested for None, so
            # an empty parent element crashed in int('').  A plain truth
            # test skips both None and the empty string.
            parent = element.findtext(textpress.parent)
            if parent:
                unresolved_parents[comment] = int(parent)

        # a parent can show up after its child, so link them only now.
        for comment, parent_id in unresolved_parents.iteritems():
            comment.parent = comments[parent_id]

        return comments.values()