Example #1
0
    def set_auto_slug(self):
        """Derive a slug for this post and store it on ``self.slug``.

        The base comes from ``gen_slug(self.title)``.  If that yields nothing,
        the publication date (passed through ``to_blog_timezone``) formatted
        as ``%H%M`` is used instead.  The base is then expanded with
        ``gen_timestamped_slug``.  When the result differs from the current
        slug, it is run through ``increment_string`` until no ``Post`` row
        with that slug is found, and then assigned.
        """
        base = gen_slug(self.title)
        if not base:
            base = to_blog_timezone(self.pub_date).strftime('%H%M')

        candidate = gen_timestamped_slug(base, self.content_type,
                                         self.pub_date)

        # Nothing to do when the post already carries this slug.
        if candidate == self.slug:
            return

        while True:
            # The lookup runs with autoflush switched off
            # (presumably so pending changes on this post are not flushed
            # by the probe itself -- TODO confirm).
            clashes = Post.query.autoflush(False) \
                          .filter_by(slug=candidate) \
                          .limit(1).count()
            if not clashes:
                break
            candidate = increment_string(candidate)

        self.slug = candidate
Example #2
0
    def set_auto_slug(self):
        """Compute this post's slug and assign it when it changed.

        Uses ``gen_slug`` on the title, falling back to the ``%H%M`` rendering
        of ``to_blog_timezone(self.pub_date)`` when the title gives an empty
        slug.  The value is passed to ``gen_timestamped_slug`` together with
        the content type and publication date.  If the outcome is not the
        slug already set, ``increment_string`` is applied repeatedly while a
        ``Post`` with the same slug exists.
        """
        stem = gen_slug(self.title) or \
            to_blog_timezone(self.pub_date).strftime('%H%M')
        proposed = gen_timestamped_slug(stem, self.content_type,
                                        self.pub_date)

        if proposed != self.slug:
            def in_use(value):
                # Truthy when at least one post already has this slug; the
                # query is issued with autoflush disabled.
                lookup = Post.query.autoflush(False).filter_by(slug=value)
                return lookup.limit(1).count()

            while in_use(proposed):
                proposed = increment_string(proposed)
            self.slug = proposed
Example #3
0
def parse_feed(fd):
    """Parse an extended WordPress RSS feed into a structure the general
    importer system can handle.  The return value is a `Blog` object.

    `fd` is handed unchanged to `parse_broken_wxr`; the resulting tree is
    walked for tags, categories and items.  Items that carry neither
    encoded content nor a description are skipped, and comments whose
    approval flag is ``"spam"`` are left out.
    """
    tree = parse_broken_wxr(fd)

    authors = {}

    def get_author(name):
        """Return the `Author` registered for `name`, creating it on first
        use.  Returns `None` when `name` is empty or missing.
        """
        if not name:
            return None
        author = authors.get(name)
        if author is None:
            author = authors[name] = Author(name, None)
        return author

    tags = {}
    for item in tree.findall(WORDPRESS.tag):
        tag = Tag(item.findtext(WORDPRESS.tag_slug), item.findtext(WORDPRESS.tag_name))
        tags[tag.name] = tag

    categories = {}
    for item in tree.findall(WORDPRESS.category):
        category = Category(item.findtext(WORDPRESS.category_nicename), item.findtext(WORDPRESS.cat_name))
        categories[category.name] = category

    posts = []
    # Raw string: the regex escapes must reach `re` untouched instead of
    # going through Python's own (invalid) string escape handling.
    clean_empty_tags = re.compile(r"\<(?P<tag>\w+?)\>[\r\n]?\</(?P=tag)\>")
    # Compiled once here rather than on every item.
    more_marker = re.compile("<!--more ?.*?-->")

    for item in tree.findall("item"):
        status = {"draft": STATUS_DRAFT}.get(item.findtext(WORDPRESS.status), STATUS_PUBLISHED)
        post_name = item.findtext(WORDPRESS.post_name)
        pub_date = parse_wordpress_date(item.findtext(WORDPRESS.post_date_gmt))
        content_type = {"post": "entry", "page": "page"}.get(item.findtext(WORDPRESS.post_type), "entry")
        slug = None

        # Without a publication date or a post name the item is treated as
        # a draft; only published items get a slug.
        if pub_date is None or post_name is None:
            status = STATUS_DRAFT
        if status == STATUS_PUBLISHED:
            slug = gen_timestamped_slug(post_name, content_type, pub_date)

        # Store WordPress comment ids mapped to Comment objects
        comments = {}
        for x in item.findall(WORDPRESS.comment):
            approved = x.findtext(WORDPRESS.comment_approved)
            if approved == "spam":
                continue
            commentobj = Comment(
                x.findtext(WORDPRESS.comment_author),
                x.findtext(WORDPRESS.comment_content),
                x.findtext(WORDPRESS.comment_author_email),
                x.findtext(WORDPRESS.comment_author_url),
                # Parent lookup only succeeds if the parent was seen earlier
                # in this item; otherwise the comment gets no parent.
                comments.get(x.findtext(WORDPRESS.comment_parent), None),
                parse_wordpress_date(x.findtext(WORDPRESS.comment_date_gmt)),
                x.findtext(WORDPRESS.comment_author_ip),
                "html",
                # WordPress names these comment types "pingback" and
                # "trackback"; the former check for "traceback" was a typo
                # that never matched a trackback.
                x.findtext(WORDPRESS.comment_type) in ("pingback", "trackback"),
                COMMENT_MODERATED if approved == "1" else COMMENT_UNMODERATED,
            )
            comments[x.findtext(WORDPRESS.comment_id)] = commentobj

        post_body = item.findtext(CONTENT.encoded)
        post_intro = item.findtext("description")
        if post_intro and not post_body:
            post_body = post_intro
            post_intro = None
        elif post_body:
            # Split at the first "more" marker only, so that text following
            # any later marker stays in the body instead of being dropped.
            parts = more_marker.split(post_body, maxsplit=1)
            if len(parts) > 1:
                post_intro = clean_empty_tags.sub("", _wordpress_to_html(parts[0]))
                post_body = parts[1]
        else:
            # hmm. nothing to process. skip that entry
            continue

        post_body = clean_empty_tags.sub("", _wordpress_to_html(post_body))

        post = Post(
            slug,
            item.findtext("title"),
            item.findtext("link"),
            pub_date,
            get_author(item.findtext(DC_METADATA.creator)),
            post_intro,
            post_body,
            [tags[x.text] for x in item.findall("tag") if x.text in tags],
            [categories[x.text] for x in item.findall("category") if x.text in categories],
            # Real lists, so consumers see the same sequence type whether
            # `dict.values()` returns a list or a view.
            list(comments.values()),
            item.findtext("comment_status") != "closed",
            item.findtext("ping_status") != "closed",
            parser="html",
            content_type=content_type,
        )
        posts.append(post)

    return Blog(
        tree.findtext("title"),
        tree.findtext("link"),
        tree.findtext("description") or "",
        tree.findtext("language") or "en",
        list(tags.values()),
        list(categories.values()),
        posts,
        list(authors.values()),
    )
Example #4
0
def parse_feed(fd):
    """Parse an extended WordPress RSS feed into a structure the general
    importer system can handle.  The return value is a `Blog` object.

    The feed is read through `parse_broken_wxr(fd)`.  Tags and categories
    are collected first, then every ``item`` element is turned into a
    `Post` together with its non-spam comments.  Items with neither encoded
    content nor a description are skipped.
    """
    tree = parse_broken_wxr(fd)

    authors = {}
    def get_author(name):
        """Look up (or create and remember) the `Author` for `name`;
        gives `None` for an empty or missing name.
        """
        if not name:
            return None
        author = authors.get(name)
        if author is None:
            author = authors[name] = Author(name, None)
        return author

    tags = {}
    for item in tree.findall(WORDPRESS.tag):
        tag = Tag(item.findtext(WORDPRESS.tag_slug),
                  item.findtext(WORDPRESS.tag_name))
        tags[tag.name] = tag

    categories = {}
    for item in tree.findall(WORDPRESS.category):
        category = Category(item.findtext(WORDPRESS.category_nicename),
                            item.findtext(WORDPRESS.cat_name))
        categories[category.name] = category

    posts = []
    # Raw string so the backslash escapes are interpreted by `re` only and
    # not (as invalid escapes) by the Python string literal.
    clean_empty_tags = re.compile(r"\<(?P<tag>\w+?)\>[\r\n]?\</(?P=tag)\>")
    # Loop-invariant, so compiled once up front.
    more_split = re.compile('<!--more ?.*?-->')

    for item in tree.findall('item'):
        status = {
            'draft':            STATUS_DRAFT
        }.get(item.findtext(WORDPRESS.status), STATUS_PUBLISHED)
        post_name = item.findtext(WORDPRESS.post_name)
        pub_date = parse_wordpress_date(item.findtext(WORDPRESS.post_date_gmt))
        content_type = {'post': 'entry', 'page': 'page'}.get(
            item.findtext(WORDPRESS.post_type), 'entry')
        slug = None

        # An item missing its date or name is demoted to a draft, and
        # drafts are left without a slug.
        if pub_date is None or post_name is None:
            status = STATUS_DRAFT
        if status == STATUS_PUBLISHED:
            slug = gen_timestamped_slug(post_name, content_type, pub_date)

        # Store WordPress comment ids mapped to Comment objects
        comments = {}
        for x in item.findall(WORDPRESS.comment):
            approval = x.findtext(WORDPRESS.comment_approved)
            if approval == 'spam':
                continue
            if approval == '1':
                moderation = COMMENT_MODERATED
            else:
                moderation = COMMENT_UNMODERATED
            commentobj = Comment(
                x.findtext(WORDPRESS.comment_author),
                x.findtext(WORDPRESS.comment_content),
                x.findtext(WORDPRESS.comment_author_email),
                x.findtext(WORDPRESS.comment_author_url),
                # Resolves only to parents already collected for this item.
                comments.get(x.findtext(WORDPRESS.comment_parent), None),
                parse_wordpress_date(x.findtext(
                    WORDPRESS.comment_date_gmt)),
                x.findtext(WORDPRESS.comment_author_ip),
                'html',
                # The WordPress comment types are 'pingback' and
                # 'trackback'; 'traceback' was a typo and never matched.
                x.findtext(WORDPRESS.comment_type) in ('pingback',
                                                       'trackback'),
                moderation
            )
            comments[x.findtext(WORDPRESS.comment_id)] = commentobj

        post_body = item.findtext(CONTENT.encoded)
        post_intro = item.findtext('description')
        if post_intro and not post_body:
            post_body = post_intro
            post_intro = None
        elif post_body:
            # Only the first "more" marker separates intro from body;
            # limiting the split keeps everything after later markers.
            pieces = more_split.split(post_body, maxsplit=1)
            if len(pieces) > 1:
                post_intro = clean_empty_tags.sub(
                    '', _wordpress_to_html(pieces[0]))
                post_body = pieces[1]
        else:
            # hmm. nothing to process. skip that entry
            continue

        post_body = clean_empty_tags.sub('', _wordpress_to_html(post_body))

        post = Post(
            slug,
            item.findtext('title'),
            item.findtext('link'),
            pub_date,
            get_author(item.findtext(DC_METADATA.creator)),
            post_intro,
            post_body,
            [tags[x.text] for x in item.findall('tag')
             if x.text in tags],
            [categories[x.text] for x in item.findall('category')
             if x.text in categories],
            # Passed as a list so the receiver gets a plain sequence
            # regardless of what `dict.values()` returns.
            list(comments.values()),
            item.findtext('comment_status') != 'closed',
            item.findtext('ping_status') != 'closed',
            parser='html',
            content_type=content_type
        )
        posts.append(post)

    return Blog(
        tree.findtext('title'),
        tree.findtext('link'),
        tree.findtext('description') or '',
        tree.findtext('language') or 'en',
        list(tags.values()),
        list(categories.values()),
        posts,
        list(authors.values())
    )