Example #1
0
    def gen_posts():
        """Yield unsaved ``Post`` objects built from the old ``PostsPost`` rows.

        Rows are read in ``id`` order and at most ``limit`` of them are
        consumed (``limit`` comes from the enclosing scope).  A row is skipped
        when either its author or its last-edit user is missing from
        ``users_set``.  Content that starts with ``<`` is treated as HTML and
        converted to markdown before being rendered again.

        Side effect: for every yielded post, ``relations`` (enclosing scope)
        receives ``str(uid) -> [str(root_id), str(parent_id)]``.
        """
        logger.info("Transferring posts")

        posts = PostsPost.objects.order_by("id")
        # Only the progress callback is needed here.
        _elapsed, progress = timer_func()
        stream = zip(count(1), posts)
        stream = islice(stream, limit)

        for index, post in stream:
            progress(index, msg="posts")

            author = users_set.get(str(post.author_id))
            lastedit_user = users_set.get(str(post.lastedit_user_id))
            # Incomplete author information loaded or existing posts.
            if not (author and lastedit_user):
                continue

            is_toplevel = post.type in Post.TOP_LEVEL

            rank = post.lastedit_date.timestamp()

            # Convert the content to markdown if it looks like HTML.
            looks_like_html = post.content.strip().startswith("<")
            if looks_like_html:
                try:
                    content = html2text.html2text(post.content, bodywidth=0)
                except Exception as exc:
                    # Best effort: keep the raw content, but record why the
                    # conversion failed so the post can be fixed later.
                    content = post.content
                    logger.error(f"Failed parsing post={post.id}: {exc}")

                # Re-render from the (possibly converted) content.
                html = markdown.parse(content)
            else:
                content = post.content
                html = post.html

            new_post = Post(uid=post.id,
                            html=decode(html),
                            type=post.type,
                            is_toplevel=is_toplevel,
                            lastedit_user=lastedit_user,
                            thread_votecount=post.thread_score,
                            author=author,
                            status=post.status,
                            rank=rank,
                            accept_count=int(post.has_accepted),
                            lastedit_date=post.lastedit_date,
                            book_count=post.book_count,
                            content=decode(content),
                            title=decode(post.title),
                            vote_count=post.vote_count,
                            creation_date=post.creation_date,
                            tag_val=decode(post.tag_val),
                            view_count=post.view_count)

            # Store root and parent for every post.
            relations[str(new_post.uid)] = [str(post.root_id),
                                            str(post.parent_id)]
            yield new_post
Example #2
0
def slow_update(threads):
    """Create or update one ``Post`` per row in ``threads``, saving each in turn.

    ``threads`` is a mapping that may carry ``'rows'`` (a sequence of value
    sequences) and ``'column'`` (the matching field names); both default to
    empty.  Each row is paired with the column names, the post with the
    row's ``'id'`` as its ``uid`` is looked up (or a blank ``Post`` is
    started), ``sync_post`` is applied with the row's fields, and the post
    is saved.
    """
    rows = threads.get('rows', [])
    column = threads.get('column', [])
    for values in rows:
        fields = dict(zip(column, values))

        # Reuse the stored post with this uid, otherwise start an empty one.
        existing = Post.objects.filter(uid=fields['id']).first()
        post = existing or Post()

        sync_post(post, **fields)

        # Persist the synced post.
        post.save()
Example #3
0
    def gen_posts():
        """Yield unsaved ``Post`` objects copied from the ``PostsPost`` table.

        Rows are visited in ``id`` order, capped at ``limit`` (enclosing
        scope).  Rows whose author or last-edit user is absent from
        ``users_set`` are skipped.  Each yielded post also registers
        ``str(uid) -> [str(root_id), str(parent_id)]`` in ``relations``.
        """
        logger.info("transferring posts")

        posts = PostsPost.objects.order_by("id")

        elapsed, progress = timer_func()
        numbered = islice(zip(count(1), posts), limit)
        for position, source in numbered:
            progress(position, msg="posts")

            author = users_set.get(str(source.author_id))
            editor = users_set.get(str(source.lastedit_user_id))
            # Skip rows with incomplete user information.
            if not author or not editor:
                continue

            # Count every post sharing this root, and the comments among them.
            # NOTE(review): this issues two COUNT queries per post.
            same_thread = posts.filter(root_id=source.root_id)
            reply_count = same_thread.count()
            comment_count = same_thread.filter(type=Post.COMMENT).count()

            fields = dict(
                uid=source.id,
                html=source.html,
                type=source.type,
                reply_count=reply_count,
                lastedit_user=editor,
                thread_votecount=source.thread_score,
                author=author,
                status=source.status,
                rank=source.lastedit_date.timestamp(),
                accept_count=int(source.has_accepted),
                lastedit_date=source.lastedit_date,
                book_count=source.book_count,
                comment_count=comment_count,
                content=util.strip_tags(source.content),
                title=source.title,
                vote_count=source.vote_count,
                creation_date=source.creation_date,
                tag_val=source.tag_val,
                answer_count=source.reply_count,
                view_count=source.view_count,
            )
            new_post = Post(**fields)

            # Remember root and parent so they can be wired up later.
            key = str(new_post.uid)
            relations[key] = [str(source.root_id), str(source.parent_id)]
            yield new_post
Example #4
0
    def gen_posts():
        """Yield unsaved ``Post`` objects mirroring the ``PostsPost`` rows.

        Reads rows ordered by ``id`` and stops after ``limit`` rows
        (``limit`` is taken from the enclosing scope).  A row is dropped when
        ``users_set`` has no entry for its author or its last-edit user.
        Text fields pass through ``decode`` before being stored.  As a side
        effect ``relations`` maps ``str(uid)`` to
        ``[str(root_id), str(parent_id)]`` for every yielded post.
        """
        logger.info("Transferring posts")

        posts = PostsPost.objects.order_by("id")
        elapsed, progress = timer_func()

        for index, old in islice(enumerate(posts, 1), limit):
            progress(index, msg="posts")

            author = users_set.get(str(old.author_id))
            lastedit_user = users_set.get(str(old.lastedit_user_id))
            # Both users must be known, otherwise the post is skipped.
            if not author or not lastedit_user:
                continue

            new_post = Post(
                uid=old.id,
                html=decode(old.html),
                type=old.type,
                is_toplevel=old.type in Post.TOP_LEVEL,
                lastedit_user=lastedit_user,
                thread_votecount=old.thread_score,
                author=author,
                status=old.status,
                rank=old.lastedit_date.timestamp(),
                accept_count=int(old.has_accepted),
                lastedit_date=old.lastedit_date,
                book_count=old.book_count,
                content=decode(old.content),
                title=decode(old.title),
                vote_count=old.vote_count,
                creation_date=old.creation_date,
                tag_val=decode(old.tag_val),
                view_count=old.view_count,
            )

            # Keep root and parent ids for every post.
            root_uid, parent_uid = str(old.root_id), str(old.parent_id)
            relations[str(new_post.uid)] = [root_uid, parent_uid]
            yield new_post
Example #5
0
def bulk_create_posts(rows, column, users, relations=None):
    """Yield an unsaved ``Post`` for every row, recording its thread relations.

    Args:
        rows: iterable of value sequences, one per post.
        column: field names, positionally matching the values in each row.
        users: mapping indexed by the row's ``author_id`` and
            ``lastedit_user_id``; a missing id raises ``KeyError``.
        relations: optional mapping updated in place with
            ``str(id) -> (str(root_id), str(parent_id))``.  When omitted, a
            fresh dict is used for this call only, so state is never shared
            between calls.

    Yields:
        ``Post`` instances that have not been saved.
    """
    # A ``dict()`` default would be created once and shared by every call.
    if relations is None:
        relations = {}

    for row in rows:
        # Map column names to row.
        row = dict(zip(column, row))
        post = Post(lastedit_user=users[row['lastedit_user_id']],
                    author=users[row['author_id']],
                    uid=row['id'],
                    view_count=row['view_count'],
                    vote_count=row['vote_count'],
                    book_count=row['book_count'],
                    accept_count=int(row['has_accepted']),
                    thread_votecount=row['thread_score'],
                    creation_date=row['creation_date'],
                    html=row['html'],
                    content=row['content'],
                    title=row['title'],
                    status=row['status'],
                    type=row['type'],
                    tag_val=row['tag_val'],
                    lastedit_date=row['lastedit_date'],
                    rank=row['rank'])
        relations[str(row['id'])] = str(row['root_id']), str(row['parent_id'])
        yield post
Example #6
0
 def output_post_link(self, m):
     """Render a matched post link as an anchor labelled with the post title.

     ``m`` is a match object exposing a ``uid`` named group; the whole match
     is used as the link target.  When no post has that uid, the anchor text
     is an "Invalid post uid" placeholder instead.
     """
     uid = m.group("uid")
     link = m.group(0)

     post = Post.objects.filter(uid=uid).first()
     if not post:
         # Unsaved stand-in so the link still renders with a visible hint.
         post = Post(title=f"Invalid post uid: {uid}")

     # NOTE(review): title and link are interpolated without HTML escaping;
     # confirm the output is sanitized downstream.
     return f'<a href="{link}">{post.title}</a>'