def gen_posts():
    """Yield new Post objects built from legacy PostsPost rows.

    Skips rows whose author or last-edit user was not loaded into
    ``users_set``. HTML-looking content is converted to markdown via
    html2text and re-rendered; plain content is carried over as-is.
    Side effect: records each post's (root_id, parent_id) pair in the
    module-level ``relations`` dict, keyed by the new post uid.
    """
    logger.info("Transferring posts")
    posts = PostsPost.objects.order_by("id")
    elapsed, progress = timer_func()
    # ``limit`` caps how many rows are transferred; ``count(1)`` numbers them.
    stream = islice(zip(count(1), posts), limit)
    for index, post in stream:
        progress(index, msg="posts")
        author = users_set.get(str(post.author_id))
        lastedit_user = users_set.get(str(post.lastedit_user_id))
        # Incomplete author information loaded or existing posts.
        if not (author and lastedit_user):
            continue
        is_toplevel = post.type in Post.TOP_LEVEL
        rank = post.lastedit_date.timestamp()
        # Convert the content to markdown if it is HTML.
        force_text = post.content.strip().startswith("<")
        if force_text:
            try:
                content = html2text.html2text(post.content, bodywidth=0)
            except Exception as exc:
                content = post.content
                # Fix: include the exception in the log — previously ``exc``
                # was bound but never used, so failures were undiagnosable.
                logger.error(f"Failed parsing post={post.id}: {exc}")
            html = markdown.parse(content)
        else:
            content = post.content
            html = post.html
        new_post = Post(uid=post.id, html=decode(html), type=post.type,
                        is_toplevel=is_toplevel,
                        lastedit_user=lastedit_user,
                        thread_votecount=post.thread_score,
                        author=author, status=post.status, rank=rank,
                        accept_count=int(post.has_accepted),
                        lastedit_date=post.lastedit_date,
                        book_count=post.book_count,
                        content=decode(content), title=decode(post.title),
                        vote_count=post.vote_count,
                        creation_date=post.creation_date,
                        tag_val=decode(post.tag_val),
                        view_count=post.view_count)
        # Store parent and root for every post.
        relations[str(new_post.uid)] = [str(post.root_id), str(post.parent_id)]
        yield new_post
def slow_update(threads):
    """Synchronize posts one row at a time from a threads payload.

    ``threads`` is a mapping with ``rows`` (list of value tuples) and
    ``column`` (the matching column names). Each row is applied to an
    existing Post matched by uid, or to a brand-new Post, then saved.
    """
    columns = threads.get('column', [])
    for values in threads.get('rows', []):
        # Pair each value with its column name.
        record = dict(zip(columns, values))
        # Fetch an existing post by uid, or start an empty one.
        target = Post.objects.filter(uid=record['id']).first()
        if target is None:
            target = Post()
        sync_post(target, **record)
        # Trigger save.
        target.save()
def gen_posts():
    """Yield new Post objects migrated from legacy PostsPost rows.

    For each transferred post the reply and comment counts are derived by
    counting sibling rows sharing the same root. Rows with a missing
    author or last-edit user in ``users_set`` are skipped. Side effect:
    each post's (root_id, parent_id) pair is recorded in the module-level
    ``relations`` dict keyed by the new uid.
    """
    logger.info("transferring posts")
    posts = PostsPost.objects.order_by("id")
    elapsed, progress = timer_func()
    # Number rows from 1 and cap the stream at ``limit`` entries.
    numbered = islice(zip(count(1), posts), limit)
    for index, post in numbered:
        progress(index, msg="posts")
        author = users_set.get(str(post.author_id))
        lastedit_user = users_set.get(str(post.lastedit_user_id))
        # Incomplete author information loaded or existing posts.
        if not author or not lastedit_user:
            continue
        # Record replies, comments, and answers to root.
        siblings = posts.filter(root_id=post.root_id)
        reply_count = siblings.count()
        comment_count = siblings.filter(type=Post.COMMENT).count()
        rank = post.lastedit_date.timestamp()
        content = util.strip_tags(post.content)
        migrated = Post(uid=post.id,
                        html=post.html,
                        type=post.type,
                        reply_count=reply_count,
                        lastedit_user=lastedit_user,
                        thread_votecount=post.thread_score,
                        author=author,
                        status=post.status,
                        rank=rank,
                        accept_count=int(post.has_accepted),
                        lastedit_date=post.lastedit_date,
                        book_count=post.book_count,
                        comment_count=comment_count,
                        content=content,
                        title=post.title,
                        vote_count=post.vote_count,
                        creation_date=post.creation_date,
                        tag_val=post.tag_val,
                        answer_count=post.reply_count,
                        view_count=post.view_count)
        # Store parent and root for every post.
        relations[str(migrated.uid)] = [str(post.root_id), str(post.parent_id)]
        yield migrated
def gen_posts():
    """Yield new Post objects copied from legacy PostsPost rows.

    Content, html, title and tag values are carried over verbatim
    (passed through ``decode``). Rows whose author or last-edit user is
    missing from ``users_set`` are skipped. Side effect: records each
    post's (root_id, parent_id) pair in the module-level ``relations``
    dict, keyed by the new uid.
    """
    logger.info("Transferring posts")
    posts = PostsPost.objects.order_by("id")
    elapsed, progress = timer_func()
    # Enumerate from 1 and truncate the stream to ``limit`` rows.
    for index, post in islice(zip(count(1), posts), limit):
        progress(index, msg="posts")
        author = users_set.get(str(post.author_id))
        lastedit_user = users_set.get(str(post.lastedit_user_id))
        # Incomplete author information loaded or existing posts.
        if not author or not lastedit_user:
            continue
        copied = Post(uid=post.id,
                      html=decode(post.html),
                      type=post.type,
                      is_toplevel=post.type in Post.TOP_LEVEL,
                      lastedit_user=lastedit_user,
                      thread_votecount=post.thread_score,
                      author=author,
                      status=post.status,
                      rank=post.lastedit_date.timestamp(),
                      accept_count=int(post.has_accepted),
                      lastedit_date=post.lastedit_date,
                      book_count=post.book_count,
                      content=decode(post.content),
                      title=decode(post.title),
                      vote_count=post.vote_count,
                      creation_date=post.creation_date,
                      tag_val=decode(post.tag_val),
                      view_count=post.view_count)
        # Store parent and root for every post.
        relations[str(copied.uid)] = [str(post.root_id), str(post.parent_id)]
        yield copied
def bulk_create_posts(rows, column, users, relations=None):
    """Yield unsaved Post objects built from raw row tuples.

    Args:
        rows: iterable of value tuples, one per post.
        column: column names matching the position of each row value.
        users: mapping of user id -> user object, used to resolve the
            ``author_id`` and ``lastedit_user_id`` foreign keys.
        relations: optional dict that is filled, per post uid, with the
            ``(root_id, parent_id)`` pair as strings. Defaults to a fresh
            dict each call.

    Yields:
        Post: an unsaved Post populated from the row.
    """
    # Fix: the original used a mutable default (``relations=dict()``),
    # which is shared across calls and silently accumulates entries.
    if relations is None:
        relations = {}
    for row in rows:
        # Map column names to row values.
        row = {col: val for col, val in zip(column, row)}
        post = Post(lastedit_user=users[row['lastedit_user_id']],
                    author=users[row['author_id']],
                    uid=row['id'],
                    view_count=row['view_count'],
                    vote_count=row['vote_count'],
                    book_count=row['book_count'],
                    accept_count=int(row['has_accepted']),
                    thread_votecount=row['thread_score'],
                    creation_date=row['creation_date'],
                    html=row['html'],
                    content=row['content'],
                    title=row['title'],
                    status=row['status'],
                    type=row['type'],
                    tag_val=row['tag_val'],
                    lastedit_date=row['lastedit_date'],
                    rank=row['rank'])
        # Record root/parent ids so callers can rebuild the thread tree.
        relations[str(row['id'])] = str(row['root_id']), str(row['parent_id'])
        yield post
def output_post_link(self, m):
    """Render a regex match of a post link as an HTML anchor.

    Looks up the post by the ``uid`` capture group; if none exists, an
    unsaved placeholder Post with an explanatory title is used so the
    link still renders.
    """
    uid = m.group("uid")
    found = Post.objects.filter(uid=uid).first()
    if found is None:
        found = Post(title=f"Invalid post uid: {uid}")
    # The whole matched text is reused as the href.
    href = m.group(0)
    return f'<a href="{href}">{found.title}</a>'