def process_post(submission: Submission):
    """
    Handle one PRAW Submission.

    A new submission is inserted into the database; one that is already stored is refreshed.
    Afterwards the submission is announced via ``send_new_submission_message`` and the stored
    post is flagged ``sent_to_feed`` so that later calls skip it.
    """

    post = post_service.get_post_by_id(submission.id)

    # Stored *and* already announced: nothing left to do (most likely we were restarted).
    if post and post.sent_to_feed:
        logger.debug(f"Already processed, skipping post {submission.id}")
        return

    # Deleted accounts come back with no author object at all.
    if submission.author is not None:
        author_name = submission.author.name
    else:
        author_name = "[deleted]"
    logger.info(f"Processing post {submission.id} - /u/{author_name} - {submission.link_flair_text}")

    # Refresh the existing record, or create one when this is the first time we see the post.
    post = post_service.update_post(post, submission) if post else post_service.add_post(submission)

    # Only mark the post as sent once the message call has returned.
    send_new_submission_message(submission)
    post.sent_to_feed = True
    base_data_service.update(post)

    logger.debug(f"Finished processing {submission.id}")
def process_comment(reddit_comment: Comment):
    """
    Handle one PRAW Comment.

    A comment that is already stored is only refreshed. Otherwise its post, its chain of
    parent comments and finally the comment itself are saved, in that order.
    """

    known_comment = comment_service.get_comment_by_id(reddit_comment.id)
    if known_comment:
        # Already stored: bring our copy up to date and stop.
        comment_service.update_comment(known_comment, reddit_comment)
        return

    # Deleted accounts come back with no author object at all.
    if reddit_comment.author is None:
        author_name = "[deleted]"
    else:
        author_name = reddit_comment.author.name
    logger.info(
        f"Processing comment {reddit_comment.id} - /u/{author_name} (post {reddit_comment.submission.id})"
    )

    # A comment cannot be stored without its post, so make sure the post is there first.
    if not post_service.get_post_by_id(reddit_comment.submission.id):
        post_service.add_post(reddit_comment.submission)

    # Each comment references its parent, so every ancestor has to be saved before it.
    logger.debug(f"Saving parent comments of {reddit_comment.id}")
    comment_service.add_comment_parent_tree(reddit, reddit_comment)

    logger.debug(f"Saving comment {reddit_comment.id}")
    saved_comment = comment_service.add_comment(reddit_comment)

    logger.debug(f"Finished processing {saved_comment.id}")
def add_comment_parent_tree(reddit: Reddit, reddit_comment: Comment):
    """
    Save every ancestor of the given comment (the comment itself is *not* saved here).

    Walks upward from the comment's parent until it reaches either a comment that is already
    in the database or the top-level comment of the thread, then inserts the collected
    ancestors from the top of the chain downward. Authors and the post are created along the
    way when they are missing.

    Needs improvements for efficiency.
    """

    # Iterative on purpose: the maximum depth of a reddit comment chain is unknown, so we do
    # not rely on recursion. Ancestors are collected child-first and inserted in reverse,
    # because each comment's parent has to exist before the comment itself can be inserted.
    pending_models = []
    current = reddit_comment

    # For a top-level comment parent_id refers to the submission, hence the is_root check
    # rather than looking at parent_id.
    while not current.is_root:
        parent_id = current.parent_id.split("t1_")[1]

        # Everything above an already-stored parent is assumed to be stored as well.
        if get_comment_by_id(parent_id):
            break

        # Step up to the parent and build its model; the insert itself happens later.
        current = reddit.comment(id=parent_id)
        pending_models.append(_create_comment_model(current))

        # Make sure the parent's author exists.
        if current.author is not None:
            if not user_service.get_user(current.author.name):
                user_service.add_user(current.author)

        # Make sure the post exists.
        if not post_service.get_post_by_id(current.submission.id):
            post_service.add_post(current.submission)

    # Topmost ancestor first, then down the chain.
    for model in reversed(pending_models):
        _comment_data.insert(model, error_on_conflict=False)
def migrate_posts(offset=0, batch_size=1000):
    """
    Migrate one batch of posts from the old SQLite database to the new database.

    Posts that already exist in the new database are skipped. Only the id, title,
    created time and flair text are copied over.

    Args:
        offset: Number of rows of the old ``posts`` table to skip before this batch.
        batch_size: Maximum number of rows to read in this batch (defaults to 1000).

    Returns:
        The number of rows read from the old table. A value smaller than ``batch_size``
        means the end of the table has been reached.
    """

    conn = sqlite3.connect(DB_FILE)
    try:
        conn.row_factory = sqlite3.Row
        # ORDER BY keeps LIMIT/OFFSET paging deterministic between calls; without it SQLite
        # makes no promise about row order, so batches could overlap or skip rows.
        rows = conn.execute(
            "SELECT * FROM posts ORDER BY psk LIMIT ? OFFSET ?;", (batch_size, offset)
        ).fetchall()
    finally:
        # Always release the connection, even when the query fails.
        conn.close()

    for row in rows:
        # If the post already exists in the database we don't need to do anything.
        post_id36 = row["id"]
        if post_service.get_post_by_id(post_id36):
            continue

        # NOTE: the old database holds no user data, so the author is left unset here.
        # Flair id and user info are expected to be filled in by a later mass update/backfill.
        post = PostModel()
        post.set_id(post_id36)
        post.title = row["title"]
        post.created_time = row["created_time"]
        post.flair_text = row["flair"]

        _post_data.insert(post, error_on_conflict=False)

    if not rows:
        logger.warning("No rows processed!")
    else:
        # The last row read, whether it was inserted or skipped as already present.
        last_row = rows[-1]
        logger.info(f"Most recent migrated row: psk={last_row['psk']}, id={last_row['id']}")

    return len(rows)
def add_comment(reddit_comment: Comment) -> CommentModel:
    """
    Build a model from a PRAW comment and store it, creating the author and post when missing.

    The parent comment is expected to be stored already; call add_comment_parent_tree
    beforehand when that is not guaranteed.
    """

    comment_model = _create_comment_model(reddit_comment)

    # Make sure the author exists (deleted accounts have no author).
    author = reddit_comment.author
    if author is not None:
        if not user_service.get_user(author.name):
            user_service.add_user(author)

    # Make sure the post exists.
    submission = reddit_comment.submission
    if not post_service.get_post_by_id(submission.id):
        post_service.add_post(submission)

    return _comment_data.insert(comment_model, error_on_conflict=False)
def add_frontpage_post(reddit_post: Submission, snapshot: SnapshotModel, rank: int) -> SnapshotFrontpageModel:
    """
    Record a post's rank and score for the given snapshot.

    The post itself is inserted when it is not stored yet and refreshed otherwise.
    Returns the value produced by inserting the new frontpage entry.
    """

    post_id = reddit_post.id
    stored_post = post_service.get_post_by_id(post_id)

    # Refresh a post we already have, otherwise save it for the first time.
    if stored_post:
        post = post_service.update_post(stored_post, reddit_post)
    else:
        logger.debug(f"Saving post {post_id}")
        post = post_service.add_post(reddit_post)

    entry = SnapshotFrontpageModel()
    entry.post_id = post.id
    entry.snapshot_id = snapshot.id
    entry.rank = rank
    entry.score = reddit_post.score

    return _snapshot_data.insert(entry)
def send_discord_message(mod_action: ModActionModel):
    """
    Build an embed describing a saved mod action and send it to the configured Discord webhook.

    The embed title and URL are refined depending on what the action targeted (a comment,
    a post or a user). When the targeted record cannot be found in the database the generic
    title is kept and no URL is attached.

    Args:
        mod_action: The saved mod action to report.
    """

    logger.info(f"Attempting to send a message to Discord for {mod_action}")

    embed_json = {
        "author": {
            "name": f"Mod Log - /u/{mod_action.mod}",
        },
        "title": f"{mod_action.mod}: {mod_action.action}",
        "timestamp": mod_action.created_time.isoformat(),
        "fields": [],
        "color": 0xCC0000,
    }

    # Add a URL if there's a specific thing we can focus on, also update title if possible.
    target = None
    if mod_action.target_comment_id:
        target = comment_service.get_comment_by_id(mod_action.target_comment_id)
        embed_json["title"] = f"{mod_action.mod}: {mod_action.action} by {mod_action.target_user}"
    elif mod_action.target_post_id:
        target = post_service.get_post_by_id(mod_action.target_post_id)
        # The lookup can come back empty; keep the generic title instead of failing on .title.
        if target is not None:
            title = discord.escape_formatting(f"{mod_action.mod}: {mod_action.action} - {target.title}")
            # Post titles can be long: cap the embed title at 256 characters, ellipsis included.
            embed_json["title"] = title[:253] + "..." if len(title) > 256 else title
    elif mod_action.target_user:
        target = user_service.get_user(mod_action.target_user)
        embed_json["title"] = f"{mod_action.mod}: {mod_action.action} - {mod_action.target_user}"

    if target:
        embed_json["url"] = reddit_utils.make_permalink(target)

    if mod_action.details:
        embed_json["description"] = mod_action.details

    if mod_action.description:
        desc_info = {"name": "Description", "value": mod_action.description}
        embed_json["fields"].append(desc_info)

    discord.send_webhook_message(config_loader.DISCORD["webhook_url"], {"embeds": [embed_json]})
def _is_routine_reddit_action(mod_action: ModAction) -> bool:
    """Return True for actions by the "reddit" account that should not trigger a notification."""

    if mod_action.mod.name != "reddit":
        return False

    # Temporary mutes expiring always come from reddit and we don't really care about those.
    if mod_action.action == "unmuteuser":
        return True

    # Crowd control removals are filtered to the mod queue, so they don't need a notification.
    return mod_action.action in ("removecomment", "removelink") and mod_action.details == "Crowd Control"


def _record_unknown_moderator(mod_action: ModAction) -> bool:
    """
    Handle an action whose actor is not in ``active_mods``.

    Saves the actor as a user when necessary and, for actors other than the admin accounts,
    flags them as a moderator and refreshes the moderator list when the subreddit lists them
    as one. Returns whether the action warrants a notification.
    """

    # Add them to the database if necessary.
    mod_user = user_service.get_user(mod_action.mod.name)
    if not mod_user:
        mod_user = user_service.add_user(mod_action.mod)

    # Actions from non-mods normally trigger a notification, except routine ones from reddit.
    notify = not _is_routine_reddit_action(mod_action)

    # For non-admin cases, check to see if they're a [new] mod of the subreddit and refresh the list if so.
    # Compared by name, like every other check on the acting account in this module.
    if mod_action.mod.name not in ("Anti-Evil Operations", "reddit"):
        logger.info(f"Unknown mod found: {mod_action.mod.name}")
        if mod_user.username in subreddit.moderator():
            logger.debug(f"Updating mod status for {mod_user}")
            mod_user.moderator = True
            base_data_service.update(mod_user)
            _get_moderators()

    return notify


def _sync_target_user(mod_action: ModAction):
    """Make sure the targeted user is saved and apply ban/unban/demotion changes to their record."""

    user = user_service.get_user(mod_action.target_author)
    if not user:
        logger.debug(f"Saving user {mod_action.target_author}")
        user = user_service.add_user(reddit.redditor(name=mod_action.target_author))

    # For bans and unbans, update the user in the database.
    if mod_action.action == "banuser":
        # Weirdly this returns a ListingGenerator so we have to iterate over it; there should only be one though.
        # If the user isn't banned, the loop won't execute.
        for ban_user in subreddit.banned(redditor=user.username):
            if ban_user.days_left is None:
                # Permanent if days_left is None.
                user.banned_until = "infinity"
            else:
                # days_left will show 0 if they were banned for 1 day a few seconds ago; it seems like it
                # rounds down based on the time of the ban occurring, so we can safely assume that even if
                # the ban happened a few seconds before getting to this point, we should add an extra day
                # onto the reported number.
                ban_start = datetime.fromtimestamp(mod_action.created_utc, tz=timezone.utc)
                user.banned_until = ban_start + timedelta(days=ban_user.days_left + 1)
            break
        base_data_service.update(user)
    elif mod_action.action == "unbanuser":
        user.banned_until = None
        base_data_service.update(user)
    elif mod_action.action == "removemoderator":
        logger.debug(f"Updating mod status for {user}")
        user.moderator = False
        base_data_service.update(user)
        _get_moderators()


def _sync_target_post(mod_action: ModAction):
    """Add or update the post targeted by the action, restoring a deleted body from the mod action."""

    post_id = mod_action.target_fullname.split("_")[1]
    post = post_service.get_post_by_id(post_id)
    reddit_post = reddit.submission(id=post_id)

    # Add or update post as necessary.
    if not post:
        logger.debug(f"Saving post {post_id}")
        post = post_service.add_post(reddit_post)
    else:
        post = post_service.update_post(post, reddit_post)

    # If the user deleted their text post, the mod action still has the post body that we can save in place.
    if post.deleted and post.body == "[deleted]" and post.body != mod_action.target_body:
        post.body = mod_action.target_body
        base_data_service.update(post)


def _sync_target_comment(mod_action: ModAction):
    """Add or update the comment targeted by the action (and its post and parents when new)."""

    comment_id = mod_action.target_fullname.split("_")[1]
    comment = comment_service.get_comment_by_id(comment_id)
    reddit_comment = reddit.comment(id=comment_id)

    if not comment:
        # Post needs to exist before we can add a comment for it, start with that.
        if not post_service.get_post_by_id(reddit_comment.submission.id):
            post_service.add_post(reddit_comment.submission)

        # Since all comments will reference a parent if it exists, add all parent comments first.
        logger.debug(f"Saving parent comments of {comment_id}")
        comment_service.add_comment_parent_tree(reddit, reddit_comment)
        logger.debug(f"Saving comment {comment_id}")
        comment = comment_service.add_comment(reddit_comment)
    else:
        # Update our record of the comment if necessary.
        comment = comment_service.update_comment(comment, reddit_comment)

    # If the user deleted their comment, the mod action still has the body that we can save in place.
    if comment.deleted and comment.body != mod_action.target_body:
        comment.body = mod_action.target_body
        base_data_service.update(comment)


def parse_mod_action(mod_action: ModAction):
    """
    Process a single PRAW ModAction.

    Skips actions that were already saved. Otherwise makes sure the acting moderator, the
    targeted user and the targeted post or comment are stored, saves the mod action itself
    and sends a Discord notification when the action calls for one.

    Assumes that reddit and subreddit are already instantiated by one of the two entry points
    (monitor_stream or load_archive).
    """

    # Check if we've already processed this mod action, do nothing if so.
    mod_action_id = mod_action.id.replace("ModAction_", "")
    if mod_action_service.get_mod_action_by_id(mod_action_id):
        logger.debug(f"Already processed, skipping mod action {mod_action_id}")
        return

    logger.info(
        f"Processing mod action {mod_action_id}: {mod_action.mod.name} - "
        f"{mod_action.action} - {mod_action.target_fullname}"
    )

    send_notification = mod_action.action in mod_constants.MOD_ACTIONS_ALWAYS_NOTIFY

    # If there's an action by an unknown moderator or admin, make a note of it and check to see
    # if they should be added to the mod list. The helper must run for its side effects even
    # when a notification is already due, so it is not folded into a short-circuiting `or`.
    if mod_action.mod.name not in active_mods:
        if _record_unknown_moderator(mod_action):
            send_notification = True

    # See if the user targeted by this action exists in the system, add them if not.
    # Bans and similar user-focused actions independent of posts/comments will also have
    # a target_fullname value (t2_...) but won't be necessary to check after this.
    if mod_action.target_author:
        _sync_target_user(mod_action)

    # See if the post or comment targeted by this action exists in the system, add it if not.
    target_fullname = mod_action.target_fullname
    if target_fullname:
        if target_fullname.startswith("t3_"):
            _sync_target_post(mod_action)
        elif target_fullname.startswith("t1_"):
            _sync_target_comment(mod_action)

    logger.debug(f"Saving mod action {mod_action_id}")
    saved_mod_action = mod_action_service.add_mod_action(mod_action)

    if send_notification:
        send_discord_message(saved_mod_action)