Esempio n. 1
0
def process_post(submission: Submission):
    """
    Handle one PRAW Submission: store it (or refresh the stored copy), announce it, and flag it as sent.

    Submissions whose stored post is already marked ``sent_to_feed`` are skipped entirely.
    """

    stored_post = post_service.get_post_by_id(submission.id)

    # Nothing left to do for posts that were already saved and announced (typically seen after a restart).
    if stored_post and stored_post.sent_to_feed:
        logger.debug(f"Already processed, skipping post {submission.id}")
        return

    # Submissions without an author object are logged as "[deleted]".
    if submission.author is None:
        author_name = "[deleted]"
    else:
        author_name = submission.author.name
    logger.info(f"Processing post {submission.id} - /u/{author_name} - {submission.link_flair_text}")

    # Create the record on first sight, otherwise bring the existing one up to date.
    if not stored_post:
        stored_post = post_service.add_post(submission)
    else:
        stored_post = post_service.update_post(stored_post, submission)

    # Announce first, then persist the flag so the post is skipped the next time it is seen.
    send_new_submission_message(submission)
    stored_post.sent_to_feed = True
    base_data_service.update(stored_post)

    logger.debug(f"Finished processing {submission.id}")
Esempio n. 2
0
def process_comment(reddit_comment: Comment):
    """
    Handle one PRAW Comment.

    A comment that is already stored only gets its record updated. A new comment is saved after
    making sure its post and all of its parent comments are stored first.
    """

    known_comment = comment_service.get_comment_by_id(reddit_comment.id)
    if known_comment:
        # Already stored: just refresh our record and stop.
        comment_service.update_comment(known_comment, reddit_comment)
        return

    # Comments without an author object are logged as "[deleted]".
    if reddit_comment.author is None:
        author_name = "[deleted]"
    else:
        author_name = reddit_comment.author.name
    logger.info(
        f"Processing comment {reddit_comment.id} - /u/{author_name} (post {reddit_comment.submission.id})"
    )

    # The post has to be stored before any comment that belongs to it.
    if not post_service.get_post_by_id(reddit_comment.submission.id):
        post_service.add_post(reddit_comment.submission)

    # Every comment references its parent, so the chain of parents goes in before the comment itself.
    logger.debug(f"Saving parent comments of {reddit_comment.id}")
    comment_service.add_comment_parent_tree(reddit, reddit_comment)
    logger.debug(f"Saving comment {reddit_comment.id}")
    saved_comment = comment_service.add_comment(reddit_comment)

    logger.debug(f"Finished processing {saved_comment.id}")
Esempio n. 3
0
def add_comment_parent_tree(reddit: Reddit, reddit_comment: Comment):
    """
    Store every ancestor of the given comment (the comment itself is NOT stored here).

    Walks upward from the comment's parent until it hits either a top-level comment or an ancestor
    that is already in the database, then inserts the collected ancestors from the topmost down.
    Authors and posts of the visited ancestors are added along the way when missing.
    """

    # Iterative on purpose: chain depth is unknown, so models are collected child-first
    # and inserted afterwards in the opposite order, because each row's parent must exist first.
    pending_models = []
    current = reddit_comment

    while True:
        # Top-level comments have a submission as parent, so is_root (not parent_id) ends the walk.
        if current.is_root:
            break

        parent_id = current.parent_id.split("t1_")[1]

        # An ancestor that is already stored means everything above it is stored too.
        if get_comment_by_id(parent_id):
            break

        # Step up to the parent and build its model; insertion is deferred until the walk is done.
        current = reddit.comment(id=parent_id)
        pending_models.append(_create_comment_model(current))

        # Make sure the parent's author is stored.
        if current.author is not None:
            if not user_service.get_user(current.author.name):
                user_service.add_user(current.author)

        # Make sure the parent's post is stored.
        if not post_service.get_post_by_id(current.submission.id):
            post_service.add_post(current.submission)

    # Topmost ancestor first, closest parent last.
    for model in reversed(pending_models):
        _comment_data.insert(model, error_on_conflict=False)
def migrate_posts(offset=0):
    """Migrate one batch of up to 1000 posts from the old SQLite database to the new one.

    Rows whose post id already exists in the new database are skipped. Only id, title,
    created time and flair text are copied.

    Args:
        offset: Number of rows of the old ``posts`` table to skip before reading the batch.

    Returns:
        The number of rows read in this batch, including rows skipped because the post already
        exists. A value below 1000 means the end of the table was reached.
    """
    conn = sqlite3.connect(DB_FILE)
    try:
        conn.row_factory = sqlite3.Row
        # NOTE(review): there is no ORDER BY, so paging relies on SQLite returning rows in a
        # stable order between calls — confirm this is acceptable for the migration.
        rows = conn.execute("SELECT * FROM posts LIMIT 1000 OFFSET ?;", (offset,)).fetchall()
    finally:
        # Close even when the query fails so the connection is never leaked.
        conn.close()

    for row in rows:
        # If the post already exists in the database we don't need to do anything.
        post_id36 = row["id"]
        if post_service.get_post_by_id(post_id36):
            continue

        # The old database stores no user data, so the author is left unset here.
        post = PostModel()
        post.set_id(post_id36)
        post.title = row["title"]
        post.created_time = row["created_time"]
        # Flair id (and user info) are expected to be filled in by a later mass update/backfill.
        post.flair_text = row["flair"]
        _post_data.insert(post, error_on_conflict=False)

    if not rows:
        logger.warning("No rows processed!")
    else:
        last_row = rows[-1]
        logger.info(f"Most recent migrated row: psk={last_row['psk']}, id={last_row['id']}")
    return len(rows)
Esempio n. 5
0
def add_comment(reddit_comment: Comment) -> CommentModel:
    """
    Build a comment model from the PRAW comment and insert it into the database.

    The comment's author and post are added first when they are missing. The parent comment
    is expected to be stored already; run add_comment_parent_tree beforehand if it may not be.
    Returns whatever the insert returns for the new comment.
    """

    comment_model = _create_comment_model(reddit_comment)

    # Make sure the author is stored (comments without an author object are left as-is).
    if reddit_comment.author is not None:
        if not user_service.get_user(reddit_comment.author.name):
            user_service.add_user(reddit_comment.author)

    # Make sure the post the comment belongs to is stored.
    if not post_service.get_post_by_id(reddit_comment.submission.id):
        post_service.add_post(reddit_comment.submission)

    return _comment_data.insert(comment_model, error_on_conflict=False)
Esempio n. 6
0
def add_frontpage_post(reddit_post: Submission, snapshot: SnapshotModel,
                       rank: int) -> SnapshotFrontpageModel:
    """Record the post's rank and score for the given snapshot, saving or refreshing the post itself first."""

    post_id = reddit_post.id
    stored_post = post_service.get_post_by_id(post_id)

    # Refresh a known post, otherwise create it.
    if stored_post:
        stored_post = post_service.update_post(stored_post, reddit_post)
    else:
        logger.debug(f"Saving post {post_id}")
        stored_post = post_service.add_post(reddit_post)

    # One frontpage entry ties the stored post to the snapshot at the given rank.
    entry = SnapshotFrontpageModel()
    entry.post_id = stored_post.id
    entry.snapshot_id = snapshot.id
    entry.rank = rank
    entry.score = reddit_post.score

    return _snapshot_data.insert(entry)
Esempio n. 7
0
def send_discord_message(mod_action: ModActionModel):
    """
    Post a Discord embed describing the given mod action to the configured webhook.

    The embed title starts as "<mod>: <action>" and is refined based on the action's target
    (comment, post or user, checked in that order). When the target is found in the database,
    a permalink to it is attached as the embed URL. The action's details become the embed
    description and its description becomes a "Description" field.
    """
    logger.info(f"Attempting to send a message to Discord for {mod_action}")

    embed_json = {
        "author": {
            "name": f"Mod Log - /u/{mod_action.mod}",
        },
        "title": f"{mod_action.mod}: {mod_action.action}",
        "timestamp": mod_action.created_time.isoformat(),
        "fields": [],
        "color": 0xCC0000,
    }

    # Add a URL if there's a specific thing we can focus on, also update title if possible.
    target = None
    if mod_action.target_comment_id:
        target = comment_service.get_comment_by_id(mod_action.target_comment_id)
        embed_json["title"] = f"{mod_action.mod}: {mod_action.action} by {mod_action.target_user}"
    elif mod_action.target_post_id:
        target = post_service.get_post_by_id(mod_action.target_post_id)
        # The lookup may find nothing; keep the generic title instead of failing on target.title.
        if target:
            title = discord.escape_formatting(f"{mod_action.mod}: {mod_action.action} - {target.title}")
            # Discord caps embed titles at 256 characters, so trim long ones and mark the cut.
            embed_json["title"] = (title[:253] + "...") if len(title) > 256 else title
    elif mod_action.target_user:
        target = user_service.get_user(mod_action.target_user)
        embed_json["title"] = f"{mod_action.mod}: {mod_action.action} - {mod_action.target_user}"
    if target:
        embed_json["url"] = reddit_utils.make_permalink(target)

    if mod_action.details:
        embed_json["description"] = mod_action.details

    if mod_action.description:
        desc_info = {"name": "Description", "value": mod_action.description}
        embed_json["fields"].append(desc_info)

    discord.send_webhook_message(config_loader.DISCORD["webhook_url"], {"embeds": [embed_json]})
Esempio n. 8
0
def _is_routine_reddit_action(mod_action: ModAction) -> bool:
    """Return True for actions by the "reddit" account that should not trigger a notification on their own."""
    if mod_action.mod.name != "reddit":
        return False
    # Temporary mutes expiring always come from reddit and we don't really care about those.
    if mod_action.action == "unmuteuser":
        return True
    # Crowd control removals are filtered to the mod queue, so they are not interesting either.
    return mod_action.action in ("removecomment", "removelink") and mod_action.details == "Crowd Control"


def _handle_unlisted_mod(mod_action: ModAction) -> bool:
    """Store the acting account if needed, refresh the mod list for new mods, and return whether to notify."""
    mod_user = user_service.get_user(mod_action.mod.name)
    if not mod_user:
        mod_user = user_service.add_user(mod_action.mod)

    # Actions from accounts outside the active mod list are reported unless they are routine reddit ones.
    should_notify = not _is_routine_reddit_action(mod_action)

    # For non-admin cases, check to see if they're a [new] mod of the subreddit and refresh the list if so.
    if mod_action.mod not in ("Anti-Evil Operations", "reddit"):
        logger.info(f"Unknown mod found: {mod_action.mod.name}")
        if mod_user.username in subreddit.moderator():
            logger.debug(f"Updating mod status for {mod_user}")
            mod_user.moderator = True
            base_data_service.update(mod_user)
            _get_moderators()

    return should_notify


def _sync_target_user(mod_action: ModAction):
    """Make sure the targeted user is stored and mirror ban, unban and removemoderator actions onto the record."""
    user = user_service.get_user(mod_action.target_author)
    if not user:
        logger.debug(f"Saving user {mod_action.target_author}")
        user = user_service.add_user(reddit.redditor(name=mod_action.target_author))

    if mod_action.action == "banuser":
        # Weirdly this returns a ListingGenerator so we have to iterate over it; there should only be one though.
        # If the user isn't banned, the loop won't execute.
        for ban_user in subreddit.banned(redditor=user.username):
            if ban_user.days_left is None:
                # Permanent if days_left is None.
                user.banned_until = "infinity"
            else:
                # days_left will show 0 if they were banned for 1 day a few seconds ago; it seems like it
                # rounds down based on the time of the ban occurring, so even if the ban happened a few
                # seconds before getting to this point, we add an extra day onto the reported number.
                ban_start = datetime.fromtimestamp(mod_action.created_utc, tz=timezone.utc)
                user.banned_until = ban_start + timedelta(days=ban_user.days_left + 1)
            break
        base_data_service.update(user)
    elif mod_action.action == "unbanuser":
        user.banned_until = None
        base_data_service.update(user)
    elif mod_action.action == "removemoderator":
        logger.debug(f"Updating mod status for {user}")
        user.moderator = False
        base_data_service.update(user)
        _get_moderators()


def _sync_target_post(mod_action: ModAction):
    """Add or update the post targeted by the action, restoring a deleted body from the action if possible."""
    post_id = mod_action.target_fullname.split("_")[1]
    post = post_service.get_post_by_id(post_id)
    reddit_post = reddit.submission(id=post_id)

    # Add or update post as necessary.
    if not post:
        logger.debug(f"Saving post {post_id}")
        post = post_service.add_post(reddit_post)
    else:
        post = post_service.update_post(post, reddit_post)

    # If the user deleted their text post, the mod action still has the post body that we can save in place.
    if post.deleted and post.body == "[deleted]" and post.body != mod_action.target_body:
        post.body = mod_action.target_body
        base_data_service.update(post)


def _sync_target_comment(mod_action: ModAction):
    """Add or update the comment targeted by the action (plus its post and parent comments when new)."""
    comment_id = mod_action.target_fullname.split("_")[1]
    comment = comment_service.get_comment_by_id(comment_id)
    reddit_comment = reddit.comment(id=comment_id)

    if not comment:
        # Post needs to exist before we can add a comment for it, start with that.
        if not post_service.get_post_by_id(reddit_comment.submission.id):
            post_service.add_post(reddit_comment.submission)

        # Since all comments will reference a parent if it exists, add all parent comments first.
        logger.debug(f"Saving parent comments of {comment_id}")
        comment_service.add_comment_parent_tree(reddit, reddit_comment)
        logger.debug(f"Saving comment {comment_id}")
        comment = comment_service.add_comment(reddit_comment)
    else:
        # Update our record of the comment if necessary.
        comment = comment_service.update_comment(comment, reddit_comment)

    # If the user deleted their comment, the mod action still has the body that we can save in place.
    if comment.deleted and comment.body != mod_action.target_body:
        comment.body = mod_action.target_body
        base_data_service.update(comment)


def parse_mod_action(mod_action: ModAction):
    """
    Process a single PRAW ModAction. Assumes that reddit and subreddit are already instantiated by
    one of the two entry points (monitor_stream or load_archive).

    Steps, in order: skip actions that are already stored; decide whether a Discord notification is
    needed (always-notify actions, or non-routine actions from accounts outside the active mod list);
    make sure the targeted user, post or comment is stored and up to date; store the mod action;
    finally send the notification if one was requested.
    """

    # Check if we've already processed this mod action, do nothing if so.
    mod_action_id = mod_action.id.replace("ModAction_", "")
    if mod_action_service.get_mod_action_by_id(mod_action_id):
        logger.debug(f"Already processed, skipping mod action {mod_action_id}")
        return

    logger.info(
        f"Processing mod action {mod_action_id}: {mod_action.mod.name} - "
        f"{mod_action.action} - {mod_action.target_fullname}"
    )

    # Some actions always notify; actions by an unknown moderator or admin may notify as well.
    send_notification = mod_action.action in mod_constants.MOD_ACTIONS_ALWAYS_NOTIFY
    if mod_action.mod.name not in active_mods and _handle_unlisted_mod(mod_action):
        send_notification = True

    # See if the user targeted by this action exists in the system, add them if not.
    # Bans and similar user-focused actions independent of posts/comments will also have
    # a target_fullname value (t2_...) but won't be necessary to check after this.
    if mod_action.target_author:
        _sync_target_user(mod_action)

    # A fullname has exactly one prefix: t3_ targets a post, t1_ targets a comment.
    target_fullname = mod_action.target_fullname
    if target_fullname and target_fullname.startswith("t3_"):
        _sync_target_post(mod_action)
    elif target_fullname and target_fullname.startswith("t1_"):
        _sync_target_comment(mod_action)

    logger.debug(f"Saving mod action {mod_action_id}")
    saved_mod_action = mod_action_service.add_mod_action(mod_action)

    if send_notification:
        send_discord_message(saved_mod_action)