Example no. 1
0
def run_plan_bot(
    send_replies=False,
    skip_tracking=False,
    simulate_replies=False,
    limit=10,
    praw_site="dev",
    project="wpb-dev",
    log_level="1",
):
    """
    Run a single pass of Warren Plan Bot

    \b
    - Check list of posts replied to (If tracking is on)
    - Search for any new comments and submissions not on that list
    - Reply to any unreplied matching comments (If replies are on)
    - Update replied_to list (If replies and tracking is on)

    :param send_replies: actually post replies to Reddit (mutually exclusive
        with simulate_replies)
    :param skip_tracking: skip all Firestore reads/writes
    :param simulate_replies: log what would be replied without sending
    :param limit: max number of submissions/comments fetched per search
    :param praw_site: praw.ini site name; "prod" targets the live subreddit
    :param project: GCP project for the Firestore client
    :param log_level: "0" = WARNING, "1" = INFO, anything else = DEBUG
    :raises ValueError: if both send_replies and simulate_replies are set
    """
    # Map the string log-level flag onto a stdlib logging level.
    if log_level == "0":
        level_name = logging.WARNING
    elif log_level == "1":
        level_name = logging.INFO
    else:
        level_name = logging.DEBUG

    logging.basicConfig(
        level=level_name, format="%(asctime)s %(levelname)s %(name)s: %(message)s"
    )
    if log_level == "2":
        # silence debug-logging from external imports so we can have a
        # "quiet" debug log when we want it.   Additional imports
        # may mean needing to update this.
        for logger_name in ("prawcore", "urllib3", "smart_open"):
            logging.getLogger(logger_name).setLevel(logging.INFO)

    logger.info("Running a single pass of plan bot")
    pass_start_time = time.time()

    if simulate_replies and send_replies:
        raise ValueError(
            "--simulate-replies and --send-replies options are incompatible. at most one may be set"
        )

    # Change working directory so that praw.ini works, and so all files can be in this same folder. FIXME
    os.chdir(os.path.dirname(os.path.realpath(__file__)))
    # change dev to prod to shift to production bot
    reddit = praw.Reddit(praw_site)

    # Ensure that we don't accidentally write to Reddit
    reddit.read_only = not send_replies

    with open(PLANS_FILE) as json_file:
        pure_plans = json.load(json_file)

    with open(PLANS_CLUSTERS_FILE) as json_file:
        plan_clusters = json.load(json_file)

    # Attach the full plan records to each cluster so downstream matching can
    # treat clusters and plain plans uniformly.
    for plan in plan_clusters:
        plan["is_cluster"] = True
        plan["plans"] = [
            next(p for p in pure_plans if p["id"] == plan_id)
            for plan_id in plan["plan_ids"]
        ]

    plans = pure_plans + plan_clusters

    with open(VERBATIMS_FILE) as json_file:
        verbatims = json.load(json_file)

    if skip_tracking:
        posts_db = None
        comments_progress_ref = None
    else:
        db = firestore.Client(project=project)

        posts_db = db.collection("posts")

        # get post ids from database only if we don't already have them
        if not POST_IDS_PROCESSED:
            # Load the list of posts processed, or start with an empty set if none
            posts_processed = posts_db.where("processed", "==", True).stream()

            POST_IDS_PROCESSED.update({post.id for post in posts_processed})

        # Track progress of comments
        comments_progress_ref = db.collection("progress").document("comments")

    # PEP 8 (E731): bind the per-post processing as a def, not a named lambda.
    def process_the_post(post):
        return process_post(
            post,
            plans,
            verbatims,
            posts_db,
            POST_IDS_PROCESSED,
            send=send_replies,
            simulate=simulate_replies,
            skip_tracking=skip_tracking,
        )

    subreddit_name = "ElizabethWarren" if praw_site == "prod" else "WPBSandbox"

    # Get the subreddit
    subreddit = reddit.subreddit(subreddit_name)

    # Get the number of new submissions up to the limit
    # Note: If this gets slow, we could switch this to pushshift
    for submission in subreddit.search(
        "warrenplanbot", sort="new", time_filter="all", limit=limit
    ):
        # turn this into our more standardized class
        submission = reddit_util.Submission(submission)
        process_the_post(submission)

    for pushshift_comment in pushshift.search_comments(
        "warrenplanbot", subreddit_name, limit=limit
    ):
        # Wrap the raw pushshift payload in a praw Comment, then in our wrapper.
        comment = reddit_util.Comment(
            praw.models.Comment(reddit, _data=pushshift_comment)
        )

        process_the_post(comment)

    # Get new comments since we last ran.
    #
    # subreddit.comments() returns the newest comments first so we
    # need to reverse it so that the comments we're iterating over are getting newer.
    # With no specified params, it returns newest 100 comments in the
    # subreddit.
    comments_params = get_comments_params(comments_progress_ref)
    for comment in reversed(list(subreddit.comments(params=comments_params))):
        comment = reddit_util.Comment(comment)
        if re.search("warrenplanbot", comment.text, re.IGNORECASE):
            process_the_post(comment)

        # update the cursor after processing the comment
        if not skip_tracking:
            comments_progress_ref.set({"newest": comment.fullname}, merge=True)

    logger.info(
        f"Single pass of plan bot took: {round(time.time() - pass_start_time, 2)}s"
    )
Example no. 2
0
def run_plan_bot(
    send_replies=False,
    skip_tracking=False,
    simulate_replies=False,
    limit=10,
    praw_site="dev",
    project="wpb-dev",
):
    """
    Run a single pass of Warren Plan Bot

    \b
    - Check list of posts replied to (If tracking is on)
    - Search for any new comments and submissions not on that list
    - Reply to any unreplied matching comments (If replies are on)
    - Update replied_to list (If replies and tracking is on)

    :param send_replies: actually post replies to Reddit (mutually exclusive
        with simulate_replies)
    :param skip_tracking: skip all Firestore reads/writes
    :param simulate_replies: log what would be replied without sending
    :param limit: max number of submissions/comments fetched per search
    :param praw_site: praw.ini site name; "prod" targets the live subreddit
    :param project: GCP project for the Firestore client
    :raises ValueError: if both send_replies and simulate_replies are set
    """
    print("Running a single pass of plan bot")
    pass_start_time = time.time()

    if simulate_replies and send_replies:
        raise ValueError(
            "--simulate-replies and --send-replies options are incompatible. at most one may be set"
        )

    # Change working directory so that praw.ini works, and so all files can be in this same folder. FIXME
    os.chdir(os.path.dirname(os.path.realpath(__file__)))
    # change dev to prod to shift to production bot
    reddit = praw.Reddit(praw_site)

    # Ensure that we don't accidentally write to Reddit
    reddit.read_only = not send_replies

    with open(PLANS_FILE) as json_file:
        pure_plans = json.load(json_file)

    with open(PLANS_CLUSTERS_FILE) as json_file:
        plan_clusters = json.load(json_file)

    # Attach the full plan records to each cluster so downstream matching can
    # treat clusters and plain plans uniformly.
    for plan in plan_clusters:
        plan["is_cluster"] = True
        plan["plans"] = [
            next(p for p in pure_plans if p["id"] == plan_id)
            for plan_id in plan["plan_ids"]
        ]

    plans = pure_plans + plan_clusters

    if skip_tracking:
        posts_db = None
        # BUG FIX: was `{}` (an empty dict); the tracking branch below builds a
        # set, so use set() for a consistent type in both branches.
        post_ids_processed = set()
    else:
        db = firestore.Client(project=project)

        posts_db = db.collection("posts")

        # Load the list of posts replied to or start with empty list if none
        posts_replied_to = posts_db.where("replied", "==", True).stream()
        # TODO migrate posts replied=True to have processed=True, and remove the query above (#84)
        posts_processed = posts_db.where("processed", "==", True).stream()

        # include processed posts in replied to
        post_ids_processed = {post.id for post in posts_replied_to}.union(
            {post.id for post in posts_processed}
        )

    # Single place for the shared process_post arguments, so the submission and
    # comment loops below stay in sync.
    def process_the_post(post):
        return process_post(
            post,
            plans,
            posts_db,
            post_ids_processed,
            send=send_replies,
            simulate=simulate_replies,
            skip_tracking=skip_tracking,
        )

    subreddit_name = "ElizabethWarren" if praw_site == "prod" else "WPBSandbox"

    # Get the subreddit
    subreddit = reddit.subreddit(subreddit_name)

    # Get the number of new submissions up to the limit
    # Note: If this gets slow, we could switch this to pushshift
    for submission in subreddit.search(
        "warrenplanbot", sort="new", time_filter="all", limit=limit
    ):
        # turn this into our more standardized class
        submission = reddit_util.Submission(submission)
        process_the_post(submission)

    for pushshift_comment in pushshift.search_comments(
        "warrenplanbot", subreddit_name, limit=limit
    ):
        # Wrap the raw pushshift payload in a praw Comment, then in our wrapper.
        comment = reddit_util.Comment(
            praw.models.Comment(reddit, _data=pushshift_comment)
        )

        process_the_post(comment)

    print(f"Single pass of plan bot took: {round(time.time() - pass_start_time, 2)}s")
Example no. 3
0
 def test_nonexistent_attribute(self, mock_submission):
     """Accessing an attribute the wrapped submission lacks raises AttributeError."""
     wrapped = reddit_util.Submission(mock_submission)
     with pytest.raises(AttributeError):
         wrapped.non_existent
Example no. 4
0
 def test_reply(self, mock_submission):
     """reply() forwards to the wrapped submission and returns its result."""
     wrapped = reddit_util.Submission(mock_submission)
     result = wrapped.reply("a nice reply")
     mock_submission.reply.assert_called_once_with("a nice reply")
     assert result == "a comment"
Example no. 5
0
 def test_type(self, mock_submission):
     """The wrapper identifies itself with type == "submission"."""
     assert reddit_util.Submission(mock_submission).type == "submission"
Example no. 6
0
 def test_text(self, mock_submission):
     """text concatenates title and selftext; selftext itself is untouched."""
     wrapped = reddit_util.Submission(mock_submission)
     combined = "title of submission\ntext of submission"
     assert wrapped.text == combined, "text attribute added"
     assert wrapped.selftext == "text of submission", "selftext preserved"