def run_plan_bot(
    send_replies=False,
    skip_tracking=False,
    simulate_replies=False,
    limit=10,
    praw_site="dev",
    project="wpb-dev",
    log_level="1",
):
    """
    Run a single pass of Warren Plan Bot

    \b
    - Check list of posts replied to (If tracking is on)
    - Search for any new comments and submissions not on that list
    - Reply to any unreplied matching comments (If replies are on)
    - Update replied_to list (If replies and tracking is on)
    """
    if log_level == "0":
        level_name = logging.WARNING
    elif log_level == "1":
        level_name = logging.INFO
    else:
        level_name = logging.DEBUG

    logging.basicConfig(
        level=level_name, format="%(asctime)s %(levelname)s %(name)s: %(message)s"
    )

    if log_level == "2":
        # Silence debug logging from external imports so we can have a
        # "quiet" debug log when we want it. Additional imports may mean
        # needing to update this list.
        for logger_name in ("prawcore", "urllib3", "smart_open"):
            logging.getLogger(logger_name).setLevel(logging.INFO)

    logger.info("Running a single pass of plan bot")
    pass_start_time = time.time()

    if simulate_replies and send_replies:
        raise ValueError(
            "--simulate-replies and --send-replies options are incompatible; at most one may be set"
        )

    # Change working directory so that praw.ini works, and so all files can be in this same folder. FIXME
    os.chdir(os.path.dirname(os.path.realpath(__file__)))

    # Change dev to prod to shift to the production bot
    reddit = praw.Reddit(praw_site)
    # Ensure that we don't accidentally write to Reddit
    reddit.read_only = not send_replies

    with open(PLANS_FILE) as json_file:
        pure_plans = json.load(json_file)

    with open(PLANS_CLUSTERS_FILE) as json_file:
        plan_clusters = json.load(json_file)

    for plan in plan_clusters:
        plan["is_cluster"] = True
        plan["plans"] = [
            next(filter(lambda p: p["id"] == plan_id, pure_plans))
            for plan_id in plan["plan_ids"]
        ]

    plans = pure_plans + plan_clusters

    with open(VERBATIMS_FILE) as json_file:
        verbatims = json.load(json_file)

    if skip_tracking:
        posts_db = None
        comments_progress_ref = None
    else:
        db = firestore.Client(project=project)
        posts_db = db.collection("posts")

        # Get post ids from the database only if we don't already have them
        if not POST_IDS_PROCESSED:
            # Load the set of posts already processed, or start empty if none
            posts_processed = posts_db.where("processed", "==", True).stream()
            POST_IDS_PROCESSED.update({post.id for post in posts_processed})

        # Track progress through the comment stream
        comments_progress_ref = db.collection("progress").document("comments")

    process_the_post = lambda post: process_post(
        post,
        plans,
        verbatims,
        posts_db,
        POST_IDS_PROCESSED,
        send=send_replies,
        simulate=simulate_replies,
        skip_tracking=skip_tracking,
    )

    subreddit_name = "ElizabethWarren" if praw_site == "prod" else "WPBSandbox"
    # Get the subreddit
    subreddit = reddit.subreddit(subreddit_name)

    # Get new matching submissions, up to the limit
    # Note: If this gets slow, we could switch this to pushshift
    for submission in subreddit.search(
        "warrenplanbot", sort="new", time_filter="all", limit=limit
    ):
        # Turn this into our more standardized class
        submission = reddit_util.Submission(submission)
        process_the_post(submission)

    for pushshift_comment in pushshift.search_comments(
        "warrenplanbot", subreddit_name, limit=limit
    ):
        comment = reddit_util.Comment(
            praw.models.Comment(reddit, _data=pushshift_comment)
        )
        process_the_post(comment)

    # Get new comments since we last ran.
    #
    # subreddit.comments() returns the newest comments first, so we reverse the
    # listing so that the comments we iterate over get newer as we go.
    # With no params specified, it returns the newest 100 comments in the subreddit.
    comments_params = get_comments_params(comments_progress_ref)
    for comment in reversed(list(subreddit.comments(params=comments_params))):
        comment = reddit_util.Comment(comment)
        if re.search("warrenplanbot", comment.text, re.IGNORECASE):
            process_the_post(comment)
        # Update the cursor after processing each comment
        if not skip_tracking:
            comments_progress_ref.set({"newest": comment.fullname}, merge=True)

    logger.info(
        f"Single pass of plan bot took: {round(time.time() - pass_start_time, 2)}s"
    )
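# A minimal sketch of what get_comments_params (defined elsewhere in this
# module) is assumed to do: read the saved "newest" cursor and turn it into
# listing params so Reddit only returns comments newer than that fullname.
# The function name and the exact shape of the Firestore document are
# assumptions for illustration; this is not the repo's implementation.
def _example_get_comments_params(comments_progress_ref):
    if comments_progress_ref is None:
        # Tracking is off, so fall back to the default listing (newest 100)
        return None
    snapshot = comments_progress_ref.get()
    newest = (snapshot.to_dict() or {}).get("newest")
    # Reddit listings page by fullname; "before" means "newer than"
    return {"before": newest} if newest else None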
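# A minimal sketch of how run_plan_bot might be exposed on the command line.
# The "\b" marker in the docstring and the "--simulate-replies"/"--send-replies"
# names in the ValueError above suggest a Click command; this wrapper is an
# assumption, not necessarily the repo's actual entry point.
import click


@click.command(help=run_plan_bot.__doc__)
@click.option("--send-replies", is_flag=True, default=False)
@click.option("--skip-tracking", is_flag=True, default=False)
@click.option("--simulate-replies", is_flag=True, default=False)
@click.option("--limit", default=10, type=int)
@click.option("--praw-site", default="dev")
@click.option("--project", default="wpb-dev")
@click.option("--log-level", default="1")
def cli(**kwargs):
    # Click maps --send-replies to send_replies etc., matching the
    # keyword arguments of run_plan_bot above
    run_plan_bot(**kwargs)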
def test_nonexistent_attribute(self, mock_submission):
    submission = reddit_util.Submission(mock_submission)
    with pytest.raises(AttributeError):
        submission.non_existent


def test_reply(self, mock_submission):
    submission = reddit_util.Submission(mock_submission)
    reply = submission.reply("a nice reply")
    assert reply == "a comment"
    mock_submission.reply.assert_called_once_with("a nice reply")


def test_type(self, mock_submission):
    submission = reddit_util.Submission(mock_submission)
    assert submission.type == "submission"


def test_text(self, mock_submission):
    submission = reddit_util.Submission(mock_submission)
    assert (
        submission.text == "title of submission\ntext of submission"
    ), "text attribute added"
    assert submission.selftext == "text of submission", "selftext preserved"
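# A minimal sketch of the kind of fixture these tests assume. The real
# mock_submission fixture lives elsewhere (the top of this file or a
# conftest.py); the hypothetical example_mock_submission below only
# illustrates the attributes the tests above rely on.
from unittest.mock import MagicMock


@pytest.fixture
def example_mock_submission():
    # A spec keeps unknown attribute access raising AttributeError,
    # which test_nonexistent_attribute depends on
    submission = MagicMock(spec=["title", "selftext", "reply"])
    submission.title = "title of submission"
    submission.selftext = "text of submission"
    submission.reply.return_value = "a comment"
    return submission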