def migrate_upvoted(origin_account: praw.Reddit, destination_account: praw.Reddit, posts: list, verbose: bool = True):
    """ Migrates upvoted posts from one reddit account to another """
    if utils.is_null_or_empty(posts):
        print("Upvoted posts list is empty or was not found.")
        return

    for post in posts:
        try:
            if isinstance(post, praw.models.Submission):
                # Remove the vote from the origin account
                origin_account.submission(id=post.id).clear_vote()
                # Add the vote on the destination account
                submission = destination_account.submission(id=post.id)
                if submission.likes is None:
                    submission.upvote()
            elif isinstance(post, praw.models.Comment):
                # Comments need the comment endpoint, not the submission endpoint
                origin_account.comment(id=post.id).clear_vote()
                comment = destination_account.comment(id=post.id)
                if comment.likes is None:
                    comment.upvote()
        except Exception as ex:
            log.error(ex, f"An error occurred while migrating the post id {post.id}.")
def migrate_saved(origin_account: praw.Reddit, destination_account: praw.Reddit, posts: list, verbose: bool = True):
    """ Migrates saved posts from one reddit account to another """
    if utils.is_null_or_empty(posts):
        print("Posts list is empty or was not found.")
        return

    print(f"Total items: {len(posts)}")
    for index, post in enumerate(posts):
        try:
            print(f"Migrating post #{index + 1} with id: {post.id}")
            if isinstance(post, praw.models.Submission):
                # Unsave on the origin account, then save on the destination account
                origin_account.submission(id=post.id).unsave()
                submission = destination_account.submission(id=post.id)
                if submission.saved is False:
                    submission.save()
            elif isinstance(post, praw.models.Comment):
                origin_account.comment(id=post.id).unsave()
                comment = destination_account.comment(id=post.id)
                if comment.saved is False:
                    comment.save()
        except Exception as ex:
            log.error(ex, f"An error occurred while migrating the post id {post.id}.")
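# A minimal usage sketch for the two migrate_* helpers above, assuming script-type
# app credentials for both accounts; every credential value below is a placeholder.
import praw

origin_account = praw.Reddit(client_id="...", client_secret="...",
                             username="old_account", password="...",
                             user_agent="account-migrator")
destination_account = praw.Reddit(client_id="...", client_secret="...",
                                  username="new_account", password="...",
                                  user_agent="account-migrator")

# Collect the saved/upvoted items from the origin account, then migrate them.
saved_items = list(origin_account.user.me().saved(limit=None))
migrate_saved(origin_account, destination_account, saved_items)

upvoted_items = list(origin_account.user.me().upvoted(limit=None))
migrate_upvoted(origin_account, destination_account, upvoted_items)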
def init_submissions(
    reddit: Reddit,
    subreddit: Subreddit,
    database: Database,
    patch_notes_file: PatchNotesFile,
    submission_content_path: str,
    community_submission_content_path: str,
) -> Tuple[Submission, Submission]:
    """
    Initializes the primary and community submission (i.e. "Reddit threads") objects.
    If they do not exist in the database, then this function creates them.
    Otherwise, it retrieves the submissions via their URL from the database.

    Returns:
        A tuple containing the primary submission and community submission objects
    """
    # Main submission
    submission_content = processed_submission_content(submission_content_path, patch_notes_file)
    submission: Submission = None
    submission_url = database.get_submission_url(tag="main")

    # Create the main submission if it does not exist
    if submission_url is None:
        submission = subreddit.submit(title=SUBMISSION_TITLE, selftext=submission_content)
        database.insert_submission_url("main", submission.url)
        submission_url = submission.url
    else:
        # Obtain submission via URL
        submission = reddit.submission(url=submission_url)

    # Community submission
    community_submission_content = processed_community_notes_thread_submission_content(
        community_submission_content_path, patch_notes_file, submission_url)
    community_submission: Submission = None
    community_submission_url = database.get_submission_url(tag="community")

    # Create the community submission if it does not exist
    if community_submission_url is None:
        community_submission = subreddit.submit(
            title=COMMUNITY_SUBMISSION_TITLE,
            selftext=community_submission_content,
        )
        database.insert_submission_url("community", community_submission.url)

        # Update the main Reddit thread's in-line URL to point at the community submission URL
        updated_text = submission.selftext.replace(
            "#community-patch-notes-thread-url", community_submission.url)
        submission.edit(body=updated_text)
    else:
        # Obtain submission via URL
        community_submission = reddit.submission(url=community_submission_url)

    return submission, community_submission
def check_messages(reddit: praw.Reddit, db: DatabaseHelper, rh: RedditHelper, config):
    # process each message and add subscriptions
    for message in reddit.inbox.messages(limit=25):
        # we only have one DM action
        if message.subject != "subscribe":
            continue
        # check whether the ID makes sense
        elif not id_regex.match(message.body.strip()):
            continue
        elif db.check_subscription(message.body, message.author.name):
            # user already subscribed
            continue

        author = message.author.name
        submission = reddit.submission(id=message.body)
        # catch 404 errors in case the post doesn't exist (PRAW fetches lazily,
        # so the request only happens when an attribute is accessed below)
        try:
            if submission.subreddit.display_name != config["subreddit"]:
                # wrong subreddit, skip
                continue
            elif rh.already_solved(submission):
                continue
        except exceptions.NotFound:
            # post doesn't exist, skip
            continue

        db.add_subscriber(submission.id, author)
        logger.info(f"{author} subscribed to {submission.id}.")
def _set_submission_upvotes(api: praw.Reddit, id_):
    sub = api.submission(id_)
    return """
    MATCH (n {id: "%s"})
    WITH n
    SET n.score = %s, n.upvote_ratio = %s;
    """ % (sub.id, sub.score, sub.upvote_ratio)
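# Hypothetical usage sketch for _set_submission_upvotes: generate the Cypher update
# statements for a batch of submission ids and write them to a file for a later
# import step. The ids, the output filename, and the praw.Reddit instance `api`
# are all placeholders.
statements = [_set_submission_upvotes(api, post_id) for post_id in ("abc123", "def456")]
with open("update_scores.cypher", "w") as fh:
    fh.write("\n".join(statements))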
class ThreadUpdater(GameThreadThread):
    interval = timedelta(minutes=3)

    def __init__(self, *args, **kwargs):
        self.r = Reddit('gamethread')
        self.renderer = Renderer()
        self.envs = {}
        super().__init__(*args, **kwargs)

    def lap(self):
        self.session = self.Session()
        for game in self.unarchived_games():
            for thread in game.threads:
                try:
                    self.logger.debug("Update %r", thread)
                    sub = thread.sub
                    reddit_sub = self.r.subreddit(sub.name)
                    thread_config = list(
                        filter(lambda x: x['id'] == thread.thread_type,
                               sub.config['threads']))[0]
                    title, body = self.renderer.render_thread(
                        reddit_sub, sub, thread_config, game, thread)
                    if body != thread.body:
                        self.logger.info("Updating thread %s", thread)
                        submission = self.r.submission(id=thread.thread_id)
                        submission.edit(body)
                        thread.body = body
                except Exception as e:
                    self.logger.exception("Updating submission %s failed", thread)
        self.session.commit()
def post_vid_to_reddit(vid_id: str, post_id: str, reddit: praw.Reddit = None):
    if vid_id is None or post_id is None:
        return
    comment = "lazer replay https://www.youtube.com/watch?v=" + vid_id
    if reddit is None:
        reddit = initialize()
    return reddit.submission(id=post_id).reply(comment)
def get_via_praw(post_id, post_type, praw_cred):
    if praw_cred is None:
        raise IOError("Missing praw credentials")

    from praw import Reddit
    reddit = Reddit(client_id=praw_cred["client_id"],
                    client_secret=praw_cred["client_secret"],
                    password=praw_cred["password"],
                    user_agent=praw_cred["user_agent"],
                    username=praw_cred["username"])

    if post_type == "post":
        submission = reddit.submission(post_id)
        created_utc = submission.mod.thing.created_utc
        selftext = submission.mod.thing.selftext
        selftext = re.sub(r'\s+', ' ', selftext)
        selftext = selftext.replace("'", "\\'")
        title = submission.mod.thing.title
        title = title.replace("'", "\\'")
        # Note: single-quoted fields make this a Python-literal-style string
        # rather than strict JSON.
        out_json = "[{'id':'" + post_id + "','selftext':'" + selftext + \
                   "','created_utc':" + str(int(created_utc)) + ",'title':'" + title + "'}]"
    else:
        submission = reddit.comment(post_id)
        created_utc = submission.mod.thing.created_utc
        selftext = submission.mod.thing.body
        selftext = re.sub(r'\s+', ' ', selftext)
        selftext = selftext.replace("'", "\\'")
        title = ""
        out_json = "[{'id':'" + post_id + "','body':'" + selftext + \
                   "','created_utc':" + str(int(created_utc)) + "}]"
    return out_json
def get_comments(directory: str, reddit: praw.Reddit, submission_id: str, comments_amount: int) -> list:
    """Gets all of the comments from a specific reddit post but only returns the amount specified

    Args:
        directory (str): The directory you would like to save the comments to
        reddit (praw.Reddit): The reddit object you're using
        submission_id (str): The id of the reddit post you want to grab the comments from
        comments_amount (int): The amount of comments you want returned

    Returns:
        list: The comments list but only up to the amount you've specified
    """
    # Note: `directory` is accepted for the caller's convenience but is not used here.
    submission = reddit.submission(id=submission_id)
    print(submission.title)
    submission.comment_sort = 'best'
    submission.comments.replace_more()

    comments = []
    comments.append("{} submitted by {}|||{}".format(submission.title, submission.author, submission.url))
    for index, top_comment in enumerate(submission.comments):
        formatted = "{}|||{}|||https://www.reddit.com{}|||{}".format(
            str(index + 1), top_comment.body, top_comment.permalink, top_comment.id)
        comments.append(formatted)
    return comments[:comments_amount]
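# Hypothetical usage sketch for get_comments above; the credentials, the output
# directory, and the submission id are placeholders.
import praw

reddit = praw.Reddit(client_id="...", client_secret="...", user_agent="comment-grabber")
top_ten = get_comments("./comments", reddit, "abc123", 10)
for line in top_ten:
    print(line)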
def get_top_level_comments(
    reddit_instance: praw.Reddit,
    delta: int,
    depth: int,
    sort: str,
    relative: Union[int, float, str] = "now",
) -> List[Comment]:
    if delta < -1:
        logger.error(f"Negative time delta {delta}.")
        sys.exit(1)

    submission = reddit_instance.submission(id=CONFIG.reddit_thread_id)
    submission.comment_sort = sort
    get_more_comments(
        submission.comments.list(),
        delta=delta,
        depth=depth,
        relative=relative,
        sort=sort,
    )
    # `top_level_comments` is assumed to be populated elsewhere (presumably by
    # get_more_comments); keep the non-stickied comments that pass the time check.
    total_comments = [
        i for i in top_level_comments
        if ((_check_time(i, delta=delta) or delta != -1) and not i.stickied)
    ]
    return total_comments
def get_context(comment: Comment, reddit: Reddit):
    submission = reddit.submission(id=comment.link_id.replace('t3_', ''))
    parent_comment = None
    if not comment.parent_id == comment.link_id:
        # not a top-level comment, try to retrieve the parent comment
        parent_comment = reddit.comment(id=comment.parent_id.replace('t1_', ''))
    return parent_comment, submission
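# Hypothetical usage sketch for get_context; the comment id is a placeholder.
# For a top-level comment the returned parent_comment is None.
comment = reddit.comment(id="c0mm3nt")
parent_comment, submission = get_context(comment, reddit)
print(submission.title, "<-", parent_comment.body if parent_comment else "(top-level)")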
def test_write_submission_json(test_submission_id: str, tmp_path: Path, test_format: str, reddit_instance: praw.Reddit):
    archiver_mock = MagicMock()
    archiver_mock.args.format = test_format
    test_path = Path(tmp_path, 'test')
    test_submission = reddit_instance.submission(id=test_submission_id)
    archiver_mock.file_name_formatter.format_path.return_value = test_path
    Archiver.write_entry(archiver_mock, test_submission)
def test_find_resources(test_submission_id: str, reddit_instance: praw.Reddit):
    test_submission = reddit_instance.submission(id=test_submission_id)
    downloader = VReddit(test_submission)
    resources = downloader.find_resources()
    assert len(resources) == 1
    assert isinstance(resources[0], Resource)
    resources[0].download(120)
    assert resources[0].content is not None
def test_gallery_download(test_submission_id: str, expected_hashes: set[str], reddit_instance: praw.Reddit):
    test_submission = reddit_instance.submission(id=test_submission_id)
    gallery = Gallery(test_submission)
    results = gallery.find_resources()
    [res.download() for res in results]
    hashes = [res.hash.hexdigest() for res in results]
    assert set(hashes) == expected_hashes
def test_write_submission_xml(test_submission_id: str, tmp_path: Path, reddit_instance: praw.Reddit):
    archiver_mock = MagicMock()
    test_path = Path(tmp_path, 'test.xml')
    test_submission = reddit_instance.submission(id=test_submission_id)
    archiver_mock.file_name_formatter.format_path.return_value = test_path
    test_entry = SubmissionArchiveEntry(test_submission)
    Archiver._write_entry_xml(archiver_mock, test_entry)
    archiver_mock._write_content_to_disk.assert_called_once()
def test_find_resource(test_submission_id: str, expected_hash: str, reddit_instance: praw.Reddit):
    submission = reddit_instance.submission(id=test_submission_id)
    downloader = SelfPost(submission)
    results = downloader.find_resources()
    assert len(results) == 1
    assert isinstance(results[0], Resource)
    assert results[0].hash.hexdigest() == expected_hash
def defuzzed_submission_score(connection: Reddit, submission: Submission, iterations: int) -> float:
    """"De-fuzzes" a single submission's score by requesting the score from Reddit
    multiple times and returning the average."""
    score_sum = 0
    for _ in range(iterations):
        score_sum += connection.submission(submission.id).score
    return score_sum / iterations
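# A minimal usage sketch for defuzzed_submission_score; the credentials and the
# submission id below are hypothetical placeholders.
reddit = Reddit(client_id="...", client_secret="...", user_agent="score-defuzzer")
submission = reddit.submission("abc123")
print(defuzzed_submission_score(reddit, submission, iterations=5))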
class InstanceLogin:
    def __init__(self):
        """Generate reddit instance"""
        self.reddit = Reddit(user_agent='Comment Extraction (by /u/sgdzhou5)',
                             client_id='zanmra52bp9GSg',
                             client_secret='jrm-DL_IxEexh8WZbi1VduOmAFk')
        self.start_time = datetime.utcnow()

    def submission_extraction(self):
        url = "https://api.pushshift.io/reddit/submission/search?subreddit=iot&before={}&sort=desc&size=1000"
        count = 0
        id_ls = []
        previous_epoch = int(self.start_time.timestamp())
        while True:
            new_url = url.format(str(previous_epoch))
            response = requests.get(
                new_url,
                headers={'User-Agent': 'Comment Extraction (by /u/sgdzhou5)'})
            time.sleep(1)
            json_data = response.json()
            if 'data' not in json_data:
                break
            objects = json_data['data']
            if len(objects) == 0:
                break
            for obj in objects:
                previous_epoch = obj['created_utc'] - 1
                count += 1
                if obj['is_self']:
                    if 'selftext' not in obj:
                        continue
                    try:
                        id_ls.append(obj['id'])
                    except Exception:
                        print(f"Couldn't save post: {obj['url']}")
            print("Saved {} submissions through {}.".format(
                count,
                datetime.fromtimestamp(previous_epoch).strftime("%Y-%m-%d")))
        print(f"Saved {count}")
        return id_ls

    def extract_comment(self, id_ls):
        """Harvest comment data from Reddit"""
        rows = []
        for i in id_ls:
            submission = self.reddit.submission(id=i)
            submission.comments.replace_more(limit=None)
            title = submission.title
            comment_queue = submission.comments[:]
            while comment_queue:
                comment = comment_queue.pop(0)
                print(title)
                # DataFrame.append was removed in pandas 2.0, so collect rows
                # and build the DataFrame once at the end instead.
                rows.append({"Title": title, "Comment": comment.body})
                comment_queue.extend(comment.replies)
        return pd.DataFrame(rows, columns=["Title", "Comment"])
def get_comments(submission_id):
    reddit = Reddit(check_for_updates=False, user_agent=AGENT)
    submission = reddit.submission(id=submission_id)
    more_comments = submission.comments.replace_more()
    if more_comments:
        skipped_comments = sum(x.count for x in more_comments)
        logger.debug('Skipped %d MoreComments (%d comments)',
                     len(more_comments), skipped_comments)
    return submission.comments.list()
def test_get_post_details(test_submission_id: str, expected_dict: dict, reddit_instance: praw.Reddit):
    test_submission = reddit_instance.submission(id=test_submission_id)
    test_archive_entry = SubmissionArchiveEntry(test_submission)
    test_archive_entry._get_post_details()
    assert all([
        test_archive_entry.post_details.get(key) == expected_dict[key]
        for key in expected_dict.keys()
    ])
def __init__(self, post_id, ok_id=None, todo_id=None):
    """Initialize."""
    reddit = Reddit(check_for_updates=False)
    self.post = reddit.submission(id=post_id)
    self.ok = None
    self.todo = None
    if ok_id:
        self.ok = reddit.comment(id=ok_id)
    if todo_id:
        self.todo = reddit.comment(id=todo_id)
def get_reddit_details(posts: List[PostType], reddit_inst: praw.Reddit) -> List[PostType]:
    """
    Gets details of each post (i.e. score, title, etc.) and puts them into their PostType objects.
    :param posts: List of PostType objects with post ID and partial details.
    :param reddit_inst: Current reddit instance from praw.
    :return: The same list of PostType objects with their details filled in.
    """
    post: PostType
    submission: praw.models.Submission

    log.info("Collecting post data.")
    for post in posts:  # Iterate through posts
        getting_sub: bool = True  # Reset per post so every submission gets grabbed.
        while getting_sub:  # Loop to make sure the submission gets grabbed.
            try:
                submission = reddit_inst.submission(post.post_id)
                getting_sub = False
            except RECOVERABLE_EXC as err:
                log.debug(f"Couldn't get submission: {err}")
                time.sleep(5)
        try:
            # First get string items from the post.
            post.post_url = f"https://old.reddit.com{submission.permalink}"
            post.title = submission.title
            post.author = submission.author if submission.author is not None else "Deleted"
            post.flair = submission.link_flair_text if submission.link_flair_text is not None else ""
            post.link = submission.url if not submission.is_self else ""
            post.self_text = submission.selftext if submission.is_self else ""
            post.subreddit = submission.subreddit.display_name
            # Then booleans
            post.stickied = submission.stickied
            post.locked = submission.locked
            post.edited = submission.edited
            post.distinguished = submission.distinguished
            post.is_self = submission.is_self
            post.spoiler = submission.spoiler
            # Lastly numerics
            post.post_score = submission.score
            post.num_of_comments = submission.num_comments
            post.percent_upvoted = submission.upvote_ratio
            post.time_created = submission.created_utc
        except NameError:
            log.debug("Submission could not be found and was not caught.")
    log.info("All post data collected.")
    return posts
class RedditConnector(object):
    def __init__(self):
        super().__init__()
        self.reddit = Reddit(client_id="",
                             client_secret="",
                             user_agent="",
                             username="",
                             password="")

    def get_url_details(self, url):
        return self.reddit.submission(url=url)
def get_comments(url):
    # get all top-level comments from a reddit thread
    reddit = Reddit(client_id=reddit_api['client_id'],
                    client_secret=reddit_api['client_secret'],
                    user_agent=reddit_api['user_agent'])
    submission_id = url[url.find('comments'):].split('/')[1]
    submission = reddit.submission(submission_id)
    submission.comments.replace_more(limit=None)
    comments = []
    for comment in submission.comments:
        comments.append(comment.body)
    return comments
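# Hypothetical usage sketch for get_comments(url); the thread URL is a placeholder
# and reddit_api is assumed to be the credentials dict used above.
thread_url = "https://www.reddit.com/r/python/comments/abc123/example_thread/"
for body in get_comments(thread_url):
    print(body[:80])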
def scrape_cfc(reddit: Reddit, url: str, discern_candidates: bool = False) -> Vote:
    """Scrapes a CFC from Reddit."""
    try:
        post = reddit.submission(url=url)
    except praw.exceptions.InvalidURL:
        send_to_log('Invalid URL passed to scrape_cfc!', name='scrape')
        raise

    # Parse the title.
    title = post.title
    for cfc_indicator in ('cfc', 'call for candidates'):
        if title.lower().endswith(cfc_indicator):
            title = title[:-len(cfc_indicator)]
        elif title.lower().startswith(cfc_indicator):
            title = title[len(cfc_indicator):]
    title = title.strip().strip(':').strip()

    # Grab the options.
    options = []
    for top_level_comment in post.comments:
        if not top_level_comment.author or top_level_comment.author.name.lower() == 'automoderator':
            # Skip AutoModerator posts and posts by deleted accounts.
            continue
        option = parse_cfc(top_level_comment.body, discern_candidates=discern_candidates)
        if option is None or any(opt['id'] == option['id'] for opt in options):
            continue
        options.append(option)

    return {
        'id': 'new-vote',
        'name': title,
        'description': 'A vote on something.',
        'deadline': time.time() + 60 * 60 * 24,
        'options': options,
        'type': {
            'tally': 'spsv',
            'positions': 7,
            'min': 0,
            'max': 5
        }
    }
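# Hypothetical usage sketch for scrape_cfc; the thread URL is a placeholder.
vote = scrape_cfc(reddit, "https://www.reddit.com/r/placeholder/comments/abc123/cfc_example/",
                  discern_candidates=True)
print(vote['name'], "-", len(vote['options']), "options")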
def get_via_praw(post_id, post_type, praw_cred):
    if praw_cred is None:
        raise IOError("Missing praw credentials")

    from praw import Reddit
    reddit = Reddit(client_id=praw_cred["client_id"],
                    client_secret=praw_cred["client_secret"],
                    password=praw_cred["password"],
                    user_agent=praw_cred["user_agent"],
                    username=praw_cred["username"])

    if post_type == "post":
        submission = reddit.submission(post_id)
        try:
            created_utc = submission.mod.thing.created_utc
        except Exception:
            return ""
        selftext = submission.mod.thing.selftext
        selftext = re.sub(r'\s+', ' ', selftext)
        selftext = json.dumps(selftext)
        title = submission.mod.thing.title
        title = title.replace('"', "&quot;")
        title = json.dumps(title)
        author = submission.mod.thing.author
        if author is not None:
            author = author.name
        else:
            author = "unknown"
        out_json = '{"id":"' + post_id + '","selftext":' + selftext + \
                   ',"created_utc":' + str(int(created_utc)) + \
                   ',"title":' + title + ',"author":"' + author + '"}'
    else:
        submission = reddit.comment(post_id)
        created_utc = submission.mod.thing.created_utc
        selftext = submission.mod.thing.body
        selftext = re.sub(r'\s+', ' ', selftext)
        # json.dumps handles quoting/escaping, so the result stays valid JSON.
        selftext = json.dumps(selftext)
        out_json = '[{"id":"' + post_id + '","body":' + selftext + \
                   ',"created_utc":' + str(int(created_utc)) + '}]'

    try:
        out_json = json.loads(out_json)
    except ValueError:
        print("Invalid json: " + str(out_json))
        quit()
    return out_json
def submit_post(reddit: praw.Reddit, subreddit: praw.models.Subreddit, name, rss_link, title, summary, link):
    title = get_title(name, title)
    # Subreddit.submit returns a Submission object, so take its id before re-fetching.
    submission_id = subreddit.submit(title, url=link).id
    submission = reddit.submission(id=submission_id)
    comment = format_comment(title, summary, link, rss_link)
    submission.reply(comment)
    logging.info('New entry submitted:'
                 '\n\tTitle: {}'
                 '\n\tSummary: {}'
                 '\n\tLink: {}\n'.format(title, summary, link))
    logging.info('Commented on entry for {}:'
                 '\nComment: {}\n'.format(title, comment))
def make_package_infer_url(url: str, reddit: Reddit) -> dict:
    """Return package like astroturf.prawtools.make_package_training but for inference.

    Args:
        url (str): url for inference
        reddit (Reddit): ...
    """
    try:
        refobj = reddit.comment(url=url)
    except InvalidURL:
        refobj = reddit.submission(url=url)
    if isinstance(refobj, Submission):
        return make_package_infer_submission(refobj)
    elif isinstance(refobj, Comment):
        return make_package_infer_comment(refobj, reddit)
def get_comments(
    reddit: Reddit,
    post: Post,
    fetch_time_sec: float,
    fetch_wait_time_sec: float,
) -> List[Comment]:
    # Sleep for a random amount to stagger the requests
    time.sleep(random.uniform(0, fetch_wait_time_sec))
    LOG.debug("get_comments for %s", post.post_id)

    start_time = time.time()
    fetch_times = np.arange(start_time, start_time + fetch_time_sec, fetch_wait_time_sec)
    comment_id_dict = dict()
    for i, fetch_time in enumerate(fetch_times):
        submission = reddit.submission(post.post_id)

        # Sleep until fetch_time
        current_time = time.time()
        if current_time < fetch_time:
            time.sleep(fetch_time - current_time)
        LOG.debug(
            "get_comments for %s at time %f (%d/%d)",
            post.post_id,
            fetch_time,
            i + 1,
            len(fetch_times),
        )

        # Get the post's comments
        submission.comments.replace_more()
        for top_level_comment in submission.comments:
            comment_id = top_level_comment.id
            if comment_id not in comment_id_dict:
                comment_id_dict[comment_id] = Comment(comment_id, top_level_comment.body,
                                                      current_time, [])
            comment_id_dict[comment_id].votes.append(
                Vote(top_level_comment.score, current_time))

    LOG.debug(
        "get_comments for %s finished with %d comments",
        post.post_id,
        len(comment_id_dict),
    )
    return list(comment_id_dict.values())
def clean_cache(sr_name):
    # Remove references to threads that were deleted. House cleaning.
    r = Reddit('aaf_gamethread')
    ensure_scopes(r)
    gt = AAFGameThread(r, sr_name, ",".join(subreddits.keys()))
    for game_id in gt.games:
        game = gt.games[game_id]
        for thread_type in ('gamethread', 'post_gamethread'):
            if thread_type not in game.threads:
                continue
            if game.threads[thread_type] is None:
                del game.threads[thread_type]
                continue
            thread = r.submission(id=game.threads[thread_type])
            if thread.author is None:
                del game.threads[thread_type]
        gt.games[game_id] = game
class SubRedditStats(object):
    """Contain all the functionality of the subreddit_stats command."""

    post_prefix = tt('Subreddit Stats:')
    post_header = tt('---\n###{}\n')
    post_footer = tt('>Generated with [BBoe](/u/bboe)\'s [Subreddit Stats]'
                     '(https://github.com/praw-dev/prawtools) \n{}'
                     'SRS Marker: {}')
    re_marker = re.compile(r'SRS Marker: (\d+)')

    @staticmethod
    def _previous_max(submission):
        return float(SubRedditStats.re_marker.findall(submission.selftext)[-1])

    @staticmethod
    def _permalink(item):
        if isinstance(item, Submission):
            return tt('/comments/{}').format(item.id)
        else:  # comment
            return tt('/comments/{}//{}?context=1').format(item.submission.id, item.id)

    @staticmethod
    def _pts(points):
        return '1 pt' if points == 1 else '{} pts'.format(points)

    @staticmethod
    def _user(user):
        if user is None:
            return '_deleted_'
        elif isinstance(user, Redditor):
            user = str(user)
        return tt('[{}](/user/{})').format(user.replace('_', '\\_'), user)

    def __init__(self, subreddit, site, verbosity, distinguished):
        """Initialize the SubRedditStats instance with config options."""
        self.reddit = Reddit(site, disable_update_check=True,
                             user_agent='prawtools/{}'.format(__version__))
        self.subreddit = self.reddit.subreddit(subreddit)
        self.verbosity = verbosity
        self.distinguished = distinguished
        self.submissions = []
        self.comments = []
        self.submitters = defaultdict(list)
        self.commenters = defaultdict(list)
        self.min_date = 0
        self.max_date = time.time() - DAYS_IN_SECONDS * 3
        self.prev_srs = None

    def msg(self, msg, level, overwrite=False):
        """Output a message to the screen if the verbosity is sufficient."""
        if self.verbosity and self.verbosity >= level:
            sys.stdout.write(msg)
            if overwrite:
                sys.stdout.write('\r')
                sys.stdout.flush()
            else:
                sys.stdout.write('\n')

    def prev_stat(self, prev_id):
        """Load the previous subreddit stat."""
        self.prev_srs = self.reddit.submission(prev_id)
        self.min_date = self._previous_max(self.prev_srs)

    def fetch_recent_submissions(self, max_duration, after, exclude_self,
                                 exclude_link, since_last=True):
        """Fetch recent submissions in subreddit with boundaries.

        Does not include posts within the last three days as their scores may
        not be representative.

        :param max_duration: When set, specifies the number of days to include
        :param after: When set, fetch all submission after this submission id.
        :param exclude_self: When true, don't include self posts.
        :param exclude_link: When true, don't include links.
        :param since_last: When true use info from last submission to determine
            the stop point
        :returns: True if any submissions were found.

        """
        if exclude_self and exclude_link:
            raise TypeError('Cannot set both exclude_self and exclude_link.')
        if max_duration:
            self.min_date = self.max_date - DAYS_IN_SECONDS * max_duration
        params = {'after': after} if after else None
        self.msg('DEBUG: Fetching submissions', 1)
        for submission in self.subreddit.new(limit=None, params=params):
            if submission.created_utc <= self.min_date:
                break
            if since_last and submission.title.startswith(self.post_prefix) \
                    and submission.author == self.reddit.config.username:
                # Use info in this post to update the min_date
                # and don't include this post
                self.msg(tt('Found previous: {}')
                         .format(safe_title(submission)), 2)
                if self.prev_srs is None:  # Only use the most recent
                    self.min_date = max(self.min_date,
                                        self._previous_max(submission))
                    self.prev_srs = submission
                continue
            if submission.created_utc > self.max_date:
                continue
            if exclude_self and submission.is_self:
                continue
            if exclude_link and not submission.is_self:
                continue
            self.submissions.append(submission)
        num_submissions = len(self.submissions)
        self.msg('DEBUG: Found {} submissions'.format(num_submissions), 1)
        if num_submissions == 0:
            return False

        # Update real min and max dates
        self.submissions.sort(key=lambda x: x.created_utc)
        self.min_date = self.submissions[0].created_utc
        self.max_date = self.submissions[-1].created_utc
        return True

    def fetch_top_submissions(self, top, exclude_self, exclude_link):
        """Fetch top 1000 submissions by some top value.

        :param top: One of week, month, year, all
        :param exclude_self: When true, don't include self posts.
        :param exclude_link: When true, include only self posts
        :returns: True if any submissions were found.

        """
        if exclude_self and exclude_link:
            raise TypeError('Cannot set both exclude_self and exclude_link.')
        if top not in ('day', 'week', 'month', 'year', 'all'):
            raise TypeError('{!r} is not a valid top value'.format(top))
        self.msg('DEBUG: Fetching submissions', 1)
        params = {'t': top}
        for submission in self.subreddit.top(limit=None, params=params):
            if exclude_self and submission.is_self:
                continue
            if exclude_link and not submission.is_self:
                continue
            self.submissions.append(submission)
        num_submissions = len(self.submissions)
        self.msg('DEBUG: Found {} submissions'.format(num_submissions), 1)
        if num_submissions == 0:
            return False

        # Update real min and max dates
        self.submissions.sort(key=lambda x: x.created_utc)
        self.min_date = self.submissions[0].created_utc
        self.max_date = self.submissions[-1].created_utc
        return True

    def process_submitters(self):
        """Group submissions by author."""
        self.msg('DEBUG: Processing Submitters', 1)
        for submission in self.submissions:
            if submission.author and (self.distinguished or
                                      submission.distinguished is None):
                self.submitters[str(submission.author)].append(submission)

    def process_commenters(self):
        """Group comments by author."""
        num = len(self.submissions)
        self.msg('DEBUG: Processing Commenters on {} submissions'.format(num), 1)
        for i, submission in enumerate(self.submissions):
            submission.comment_sort = 'top'
            self.msg('{}/{} submissions'.format(i + 1, num), 2, overwrite=True)
            if submission.num_comments == 0:
                continue
            skipped = submission.comments.replace_more()
            if skipped:
                skip_num = sum(x.count for x in skipped)
                print('Ignored {} comments ({} MoreComment objects)'
                      .format(skip_num, len(skipped)))
            comments = [x for x in submission.comments.list()
                        if self.distinguished or x.distinguished is None]
            self.comments.extend(comments)
        for comment in self.comments:
            if comment.author:
                self.commenters[str(comment.author)].append(comment)

    def basic_stats(self):
        """Return a markdown representation of simple statistics."""
        sub_score = sum(x.score for x in self.submissions)
        comm_score = sum(x.score for x in self.comments)
        sub_duration = self.max_date - self.min_date
        sub_rate = (86400. * len(self.submissions) / sub_duration
                    if sub_duration else len(self.submissions))

        # Compute comment rate
        if self.comments:
            self.comments.sort(key=lambda x: x.created_utc)
            duration = (self.comments[-1].created_utc -
                        self.comments[0].created_utc)
            comm_rate = (86400. * len(self.comments) / duration
                         if duration else len(self.comments))
        else:
            comm_rate = 0

        values = [('Total', len(self.submissions), len(self.comments)),
                  ('Rate (per day)', '{:.2f}'.format(sub_rate),
                   '{:.2f}'.format(comm_rate)),
                  ('Unique Redditors', len(self.submitters),
                   len(self.commenters)),
                  ('Combined Score', sub_score, comm_score)]

        retval = 'Period: {:.2f} days\n\n'.format(sub_duration / 86400.)
        retval += '||Submissions|Comments|\n:-:|--:|--:\n'
        for quad in values:
            retval += '__{}__|{}|{}\n'.format(*quad)
        return retval + '\n'

    def top_submitters(self, num, num_submissions):
        """Return a markdown representation of the top submitters."""
        num = min(num, len(self.submitters))
        if num <= 0:
            return ''

        top_submitters = sorted(iteritems(self.submitters), reverse=True,
                                key=lambda x: (sum(y.score for y in x[1]),
                                               len(x[1])))[:num]

        retval = self.post_header.format('Top Submitters\' Top Submissions')
        for (author, submissions) in top_submitters:
            retval += '0. {}, {} submission{}: {}\n'.format(
                self._pts(sum(x.score for x in submissions)), len(submissions),
                's' if len(submissions) > 1 else '', self._user(author))
            for sub in sorted(submissions, reverse=True,
                              key=lambda x: x.score)[:num_submissions]:
                title = safe_title(sub)
                if sub.permalink != sub.url:
                    retval += tt(' 0. [{}]({})').format(title, sub.url)
                else:
                    retval += tt(' 0. {}').format(title)
                retval += ' ({}, [{} comment{}]({}))\n'.format(
                    self._pts(sub.score), sub.num_comments,
                    's' if sub.num_comments > 1 else '',
                    self._permalink(sub))
            retval += '\n'
        return retval

    def top_commenters(self, num):
        """Return a markdown representation of the top commenters."""
        num = min(num, len(self.commenters))
        if num <= 0:
            return ''

        top_commenters = sorted(iteritems(self.commenters), reverse=True,
                                key=lambda x: (sum(y.score for y in x[1]),
                                               len(x[1])))[:num]

        retval = self.post_header.format('Top Commenters')
        for author, comments in top_commenters:
            retval += '0. {} ({}, {} comment{})\n'.format(
                self._user(author), self._pts(sum(x.score for x in comments)),
                len(comments), 's' if len(comments) > 1 else '')
        return '{}\n'.format(retval)

    def top_submissions(self, num):
        """Return a markdown representation of the top submissions."""
        num = min(num, len(self.submissions))
        if num <= 0:
            return ''

        top_submissions = sorted(
            [x for x in self.submissions
             if self.distinguished or x.distinguished is None],
            reverse=True, key=lambda x: x.score)[:num]

        if not top_submissions:
            return ''

        retval = self.post_header.format('Top Submissions')
        for sub in top_submissions:
            title = safe_title(sub)
            if sub.permalink != sub.url:
                retval += tt('0. [{}]({})').format(title, sub.url)
            else:
                retval += tt('0. {}').format(title)
            retval += ' by {} ({}, [{} comment{}]({}))\n'.format(
                self._user(sub.author), self._pts(sub.score),
                sub.num_comments, 's' if sub.num_comments > 1 else '',
                self._permalink(sub))
        return tt('{}\n').format(retval)

    def top_comments(self, num):
        """Return a markdown representation of the top comments."""
        num = min(num, len(self.comments))
        if num <= 0:
            return ''

        top_comments = sorted(self.comments, reverse=True,
                              key=lambda x: x.score)[:num]
        retval = self.post_header.format('Top Comments')
        for comment in top_comments:
            title = safe_title(comment.submission)
            retval += tt('0. {}: {}\'s [comment]({}) in {}\n').format(
                self._pts(comment.score), self._user(comment.author),
                self._permalink(comment), title)
        return tt('{}\n').format(retval)

    def publish_results(self, subreddit, submitters, commenters, submissions,
                        comments, top, debug=False):
        """Submit the results to the subreddit. Has no return value (None)."""
        def timef(timestamp, date_only=False):
            """Return a suitable string representation of the timestamp."""
            dtime = datetime.fromtimestamp(timestamp)
            if date_only:
                retval = dtime.strftime('%Y-%m-%d')
            else:
                retval = dtime.strftime('%Y-%m-%d %H:%M PDT')
            return retval

        if self.prev_srs:
            prev = '[Prev SRS]({}) \n'.format(self._permalink(self.prev_srs))
        else:
            prev = ''

        basic = self.basic_stats()
        t_commenters = self.top_commenters(commenters)
        t_submissions = self.top_submissions(submissions)
        t_comments = self.top_comments(comments)
        footer = self.post_footer.format(prev, self.max_date)

        body = ''
        num_submissions = 10
        while body == '' or len(body) > MAX_BODY_SIZE and num_submissions > 2:
            t_submitters = self.top_submitters(submitters, num_submissions)
            body = (basic + t_submitters + t_commenters + t_submissions +
                    t_comments + footer)
            num_submissions -= 1

        if len(body) > MAX_BODY_SIZE:
            print('The resulting message is too big. Not submitting.')
            debug = True

        # Set the initial title
        base_title = '{} {} {}posts from {} to {}'.format(
            self.post_prefix, str(self.subreddit), 'top ' if top else '',
            timef(self.min_date, True), timef(self.max_date))

        submitted = False
        while not debug and not submitted:
            if subreddit:  # Verify the user wants to submit to the subreddit
                msg = ('You are about to submit to subreddit {} as {}.\n'
                       'Are you sure? yes/[no]: '
                       .format(subreddit, self.reddit.config.username))
                sys.stdout.write(msg)
                sys.stdout.flush()
                if sys.stdin.readline().strip().lower() not in ['y', 'yes']:
                    subreddit = None
            elif not subreddit:  # Prompt for the subreddit to submit to
                msg = ('Please enter a subreddit to submit to (press return to'
                       ' abort): ')
                sys.stdout.write(msg)
                sys.stdout.flush()
                subreddit = sys.stdin.readline().strip()
                if not subreddit:
                    print('Submission aborted\n')
                    debug = True

            # Vary the title depending on where posting
            if str(self.subreddit) == subreddit:
                title = '{} {}posts from {} to {}'.format(
                    self.post_prefix, 'top ' if top else '',
                    timef(self.min_date, True), timef(self.max_date))
            else:
                title = base_title

            if subreddit:
                subreddit = self.reddit.subreddit(subreddit)
                try:  # Attempt to make the submission
                    print(subreddit.submit(title, selftext=body).permalink)
                    submitted = True
                except Exception as error:
                    print('The submission failed: {!r}'.format(error))
                    subreddit = None

        if not submitted:
            print(base_title)
            print(body)

    def save_csv(self, filename):
        """Create csv file containing comments and submissions by author."""
        redditors = set(self.submitters.keys()).union(self.commenters.keys())
        mapping = dict((x.lower(), x) for x in redditors)
        with codecs.open(filename, 'w', encoding='utf-8') as outfile:
            outfile.write('username, type, permalink, score\n')
            for _, redditor in sorted(mapping.items()):
                for submission in self.submitters.get(redditor, []):
                    outfile.write(u'{}, submission, {}, {}\n'
                                  .format(redditor, submission.permalink,
                                          submission.score))
                for comment in self.commenters.get(redditor, []):
                    outfile.write(u'{}, comment, {}, {}\n'
                                  .format(redditor, comment.permalink,
                                          comment.score))