def retrieve_usernames_from_subreddit(subreddit):
    start_time = time()
    # if subreddit in subreddits_dict: return
    if subreddit in subreddits_queue:
        subreddits_queue.remove(subreddit)
    subredditr = r.get_subreddit(subreddit)
    this_subreddit_users = set()
    count = 0
    count2 = 0
    for submission in subredditr.get_hot(limit=None):
        user = str(submission.author)
        this_subreddit_users.add(user)
        for comment in flatten_tree(submission.comments):
            # MoreComments stubs have no author attribute
            if hasattr(comment, 'author'):
                user = str(comment.author)
                this_subreddit_users.add(user)
            count += 1
            if count % 10 == 0:
                print "#",
        count2 += 1
        if count2 % 100 == 0:
            print "\nsubmission count: " + str(count2)
    subreddits_dict[subreddit] = this_subreddit_users
    for user in this_subreddit_users:
        if user not in users_dict and user not in users_queue:
            users_queue.add(user)
    print_subreddit_to_subreddit_file(subreddit)
    print "\ntime for subreddit " + subreddit + ": " + str(time() - start_time)

def main(thread_id, include_author=False, sample_size=1):
    print('Logging in to Reddit...')
    r = praw.Reddit(USER_AGENT)
    try:
        if '/' in thread_id:
            # Looks like a URL
            thread = r.get_submission(url=thread_id)
        else:
            # Hopefully a thread identifier
            thread = r.get_submission(submission_id=thread_id)
    except Exception:
        print('Error while searching for a thread with url or id of "{}"\n{}'
              .format(thread_id, sys.exc_info()))
        return
    users = []
    if include_author:
        print('Including original thread author.')
        users.append(thread.author.name)
    # Skip deleted comments and MoreComments stubs, which have no author.
    users = users + [c.author.name for c in flatten_tree(thread.comments)
                     if getattr(c, 'author', None)]
    candidates = set(users)
    print('Found {} unique users in {} comments.'.format(
        len(candidates), len(users)))
    print('Randomly selecting {} winners...'.format(sample_size))
    winners = random.sample(candidates, sample_size)
    print('Chosen Winners:')
    for w in winners:
        print('/u/{}'.format(w))
    print('Done!')

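# Hypothetical wiring (an assumption, not from the original source): the
# USER_AGENT value and CLI shape below are invented to make the raffle
# function above runnable end to end.
import random
import sys

import praw
from praw.helpers import flatten_tree

USER_AGENT = 'comment-raffle/0.1 (example)'  # assumed value

if __name__ == '__main__':
    # e.g. python raffle.py 2qgzvt --include-author
    main(sys.argv[1], include_author='--include-author' in sys.argv)
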
def update_playlists(self, update_all=False):
    """ Checks all threads with playlists to see if their content
    should be updated """
    logging.info("Updating Playlists")
    for submission_name, playlist in self.playlists.iteritems():
        youtube_links = []
        # if the thread is still being watched
        submissions_with_name = [s for s in self.submissions
                                 if s.name == submission_name]
        if not submissions_with_name:
            # Nothing to re-read, even when update_all forces a refresh;
            # indexing the empty list below would raise IndexError.
            continue
        submission = submissions_with_name[0]
        all_comments = submission.comments
        flat_comments = flatten_tree(all_comments)
        # keep a record of yt_links in comments
        for comment in flat_comments:
            if (type(comment) != MoreComments
                    and comment.score >= MINIMUM_COMMENT_UPVOTES):
                links_in_comment = YOUTUBE_REGEX.findall(comment.body)
                if links_in_comment:
                    youtube_links = youtube_links + links_in_comment
        # add new ones
        for video_id in youtube_links:
            if video_id not in playlist.videos:
                logging.info("Adding Video " + str(video_id) +
                             " to Playlist - " + submission.title)
                playlist.add_video(video_id)

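# This and the later playlist-bot snippets reference module-level names that
# are not shown. A plausible reconstruction, with assumed thresholds and an
# assumed regex (not the original bot's configuration):
import re

from praw.objects import MoreComments  # praw 3.x

MINIMUM_COMMENT_UPVOTES = 5   # assumed score threshold
MINIMUM_PLAYLIST_SIZE = 3     # assumed minimum number of links
# Capture the 11-character video id from common YouTube URL forms.
YOUTUBE_REGEX = re.compile(
    r'(?:youtube\.com/watch\?\S*?v=|youtu\.be/)([\w-]{11})')
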
def process_commenters(self):
    """Group comments by author."""
    num = len(self.submissions)
    self.msg('DEBUG: Processing Commenters on {0} submissions'.format(num), 1)
    for i, submission in enumerate(self.submissions):
        # Explicitly fetch as many comments as possible by top sort
        # Note that this is the first time the complete submission object
        # is obtained. Only a partial object was returned when getting the
        # subreddit listings.
        try:
            submission = self.reddit.get_submission(submission.permalink,
                                                    comment_limit=None,
                                                    comment_sort='top')
        except HTTPError as exc:
            print('Ignoring comments on {0} due to HTTP status {1}'
                  .format(submission.url, exc.response.status_code))
            continue
        self.msg('{0}/{1} submissions'.format(i + 1, num), 2, overwrite=True)
        if submission.num_comments == 0:
            continue
        skipped = submission.replace_more_comments()
        if skipped:
            skip_num = sum(x.count for x in skipped)
            print('Ignored {0} comments ({1} MoreComment objects)'
                  .format(skip_num, len(skipped)))
        self.comments.extend(flatten_tree(submission.comments))
        # pylint: disable-msg=W0212
        for orphans in itervalues(submission._orphaned):
            self.comments.extend(orphans)
        # pylint: enable-msg=W0212
    for comment in self.comments:
        if comment.author:
            self.commenters[str(comment.author)].append(comment)

def has_subredditurl_in_comments(submission, SUBREDDITURL):
    try:
        for comment in flatten_tree(submission.comments):
            # MoreComments stubs carry no body; treat them as empty.
            if SUBREDDITURL in getattr(comment, 'body', '').lower():
                print("Found url in comments")
                return True
    except Exception:
        pass
    return False

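# Hypothetical driver (assumed subreddit and user agent): skip any submission
# whose thread already mentions the target subreddit URL.
import praw
from praw.helpers import flatten_tree

r = praw.Reddit('url-checker/0.1 (example)')
for submission in r.get_subreddit('learnpython').get_new(limit=25):
    if not has_subredditurl_in_comments(submission, '/r/learnpython'):
        print("No mention yet: " + submission.permalink)
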
def test_all_comments(self):
    c_len = len(self.submission.comments)
    flat = helpers.flatten_tree(self.submission.comments)
    continue_items = [x for x in flat if isinstance(x, MoreComments)
                      and x.count == 0]
    self.assertTrue(continue_items)
    cf_len = len(flat)
    saved = self.submission.replace_more_comments(threshold=2)
    ac_len = len(self.submission.comments)
    flat = helpers.flatten_tree(self.submission.comments)
    acf_len = len(flat)
    for item in continue_items:
        self.assertTrue(item.id in [x.id for x in flat])
    self.assertEqual(len(self.submission._comments_by_id), acf_len)
    self.assertTrue(c_len < ac_len)
    self.assertTrue(c_len < cf_len)
    self.assertTrue(ac_len < acf_len)
    self.assertTrue(cf_len < acf_len)
    self.assertTrue(saved)

def get_comments(submissions, top=40):
    submissions = map(lambda s: praw.objects.Submission(r, json_dict=s),
                      submissions)
    comments = []
    for submission in submissions:
        for comment in flatten_tree(submission.comments):
            if not isinstance(comment, praw.objects.Comment):
                continue
            comment._json_data['replies'] = None
            comments.append(comment._json_data)
    return comments

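# Hypothetical usage (an assumption): since the function above returns raw
# _json_data dicts rather than praw objects, the result can be serialized
# directly.
import json

stored_submission_dicts = []  # e.g. loaded from an earlier crawl (assumed)
with open('comments.json', 'w') as fh:
    json.dump(get_comments(stored_submission_dicts), fh)
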
def should_notify(submission):
    """
    Looks for other snapshot-bot comments in the comment chain and skips
    posting if any are found.

    :param submission: Submission to check
    :return: Whether we should comment or not
    """
    cur.execute("SELECT * FROM links WHERE id=?", (submission.name,))
    if cur.fetchone():
        return False
    submission.replace_more_comments()
    for comment in flatten_tree(submission.comments):
        if comment.author and comment.author.name in ignorelist:
            return False
    return True

def extract_comments(comments_raw):
    _comments = [[comment] + flatten_tree(comment.replies)
                 for comment in comments_raw]
    comments = []
    for obj in _comments:
        inner_comments = []
        for obj_item in obj:
            try:
                inner_comments.append({
                    'body': obj_item.body,
                    'score': obj_item.score
                })
            except AttributeError:
                pass
        comments.append(inner_comments)
    return comments

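# Hypothetical call site (assumed user agent and submission id). MoreComments
# objects are filtered out first, because the list comprehension above touches
# comment.replies before the try/except can intervene.
import praw
from praw.objects import MoreComments
from praw.helpers import flatten_tree

r = praw.Reddit('comment-extractor/0.1 (example)')
submission = r.get_submission(submission_id='2qgzvt')  # hypothetical id
submission.replace_more_comments(limit=None, threshold=0)
threads = extract_comments(
    [c for c in submission.comments if not isinstance(c, MoreComments)])
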
def has_video_content(submission):
    """ Return boolean based on whether submission is viable to have a
    youtube playlist made """
    all_comments = submission.comments
    # imported from praw.helpers
    flat_comments = flatten_tree(all_comments)
    video_count = 0
    for comment in flat_comments:
        if (type(comment) != MoreComments
                and comment.score >= MINIMUM_COMMENT_UPVOTES):
            links_in_comment = YOUTUBE_REGEX.findall(comment.body)
            if links_in_comment:
                video_count = video_count + len(links_in_comment)
        if video_count >= MINIMUM_PLAYLIST_SIZE:
            return True
    return False

def details_scan(post, pauthor, ptime):
    found = False
    curtime = datetime.now(timezone.utc).timestamp()
    if post.is_self is False:
        print("Checking details comments...")
        difference = curtime - ptime
        comments = helpers.flatten_tree(post.comments)
        for comment in comments:
            try:
                cauthor = comment.author.name
            except AttributeError:
                cauthor = "[deleted]"
            if cauthor == pauthor and found is False:
                print("\tFound comment by OP")
                found = True
        if found is True:
            print("\tComment is okay")
            # Deletes all /u/upmo comments
            for comment in comments:
                try:
                    cauthor = comment.author.name
                except AttributeError:
                    cauthor = "[deleted]"
                if cauthor == USERNAME:
                    comment.delete()
                    print("\tDeleted old comments")
        else:
            if difference > DELAY:
                slay(post, NODETAILS)
            elif difference > (DELAY * 0.5):
                # Skip comments without an author (deleted users, unresolved
                # MoreComments) to avoid an AttributeError here.
                commenters = [comment.author.name for comment in comments
                              if getattr(comment, 'author', None)]
                if (found is False) and ("upmo" not in commenters):
                    print("\tWarning OP")
                    response = post.add_comment(DETAILSWARN)
                    response.distinguish()
                return False
            else:
                if difference < (DELAY * 0.5):
                    differences = str("%.0f" % ((DELAY * 0.5) - difference))
                    print("\tStill has " + differences + "s before warning")
                elif difference < DELAY:
                    differences = str("%.0f" % (DELAY - difference))
                    print("\tStill has " + differences + "s before removal")
    return False

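# The moderation helper above leans on module globals and a slay() helper
# that are not shown. Plausible stand-ins (assumed values and behavior, not
# the original bot's):
DELAY = 3600          # assumed: seconds OP gets to post a details comment
USERNAME = 'upmo'     # assumed: the bot's own account name
NODETAILS = 'Removed: no details comment from OP.'  # assumed text
DETAILSWARN = 'Please add a details comment or this post will be removed.'

def slay(post, reason):
    """Assumed helper: remove the post and leave a distinguished reply."""
    response = post.add_comment(reason)
    response.distinguish()
    post.remove()
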
def process_commenters(self):
    """Group comments by author."""
    num = len(self.submissions)
    self.msg('DEBUG: Processing Commenters on {0} submissions'.format(num), 1)
    for i, submission in enumerate(self.submissions):
        # Explicitly fetch as many comments as possible by top sort
        # Note that this is the first time the complete submission object
        # is obtained. Only a partial object was returned when getting the
        # subreddit listings.
        try:
            submission = self.reddit.get_submission(submission.permalink,
                                                    comment_limit=None,
                                                    comment_sort='top')
        except HTTPError as exc:
            print('Ignoring comments on {0} due to HTTP status {1}'.format(
                submission.url, exc.response.status_code))
            continue
        self.msg('{0}/{1} submissions'.format(i + 1, num), 2, overwrite=True)
        if submission.num_comments == 0:
            continue
        skipped = submission.replace_more_comments()
        if skipped:
            skip_num = sum(x.count for x in skipped)
            print('Ignored {0} comments ({1} MoreComment objects)'.format(
                skip_num, len(skipped)))
        comments = [x for x in flatten_tree(submission.comments)
                    if self.distinguished or x.distinguished is None]
        self.comments.extend(comments)
        # pylint: disable=W0212
        for orphans in itervalues(submission._orphaned):
            self.comments.extend(orphans)
        # pylint: enable=W0212
    for comment in self.comments:
        if comment.author:
            self.commenters[str(comment.author)].append(comment)

def get_comments(results):
    """
    Add comment responses to the submissions that matched the search terms.

    Arguments: results is a list of search results.

    Returns: A list of search results and their flattened comment trees.
    """
    comment_list = []
    num_subs = len(results)
    for i in range(num_subs):
        print("Fetching comments from submission " + str(i + 1) +
              " of " + str(len(results)) + " ...")
        results[i].replace_more_comments(limit=None, threshold=0)
        comments = flatten_tree(results[i].comments)
        comment_list += comments
    num_comms = len(comment_list)
    print("All " + str(num_comms) + " comments successfully fetched,"
          " for a total of " + str(num_subs + num_comms) +
          " search results.")
    return (results + comment_list)

def fetch_discussion(reddit_handler, url):
    """
    Fetches the full discussion under a submission and returns the pieces
    needed to store it as a .json file.
    """
    ###########################################################################
    # Fetch submission and extract ALL comments - may be slow for large
    # discussions
    ###########################################################################
    # Get UNIX timestamp of the fetch.
    fetch_datetime = datetime.datetime.now()
    fetch_timestamp = fetch_datetime.timestamp()
    # Despite the parameter name, `url` is passed as a submission id here.
    submission = reddit_handler.get_submission(submission_id=url)
    submission.replace_more_comments(limit=None, threshold=0)
    redditor_name = submission.json_dict["author"]
    redditor = reddit_handler.get_redditor(redditor_name, fetch=True)
    comments = submission.comments
    comments = flatten_tree(comments)
    return submission, redditor, comments, fetch_timestamp

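# The docstring above promises a .json file, but the function only returns
# objects; a caller presumably serializes them. A hedged sketch of that step
# (assumed user agent, hypothetical submission id, and json_dict payloads):
import json

import praw

r = praw.Reddit('discussion-fetcher/0.1 (example)')
submission, redditor, comments, fetched_at = fetch_discussion(r, '2qgzvt')
with open('discussion.json', 'w') as fh:
    json.dump({'fetched_at': fetched_at,
               'submission': submission.json_dict,
               'comments': [c.json_dict for c in comments
                            if hasattr(c, 'json_dict')]},
              fh)
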
def create_playlist(self, submission):
    """ For a reddit submission, create an associated youtube playlist """
    logging.info("--- EXECUTING FUNCTION CREATE_PLAYLIST FOR CLASS "
                 "PLAYLISTBOT ---")
    youtube_links = []
    # submission.replace_more_comments(limit=None, threshold=0)
    all_comments = submission.comments
    # imported from praw.helpers
    flat_comments = flatten_tree(all_comments)
    for comment in flat_comments:
        if (type(comment) != MoreComments
                and comment.score >= MINIMUM_COMMENT_UPVOTES):
            links_in_comment = YOUTUBE_REGEX.findall(comment.body)
            if links_in_comment:
                youtube_links = youtube_links + links_in_comment
    new_playlist = Playlist(submission)
    for video_id in youtube_links:
        logging.info("Adding Video " + str(video_id) + " to Playlist - " +
                     submission.title)
        new_playlist.add_video(video_id)
    return new_playlist

def comments(self):
    if not self._comments:
        for subm in self.submissions:
            self._comments.extend(flatten_tree(subm.comments))
    return self._comments

def main():
    # Initialize sentiment analyzer
    sentiment_analyzer = MashapeSentimentAnalyzer()

    # Initialize connection to reddit API
    user_agent = "Percepto 0.1 by /u/_allocate"
    r = praw.Reddit(user_agent=user_agent)

    # Initialize logging info
    datetime = time.strftime("%Y-%m-%d_%H:%M")
    filename = "matches_{0}.txt".format(datetime)
    matches = []

    # Get hottest submissions in r/askreddit
    subreddit = r.get_subreddit(SUBREDDIT)
    for submission in subreddit.get_hot(limit=SUBMISSION_LIMIT):

        # Get top-level comments
        while len(submission.comments) < MAX_TOP_LEVEL_COMMENTS and \
                isinstance(submission.comments[-1], MoreComments):
            submission.replace_more_comments(limit=1)

        # Trim off extra top-level comments
        submission.comments = submission.comments[:MAX_TOP_LEVEL_COMMENTS]

        # Resolve remaining replies,
        # and flatten comment tree (hierarchy doesn't matter)
        submission.replace_more_comments(limit=None)
        all_comments = flatten_tree(submission.comments)

        debug_count = 0

        # Iterate through comments
        for comment in all_comments:
            # TODO: Refine this
            # Only process comment objects (not more comments)
            # if None != comment.body:

            # If suitable length, analyze comment
            word_count = comment.body.split(DELIMITER)
            debug_count += 1
            if len(word_count) >= MIN_COMMENT_LENGTH:

                # Determine if comment is match
                result = {}
                match = False
                try:
                    match = sentiment_analyzer.is_match(comment)
                except Exception as exception:
                    break

                # If match found, record it
                if match:
                    matches.append({
                        "comment": comment.__dict__,
                        "result": result
                    })

    # Write to file
    output = open(filename, "w+")
    output.write(json.dumps(matches, indent=4, separators=(',', ': ')))
    output.close()

def get_comments(subm):
    request_with_retry(subm.replace_more_comments, limit=None)
    comms = subm.comments
    flatten_comms = flatten_tree(comms)
    return flatten_comms

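# request_with_retry is not shown; a minimal wrapper consistent with the call
# site above might look like this (an assumption, not the original helper):
import time

from requests.exceptions import HTTPError

def request_with_retry(func, *args, **kwargs):
    """Retry a flaky API call a few times with exponential backoff."""
    for attempt in range(3):
        try:
            return func(*args, **kwargs)
        except HTTPError:
            if attempt == 2:
                raise
            time.sleep(2 ** attempt)
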
from RedditBotClass import RedditBot
from praw import helpers, errors, objects

rBot = RedditBot('greenbaypackers')
submissions = rBot.Get_Submissions_Since_Last_Post()
already_submitted_ids = rBot.dAccessor.Get_Previous_Submission_IDs()
insert_set = []
for s in submissions:
    s.replace_more_comments(limit=10, threshold=3)
    flat_comments = helpers.flatten_tree(s.comments)
    print s.title
    for c in flat_comments:
        if (not isinstance(c, objects.MoreComments)
                and 'rogers' in c.body.lower()
                and c.id not in already_submitted_ids):
            try:
                c.reply("It's spelled Rodgers")
                print 'Replied to comment {0} by user {1}'.format(
                    c.body.lower(), c.author)
                insert_set.append(c)
            except errors.RateLimitExceeded as e:
                print e.message
            except BaseException as e:
                print e.message
rBot.Insert_Submissions(insert_set)
raw_input('hit button to continue')