Example #1
def retrieve_usernames_from_subreddit(subreddit):
    start_time = time()
    #if subreddit in subreddits_dict: return
    if subreddit in subreddits_queue: subreddits_queue.remove(subreddit)
    subredditr = r.get_subreddit(subreddit)

    this_subreddit_users = set()  # built-in set; the old sets.Set is not imported here

    count = 0
    count2 = 0
    for submission in subredditr.get_hot(limit=None):
        user = str(submission.author)
        this_subreddit_users.add(user)
        for comment in flatten_tree(submission.comments):
            if hasattr(comment, 'author'):
                user = str(comment.author)
                this_subreddit_users.add(user)
            count += 1
            if count % 10 == 0: print "#",
        count2 += 1
        if count2 % 100 == 0: print "\nsubmission count: " + str(count2)

    subreddits_dict[subreddit] = this_subreddit_users

    for user in this_subreddit_users:
        if user not in users_dict and user not in users_queue:
            users_queue.add(user)

    print_subreddit_to_subreddit_file(subreddit)

    print "\ntime for subreddit " + subreddit + ": " + str(time() - start_time)
Example #2
def main(thread_id, include_author=False, sample_size=1):
    print('Logging in to Reddit...')
    r = praw.Reddit(USER_AGENT)

    try:
        if '/' in thread_id:
            # Looks like a URL
            thread = r.get_submission(url=thread_id)
        else:
            # Hopefully a thread identifier
            thread = r.get_submission(submission_id=thread_id)
    except Exception:
        print('Error while searching for a thread with url or id of "{}"\n{}'.format(
            thread_id, sys.exc_info()))
        return

    users = []
    if include_author:
        print('Including original thread author.')
        users.append(thread.author.name)

    users = users + [c.author.name for c in flatten_tree(thread.comments)
                     if getattr(c, 'author', None)]  # skip deleted authors and MoreComments
    candidates = set(users)
    print('Found {} unique users in {} comments.'.format(len(candidates), len(users)))

    print('Randomly selecting {} winners...'.format(sample_size))
    winners = random.sample(candidates, sample_size)
    print('Chosen Winners:')
    for w in winners:
        print('/u/{}'.format(w))

    print('Done!')
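For context, a plausible way to drive this raffle picker from the command line (the argument handling below is a sketch, not part of the original script):

import sys

if __name__ == '__main__':
    # e.g. python raffle.py <thread-id-or-url> 3
    thread_id = sys.argv[1]
    sample_size = int(sys.argv[2]) if len(sys.argv) > 2 else 1
    main(thread_id, include_author=False, sample_size=sample_size)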
Example #3
    def update_playlists(self, update_all=False):
        """ Checks all threads with playlists to see if their content should be updated """

        logging.info("Updating Playlists")

        for submission_name, playlist in self.playlists.iteritems():
            youtube_links = []
            # if the thread is still being watched
            submissions_with_name = [s for s in self.submissions if s.name == submission_name]
            if submissions_with_name or update_all:
                if not submissions_with_name:
                    # update_all can be set while a thread is no longer
                    # watched; nothing to refresh in that case
                    continue
                submission = submissions_with_name[0]
                all_comments = submission.comments
                flat_comments = flatten_tree(all_comments)

                # keep a record of yt_links in comments
                for comment in flat_comments:
                    if not isinstance(comment, MoreComments) and comment.score >= MINIMUM_COMMENT_UPVOTES:
                        links_in_comment = YOUTUBE_REGEX.findall(comment.body)
                        if links_in_comment:
                            youtube_links += links_in_comment

                # add new ones
                for video_id in youtube_links:
                    if video_id not in playlist.videos:
                        logging.info("Adding Video " + str(video_id) + " to Playlist - " + submission.title)
                        playlist.add_video(video_id)
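This method, like the playlist examples further down, leans on a few module-level constants. A sketch of plausible definitions (the regex pattern and both thresholds are assumptions):

import re

# Assumed: captures the 11-character video id from common YouTube URL forms.
YOUTUBE_REGEX = re.compile(
    r'(?:youtube\.com/watch\?\S*?v=|youtu\.be/)([\w-]{11})')

MINIMUM_COMMENT_UPVOTES = 5  # assumed score cutoff for counting a comment
MINIMUM_PLAYLIST_SIZE = 3    # assumed minimum videos before building a playlist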
Example #4
    def process_commenters(self):
        """Group comments by author."""
        num = len(self.submissions)
        self.msg('DEBUG: Processing Commenters on {0} submissions'.format(num),
                 1)
        for i, submission in enumerate(self.submissions):
            # Explicitly fetch as many comments as possible by top sort.
            # Note that this is the first time the complete submission object
            # is obtained. Only a partial object was returned when getting the
            # subreddit listings.
            try:
                submission = self.reddit.get_submission(submission.permalink,
                                                        comment_limit=None,
                                                        comment_sort='top')
            except HTTPError as exc:
                print('Ignoring comments on {0} due to HTTP status {1}'
                      .format(submission.url, exc.response.status_code))
                continue
            self.msg('{0}/{1} submissions'.format(i + 1, num), 2,
                     overwrite=True)
            if submission.num_comments == 0:
                continue
            skipped = submission.replace_more_comments()
            if skipped:
                skip_num = sum(x.count for x in skipped)
                print('Ignored {0} comments ({1} MoreComment objects)'
                      .format(skip_num, len(skipped)))
            self.comments.extend(flatten_tree(submission.comments))
            # pylint: disable-msg=W0212
            for orphans in itervalues(submission._orphaned):
                self.comments.extend(orphans)
            # pylint: enable-msg=W0212
        for comment in self.comments:
            if comment.author:
                self.commenters[str(comment.author)].append(comment)
Example #5
def has_subredditurl_in_comments(submission, SUBREDDITURL):
    try:
        for comment in flatten_tree(submission.comments):
            if SUBREDDITURL in comment.body.lower():
                print("Found url in comments")
                return True
    except AttributeError:
        # MoreComments objects have no body; treat them as no match
        pass
    return False
Example #6
    def test_all_comments(self):
        c_len = len(self.submission.comments)
        flat = helpers.flatten_tree(self.submission.comments)
        continue_items = [x for x in flat if isinstance(x, MoreComments) and
                          x.count == 0]
        self.assertTrue(continue_items)
        cf_len = len(flat)
        saved = self.submission.replace_more_comments(threshold=2)
        ac_len = len(self.submission.comments)
        flat = helpers.flatten_tree(self.submission.comments)
        acf_len = len(flat)
        for item in continue_items:
            self.assertTrue(item.id in [x.id for x in flat])

        self.assertEqual(len(self.submission._comments_by_id), acf_len)
        self.assertTrue(c_len < ac_len)
        self.assertTrue(c_len < cf_len)
        self.assertTrue(ac_len < acf_len)
        self.assertTrue(cf_len < acf_len)
        self.assertTrue(saved)
Example #7
def get_comments(submissions, top=40):
    # `r` is an existing praw.Reddit session; note `top` is accepted but unused
    submissions = map(lambda s: praw.objects.Submission(r, json_dict=s), submissions)
    comments = []

    for submission in submissions:
        for comment in flatten_tree(submission.comments):
            if not isinstance(comment, praw.objects.Comment):
                continue

            comment._json_data['replies'] = None
            comments.append(comment._json_data)

    return comments
Example #8
def should_notify(submission):
    """
    Looks for other snapshot bot comments in the comment chain and doesn't
    post if they do.
    :param submission: Submission to check
    :return: If we should comment or not
    """
    cur.execute("SELECT * FROM links WHERE id=?", (submission.name,))
    if cur.fetchone():
        return False
    submission.replace_more_comments()
    for comment in flatten_tree(submission.comments):
        if comment.author and comment.author.name in ignorelist:
            return False
    return True
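The `cur` cursor and `ignorelist` are globals defined elsewhere in the bot. A minimal sketch of that setup, assuming a local SQLite file and a hand-maintained list of bot accounts:

import sqlite3

conn = sqlite3.connect('links.db')  # assumed database path
cur = conn.cursor()
cur.execute('CREATE TABLE IF NOT EXISTS links (id TEXT PRIMARY KEY)')

# Assumed: accounts whose presence in a thread suppresses a new post.
ignorelist = {'SnapshillBot'}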
Example #9
def extract_comments(comments_raw):
    # Pair each top-level comment with its flattened reply tree
    _comments = [[comment] + flatten_tree(comment.replies)
                 for comment in comments_raw]
    comments = []
    for obj in _comments:
        inner_comments = []
        for obj_item in obj:
            try:
                inner_comments.append({
                    'body': obj_item.body,
                    'score': obj_item.score
                })
            except AttributeError:
                pass
        comments.append(inner_comments)
    return comments
Example #10
def has_video_content(submission):
    """ Return boolean based on whether submission is viable to have a youtube playlist made """

    all_comments = submission.comments
    # imported from praw.helpers
    flat_comments = flatten_tree(all_comments)

    video_count = 0

    for comment in flat_comments:
        if not isinstance(comment, MoreComments) and comment.score >= MINIMUM_COMMENT_UPVOTES:
            links_in_comment = YOUTUBE_REGEX.findall(comment.body)
            if links_in_comment:
                video_count += len(links_in_comment)
        if video_count >= MINIMUM_PLAYLIST_SIZE:
            return True
    return False
Example #11
def details_scan(post, pauthor, ptime):
    found = False
    curtime = datetime.now(timezone.utc).timestamp()
    if post.is_self is False:
        print("Checking details comments...")
        difference = curtime - ptime
        comments = helpers.flatten_tree(post.comments)
        for comment in comments:
            try:
                cauthor = comment.author.name
            except AttributeError:
                cauthor = "[deleted]"
            if cauthor == pauthor and found is False:
                print("\tFound comment by OP")
                found = True
        if found is True:
            print("\tComment is okay")
            # Deletes all /u/upmo comments
            for comment in comments:
                try:
                    cauthor = comment.author.name
                except AttributeError:
                    cauthor = "[deleted]"
                if cauthor == USERNAME:
                    comment.delete()
            print("\tDeleted old comments")
        else:
            if difference > DELAY:
                slay(post, NODETAILS)
            elif difference > (DELAY * 0.5):
                commenters = [getattr(getattr(comment, 'author', None),
                                      'name', '[deleted]')
                              for comment in comments]
                if (found is False) and ("upmo" not in commenters):
                    print("\tWarning OP")
                    response = post.add_comment(DETAILSWARN)
                    response.distinguish()
                return False
            else:
                if difference < (DELAY * 0.5):
                    differences = "%.0f" % ((DELAY * 0.5) - difference)
                    print("\tStill has " + differences + "s before warning")
                elif difference < DELAY:
                    differences = "%.0f" % (DELAY - difference)
                    print("\tStill has " + differences + "s before removal")
                return False
Example #12
    def process_commenters(self):
        """Group comments by author."""
        num = len(self.submissions)
        self.msg('DEBUG: Processing Commenters on {0} submissions'.format(num),
                 1)
        for i, submission in enumerate(self.submissions):
            # Explicitly fetch as many comments as possible by top sort.
            # Note that this is the first time the complete submission object
            # is obtained. Only a partial object was returned when getting the
            # subreddit listings.
            try:
                submission = self.reddit.get_submission(submission.permalink,
                                                        comment_limit=None,
                                                        comment_sort='top')
            except HTTPError as exc:
                print('Ignoring comments on {0} due to HTTP status {1}'.format(
                    submission.url, exc.response.status_code))
                continue
            self.msg('{0}/{1} submissions'.format(i + 1, num),
                     2,
                     overwrite=True)
            if submission.num_comments == 0:
                continue
            skipped = submission.replace_more_comments()
            if skipped:
                skip_num = sum(x.count for x in skipped)
                print('Ignored {0} comments ({1} MoreComment objects)'.format(
                    skip_num, len(skipped)))
            comments = [
                x for x in flatten_tree(submission.comments)
                if self.distinguished or x.distinguished is None
            ]
            self.comments.extend(comments)
            # pylint: disable=W0212
            for orphans in itervalues(submission._orphaned):
                self.comments.extend(orphans)
            # pylint: enable=W0212
        for comment in self.comments:
            if comment.author:
                self.commenters[str(comment.author)].append(comment)
Example #13
def get_comments(results):
    """
    Add comment responses to the submissions that matched
    the search terms.

    Arguments:
    results is a list of search results

    Returns:
    A list of the search results and their
    flattened comment trees.
    """

    comment_list = []

    num_subs = len(results)

    for i in range(num_subs):
        print("Fetching comments from submission {0} of {1} ..."
              .format(i + 1, num_subs))
        results[i].replace_more_comments(limit=None, threshold=0)
        comments = flatten_tree(results[i].comments)
        comment_list += comments

    num_comms = len(comment_list)

    print("All {0} comments successfully fetched, for a total of {1}"
          " search results.".format(num_comms, num_subs + num_comms))

    return results + comment_list
Example #14
def fetch_discussion(reddit_handler, url):
    """
    Fetches the full discussion under a submission and stores it as a .json file.
    """
    ####################################################################################################################
    # Fetch submission and extract ALL comments - may be slow for large discussions
    ####################################################################################################################
    # Get UNIX timestamp of the fetch.
    fetch_datetime = datetime.datetime.now()
    fetch_timestamp = fetch_datetime.timestamp()

    # print(submission_id)

    submission = reddit_handler.get_submission(submission_id=url)
    submission.replace_more_comments(limit=None, threshold=0)

    redditor_name = submission.json_dict["author"]

    redditor = reddit_handler.get_redditor(redditor_name, fetch=True)

    comments = submission.comments
    comments = flatten_tree(comments)

    return submission, redditor, comments, fetch_timestamp
Example #15
    def create_playlist(self, submission):
        """ For a reddit submission, create an associated youtube playlist """

        logging.info("--- EXECUTING FUNCTION CREATE_PLAYLIST FOR CLASS PLAYLISTBOT ---")

        youtube_links = []

        # submission.replace_more_comments(limit=None, threshold=0)
        all_comments = submission.comments
        # imported from praw.helpers
        flat_comments = flatten_tree(all_comments)

        for comment in flat_comments:
            if not isinstance(comment, MoreComments) and comment.score >= MINIMUM_COMMENT_UPVOTES:
                links_in_comment = YOUTUBE_REGEX.findall(comment.body)
                if links_in_comment:
                    youtube_links += links_in_comment

        new_playlist = Playlist(submission)
        for video_id in youtube_links:
            logging.info("Adding Video " + str(video_id) + " to Playlist - " + submission.title)
            new_playlist.add_video(video_id)

        return new_playlist
Example #16
    def comments(self):
        # Lazily flatten and cache the comments of all tracked submissions
        if not self._comments:
            for subm in self.submissions:
                self._comments.extend(flatten_tree(subm.comments))
        return self._comments
Example #17
def main():
    # Initialize sentiment analyzer
    sentiment_analyzer = MashapeSentimentAnalyzer()

    # Initialize connection to reddit API
    user_agent = "Percepto 0.1 by /u/_allocate"
    r = praw.Reddit(user_agent=user_agent)

    # Initialize logging info (avoid shadowing the datetime module name)
    run_stamp = time.strftime("%Y-%m-%d_%H:%M")
    filename = "matches_{0}.txt".format(run_stamp)
    matches = []

    # Get hottest submissions in r/askreddit
    subreddit = r.get_subreddit(SUBREDDIT)
    for submission in subreddit.get_hot(limit=SUBMISSION_LIMIT):

        # Get top-level comments (guard against submissions with none)
        while submission.comments and \
                len(submission.comments) < MAX_TOP_LEVEL_COMMENTS and \
                isinstance(submission.comments[-1], MoreComments):
            submission.replace_more_comments(limit=1)

        # Trim off extra top-level comments
        submission.comments = submission.comments[:MAX_TOP_LEVEL_COMMENTS]

        # Resolve remaining replies,
        # and flatten comment tree (hierarchy doesn't matter)
        submission.replace_more_comments(limit=None)
        all_comments = flatten_tree(submission.comments)

        debug_count = 0

        # Iterate through comments
        for comment in all_comments:

            # TODO: Refine this
            # Only process comment objects (not more comments)
            # if None != comment.body:

            # If suitable length, analyze comment
            words = comment.body.split(DELIMITER)
            debug_count += 1
            if len(words) >= MIN_COMMENT_LENGTH:

                # Determine if comment is match
                result = {}

                match = False
                try:
                    match = sentiment_analyzer.is_match(comment)
                except Exception:
                    # Analyzer failure: give up on this submission's comments
                    break

                # If match found, record it
                if match:
                    matches.append({
                        "comment": comment.__dict__,
                        "result": result
                    })

                    # Write to file
                    output = open(filename, "w+")
                    output.write(json.dumps(matches, indent=4, separators=(',', ': ')))
                    os.close(output)
Example #18
def get_comments(subm):
    request_with_retry(subm.replace_more_comments, limit=None)

    comms = subm.comments
    flatten_comms = flatten_tree(comms)
    return flatten_comms
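`request_with_retry` is a helper defined elsewhere in that project. A minimal sketch of what it plausibly does, retrying transient HTTP failures with exponential backoff (the name, signature, and retry policy are assumptions):

import time

from requests.exceptions import HTTPError

def request_with_retry(func, *args, **kwargs):
    # Call func, retrying up to three times on transient HTTP errors.
    for attempt in range(3):
        try:
            return func(*args, **kwargs)
        except HTTPError:
            if attempt == 2:
                raise
            time.sleep(2 ** attempt)  # 1s, then 2s backoff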
Example #19
from RedditBotClass import RedditBot
from praw import helpers, errors, objects

rBot = RedditBot('greenbaypackers')
submissions = rBot.Get_Submissions_Since_Last_Post()
already_submitted_ids = rBot.dAccessor.Get_Previous_Submission_IDs()

insert_set = []

for s in submissions:
    s.replace_more_comments(limit=10, threshold=3)
    flat_comments = helpers.flatten_tree(s.comments)

    print s.title
    for c in flat_comments:
        if not isinstance(c, objects.MoreComments) and 'rogers' in c.body.lower() and c.id not in already_submitted_ids:
            try:
                c.reply("It's spelled Rodgers")
                print 'Replied to comment {0} by user {1}'.format(c.body.lower(), c.author)
                insert_set.append(c)
            except errors.RateLimitExceeded, e:
                print e.message
            except Exception, e:
                # Exception rather than BaseException, so Ctrl-C still works
                print e.message

rBot.Insert_Submissions(insert_set)

raw_input('hit button to continue')