Exemple #1
0
def process_claim(post, config):
    """
    Handles comment replies containing the word 'claim' and routes
    based on a basic decision tree.

    Depending on the flair of the parent submission, the claimant is asked
    to accept the code of conduct, told the claim succeeded (and the post
    is flaired as in progress), or told that the post is already claimed
    or already completed.

    :param post: The Comment object containing the claim.
    :param config: the global config dict.
    :return: None.
    :raises praw.exceptions.APIException: re-raised for any API error other
        than ``DELETED_COMMENT``.
    """
    top_parent = get_parent_post_id(post, config.r)

    # WAIT! Do we actually own this post?
    if top_parent.author.name != 'transcribersofreddit':
        logging.debug('Received `claim` on post we do not own. Ignoring.')
        return

    try:
        if not coc_accepted(post, config):
            # do not cache this page. We want to get it every time.
            post.reply(
                _(
                    please_accept_coc.format(
                        get_wiki_page('codeofconduct', config))))
            return

        # this can be either '' or None depending on how the API is feeling
        # today
        current_flair = top_parent.link_flair_text
        if current_flair in ['', None]:
            # There exists the very small possibility that the post was
            # malformed and doesn't actually have flair on it. In that case,
            # let's set something so the next part doesn't crash.
            flair_post(top_parent, flair.unclaimed)
            # Do not re-read the attribute from the local object: it may
            # still hold the stale ''/None value, which would either raise
            # a TypeError on the `in` check below or silently drop the
            # claim. We just set the flair, so use that value directly.
            current_flair = flair.unclaimed

        if flair.unclaimed in current_flair:
            # need to get that "Summoned - Unclaimed" in there too
            post.reply(_(claim_success))

            flair_post(top_parent, flair.in_progress)
            logging.info(
                f'Claim on ID {top_parent.fullname} by {post.author} successful'
            )

        # can't claim something that's already claimed
        elif current_flair == flair.in_progress:
            post.reply(_(already_claimed))
        elif current_flair == flair.completed:
            post.reply(_(claim_already_complete))

    except praw.exceptions.APIException as e:
        if e.error_type == 'DELETED_COMMENT':
            logging.info(
                f'Comment attempting to claim ID {top_parent.fullname} has '
                f'been deleted. Back up for grabs! ')
            return
        raise  # Re-raise exception if not
Exemple #2
0
def run(config):
    """
    Runs one iteration of the OCR worker.

    Sleeps for ``config.ocr_delay``, pops a single post ID from the
    ``ocr_ids`` redis list, runs ``process_image`` on that submission's URL
    and posts the recognised text as a chain of replies on the matching
    ToR post (whose ID is stored in redis under the popped post ID).

    This function is meant to be called repeatedly by an outer loop; it
    handles at most one queued post per call.

    :param config: the global config object.
    :return: None.
    """
    time.sleep(config.ocr_delay)
    new_post = config.redis.lpop('ocr_ids')
    if new_post is None:
        logging.debug('No post found. Sleeping.')
        # nothing new in the queue. Wait and try again.
        # Yes, I know this is outside a loop. It will be run inside a loop
        # by tor_core.
        return

    # We got something!
    new_post = new_post.decode('utf-8')
    logging.info(f'Found a new post, ID {new_post}')
    url = config.r.submission(id=clean_id(new_post)).url

    try:
        result = process_image(url)
    except OCRError as e:
        logging.warning('There was an OCR Error: ' + str(e))
        return

    logging.debug(f'result: {result}')

    if not result:
        logging.info('Result was none! Skipping!')
        # we don't want orphan entries
        config.redis.delete(new_post)
        return

    raw_tor_post_id = config.redis.get(new_post)
    if raw_tor_post_id is None:
        # The key that maps the source post to its ToR post is missing, so
        # there is nowhere to post the transcription attempt.
        logging.warning(
            f'No ToR post recorded for {new_post}. Skipping!')
        return
    tor_post_id = raw_tor_post_id.decode('utf-8')

    logging.info(
        f'posting transcription attempt for {new_post} on {tor_post_id}')

    tor_post = config.r.submission(id=clean_id(tor_post_id))

    thing_to_reply_to = tor_post.reply(
        _(base_comment.format(result['process_time_in_ms'] / 1000)))

    # Ordered (old, new) pairs applied to every chunk before posting. The
    # order matters and mirrors the original chained `.replace` calls.
    replacements = (
        ('\r\n', '\n\n'),
        ('/u/', '\\/u/'),
        ('/r/', '\\/r/'),
        (' u/', ' \\/u/'),
        (' r/', ' \\/r/'),
        ('>>', '\\>\\>'),
    )

    for chunk in chunks(result['text'], 9000):
        # end goal: if something is over 9000 characters long, we
        # should post a top level comment, then keep replying to
        # the comments we make until we run out of chunks.
        for old, new in replacements:
            chunk = chunk.replace(old, new)

        thing_to_reply_to = thing_to_reply_to.reply(_(chunk))

    config.redis.delete(new_post)
Exemple #3
0
def process_override(reply, config):
    """
    Lets a ToR moderator force u/transcribersofreddit to mark a post as
    complete and award flair after the bot has refuted a `done` claim.
    The "!override" comment must be a reply to the bot's comment saying
    that it cannot find the transcript.

    :param reply: the comment reply object from the moderator.
    :param config: the global config object.
    :return: None.
    """
    if from_moderator(reply, config):
        # The override replies to the bot's "can't find it" comment, so the
        # comment we actually care about is one level further up: the
        # grandparent, which should hold the `done` call.
        bot_comment = config.r.comment(id=clean_id(reply.parent_id))
        done_comment = config.r.comment(id=clean_id(bot_comment.parent_id))

        if 'done' not in done_comment.body.lower():
            return

        logging.info(
            f'Starting validation override for post {done_comment.fullname}, '
            f'approved by {reply.author.name}')
        process_done(done_comment, config, override=True)
        return

    # Keep this moderator check: unlike the other admin commands this one
    # is triggered by a comment reply rather than a PM, so nothing else
    # guards it.
    refusal_gif = random.choice(config.no_gifs)
    reply.reply(_(refusal_gif))
    logging.info(f'{reply.author.name} just tried to override. Lolno.')
def update_and_restart(reply, config):
    """
    Rejects an update request that does not come from a moderator.

    In this version a request from a moderator is accepted but nothing is
    done with it.

    :param reply: the comment or message object carrying the request.
    :param config: the global config object.
    :return: None.
    """
    if from_moderator(reply, config):
        # moderator request: nothing to do here
        return

    reply.reply(_(random.choice(config.no_gifs)))
    logging.info('{} just issued update. No.'.format(reply.author.name))
def process_override(reply, config):
    """
    Lets a ToR moderator force u/transcribersofreddit to mark a post as
    complete and award flair after the bot has refuted a `done` claim.
    The "!override" comment must be a reply to the bot's comment saying
    that it cannot find the transcript.

    :param reply: the comment reply object from the moderator.
    :param config: the global config object.
    :return: None.
    """
    if from_moderator(reply, config):
        # The override replies to the bot's "can't find it" comment, so the
        # comment holding the `done` call is that comment's parent.
        bot_comment = config.r.comment(id=clean_id(reply.parent_id))
        done_comment = config.r.comment(id=clean_id(bot_comment.parent_id))

        if 'done' not in done_comment.body.lower():
            return

        logging.info(
            'Starting validation override for post {}, approved by {}'.format(
                done_comment.fullname, reply.author.name))
        process_done(done_comment, config, override=True)
        return

    # Not a moderator: refuse with a gif and log the attempt.
    refusal_gif = random.choice(config.no_gifs)
    reply.reply(_(refusal_gif))
    logging.info(
        '{} just tried to override. Lolno.'.format(reply.author.name))
Exemple #6
0
def process_thanks(post, config):
    """
    Replies to a comment with a "you're welcome" message containing a
    randomly chosen thumbs-up gif.

    :param post: the Comment object to reply to.
    :param config: the global config object (not used here).
    :return: None.
    :raises praw.exceptions.APIException: re-raised for any API error other
        than ``DELETED_COMMENT``.
    """
    try:
        gif = random.choice(thumbs_up_gifs)
        post.reply(_(youre_welcome.format(gif)))
    except praw.exceptions.APIException as e:
        if e.error_type != 'DELETED_COMMENT':
            raise
        # The comment vanished before we could answer; nothing left to do.
        logging.debug('Comment requiring thanks was deleted')
def reload_config(reply, config):
    """
    Re-initializes the config when a moderator asks for it; anyone else
    gets a refusal gif.

    :param reply: the comment or message object carrying the request.
    :param config: the global config object, passed to ``initialize``.
    :return: None.
    """
    if from_moderator(reply, config):
        logging.info(
            'Reloading configs at the request of {}'.format(
                reply.author.name))
        initialize(config)
        logging.info('Reload complete.')
        return

    # Not a moderator: log first, then refuse.
    logging.info(
        '{} just issued a reload command. No.'.format(reply.author.name))
    reply.reply(_(random.choice(config.no_gifs)))
Exemple #8
0
def update_and_restart(reply, config):
    """
    Pulls ``origin master`` and re-executes the current process when a
    moderator asks for it; anyone else gets a refusal gif.

    :param reply: the comment or message object carrying the request.
    :param config: the global config object.
    :return: None.
    """
    if from_moderator(reply, config):
        # update from repo
        sh.git.pull("origin", "master")
        # restart own process, reusing the interpreter and arguments that
        # started it
        interpreter = sys.executable
        os.execl(interpreter, interpreter, *sys.argv)
    else:
        reply.reply(_(random.choice(config.no_gifs)))
        logging.info('{} just issued update. No.'.format(reply.author.name))
def process_claim(post, config):
    """
    Handles comment replies containing the word 'claim' and routes
    based on a basic decision tree.

    Depending on the flair of the parent submission, the claimant is asked
    to accept the code of conduct, told the claim succeeded (and the post
    is flaired as in progress), or told that the post is already claimed
    or already completed.

    :param post: The Comment object containing the claim.
    :param config: the global config dict.
    :return: None.
    """
    top_parent = get_parent_post_id(post, config.r)

    # WAIT! Do we actually own this post?
    if top_parent.author.name != 'transcribersofreddit':
        logging.debug('Received `claim` on post we do not own. Ignoring.')
        return

    if not coc_accepted(post, config):
        # do not cache this page. We want to get it every time.
        post.reply(_(
            please_accept_coc.format(get_wiki_page('codeofconduct', config.tor))
        ))
        return

    current_flair = top_parent.link_flair_text
    if not current_flair:
        # There exists the very small possibility that the post was malformed
        # and doesn't actually have flair on it (reported as None or as an
        # empty string). In that case, let's set something so the next part
        # doesn't crash.
        flair_post(top_parent, flair.unclaimed)
        # Do not re-read the attribute from the local object: it may still
        # hold the stale empty value, which would raise a TypeError on the
        # `in` check below. We just set the flair, so use it directly.
        current_flair = flair.unclaimed

    if flair.unclaimed in current_flair:
        # need to get that "Summoned - Unclaimed" in there too
        post.reply(_(claim_success))
        flair_post(top_parent, flair.in_progress)
        logging.info(
            'Claim on ID {} by {} successful'.format(
                top_parent.fullname, post.author
            )
        )
    # can't claim something that's already claimed
    elif current_flair == flair.in_progress:
        post.reply(_(already_claimed))
    elif current_flair == flair.completed:
        post.reply(_(claim_already_complete))
Exemple #10
0
def process_mention(mention):
    """
    Responds to a username mention by sending a private message to the
    author of the mention.

    :param mention: the Comment object containing the username mention.
    :return: None.
    """
    summoner = mention.author

    # message() takes the subject first, then the body
    summoner.message(pm_subject, _(pm_body))
    logging.info(f'Message sent to {summoner.name}!')
Exemple #11
0
def process_done(post, config, override=False):
    """
    Handles comments where the user says they've completed a post.
    Also includes a basic decision tree to enable verification of
    the posts to try and make sure they actually posted a
    transcription.

    :param post: the Comment object which contains the string 'done'.
    :param config: the global config object.
    :param override: A parameter that can only come from process_override()
        and skips the validation check.
    :return: None.
    :raises praw.exceptions.APIException: re-raised for any API error other
        than ``DELETED_COMMENT``.
    """

    top_parent = get_parent_post_id(post, config.r)

    # WAIT! Do we actually own this post?
    if top_parent.author.name != 'transcribersofreddit':
        logging.info('Received `done` on post we do not own. Ignoring.')
        return

    try:
        current_flair = top_parent.link_flair_text
        if not current_flair:
            # A post without flair gives us an empty value here. `in` on
            # None would raise a TypeError, and with no flair there is no
            # claim state to act on, so log it and bail out.
            logging.warning(
                f'Received `done` on ID {top_parent.fullname} but it has no '
                f'flair. Ignoring.'
            )
            return

        if flair.unclaimed in current_flair:
            post.reply(_(done_still_unclaimed))
        elif current_flair == flair.in_progress:
            if not override and not verified_posted_transcript(post, config):
                # we need to double-check these things to keep people
                # from gaming the system
                logging.info(
                    f'Post {top_parent.fullname} does not appear to have a '
                    f'post by claimant {post.author}. Hrm... '
                )
                # noinspection PyUnresolvedReferences
                try:
                    post.reply(_(done_cannot_find_transcript))
                except praw.exceptions.ClientException as e:
                    # We've run into an issue where someone has commented and
                    # then deleted the comment between when the bot pulls mail
                    # and when it processes comments. This should catch that.
                    # Possibly should look into subclassing praw.Comment.reply
                    # to include some basic error handling of this so that
                    # we can fix it throughout the application.
                    logging.warning(e)
                return

            # Control flow:
            # If we have an override, we end up here to complete.
            # If there is no override, we go into the validation above.
            # If the validation fails, post the apology and return.
            # If the validation succeeds, come down here.

            if override:
                logging.info('Moderator override starting!')
            # noinspection PyUnresolvedReferences
            try:
                post.reply(_(done_completed_transcript))
                update_user_flair(post, config)
                logging.info(
                    f'Post {top_parent.fullname} completed by {post.author}!'
                )
            except praw.exceptions.ClientException:
                # If the user deleted their comment and we're already this
                # far into validation, just mark it as done. Clearly they
                # already passed.
                logging.info(
                    f'Attempted to mark post {top_parent.fullname} '
                    f'as done... hit ClientException.'
                )
            # The post is flaired as completed and counted whether or not
            # the reply above went through.
            flair_post(top_parent, flair.completed)

            config.redis.incr('total_completed', amount=1)

    except praw.exceptions.APIException as e:
        if e.error_type == 'DELETED_COMMENT':
            logging.info(
                f'Comment attempting to mark ID {top_parent.fullname} '
                f'as done has been deleted'
            )
            return
        raise  # Re-raise exception if not
Exemple #12
0
def process_mention(mention, config):
    """
    Turns a username mention into a transcription job on ToR: submits a
    link to the mentioned content, posts the rules and "summoned by"
    comments on it, flairs it, and tells the caller how it went.

    :param mention: the Comment object containing the username mention.
    :param config: the global config dict
    :return: None.
    """

    # The permalink is fetched differently for a submission (attribute) and
    # for a comment (method call), hence the two branches.
    if mention.is_root:
        # top-level comment: the content in question is the submission.
        parent = config.r.submission(id=clean_id(mention.link_id))
        parent_permalink = parent.permalink
        # wrap the title in quotes so it looks right in the template.
        parent.title = '"' + parent.title + '"'

        # Ignore requests made by the OP of content or the OP of the submission
        # NOTE(review): this check only runs for the submission branch even
        # though the comment above mentions both cases - confirm intent.
        if mention.author == parent.author:
            logging.info('Ignoring mention by OP u/{} on ID {}'.format(
                mention.author, mention.parent_id))
            return
    else:
        # the mention replies to another comment; work with that comment.
        parent = config.r.comment(id=clean_id(mention.parent_id))
        parent_permalink = parent.permalink()
        # comments have no title attribute, so give the template a
        # placeholder to work with.
        parent.title = 'Unknown Content'

    logging.info('Posting call for transcription on ID {}'.format(
        mention.parent_id))

    if not is_valid(parent.fullname, config):
        # we have seen this one before; nothing to do.
        return

    # noinspection PyBroadException
    try:
        submit_title = summoned_submit_title.format(
            sub=mention.subreddit.display_name,
            commentorpost=parent.__class__.__name__.lower(),
            title=parent.title)
        submit_url = reddit_url.format(parent_permalink)
        result = config.tor.submit(title=submit_title, url=submit_url)

        rules_text = rules_comment_unknown_format.format(header=config.header)
        result.reply(_(rules_text))

        mention_comment = config.r.comment(clean_id(mention.fullname))
        mention_url = reddit_url.format(mention_comment.permalink())
        result.reply(_(summoned_by_comment.format(mention_url)))

        flair_post(result, flair.summoned_unclaimed)
        logging.debug(
            'Posting success message in response to caller, u/{}'.format(
                mention.author))
        mention.reply(
            _('The transcribers have been summoned! Please be patient '
              'and we\'ll be along as quickly as we can.'))
        add_complete_post_id(parent.fullname, config)

        # I need to figure out what errors can happen here
    except Exception as e:
        logging.error(
            '{} - Posting failure message in response to caller, '
            'u/{}'.format(e, mention.author))
        mention.reply(_(something_went_wrong))
Exemple #13
0
def process_post(new_post, config):
    """
    Posts a newly discovered submission to ToR as a workable job: applies
    the skip filters, classifies the content by domain, submits the link
    with the rules comment and the unclaimed flair, and updates the redis
    bookkeeping.

    :param new_post: dict-like submission data that needs to be posted.
    :param config: the config object.
    :return: None.
    """
    subreddit = new_post['subreddit']

    # Subs in the upvote filter only get posts that reached their karma
    # threshold.
    upvote_filters = config.upvote_filter_subs
    if subreddit in upvote_filters and new_post['ups'] < upvote_filters[subreddit]:
        return

    post_id = new_post['name']
    if not is_valid(post_id, config):
        logging.debug(id_already_handled_in_db.format(post_id))
        return

    # Skip archived posts, and deleted ones (no author) - handling those
    # would just be silly.
    if new_post['archived'] or new_post['author'] is None:
        return

    logging.info(
        f'Posting call for transcription on ID {post_id} posted by '
        f'{new_post["author"]}')

    domain = new_post['domain']
    if domain in config.image_domains:
        content_type, content_format = 'image', config.image_formatting
    elif domain in config.audio_domains:
        content_type, content_format = 'audio', config.audio_formatting
    elif domain in config.video_domains:
        if 'youtu' in domain:
            video_url = new_post['url']
            if not valid_youtube_video(video_url):
                add_complete_post_id(post_id, config)
                return
            if get_yt_transcript(video_url):
                # NOTE(review): the id is passed without clean_id() here -
                # confirm the expected id format.
                np = config.r.submission(id=post_id)
                np.reply(_(yt_already_has_transcripts))
                add_complete_post_id(post_id, config)
                logging.info(
                    f'Found YouTube video, {get_yt_video_id(video_url)},'
                    f' with good transcripts.')
                return
        content_type, content_format = 'video', config.video_formatting
    else:
        # This means we pulled from a subreddit bypassing the filters.
        content_type, content_format = 'Other', config.other_formatting

    # Cap the title at 250 characters (ellipsis included) so the added
    # formatting still fits in Reddit's 300 char limit for post titles.
    full_title = new_post['title']
    max_title_length = 250
    post_title = full_title
    if len(full_title) > max_title_length:
        post_title = full_title[:max_title_length - 3] + '...'

    # noinspection PyBroadException
    try:
        submit_title = discovered_submit_title.format(
            sub=subreddit,
            type=content_type.title(),
            title=post_title)
        result = config.tor.submit(
            title=submit_title,
            url=reddit_url.format(new_post['permalink']))

        rules_text = rules_comment.format(
            post_type=content_type,
            formatting=content_format,
            header=config.header)
        result.reply(_(rules_text))
        flair_post(result, flair.unclaimed)

        add_complete_post_id(post_id, config)
        config.redis.incr('total_posted', amount=1)

        if config.OCR and content_type == 'image':
            # hook for OCR bot; in order to avoid race conditions, we add the
            # key / value pair that the bot isn't looking for before adding
            # to the set that it's monitoring.
            config.redis.set(post_id, result.fullname)
            config.redis.rpush('ocr_ids', post_id)

        config.redis.incr('total_new', amount=1)

    # The only errors that happen here are on Reddit's side -- pretty much
    # exclusively 503s and 403s that arbitrarily resolve themselves. A missed
    # post or two is not the end of the world.
    except Exception as e:
        logging.error(
            f'{e} - unable to post content.\nID: {post_id}\n '
            f'Title: {full_title}\n Subreddit: '
            f'{subreddit}')