Beispiel #1
0
def update_top_image_reposts(uowm: UnitOfWorkManager,
                             reddit: Reddit) -> NoReturn:
    """
    Rebuild the top image repost stats for several look-back windows.

    The stats table is truncated first.  Then, for each window (1, 7, 30 and 365
    days), the posts with more than one detected repost are queried (at most 2000
    per window, highest count first).  Rows that already have a stats record for
    the same post and window get their count refreshed; the rest are looked up
    through ``reddit.info`` so the NSFW flag can be stored with the count.
    :param uowm: UnitOfWorkManager
    :param reddit: Praw Reddit instance
    """
    days = [1, 7, 30, 365]
    with uowm.start() as uow:
        uow.session.execute('TRUNCATE `stats_top_image_repost`')
        for day in days:
            result = uow.session.execute(
                'SELECT repost_of, COUNT(*) c FROM image_reposts WHERE detected_at > NOW() - INTERVAL :days DAY GROUP BY repost_of HAVING c > 1 ORDER BY c DESC LIMIT 2000',
                {'days': day})
            for chunk in chunk_list(result.fetchall(), 100):
                # Index the rows by post ID once so each returned submission is
                # matched with a dict lookup instead of rescanning the chunk.
                rows_by_post_id = {}
                reddit_ids_to_lookup = []
                for post in chunk:
                    rows_by_post_id.setdefault(post[0], post)
                    existing = uow.stats_top_image_repost.get_by_post_id_and_days(
                        post[0], day)
                    if existing:
                        existing.repost_count = post[1]
                        continue
                    reddit_ids_to_lookup.append(f't3_{post[0]}')
                for submission in reddit.info(reddit_ids_to_lookup):
                    # Skip submissions we have no row for rather than raising
                    count_data = rows_by_post_id.get(submission.id)
                    if count_data is None:
                        continue
                    uow.stats_top_image_repost.add(
                        StatsTopImageRepost(post_id=count_data[0],
                                            repost_count=count_data[1],
                                            days=day,
                                            nsfw=submission.over_18))
            # One commit per look-back window
            uow.commit()
Beispiel #2
0
def update_banned_sub_wiki(uowm: UnitOfWorkManager,
                           reddit: Reddit) -> NoReturn:
    """
    Update the banned sub wiki page with the most recent list of banned subs.

    The page body is rendered from the ``banned-subs.md`` template in the current
    working directory.  The template is formatted with ``banned_subs`` (a markdown
    table of the banned subreddits) and ``total`` (how many there are).  If the
    template file is missing the job logs a critical message and exits.
    :param uowm: UnitOfWorkmanager
    :param reddit: Praw Reddit instance
    """
    print('[Scheduled Job] Update Ban Wiki Start')
    wiki_template_file = os.path.join(os.getcwd(), 'banned-subs.md')
    if not os.path.isfile(wiki_template_file):
        log.critical('Unable to locate banned sub wiki file at %s',
                     wiki_template_file)
        return

    # Read explicitly as UTF-8 so the result does not depend on the host locale
    with open(wiki_template_file, 'r', encoding='utf-8') as f:
        template = f.read()

    with uowm.start() as uow:
        banned = uow.banned_subreddit.get_all()
        # Copy the values out while the unit of work is still open so no
        # attribute is read from a row after its unit of work has ended.
        results = [[f'r/{sub.subreddit}', sub.detected_at, sub.last_checked]
                   for sub in banned]

    table_data = build_markdown_table(
        results, ['Subreddit', 'Detected At', 'Last Checked'])
    wiki = reddit.subreddit(
        'RepostSleuthBot').wiki['published-data/banned-subreddits']
    wiki.edit(template.format(banned_subs=table_data, total=len(results)))
    log.info('[Banned Sub Wiki Update] Fished update')
    print('[Scheduled Job] Update Ban Wiki End')
Beispiel #3
0
def send_reports_to_meme_voting(uowm: UnitOfWorkManager) -> NoReturn:
    """
    Turn eligible user reports into potential meme templates for voting.

    Reports are loaded with ``get_reports_for_voting(7)``.  A report is skipped
    when its post already has a meme template or a potential template, when the
    post cannot be found, or when a HEAD request for the post's ``searched_url``
    fails or does not answer with HTTP 200.  Every accepted report is flagged as
    sent for voting and committed individually.
    :param uowm: UnitOfWorkManager
    """
    # Seconds to wait for the HEAD request; without a timeout a single
    # unresponsive host could block the whole job indefinitely.
    head_timeout = 10
    with uowm.start() as uow:
        reports = uow.user_report.get_reports_for_voting(7)
        for report in reports:
            if uow.meme_template.get_by_post_id(report.post_id):
                continue
            if uow.meme_template_potential.get_by_post_id(report.post_id):
                continue

            post = uow.posts.get_by_post_id(report.post_id)
            if not post:
                continue
            try:
                response = requests.head(post.searched_url,
                                         timeout=head_timeout)
            except Exception as e:
                # Best effort: a dead or malformed URL just means this report
                # is not sent for voting, but leave a trace of why.
                log.warning('Skipping report for post %s, URL check failed: %s',
                            report.post_id, e)
                continue
            if response.status_code != 200:
                continue

            potential_template = MemeTemplatePotential(
                post_id=report.post_id,
                submitted_by='background',
                vote_total=0)
            uow.meme_template_potential.add(potential_template)
            report.sent_for_voting = True
            uow.commit()
Beispiel #4
0
def update_mod_status(uowm: UnitOfWorkManager, reddit: Reddit) -> NoReturn:
    """
    Go through all monitored subs and record whether the bot is a mod there and
    which permissions it has.

    For each sub, ``is_mod`` is updated.  When the bot is a mod, the ``posts`` and
    ``wiki`` permissions are stored as well.  Changes are committed per sub.
    :param uowm: UnitOfWorkManager
    :param reddit: Praw Reddit instance
    """
    print('[Scheduled Job] Checking Mod Status Start')
    with uowm.start() as uow:
        monitored_subs: List[MonitoredSub] = uow.monitored_sub.get_all()
        for sub in monitored_subs:
            if not is_sub_mod_praw(sub.name, 'RepostSleuthBot', reddit):
                log.info('[Mod Check] Bot is not a mod on %s', sub.name)
                sub.is_mod = False
                uow.commit()
                continue

            sub.is_mod = True
            sub.post_permission = bot_has_permission(sub.name, 'posts', reddit)
            sub.wiki_permission = bot_has_permission(sub.name, 'wiki', reddit)
            log.info('[Mod Check] %s | Post Perm: %s | Wiki Perm: %s',
                     sub.name, sub.post_permission, sub.wiki_permission)
            uow.commit()
    print('[Scheduled Job] Checking Mod Status End')
Beispiel #5
0
def update_ban_list(uowm: UnitOfWorkManager,
                    reddit: Reddit,
                    notification_svc: NotificationService = None) -> NoReturn:
    """
    Re-check every stored subreddit ban and drop the ones that no longer apply.

    Bans whose last check is less than a day old are skipped.  For the others,
    a ban that is still in place gets its ``last_checked`` refreshed; a lifted
    ban is removed and, when a notification service is given, announced.
    :rtype: NoReturn
    :param uowm: UnitOfWorkManager
    :param reddit: Reddit
    :param notification_svc: Optional service used to announce removed bans
    """
    log.info('Starting Job: Update Subreddit Bans')
    with uowm.start() as uow:
        for ban in uow.banned_subreddit.get_all():
            days_since_check = (datetime.utcnow() - ban.last_checked).days
            if days_since_check < 1:
                log.debug('Banned sub %s last checked %s days ago.  Skipping',
                          ban.subreddit, days_since_check)
                continue

            if not is_bot_banned(ban.subreddit, reddit):
                log.info('[Subreddit Ban Check] No longer banned on %s',
                         ban.subreddit)
                uow.banned_subreddit.remove(ban)
                if notification_svc:
                    notification_svc.send_notification(
                        f'Removed {ban.subreddit} from ban list',
                        subject='Subreddit Removed From Ban List!')
            else:
                log.info('[Subreddit Ban Check] Still banned on %s',
                         ban.subreddit)
                ban.last_checked = func.utc_timestamp()

            # Persist the outcome of each check individually
            uow.commit()
def get_link_reposts(
    url: Text,
    uowm: UnitOfWorkManager,
    search_settings: SearchSettings,
    post: Post = None,
    get_total: bool = False,
) -> LinkSearchResults:
    """
    Look up stored posts whose URL hash matches the given URL.

    The URL is hashed with MD5 over its UTF-8 bytes and the hex digest is used
    for the lookup.  The ``query_time`` timer covers only the database lookup;
    the ``total_search_time`` timer is started here and is not stopped by this
    function.
    :param url: URL to search for
    :param uowm: UnitOfWorkManager
    :param search_settings: Settings stored on the returned results
    :param post: Optional post being checked, stored as ``checked_post``
    :param get_total: When True, also fill ``total_searched`` with the count of link posts
    :return: Search results with one match per stored post found
    """
    digest = md5(url.encode('utf-8')).hexdigest()

    with uowm.start() as uow:
        search_results = LinkSearchResults(url,
                                           search_settings,
                                           checked_post=post,
                                           search_times=LinkSearchTimes())
        timers = search_results.search_times
        timers.start_timer('query_time')
        timers.start_timer('total_search_time')
        stored_posts = uow.posts.find_all_by_url_hash(digest)
        search_results.search_times.stop_timer('query_time')
        log.debug('Query time: %s', search_results.search_times.query_time)

        search_results.matches = [
            SearchMatch(url, stored_post) for stored_post in stored_posts
        ]

        if get_total:
            search_results.total_searched = uow.posts.count_by_type('link')

    return search_results
Beispiel #7
0
def check_meme_template_potential_votes(uowm: UnitOfWorkManager) -> NoReturn:
    """
    Promote or discard potential meme templates based on their vote totals.

    A potential template with 10 or more votes becomes a real meme template
    (unless one already exists for the post, in which case the potential
    template is just removed).  One with -10 or fewer votes is removed.
    Anything in between is left alone.  A problem with one potential template
    (missing post, hashing failure) only skips that template; the remaining
    ones are still processed.
    :param uowm: UnitOfWorkManager
    """
    with uowm.start() as uow:
        potential_templates = uow.meme_template_potential.get_all()
        for potential_template in potential_templates:
            if potential_template.vote_total >= 10:
                existing_template = uow.meme_template.get_by_post_id(potential_template.post_id)
                if existing_template:
                    log.info('Meme template already exists for %s. Removing', potential_template.post_id)
                    uow.meme_template_potential.remove(potential_template)
                    uow.commit()
                    continue

                log.info('Post %s received %s votes.  Creating meme template', potential_template.post_id, potential_template.vote_total)
                post = uow.posts.get_by_post_id(potential_template.post_id)
                if not post:
                    # Without the post there is no image to hash
                    log.error('Unable to find post %s to create meme template', potential_template.post_id)
                    continue
                try:
                    meme_hashes = get_image_hashes(post.searched_url, hash_size=32)
                except Exception:
                    log.error('Failed to get meme hash for %s', post.post_id)
                    continue

                meme_template = MemeTemplate(
                    dhash_h=post.dhash_h,
                    dhash_256=meme_hashes['dhash_h'],
                    post_id=post.post_id
                )
                uow.meme_template.add(meme_template)
                uow.meme_template_potential.remove(potential_template)
            elif potential_template.vote_total <= -10:
                log.info('Removing potential template with at least 10 negative votes')
                uow.meme_template_potential.remove(potential_template)
            else:
                continue
            uow.commit()
def save_image_repost_result(search_results: ImageSearchResults,
                             uowm: UnitOfWorkManager,
                             high_match_check: bool = False,
                             source: Text = 'unknown') -> NoReturn:
    """
    Take a found repost and save to the database.

    The checked post is always flagged as checked.  When there are no matches
    only that flag is saved.  Otherwise an ImageRepost pointing at the first
    match is stored together with the updated post.  A failure to commit the
    repost is logged, not raised.
    :param source: What triggered this search
    :rtype: NoReturn
    :param high_match_check: Perform a high match meme check.
    :param search_results: Set of search results
    :param uowm: Unit of Work Manager
    :return:None
    """

    with uowm.start() as uow:
        search_results.checked_post.checked_repost = True

        if not search_results.matches:
            log.debug('Post %s has no matches',
                      search_results.checked_post.post_id)
            uow.posts.update(search_results.checked_post)
            uow.commit()
            return

        # This is used for ingest repost checking.  If a meme template gets created, it intentionally throws a
        # IngestHighMatchMeme exception.  This will cause celery to retry the task so the newly created meme template
        # gets used
        if high_match_check:
            check_for_high_match_meme(
                search_results,
                uowm)  # This intentionally throws if we create a meme template

        first_match = search_results.matches[0]
        log.info('Creating repost. Post %s is a repost of %s',
                 search_results.checked_post.url,
                 first_match.post.url)
        new_repost = ImageRepost(
            post_id=search_results.checked_post.post_id,
            repost_of=first_match.post.post_id,
            hamming_distance=first_match.hamming_distance,
            annoy_distance=first_match.annoy_distance,
            author=search_results.checked_post.author,
            search_id=search_results.logged_search.id
            if search_results.logged_search else None,
            subreddit=search_results.checked_post.subreddit,
            source=source)

        uow.image_repost.add(new_repost)
        # Keep the update and commit inside the unit of work so they run
        # before it is exited.
        uow.posts.update(search_results.checked_post)

        try:
            uow.commit()
        except Exception:
            log.exception('Failed to save image repost')
Beispiel #9
0
def remove_expired_bans(uowm: UnitOfWorkManager, notification_svc: NotificationService = None) -> NoReturn:
    """
    Delete every expired user ban, committing after each removal.
    :param uowm: UnitOfWorkManager
    :param notification_svc: Optional service used to announce each removed ban
    """
    print('[Scheduled Job] Removed Expired Bans Start')
    with uowm.start() as uow:
        for expired_ban in uow.banned_user.get_expired_bans():
            if notification_svc:
                # NOTE(review): update_ban_list calls send_notification() on a
                # NotificationService while this calls send() - confirm which
                # method the service actually exposes.
                message = f'Removing expired ban for user {expired_ban.name}'
                notification_svc.send(message, subject='**Expired Ban Removed**')
            log.info('[Ban Remover] Removing %s from ban list', expired_ban.name)
            uow.banned_user.remove(expired_ban)
            uow.commit()
def check_for_post_watch(matches: List[SearchMatch],
                         uowm: UnitOfWorkManager) -> List[Dict]:
    """
    Collect the active watch requests registered for any of the matched posts.
    :param matches: Search matches whose posts should be checked for watches
    :param uowm: UnitOfWorkManager
    :return: One ``{'match': ..., 'watch': ...}`` dict per active watch found
    """
    found = []
    with uowm.start() as uow:
        for match in matches:
            active_watches = uow.repostwatch.get_all_active_by_post_id(
                match.post.post_id)
            if not active_watches:
                continue
            log.info('Found %s active watch requests for post %s',
                     len(active_watches), match.post.post_id)
            found.extend({'match': match, 'watch': active_watch}
                         for active_watch in active_watches)
    return found
Beispiel #11
0
def queue_post_watch_cleanup(uowm: UnitOfWorkManager, config: Config) -> NoReturn:
    """
    Send all watches to celery to check if the post has been deleted.

    Nothing is queued while the ``watch_remove_deleted`` list in Redis still has
    entries.  Otherwise all watches are sent in chunks built with a size of 30.
    :param uowm: Unit of work manager
    :param config: Config used to obtain the Redis client
    """
    print('[Scheduled Job] Queue Deleted Watch Check')
    redis = get_redis_client(config)
    # LLEN answers "is anything pending?" without transferring the entries
    if redis.llen('watch_remove_deleted') > 0:
        log.info('Deleted watchqueue still has pending jobs.  Skipping update queueing ')
        return

    with uowm.start() as uow:
        watches = uow.repostwatch.get_all()
        for chunk in chunk_list(watches, 30):
            check_if_watched_post_is_active.apply_async((chunk,))
0
def queue_config_updates(uowm: UnitOfWorkManager, config: Config) -> NoReturn:
    """
    Queue a config update check task for every monitored sub.

    Nothing is queued while the ``config_update_check`` list in Redis still has
    entries.
    :param uowm: UnitOfWorkManager
    :param config: Config used to obtain the Redis client
    """
    print('[Scheduled Job] Queue config update check')
    redis = get_redis_client(config)
    # LLEN answers "is anything pending?" without transferring the entries
    if redis.llen('config_update_check') > 0:
        log.info(
            'Config update queue still has pending jobs.  Skipping update queueing '
        )
        return

    with uowm.start() as uow:
        monitored_subs = uow.monitored_sub.get_all()
        for monitored_sub in monitored_subs:
            check_for_subreddit_config_update_task.apply_async(
                (monitored_sub, ))

    print('[Scheduled Job Complete] Queue config update check')
def save_link_repost(post: Post, repost_of: Post, uowm: UnitOfWorkManager,
                     source: Text) -> None:
    """
    Store a link repost record and flag the post as checked.

    Commit failures are logged, not raised.  An IntegrityError is reported as
    the repost already existing.
    :param post: The post identified as a repost
    :param repost_of: The post it is a repost of
    :param uowm: UnitOfWorkManager
    :param source: What triggered the search
    """
    with uowm.start() as uow:
        repost_fields = {
            'post_id': post.post_id,
            'repost_of': repost_of.post_id,
            'author': post.author,
            'subreddit': post.subreddit,
            'source': source,
        }
        link_repost = LinkRepost(**repost_fields)

        post.checked_repost = True
        uow.posts.update(post)
        uow.link_repost.add(link_repost)
        try:
            uow.commit()
        except IntegrityError:
            log.error('Failed to save link repost, it already exists')
        except Exception:
            log.exception('Failed to save link repost', exc_info=True)
def pre_process_post(post: Post, uowm: UnitOfWorkManager, hash_api) -> Post:
    """
    Run type-specific processing on a post and save it.

    Image posts go through ``process_image_post`` and have their image records
    added alongside the post.  Link posts get an MD5 hex digest of their URL
    stored as ``url_hash``.  Other post types are saved as they are.
    :param post: Post to process
    :param uowm: UnitOfWorkManager
    :param hash_api: Passed through to ``process_image_post``
    :return: The saved post, or None when image processing or the save failed
    """
    log.debug(post)
    with uowm.start() as uow:
        if post.post_type == 'image':
            log.debug('Post %s: Is an image', post.post_id)
            try:
                post, image_post, image_post_current = process_image_post(
                    post, hash_api)
            except (ImageRemovedException, ImageConversioinException,
                    InvalidImageUrlException, ConnectionError):
                return None

            if any(record is None
                   for record in (image_post, image_post_current)):
                log.error(
                    'Post %s: Failed to save image post. One of the post objects is null',
                    post.post_id)
                log.error('Image Post: %s - Image Post Current: %s',
                          image_post, image_post_current)
                return None

            if not post.dhash_h:
                log.error('Post %s: is missing dhash', post.post_id)
                return None

            uow.image_post.add(image_post)
            uow.image_post_current.add(image_post_current)
        elif post.post_type == 'link':
            post.url_hash = md5(post.url.encode('utf-8')).hexdigest()
            log.debug('Set URL hash for post %s', post.post_id)
        # 'hosted:video' and every other post type need no extra processing

        try:
            uow.posts.add(post)
            uow.commit()
            log.debug('Post %s: Commited post to database', post.post_id)
        except IntegrityError:
            log.exception('Post %s: Database save failed',
                          post.post_id,
                          exc_info=False)
            return None

    return post
Beispiel #15
0
def check_for_high_match_meme(search_results: ImageSearchResults,
                              uowm: UnitOfWorkManager) -> NoReturn:
    """
    Create a meme template for a heavily matched post from a meme subreddit.

    Nothing happens when the search already used a meme template.  Otherwise a
    template is created when the search has more than 5 matches and the checked
    post's subreddit name contains 'meme'.
    :param search_results: Image search results to inspect
    :param uowm: UnitOfWorkManager
    :raises IngestHighMatchMeme: After a new template was saved, so the post gets rechecked
    """
    if search_results.meme_template is not None:
        return

    with uowm.start() as uow:
        # TODO - 1/12/2021 - Should probably remember the meme in subreddit check and generate more templates
        is_candidate = (
            len(search_results.matches) > 5
            and 'meme' in search_results.checked_post.subreddit.lower())
        if not is_candidate:
            return

        checked_post = search_results.checked_post
        try:
            meme_hashes = get_image_hashes(checked_post.url, hash_size=32)
        except Exception:
            log.error('Failed to get meme hash for %s', checked_post.post_id)
            return

        try:
            meme_template = MemeTemplate(
                dhash_h=checked_post.dhash_h,
                dhash_256=meme_hashes['dhash_h'],
                post_id=checked_post.post_id)

            uow.meme_template.add(meme_template)
            uow.commit()
        except IntegrityError:
            log.exception(
                f'Failed to create meme template. Template already exists for post {search_results.checked_post.post_id}',
                exc_info=True)
            return

        if meme_template:
            log.info('Saved new meme template for post %s in %s',
                     checked_post.post_id,
                     checked_post.subreddit)
            # Raise exception so celery will retry the task and use the new meme template
            raise IngestHighMatchMeme(
                'Created meme template.  Post needs to be rechecked')
Beispiel #16
0
def build_msg_values_from_search(search_results: 'SearchResults', uowm: UnitOfWorkManager = None, **kwargs) -> Dict:
    """
    Build the dict of values used to fill a message template for a search.

    The result is layered: the values computed here, then
    ``search_results.search_settings.to_dict()``, then ``kwargs``.  Later layers
    win when keys clash.  The first match supplies the "oldest" values and the
    last match the "newest" ones.
    :param search_results: Search results to describe
    :param uowm: UnitOfWorkManager; when given, total_posts is taken from the newest post's id
    :param kwargs: Extra values merged in last
    :return: Dict of template values
    """
    checked_post = search_results.checked_post
    matches = search_results.matches

    base_values = {
        'total_searched': f'{search_results.total_searched:,}',
        'total_posts': 0,
        'match_count': len(matches),
        'post_type': checked_post.post_type,
        'this_subreddit': checked_post.subreddit,
        'times_word': 'time' if len(matches) <= 1 else 'times',
        'stats_searched_post_str': searched_post_str(checked_post, search_results.total_searched),
        'post_shortlink': f'https://redd.it/{checked_post.post_id}',
        'post_author': checked_post.author,
        'report_post_link': '',
    }

    if search_results.search_times:
        base_values['search_time'] = search_results.search_times.total_search_time

    if matches:
        oldest = matches[0].post
        newest = matches[-1].post
        base_values.update({
            'oldest_created_at': oldest.created_at,
            'oldest_url': oldest.url,
            'oldest_shortlink': f'https://redd.it/{oldest.post_id}',
            'oldest_sub': oldest.subreddit,
            'newest_created_at': newest.created_at,
            'newest_url': newest.url,
            'newest_shortlink': f'https://redd.it/{newest.post_id}',
            'newest_sub': newest.subreddit,
            'first_seen': f"First Seen [Here](https://redd.it/{oldest.post_id}) on {oldest.created_at.strftime('%Y-%m-%d')}",
            'last_seen': f"Last Seen [Here](https://redd.it/{newest.post_id}) on {newest.created_at.strftime('%Y-%m-%d')}",
        })

    if uowm:
        with uowm.start() as uow:
            newest_post = uow.posts.get_newest_post()
            base_values['total_posts'] = f'{newest_post.id:,}'

    merged = dict(base_values)
    merged.update(search_results.search_settings.to_dict())
    merged.update(kwargs)
    return merged
Beispiel #17
0
def update_monitored_sub_data(uowm: UnitOfWorkManager) -> NoReturn:
    """
    Queue a stats update task for every active monitored sub.
    :param uowm: UnitOfWorkManager
    """
    print('[Scheduled Job] Update Monitored Sub Data')
    with uowm.start() as uow:
        for active_sub in uow.monitored_sub.get_all_active():
            task_args = (active_sub.name, )
            update_monitored_sub_stats.apply_async(task_args)