Esempio n. 1
0
 def _disable_watch(self, summons: Summons) -> NoReturn:
     """
     Turn off the requestor's enabled watch on the summoned post and reply with the outcome
     :param summons: Summons providing the requestor and post ID used to look up the watch
     """
     reply = SummonsResponse(summons=summons)
     with self.uowm.start() as uow:
         watch = uow.repostwatch.find_existing_watch(summons.requestor,
                                                     summons.post_id)
         # Both "no watch at all" and "watch already disabled" get the same reply
         if not (watch and watch.enabled):
             reply.message = WATCH_DISABLED_NOT_FOUND
             self._send_response(reply)
             return

         watch.enabled = False
         try:
             uow.commit()
             reply.message = WATCH_DISABLED
             log.info('Disabled watch for post %s for user %s',
                      summons.post_id, summons.requestor)
         except Exception:
             log.exception('Failed to disable watch %s',
                           watch.id,
                           exc_info=True)
             reply.message = 'An error prevented me from removing your watch on this post.  Please try again'
         # The response is sent whether or not the commit succeeded
         self._send_response(reply)
Esempio n. 2
0
    def save_unknown_post(self, post_id: str) -> Post:
        """
        Fetch the submission for a post we have not ingested and run it through pre-processing
        :param post_id: ID of the Reddit submission to ingest
        :return: The pre-processed post, or None when pre-processing produced nothing
        """
        log.info('Post %s does not exist, attempting to ingest', post_id)
        submission = self.reddit.submission(post_id)
        try:
            ingested = pre_process_post(submission_to_post(submission),
                                        self.uowm, None)
        except InvalidImageUrlException:
            log.error('Failed to ingest post %s.  URL appears to be bad',
                      post_id)
            ingested = None

        if ingested:
            return ingested

        log.error(
            'Problem ingesting post.  Either failed to save or it is not an image'
        )
        return None
def check_for_high_match_meme(search_results: ImageSearchResults,
                              uowm: UnitOfWorkManager) -> NoReturn:
    """
    Create a meme template from the checked post when a search without a template
    returned more than 5 matches in a subreddit with 'meme' in its name
    :param search_results: Results of the image search that was just run
    :param uowm: Unit of work manager used to save the new template
    :raises IngestHighMatchMeme: After a template is saved, so the post gets rechecked
    """
    if search_results.meme_template is not None:
        return

    with uowm.start() as uow:
        # TODO - 1/12/2021 - Should probably remember the meme in subreddit check and generate more templates
        if len(search_results.matches) <= 5:
            return
        checked_post = search_results.checked_post
        if 'meme' not in checked_post.subreddit.lower():
            return

        try:
            meme_hashes = get_image_hashes(checked_post.url, hash_size=32)
        except Exception:
            log.error('Failed to get meme hash for %s', checked_post.post_id)
            return

        try:
            template = MemeTemplate(dhash_h=checked_post.dhash_h,
                                    dhash_256=meme_hashes['dhash_h'],
                                    post_id=checked_post.post_id)
            uow.meme_template.add(template)
            uow.commit()
        except Exception:
            log.exception('Failed to create meme template', exc_info=True)
            return

        log.info('Saved new meme template for post %s in %s',
                 checked_post.post_id, checked_post.subreddit)
        # Raise exception so celery will retry the task and use the new meme template
        raise IngestHighMatchMeme(
            'Created meme template.  Post needs to be rechecked')
Esempio n. 4
0
 def _reply_to_comment(self,
                       comment_id: Text,
                       comment_body: Text,
                       subreddit: Text = None) -> Optional[Comment]:
     """
     Leave a reply on an existing comment and record the API call timing
     :rtype: Optional[Comment]
     :param comment_id: ID of comment to reply to
     :param comment_body: Body of the comment to leave in reply
     :param subreddit: Optional subreddit name, handed to _save_banned_sub when the reply is Forbidden
     :return: The reply that was created, or None when the parent comment was not found
     """
     parent = self.reddit.comment(comment_id)
     if not parent:
         log.error('Failed to find comment %s', comment_id)
         return None

     try:
         started_at = perf_counter()
         reply = parent.reply(comment_body)
         elapsed = float(round(perf_counter() - started_at, 2))
         self._record_api_event(elapsed, 'reply_to_comment',
                                self.reddit.reddit.auth.limits['remaining'])
         self._log_response(reply)
         log.info('Left comment at: https://reddit.com%s', reply.permalink)
         return reply
     except Forbidden:
         log.exception('Forbidden to respond to comment %s',
                       comment_id,
                       exc_info=False)
         # On Forbidden we may not have the comment data needed to look up the
         # subreddit, so only the caller-supplied name can be saved
         if subreddit:
             self._save_banned_sub(subreddit)
         raise
     except AssertionError:
         log.exception('Problem leaving comment', exc_info=True)
         raise
Esempio n. 5
0
def fingerprint_audio_dl(self, post):
    """
    Download a post's media, fingerprint the audio and save the fingerprints
    :param post: Post to fingerprint; post_id and searched_url are read from it
    :return: None.  Returns early if the post is already fingerprinted or the
             download / fingerprinting step fails
    """
    with self.uowm.start() as uow:
        if uow.audio_finger_print.get_by_post_id(post.post_id):
            log.error('Post %s has already been fingerprinted', post.post_id)
            return

    try:
        file = download_file(post.searched_url)
    except Exception:
        log.error('Failed to download file from %s', post.searched_url)
        return

    # The directory holding the downloaded file is removed once we are done with it
    download_dir = os.path.split(file)[0]
    try:
        try:
            hashes = fingerprint_audio_file(file)
        except Exception:
            # log.exception already records the exception and its traceback
            log.exception('Problem finger printing post %s', post.post_id)
            return

        fingerprints = []
        for audio_hash in hashes:
            fingerprint = AudioFingerPrint()
            fingerprint.post_id = post.post_id
            fingerprint.hash = audio_hash[0]
            fingerprint.offset = audio_hash[1]
            fingerprints.append(fingerprint)

        # The unit of work opened above has already left its `with` block, so
        # the save runs in a fresh one instead of reusing the exited one
        with self.uowm.start() as uow:
            uow.audio_finger_print.bulk_save(fingerprints)
            uow.commit()
        log.info('Finished fingerprinting %s', post.post_id)
    finally:
        # Clean up the download even when fingerprinting or saving fails
        shutil.rmtree(download_dir)
Esempio n. 6
0
    def should_check_post(self,
                          post: Post,
                          check_image: bool,
                          check_link: bool,
                          title_keyword_filter: List[Text] = None) -> bool:
        """
        Decide whether a post is eligible to be checked
        :rtype: bool
        :param post: Post to evaluate
        :param check_image: Whether image posts should be checked
        :param check_link: Whether link posts should be checked
        :param title_keyword_filter: Optional list of keywords; a post whose lowercased title contains one is skipped
        :return: True when none of the skip conditions apply
        """
        if post.left_comment:
            return False

        post_type = post.post_type
        if post_type not in self.config.supported_post_types:
            return False

        if post_type == 'image' and not check_image:
            return False

        if post_type == 'link' and not check_link:
            log.info('Skipping link post')
            return False

        if post.crosspost_parent:
            log.debug('Skipping crosspost')
            return False

        if title_keyword_filter:
            lowered_title = post.title.lower()
            # First keyword found in the title, in filter order
            matched = next(
                (kw for kw in title_keyword_filter if kw in lowered_title),
                None)
            if matched is not None:
                log.debug('Skipping post with keyword %s in title %s', matched,
                          post.title)
                return False

        return True
Esempio n. 7
0
    def _send_private_message(
            self,
            user: Redditor,
            message_body,
            subject: Text = 'Repost Check',
            source: Text = None,
            post_id: Text = None,
            comment_id: Text = None
    ) -> NoReturn:
        """
        Send a private message to a user and save a record of it
        :param user: Redditor to message; nothing is sent when this is falsy
        :param message_body: Body of the private message
        :param subject: Subject line of the private message
        :param source: Stored on the saved record as triggered_from
        :param post_id: Stored on the saved record as in_response_to_post
        :param comment_id: Stored on the saved record as in_response_to_comment
        """
        if not user:
            log.error('No user provided to send private message')
            return

        try:
            started_at = perf_counter()
            user.message(subject, message_body)
            elapsed = float(round(perf_counter() - started_at, 2))
            self._record_api_event(
                elapsed,
                'private_message',
                self.reddit.reddit.auth.limits['remaining']
            )
            log.info('Sent PM to %s. ', user.name)
        except Exception:
            # Failures are logged here and passed on to the caller
            log.exception('Failed to send PM to %s', user.name, exc_info=True)
            raise

        sent_message = BotPrivateMessage(
            subject=subject,
            body=message_body,
            in_response_to_post=post_id,
            in_response_to_comment=comment_id,
            triggered_from=source,
            recipient=user.name
        )
        self._save_private_message(sent_message)
Esempio n. 8
0
def update_banned_sub_wiki(uowm: UnitOfWorkManager, reddit: Reddit) -> NoReturn:
    """
    Rebuild the banned subreddit wiki page from the stored banned subreddit records
    :param uowm: UnitOfWorkManager
    :param reddit: Praw Reddit instance
    """
    print('[Scheduled Job] Update Ban Wiki Start')
    template_path = os.path.join(os.getcwd(), 'banned-subs.md')
    if not os.path.isfile(template_path):
        log.critical('Unable to locate banned sub wiki file at %s', template_path)
        return

    with open(template_path, 'r') as template_file:
        template = template_file.read()

    with uowm.start() as uow:
        banned = uow.banned_subreddit.get_all()

    # One table row per banned subreddit
    rows = []
    for sub in banned:
        rows.append([f'r/{sub.subreddit}', sub.detected_at, sub.last_checked])
    table_data = build_markdown_table(rows, ['Subreddit', 'Detected At', 'Last Checked'])

    wiki_page = reddit.subreddit('RepostSleuthBot').wiki['published-data/banned-subreddits']
    wiki_page.edit(template.format(banned_subs=table_data, total=len(banned)))
    log.info('[Banned Sub Wiki Update] Fished update')
    print('[Scheduled Job] Update Ban Wiki End')
def get_all_links():
    """
    Queue image posts whose last deleted check is at least 90 days old for cleanup

    Posts are sent to the 'deleted_post_cleanup' queue in batches of 30.  Nothing
    is queued while that queue still has pending items.
    """
    print('[Scheduled Job] Queue deleted posts')
    redis = get_redis_client(config)
    if redis.lrange('deleted_post_cleanup', 0, 20000):
        log.info(
            'Deleted post cleanup queue still has pending jobs.  Skipping queueing '
        )
        return

    query = "SELECT post_id, url, post_type FROM reddit_post WHERE post_type='image' AND last_deleted_check <= NOW() - INTERVAL 90 DAY LIMIT 1000000"
    conn = get_db_conn()
    batch = []
    with conn.cursor() as cur:
        cur.execute(query)
        log.info('Adding items to index')
        for row in cur:
            batch.append({'id': row['post_id'], 'url': row['url']})
            if len(batch) < 30:
                continue
            try:
                deleted_post_cleanup.apply_async(
                    (batch, ), queue='deleted_post_cleanup')
            except Exception:
                # Keep the batch so it is retried together with the next row
                log.exception(
                    'Failed to queue deleted post cleanup batch of %s posts',
                    len(batch))
                continue
            batch = []

    # Queue whatever is left over; without this the final partial batch
    # (fewer than 30 posts) would never be sent
    if batch:
        try:
            deleted_post_cleanup.apply_async(
                (batch, ), queue='deleted_post_cleanup')
        except Exception:
            log.exception(
                'Failed to queue final deleted post cleanup batch of %s posts',
                len(batch))
Esempio n. 10
0
    def check_for_config_update(self,
                                monitored_sub: MonitoredSub,
                                notify_missing_keys=True):
        """
        Load a monitored sub's wiki config and handle any config keys it is missing
        :param monitored_sub: Monitored sub whose wiki config should be checked
        :param notify_missing_keys: When True, notify the sub about missing keys after the wiki is updated
        """
        if not monitored_sub.is_mod:
            log.error('Bot is not a mod on %s, skipping config update',
                      monitored_sub.name)
            return

        if not monitored_sub.wiki_permission:
            log.error('Bot does not have wiki permissions on %s',
                      monitored_sub.name)
            return

        subreddit = self.reddit.subreddit(monitored_sub.name)
        wiki_page = subreddit.wiki[self.config.wiki_config_name]

        try:
            # Accessed only to surface NotFound / Forbidden before doing any work
            wiki_page.content_md
        except NotFound:
            # No config page yet, so create the initial one and stop here
            self.create_initial_wiki_config(subreddit, wiki_page,
                                            monitored_sub)
            return
        except Forbidden:
            return

        try:
            if self._is_config_updated(wiki_page.revision_id):
                log.info('Already have the newest config for %s',
                         monitored_sub.name)
                wiki_config = self.get_wiki_config(wiki_page)
            else:
                log.info('Newer config found for %s', monitored_sub.name)
                wiki_config = self._load_new_config(wiki_page, monitored_sub,
                                                    subreddit)
        except JSONDecodeError:
            return

        missing_keys = self._get_missing_config_values(wiki_config)
        if not missing_keys:
            return
        log.info('Sub %s is missing keys %s', monitored_sub.name, missing_keys)

        if not self.update_wiki_config_from_database(monitored_sub, wiki_page):
            return

        if notify_missing_keys and self._notify_new_options(
                subreddit, missing_keys):
            self._set_config_notified(wiki_page.revision_id)
    def _add_comment(self, post: Post,
                     search_results: SearchResults) -> NoReturn:
        """
        Add a comment to the post
        :rtype: NoReturn
        :param post: Post to comment on
        :param search_results: Results used to build the comment body
        :return: NoReturn
        """

        if self._is_banned_sub(post.subreddit):
            log.info('Skipping banned sub %s', post.subreddit)
            # Flag the post so it is not picked up for commenting again
            with self.uowm.start() as uow:
                post.left_comment = True
                uow.posts.update(post)
                uow.commit()
            return

        if self._left_comment(post.post_id):
            log.info('Already left comment on %s', post.post_id)
            return

        with self.uowm.start() as uow:
            monitored_sub = uow.monitored_sub.get_by_sub(post.subreddit)
            if monitored_sub:
                log.info('Skipping monitored sub %s', post.subreddit)
                return

        msg = self.response_builder.build_default_comment(search_results)

        try:
            self.response_handler.reply_to_submission(post.post_id, msg)
        except APIException:
            log.error('Failed to leave comment on %s in %s. ', post.post_id,
                      post.subreddit)
        except Exception:
            # Still best-effort, but record the failure instead of hiding it
            log.exception('Unexpected error leaving comment on %s in %s',
                          post.post_id, post.subreddit)

        # The post is flagged even when the reply failed, as before
        with self.uowm.start() as uow:
            post.left_comment = True
            uow.posts.update(post)
            uow.commit()
Esempio n. 12
0
 def compare_configs(self, config_one: Dict,
                     config_two: Dict) -> List[Dict]:
     """
     List the keys of config_one whose values differ in config_two
     :param config_one: Config whose keys drive the comparison
     :param config_two: Config compared against; keys it lacks are logged, not returned
     :return: One dict per differing key with 'key', 'config_one' and 'config_two' entries
     """
     differences = []
     for key, first_value in config_one.items():
         if key not in config_two:
             log.error('Config 2 missing key %s', key)
             continue

         second_value = config_two[key]
         if second_value != first_value:
             log.info('Key: %s | Config 1: %s | Config 2: %s', key,
                      first_value, second_value)
             differences.append({
                 'key': key,
                 'config_one': first_value,
                 'config_two': second_value
             })

     if not differences:
         log.info('Confings match')
     else:
         log.info('Config Difs: %s', differences)
     return differences
Esempio n. 13
0
def check_meme_template_potential_votes(uowm: UnitOfWorkManager) -> NoReturn:
    """
    Promote or discard potential meme templates based on their vote totals

    A potential template with a vote total of 10 or more becomes a meme
    template (or is just removed when one already exists for the post).  One
    with a vote total of -10 or less is removed.  Everything else is left alone.
    :param uowm: Unit of work manager used to read and save templates
    """
    with uowm.start() as uow:
        for potential_template in uow.meme_template_potential.get_all():
            if potential_template.vote_total >= 10:
                existing_template = uow.meme_template.get_by_post_id(
                    potential_template.post_id)
                if existing_template:
                    log.info('Meme template already exists for %s. Removing',
                             potential_template.post_id)
                    uow.meme_template_potential.remove(potential_template)
                    uow.commit()
                    # Move on to the next candidate instead of ending the whole run
                    continue

                log.info('Post %s received %s votes.  Creating meme template',
                         potential_template.post_id,
                         potential_template.vote_total)
                post = uow.posts.get_by_post_id(potential_template.post_id)
                if not post:
                    # Without the post there is nothing to hash
                    log.error('Post %s not found.  Cannot create meme template',
                              potential_template.post_id)
                    continue

                try:
                    meme_hashes = get_image_hashes(post.searched_url,
                                                   hash_size=32)
                except Exception:
                    log.error('Failed to get meme hash for %s', post.post_id)
                    # A failure on one post must not block the other candidates
                    continue

                meme_template = MemeTemplate(dhash_h=post.dhash_h,
                                             dhash_256=meme_hashes['dhash_h'],
                                             post_id=post.post_id)
                uow.meme_template.add(meme_template)
                uow.meme_template_potential.remove(potential_template)
            elif potential_template.vote_total <= -10:
                log.info(
                    'Removing potential template with at least 10 negative votes'
                )
                uow.meme_template_potential.remove(potential_template)
            else:
                continue
            uow.commit()
Esempio n. 14
0
    def handle_summons(self):
        """
        Continually check the summons table for new requests.  Handle them as they are found

        Runs forever, pausing 2 seconds between passes.  Any exception raised
        during a pass is logged and the loop carries on.
        """
        while True:
            try:
                with self.uowm.start() as uow:
                    for s in uow.summons.get_unreplied():
                        log.info('Starting summons %s', s.id)
                        post = uow.posts.get_by_post_id(s.post_id)
                        if not post:
                            post = self.save_unknown_post(s.post_id)

                        if not post:
                            # Reply to this summons only, not the whole list of unreplied summons
                            response = SummonsResponse(summons=s)
                            response.message = 'Sorry, I\'m having trouble with this post. Please try again later'
                            log.info(
                                'Failed to ingest post %s.  Sending error response',
                                s.post_id)
                            self._send_response(response)
                            continue

                        self.process_summons(s, post)
                        # TODO - This sends completed summons events to influx even if they fail
                        # total_seconds() keeps whole days; timedelta.seconds would drop them
                        elapsed = datetime.utcnow() - s.summons_received_at
                        summons_event = SummonsEvent(
                            int(elapsed.total_seconds()),
                            s.summons_received_at,
                            s.requestor,
                            event_type='summons')
                        self._send_event(summons_event)
                        log.info('Finished summons %s', s.id)
            except Exception:
                log.exception('Exception in handle summons thread',
                              exc_info=True)
            # Pause after failed passes too, so a persistent error cannot spin the loop
            time.sleep(2)
Esempio n. 15
0
                      uowm,
                      reddit_manager,
                      response_builder,
                      ResponseHandler(reddit_manager,
                                      uowm,
                                      event_logger,
                                      source='submonitor',
                                      live_response=config.live_responses),
                      event_logger=event_logger,
                      config=config)
 redis = get_redis_client(config)
 while True:
     while True:
         queued_items = redis.lrange('submonitor', 0, 20000)
         if len(queued_items) == 0:
             log.info('Sub monitor queue empty.  Starting over')
             break
         log.info('Sub monitor queue still has %s tasks', len(queued_items))
         time.sleep(60)
     with uowm.start() as uow:
         monitored_subs = uow.monitored_sub.get_all()
         for monitored_sub in monitored_subs:
             if not monitored_sub.active:
                 continue
             log.info('Checking sub %s', monitored_sub.name)
             if not monitored_sub.active:
                 log.debug('Sub %s is disabled', monitored_sub.name)
                 continue
             if not monitored_sub.check_all_submissions:
                 log.info('Sub %s does not have post checking enabled',
                          monitored_sub.name)
import threading

# TODO - Mega hackery, figure this out.
import sys
from time import sleep

sys.path.append('./')
from redditrepostsleuth.core.config import Config
from redditrepostsleuth.core.db.db_utils import get_db_engine
from redditrepostsleuth.core.db.uow.sqlalchemyunitofworkmanager import SqlAlchemyUnitOfWorkManager

from redditrepostsleuth.core.logging import log
from redditrepostsleuth.core.util.helpers import get_reddit_instance
from redditrepostsleuth.ingestsvc.postingestor import PostIngestor

if __name__ == '__main__':
    # Entry point: start both ingest loops on their own threads
    log.info('Starting post ingestor')
    print('Starting post ingestor')
    config = Config()
    uowm = SqlAlchemyUnitOfWorkManager(get_db_engine(config))
    ingestor = PostIngestor(get_reddit_instance(config), uowm, config)

    praw_thread = threading.Thread(target=ingestor.ingest_without_stream,
                                   name='praw_ingest')
    praw_thread.start()

    pushshift_thread = threading.Thread(target=ingestor.ingest_pushshift,
                                        name='pushshift_ingest')
    pushshift_thread.start()

    # Idle in the main thread while the ingest threads run
    while True:
        sleep(10)
Esempio n. 17
0
def cleanup_removed_posts_batch(self, posts: List[Text]) -> NoReturn:
    """
    Ask the util API which posts were removed and apply the action it returns for each
    :param posts: Batch sent as the JSON body to the util API's /maintenance/removed endpoint
    :raises ValueError: When the UTIL_API environment variable is not set
    """
    util_api = os.getenv('UTIL_API')
    if not util_api:
        raise ValueError('Missing util API')

    try:
        res = requests.post(f'{util_api}/maintenance/removed', json=posts)
    except Exception:
        log.exception('Failed to call delete check api', exc_info=True)
        return
    if res.status_code != 200:
        log.error('Unexpected status code: %s', res.status_code)
        return

    res_data = json.loads(res.text)
    with self.uowm.start() as uow:
        for p in res_data:
            # A URL whose host is listed in BAD_DOMAINS is always removed
            if urlparse(p['url']).hostname in BAD_DOMAINS:
                p['action'] = 'remove'

            action = p['action']
            if action == 'update':
                post = uow.posts.get_by_post_id(p['id'])
                if post:
                    post.last_deleted_check = func.utc_timestamp()
            elif action == 'remove':
                # Only queue the post for deletion here
                uow.to_be_deleted.add(
                    ToBeDeleted(post_id=p['id'], post_type='image'))
            elif action == 'default':
                # Log the URL from the API result: no post has been loaded in
                # this branch, so `post` would be unbound or left over from an
                # earlier iteration
                log.info('Got default: %s', p['url'])
            # 'skip' and any other action need no work

        uow.commit()
Esempio n. 18
0
def image_post_cleanup(self, posts: List[Text]) -> NoReturn:
    """
    Delete each given post along with the records that reference it
    :param posts: Entries to clean up.  Each one is read via .post_id and is itself
                  removed through uow.to_be_deleted, so these look like to-be-deleted
                  records rather than plain strings -- TODO confirm against the caller
    """
    with self.uowm.start() as uow:
        for p in posts:
            post = uow.posts.get_by_post_id(p.post_id)
            image_post = uow.image_post.get_by_post_id(p.post_id)
            image_post_current = uow.image_post_current.get_by_post_id(
                p.post_id)
            investigate_post = uow.investigate_post.get_by_post_id(p.post_id)
            image_reposts = uow.image_repost.get_by_repost_of(p.post_id)
            comments = uow.bot_comment.get_by_post_id(p.post_id)
            summons = uow.summons.get_by_post_id(p.post_id)
            image_search = uow.image_search.get_by_post_id(p.post_id)
            user_reports = uow.user_report.get_by_post_id(p.post_id)

            if image_post:
                # The post row may be missing, so do not assume post.url exists
                log.info('Deleting image post %s - %s', image_post.id,
                         post.url if post else None)
                uow.image_post.remove(image_post)
            if image_post_current:
                log.info('Deleting image post current %s',
                         image_post_current.id)
                uow.image_post_current.remove(image_post_current)
            if investigate_post:
                log.info('Deleting investigate %s', investigate_post.id)
                uow.investigate_post.remove(investigate_post)

            # Multi-row lookups: (log message, repository, rows to delete)
            related_records = (
                ('Deleting image repost %s', uow.image_repost, image_reposts),
                ('Deleting comment %s', uow.bot_comment, comments),
                ('deleting summons %s', uow.summons, summons),
                ('Deleting image search %s', uow.image_search, image_search),
                ('Deleting report %s', uow.user_report, user_reports),
            )
            for message, repository, records in related_records:
                for record in records or ():
                    log.info(message, record.id)
                    repository.remove(record)

            if post:
                uow.posts.remove(post)
            uow.to_be_deleted.remove(p)
        uow.commit()
Esempio n. 19
0
def deleted_post_cleanup(self, posts: List[Text]) -> NoReturn:
    """
    Ask the util API which of the given posts were removed and sync the DB.

    The post IDs are POSTed as JSON to ``{util_api}/maintenance/removed``.
    The response body is parsed as JSON and iterated; each entry is read
    through its ``id`` and ``action`` keys:

    * ``update``  - stamp ``last_deleted_check`` on the post, if it exists
    * ``remove``  - delete the post and every dependent record found for it
    * ``default`` - only logged
    * ``skip`` or any other value - ignored

    All changes are committed once, after every entry has been handled.

    :param posts: IDs of the posts to check
    :raises ValueError: if ``self.config.util_api`` is not set
    :return: None
    """
    if not self.config.util_api:
        raise ValueError('Missing util API')

    try:
        res = requests.post(f'{self.config.util_api}/maintenance/removed',
                            json=posts)
    except Exception:
        log.exception('Failed to call delete check api', exc_info=False)
        return
    if res.status_code != 200:
        log.error('Unexpected status code: %s', res.status_code)
        return

    res_data = json.loads(res.text)
    with self.uowm.start() as uow:
        for p in res_data:
            action = p['action']
            post_id = p['id']

            if action == 'update':
                post = uow.posts.get_by_post_id(post_id)
                if post:
                    post.last_deleted_check = func.utc_timestamp()

            elif action == 'remove':
                post = uow.posts.get_by_post_id(post_id)
                image_post, image_post_current = None, None
                # The post row itself may already be gone.  Guard the
                # attribute access so the dependent records below are still
                # cleaned up instead of the whole batch failing.
                if post and post.post_type == 'image':
                    image_post = uow.image_post.get_by_post_id(post_id)
                    image_post_current = uow.image_post_current.get_by_post_id(
                        post_id)

                # Run every lookup before the first delete.
                investigate_post = uow.investigate_post.get_by_post_id(post_id)
                image_reposts = uow.image_repost.get_by_repost_of(post_id)
                comments = uow.bot_comment.get_by_post_id(post_id)
                summons = uow.summons.get_by_post_id(post_id)
                image_search = uow.image_search.get_by_post_id(post_id)
                user_reports = uow.user_report.get_by_post_id(post_id)

                if image_post:
                    log.info('Deleting image post %s - %s', image_post.id,
                             post.url)
                    uow.image_post.remove(image_post)
                if image_post_current:
                    log.info('Deleting image post current %s',
                             image_post_current.id)
                    uow.image_post_current.remove(image_post_current)
                if investigate_post:
                    log.info('Deleting investigate %s', investigate_post.id)
                    uow.investigate_post.remove(investigate_post)

                # Collections that may hold several rows per post:
                # (log format, repository, rows)
                multi_row_dependents = (
                    ('Deleting image repost %s', uow.image_repost,
                     image_reposts),
                    ('Deleting comment %s', uow.bot_comment, comments),
                    ('deleting summons %s', uow.summons, summons),
                    ('Deleting image search %s', uow.image_search,
                     image_search),
                    ('Deleting report %s', uow.user_report, user_reports),
                )
                for message, repo, rows in multi_row_dependents:
                    for row in rows or ():
                        log.info(message, row.id)
                        repo.remove(row)

                if post:
                    uow.posts.remove(post)

            elif action == 'default':
                # No post is loaded in this branch, so log the ID taken from
                # the response rather than a possibly unbound or stale
                # ``post`` left over from another iteration.
                log.info('Got default: %s', post_id)

        uow.commit()
Esempio n. 20
0
                                 live_response=config.live_responses,
                                 source='summons',
                             ),
                             event_logger=event_logger,
                             summons_disabled=False)
    redis_client = redis.Redis(host=config.redis_host,
                               port=config.redis_port,
                               db=0,
                               password=config.redis_password)
    while True:
        try:
            with uowm.start() as uow:
                summons = uow.summons.get_unreplied(limit=20)

                for s in summons:
                    log.info('Starting summons %s', s.id)
                    process_summons.apply_async((s, ), queue='summons')
                while True:
                    queued_items = redis_client.lrange('summons', 0, 20000)
                    if len(queued_items) == 0:
                        log.info('Summons queue empty.  Starting over')
                        time.sleep(60)
                        break
                    log.info('Summons queue still has %s tasks',
                             len(queued_items))
                    time.sleep(15)
        except ConnectionError as e:
            log.exception('Error connecting to Redis')
    """
    while True:
        try:
Esempio n. 21
0
 def get_total_video_posts(self):
     """Count the video rows in ``reddit_post`` and store the comma-grouped total on ``self.stats``."""
     log.info('Getting total video posts')
     query = "SELECT COUNT(*) c FROM reddit_post WHERE post_type='video'"
     row = self._send_query(query)
     self.stats.total_video_posts = format(row["c"], ',')
Esempio n. 22
0
 def get_total_image_posts(self):
     """Store the highest ``reddit_image_post`` id, comma-grouped, as the image post total on ``self.stats``."""
     log.info('Getting total image posts')
     # The newest row's id is used as the total; no COUNT(*) is issued.
     query = "SELECT id FROM reddit_image_post ORDER BY id DESC LIMIT 1"
     row = self._send_query(query)
     self.stats.total_image_posts = format(row["id"], ',')
Esempio n. 23
0
 def get_total_link_reposts(self):
     """Count the rows in ``link_reposts`` and store the comma-grouped total on ``self.stats``."""
     log.info('Getting total link reposts')
     query = "SELECT COUNT(*) c FROM link_reposts"
     row = self._send_query(query)
     self.stats.total_link_repost = format(row["c"], ',')
Esempio n. 24
0
 def get_total_summons(self):
     """Count the rows in ``reddit.reddit_bot_summons`` and store the comma-grouped total on ``self.stats``."""
     log.info('Getting total summons')
     query = "SELECT COUNT(*) c FROM reddit.reddit_bot_summons"
     row = self._send_query(query)
     self.stats.summon_total = format(row["c"], ',')
    def monitor_for_summons_pushshift(self):
        """
        Poll Pushshift for recent comments, forever, and hand them off.

        Requests are sent through the crosspost proxy, newest comments first.
        Each page moves the ``before`` cursor to the ``created_utc`` of the
        oldest comment on that page.  Once the span between the first comment
        seen and the cursor exceeds 600 seconds the window is restarted from
        the newest comments.

        Every page of comments is passed to ``self.process_pushshift_comments``.
        Request, HTTP status, JSON and API errors are logged and retried
        after a 10 second pause.  Any other exception is logged and ends the
        method.
        """
        try:
            # TODO - Remove try/catch after we find crashes
            while True:
                oldest_id = None
                start_time = None
                base_url = 'https://api.pushshift.io/reddit/search/comment?size=1000&sort_type=created_utc&sort=desc'
                while True:

                    if oldest_id:
                        url = base_url + '&before=' + str(oldest_id)
                    else:
                        url = base_url

                    try:
                        r = requests.post(
                            'http://sr2.plxbx.com:8888/crosspost',
                            data={'url': url})
                    except Exception:
                        log.exception('Exception getting Push Shift result',
                                      exc_info=True)
                        time.sleep(10)
                        continue

                    if r.status_code != 200:
                        log.error('Unexpected status code %s from Push Shift',
                                  r.status_code)
                        time.sleep(10)
                        continue

                    try:
                        response = json.loads(r.text)
                    except Exception:
                        # Step the cursor back past the page that failed to
                        # decode.  On the first page there is no cursor yet,
                        # and subtracting from None would raise TypeError and
                        # end the method.
                        if oldest_id is not None:
                            oldest_id = oldest_id - 90
                        log.exception('Error decoding json')
                        time.sleep(10)
                        continue

                    if response['status'] != 'success':
                        log.error('Error from API.  Status code %s, reason %s',
                                  response['status_code'], response['message'])
                        # Back off like every other failure path so a failing
                        # upstream is not retried in a tight loop.
                        time.sleep(10)
                        continue

                    data = json.loads(response['payload'])
                    comments = data['data']
                    if not comments:
                        # An empty page has no oldest comment to continue
                        # from; restart the window rather than index into it.
                        log.info('Push Shift returned no comments, starting over')
                        time.sleep(10)
                        break

                    oldest_id = comments[-1]['created_utc']
                    log.debug('Oldest: %s',
                              datetime.utcfromtimestamp(oldest_id))

                    self.process_pushshift_comments(comments)

                    if not start_time:
                        start_time = comments[0]['created_utc']

                    start_end_dif = start_time - oldest_id
                    # NOTE(review): the threshold is 600 seconds (10 minutes)
                    # while the log text says 30 minutes - confirm which one
                    # is intended.
                    if start_end_dif > 600:
                        log.info(
                            'Reached end of 30 minute window, starting over')
                        break
        except Exception:
            log.exception('Pushshift summons thread crashed', exc_info=True)
 def check_for_summons(comment: str, summons_string: str) -> bool:
     """
     Report whether a comment contains the summons pattern.

     :param comment: comment text to inspect
     :param summons_string: regular expression to look for, matched case-insensitively
     :return: True when the pattern is found anywhere in the comment, else False
     """
     if re.search(summons_string, comment, re.IGNORECASE) is None:
         return False
     log.info('Comment [%s] matches summons string [%s]', comment,
              summons_string)
     return True
Esempio n. 27
0
    def _load_config(cls, config_file=None) -> NoReturn:
        """
        Locate the JSON config file and load it onto the class.

        Candidates are probed from lowest to highest priority and each hit
        replaces the previous one, so the effective precedence is:

        1. ``config_file`` argument
        2. path in the ``bot_config`` environment variable
        3. ``sleuth_config.json`` in the current working directory
        4. ``sleuth_config.json`` next to this module

        On success ``cls.CONFIG_FILE`` holds the chosen path and
        ``cls.CONFIG`` the parsed JSON.  If nothing is found an error is
        logged and the class is left untouched.

        :param config_file: optional explicit path to a config file
        :return: None
        """
        config_name = 'sleuth_config.json'
        selected = ()

        # Lowest priority: the directory this module lives in.
        module_dir = os.path.dirname(sys.modules[__name__].__file__)
        module_candidate = os.path.join(module_dir, config_name)
        log.info('Checking for config in module dir: %s', module_dir)
        if os.path.isfile(module_candidate):
            log.info('Found sleuth_config.json in module dir')
            selected = (module_candidate, 'module')

        # Current working directory, only consulted when the module dir missed.
        log.info('Checking for config in current dir: %s', os.getcwd())
        if not selected and os.path.isfile(config_name):
            log.info('Found sleuth_config.json in current directory')
            selected = (os.path.join(os.getcwd(), config_name), 'cwd')

        # The environment variable overrides both directory lookups.
        log.info('Checking ENV for config file')
        env_path = os.getenv('bot_config', None)
        if env_path and os.path.isfile(env_path):
            selected = (env_path, 'env')
            log.info('Loading config provided in ENV: %s', selected)

        # An explicitly passed file wins over everything else.
        if config_file:
            log.info('Checking provided config file: %s', config_file)
            if not os.path.isfile(config_file):
                log.error('Provided config does not exist')
            else:
                selected = (config_file, 'passed')

        if not selected:
            log.error('Failed to locate config file')
            return

        config_path, config_source = selected
        log.info('Config Source: %s | Config File: %s', config_source,
                 config_path)
        cls.CONFIG_FILE = config_path
        with open(config_path, 'r') as handle:
            cls.CONFIG = json.load(handle)
 def bulk_save(self, items: List[AudioFingerPrint]):
     """
     Save a batch of audio fingerprints through the DB session in one call.

     :param items: fingerprints to hand to ``bulk_save_objects``
     """
     item_count = len(items)
     log.info('Saving %s audio hashes', item_count)
     self.db_session.bulk_save_objects(items)
    def check_image(
            self,
            url: Text,
            post: Post = None,
            source='unknown',
            sort_by='created',
            search_settings: ImageSearchSettings = None) -> ImageSearchResults:
        """
        Search the image indexes for matches of the image at ``url``.

        The search runs in these stages, most of them timed through
        ``search_results.search_times``:

        1. Fall back to the default search settings when none are given.
        2. When the meme filter is enabled, look up a meme template and, if
           one is found, compute a meme hash for the URL.
        3. Query for matches via ``self._get_matches``.
        4. Build match objects from the historical and current results.
        5. Remove duplicates and, when ``post`` is given, set title similarity.
        6. Filter the matches for reposts, then log the timing and the search.

        NOTE: when a meme template is found, ``target_match_percent`` on the
        ``search_settings`` object is overwritten with 100.  If the caller
        passed that object in, the caller's instance is modified.

        :param url: URL of the image to check
        :param post: the post being checked, if any; only its title is read here
        :param source: label passed through to the search/timing log helpers
        :param sort_by: passed through to ``_filter_results_for_reposts``
        :param search_settings: settings for this search; defaults are built
            from ``self.config`` when omitted
        :return: the populated search results
        """
        log.info('Checking URL for matches: %s', url)

        if not search_settings:
            log.info('No search settings provided, using default')
            search_settings = get_default_image_search_settings(self.config)

        # NOTE(review): target_hash and target_hamming_distance are read off
        # this object below; presumably ImageSearchResults derives them from
        # the URL and settings - confirm against its definition.
        search_results = ImageSearchResults(url,
                                            checked_post=post,
                                            search_settings=search_settings)

        search_results.search_times.start_timer('total_search_time')

        if search_settings.meme_filter:
            search_results.search_times.start_timer('meme_detection_time')
            search_results.meme_template = self._get_meme_template(
                search_results.target_hash)
            search_results.search_times.stop_timer('meme_detection_time')
            if search_results.meme_template:
                search_settings.target_match_percent = 100  # Keep only 100% matches on default hash size
                search_results.meme_hash = self._get_meme_hash(url)
                if not search_results.meme_hash:
                    # Without a meme hash the template is dropped again.
                    # target_match_percent stays at 100 in that case.
                    log.error('No meme hash, disabled meme filter')
                    search_results.meme_template = None
                else:
                    log.info('Using meme filter %s',
                             search_results.meme_template.id)

        log.debug('Search Settings: %s', search_settings)

        api_search_results = self._get_matches(
            search_results.target_hash,
            search_results.target_hamming_distance,
            search_settings.target_annoy_distance,
            max_matches=search_settings.max_matches,
            max_depth=search_settings.max_depth,
            search_times=search_results.search_times)

        # Carry the index-side statistics over to the results object.
        search_results.search_times.index_search_time = api_search_results.index_search_time
        search_results.total_searched = api_search_results.total_searched

        # Historical matches come first; current-index matches are appended.
        search_results.search_times.start_timer('set_match_post_time')
        search_results.matches = self._build_search_results(
            api_search_results.historical_matches, url,
            search_results.target_hash)
        search_results.matches += self._build_search_results(
            api_search_results.current_matches,
            url,
            search_results.target_hash,
            historical_index=False)
        search_results.search_times.stop_timer('set_match_post_time')

        # The 'remove_duplicate_time' timer also covers title similarity.
        search_results.search_times.start_timer('remove_duplicate_time')
        search_results.matches = self._remove_duplicates(
            search_results.matches)
        if post:
            search_results.matches = set_all_title_similarity(
                search_results.checked_post.title, search_results.matches)
        search_results.search_times.stop_timer('remove_duplicate_time')

        search_results = self._filter_results_for_reposts(search_results,
                                                          sort_by=sort_by)
        # The total timer stops before the logging helpers run.
        search_results.search_times.stop_timer('total_search_time')
        self._log_search_time(search_results, source)

        # _log_search returns the results object, which replaces the local one.
        search_results = self._log_search(
            search_results,
            source,
            api_search_results.used_current_index,
            api_search_results.used_historical_index,
        )

        log.info('Seached %s items and found %s matches',
                 search_results.total_searched, len(search_results.matches))
        return search_results
Esempio n. 30
0
 def register_agent(self, agent: NotificationAgent) -> NoReturn:
     """
     Add a notification agent to this instance's list of agents.

     :param agent: agent to append to ``self.notification_agents``
     :return: None
     """
     agent_name = agent.name
     log.info('Registered notification agent %s', agent_name)
     self.notification_agents.append(agent)