def _disable_watch(self, summons: Summons) -> None:
    """
    Disable an existing repost watch in response to a summons.

    Sends a response to the requestor whether or not a watch was found.

    :param summons: Summons requesting the watch be disabled
    """
    response = SummonsResponse(summons=summons)
    with self.uowm.start() as uow:
        existing_watch = uow.repostwatch.find_existing_watch(
            summons.requestor, summons.post_id)
        # No watch, or a watch that is already disabled - nothing to do.
        # (The original condition redundantly re-tested existing_watch.)
        if not existing_watch or not existing_watch.enabled:
            response.message = WATCH_DISABLED_NOT_FOUND
            self._send_response(response)
            return
        existing_watch.enabled = False
        try:
            uow.commit()
            response.message = WATCH_DISABLED
            log.info('Disabled watch for post %s for user %s', summons.post_id, summons.requestor)
        except Exception:
            log.exception('Failed to disable watch %s', existing_watch.id, exc_info=True)
            response.message = 'An error prevented me from removing your watch on this post. Please try again'
        self._send_response(response)
def save_unknown_post(self, post_id: str) -> Optional[Post]:
    """
    Attempt to ingest a post we received a request for but have not yet saved.

    :param post_id: ID of the Reddit submission to ingest
    :return: The saved Post, or None if ingest failed or the post type is unsupported
    """
    log.info('Post %s does not exist, attempting to ingest', post_id)
    submission = self.reddit.submission(post_id)
    post = None
    try:
        post = pre_process_post(submission_to_post(submission), self.uowm, None)
    except InvalidImageUrlException:
        log.error('Failed to ingest post %s. URL appears to be bad', post_id)
    if not post:
        log.error('Problem ingesting post. Either failed to save or it is not an image')
        return None
    return post
def check_for_high_match_meme(search_results: ImageSearchResults, uowm: UnitOfWorkManager) -> None:
    """
    Create a meme template from a checked post when a meme-sub search returns many matches.

    After successfully saving a template, raises IngestHighMatchMeme so the calling
    Celery task retries the search using the new template.

    :param search_results: Results of an image search
    :param uowm: Unit of work manager for DB access
    :raises IngestHighMatchMeme: When a new template was created and the post needs a recheck
    """
    # A meme template was already applied to this search; nothing to create
    if search_results.meme_template is not None:
        return
    with uowm.start() as uow:
        meme_template = None
        # TODO - 1/12/2021 - Should probably remember the meme in subreddit check and generate more templates
        # Only treat as a meme when there are many matches and the sub name contains 'meme'
        if len(
                search_results.matches
        ) > 5 and 'meme' in search_results.checked_post.subreddit.lower():
            try:
                # 32-bit hashes give the higher-detail dhash_256 used by the template
                meme_hashes = get_image_hashes(search_results.checked_post.url, hash_size=32)
            except Exception as e:
                log.error('Failed to get meme hash for %s', search_results.checked_post.post_id)
                return
            try:
                meme_template = MemeTemplate(
                    dhash_h=search_results.checked_post.dhash_h,
                    dhash_256=meme_hashes['dhash_h'],
                    post_id=search_results.checked_post.post_id)
                uow.meme_template.add(meme_template)
                uow.commit()
            except Exception as e:
                log.exception('Failed to create meme template', exc_info=True)
                meme_template = None
        if meme_template:
            log.info('Saved new meme template for post %s in %s',
                     search_results.checked_post.post_id,
                     search_results.checked_post.subreddit)
            # Raise exception so celery will retry the task and use the new meme template
            raise IngestHighMatchMeme('Created meme template. Post needs to be rechecked')
def _reply_to_comment(self, comment_id: Text, comment_body: Text, subreddit: Text = None) -> Optional[Comment]:
    """
    Post a given reply to a given comment ID
    :rtype: Optional[Comment]
    :param comment_id: ID of comment to reply to
    :param comment_body: Body of the comment to leave in reply
    :param subreddit: Optional subreddit name; recorded as banned if the reply is Forbidden
    :return: The created reply Comment, or None if the target comment was not found
    """
    comment = self.reddit.comment(comment_id)
    if not comment:
        log.error('Failed to find comment %s', comment_id)
        return
    try:
        start_time = perf_counter()
        reply_comment = comment.reply(comment_body)
        # Record API latency and the remaining rate limit for metrics
        self._record_api_event(
            float(round(perf_counter() - start_time, 2)),
            'reply_to_comment',
            self.reddit.reddit.auth.limits['remaining'])
        self._log_response(reply_comment)
        log.info('Left comment at: https://reddit.com%s', reply_comment.permalink)
        return reply_comment
    except Forbidden:
        log.exception('Forbidden to respond to comment %s', comment_id, exc_info=False)
        # If we get Forbidden there's a chance we don't have the comment data to get subreddit
        if subreddit:
            self._save_banned_sub(subreddit)
        raise
    except AssertionError:
        log.exception('Problem leaving comment', exc_info=True)
        raise
def fingerprint_audio_dl(self, post):
    """
    Download a post's audio, fingerprint it, and persist the fingerprint hashes.

    Skips posts that already have fingerprints.  The temp download directory is
    always cleaned up after download, whether fingerprinting succeeds or fails
    (the original duplicated the cleanup on both paths).

    :param post: Post whose searched_url should be downloaded and fingerprinted
    """
    with self.uowm.start() as uow:
        if uow.audio_finger_print.get_by_post_id(post.post_id):
            log.error('Post %s has already been fingerprinted', post.post_id)
            return
        try:
            file = download_file(post.searched_url)
        except Exception:
            log.error('Failed to download file from %s', post.searched_url)
            return
        try:
            try:
                hashes = fingerprint_audio_file(file)
            except Exception as e:
                log.exception('Problem finger printing post %s', post.post_id, exc_info=True)
                log.error(e)
                return
            fingerprints = []
            # Each hash is a (hash, offset) pair; avoid shadowing builtin 'hash'
            for hash_pair in hashes:
                fingerprint = AudioFingerPrint()
                fingerprint.post_id = post.post_id
                fingerprint.hash = hash_pair[0]
                fingerprint.offset = hash_pair[1]
                fingerprints.append(fingerprint)
            uow.audio_finger_print.bulk_save(fingerprints)
            uow.commit()
            log.info('Finished fingerprinting %s', post.post_id)
        finally:
            # Remove the temp download directory on success and failure alike
            shutil.rmtree(os.path.split(file)[0])
def should_check_post(self, post: Post, check_image: bool, check_link: bool, title_keyword_filter: List[Text] = None) -> bool: """ Check if a given post should be checked :rtype: bool :param post: Post to check :param title_keyword_filter: Optional list of keywords to skip if in title :return: bool """ if post.left_comment: return False if post.post_type not in self.config.supported_post_types: return False if post.post_type == 'image' and not check_image: return False if post.post_type == 'link' and not check_link: log.info('Skipping link post') return False if post.crosspost_parent: log.debug('Skipping crosspost') return False if title_keyword_filter: for kw in title_keyword_filter: if kw in post.title.lower(): log.debug('Skipping post with keyword %s in title %s', kw, post.title) return False return True
def _send_private_message(
        self,
        user: Redditor,
        message_body,
        subject: Text = 'Repost Check',
        source: Text = None,
        post_id: Text = None,
        comment_id: Text = None
) -> None:
    """
    Send a private message to a Redditor and record it in the database.

    :param user: Redditor to message
    :param message_body: Body of the private message
    :param subject: Message subject line
    :param source: What triggered the message (e.g. summons, submonitor)
    :param post_id: Post the message relates to, if any
    :param comment_id: Comment the message relates to, if any
    :raises Exception: Re-raises any failure from the Reddit API call
    """
    if not user:
        log.error('No user provided to send private message')
        return
    try:
        start_time = perf_counter()
        user.message(subject, message_body)
        # Record API latency and the remaining rate limit for metrics
        self._record_api_event(
            float(round(perf_counter() - start_time, 2)),
            'private_message',
            self.reddit.reddit.auth.limits['remaining']
        )
        log.info('Sent PM to %s. ', user.name)
    except Exception as e:
        log.exception('Failed to send PM to %s', user.name, exc_info=True)
        raise
    # Only reached when the PM was sent successfully
    self._save_private_message(
        BotPrivateMessage(
            subject=subject,
            body=message_body,
            in_response_to_post=post_id,
            in_response_to_comment=comment_id,
            triggered_from=source,
            recipient=user.name
        )
    )
def update_banned_sub_wiki(uowm: UnitOfWorkManager, reddit: Reddit) -> None:
    """
    Update the banned sub wiki page with the most recent list of banned subs

    :param uowm: UnitOfWorkmanager
    :param reddit: Praw Reddit instance
    """
    print('[Scheduled Job] Update Ban Wiki Start')
    wiki_template_file = os.path.join(os.getcwd(), 'banned-subs.md')
    if not os.path.isfile(wiki_template_file):
        log.critical('Unable to locate banned sub wiki file at %s', wiki_template_file)
        return
    with open(wiki_template_file, 'r') as f:
        template = f.read()
    with uowm.start() as uow:
        banned = uow.banned_subreddit.get_all()
        # Build the table while the session is open so lazy attributes load
        results = [[f'r/{sub.subreddit}', sub.detected_at, sub.last_checked] for sub in banned]
        table_data = build_markdown_table(results, ['Subreddit', 'Detected At', 'Last Checked'])
        wiki = reddit.subreddit('RepostSleuthBot').wiki['published-data/banned-subreddits']
        wiki.edit(template.format(banned_subs=table_data, total=len(banned)))
    # Typo fix: log previously read 'Fished update'
    log.info('[Banned Sub Wiki Update] Finished update')
    print('[Scheduled Job] Update Ban Wiki End')
def get_all_links():
    """
    Queue batches of image posts that are due for a deleted-post check.

    Skips queueing entirely if the cleanup queue still has pending jobs.
    Batches of 30 IDs are sent to the 'deleted_post_cleanup' Celery queue;
    the final partial batch is flushed as well (it was previously dropped).
    """
    print('[Scheduled Job] Queue deleted posts')
    redis = get_redis_client(config)
    if len(redis.lrange('deleted_post_cleanup', 0, 20000)) > 0:
        log.info('Deleted post cleanup queue still has pending jobs. Skipping queueing ')
        return
    conn = get_db_conn()
    batch = []
    with conn.cursor() as cur:
        query = "SELECT post_id, url, post_type FROM reddit_post WHERE post_type='image' AND last_deleted_check <= NOW() - INTERVAL 90 DAY LIMIT 1000000"
        cur.execute(query)
        log.info('Adding items to index')
        for row in cur:
            batch.append({'id': row['post_id'], 'url': row['url']})
            if len(batch) >= 30:
                try:
                    deleted_post_cleanup.apply_async((batch,), queue='deleted_post_cleanup')
                    batch = []
                except Exception:
                    # Keep the batch and retry on the next iteration; was a
                    # silent 'continue' that hid queueing failures
                    log.exception('Failed to queue deleted post batch', exc_info=True)
                    continue
        # Flush the final partial batch; previously any remainder under 30 was lost
        if batch:
            try:
                deleted_post_cleanup.apply_async((batch,), queue='deleted_post_cleanup')
            except Exception:
                log.exception('Failed to queue final deleted post batch', exc_info=True)
def check_for_config_update(self, monitored_sub: MonitoredSub, notify_missing_keys=True):
    """
    Sync a monitored sub's wiki config page with the bot's stored config.

    Creates the initial wiki page if it is missing, loads a newer wiki revision
    when one exists, then back-fills any config keys the wiki copy lacks and
    optionally notifies the sub's mods about the new options.

    :param monitored_sub: MonitoredSub whose wiki config should be checked
    :param notify_missing_keys: Message the sub's mods when new keys are added
    """
    if not monitored_sub.is_mod:
        log.error('Bot is not a mod on %s, skipping config update', monitored_sub.name)
        return
    if not monitored_sub.wiki_permission:
        log.error('Bot does not have wiki permissions on %s', monitored_sub.name)
        return
    subreddit = self.reddit.subreddit(monitored_sub.name)
    wiki_page = subreddit.wiki[self.config.wiki_config_name]
    try:
        # Attribute access forces the lazy wiki page to load (raises if missing)
        wiki_page.content_md
    except NotFound:
        self.create_initial_wiki_config(subreddit, wiki_page, monitored_sub)
        return
    except Forbidden:
        # Can't read the wiki page; nothing more we can do
        return
    try:
        if not self._is_config_updated(wiki_page.revision_id):
            log.info('Newer config found for %s', monitored_sub.name)
            wiki_config = self._load_new_config(wiki_page, monitored_sub, subreddit)
        else:
            log.info('Already have the newest config for %s', monitored_sub.name)
            wiki_config = self.get_wiki_config(wiki_page)
    except JSONDecodeError:
        # Malformed wiki JSON; leave it for the mods to fix
        return
    missing_keys = self._get_missing_config_values(wiki_config)
    if not missing_keys:
        return
    log.info('Sub %s is missing keys %s', monitored_sub.name, missing_keys)
    if not self.update_wiki_config_from_database(monitored_sub, wiki_page):
        return
    if notify_missing_keys:
        if self._notify_new_options(subreddit, missing_keys):
            self._set_config_notified(wiki_page.revision_id)
def _add_comment(self, post: Post, search_results: SearchResults) -> None:
    """
    Leave a repost comment on a post and mark it as commented.

    Skips banned subs (marking the post so it isn't retried), posts we already
    commented on, and monitored subs (handled elsewhere by the sub monitor).

    :param post: Post to comment on
    :param search_results: Search results to summarize in the comment
    """
    if self._is_banned_sub(post.subreddit):
        log.info('Skipping banned sub %s', post.subreddit)
        with self.uowm.start() as uow:
            post.left_comment = True
            uow.posts.update(post)
            uow.commit()
        return
    if self._left_comment(post.post_id):
        log.info('Already left comment on %s', post.post_id)
        return
    with self.uowm.start() as uow:
        monitored_sub = uow.monitored_sub.get_by_sub(post.subreddit)
        if monitored_sub:
            log.info('Skipping monitored sub %s', post.subreddit)
            return
    msg = self.response_builder.build_default_comment(search_results)
    try:
        self.response_handler.reply_to_submission(post.post_id, msg)
    except APIException:
        log.error('Failed to leave comment on %s in %s. ', post.post_id, post.subreddit)
    except Exception:
        # Was a silent 'pass'; still best-effort, but log so unexpected
        # failures are visible
        log.exception('Unexpected error leaving comment on %s', post.post_id, exc_info=True)
    with self.uowm.start() as uow:
        post.left_comment = True
        uow.posts.update(post)
        uow.commit()
def compare_configs(self, config_one: Dict, config_two: Dict) -> List[Dict]:
    """
    Compare two config dicts and return the keys whose values differ.

    Keys present in config_one but missing from config_two are logged as errors
    but not included in the results.

    :param config_one: Baseline config
    :param config_two: Config to compare against
    :return: List of dicts with 'key', 'config_one' and 'config_two' entries
    """
    results = []
    for k, v in config_one.items():
        if k in config_two:
            if config_two[k] != v:
                log.info('Key: %s | Config 1: %s | Config 2: %s', k, v, config_two[k])
                results.append({
                    'key': k,
                    'config_one': v,
                    'config_two': config_two[k]
                })
        else:
            log.error('Config 2 missing key %s', k)
    if results:
        log.info('Config Difs: %s', results)
    else:
        # Typo fix: log previously read 'Confings match'
        log.info('Configs match')
    return results
def check_meme_template_potential_votes(uowm: UnitOfWorkManager) -> None:
    """
    Promote or discard potential meme templates based on community votes.

    Potential templates with a net vote total of +10 or more are converted into
    real MemeTemplates; those at -10 or below are discarded.

    :param uowm: Unit of work manager for DB access
    """
    with uowm.start() as uow:
        potential_templates = uow.meme_template_potential.get_all()
        for potential_template in potential_templates:
            if potential_template.vote_total >= 10:
                existing_template = uow.meme_template.get_by_post_id(
                    potential_template.post_id)
                if existing_template:
                    log.info('Meme template already exists for %s. Removing',
                             potential_template.post_id)
                    uow.meme_template_potential.remove(potential_template)
                    uow.commit()
                    # NOTE(review): returns instead of continuing, so any
                    # remaining potential templates are skipped this run -
                    # confirm this is intentional
                    return
                log.info('Post %s received %s votes. Creating meme template',
                         potential_template.post_id, potential_template.vote_total)
                post = uow.posts.get_by_post_id(potential_template.post_id)
                try:
                    # 32-bit hashes give the higher-detail dhash_256 for the template
                    meme_hashes = get_image_hashes(post.searched_url, hash_size=32)
                except Exception as e:
                    log.error('Failed to get meme hash for %s', post.post_id)
                    # NOTE(review): a single hashing failure also aborts the
                    # entire run rather than moving to the next template
                    return
                meme_template = MemeTemplate(dhash_h=post.dhash_h,
                                             dhash_256=meme_hashes['dhash_h'],
                                             post_id=post.post_id)
                uow.meme_template.add(meme_template)
                uow.meme_template_potential.remove(potential_template)
            elif potential_template.vote_total <= -10:
                log.info('Removing potential template with at least 10 negative votes')
                uow.meme_template_potential.remove(potential_template)
            else:
                # Vote total between -10 and +10: leave it pending
                continue
            uow.commit()
def handle_summons(self):
    """
    Continually check the summons table for new requests. Handle them as they are found

    Runs forever; each unreplied summons is resolved to a post (ingesting it if
    needed) and processed.  Unexpected exceptions are logged and the loop continues.
    """
    while True:
        try:
            with self.uowm.start() as uow:
                summons = uow.summons.get_unreplied()
                for s in summons:
                    log.info('Starting summons %s', s.id)
                    post = uow.posts.get_by_post_id(s.post_id)
                    if not post:
                        post = self.save_unknown_post(s.post_id)
                    if not post:
                        # Bug fix: respond to the individual summons 's',
                        # not the whole 'summons' result list
                        response = SummonsResponse(summons=s)
                        response.message = 'Sorry, I\'m having trouble with this post. Please try again later'
                        log.info('Failed to ingest post %s. Sending error response', s.post_id)
                        self._send_response(response)
                        continue
                    self.process_summons(s, post)
                    # TODO - This sends completed summons events to influx even if they fail
                    summons_event = SummonsEvent(
                        (datetime.utcnow() - s.summons_received_at).seconds,
                        s.summons_received_at, s.requestor, event_type='summons')
                    self._send_event(summons_event)
                    log.info('Finished summons %s', s.id)
            # Poll delay between passes over the summons table
            time.sleep(2)
        except Exception:
            log.exception('Exception in handle summons thread', exc_info=True)
uowm, reddit_manager, response_builder, ResponseHandler(reddit_manager, uowm, event_logger, source='submonitor', live_response=config.live_responses), event_logger=event_logger, config=config) redis = get_redis_client(config) while True: while True: queued_items = redis.lrange('submonitor', 0, 20000) if len(queued_items) == 0: log.info('Sub monitor queue empty. Starting over') break log.info('Sub monitor queue still has %s tasks', len(queued_items)) time.sleep(60) with uowm.start() as uow: monitored_subs = uow.monitored_sub.get_all() for monitored_sub in monitored_subs: if not monitored_sub.active: continue log.info('Checking sub %s', monitored_sub.name) if not monitored_sub.active: log.debug('Sub %s is disabled', monitored_sub.name) continue if not monitored_sub.check_all_submissions: log.info('Sub %s does not have post checking enabled', monitored_sub.name)
import threading # TODO - Mega hackery, figure this out. import sys from time import sleep sys.path.append('./') from redditrepostsleuth.core.config import Config from redditrepostsleuth.core.db.db_utils import get_db_engine from redditrepostsleuth.core.db.uow.sqlalchemyunitofworkmanager import SqlAlchemyUnitOfWorkManager from redditrepostsleuth.core.logging import log from redditrepostsleuth.core.util.helpers import get_reddit_instance from redditrepostsleuth.ingestsvc.postingestor import PostIngestor if __name__ == '__main__': log.info('Starting post ingestor') print('Starting post ingestor') config = Config() uowm = SqlAlchemyUnitOfWorkManager(get_db_engine(config)) ingestor = PostIngestor(get_reddit_instance(config), uowm, config) threading.Thread(target=ingestor.ingest_without_stream, name='praw_ingest').start() threading.Thread(target=ingestor.ingest_pushshift, name='pushshift_ingest').start() while True: sleep(10)
def cleanup_removed_posts_batch(self, posts: List[Text]) -> None:
    """
    Ask the util API which of the given posts were removed and act on the verdicts.

    For each returned item the API supplies an 'action': 'skip' (do nothing),
    'update' (refresh last_deleted_check), 'remove' (queue for deletion) or
    'default'.  Posts hosted on known-bad domains are always removed.

    :param posts: List of post IDs to check
    :raises ValueError: If the UTIL_API env var is not set
    """
    util_api = os.getenv('UTIL_API')
    if not util_api:
        raise ValueError('Missing util API')
    try:
        res = requests.post(f'{util_api}/maintenance/removed', json=posts)
    except Exception:
        log.exception('Failed to call delete check api', exc_info=True)
        return
    if res.status_code != 200:
        log.error('Unexpected status code: %s', res.status_code)
        return
    res_data = json.loads(res.text)
    with self.uowm.start() as uow:
        for p in res_data:
            # Posts hosted on a known-bad domain are force-removed
            if urlparse(p['url']).hostname in BAD_DOMAINS:
                p['action'] = 'remove'
            if p['action'] == 'skip':
                continue
            elif p['action'] == 'update':
                post = uow.posts.get_by_post_id(p['id'])
                if not post:
                    continue
                post.last_deleted_check = func.utc_timestamp()
            elif p['action'] == 'remove':
                uow.to_be_deleted.add(
                    ToBeDeleted(post_id=p['id'], post_type='image'))
            elif p['action'] == 'default':
                # Bug fix: previously logged post.url, but 'post' is only bound
                # in the 'update' branch and raised NameError here
                log.info('Got default: %s', p['url'])
            else:
                continue
        uow.commit()
    # NOTE: a large block of commented-out per-table deletion code was removed;
    # the 'remove' action now only queues the post via ToBeDeleted
def image_post_cleanup(self, posts: List[Text]) -> None:
    """
    Delete an image post and every row that references it.

    :param posts: ToBeDeleted rows identifying the posts to purge
        (NOTE: annotated List[Text] in the original, but items are accessed
        via .post_id, so they are ORM rows, not strings)
    """
    with self.uowm.start() as uow:
        for p in posts:
            post = uow.posts.get_by_post_id(p.post_id)
            image_post = uow.image_post.get_by_post_id(p.post_id)
            image_post_current = uow.image_post_current.get_by_post_id(p.post_id)
            investigate_post = uow.investigate_post.get_by_post_id(p.post_id)
            image_reposts = uow.image_repost.get_by_repost_of(p.post_id)
            comments = uow.bot_comment.get_by_post_id(p.post_id)
            summons = uow.summons.get_by_post_id(p.post_id)
            image_search = uow.image_search.get_by_post_id(p.post_id)
            user_reports = uow.user_report.get_by_post_id(p.post_id)
            if image_post:
                # Bug fix: 'post' can be None even when an image_post row
                # exists; previously post.url raised AttributeError
                log.info('Deleting image post %s - %s', image_post.id,
                         post.url if post else None)
                uow.image_post.remove(image_post)
            if image_post_current:
                log.info('Deleting image post current %s', image_post_current.id)
                uow.image_post_current.remove(image_post_current)
            if investigate_post:
                log.info('Deleting investigate %s', investigate_post.id)
                uow.investigate_post.remove(investigate_post)
            if image_reposts:
                for r in image_reposts:
                    log.info('Deleting image repost %s', r.id)
                    uow.image_repost.remove(r)
            if comments:
                for c in comments:
                    log.info('Deleting comment %s', c.id)
                    uow.bot_comment.remove(c)
            if summons:
                for s in summons:
                    log.info('deleting summons %s', s.id)
                    uow.summons.remove(s)
            if image_search:
                for i in image_search:
                    log.info('Deleting image search %s', i.id)
                    uow.image_search.remove(i)
            if user_reports:
                for u in user_reports:
                    log.info('Deleting report %s', u.id)
                    uow.user_report.remove(u)
            if post:
                uow.posts.remove(post)
            uow.to_be_deleted.remove(p)
            # Commit per post so one failure doesn't roll back the whole batch
            uow.commit()
def deleted_post_cleanup(self, posts: List[Text]) -> None:
    """
    Ask the util API which of the given posts were deleted and purge their data.

    For each returned item the API supplies an 'action': 'skip', 'update'
    (refresh last_deleted_check), 'remove' (delete the post and all rows that
    reference it) or 'default'.

    :param posts: List of post IDs to check
    :raises ValueError: If the util API is not configured
    """
    # Removed an unused 'util_api = os.getenv(...)' local; the config value
    # is what's actually used below
    if not self.config.util_api:
        raise ValueError('Missing util API')
    try:
        res = requests.post(f'{self.config.util_api}/maintenance/removed', json=posts)
    except Exception:
        log.exception('Failed to call delete check api', exc_info=False)
        return
    if res.status_code != 200:
        log.error('Unexpected status code: %s', res.status_code)
        return
    res_data = json.loads(res.text)
    with self.uowm.start() as uow:
        for p in res_data:
            if p['action'] == 'skip':
                continue
            elif p['action'] == 'update':
                post = uow.posts.get_by_post_id(p['id'])
                if not post:
                    continue
                post.last_deleted_check = func.utc_timestamp()
            elif p['action'] == 'remove':
                post = uow.posts.get_by_post_id(p['id'])
                image_post, image_post_current = None, None
                # Guard: the post row may already be gone
                if post and post.post_type == 'image':
                    image_post = uow.image_post.get_by_post_id(p['id'])
                    image_post_current = uow.image_post_current.get_by_post_id(p['id'])
                investigate_post = uow.investigate_post.get_by_post_id(p['id'])
                image_reposts = uow.image_repost.get_by_repost_of(p['id'])
                comments = uow.bot_comment.get_by_post_id(p['id'])
                summons = uow.summons.get_by_post_id(p['id'])
                image_search = uow.image_search.get_by_post_id(p['id'])
                user_reports = uow.user_report.get_by_post_id(p['id'])
                if image_post:
                    # post is guaranteed non-None here (image_post only fetched when post exists)
                    log.info('Deleting image post %s - %s', image_post.id, post.url)
                    uow.image_post.remove(image_post)
                if image_post_current:
                    log.info('Deleting image post current %s', image_post_current.id)
                    uow.image_post_current.remove(image_post_current)
                if investigate_post:
                    log.info('Deleting investigate %s', investigate_post.id)
                    uow.investigate_post.remove(investigate_post)
                if image_reposts:
                    for r in image_reposts:
                        log.info('Deleting image repost %s', r.id)
                        uow.image_repost.remove(r)
                if comments:
                    for c in comments:
                        log.info('Deleting comment %s', c.id)
                        uow.bot_comment.remove(c)
                if summons:
                    for s in summons:
                        log.info('deleting summons %s', s.id)
                        uow.summons.remove(s)
                if image_search:
                    for i in image_search:
                        log.info('Deleting image search %s', i.id)
                        uow.image_search.remove(i)
                if user_reports:
                    for u in user_reports:
                        log.info('Deleting report %s', u.id)
                        uow.user_report.remove(u)
                if post:
                    uow.posts.remove(post)
            elif p['action'] == 'default':
                # Bug fix: previously logged post.url, but 'post' is unbound in
                # this branch; log the item id instead
                log.info('Got default: %s', p['id'])
            else:
                continue
        uow.commit()
live_response=config.live_responses, source='summons', ), event_logger=event_logger, summons_disabled=False) redis_client = redis.Redis(host=config.redis_host, port=config.redis_port, db=0, password=config.redis_password) while True: try: with uowm.start() as uow: summons = uow.summons.get_unreplied(limit=20) for s in summons: log.info('Starting summons %s', s.id) process_summons.apply_async((s, ), queue='summons') while True: queued_items = redis_client.lrange('summons', 0, 20000) if len(queued_items) == 0: log.info('Summons queue empty. Starting over') time.sleep(60) break log.info('Summons queue still has %s tasks', len(queued_items)) time.sleep(15) except ConnectionError as e: log.exception('Error connecting to Redis') """ while True: try:
def get_total_video_posts(self):
    """Populate stats.total_video_posts with a comma-formatted count of video posts."""
    log.info('Getting total video posts')
    row = self._send_query("SELECT COUNT(*) c FROM reddit_post WHERE post_type='video'")
    self.stats.total_video_posts = '{:,}'.format(row['c'])
def get_total_image_posts(self):
    """Populate stats.total_image_posts using the highest image-post row ID as a fast count."""
    log.info('Getting total image posts')
    row = self._send_query("SELECT id FROM reddit_image_post ORDER BY id DESC LIMIT 1")
    self.stats.total_image_posts = '{:,}'.format(row['id'])
def get_total_link_reposts(self):
    """Populate stats.total_link_repost with a comma-formatted count of link reposts."""
    log.info('Getting total link reposts')
    row = self._send_query("SELECT COUNT(*) c FROM link_reposts")
    self.stats.total_link_repost = '{:,}'.format(row['c'])
def get_total_summons(self):
    """Populate stats.summon_total with a comma-formatted count of bot summonses."""
    log.info('Getting total summons')
    row = self._send_query("SELECT COUNT(*) c FROM reddit.reddit_bot_summons")
    self.stats.summon_total = '{:,}'.format(row['c'])
def monitor_for_summons_pushshift(self):
    """
    Poll Pushshift (via a proxy endpoint) for recent comments and scan them for summonses.

    Pages backwards through comments using 'before' timestamps until a 600-second
    window has been covered, then starts over from the newest comments.
    """
    try:  # TODO - Remove try/catch after we find crashes
        while True:
            oldest_id = None  # created_utc of the oldest comment seen this pass
            start_time = None  # created_utc of the newest comment seen this pass
            base_url = 'https://api.pushshift.io/reddit/search/comment?size=1000&sort_type=created_utc&sort=desc'
            while True:
                if oldest_id:
                    url = base_url + '&before=' + str(oldest_id)
                else:
                    url = base_url
                try:
                    # The Pushshift request is proxied through a helper service
                    r = requests.post(
                        'http://sr2.plxbx.com:8888/crosspost', data={'url': url})
                except Exception as e:
                    log.exception('Exception getting Push Shift result', exc_info=True)
                    time.sleep(10)
                    continue
                if r.status_code != 200:
                    log.error('Unexpected status code %s from Push Shift', r.status_code)
                    time.sleep(10)
                    continue
                try:
                    response = json.loads(r.text)
                except Exception:
                    # NOTE(review): oldest_id can still be None here, which would
                    # raise TypeError; the 90-second rewind's purpose is unclear - confirm
                    oldest_id = oldest_id - 90
                    log.exception('Error decoding json')
                    time.sleep(10)
                    continue
                if response['status'] != 'success':
                    log.error('Error from API. Status code %s, reason %s',
                              response['status_code'], response['message'])
                    if response['status_code'] == '502':
                        continue
                    continue
                data = json.loads(response['payload'])
                # Results are sorted desc, so the last element is the oldest
                oldest_id = data['data'][-1]['created_utc']
                log.debug('Oldest: %s', datetime.utcfromtimestamp(oldest_id))
                self.process_pushshift_comments(data['data'])
                if not start_time:
                    start_time = data['data'][0]['created_utc']
                start_end_dif = start_time - oldest_id
                # NOTE(review): the threshold is 600s (10 minutes) but the log
                # message says 30 minutes - one of the two is stale
                if start_end_dif > 600:
                    log.info('Reached end of 30 minute window, starting over')
                    break
    except Exception as e:
        log.exception('Pushshift summons thread crashed', exc_info=True)
def check_for_summons(comment: str, summons_string: str) -> bool:
    """
    Determine whether a comment contains the bot's summons string.

    :param comment: Comment body to search
    :param summons_string: Regex pattern that summons the bot
    :return: True if the comment matches the summons pattern (case-insensitive)
    """
    match = re.search(summons_string, comment, re.IGNORECASE)
    if not match:
        return False
    log.info('Comment [%s] matches summons string [%s]', comment, summons_string)
    return True
def _load_config(cls, config_file=None) -> None:
    """
    Load the config file. Config file can either be passed in, pulled from the ENV, in CWD or in module dir.

    Load priority:
    1. Passed in config
    2. ENV
    3. CWD
    4 Module Dir

    Candidates are checked lowest-priority first and each later match overwrites
    config_to_load, so the highest-priority source wins.

    :param config_file: path to config file
    :return: None
    """
    config_to_load = ()  # becomes a (path, source-label) tuple once a candidate is found
    module_dir = os.path.dirname(sys.modules[__name__].__file__)
    log.info('Checking for config in module dir: %s', module_dir)
    if os.path.isfile(os.path.join(module_dir, 'sleuth_config.json')):
        log.info('Found sleuth_config.json in module dir')
        config_to_load = os.path.join(module_dir, 'sleuth_config.json'), 'module'
    log.info(f'Checking for config in current dir: %s', os.getcwd())
    # NOTE(review): because the module-dir check runs first and this one is
    # guarded by 'not config_to_load', module dir actually takes precedence
    # over CWD - opposite of the priority stated above; confirm intent
    if not config_to_load and os.path.isfile('sleuth_config.json'):
        log.info('Found sleuth_config.json in current directory')
        config_to_load = os.path.join(os.getcwd(), 'sleuth_config.json'), 'cwd'
    log.info('Checking ENV for config file')
    if os.getenv('bot_config', None):
        if os.path.isfile(os.getenv('bot_config')):
            config_to_load = os.getenv('bot_config'), 'env'
            log.info('Loading config provided in ENV: %s', config_to_load)
    if config_file:
        log.info('Checking provided config file: %s', config_file)
        if os.path.isfile(config_file):
            config_to_load = config_file, 'passed'
        else:
            log.error('Provided config does not exist')
    if not config_to_load:
        log.error('Failed to locate config file')
        return
    log.info('Config Source: %s | Config File: %s', config_to_load[1], config_to_load[0])
    cls.CONFIG_FILE = config_to_load[0]
    with open(config_to_load[0], 'r') as f:
        cls.CONFIG = json.loads(f.read())
def bulk_save(self, items: List[AudioFingerPrint]):
    """Persist a batch of audio fingerprint rows with a single bulk insert."""
    count = len(items)
    log.info('Saving %s audio hashes', count)
    self.db_session.bulk_save_objects(items)
def check_image(
        self,
        url: Text,
        post: Post = None,
        source='unknown',
        sort_by='created',
        search_settings: ImageSearchSettings = None) -> ImageSearchResults:
    """
    Search the image index for matches to the given image URL.

    :param url: URL of the image to check
    :param post: Optional Post being checked; enables title similarity scoring
    :param source: Source of the request, used for logging/metrics
    :param sort_by: Sort order used when filtering results for reposts
    :param search_settings: Search settings; defaults are used if omitted
    :return: Populated ImageSearchResults
    """
    log.info('Checking URL for matches: %s', url)
    if not search_settings:
        log.info('No search settings provided, using default')
        search_settings = get_default_image_search_settings(self.config)
    search_results = ImageSearchResults(url,
                                        checked_post=post,
                                        search_settings=search_settings)
    search_results.search_times.start_timer('total_search_time')
    if search_settings.meme_filter:
        search_results.search_times.start_timer('meme_detection_time')
        search_results.meme_template = self._get_meme_template(
            search_results.target_hash)
        search_results.search_times.stop_timer('meme_detection_time')
        if search_results.meme_template:
            search_settings.target_match_percent = 100  # Keep only 100% matches on default hash size
            search_results.meme_hash = self._get_meme_hash(url)
            if not search_results.meme_hash:
                log.error('No meme hash, disabled meme filter')
                search_results.meme_template = None
            else:
                log.info('Using meme filter %s', search_results.meme_template.id)
    log.debug('Search Settings: %s', search_settings)
    api_search_results = self._get_matches(
        search_results.target_hash,
        search_results.target_hamming_distance,
        search_settings.target_annoy_distance,
        max_matches=search_settings.max_matches,
        max_depth=search_settings.max_depth,
        search_times=search_results.search_times)
    search_results.search_times.index_search_time = api_search_results.index_search_time
    search_results.total_searched = api_search_results.total_searched
    search_results.search_times.start_timer('set_match_post_time')
    # Combine matches from the historical and current indexes
    search_results.matches = self._build_search_results(
        api_search_results.historical_matches, url, search_results.target_hash)
    search_results.matches += self._build_search_results(
        api_search_results.current_matches, url, search_results.target_hash,
        historical_index=False)
    search_results.search_times.stop_timer('set_match_post_time')
    search_results.search_times.start_timer('remove_duplicate_time')
    search_results.matches = self._remove_duplicates(search_results.matches)
    if post:
        search_results.matches = set_all_title_similarity(
            search_results.checked_post.title, search_results.matches)
    search_results.search_times.stop_timer('remove_duplicate_time')
    search_results = self._filter_results_for_reposts(search_results, sort_by=sort_by)
    search_results.search_times.stop_timer('total_search_time')
    self._log_search_time(search_results, source)
    search_results = self._log_search(
        search_results,
        source,
        api_search_results.used_current_index,
        api_search_results.used_historical_index,
    )
    # Typo fix: log previously read 'Seached'
    log.info('Searched %s items and found %s matches',
             search_results.total_searched, len(search_results.matches))
    return search_results
def register_agent(self, agent: NotificationAgent) -> None:
    """
    Add a notification agent to the list of active agents.

    :param agent: Configured NotificationAgent to register
    """
    # Annotation fix: was NoReturn, but this returns normally
    log.info('Registered notification agent %s', agent.name)
    self.notification_agents.append(agent)