def get_image_search_results_multi_match():
    """Build ImageSearchResults containing three matches for filter tests."""
    results = ImageSearchResults(
        'test.com',
        get_image_search_settings(),
        checked_post=Post(post_id='abc123', post_type='image', subreddit='test')
    )
    results.search_times = ImageSearchTimes()
    results.search_times.total_search_time = 10

    # Two dated matches with identical distances...
    dated_posts = [
        Post(id=1, post_id='1111',
             created_at=datetime.strptime('2019-01-28 05:20:03', '%Y-%m-%d %H:%M:%S')),
        Post(id=2, post_id='2222',
             created_at=datetime.strptime('2019-06-28 05:20:03', '%Y-%m-%d %H:%M:%S')),
    ]
    for dated_post in dated_posts:
        results.matches.append(ImageSearchMatch('test.com', 1, dated_post, 10, 10, 32))

    # ...plus one undated, titled match with a much closer distance.
    results.matches.append(
        ImageSearchMatch('test.com', 1,
                         Post(id=3, post_id='3333', title='some normal title'),
                         10, 0.250, 32))
    return results
def _get_image_search_results_multi_match(self):
    """Build ImageSearchResults with two dated matches for comment tests."""
    results = ImageSearchResults(
        'test.com',
        self._get_image_search_settings(),
        checked_post=Post(post_id='abc123', post_type='image', subreddit='test')
    )
    results.search_times = ImageSearchTimes()
    results.search_times.total_search_time = 10

    match_posts = [
        Post(post_id='abc123',
             created_at=datetime.strptime('2019-01-28 05:20:03', '%Y-%m-%d %H:%M:%S')),
        Post(post_id='123abc',
             created_at=datetime.strptime('2019-06-28 05:20:03', '%Y-%m-%d %H:%M:%S')),
    ]
    for match_post in match_posts:
        results.matches.append(ImageSearchMatch('test.com', 1, match_post, 10, 10, 32))
    return results
def test_get_closest_image_match__return_closest(self):
    """The match with the smallest hamming distance should be returned."""
    candidates = [
        ImageSearchMatch('test.com', 1, Post(id=1), 3, .077, 32),
        ImageSearchMatch('test.com', 1, Post(id=2), 5, .077, 32),
        ImageSearchMatch('test.com', 1, Post(id=3), 7, .077, 32),
    ]
    closest = get_closest_image_match(candidates, check_url=False)
    self.assertEqual(closest, candidates[0])
def test__remove_duplicates_one_dup_remove(self):
    """Two matches pointing at the same post ID should collapse to one."""
    dup_svc = DuplicateImageService(Mock(), Mock(), Mock(), config=MagicMock())
    candidates = [
        ImageSearchMatch('test.com', 123, Post(id=1), 10, 10, 32),
        ImageSearchMatch('test.com', 123, Post(id=1), 10, 10, 32),  # duplicate of the first
        ImageSearchMatch('test.com', 123, Post(id=2), 10, 10, 32),
    ]
    deduped = dup_svc._remove_duplicates(candidates)
    self.assertEqual(2, len(deduped))
def test_sort_reposts_correct_order(self):
    """sort_reposts should place the oldest post first."""
    # Timestamps are newest-to-oldest; ids 1..3 follow the same order.
    timestamps = [1575508228, 1572916228, 1570237828]
    matches = []
    for post_id, ts in enumerate(timestamps, start=1):
        match = RepostMatch()
        match.post = Post(id=post_id, created_at=datetime.fromtimestamp(ts))
        matches.append(match)

    ordered = sort_reposts(matches)
    self.assertEqual(3, ordered[0].post.id)
def _get_link_search_results_no_match(self):
    """Build empty link SearchResults for a dummy checked post."""
    timings = ImageSearchTimes()
    timings.total_search_time = 10
    checked = Post(post_id='abc123', post_type='link', subreddit='test')
    return SearchResults(
        'test.com',
        self._get_search_settings(),
        checked_post=checked,
        search_times=timings
    )
def get_link_search_results_matches_match():
    """Build link SearchResults containing a single dated match."""
    timings = ImageSearchTimes()
    timings.total_search_time = 10
    results = SearchResults(
        'test.com',
        get_search_settings(),
        checked_post=Post(post_id='abc123', post_type='link', subreddit='test'),
        search_times=timings
    )
    matched_post = Post(
        post_id='123abc',
        created_at=datetime.strptime('2019-06-28 05:20:03', '%Y-%m-%d %H:%M:%S')
    )
    results.matches.append(SearchMatch('test.com', matched_post))
    return results
def get_image_search_results_no_match():
    """Build ImageSearchResults with timing data but zero matches."""
    results = ImageSearchResults(
        'test.com',
        get_image_search_settings(),
        checked_post=Post(post_id='abc123', post_type='image', subreddit='test')
    )
    timings = ImageSearchTimes()
    timings.total_search_time = 10
    results.search_times = timings
    return results
def test__should_check_post__already_checked_reject(self):
    """A post we already commented on must not be checked again."""
    monitor = SubMonitor(
        MagicMock(), MagicMock(), MagicMock(), MagicMock(), MagicMock(),
        config=Config(redis_host='dummy')
    )
    already_commented = Post(left_comment=True)
    self.assertFalse(monitor.should_check_post(already_commented, True, True))
def test_get_first_active_match(self):
    """First match whose URL HEAD request returns 200 should win."""
    def fake_head(url, **kwargs):
        # Simulate dead links for www.bad.com, live links otherwise.
        status = 400 if url == 'www.bad.com' else 200
        return Mock(status_code=status)

    with mock.patch(
            'redditrepostsleuth.core.util.repost_helpers.requests.head'
    ) as mock_head:
        mock_head.side_effect = fake_head
        matches = [
            SearchMatch('www.dummy.com', Post(id=1, url='www.bad.com')),
            SearchMatch('www.dummy.com', Post(id=2, url='www.bad.com')),
            SearchMatch('www.dummy.com', Post(id=3, url='www.good.com')),
            SearchMatch('www.dummy.com', Post(id=4, url='www.good.com')),
        ]
        active = get_first_active_match(matches)
        self.assertEqual(3, active.post.id)
def test__should_check_post__title_filter_accept(self):
    """An unchecked image post with a normal title should be accepted."""
    monitor = SubMonitor(
        MagicMock(), MagicMock(), MagicMock(), MagicMock(), MagicMock(),
        config=Config(redis_host='dummy', supported_post_types=['image'])
    )
    candidate = Post(left_comment=False, post_type='image', title='some post')
    self.assertTrue(monitor.should_check_post(candidate, True, True))
def _mark_post_as_comment_left(self, post: Post) -> None:
    """Persist that a comment has been left on the given post.

    Best-effort: any database failure is logged and swallowed so the
    caller's workflow continues.

    :param post: Post to flag as commented
    """
    try:
        with self.uowm.start() as uow:
            post.left_comment = True
            uow.posts.update(post)
            uow.commit()
    except Exception:
        # log.exception already records the traceback; the old message
        # ("as checked") mislabeled what this method does.
        log.exception('Failed to mark post %s as comment left', post.id)
def test_build_default_comment__image_oc_all_enabled_close_match(self):
    """OC image results with a close match should render the expected template."""
    builder = ResponseBuilder(MagicMock())
    results = self._get_image_search_results_no_match()
    close_post = Post(
        post_id='abc123',
        created_at=datetime.strptime('2019-01-28 05:20:03', '%Y-%m-%d %H:%M:%S')
    )
    results.closest_match = ImageSearchMatch('test.com', 1, close_post, 5, 3, 32)

    comment = builder.build_default_comment(
        results, signature=True, stats=True, search_link=True, search_settings=True)
    self.assertEqual(IMAGE_OC_ALL_ENABLED_ALL_ENABLED_NO_MEME, comment)
def _add_comment(self, post: Post, search_results: SearchResults) -> NoReturn:
    """
    Add a repost-summary comment to the post.

    Skips (and marks handled) posts in banned subs, posts we already
    commented on, and posts in monitored subs. The post is always
    flagged with left_comment afterwards, even if replying failed.

    :rtype: NoReturn
    :param post: Post to comment on
    :param search_results: Results
    :return: NoReturn
    """
    if self._is_banned_sub(post.subreddit):
        log.info('Skipping banned sub %s', post.subreddit)
        # Still mark as handled so the post isn't re-queued.
        with self.uowm.start() as uow:
            post.left_comment = True
            uow.posts.update(post)
            uow.commit()
        return

    if self._left_comment(post.post_id):
        log.info('Already left comment on %s', post.post_id)
        return

    with self.uowm.start() as uow:
        monitored_sub = uow.monitored_sub.get_by_sub(post.subreddit)
        if monitored_sub:
            # Monitored subs get handled by their own workflow.
            log.info('Skipping monitored sub %s', post.subreddit)
            return

    msg = self.response_builder.build_default_comment(search_results)
    try:
        self.response_handler.reply_to_submission(post.post_id, msg)
    except APIException:
        log.error('Failed to leave comment on %s in %s. ', post.post_id, post.subreddit)
    except Exception:
        # Previously swallowed silently; log so unexpected failures are visible.
        log.exception('Unexpected error leaving comment on %s', post.post_id)

    with self.uowm.start() as uow:
        post.left_comment = True
        uow.posts.update(post)
        uow.commit()
def set_image_hashes(post: Post, hash_size: int = 16) -> Post:
    """Download the post's image and store its perceptual hashes on the post.

    :param post: Post whose url points at an image
    :param hash_size: Hash size passed to the imagehash functions
    :return: The same post with dhash_h, dhash_v and ahash set
    :raises ImageConversioinException: If the image cannot be fetched/converted
    """
    log.debug('%s - Hashing image post %s', os.getpid(), post.post_id)
    # Conversion failures propagate to the caller unchanged; the previous
    # `except ImageConversioinException: raise` was a no-op and is removed.
    img = generate_img_by_url(post.url)
    try:
        post.dhash_h = str(imagehash.dhash(img, hash_size=hash_size))
        post.dhash_v = str(imagehash.dhash_vertical(img, hash_size=hash_size))
        post.ahash = str(imagehash.average_hash(img, hash_size=hash_size))
    except Exception:  # TODO: Specific exception
        log.exception('Error creating hash', exc_info=True)
        raise
    return post
def test__should_check_post__reject_crosspost(self):
    """Crossposts must be rejected even when the post type is supported."""
    monitor = SubMonitor(
        MagicMock(), MagicMock(), MagicMock(), MagicMock(), MagicMock(),
        config=Config(redis_host='dummy', supported_post_types=['image'])
    )
    crosspost = Post(left_comment=False, post_type='image', crosspost_parent='dkjlsd')
    self.assertFalse(monitor.should_check_post(crosspost, True, True))
def set_image_hashes_api(post: Post, api_url: str) -> Post:
    """
    Call an external API to create image hashes.  This allows us to offload bandwidth to another server.  In the current
    case, a Digital Ocean Load Balancer
    :param post: Post to hash
    :param api_url: API URL to call
    :return: The same post with dhash_h, dhash_v and ahash populated
    :raises ImageConversioinException: On any non-200 response from the API
    """
    log.debug('Hashing image post using api %s', post.post_id)
    r = requests.get(api_url, params={'url': post.url})
    if r.status_code != 200:
        # Fixed typo in the original message ("Back statuscode").
        log.error('Bad status code from DO API %s', r.status_code)
        raise ImageConversioinException('Bad response from DO API')
    hashes = r.json()  # equivalent to json.loads(r.text), but idiomatic
    log.debug(hashes)
    post.dhash_h = hashes['dhash_h']
    post.dhash_v = hashes['dhash_v']
    post.ahash = hashes['ahash']
    return post
def test__get_image_search_match_from_index_result_valid_post_no_dhash(self):
    """An index hit resolving to a post without a dhash yields no match."""
    with mock.patch.object(DuplicateImageService,
                           '_get_post_from_index_id') as mock_lookup:
        mock_lookup.return_value = Post(id=456)  # post exists but has no dhash
        svc = DuplicateImageService(Mock(), Mock(), Mock(), config=MagicMock())
        match = svc._get_image_search_match_from_index_result(
            {'id': 123, 'distance': .123},
            'test.com',
            '40bec6703e3f3c2b0fc491a1c0c16cff273f00c00c020ff91b6807cc060c0014'
        )
        self.assertIsNone(match)
def save_link_repost(post: Post, repost_of: Post, uowm: UnitOfWorkManager, source: Text) -> None:
    """Record a link repost and flag the post as repost-checked.

    Commit failures are logged, not raised; an IntegrityError means the
    repost record already exists.
    """
    with uowm.start() as uow:
        repost_record = LinkRepost(
            post_id=post.post_id,
            repost_of=repost_of.post_id,
            author=post.author,
            subreddit=post.subreddit,
            source=source
        )
        post.checked_repost = True
        uow.posts.update(post)
        uow.link_repost.add(repost_record)
        try:
            uow.commit()
        except IntegrityError:
            log.error('Failed to save link repost, it already exists')
        except Exception:
            log.exception('Failed to save link repost', exc_info=True)
def pre_process_post(post: Post, uowm: UnitOfWorkManager, hash_api) -> Post:
    """Run type-specific pre-processing on an ingested post, then persist it.

    Image posts are hashed (via process_image_post) and their index rows
    staged; link posts get an MD5 url_hash.  Returns the saved post, or
    None when image processing fails, required data is missing, or the
    database insert hits an IntegrityError.

    :param post: Freshly ingested post to process
    :param uowm: Unit-of-work manager used for all database access
    :param hash_api: Passed through to process_image_post
        (presumably an external hashing endpoint — confirm with caller)
    :return: The committed Post, or None on any failure path
    """
    log.debug(post)
    with uowm.start() as uow:
        if post.post_type == 'image':
            log.debug('Post %s: Is an image', post.post_id)
            try:
                # NOTE: process_image_post returns a replacement post object
                # plus the two image index rows; the local `post` is rebound.
                post, image_post, image_post_current = process_image_post(post, hash_api)
            except (ImageRemovedException, ImageConversioinException,
                    InvalidImageUrlException, ConnectionError):
                # Image unavailable or unconvertible — drop the post entirely.
                return
            if image_post is None or image_post_current is None:
                log.error(
                    'Post %s: Failed to save image post. One of the post objects is null',
                    post.post_id)
                log.error('Image Post: %s - Image Post Current: %s',
                          image_post, image_post_current)
                return
            if not post.dhash_h:
                # Without a horizontal dhash the post can't be searched later.
                log.error('Post %s: is missing dhash', post.post_id)
                return
            uow.image_post.add(image_post)
            uow.image_post_current.add(image_post_current)
        elif post.post_type == 'link':
            # Hash the URL so link reposts can be matched by exact-URL lookup.
            url_hash = md5(post.url.encode('utf-8'))
            post.url_hash = url_hash.hexdigest()
            log.debug('Set URL hash for post %s', post.post_id)
        elif post.post_type == 'hosted:video':
            # No pre-processing currently needed for hosted video.
            pass

        try:
            uow.posts.add(post)
            uow.commit()
            log.debug('Post %s: Commited post to database', post.post_id)
        except IntegrityError as e:
            # Most likely a duplicate post_id; traceback suppressed on purpose.
            log.exception('Post %s: Database save failed', post.post_id, exc_info=False)
            return

    return post
def pushshift_to_post(submission: Dict, source: str = 'pushshift') -> Post:
    """Convert a Pushshift submission dict into a Post.

    :param submission: Raw Pushshift submission payload
    :param source: Ingest source label stored on the post
    :return: Populated Post (fields missing from the payload stay None)
    """
    post = Post()
    post.post_id = submission.get('id', None)
    post.url = submission.get('url', None)
    post.shortlink = submission.get('shortlink', None)
    post.author = submission.get('author', None)
    # Guard: the original passed None straight into utcfromtimestamp,
    # which raises TypeError whenever 'created_utc' is absent.
    created_utc = submission.get('created_utc', None)
    if created_utc is not None:
        post.created_at = datetime.utcfromtimestamp(created_utc)
    post.subreddit = submission.get('subreddit', None)
    post.title = submission.get('title', None)
    post.perma_link = submission.get('permalink', None)
    post.crosspost_parent = submission.get('crosspost_parent', None)
    post.selftext = submission.get('selftext', None)
    post.crosspost_checked = True
    post.ingested_from = source
    post.post_type = get_post_type_pushshift(submission)
    return post
def submission_to_post(submission: Submission, source: str = 'praw') -> Post:
    """
    Convert a PRAW Submission object into a Post object
    :param submission: PRAW submission to convert
    :param source: Ingest source label stored on the post
    :return: Populated Post
    """
    post = Post()
    post.post_id = submission.id
    post.url = submission.url
    # __dict__.get avoids triggering PRAW's lazy attribute fetching.
    post.shortlink = submission.__dict__.get('shortlink', None)
    post.author = submission.author.name if submission.author else None
    post.created_at = datetime.utcfromtimestamp(submission.created_utc)
    post.subreddit = submission.subreddit.display_name
    post.title = submission.title
    post.perma_link = submission.permalink
    post.crosspost_parent = submission.__dict__.get('crosspost_parent', None)
    post.selftext = submission.__dict__.get('selftext', None)
    post.crosspost_checked = True
    post.ingested_from = source
    if submission.is_self:
        post.post_type = 'text'
    else:
        # dict.get cannot raise AttributeError/Forbidden, so the old
        # try/except around this lookup was dead code.
        post.post_type = submission.__dict__.get('post_hint', None)
    # TODO - Do this lookup at time of checking reposts. It's slow and slows down ingest
    return post
def test_searched_post_str_unknowntype_valid_count(self):
    """Unhandled post types fall back to the generic 'Searched' label."""
    rendered = searched_post_str(Post(post_type='video'), 10)
    self.assertEqual('**Searched:** 10', rendered)
def test_filter_search_results_hit_all_filters(self):
    """Each enabled filter should drop exactly one candidate, leaving only post 5555.

    Removed a leftover debug print('') from the end of the test.
    """
    search_results = get_image_search_results_multi_match()
    search_results.search_settings.filter_same_author = True
    search_results.search_settings.filter_crossposts = True
    search_results.search_settings.only_older_matches = True
    search_results.search_settings.same_sub = True
    search_results.search_settings.target_title_match = None
    search_results.search_settings.max_days_old = 4
    search_results.checked_post.author = 'barry'
    search_results.checked_post.subreddit = 'sub1'
    search_results.checked_post.post_id = '1111'
    search_results.checked_post.created_at = datetime.utcfromtimestamp(1573995250)

    matches = []
    # Dropped by same author
    matches.append(
        ImageSearchMatch(
            'test.com', 1,
            Post(id=1, author='barry', post_id='abc123',
                 created_at=datetime.strptime('2019-01-28 05:20:03', '%Y-%m-%d %H:%M:%S')),
            10, 10, 32))
    # Dropped by crosspost
    matches.append(
        ImageSearchMatch(
            'test.com', 1,
            Post(id=2, author='steve', post_id='123abc', crosspost_parent='abc',
                 created_at=datetime.strptime('2019-06-28 05:20:03', '%Y-%m-%d %H:%M:%S')),
            10, 10, 32))
    # Dropped by only older
    matches.append(
        ImageSearchMatch(
            'test.com', 1,
            Post(id=3, author='steve', post_id='3333', title='some normal title',
                 created_at=datetime.utcfromtimestamp(1574081650)),
            10, 0.250, 32))
    # Dropped by same sub
    matches.append(
        ImageSearchMatch(
            'test.com', 1,
            Post(id=4, author='steve', post_id='4444', title='some normal title',
                 subreddit='sub2', created_at=datetime.utcfromtimestamp(1573908850)),
            10, 0.250, 32))
    # Survives every filter
    matches.append(
        ImageSearchMatch(
            'test.com', 1,
            Post(id=5, author='steve', post_id='5555', title='some normal title',
                 subreddit='sub1', created_at=datetime.utcfromtimestamp(1573988200)),
            10, 0.250, 32))
    # Dropped by same post
    matches.append(
        ImageSearchMatch(
            'test.com', 1,
            Post(id=6, post_id='1111', title='some normal title', subreddit='sub1',
                 created_at=datetime.utcfromtimestamp(1573908850)),
            10, 0.250, 32))
    matches.append(
        ImageSearchMatch(
            'test.com', 1,
            Post(id=7, post_id='6666', title='some normal title', subreddit='sub1',
                 created_at=datetime.utcfromtimestamp(1573908850)),
            10, 0.250, 32))
    search_results.matches = matches

    # Pin "now" so the max_days_old filter behaves deterministically.
    with patch('redditrepostsleuth.core.util.repost_filters.datetime') as mock_date:
        mock_date.utcnow.return_value = datetime.utcfromtimestamp(1574360460)
        r = filter_search_results(search_results)
        self.assertEqual(1, len(search_results.matches))
        self.assertEqual('5555', r.matches[0].post.post_id)
def test_build_image_report_link_positive(self):
    """Results with matches should render a False Positive report link."""
    results = ImageSearchResults('test.com', Mock(), checked_post=Post(post_id='abc123'))
    results.matches.append(ImageSearchMatch('test.com', 123, Mock(), 1, 1, 32))

    rendered = build_image_report_link(results)
    expected = "*I'm not perfect, but you can help. Report [ [False Positive](https://www.reddit.com/message/compose/?to=RepostSleuthBot&subject=False%20Positive&message={\"post_id\": \"abc123\", \"meme_template\": null}) ]*"
    self.assertEqual(expected, rendered)
def test_searched_post_str_valid_count(self):
    """Image posts use the 'Searched Images' label."""
    rendered = searched_post_str(Post(post_type='image'), 10)
    self.assertEqual('**Searched Images:** 10', rendered)
def return_post_with_id(id):
    """Create a bare Post carrying the given primary key (test convenience).

    NOTE(review): the parameter shadows the builtin ``id``; kept as-is
    so existing callers are unaffected.
    """
    return Post(id=id)
def test_searched_post_str_formatting(self):
    """Large counts should be rendered with thousands separators."""
    rendered = searched_post_str(Post(post_type='image'), 1000000)
    self.assertEqual('**Searched Images:** 1,000,000', rendered)
def test_searched_post_str_link_valid_count(self):
    """Link posts use the 'Searched Links' label."""
    rendered = searched_post_str(Post(post_type='link'), 10)
    self.assertEqual('**Searched Links:** 10', rendered)