Beispiel #1
0
def get_image_search_results_multi_match():
    """Build an ``ImageSearchResults`` fixture that holds three matches.

    The checked post is image post ``abc123`` in subreddit ``test`` and the
    total search time is set to 10.  The first two matches carry a
    ``created_at`` timestamp; the third one only has a title.
    """
    timestamp_format = '%Y-%m-%d %H:%M:%S'

    settings = get_image_search_settings()
    checked = Post(post_id='abc123', post_type='image', subreddit='test')
    search_results = ImageSearchResults('test.com',
                                        settings,
                                        checked_post=checked)

    timings = ImageSearchTimes()
    timings.total_search_time = 10
    search_results.search_times = timings

    dated_posts = [
        Post(id=1,
             post_id='1111',
             created_at=datetime.strptime('2019-01-28 05:20:03',
                                          timestamp_format)),
        Post(id=2,
             post_id='2222',
             created_at=datetime.strptime('2019-06-28 05:20:03',
                                          timestamp_format)),
    ]
    for dated_post in dated_posts:
        search_results.matches.append(
            ImageSearchMatch('test.com', 1, dated_post, 10, 10, 32))

    undated_post = Post(id=3, post_id='3333', title='some normal title')
    search_results.matches.append(
        ImageSearchMatch('test.com', 1, undated_post, 10, 0.250, 32))

    return search_results
Beispiel #2
0
 def _get_image_search_results_multi_match(self):
     """Return image search results for post ``abc123`` with two dated matches.

     The total search time is set to 10; the matched posts are ``abc123``
     and ``123abc``.
     """
     settings = self._get_image_search_settings()
     checked = Post(post_id='abc123', post_type='image', subreddit='test')
     search_results = ImageSearchResults('test.com', settings, checked_post=checked)

     timings = ImageSearchTimes()
     timings.total_search_time = 10
     search_results.search_times = timings

     matched_posts = (
         ('abc123', '2019-01-28 05:20:03'),
         ('123abc', '2019-06-28 05:20:03'),
     )
     for matched_id, created in matched_posts:
         matched_post = Post(post_id=matched_id,
                             created_at=datetime.strptime(created, '%Y-%m-%d %H:%M:%S'))
         search_results.matches.append(
             ImageSearchMatch('test.com', 1, matched_post, 10, 10, 32))
     return search_results
    def test_get_closest_image_match__return_closest(self):
        """Of three matches built with 3, 5 and 7 as fourth argument, the first wins."""
        candidates = [
            ImageSearchMatch('test.com', 1, Post(id=post_id), value, .077, 32)
            for post_id, value in ((1, 3), (2, 5), (3, 7))
        ]

        closest = get_closest_image_match(candidates, check_url=False)

        self.assertEqual(closest, candidates[0])
 def test__remove_duplicates_one_dup_remove(self):
     """Two of the three matches share post id 1, so two results are expected."""
     matches = [
         ImageSearchMatch('test.com', 123, Post(id=post_id), 10, 10, 32)
         for post_id in (1, 1, 2)
     ]
     service = DuplicateImageService(Mock(), Mock(), Mock(), config=MagicMock())

     deduplicated = service._remove_duplicates(matches)

     self.assertEqual(2, len(deduplicated))
    def test_sort_reposts_correct_order(self):
        """The post with the smallest timestamp (id 3) must come first after sorting."""
        matches = []
        for post_id, epoch in ((1, 1575508228), (2, 1572916228), (3, 1570237828)):
            repost = RepostMatch()
            repost.post = Post(id=post_id, created_at=datetime.fromtimestamp(epoch))
            matches.append(repost)

        ordered = sort_reposts(matches)

        self.assertEqual(3, ordered[0].post.id)
Beispiel #6
0
 def _get_link_search_results_no_match(self):
     """Return link search results for post ``abc123`` with no matches added."""
     timings = ImageSearchTimes()
     timings.total_search_time = 10
     settings = self._get_search_settings()
     checked = Post(post_id='abc123', post_type='link', subreddit='test')
     return SearchResults('test.com', settings, checked_post=checked, search_times=timings)
Beispiel #7
0
def get_link_search_results_matches_match():
    """Build link ``SearchResults`` for post ``abc123`` with one dated match.

    The total search time is set to 10 and the single match points at post
    ``123abc``.
    """
    timings = ImageSearchTimes()
    timings.total_search_time = 10

    settings = get_search_settings()
    checked = Post(post_id='abc123', post_type='link', subreddit='test')
    search_results = SearchResults('test.com',
                                   settings,
                                   checked_post=checked,
                                   search_times=timings)

    matched_at = datetime.strptime('2019-06-28 05:20:03', '%Y-%m-%d %H:%M:%S')
    matched_post = Post(post_id='123abc', created_at=matched_at)
    search_results.matches.append(SearchMatch('test.com', matched_post))

    return search_results
Beispiel #8
0
def get_image_search_results_no_match():
    """Build ``ImageSearchResults`` for image post ``abc123`` without any matches.

    The total search time is set to 10.
    """
    settings = get_image_search_settings()
    checked = Post(post_id='abc123', post_type='image', subreddit='test')
    search_results = ImageSearchResults('test.com',
                                        settings,
                                        checked_post=checked)

    timings = ImageSearchTimes()
    timings.total_search_time = 10
    search_results.search_times = timings
    return search_results
Beispiel #9
0
 def test__should_check_post__already_checked_reject(self):
     """A post that already has ``left_comment`` set must be rejected."""
     collaborators = [MagicMock() for _ in range(5)]
     sub_monitor = SubMonitor(*collaborators, config=Config(redis_host='dummy'))
     already_commented = Post(left_comment=True)

     verdict = sub_monitor.should_check_post(already_commented, True, True)

     self.assertFalse(verdict)
    def test_get_first_active_match(self):
        """The first match whose URL answers with status 200 (post id 3) is returned."""
        def fake_head(url, **kwargs):
            # Only the "bad" URL reports a failing status code.
            status = 400 if url == 'www.bad.com' else 200
            return Mock(status_code=status)

        post_urls = ('www.bad.com', 'www.bad.com', 'www.good.com', 'www.good.com')
        target = 'redditrepostsleuth.core.util.repost_helpers.requests.head'
        with mock.patch(target) as patched_head:
            patched_head.side_effect = fake_head
            matches = [
                SearchMatch('www.dummy.com', Post(id=post_id, url=post_url))
                for post_id, post_url in enumerate(post_urls, start=1)
            ]
            first_active = get_first_active_match(matches)
            self.assertEqual(3, first_active.post.id)
Beispiel #11
0
 def test__should_check_post__title_filter_accept(self):
     """An uncommented image post titled 'some post' should be accepted."""
     collaborators = [MagicMock() for _ in range(5)]
     config = Config(redis_host='dummy', supported_post_types=['image'])
     sub_monitor = SubMonitor(*collaborators, config=config)
     candidate = Post(left_comment=False, post_type='image', title='some post')

     verdict = sub_monitor.should_check_post(candidate, True, True)

     self.assertTrue(verdict)
Beispiel #12
0
 def _mark_post_as_comment_left(self, post: Post):
     """Flag ``post`` as commented on and persist the change.

     Sets ``post.left_comment`` to ``True``, then updates and commits the post
     through a unit of work obtained from ``self.uowm``.  Any exception raised
     while doing so is logged with its traceback and not re-raised.

     :param post: Post to flag as having received a comment
     """
     try:
         with self.uowm.start() as uow:
             post.left_comment = True
             uow.posts.update(post)
             uow.commit()
     except Exception:
         # log.exception already attaches the active traceback.  The message
         # names the flag that is actually being set (it used to say "checked").
         log.exception('Failed to mark post %s as comment left', post.id)
Beispiel #13
0
 def test_build_default_comment__image_oc_all_enabled_close_match(self):
     """No matches plus a closest match, all comment options on, yields the expected template."""
     response_builder = ResponseBuilder(MagicMock())
     search_results = self._get_image_search_results_no_match()
     closest_post = Post(post_id='abc123',
                         created_at=datetime.strptime('2019-01-28 05:20:03', '%Y-%m-%d %H:%M:%S'))
     search_results.closest_match = ImageSearchMatch('test.com', 1, closest_post, 5, 3, 32)

     comment_options = dict(signature=True, stats=True, search_link=True, search_settings=True)
     comment = response_builder.build_default_comment(search_results, **comment_options)

     self.assertEqual(IMAGE_OC_ALL_ENABLED_ALL_ENABLED_NO_MEME, comment)
    def _add_comment(self, post: Post,
                     search_results: SearchResults) -> None:
        """
        Leave the default comment on a post and flag the post as commented.

        Nothing is posted when:
          * the subreddit is banned (the post is still flagged as commented),
          * a comment was already left on the post, or
          * the subreddit is a monitored sub.

        Failures while replying are logged; the post is flagged as commented
        afterwards regardless of whether the reply succeeded.

        :param post: Post to comment on
        :param search_results: Results used to build the comment text
        :rtype: None
        """

        if self._is_banned_sub(post.subreddit):
            log.info('Skipping banned sub %s', post.subreddit)
            with self.uowm.start() as uow:
                post.left_comment = True
                uow.posts.update(post)
                uow.commit()
            return

        if self._left_comment(post.post_id):
            log.info('Already left comment on %s', post.post_id)
            return

        with self.uowm.start() as uow:
            monitored_sub = uow.monitored_sub.get_by_sub(post.subreddit)
            if monitored_sub:
                log.info('Skipping monitored sub %s', post.subreddit)
                return

        msg = self.response_builder.build_default_comment(search_results)

        try:
            self.response_handler.reply_to_submission(post.post_id, msg)
        except APIException:
            log.error('Failed to leave comment on %s in %s. ', post.post_id,
                      post.subreddit)
        except Exception:
            # Still best-effort, but no longer silent: keep the traceback so
            # unexpected failures can be diagnosed.
            log.exception('Unexpected error leaving comment on %s in %s',
                          post.post_id, post.subreddit)

        with self.uowm.start() as uow:
            post.left_comment = True
            uow.posts.update(post)
            uow.commit()
Beispiel #15
0
def set_image_hashes(post: Post, hash_size: int = 16) -> Post:
    """Hash the image behind ``post.url`` and store the hashes on the post.

    The image is obtained with ``generate_img_by_url``.  The horizontal
    difference hash, vertical difference hash and average hash are computed
    with ``imagehash`` and their string forms are written to ``post.dhash_h``,
    ``post.dhash_v`` and ``post.ahash``.

    :param post: Post whose ``url`` is passed to ``generate_img_by_url``
    :param hash_size: ``hash_size`` handed to every ``imagehash`` call
    :return: The same post instance with the three hash attributes set
    :raises ImageConversioinException: Propagated unchanged from ``generate_img_by_url``
    :raises Exception: Any failure while hashing is logged and re-raised
    """
    log.debug('%s - Hashing image post %s', os.getpid(), post.post_id)

    # Errors from loading the image propagate to the caller as they are; the
    # former try/except that only re-raised added nothing.
    img = generate_img_by_url(post.url)

    try:
        dhash_h = imagehash.dhash(img, hash_size=hash_size)
        dhash_v = imagehash.dhash_vertical(img, hash_size=hash_size)
        ahash = imagehash.average_hash(img, hash_size=hash_size)
        post.dhash_h = str(dhash_h)
        post.dhash_v = str(dhash_v)
        post.ahash = str(ahash)
    except Exception:
        # TODO: Specific exception
        log.exception('Error creating hash')
        raise

    return post
Beispiel #16
0
 def test__should_check_post__reject_crosspost(self):
     """An image post that has a ``crosspost_parent`` must be rejected."""
     config = Config(redis_host='dummy', supported_post_types=['image'])
     collaborators = [MagicMock() for _ in range(5)]
     sub_monitor = SubMonitor(*collaborators, config=config)
     crosspost = Post(left_comment=False, post_type='image', crosspost_parent='dkjlsd')

     verdict = sub_monitor.should_check_post(crosspost, True, True)

     self.assertFalse(verdict)
Beispiel #17
0
def set_image_hashes_api(post: Post, api_url: str, timeout=None) -> Post:
    """
    Call an external API to create image hashes.
    This allows us to offload bandwidth to another server.  In the current case, a Digital Ocean Load Balancer
    :param post: Post to hash; its ``url`` is sent as the ``url`` query parameter
    :param api_url: API URL to call
    :param timeout: Optional timeout in seconds passed to ``requests.get``.
        The default ``None`` keeps the previous behavior of waiting without a limit.
    :return: The same post with ``dhash_h``, ``dhash_v`` and ``ahash`` taken from the API response
    :raises ImageConversioinException: If the API responds with a status code other than 200
    :raises KeyError: If the response JSON lacks one of the expected hash keys
    """
    log.debug('Hashing image post using api %s', post.post_id)
    r = requests.get(api_url, params={'url': post.url}, timeout=timeout)
    if r.status_code != 200:
        log.error('Bad status code from DO API %s', r.status_code)
        raise ImageConversioinException('Bad response from DO API')

    hashes = json.loads(r.text)
    log.debug(hashes)

    post.dhash_h = hashes['dhash_h']
    post.dhash_v = hashes['dhash_v']
    post.ahash = hashes['ahash']

    return post
 def test__get_image_search_match_from_index_result_valid_post_no_dhash(self):
     """When the patched lookup returns a post built with only an id, ``None`` is expected."""
     index_result = {'id': 123, 'distance': .123}
     hex_string = '40bec6703e3f3c2b0fc491a1c0c16cff273f00c00c020ff91b6807cc060c0014'

     with mock.patch.object(DuplicateImageService, '_get_post_from_index_id') as patched_lookup:
         patched_lookup.return_value = Post(id=456)
         service = DuplicateImageService(Mock(), Mock(), Mock(), config=MagicMock())

         match = service._get_image_search_match_from_index_result(
             index_result, 'test.com', hex_string)

         self.assertIsNone(match)
def save_link_repost(post: Post, repost_of: Post, uowm: UnitOfWorkManager,
                     source: Text) -> None:
    """Record ``post`` as a link repost of ``repost_of`` and flag it as checked.

    A ``LinkRepost`` row is added and ``post.checked_repost`` is set to
    ``True`` within a single unit of work.  Commit failures are logged and
    not re-raised.

    :param post: The reposting post
    :param repost_of: The post that ``post`` is a repost of
    :param uowm: Unit of work manager used to open the unit of work
    :param source: Stored in the ``source`` field of the new ``LinkRepost``
    """
    with uowm.start() as uow:
        repost_fields = {
            'post_id': post.post_id,
            'repost_of': repost_of.post_id,
            'author': post.author,
            'subreddit': post.subreddit,
            'source': source,
        }
        repost_record = LinkRepost(**repost_fields)

        post.checked_repost = True
        uow.posts.update(post)
        uow.link_repost.add(repost_record)

        try:
            uow.commit()
        except IntegrityError:
            log.error('Failed to save link repost, it already exists')
        except Exception:
            log.exception('Failed to save link repost', exc_info=True)
def pre_process_post(post: Post, uowm: UnitOfWorkManager, hash_api) -> Post:
    """Prepare a post according to its type and save it to the database.

    * ``image`` posts go through ``process_image_post``; the resulting image
      rows are added to the unit of work.
    * ``link`` posts get the MD5 hex digest of their URL as ``url_hash``.
    * Any other type is saved without extra processing.

    :param post: Post to process and save
    :param uowm: Unit of work manager used to open the unit of work
    :param hash_api: Forwarded to ``process_image_post``
    :return: The saved post.  Note that ``None`` is returned instead when
        image processing fails, when an image post comes back without its
        image rows or without ``dhash_h``, or when saving raises
        ``IntegrityError``.
    """
    log.debug(post)
    with uowm.start() as uow:
        post_type = post.post_type

        if post_type == 'image':
            log.debug('Post %s: Is an image', post.post_id)
            try:
                post, image_post, image_post_current = process_image_post(
                    post, hash_api)
            except (ImageRemovedException, ImageConversioinException,
                    InvalidImageUrlException, ConnectionError):
                return None

            if image_post is None or image_post_current is None:
                log.error(
                    'Post %s: Failed to save image post. One of the post objects is null',
                    post.post_id)
                log.error('Image Post: %s - Image Post Current: %s',
                          image_post, image_post_current)
                return None

            if not post.dhash_h:
                log.error('Post %s: is missing dhash', post.post_id)
                return None

            uow.image_post.add(image_post)
            uow.image_post_current.add(image_post_current)
        elif post_type == 'link':
            post.url_hash = md5(post.url.encode('utf-8')).hexdigest()
            log.debug('Set URL hash for post %s', post.post_id)
        # Other post types (e.g. 'hosted:video') need no extra preparation.

        try:
            uow.posts.add(post)
            uow.commit()
            log.debug('Post %s: Commited post to database', post.post_id)
        except IntegrityError:
            log.exception('Post %s: Database save failed',
                          post.post_id,
                          exc_info=False)
            return None

    return post
def pushshift_to_post(submission: Dict, source: str = 'pushshift') -> Post:
    """Convert a Pushshift submission dict into a ``Post``.

    Every field is read with ``dict.get``, so a missing key leaves the
    matching attribute as ``None``.  This includes ``created_utc``: when it
    is absent ``created_at`` is set to ``None`` instead of raising
    ``TypeError`` from ``datetime.utcfromtimestamp(None)``.

    :param submission: Submission data as a dict
    :param source: Value stored in ``ingested_from``
    :return: Newly built ``Post``
    """
    post = Post()
    post.post_id = submission.get('id', None)
    post.url = submission.get('url', None)
    post.shortlink = submission.get('shortlink', None)
    post.author = submission.get('author', None)

    # Guard the conversion: utcfromtimestamp() does not accept None.
    created_utc = submission.get('created_utc', None)
    if created_utc is None:
        post.created_at = None
    else:
        post.created_at = datetime.utcfromtimestamp(created_utc)

    post.subreddit = submission.get('subreddit', None)
    post.title = submission.get('title', None)
    post.perma_link = submission.get('permalink', None)
    post.crosspost_parent = submission.get('crosspost_parent', None)
    post.selftext = submission.get('selftext', None)
    post.crosspost_checked = True
    post.ingested_from = source
    post.post_type = get_post_type_pushshift(submission)

    return post
def submission_to_post(submission: Submission, source: str = 'praw') -> Post:
    """
    Convert a PRAW Submission object into a Post object

    ``shortlink``, ``crosspost_parent``, ``selftext`` and ``post_hint`` are
    looked up in the submission's ``__dict__``, so a missing one becomes
    ``None``.  Self posts get the post type ``'text'``; all others use
    ``post_hint``.

    :param submission: Submission to convert
    :param source: Value stored in ``ingested_from``
    :return: Newly built Post
    """
    def optional(name):
        # Read straight from the instance dict; absent attributes yield None.
        return submission.__dict__.get(name, None)

    post = Post()
    post.post_id = submission.id
    post.url = submission.url
    post.shortlink = optional('shortlink')
    author = submission.author
    post.author = author.name if author else None
    post.created_at = datetime.utcfromtimestamp(submission.created_utc)
    post.subreddit = submission.subreddit.display_name
    post.title = submission.title
    post.perma_link = submission.permalink
    post.crosspost_parent = optional('crosspost_parent')
    post.selftext = optional('selftext')
    post.crosspost_checked = True
    post.ingested_from = source

    if not submission.is_self:
        try:
            post.post_type = optional('post_hint')
        except (AttributeError, Forbidden):
            pass
    else:
        post.post_type = 'text'

    # TODO - Do this lookup at time of checking reposts.  It's slow and slows down ingest
    # (the direct ``submission.crosspost_parent`` attribute lookup is disabled for now)

    return post
 def test_searched_post_str_unknowntype_valid_count(self):
     """A ``video`` post is expected to get the generic ``Searched`` label."""
     video_post = Post(post_type='video')
     rendered = searched_post_str(video_post, 10)
     self.assertEqual('**Searched:** 10', rendered)
Beispiel #24
0
 def test_filter_search_results_hit_all_filters(self):
     """Enable every filter at once and expect only post ``5555`` to remain.

     The checked post is ``1111`` by ``barry`` in ``sub1``.  The current time
     seen by the filter module is pinned by patching its ``datetime``.
     """
     search_results = get_image_search_results_multi_match()
     search_results.search_settings.filter_same_author = True
     search_results.search_settings.filter_crossposts = True
     search_results.search_settings.only_older_matches = True
     search_results.search_settings.same_sub = True
     search_results.search_settings.target_title_match = None
     search_results.search_settings.max_days_old = 4
     search_results.checked_post.author = 'barry'
     search_results.checked_post.subreddit = 'sub1'
     search_results.checked_post.post_id = '1111'
     search_results.checked_post.created_at = datetime.utcfromtimestamp(
         1573995250)
     matches = [
         # Dropped by same author
         ImageSearchMatch(
             'test.com', 1,
             Post(id=1,
                  author='barry',
                  post_id='abc123',
                  created_at=datetime.strptime('2019-01-28 05:20:03',
                                               '%Y-%m-%d %H:%M:%S')), 10,
             10, 32),
         # Dropped by crosspost
         ImageSearchMatch(
             'test.com', 1,
             Post(id=2,
                  author='steve',
                  post_id='123abc',
                  crosspost_parent='abc',
                  created_at=datetime.strptime('2019-06-28 05:20:03',
                                               '%Y-%m-%d %H:%M:%S')), 10,
             10, 32),
         # Dropped by only older
         ImageSearchMatch(
             'test.com', 1,
             Post(id=3,
                  author='steve',
                  post_id='3333',
                  title='some normal title',
                  created_at=datetime.utcfromtimestamp(1574081650)), 10,
             0.250, 32),
         # Dropped by same sub
         ImageSearchMatch(
             'test.com', 1,
             Post(id=4,
                  author='steve',
                  post_id='4444',
                  title='some normal title',
                  subreddit='sub2',
                  created_at=datetime.utcfromtimestamp(1573908850)), 10,
             0.250, 32),
         # The one match asserted below to survive all filters
         ImageSearchMatch(
             'test.com', 1,
             Post(id=5,
                  author='steve',
                  post_id='5555',
                  title='some normal title',
                  subreddit='sub1',
                  created_at=datetime.utcfromtimestamp(1573988200)), 10,
             0.250, 32),
         # Dropped by same post
         ImageSearchMatch(
             'test.com', 1,
             Post(id=6,
                  post_id='1111',
                  title='some normal title',
                  subreddit='sub1',
                  created_at=datetime.utcfromtimestamp(1573908850)), 10,
             0.250, 32),
         # NOTE(review): presumably dropped by max_days_old (created more
         # than 4 days before the patched "now") - confirm against the filter
         ImageSearchMatch(
             'test.com', 1,
             Post(id=7,
                  post_id='6666',
                  title='some normal title',
                  subreddit='sub1',
                  created_at=datetime.utcfromtimestamp(1573908850)), 10,
             0.250, 32),
     ]
     search_results.matches = matches
     with patch('redditrepostsleuth.core.util.repost_filters.datetime'
                ) as mock_date:
         mock_date.utcnow.return_value = datetime.utcfromtimestamp(
             1574360460)
         r = filter_search_results(search_results)
     self.assertEqual(1, len(search_results.matches))
     self.assertEqual('5555', r.matches[0].post.post_id)
 def test_build_image_report_link_positive(self):
     """With one match present the report link offers the 'False Positive' option."""
     checked = Post(post_id='abc123')
     search_results = ImageSearchResults('test.com', Mock(), checked_post=checked)
     search_results.matches.append(ImageSearchMatch('test.com', 123, Mock(), 1, 1, 32))

     report_link = build_image_report_link(search_results)

     expected = (
         "*I'm not perfect, but you can help. Report [ [False Positive]"
         "(https://www.reddit.com/message/compose/?to=RepostSleuthBot"
         "&subject=False%20Positive"
         "&message={\"post_id\": \"abc123\", \"meme_template\": null}) ]*"
     )
     self.assertEqual(expected, report_link)
 def test_searched_post_str_valid_count(self):
     """An ``image`` post is expected to get the ``Searched Images`` label."""
     image_post = Post(post_type='image')
     rendered = searched_post_str(image_post, 10)
     self.assertEqual('**Searched Images:** 10', rendered)
 def return_post_with_id(id):
     """Build a ``Post`` whose ``id`` is the given value."""
     post = Post(id=id)
     return post
 def test_searched_post_str_formatting(self):
     """A count of one million is expected to be rendered with thousands separators."""
     image_post = Post(post_type='image')
     rendered = searched_post_str(image_post, 1000000)
     self.assertEqual('**Searched Images:** 1,000,000', rendered)
 def test_searched_post_str_link_valid_count(self):
     """A ``link`` post is expected to get the ``Searched Links`` label."""
     link_post = Post(post_type='link')
     rendered = searched_post_str(link_post, 10)
     self.assertEqual('**Searched Links:** 10', rendered)