Example #1
0
def get_image_search_settings_from_request(req: Request, config: Config) -> ImageSearchSettings:
    """Build image search settings from request query parameters.

    Every setting is read from ``req``; when the request does not supply a
    value the matching ``config.default_image_*`` attribute is used instead.
    The annoy distance is always taken from ``config``.

    :param req: request object exposing ``get_param_as_int`` / ``get_param_as_bool``
    :param config: application config holding the ``default_image_*`` values
    :return: populated ``ImageSearchSettings``
    """

    def _bool_param(name: str, fallback):
        # Only fall back when the parameter is absent. Using ``value or fallback``
        # would silently turn an explicit ``false`` into the configured default,
        # making it impossible to switch off a filter that is on by default.
        value = req.get_param_as_bool(name, required=False, default=None)
        return fallback if value is None else value

    def _int_param(name: str, fallback, required: bool = False):
        # Integer settings keep the original semantics: a missing value or 0
        # both resolve to the configured default.
        return req.get_param_as_int(name, required=required, default=None) or fallback

    return ImageSearchSettings(
        # NOTE(review): required=True is kept from the original; presumably the
        # request object rejects a missing parameter before the fallback can
        # apply - confirm whether that is intended.
        _int_param('target_match_percent', config.default_image_target_match, required=True),
        config.default_image_target_annoy_distance,
        target_title_match=_int_param('target_title_match', config.default_image_target_title_match),
        filter_dead_matches=_bool_param('filter_dead_matches', config.default_image_dead_matches_filter),
        filter_removed_matches=_bool_param('filter_removed_matches', config.default_image_removed_match_filter),
        only_older_matches=_bool_param('only_older_matches', config.default_image_only_older_matches),
        filter_same_author=_bool_param('filter_same_author', config.default_image_same_author_filter),
        filter_crossposts=_bool_param('filter_crossposts', config.default_image_crosspost_filter),
        target_meme_match_percent=_int_param('target_meme_match_percent', config.default_image_target_meme_match),
        meme_filter=_bool_param('meme_filter', config.default_image_meme_filter),
        same_sub=_bool_param('same_sub', config.default_image_same_sub_filter),
        # Fall back to the max-days-old default, not the annoy distance.
        max_days_old=_int_param('max_days_old', config.default_image_max_days_old_filter),
    )
 def test_test_build_site_search_url(self):
     # The generated URL should carry the post id, each enabled filter flag,
     # and both match thresholds as query parameters.
     expected_url = 'https://www.repostsleuth.com?postId=abc123&sameSub=true&filterOnlyOlder=true&memeFilter=true&filterDeadMatches=true&targetImageMatch=90&targetImageMemeMatch=95'
     enabled_options = {
         'same_sub': True,
         'only_older_matches': True,
         'meme_filter': True,
         'filter_dead_matches': True,
         'target_meme_match_percent': 95,
     }
     settings = ImageSearchSettings(90, 170, **enabled_options)

     actual_url = build_site_search_url('abc123', settings)

     self.assertEqual(expected_url, actual_url)
Example #3
0
def get_image_search_settings():
    """Return an ImageSearchSettings instance populated with fixed values."""
    match_percent = 90
    annoy_distance = .077
    options = dict(
        target_meme_match_percent=50,
        meme_filter=False,
        max_depth=5000,
        target_title_match=None,
        max_matches=75,
        same_sub=False,
        max_days_old=190,
        filter_dead_matches=True,
        filter_removed_matches=True,
        only_older_matches=True,
        filter_same_author=True,
        filter_crossposts=True,
    )
    return ImageSearchSettings(match_percent, annoy_distance, **options)
Example #4
0
def get_image_search_settings_for_monitored_sub(monitored_sub: MonitoredSub, target_annoy_distance: float = 170.0) -> ImageSearchSettings:
    """Translate a monitored sub's stored preferences into ImageSearchSettings.

    :param monitored_sub: source of the per-sub thresholds and filter flags
    :param target_annoy_distance: annoy distance passed straight through
    :return: settings with ``max_depth`` fixed at -1 and ``max_matches`` at 200
    """
    match_percent = monitored_sub.target_image_match
    sub_options = {
        'target_meme_match_percent': monitored_sub.target_image_meme_match,
        'meme_filter': monitored_sub.meme_filter,
        # A title threshold is only supplied when the sub has title checking enabled.
        'target_title_match': (
            monitored_sub.target_title_match
            if monitored_sub.check_title_similarity
            else None
        ),
        'same_sub': monitored_sub.same_sub_only,
        'max_days_old': monitored_sub.target_days_old,
        'filter_same_author': monitored_sub.filter_same_author,
        'filter_crossposts': monitored_sub.filter_crossposts,
        'filter_removed_matches': monitored_sub.filter_removed_matches,
        'max_depth': -1,
        'max_matches': 200,
    }
    return ImageSearchSettings(match_percent, target_annoy_distance, **sub_options)
def get_default_image_search_settings(config: Config) -> ImageSearchSettings:
    """Create ImageSearchSettings purely from the ``default_image_*`` config values.

    :param config: application config supplying every default
    :return: settings built from config, with ``max_depth`` fixed at -1
    """
    match_percent = config.default_image_target_match
    defaults = {
        'target_title_match': config.default_image_target_title_match,
        'filter_dead_matches': config.default_image_dead_matches_filter,
        'filter_removed_matches': config.default_image_removed_match_filter,
        'only_older_matches': config.default_image_only_older_matches,
        'filter_same_author': config.default_image_same_author_filter,
        'filter_crossposts': config.default_image_crosspost_filter,
        'target_meme_match_percent': config.default_image_target_meme_match,
        'meme_filter': config.default_image_meme_filter,
        'same_sub': config.default_image_same_sub_filter,
        'max_days_old': config.default_image_max_days_old_filter,
        'target_annoy_distance': config.default_image_target_annoy_distance,
        'max_depth': -1,
        'max_matches': config.default_image_max_matches,
    }
    return ImageSearchSettings(match_percent, **defaults)
    def check_image(
            self,
            url: Text,
            post: Post = None,
            source='unknown',
            sort_by='created',
            search_settings: ImageSearchSettings = None) -> ImageSearchResults:
        """Search for matches of the image at ``url`` and return the results.

        Steps, in order: resolve search settings, optionally run meme
        detection, query the index via ``_get_matches``, build match objects
        from the historical and current results, de-duplicate them, score
        title similarity (only when ``post`` is given), filter for reposts,
        then log timing and the search itself.

        :param url: URL of the image being checked
        :param post: post being checked, if any; enables title similarity scoring
        :param source: label passed to the search/time logging helpers
        :param sort_by: forwarded to ``_filter_results_for_reposts``
        :param search_settings: settings to use; when falsy the defaults from
            ``get_default_image_search_settings(self.config)`` are used
        :return: the populated ``ImageSearchResults``
        """
        log.info('Checking URL for matches: %s', url)

        if not search_settings:
            log.info('No search settings provided, using default')
            search_settings = get_default_image_search_settings(self.config)

        # NOTE(review): target_hash / target_hamming_distance are read from this
        # object below without being set here - presumably ImageSearchResults
        # derives them from url and search_settings; confirm.
        search_results = ImageSearchResults(url,
                                            checked_post=post,
                                            search_settings=search_settings)

        search_results.search_times.start_timer('total_search_time')

        if search_settings.meme_filter:
            # Time only the template lookup, not the meme hash computation.
            search_results.search_times.start_timer('meme_detection_time')
            search_results.meme_template = self._get_meme_template(
                search_results.target_hash)
            search_results.search_times.stop_timer('meme_detection_time')
            if search_results.meme_template:
                # NOTE(review): this mutates the settings object in place (including
                # one passed in by the caller) and is not reverted below when the
                # meme hash is missing and the template is cleared.
                search_settings.target_match_percent = 100  # Keep only 100% matches on default hash size
                search_results.meme_hash = self._get_meme_hash(url)
                if not search_results.meme_hash:
                    # Without a meme hash the template cannot be used; drop it.
                    log.error('No meme hash, disabled meme filter')
                    search_results.meme_template = None
                else:
                    log.info('Using meme filter %s',
                             search_results.meme_template.id)

        log.debug('Search Settings: %s', search_settings)

        api_search_results = self._get_matches(
            search_results.target_hash,
            search_results.target_hamming_distance,
            search_settings.target_annoy_distance,
            max_matches=search_settings.max_matches,
            max_depth=search_settings.max_depth,
            search_times=search_results.search_times)

        # Copy index-level stats from the raw search response onto the results.
        search_results.search_times.index_search_time = api_search_results.index_search_time
        search_results.total_searched = api_search_results.total_searched

        # Build match objects from the historical results, then append the
        # ones from the current results.
        search_results.search_times.start_timer('set_match_post_time')
        search_results.matches = self._build_search_results(
            api_search_results.historical_matches, url,
            search_results.target_hash)
        search_results.matches += self._build_search_results(
            api_search_results.current_matches,
            url,
            search_results.target_hash,
            historical_index=False)
        search_results.search_times.stop_timer('set_match_post_time')

        # The 'remove_duplicate_time' timer also covers title similarity scoring.
        search_results.search_times.start_timer('remove_duplicate_time')
        search_results.matches = self._remove_duplicates(
            search_results.matches)
        if post:
            # Title similarity needs the checked post's title, so skip it for bare URLs.
            search_results.matches = set_all_title_similarity(
                search_results.checked_post.title, search_results.matches)
        search_results.search_times.stop_timer('remove_duplicate_time')

        search_results = self._filter_results_for_reposts(search_results,
                                                          sort_by=sort_by)
        # Total time stops before the logging calls, so they are not included in it.
        search_results.search_times.stop_timer('total_search_time')
        self._log_search_time(search_results, source)

        # _log_search returns the results object that is used from here on.
        search_results = self._log_search(
            search_results,
            source,
            api_search_results.used_current_index,
            api_search_results.used_historical_index,
        )

        log.info('Seached %s items and found %s matches',
                 search_results.total_searched, len(search_results.matches))
        return search_results