Ejemplo n.º 1
0
 def set_reason(self, request: HttpRequest, queryset: ArchiveQuerySet) -> None:
     """Set ``reason`` on every archive in ``queryset`` to the posted value.

     The new value is read from the ``extra_field`` key of ``request.POST``
     and written with a single ``queryset.update`` call. The count returned
     by that call is then reported back through ``self.message_user``.
     """
     new_reason = request.POST['extra_field']
     updated_count = queryset.update(reason=new_reason)
     # Singular vs. plural wording for the confirmation message.
     subject = "1 archive was" if updated_count == 1 else "%s archives were" % updated_count
     self.message_user(request, "%s successfully set as reason: %s." % (subject, new_reason))
Ejemplo n.º 2
0
 def make_public(self, request: HttpRequest, queryset: ArchiveQuerySet) -> None:
     """Call ``set_public()`` on each archive in ``queryset`` and report the total.

     The total is taken from ``queryset.count()`` before the archives are
     iterated, and is used only to word the message passed to
     ``self.message_user``.
     """
     total = queryset.count()
     for item in queryset:
         item.set_public()
     # Singular vs. plural wording for the confirmation message.
     subject = "1 archive was" if total == 1 else "%s archives were" % total
     self.message_user(request, "%s successfully marked as public." % subject)
Ejemplo n.º 3
0
def match_internal(archives: ArchiveQuerySet,
                   providers: Iterable[str],
                   logger: OptionalLogger,
                   cutoff: float = 0.4,
                   max_matches: int = 20,
                   match_by_filesize: bool = True) -> None:
    """Record candidate galleries from the local database for each archive.

    Two strategies are applied to every archive:

    * Title: a comparison title derived from ``archive.zipped.name`` is
      matched against the ``title`` / ``title_jpn`` of the galleries returned
      by ``Gallery.objects.eligible_for_use`` (once per provider filter).
    * File size (optional): galleries whose ``filesize`` equals the
      archive's ``filesize``.

    Every hit is stored with ``ArchiveMatches.objects.update_or_create``,
    keyed on ``(archive, gallery, match_type)``. The accuracy is passed through
    ``defaults`` so that re-running the match updates the existing row instead
    of creating a second one with a different accuracy.

    Args:
        archives: Archives to find matches for.
        providers: Provider names used to filter galleries
            (``provider__contains``). When empty, all eligible galleries are
            searched under the pseudo-provider ``'all'``.
        logger: Optional logger; progress is reported with ``info`` when set.
        cutoff: Passed to ``get_list_closer_gallery_titles_from_list``.
        max_matches: Passed to ``get_list_closer_gallery_titles_from_list``.
        match_by_filesize: When false, the file size strategy is skipped.
    """
    galleries_per_provider: Dict[str, GalleryQuerySet] = {}
    if providers:
        for provider in providers:
            galleries_per_provider[provider] = Gallery.objects.eligible_for_use(
                provider__contains=provider)
    else:
        galleries_per_provider['all'] = Gallery.objects.eligible_for_use()

    # Pre-compute, per provider, the (normalized title, gallery pk) pairs that
    # archive titles are compared against. Both title variants are included.
    galleries_title_id_per_provider: Dict[str, List[Tuple[str, str]]] = {}
    for provider, galleries in galleries_per_provider.items():
        title_id_pairs = []
        for gallery in galleries:
            if gallery.title:
                title_id_pairs.append(
                    (replace_illegal_name(gallery.title), gallery.pk))
            if gallery.title_jpn:
                title_id_pairs.append(
                    (replace_illegal_name(gallery.title_jpn), gallery.pk))
        galleries_title_id_per_provider[provider] = title_id_pairs

    for i, archive in enumerate(archives, start=1):  # type: ignore

        # --- Title matching -------------------------------------------------
        for provider, galleries_title_id in galleries_title_id_per_provider.items():

            # A provider-specific title matcher, when available, decides how
            # the file name is turned into a comparable title.
            matchers = None
            if provider != 'all':
                matchers = crawler_settings.provider_context.get_matchers(
                    crawler_settings,
                    logger,
                    filter_name="{}_title".format(provider),
                    force=True)
            if matchers:
                adj_title = matchers[0][0].format_to_compare_title(
                    archive.zipped.name)
            else:
                adj_title = get_title_from_path(archive.zipped.name)

            similar_list_provider = get_list_closer_gallery_titles_from_list(
                adj_title, galleries_title_id, cutoff, max_matches)
            if similar_list_provider is None:
                continue

            # Each entry carries the gallery pk at index 1 and the accuracy at
            # index 2.
            for similar in similar_list_provider:
                ArchiveMatches.objects.update_or_create(
                    archive=archive,
                    gallery=Gallery.objects.get(pk=similar[1]),
                    match_type='title',
                    defaults={
                        'match_accuracy': similar[2],
                    })

            if logger:
                logger.info(
                    "{} of {}: Found {} matches (internal search) from title for archive: {}, using provider filter: {}"
                    .format(i, archives.count(),
                            len(similar_list_provider), archive.title,
                            provider))

        # --- File size matching ---------------------------------------------
        # A missing (None) or non-positive size cannot identify a gallery.
        if not match_by_filesize or not archive.filesize or archive.filesize <= 0:
            continue

        # Evaluate the queryset once; the list serves the emptiness check, the
        # logged count and the iteration.
        galleries_same_size = list(
            Gallery.objects.filter(filesize=archive.filesize))
        if not galleries_same_size:
            continue

        if logger:
            logger.info(
                "{} of {}: Found {} matches (internal search) from filesize for archive: {}"
                .format(i, str(archives.count()),
                        str(len(galleries_same_size)), archive.title))
        for similar_gallery in galleries_same_size:
            ArchiveMatches.objects.update_or_create(
                archive=archive,
                gallery=similar_gallery,
                match_type='size',
                defaults={
                    'match_accuracy': 1,
                })
Ejemplo n.º 4
0
def match_external(archives: ArchiveQuerySet,
                   matcher_filters: Iterable[str],
                   logger: OptionalLogger,
                   cutoff: float = 0.4,
                   max_matches: int = 20) -> None:
    """Run the configured matchers against each archive and store the results.

    Matchers are obtained from ``crawler_settings.provider_context``: one
    group per name in ``matcher_filters`` (``force=True``), or a single
    ``'all'`` group (``force=False``) when no filters are given.

    For every archive and matcher, ``create_closer_matches_values`` is called
    with the archive title (``'title'`` matchers) or the archive file name
    (``'image'`` matchers); other matcher types yield no results. Each result
    is turned into a gallery with
    ``Gallery.objects.update_or_create_from_values`` and linked to the archive
    through ``ArchiveMatches.objects.update_or_create``.

    Args:
        archives: Archives to find matches for.
        matcher_filters: Matcher filter names; empty means "all matchers".
        logger: Optional logger; progress is reported with ``info`` when set.
        cutoff: Forwarded to title matchers.
        max_matches: Forwarded to title matchers.
    """
    provider_context = crawler_settings.provider_context

    if matcher_filters:
        matchers_by_filter = {
            filter_name: provider_context.get_matchers(
                crawler_settings, logger, filter_name=filter_name, force=True)
            for filter_name in matcher_filters
        }
    else:
        matchers_by_filter = {
            'all': provider_context.get_matchers(
                crawler_settings, logger, force=False),
        }

    # Announce which matchers will be used for which filter.
    if logger:
        for filter_name, entries in matchers_by_filter.items():
            for entry in entries:
                logger.info("For filter {}, using matcher: {}".format(
                    filter_name, str(entry[0])))

    found_template = (
        '{} of {}: Found {} matches (external search) for archive: {} using matcher: {}, '
        'Processed search results: {}. '
        'Total search results: {}')
    not_found_template = (
        '{} of {}: Found no matches (external search) for archive: {} using matcher: {}.')

    for position, archive in enumerate(archives, start=1):  # type: ignore
        for entries in matchers_by_filter.values():
            for entry in entries:
                # The matcher object is the first element of each entry.
                matcher = entry[0]

                if matcher.type == 'title':
                    hits = matcher.create_closer_matches_values(
                        archive.title, cutoff=cutoff, max_matches=max_matches)
                elif matcher.type == 'image':
                    hits = matcher.create_closer_matches_values(
                        archive.zipped.name)
                else:
                    hits = []

                # Each hit carries gallery values at index 1 and the accuracy
                # at index 2.
                for hit in hits:
                    matched_gallery = Gallery.objects.update_or_create_from_values(
                        hit[1])
                    ArchiveMatches.objects.update_or_create(
                        archive=archive,
                        gallery=matched_gallery,
                        match_type=matcher.type,
                        defaults={
                            'match_accuracy': hit[2],
                        })

                if not logger:
                    continue

                if hits:
                    logger.info(found_template.format(
                        position, archives.count(), len(hits),
                        archive.title, matcher,
                        len(matcher.values_array),
                        len(matcher.gallery_links)))
                else:
                    logger.info(not_found_template.format(
                        position, archives.count(), archive.title, matcher))