def create_matches_wanted_galleries_from_providers_internal(
    wanted_galleries: QuerySet,
    provider_filter: str = '',
    cutoff: float = 0.4,
    max_matches: int = 20,
    must_be_used: bool = False
) -> None:
    """Create ``GalleryMatch`` rows for wanted galleries based on title similarity.

    A single list of ``(cleaned title, gallery pk)`` pairs is built from the
    eligible galleries (both ``title`` and ``title_jpn`` contribute an entry
    when set) and then reused for every wanted gallery.

    Args:
        wanted_galleries: Wanted galleries to find matches for; each one's
            ``search_title`` is compared against the candidate titles.
        provider_filter: When non-empty, only galleries whose provider
            contains this string are considered.
        cutoff: Forwarded unchanged to
            ``get_list_closer_gallery_titles_from_list`` (presumably the
            minimum similarity ratio -- confirm against that helper).
        max_matches: Forwarded unchanged to
            ``get_list_closer_gallery_titles_from_list`` (presumably the
            maximum number of results per wanted gallery -- confirm).
        must_be_used: When true, restrict the candidates to galleries that
            have an archive, either directly or through their gallery
            container or magazine.

    Returns:
        None. Matches are persisted through ``GalleryMatch.objects``.

    Any ``Exception`` raised while matching is logged at critical level with
    its traceback and is not re-raised. ``KeyboardInterrupt`` and
    ``SystemExit`` are allowed to propagate.
    """
    try:
        if provider_filter:
            galleries = Gallery.objects.eligible_for_use(provider__contains=provider_filter)
        else:
            galleries = Gallery.objects.eligible_for_use()

        if must_be_used:
            galleries = galleries.filter(
                Q(archive__isnull=False)
                | (Q(gallery_container__isnull=False) & Q(gallery_container__archive__isnull=False))
                | (Q(magazine__isnull=False) & Q(magazine__archive__isnull=False))
            )

        # One candidate entry per available title, so a gallery with both a
        # title and a Japanese title can be matched through either of them.
        galleries_title_id = []
        for gallery in galleries:
            if gallery.title:
                galleries_title_id.append((clean_title(gallery.title), gallery.pk))
            if gallery.title_jpn:
                galleries_title_id.append((clean_title(gallery.title_jpn), gallery.pk))

        logger.info(
            "Trying to match against gallery database, "
            "{} wanted galleries with no match. Provider filter: {}".format(
                wanted_galleries.count(),
                provider_filter
            )
        )

        for wanted_gallery in wanted_galleries:
            similar_list = get_list_closer_gallery_titles_from_list(
                wanted_gallery.search_title, galleries_title_id, cutoff, max_matches
            )

            if similar_list is None:
                continue

            logger.info(
                "Found {} matches from title for {}".format(
                    len(similar_list), wanted_gallery.search_title
                )
            )

            for similar in similar_list:
                # Index 1 is used as the gallery id, index 2 as the accuracy.
                gallery_id = similar[1]
                match_accuracy = similar[2]

                # We filter here instead of the Gallery model, because we use
                # the same list for every WG. ``exists()`` asks the database
                # for a yes/no answer instead of loading the matching rows.
                already_found = FoundGallery.objects.filter(
                    wanted_gallery=wanted_gallery, gallery_id=gallery_id
                ).exists()
                if already_found:
                    continue

                GalleryMatch.objects.get_or_create(
                    wanted_gallery=wanted_gallery,
                    gallery_id=gallery_id,
                    defaults={'match_accuracy': match_accuracy}
                )

        logger.info("Matching ended")
    except Exception:
        # Best-effort job: report the failure but do not propagate it.
        logger.critical(traceback.format_exc())
def format_to_compare_title(self, file_name: str) -> str:
    """Return the cleaned title used to compare ``file_name`` against others.

    Names ending in ``'.zip'`` are first converted with
    ``self.get_title_from_path``; any other name is used as given. The
    result is passed through ``clean_title`` in both cases.
    """
    is_zip = file_name.endswith('.zip')
    raw_title = self.get_title_from_path(file_name) if is_zip else file_name
    return clean_title(raw_title)