Example #1
0
def recall_api(request: HttpRequest, pk: int) -> HttpResponse:
    """Recall provider API, if possible.

    Staff-only view: re-queues the gallery linked to the given archive so
    its metadata is refreshed from the provider, then redirects back to
    the referring page.
    """
    if not request.user.is_staff:
        return render_error(request,
                            "You need to be an admin to recall the API.")

    try:
        archive = Archive.objects.get(pk=pk)
    except Archive.DoesNotExist:
        raise Http404("Archive does not exist")

    if not archive.gallery_id:
        return render_error(request,
                            "No gallery associated with this archive.")

    gallery = Gallery.objects.get(pk=archive.gallery_id)

    # Work on a fresh Settings copy so the override doesn't leak globally.
    override_settings = Settings(load_from_config=crawler_settings.config)
    web_queue = override_settings.workers.web_queue

    if web_queue:
        override_settings.set_update_metadata_options(
            providers=(gallery.provider, ))
        web_queue.enqueue_args_list(
            (gallery.get_link(), ), override_options=override_settings)
        frontend_logger.info(
            'Updating gallery API data for gallery: {} and related archives'.format(
                gallery.get_absolute_url()))

    return HttpResponseRedirect(request.META["HTTP_REFERER"])
Example #2
0
    def test_nhentai_parser(self):
        """Test Nhentai gallery page parser"""
        parser_settings = Settings(load_from_disk=True)

        url = 'https://nhentai.net/g/198482/'
        fetched = NhentaiParser(parser_settings).fetch_gallery_data(url)

        # Tag order matters for equality with the parser output.
        expected_tags = [
            'parody:pokemon',
            'lolicon',
            'sole_female',
            'sole_male',
            'b*****b',
            'artist:yukiu_con',
            'group:meltdown_comet',
            'language:translated',
            'language:english',
        ]

        expected = GalleryData(
            'nh-198482',
            'nhentai',
            title='(C90) [MeltdoWN COmet (Yukiu Con)] C90 Omakebon! (Pokémon GO) [English] [ATF]',
            title_jpn='(C90) [MeltdoWN COmet (雪雨こん)] C90 おまけ本! (ポケモンGO) [英訳]',
            filecount=9,
            link=url,
            posted=dateutil.parser.parse('2017-06-19T10:33:19.022360+00:00'),
            category='Doujinshi',
            tags=expected_tags,
        )

        self.assertEqual(fetched, expected)
Example #3
0
    def job(self) -> None:
        """Timed loop: run an auto search each period until stopped."""
        while not self.stop.is_set():
            delay = self.wait_until_next_run()
            # wait() returns True when the stop event fired during the sleep.
            if self.stop.wait(timeout=delay):
                return
            if self.settings.autochecker.enable:
                # Drop the worker's DB connection before the long-running
                # task; Django reopens it on demand.
                connection.close()
                self.crawler_logger.info("Starting timed auto search")
                run_settings = Settings(load_from_config=self.settings.config)
                run_settings.silent_processing = True
                run_settings.replace_metadata = True
                self.web_queue.enqueue_args_list(
                    ['-feed', '-wanted'], override_options=run_settings)
            self.update_last_run(django_tz.now())
Example #4
0
def recall_api(request: HttpRequest, pk: int) -> HttpResponse:
    """Recall provider API, if possible.

    Requires the 'viewer.update_metadata' permission. Re-queues the
    archive's gallery for a metadata refresh and records the outcome of
    each crawled gallery in the event log.
    """
    if not request.user.has_perm('viewer.update_metadata'):
        return render_error(request, "You don't have the permission to refresh source metadata on an Archive.")

    try:
        archive = Archive.objects.get(pk=pk)
    except Archive.DoesNotExist:
        raise Http404("Archive does not exist")

    if not archive.gallery_id:
        return render_error(request, "No gallery associated with this archive.")

    gallery = Gallery.objects.get(pk=archive.gallery_id)

    # Fresh Settings copy so the metadata-update override stays local.
    override_settings = Settings(load_from_config=crawler_settings.config)
    web_queue = override_settings.workers.web_queue

    if web_queue and gallery.provider:
        override_settings.set_update_metadata_options(providers=(gallery.provider,))

        def on_gallery(x: Optional['Gallery'], crawled_url: Optional[str], result: str) -> None:
            # Attribute each crawled gallery to the requesting user.
            event_log(
                request.user,
                'UPDATE_METADATA',
                content_object=x,
                result=result,
                data=crawled_url,
            )

        web_queue.enqueue_args_list(
            (gallery.get_link(),),
            override_options=override_settings,
            gallery_callback=on_gallery,
        )

        logger.info(
            'Updating gallery API data for gallery: {} and related archives'.format(gallery.get_absolute_url())
        )

    return HttpResponseRedirect(request.META["HTTP_REFERER"])
Example #5
0
    def job(self) -> None:
        """Timed loop: periodically refresh metadata for recently posted galleries.

        Sleeps until the next scheduled run (exiting early if the stop event
        is set), then, when the autoupdater is enabled, re-queues every
        eligible gallery posted inside the configured window, restricted to
        the 'info' downloaders of the enabled providers.
        """
        while not self.stop.is_set():

            seconds_to_wait = self.wait_until_next_run()
            # stop.wait returns True if the stop event fired while sleeping.
            if self.stop.wait(timeout=seconds_to_wait):
                return

            if self.settings.autoupdater.enable:
                # Fresh settings copy so the per-run overrides don't leak.
                current_settings = Settings(load_from_config=self.settings.config)
                current_settings.keep_dl_type = True
                current_settings.silent_processing = True
                current_settings.config['allowed']['replace_metadata'] = 'yes'

                # Discard the worker's DB connection; Django reopens it on demand.
                connection.close()

                # Window: [now - timer - buffer_back days, now - buffer_after days].
                start_date = django_tz.now() - timedelta(seconds=int(self.timer)) - timedelta(days=self.settings.autoupdater.buffer_back)
                end_date = django_tz.now() - timedelta(days=self.settings.autoupdater.buffer_after)
                to_update_providers = current_settings.autoupdater.providers

                galleries = Gallery.objects.eligible_for_use(
                    posted__gte=start_date,
                    posted__lte=end_date,
                    provider__in=to_update_providers
                )

                if not galleries:
                    logger.info(
                        "No galleries posted from {} to {} need updating. Providers: {}".format(
                            start_date,
                            end_date,
                            ", ".join(to_update_providers)
                        )
                    )
                else:
                    # Leave only info downloaders, then leave only enabled auto updated providers
                    downloaders = current_settings.provider_context.get_downloaders_name_priority(current_settings, filter_name='info')
                    downloaders_names = [x[0] for x in downloaders if x[0].replace("_info", "") in to_update_providers]

                    current_settings.allow_downloaders_only(downloaders_names, True, True, True)

                    url_list = [x.get_link() for x in galleries]

                    logger.info(
                        "Starting timed auto updater, updating {} galleries "
                        "posted from {} to {}. Providers: {}".format(
                            len(url_list),
                            start_date,
                            end_date,
                            ", ".join(to_update_providers)
                        )
                    )

                    # '--update-mode' rides along with the URLs; presumably
                    # consumed downstream as a crawler flag — TODO confirm.
                    url_list.append('--update-mode')

                    self.web_queue.enqueue_args_list(url_list, override_options=current_settings)

            self.update_last_run(django_tz.now())
Example #6
0
 def __init__(self, bus: Bus, settings_module: str = 'settings',
              wsgi_http_logger: type = HTTPLogger,
              local_settings: Settings = None) -> None:
     """ CherryPy engine plugin to configure and mount
     the Django application onto the CherryPy server.

     :param bus: CherryPy engine bus this plugin attaches to.
     :param settings_module: name of the Django settings module to use.
     :param wsgi_http_logger: logger class used for the WSGI application.
     :param local_settings: pre-built crawler Settings; when omitted, a new
         one is loaded from disk. NOTE(review): the annotation should be
         Optional[Settings] since None is the default — confirm Optional is
         importable here before changing it.
     """
     plugins.SimplePlugin.__init__(self, bus)
     self.settings_module = settings_module
     self.wsgi_http_logger = wsgi_http_logger
     # Prefer the injected settings object; fall back to disk config.
     if local_settings:
         self.crawler_settings = local_settings
     else:
         self.crawler_settings = Settings(load_from_disk=True)
Example #7
0
    def test_nexus_parser(self):
        """Test Nexus gallery page parser"""
        parser_settings = Settings(load_from_disk=True)

        gallery_link = 'https://hentainexus.com/view/5665'
        fetched = NexusParser(parser_settings).fetch_gallery_data(gallery_link)

        # Tag order matters for equality with the parser output.
        expected_tags = [
            'artist:wantan_meo',
            'language:english',
            'magazine:comic_kairakuten_2019-04',
            'parody:original_work',
            'publisher:fakku',
            'creampie',
            'fangs',
            'hairy',
            'hentai',
            'office_lady',
            'oppai',
            'uncensored',
            'vanilla',
        ]

        expected = GalleryData(
            '5665',
            'nexus',
            link=gallery_link,
            archiver_key='https://hentainexus.com/zip/5665',
            title='Sase-san is Very Popular',
            thumbnail_url='https://static.hentainexus.com/content/5665/cover.jpg',
            filecount=16,
            filesize=0,
            expunged=False,
            posted=None,
            category='Manga',
            tags=expected_tags,
            comment='Let\'s chug \'em down! ♪',
        )

        self.assertEqual(fetched, expected)
Example #8
0
"""

# Build paths inside the project like this: os.path.join(BASE_DIR, ...)
import os
from typing import Any, Optional

from core.base.setup import Settings
from core.base.utilities import module_exists

# Allow the deployment to relocate the project root via environment variable.
if 'PANDA_BASE_DIR' in os.environ:
    BASE_DIR = os.environ['PANDA_BASE_DIR']
else:
    BASE_DIR = os.path.dirname(os.path.dirname(__file__))

# Crawler settings drive most of the Django configuration below; an explicit
# config directory can be supplied through PANDA_CONFIG_DIR.
if 'PANDA_CONFIG_DIR' in os.environ:
    crawler_settings = Settings(load_from_disk=True,
                                default_dir=os.environ['PANDA_CONFIG_DIR'])
else:
    crawler_settings = Settings(load_from_disk=True)

# Main log file path, taken from the crawler settings.
MAIN_LOGGER = crawler_settings.log_location

# Create the log directory up front so logging doesn't fail on first run.
if not os.path.exists(os.path.dirname(MAIN_LOGGER)):
    os.makedirs(os.path.dirname(MAIN_LOGGER))

# SECURITY WARNING: keep the secret key used in production secret!
SECRET_KEY = crawler_settings.django_secret_key

# SECURITY WARNING: don't run with debug turned on in production!
DEBUG = crawler_settings.django_debug_mode
# Might want to limit it here.
Example #9
0
def missing_archives_for_galleries(request: HttpRequest) -> HttpResponse:
    """List known galleries that have no archive yet.

    GET parameters filter and paginate the listing (title/tags/page). A
    POST from a staff user applies a bulk action to the checked rows:
    delete the galleries, queue them for download, or recall the provider
    API for fresh metadata. Non-staff (or 'force_public') requests get a
    restricted public listing.
    """
    p = request.POST
    get = request.GET

    title = get.get("title", '')
    tags = get.get("tags", '')

    try:
        page = int(get.get("page", '1'))
    except ValueError:
        page = 1

    if 'clear' in get:
        form = GallerySearchForm()
    else:
        form = GallerySearchForm(initial={'title': title, 'tags': tags})

    # Bulk actions: only for staff, and only when something was POSTed.
    if p and request.user.is_staff:
        # Collect the primary keys of the checked rows ("sel-*" inputs).
        pks = []
        for k, v in p.items():
            if k.startswith("sel-"):
                # k, pk = k.split('-')
                # results[pk][k] = v
                pks.append(v)
        results = Gallery.objects.filter(id__in=pks).order_by('-create_date')

        if 'delete_galleries' in p:
            for gallery in results:
                message = 'Removing gallery: {}, link: {}'.format(
                    gallery.title, gallery.get_link())
                frontend_logger.info(message)
                messages.success(request, message)
                gallery.mark_as_deleted()
        elif 'download_galleries' in p:
            for gallery in results:
                message = 'Queueing gallery: {}, link: {}'.format(
                    gallery.title, gallery.get_link())
                frontend_logger.info(message)
                messages.success(request, message)

                # Force replace_metadata when queueing from this list, since it's mostly used to download non used.
                current_settings = Settings(
                    load_from_config=crawler_settings.config)

                if current_settings.workers.web_queue:

                    current_settings.replace_metadata = True
                    current_settings.retry_failed = True

                    if 'reason' in p and p['reason'] != '':
                        reason = p['reason']
                        # Force limit string length (reason field max_length)
                        current_settings.archive_reason = reason[:200]
                        current_settings.archive_details = gallery.reason
                        current_settings.gallery_reason = reason[:200]
                    elif gallery.reason:
                        current_settings.archive_reason = gallery.reason

                    current_settings.workers.web_queue.enqueue_args_list(
                        (gallery.get_link(), ),
                        override_options=current_settings)
        elif 'recall_api' in p:
            # Re-fetch metadata for every selected gallery in one queue call.
            message = 'Recalling API for {} galleries'.format(results.count())
            frontend_logger.info(message)
            messages.success(request, message)

            gallery_links = [x.get_link() for x in results]
            gallery_providers = list(
                results.values_list('provider', flat=True).distinct())

            current_settings = Settings(
                load_from_config=crawler_settings.config)

            if current_settings.workers.web_queue:
                current_settings.set_update_metadata_options(
                    providers=gallery_providers)

                current_settings.workers.web_queue.enqueue_args_list(
                    gallery_links, override_options=current_settings)

    # 'force_public' lets staff preview the public version of the listing.
    if 'force_public' in request.GET:
        force_public = True
    else:
        force_public = False
    if request.user.is_staff and not force_public:

        providers = Gallery.objects.all().values_list('provider',
                                                      flat=True).distinct()

        params = {}

        for k, v in get.items():
            params[k] = v

        # Fill every known filter key so the filter helper sees them all.
        for k in gallery_filter_keys:
            if k not in params:
                params[k] = ''

        results = filter_galleries_simple(params)

        results = results.non_used_galleries().prefetch_related(
            'foundgallery_set')

        paginator = Paginator(results, 50)
        try:
            results = paginator.page(page)
        except (InvalidPage, EmptyPage):
            results = paginator.page(paginator.num_pages)

        d = {
            'results': results,
            'providers': providers,
            'force_public': force_public,
            'form': form
        }
    else:

        params = {}

        for k, v in get.items():
            params[k] = v

        for k in gallery_filter_keys:
            if k not in params:
                params[k] = ''

        results = filter_galleries_simple(params)

        # Public listing is restricted to these two providers.
        results = results.non_used_galleries(public=True,
                                             provider__in=['panda', 'fakku'])
        d = {'results': results}
    return render(request, "viewer/archives_missing_for_galleries.html", d)
Example #10
0
def user_crawler(request: HttpRequest) -> HttpResponse:
    """Crawl given URLs.

    Queues user-submitted URLs on the crawler's web queue, restricted to
    the generic downloaders, and records an event-log entry per URL
    describing whether it is new, already submitted, already present
    (public or private), or not a provider URL at all.
    """

    d = {}

    p = request.POST

    all_downloaders = crawler_settings.provider_context.get_downloaders_name_priority(
        crawler_settings, filter_name='generic_')

    # providers_not_generic = list(set([x[0].provider for x in all_downloaders if not x[0].provider.is_generic()]))
    generic_downloaders = [x[0] for x in all_downloaders]

    user_reason = p.get('reason', '')

    if p:
        # Per-request settings copy so the overrides below stay local.
        current_settings = Settings(load_from_config=crawler_settings.config)
        if not current_settings.workers.web_queue:
            messages.error(
                request,
                'Cannot submit links currently. Please contact an admin.')
            return HttpResponseRedirect(request.META["HTTP_REFERER"])
        url_set = set()
        # create dictionary of properties for each archive
        current_settings.replace_metadata = False
        current_settings.config['allowed']['replace_metadata'] = 'no'
        for k, v in p.items():
            if k == "downloader":
                if v == 'no-generic':
                    continue
                elif v in generic_downloaders:
                    current_settings.enable_downloader_only(v)
            elif k == "urls":
                url_list = v.split("\n")
                for item in url_list:
                    # Strip trailing carriage returns from CRLF submissions.
                    url_set.add(item.rstrip('\r'))
        urls = list(url_set)

        if not urls:
            messages.error(request, 'Submission is empty.')
            return HttpResponseRedirect(request.META["HTTP_REFERER"])

        if 'reason' in p and p['reason'] != '':
            reason = p['reason']
            # Force limit string length (reason field max_length)
            current_settings.archive_reason = reason[:200]
            current_settings.gallery_reason = reason[:200]
        if 'source' in p and p['source'] != '':
            source = p['source']
            # Force limit string length (reason field max_length)
            current_settings.archive_source = source[:50]

        current_settings.archive_user = request.user

        parsers = crawler_settings.provider_context.get_parsers_classes()

        # Event-log callbacks fired by the queue as items are processed.
        def archive_callback(x: Optional['Archive'],
                             crawled_url: Optional[str], result: str) -> None:
            event_log(request.user,
                      'ADD_ARCHIVE',
                      reason=user_reason,
                      content_object=x,
                      result=result,
                      data=crawled_url)

        def gallery_callback(x: Optional['Gallery'],
                             crawled_url: Optional[str], result: str) -> None:
            event_log(request.user,
                      'ADD_GALLERY',
                      reason=user_reason,
                      content_object=x,
                      result=result,
                      data=crawled_url)

        current_settings.workers.web_queue.enqueue_args_list(
            urls,
            override_options=current_settings,
            archive_callback=archive_callback,
            gallery_callback=gallery_callback,
            use_argparser=False)

        messages.success(
            request,
            'Starting Crawler, if the links were correctly added, they should appear on the archive or gallery list.'
        )
        for url in urls:
            frontend_logger.info("User {}: queued link: {}".format(
                request.user.username, url))
            # event_log(
            #     request.user,
            #     'CRAWL_URL',
            #     reason=user_reason,
            #     data=url,
            #     result='queue'
            # )

        found_valid_urls: List[str] = []

        # Classify each URL against every parser that can extract a gallery
        # ID from it, to report back what will happen with each link.
        for parser in parsers:
            if parser.id_from_url_implemented():
                urls_filtered = parser.filter_accepted_urls(urls)
                found_valid_urls.extend(urls_filtered)
                for url_filtered in urls_filtered:
                    gid = parser.id_from_url(url_filtered)
                    gallery = Gallery.objects.filter(gid=gid).first()
                    if not gallery:
                        messages.success(
                            request,
                            '{}: New URL, will be added to the submit queue'.
                            format(url_filtered))
                        event_log(request.user,
                                  'CRAWL_URL',
                                  reason=user_reason,
                                  data=url_filtered,
                                  result='queued')
                        continue
                    if gallery.is_submitted():
                        messages.info(
                            request,
                            '{}: Already in submit queue, link: {}, reason: {}'
                            .format(url_filtered, gallery.get_absolute_url(),
                                    gallery.reason))
                        event_log(request.user,
                                  'CRAWL_URL',
                                  reason=user_reason,
                                  data=url_filtered,
                                  result='already_submitted')
                    elif gallery.public:
                        messages.info(
                            request,
                            '{}: Already present, is public: {}'.format(
                                url_filtered,
                                request.build_absolute_uri(
                                    gallery.get_absolute_url())))
                        event_log(request.user,
                                  'CRAWL_URL',
                                  reason=user_reason,
                                  data=url_filtered,
                                  result='already_public')
                    else:
                        messages.info(
                            request,
                            '{}: Already present, is not public: {}'.format(
                                url_filtered,
                                request.build_absolute_uri(
                                    gallery.get_absolute_url())))
                        event_log(request.user,
                                  'CRAWL_URL',
                                  reason=user_reason,
                                  data=url_filtered,
                                  result='already_private')

        # URLs that no parser claimed; still logged as queued extras.
        extra_urls = [x for x in urls if x not in found_valid_urls]

        for extra_url in extra_urls:
            messages.info(request,
                          '{}: Extra non-provider URLs'.format(extra_url))
            event_log(request.user,
                      'CRAWL_URL',
                      reason=user_reason,
                      data=extra_url,
                      result='queued')
        # Not really optimal when there's many commands being queued
        # for command in url_list:
        #     messages.success(request, command)
        return HttpResponseRedirect(request.META["HTTP_REFERER"])

    d.update({'downloaders': generic_downloaders})

    return render(request, "viewer/collaborators/gallery_crawler.html", d)
Example #11
0
def submit_queue(request: HttpRequest) -> HttpResponse:
    """Review the submitted-galleries queue.

    GET parameters filter and paginate the queue ('denied' switches the
    listing to include denied entries). A POST applies a bulk action to the
    checked galleries: deny them, or accept them and queue them for
    download. Every action is recorded in the event log.
    """
    p = request.POST
    get = request.GET

    title = get.get("title", '')
    tags = get.get("tags", '')

    user_reason = p.get('reason', '')

    try:
        page = int(get.get("page", '1'))
    except ValueError:
        page = 1

    if 'clear' in get:
        form = GallerySearchForm()
    else:
        form = GallerySearchForm(initial={'title': title, 'tags': tags})

    if p:
        # Collect the primary keys of the checked rows ("sel-*" inputs).
        pks = []
        for k, v in p.items():
            if k.startswith("sel-"):
                # k, pk = k.split('-')
                # results[pk][k] = v
                pks.append(v)

        # Keep the galleries in the same order the user selected them.
        preserved = Case(
            *[When(pk=pk, then=pos) for pos, pk in enumerate(pks)])

        if 'denied' in get:
            results = Gallery.objects.submitted_galleries(
                id__in=pks).order_by(preserved)
        else:
            results = Gallery.objects.submitted_galleries(
                ~Q(status=Gallery.DENIED), id__in=pks).order_by(preserved)

        if 'deny_galleries' in p:
            for gallery in results:
                message = 'Denying gallery: {}, link: {}, source link: {}'.format(
                    gallery.title, gallery.get_absolute_url(),
                    gallery.get_link())
                if 'reason' in p and p['reason'] != '':
                    message += ', reason: {}'.format(p['reason'])
                frontend_logger.info("User {}: {}".format(
                    request.user.username, message))
                messages.success(request, message)
                gallery.mark_as_denied()
                event_log(request.user,
                          'DENY_GALLERY',
                          reason=user_reason,
                          content_object=gallery,
                          result='denied')
        elif 'download_galleries' in p:
            for gallery in results:
                message = 'Queueing gallery: {}, link: {}, source link: {}'.format(
                    gallery.title, gallery.get_absolute_url(),
                    gallery.get_link())
                if 'reason' in p and p['reason'] != '':
                    message += ', reason: {}'.format(p['reason'])
                frontend_logger.info("User {}: {}".format(
                    request.user.username, message))
                messages.success(request, message)

                event_log(request.user,
                          'ACCEPT_GALLERY',
                          reason=user_reason,
                          content_object=gallery,
                          result='accepted')

                # Force replace_metadata when queueing from this list, since it's mostly used to download non used.
                current_settings = Settings(
                    load_from_config=crawler_settings.config)

                if current_settings.workers.web_queue:

                    current_settings.replace_metadata = True
                    current_settings.retry_failed = True

                    if 'reason' in p and p['reason'] != '':
                        reason = p['reason']
                        # Force limit string length (reason field max_length)
                        current_settings.archive_reason = reason[:200]
                        current_settings.archive_details = gallery.reason
                        current_settings.gallery_reason = reason[:200]
                    elif gallery.reason:
                        current_settings.archive_reason = gallery.reason

                    # NOTE(review): these callbacks are redefined per gallery
                    # but only capture request/user_reason, so behavior is the
                    # same as defining them once outside the loop.
                    def archive_callback(x: Optional['Archive'],
                                         crawled_url: Optional[str],
                                         result: str) -> None:
                        event_log(request.user,
                                  'ADD_ARCHIVE',
                                  reason=user_reason,
                                  content_object=x,
                                  result=result,
                                  data=crawled_url)

                    def gallery_callback(x: Optional['Gallery'],
                                         crawled_url: Optional[str],
                                         result: str) -> None:
                        event_log(request.user,
                                  'ADD_GALLERY',
                                  reason=user_reason,
                                  content_object=x,
                                  result=result,
                                  data=crawled_url)

                    current_settings.workers.web_queue.enqueue_args_list(
                        (gallery.get_link(), ),
                        override_options=current_settings,
                        archive_callback=archive_callback,
                        gallery_callback=gallery_callback,
                    )

    providers = Gallery.objects.all().values_list('provider',
                                                  flat=True).distinct()

    params = {}

    for k, v in get.items():
        params[k] = v

    # Fill every known filter key so the filter helper sees them all.
    for k in gallery_filter_keys:
        if k not in params:
            params[k] = ''

    results = filter_galleries_simple(params)

    if 'denied' in get:
        results = results.submitted_galleries().prefetch_related(
            'foundgallery_set')
    else:
        results = results.submitted_galleries(~Q(
            status=Gallery.DENIED)).prefetch_related('foundgallery_set')

    paginator = Paginator(results, 50)
    try:
        results = paginator.page(page)
    except (InvalidPage, EmptyPage):
        results = paginator.page(paginator.num_pages)

    d = {'results': results, 'providers': providers, 'form': form}
    return render(request, "viewer/collaborators/submit_queue.html", d)
Example #12
0
def manage_archives(request: HttpRequest) -> HttpResponse:
    """Bulk archive management view.

    GET parameters drive a filtered, paginated archive listing; a POST
    applies one bulk action (publish, unpublish, delete, update metadata,
    add to an archive group) to the archives selected through ``sel-*``
    form fields. Each action is gated on the matching Django permission
    and audited through ``event_log``.
    """
    p = request.POST
    get = request.GET

    title = get.get("title", '')
    tags = get.get("tags", '')

    # Optional free-text reason; stored with every event log entry below.
    user_reason = p.get('reason', '')

    try:
        page = int(get.get("page", '1'))
    except ValueError:
        page = 1

    if 'clear' in get:
        form = ArchiveSearchForm()
    else:
        form = ArchiveSearchForm(initial={'title': title, 'tags': tags})

    if p:
        # Selected archive pks arrive as values of "sel-*" checkbox fields.
        pks = []
        for k, v in p.items():
            if k.startswith("sel-"):
                # k, pk = k.split('-')
                # results[pk][k] = v
                pks.append(v)

        # Keep the archives in the exact order they were selected.
        preserved = Case(
            *[When(pk=pk, then=pos) for pos, pk in enumerate(pks)])

        archives = Archive.objects.filter(id__in=pks).order_by(preserved)
        if 'publish_archives' in p and request.user.has_perm(
                'viewer.publish_archive'):
            # Make each selected archive publicly visible.
            for archive in archives:
                message = 'Publishing archive: {}, link: {}'.format(
                    archive.title, archive.get_absolute_url())
                if 'reason' in p and p['reason'] != '':
                    message += ', reason: {}'.format(p['reason'])
                frontend_logger.info("User {}: {}".format(
                    request.user.username, message))
                messages.success(request, message)
                archive.set_public(reason=user_reason)
                event_log(request.user,
                          'PUBLISH_ARCHIVE',
                          reason=user_reason,
                          content_object=archive,
                          result='published')
        elif 'unpublish_archives' in p and request.user.has_perm(
                'viewer.publish_archive'):
            # Inverse of the branch above: hide each selected archive.
            for archive in archives:
                message = 'Unpublishing archive: {}, link: {}'.format(
                    archive.title, archive.get_absolute_url())
                if 'reason' in p and p['reason'] != '':
                    message += ', reason: {}'.format(p['reason'])
                frontend_logger.info("User {}: {}".format(
                    request.user.username, message))
                messages.success(request, message)
                archive.set_private(reason=user_reason)
                event_log(request.user,
                          'UNPUBLISH_ARCHIVE',
                          reason=user_reason,
                          content_object=archive,
                          result='unpublished')
        elif 'delete_archives' in p and request.user.has_perm(
                'viewer.delete_archive'):
            # Delete archive rows, their files on disk, and flag the
            # associated gallery as deleted (kept for the audit trail).
            for archive in archives:
                message = 'Deleting archive: {}, link: {}, with it\'s file: {} and associated gallery: {}'.format(
                    archive.title, archive.get_absolute_url(),
                    archive.zipped.path, archive.gallery)
                if 'reason' in p and p['reason'] != '':
                    message += ', reason: {}'.format(p['reason'])
                frontend_logger.info("User {}: {}".format(
                    request.user.username, message))
                messages.success(request, message)
                gallery = archive.gallery
                # NOTE(review): archive.gallery may be None, in which case
                # mark_as_deleted() raises AttributeError — confirm selected
                # archives always carry a gallery, or guard this call.
                archive.gallery.mark_as_deleted()
                archive.gallery = None
                archive.delete_all_files()
                archive.delete()
                # The gallery (saved before unlinking) is the logged object,
                # since the archive row no longer exists at this point.
                event_log(request.user,
                          'DELETE_ARCHIVE',
                          content_object=gallery,
                          reason=user_reason,
                          result='deleted')
        elif 'update_metadata' in p and request.user.has_perm(
                'viewer.update_metadata'):
            # Re-fetch provider metadata for each archive's gallery.
            for archive in archives:
                gallery = archive.gallery

                # NOTE(review): gallery may be None here; get_absolute_url()
                # would raise AttributeError — confirm callers only select
                # archives with an associated gallery.
                message = 'Updating gallery API data for gallery: {} and related archives'.format(
                    gallery.get_absolute_url())
                if 'reason' in p and p['reason'] != '':
                    message += ', reason: {}'.format(p['reason'])
                frontend_logger.info("User {}: {}".format(
                    request.user.username, message))
                messages.success(request, message)

                # Fresh settings copy so per-request options don't leak into
                # the global crawler settings.
                current_settings = Settings(
                    load_from_config=crawler_settings.config)

                if current_settings.workers.web_queue:
                    current_settings.set_update_metadata_options(
                        providers=(gallery.provider, ))

                    # Worker callback, invoked later by the queue: logs one
                    # audit event per crawled gallery. Closes over request
                    # and user_reason, which are loop-invariant.
                    def gallery_callback(x: Optional['Gallery'],
                                         crawled_url: Optional[str],
                                         result: str) -> None:
                        event_log(request.user,
                                  'UPDATE_METADATA',
                                  reason=user_reason,
                                  content_object=x,
                                  result=result,
                                  data=crawled_url)

                    current_settings.workers.web_queue.enqueue_args_list(
                        (gallery.get_link(), ),
                        override_options=current_settings,
                        gallery_callback=gallery_callback)

                    # NOTE(review): this repeats the message already logged
                    # above for the same gallery.
                    frontend_logger.info(
                        'Updating gallery API data for gallery: {} and related archives'
                        .format(gallery.get_absolute_url()))
        elif 'add_to_group' in p and request.user.has_perm(
                'viewer.change_archivegroup'):

            if 'archive_group' in p:
                archive_group_ids = p.getlist('archive_group')

                # Preserve the selection order of the target groups.
                preserved = Case(*[
                    When(pk=pk, then=pos)
                    for pos, pk in enumerate(archive_group_ids)
                ])

                archive_groups = ArchiveGroup.objects.filter(
                    pk__in=archive_group_ids).order_by(preserved)

                for archive in archives:
                    for archive_group in archive_groups:
                        # Skip pairs that are already linked.
                        if not ArchiveGroupEntry.objects.filter(
                                archive=archive,
                                archive_group=archive_group).exists():

                            archive_group_entry = ArchiveGroupEntry(
                                archive=archive, archive_group=archive_group)
                            archive_group_entry.save()

                            message = 'Adding archive: {}, link: {}, to group: {}, link {}'.format(
                                archive.title, archive.get_absolute_url(),
                                archive_group.title,
                                archive_group.get_absolute_url())
                            if 'reason' in p and p['reason'] != '':
                                message += ', reason: {}'.format(p['reason'])
                            frontend_logger.info("User {}: {}".format(
                                request.user.username, message))
                            messages.success(request, message)
                            event_log(request.user,
                                      'ADD_ARCHIVE_TO_GROUP',
                                      content_object=archive,
                                      reason=user_reason,
                                      result='added')

    # Build the listing filter: defaults first, then GET overrides, then
    # blanks for any filter key the request did not supply.
    params = {
        'sort': 'create_date',
        'asc_desc': 'desc',
        'filename': title,
    }

    for k, v in get.items():
        params[k] = v

    for k in archive_filter_keys:
        if k not in params:
            params[k] = ''

    results = filter_archives_simple(params)

    results = results.prefetch_related('gallery')

    paginator = Paginator(results, 100)
    try:
        results = paginator.page(page)
    except (InvalidPage, EmptyPage):
        # Out-of-range page numbers fall back to the last page.
        results = paginator.page(paginator.num_pages)

    d = {'results': results, 'form': form}

    if request.user.has_perm('viewer.change_archivegroup'):
        group_form = ArchiveGroupSelectForm()
        d.update(group_form=group_form)

    return render(request, "viewer/collaborators/manage_archives.html", d)
Example #13
0
def json_parser(request: HttpRequest) -> HttpResponse:
    """JSON API endpoint for the crawler, keyed on ``api_key``.

    POST operations (JSON body with ``operation`` and ``args``):
      - ``webcrawler``: queue a link for crawling (internal pages and the
        userscript), optionally forcing a downloader or replacing a
        parent archive.
      - ``archive_request``: list archives missing on disk for given gids.
      - ``queue_archives`` / ``queue_galleries``: bulk-queue links whose
        galleries are not already known.
      - ``links``: bulk-queue links with default settings.
      - ``match_archive`` / ``match_archive_internally``: run matchers.

    GET with ``gc`` returns gallery metadata for the given filters.
    Every response is JSON.
    """
    response = {}

    def _find_archive_for_link(settings_object, link):
        # Resolve a URL to an already-known Archive through the provider
        # parsers. Only the first parser that accepts the URL is consulted,
        # and the last accepted URL that yields a gid wins — the same
        # semantics as the four inline copies of this loop it replaces.
        found_archive = None
        parsers = settings_object.provider_context.get_parsers(settings_object, crawler_logger)
        for parser in parsers:
            if parser.id_from_url_implemented():
                urls_filtered = parser.filter_accepted_urls((link, ))
                for url_filtered in urls_filtered:
                    gallery_gid = parser.id_from_url(url_filtered)
                    if gallery_gid:
                        found_archive = Archive.objects.filter(gallery__gid=gallery_gid).first()
                if urls_filtered:
                    break
        return found_archive

    if request.method == 'POST':
        if not request.body:
            response['error'] = 'Empty request'
            return HttpResponse(json.dumps(response), content_type="application/json; charset=utf-8")
        data = json.loads(request.body.decode("utf-8"))
        if 'api_key' not in data:
            response['error'] = 'Missing API key'
            return HttpResponse(json.dumps(response), content_type="application/json; charset=utf-8")
        elif data['api_key'] != crawler_settings.api_key:
            response['error'] = 'Incorrect API key'
            return HttpResponse(json.dumps(response), content_type="application/json; charset=utf-8")
        # send some 'ok' back
        else:
            if 'operation' not in data or 'args' not in data:
                response['error'] = 'Wrong format'
            else:
                args = data['args']
                response = {}
                # Used by internal pages and userscript
                if data['operation'] == 'webcrawler' and 'link' in args:
                    if not crawler_settings.workers.web_queue:
                        response['error'] = 'The webqueue is not running'
                    elif 'downloader' in args:
                        # Forced downloader: work on a fresh settings copy so
                        # the restriction doesn't leak into global settings.
                        current_settings = Settings(load_from_config=crawler_settings.config)
                        if not current_settings.workers.web_queue:
                            response['error'] = 'The webqueue is not running'
                        else:
                            current_settings.allow_downloaders_only([args['downloader']], True, True, True)
                            archive = _find_archive_for_link(current_settings, args['link'])
                            current_settings.workers.web_queue.enqueue_args_list((args['link'],), override_options=current_settings)
                            if archive:
                                response['message'] = "Archive exists, crawling to check for redownload: " + args['link']
                            else:
                                response['message'] = "Crawling: " + args['link']
                    else:
                        if 'parentLink' in args:
                            parent_archive = _find_archive_for_link(crawler_settings, args['parentLink'])
                            if parent_archive:
                                link = parent_archive.gallery.get_link()
                                if 'action' in args and args['action'] == 'replaceFound':
                                    # Replace: drop the parent archive, its
                                    # files and gallery, then queue the link.
                                    parent_archive.gallery.mark_as_deleted()
                                    parent_archive.gallery = None
                                    parent_archive.delete_all_files()
                                    parent_archive.delete_files_but_archive()
                                    parent_archive.delete()
                                    response['message'] = "Crawling: " + args['link'] + ", deleting parent: " + link
                                    crawler_settings.workers.web_queue.enqueue_args(args['link'])
                                elif 'action' in args and args['action'] == 'queueFound':
                                    response['message'] = "Crawling: " + args['link'] + ", keeping parent: " + link
                                    crawler_settings.workers.web_queue.enqueue_args(args['link'])
                                else:
                                    # No explicit action: ask the client to
                                    # confirm deleting the parent first.
                                    response['message'] = "Please confirm deletion of parent: " + link
                                    response['action'] = 'confirmDeletion'
                            else:
                                archive = _find_archive_for_link(crawler_settings, args['link'])
                                if archive:
                                    response['message'] = "Archive exists, crawling to check for redownload: " + args['link']
                                else:
                                    response['message'] = "Crawling: " + args['link']
                                crawler_settings.workers.web_queue.enqueue_args(args['link'])
                        else:
                            archive = _find_archive_for_link(crawler_settings, args['link'])
                            if archive:
                                response['message'] = "Archive exists, crawling to check for redownload: " + args['link']
                            else:
                                response['message'] = "Crawling: " + args['link']
                            crawler_settings.workers.web_queue.enqueue_args(args['link'])
                    if not response:
                        response['error'] = 'Could not parse request'
                    return HttpResponse(json.dumps(response), content_type="application/json; charset=utf-8")
                # Used by remotesite command
                elif data['operation'] == 'archive_request':
                    archives_query = Archive.objects.filter_non_existent(crawler_settings.MEDIA_ROOT, gallery__gid__in=args)
                    archives = [{'gid': archive.gallery.gid,
                                 'id': archive.id,
                                 'zipped': archive.zipped.name,
                                 'filesize': archive.filesize} for archive in archives_query]
                    response_text = json.dumps({'result': archives})
                    return HttpResponse(response_text, content_type="application/json; charset=utf-8")
                # Used by remotesite command
                elif data['operation'] in ('queue_archives', 'queue_galleries'):
                    urls = args
                    new_urls_set = set()
                    gids_set = set()

                    # First pass: collect every gid the parsers can extract
                    # from the submitted URLs.
                    parsers = crawler_settings.provider_context.get_parsers(crawler_settings, crawler_logger)
                    for parser in parsers:
                        if parser.id_from_url_implemented():
                            urls_filtered = parser.filter_accepted_urls(urls)
                            for url in urls_filtered:
                                gid = parser.id_from_url(url)
                                gids_set.add(gid)

                    gids_list = list(gids_set)

                    # Purge stale gallery rows so their links get re-queued.
                    existing_galleries = Gallery.objects.filter(gid__in=gids_list)
                    for gallery_object in existing_galleries:
                        if gallery_object.is_submitted():
                            gallery_object.delete()
                        # Delete queued galleries without archives (this single
                        # check subsumes the old separate "failed download"
                        # branch, which required the same empty archive_set and
                        # could TypeError on a None dl_type).
                        elif data['operation'] == 'queue_archives' and not gallery_object.archive_set.all():
                            gallery_object.delete()
                    already_present_gids = list(Gallery.objects.filter(gid__in=gids_list).values_list('gid', flat=True))

                    # Second pass: only queue URLs whose gid is still unknown.
                    for parser in parsers:
                        if parser.id_from_url_implemented():
                            urls_filtered = parser.filter_accepted_urls(urls)
                            for url in urls_filtered:
                                gid = parser.id_from_url(url)
                                if gid not in already_present_gids:
                                    new_urls_set.add(url)

                    pages_links = list(new_urls_set)
                    if len(pages_links) > 0:
                        current_settings = Settings(load_from_config=crawler_settings.config)
                        if data['operation'] == 'queue_galleries':
                            # Metadata-only downloaders.
                            current_settings.allow_type_downloaders_only('info')
                        elif data['operation'] == 'queue_archives':
                            if 'archive_reason' in data:
                                current_settings.archive_reason = data['archive_reason']
                            if 'archive_details' in data:
                                current_settings.archive_details = data['archive_details']
                            current_settings.allow_type_downloaders_only('fake')
                        if current_settings.workers.web_queue:
                            current_settings.workers.web_queue.enqueue_args_list(pages_links, override_options=current_settings)
                        else:
                            # Queue down: report zero queued links.
                            pages_links = []
                    return HttpResponse(json.dumps({'result': str(len(pages_links))}), content_type="application/json; charset=utf-8")
                # Used by remotesite command
                elif data['operation'] == 'links':
                    links = args
                    if len(links) > 0:
                        # NOTE(review): unlike the branches above, this does not
                        # check that web_queue is running — confirm it cannot be
                        # None here, otherwise this raises AttributeError.
                        crawler_settings.workers.web_queue.enqueue_args_list(links)
                    return HttpResponse(json.dumps({'result': str(len(links))}), content_type="application/json; charset=utf-8")
                # Used by archive page
                elif data['operation'] == 'match_archive':
                    archive = Archive.objects.filter(pk=args['archive'])
                    if archive:
                        generate_possible_matches_for_archives(
                            archive,
                            filters=(args['match_filter'],),
                            logger=crawler_logger,
                            match_local=False,
                            match_web=True,
                        )
                    return HttpResponse(json.dumps({'message': 'web matcher done, check the logs for results'}),
                                        content_type="application/json; charset=utf-8")
                elif data['operation'] == 'match_archive_internally':
                    archive = Archive.objects.get(pk=args['archive'])
                    if archive:
                        clear_title = 'clear' in args
                        provider_filter = args.get('provider', '')
                        # Tuning knobs come from the query string; fall back to
                        # defaults on malformed values.
                        try:
                            cutoff = float(request.GET.get('cutoff', '0.4'))
                        except ValueError:
                            cutoff = 0.4
                        try:
                            max_matches = int(request.GET.get('max-matches', '10'))
                        except ValueError:
                            max_matches = 10

                        archive.generate_possible_matches(
                            clear_title=clear_title, provider_filter=provider_filter,
                            cutoff=cutoff, max_matches=max_matches
                        )
                        archive.save()
                    return HttpResponse(json.dumps({'message': 'internal matcher done, check the archive for results'}),
                                        content_type="application/json; charset=utf-8")
                else:
                    response['error'] = 'Unknown function'
    elif request.method == 'GET':
        data = request.GET
        if 'api_key' not in data:
            response['error'] = 'Missing API key'
            return HttpResponse(json.dumps(response), content_type="application/json; charset=utf-8")
        elif data['api_key'] != crawler_settings.api_key:
            response['error'] = 'Incorrect API key'
            return HttpResponse(json.dumps(response), content_type="application/json; charset=utf-8")
        # send some 'ok' back
        else:
            if 'gc' in data:
                # Fill every filter key the client did not supply with ''.
                args = data.copy()

                for k in gallery_filter_keys:
                    if k not in args:
                        args[k] = ''

                keys = ("sort", "asc_desc")

                for k in keys:
                    if k not in args:
                        args[k] = ''

                # Already authorized by api key.
                args['public'] = False

                results = filter_galleries_no_request(args)
                if not results:
                    return HttpResponse(json.dumps([]), content_type="application/json; charset=utf-8")
                response_text = json.dumps(
                    [{
                        'gid': gallery.gid,
                        'token': gallery.token,
                        'title': gallery.title,
                        'title_jpn': gallery.title_jpn,
                        'category': gallery.category,
                        'uploader': gallery.uploader,
                        'comment': gallery.comment,
                        'posted': int(timestamp_or_zero(gallery.posted)),
                        'filecount': gallery.filecount,
                        'filesize': gallery.filesize,
                        'expunged': gallery.expunged,
                        'rating': gallery.rating,
                        'hidden': gallery.hidden,
                        'fjord': gallery.fjord,
                        'public': gallery.public,
                        'provider': gallery.provider,
                        'dl_type': gallery.dl_type,
                        'tags': gallery.tag_list(),
                        'link': gallery.get_link(),
                        'thumbnail': request.build_absolute_uri(reverse('viewer:gallery-thumb', args=(gallery.pk,))) if gallery.thumbnail else '',
                        'thumbnail_url': gallery.thumbnail_url
                    } for gallery in results
                    ],
                    sort_keys=True,
                    ensure_ascii=False,
                )
                return HttpResponse(response_text, content_type="application/json; charset=utf-8")
            else:
                response['error'] = 'Unknown function'
    else:
        response['error'] = 'Unsupported method: {}'.format(request.method)
    return HttpResponse(json.dumps(response), content_type="application/json; charset=utf-8")
Example #14
0
                        required=False,
                        action='store_true',
                        default=False,
                        help='Run the server as a daemon.')

    parser.add_argument('-pf',
                        '--pidfile',
                        required=False,
                        action='store',
                        default=None,
                        help='Store the process id in the given file.')

    args = parser.parse_args()

    if args.config_dir:
        crawler_settings = Settings(load_from_disk=True,
                                    default_dir=args.config_dir)
        os.environ['PANDA_CONFIG_DIR'] = args.config_dir
    else:
        crawler_settings = Settings(load_from_disk=True)

    if args.port:
        cherrypy_port = args.port
    else:
        cherrypy_port = crawler_settings.webserver.bind_port

    cherrypy_settings = {
        'server.socket_host': crawler_settings.webserver.bind_address,
        'server.socket_port': cherrypy_port,
        'checker.on': False,
        'engine.autoreload.on': crawler_settings.cherrypy_auto_restart,
        'log.screen': crawler_settings.webserver.log_to_screen,
Example #15
0
    def test_fakku_parser(self):
        """Check the FAKKU gallery page parser against known galleries."""
        settings = Settings(load_from_disk=True)

        # Each case pairs a gallery URL with the metadata the parser is
        # expected to extract from that page.
        cases = [
            (
                'https://www.fakku.net/hentai/im-a-piece-of-junk-sexaroid-english',
                GalleryData(
                    'hentai/im-a-piece-of-junk-sexaroid-english',
                    'fakku',
                    link='https://www.fakku.net/hentai/im-a-piece-of-junk-sexaroid-english',
                    title='I\'m a Piece of Junk Sexaroid',
                    thumbnail_url='https://t.fakku.net/images/manga/i/im-a-piece-of-junk-sexaroid-english/thumbs/002.thumb.jpg',
                    filecount=16,
                    category='Manga',
                    tags=[
                        'artist:wakame-san',
                        'magazine:comic_kairakuten_beast_2017-05',
                        'publisher:fakku',
                        'language:english',
                        'tsundere',
                        'femdom',
                        'vanilla',
                        'b*****b',
                        'oppai',
                        'hentai',
                        'creampie',
                        'uncensored',
                        'x-ray',
                        'subscription',
                    ],
                    comment='Plump slacker sex robot ❤',
                ),
            ),
            (
                'https://www.fakku.net/hentai/tsf-story-append-20-english_1497401155',
                GalleryData(
                    'hentai/tsf-story-append-20-english_1497401155',
                    'fakku',
                    link='https://www.fakku.net/hentai/tsf-story-append-20-english_1497401155',
                    title='TSF Story Append 2.0',
                    filecount=82,
                    category='Doujinshi',
                    tags=[
                        'artist:oda_non',
                        'artist:yasui_riosuke',
                        'artist:meme50',
                        'artist:kojima_saya',
                        'artist:butcha-u',
                        'artist:mizuryu_kei',
                        'artist:kurenai_yuuji',
                        'artist:soine',
                        'artist:asanagi',
                        'artist:yumeno_tanuki',
                        'artist:hiroyuki_sanadura',
                        'artist:shindo_l',
                        'artist:naokame',
                        'artist:kin_no_hiyoko',
                        'artist:masaru_yajiro',
                        'group:da_hootch',
                        'publisher:enshodo',
                        'language:english',
                        'anal',
                        'b*****b',
                        'oppai',
                        'glasses',
                        'stockings',
                        'group',
                        'nurse',
                        'hentai',
                        'ahegao',
                        'creampie',
                        'uncensored',
                        'genderbend',
                        'doujin',
                    ],
                    comment=
                    "Takumi's life as a girl only continues to get more wild, as he (she?) continues to fall deeper into a life of promiscuity, drugs and unprotected sex with strangers. Will his friend Ryou be able to pull him out of this terrible spiral?",
                    thumbnail_url=
                    'https://t.fakku.net/images/manga/t/tsf-story-append-20-english_1497401155_1502575464/thumbs/001.thumb.jpg',
                ),
            ),
        ]

        # Fetch each page (in order) and compare with the expected metadata,
        # constructing a fresh parser per gallery as the original test did.
        for gallery_link, expected_data in cases:
            parser = FakkuParser(settings)
            fetched_data = parser.fetch_gallery_data(gallery_link)
            self.assertEqual(fetched_data, expected_data)
Example #16
0
def crawler(request: HttpRequest) -> HttpResponse:
    """Crawl given URLs.

    GET renders the crawler form; POST queues the submitted URLs, either
    on the shared web queue or on a separate CrawlerThread. Settings are
    mutated in place (and persisted) only when 'keep_this_settings' is
    checked; otherwise a throwaway copy is used. Staff only.
    """

    if not request.user.is_staff:
        return render_error(request,
                            "You need to be an admin to crawl a link.")

    d = {}

    p = request.POST

    if p:
        if 'keep_this_settings' in p:
            current_settings = crawler_settings
        else:
            current_settings = Settings(
                load_from_config=crawler_settings.config)
        url_set = set()
        # create dictionary of properties for each archive
        # Metadata replacement is off unless explicitly requested below.
        current_settings.replace_metadata = False
        current_settings.config['allowed']['replace_metadata'] = 'no'
        for k, v in p.items():
            if k.startswith("downloaders"):
                # Field name is "downloaders-<name>"; value is its priority.
                k, dl = k.split('-')
                current_settings.config['downloaders'][dl] = v
                current_settings.downloaders[dl] = int(v)
            elif k == "replace_metadata":
                current_settings.config['allowed'][k] = 'yes'
                current_settings.replace_metadata = True
            elif k == "urls":
                # One URL per textarea line; strip Windows line endings.
                url_list = v.split("\n")
                for item in url_list:
                    url_set.add(item.rstrip('\r'))
        urls = list(url_set)

        if 'reason' in p and p['reason'] != '':
            reason = p['reason']
            # Force limit string length (reason field max_length)
            current_settings.archive_reason = reason[:200]
            current_settings.gallery_reason = reason[:200]

        if 'keep_this_settings' in p:
            # Persist the modified settings and reload them from disk.
            current_settings.write()
            current_settings.load_config_from_file()
        if 'run_separate' in p:
            crawler_thread = CrawlerThread(crawler_logger, current_settings,
                                           urls)
            crawler_thread.start()
        else:
            # Guard against a stopped queue instead of raising
            # AttributeError (consistent with the other views).
            if current_settings.workers.web_queue:
                current_settings.workers.web_queue.enqueue_args_list(
                    urls, override_options=current_settings)
            else:
                messages.error(
                    request, 'Cannot crawl: the web queue is not running.')
                return HttpResponseRedirect(reverse('viewer:main-page'))
        messages.success(request,
                         'Starting Crawler, check the logs for a report.')
        # Not really optimal when there's many commands being queued
        # for command in url_list:
        #     messages.success(request, command)
        return HttpResponseRedirect(reverse('viewer:main-page'))

    d.update({
        'settings':
        crawler_settings,
        'downloaders':
        crawler_settings.provider_context.get_downloaders_name_priority(
            crawler_settings)
    })

    return render(request, "viewer/crawler.html", d)
Example #17
0
def foldercrawler(request: HttpRequest) -> HttpResponse:
    """Folder crawler."""
    # Staff-only tool.
    if not request.user.is_staff:
        return render_error(request,
                            "You need to be an admin to use the tools.")

    context = {'media_root': os.path.realpath(crawler_settings.MEDIA_ROOT)}

    params = request.POST

    if not params:
        # No form submission: render the crawler page with the current
        # settings and the matcher name/priority listing.
        context.update({
            'settings':
            crawler_settings,
            'matchers':
            crawler_settings.provider_context.get_matchers_name_priority(
                crawler_settings)
        })
        return render(request, "viewer/foldercrawler.html", context)

    # When the user asked to keep these settings, mutate the global
    # settings object so the choices persist; otherwise work on a
    # throwaway copy built from the current config.
    if 'keep_this_settings' in params:
        run_settings = crawler_settings
    else:
        run_settings = Settings(
            load_from_config=crawler_settings.config)

    commands = set()
    for key, value in params.items():
        if key.startswith("matchers"):
            # Field name is "matchers-<name>"; value is its priority.
            key, matcher_name = key.split('-')
            run_settings.config['matchers'][matcher_name] = value
            run_settings.matchers[matcher_name] = int(value)
        elif key == "commands":
            # One command per line; drop Windows-style CR endings.
            commands.update(
                line.rstrip('\r') for line in value.split("\n"))
        elif key == "internal_matches":
            run_settings.internal_matches_for_non_matches = True

    reason = params.get('reason', '')
    if reason != '':
        # Force limit string length (reason field max_length)
        run_settings.archive_reason = reason[:200]
        run_settings.gallery_reason = reason[:200]

    source = params.get('source', '')
    if source != '':
        # Force limit string length (source field max_length)
        run_settings.archive_source = source[:50]

    if 'keep_this_settings' in params:
        # Persist to disk, then reload so the written values take effect.
        run_settings.write()
        run_settings.load_config_from_file()

    folder_crawler = FolderCrawlerThread(folder_logger, run_settings,
                                         list(commands))
    folder_crawler.start()
    messages.success(
        request, 'Starting Folder Crawler, check the logs for a report.')
    return HttpResponseRedirect(reverse('viewer:main-page'))
Example #18
0
 def process_downloaded_archive(self, archive: Archive) -> None:
     """Post-download integrity pass for a freshly downloaded archive.

     Verifies the zip file on disk, records crc32/filesize/filecount on
     the Archive row, and re-queues a panda_archive download when the
     file is corrupt or its size disagrees with the linked gallery.
     """
     if os.path.isfile(archive.zipped.path):
         # Probe the zip: testzip() returns the name of the first member
         # with a bad CRC (or None if all members check out); opening can
         # raise BadZipFile, or NotImplementedError for unsupported
         # compression methods — both count as a failed check.
         except_at_open = False
         return_error = None
         try:
             my_zip = ZipFile(
                 archive.zipped.path, 'r')
             return_error = my_zip.testzip()
             my_zip.close()
         except (BadZipFile, NotImplementedError):
             except_at_open = True
         if except_at_open or return_error:
             if 'panda' in archive.source_type:
                 self.logger.error(
                     "For archive: {}, file check on downloaded zipfile failed on file: {}, "
                     "forcing download as panda_archive to fix it.".format(archive, archive.zipped.path)
                 )
                 # Record the (bad) file's crc32 before replacing it.
                 crc32 = calc_crc32(
                     archive.zipped.path)
                 Archive.objects.add_or_update_from_values({'crc32': crc32}, pk=archive.pk)
                 if self.web_queue and archive.gallery:
                     # Re-crawl the gallery restricted to the panda_archive
                     # downloader only.
                     # NOTE(review): the meaning of the three boolean flags
                     # isn't visible here — confirm against
                     # Settings.allow_downloaders_only.
                     temp_settings = Settings(load_from_config=self.settings.config)
                     temp_settings.allow_downloaders_only(['panda_archive'], True, True, True)
                     self.web_queue.enqueue_args_list((archive.gallery.get_link(),), override_options=temp_settings)
                     # A fresh download is queued; skip recording stats for
                     # the corrupt file below.
                     return
             else:
                 # Non-panda source: only warn; stats for the suspect file
                 # are still recorded below.
                 self.logger.warning(
                     "For archive: {}, File check on downloaded zipfile: {}. "
                     "Check the file manually.".format(archive, archive.zipped.path)
                 )
         # Store the downloaded file's checksum, size and page count on
         # the Archive row.
         crc32 = calc_crc32(
             archive.zipped.path)
         filesize = get_zip_filesize(
             archive.zipped.path)
         filecount = filecount_in_zip(
             archive.zipped.path)
         values = {'crc32': crc32,
                   'filesize': filesize,
                   'filecount': filecount,
                   }
         updated_archive = Archive.objects.add_or_update_from_values(
             values, pk=archive.pk)
         # Size mismatch against the gallery's expected filesize suggests
         # a bad or partial download.
         if archive.gallery and updated_archive.filesize != updated_archive.gallery.filesize:
             # Another archive for this gallery already matches the
             # expected size, so nothing needs fixing here.
             if Archive.objects.filter(gallery=updated_archive.gallery, filesize=updated_archive.gallery.filesize):
                 self.logger.info(
                     "For archive: {} size does not match gallery, "
                     "but there's already another archive that matches.".format(updated_archive)
                 )
                 return
             if 'panda' in archive.source_type:
                 self.logger.info(
                     "For archive: {} size does not match gallery, "
                     "downloading again from panda_archive.".format(updated_archive)
                 )
                 if self.web_queue:
                     # Same panda_archive-only re-crawl as in the corrupt
                     # zip branch above.
                     temp_settings = Settings(load_from_config=self.settings.config)
                     temp_settings.allow_downloaders_only(['panda_archive'], True, True, True)
                     self.web_queue.enqueue_args_list(
                         (updated_archive.gallery.get_link(), ),
                         override_options=temp_settings
                     )
             else:
                 # Can't auto-redownload from non-panda sources.
                 self.logger.warning(
                     "For archive: {} size does not match gallery. Check the file manually.".format(archive)
                 )