Example #1
def recall_api(request: HttpRequest, pk: int) -> HttpResponse:
    """Recall provider API, if possible."""

    if not request.user.is_staff:
        return render_error(request,
                            "You need to be an admin to recall the API.")

    try:
        archive = Archive.objects.get(pk=pk)
    except Archive.DoesNotExist:
        raise Http404("Archive does not exist")

    if not archive.gallery_id:
        return render_error(request,
                            "No gallery associated with this archive.")

    gallery = Gallery.objects.get(pk=archive.gallery_id)

    current_settings = Settings(load_from_config=crawler_settings.config)

    if current_settings.workers.web_queue:

        current_settings.set_update_metadata_options(
            providers=(gallery.provider, ))

        current_settings.workers.web_queue.enqueue_args_list(
            (gallery.get_link(), ), override_options=current_settings)

        frontend_logger.info(
            'Updating gallery API data for gallery: {} and related archives'.
            format(gallery.get_absolute_url()))

    return HttpResponseRedirect(request.META["HTTP_REFERER"])
Example #2
def reduce(request: HttpRequest, pk: int) -> HttpResponse:
    """Reduce archive."""

    if not request.user.has_perm('viewer.expand_archive'):
        return render_error(request, "You don't have the permission to expand an Archive.")
    try:
        with transaction.atomic():
            archive = Archive.objects.select_for_update().get(pk=pk)
            if not archive.extracted:
                return render_error(request, "Archive is already reduced.")

            logger.info('Reducing images for archive: {}'.format(archive.get_absolute_url()))

            archive.reduce()
            action = 'REDUCE_ARCHIVE'
            event_log(
                request.user,
                action,
                content_object=archive,
                result='success'
            )
    except Archive.DoesNotExist:
        raise Http404("Archive does not exist")

    return HttpResponseRedirect(request.META["HTTP_REFERER"])
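A note on the locking used above: select_for_update() only acquires its row lock when the queryset is evaluated inside transaction.atomic(); evaluated in autocommit mode it raises TransactionManagementError, and the lock is released when the transaction ends. A minimal sketch of the same pattern with a hypothetical Item model (not part of this project):

from django.db import models, transaction

class Item(models.Model):
    """Hypothetical model, used only to illustrate the locking pattern."""
    processed = models.BooleanField(default=False)

def mark_processed(pk: int) -> bool:
    # The atomic block is required: select_for_update() outside a transaction
    # raises TransactionManagementError. The lock is released on commit/rollback.
    with transaction.atomic():
        item = Item.objects.select_for_update().get(pk=pk)
        if item.processed:
            return False  # another request already handled this row
        item.processed = True
        item.save()
        return True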
Example #3
def queue_operations(request: HttpRequest, operation: str, arguments: str = '') -> HttpResponseRedirect:

    if not request.user.is_staff:
        return render_error(request, "You need to be an admin to operate the queue.")
    if operation == "remove_by_index":
        if arguments and crawler_settings.workers.web_queue:
            crawler_settings.workers.web_queue.remove_by_index(int(arguments))
        else:
            return render_error(request, "Unknown argument.")
    else:
        return render_error(request, "Unknown queue operation.")

    return HttpResponseRedirect(request.META["HTTP_REFERER"])
Example #4
def logs(request: HttpRequest, tool: str = "all") -> HttpResponse:
    """Logs listing."""

    if not request.user.is_staff:
        return render_error(request, "You need to be an admin to see logs.")

    log_lines: typing.List[str] = []

    if tool == "all" or tool == "webcrawler":

        with open(MAIN_LOGGER, 'rt', encoding='utf8') as f:
            log_lines = f.read().split('[0m\n')
        log_lines.pop()
        log_lines.reverse()

    log_filter = request.GET.get('filter', '')
    if log_filter:
        log_lines = [x for x in log_lines if log_filter.lower() in x.lower()]

    current_base_uri = re.escape('{scheme}://{host}'.format(
        scheme=request.scheme, host=request.get_host()))
    # Build complete URL for relative internal URLs (some)
    if crawler_settings.urls.viewer_main_url:
        patterns = [
            r'(?!' + current_base_uri + r')/' +
            crawler_settings.urls.viewer_main_url + r"archive/\d+/?",
            r'(?!' + current_base_uri + r')/' +
            crawler_settings.urls.viewer_main_url + r"gallery/\d+/?",
            r'(?!' + current_base_uri + r')/' +
            crawler_settings.urls.viewer_main_url + r"wanted-gallery/\d+/?",
        ]
    else:
        patterns = [
            r'(?<!' + current_base_uri + r')/archive/\d+/?',
            r'(?<!' + current_base_uri + r')/gallery/\d+/?',
            r'(?<!' + current_base_uri + r')/wanted-gallery/\d+/?',
        ]

    def build_request(match_obj: typing.Match) -> str:
        return request.build_absolute_uri(match_obj.group(0))

    log_lines = [
        reduce(lambda v, pattern: re.sub(pattern, build_request, v), patterns,
               line) for line in log_lines
    ]

    paginator = Paginator(log_lines, 100)
    try:
        page = int(request.GET.get("page", '1'))
    except ValueError:
        page = 1

    try:
        log_lines = paginator.page(page)
    except (InvalidPage, EmptyPage):
        log_lines = paginator.page(paginator.num_pages)

    d = {'log_lines': log_lines}
    return render(request, "viewer/logs.html", d)
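The URL rewriting above folds a list of regex patterns over each log line with a reduce call, replacing relative internal links with absolute ones via request.build_absolute_uri. The same fold in isolation, with a fixed base instead of the request object (base and patterns are illustrative):

import re
from functools import reduce

base = 'https://example.com'
patterns = [r'/archive/\d+/?', r'/gallery/\d+/?']

def absolutize(match: re.Match) -> str:
    # Prefix the matched relative path with the site base.
    return base + match.group(0)

line = 'Matched /archive/123/ against /gallery/45'
rewritten = reduce(lambda text, pattern: re.sub(pattern, absolutize, text), patterns, line)
# -> 'Matched https://example.com/archive/123/ against https://example.com/gallery/45'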
Example #5
def public_toggle(request: HttpRequest, pk: int) -> HttpResponse:
    """Public archive toggle."""

    if not request.user.is_staff:
        return render_error(
            request,
            "You need to be an admin to toggle public access for an archive.")

    try:
        archive = Archive.objects.get(pk=pk)
    except Archive.DoesNotExist:
        raise Http404("Archive does not exist")

    if archive.public:
        archive.set_private()
        frontend_logger.info('Setting public status to: private for ' +
                             archive.zipped.name)
        event_log(request.user,
                  'UNPUBLISH_ARCHIVE',
                  content_object=archive,
                  result='unpublished')
    else:
        archive.set_public()
        frontend_logger.info('Setting public status to: public for ' +
                             archive.zipped.name)
        event_log(request.user,
                  'PUBLISH_ARCHIVE',
                  content_object=archive,
                  result='published')

    return HttpResponseRedirect(request.META["HTTP_REFERER"])
Example #6
def stats_workers(request: HttpRequest) -> HttpResponse:
    """Display workers stats."""

    if not request.user.is_staff:
        return render_error(
            request, "You need to be an admin to view workers' status.")

    stats_dict = {
        "current_settings":
        crawler_settings,
        "thread_status":
        get_thread_status(),
        "web_queue":
        crawler_settings.workers.web_queue,
        "post_downloader":
        crawler_settings.workers.timed_downloader,
        "schedulers":
        get_schedulers_status([
            crawler_settings.workers.timed_crawler,
            crawler_settings.workers.timed_downloader,
            crawler_settings.workers.timed_auto_wanted,
            crawler_settings.workers.timed_updater
        ])
    }

    d = {'stats': stats_dict}

    return render(request, "viewer/stats_workers.html", d)
Example #7
def public_toggle(request: HttpRequest, pk: int) -> HttpResponse:
    """Public archive toggle."""

    if not request.user.has_perm('viewer.publish_archive'):
        return render_error(request, "You don't have the permission to change public status for an Archive.")

    try:
        archive = Archive.objects.get(pk=pk)
    except Archive.DoesNotExist:
        raise Http404("Archive does not exist")

    if archive.public:
        archive.set_private()
        logger.info('Setting public status to: private for ' + archive.zipped.name)
        event_log(
            request.user,
            'UNPUBLISH_ARCHIVE',
            content_object=archive,
            result='unpublished'
        )
    else:
        archive.set_public()
        logger.info('Setting public status to: public for ' + archive.zipped.name)
        event_log(
            request.user,
            'PUBLISH_ARCHIVE',
            content_object=archive,
            result='published'
        )

    return HttpResponseRedirect(request.META["HTTP_REFERER"])
Example #8
def gallery_frequency(request: HttpRequest) -> HttpResponse:
    if not crawler_settings.urls.enable_gallery_frequency:
        if not request.user.is_staff:
            raise Http404("Page not found")
        else:
            return render_error(
                request,
                "Page disabled by settings (urls: enable_gallery_frequency).")
    title = request.GET.get("title", '')
    tags = request.GET.get("tags", '')
    if 'clear' in request.GET:
        request.GET = QueryDict('')
        form = GallerySearchForm()
        form_simple = GallerySearchSimpleForm(request.GET,
                                              error_class=SpanErrorList)
    else:
        form = GallerySearchForm(initial={'title': title, 'tags': tags})
        form_simple = GallerySearchSimpleForm(request.GET,
                                              error_class=SpanErrorList)
        form_simple.is_valid()
        for field_name, errors in form_simple.errors.items():
            messages.error(request,
                           field_name + ": " + ", ".join(errors),
                           extra_tags='danger')
    d = {'form': form, 'form_simple': form_simple}
    return render(request, "viewer/graph_gallery_posted.html", d)
Example #9
def delete_archive(request: HttpRequest, pk: int) -> HttpResponse:
    """Delete archive and gallery data if there is any."""

    if not request.user.is_staff:
        return render_error(request,
                            "You need to be an admin to delete an archive.")

    try:
        archive = Archive.objects.get(pk=pk)
    except Archive.DoesNotExist:
        raise Http404("Archive does not exist")

    if request.method == 'POST':

        p = request.POST
        if "delete_confirm" in p:

            message_list = list()

            if "delete-archive" in p:
                message_list.append('archive entry')
            if "delete-gallery" in p:
                message_list.append('associated gallery')
            if "delete-file" in p:
                message_list.append('associated file')

            message = 'For archive: {}, deleting: {}'.format(
                archive.title, ', '.join(message_list))

            frontend_logger.info("User {}: {}".format(request.user.username,
                                                      message))
            messages.success(request, message)

            gallery = archive.gallery

            if "mark-gallery-deleted" in p:
                archive.gallery.mark_as_deleted()
                archive.gallery = None
            if "delete-file" in p:
                archive.delete_all_files()
            if "delete-archive" in p:
                archive.delete_files_but_archive()
                archive.delete()

            user_reason = p.get('reason', '')

            event_log(request.user,
                      'DELETE_ARCHIVE',
                      reason=user_reason,
                      content_object=gallery,
                      result='deleted')

            return HttpResponseRedirect(reverse('viewer:main-page'))

    d = {'archive': archive}

    return render(request, "viewer/delete_archive.html", d)
Example #10
def recall_api(request: HttpRequest, pk: int) -> HttpResponse:
    """Recall provider API, if possible."""

    if not request.user.has_perm('viewer.update_metadata'):
        return render_error(request, "You don't have the permission to refresh source metadata on an Archive.")

    try:
        archive = Archive.objects.get(pk=pk)
    except Archive.DoesNotExist:
        raise Http404("Archive does not exist")

    if not archive.gallery_id:
        return render_error(request, "No gallery associated with this archive.")

    gallery = Gallery.objects.get(pk=archive.gallery_id)

    current_settings = Settings(load_from_config=crawler_settings.config)

    if current_settings.workers.web_queue and gallery.provider:

        current_settings.set_update_metadata_options(providers=(gallery.provider,))

        def gallery_callback(x: Optional['Gallery'], crawled_url: Optional[str], result: str) -> None:
            event_log(
                request.user,
                'UPDATE_METADATA',
                content_object=x,
                result=result,
                data=crawled_url
            )

        current_settings.workers.web_queue.enqueue_args_list(
            (gallery.get_link(),),
            override_options=current_settings,
            gallery_callback=gallery_callback
        )

        logger.info(
            'Updating gallery API data for gallery: {} and related archives'.format(
                gallery.get_absolute_url()
            )
        )

    return HttpResponseRedirect(request.META["HTTP_REFERER"])
Example #11
def foldercrawler(request: HttpRequest) -> HttpResponse:
    """Folder crawler."""
    if not request.user.is_staff:
        return render_error(request, "You need to be an admin to use the tools.")

    d: dict[str, typing.Any] = {'media_root': os.path.realpath(crawler_settings.MEDIA_ROOT)}

    p = request.POST

    if p:
        if 'keep_this_settings' in p:
            current_settings = crawler_settings
        else:
            current_settings = Settings(load_from_config=crawler_settings.config)
        commands = set()
        # create dictionary of properties for each command
        for k, v in p.items():
            if k.startswith("matchers"):
                k, matcher = k.split('-')
                current_settings.config['matchers'][matcher] = v
                current_settings.matchers[matcher] = int(v)
            elif k == "commands":
                command_list = v.split("\n")
                for item in command_list:
                    commands.add(item.rstrip('\r'))
            elif k == "internal_matches":
                current_settings.internal_matches_for_non_matches = True

        if 'reason' in p and p['reason'] != '':
            reason = p['reason']
            # Force limit string length (reason field max_length)
            current_settings.archive_reason = reason[:200]
            current_settings.gallery_reason = reason[:200]

        if 'source' in p and p['source'] != '':
            source = p['source']
            # Force limit string length (reason field max_length)
            current_settings.archive_source = source[:50]

        if 'keep_this_settings' in p:
            current_settings.write()
            current_settings.load_config_from_file()
        folder_crawler = FolderCrawlerThread(current_settings, list(commands))
        folder_crawler.start()
        messages.success(request, 'Starting Folder Crawler, check the logs for a report.')
        # Not really optimal when there's many commands being queued
        # for command in commands:
        #     messages.success(request, command)
        return HttpResponseRedirect(reverse('viewer:main-page'))

    d.update({
        'settings': crawler_settings,
        'matchers': crawler_settings.provider_context.get_matchers_name_priority(crawler_settings)
    })

    return render(request, "viewer/foldercrawler.html", d)
Example #12
def stats_settings(request: HttpRequest) -> HttpResponse:
    """Display settings objects."""

    if not request.user.is_staff:
        return render_error(request, "You need to be an admin to view setting objects.")

    stats_dict = {
        "current_settings": crawler_settings,
    }

    d = {'stats': stats_dict}

    return render(request, "viewer/stats_settings.html", d)
Example #13
def generate_matches(request: HttpRequest, pk: int) -> HttpResponse:
    """Generate matches for non-match."""

    if not request.user.is_staff:
        return render_error(request, "You need to be an admin to generate matches.")

    try:
        archive = Archive.objects.get(pk=pk)
    except Archive.DoesNotExist:
        raise Http404("Archive does not exist")

    if archive.gallery:
        return render_error(request, "Archive is already matched.")

    clear_title = 'clear' in request.GET

    provider_filter = request.GET.get('provider', '')
    try:
        cutoff = float(request.GET.get('cutoff', '0.4'))
    except ValueError:
        cutoff = 0.4
    try:
        max_matches = int(request.GET.get('max-matches', '10'))
    except ValueError:
        max_matches = 10

    archive.generate_possible_matches(
        clear_title=clear_title, provider_filter=provider_filter,
        cutoff=cutoff, max_matches=max_matches
    )
    archive.save()

    logger.info('Generated matches for {}, found {}'.format(
        archive.zipped.path,
        archive.possible_matches.count()
    ))

    return HttpResponseRedirect(request.META["HTTP_REFERER"])
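The cutoff and max-matches parsing above (the same try/except pair appears in several views in this section) is a parse-with-default pattern that could live in one helper. A hedged sketch, not taken from this project:

import typing

T = typing.TypeVar('T', int, float)

def parse_param(raw: typing.Optional[str], cast: typing.Callable[[str], T], default: T) -> T:
    """Return cast(raw), falling back to default on missing or malformed input."""
    if raw is None:
        return default
    try:
        return cast(raw)
    except ValueError:
        return default

# Usage against request.GET (a Django QueryDict):
# cutoff = parse_param(request.GET.get('cutoff'), float, 0.4)
# max_matches = parse_param(request.GET.get('max-matches'), int, 10)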
Example #14
def extract_toggle(request: HttpRequest, pk: int) -> HttpResponse:
    """Extract archive toggle."""

    if not request.user.is_staff:
        return render_error(
            request, "You need to be an admin to toggle extract an archive.")

    try:
        with transaction.atomic():
            archive = Archive.objects.select_for_update().get(pk=pk)
            frontend_logger.info('Toggling images for ' + archive.zipped.name)
            archive.extract_toggle()
    except Archive.DoesNotExist:
        raise Http404("Archive does not exist")

    return HttpResponseRedirect(request.META["HTTP_REFERER"])
Example #15
def recalc_info(request: HttpRequest, pk: int) -> HttpResponse:
    """Recalculate archive info."""

    if not request.user.is_staff:
        return render_error(
            request, "You need to be an admin to recalculate file info.")

    try:
        archive = Archive.objects.get(pk=pk)
    except Archive.DoesNotExist:
        raise Http404("Archive does not exist")

    frontend_logger.info('Recalculating file info for ' + archive.zipped.name)
    archive.recalc_fileinfo()
    archive.generate_image_set(force=False)
    archive.generate_thumbnails()

    return HttpResponseRedirect(request.META["HTTP_REFERER"])
Example #16
def archive_update(request: HttpRequest,
                   pk: int,
                   tool: typing.Optional[str] = None,
                   tool_use_id: typing.Optional[str] = None) -> HttpResponse:
    try:
        archive = Archive.objects.get(pk=pk)
    except Archive.DoesNotExist:
        raise Http404("Archive does not exist")

    if tool == 'select-as-match' and request.user.has_perm(
            'viewer.match_archive'):
        archive.select_as_match(tool_use_id)
        if archive.gallery:
            frontend_logger.info(
                "User: {}: Archive {} ({}) was matched with gallery {} ({}).".
                format(
                    request.user.username,
                    archive,
                    reverse('viewer:archive', args=(archive.pk, )),
                    archive.gallery,
                    reverse('viewer:gallery', args=(archive.gallery.pk, )),
                ))
            event_log(
                request.user,
                'MATCH_ARCHIVE',
                # reason=user_reason,
                data=reverse('viewer:gallery', args=(archive.gallery.pk, )),
                content_object=archive,
                result='matched')
        return HttpResponseRedirect(request.META["HTTP_REFERER"])
    elif tool == 'clear-possible-matches' and request.user.has_perm(
            'viewer.match_archive'):
        archive.possible_matches.clear()
        frontend_logger.info(
            "User: {}: Archive {} ({}) was cleared from its possible matches.".
            format(
                request.user.username,
                archive,
                reverse('viewer:archive', args=(archive.pk, )),
            ))
        return HttpResponseRedirect(request.META["HTTP_REFERER"])
    else:
        return render_error(request, 'Unrecognized command')
Example #17
def rematch_archive(request: HttpRequest, pk: int) -> HttpResponse:
    """Match an Archive."""

    if not request.user.is_staff:
        return render_error(request, "You need to be an admin to rematch an archive.")

    try:
        archive = Archive.objects.get(pk=pk)
    except Archive.DoesNotExist:
        raise Http404("Archive does not exist")

    if archive.gallery:
        archive.gallery.archive_set.remove(archive)

    folder_crawler_thread = FolderCrawlerThread(
        crawler_settings, ['-frm', archive.zipped.path])
    folder_crawler_thread.start()

    logger.info('Rematching archive: {}'.format(archive.title))

    return HttpResponseRedirect(request.META["HTTP_REFERER"])
Example #18
def stats_collection(request: HttpRequest) -> HttpResponse:
    """Display galleries and archives stats."""

    if not request.user.is_staff:
        return render_error(request,
                            "You need to be an admin to display the stats.")

    # General
    stats_dict = {
        "n_archives":
        Archive.objects.count(),
        "n_expanded_archives":
        Archive.objects.filter(extracted=True).count(),
        "n_to_download_archives":
        Archive.objects.filter_by_dl_remote().count(),
        "n_galleries":
        Gallery.objects.count(),
        "archive":
        Archive.objects.filter(filesize__gt=0).aggregate(
            Avg('filesize'), Max('filesize'), Min('filesize'),
            Sum('filesize')),
        "gallery":
        Gallery.objects.filter(filesize__gt=0).aggregate(
            Avg('filesize'), Max('filesize'), Min('filesize'),
            Sum('filesize')),
        "hidden_galleries":
        Gallery.objects.filter(hidden=True).count(),
        "hidden_galleries_size":
        Gallery.objects.filter(filesize__gt=0,
                               hidden=True).aggregate(Sum('filesize')),
        "fjord_galleries":
        Gallery.objects.filter(fjord=True).count(),
        "expunged_galleries":
        Gallery.objects.filter(expunged=True).count(),
        "n_tags":
        Tag.objects.count(),
        "n_tag_scopes":
        Tag.objects.values('scope').distinct().count(),
        "n_custom_tags":
        Tag.objects.are_custom().count(),
        "top_10_tags":
        Tag.objects.annotate(
            num_gallery=Count('gallery')).order_by('-num_gallery')[:10],
        "top_10_parody_tags":
        Tag.objects.filter(scope='parody').annotate(
            num_gallery=Count('gallery')).order_by('-num_gallery')[:10],
        "wanted_galleries": {
            "total": WantedGallery.objects.all().count(),
            "found": WantedGallery.objects.filter(found=True).count(),
            "total_galleries_found": FoundGallery.objects.all().count(),
            "user_created":
            WantedGallery.objects.filter(book_type='user').count(),
        }
    }

    # Per provider
    providers = Gallery.objects.all().values_list('provider',
                                                  flat=True).distinct()

    providers_dict = {}

    for provider in providers:
        providers_dict[provider] = {
            'galleries':
            Gallery.objects.filter(provider=provider).count(),
            'archives':
            Archive.objects.filter(gallery__provider=provider).count(),
            'wanted_galleries':
            WantedGallery.objects.filter(provider=provider).count(),
        }

    d = {'stats': stats_dict, 'providers': providers_dict}

    return render(request, "viewer/stats_collection.html", d)
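For readers of the stats template: each aggregate() call above returns a single dict whose keys follow Django's default <field>__<function> aliases. An illustrative shape, with made-up values:

from django.db.models import Avg, Max, Min, Sum

# Same aggregation as above, spelled out; the result is a plain dict:
# totals = Archive.objects.filter(filesize__gt=0).aggregate(
#     Avg('filesize'), Max('filesize'), Min('filesize'), Sum('filesize'))
# totals == {'filesize__avg': 52428800.0, 'filesize__max': 209715200,
#            'filesize__min': 1048576, 'filesize__sum': 10485760000}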
Example #19
def crawler(request: HttpRequest) -> HttpResponse:
    """Crawl given URLs."""

    if not request.user.is_staff:
        return render_error(request,
                            "You need to be an admin to crawl a link.")

    d = {}

    p = request.POST

    if p:
        if 'keep_this_settings' in p:
            current_settings = crawler_settings
        else:
            current_settings = Settings(
                load_from_config=crawler_settings.config)
        url_set = set()
        # create dictionary of properties for each archive
        current_settings.replace_metadata = False
        current_settings.config['allowed']['replace_metadata'] = 'no'
        for k, v in p.items():
            if k.startswith("downloaders"):
                k, dl = k.split('-')
                current_settings.config['downloaders'][dl] = v
                current_settings.downloaders[dl] = int(v)
            elif k == "replace_metadata":
                current_settings.config['allowed'][k] = 'yes'
                current_settings.replace_metadata = True
            elif k == "urls":
                url_list = v.split("\n")
                for item in url_list:
                    url_set.add(item.rstrip('\r'))
        urls = list(url_set)

        if 'reason' in p and p['reason'] != '':
            reason = p['reason']
            # Force limit string length (reason field max_length)
            current_settings.archive_reason = reason[:200]
            current_settings.gallery_reason = reason[:200]

        if 'keep_this_settings' in p:
            current_settings.write()
            current_settings.load_config_from_file()
        if 'run_separate' in p:
            crawler_thread = CrawlerThread(crawler_logger, current_settings,
                                           urls)
            crawler_thread.start()
        else:
            current_settings.workers.web_queue.enqueue_args_list(
                urls, override_options=current_settings)
        messages.success(request,
                         'Starting Crawler, check the logs for a report.')
        # Not really optimal when there's many commands being queued
        # for command in url_list:
        #     messages.success(request, command)
        return HttpResponseRedirect(reverse('viewer:main-page'))

    d.update({
        'settings':
        crawler_settings,
        'downloaders':
        crawler_settings.provider_context.get_downloaders_name_priority(
            crawler_settings)
    })

    return render(request, "viewer/crawler.html", d)
Example #20
def archives_not_matched_with_gallery(request: HttpRequest) -> HttpResponse:
    p = request.POST
    get = request.GET

    title = get.get("title", '')
    tags = get.get("tags", '')

    try:
        page = int(get.get("page", '1'))
    except ValueError:
        page = 1

    if 'clear' in get:
        form = ArchiveSearchForm()
    else:
        form = ArchiveSearchForm(initial={'title': title, 'tags': tags})

    if p:
        pks = []
        for k, v in p.items():
            if k.startswith("sel-"):
                # k, pk = k.split('-')
                # results[pk][k] = v
                pks.append(v)

        preserved = Case(
            *[When(pk=pk, then=pos) for pos, pk in enumerate(pks)])

        archives = Archive.objects.filter(id__in=pks).order_by(preserved)
        if 'create_possible_matches' in p:
            if thread_exists('match_unmatched_worker'):
                return render_error(
                    request, "Local matching worker is already running.")
            provider = p['create_possible_matches']
            try:
                cutoff = float(p.get('cutoff', '0.4'))
            except ValueError:
                cutoff = 0.4
            try:
                max_matches = int(p.get('max-matches', '10'))
            except ValueError:
                max_matches = 10

            frontend_logger.info(
                'User {}: Looking for possible matches in gallery database '
                'for non-matched archives (cutoff: {}, max matches: {}) '
                'using provider filter "{}"'.format(request.user.username,
                                                    cutoff, max_matches,
                                                    provider))
            matching_thread = threading.Thread(
                name='match_unmatched_worker',
                target=generate_possible_matches_for_archives,
                args=(archives, ),
                kwargs={
                    'logger': frontend_logger,
                    'cutoff': cutoff,
                    'max_matches': max_matches,
                    'filters': (provider, ),
                    'match_local': True,
                    'match_web': False
                })
            matching_thread.daemon = True
            matching_thread.start()
            messages.success(request, 'Starting internal match worker.')
        elif 'clear_possible_matches' in p:

            for archive in archives:
                archive.possible_matches.clear()

            frontend_logger.info(
                'User {}: Clearing possible matches for archives'.format(
                    request.user.username))
            messages.success(request, 'Clearing possible matches.')

    params = {
        'sort': 'create_date',
        'asc_desc': 'desc',
        'filename': title,
    }

    for k, v in get.items():
        params[k] = v

    for k in archive_filter_keys:
        if k not in params:
            params[k] = ''

    results = filter_archives_simple(params)

    if 'show-matched' not in get:
        results = results.filter(gallery__isnull=True)

    results = results.prefetch_related(
        Prefetch('archivematches_set',
                 queryset=ArchiveMatches.objects.select_related(
                     'gallery', 'archive').prefetch_related(
                         Prefetch('gallery__tags',
                                  queryset=Tag.objects.filter(
                                      scope__exact='artist'),
                                  to_attr='artist_tags')),
                 to_attr='possible_galleries'),
        'possible_galleries__gallery',
    )

    if 'with-possible-matches' in get:
        results = results.annotate(
            n_possible_matches=Count('possible_matches')).filter(
                n_possible_matches__gt=0)

    paginator = Paginator(results, 50)
    try:
        results = paginator.page(page)
    except (InvalidPage, EmptyPage):
        results = paginator.page(paginator.num_pages)

    d = {
        'results':
        results,
        'providers':
        Gallery.objects.all().values_list('provider', flat=True).distinct(),
        'form':
        form
    }
    return render(request, "viewer/collaborators/unmatched_archives.html", d)
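The Case(*[When(pk=pk, then=pos) ...]) expression above is the usual way to order a queryset by an explicit list of primary keys, here the order in which the checkboxes were submitted. In isolation (the queryset line is commented out because it needs the project's models):

from django.db.models import Case, When

pks = [7, 3, 5]
# Each matching row is annotated with its position in the submitted list,
# and ordering by that synthetic value returns rows as 7, 3, 5.
preserved = Case(*[When(pk=pk, then=pos) for pos, pk in enumerate(pks)])
# archives = Archive.objects.filter(id__in=pks).order_by(preserved)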
Example #21
def tools(request: HttpRequest, tool: str = "main") -> HttpResponse:
    """Tools listing."""
    settings_text = ''
    if not request.user.is_staff:
        return render_error(request,
                            "You need to be an admin to use the tools.")
    if tool == "transfer_missing_downloads":
        crawler_thread = CrawlerThread(crawler_logger, crawler_settings,
                                       '-tmd'.split())
        crawler_thread.start()
        return HttpResponseRedirect(request.META["HTTP_REFERER"])
    elif tool == "retry_failed":
        crawler_thread = CrawlerThread(crawler_logger, crawler_settings,
                                       '--retry-failed'.split())
        crawler_thread.start()
        return HttpResponseRedirect(request.META["HTTP_REFERER"])
    elif tool == "update_newer_than":
        p = request.GET
        if p and 'newer_than' in p:
            newer_than_date = p['newer_than']
            try:
                if parse_date(newer_than_date) is not None:
                    crawler_settings.workers.web_queue.enqueue_args_list(
                        ('-unt', newer_than_date))
                    messages.success(
                        request,
                        'Updating galleries posted after ' + newer_than_date)
                else:
                    messages.error(request,
                                   'Invalid date format.',
                                   extra_tags='danger')
            except ValueError:
                messages.error(request, 'Invalid date.', extra_tags='danger')
            return HttpResponseRedirect(request.META["HTTP_REFERER"])
    elif tool == "update_missing_thumbnails":
        p = request.GET
        if p and 'limit_number' in p:

            try:
                limit_number = int(p['limit_number'])

                provider = request.GET.get('provider', '')
                if provider:
                    crawler_settings.workers.web_queue.enqueue_args_list(
                        ('-umt', str(limit_number), '-ip', provider))
                else:
                    crawler_settings.workers.web_queue.enqueue_args_list(
                        ('-umt', str(limit_number)))
                messages.success(
                    request,
                    'Updating galleries missing thumbnails, limiting to older {}'
                    .format(limit_number))

            except ValueError:
                messages.error(request, 'Invalid limit.', extra_tags='danger')
            return HttpResponseRedirect(request.META["HTTP_REFERER"])
    elif tool == "generate_missing_thumbs":
        archives = Archive.objects.filter(thumbnail='')
        for archive in archives:
            frontend_logger.info('Generating thumbs for file: {}'.format(
                archive.zipped.name))
            archive.generate_thumbnails()
        return HttpResponseRedirect(request.META["HTTP_REFERER"])
    elif tool == "calculate_missing_info":
        archives = Archive.objects.filter_by_missing_file_info()
        for archive in archives:
            frontend_logger.info('Calculating file info for file: {}'.format(
                archive.zipped.name))
            archive.recalc_fileinfo()
        return HttpResponseRedirect(request.META["HTTP_REFERER"])
    elif tool == "recalc_all_file_info":
        if thread_exists('fileinfo_worker'):
            return render_error(request,
                                "File info worker is already running.")

        archives = Archive.objects.all()
        frontend_logger.info(
            'Recalculating file info for all archives, count: {}'.format(
                archives.count()))

        image_worker_thread = ImageWorker(crawler_logger, 4)
        for archive in archives:
            if os.path.exists(archive.zipped.path):
                image_worker_thread.enqueue_archive(archive)
        fileinfo_thread = threading.Thread(
            name='fileinfo_worker',
            target=image_worker_thread.start_info_thread)
        fileinfo_thread.start()
        return HttpResponseRedirect(request.META["HTTP_REFERER"])
    elif tool == "set_all_hidden_as_public":

        archives = Archive.objects.filter(gallery__hidden=True)

        for archive in archives:
            if os.path.isfile(archive.zipped.path):
                archive.public = True
                archive.save()
                archive.gallery.public = True
                archive.gallery.save()

        return HttpResponseRedirect(request.META["HTTP_REFERER"])
    elif tool == "regenerate_all_thumbs":
        if thread_exists('thumbnails_worker'):
            return render_error(request,
                                "Thumbnails worker is already running.")

        archives = Archive.objects.all()
        frontend_logger.info(
            'Generating thumbs for all archives, count {}'.format(
                archives.count()))

        image_worker_thread = ImageWorker(crawler_logger, 4)
        for archive in archives:
            if os.path.exists(archive.zipped.path):
                image_worker_thread.enqueue_archive(archive)
        thumbnails_thread = threading.Thread(
            name='thumbnails_worker',
            target=image_worker_thread.start_thumbs_thread)
        thumbnails_thread.start()
        return HttpResponseRedirect(request.META["HTTP_REFERER"])
    elif tool == "generate_possible_matches_internally":
        if thread_exists('match_unmatched_worker'):
            return render_error(request, "Matching worker is already running.")
        provider = request.GET.get('provider', '')
        try:
            cutoff = float(request.GET.get('cutoff', '0.4'))
        except ValueError:
            cutoff = 0.4
        try:
            max_matches = int(request.GET.get('max-matches', '10'))
        except ValueError:
            max_matches = 10
        frontend_logger.info(
            'Looking for possible matches in gallery database '
            'for non-matched archives (cutoff: {}, max matches: {}) '
            'using provider filter "{}"'.format(cutoff, max_matches, provider))
        matching_thread = threading.Thread(
            name='match_unmatched_worker',
            target=generate_possible_matches_for_archives,
            args=(None, ),
            kwargs={
                'logger': frontend_logger,
                'cutoff': cutoff,
                'max_matches': max_matches,
                'filters': (provider, ),
                'match_local': True,
                'match_web': False
            })
        matching_thread.daemon = True
        matching_thread.start()
        messages.success(
            request,
            'Looking for possible matches, filtering providers: {}, cutoff: {}.'
            .format(provider, cutoff))
        return HttpResponseRedirect(request.META["HTTP_REFERER"])
    elif tool == "clear_all_archive_possible_matches":
        ArchiveMatches.objects.all().delete()
        frontend_logger.info('Clearing all possible matches for archives.')
        messages.success(request,
                         'Clearing all possible matches for archives.')
        return HttpResponseRedirect(request.META["HTTP_REFERER"])
    elif tool == "search_wanted_galleries_provider_titles":
        if thread_exists('web_search_worker'):
            messages.error(request,
                           'Web search worker is already running.',
                           extra_tags='danger')
            return HttpResponseRedirect(request.META["HTTP_REFERER"])
        results = WantedGallery.objects.eligible_to_search()

        if not results:
            frontend_logger.info('No wanted galleries eligible to search.')
            messages.success(request,
                             'No wanted galleries eligible to search.')
            return HttpResponseRedirect(request.META["HTTP_REFERER"])

        provider = request.GET.get('provider', '')

        frontend_logger.info(
            'Searching for gallery matches in panda for wanted galleries, starting thread.'
        )
        messages.success(
            request,
            'Searching for gallery matches in panda for wanted galleries, starting thread.'
        )

        panda_search_thread = threading.Thread(
            name='web_search_worker',
            target=create_matches_wanted_galleries_from_providers,
            args=(results, provider),
            kwargs={'logger': frontend_logger})
        panda_search_thread.daemon = True
        panda_search_thread.start()
        return HttpResponseRedirect(request.META["HTTP_REFERER"])
    elif tool == "wanted_galleries_possible_matches":
        if thread_exists('wanted_local_search_worker'):
            return render_error(
                request, "Wanted local matching worker is already running.")

        non_match_wanted = WantedGallery.objects.eligible_to_search()

        if not non_match_wanted:
            frontend_logger.info('No wanted galleries eligible to search.')
            messages.success(request,
                             'No wanted galleries eligible to search.')
            return HttpResponseRedirect(request.META["HTTP_REFERER"])

        frontend_logger.info(
            'Looking for possible matches in gallery database '
            'for wanted galleries (fixed 0.4 cutoff)')

        provider = request.GET.get('provider', '')

        try:
            cutoff = float(request.GET.get('cutoff', '0.4'))
        except ValueError:
            cutoff = 0.4
        try:
            max_matches = int(request.GET.get('max-matches', '10'))
        except ValueError:
            max_matches = 10

        matching_thread = threading.Thread(
            name='wanted_local_search_worker',
            target=create_matches_wanted_galleries_from_providers_internal,
            args=(non_match_wanted, ),
            kwargs={
                'logger': frontend_logger,
                'provider_filter': provider,
                'cutoff': cutoff,
                'max_matches': max_matches
            })
        matching_thread.daemon = True
        matching_thread.start()
        return HttpResponseRedirect(request.META["HTTP_REFERER"])
    elif tool == "restart_viewer":
        crawler_settings.workers.stop_workers_and_wait()
        if hasattr(signal, 'SIGUSR2'):
            os.kill(os.getpid(), signal.SIGUSR2)
        else:
            return render_error(request,
                                "This OS does not support signal SIGUSR2.")
        return HttpResponseRedirect(request.META["HTTP_REFERER"])
    elif tool == "modify_settings":
        p = request.POST
        if p:
            settings_text = p['settings_file']
            if (os.path.isfile(
                    os.path.join(crawler_settings.default_dir,
                                 "settings.ini"))):
                with open(os.path.join(crawler_settings.default_dir,
                                       "settings.ini"),
                          "w",
                          encoding="utf-8") as f:
                    f.write(settings_text)
                    frontend_logger.info(
                        'Modified settings file for Panda Backup')
                    messages.success(
                        request, 'Modified settings file for Panda Backup')
                    return HttpResponseRedirect(request.META["HTTP_REFERER"])
        else:
            if (os.path.isfile(
                    os.path.join(crawler_settings.default_dir,
                                 "settings.ini"))):
                with open(os.path.join(crawler_settings.default_dir,
                                       "settings.ini"),
                          "r",
                          encoding="utf-8") as f:
                    first = f.read(1)
                    if first != '\ufeff':
                        # not a BOM, rewind
                        f.seek(0)
                    settings_text = f.read()
    elif tool == "reload_settings":
        if not request.user.is_staff:
            return render_error(
                request, "You need to be an admin to reload the config.")
        crawler_settings.load_config_from_file()
        frontend_logger.info('Reloaded settings file for Panda Backup')
        messages.success(request, 'Reloaded settings file for Panda Backup')
        return HttpResponseRedirect(request.META["HTTP_REFERER"])
    elif tool == "start_timed_dl":
        crawler_settings.workers.timed_downloader.start_running(
            timer=crawler_settings.timed_downloader_cycle_timer)
        return HttpResponseRedirect(request.META["HTTP_REFERER"])
    elif tool == "stop_timed_dl":
        crawler_settings.workers.timed_downloader.stop_running()
        return HttpResponseRedirect(request.META["HTTP_REFERER"])
    elif tool == "force_run_timed_dl":
        crawler_settings.workers.timed_downloader.stop_running()
        crawler_settings.workers.timed_downloader.force_run_once = True
        crawler_settings.workers.timed_downloader.start_running(
            timer=crawler_settings.timed_downloader_cycle_timer)
        return HttpResponseRedirect(request.META["HTTP_REFERER"])
    elif tool == "start_timed_crawler":
        crawler_settings.workers.timed_crawler.start_running(
            timer=crawler_settings.autochecker.cycle_timer)
        return HttpResponseRedirect(request.META["HTTP_REFERER"])
    elif tool == "stop_timed_crawler":
        crawler_settings.workers.timed_crawler.stop_running()
        return HttpResponseRedirect(request.META["HTTP_REFERER"])
    elif tool == "force_run_timed_crawler":
        crawler_settings.workers.timed_crawler.stop_running()
        crawler_settings.workers.timed_crawler.force_run_once = True
        crawler_settings.workers.timed_crawler.start_running(
            timer=crawler_settings.autochecker.cycle_timer)
        return HttpResponseRedirect(request.META["HTTP_REFERER"])
    elif tool == "start_timed_updater":
        crawler_settings.workers.timed_updater.start_running(
            timer=crawler_settings.autoupdater.cycle_timer)
        return HttpResponseRedirect(request.META["HTTP_REFERER"])
    elif tool == "stop_timed_updater":
        crawler_settings.workers.timed_updater.stop_running()
        return HttpResponseRedirect(request.META["HTTP_REFERER"])
    elif tool == "force_run_timed_updater":
        crawler_settings.workers.timed_updater.stop_running()
        crawler_settings.workers.timed_updater.force_run_once = True
        crawler_settings.workers.timed_updater.start_running(
            timer=crawler_settings.autoupdater.cycle_timer)
        return HttpResponseRedirect(request.META["HTTP_REFERER"])
    elif tool == "start_timed_auto_wanted":
        crawler_settings.workers.timed_auto_wanted.start_running(
            timer=crawler_settings.auto_wanted.cycle_timer)
        return HttpResponseRedirect(request.META["HTTP_REFERER"])
    elif tool == "stop_timed_auto_wanted":
        crawler_settings.workers.timed_auto_wanted.stop_running()
        return HttpResponseRedirect(request.META["HTTP_REFERER"])
    elif tool == "force_run_timed_auto_wanted":
        crawler_settings.workers.timed_auto_wanted.stop_running()
        crawler_settings.workers.timed_auto_wanted.force_run_once = True
        crawler_settings.workers.timed_auto_wanted.start_running(
            timer=crawler_settings.auto_wanted.cycle_timer)
        return HttpResponseRedirect(request.META["HTTP_REFERER"])
    elif tool == "start_web_queue":
        crawler_settings.workers.web_queue.start_running()
        return HttpResponseRedirect(request.META["HTTP_REFERER"])

    threads_status = get_thread_status_bool()

    d = {
        'tool': tool,
        'settings_text': settings_text,
        'threads_status': threads_status
    }

    return render(request, "viewer/tools.html", d)
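In the modify_settings branch above, the read path strips a UTF-8 BOM by peeking at the first character and rewinding when none is found. Python's utf-8-sig codec does this transparently; a hedged equivalent (the directory name is a placeholder for crawler_settings.default_dir):

import os

# 'utf-8-sig' consumes a leading BOM if present and behaves like plain 'utf-8'
# otherwise, so the manual read(1)/seek(0) check is not needed.
settings_path = os.path.join('config_dir', 'settings.ini')  # 'config_dir' is a placeholder
with open(settings_path, 'r', encoding='utf-8-sig') as f:
    settings_text = f.read()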
Example #22
def stats_collection(request: HttpRequest) -> HttpResponse:
    """Display galleries and archives stats."""

    if not request.user.is_staff:
        return render_error(request, "You need to be an admin to display the stats.")

    # General
    stats_dict = {
        "n_archives": Archive.objects.count(),
        "n_expanded_archives": Archive.objects.filter(extracted=True).count(),
        "n_to_download_archives": Archive.objects.filter_by_dl_remote().count(),
        "n_galleries": Gallery.objects.count(),
        "archive": Archive.objects.filter(filesize__gt=0).aggregate(
            Avg('filesize'), Max('filesize'), Min('filesize'), Sum('filesize'), Avg('filecount'), Sum('filecount')),
        "gallery": Gallery.objects.filter(filesize__gt=0).aggregate(
            Avg('filesize'), Max('filesize'), Min('filesize'), Sum('filesize'), Avg('filecount'), Sum('filecount')),
        "hidden_galleries": Gallery.objects.filter(hidden=True).count(),
        "hidden_galleries_size": Gallery.objects.filter(
            filesize__gt=0, hidden=True).aggregate(Sum('filesize')),
        "fjord_galleries": Gallery.objects.filter(fjord=True).count(),
        "expunged_galleries": Gallery.objects.filter(expunged=True).count(),
        "n_tags": Tag.objects.count(),
        "n_tag_scopes": Tag.objects.values('scope').distinct().count(),
        "n_custom_tags": Tag.objects.are_custom().count(),  # type: ignore
        "top_10_tags": Tag.objects.annotate(num_archive=Count('gallery_tags')).order_by('-num_archive')[:10],
        "top_10_parody_tags": Tag.objects.filter(scope='parody').annotate(
            num_archive=Count('gallery_tags')).order_by('-num_archive')[:10],
        "top_10_artist_tags": Tag.objects.filter(scope='artist').annotate(
            num_archive=Count('gallery_tags')).order_by('-num_archive')[:10],
        "wanted_galleries": {
            "total": WantedGallery.objects.all().count(),
            "found": WantedGallery.objects.filter(found=True).count(),
            "total_galleries_found": FoundGallery.objects.all().count(),
            "user_created": WantedGallery.objects.filter(book_type='user').count(),
        }
    }

    # Per provider
    providers = Gallery.objects.all().values_list('provider', flat=True).distinct()

    providers_dict = {}

    for provider in providers:
        providers_dict[provider] = {
            'galleries': Gallery.objects.filter(provider=provider).count(),
            'archives': Archive.objects.filter(gallery__provider=provider).count(),
            'wanted_galleries': WantedGallery.objects.filter(provider=provider).count(),

        }

    # Per category
    categories = Gallery.objects.all().values_list('category', flat=True).distinct()

    categories_dict = {}

    for category in categories:
        categories_dict[category] = {
            'n_galleries': Gallery.objects.filter(category=category).count(),
            'gallery': Gallery.objects.filter(
                filesize__gt=0, category=category
            ).aggregate(
                Avg('filesize'), Max('filesize'), Min('filesize'), Sum('filesize'), Avg('filecount'), Sum('filecount')
            )
        }

    # Per language tag
    languages = Tag.objects.filter(
        scope='language'
    ).exclude(
        scope='language', name='translated'
    ).annotate(num_gallery=Count('gallery')).order_by('-num_gallery').values_list('name', flat=True).distinct()

    languages_dict = {}

    languages_dict['untranslated'] = {
        'n_galleries': Gallery.objects.exclude(tags__scope='language').distinct().count(),
        'gallery': Gallery.objects.filter(
            filesize__gt=0, tags__scope='language'
        ).distinct().aggregate(
            Avg('filesize'), Max('filesize'), Min('filesize'), Sum('filesize'), Avg('filecount'), Sum('filecount')
        )
    }

    for language in languages:
        languages_dict[language] = {
            'n_galleries': Gallery.objects.filter(tags__scope='language', tags__name=language).distinct().count(),
            'gallery': Gallery.objects.filter(
                filesize__gt=0, tags__scope='language', tags__name=language
            ).distinct().aggregate(
                Avg('filesize'), Max('filesize'), Min('filesize'), Sum('filesize'), Avg('filecount'), Sum('filecount')
            )
        }

    d = {
        'stats': stats_dict, 'providers': providers_dict,
        'gallery_categories': categories_dict, 'gallery_languages': languages_dict
    }

    return render(request, "viewer/stats_collection.html", d)
Example #23
def archives_not_matched_with_gallery(request: HttpRequest) -> HttpResponse:
    p = request.POST
    get = request.GET

    title = get.get("title", '')
    tags = get.get("tags", '')

    try:
        page = int(get.get("page", '1'))
    except ValueError:
        page = 1

    if 'clear' in get:
        form = ArchiveSearchForm()
    else:
        form = ArchiveSearchForm(initial={'title': title, 'tags': tags})

    if p:
        pks = []
        for k, v in p.items():
            if k.startswith("sel-"):
                # k, pk = k.split('-')
                # results[pk][k] = v
                pks.append(v)
        archives = Archive.objects.filter(id__in=pks).order_by('-create_date')
        if 'delete_archives' in p:
            for archive in archives:
                message = 'Removing archive not matched: {} and deleting file: {}'.format(
                    archive.title, archive.zipped.path)
                frontend_logger.info(message)
                messages.success(request, message)
                archive.delete_all_files()
                archive.delete()
        elif 'delete_objects' in p:
            for archive in archives:
                message = 'Removing archive not matched: {}, keeping file: {}'.format(
                    archive.title, archive.zipped.path)
                frontend_logger.info(message)
                messages.success(request, message)
                archive.delete_files_but_archive()
                archive.delete()
        elif 'create_possible_matches' in p:
            if thread_exists('web_match_worker'):
                return render_error(request,
                                    'Web match worker is already running.')

            matcher_filter = p['create_possible_matches']
            try:
                cutoff = float(p.get('cutoff', '0.4'))
            except ValueError:
                cutoff = 0.4
            try:
                max_matches = int(p.get('max-matches', '10'))
            except ValueError:
                max_matches = 10

            web_match_thread = threading.Thread(
                name='web_match_worker',
                target=generate_possible_matches_for_archives,
                args=(archives, ),
                kwargs={
                    'logger': frontend_logger,
                    'cutoff': cutoff,
                    'max_matches': max_matches,
                    'filters': (matcher_filter, ),
                    'match_local': False,
                    'match_web': True
                })
            web_match_thread.daemon = True
            web_match_thread.start()
            messages.success(request, 'Starting web match worker.')
        elif 'create_possible_matches_internal' in p:
            if thread_exists('match_unmatched_worker'):
                return render_error(
                    request, "Local matching worker is already running.")
            provider = p['create_possible_matches_internal']
            try:
                cutoff = float(p.get('cutoff', '0.4'))
            except ValueError:
                cutoff = 0.4
            try:
                max_matches = int(p.get('max-matches', '10'))
            except ValueError:
                max_matches = 10

            frontend_logger.info(
                'Looking for possible matches in gallery database '
                'for non-matched archives (cutoff: {}, max matches: {}) '
                'using provider filter "{}"'.format(cutoff, max_matches,
                                                    provider))
            matching_thread = threading.Thread(
                name='match_unmatched_worker',
                target=generate_possible_matches_for_archives,
                args=(archives, ),
                kwargs={
                    'logger': frontend_logger,
                    'cutoff': cutoff,
                    'max_matches': max_matches,
                    'filters': (provider, ),
                    'match_local': True,
                    'match_web': False
                })
            matching_thread.daemon = True
            matching_thread.start()
            messages.success(request, 'Starting internal match worker.')

    params = {
        'sort': 'create_date',
        'asc_desc': 'desc',
        'filename': title,
    }

    for k, v in get.items():
        params[k] = v

    for k in archive_filter_keys:
        if k not in params:
            params[k] = ''

    results = filter_archives_simple(params)

    results = results.filter(gallery__isnull=True).prefetch_related(
        Prefetch('archivematches_set',
                 queryset=ArchiveMatches.objects.select_related(
                     'gallery', 'archive').prefetch_related(
                         Prefetch('gallery__tags',
                                  queryset=Tag.objects.filter(
                                      scope__exact='artist'),
                                  to_attr='artist_tags')),
                 to_attr='possible_galleries'),
        'possible_galleries__gallery',
    )

    if 'no-custom-tags' in get:
        results = results.annotate(
            num_custom_tags=Count('custom_tags')).filter(num_custom_tags=0)
    if 'with-possible-matches' in get:
        results = results.annotate(
            n_possible_matches=Count('possible_matches')).filter(
                n_possible_matches__gt=0)

    paginator = Paginator(results, 100)
    try:
        results = paginator.page(page)
    except (InvalidPage, EmptyPage):
        results = paginator.page(paginator.num_pages)

    d = {
        'results':
        results,
        'providers':
        Gallery.objects.all().values_list('provider', flat=True).distinct(),
        'matchers':
        crawler_settings.provider_context.get_matchers(crawler_settings,
                                                       force=True),
        'api_key':
        crawler_settings.api_key,
        'form':
        form
    }
    return render(request, "viewer/archives_not_matched.html", d)
Example #24
def logs(request: HttpRequest, tool: str = "all") -> HttpResponse:
    """Logs listing."""

    if not request.user.is_staff:
        return render_error(request, "You need to be an admin to see logs.")

    lines_info: dict[int, dict] = defaultdict(dict)

    with open(MAIN_LOGGER, 'rt', encoding='utf8') as f:
        log_lines: list[str] = f.read().split('[0m\n')
    log_lines.pop()
    log_lines.reverse()

    log_filter = request.GET.get('filter', '')

    if log_filter:
        # log_lines = [x for x in log_lines if log_filter.lower() in x.lower()]
        max_len = len(log_lines)
        found_indices = set()

        for i, log_line in enumerate(log_lines):
            if log_filter.lower() in log_line.lower():
                found_indices.update(list(range(max(i - 10, 0), min(i + 10, max_len))))
                lines_info[i]['highlighted'] = True

        log_lines_extended: list[tuple[str, dict]] = [(x, lines_info[i]) for i, x in enumerate(log_lines) if i in found_indices]
    else:
        log_lines_extended = [(x, lines_info[i]) for i, x in enumerate(log_lines)]

    current_base_uri = re.escape('{scheme}://{host}'.format(scheme=request.scheme, host=request.get_host()))
    # Build complete URL for relative internal URLs (some)
    if crawler_settings.urls.viewer_main_url:
        patterns = [
            r'(?!' + current_base_uri + r')/' + crawler_settings.urls.viewer_main_url + r"archive/\d+/?",
            r'(?!' + current_base_uri + r')/' + crawler_settings.urls.viewer_main_url + r"gallery/\d+/?",
            r'(?!' + current_base_uri + r')/' + crawler_settings.urls.viewer_main_url + r"wanted-gallery/\d+/?",
            r'(?!' + current_base_uri + r')/' + crawler_settings.urls.viewer_main_url + r"archive-group/[a-z0-9]+(?:-[a-z0-9]+)*/?",
        ]
    else:
        patterns = [
            r'(?<!' + current_base_uri + r')/archive/\d+/?',
            r'(?<!' + current_base_uri + r')/gallery/\d+/?',
            r'(?<!' + current_base_uri + r')/wanted-gallery/\d+/?',
            r'(?<!' + current_base_uri + r')/archive-group/[a-z0-9]+(?:-[a-z0-9]+)*/?',
        ]

    def build_request(match_obj: typing.Match) -> str:
        return request.build_absolute_uri(match_obj.group(0))

    log_lines_extended = [(reduce(lambda v, pattern: re.sub(pattern, build_request, v), patterns, line[0]), line[1]) for line in log_lines_extended]

    paginator = Paginator(log_lines_extended, 100)
    try:
        page = int(request.GET.get("page", '1'))
    except ValueError:
        page = 1

    try:
        log_lines_paginated = paginator.page(page)
    except (InvalidPage, EmptyPage):
        log_lines_paginated = paginator.page(paginator.num_pages)

    d = {'log_lines': log_lines_paginated}
    return render(request, "viewer/logs.html", d)
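Compared with the earlier logs view, this version keeps up to 10 lines of surrounding context around each filter hit (a grep -C style window) and marks the hit itself via lines_info[i]['highlighted']. The windowing logic in isolation, with a context of 1 for brevity (names and data are illustrative):

lines = ['a', 'error: boom', 'b', 'c', 'd']
needle = 'error'
context = 1  # the view above uses 10

hits = {i for i, line in enumerate(lines) if needle in line.lower()}
keep: set[int] = set()
for i in hits:
    # Keep the hit plus up to `context` lines before and after it.
    keep.update(range(max(i - context, 0), min(i + context + 1, len(lines))))

window = [(i, lines[i], i in hits) for i in sorted(keep)]
# -> [(0, 'a', False), (1, 'error: boom', True), (2, 'b', False)]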