Example #1
# Imports needed to run this view; the project-local names
# (crawler_settings, Settings, FolderCrawlerThread, render_error)
# come from the surrounding application and are assumed to be in scope.
import os
import typing

from django.contrib import messages
from django.http import HttpRequest, HttpResponse, HttpResponseRedirect
from django.shortcuts import render
from django.urls import reverse


def foldercrawler(request: HttpRequest) -> HttpResponse:
    """Folder crawler."""
    if not request.user.is_staff:
        return render_error(request, "You need to be an admin to use the tools.")

    d: dict[str, typing.Any] = {'media_root': os.path.realpath(crawler_settings.MEDIA_ROOT)}

    p = request.POST

    if p:
        if 'keep_this_settings' in p:
            current_settings = crawler_settings
        else:
            current_settings = Settings(load_from_config=crawler_settings.config)
        commands = set()
        # Apply per-matcher priorities and collect the commands to run
        for k, v in p.items():
            if k.startswith("matchers"):
                # Field names look like "matchers-<name>"; split once so a
                # matcher name containing '-' does not break the unpacking
                _, matcher = k.split('-', 1)
                current_settings.config['matchers'][matcher] = v
                current_settings.matchers[matcher] = int(v)
            elif k == "commands":
                command_list = v.split("\n")
                for item in command_list:
                    commands.add(item.rstrip('\r'))
            elif k == "internal_matches":
                current_settings.internal_matches_for_non_matches = True

        if 'reason' in p and p['reason'] != '':
            reason = p['reason']
            # Force limit string length (reason field max_length)
            current_settings.archive_reason = reason[:200]
            current_settings.gallery_reason = reason[:200]

        if 'source' in p and p['source'] != '':
            source = p['source']
            # Force limit string length (source field max_length)
            current_settings.archive_source = source[:50]

        if 'keep_this_settings' in p:
            current_settings.write()
            current_settings.load_config_from_file()
        folder_crawler = FolderCrawlerThread(current_settings, list(commands))
        folder_crawler.start()
        messages.success(request, 'Starting Folder Crawler, check the logs for a report.')
        # Not really optimal when there are many commands being queued
        # for command in commands:
        #     messages.success(request, command)
        return HttpResponseRedirect(reverse('viewer:main-page'))

    d.update({
        'settings': crawler_settings,
        'matchers': crawler_settings.provider_context.get_matchers_name_priority(crawler_settings)
    })

    return render(request, "viewer/foldercrawler.html", d)
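
A quick way to see what this view expects is to drive it with Django's test client. The sketch below is illustrative only: the URL name 'viewer:foldercrawler' and the matcher name 'title' are assumptions, while the POST keys themselves are taken from the view above.

from django.contrib.auth import get_user_model
from django.test import Client
from django.urls import reverse

# The view rejects non-staff requests, so create a staff user first.
staff_user = get_user_model().objects.create_user(
    'admin', password='secret', is_staff=True)

client = Client()
client.force_login(staff_user)

response = client.post(reverse('viewer:foldercrawler'), {  # URL name assumed
    'commands': '/data/folder_a\n/data/folder_b',  # one folder per line
    'matchers-title': '1',    # per-matcher priority, sent as a string
    'reason': 'bulk import',  # truncated to 200 chars by the view
    'source': 'local',        # truncated to 50 chars by the view
    # 'keep_this_settings': 'on',  # would persist the settings via write()
})
assert response.status_code == 302  # redirect to viewer:main-page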
Example #2
# Uses the same Django imports as Example #1; crawler_logger and
# CrawlerThread are project-local names assumed to be in scope.
def crawler(request: HttpRequest) -> HttpResponse:
    """Crawl given URLs."""

    if not request.user.is_staff:
        return render_error(request,
                            "You need to be an admin to crawl a link.")

    d = {}

    p = request.POST

    if p:
        if 'keep_this_settings' in p:
            current_settings = crawler_settings
        else:
            current_settings = Settings(
                load_from_config=crawler_settings.config)
        url_set = set()
        # Apply per-downloader priorities and collect the URLs to crawl;
        # metadata replacement stays off unless explicitly requested below
        current_settings.replace_metadata = False
        current_settings.config['allowed']['replace_metadata'] = 'no'
        for k, v in p.items():
            if k.startswith("downloaders"):
                # Field names look like "downloaders-<name>"; split once so a
                # downloader name containing '-' does not break the unpacking
                _, dl = k.split('-', 1)
                current_settings.config['downloaders'][dl] = v
                current_settings.downloaders[dl] = int(v)
            elif k == "replace_metadata":
                current_settings.config['allowed'][k] = 'yes'
                current_settings.replace_metadata = True
            elif k == "urls":
                url_list = v.split("\n")
                for item in url_list:
                    url_set.add(item.rstrip('\r'))
        urls = list(url_set)

        if 'reason' in p and p['reason'] != '':
            reason = p['reason']
            # Force limit string length (reason field max_length)
            current_settings.archive_reason = reason[:200]
            current_settings.gallery_reason = reason[:200]

        if 'keep_this_settings' in p:
            current_settings.write()
            current_settings.load_config_from_file()
        if 'run_separate' in p:
            crawler_thread = CrawlerThread(crawler_logger, current_settings,
                                           urls)
            crawler_thread.start()
        else:
            current_settings.workers.web_queue.enqueue_args_list(
                urls, override_options=current_settings)
        messages.success(request,
                         'Starting Crawler, check the logs for a report.')
        # Not really optimal when there are many URLs being queued
        # for url in urls:
        #     messages.success(request, url)
        return HttpResponseRedirect(reverse('viewer:main-page'))

    d.update({
        'settings': crawler_settings,
        'downloaders': crawler_settings.provider_context.get_downloaders_name_priority(crawler_settings)
    })

    return render(request, "viewer/crawler.html", d)
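
The same test-client pattern works for this view. Again a sketch, not the project's own test code: the URL name 'viewer:crawler' and the downloader name 'archive' are assumptions; the POST keys mirror what crawler() reads above.

from django.contrib.auth import get_user_model
from django.test import Client
from django.urls import reverse

staff_user = get_user_model().objects.create_user(
    'admin', password='secret', is_staff=True)

client = Client()
client.force_login(staff_user)

response = client.post(reverse('viewer:crawler'), {  # URL name assumed
    'urls': 'https://example.com/g/1\nhttps://example.com/g/2',
    'downloaders-archive': '1',  # per-downloader priority, sent as a string
    'replace_metadata': 'on',    # flips allowed.replace_metadata to 'yes'
    'reason': 'manual crawl',    # truncated to 200 chars by the view
    # 'run_separate': 'on',      # would run a CrawlerThread instead of the web queue
})
assert response.status_code == 302  # redirect to viewer:main-page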