def foldercrawler(request: HttpRequest) -> HttpResponse:
    """Folder crawler."""
    if not request.user.is_staff:
        return render_error(request, "You need to be an admin to use the tools.")

    d: dict[str, typing.Any] = {'media_root': os.path.realpath(crawler_settings.MEDIA_ROOT)}

    p = request.POST

    if p:
        # Either reuse the live settings (and persist changes) or work on a throwaway copy.
        if 'keep_this_settings' in p:
            current_settings = crawler_settings
        else:
            current_settings = Settings(load_from_config=crawler_settings.config)
        commands = set()
        # Parse posted options: matcher priorities, the command list and the internal-matches flag.
        for k, v in p.items():
            if k.startswith("matchers"):
                k, matcher = k.split('-')
                current_settings.config['matchers'][matcher] = v
                current_settings.matchers[matcher] = int(v)
            elif k == "commands":
                command_list = v.split("\n")
                for item in command_list:
                    commands.add(item.rstrip('\r'))
            elif k == "internal_matches":
                current_settings.internal_matches_for_non_matches = True
        if 'reason' in p and p['reason'] != '':
            reason = p['reason']
            # Force limit string length (reason field max_length)
            current_settings.archive_reason = reason[:200]
            current_settings.gallery_reason = reason[:200]
        if 'source' in p and p['source'] != '':
            source = p['source']
            # Force limit string length (source field max_length)
            current_settings.archive_source = source[:50]
        if 'keep_this_settings' in p:
            current_settings.write()
            current_settings.load_config_from_file()
        folder_crawler = FolderCrawlerThread(current_settings, list(commands))
        folder_crawler.start()

        messages.success(request, 'Starting Folder Crawler, check the logs for a report.')
        # Not really optimal when there's many commands being queued
        # for command in commands:
        #     messages.success(request, command)
        return HttpResponseRedirect(reverse('viewer:main-page'))

    d.update({
        'settings': crawler_settings,
        'matchers': crawler_settings.provider_context.get_matchers_name_priority(crawler_settings)
    })

    return render(request, "viewer/foldercrawler.html", d)
def crawler(request: HttpRequest) -> HttpResponse:
    """Crawl given URLs."""
    if not request.user.is_staff:
        return render_error(request, "You need to be an admin to crawl a link.")

    d = {}

    p = request.POST

    if p:
        # Either reuse the live settings (and persist changes) or work on a throwaway copy.
        if 'keep_this_settings' in p:
            current_settings = crawler_settings
        else:
            current_settings = Settings(load_from_config=crawler_settings.config)
        url_set = set()
        # Parse posted options: downloader priorities, metadata replacement and the URL list.
        current_settings.replace_metadata = False
        current_settings.config['allowed']['replace_metadata'] = 'no'
        for k, v in p.items():
            if k.startswith("downloaders"):
                k, dl = k.split('-')
                current_settings.config['downloaders'][dl] = v
                current_settings.downloaders[dl] = int(v)
            elif k == "replace_metadata":
                current_settings.config['allowed'][k] = 'yes'
                current_settings.replace_metadata = True
            elif k == "urls":
                url_list = v.split("\n")
                for item in url_list:
                    url_set.add(item.rstrip('\r'))
        urls = list(url_set)
        if 'reason' in p and p['reason'] != '':
            reason = p['reason']
            # Force limit string length (reason field max_length)
            current_settings.archive_reason = reason[:200]
            current_settings.gallery_reason = reason[:200]
        if 'keep_this_settings' in p:
            current_settings.write()
            current_settings.load_config_from_file()
        # Run in a dedicated thread, or enqueue the URLs on the shared web queue.
        if 'run_separate' in p:
            crawler_thread = CrawlerThread(crawler_logger, current_settings, urls)
            crawler_thread.start()
        else:
            current_settings.workers.web_queue.enqueue_args_list(urls, override_options=current_settings)

        messages.success(request, 'Starting Crawler, check the logs for a report.')
        # Not really optimal when there's many commands being queued
        # for command in url_list:
        #     messages.success(request, command)
        return HttpResponseRedirect(reverse('viewer:main-page'))

    d.update({
        'settings': crawler_settings,
        'downloaders': crawler_settings.provider_context.get_downloaders_name_priority(crawler_settings)
    })

    return render(request, "viewer/crawler.html", d)