def recall_api(request: HttpRequest, pk: int) -> HttpResponse:
    """Recall provider API, if possible."""
    if not request.user.is_staff:
        return render_error(request, "You need to be an admin to recall the API.")
    try:
        archive = Archive.objects.get(pk=pk)
    except Archive.DoesNotExist:
        raise Http404("Archive does not exist")
    if not archive.gallery_id:
        return render_error(request, "No gallery associated with this archive.")
    gallery = Gallery.objects.get(pk=archive.gallery_id)
    current_settings = Settings(load_from_config=crawler_settings.config)
    if current_settings.workers.web_queue:
        current_settings.set_update_metadata_options(providers=(gallery.provider,))
        current_settings.workers.web_queue.enqueue_args_list(
            (gallery.get_link(),), override_options=current_settings)
        frontend_logger.info(
            'Updating gallery API data for gallery: {} and related archives'.format(
                gallery.get_absolute_url()))
    return HttpResponseRedirect(request.META["HTTP_REFERER"])

def reduce(request: HttpRequest, pk: int) -> HttpResponse:
    """Reduce archive."""
    if not request.user.has_perm('viewer.expand_archive'):
        return render_error(request, "You don't have the permission to reduce an Archive.")
    try:
        with transaction.atomic():
            archive = Archive.objects.select_for_update().get(pk=pk)
            if not archive.extracted:
                return render_error(request, "Archive is already reduced.")
            logger.info('Reducing images for archive: {}'.format(archive.get_absolute_url()))
            archive.reduce()
            action = 'REDUCE_ARCHIVE'
            event_log(
                request.user,
                action,
                content_object=archive,
                result='success'
            )
    except Archive.DoesNotExist:
        raise Http404("Archive does not exist")
    return HttpResponseRedirect(request.META["HTTP_REFERER"])

def queue_operations(request: HttpRequest, operation: str, arguments: str = '') -> HttpResponseRedirect:
    if not request.user.is_staff:
        return render_error(request, "You need to be an admin to operate the queue.")
    if operation == "remove_by_index":
        if arguments and crawler_settings.workers.web_queue:
            crawler_settings.workers.web_queue.remove_by_index(int(arguments))
        else:
            return render_error(request, "Unknown argument.")
    else:
        return render_error(request, "Unknown queue operation.")
    return HttpResponseRedirect(request.META["HTTP_REFERER"])

def logs(request: HttpRequest, tool: str = "all") -> HttpResponse:
    """Logs listing."""
    if not request.user.is_staff:
        return render_error(request, "You need to be an admin to see logs.")
    log_lines: typing.List[str] = []
    if tool == "all" or tool == "webcrawler":
        f = open(MAIN_LOGGER, 'rt', encoding='utf8')
        log_lines = f.read().split('[0m\n')
        f.close()
        log_lines.pop()
        log_lines.reverse()
    log_filter = request.GET.get('filter', '')
    if log_filter:
        log_lines = [x for x in log_lines if log_filter.lower() in x.lower()]
    current_base_uri = re.escape('{scheme}://{host}'.format(
        scheme=request.scheme, host=request.get_host()))
    # Build complete URL for relative internal URLs (some)
    if crawler_settings.urls.viewer_main_url:
        patterns = [
            r'(?!' + current_base_uri + r')/' + crawler_settings.urls.viewer_main_url + r"archive/\d+/?",
            r'(?!' + current_base_uri + r')/' + crawler_settings.urls.viewer_main_url + r"gallery/\d+/?",
            r'(?!' + current_base_uri + r')/' + crawler_settings.urls.viewer_main_url + r"wanted-gallery/\d+/?",
        ]
    else:
        patterns = [
            r'(?<!' + current_base_uri + r')/archive/\d+/?',
            r'(?<!' + current_base_uri + r')/gallery/\d+/?',
            r'(?<!' + current_base_uri + r')/wanted-gallery/\d+/?',
        ]

    def build_request(match_obj: typing.Match) -> str:
        return request.build_absolute_uri(match_obj.group(0))

    log_lines = [
        reduce(lambda v, pattern: re.sub(pattern, build_request, v), patterns, line)
        for line in log_lines
    ]
    paginator = Paginator(log_lines, 100)
    try:
        page = int(request.GET.get("page", '1'))
    except ValueError:
        page = 1
    try:
        log_lines = paginator.page(page)
    except (InvalidPage, EmptyPage):
        log_lines = paginator.page(paginator.num_pages)
    d = {'log_lines': log_lines}
    return render(request, "viewer/logs.html", d)

def public_toggle(request: HttpRequest, pk: int) -> HttpResponse:
    """Public archive toggle."""
    if not request.user.is_staff:
        return render_error(
            request,
            "You need to be an admin to toggle public access for an archive.")
    try:
        archive = Archive.objects.get(pk=pk)
    except Archive.DoesNotExist:
        raise Http404("Archive does not exist")
    if archive.public:
        archive.set_private()
        frontend_logger.info('Setting public status to: private for ' + archive.zipped.name)
        event_log(request.user, 'UNPUBLISH_ARCHIVE', content_object=archive, result='unpublished')
    else:
        archive.set_public()
        frontend_logger.info('Setting public status to: public for ' + archive.zipped.name)
        event_log(request.user, 'PUBLISH_ARCHIVE', content_object=archive, result='published')
    return HttpResponseRedirect(request.META["HTTP_REFERER"])

def stats_workers(request: HttpRequest) -> HttpResponse:
    """Display workers stats."""
    if not request.user.is_staff:
        return render_error(request, "You need to be an admin to view workers' status.")
    stats_dict = {
        "current_settings": crawler_settings,
        "thread_status": get_thread_status(),
        "web_queue": crawler_settings.workers.web_queue,
        "post_downloader": crawler_settings.workers.timed_downloader,
        "schedulers": get_schedulers_status([
            crawler_settings.workers.timed_crawler,
            crawler_settings.workers.timed_downloader,
            crawler_settings.workers.timed_auto_wanted,
            crawler_settings.workers.timed_updater
        ])
    }
    d = {'stats': stats_dict}
    return render(request, "viewer/stats_workers.html", d)

def public_toggle(request: HttpRequest, pk: int) -> HttpResponse:
    """Public archive toggle."""
    if not request.user.has_perm('viewer.publish_archive'):
        return render_error(request, "You don't have the permission to change public status for an Archive.")
    try:
        archive = Archive.objects.get(pk=pk)
    except Archive.DoesNotExist:
        raise Http404("Archive does not exist")
    if archive.public:
        archive.set_private()
        logger.info('Setting public status to: private for ' + archive.zipped.name)
        event_log(
            request.user,
            'UNPUBLISH_ARCHIVE',
            content_object=archive,
            result='unpublished'
        )
    else:
        archive.set_public()
        logger.info('Setting public status to: public for ' + archive.zipped.name)
        event_log(
            request.user,
            'PUBLISH_ARCHIVE',
            content_object=archive,
            result='published'
        )
    return HttpResponseRedirect(request.META["HTTP_REFERER"])

def gallery_frequency(request: HttpRequest) -> HttpResponse:
    if not crawler_settings.urls.enable_gallery_frequency:
        if not request.user.is_staff:
            raise Http404("Page not found")
        else:
            return render_error(request, "Page disabled by settings (urls: enable_gallery_frequency).")
    title = request.GET.get("title", '')
    tags = request.GET.get("tags", '')
    if 'clear' in request.GET:
        request.GET = QueryDict('')
        form = GallerySearchForm()
        form_simple = GallerySearchSimpleForm(request.GET, error_class=SpanErrorList)
    else:
        form = GallerySearchForm(initial={'title': title, 'tags': tags})
        form_simple = GallerySearchSimpleForm(request.GET, error_class=SpanErrorList)
        form_simple.is_valid()
        for field_name, errors in form_simple.errors.items():
            messages.error(request, field_name + ": " + ", ".join(errors), extra_tags='danger')
    d = {'form': form, 'form_simple': form_simple}
    return render(request, "viewer/graph_gallery_posted.html", d)

def delete_archive(request: HttpRequest, pk: int) -> HttpResponse:
    """Delete archive and gallery data if there is any."""
    if not request.user.is_staff:
        return render_error(request, "You need to be an admin to delete an archive.")
    try:
        archive = Archive.objects.get(pk=pk)
    except Archive.DoesNotExist:
        raise Http404("Archive does not exist")
    if request.method == 'POST':
        p = request.POST
        if "delete_confirm" in p:
            message_list = list()
            if "delete-archive" in p:
                message_list.append('archive entry')
            if "delete-gallery" in p:
                message_list.append('associated gallery')
            if "delete-file" in p:
                message_list.append('associated file')
            message = 'For archive: {}, deleting: {}'.format(archive.title, ', '.join(message_list))
            frontend_logger.info("User {}: {}".format(request.user.username, message))
            messages.success(request, message)
            gallery = archive.gallery
            if "mark-gallery-deleted" in p:
                archive.gallery.mark_as_deleted()
                archive.gallery = None
            if "delete-file" in p:
                archive.delete_all_files()
            if "delete-archive" in p:
                archive.delete_files_but_archive()
                archive.delete()
            user_reason = p.get('reason', '')
            event_log(
                request.user,
                'DELETE_ARCHIVE',
                reason=user_reason,
                content_object=gallery,
                result='deleted')
            return HttpResponseRedirect(reverse('viewer:main-page'))
    d = {'archive': archive}
    return render(request, "viewer/delete_archive.html", d)

def recall_api(request: HttpRequest, pk: int) -> HttpResponse:
    """Recall provider API, if possible."""
    if not request.user.has_perm('viewer.update_metadata'):
        return render_error(request, "You don't have the permission to refresh source metadata on an Archive.")
    try:
        archive = Archive.objects.get(pk=pk)
    except Archive.DoesNotExist:
        raise Http404("Archive does not exist")
    if not archive.gallery_id:
        return render_error(request, "No gallery associated with this archive.")
    gallery = Gallery.objects.get(pk=archive.gallery_id)
    current_settings = Settings(load_from_config=crawler_settings.config)
    if current_settings.workers.web_queue and gallery.provider:
        current_settings.set_update_metadata_options(providers=(gallery.provider,))

        def gallery_callback(x: Optional['Gallery'], crawled_url: Optional[str], result: str) -> None:
            event_log(
                request.user,
                'UPDATE_METADATA',
                content_object=x,
                result=result,
                data=crawled_url
            )

        current_settings.workers.web_queue.enqueue_args_list(
            (gallery.get_link(),),
            override_options=current_settings,
            gallery_callback=gallery_callback
        )
        logger.info(
            'Updating gallery API data for gallery: {} and related archives'.format(
                gallery.get_absolute_url()
            )
        )
    return HttpResponseRedirect(request.META["HTTP_REFERER"])

def foldercrawler(request: HttpRequest) -> HttpResponse:
    """Folder crawler."""
    if not request.user.is_staff:
        return render_error(request, "You need to be an admin to use the tools.")
    d: dict[str, typing.Any] = {'media_root': os.path.realpath(crawler_settings.MEDIA_ROOT)}
    p = request.POST
    if p:
        if 'keep_this_settings' in p:
            current_settings = crawler_settings
        else:
            current_settings = Settings(load_from_config=crawler_settings.config)
        commands = set()
        # create dictionary of properties for each command
        for k, v in p.items():
            if k.startswith("matchers"):
                k, matcher = k.split('-')
                current_settings.config['matchers'][matcher] = v
                current_settings.matchers[matcher] = int(v)
            elif k == "commands":
                command_list = v.split("\n")
                for item in command_list:
                    commands.add(item.rstrip('\r'))
            elif k == "internal_matches":
                current_settings.internal_matches_for_non_matches = True
        if 'reason' in p and p['reason'] != '':
            reason = p['reason']
            # Force limit string length (reason field max_length)
            current_settings.archive_reason = reason[:200]
            current_settings.gallery_reason = reason[:200]
        if 'source' in p and p['source'] != '':
            source = p['source']
            # Force limit string length (source field max_length)
            current_settings.archive_source = source[:50]
        if 'keep_this_settings' in p:
            current_settings.write()
            current_settings.load_config_from_file()
        folder_crawler = FolderCrawlerThread(current_settings, list(commands))
        folder_crawler.start()
        messages.success(request, 'Starting Folder Crawler, check the logs for a report.')
        # Not really optimal when there's many commands being queued
        # for command in commands:
        #     messages.success(request, command)
        return HttpResponseRedirect(reverse('viewer:main-page'))
    d.update({
        'settings': crawler_settings,
        'matchers': crawler_settings.provider_context.get_matchers_name_priority(crawler_settings)
    })
    return render(request, "viewer/foldercrawler.html", d)

def stats_settings(request: HttpRequest) -> HttpResponse:
    """Display settings objects."""
    if not request.user.is_staff:
        return render_error(request, "You need to be an admin to view setting objects.")
    stats_dict = {
        "current_settings": crawler_settings,
    }
    d = {'stats': stats_dict}
    return render(request, "viewer/stats_settings.html", d)

def generate_matches(request: HttpRequest, pk: int) -> HttpResponse:
    """Generate matches for non-match."""
    if not request.user.is_staff:
        return render_error(request, "You need to be an admin to generate matches.")
    try:
        archive = Archive.objects.get(pk=pk)
    except Archive.DoesNotExist:
        raise Http404("Archive does not exist")
    if archive.gallery:
        return render_error(request, "Archive is already matched.")
    clear_title = True if 'clear' in request.GET else False
    provider_filter = request.GET.get('provider', '')
    try:
        cutoff = float(request.GET.get('cutoff', '0.4'))
    except ValueError:
        cutoff = 0.4
    try:
        max_matches = int(request.GET.get('max-matches', '10'))
    except ValueError:
        max_matches = 10
    archive.generate_possible_matches(
        clear_title=clear_title,
        provider_filter=provider_filter,
        cutoff=cutoff,
        max_matches=max_matches
    )
    archive.save()
    logger.info('Generated matches for {}, found {}'.format(
        archive.zipped.path,
        archive.possible_matches.count()
    ))
    return HttpResponseRedirect(request.META["HTTP_REFERER"])

def extract_toggle(request: HttpRequest, pk: int) -> HttpResponse:
    """Extract archive toggle."""
    if not request.user.is_staff:
        return render_error(request, "You need to be an admin to toggle extract an archive.")
    try:
        with transaction.atomic():
            archive = Archive.objects.select_for_update().get(pk=pk)
            frontend_logger.info('Toggling images for ' + archive.zipped.name)
            archive.extract_toggle()
    except Archive.DoesNotExist:
        raise Http404("Archive does not exist")
    return HttpResponseRedirect(request.META["HTTP_REFERER"])

def recalc_info(request: HttpRequest, pk: int) -> HttpResponse:
    """Recalculate archive info."""
    if not request.user.is_staff:
        return render_error(request, "You need to be an admin to recalculate file info.")
    try:
        archive = Archive.objects.get(pk=pk)
    except Archive.DoesNotExist:
        raise Http404("Archive does not exist")
    frontend_logger.info('Recalculating file info for ' + archive.zipped.name)
    archive.recalc_fileinfo()
    archive.generate_image_set(force=False)
    archive.generate_thumbnails()
    return HttpResponseRedirect(request.META["HTTP_REFERER"])

def archive_update(request: HttpRequest, pk: int, tool: Optional[str] = None, tool_use_id: Optional[str] = None) -> HttpResponse:
    try:
        archive = Archive.objects.get(pk=pk)
    except Archive.DoesNotExist:
        raise Http404("Archive does not exist")
    if tool == 'select-as-match' and request.user.has_perm('viewer.match_archive'):
        archive.select_as_match(tool_use_id)
        if archive.gallery:
            frontend_logger.info(
                "User: {}: Archive {} ({}) was matched with gallery {} ({}).".format(
                    request.user.username,
                    archive,
                    reverse('viewer:archive', args=(archive.pk,)),
                    archive.gallery,
                    reverse('viewer:gallery', args=(archive.gallery.pk,)),
                ))
            event_log(
                request.user,
                'MATCH_ARCHIVE',
                # reason=user_reason,
                data=reverse('viewer:gallery', args=(archive.gallery.pk,)),
                content_object=archive,
                result='matched')
        return HttpResponseRedirect(request.META["HTTP_REFERER"])
    elif tool == 'clear-possible-matches' and request.user.has_perm('viewer.match_archive'):
        archive.possible_matches.clear()
        frontend_logger.info(
            "User: {}: Archive {} ({}) was cleared from its possible matches.".format(
                request.user.username,
                archive,
                reverse('viewer:archive', args=(archive.pk,)),
            ))
        return HttpResponseRedirect(request.META["HTTP_REFERER"])
    else:
        return render_error(request, 'Unrecognized command')

def rematch_archive(request: HttpRequest, pk: int) -> HttpResponse:
    """Match an Archive."""
    if not request.user.is_staff:
        return render_error(request, "You need to be an admin to rematch an archive.")
    try:
        archive = Archive.objects.get(pk=pk)
    except Archive.DoesNotExist:
        raise Http404("Archive does not exist")
    if archive.gallery:
        archive.gallery.archive_set.remove(archive)
    folder_crawler_thread = FolderCrawlerThread(crawler_settings, ['-frm', archive.zipped.path])
    folder_crawler_thread.start()
    logger.info('Rematching archive: {}'.format(archive.title))
    return HttpResponseRedirect(request.META["HTTP_REFERER"])

def stats_collection(request: HttpRequest) -> HttpResponse:
    """Display galleries and archives stats."""
    if not request.user.is_staff:
        return render_error(request, "You need to be an admin to display the stats.")
    # General
    stats_dict = {
        "n_archives": Archive.objects.count(),
        "n_expanded_archives": Archive.objects.filter(extracted=True).count(),
        "n_to_download_archives": Archive.objects.filter_by_dl_remote().count(),
        "n_galleries": Gallery.objects.count(),
        "archive": Archive.objects.filter(filesize__gt=0).aggregate(
            Avg('filesize'), Max('filesize'), Min('filesize'), Sum('filesize')),
        "gallery": Gallery.objects.filter(filesize__gt=0).aggregate(
            Avg('filesize'), Max('filesize'), Min('filesize'), Sum('filesize')),
        "hidden_galleries": Gallery.objects.filter(hidden=True).count(),
        "hidden_galleries_size": Gallery.objects.filter(
            filesize__gt=0, hidden=True).aggregate(Sum('filesize')),
        "fjord_galleries": Gallery.objects.filter(fjord=True).count(),
        "expunged_galleries": Gallery.objects.filter(expunged=True).count(),
        "n_tags": Tag.objects.count(),
        "n_tag_scopes": Tag.objects.values('scope').distinct().count(),
        "n_custom_tags": Tag.objects.are_custom().count(),
        "top_10_tags": Tag.objects.annotate(
            num_gallery=Count('gallery')).order_by('-num_gallery')[:10],
        "top_10_parody_tags": Tag.objects.filter(scope='parody').annotate(
            num_gallery=Count('gallery')).order_by('-num_gallery')[:10],
        "wanted_galleries": {
            "total": WantedGallery.objects.all().count(),
            "found": WantedGallery.objects.filter(found=True).count(),
            "total_galleries_found": FoundGallery.objects.all().count(),
            "user_created": WantedGallery.objects.filter(book_type='user').count(),
        }
    }
    # Per provider
    providers = Gallery.objects.all().values_list('provider', flat=True).distinct()
    providers_dict = {}
    for provider in providers:
        providers_dict[provider] = {
            'galleries': Gallery.objects.filter(provider=provider).count(),
            'archives': Archive.objects.filter(gallery__provider=provider).count(),
            'wanted_galleries': WantedGallery.objects.filter(provider=provider).count(),
        }
    d = {'stats': stats_dict, 'providers': providers_dict}
    return render(request, "viewer/stats_collection.html", d)

def crawler(request: HttpRequest) -> HttpResponse:
    """Crawl given URLs."""
    if not request.user.is_staff:
        return render_error(request, "You need to be an admin to crawl a link.")
    d = {}
    p = request.POST
    if p:
        if 'keep_this_settings' in p:
            current_settings = crawler_settings
        else:
            current_settings = Settings(load_from_config=crawler_settings.config)
        url_set = set()
        # create dictionary of properties for each archive
        current_settings.replace_metadata = False
        current_settings.config['allowed']['replace_metadata'] = 'no'
        for k, v in p.items():
            if k.startswith("downloaders"):
                k, dl = k.split('-')
                current_settings.config['downloaders'][dl] = v
                current_settings.downloaders[dl] = int(v)
            elif k == "replace_metadata":
                current_settings.config['allowed'][k] = 'yes'
                current_settings.replace_metadata = True
            elif k == "urls":
                url_list = v.split("\n")
                for item in url_list:
                    url_set.add(item.rstrip('\r'))
        urls = list(url_set)
        if 'reason' in p and p['reason'] != '':
            reason = p['reason']
            # Force limit string length (reason field max_length)
            current_settings.archive_reason = reason[:200]
            current_settings.gallery_reason = reason[:200]
        if 'keep_this_settings' in p:
            current_settings.write()
            current_settings.load_config_from_file()
        if 'run_separate' in p:
            crawler_thread = CrawlerThread(crawler_logger, current_settings, urls)
            crawler_thread.start()
        else:
            current_settings.workers.web_queue.enqueue_args_list(urls, override_options=current_settings)
        messages.success(request, 'Starting Crawler, check the logs for a report.')
        # Not really optimal when there's many commands being queued
        # for command in url_list:
        #     messages.success(request, command)
        return HttpResponseRedirect(reverse('viewer:main-page'))
    d.update({
        'settings': crawler_settings,
        'downloaders': crawler_settings.provider_context.get_downloaders_name_priority(crawler_settings)
    })
    return render(request, "viewer/crawler.html", d)

def archives_not_matched_with_gallery(request: HttpRequest) -> HttpResponse:
    p = request.POST
    get = request.GET
    title = get.get("title", '')
    tags = get.get("tags", '')
    try:
        page = int(get.get("page", '1'))
    except ValueError:
        page = 1
    if 'clear' in get:
        form = ArchiveSearchForm()
    else:
        form = ArchiveSearchForm(initial={'title': title, 'tags': tags})
    if p:
        pks = []
        for k, v in p.items():
            if k.startswith("sel-"):
                # k, pk = k.split('-')
                # results[pk][k] = v
                pks.append(v)
        preserved = Case(*[When(pk=pk, then=pos) for pos, pk in enumerate(pks)])
        archives = Archive.objects.filter(id__in=pks).order_by(preserved)
        if 'create_possible_matches' in p:
            if thread_exists('match_unmatched_worker'):
                return render_error(request, "Local matching worker is already running.")
            provider = p['create_possible_matches']
            try:
                cutoff = float(p.get('cutoff', '0.4'))
            except ValueError:
                cutoff = 0.4
            try:
                max_matches = int(p.get('max-matches', '10'))
            except ValueError:
                max_matches = 10
            frontend_logger.info(
                'User {}: Looking for possible matches in gallery database '
                'for non-matched archives (cutoff: {}, max matches: {}) '
                'using provider filter "{}"'.format(request.user.username, cutoff, max_matches, provider))
            matching_thread = threading.Thread(
                name='match_unmatched_worker',
                target=generate_possible_matches_for_archives,
                args=(archives,),
                kwargs={
                    'logger': frontend_logger,
                    'cutoff': cutoff,
                    'max_matches': max_matches,
                    'filters': (provider,),
                    'match_local': True,
                    'match_web': False
                })
            matching_thread.daemon = True
            matching_thread.start()
            messages.success(request, 'Starting internal match worker.')
        elif 'clear_possible_matches' in p:
            for archive in archives:
                archive.possible_matches.clear()
            frontend_logger.info(
                'User {}: Clearing possible matches for archives'.format(request.user.username))
            messages.success(request, 'Clearing possible matches.')
    params = {
        'sort': 'create_date',
        'asc_desc': 'desc',
        'filename': title,
    }
    for k, v in get.items():
        params[k] = v
    for k in archive_filter_keys:
        if k not in params:
            params[k] = ''
    results = filter_archives_simple(params)
    if 'show-matched' not in get:
        results = results.filter(gallery__isnull=True)
    results = results.prefetch_related(
        Prefetch(
            'archivematches_set',
            queryset=ArchiveMatches.objects.select_related('gallery', 'archive').prefetch_related(
                Prefetch(
                    'gallery__tags',
                    queryset=Tag.objects.filter(scope__exact='artist'),
                    to_attr='artist_tags')),
            to_attr='possible_galleries'),
        'possible_galleries__gallery',
    )
    if 'with-possible-matches' in get:
        results = results.annotate(
            n_possible_matches=Count('possible_matches')).filter(n_possible_matches__gt=0)
    paginator = Paginator(results, 50)
    try:
        results = paginator.page(page)
    except (InvalidPage, EmptyPage):
        results = paginator.page(paginator.num_pages)
    d = {
        'results': results,
        'providers': Gallery.objects.all().values_list('provider', flat=True).distinct(),
        'form': form
    }
    return render(request, "viewer/collaborators/unmatched_archives.html", d)

def tools(request: HttpRequest, tool: str = "main") -> HttpResponse:
    """Tools listing."""
    settings_text = ''
    if not request.user.is_staff:
        return render_error(request, "You need to be an admin to use the tools.")
    if tool == "transfer_missing_downloads":
        crawler_thread = CrawlerThread(crawler_logger, crawler_settings, '-tmd'.split())
        crawler_thread.start()
        return HttpResponseRedirect(request.META["HTTP_REFERER"])
    elif tool == "retry_failed":
        crawler_thread = CrawlerThread(crawler_logger, crawler_settings, '--retry-failed'.split())
        crawler_thread.start()
        return HttpResponseRedirect(request.META["HTTP_REFERER"])
    elif tool == "update_newer_than":
        p = request.GET
        if p and 'newer_than' in p:
            newer_than_date = p['newer_than']
            try:
                if parse_date(newer_than_date) is not None:
                    crawler_settings.workers.web_queue.enqueue_args_list(('-unt', newer_than_date))
                    messages.success(request, 'Updating galleries posted after ' + newer_than_date)
                else:
                    messages.error(request, 'Invalid date format.', extra_tags='danger')
            except ValueError:
                messages.error(request, 'Invalid date.', extra_tags='danger')
        return HttpResponseRedirect(request.META["HTTP_REFERER"])
    elif tool == "update_missing_thumbnails":
        p = request.GET
        if p and 'limit_number' in p:
            try:
                limit_number = int(p['limit_number'])
                provider = request.GET.get('provider', '')
                if provider:
                    crawler_settings.workers.web_queue.enqueue_args_list(
                        ('-umt', str(limit_number), '-ip', provider))
                else:
                    crawler_settings.workers.web_queue.enqueue_args_list(('-umt', str(limit_number)))
                messages.success(
                    request,
                    'Updating galleries missing thumbnails, limiting to older {}'.format(limit_number))
            except ValueError:
                messages.error(request, 'Invalid limit.', extra_tags='danger')
        return HttpResponseRedirect(request.META["HTTP_REFERER"])
    elif tool == "generate_missing_thumbs":
        archives = Archive.objects.filter(thumbnail='')
        for archive in archives:
            frontend_logger.info('Generating thumbs for file: {}'.format(archive.zipped.name))
            archive.generate_thumbnails()
        return HttpResponseRedirect(request.META["HTTP_REFERER"])
    elif tool == "calculate_missing_info":
        archives = Archive.objects.filter_by_missing_file_info()
        for archive in archives:
            frontend_logger.info('Calculating file info for file: {}'.format(archive.zipped.name))
            archive.recalc_fileinfo()
        return HttpResponseRedirect(request.META["HTTP_REFERER"])
    elif tool == "recalc_all_file_info":
        if thread_exists('fileinfo_worker'):
            return render_error(request, "File info worker is already running.")
        archives = Archive.objects.all()
        frontend_logger.info(
            'Recalculating file info for all archives, count: {}'.format(archives.count()))
        image_worker_thread = ImageWorker(crawler_logger, 4)
        for archive in archives:
            if os.path.exists(archive.zipped.path):
                image_worker_thread.enqueue_archive(archive)
        fileinfo_thread = threading.Thread(
            name='fileinfo_worker', target=image_worker_thread.start_info_thread)
        fileinfo_thread.start()
        return HttpResponseRedirect(request.META["HTTP_REFERER"])
    elif tool == "set_all_hidden_as_public":
        archives = Archive.objects.filter(gallery__hidden=True)
        for archive in archives:
            if os.path.isfile(archive.zipped.path):
                archive.public = True
                archive.save()
                archive.gallery.public = True
                archive.gallery.save()
        return HttpResponseRedirect(request.META["HTTP_REFERER"])
    elif tool == "regenerate_all_thumbs":
        if thread_exists('thumbnails_worker'):
            return render_error(request, "Thumbnails worker is already running.")
        archives = Archive.objects.all()
        frontend_logger.info(
            'Generating thumbs for all archives, count {}'.format(archives.count()))
        image_worker_thread = ImageWorker(crawler_logger, 4)
        for archive in archives:
            if os.path.exists(archive.zipped.path):
                image_worker_thread.enqueue_archive(archive)
        thumbnails_thread = threading.Thread(
            name='thumbnails_worker', target=image_worker_thread.start_thumbs_thread)
        thumbnails_thread.start()
        return HttpResponseRedirect(request.META["HTTP_REFERER"])
    elif tool == "generate_possible_matches_internally":
        if thread_exists('match_unmatched_worker'):
            return render_error(request, "Matching worker is already running.")
        provider = request.GET.get('provider', '')
        try:
            cutoff = float(request.GET.get('cutoff', '0.4'))
        except ValueError:
            cutoff = 0.4
        try:
            max_matches = int(request.GET.get('max-matches', '10'))
        except ValueError:
            max_matches = 10
        frontend_logger.info(
            'Looking for possible matches in gallery database '
            'for non-matched archives (cutoff: {}, max matches: {}) '
            'using provider filter "{}"'.format(cutoff, max_matches, provider))
        matching_thread = threading.Thread(
            name='match_unmatched_worker',
            target=generate_possible_matches_for_archives,
            args=(None,),
            kwargs={
                'logger': frontend_logger,
                'cutoff': cutoff,
                'max_matches': max_matches,
                'filters': (provider,),
                'match_local': True,
                'match_web': False
            })
        matching_thread.daemon = True
        matching_thread.start()
        messages.success(
            request,
            'Looking for possible matches, filtering providers: {}, cutoff: {}.'.format(provider, cutoff))
        return HttpResponseRedirect(request.META["HTTP_REFERER"])
    elif tool == "clear_all_archive_possible_matches":
        ArchiveMatches.objects.all().delete()
        frontend_logger.info('Clearing all possible matches for archives.')
        messages.success(request, 'Clearing all possible matches for archives.')
        return HttpResponseRedirect(request.META["HTTP_REFERER"])
    elif tool == "search_wanted_galleries_provider_titles":
        if thread_exists('web_search_worker'):
            messages.error(request, 'Web search worker is already running.', extra_tags='danger')
            return HttpResponseRedirect(request.META["HTTP_REFERER"])
        results = WantedGallery.objects.eligible_to_search()
        if not results:
            frontend_logger.info('No wanted galleries eligible to search.')
            messages.success(request, 'No wanted galleries eligible to search.')
            return HttpResponseRedirect(request.META["HTTP_REFERER"])
        provider = request.GET.get('provider', '')
        frontend_logger.info(
            'Searching for gallery matches in panda for wanted galleries, starting thread.')
        messages.success(
            request,
            'Searching for gallery matches in panda for wanted galleries, starting thread.')
        panda_search_thread = threading.Thread(
            name='web_search_worker',
            target=create_matches_wanted_galleries_from_providers,
            args=(results, provider),
            kwargs={'logger': frontend_logger})
        panda_search_thread.daemon = True
        panda_search_thread.start()
        return HttpResponseRedirect(request.META["HTTP_REFERER"])
    elif tool == "wanted_galleries_possible_matches":
        if thread_exists('wanted_local_search_worker'):
            return render_error(request, "Wanted local matching worker is already running.")
        non_match_wanted = WantedGallery.objects.eligible_to_search()
        if not non_match_wanted:
            frontend_logger.info('No wanted galleries eligible to search.')
            messages.success(request, 'No wanted galleries eligible to search.')
            return HttpResponseRedirect(request.META["HTTP_REFERER"])
        frontend_logger.info(
            'Looking for possible matches in gallery database '
            'for wanted galleries (fixed 0.4 cutoff)')
        provider = request.GET.get('provider', '')
        try:
            cutoff = float(request.GET.get('cutoff', '0.4'))
        except ValueError:
            cutoff = 0.4
        try:
            max_matches = int(request.GET.get('max-matches', '10'))
        except ValueError:
            max_matches = 10
        matching_thread = threading.Thread(
            name='wanted_local_search_worker',
            target=create_matches_wanted_galleries_from_providers_internal,
            args=(non_match_wanted,),
            kwargs={
                'logger': frontend_logger,
                'provider_filter': provider,
                'cutoff': cutoff,
                'max_matches': max_matches
            })
        matching_thread.daemon = True
        matching_thread.start()
        return HttpResponseRedirect(request.META["HTTP_REFERER"])
    elif tool == "restart_viewer":
        crawler_settings.workers.stop_workers_and_wait()
        if hasattr(signal, 'SIGUSR2'):
            os.kill(os.getpid(), signal.SIGUSR2)
        else:
            return render_error(request, "This OS does not support signal SIGUSR2.")
        return HttpResponseRedirect(request.META["HTTP_REFERER"])
    elif tool == "modify_settings":
        p = request.POST
        if p:
            settings_text = p['settings_file']
            if os.path.isfile(os.path.join(crawler_settings.default_dir, "settings.ini")):
                with open(os.path.join(crawler_settings.default_dir, "settings.ini"),
                          "w", encoding="utf-8") as f:
                    f.write(settings_text)
                frontend_logger.info('Modified settings file for Panda Backup')
                messages.success(request, 'Modified settings file for Panda Backup')
            return HttpResponseRedirect(request.META["HTTP_REFERER"])
        else:
            if os.path.isfile(os.path.join(crawler_settings.default_dir, "settings.ini")):
                with open(os.path.join(crawler_settings.default_dir, "settings.ini"),
                          "r", encoding="utf-8") as f:
                    first = f.read(1)
                    if first != '\ufeff':
                        # not a BOM, rewind
                        f.seek(0)
                    settings_text = f.read()
    elif tool == "reload_settings":
        if not request.user.is_staff:
            return render_error(request, "You need to be an admin to reload the config.")
        crawler_settings.load_config_from_file()
        frontend_logger.info('Reloaded settings file for Panda Backup')
        messages.success(request, 'Reloaded settings file for Panda Backup')
        return HttpResponseRedirect(request.META["HTTP_REFERER"])
    elif tool == "start_timed_dl":
        crawler_settings.workers.timed_downloader.start_running(
            timer=crawler_settings.timed_downloader_cycle_timer)
        return HttpResponseRedirect(request.META["HTTP_REFERER"])
    elif tool == "stop_timed_dl":
        crawler_settings.workers.timed_downloader.stop_running()
        return HttpResponseRedirect(request.META["HTTP_REFERER"])
    elif tool == "force_run_timed_dl":
        crawler_settings.workers.timed_downloader.stop_running()
        crawler_settings.workers.timed_downloader.force_run_once = True
        crawler_settings.workers.timed_downloader.start_running(
            timer=crawler_settings.timed_downloader_cycle_timer)
        return HttpResponseRedirect(request.META["HTTP_REFERER"])
    elif tool == "start_timed_crawler":
        crawler_settings.workers.timed_crawler.start_running(
            timer=crawler_settings.autochecker.cycle_timer)
        return HttpResponseRedirect(request.META["HTTP_REFERER"])
    elif tool == "stop_timed_crawler":
        crawler_settings.workers.timed_crawler.stop_running()
        return HttpResponseRedirect(request.META["HTTP_REFERER"])
    elif tool == "force_run_timed_crawler":
        crawler_settings.workers.timed_crawler.stop_running()
        crawler_settings.workers.timed_crawler.force_run_once = True
        crawler_settings.workers.timed_crawler.start_running(
            timer=crawler_settings.autochecker.cycle_timer)
        return HttpResponseRedirect(request.META["HTTP_REFERER"])
    elif tool == "start_timed_updater":
        crawler_settings.workers.timed_updater.start_running(
            timer=crawler_settings.autoupdater.cycle_timer)
        return HttpResponseRedirect(request.META["HTTP_REFERER"])
    elif tool == "stop_timed_updater":
        crawler_settings.workers.timed_updater.stop_running()
        return HttpResponseRedirect(request.META["HTTP_REFERER"])
    elif tool == "force_run_timed_updater":
        crawler_settings.workers.timed_updater.stop_running()
        crawler_settings.workers.timed_updater.force_run_once = True
        crawler_settings.workers.timed_updater.start_running(
            timer=crawler_settings.autoupdater.cycle_timer)
        return HttpResponseRedirect(request.META["HTTP_REFERER"])
    elif tool == "start_timed_auto_wanted":
        crawler_settings.workers.timed_auto_wanted.start_running(
            timer=crawler_settings.auto_wanted.cycle_timer)
        return HttpResponseRedirect(request.META["HTTP_REFERER"])
    elif tool == "stop_timed_auto_wanted":
        crawler_settings.workers.timed_auto_wanted.stop_running()
        return HttpResponseRedirect(request.META["HTTP_REFERER"])
    elif tool == "force_run_timed_auto_wanted":
        crawler_settings.workers.timed_auto_wanted.stop_running()
        crawler_settings.workers.timed_auto_wanted.force_run_once = True
        crawler_settings.workers.timed_auto_wanted.start_running(
            timer=crawler_settings.auto_wanted.cycle_timer)
        return HttpResponseRedirect(request.META["HTTP_REFERER"])
    elif tool == "start_web_queue":
        crawler_settings.workers.web_queue.start_running()
        return HttpResponseRedirect(request.META["HTTP_REFERER"])
    threads_status = get_thread_status_bool()
    d = {
        'tool': tool,
        'settings_text': settings_text,
        'threads_status': threads_status
    }
    return render(request, "viewer/tools.html", d)

def stats_collection(request: HttpRequest) -> HttpResponse:
    """Display galleries and archives stats."""
    if not request.user.is_staff:
        return render_error(request, "You need to be an admin to display the stats.")
    # General
    stats_dict = {
        "n_archives": Archive.objects.count(),
        "n_expanded_archives": Archive.objects.filter(extracted=True).count(),
        "n_to_download_archives": Archive.objects.filter_by_dl_remote().count(),
        "n_galleries": Gallery.objects.count(),
        "archive": Archive.objects.filter(filesize__gt=0).aggregate(
            Avg('filesize'), Max('filesize'), Min('filesize'), Sum('filesize'),
            Avg('filecount'), Sum('filecount')),
        "gallery": Gallery.objects.filter(filesize__gt=0).aggregate(
            Avg('filesize'), Max('filesize'), Min('filesize'), Sum('filesize'),
            Avg('filecount'), Sum('filecount')),
        "hidden_galleries": Gallery.objects.filter(hidden=True).count(),
        "hidden_galleries_size": Gallery.objects.filter(
            filesize__gt=0, hidden=True).aggregate(Sum('filesize')),
        "fjord_galleries": Gallery.objects.filter(fjord=True).count(),
        "expunged_galleries": Gallery.objects.filter(expunged=True).count(),
        "n_tags": Tag.objects.count(),
        "n_tag_scopes": Tag.objects.values('scope').distinct().count(),
        "n_custom_tags": Tag.objects.are_custom().count(),  # type: ignore
        "top_10_tags": Tag.objects.annotate(
            num_archive=Count('gallery_tags')).order_by('-num_archive')[:10],
        "top_10_parody_tags": Tag.objects.filter(scope='parody').annotate(
            num_archive=Count('gallery_tags')).order_by('-num_archive')[:10],
        "top_10_artist_tags": Tag.objects.filter(scope='artist').annotate(
            num_archive=Count('gallery_tags')).order_by('-num_archive')[:10],
        "wanted_galleries": {
            "total": WantedGallery.objects.all().count(),
            "found": WantedGallery.objects.filter(found=True).count(),
            "total_galleries_found": FoundGallery.objects.all().count(),
            "user_created": WantedGallery.objects.filter(book_type='user').count(),
        }
    }
    # Per provider
    providers = Gallery.objects.all().values_list('provider', flat=True).distinct()
    providers_dict = {}
    for provider in providers:
        providers_dict[provider] = {
            'galleries': Gallery.objects.filter(provider=provider).count(),
            'archives': Archive.objects.filter(gallery__provider=provider).count(),
            'wanted_galleries': WantedGallery.objects.filter(provider=provider).count(),
        }
    # Per category
    categories = Gallery.objects.all().values_list('category', flat=True).distinct()
    categories_dict = {}
    for category in categories:
        categories_dict[category] = {
            'n_galleries': Gallery.objects.filter(category=category).count(),
            'gallery': Gallery.objects.filter(
                filesize__gt=0, category=category
            ).aggregate(
                Avg('filesize'), Max('filesize'), Min('filesize'), Sum('filesize'),
                Avg('filecount'), Sum('filecount')
            )
        }
    # Per language tag
    languages = Tag.objects.filter(
        scope='language'
    ).exclude(
        scope='language', name='translated'
    ).annotate(num_gallery=Count('gallery')).order_by('-num_gallery').values_list('name', flat=True).distinct()
    languages_dict = {}
    # Untranslated covers galleries with no language tag, so the size stats
    # use the same population as the count above.
    languages_dict['untranslated'] = {
        'n_galleries': Gallery.objects.exclude(tags__scope='language').distinct().count(),
        'gallery': Gallery.objects.filter(
            filesize__gt=0
        ).exclude(
            tags__scope='language'
        ).distinct().aggregate(
            Avg('filesize'), Max('filesize'), Min('filesize'), Sum('filesize'),
            Avg('filecount'), Sum('filecount')
        )
    }
    for language in languages:
        languages_dict[language] = {
            'n_galleries': Gallery.objects.filter(tags__scope='language', tags__name=language).distinct().count(),
            'gallery': Gallery.objects.filter(
                filesize__gt=0, tags__scope='language', tags__name=language
            ).distinct().aggregate(
                Avg('filesize'), Max('filesize'), Min('filesize'), Sum('filesize'),
                Avg('filecount'), Sum('filecount')
            )
        }
    d = {
        'stats': stats_dict,
        'providers': providers_dict,
        'gallery_categories': categories_dict,
        'gallery_languages': languages_dict
    }
    return render(request, "viewer/stats_collection.html", d)

def archives_not_matched_with_gallery(request: HttpRequest) -> HttpResponse:
    p = request.POST
    get = request.GET
    title = get.get("title", '')
    tags = get.get("tags", '')
    try:
        page = int(get.get("page", '1'))
    except ValueError:
        page = 1
    if 'clear' in get:
        form = ArchiveSearchForm()
    else:
        form = ArchiveSearchForm(initial={'title': title, 'tags': tags})
    if p:
        pks = []
        for k, v in p.items():
            if k.startswith("sel-"):
                # k, pk = k.split('-')
                # results[pk][k] = v
                pks.append(v)
        archives = Archive.objects.filter(id__in=pks).order_by('-create_date')
        if 'delete_archives' in p:
            for archive in archives:
                message = 'Removing archive not matched: {} and deleting file: {}'.format(
                    archive.title, archive.zipped.path)
                frontend_logger.info(message)
                messages.success(request, message)
                archive.delete_all_files()
                archive.delete()
        elif 'delete_objects' in p:
            for archive in archives:
                message = 'Removing archive not matched: {}, keeping file: {}'.format(
                    archive.title, archive.zipped.path)
                frontend_logger.info(message)
                messages.success(request, message)
                archive.delete_files_but_archive()
                archive.delete()
        elif 'create_possible_matches' in p:
            if thread_exists('web_match_worker'):
                return render_error(request, 'Web match worker is already running.')
            matcher_filter = p['create_possible_matches']
            try:
                cutoff = float(p.get('cutoff', '0.4'))
            except ValueError:
                cutoff = 0.4
            try:
                max_matches = int(p.get('max-matches', '10'))
            except ValueError:
                max_matches = 10
            web_match_thread = threading.Thread(
                name='web_match_worker',
                target=generate_possible_matches_for_archives,
                args=(archives,),
                kwargs={
                    'logger': frontend_logger,
                    'cutoff': cutoff,
                    'max_matches': max_matches,
                    'filters': (matcher_filter,),
                    'match_local': False,
                    'match_web': True
                })
            web_match_thread.daemon = True
            web_match_thread.start()
            messages.success(request, 'Starting web match worker.')
        elif 'create_possible_matches_internal' in p:
            if thread_exists('match_unmatched_worker'):
                return render_error(request, "Local matching worker is already running.")
            provider = p['create_possible_matches_internal']
            try:
                cutoff = float(p.get('cutoff', '0.4'))
            except ValueError:
                cutoff = 0.4
            try:
                max_matches = int(p.get('max-matches', '10'))
            except ValueError:
                max_matches = 10
            frontend_logger.info(
                'Looking for possible matches in gallery database '
                'for non-matched archives (cutoff: {}, max matches: {}) '
                'using provider filter "{}"'.format(cutoff, max_matches, provider))
            matching_thread = threading.Thread(
                name='match_unmatched_worker',
                target=generate_possible_matches_for_archives,
                args=(archives,),
                kwargs={
                    'logger': frontend_logger,
                    'cutoff': cutoff,
                    'max_matches': max_matches,
                    'filters': (provider,),
                    'match_local': True,
                    'match_web': False
                })
            matching_thread.daemon = True
            matching_thread.start()
            messages.success(request, 'Starting internal match worker.')
    params = {
        'sort': 'create_date',
        'asc_desc': 'desc',
        'filename': title,
    }
    for k, v in get.items():
        params[k] = v
    for k in archive_filter_keys:
        if k not in params:
            params[k] = ''
    results = filter_archives_simple(params)
    results = results.filter(gallery__isnull=True).prefetch_related(
        Prefetch(
            'archivematches_set',
            queryset=ArchiveMatches.objects.select_related('gallery', 'archive').prefetch_related(
                Prefetch(
                    'gallery__tags',
                    queryset=Tag.objects.filter(scope__exact='artist'),
                    to_attr='artist_tags')),
            to_attr='possible_galleries'),
        'possible_galleries__gallery',
    )
    if 'no-custom-tags' in get:
        results = results.annotate(num_custom_tags=Count('custom_tags')).filter(num_custom_tags=0)
    if 'with-possible-matches' in get:
        results = results.annotate(
            n_possible_matches=Count('possible_matches')).filter(n_possible_matches__gt=0)
    paginator = Paginator(results, 100)
    try:
        results = paginator.page(page)
    except (InvalidPage, EmptyPage):
        results = paginator.page(paginator.num_pages)
    d = {
        'results': results,
        'providers': Gallery.objects.all().values_list('provider', flat=True).distinct(),
        'matchers': crawler_settings.provider_context.get_matchers(crawler_settings, force=True),
        'api_key': crawler_settings.api_key,
        'form': form
    }
    return render(request, "viewer/archives_not_matched.html", d)

def logs(request: HttpRequest, tool: str = "all") -> HttpResponse:
    """Logs listing."""
    if not request.user.is_staff:
        return render_error(request, "You need to be an admin to see logs.")
    lines_info: dict[int, dict] = defaultdict(dict)
    f = open(MAIN_LOGGER, 'rt', encoding='utf8')
    log_lines: list[str] = f.read().split('[0m\n')
    f.close()
    log_lines.pop()
    log_lines.reverse()
    log_filter = request.GET.get('filter', '')
    if log_filter:
        # log_lines = [x for x in log_lines if log_filter.lower() in x.lower()]
        max_len = len(log_lines)
        found_indices = set()
        for i, log_line in enumerate(log_lines):
            if log_filter.lower() in log_line.lower():
                found_indices.update(list(range(max(i - 10, 0), min(i + 10, max_len))))
                lines_info[i]['highlighted'] = True
        log_lines_extended: list[tuple[str, dict]] = [
            (x, lines_info[i]) for i, x in enumerate(log_lines) if i in found_indices]
    else:
        log_lines_extended = [(x, lines_info[i]) for i, x in enumerate(log_lines)]
    current_base_uri = re.escape('{scheme}://{host}'.format(scheme=request.scheme, host=request.get_host()))
    # Build complete URL for relative internal URLs (some)
    if crawler_settings.urls.viewer_main_url:
        patterns = [
            r'(?!' + current_base_uri + r')/' + crawler_settings.urls.viewer_main_url + r"archive/\d+/?",
            r'(?!' + current_base_uri + r')/' + crawler_settings.urls.viewer_main_url + r"gallery/\d+/?",
            r'(?!' + current_base_uri + r')/' + crawler_settings.urls.viewer_main_url + r"wanted-gallery/\d+/?",
            r'(?!' + current_base_uri + r')/' + crawler_settings.urls.viewer_main_url + r"archive-group/[a-z0-9]+(?:-[a-z0-9]+)*/?",
        ]
    else:
        patterns = [
            r'(?<!' + current_base_uri + r')/archive/\d+/?',
            r'(?<!' + current_base_uri + r')/gallery/\d+/?',
            r'(?<!' + current_base_uri + r')/wanted-gallery/\d+/?',
            r'(?<!' + current_base_uri + r')/archive-group/[a-z0-9]+(?:-[a-z0-9]+)*/?',
        ]

    def build_request(match_obj: typing.Match) -> str:
        return request.build_absolute_uri(match_obj.group(0))

    log_lines_extended = [
        (reduce(lambda v, pattern: re.sub(pattern, build_request, v), patterns, line[0]), line[1])
        for line in log_lines_extended]
    paginator = Paginator(log_lines_extended, 100)
    try:
        page = int(request.GET.get("page", '1'))
    except ValueError:
        page = 1
    try:
        log_lines_paginated = paginator.page(page)
    except (InvalidPage, EmptyPage):
        log_lines_paginated = paginator.page(paginator.num_pages)
    d = {'log_lines': log_lines_paginated}
    return render(request, "viewer/logs.html", d)