def upload_archive(request: HttpRequest) -> HttpResponse:
    if request.POST.get('submit-archive'):
        # create a form instance and populate it with data from the request:
        edit_form = ArchiveCreateForm(request.POST, request.FILES)
        # check whether it's valid:
        if edit_form.is_valid():
            new_archive = edit_form.save(commit=False)
            new_archive.user = request.user
            new_archive.save()
            message = 'Archive successfully uploaded'
            messages.success(request, message)
            frontend_logger.info("User {}: {}".format(request.user.username, message))
            event_log(
                request.user,
                'ADD_ARCHIVE',
                content_object=new_archive,
                result='added')
            # return HttpResponseRedirect(request.META["HTTP_REFERER"])
        else:
            messages.error(request, 'The provided data is not valid', extra_tags='danger')
            # return HttpResponseRedirect(request.META["HTTP_REFERER"])
    else:
        edit_form = ArchiveCreateForm()
    d = {'edit_form': edit_form}
    return render(request, "viewer/collaborators/add_archive.html", d)

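# Usage sketch (illustrative, not part of this module): wiring upload_archive
# into a URLconf. The route path, name, and module layout below are
# assumptions; only the view function itself comes from this file.
#
# from django.urls import path
# from viewer.views import collaborators
#
# urlpatterns = [
#     path('archive/upload/', collaborators.upload_archive, name='upload_archive'),
# ]
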
def archives_not_present_in_filesystem(request: HttpRequest) -> HttpResponse:
    p = request.POST
    get = request.GET
    title = get.get("title", '')
    tags = get.get("tags", '')
    try:
        page = int(get.get("page", '1'))
    except ValueError:
        page = 1
    if 'clear' in get:
        form = ArchiveSearchForm()
    else:
        form = ArchiveSearchForm(initial={'title': title, 'tags': tags})
    if p:
        pks = []
        for k, v in p.items():
            if k.startswith("del-"):
                # k, pk = k.split('-')
                # results[pk][k] = v
                pks.append(v)
        results = Archive.objects.filter(id__in=pks).order_by('-pk')
        for archive in results:
            message = 'Removing archive missing in filesystem: {}, path: {}'.format(
                archive.title, archive.zipped.path)
            frontend_logger.info(message)
            messages.success(request, message)
            archive.delete()
    params = {
        'sort': 'create_date',
        'asc_desc': 'desc',
        'filename': title,
    }
    for k, v in get.items():
        params[k] = v
    for k in archive_filter_keys:
        if k not in params:
            params[k] = ''
    results = filter_archives_simple(params)
    results = results.filter_non_existent(crawler_settings.MEDIA_ROOT)
    paginator = Paginator(results, 50)
    try:
        results = paginator.page(page)
    except (InvalidPage, EmptyPage):
        results = paginator.page(paginator.num_pages)
    d = {'results': results, 'form': form}
    return render(request, "viewer/archives_not_present.html", d)

def archive_update(request: HttpRequest, pk: int, tool: Optional[str] = None,
                   tool_use_id: Optional[str] = None) -> HttpResponse:
    try:
        archive = Archive.objects.get(pk=pk)
    except Archive.DoesNotExist:
        raise Http404("Archive does not exist")
    if tool == 'select-as-match' and request.user.has_perm('viewer.match_archive'):
        archive.select_as_match(tool_use_id)
        if archive.gallery:
            frontend_logger.info(
                "User: {}: Archive {} ({}) was matched with gallery {} ({}).".format(
                    request.user.username,
                    archive,
                    reverse('viewer:archive', args=(archive.pk,)),
                    archive.gallery,
                    reverse('viewer:gallery', args=(archive.gallery.pk,)),
                ))
            event_log(
                request.user,
                'MATCH_ARCHIVE',
                # reason=user_reason,
                data=reverse('viewer:gallery', args=(archive.gallery.pk,)),
                content_object=archive,
                result='matched')
        return HttpResponseRedirect(request.META["HTTP_REFERER"])
    elif tool == 'clear-possible-matches' and request.user.has_perm('viewer.match_archive'):
        archive.possible_matches.clear()
        frontend_logger.info(
            "User: {}: Archive {} ({}) was cleared from its possible matches.".format(
                request.user.username,
                archive,
                reverse('viewer:archive', args=(archive.pk,)),
            ))
        return HttpResponseRedirect(request.META["HTTP_REFERER"])
    else:
        return render_error(request, 'Unrecognized command')

def wanted_gallery(request: HttpRequest, pk: int) -> HttpResponse:
    """WantedGallery detail and edit view."""
    try:
        wanted_gallery_instance = WantedGallery.objects.get(pk=pk)
    except WantedGallery.DoesNotExist:
        raise Http404("Wanted gallery does not exist")
    if request.POST.get('submit-wanted-gallery') and request.user.has_perm('viewer.change_wantedgallery'):
        # create a form instance and populate it with data from the request:
        edit_form = WantedGalleryCreateOrEditForm(
            request.POST, instance=wanted_gallery_instance)
        # check whether it's valid:
        if edit_form.is_valid():
            new_wanted_gallery = edit_form.save()
            message = 'Wanted gallery successfully modified'
            messages.success(request, message)
            frontend_logger.info("User {}: {}".format(request.user.username, message))
            event_log(
                request.user,
                'CHANGE_WANTED_GALLERY',
                content_object=new_wanted_gallery,
                result='changed')
            # return HttpResponseRedirect(request.META["HTTP_REFERER"])
        else:
            messages.error(request, 'The provided data is not valid', extra_tags='danger')
            # return HttpResponseRedirect(request.META["HTTP_REFERER"])
    else:
        edit_form = WantedGalleryCreateOrEditForm(instance=wanted_gallery_instance)
    wanted_tag_lists = sort_tags(wanted_gallery_instance.wanted_tags.all())
    unwanted_tag_lists = sort_tags(wanted_gallery_instance.unwanted_tags.all())
    d = {
        'wanted_gallery': wanted_gallery_instance,
        'wanted_tag_lists': wanted_tag_lists,
        'unwanted_tag_lists': unwanted_tag_lists,
        'edit_form': edit_form
    }
    return render(request, "viewer/collaborators/wanted_gallery.html", d)

def archives_not_matched_with_gallery(request: HttpRequest) -> HttpResponse:
    p = request.POST
    get = request.GET
    title = get.get("title", '')
    tags = get.get("tags", '')
    try:
        page = int(get.get("page", '1'))
    except ValueError:
        page = 1
    if 'clear' in get:
        form = ArchiveSearchForm()
    else:
        form = ArchiveSearchForm(initial={'title': title, 'tags': tags})
    if p:
        pks = []
        for k, v in p.items():
            if k.startswith("sel-"):
                # k, pk = k.split('-')
                # results[pk][k] = v
                pks.append(v)
        archives = Archive.objects.filter(id__in=pks).order_by('-create_date')
        if 'delete_archives' in p:
            for archive in archives:
                message = 'Removing archive not matched: {} and deleting file: {}'.format(
                    archive.title, archive.zipped.path)
                frontend_logger.info(message)
                messages.success(request, message)
                archive.delete_all_files()
                archive.delete()
        elif 'delete_objects' in p:
            for archive in archives:
                message = 'Removing archive not matched: {}, keeping file: {}'.format(
                    archive.title, archive.zipped.path)
                frontend_logger.info(message)
                messages.success(request, message)
                archive.delete_files_but_archive()
                archive.delete()
        elif 'create_possible_matches' in p:
            if thread_exists('web_match_worker'):
                return render_error(request, 'Web match worker is already running.')
            matcher_filter = p['create_possible_matches']
            try:
                cutoff = float(p.get('cutoff', '0.4'))
            except ValueError:
                cutoff = 0.4
            try:
                max_matches = int(p.get('max-matches', '10'))
            except ValueError:
                max_matches = 10
            web_match_thread = threading.Thread(
                name='web_match_worker',
                target=generate_possible_matches_for_archives,
                args=(archives,),
                kwargs={
                    'logger': frontend_logger,
                    'cutoff': cutoff,
                    'max_matches': max_matches,
                    'filters': (matcher_filter,),
                    'match_local': False,
                    'match_web': True
                })
            web_match_thread.daemon = True
            web_match_thread.start()
            messages.success(request, 'Starting web match worker.')
        elif 'create_possible_matches_internal' in p:
            if thread_exists('match_unmatched_worker'):
                return render_error(request, "Local matching worker is already running.")
            provider = p['create_possible_matches_internal']
            try:
                cutoff = float(p.get('cutoff', '0.4'))
            except ValueError:
                cutoff = 0.4
            try:
                max_matches = int(p.get('max-matches', '10'))
            except ValueError:
                max_matches = 10
            frontend_logger.info(
                'Looking for possible matches in gallery database '
                'for non-matched archives (cutoff: {}, max matches: {}) '
                'using provider filter "{}"'.format(cutoff, max_matches, provider))
            matching_thread = threading.Thread(
                name='match_unmatched_worker',
                target=generate_possible_matches_for_archives,
                args=(archives,),
                kwargs={
                    'logger': frontend_logger,
                    'cutoff': cutoff,
                    'max_matches': max_matches,
                    'filters': (provider,),
                    'match_local': True,
                    'match_web': False
                })
            matching_thread.daemon = True
            matching_thread.start()
            messages.success(request, 'Starting internal match worker.')
    params = {
        'sort': 'create_date',
        'asc_desc': 'desc',
        'filename': title,
    }
    for k, v in get.items():
        params[k] = v
    for k in archive_filter_keys:
        if k not in params:
            params[k] = ''
    results = filter_archives_simple(params)
    results = results.filter(gallery__isnull=True).prefetch_related(
        Prefetch(
            'archivematches_set',
            queryset=ArchiveMatches.objects.select_related('gallery', 'archive').prefetch_related(
                Prefetch(
                    'gallery__tags',
                    queryset=Tag.objects.filter(scope__exact='artist'),
                    to_attr='artist_tags')),
            to_attr='possible_galleries'),
        'possible_galleries__gallery',
    )
    if 'no-custom-tags' in get:
        results = results.annotate(
            num_custom_tags=Count('custom_tags')).filter(num_custom_tags=0)
    if 'with-possible-matches' in get:
        results = results.annotate(
            n_possible_matches=Count('possible_matches')).filter(n_possible_matches__gt=0)
    paginator = Paginator(results, 100)
    try:
        results = paginator.page(page)
    except (InvalidPage, EmptyPage):
        results = paginator.page(paginator.num_pages)
    d = {
        'results': results,
        'providers': Gallery.objects.all().values_list('provider', flat=True).distinct(),
        'matchers': crawler_settings.provider_context.get_matchers(crawler_settings, force=True),
        'api_key': crawler_settings.api_key,
        'form': form
    }
    return render(request, "viewer/archives_not_matched.html", d)

def missing_archives_for_galleries(request: HttpRequest) -> HttpResponse:
    p = request.POST
    get = request.GET
    title = get.get("title", '')
    tags = get.get("tags", '')
    try:
        page = int(get.get("page", '1'))
    except ValueError:
        page = 1
    if 'clear' in get:
        form = GallerySearchForm()
    else:
        form = GallerySearchForm(initial={'title': title, 'tags': tags})
    if p and request.user.is_staff:
        pks = []
        for k, v in p.items():
            if k.startswith("sel-"):
                # k, pk = k.split('-')
                # results[pk][k] = v
                pks.append(v)
        results = Gallery.objects.filter(id__in=pks).order_by('-create_date')
        if 'delete_galleries' in p:
            for gallery in results:
                message = 'Removing gallery: {}, link: {}'.format(
                    gallery.title, gallery.get_link())
                frontend_logger.info(message)
                messages.success(request, message)
                gallery.mark_as_deleted()
        elif 'download_galleries' in p:
            for gallery in results:
                message = 'Queueing gallery: {}, link: {}'.format(
                    gallery.title, gallery.get_link())
                frontend_logger.info(message)
                messages.success(request, message)
                # Force replace_metadata when queueing from this list, since it's mostly used to download non-used galleries.
                current_settings = Settings(load_from_config=crawler_settings.config)
                if current_settings.workers.web_queue:
                    current_settings.replace_metadata = True
                    current_settings.retry_failed = True
                    if 'reason' in p and p['reason'] != '':
                        reason = p['reason']
                        # Force limit string length (reason field max_length)
                        current_settings.archive_reason = reason[:200]
                        current_settings.archive_details = gallery.reason
                        current_settings.gallery_reason = reason[:200]
                    elif gallery.reason:
                        current_settings.archive_reason = gallery.reason
                    current_settings.workers.web_queue.enqueue_args_list(
                        (gallery.get_link(),), override_options=current_settings)
        elif 'recall_api' in p:
            message = 'Recalling API for {} galleries'.format(results.count())
            frontend_logger.info(message)
            messages.success(request, message)
            gallery_links = [x.get_link() for x in results]
            gallery_providers = list(results.values_list('provider', flat=True).distinct())
            current_settings = Settings(load_from_config=crawler_settings.config)
            if current_settings.workers.web_queue:
                current_settings.set_update_metadata_options(providers=gallery_providers)
                current_settings.workers.web_queue.enqueue_args_list(
                    gallery_links, override_options=current_settings)
    force_public = 'force_public' in request.GET
    if request.user.is_staff and not force_public:
        providers = Gallery.objects.all().values_list('provider', flat=True).distinct()
        params = {}
        for k, v in get.items():
            params[k] = v
        for k in gallery_filter_keys:
            if k not in params:
                params[k] = ''
        results = filter_galleries_simple(params)
        results = results.non_used_galleries().prefetch_related('foundgallery_set')
        paginator = Paginator(results, 50)
        try:
            results = paginator.page(page)
        except (InvalidPage, EmptyPage):
            results = paginator.page(paginator.num_pages)
        d = {
            'results': results,
            'providers': providers,
            'force_public': force_public,
            'form': form
        }
    else:
        params = {}
        for k, v in get.items():
            params[k] = v
        for k in gallery_filter_keys:
            if k not in params:
                params[k] = ''
        results = filter_galleries_simple(params)
        results = results.non_used_galleries(public=True, provider__in=['panda', 'fakku'])
        d = {'results': results}
    return render(request, "viewer/archives_missing_for_galleries.html", d)

def archive_filesize_different_from_gallery(request: HttpRequest) -> HttpResponse:
    providers = Gallery.objects.all().values_list('provider', flat=True).distinct()
    p = request.POST
    get = request.GET
    title = get.get("title", '')
    tags = get.get("tags", '')
    try:
        page = int(get.get("page", '1'))
    except ValueError:
        page = 1
    if 'clear' in get:
        form = GallerySearchForm()
    else:
        form = GallerySearchForm(initial={'title': title, 'tags': tags})
    if p:
        pks: List[str] = []
        for k, v in p.items():
            if k.startswith("del-"):
                # k, pk = k.split('-')
                # results[pk][k] = v
                pks.extend(p.getlist(k))
        results = Archive.objects.filter(id__in=pks).order_by('-pk')
        for archive in results:
            if 'delete_archives' in p:
                message = "Removing archive: {} and keeping its file: {}".format(
                    archive.title, archive.zipped.name)
                frontend_logger.info(message)
                messages.success(request, message)
                archive.delete()
            if 'delete_archives_and_files' in p:
                message = "Removing archive: {} and deleting its file: {}".format(
                    archive.title, archive.zipped.name)
                frontend_logger.info(message)
                messages.success(request, message)
                archive.delete_all_files()
                archive.delete()
    params = {}
    for k, v in get.items():
        params[k] = v
    for k in gallery_filter_keys:
        if k not in params:
            params[k] = ''
    results = filter_galleries_simple(params)
    results = results.different_filesize_archive()
    paginator = Paginator(results, 50)
    try:
        results = paginator.page(page)
    except (InvalidPage, EmptyPage):
        results = paginator.page(paginator.num_pages)
    d = {'results': results, 'providers': providers, 'form': form}
    return render(request, "viewer/archives_different_filesize.html", d)

def repeated_archives_for_galleries(request: HttpRequest) -> HttpResponse:
    p = request.POST
    get = request.GET
    title = get.get("title", '')
    tags = get.get("tags", '')
    try:
        page = int(get.get("page", '1'))
    except ValueError:
        page = 1
    if 'clear' in get:
        form = GallerySearchForm()
    else:
        form = GallerySearchForm(initial={'title': title, 'tags': tags})
    if p:
        pks: List[str] = []
        for k, v in p.items():
            if k.startswith("del-"):
                # k, pk = k.split('-')
                # results[pk][k] = v
                pks.extend(p.getlist(k))
        results = Archive.objects.filter(id__in=pks).order_by('-pk')
        for archive in results:
            if 'delete_archives_and_files' in p:
                message = 'Removing archive and deleting file: {}'.format(
                    archive.zipped.name)
                frontend_logger.info(message)
                messages.success(request, message)
                archive.delete_all_files()
            else:
                message = 'Removing archive: {}'.format(archive.zipped.name)
                frontend_logger.info(message)
                messages.success(request, message)
            archive.delete()
    params = {
        'sort': 'create_date',
        'asc_desc': 'desc',
    }
    for k, v in get.items():
        params[k] = v
    for k in gallery_filter_keys:
        if k not in params:
            params[k] = ''
    results = filter_galleries_simple(params)
    results = results.several_archives()
    paginator = Paginator(results, 50)
    try:
        results = paginator.page(page)
    except (InvalidPage, EmptyPage):
        results = paginator.page(paginator.num_pages)
    d = {'results': results, 'form': form}
    return render(request, "viewer/archives_repeated.html", d)

def repeated_galleries_by_field(request: HttpRequest) -> HttpResponse:
    p = request.POST
    get = request.GET
    title = get.get("title", '')
    tags = get.get("tags", '')
    if 'clear' in get:
        form = GallerySearchForm()
    else:
        form = GallerySearchForm(initial={'title': title, 'tags': tags})
    if p:
        pks = []
        for k, v in p.items():
            if k.startswith("del-"):
                # k, pk = k.split('-')
                # results[pk][k] = v
                pks.append(v)
        results = Gallery.objects.filter(id__in=pks).order_by('-create_date')
        if 'delete_galleries' in p:
            user_reason = p.get('reason', '')
            for gallery in results:
                message = 'Removing gallery: {}, link: {}'.format(
                    gallery.title, gallery.get_link())
                frontend_logger.info(message)
                messages.success(request, message)
                gallery.mark_as_deleted()
                event_log(
                    request.user,
                    'DELETE_GALLERY',
                    reason=user_reason,
                    content_object=gallery,
                    result='deleted')
    params = {
        'sort': 'create_date',
        'asc_desc': 'desc',
    }
    for k, v in get.items():
        params[k] = v
    for k in gallery_filter_keys:
        if k not in params:
            params[k] = ''
    results = filter_galleries_simple(params)
    results = results.eligible_for_use().exclude(title__exact='')
    if 'has-archives' in get:
        results = results.annotate(archives=Count('archive')).filter(archives__gt=0)
    by_title = {}
    if 'same-uploader' in get:
        for k, v in groupby(results.order_by('title', 'uploader'),
                            lambda x: (x.title, x.uploader)):
            objects = list(v)
            if len(objects) > 1:
                by_title[k] = objects
    else:
        for k, v in groupby(results.order_by('title'), lambda x: x.title):
            objects = list(v)
            if len(objects) > 1:
                by_title[k] = objects
    providers = Gallery.objects.all().values_list('provider', flat=True).distinct()
    d = {'by_title': by_title, 'form': form, 'providers': providers}
    return render(request, "viewer/galleries_repeated_by_fields.html", d)

def submit_queue(request: HttpRequest) -> HttpResponse:
    p = request.POST
    get = request.GET
    title = get.get("title", '')
    tags = get.get("tags", '')
    user_reason = p.get('reason', '')
    try:
        page = int(get.get("page", '1'))
    except ValueError:
        page = 1
    if 'clear' in get:
        form = GallerySearchForm()
    else:
        form = GallerySearchForm(initial={'title': title, 'tags': tags})
    if p:
        pks = []
        for k, v in p.items():
            if k.startswith("sel-"):
                # k, pk = k.split('-')
                # results[pk][k] = v
                pks.append(v)
        preserved = Case(*[When(pk=pk, then=pos) for pos, pk in enumerate(pks)])
        if 'denied' in get:
            results = Gallery.objects.submitted_galleries(id__in=pks).order_by(preserved)
        else:
            results = Gallery.objects.submitted_galleries(
                ~Q(status=Gallery.DENIED), id__in=pks).order_by(preserved)
        if 'deny_galleries' in p:
            for gallery in results:
                message = 'Denying gallery: {}, link: {}, source link: {}'.format(
                    gallery.title, gallery.get_absolute_url(), gallery.get_link())
                if 'reason' in p and p['reason'] != '':
                    message += ', reason: {}'.format(p['reason'])
                frontend_logger.info("User {}: {}".format(request.user.username, message))
                messages.success(request, message)
                gallery.mark_as_denied()
                event_log(
                    request.user,
                    'DENY_GALLERY',
                    reason=user_reason,
                    content_object=gallery,
                    result='denied')
        elif 'download_galleries' in p:
            for gallery in results:
                message = 'Queueing gallery: {}, link: {}, source link: {}'.format(
                    gallery.title, gallery.get_absolute_url(), gallery.get_link())
                if 'reason' in p and p['reason'] != '':
                    message += ', reason: {}'.format(p['reason'])
                frontend_logger.info("User {}: {}".format(request.user.username, message))
                messages.success(request, message)
                event_log(
                    request.user,
                    'ACCEPT_GALLERY',
                    reason=user_reason,
                    content_object=gallery,
                    result='accepted')
                # Force replace_metadata when queueing from this list, since it's mostly used to download non-used galleries.
                current_settings = Settings(load_from_config=crawler_settings.config)
                if current_settings.workers.web_queue:
                    current_settings.replace_metadata = True
                    current_settings.retry_failed = True
                    if 'reason' in p and p['reason'] != '':
                        reason = p['reason']
                        # Force limit string length (reason field max_length)
                        current_settings.archive_reason = reason[:200]
                        current_settings.archive_details = gallery.reason
                        current_settings.gallery_reason = reason[:200]
                    elif gallery.reason:
                        current_settings.archive_reason = gallery.reason

                    def archive_callback(x: Optional['Archive'], crawled_url: Optional[str], result: str) -> None:
                        event_log(
                            request.user,
                            'ADD_ARCHIVE',
                            reason=user_reason,
                            content_object=x,
                            result=result,
                            data=crawled_url)

                    def gallery_callback(x: Optional['Gallery'], crawled_url: Optional[str], result: str) -> None:
                        event_log(
                            request.user,
                            'ADD_GALLERY',
                            reason=user_reason,
                            content_object=x,
                            result=result,
                            data=crawled_url)

                    current_settings.workers.web_queue.enqueue_args_list(
                        (gallery.get_link(),),
                        override_options=current_settings,
                        archive_callback=archive_callback,
                        gallery_callback=gallery_callback,
                    )
    providers = Gallery.objects.all().values_list('provider', flat=True).distinct()
    params = {}
    for k, v in get.items():
        params[k] = v
    for k in gallery_filter_keys:
        if k not in params:
            params[k] = ''
    results = filter_galleries_simple(params)
    if 'denied' in get:
        results = results.submitted_galleries().prefetch_related('foundgallery_set')
    else:
        results = results.submitted_galleries(
            ~Q(status=Gallery.DENIED)).prefetch_related('foundgallery_set')
    paginator = Paginator(results, 50)
    try:
        results = paginator.page(page)
    except (InvalidPage, EmptyPage):
        results = paginator.page(paginator.num_pages)
    d = {'results': results, 'providers': providers, 'form': form}
    return render(request, "viewer/collaborators/submit_queue.html", d)

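# Note on the Case/When "preserved" ordering used in submit_queue above (and
# again in manage_archives below): it keeps queryset rows in the same order as
# the submitted pks list, which plain filter(id__in=pks) does not guarantee.
# Minimal standalone illustration (standard Django ORM; the pk values are
# made up):
#
# from django.db.models import Case, When
#
# pks = [3, 1, 2]
# preserved = Case(*[When(pk=pk, then=pos) for pos, pk in enumerate(pks)])
# Gallery.objects.filter(pk__in=pks).order_by(preserved)  # rows come back as 3, 1, 2
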
def archive_group_edit(request: HttpRequest, pk: Optional[int] = None,
                       slug: Optional[str] = None) -> HttpResponse:
    """ArchiveGroup edit view."""
    try:
        if pk is not None:
            archive_group_instance = ArchiveGroup.objects.get(pk=pk)
        elif slug is not None:
            archive_group_instance = ArchiveGroup.objects.get(title_slug=slug)
        else:
            raise Http404("Archive Group does not exist")
    except ArchiveGroup.DoesNotExist:
        raise Http404("Archive Group does not exist")
    if not archive_group_instance.public and not request.user.is_authenticated:
        raise Http404("Archive Group does not exist")
    get = request.GET
    p = request.POST
    user_reason = p.get('reason', '')
    d = {
        'archive_group': archive_group_instance,
    }
    if request.POST.get('submit-archive-group'):
        # create a form instance and populate it with data from the request:
        edit_form = ArchiveGroupCreateOrEditForm(request.POST, instance=archive_group_instance)
        # check whether it's valid:
        if edit_form.is_valid():
            new_archive_group = edit_form.save()
            message = 'Archive group successfully modified'
            messages.success(request, message)
            frontend_logger.info("User {}: {}".format(request.user.username, message))
            event_log(
                request.user,
                'CHANGE_ARCHIVE_GROUP',
                content_object=new_archive_group,
                result='changed'
            )
            # return HttpResponseRedirect(request.META["HTTP_REFERER"])
        else:
            messages.error(request, 'The provided data is not valid', extra_tags='danger')
            # return HttpResponseRedirect(request.META["HTTP_REFERER"])
    else:
        edit_form = ArchiveGroupCreateOrEditForm(instance=archive_group_instance)
    if 'add_to_group' in p:
        pks = []
        for k, v in p.items():
            if k.startswith("sel-"):
                # k, pk = k.split('-')
                # results[pk][k] = v
                pks.append(v)
        archives = Archive.objects.filter(id__in=pks).order_by('-create_date')
        for archive in archives:
            if not ArchiveGroupEntry.objects.filter(
                    archive=archive, archive_group=archive_group_instance).exists():
                archive_group_entry = ArchiveGroupEntry(
                    archive=archive, archive_group=archive_group_instance)
                archive_group_entry.save()
                message = 'Adding archive: {}, link: {}, to group: {}, link {}'.format(
                    archive.title, archive.get_absolute_url(),
                    archive_group_instance.title, archive_group_instance.get_absolute_url()
                )
                if 'reason' in p and p['reason'] != '':
                    message += ', reason: {}'.format(p['reason'])
                frontend_logger.info("User {}: {}".format(request.user.username, message))
                messages.success(request, message)
                event_log(
                    request.user,
                    'ADD_ARCHIVE_TO_GROUP',
                    content_object=archive,
                    reason=user_reason,
                    result='added'
                )
    d.update(edit_form=edit_form)
    # Multi add search form
    title = get.get("title", '')
    tags = get.get("tags", '')
    try:
        page = int(get.get("page", '1'))
    except ValueError:
        page = 1
    if 'clear' in get:
        search_form = ArchiveSearchForm()
    else:
        search_form = ArchiveSearchForm(initial={'title': title, 'tags': tags})
    params = {
        'sort': 'create_date',
        'asc_desc': 'desc',
        'filename': title,
    }
    for k, v in get.items():
        params[k] = v
    for k in archive_filter_keys:
        if k not in params:
            params[k] = ''
    search_results = filter_archives_simple(params)
    search_results = search_results.exclude(archive_groups=archive_group_instance)
    search_results = search_results.prefetch_related('gallery')
    paginator = Paginator(search_results, 100)
    try:
        search_results = paginator.page(page)
    except (InvalidPage, EmptyPage):
        search_results = paginator.page(paginator.num_pages)
    d.update(search_form=search_form, search_results=search_results)
    return render(request, "viewer/archive_group_edit.html", d)

def wanted_galleries(request: HttpRequest) -> HttpResponse:
    # p = request.POST
    get = request.GET
    title = get.get("title", '')
    tags = get.get("tags", '')
    try:
        page = int(get.get("page", '1'))
    except ValueError:
        page = 1
    if 'clear' in get:
        form = WantedGallerySearchForm()
    else:
        form = WantedGallerySearchForm(initial={'title': title, 'tags': tags})
    if request.POST.get('submit-wanted-gallery') and request.user.has_perm('viewer.add_wantedgallery'):
        # create a form instance and populate it with data from the request:
        edit_form = WantedGalleryCreateOrEditForm(request.POST)
        # check whether it's valid:
        if edit_form.is_valid():
            new_wanted_gallery = edit_form.save()
            message = 'New wanted gallery successfully created'
            messages.success(request, message)
            frontend_logger.info("User {}: {}".format(request.user.username, message))
            event_log(
                request.user,
                'ADD_WANTED_GALLERY',
                content_object=new_wanted_gallery,
                result='created')
        else:
            messages.error(request, 'The provided data is not valid', extra_tags='danger')
            # return HttpResponseRedirect(request.META["HTTP_REFERER"])
    else:
        edit_form = WantedGalleryCreateOrEditForm()
    params = {}
    for k, v in get.items():
        params[k] = v
    for k in wanted_gallery_filter_keys:
        if k not in params:
            params[k] = ''
    results = filter_wanted_galleries_simple(params)
    results = results.prefetch_related(
        # Prefetch(
        #     'gallerymatch_set',
        #     queryset=GalleryMatch.objects.select_related('gallery', 'wanted_gallery').prefetch_related(
        #         Prefetch(
        #             'gallery__tags',
        #             queryset=Tag.objects.filter(scope__exact='artist'),
        #             to_attr='artist_tags'
        #         )
        #     ),
        #     to_attr='possible_galleries'
        # ),
        # 'possible_galleries__gallery__archive_set',
        'artists',
        'announces').order_by('-release_date')
    paginator = Paginator(results, 100)
    try:
        results = paginator.page(page)
    except (InvalidPage, EmptyPage):
        results = paginator.page(paginator.num_pages)
    d = {'results': results, 'form': form, 'edit_form': edit_form}
    return render(request, "viewer/collaborators/wanted_galleries.html", d)

def archives_not_matched_with_gallery(request: HttpRequest) -> HttpResponse:
    p = request.POST
    get = request.GET
    title = get.get("title", '')
    tags = get.get("tags", '')
    try:
        page = int(get.get("page", '1'))
    except ValueError:
        page = 1
    if 'clear' in get:
        form = ArchiveSearchForm()
    else:
        form = ArchiveSearchForm(initial={'title': title, 'tags': tags})
    if p:
        pks = []
        for k, v in p.items():
            if k.startswith("sel-"):
                # k, pk = k.split('-')
                # results[pk][k] = v
                pks.append(v)
        preserved = Case(*[When(pk=pk, then=pos) for pos, pk in enumerate(pks)])
        archives = Archive.objects.filter(id__in=pks).order_by(preserved)
        if 'create_possible_matches' in p:
            if thread_exists('match_unmatched_worker'):
                return render_error(request, "Local matching worker is already running.")
            provider = p['create_possible_matches']
            try:
                cutoff = float(p.get('cutoff', '0.4'))
            except ValueError:
                cutoff = 0.4
            try:
                max_matches = int(p.get('max-matches', '10'))
            except ValueError:
                max_matches = 10
            frontend_logger.info(
                'User {}: Looking for possible matches in gallery database '
                'for non-matched archives (cutoff: {}, max matches: {}) '
                'using provider filter "{}"'.format(
                    request.user.username, cutoff, max_matches, provider))
            matching_thread = threading.Thread(
                name='match_unmatched_worker',
                target=generate_possible_matches_for_archives,
                args=(archives,),
                kwargs={
                    'logger': frontend_logger,
                    'cutoff': cutoff,
                    'max_matches': max_matches,
                    'filters': (provider,),
                    'match_local': True,
                    'match_web': False
                })
            matching_thread.daemon = True
            matching_thread.start()
            messages.success(request, 'Starting internal match worker.')
        elif 'clear_possible_matches' in p:
            for archive in archives:
                archive.possible_matches.clear()
            frontend_logger.info(
                'User {}: Clearing possible matches for archives'.format(request.user.username))
            messages.success(request, 'Clearing possible matches.')
    params = {
        'sort': 'create_date',
        'asc_desc': 'desc',
        'filename': title,
    }
    for k, v in get.items():
        params[k] = v
    for k in archive_filter_keys:
        if k not in params:
            params[k] = ''
    results = filter_archives_simple(params)
    if 'show-matched' not in get:
        results = results.filter(gallery__isnull=True)
    results = results.prefetch_related(
        Prefetch(
            'archivematches_set',
            queryset=ArchiveMatches.objects.select_related('gallery', 'archive').prefetch_related(
                Prefetch(
                    'gallery__tags',
                    queryset=Tag.objects.filter(scope__exact='artist'),
                    to_attr='artist_tags')),
            to_attr='possible_galleries'),
        'possible_galleries__gallery',
    )
    if 'with-possible-matches' in get:
        results = results.annotate(
            n_possible_matches=Count('possible_matches')).filter(n_possible_matches__gt=0)
    paginator = Paginator(results, 50)
    try:
        results = paginator.page(page)
    except (InvalidPage, EmptyPage):
        results = paginator.page(paginator.num_pages)
    d = {
        'results': results,
        'providers': Gallery.objects.all().values_list('provider', flat=True).distinct(),
        'form': form
    }
    return render(request, "viewer/collaborators/unmatched_archives.html", d)

def user_crawler(request: HttpRequest) -> HttpResponse:
    """Crawl given URLs."""
    d = {}
    p = request.POST
    all_downloaders = crawler_settings.provider_context.get_downloaders_name_priority(
        crawler_settings, filter_name='generic_')
    # providers_not_generic = list(set([x[0].provider for x in all_downloaders if not x[0].provider.is_generic()]))
    generic_downloaders = [x[0] for x in all_downloaders]
    user_reason = p.get('reason', '')
    if p:
        current_settings = Settings(load_from_config=crawler_settings.config)
        if not current_settings.workers.web_queue:
            messages.error(request, 'Cannot submit links currently. Please contact an admin.')
            return HttpResponseRedirect(request.META["HTTP_REFERER"])
        url_set = set()
        # create dictionary of properties for each archive
        current_settings.replace_metadata = False
        current_settings.config['allowed']['replace_metadata'] = 'no'
        for k, v in p.items():
            if k == "downloader":
                if v == 'no-generic':
                    continue
                elif v in generic_downloaders:
                    current_settings.enable_downloader_only(v)
            elif k == "urls":
                url_list = v.split("\n")
                for item in url_list:
                    url_set.add(item.rstrip('\r'))
        urls = list(url_set)
        if not urls:
            messages.error(request, 'Submission is empty.')
            return HttpResponseRedirect(request.META["HTTP_REFERER"])
        if 'reason' in p and p['reason'] != '':
            reason = p['reason']
            # Force limit string length (reason field max_length)
            current_settings.archive_reason = reason[:200]
            current_settings.gallery_reason = reason[:200]
        if 'source' in p and p['source'] != '':
            source = p['source']
            # Force limit string length (source field max_length)
            current_settings.archive_source = source[:50]
        current_settings.archive_user = request.user
        parsers = crawler_settings.provider_context.get_parsers_classes()

        def archive_callback(x: Optional['Archive'], crawled_url: Optional[str], result: str) -> None:
            event_log(
                request.user,
                'ADD_ARCHIVE',
                reason=user_reason,
                content_object=x,
                result=result,
                data=crawled_url)

        def gallery_callback(x: Optional['Gallery'], crawled_url: Optional[str], result: str) -> None:
            event_log(
                request.user,
                'ADD_GALLERY',
                reason=user_reason,
                content_object=x,
                result=result,
                data=crawled_url)

        current_settings.workers.web_queue.enqueue_args_list(
            urls,
            override_options=current_settings,
            archive_callback=archive_callback,
            gallery_callback=gallery_callback,
            use_argparser=False)
        messages.success(
            request,
            'Starting Crawler, if the links were correctly added, they should appear on the archive or gallery list.'
        )
        for url in urls:
            frontend_logger.info("User {}: queued link: {}".format(request.user.username, url))
            # event_log(
            #     request.user,
            #     'CRAWL_URL',
            #     reason=user_reason,
            #     data=url,
            #     result='queue'
            # )
        found_valid_urls: List[str] = []
        for parser in parsers:
            if parser.id_from_url_implemented():
                urls_filtered = parser.filter_accepted_urls(urls)
                found_valid_urls.extend(urls_filtered)
                for url_filtered in urls_filtered:
                    gid = parser.id_from_url(url_filtered)
                    gallery = Gallery.objects.filter(gid=gid).first()
                    if not gallery:
                        messages.success(
                            request,
                            '{}: New URL, will be added to the submit queue'.format(url_filtered))
                        event_log(
                            request.user,
                            'CRAWL_URL',
                            reason=user_reason,
                            data=url_filtered,
                            result='queued')
                        continue
                    if gallery.is_submitted():
                        messages.info(
                            request,
                            '{}: Already in submit queue, link: {}, reason: {}'.format(
                                url_filtered, gallery.get_absolute_url(), gallery.reason))
                        event_log(
                            request.user,
                            'CRAWL_URL',
                            reason=user_reason,
                            data=url_filtered,
                            result='already_submitted')
                    elif gallery.public:
                        messages.info(
                            request,
                            '{}: Already present, is public: {}'.format(
                                url_filtered,
                                request.build_absolute_uri(gallery.get_absolute_url())))
                        event_log(
                            request.user,
                            'CRAWL_URL',
                            reason=user_reason,
                            data=url_filtered,
                            result='already_public')
                    else:
                        messages.info(
                            request,
                            '{}: Already present, is not public: {}'.format(
                                url_filtered,
                                request.build_absolute_uri(gallery.get_absolute_url())))
                        event_log(
                            request.user,
                            'CRAWL_URL',
                            reason=user_reason,
                            data=url_filtered,
                            result='already_private')
        extra_urls = [x for x in urls if x not in found_valid_urls]
        for extra_url in extra_urls:
            messages.info(request, '{}: Extra non-provider URL'.format(extra_url))
            event_log(
                request.user,
                'CRAWL_URL',
                reason=user_reason,
                data=extra_url,
                result='queued')
        # Not really optimal when there's many commands being queued
        # for command in url_list:
        #     messages.success(request, command)
        return HttpResponseRedirect(request.META["HTTP_REFERER"])
    d.update({'downloaders': generic_downloaders})
    return render(request, "viewer/collaborators/gallery_crawler.html", d)

def wanted_galleries(request: HttpRequest) -> HttpResponse:
    p = request.POST
    get = request.GET
    title = get.get("title", '')
    tags = get.get("tags", '')
    try:
        page = int(get.get("page", '1'))
    except ValueError:
        page = 1
    if 'clear' in get:
        form = WantedGallerySearchForm()
    else:
        form = WantedGallerySearchForm(initial={'title': title, 'tags': tags})
    if not request.user.is_staff:
        results = WantedGallery.objects.filter(
            Q(should_search=True) & Q(found=False) & Q(public=True)).prefetch_related(
                'artists', 'announces').order_by('-release_date')
        return render(request, "viewer/wanted_galleries.html", {'results': results})
    if p and request.user.is_staff:
        if 'delete_galleries' in p:
            pks = []
            for k, v in p.items():
                if k.startswith("sel-"):
                    # k, pk = k.split('-')
                    # results[pk][k] = v
                    pks.append(v)
            results = WantedGallery.objects.filter(id__in=pks).reverse()
            for wanted_gallery in results:
                message = 'Removing wanted gallery: {}'.format(wanted_gallery.title)
                frontend_logger.info(message)
                messages.success(request, message)
                wanted_gallery.delete()
        elif 'search_for_galleries' in p:
            pks = []
            for k, v in p.items():
                if k.startswith("sel-"):
                    pks.append(v)
            results = WantedGallery.objects.filter(id__in=pks).reverse()
            results.update(should_search=True)
            for wanted_gallery in results:
                message = 'Marking gallery as to search for: {}'.format(wanted_gallery.title)
                frontend_logger.info(message)
                messages.success(request, message)
        elif 'toggle-public' in p:
            pks = []
            for k, v in p.items():
                if k.startswith("sel-"):
                    pks.append(v)
            results = WantedGallery.objects.filter(id__in=pks).reverse()
            results.update(public=True)
            for wanted_gallery in results:
                message = 'Marking gallery as public: {}'.format(wanted_gallery.title)
                frontend_logger.info(message)
                messages.success(request, message)
        elif 'search_provider_galleries' in p:
            if thread_exists('web_search_worker'):
                messages.error(request, 'Web search worker is already running.', extra_tags='danger')
                return HttpResponseRedirect(request.META["HTTP_REFERER"])
            pks = []
            for k, v in p.items():
                if k.startswith("sel-"):
                    pks.append(v)
            results = WantedGallery.objects.filter(id__in=pks).reverse()
            provider = p.get('provider', '')
            try:
                cutoff = float(p.get('cutoff', '0.4'))
            except ValueError:
                cutoff = 0.4
            try:
                max_matches = int(p.get('max-matches', '10'))
            except ValueError:
                max_matches = 10
            message = 'Searching for gallery matches in providers for wanted galleries.'
            frontend_logger.info(message)
            messages.success(request, message)
            panda_search_thread = threading.Thread(
                name='web_search_worker',
                target=create_matches_wanted_galleries_from_providers,
                args=(results, provider),
                kwargs={
                    'logger': frontend_logger,
                    'cutoff': cutoff,
                    'max_matches': max_matches,
                })
            panda_search_thread.daemon = True
            panda_search_thread.start()
        elif 'search_provider_galleries_internal' in p:
            if thread_exists('wanted_local_search_worker'):
                messages.error(
                    request,
                    'Wanted local matching worker is already running.',
                    extra_tags='danger')
                return HttpResponseRedirect(request.META["HTTP_REFERER"])
            pks = []
            for k, v in p.items():
                if k.startswith("sel-"):
                    pks.append(v)
            results = WantedGallery.objects.filter(id__in=pks).reverse()
            provider = p.get('provider', '')
            try:
                cutoff = float(p.get('cutoff', '0.4'))
            except ValueError:
                cutoff = 0.4
            try:
                max_matches = int(p.get('max-matches', '10'))
            except ValueError:
                max_matches = 10
            try:
                must_be_used = bool(p.get('must-be-used', False))
            except ValueError:
                must_be_used = False
            message = 'Searching for gallery matches locally in providers for wanted galleries.'
            frontend_logger.info(message)
            messages.success(request, message)
            matching_thread = threading.Thread(
                # Named to match the thread_exists() guard above.
                name='wanted_local_search_worker',
                target=create_matches_wanted_galleries_from_providers_internal,
                args=(results,),
                kwargs={
                    'logger': frontend_logger,
                    'provider_filter': provider,
                    'cutoff': cutoff,
                    'max_matches': max_matches,
                    'must_be_used': must_be_used
                })
            matching_thread.daemon = True
            matching_thread.start()
        elif 'clear_all_matches' in p:
            GalleryMatch.objects.all().delete()
            message = 'Clearing matches from every wanted gallery.'
            frontend_logger.info(message)
            messages.success(request, message)
    params = {}
    for k, v in get.items():
        params[k] = v
    for k in wanted_gallery_filter_keys:
        if k not in params:
            params[k] = ''
    results = filter_wanted_galleries_simple(params)
    results = results.prefetch_related(
        Prefetch(
            'gallerymatch_set',
            queryset=GalleryMatch.objects.select_related('gallery', 'wanted_gallery').prefetch_related(
                Prefetch(
                    'gallery__tags',
                    queryset=Tag.objects.filter(scope__exact='artist'),
                    to_attr='artist_tags')),
            to_attr='possible_galleries'),
        'possible_galleries__gallery__archive_set',
        'artists',
        'announces').order_by('-release_date')
    paginator = Paginator(results, 100)
    try:
        results = paginator.page(page)
    except (InvalidPage, EmptyPage):
        results = paginator.page(paginator.num_pages)
    matchers = crawler_settings.provider_context.get_matchers_name_priority(
        crawler_settings, matcher_type='title')
    d = {'results': results, 'title_matchers': matchers, 'form': form}
    return render(request, "viewer/wanted_galleries.html", d)

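# thread_exists() is used throughout this module to avoid starting a second
# copy of a named worker. Its definition is not shown here; a minimal sketch
# of such a helper (an assumption, not this project's actual implementation)
# would be:
#
# import threading
#
# def thread_exists(name: str) -> bool:
#     """Return True if a live thread with the given name is running."""
#     return any(t.name == name and t.is_alive() for t in threading.enumerate())
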
def archive_groups_explorer(request: HttpRequest) -> HttpResponse:
    get = request.GET
    title = get.get("title", '')
    try:
        page = int(get.get("page", '1'))
    except ValueError:
        page = 1
    if 'clear' in get:
        form = ArchiveGroupSearchForm()
    else:
        form = ArchiveGroupSearchForm(initial={'title': title})
    d = {
        'form': form,
    }
    if request.user.has_perm('viewer.add_archivegroup'):
        if request.POST.get('submit-archive-group'):
            # create a form instance and populate it with data from the request:
            edit_form = ArchiveGroupCreateOrEditForm(request.POST)
            # check whether it's valid:
            if edit_form.is_valid():
                new_archive_group = edit_form.save()
                message = 'New archive group successfully created'
                messages.success(request, message)
                frontend_logger.info("User {}: {}".format(request.user.username, message))
                event_log(
                    request.user,
                    'ADD_ARCHIVE_GROUP',
                    content_object=new_archive_group,
                    result='created'
                )
            else:
                messages.error(request, 'The provided data is not valid', extra_tags='danger')
                # return HttpResponseRedirect(request.META["HTTP_REFERER"])
        else:
            edit_form = ArchiveGroupCreateOrEditForm()
        d.update(edit_form=edit_form)
    order = 'position'
    results = ArchiveGroup.objects.order_by(F(order).asc(nulls_last=True))
    if not request.user.is_authenticated:
        results = results.filter(public=True)
    q_formatted = '%' + title.replace(' ', '%') + '%'
    # 'ss' is not a built-in Django lookup; it appears to be a custom lookup
    # registered elsewhere in this project.
    results = results.filter(
        Q(title__ss=q_formatted)
    )
    results = results.prefetch_related(
        Prefetch(
            'archivegroupentry_set',
            queryset=ArchiveGroupEntry.objects.select_related('archive_group', 'archive').prefetch_related(
                Prefetch(
                    'archive__tags',
                )
            ),
            to_attr='archivegroup_entries'
        ),
    )
    paginator = Paginator(results, 50)
    try:
        results = paginator.page(page)
    except (InvalidPage, EmptyPage):
        results = paginator.page(paginator.num_pages)
    d.update(results=results)
    return render(request, "viewer/archive_groups.html", d)

def repeated_archives_by_field(request: HttpRequest) -> HttpResponse:
    p = request.POST
    get = request.GET
    title = get.get("title", '')
    tags = get.get("tags", '')
    if 'clear' in get:
        form = ArchiveSearchForm()
    else:
        form = ArchiveSearchForm(initial={'title': title, 'tags': tags})
    if p:
        pks = []
        for k, v in p.items():
            if k.startswith("del-"):
                # k, pk = k.split('-')
                # results[pk][k] = v
                pks.append(v)
        archives = Archive.objects.filter(id__in=pks).order_by('-create_date')
        user_reason = p.get('reason', '')
        if 'delete_archives' in p:
            for archive in archives:
                message = 'Removing archive: {} and deleting file: {}'.format(
                    archive.title, archive.zipped.path)
                frontend_logger.info(message)
                messages.success(request, message)
                gallery = archive.gallery
                # Guard against archives with no associated gallery.
                if gallery:
                    gallery.mark_as_deleted()
                archive.delete_all_files()
                archive.delete()
                event_log(
                    request.user,
                    'DELETE_ARCHIVE',
                    reason=user_reason,
                    content_object=gallery,
                    result='deleted')
        elif 'delete_objects' in p:
            for archive in archives:
                message = 'Removing archive: {}, keeping file: {}'.format(
                    archive.title, archive.zipped.path)
                frontend_logger.info(message)
                messages.success(request, message)
                gallery = archive.gallery
                if gallery:
                    gallery.mark_as_deleted()
                archive.delete_files_but_archive()
                archive.delete()
                event_log(
                    request.user,
                    'DELETE_ARCHIVE',
                    reason=user_reason,
                    content_object=gallery,
                    result='deleted')
    params = {
        'sort': 'create_date',
        'asc_desc': 'desc',
        'filename': title,
    }
    for k, v in get.items():
        params[k] = v
    for k in archive_filter_keys:
        if k not in params:
            params[k] = ''
    results = filter_archives_simple(params)
    if 'no-custom-tags' in get:
        results = results.annotate(
            num_custom_tags=Count('custom_tags')).filter(num_custom_tags=0)
    by_filesize = {}
    by_crc32 = {}
    for k, v in groupby(results.order_by('filesize'), lambda x: x.filesize):
        objects = list(v)
        if len(objects) > 1:
            by_filesize[k] = objects
    for k, v in groupby(results.order_by('crc32'), lambda x: x.crc32):
        objects = list(v)
        if len(objects) > 1:
            by_crc32[k] = objects
    # paginator = Paginator(results, 100)
    # try:
    #     results = paginator.page(page)
    # except (InvalidPage, EmptyPage):
    #     results = paginator.page(paginator.num_pages)
    d = {'by_filesize': by_filesize, 'by_crc32': by_crc32, 'form': form}
    return render(request, "viewer/archives_repeated_by_fields.html", d)

def manage_archives(request: HttpRequest) -> HttpResponse:
    p = request.POST
    get = request.GET
    title = get.get("title", '')
    tags = get.get("tags", '')
    user_reason = p.get('reason', '')
    try:
        page = int(get.get("page", '1'))
    except ValueError:
        page = 1
    if 'clear' in get:
        form = ArchiveSearchForm()
    else:
        form = ArchiveSearchForm(initial={'title': title, 'tags': tags})
    if p:
        pks = []
        for k, v in p.items():
            if k.startswith("sel-"):
                # k, pk = k.split('-')
                # results[pk][k] = v
                pks.append(v)
        preserved = Case(*[When(pk=pk, then=pos) for pos, pk in enumerate(pks)])
        archives = Archive.objects.filter(id__in=pks).order_by(preserved)
        if 'publish_archives' in p and request.user.has_perm('viewer.publish_archive'):
            for archive in archives:
                message = 'Publishing archive: {}, link: {}'.format(
                    archive.title, archive.get_absolute_url())
                if 'reason' in p and p['reason'] != '':
                    message += ', reason: {}'.format(p['reason'])
                frontend_logger.info("User {}: {}".format(request.user.username, message))
                messages.success(request, message)
                archive.set_public(reason=user_reason)
                event_log(
                    request.user,
                    'PUBLISH_ARCHIVE',
                    reason=user_reason,
                    content_object=archive,
                    result='published')
        elif 'unpublish_archives' in p and request.user.has_perm('viewer.publish_archive'):
            for archive in archives:
                message = 'Unpublishing archive: {}, link: {}'.format(
                    archive.title, archive.get_absolute_url())
                if 'reason' in p and p['reason'] != '':
                    message += ', reason: {}'.format(p['reason'])
                frontend_logger.info("User {}: {}".format(request.user.username, message))
                messages.success(request, message)
                archive.set_private(reason=user_reason)
                event_log(
                    request.user,
                    'UNPUBLISH_ARCHIVE',
                    reason=user_reason,
                    content_object=archive,
                    result='unpublished')
        elif 'delete_archives' in p and request.user.has_perm('viewer.delete_archive'):
            for archive in archives:
                message = 'Deleting archive: {}, link: {}, with its file: {} and associated gallery: {}'.format(
                    archive.title, archive.get_absolute_url(),
                    archive.zipped.path, archive.gallery)
                if 'reason' in p and p['reason'] != '':
                    message += ', reason: {}'.format(p['reason'])
                frontend_logger.info("User {}: {}".format(request.user.username, message))
                messages.success(request, message)
                gallery = archive.gallery
                # Guard against archives with no associated gallery.
                if gallery:
                    gallery.mark_as_deleted()
                    archive.gallery = None
                archive.delete_all_files()
                archive.delete()
                event_log(
                    request.user,
                    'DELETE_ARCHIVE',
                    content_object=gallery,
                    reason=user_reason,
                    result='deleted')
        elif 'update_metadata' in p and request.user.has_perm('viewer.update_metadata'):
            for archive in archives:
                gallery = archive.gallery
                # Skip archives with no associated gallery to recall.
                if not gallery:
                    continue
                message = 'Updating gallery API data for gallery: {} and related archives'.format(
                    gallery.get_absolute_url())
                if 'reason' in p and p['reason'] != '':
                    message += ', reason: {}'.format(p['reason'])
                frontend_logger.info("User {}: {}".format(request.user.username, message))
                messages.success(request, message)
                current_settings = Settings(load_from_config=crawler_settings.config)
                if current_settings.workers.web_queue:
                    current_settings.set_update_metadata_options(providers=(gallery.provider,))

                    def gallery_callback(x: Optional['Gallery'], crawled_url: Optional[str], result: str) -> None:
                        event_log(
                            request.user,
                            'UPDATE_METADATA',
                            reason=user_reason,
                            content_object=x,
                            result=result,
                            data=crawled_url)

                    current_settings.workers.web_queue.enqueue_args_list(
                        (gallery.get_link(),),
                        override_options=current_settings,
                        gallery_callback=gallery_callback)
                    frontend_logger.info(
                        'Updating gallery API data for gallery: {} and related archives'.format(
                            gallery.get_absolute_url()))
        elif 'add_to_group' in p and request.user.has_perm('viewer.change_archivegroup'):
            if 'archive_group' in p:
                archive_group_ids = p.getlist('archive_group')
                preserved = Case(*[
                    When(pk=pk, then=pos)
                    for pos, pk in enumerate(archive_group_ids)
                ])
                archive_groups = ArchiveGroup.objects.filter(
                    pk__in=archive_group_ids).order_by(preserved)
                for archive in archives:
                    for archive_group in archive_groups:
                        if not ArchiveGroupEntry.objects.filter(
                                archive=archive, archive_group=archive_group).exists():
                            archive_group_entry = ArchiveGroupEntry(
                                archive=archive, archive_group=archive_group)
                            archive_group_entry.save()
                            message = 'Adding archive: {}, link: {}, to group: {}, link {}'.format(
                                archive.title, archive.get_absolute_url(),
                                archive_group.title, archive_group.get_absolute_url())
                            if 'reason' in p and p['reason'] != '':
                                message += ', reason: {}'.format(p['reason'])
                            frontend_logger.info("User {}: {}".format(request.user.username, message))
                            messages.success(request, message)
                            event_log(
                                request.user,
                                'ADD_ARCHIVE_TO_GROUP',
                                content_object=archive,
                                reason=user_reason,
                                result='added')
    params = {
        'sort': 'create_date',
        'asc_desc': 'desc',
        'filename': title,
    }
    for k, v in get.items():
        params[k] = v
    for k in archive_filter_keys:
        if k not in params:
            params[k] = ''
    results = filter_archives_simple(params)
    results = results.prefetch_related('gallery')
    paginator = Paginator(results, 100)
    try:
        results = paginator.page(page)
    except (InvalidPage, EmptyPage):
        results = paginator.page(paginator.num_pages)
    d = {'results': results, 'form': form}
    if request.user.has_perm('viewer.change_archivegroup'):
        group_form = ArchiveGroupSelectForm()
        d.update(group_form=group_form)
    return render(request, "viewer/collaborators/manage_archives.html", d)

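# event_log() is called throughout this module as
# event_log(user, action, reason=..., content_object=..., result=..., data=...).
# Its definition lives elsewhere in the project; a signature sketch consistent
# with these call sites (parameter names and defaults are assumptions) would be:
#
# def event_log(user, action: str, reason: str = None, content_object=None,
#               result: str = None, data: str = None) -> None:
#     ...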