class FindView(APIView):
    """A view to find submissions or transcriptions by their URL."""

    @csrf_exempt
    @swagger_auto_schema(
        manual_parameters=[
            Parameter(
                "url",
                "query",
                type="string",
                description="The URL to find the object of. "
                "Can be a submission URL, a ToR submission URL or a transcription URL.",
                required=True,
            ),
        ],
        required=["url"],
        responses={
            200: DocResponse(
                "The URL has been found!",
                schema=Schema(
                    type="object",
                    # TODO: Use the schemas of the corresponding models
                    properties={
                        "submission": Schema(type="object"),
                        "author": Schema(type="object"),
                        "transcription": Schema(type="object"),
                        "ocr": Schema(type="object"),
                    },
                ),
            ),
            400: "The given URL has an invalid format.",
            404: "The corresponding submission/transcription could not be found.",
        },
    )
    def get(self, request: Request) -> Response:
        """Find the submission/transcription corresponding to the URL.

        The URL is read from the ``url`` query parameter and normalized before
        the lookup.

        Returns:
            - 400 if the parameter is missing or cannot be normalized.
            - 404 if nothing is found for the normalized URL.
            - 200 with the serialized result otherwise.
        """
        url = request.query_params.get("url")
        # A missing parameter must yield the documented 400 instead of handing
        # None to normalize_url (which is written for string input).
        normalized_url = normalize_url(url) if url is not None else None
        if normalized_url is None:
            return Response(data="Invalid URL.", status=status.HTTP_400_BAD_REQUEST,)

        data = find_by_url(normalized_url)
        if data is None:
            return Response(
                data="No submission or transcription found for the given URL.",
                status=status.HTTP_404_NOT_FOUND,
            )

        return Response(
            data=FindResponseSerializer(data, context={"request": request}).data,
            status=status.HTTP_200_OK,
        )
class PingView(APIView):
    """View to check whether the service is responsive."""

    # Reachable without authentication.
    permission_classes = (AllowAny,)

    @csrf_exempt
    @swagger_auto_schema(
        responses={
            200: DocResponse(
                "Successful pong",
                schema=Schema(
                    type="object",
                    # The key must match the payload actually returned by get().
                    properties={"ping?!": Schema(type="string")},
                ),
            )
        }
    )
    def get(self, request: Request, *args: object, **kwargs: object) -> Response:
        """Ping the server.

        Always answers with HTTP 200 and the body ``{"ping?!": "PONG"}``.
        """
        return Response({"ping?!": "PONG"}, status=status.HTTP_200_OK)
class SummaryView(APIView):
    """A view to request the summary of statistics."""

    permission_classes = (AdminApiKeyCustomCheck,)

    @csrf_exempt
    @swagger_auto_schema(
        responses={
            200: DocResponse(
                "Successful summary provision",
                schema=Schema(
                    type="object",
                    # "integer" is the OpenAPI type name; "int" is not valid.
                    properties={
                        "volunteer_count": Schema(type="integer"),
                        "transcription_count": Schema(type="integer"),
                        "days_since_inception": Schema(type="integer"),
                    },
                ),
            )
        }
    )
    def get(self, request: Request, *args: object, **kwargs: object) -> Response:
        """Get a summary of statistics on volunteers and transcriptions.

        Responds with HTTP 200 and whatever ``Summary().generate_summary()``
        produces as the body.
        """
        return Response(data=Summary().generate_summary(), status=status.HTTP_200_OK)
class VolunteerViewSet(viewsets.ModelViewSet):
    """The API view to view and edit information regarding Volunteers."""

    queryset = BlossomUser.objects.filter(is_volunteer=True).order_by("date_joined")
    serializer_class = VolunteerSerializer
    basename = "volunteer"
    permission_classes = (BlossomApiPermission,)
    filter_backends = [CaseInsensitiveUsernameFilter, DjangoFilterBackend]
    filterset_fields = [
        "id",
        "is_volunteer",
        "is_bot",
        "accepted_coc",
        "blacklisted",
    ]

    @csrf_exempt
    @swagger_auto_schema(
        manual_parameters=[Parameter("username", "query", type="string")],
        responses={
            400: 'No "username" as a query parameter.',
            404: "No volunteer with the specified username.",
        },
    )
    @action(detail=False, methods=["get"])
    @validate_request(query_params={"username"})
    def summary(self, request: Request, username: str = None) -> Response:
        """Get information on the volunteer with the provided username.

        Responds with 404 when no volunteer with that username exists.
        """
        user = get_object_or_404(BlossomUser, username=username, is_volunteer=True)
        return Response(self.serializer_class(user).data)

    @csrf_exempt
    @swagger_auto_schema(
        request_body=no_body, responses={404: "No volunteer with the specified ID."}
    )
    @action(detail=True, methods=["patch"])
    def gamma_plusone(self, request: Request, pk: int) -> Response:
        """
        Add one gamma through a fake completed transcription by the volunteer.

        This method should only be called in the case of erroneous behavior of
        the proper procedure of awarding gamma.
        """
        user = get_object_or_404(BlossomUser, id=pk)

        # The dummy objects are attributed to a dedicated source, which is
        # created on first use.
        gamma_plus_one, _ = Source.objects.get_or_create(name="gamma_plus_one")

        dummy_post = Submission.objects.create(source=gamma_plus_one, completed_by=user)
        Transcription.objects.create(
            submission=dummy_post,
            author=user,
            original_id=str(uuid.uuid4()),
            source=gamma_plus_one,
            text="dummy transcription",
        )
        return Response(self.serializer_class(user).data)

    @csrf_exempt
    @swagger_auto_schema(
        request_body=Schema(
            type="object", properties={"username": Schema(type="string")}
        ),
        responses={
            201: DocResponse("User successfully updated.", schema=serializer_class),
            400: 'No "username" key in the data body.',
            422: "There already exists a volunteer with the specified username.",
        },
    )
    @validate_request(data_params={"username"})
    def create(
        self, request: Request, username: str = None, *args: object, **kwargs: object
    ) -> Response:
        """Create a new user with the specified username.

        Responds with 422 when the username is already taken, otherwise with
        201 and the serialized new user.
        """
        if BlossomUser.objects.filter(username=username).exists():
            return Response(status=status.HTTP_422_UNPROCESSABLE_ENTITY)

        user = BlossomUser.objects.create(username=username)
        # set_unusable_password() only changes the in-memory instance; it has
        # to be saved explicitly for the change to reach the database.
        user.set_unusable_password()
        user.save()

        return Response(
            self.serializer_class(user).data, status=status.HTTP_201_CREATED
        )

    @csrf_exempt
    @swagger_auto_schema(
        request_body=no_body,
        responses={
            200: "The volunteer has been updated successfully.",
            404: "No volunteer with the specified username.",
            409: "The volunteer has already accepted the Code of Conduct.",
        },
    )
    @validate_request(query_params={"username"})
    @action(detail=False, methods=["post"])
    def accept_coc(self, request: Request, username: str) -> Response:
        """Set the requested volunteer as having accepted the Code of Conduct.

        Responds with 409 when the volunteer has already accepted it.
        """
        user = get_object_or_404(BlossomUser, username=username, is_volunteer=True)
        if user.accepted_coc is True:
            return Response(status=status.HTTP_409_CONFLICT)

        user.accepted_coc = True
        user.save()
        return Response(status=status.HTTP_200_OK)
class TranscriptionViewSet(viewsets.ModelViewSet):
    """The API view to view and edit information regarding Transcriptions."""

    queryset = Transcription.objects.all().order_by("-create_time")
    serializer_class = TranscriptionSerializer
    permission_classes = (BlossomApiPermission,)
    filter_backends = [DjangoFilterBackend, OrderingFilter]
    filterset_fields = {
        "id": ["exact"],
        "submission": ["exact"],
        "author": ["exact"],
        "original_id": ["exact", "isnull"],
        "source": ["exact"],
        "url": ["exact", "isnull"],
        "text": ["isnull", "icontains"],
        "removed_from_reddit": ["exact"],
        "create_time": ["gt", "gte", "lte", "lt"],
    }
    ordering_fields = [
        "id",
        "create_time",
        "last_update_time",
    ]

    @csrf_exempt
    @swagger_auto_schema(
        request_body=Schema(
            type="object",
            required=[
                "original_id",
                "source",
                "submission_id",
                "text",
                "url",
                "username",
            ],
            properties={
                "original_id": Schema(type="string"),
                "removed_from_reddit": Schema(type="string"),
                "create_time": Schema(type="string"),
                "source": Schema(type="string"),
                "submission_id": Schema(type="string"),
                "text": Schema(type="string"),
                "url": Schema(type="string"),
                "username": Schema(type="string"),
            },
        ),
        responses={
            201: DocResponse(
                "Successful transcription creation", schema=serializer_class
            ),
            400: "The request does not adhere to the specified HTTP body",
            403: "The volunteer has not accepted the Code of Conduct",
            404: "Either the specified submission or volunteer is not found",
            423: "The user is blacklisted",
        },
    )
    @validate_request(
        data_params={
            "original_id",
            "submission_id",
            "source",
            "text",
            "url",
            "username",
        }
    )
    def create(
        self,
        request: Request,
        original_id: str = None,
        source: str = None,
        submission_id: str = None,
        text: str = None,
        url: str = None,
        username: str = None,
        *args: object,
        **kwargs: object,
    ) -> Response:
        """
        Create a new transcription.

        The following fields are passed in the HTTP Body:
            - original_id           the base36 ID of the comment
            - source                the system which has submitted this request
            - submission_id         the ID of the corresponding submission
            - text                  the text of the transcription
            - url                   the direct url to the transcription
            - username              the username of the authoring volunteer
            - removed_from_reddit   whether the transcription is removed from Reddit
            - create_time           optional; passed through to the new object
                                    when present and non-empty
        """
        # todo: if the original_id is passed in here, make sure this is okay
        submission = get_object_or_404(Submission, id=submission_id)
        user = get_object_or_404(BlossomUser, username=username)
        source = get_object_or_404(Source, name=source)

        # Accept both a JSON boolean and the textual "True"/"true"; comparing
        # against the string "True" alone silently ignores a JSON `true`.
        removed_from_reddit = (
            str(request.data.get("removed_from_reddit", "False")).lower() == "true"
        )

        if user.blacklisted:
            return Response(status=status.HTTP_423_LOCKED)

        if not user.accepted_coc:
            return Response(status=status.HTTP_403_FORBIDDEN)

        transcription_create_data = {
            "submission": submission,
            "author": user,
            "original_id": original_id,
            "url": url,
            "source": source,
            "text": text,
            "removed_from_reddit": removed_from_reddit,
        }
        # Only override the creation time when the caller supplied one.
        if create_time := request.data.get("create_time"):
            transcription_create_data.update({"create_time": create_time})

        transcription = Transcription.objects.create(**transcription_create_data)
        return Response(
            data=self.serializer_class(
                transcription, context={"request": request}
            ).data,
            status=status.HTTP_201_CREATED,
        )
Search for the transcriptions of a specific submission. Note that providing the id of the submission as a query parameter is mandatory. """ queryset = Transcription.objects.filter(submission__id=submission_id) return Response( data=self.serializer_class( queryset, many=True, context={"request": request} ).data ) @csrf_exempt @swagger_auto_schema( responses={ 200: DocResponse( "Successful retrieval of a random transcription", schema=serializer_class, ) } ) @action(detail=False, methods=["get"]) def review_random( self, request: Request, *args: object, **kwargs: object ) -> Response: """ Pull a random transcription that was completed in the last hour and return it. Note that if there are no transcriptions in the last hour, this request returns an empty HTTP body. """ one_hour_ago = timezone.now() - timedelta(hours=1)
def _is_true_flag(value: object) -> bool:
    """Interpret a request value as a boolean flag.

    Real booleans (as sent in JSON bodies) are returned unchanged. Any other
    value counts as set only when its text is "true", ignoring case.
    """
    if isinstance(value, bool):
        return value
    return str(value).lower() == "true"


class SubmissionViewSet(viewsets.ModelViewSet):
    """The API view to view and edit information regarding Submissions."""

    serializer_class = SubmissionSerializer
    permission_classes = (BlossomApiPermission,)
    queryset = Submission.objects.order_by("id")
    filter_backends = [DjangoFilterBackend, OrderingFilter]
    filterset_fields = {
        "id": ["exact"],
        "original_id": ["exact"],
        "claimed_by": ["exact", "isnull"],
        "completed_by": ["exact", "isnull"],
        "create_time": ["gt", "gte", "lte", "lt"],
        "claim_time": ["isnull", "gt", "gte", "lte", "lt"],
        "complete_time": ["isnull", "gt", "gte", "lte", "lt"],
        "source": ["exact"],
        "title": ["exact", "isnull", "icontains"],
        "url": ["exact", "isnull"],
        "tor_url": ["exact", "isnull"],
        "archived": ["exact"],
        "content_url": ["exact", "isnull"],
        "redis_id": ["exact", "isnull"],
        "removed_from_queue": ["exact"],
    }
    ordering_fields = [
        "id",
        "title",
        "create_time",
        "last_update_time",
        "claim_time",
        "complete_time",
    ]

    @csrf_exempt
    @swagger_auto_schema(
        manual_parameters=[
            Parameter("ctq", "query", type="boolean"),
            Parameter("hours", "query", type="integer"),
        ],
        required=["source"],
        responses={
            200: DocResponse("Successful operation", schema=serializer_class),
            400: "The custom hour provided is invalid.",
        },
    )
    @validate_request(query_params={"source"})
    @action(detail=False, methods=["get"])
    def expired(self, request: Request, source: str = None) -> Response:
        """
        Return all old submissions that have not been claimed or completed yet.

        A submission is old when it was created more than `hours` hours ago;
        `hours` defaults to settings.ARCHIVIST_DELAY_TIME. If the query string
        of ctq is passed in with a non-empty value, the age requirement is
        dropped and all posts that have not been completed or claimed are
        returned.

        At most 100 submissions are returned. When no posts are found, an empty
        array is returned in the body.
        """
        if request.query_params.get("ctq", False):
            delay_time = timezone.now()
        else:
            hours = request.query_params.get("hours", settings.ARCHIVIST_DELAY_TIME)
            try:
                hours = int(hours)
                delay_time = timezone.now() - timedelta(hours=hours)
            except ValueError:
                return Response(status=status.HTTP_400_BAD_REQUEST)

        source_obj = get_object_or_404(Source, pk=source)
        queryset = Submission.objects.filter(
            completed_by=None,
            claimed_by=None,
            create_time__lt=delay_time,
            archived=False,
            source=source_obj,
            removed_from_queue=False,
        )
        return Response(self.get_serializer(queryset[:100], many=True).data)

    @csrf_exempt
    @swagger_auto_schema(
        manual_parameters=[Parameter("hours", "query", type="integer")],
        required=["source"],
        responses={
            200: DocResponse("Successful operation", schema=serializer_class),
            400: "The hour provided is invalid.",
        },
    )
    @validate_request(query_params={"source"})
    @action(detail=False, methods=["get"])
    def in_progress(self, request: Request, source: str = None) -> Response:
        """
        Return all old submissions that are still in progress.

        Sometimes submissions get lost in the ether because volunteers forget
        to complete them. This function accepts a query string of `hours` that
        can be used to adjust the amount of time that is considered before
        returning a submission that is still in progress. Default is four
        hours. At most 100 submissions are returned.
        """
        hours = request.query_params.get("hours", 4)
        try:
            hours = int(hours)
            delay_time = timezone.now() - timedelta(hours=hours)
        except ValueError:
            return Response(status=status.HTTP_400_BAD_REQUEST)

        source_obj = get_object_or_404(Source, pk=source)
        queryset = Submission.objects.filter(
            completed_by=None,
            claimed_by__isnull=False,
            claim_time__lt=delay_time,
            archived=False,
            source=source_obj,
            removed_from_queue=False,
        )
        return Response(self.get_serializer(queryset[:100], many=True).data)

    @csrf_exempt
    @swagger_auto_schema(
        responses={200: DocResponse("Successful operation", schema=serializer_class)},
        required=["source"],
    )
    @validate_request(query_params={"source"})
    @action(detail=False, methods=["get"])
    def unarchived(self, request: Request, source: str = None) -> Response:
        """
        Return all completed old submissions which are not archived.

        A submission is old here when it was completed more than
        settings.ARCHIVIST_COMPLETED_DELAY_TIME hours ago. At most 100
        submissions are returned. When no posts are found, an empty array is
        returned in the body.
        """
        source_obj = get_object_or_404(Source, pk=source)
        delay_time = timezone.now() - timedelta(
            hours=settings.ARCHIVIST_COMPLETED_DELAY_TIME
        )
        queryset = Submission.objects.filter(
            completed_by__isnull=False,
            complete_time__lt=delay_time,
            archived=False,
            source=source_obj,
        )
        return Response(data=self.get_serializer(queryset[:100], many=True).data)

    @swagger_auto_schema(
        operation_summary=(
            "Retrieve a count of transcriptions for a volunteer per time frame."
        ),
        operation_description=(
            "A paginated endpoint. Pass page_size to control number of results"
            " returned, page to select a different block."
        ),
        manual_parameters=[
            Parameter(
                "time_frame",
                "query",
                type="string",
                enum=["none", "hour", "day", "week", "month", "year"],
                description="The time interval to calculate the rate by. "
                'Must be one of "none", "hour", "day", "week", "month" or "year".'
                'For example, "none" will return the date of every transcription '
                'separately, while "day" will return the daily transcribing rate.',
            ),
            Parameter(
                "utc_offset",
                "query",
                type="number",
                description="The timezone offset to calculate the rate on, in seconds.",
                default=0,
                required=False,
            ),
            Parameter("page_size", "query", type="number"),
            Parameter("page", "query", type="number"),
        ],
    )
    @action(detail=False, methods=["get"])
    def rate(self, request: Request) -> Response:
        """Get the number of transcriptions the volunteer made per time frame.

        The counts are computed over completed submissions, after applying the
        filters given in the request.

        IMPORTANT: To reduce the number of entries, this does not include
        days on which the user did not make any transcriptions!
        """
        time_frame = request.GET.get("time_frame", "day")
        utc_offset = int(request.GET.get("utc_offset", "0"))
        # Construct a timezone from the offset
        tzinfo = datetime.timezone(datetime.timedelta(seconds=utc_offset))

        trunc_dict = {
            # Don't group the transcriptions at all
            # TODO: Make this a true noop for transcriptions posted in the same second
            "none": TruncSecond,
            "hour": TruncHour,
            "day": TruncDay,
            # Unfortunately weeks starts on Sunday for this.
            # There doesn't seem to be an ISO week equivalent :(
            "week": TruncWeek,
            "month": TruncMonth,
            "year": TruncYear,
        }
        # Unknown time frames fall back to truncating to the date.
        trunc_fn = trunc_dict.get(time_frame, TruncDate)

        # https://stackoverflow.com/questions/8746014/django-group-by-date-day-month-year
        rate = (
            self.filter_queryset(Submission.objects)
            .filter(complete_time__isnull=False)
            .annotate(date=trunc_fn("complete_time", tzinfo=tzinfo))
            .values("date")
            .annotate(count=Count("id"))
            .values("date", "count")
            .order_by("date")
        )

        pagination = StandardResultsSetPagination()
        page = pagination.paginate_queryset(rate, request)
        return pagination.get_paginated_response(page)

    @csrf_exempt
    @swagger_auto_schema(
        operation_summary=("Get the data to construct a heatmap of the submissions."),
        manual_parameters=[
            Parameter(
                "utc_offset",
                "query",
                type="number",
                description="The timezone offset to calculate the rate on, in seconds.",
                default=0,
                required=False,
            ),
        ],
    )
    @action(detail=False, methods=["get"])
    def heatmap(self, request: Request) -> Response:
        """Get the data to generate a heatmap for the volunteer.

        This includes one entry for every weekday and every hour containing the
        number of transcriptions made in that time slot.
        For example, there will be an entry for Sundays at 13:00 UTC, counting
        how many transcriptions the volunteer made in that time.

        The week days are numbered Monday=1 through Sunday=7.
        """
        utc_offset = int(request.GET.get("utc_offset", "0"))
        # Construct a timezone from the offset
        tzinfo = datetime.timezone(datetime.timedelta(seconds=utc_offset))

        heatmap = (
            self.filter_queryset(Submission.objects)
            .filter(complete_time__isnull=False)
            # Extract the day of the week and the hour the transcription was made in
            .annotate(
                day=ExtractIsoWeekDay("complete_time", tzinfo=tzinfo),
                hour=ExtractHour("complete_time", tzinfo=tzinfo),
            )
            # Group by the day and hour
            .values("day", "hour")
            # Count the transcription made in each time slot
            .annotate(count=Count("id"))
            # Return the values
            .values("day", "hour", "count")
            # Order by day first, then hour
            .order_by("day", "hour")
        )

        return Response(heatmap)

    @csrf_exempt
    @swagger_auto_schema(
        request_body=Schema(
            type="object", properties={"username": Schema(type="string")}
        ),
        responses={
            201: DocResponse("Successful unclaim operation", schema=serializer_class),
            400: "The volunteer username is not provided",
            404: "The specified volunteer or submission is not found",
            406: "The specified volunteer has not claimed the specified submission",
            409: "The submission has already been completed",
            412: "The submission has not yet been claimed",
            423: "The user is blacklisted",
        },
    )
    @validate_request(data_params={"username"})
    @action(detail=True, methods=["patch"])
    def unclaim(self, request: Request, pk: int, username: str = None) -> Response:
        """
        Unclaim the specified submission, from the specified volunteer.

        The volunteer is specified in the HTTP body.
        """
        submission = get_object_or_404(Submission, id=pk)
        user = get_object_or_404(BlossomUser, username=username)

        if user.blacklisted:
            return Response(status=status.HTTP_423_LOCKED)

        if submission.claimed_by is None:
            return Response(status=status.HTTP_412_PRECONDITION_FAILED)

        if submission.claimed_by != user:
            return Response(status=status.HTTP_406_NOT_ACCEPTABLE)

        if submission.completed_by is not None:
            return Response(status=status.HTTP_409_CONFLICT)

        submission.claimed_by = None
        submission.claim_time = None
        submission.save()

        return Response(
            status=status.HTTP_201_CREATED,
            data=self.serializer_class(submission, context={"request": request}).data,
        )

    @csrf_exempt
    @swagger_auto_schema(
        request_body=Schema(
            type="object", properties={"username": Schema(type="string")}
        ),
        responses={
            201: DocResponse("Successful claim operation", schema=serializer_class),
            400: "The volunteer username is not provided",
            403: "The volunteer has not accepted the Code of Conduct",
            404: "The specified volunteer or submission is not found",
            409: "The submission is already claimed",
            423: "The user is blacklisted",
            460: "The volunteer has already claimed too many posts",
        },
    )
    @validate_request(data_params={"username"})
    @action(detail=True, methods=["patch"])
    def claim(self, request: Request, pk: int, username: str = None) -> Response:
        """
        Claim the specified submission from the specified volunteer.

        The volunteer is specified in the HTTP body. On a 409 the body holds
        the volunteer who already claimed the submission; on a 460 it holds the
        submissions the requesting volunteer has currently claimed.
        """
        submission = get_object_or_404(Submission, id=pk)
        user = get_object_or_404(BlossomUser, username=username)

        if user.blacklisted:
            return Response(status=status.HTTP_423_LOCKED)

        if not user.accepted_coc:
            return Response(status=status.HTTP_403_FORBIDDEN)

        if submission.claimed_by is not None:
            return Response(
                data=VolunteerViewSet.serializer_class(
                    submission.claimed_by, context={"request": request}
                ).data,
                status=status.HTTP_409_CONFLICT,
            )

        # Determine how many submissions the user has already claimed
        claimed_submissions = Submission.objects.filter(
            claimed_by=user, archived=False, completed_by__isnull=True
        )
        claimed_count = claimed_submissions.count()

        # Walk MAX_CLAIMS in reverse and apply only the first restriction whose
        # gamma threshold the user reaches.
        for claim_restriction in reversed(MAX_CLAIMS):
            if user.gamma >= claim_restriction["gamma"]:
                if claimed_count >= claim_restriction["claims"]:
                    # The user has already claimed too many submissions
                    return Response(
                        data=self.get_serializer(
                            claimed_submissions,
                            context={"request": request},
                            many=True,
                        ).data,
                        status=460,
                    )
                break

        submission.claimed_by = user
        submission.claim_time = timezone.now()
        submission.save()

        return Response(
            status=status.HTTP_201_CREATED,
            data=self.serializer_class(submission, context={"request": request}).data,
        )

    @csrf_exempt
    @swagger_auto_schema(
        request_body=Schema(
            type="object",
            required=["username"],
            properties={
                "username": Schema(type="string"),
                "mod_override": Schema(type="boolean"),
            },
        ),
        responses={
            201: DocResponse("Successful done operation", schema=serializer_class),
            400: "The volunteer username is not provided",
            403: "The volunteer has not accepted the Code of Conduct",
            404: "The specified volunteer or submission is not found",
            409: "The submission is already completed",
            412: "The submission is not claimed or claimed by someone else",
            423: "The user is blacklisted",
            428: "A transcription belonging to the volunteer was not found",
        },
    )
    @validate_request(data_params={"username"})
    @action(detail=True, methods=["patch"])
    def done(self, request: Request, pk: int, username: str = None) -> Response:
        """
        Mark the submission as done from the specified volunteer.

        When "mod_override" is provided as a field in the HTTP body and is
        true, and the requesting user is a mod, then the check of whether the
        completing volunteer is the volunteer that claimed the submission is
        skipped.

        Note that this API call has a certain chance to send a message to
        Slack for the random check of this transcription.
        """
        submission = get_object_or_404(Submission, id=pk)
        user = get_object_or_404(BlossomUser, username=username)

        if user.blacklisted:
            return Response(status=status.HTTP_423_LOCKED)

        if not user.accepted_coc:
            return Response(status=status.HTTP_403_FORBIDDEN)

        if submission.completed_by is not None:
            return Response(status=status.HTTP_409_CONFLICT)

        if submission.claimed_by is None:
            return Response(status=status.HTTP_412_PRECONDITION_FAILED)

        # The flag is documented as a boolean, so a JSON `true` must count as
        # well as the textual "True". Staff status is only consulted when the
        # flag is actually set.
        mod_override = (
            _is_true_flag(request.data.get("mod_override", "False"))
            and request.user.is_grafeas_staff
        )

        # NOTE(review): nesting reconstructed from the collapsed source; the
        # transcription checks are taken to be part of the non-override path.
        if not mod_override:
            if submission.claimed_by != user:
                return Response(status=status.HTTP_412_PRECONDITION_FAILED)

            transcription = Transcription.objects.filter(submission=submission).first()
            if not transcription:
                return Response(status=status.HTTP_428_PRECONDITION_REQUIRED)

            if _should_check_transcription(user):
                # Check to see if the transcription has been removed. If it has, only
                # post the message to slack if the user has completed 5 or fewer posts.
                if not transcription.removed_from_reddit or user.gamma <= 5:
                    _send_transcription_to_slack(transcription, submission, user, slack)

        submission.completed_by = user
        submission.complete_time = timezone.now()
        submission.save()

        _check_for_rank_up(user, submission)

        return Response(
            status=status.HTTP_201_CREATED,
            data=self.serializer_class(submission, context={"request": request}).data,
        )

    @csrf_exempt
    @swagger_auto_schema(
        request_body=Schema(
            type="object",
            required=["original_id", "source", "content_url"],
            properties={
                "original_id": Schema(type="string"),
                "source": Schema(type="string"),
                "url": Schema(type="string"),
                "tor_url": Schema(type="string"),
                "content_url": Schema(type="string"),
                "cannot_ocr": Schema(type="boolean"),
                "nsfw": Schema(type="boolean"),
                "title": Schema(type="string"),
            },
        ),
        responses={
            201: DocResponse("Successful creation", schema=serializer_class),
            400: "Required parameters not provided",
            404: "Source requested was not found",
        },
    )
    @validate_request(data_params={"original_id", "source", "content_url"})
    def create(
        self,
        request: Request,
        original_id: str = None,
        source: str = None,
        content_url: str = None,
        cannot_ocr: bool = None,
        *args: object,
        **kwargs: object,
    ) -> Response:
        """
        Create a new submission.

        Note that both the original id, source, and content_url should be
        supplied. The optional fields url, tor_url, cannot_ocr, nsfw and title
        are read from the HTTP body.
        """
        source_obj = get_object_or_404(Source, pk=source)
        url = request.data.get("url")
        tor_url = request.data.get("tor_url")
        # allows pre-marking submissions we know won't be able to make it through OCR
        # The `cannot_ocr` argument is ignored in favour of the body value.
        # Both flags are documented as booleans, so JSON true/false must work too.
        cannot_ocr = _is_true_flag(request.data.get("cannot_ocr", "False"))
        nsfw = _is_true_flag(request.data.get("nsfw", "False"))
        title = request.data.get("title")

        submission = Submission.objects.create(
            original_id=original_id,
            source=source_obj,
            url=url,
            tor_url=tor_url,
            content_url=content_url,
            cannot_ocr=cannot_ocr,
            nsfw=nsfw,
            title=title,
        )
        return Response(
            status=status.HTTP_201_CREATED,
            data=self.serializer_class(submission, context={"request": request}).data,
        )

    @csrf_exempt
    @swagger_auto_schema(
        responses={
            200: DocResponse("Successful operation", schema=serializer_class),
            400: "Required parameters not provided",
        }
    )
    @validate_request(query_params={"source"})
    @action(detail=False, methods=["get"])
    def get_transcribot_queue(
        self, request: Request, source: str = None
    ) -> JsonResponse:
        """
        Get the submissions that still need to be attempted by transcribot.

        The helper method of `.has_ocr_transcription` exists, but you cannot
        filter a django queryset on a property because it's generated in Python,
        not stored in the database.

        The transcriptions returned here have text but are missing vital
        information (like the original_id), because this information will be
        added by transcribot when the transcription is posted. This endpoint
        will return all the submissions that need updates along with their
        transcription FKs, then transcribot pulls the transcription text as
        needed.

        Brief walkthrough of this query:

        Grab all submissions that:
            * are from a given source
            * have a transcription object written by transcribot
            * that the transcription objects do NOT have an original_id key
                - if that key was there, that would mean that the transcription
                  had been posted
            * that the submission has not been marked as removed from the queue
                - ie. it broke rules and was reported & removed
            * are not marked as `cannot_ocr`
        """
        source_obj = get_object_or_404(Source, pk=source)
        transcribot = BlossomUser.objects.get(username="******")
        return_limit = _get_limit_value(request)
        queryset = Submission.objects.filter(
            source=source_obj,
            transcription__author=transcribot,
            transcription__original_id__isnull=True,
            removed_from_queue=False,
            cannot_ocr=False,
        ).values("id", "tor_url", "transcription__id", "transcription__text")[
            :return_limit
        ]
        return JsonResponse({"data": list(queryset)})

    @csrf_exempt
    @swagger_auto_schema(
        request_body=Schema(
            type="object",
            required=["username"],
            properties={
                "username": Schema(type="string"),
                "count": Schema(type="integer"),
            },
        ),
        responses={
            200: DocResponse(
                "Submissions were successfully yeeted", schema=serializer_class
            ),
            400: "Required parameters not provided",
            411: "No yeetable submissions were found",
        },
    )
    @validate_request(data_params={"username"})
    @action(detail=False, methods=["post"])
    def yeet(self, request: Request, username: str = None) -> Response:
        """
        Manually fix users who have too many auto-generated submissions.

        For an unidentified reason, sometimes the bootstrap script is creating
        too many submissions for a given user. This function allows us to yeet
        some of the offending submissions out of the database while we focus
        on cleaning and maintaining the data with the redis cache after
        deployment.

        Up to `count` (default 1) submissions completed by the user whose
        original_id is longer than 10 characters are deleted.
        """
        user = get_object_or_404(BlossomUser, username=username)
        count = int(request.data.get("count", 1))

        auto_generated_submissions = (
            Submission.objects.filter(completed_by=user)
            .annotate(id_len=Length("original_id"))
            .filter(id_len__gt=10)
        )
        if auto_generated_submissions.count() == 0:
            return Response(status=status.HTTP_411_LENGTH_REQUIRED)

        # A sliced queryset cannot be deleted directly, so re-select by primary key.
        qs = Submission.objects.filter(
            pk__in=auto_generated_submissions.values_list("pk", flat=True)[:count]
        )
        yeeted = qs.count()
        qs.delete()

        return Response(status=status.HTTP_200_OK, data={"total_yeeted": yeeted})

    @csrf_exempt
    @action(detail=False, methods=["post"])
    def bulkcheck(self, request: Request) -> Response:
        """Start with of a list of URLs, then return which ones are new to us.

        The list is read from the "urls" key of the body. The response holds,
        in their original order, the URLs for which no submission exists.
        A body without "urls" is answered with 400.
        """
        # we can't do a filter for things that don't exist, and excluding doesn't
        # make sense here because we're looking for IDs that actually don't exist.
        urls = dict(request.data).get("urls")
        if urls is None:
            return Response(status=status.HTTP_400_BAD_REQUEST)

        # Collect the known URLs once and filter against the set. This also
        # drops every duplicate of a known URL, not just its first occurrence.
        known_urls = set(
            Submission.objects.filter(url__in=urls).values_list("url", flat=True)
        )
        new_urls = [url for url in urls if url not in known_urls]

        return Response(status=status.HTTP_200_OK, data=new_urls)

    @csrf_exempt
    @swagger_auto_schema(
        manual_parameters=[
            Parameter(
                "user_id",
                "query",
                type="number",
                description="The user to center the leaderboard on.",
            ),
            Parameter(
                "top_count",
                "query",
                type="number",
                description="The number of users to show from the top leaderboard.",
            ),
            Parameter(
                "above_count",
                "query",
                type="number",
                description="The number of users to show above the given user.",
            ),
            Parameter(
                "below_count",
                "query",
                type="number",
                description="The number of users to show below the given user.",
            ),
        ],
        responses={404: "No volunteer with the specified ID."},
    )
    @action(detail=False, methods=["get"])
    def leaderboard(self, request: Request,) -> Response:
        """Get the leaderboard for the given user.

        The body contains the keys "top", "above", "user" and "below". The
        last three are None when no user_id is given. A user_id that does not
        appear in the ranking is answered with 404.
        """
        user_id = request.GET.get("user_id", None)
        if user_id is not None:
            user_id = int(user_id)
        top_count = int(request.GET.get("top_count", 5))
        above_count = int(request.GET.get("above_count", 5))
        below_count = int(request.GET.get("below_count", 5))

        above_data = user_data = below_data = None

        rank_query = (
            # Apply the provided submission filters
            self.filter_queryset(Submission.objects)
            .filter(completed_by__isnull=False)
            # Add author information
            .select_related("completed_by")
            # Group by author
            .values(
                "completed_by", "completed_by__username", "completed_by__date_joined"
            )
            # Count gamma
            .annotate(
                gamma=Count("completed_by"),
                id=F("completed_by"),
                username=F("completed_by__username"),
                date_joined=F("completed_by__date_joined"),
            )
            .values("id", "username", "gamma", "date_joined")
            .order_by(F("gamma").desc(), F("date_joined").desc())
        )

        # TODO: This is very inefficient, maybe there's a better way to do this?
        # Originally we used window expressions to annotate the ranks directly
        # https://stackoverflow.com/questions/54595867/django-model-how-to-add-order-index-annotation
        # Unfortunately that is not supported on all backends
        # Instead, we convert the query into a list and also add the ranks manually
        rank_list = [{**entry, "rank": i + 1} for i, entry in enumerate(rank_query)]

        # Find the top users
        top_data = rank_list[:top_count]

        if user_id is not None:
            # Find the queried user in the list
            # TODO: Find a more efficient way to do this
            user_index = next(
                (i for i, entry in enumerate(rank_list) if entry["id"] == user_id),
                None,
            )
            if user_index is None:
                # Only users with at least one completed submission (under the
                # active filters) are ranked; anyone else cannot be centered on.
                return Response(status=status.HTTP_404_NOT_FOUND)

            user_data = rank_list[user_index]
            # Users with more gamma than the current user. The start index is
            # clamped at 0: a negative start would count from the end of the
            # list and return the wrong users (or nothing) near the top.
            above_data = rank_list[max(user_index - above_count, 0) : user_index]
            # Users with less gamma than the current user
            below_data = rank_list[user_index + 1 : user_index + 1 + below_count]

        data = {
            "top": top_data,
            "above": above_data,
            "user": user_data,
            "below": below_data,
        }

        return Response(data)

    @csrf_exempt
    @swagger_auto_schema(
        request_body=Schema(
            type="object", properties={"removed_from_queue": Schema(type="boolean")}
        ),
        responses={
            200: DocResponse("Successful removal", schema=serializer_class),
            404: "Submission not found.",
        },
    )
    @action(detail=True, methods=["patch"])
    def remove(self, request: Request, pk: int) -> Response:
        """
        Remove the submission from the queue.

        It is also possible to revert the removal by setting removed_from_queue
        to false in the body of the request.
        """
        submission = get_object_or_404(Submission, id=pk)
        removed_from_queue = request.data.get("removed_from_queue", True)
        submission.removed_from_queue = removed_from_queue
        if removed_from_queue:
            # Revert the approval
            submission.approved = False
        submission.save()

        return Response(
            status=status.HTTP_200_OK,
            data=self.serializer_class(submission, context={"request": request}).data,
        )

    @csrf_exempt
    @swagger_auto_schema(
        request_body=Schema(
            type="object", properties={"reason": Schema(type="string")}
        ),
        responses={
            201: DocResponse("Successful report", schema=serializer_class),
            404: "Submission not found.",
        },
    )
    @validate_request(data_params={"reason"})
    @action(detail=True, methods=["patch"])
    def report(self, request: Request, pk: int, reason: str) -> Response:
        """Report the given submission.

        This will send a message to the mods to review the submission.
        Reports on submissions that are already removed, reported or approved
        are acknowledged with 201 but otherwise ignored.
        """
        submission = get_object_or_404(Submission, id=pk)

        if (
            submission.removed_from_queue
            or submission.report_reason is not None
            or submission.approved
        ):
            # The submission is already removed, reported or approved-- ignore the report
            return Response(
                status=status.HTTP_201_CREATED,
                data=self.serializer_class(
                    submission, context={"request": request}
                ).data,
            )

        # Save the report reason
        submission.report_reason = reason
        submission.save(skip_extras=True)

        # Send the report to mod chat
        ask_about_removing_post(submission, reason)

        return Response(
            status=status.HTTP_201_CREATED,
            data=self.serializer_class(submission, context={"request": request}).data,
        )

    @csrf_exempt
    @swagger_auto_schema(
        request_body=Schema(
            type="object", properties={"approved": Schema(type="boolean")}
        ),
        responses={
            200: DocResponse("Successful approval", schema=serializer_class),
            404: "Submission not found.",
        },
    )
    @action(detail=True, methods=["patch"])
    def approve(self, request: Request, pk: int) -> Response:
        """
        Approve the submission.

        This will prevent future reports from being generated for this
        submission. The approval can be withdrawn by setting approved to false
        in the body of the request.
        """
        submission = get_object_or_404(Submission, id=pk)
        approved = request.data.get("approved", True)
        submission.approved = approved
        if approved:
            # Revert the removal
            submission.removed_from_queue = False
        submission.save()

        return Response(
            status=status.HTTP_200_OK,
            data=self.serializer_class(submission, context={"request": request}).data,
        )

    @csrf_exempt
    @swagger_auto_schema(
        request_body=Schema(
            type="object", properties={"nsfw": Schema(type="boolean")}
        ),
        responses={
            200: DocResponse(
                "Successfully marked as NSFW (or SWF)", schema=serializer_class
            ),
            404: "Submission not found.",
        },
    )
    @action(detail=True, methods=["patch"])
    def nsfw(self, request: Request, pk: int) -> Response:
        """
        Mark a submission as NSFW.

        It is also possible to set it back to SFW by setting nsfw
        to false in the body of the request.
        """
        submission = get_object_or_404(Submission, id=pk)
        nsfw = request.data.get("nsfw", True)
        submission.nsfw = nsfw
        submission.save()

        return Response(
            status=status.HTTP_200_OK,
            data=self.serializer_class(submission, context={"request": request}).data,
        )