def tasks(self, request, pk=None):
    """
    get:
    Get task list for view

    Retrieve a list of tasks with pagination for a specific view using filters and ordering.
    """
    view = self.get_object()
    queryset = self.get_task_queryset(request, view)
    context = {
        'proxy': bool_from_request(request.GET, 'proxy', True),
        'resolve_uri': True,
        'request': request
    }
    project = view.project

    # paginated tasks
    self.pagination_class = TaskPagination
    page = self.paginate_queryset(queryset)
    if page is not None:
        # retrieve ML predictions if tasks don't have them
        if project.evaluate_predictions_automatically:
            ids = [task.id for task in page]  # page is already a list
            tasks_for_predictions = Task.objects.filter(id__in=ids, predictions__isnull=True)
            evaluate_predictions(tasks_for_predictions)
        serializer = self.task_serializer_class(page, many=True, context=context)
        return self.get_paginated_response(serializer.data)

    # all tasks
    if project.evaluate_predictions_automatically:
        evaluate_predictions(queryset.filter(predictions__isnull=True))
    serializer = self.task_serializer_class(queryset, many=True, context=context)
    return Response(serializer.data)
def get(self, request, pk):
    """
    get:
    Task by ID

    Retrieve a specific task by ID.
    """
    task = Task.prepared.get(id=pk)
    context = {
        'proxy': bool_from_request(request.GET, 'proxy', True),
        'resolve_uri': True,
        'completed_by': 'full',
        'request': request
    }

    # retrieve an ML prediction if the task doesn't have one yet
    if task.project.evaluate_predictions_automatically and not task.predictions.exists():
        evaluate_predictions([task])

    serializer = self.get_serializer_class()(task, many=False, context=context)
    data = serializer.data
    return Response(data)
def test_core_bool_from_request(param, result):
    params = {'test': param} if param is not None else {}

    # an incorrect param should raise an exception
    if result is None:
        error = False
        try:
            bool_from_request(params, 'test', 0)
        except Exception:
            error = True
        assert error
    # everything is ok
    else:
        assert bool_from_request(params, 'test', 0) == result
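# A minimal sketch of the bool_from_request helper exercised by the test above,
# assuming only the contract the test implies: a missing key falls back to the
# default, recognized spellings are coerced to bool, and anything else raises.
# The real helper may accept different spellings or raise a different exception type.
def bool_from_request(params, key, default):
    """Coerce request param `key` to bool; return `default` if the key is absent."""
    if key not in params:
        return bool(default)
    value = str(params[key]).lower()
    if value in ('1', 'true', 'yes'):
        return True
    if value in ('0', 'false', 'no'):
        return False
    raise ValueError(f'Incorrect boolean value "{params[key]}" for key "{key}"')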
def tasks(self, request, pk=None):
    """
    get:
    Get task list for view

    Retrieve a list of tasks with pagination for a specific view using filters and ordering.
    """
    view = self.get_object()
    queryset = Task.prepared.all(prepare_params=view.get_prepare_tasks_params())
    context = {
        'proxy': bool_from_request(request.GET, 'proxy', True),
        'resolve_uri': True
    }

    # paginated tasks
    self.pagination_class = TaskPagination
    page = self.paginate_queryset(queryset)
    if page is not None:
        self.evaluate_predictions(page)
        serializer = self.task_serializer_class(page, many=True, context=context)
        return self.get_paginated_response(serializer.data)

    # all tasks
    self.evaluate_predictions(queryset)
    serializer = self.task_serializer_class(queryset, many=True, context=context)
    return Response(serializer.data)
def perform_create(self, ser):
    # an annotator has write access only to annotations; this can't be checked after serializer.save()
    task = get_object_with_check_and_log(self.request, Task, pk=self.kwargs['pk'])
    user = self.request.user

    # updates history
    result = ser.validated_data.get('result')
    extra_args = {'task_id': self.kwargs['pk']}

    # save stats about how well the annotator's annotations coincide with the current prediction,
    # only for finished task annotations
    if result is not None:
        prediction = Prediction.objects.filter(task=task, model_version=task.project.model_version)
        if prediction.exists():
            prediction = prediction.first()
            prediction_ser = PredictionSerializer(prediction).data
        else:
            logger.debug(f'User={self.request.user}: there are no predictions for task={task}')
            prediction_ser = {}
        # serialize annotation
        extra_args.update({
            'prediction': prediction_ser,
        })

    if 'was_cancelled' in self.request.GET:
        extra_args['was_cancelled'] = bool_from_request(self.request.GET, 'was_cancelled', False)

    if 'completed_by' not in ser.validated_data:
        extra_args['completed_by'] = self.request.user

    # create annotation
    logger.debug(f'User={self.request.user}: save annotation')
    annotation = ser.save(**extra_args)
    logger.debug(f'Save activity for user={self.request.user}')
    self.request.user.activity_at = timezone.now()
    self.request.user.save()

    # Release the task if it has been taken at work
    # (it should be released by the same user, otherwise it raises a Sentry error)
    logger.debug(f'User={user} releases task={task}')
    task.release_lock(user)

    # if the annotation was created from a draft, remove that draft
    draft_id = self.request.data.get('draft_id')
    if draft_id is not None:
        logger.debug(f'Remove draft {draft_id} after creating annotation {annotation.id}')
        AnnotationDraft.objects.filter(id=draft_id).delete()

    if self.request.data.get('ground_truth'):
        annotation.task.ensure_unique_groundtruth(annotation_id=annotation.id)

    return annotation
def _make_response(self, next_task, request, use_task_lock=True):
    """Once the next task has been chosen, this function triggers inference and prepares the API response"""
    user = request.user
    project = next_task.project

    if use_task_lock:
        # lock the task with a TTL 3x the current average lead time (or 1 hour by default)
        next_task.set_lock(request.user)

    # call the machine learning API and format the response
    if project.show_collab_predictions:
        for ml_backend in project.ml_backends.all():
            ml_backend.predict_one_task(next_task)

    # serialize task
    context = {
        'request': request,
        'project': project,
        'resolve_uri': True,
        'proxy': bool_from_request(request.GET, 'proxy', True)
    }
    serializer = TaskWithAnnotationsAndPredictionsAndDraftsSerializer(next_task, context=context)
    response = serializer.data

    # keep only the current user's annotations, excluding ground truth and honeypots
    annotations = []
    for c in response.get('annotations', []):
        if c.get('completed_by') == user.id and not (c.get('ground_truth') or c.get('honeypot')):
            annotations.append(c)
    response['annotations'] = annotations

    return Response(response)
def retrieve(self, request, *args, **kwargs):
    task = self.get_object()
    result = self.get_serializer(task).data

    # use proxy inlining for task data (for credential access)
    proxy = bool_from_request(request.GET, 'proxy', True)
    result['data'] = task.resolve_uri(result['data'], proxy=proxy)
    return Response(result)
def get_serializer_context(request):
    return {
        'proxy': bool_from_request(request.GET, 'proxy', True),
        'resolve_uri': True,
        'completed_by': 'full',
        'drafts': True,
        'predictions': True,
        'annotations': True,
        'request': request
    }
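# Hypothetical call site for the context factory above: the dict is passed as
# DRF serializer context so nested fields (annotations, predictions, drafts)
# are expanded and task data URIs are resolved through the proxy. The handler
# shape here is illustrative, mirroring the retrieve handlers elsewhere in
# this file; it is not the library's own code.
def get(self, request, pk):
    task = Task.prepared.get(id=pk)
    serializer = self.serializer_class(task, context=get_serializer_context(request))
    return Response(serializer.data)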
def create(self, request, *args, **kwargs):
    start = time.time()
    files_as_tasks_list = bool_from_request(request.data, 'files_as_tasks_list', True)
    file_upload_ids = self.request.data.get('file_upload_ids')

    # check project permissions
    project = generics.get_object_or_404(Project.objects.for_user(self.request.user), pk=self.kwargs['pk'])

    if not file_upload_ids:
        return Response(
            {
                'task_count': 0,
                'annotation_count': 0,
                'prediction_count': 0,
                'duration': 0,
                'file_upload_ids': [],
                'found_formats': {},
                'data_columns': []
            },
            status=status.HTTP_204_NO_CONTENT)

    tasks, found_formats, data_columns = FileUpload.load_tasks_from_uploaded_files(
        project, file_upload_ids, files_as_tasks_list=files_as_tasks_list)

    with transaction.atomic():
        project.remove_tasks_by_file_uploads(file_upload_ids)
        tasks, serializer = self._save(tasks)
    duration = time.time() - start

    # Update task states if there are related settings in the project.
    # After bulk create we could bulk update task stats with flag_update_stats=True,
    # but they are already updated by a signal in the same transaction,
    # so just update tasks_number_changed.
    project.update_tasks_states(
        maximum_annotations_changed=False,
        overlap_cohort_percentage_changed=False,
        tasks_number_changed=True
    )
    logger.info('Tasks bulk_update finished')

    project.summary.update_data_columns(tasks)
    # TODO: project.summary.update_created_annotations_and_labels

    return Response(
        {
            'task_count': len(tasks),
            'annotation_count': len(serializer.db_annotations),
            'prediction_count': len(serializer.db_predictions),
            'duration': duration,
            'file_upload_ids': file_upload_ids,
            'found_formats': found_formats,
            'data_columns': data_columns
        },
        status=status.HTTP_201_CREATED)
def get(self, request, *args, **kwargs):
    project = self.get_object()
    export_type = (
        request.GET.get('exportType', 'JSON')
        if 'exportType' in request.GET
        else request.GET.get('export_type', 'JSON')
    )
    only_finished = not bool_from_request(request.GET, 'download_all_tasks', False)
    tasks_ids = request.GET.getlist('ids[]')

    if 'download_resources' in request.GET:
        download_resources = bool_from_request(request.GET, 'download_resources', True)
    else:
        download_resources = settings.CONVERTER_DOWNLOAD_RESOURCES

    logger.debug('Get tasks')
    tasks = Task.objects.filter(project=project)
    if tasks_ids and len(tasks_ids) > 0:
        logger.debug(f'Select only subset of {len(tasks_ids)} tasks')
        tasks = tasks.filter(id__in=tasks_ids)
    query = tasks.select_related('project').prefetch_related('annotations', 'predictions')
    if only_finished:
        query = query.filter(annotations__isnull=False).distinct()

    task_ids = query.values_list('id', flat=True)

    logger.debug('Serialize tasks for export')
    tasks = []
    for _task_ids in batch(task_ids, 1000):
        tasks += ExportDataSerializer(query.filter(id__in=_task_ids), many=True, expand=['drafts']).data

    logger.debug('Prepare export files')
    export_stream, content_type, filename = DataExport.generate_export_file(
        project, tasks, export_type, download_resources, request.GET)

    response = HttpResponse(File(export_stream), content_type=content_type)
    response['Content-Disposition'] = 'attachment; filename="%s"' % filename
    response['filename'] = filename
    return response
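# A minimal sketch of the `batch` helper used in the export loop above,
# assuming it simply yields successive chunks of at most `size` items from any
# iterable (here, task ids in groups of 1000 to bound memory and query size);
# the project's own utility may be implemented differently.
def batch(iterable, size):
    buf = []
    for item in iterable:
        buf.append(item)
        if len(buf) >= size:
            yield buf
            buf = []
    if buf:
        yield buf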
def create(self, request, *args, **kwargs):
    start = time.time()
    commit_to_project = bool_from_request(request.query_params, 'commit_to_project', True)

    # check project permissions
    project = generics.get_object_or_404(Project.objects.for_user(self.request.user), pk=self.kwargs['pk'])

    # upload files from the request and parse all tasks
    parsed_data, file_upload_ids, could_be_tasks_lists, found_formats, data_columns = load_tasks(request, project)

    if commit_to_project:
        # Immediately create project tasks and update project states and counters
        tasks, serializer = self._save(parsed_data)
        task_count = len(tasks)
        annotation_count = len(serializer.db_annotations)
        prediction_count = len(serializer.db_predictions)

        # Update task states if there are related settings in the project.
        # After bulk create we could bulk update task stats with flag_update_stats=True,
        # but they are already updated by a signal in the same transaction,
        # so just update tasks_number_changed.
        project.update_tasks_states(
            maximum_annotations_changed=False,
            overlap_cohort_percentage_changed=False,
            tasks_number_changed=True
        )
        logger.info('Tasks bulk_update finished')

        project.summary.update_data_columns(parsed_data)
        # TODO: project.summary.update_created_annotations_and_labels
    else:
        # Do nothing here; just return the file upload ids for further use
        task_count = len(parsed_data)
        annotation_count = None
        prediction_count = None

    duration = time.time() - start

    return Response(
        {
            'task_count': task_count,
            'annotation_count': annotation_count,
            'prediction_count': prediction_count,
            'duration': duration,
            'file_upload_ids': file_upload_ids,
            'could_be_tasks_list': could_be_tasks_lists,
            'found_formats': found_formats,
            'data_columns': data_columns
        },
        status=status.HTTP_201_CREATED)
def get(self, request, *args, **kwargs):
    project = get_object_with_permissions(request, Project, kwargs['pk'], 'projects.change_project')

    title = request.GET.get('title', '')
    title = project.title if not title else title
    title = generate_unique_title(request.user, title)

    duplicate_tasks = bool_from_request(request.GET, 'duplicate_tasks', default=False)

    try:
        project = duplicate_project(project, title, duplicate_tasks, request.user)
    except Exception as e:
        raise ValueError(f"Can't duplicate project: {e}")

    return Response({'id': project.pk}, status=status.HTTP_200_OK)
def retrieve(self, request, *args, **kwargs):
    task = self.get_object()

    # call the machine learning API and format the response
    if task.project.evaluate_predictions_automatically:
        for ml_backend in task.project.ml_backends.all():
            ml_backend.predict_one_task(task)

    result = self.get_serializer(task).data

    # use proxy inlining for task data (for credential access)
    proxy = bool_from_request(request.GET, 'proxy', True)
    result['data'] = task.resolve_uri(result['data'], proxy=proxy)
    return Response(result)
def get(self, request, pk):
    """
    get:
    Task by ID

    Retrieve a specific task by ID.
    """
    task = Task.prepared.get(id=pk)
    context = {
        'proxy': bool_from_request(request.GET, 'proxy', True),
        'resolve_uri': True,
        'completed_by': 'full'
    }
    serializer = self.serializer_class(task, many=False, context=context)
    return Response(serializer.data)
def get_queryset(self):
    review = bool_from_request(self.request.GET, 'review', False)
    selected = {"all": False, "included": [self.kwargs.get("pk")]}

    if review:
        kwargs = {'fields_for_evaluation': ['annotators', 'reviewed']}
    else:
        kwargs = {'all_fields': True}

    project = self.request.query_params.get('project') or self.request.data.get('project')
    if not project:
        project = Task.objects.get(id=self.request.parser_context['kwargs'].get('pk')).project.id

    return self.prefetch(
        Task.prepared.get_queryset(
            prepare_params=PrepareParams(project=project, selectedItems=selected),
            **kwargs
        )
    )
def get(self, request, *args, **kwargs):
    project = self.get_object()
    export_type = request.GET.get('exportType')
    is_labeled = not bool_from_request(request.GET, 'download_all_tasks', False)

    logger.debug('Get tasks')
    query = Task.objects.filter(project=project, is_labeled=is_labeled)

    logger.debug('Serialize tasks for export')
    tasks = ExportDataSerializer(query, many=True).data

    logger.debug('Prepare export files')
    export_stream, content_type, filename = DataExport.generate_export_file(
        project, tasks, export_type, request.GET)

    response = HttpResponse(File(export_stream), content_type=content_type)
    response['Content-Disposition'] = 'attachment; filename="%s"' % filename
    response['filename'] = filename
    return response
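# A hypothetical client call for an export handler like the one above. Only
# the query params (exportType, download_all_tasks) come from the handler
# itself; the URL, port, and token auth scheme are assumptions for
# illustration and will differ per deployment.
import requests

resp = requests.get(
    'http://localhost:8080/api/projects/1/export',
    headers={'Authorization': 'Token <your-token>'},
    params={'exportType': 'JSON', 'download_all_tasks': 'true'},
)
resp.raise_for_status()
# the handler sets a `filename` response header alongside Content-Disposition
with open(resp.headers.get('filename', 'export.json'), 'wb') as f:
    f.write(resp.content)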
def get_task_serializer_context(request, project):
    storage = find_first_many_to_one_related_field_by_prefix(project, '.*io_storages.*')
    resolve_uri = True
    if not storage and not project.task_data_login and not project.task_data_password:
        resolve_uri = False

    all_fields = request.GET.get('fields', None) == 'all'  # False by default

    return {
        'proxy': bool_from_request(request.GET, 'proxy', True),
        'resolve_uri': resolve_uri,
        'request': request,
        'project': project,
        'drafts': all_fields,
        'predictions': all_fields,
        'annotations': all_fields
    }
def get(self, request, *args, **kwargs):
    project = self.get_object()
    export_type = request.GET.get('exportType')
    only_finished = not bool_from_request(request.GET, 'download_all_tasks', False)

    logger.debug('Get tasks')
    query = Task.objects.filter(project=project).select_related('project').prefetch_related(
        'annotations', 'predictions')
    if only_finished:
        query = query.filter(annotations__isnull=False).distinct()

    task_ids = query.values_list('id', flat=True)

    logger.debug('Serialize tasks for export')
    tasks = []
    for _task_ids in batch(task_ids, 1000):
        tasks += ExportDataSerializer(query.filter(id__in=_task_ids), many=True).data

    logger.debug('Prepare export files')
    export_stream, content_type, filename = DataExport.generate_export_file(
        project, tasks, export_type, request.GET)

    response = HttpResponse(File(export_stream), content_type=content_type)
    response['Content-Disposition'] = 'attachment; filename="%s"' % filename
    response['filename'] = filename
    return response
def get(self, request):
    # get project
    view_pk = int_from_request(request.GET, 'view', 0) or int_from_request(request.data, 'view', 0)
    project_pk = int_from_request(request.GET, 'project', 0) or int_from_request(request.data, 'project', 0)
    if project_pk:
        project = get_object_with_check_and_log(request, Project, pk=project_pk)
        self.check_object_permissions(request, project)
    elif view_pk:
        view = get_object_with_check_and_log(request, View, pk=view_pk)
        project = view.project
        self.check_object_permissions(request, project)
    else:
        return Response({'detail': 'Neither project nor view id specified'}, status=404)

    # get prepare params (from the view or from the payload directly)
    prepare_params = get_prepare_params(request, project)
    queryset = self.get_task_queryset(request, prepare_params)
    context = self.get_task_serializer_context(self.request, project)

    # paginated tasks
    self.pagination_class = TaskPagination
    page = self.paginate_queryset(queryset)
    all_fields = 'all' if request.GET.get('fields', None) == 'all' else None
    fields_for_evaluation = get_fields_for_evaluation(prepare_params, request.user)
    review = bool_from_request(self.request.GET, 'review', False)
    if review:
        fields_for_evaluation = ['annotators', 'reviewed']
        all_fields = None

    if page is not None:
        ids = [task.id for task in page]  # page is already a list
        tasks = list(
            self.prefetch(
                Task.prepared.annotate_queryset(
                    Task.objects.filter(id__in=ids),
                    fields_for_evaluation=fields_for_evaluation,
                    all_fields=all_fields,
                )
            )
        )
        tasks_by_ids = {task.id: task for task in tasks}
        # keep the original ids ordering
        page = [tasks_by_ids[_id] for _id in ids]

        # retrieve ML predictions if tasks don't have them
        if not review and project.evaluate_predictions_automatically:
            tasks_for_predictions = Task.objects.filter(id__in=ids, predictions__isnull=True)
            evaluate_predictions(tasks_for_predictions)

        serializer = self.task_serializer_class(page, many=True, context=context)
        return self.get_paginated_response(serializer.data)

    # all tasks
    if project.evaluate_predictions_automatically:
        evaluate_predictions(queryset.filter(predictions__isnull=True))
    queryset = Task.prepared.annotate_queryset(
        queryset, fields_for_evaluation=fields_for_evaluation, all_fields=all_fields
    )
    serializer = self.task_serializer_class(queryset, many=True, context=context)
    return Response(serializer.data)