Esempio n. 1
0
    def tasks(self, request, pk=None):
        """
        get:
        Get task list for view

        Retrieve a list of tasks with pagination for a specific view using filters and ordering.
        """
        current_view = self.get_object()
        task_qs = self.get_task_queryset(request, current_view)
        serializer_context = {
            'proxy': bool_from_request(request.GET, 'proxy', True),
            'resolve_uri': True,
            'request': request,
        }
        project = current_view.project

        # Switch to the task-specific paginator before slicing the queryset.
        self.pagination_class = TaskPagination
        page = self.paginate_queryset(task_qs)

        if page is None:
            # No pagination requested: serialize the whole queryset.
            if project.evaluate_predictions_automatically:
                evaluate_predictions(task_qs.filter(predictions__isnull=True))
            return Response(
                self.task_serializer_class(task_qs, many=True, context=serializer_context).data)

        # Paginated path: fetch ML predictions for page tasks that lack them.
        if project.evaluate_predictions_automatically:
            page_ids = [t.id for t in page]  # page is a plain list of tasks
            evaluate_predictions(Task.objects.filter(id__in=page_ids, predictions__isnull=True))

        page_serializer = self.task_serializer_class(page, many=True, context=serializer_context)
        return self.get_paginated_response(page_serializer.data)
Esempio n. 2
0
    def get(self, request, pk):
        """
        get:
        Task by ID

        Retrieve a specific task by ID.
        """
        task = Task.prepared.get(id=pk)
        serializer_context = {
            'proxy': bool_from_request(request.GET, 'proxy', True),
            'resolve_uri': True,
            'completed_by': 'full',
            'request': request,
        }

        # Fetch an ML prediction when the project auto-evaluates and the task has none yet.
        needs_prediction = (
            task.project.evaluate_predictions_automatically
            and not task.predictions.exists()
        )
        if needs_prediction:
            evaluate_predictions([task])

        serializer_cls = self.get_serializer_class()
        payload = serializer_cls(task, many=False, context=serializer_context).data
        return Response(payload)
Esempio n. 3
0
def test_core_bool_from_request(param, result):
    """Check bool_from_request parsing.

    `result is None` means `param` is invalid and parsing must raise;
    otherwise parsing must return exactly `result`.
    """
    params = {'test': param} if param is not None else {}

    # incorrect param should call exception
    if result is None:
        error = False
        try:
            bool_from_request(params, 'test', 0)
        # narrowed from a bare `except:`, which also swallowed
        # SystemExit/KeyboardInterrupt and could mask a real abort
        except Exception:
            error = True

        assert error

    # everything ok
    else:
        assert bool_from_request(params, 'test', 0) == result
Esempio n. 4
0
    def tasks(self, request, pk=None):
        """
        get:
        Get task list for view

        Retrieve a list of tasks with pagination for a specific view using filters and ordering.
        """
        view = self.get_object()
        prepare_params = view.get_prepare_tasks_params()
        task_qs = Task.prepared.all(prepare_params=prepare_params)
        ctx = {
            'proxy': bool_from_request(request.GET, 'proxy', True),
            'resolve_uri': True,
        }

        # Use the task-specific paginator for this endpoint.
        self.pagination_class = TaskPagination
        page = self.paginate_queryset(task_qs)

        if page is None:
            # Unpaginated: evaluate predictions for and serialize the full queryset.
            self.evaluate_predictions(task_qs)
            return Response(
                self.task_serializer_class(task_qs, many=True, context=ctx).data)

        self.evaluate_predictions(page)
        page_data = self.task_serializer_class(page, many=True, context=ctx).data
        return self.get_paginated_response(page_data)
Esempio n. 5
0
    def perform_create(self, ser):
        """Save a new annotation for the task in the URL kwargs.

        Side effects, in order: records the matching prediction (for
        agreement stats), saves the annotation, touches the user's
        activity timestamp, releases the task lock, deletes the source
        draft (if any) and enforces ground-truth uniqueness.
        Returns the saved annotation instance.
        """
        task = get_object_with_check_and_log(self.request,
                                             Task,
                                             pk=self.kwargs['pk'])
        # annotator has write access only to annotations and it can't be checked it after serializer.save()
        user = self.request.user

        # updates history
        result = ser.validated_data.get('result')
        extra_args = {'task_id': self.kwargs['pk']}

        # save stats about how well annotator annotations coincide with current prediction
        # only for finished task annotations
        if result is not None:
            # "current" prediction = the one matching the project's active model version
            prediction = Prediction.objects.filter(
                task=task, model_version=task.project.model_version)
            if prediction.exists():
                prediction = prediction.first()
                prediction_ser = PredictionSerializer(prediction).data
            else:
                logger.debug(
                    f'User={self.request.user}: there are no predictions for task={task}'
                )
                prediction_ser = {}
            # serialize annotation
            extra_args.update({
                'prediction': prediction_ser,
            })

        if 'was_cancelled' in self.request.GET:
            extra_args['was_cancelled'] = bool_from_request(
                self.request.GET, 'was_cancelled', False)

        if 'completed_by' not in ser.validated_data:
            # default the author to the requesting user
            extra_args['completed_by'] = self.request.user

        # create annotation
        logger.debug(f'User={self.request.user}: save annotation')
        annotation = ser.save(**extra_args)
        logger.debug(f'Save activity for user={self.request.user}')
        # touch the user's last-activity timestamp
        self.request.user.activity_at = timezone.now()
        self.request.user.save()

        # Release task if it has been taken at work (it should be taken by the same user, or it makes sentry error
        logger.debug(f'User={user} releases task={task}')
        task.release_lock(user)

        # if annotation created from draft - remove this draft
        draft_id = self.request.data.get('draft_id')
        if draft_id is not None:
            logger.debug(
                f'Remove draft {draft_id} after creating annotation {annotation.id}'
            )
            AnnotationDraft.objects.filter(id=draft_id).delete()

        if self.request.data.get('ground_truth'):
            # only one ground-truth annotation may exist per task
            annotation.task.ensure_unique_groundtruth(
                annotation_id=annotation.id)

        return annotation
Esempio n. 6
0
    def _make_response(self, next_task, request, use_task_lock=True):
        """Once next task has chosen, this function triggers inference and prepare the API response"""
        user = request.user
        project = next_task.project

        if use_task_lock:
            # Lock TTL is 3x the current average lead time (or one hour by default).
            next_task.set_lock(request.user)

        # Trigger inference on every connected ML backend when collaborative predictions are on.
        if project.show_collab_predictions:
            for backend in project.ml_backends.all():
                backend.predict_one_task(next_task)

        serializer_context = {
            'request': request,
            'project': project,
            'resolve_uri': True,
            'proxy': bool_from_request(request.GET, 'proxy', True)
        }
        payload = TaskWithAnnotationsAndPredictionsAndDraftsSerializer(
            next_task, context=serializer_context).data

        # Keep only this user's regular annotations (drop ground-truth / honeypot ones).
        payload['annotations'] = [
            ann for ann in payload.get('annotations', [])
            if ann.get('completed_by') == user.id
            and not (ann.get('ground_truth') or ann.get('honeypot'))
        ]

        return Response(payload)
Esempio n. 7
0
    def retrieve(self, request, *args, **kwargs):
        """Return a single serialized task with its data URIs resolved."""
        task = self.get_object()
        payload = self.get_serializer(task).data

        # Inline proxied URIs into task data (needed for credential-protected storage).
        use_proxy = bool_from_request(request.GET, 'proxy', True)
        payload['data'] = task.resolve_uri(payload['data'], proxy=use_proxy)
        return Response(payload)
Esempio n. 8
0
 def get_serializer_context(request):
     """Return a fully-expanded serializer context for the given request."""
     use_proxy = bool_from_request(request.GET, 'proxy', True)
     return {
         'proxy': use_proxy,
         'resolve_uri': True,
         'completed_by': 'full',
         'drafts': True,
         'predictions': True,
         'annotations': True,
         'request': request,
     }
Esempio n. 9
0
    def create(self, request, *args, **kwargs):
        """Re-import tasks from previously uploaded files into a project.

        Replaces tasks tied to the given `file_upload_ids` inside one
        transaction, then refreshes project task-state counters and data
        columns. Returns counts plus the formats/columns found; 204 when
        no upload ids were supplied.
        """
        start = time.time()
        files_as_tasks_list = bool_from_request(request.data,
                                                'files_as_tasks_list', True)
        file_upload_ids = self.request.data.get('file_upload_ids')

        # check project permissions
        project = generics.get_object_or_404(Project.objects.for_user(
            self.request.user),
                                             pk=self.kwargs['pk'])

        # nothing to re-import: respond with empty counters
        if not file_upload_ids:
            return Response(
                {
                    'task_count': 0,
                    'annotation_count': 0,
                    'prediction_count': 0,
                    'duration': 0,
                    'file_upload_ids': [],
                    'found_formats': {},
                    'data_columns': []
                },
                status=status.HTTP_204_NO_CONTENT)

        tasks, found_formats, data_columns = FileUpload.load_tasks_from_uploaded_files(
            project, file_upload_ids, files_as_tasks_list=files_as_tasks_list)

        # delete-then-recreate must be atomic so tasks are never half-replaced
        with transaction.atomic():
            project.remove_tasks_by_file_uploads(file_upload_ids)
            tasks, serializer = self._save(tasks)
        duration = time.time() - start

        # Update task states if there are related settings in project
        # after bulk create we can bulk update task stats with
        # flag_update_stats=True but they are already updated with signal in same transaction
        # so just update tasks_number_changed
        project.update_tasks_states(maximum_annotations_changed=False,
                                    overlap_cohort_percentage_changed=False,
                                    tasks_number_changed=True)
        logger.info('Tasks bulk_update finished')

        project.summary.update_data_columns(tasks)
        # TODO: project.summary.update_created_annotations_and_labels

        return Response(
            {
                'task_count': len(tasks),
                'annotation_count': len(serializer.db_annotations),
                'prediction_count': len(serializer.db_predictions),
                'duration': duration,
                'file_upload_ids': file_upload_ids,
                'found_formats': found_formats,
                'data_columns': data_columns
            },
            status=status.HTTP_201_CREATED)
Esempio n. 10
0
    def get(self, request, *args, **kwargs):
        """Export project tasks as a downloadable file.

        Honors `exportType`/`export_type`, `download_all_tasks`,
        `ids[]` (task subset) and `download_resources` query params;
        serializes tasks in batches and streams the generated file back
        as an attachment.
        """
        project = self.get_object()
        # accept both the legacy `exportType` and the newer `export_type` param
        export_type = (request.GET.get('exportType', 'JSON')
                       if 'exportType' in request.GET else request.GET.get(
                           'export_type', 'JSON'))
        # by default export only tasks that have annotations
        only_finished = not bool_from_request(request.GET,
                                              'download_all_tasks', False)
        tasks_ids = request.GET.getlist('ids[]')
        if 'download_resources' in request.GET:
            download_resources = bool_from_request(request.GET,
                                                   'download_resources', True)
        else:
            # fall back to the instance-wide converter setting
            download_resources = settings.CONVERTER_DOWNLOAD_RESOURCES

        logger.debug('Get tasks')
        tasks = Task.objects.filter(project=project)
        if tasks_ids and len(tasks_ids) > 0:
            logger.debug(f'Select only subset of {len(tasks_ids)} tasks')
            tasks = tasks.filter(id__in=tasks_ids)
        query = tasks.select_related('project').prefetch_related(
            'annotations', 'predictions')
        if only_finished:
            # distinct() needed: the annotation join can duplicate task rows
            query = query.filter(annotations__isnull=False).distinct()

        task_ids = query.values_list('id', flat=True)

        logger.debug('Serialize tasks for export')
        tasks = []
        # serialize in chunks of 1000 ids to bound memory usage
        for _task_ids in batch(task_ids, 1000):
            tasks += ExportDataSerializer(query.filter(id__in=_task_ids),
                                          many=True,
                                          expand=['drafts']).data
        logger.debug('Prepare export files')

        export_stream, content_type, filename = DataExport.generate_export_file(
            project, tasks, export_type, download_resources, request.GET)

        response = HttpResponse(File(export_stream), content_type=content_type)
        response[
            'Content-Disposition'] = 'attachment; filename="%s"' % filename
        response['filename'] = filename
        return response
Esempio n. 11
0
    def create(self, request, *args, **kwargs):
        """Import tasks into a project from the request payload.

        With `commit_to_project=true` (default) the parsed tasks are
        saved immediately and project counters are refreshed; otherwise
        only the file uploads are stored and their ids returned for a
        later commit. Returns counts, duration and parse metadata.
        """
        start = time.time()
        commit_to_project = bool_from_request(request.query_params,
                                              'commit_to_project', True)

        # check project permissions
        project = generics.get_object_or_404(Project.objects.for_user(
            self.request.user),
                                             pk=self.kwargs['pk'])

        # upload files from request, and parse all tasks
        parsed_data, file_upload_ids, could_be_tasks_lists, found_formats, data_columns = load_tasks(
            request, project)

        if commit_to_project:
            # Immediately create project tasks and update project states and counters
            tasks, serializer = self._save(parsed_data)
            task_count = len(tasks)
            annotation_count = len(serializer.db_annotations)
            prediction_count = len(serializer.db_predictions)
            # Update tasks states if there are related settings in project
            # after bulk create we can bulk update tasks stats with
            # flag_update_stats=True but they are already updated with signal in same transaction
            # so just update tasks_number_changed
            project.update_tasks_states(
                maximum_annotations_changed=False,
                overlap_cohort_percentage_changed=False,
                tasks_number_changed=True)
            logger.info('Tasks bulk_update finished')

            project.summary.update_data_columns(parsed_data)
            # TODO: project.summary.update_created_annotations_and_labels
        else:
            # Do nothing - just output file upload ids for further use
            task_count = len(parsed_data)
            # counts are unknown until the upload is actually committed
            annotation_count = None
            prediction_count = None

        duration = time.time() - start

        return Response(
            {
                'task_count': task_count,
                'annotation_count': annotation_count,
                'prediction_count': prediction_count,
                'duration': duration,
                'file_upload_ids': file_upload_ids,
                'could_be_tasks_list': could_be_tasks_lists,
                'found_formats': found_formats,
                'data_columns': data_columns
            },
            status=status.HTTP_201_CREATED)
Esempio n. 12
0
    def get(self, request, *args, **kwargs):
        """Duplicate a project (optionally with its tasks) and return the new id.

        Requires `projects.change_project` permission; the new title comes
        from `?title=` or defaults to a uniquified copy of the original.
        Raises ValueError (chained to the root cause) on any duplication failure.
        """
        project = get_object_with_permissions(request, Project, kwargs['pk'], 'projects.change_project')
        title = request.GET.get('title', '')
        title = project.title if not title else title
        title = generate_unique_title(request.user, title)

        duplicate_tasks = bool_from_request(request.GET, 'duplicate_tasks', default=False)

        try:
            project = duplicate_project(project, title, duplicate_tasks, request.user)
        except Exception as e:
            # chain explicitly so the traceback always keeps the root cause
            raise ValueError(f"Can't duplicate project: {e}") from e

        return Response({'id': project.pk}, status=status.HTTP_200_OK)
Esempio n. 13
0
    def retrieve(self, request, *args, **kwargs):
        """Return one task, triggering ML inference first when enabled."""
        task = self.get_object()
        project = task.project

        # call machine learning api and format response
        if project.evaluate_predictions_automatically:
            for backend in project.ml_backends.all():
                backend.predict_one_task(task)

        payload = self.get_serializer(task).data

        # use proxy inlining to task data (for credential access)
        payload['data'] = task.resolve_uri(
            payload['data'],
            proxy=bool_from_request(request.GET, 'proxy', True),
        )
        return Response(payload)
Esempio n. 14
0
    def get(self, request, pk):
        """
        get:
        Task by ID

        Retrieve a specific task by ID.
        """
        task = Task.prepared.get(id=pk)
        ctx = {
            'proxy': bool_from_request(request.GET, 'proxy', True),
            'resolve_uri': True,
            'completed_by': 'full',
        }
        payload = self.serializer_class(task, many=False, context=ctx).data
        return Response(payload)
Esempio n. 15
0
 def get_queryset(self):
     """Return the prefetched, prepared queryset scoped to the task in the URL.

     In review mode (`?review=true`) only the fields needed for review
     are evaluated; otherwise all fields are included. The project id is
     taken from query params / body, falling back to the task's project.
     """
     review = bool_from_request(self.request.GET, 'review', False)
     # restrict preparation to exactly this one task id
     selected = {"all": False, "included": [self.kwargs.get("pk")]}
     if review:
         kwargs = {'fields_for_evaluation': ['annotators', 'reviewed']}
     else:
         kwargs = {'all_fields': True}
     project = self.request.query_params.get(
         'project') or self.request.data.get('project')
     if not project:
         # no explicit project given: derive it from the task itself
         project = Task.objects.get(
             id=self.request.parser_context['kwargs'].get('pk')).project.id
     return self.prefetch(
         Task.prepared.get_queryset(prepare_params=PrepareParams(
             project=project, selectedItems=selected),
                                    **kwargs))
Esempio n. 16
0
    def get(self, request, *args, **kwargs):
        """Export project tasks as a downloadable attachment."""
        project = self.get_object()
        export_type = request.GET.get('exportType')
        # Export only labeled tasks unless the client asked for everything.
        is_labeled = not bool_from_request(request.GET, 'download_all_tasks', False)

        logger.debug('Get tasks')
        task_qs = Task.objects.filter(project=project, is_labeled=is_labeled)
        logger.debug('Serialize tasks for export')
        serialized = ExportDataSerializer(task_qs, many=True).data
        logger.debug('Prepare export files')
        export_stream, content_type, filename = DataExport.generate_export_file(
            project, serialized, export_type, request.GET)

        response = HttpResponse(File(export_stream), content_type=content_type)
        response['Content-Disposition'] = 'attachment; filename="%s"' % filename
        response['filename'] = filename
        return response
Esempio n. 17
0
    def get_task_serializer_context(request, project):
        """Build the serializer context for task endpoints of the given project."""
        storage = find_first_many_to_one_related_field_by_prefix(
            project, '.*io_storages.*')
        # URIs are resolvable only when some storage or task-data credentials exist.
        resolve_uri = bool(
            storage or project.task_data_login or project.task_data_password)

        # `?fields=all` expands drafts, predictions and annotations (off by default).
        expand_all = request.GET.get('fields', None) == 'all'

        return {
            'proxy': bool_from_request(request.GET, 'proxy', True),
            'resolve_uri': resolve_uri,
            'request': request,
            'project': project,
            'drafts': expand_all,
            'predictions': expand_all,
            'annotations': expand_all,
        }
Esempio n. 18
0
    def get(self, request, *args, **kwargs):
        """Export annotated project tasks (or all tasks) as an attachment."""
        project = self.get_object()
        export_type = request.GET.get('exportType')
        only_finished = not bool_from_request(request.GET, 'download_all_tasks', False)

        logger.debug('Get tasks')
        query = Task.objects.filter(project=project).select_related('project').prefetch_related('annotations', 'predictions')
        if only_finished:
            # Keep only tasks that have at least one annotation.
            query = query.filter(annotations__isnull=False).distinct()

        task_ids = query.values_list('id', flat=True)

        logger.debug('Serialize tasks for export')
        serialized_tasks = []
        # Serialize in chunks of 1000 ids to bound memory usage.
        for id_chunk in batch(task_ids, 1000):
            chunk_data = ExportDataSerializer(query.filter(id__in=id_chunk), many=True).data
            serialized_tasks.extend(chunk_data)
        logger.debug('Prepare export files')

        export_stream, content_type, filename = DataExport.generate_export_file(
            project, serialized_tasks, export_type, request.GET)

        response = HttpResponse(File(export_stream), content_type=content_type)
        response['Content-Disposition'] = 'attachment; filename="%s"' % filename
        response['filename'] = filename
        return response
Esempio n. 19
0
    def get(self, request):
        """List tasks for a project or a view, paginated and annotated.

        Resolves the project from `?project=` or `?view=`, prepares the
        queryset with view filters/ordering, optionally fetches missing
        ML predictions, and serializes either the requested page or the
        whole queryset.
        """
        # get project
        view_pk = int_from_request(request.GET, 'view', 0) or int_from_request(request.data, 'view', 0)
        project_pk = int_from_request(request.GET, 'project', 0) or int_from_request(request.data, 'project', 0)
        if project_pk:
            project = get_object_with_check_and_log(request, Project, pk=project_pk)
            self.check_object_permissions(request, project)
        elif view_pk:
            view = get_object_with_check_and_log(request, View, pk=view_pk)
            project = view.project
            self.check_object_permissions(request, project)
        else:
            return Response({'detail': 'Neither project nor view id specified'}, status=404)

        # get prepare params (from view or from payload directly)
        prepare_params = get_prepare_params(request, project)
        queryset = self.get_task_queryset(request, prepare_params)
        context = self.get_task_serializer_context(self.request, project)

        # paginated tasks
        self.pagination_class = TaskPagination
        page = self.paginate_queryset(queryset)
        all_fields = 'all' if request.GET.get('fields', None) == 'all' else None
        fields_for_evaluation = get_fields_for_evaluation(prepare_params, request.user)

        # review mode narrows evaluation to annotator/review fields only
        review = bool_from_request(self.request.GET, 'review', False)
        if review:
            fields_for_evaluation = ['annotators', 'reviewed']
            all_fields = None

        if page is not None:
            ids = [task.id for task in page]  # page is a list already
            # re-fetch page tasks with the requested computed fields attached
            tasks = list(
                self.prefetch(
                    Task.prepared.annotate_queryset(
                        Task.objects.filter(id__in=ids),
                        fields_for_evaluation=fields_for_evaluation,
                        all_fields=all_fields,
                    )
                )
            )
            tasks_by_ids = {task.id: task for task in tasks}

            # keep ids ordering
            page = [tasks_by_ids[_id] for _id in ids]

            # retrieve ML predictions if tasks don't have them
            if not review and project.evaluate_predictions_automatically:
                tasks_for_predictions = Task.objects.filter(id__in=ids, predictions__isnull=True)
                evaluate_predictions(tasks_for_predictions)

            serializer = self.task_serializer_class(page, many=True, context=context)
            return self.get_paginated_response(serializer.data)

        # all tasks
        if project.evaluate_predictions_automatically:
            evaluate_predictions(queryset.filter(predictions__isnull=True))
        queryset = Task.prepared.annotate_queryset(
            queryset, fields_for_evaluation=fields_for_evaluation, all_fields=all_fields
        )
        serializer = self.task_serializer_class(queryset, many=True, context=context)
        return Response(serializer.data)