Example #1
    def perform_create(self, ser):
        task = get_object_with_check_and_log(self.request,
                                             Task,
                                             pk=self.kwargs['pk'])
        # the annotator has write access only to annotations, and this can't be checked after serializer.save()
        user = self.request.user

        # updates history
        result = ser.validated_data.get('result')
        extra_args = {'task_id': self.kwargs['pk']}

        # save stats about how well the annotator's annotations coincide with the current prediction
        # (only for finished task annotations)
        if result is not None:
            prediction = Prediction.objects.filter(
                task=task, model_version=task.project.model_version)
            if prediction.exists():
                prediction = prediction.first()
                prediction_ser = PredictionSerializer(prediction).data
            else:
                logger.debug(
                    f'User={self.request.user}: there are no predictions for task={task}'
                )
                prediction_ser = {}
            # serialize annotation
            extra_args.update({
                'prediction': prediction_ser,
            })

        if 'was_cancelled' in self.request.GET:
            extra_args['was_cancelled'] = bool_from_request(
                self.request.GET, 'was_cancelled', False)

        if 'completed_by' not in ser.validated_data:
            extra_args['completed_by'] = self.request.user

        # create annotation
        logger.debug(f'User={self.request.user}: save annotation')
        annotation = ser.save(**extra_args)
        logger.debug(f'Save activity for user={self.request.user}')
        self.request.user.activity_at = timezone.now()
        self.request.user.save()

        # Release the task if it was locked for work (it should be released by the same user, otherwise it triggers a Sentry error)
        logger.debug(f'User={user} releases task={task}')
        task.release_lock(user)

        # if the annotation was created from a draft, remove that draft
        draft_id = self.request.data.get('draft_id')
        if draft_id is not None:
            logger.debug(
                f'Remove draft {draft_id} after creating annotation {annotation.id}'
            )
            AnnotationDraft.objects.filter(id=draft_id).delete()

        if self.request.data.get('ground_truth'):
            annotation.task.ensure_unique_groundtruth(
                annotation_id=annotation.id)

        return annotation
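
For context, perform_create() is the hook that DRF's generic create views call after request validation, and any keyword arguments passed to ser.save(**extra_args) are merged into validated_data before the Annotation is created. A minimal sketch of that pattern in isolation (hypothetical view, not the project's actual code):

# hypothetical DRF view hook: save() kwargs become fields on the created object
def perform_create(self, serializer):
    serializer.save(completed_by=self.request.user, task_id=self.kwargs['pk'])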
Example #2
class ExportDataSerializer(FlexFieldsModelSerializer):
    annotations = AnnotationSerializer(many=True, read_only=True)
    file_upload = serializers.ReadOnlyField(source='file_upload_name')
    drafts = serializers.PrimaryKeyRelatedField(many=True, read_only=True)
    predictions = serializers.PrimaryKeyRelatedField(many=True, read_only=True)

    # resolve $undefined$ key in task data, if any
    def to_representation(self, task):
        project = task.project
        data = task.data

        replace_task_data_undefined_with_config_field(data, project)

        return super().to_representation(task)

    class Meta:
        model = Task
        exclude = ('overlap', 'is_labeled')
        expandable_fields = {
            'drafts': (AnnotationDraftSerializer, {
                'many': True
            }),
            'predictions': (PredictionSerializer, {
                'many': True
            }),
        }
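
The expandable_fields block above follows the drf-flex-fields convention: predictions and drafts serialize as primary keys by default, and the nested serializers are used only when the caller requests expansion. A minimal usage sketch, assuming a saved task instance (the expand keyword is drf-flex-fields API; the call site itself is illustrative):

# default representation: predictions and drafts come back as primary keys
ExportDataSerializer(task).data

# expanded representation: nested PredictionSerializer output instead of ids
ExportDataSerializer(task, expand=['predictions']).data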
Example #3
    def get_predictions(self, task):
        if not self.context.get('predictions'):
            return []
        return PredictionSerializer(task.predictions,
                                    many=True,
                                    default=[],
                                    read_only=True).data
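
This SerializerMethodField only serializes predictions when a 'predictions' flag is present in the serializer context; otherwise it short-circuits to an empty list. A minimal sketch of how the flag might be supplied (the enclosing serializer name is a placeholder, not taken from the source):

# hypothetical call site; SomeTaskSerializer stands in for the class that defines get_predictions
data = SomeTaskSerializer(task, context={'predictions': True}).data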
Example #4
class ImportApiSerializer(TaskSerializer):
    """ Tasks serializer for Import API (TaskBulkCreateAPI)
    """
    annotations = AnnotationSerializer(many=True, default=[])
    predictions = PredictionSerializer(many=True, default=[])

    class Meta:
        model = Task
        list_serializer_class = TaskSerializerBulk
        exclude = ('is_labeled', 'project')
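
Because Meta.list_serializer_class points at TaskSerializerBulk, instantiating this serializer with many=True routes bulk creation through that list serializer. A minimal sketch of a bulk import, assuming a list of task dicts with nested annotations and predictions (passing the excluded project via save() is a standard DRF pattern, shown here as an assumption):

# hypothetical bulk-import call; save() kwargs fill in the excluded 'project' field
ser = ImportApiSerializer(data=tasks_payload, many=True)
ser.is_valid(raise_exception=True)
tasks = ser.save(project=project)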
Example #5
    def predict_many_tasks(self, tasks):
        self.update_state()
        if self.not_ready:
            logger.debug(f'ML backend {self} is not ready')
            return

        if isinstance(tasks, list):
            from tasks.models import Task
            tasks = Task.objects.filter(id__in=[task.id for task in tasks])

        tasks_ser = TaskSimpleSerializer(tasks, many=True).data
        ml_api_result = self.api.make_predictions(tasks_ser,
                                                  self.model_version,
                                                  self.project)
        if ml_api_result.is_error:
            logger.error(
                f'Prediction not created for project {self}: {ml_api_result.error_message}'
            )
            return

        responses = ml_api_result.response['results']

        if len(responses) == 0:
            logger.error(
                f'ML backend returned empty prediction for project {self}')
            return

        # the ML backend doesn't support batches of tasks, so process them one by one
        elif len(responses) == 1:
            logger.warning(
                f"ML backend '{self.title}' doesn't support batch processing of tasks, "
                f"switched to one-by-one task retrieval")
            for task in tasks:
                self.predict_one_task(task)
            return

        # wrong result number
        elif len(responses) != len(tasks_ser):
            logger.error(
                f'ML backend returned response number {len(responses)} != task number {len(tasks_ser)}'
            )

        predictions = []
        for task, response in zip(tasks_ser, responses):
            predictions.append({
                'task': task['id'],
                'result': response['result'],
                'score': response.get('score'),
                'model_version': self.model_version
            })
        with conditional_atomic():
            prediction_ser = PredictionSerializer(data=predictions, many=True)
            prediction_ser.is_valid(raise_exception=True)
            prediction_ser.save()
Example #6
class ExportDataSerializer(serializers.ModelSerializer):
    annotations = AnnotationSerializer(many=True, read_only=True)
    predictions = PredictionSerializer(many=True, read_only=True)
    file_upload = serializers.ReadOnlyField(source='file_upload_name')

    # resolve $undefined$ key in task data, if any
    def to_representation(self, task):
        project = task.project
        data = task.data

        replace_task_data_undefined_with_config_field(data, project)

        return super().to_representation(task)

    class Meta:
        model = Task
        exclude = ('overlap', 'is_labeled')
Example #7
class ExportDataSerializer(serializers.ModelSerializer):
    annotations = AnnotationSerializer(many=True, read_only=True)
    predictions = PredictionSerializer(many=True, read_only=True)
    file_upload = serializers.ReadOnlyField(source='file_upload_name')

    # resolve $undefined$ key in task data, if any
    def to_representation(self, task):
        project = task.project
        data = task.data
        data_types_keys = project.data_types.keys()

        if settings.DATA_UNDEFINED_NAME in data and data_types_keys:
            key = list(data_types_keys)[0]
            data[key] = data[settings.DATA_UNDEFINED_NAME]
            del data[settings.DATA_UNDEFINED_NAME]

        return super().to_representation(task)

    class Meta:
        model = Task
        exclude = ('overlap', 'is_labeled')
Example #8
    def _scan_and_create_links(self, link_class):
        tasks_created = 0
        maximum_annotations = self.project.maximum_annotations
        task = self.project.tasks.order_by('-inner_id').first()
        max_inner_id = (task.inner_id + 1) if task else 1

        for key in self.iterkeys():
            logger.debug(f'Scanning key {key}')

            # skip if a link for this key already exists
            if link_class.exists(key, self):
                logger.debug(
                    f'{self.__class__.__name__} link {key} already exists')
                continue

            logger.debug(f'{self}: found new key {key}')
            try:
                data = self.get_data(key)
            except (UnicodeDecodeError, json.decoder.JSONDecodeError) as exc:
                logger.error(exc, exc_info=True)
                raise ValueError(
                    f'Error loading JSON from file "{key}".\nIf you\'re trying to import non-JSON data '
                    f'(images, audio, text, etc.), edit storage settings and enable '
                    f'"Treat every bucket object as a source file"')

            # predictions
            predictions = data.get('predictions', [])
            if predictions:
                if 'data' not in data:
                    raise ValueError(
                        'If you use "predictions" field in the task, '
                        'you must put "data" field in the task too')

            # annotations
            annotations = data.get('annotations', [])
            if annotations:
                if 'data' not in data:
                    raise ValueError(
                        'If you use "annotations" field in the task, '
                        'you must put "data" field in the task too')

            if 'data' in data and isinstance(data['data'], dict):
                data = data['data']

            with transaction.atomic():
                task = Task.objects.create(
                    data=data,
                    project=self.project,
                    overlap=maximum_annotations,
                    is_labeled=len(annotations) >= maximum_annotations,
                    inner_id=max_inner_id)
                max_inner_id += 1

                link_class.create(task, key, self)
                logger.debug(
                    f'Create {self.__class__.__name__} link with key={key} for task={task}'
                )
                tasks_created += 1

                # add predictions
                logger.debug(
                    f'Create {len(predictions)} predictions for task={task}')
                for prediction in predictions:
                    prediction['task'] = task.id
                prediction_ser = PredictionSerializer(data=predictions,
                                                      many=True)
                prediction_ser.is_valid(raise_exception=True)
                prediction_ser.save()

                # add annotations
                logger.debug(
                    f'Create {len(annotations)} annotations for task={task}')
                for annotation in annotations:
                    annotation['task'] = task.id
                annotation_ser = AnnotationSerializer(data=annotations,
                                                      many=True)
                annotation_ser.is_valid(raise_exception=True)
                annotation_ser.save()

        self.last_sync = timezone.now()
        self.last_sync_count = tasks_created
        self.save()

        self.project.update_tasks_states(
            maximum_annotations_changed=False,
            overlap_cohort_percentage_changed=False,
            tasks_number_changed=True)
Example #9
class DataManagerTaskSerializer(TaskSerializer):
    predictions = PredictionSerializer(many=True, default=[], read_only=True)
    annotations = AnnotationSerializer(many=True, default=[], read_only=True)

    cancelled_annotations = serializers.SerializerMethodField()
    completed_at = serializers.SerializerMethodField()
    annotations_results = serializers.SerializerMethodField()
    predictions_results = serializers.SerializerMethodField()
    predictions_score = serializers.SerializerMethodField()
    total_annotations = serializers.SerializerMethodField()
    total_predictions = serializers.SerializerMethodField()
    file_upload = serializers.ReadOnlyField(source='file_upload_name')
    annotators = serializers.SerializerMethodField()

    class Meta:
        model = Task
        ref_name = 'data_manager_task_serializer'

        fields = [
            "cancelled_annotations", "completed_at", "created_at",
            "annotations_results", "data", "id", "predictions_results",
            "predictions_score", "total_annotations", "total_predictions",
            "annotations", "predictions", "file_upload", "annotators",
            "project"
        ]

    @staticmethod
    def get_cancelled_annotations(obj):
        return obj.annotations.filter(was_cancelled=True).count()

    @staticmethod
    def get_completed_at(obj):
        annotations = obj.annotations.all()
        if annotations:
            return max(c.created_at for c in annotations)
        return None

    @staticmethod
    def get_annotations_results(obj):
        annotations = obj.annotations.all()
        if annotations:
            return json.dumps([item.result for item in annotations])
        else:
            return ""

    @staticmethod
    def get_predictions_results(obj):
        predictions = obj.predictions.all()
        if predictions:
            return json.dumps([item.result for item in predictions])
        else:
            return ""

    @staticmethod
    def get_predictions_score(obj):
        predictions = obj.predictions.all()
        if predictions:
            values = [
                item.score for item in predictions
                if isinstance(item.score, (float, int))
            ]
            if values:
                return sum(values) / float(len(values))
        return None

    @staticmethod
    def get_total_predictions(obj):
        return obj.predictions.count()

    @staticmethod
    def get_total_annotations(obj):
        return obj.annotations.filter(was_cancelled=False).count()

    @staticmethod
    def get_annotators(obj):
        result = obj.annotations.values_list('completed_by',
                                             flat=True).distinct()
        result = [r for r in result if r is not None]
        return result
Example #10
    def get_predictions(self, task):
        return PredictionSerializer(task.predictions, many=True, default=[], read_only=True).data
Example #11
class DataManagerTaskSerializer(TaskSerializer):
    predictions = PredictionSerializer(many=True, default=[], read_only=True)
    annotations = AnnotationSerializer(many=True, default=[], read_only=True)
    drafts = serializers.SerializerMethodField()

    cancelled_annotations = serializers.SerializerMethodField()
    completed_at = serializers.SerializerMethodField()
    annotations_results = serializers.SerializerMethodField()
    predictions_results = serializers.SerializerMethodField()
    predictions_score = serializers.SerializerMethodField()
    total_annotations = serializers.SerializerMethodField()
    total_predictions = serializers.SerializerMethodField()
    file_upload = serializers.ReadOnlyField(source='file_upload_name')
    annotators = serializers.SerializerMethodField()

    class Meta:
        model = Task
        ref_name = 'data_manager_task_serializer'

        fields = [
            "cancelled_annotations", "completed_at", "created_at",
            "annotations_results", "data", "id", "predictions_results",
            "predictions_score", "total_annotations", "total_predictions",
            "annotations", "predictions", "drafts", "file_upload",
            "annotators", "project"
        ]

    @staticmethod
    def get_cancelled_annotations(obj):
        return obj.annotations.filter(was_cancelled=True).count()

    @staticmethod
    def get_completed_at(obj):
        annotations = obj.annotations.all()
        if obj.is_labeled and annotations:
            return max(c.created_at for c in annotations)
        return None

    @staticmethod
    def get_annotations_results(obj):
        annotations = obj.annotations.all()
        if annotations:
            return json.dumps([item.result for item in annotations])
        else:
            return ""

    @staticmethod
    def get_predictions_results(obj):
        predictions = obj.predictions.all()
        if predictions:
            return json.dumps([item.result for item in predictions])
        else:
            return ""

    @staticmethod
    def get_predictions_score(obj):
        predictions = obj.predictions.all()
        if predictions:
            values = [
                item.score for item in predictions
                if isinstance(item.score, (float, int))
            ]
            if values:
                return sum(values) / float(len(values))
        return None

    @staticmethod
    def get_total_predictions(obj):
        return obj.predictions.count()

    @staticmethod
    def get_total_annotations(obj):
        return obj.annotations.filter(was_cancelled=False).count()

    @staticmethod
    def get_annotators(obj):
        result = obj.annotations.values_list('completed_by',
                                             flat=True).distinct()
        result = [r for r in result if r is not None]
        return result

    def get_drafts(self, task):
        """Return drafts only for the current user"""
        # this branch is for swagger schema generation, where a class (not a Task instance) is passed in
        if not isinstance(task, Task):
            return AnnotationDraftSerializer(many=True)

        drafts = task.drafts
        if 'request' in self.context and hasattr(self.context['request'],
                                                 'user'):
            user = self.context['request'].user
            drafts = drafts.filter(user=user)

        return AnnotationDraftSerializer(drafts,
                                         many=True,
                                         read_only=True,
                                         default=True,
                                         context=self.context).data
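
get_drafts() relies on the DRF request being available in the serializer context so that drafts can be filtered down to the current user. A minimal sketch of a call site, assuming it runs inside a view where request and task already exist:

# hypothetical call site; the 'request' context key enables per-user draft filtering
serializer = DataManagerTaskSerializer(task, context={'request': request})
drafts_for_current_user = serializer.data['drafts']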