def get_annotations(self, task):
    if not self.context.get('annotations'):
        return []
    return AnnotationSerializer(task.annotations, many=True, default=[], read_only=True).data
def save_annotation(self, annotation):
    client = self.get_client()
    logger.debug(f'Creating new object on {self.__class__.__name__} Storage {self} for annotation {annotation}')
    ser_annotation = AnnotationSerializer(annotation).data
    with transaction.atomic():
        # Create export storage link
        link = RedisExportStorageLink.create(annotation, self)
        client.set(link.key, json.dumps(ser_annotation))
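# Hedged standalone sketch (not from the source): the same write pattern as
# above with a plain redis-py client, outside Django. The key name and payload
# are illustrative stand-ins for link.key and the AnnotationSerializer output.
import json
import redis

r = redis.Redis(host='localhost', port=6379, db=1)
payload = {'id': 1, 'result': [], 'task': 42}  # stand-in for serialized annotation
r.set('annotation:1', json.dumps(payload))
print(json.loads(r.get('annotation:1')))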
class ImportApiSerializer(TaskSerializer):
    """Tasks serializer for Import API (TaskBulkCreateAPI)"""
    annotations = AnnotationSerializer(many=True, default=[])
    predictions = PredictionSerializer(many=True, default=[])

    class Meta:
        model = Task
        list_serializer_class = TaskSerializerBulk
        exclude = ('is_labeled', 'project')
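# Hedged usage sketch (assumption, not from the source): validating a bulk
# import payload with ImportApiSerializer. `project` is an assumed existing
# project instance; since 'project' is excluded from the fields, how it is
# attached on save depends on TaskSerializerBulk, so the context key below is
# illustrative only.
payload = [
    {'data': {'text': 'first task'}},
    {'data': {'text': 'second task'}, 'annotations': [], 'predictions': []},
]
serializer = ImportApiSerializer(data=payload, many=True, context={'project': project})
serializer.is_valid(raise_exception=True)  # rejects malformed tasks/annotations
tasks = serializer.save()  # bulk create path goes through TaskSerializerBulk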
def post(self, request, *args, **kwargs):
    # get the cancelled task
    task = get_object_with_permissions(self.request, Task, self.kwargs['pk'], 'tasks.change_task')

    # validate data from annotation
    annotation = AnnotationSerializer(data=request.data)
    annotation.is_valid(raise_exception=True)

    # set annotator last activity
    user = request.user
    user.activity_at = timezone.now()
    user.save()

    # serialize annotation, update task and save
    com = annotation.save(completed_by=user, was_cancelled=True, task=task)
    task.annotations.add(com)
    task.save()

    return Response(annotation.data, status=status.HTTP_200_OK)
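# Hedged client-side sketch: calling the endpoint above to record a skipped
# (cancelled) annotation. The route and auth header are placeholders -- the
# actual URL pattern is not shown in this code.
import requests

resp = requests.post(
    'http://localhost:8080/api/tasks/42/annotations/cancel/',  # hypothetical route
    headers={'Authorization': 'Token <api-token>'},
    json={'result': [], 'lead_time': 12.5},
)
resp.raise_for_status()
print(resp.json())  # the saved annotation; the server sets was_cancelled=True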
def save_annotation(self, annotation):
    client, s3 = self.get_client_and_resource()
    logger.debug(f'Creating new object on {self.__class__.__name__} Storage {self} for annotation {annotation}')
    ser_annotation = AnnotationSerializer(annotation).data
    with transaction.atomic():
        # Create export storage link
        link = S3ExportStorageLink.create(annotation, self)
        try:
            # serialize to JSON before upload: put() expects bytes or str, not a dict
            s3.Object(self.bucket, link.key).put(Body=json.dumps(ser_annotation))
        except Exception as exc:
            logger.error(f"Can't export annotation {annotation} to S3 storage {self}. Reason: {exc}", exc_info=True)
def save_annotation(self, annotation):
    bucket = self.get_bucket()
    logger.debug(f'Creating new object on {self.__class__.__name__} Storage {self} for annotation {annotation}')
    ser_annotation = AnnotationSerializer(annotation).data
    with transaction.atomic():
        # Create export storage link
        link = GCSExportStorageLink.create(annotation, self)
        try:
            blob = bucket.blob(link.key)
            blob.upload_from_string(json.dumps(ser_annotation))
        except Exception as exc:
            logger.error(f"Can't export annotation {annotation} to GCS storage {self}. Reason: {exc}", exc_info=True)
def save_annotation(self, annotation):
    container = self.get_container()
    logger.debug(f'Creating new object on {self.__class__.__name__} Storage {self} for annotation {annotation}')
    ser_annotation = AnnotationSerializer(annotation).data
    with transaction.atomic():
        # Create export storage link
        link = AzureBlobExportStorageLink.create(annotation, self)
        try:
            blob = container.get_blob_client(link.key)
            blob.upload_blob(json.dumps(ser_annotation))
        except Exception as exc:
            logger.error(f"Can't export annotation {annotation} to Azure storage {self}. Reason: {exc}", exc_info=True)
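# Hedged refactoring sketch (not in the source): the three save_annotation
# methods above share one shape -- serialize the annotation, create an export
# link inside a transaction, then upload the JSON. A hypothetical mixin could
# factor that out; `link_class` and `_upload()` are invented names for the
# backend-specific pieces.
class ExportStorageMixin:
    link_class = None  # e.g. S3ExportStorageLink, GCSExportStorageLink, ...

    def save_annotation(self, annotation):
        logger.debug(f'Creating new object on {self.__class__.__name__} Storage {self} for annotation {annotation}')
        ser_annotation = AnnotationSerializer(annotation).data
        with transaction.atomic():
            link = self.link_class.create(annotation, self)
            try:
                self._upload(link.key, json.dumps(ser_annotation))  # backend-specific write
            except Exception as exc:
                logger.error(f"Can't export annotation {annotation} to {self}. Reason: {exc}", exc_info=True)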
def predictions_to_annotations(project, queryset, **kwargs):
    request = kwargs['request']
    user = request.user
    model_version = request.data.get('model_version')
    queryset = queryset.filter(predictions__isnull=False)
    predictions = Prediction.objects.filter(task__in=queryset, child_annotations__isnull=True)

    # model version filter
    if model_version is not None:
        predictions = predictions.filter(model_version=model_version)

    predictions_values = list(predictions.values_list('result', 'model_version', 'task_id', 'id'))

    # prepare annotations
    annotations = []
    for result, model_version, task_id, prediction_id in predictions_values:
        annotations.append({
            'result': result,
            'completed_by': user.pk,
            'task': task_id,
            'parent_prediction': prediction_id,
        })

    count = len(annotations)
    logger.debug(f'{count} predictions will be converted to annotations')

    annotation_ser = AnnotationSerializer(data=annotations, many=True)
    annotation_ser.is_valid(raise_exception=True)
    annotation_ser.save()

    return {'response_code': 200, 'detail': f'Created {count} annotations'}
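# Hedged usage sketch: invoking the action above directly. Only predictions
# without child_annotations are selected, so predictions already converted are
# skipped on a second run. `project` and `request` are assumed to come from
# the caller (e.g. a DRF view).
queryset = Task.objects.filter(project=project)
result = predictions_to_annotations(project, queryset, request=request)
print(result)  # e.g. {'response_code': 200, 'detail': 'Created 3 annotations'}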
def _scan_and_create_links(self, link_class):
    tasks_created = 0
    maximum_annotations = self.project.maximum_annotations
    task = self.project.tasks.order_by('-inner_id').first()
    max_inner_id = (task.inner_id + 1) if task else 1

    for key in self.iterkeys():
        logger.debug(f'Scanning key {key}')

        # skip if task already exists
        if link_class.exists(key, self):
            logger.debug(f'{self.__class__.__name__} link {key} already exists')
            continue

        logger.debug(f'{self}: found new key {key}')
        try:
            data = self.get_data(key)
        except (UnicodeDecodeError, json.decoder.JSONDecodeError) as exc:
            logger.error(exc, exc_info=True)
            raise ValueError(
                f'Error loading JSON from file "{key}".\nIf you\'re trying to import non-JSON data '
                f'(images, audio, text, etc.), edit storage settings and enable '
                f'"Treat every bucket object as a source file"')

        # predictions
        predictions = data.get('predictions', [])
        if predictions:
            if 'data' not in data:
                raise ValueError(
                    'If you use "predictions" field in the task, '
                    'you must put "data" field in the task too')

        # annotations
        annotations = data.get('annotations', [])
        if annotations:
            if 'data' not in data:
                raise ValueError(
                    'If you use "annotations" field in the task, '
                    'you must put "data" field in the task too')

        if 'data' in data and isinstance(data['data'], dict):
            data = data['data']

        with transaction.atomic():
            task = Task.objects.create(
                data=data, project=self.project, overlap=maximum_annotations,
                is_labeled=len(annotations) >= maximum_annotations, inner_id=max_inner_id)
            max_inner_id += 1

            link_class.create(task, key, self)
            logger.debug(f'Create {self.__class__.__name__} link with key={key} for task={task}')
            tasks_created += 1

            # add predictions
            logger.debug(f'Create {len(predictions)} predictions for task={task}')
            for prediction in predictions:
                prediction['task'] = task.id
            prediction_ser = PredictionSerializer(data=predictions, many=True)
            prediction_ser.is_valid(raise_exception=True)
            prediction_ser.save()

            # add annotations
            logger.debug(f'Create {len(annotations)} annotations for task={task}')
            for annotation in annotations:
                annotation['task'] = task.id
            annotation_ser = AnnotationSerializer(data=annotations, many=True)
            annotation_ser.is_valid(raise_exception=True)
            annotation_ser.save()

    self.last_sync = timezone.now()
    self.last_sync_count = tasks_created
    self.save()

    self.project.update_tasks_states(
        maximum_annotations_changed=False,
        overlap_cohort_percentage_changed=False,
        tasks_number_changed=True)
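# Hedged example (assumption): a bucket object that passes the checks in
# _scan_and_create_links. "data" is mandatory once "annotations" or
# "predictions" are present; an object with neither is treated as the task
# data itself.
task_json = {
    'data': {'image': 'https://example.com/1.jpg'},
    'annotations': [{'result': []}],
    'predictions': [{'result': [], 'model_version': 'v1', 'score': 0.5}],
}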
def test_export(business_client, configured_project, finished, aggregator_type,
                return_task, num_task_in_result, annotation_items, aggregated_class):
    if aggregator_type == 'majority_vote' and not apps.is_installed('businesses'):
        pytest.skip('Not supported aggregation for open-source version')

    task_query = Task.objects.filter(project=configured_project.id)
    task = task_query.first()
    expected_annotations_for_task = set()
    for annotation in annotation_items:
        db_annotation = Annotation.objects.create(
            task=task, result=annotation['result'], completed_by=business_client.admin)
        db_annotation = AnnotationSerializer(db_annotation).data
        annotation['id'] = db_annotation['id']
        annotation['created_at'] = db_annotation['created_at']
        annotation['updated_at'] = db_annotation['updated_at']
        annotation['completed_by'] = business_client.admin.id
        expected_annotations_for_task.add(json.dumps(annotation))

    r = business_client.get(
        f'/api/projects/{configured_project.id}/results/',
        data={'finished': finished, 'aggregator_type': aggregator_type, 'return_task': return_task})
    assert r.status_code == 200
    exports = r.json()

    # test expected number of objects returned
    assert len(exports) == num_task_in_result

    # test whether "id" or full task included in results
    if return_task == '0':
        task_with_annotation = next((t for t in exports if t['id'] == task.id))
        assert task_with_annotation['id'] == task.id
    elif return_task == '1':
        task_with_annotation = next((t for t in exports if t['id'] == task.id))
        assert task_with_annotation['data'] == task.data
    else:
        raise Exception('Incorrect return_task param in test: ' + str(return_task))

    # test how aggregation affects annotations
    if aggregator_type == 'no_aggregation':
        exported_annotations = set()
        for annotation in task_with_annotation['annotations']:
            exported_annotations.add(json.dumps(annotation))
        assert exported_annotations == expected_annotations_for_task
        if finished != '1':
            # we expect to see all tasks in exports...
            assert len(exports) == task_query.count()
            # ...as well as tasks without annotations (with empty results)
            assert all(len(t['annotations']) == 0 for t in exports if t['id'] != task.id)
    else:
        assert task_with_annotation['annotations'][0]['result'][0]['value']['choices'][0] == aggregated_class
class DataManagerTaskSerializer(TaskSerializer):
    predictions = PredictionSerializer(many=True, default=[], read_only=True)
    annotations = AnnotationSerializer(many=True, default=[], read_only=True)
    cancelled_annotations = serializers.SerializerMethodField()
    completed_at = serializers.SerializerMethodField()
    annotations_results = serializers.SerializerMethodField()
    predictions_results = serializers.SerializerMethodField()
    predictions_score = serializers.SerializerMethodField()
    total_annotations = serializers.SerializerMethodField()
    total_predictions = serializers.SerializerMethodField()
    file_upload = serializers.ReadOnlyField(source='file_upload_name')
    annotators = serializers.SerializerMethodField()

    class Meta:
        model = Task
        ref_name = 'data_manager_task_serializer'
        fields = [
            "cancelled_annotations", "completed_at", "created_at", "annotations_results",
            "data", "id", "predictions_results", "predictions_score", "total_annotations",
            "total_predictions", "annotations", "predictions", "file_upload",
            "annotators", "project",
        ]

    @staticmethod
    def get_cancelled_annotations(obj):
        return obj.annotations.filter(was_cancelled=True).count()

    @staticmethod
    def get_completed_at(obj):
        annotations = obj.annotations.all()
        if annotations:
            return max(c.created_at for c in annotations)
        return None

    @staticmethod
    def get_annotations_results(obj):
        annotations = obj.annotations.all()
        if annotations:
            return json.dumps([item.result for item in annotations])
        else:
            return ""

    @staticmethod
    def get_predictions_results(obj):
        predictions = obj.predictions.all()
        if predictions:
            return json.dumps([item.result for item in predictions])
        else:
            return ""

    @staticmethod
    def get_predictions_score(obj):
        predictions = obj.predictions.all()
        if predictions:
            values = [item.score for item in predictions if isinstance(item.score, (float, int))]
            if values:
                return sum(values) / float(len(values))
        return None

    @staticmethod
    def get_total_predictions(obj):
        return obj.predictions.count()

    @staticmethod
    def get_total_annotations(obj):
        return obj.annotations.filter(was_cancelled=False).count()

    @staticmethod
    def get_annotators(obj):
        result = obj.annotations.values_list('completed_by', flat=True).distinct()
        result = [r for r in result if r is not None]
        return result
def get_annotations(self, task):
    return AnnotationSerializer(task.annotations, many=True, default=[], read_only=True).data
class DataManagerTaskSerializer(TaskSerializer):
    predictions = PredictionSerializer(many=True, default=[], read_only=True)
    annotations = AnnotationSerializer(many=True, default=[], read_only=True)
    drafts = serializers.SerializerMethodField()
    cancelled_annotations = serializers.SerializerMethodField()
    completed_at = serializers.SerializerMethodField()
    annotations_results = serializers.SerializerMethodField()
    predictions_results = serializers.SerializerMethodField()
    predictions_score = serializers.SerializerMethodField()
    total_annotations = serializers.SerializerMethodField()
    total_predictions = serializers.SerializerMethodField()
    file_upload = serializers.ReadOnlyField(source='file_upload_name')
    annotators = serializers.SerializerMethodField()

    class Meta:
        model = Task
        ref_name = 'data_manager_task_serializer'
        fields = [
            "cancelled_annotations", "completed_at", "created_at", "annotations_results",
            "data", "id", "predictions_results", "predictions_score", "total_annotations",
            "total_predictions", "annotations", "predictions", "drafts", "file_upload",
            "annotators", "project",
        ]

    @staticmethod
    def get_cancelled_annotations(obj):
        return obj.annotations.filter(was_cancelled=True).count()

    @staticmethod
    def get_completed_at(obj):
        annotations = obj.annotations.all()
        if obj.is_labeled and annotations:
            return max(c.created_at for c in annotations)
        return None

    @staticmethod
    def get_annotations_results(obj):
        annotations = obj.annotations.all()
        if annotations:
            return json.dumps([item.result for item in annotations])
        else:
            return ""

    @staticmethod
    def get_predictions_results(obj):
        predictions = obj.predictions.all()
        if predictions:
            return json.dumps([item.result for item in predictions])
        else:
            return ""

    @staticmethod
    def get_predictions_score(obj):
        predictions = obj.predictions.all()
        if predictions:
            values = [item.score for item in predictions if isinstance(item.score, (float, int))]
            if values:
                return sum(values) / float(len(values))
        return None

    @staticmethod
    def get_total_predictions(obj):
        return obj.predictions.count()

    @staticmethod
    def get_total_annotations(obj):
        return obj.annotations.filter(was_cancelled=False).count()

    @staticmethod
    def get_annotators(obj):
        result = obj.annotations.values_list('completed_by', flat=True).distinct()
        result = [r for r in result if r is not None]
        return result

    def get_drafts(self, task):
        """Return drafts only for the current user"""
        # it's for swagger documentation
        if not isinstance(task, Task):
            return AnnotationDraftSerializer(many=True)

        drafts = task.drafts
        if 'request' in self.context and hasattr(self.context['request'], 'user'):
            user = self.context['request'].user
            drafts = drafts.filter(user=user)

        return AnnotationDraftSerializer(drafts, many=True, read_only=True, default=True, context=self.context).data
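# Hedged usage sketch: serializing a task for the Data Manager. Passing the
# request in context lets get_drafts() filter drafts down to the current user;
# without a request, all drafts on the task are returned. `task` and `request`
# are assumed to exist.
serialized = DataManagerTaskSerializer(task, context={'request': request}).data
print(serialized['total_annotations'], serialized['completed_at'])
print(len(serialized['drafts']))  # drafts belonging to request.user only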