def _migrate_tasks(project_path, project):
    """ Migrate tasks from json file to database objects"""
    tasks_path = project_path / 'tasks.json'
    with io.open(os.path.abspath(tasks_path)) as t:
        tasks_data = json.load(t)
        for task_id, task_data in tasks_data.items():
            task = Task.objects.create(data=task_data.get('data', {}),
                                       project=project)

            # migrate annotations
            annotations_path = project_path / 'completions' / '{}.json'.format(
                task_id)
            if annotations_path.exists():
                with io.open(os.path.abspath(annotations_path)) as c:
                    annotations_data = json.load(c)
                    for annotation in annotations_data['completions']:
                        task_annotation = Annotation(
                            result=annotation['result'],
                            task=task,
                            lead_time=annotation['lead_time'],
                            was_cancelled=annotation.get(
                                'was_cancelled', False),
                            completed_by=project.created_by,
                        )
                        with suppress_autotime(task_annotation,
                                               ['created_at']):
                            task_annotation.created_at = datetime.datetime.fromtimestamp(
                                annotation['created_at'],
                                tz=datetime.datetime.now().astimezone().tzinfo)
                            task_annotation.save()

            # migrate predictions
            predictions_path = project_path / 'predictions' / '{}.json'.format(
                task_id)
            if predictions_path.exists():
                with io.open(os.path.abspath(predictions_path)) as c:
                    predictions_data = json.load(c)
                    for prediction in predictions_data['predictions']:
                        task_prediction = Prediction(
                            result=prediction['result'], task=task)
                        with suppress_autotime(task_prediction,
                                               ['created_at']):
                            task_prediction.created_at = datetime.datetime.fromtimestamp(
                                prediction['created_at'],
                                tz=datetime.datetime.now().astimezone().tzinfo)
                            task_prediction.save()
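
The migration above restores historical timestamps through a suppress_autotime context manager that is not shown in this listing: it temporarily disables auto_now/auto_now_add on the listed fields so created_at can be set explicitly before save(). A minimal sketch of such a helper, written against the standard Django field API (an assumption based on the call sites above, not the original implementation):

import contextlib

@contextlib.contextmanager
def suppress_autotime(instance, field_names):
    """Temporarily disable auto_now/auto_now_add on the given model fields.

    Sketch only: inferred from how the migration uses it; note that it
    mutates class-level field options, so it is not thread-safe.
    """
    fields = [f for f in instance._meta.fields if f.name in field_names]
    saved = [(f, f.auto_now, f.auto_now_add) for f in fields]
    for f in fields:
        f.auto_now = False
        f.auto_now_add = False
    try:
        yield
    finally:
        for f, auto_now, auto_now_add in saved:
            f.auto_now = auto_now
            f.auto_now_add = auto_now_add
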
Example #2
    def perform_create(self, ser):
        task = get_object_with_check_and_log(self.request,
                                             Task,
                                             pk=self.kwargs['pk'])
        # annotators have write access only to annotations, and this can't be checked after serializer.save()
        check_object_permissions(self.request, Annotation(task=task),
                                 'annotations.change_annotation')
        user = self.request.user
        # Release the task lock (it should have been taken by the same user, otherwise a Sentry error is raised)
        logger.debug(f'User={user} releases task={task}')
        task.release_lock(user)

        # updates history
        update_id = self.request.user.id
        result = ser.validated_data.get('result')
        extra_args = {'task_id': self.kwargs['pk']}

        # save stats on how well the annotator's annotations coincide with the current prediction
        # (only for finished task annotations)
        if result is not None:
            prediction = Prediction.objects.filter(
                task=task, model_version=task.project.model_version)
            if prediction.exists():
                prediction = prediction.first()
                prediction_ser = PredictionSerializer(prediction).data
            else:
                logger.debug(
                    f'User={self.request.user}: there are no predictions for task={task}'
                )
                prediction_ser = {}
            # serialize annotation
            extra_args.update({
                'prediction': prediction_ser,
            })

        if 'was_cancelled' in self.request.GET:
            extra_args['was_cancelled'] = bool_from_request(
                self.request.GET, 'was_cancelled', False)

        if 'completed_by' not in ser.validated_data:
            extra_args['completed_by'] = self.request.user

        # create annotation
        logger.debug(f'User={self.request.user}: save annotation')
        annotation = ser.save(**extra_args)
        logger.debug(f'Save activity for user={self.request.user}')
        self.request.user.activity_at = timezone.now()
        self.request.user.save()

        # if the annotation was created from a draft, remove that draft
        draft_id = self.request.data.get('draft_id')
        if draft_id is not None:
            logger.debug(
                f'Remove draft {draft_id} after creating annotation {annotation.id}'
            )
            AnnotationDraft.objects.filter(id=draft_id).delete()

        return annotation
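
perform_create reads the was_cancelled flag with a bool_from_request helper that is not part of this listing. A minimal sketch of the behaviour its call site implies (string-to-boolean coercion with a default; this is an assumption, not the library's exact implementation):

def bool_from_request(params, key, default):
    """Read a boolean value from a query-parameter mapping.

    Sketch under stated assumptions: accepts common string spellings of
    booleans and falls back to the default when the key is absent.
    """
    value = params.get(key, default)
    if isinstance(value, bool):
        return value
    return str(value).strip().lower() in ('1', 'true', 'yes', 'on')
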
def predictions_to_annotations(project, queryset, **kwargs):
    request = kwargs['request']
    user = request.user
    model_version = request.data.get('model_version')
    queryset = queryset.filter(predictions__isnull=False)
    predictions = Prediction.objects.filter(task__in=queryset,
                                            child_annotations__isnull=True)

    # model version filter
    if model_version is not None:
        if isinstance(model_version, list):
            predictions = predictions.filter(
                model_version__in=model_version).distinct()
        else:
            predictions = predictions.filter(model_version=model_version)

    predictions_values = list(
        predictions.values_list('result', 'model_version', 'task_id', 'id'))

    # prepare annotations
    annotations = []
    tasks_ids = []
    for result, model_version, task_id, prediction_id in predictions_values:
        tasks_ids.append(task_id)
        annotations.append({
            'result': result,
            'completed_by_id': user.pk,
            'task_id': task_id,
            'parent_prediction_id': prediction_id
        })

    count = len(annotations)
    logger.debug(f'{count} predictions will be converted to annotations')
    db_annotations = [Annotation(**annotation) for annotation in annotations]
    db_annotations = Annotation.objects.bulk_create(db_annotations)
    Task.objects.filter(id__in=tasks_ids).update(updated_at=now(),
                                                 updated_by=request.user)

    if db_annotations:
        TaskSerializerBulk.post_process_annotations(db_annotations)
        # Execute webhook for created annotations
        emit_webhooks_for_instance(user.active_organization, project,
                                   WebhookAction.ANNOTATIONS_CREATED,
                                   db_annotations)

    return {'response_code': 200, 'detail': f'Created {count} annotations'}
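
Like the other data manager actions, predictions_to_annotations takes the project, a task queryset, and the request through kwargs. A hedged usage sketch (the project id and the SimpleNamespace request are illustrative assumptions, not the project's actual calling convention):

from types import SimpleNamespace

project = Project.objects.get(pk=1)              # hypothetical project id
queryset = Task.objects.filter(project=project)
fake_request = SimpleNamespace(user=project.created_by,
                               data={'model_version': 'v1'})
result = predictions_to_annotations(project, queryset, request=fake_request)
# -> {'response_code': 200, 'detail': 'Created N annotations'}
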
Example #4
def propagate_annotations(project, queryset, **kwargs):
    request = kwargs['request']
    user = request.user
    source_annotation_id = request.data.get('source_annotation_id')
    annotations = Annotation.objects.filter(task__project=project,
                                            id=source_annotation_id)
    if not annotations:
        raise DataManagerException(
            f'Source annotation {source_annotation_id} not found in the current project'
        )
    source_annotation = annotations.first()

    tasks = set(queryset.values_list('id', flat=True))
    # exclude the task the source annotation already belongs to
    tasks.discard(source_annotation.task.id)

    # copy source annotation to new annotations for each task
    db_annotations = []
    for i in tasks:
        db_annotations.append(
            Annotation(task_id=i,
                       completed_by_id=user.id,
                       result=source_annotation.result,
                       result_count=source_annotation.result_count,
                       parent_annotation_id=source_annotation.id))

    db_annotations = Annotation.objects.bulk_create(
        db_annotations, batch_size=settings.BATCH_SIZE)
    TaskSerializerBulk.post_process_annotations(db_annotations)

    return {
        'response_code': 200,
        'detail': f'Created {len(db_annotations)} annotations'
    }
Example #5
def restore_objects(project):
    """ Create task and annotation for URL tests
    """
    # task_db, annotation_db = None, None

    if project.pk != 1000:
        project.pk = 1000
        project.title += '2'
        project.save()
    try:
        task_db = Task.objects.get(pk=1000)
    except Task.DoesNotExist:
        task_db = Task()
        task_db.data = {"data": {"image": "kittens.jpg"}}
        task_db.project = project
        task_db.id = 1000  # use a fixed id so the test doesn't depend on the db's auto-increment state
        task_db.save()

    try:
        annotation_db = Annotation.objects.get(pk=1000)
    except Annotation.DoesNotExist:
        task_db = Task.objects.get(pk=1000)
        annotation_db = Annotation()
        annotation = [{
            "from_name": "some",
            "to_name": "x",
            "type": "none",
            "value": {
                "none": ["Opossum"]
            }
        }]
        annotation_db.result = annotation
        annotation_db.id = 1000  # use a fixed id so the test doesn't depend on the db's auto-increment state
        annotation_db.task = task_db
        annotation_db.save()

    return task_db, annotation_db
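
A hedged sketch of how restore_objects could be wired into a pytest-style test; the configured_project fixture name is hypothetical, db is the standard pytest-django fixture:

import pytest

@pytest.fixture
def url_test_objects(db, configured_project):    # hypothetical fixture name
    # re-create the fixed pk=1000 task/annotation pair before each URL test
    return restore_objects(configured_project)

def test_restored_objects(url_test_objects):
    task, annotation = url_test_objects
    assert task.pk == 1000 and annotation.pk == 1000
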
Example #6
    def create(self, validated_data):
        """ Create Tasks and Annotations in bulk
        """
        db_tasks, db_annotations, db_predictions, validated_tasks = [], [], [], validated_data
        logging.info(
            f'Try to serialize tasks with annotations, data len = {len(validated_data)}'
        )
        user = self.context.get('user', None)
        project = self.context.get('project')

        # to be sure we add tasks with annotations at the same time
        with transaction.atomic():

            # extract annotations and predictions
            task_annotations, task_predictions = [], []
            for task in validated_tasks:
                task_annotations.append(task.pop('annotations', []))
                task_predictions.append(task.pop('predictions', []))

            # check annotator permissions for "completed_by"
            organization = user.active_organization \
                if not project.created_by.active_organization else project.created_by.active_organization
            project_user_ids = organization.members.values_list('user__id',
                                                                flat=True)
            annotator_ids = set()
            for annotations in task_annotations:
                for annotation in annotations:
                    annotator_ids.add(self.get_completed_by_id(annotation))

            for i in annotator_ids:
                if i not in project_user_ids and i is not None:
                    raise ValidationError(
                        f'Annotations with "completed_by"={i} were produced by an annotator '
                        f'who is not allowed in this project as an invited annotator or team member'
                    )

            # add tasks first
            for task in validated_tasks:
                t = Task(project=project,
                         data=task['data'],
                         meta=task.get('meta', {}),
                         overlap=project.maximum_annotations,
                         file_upload_id=task.get('file_upload_id'))
                db_tasks.append(t)

            if settings.DJANGO_DB == settings.DJANGO_DB_SQLITE:
                self.db_tasks = []
                try:
                    last_task = Task.objects.latest('id')
                    current_id = last_task.id + 1
                except Task.DoesNotExist:
                    current_id = 1

                for task in db_tasks:
                    task.id = current_id
                    current_id += 1
                self.db_tasks = Task.objects.bulk_create(
                    db_tasks, batch_size=settings.BATCH_SIZE)
            else:
                self.db_tasks = Task.objects.bulk_create(
                    db_tasks, batch_size=settings.BATCH_SIZE)
            logging.info(
                f'Tasks serialization success, len = {len(self.db_tasks)}')

            # add annotations
            for i, annotations in enumerate(task_annotations):
                for annotation in annotations:
                    # extract the "ground_truth" flag (defaults to True)
                    ground_truth = annotation.pop('ground_truth', True)

                    # get user id
                    completed_by_id = self.get_completed_by_id(
                        annotation, default=user.id if user else None)
                    annotation.pop('completed_by', None)

                    db_annotations.append(
                        Annotation(task=self.db_tasks[i],
                                   ground_truth=ground_truth,
                                   completed_by_id=completed_by_id,
                                   result=annotation['result']))

            # add predictions
            last_model_version = None
            for i, predictions in enumerate(task_predictions):
                for prediction in predictions:
                    prediction_score = prediction.get('score')
                    if prediction_score is not None:
                        try:
                            prediction_score = float(prediction_score)
                        except ValueError as exc:
                            logger.error(
                                f'Can\'t upload prediction score: should be in float format. Reason: {exc}. '
                                'Fallback to score=None',
                                exc_info=True)
                            prediction_score = None

                    last_model_version = prediction.get(
                        'model_version', 'undefined')
                    db_predictions.append(
                        Prediction(task=self.db_tasks[i],
                                   result=prediction['result'],
                                   score=prediction_score,
                                   model_version=last_model_version))

            # annotations: DB bulk create
            if settings.DJANGO_DB == settings.DJANGO_DB_SQLITE:
                self.db_annotations = []
                try:
                    last_annotation = Annotation.objects.latest('id')
                    current_id = last_annotation.id + 1
                except Annotation.DoesNotExist:
                    current_id = 1

                for annotation in db_annotations:
                    annotation.id = current_id
                    current_id += 1
                self.db_annotations = Annotation.objects.bulk_create(
                    db_annotations, batch_size=settings.BATCH_SIZE)
            else:
                self.db_annotations = Annotation.objects.bulk_create(
                    db_annotations, batch_size=settings.BATCH_SIZE)
            logging.info(
                f'Annotations serialization success, len = {len(self.db_annotations)}'
            )

            # predictions: DB bulk create
            self.db_predictions = Prediction.objects.bulk_create(
                db_predictions, batch_size=settings.BATCH_SIZE)
            logging.info(
                f'Predictions serialization success, len = {len(self.db_predictions)}'
            )

            # renew project model version if it's empty
            if not project.model_version and last_model_version is not None:
                project.model_version = last_model_version
                project.save()

        return db_tasks
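
The SQLite branches above pre-assign primary keys before bulk_create, most likely because SQLite bulk inserts on the targeted Django versions do not populate the objects' ids, and the ids are needed to attach annotations and predictions to self.db_tasks[i]. The same pattern is repeated for annotations; a small helper factoring it out (a sketch, not part of the original code) could look like:

def assign_ids_for_sqlite(model_cls, objects):
    """Pre-assign sequential primary keys before bulk_create.

    Sketch of the pattern used above: ids are derived from the current
    maximum, so this is not safe under concurrent imports.
    """
    try:
        current_id = model_cls.objects.latest('id').id + 1
    except model_cls.DoesNotExist:
        current_id = 1
    for obj in objects:
        obj.id = current_id
        current_id += 1
    return objects

With such a helper, both branches collapse to assign_ids_for_sqlite(Task, db_tasks) (or Annotation, db_annotations) followed by the same bulk_create call.
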
Example #7
    def create(self, validated_data):
        """ Create Tasks and Annotations in bulk
        """
        db_tasks, db_annotations, db_predictions, validated_tasks = [], [], [], validated_data
        logging.info(
            f'Try to serialize tasks with annotations, data len = {len(validated_data)}'
        )
        user = self.context.get('user', None)
        project = self.context.get('project')

        organization = user.active_organization \
            if not project.created_by.active_organization else project.created_by.active_organization
        members_email_to_id = dict(
            organization.members.values_list('user__email', 'user__id'))
        members_ids = set(members_email_to_id.values())
        logger.debug(
            f"{len(members_email_to_id)} members found in organization {organization}"
        )

        # to be sure we add tasks with annotations at the same time
        with transaction.atomic():

            # extract annotations and predictions
            task_annotations, task_predictions = [], []
            for task in validated_tasks:
                annotations = task.pop('annotations', [])
                # insert a valid "completed_by_id" by existing member
                self._insert_valid_completed_by_id_or_raise(
                    annotations, members_email_to_id, members_ids, user
                    or project.created_by)
                predictions = task.pop('predictions', [])
                task_annotations.append(annotations)
                task_predictions.append(predictions)

            # add tasks first
            for task in validated_tasks:
                t = Task(project=project,
                         data=task['data'],
                         meta=task.get('meta', {}),
                         overlap=project.maximum_annotations,
                         file_upload_id=task.get('file_upload_id'))
                db_tasks.append(t)

            # deprecated meta warning
            if 'meta' in task:
                logger.warning(
                    'Your task data has a "meta" field, which is deprecated and will be removed in the future'
                )

            if settings.DJANGO_DB == settings.DJANGO_DB_SQLITE:
                self.db_tasks = []
                try:
                    last_task = Task.objects.latest('id')
                    current_id = last_task.id + 1
                except Task.DoesNotExist:
                    current_id = 1

                for task in db_tasks:
                    task.id = current_id
                    current_id += 1
                self.db_tasks = Task.objects.bulk_create(
                    db_tasks, batch_size=settings.BATCH_SIZE)
            else:
                self.db_tasks = Task.objects.bulk_create(
                    db_tasks, batch_size=settings.BATCH_SIZE)
            logging.info(
                f'Tasks serialization success, len = {len(self.db_tasks)}')

            # add annotations
            for i, annotations in enumerate(task_annotations):
                for annotation in annotations:
                    # extract the "ground_truth" flag (defaults to True)
                    ground_truth = annotation.pop('ground_truth', True)
                    was_cancelled = annotation.pop('was_cancelled', False)

                    db_annotations.append(
                        Annotation(
                            task=self.db_tasks[i],
                            ground_truth=ground_truth,
                            was_cancelled=was_cancelled,
                            completed_by_id=annotation['completed_by_id'],
                            result=annotation['result']))

            # add predictions
            last_model_version = None
            for i, predictions in enumerate(task_predictions):
                for prediction in predictions:
                    prediction_score = prediction.get('score')
                    if prediction_score is not None:
                        try:
                            prediction_score = float(prediction_score)
                        except ValueError as exc:
                            logger.error(
                                f'Can\'t upload prediction score: should be in float format. Reason: {exc}. '
                                'Fallback to score=None',
                                exc_info=True)
                            prediction_score = None

                    last_model_version = prediction.get(
                        'model_version', 'undefined')
                    db_predictions.append(
                        Prediction(task=self.db_tasks[i],
                                   result=prediction['result'],
                                   score=prediction_score,
                                   model_version=last_model_version))

            # annotations: DB bulk create
            if settings.DJANGO_DB == settings.DJANGO_DB_SQLITE:
                self.db_annotations = []
                try:
                    last_annotation = Annotation.objects.latest('id')
                    current_id = last_annotation.id + 1
                except Annotation.DoesNotExist:
                    current_id = 1

                for annotation in db_annotations:
                    annotation.id = current_id
                    current_id += 1
                self.db_annotations = Annotation.objects.bulk_create(
                    db_annotations, batch_size=settings.BATCH_SIZE)
            else:
                self.db_annotations = Annotation.objects.bulk_create(
                    db_annotations, batch_size=settings.BATCH_SIZE)
            logging.info(
                f'Annotations serialization success, len = {len(self.db_annotations)}'
            )

            # predictions: DB bulk create
            self.db_predictions = Prediction.objects.bulk_create(
                db_predictions, batch_size=settings.BATCH_SIZE)
            logging.info(
                f'Predictions serialization success, len = {len(self.db_predictions)}'
            )

            # renew project model version if it's empty
            if not project.model_version and last_model_version is not None:
                project.model_version = last_model_version
                project.save()

        return db_tasks
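
_insert_valid_completed_by_id_or_raise is called here and in the next example but never shown. Based on its call sites (a list of annotations, an email-to-id map, the set of member ids, and a fallback user), here is a hedged sketch of what such a validator might do; the exact rules are assumptions and the real implementation may differ:

from rest_framework.exceptions import ValidationError

def _insert_valid_completed_by_id_or_raise(annotations, members_email_to_id,
                                           members_ids, default_user):
    """Sketch only: resolve "completed_by" to a valid organization member id."""
    for annotation in annotations:
        completed_by = annotation.get('completed_by')
        if isinstance(completed_by, dict) and 'email' in completed_by:
            email = completed_by['email']
            if email not in members_email_to_id:
                raise ValidationError(f'Unknown annotator email: {email}')
            annotation['completed_by_id'] = members_email_to_id[email]
        elif isinstance(completed_by, int):
            if completed_by not in members_ids:
                raise ValidationError(
                    f'Annotator id={completed_by} is not a member of this organization')
            annotation['completed_by_id'] = completed_by
        else:
            annotation['completed_by_id'] = default_user.id
        annotation.pop('completed_by', None)
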
Example #8
    def create(self, validated_data):
        """ Create Tasks and Annotations in bulk
        """
        db_tasks, db_annotations, db_predictions, validated_tasks = [], [], [], validated_data
        logging.info(f'Try to serialize tasks with annotations, data len = {len(validated_data)}')
        user = self.context.get('user', None)

        organization = user.active_organization \
            if not self.project.created_by.active_organization else self.project.created_by.active_organization
        members_email_to_id = dict(organization.members.values_list('user__email', 'user__id'))
        members_ids = set(members_email_to_id.values())
        logger.debug(f"{len(members_email_to_id)} members found in organization {organization}")

        # to be sure we add tasks with annotations at the same time
        with transaction.atomic():

            # extract annotations and predictions
            task_annotations, task_predictions = [], []
            for task in validated_tasks:
                annotations = task.pop('annotations', [])
                # insert a valid "completed_by_id" by existing member
                self._insert_valid_completed_by_id_or_raise(
                    annotations, members_email_to_id, members_ids, user or self.project.created_by)
                predictions = task.pop('predictions', [])
                task_annotations.append(annotations)
                task_predictions.append(predictions)

            # add tasks first
            max_overlap = self.project.maximum_annotations

            # identify max inner id
            tasks = Task.objects.filter(project=self.project)
            max_inner_id = (tasks.order_by("-inner_id")[0].inner_id + 1) if tasks else 1

            for i, task in enumerate(validated_tasks):
                t = Task(
                    project=self.project,
                    data=task['data'],
                    meta=task.get('meta', {}),
                    overlap=max_overlap,
                    is_labeled=len(task_annotations[i]) >= max_overlap,
                    file_upload_id=task.get('file_upload_id'),
                    inner_id=max_inner_id + i
                )
                db_tasks.append(t)

            if settings.DJANGO_DB == settings.DJANGO_DB_SQLITE:
                self.db_tasks = []
                try:
                    last_task = Task.objects.latest('id')
                    current_id = last_task.id + 1
                except Task.DoesNotExist:
                    current_id = 1

                for task in db_tasks:
                    task.id = current_id
                    current_id += 1
                self.db_tasks = Task.objects.bulk_create(db_tasks, batch_size=settings.BATCH_SIZE)
            else:
                self.db_tasks = Task.objects.bulk_create(db_tasks, batch_size=settings.BATCH_SIZE)
            logging.info(f'Tasks serialization success, len = {len(self.db_tasks)}')

            # add annotations
            for i, annotations in enumerate(task_annotations):
                for annotation in annotations:
                    if not isinstance(annotation, dict):
                        continue
                        
                    # extract the "ground_truth" flag (defaults to True)
                    ground_truth = annotation.pop('ground_truth', True)
                    was_cancelled = annotation.pop('was_cancelled', False)
                    lead_time = annotation.pop('lead_time', None)

                    db_annotations.append(Annotation(task=self.db_tasks[i],
                                                     ground_truth=ground_truth,
                                                     was_cancelled=was_cancelled,
                                                     completed_by_id=annotation['completed_by_id'],
                                                     result=annotation['result'],
                                                     lead_time=lead_time))

            # add predictions
            last_model_version = None
            for i, predictions in enumerate(task_predictions):
                for prediction in predictions:
                    if not isinstance(prediction, dict):
                        continue

                    # we need to call result normalizer here since "bulk_create" doesn't call save() method
                    result = Prediction.prepare_prediction_result(prediction['result'], self.project)
                    prediction_score = prediction.get('score')
                    if prediction_score is not None:
                        try:
                            prediction_score = float(prediction_score)
                        except ValueError as exc:
                            logger.error(
                                f'Can\'t upload prediction score: should be in float format. Reason: {exc}. '
                                'Fallback to score=None', exc_info=True)
                            prediction_score = None

                    last_model_version = prediction.get('model_version', 'undefined')
                    db_predictions.append(Prediction(task=self.db_tasks[i],
                                                     result=result,
                                                     score=prediction_score,
                                                     model_version=last_model_version))

            # annotations: DB bulk create
            if settings.DJANGO_DB == settings.DJANGO_DB_SQLITE:
                self.db_annotations = []
                try:
                    last_annotation = Annotation.objects.latest('id')
                    current_id = last_annotation.id + 1
                except Annotation.DoesNotExist:
                    current_id = 1

                for annotation in db_annotations:
                    annotation.id = current_id
                    current_id += 1
                self.db_annotations = Annotation.objects.bulk_create(db_annotations, batch_size=settings.BATCH_SIZE)
            else:
                self.db_annotations = Annotation.objects.bulk_create(db_annotations, batch_size=settings.BATCH_SIZE)
            logging.info(f'Annotations serialization success, len = {len(self.db_annotations)}')

            # predictions: DB bulk create
            self.db_predictions = Prediction.objects.bulk_create(db_predictions, batch_size=settings.BATCH_SIZE)
            logging.info(f'Predictions serialization success, len = {len(self.db_predictions)}')

            # renew project model version if it's empty
            if not self.project.model_version and last_model_version is not None:
                self.project.model_version = last_model_version
                self.project.save()

        self.post_process_annotations(self.db_annotations)
        return db_tasks
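
A hedged sketch of how an import view might drive this bulk serializer; the payload, request, and project names are assumptions, and the context keys follow the earlier variants of create() (this last example reads the project from self.project instead):

serializer = TaskSerializerBulk(data=tasks_payload,
                                context={'user': request.user,
                                         'project': project})
serializer.is_valid(raise_exception=True)
db_tasks = serializer.save()    # runs the bulk create() shown above
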