Example #1
    def predict_many_tasks(self, tasks):
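        """Request predictions for a batch of tasks from the ML backend and save them."""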
        self.update_state()
        if self.not_ready:
            logger.debug(f'ML backend {self} is not ready')
            return

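        # A plain list of Task objects is converted to a queryset for uniform handling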
        if isinstance(tasks, list):
            from tasks.models import Task
            tasks = Task.objects.filter(id__in=[task.id for task in tasks])

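        # Serialize the tasks and request predictions from the ML backend API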
        tasks_ser = TaskSimpleSerializer(tasks, many=True).data
        ml_api_result = self.api.make_predictions(tasks_ser,
                                                  self.model_version,
                                                  self.project)
        if ml_api_result.is_error:
            logger.error(
                f'Prediction not created for project {self}: {ml_api_result.error_message}'
            )
            return

        responses = ml_api_result.response['results']

        if len(responses) == 0:
            logger.error(
                f'ML backend returned empty prediction for project {self}')
            return

        # ML backend doesn't support batches of tasks, process them one by one
        elif len(responses) == 1:
            logger.warning(
                f"ML backend '{self.title}' doesn't support batch processing of tasks, "
                f"switched to one-by-one task processing")
            for task in tasks:
                self.predict_one_task(task)
            return

        # the number of responses doesn't match the number of tasks; log it and pair what we can
        elif len(responses) != len(tasks_ser):
            logger.error(
                f'ML backend returned response number {len(responses)} != task number {len(tasks_ser)}'
            )

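        # Pair each serialized task with its response and build prediction records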
        predictions = []
        for task, response in zip(tasks_ser, responses):
            predictions.append({
                'task': task['id'],
                'result': response['result'],
                'score': response.get('score'),
                'model_version': self.model_version
            })
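        # Validate and save all predictions in one (conditionally atomic) block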
        with conditional_atomic():
            prediction_ser = PredictionSerializer(data=predictions, many=True)
            prediction_ser.is_valid(raise_exception=True)
            prediction_ser.save()
Example #2
    def _scan_and_create_links(self, link_class):
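        """Scan storage keys, create a task for each new key and import its predictions and annotations."""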
        tasks_created = 0
        maximum_annotations = self.project.maximum_annotations
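        # Determine the next sequential inner_id for new tasks in this project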
        task = self.project.tasks.order_by('-inner_id').first()
        max_inner_id = (task.inner_id + 1) if task else 1

        for key in self.iterkeys():
            logger.debug(f'Scanning key {key}')

            # skip if task already exists
            if link_class.exists(key, self):
                logger.debug(
                    f'{self.__class__.__name__} link {key} already exists')
                continue

            logger.debug(f'{self}: found new key {key}')
            try:
                data = self.get_data(key)
            except (UnicodeDecodeError, json.decoder.JSONDecodeError) as exc:
                logger.error(exc, exc_info=True)
                raise ValueError(
                    f'Error loading JSON from file "{key}".\nIf you\'re trying to import non-JSON data '
                    f'(images, audio, text, etc.), edit storage settings and enable '
                    f'"Treat every bucket object as a source file"')

            # predictions
            predictions = data.get('predictions', [])
            if predictions:
                if 'data' not in data:
                    raise ValueError(
                        'If you use "predictions" field in the task, '
                        'you must put "data" field in the task too')

            # annotations
            annotations = data.get('annotations', [])
            if annotations:
                if 'data' not in data:
                    raise ValueError(
                        'If you use "annotations" field in the task, '
                        'you must put "data" field in the task too')

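            # A task may be stored as {"data": {...}, "predictions": [...], ...} or as a bare data dict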
            if 'data' in data and isinstance(data['data'], dict):
                data = data['data']

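            # Create the task and its related objects in a single transaction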
            with transaction.atomic():
                task = Task.objects.create(
                    data=data,
                    project=self.project,
                    overlap=maximum_annotations,
                    is_labeled=len(annotations) >= maximum_annotations,
                    inner_id=max_inner_id)
                max_inner_id += 1

                link_class.create(task, key, self)
                logger.debug(
                    f'Create {self.__class__.__name__} link with key={key} for task={task}'
                )
                tasks_created += 1

                # add predictions
                logger.debug(
                    f'Create {len(predictions)} predictions for task={task}')
                for prediction in predictions:
                    prediction['task'] = task.id
                prediction_ser = PredictionSerializer(data=predictions,
                                                      many=True)
                prediction_ser.is_valid(raise_exception=True)
                prediction_ser.save()

                # add annotations
                logger.debug(
                    f'Create {len(annotations)} annotations for task={task}')
                for annotation in annotations:
                    annotation['task'] = task.id
                annotation_ser = AnnotationSerializer(data=annotations,
                                                      many=True)
                annotation_ser.is_valid(raise_exception=True)
                annotation_ser.save()

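        # Record when the sync happened and how many tasks it created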
        self.last_sync = timezone.now()
        self.last_sync_count = tasks_created
        self.save()

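        # Recalculate project task states since the task count has changed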
        self.project.update_tasks_states(
            maximum_annotations_changed=False,
            overlap_cohort_percentage_changed=False,
            tasks_number_changed=True)