def predict_many_tasks(self, tasks):
    self.update_state()
    if self.not_ready:
        logger.debug(f'ML backend {self} is not ready')
        return

    if isinstance(tasks, list):
        from tasks.models import Task
        tasks = Task.objects.filter(id__in=[task.id for task in tasks])

    tasks_ser = TaskSimpleSerializer(tasks, many=True).data
    ml_api_result = self.api.make_predictions(tasks_ser, self.model_version, self.project)
    if ml_api_result.is_error:
        logger.error(
            f'Prediction not created for project {self}: {ml_api_result.error_message}')
        return

    responses = ml_api_result.response['results']
    if len(responses) == 0:
        logger.error(f'ML backend returned empty prediction for project {self}')
        return

    # ML backend doesn't support a batch of tasks: process them one by one
    elif len(responses) == 1:
        logger.warning(
            f"ML backend '{self.title}' doesn't support batch processing of tasks, "
            f"switched to one-by-one task processing")
        for task in tasks:
            self.predict_one_task(task)
        return

    # wrong number of results
    elif len(responses) != len(tasks_ser):
        logger.error(
            f'ML backend returned {len(responses)} responses for {len(tasks_ser)} tasks')
        return  # zip() below would silently drop the unmatched tasks or responses

    predictions = []
    for task, response in zip(tasks_ser, responses):
        predictions.append({
            'task': task['id'],
            'result': response['result'],
            'score': response.get('score'),
            'model_version': self.model_version
        })

    with conditional_atomic():
        prediction_ser = PredictionSerializer(data=predictions, many=True)
        prediction_ser.is_valid(raise_exception=True)
        prediction_ser.save()
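
# --- Illustrative sketch (not part of the original module) ---
# A minimal, dependency-free rendering of the response-count dispatch used by
# predict_many_tasks above: an empty result list is an error, a single result
# is treated as "no batch support" and triggers a per-task fallback (as in the
# original, even when only one task was sent), a full batch is saved in bulk,
# and any other count is rejected. All names here (make_predictions,
# predict_one, save_bulk) are hypothetical stand-ins for the real ML backend
# API and serializer layer.
from typing import Callable, Sequence


def dispatch_batch_predictions(
    tasks: Sequence[dict],
    make_predictions: Callable[[Sequence[dict]], list],
    predict_one: Callable[[dict], None],
    save_bulk: Callable[[list], None],
) -> None:
    responses = make_predictions(tasks)
    if not responses:
        raise RuntimeError('ML backend returned an empty prediction batch')
    if len(responses) == 1:
        # Backend doesn't batch: re-request each task individually.
        for task in tasks:
            predict_one(task)
        return
    if len(responses) != len(tasks):
        raise RuntimeError(
            f'Response count {len(responses)} != task count {len(tasks)}')
    save_bulk([
        {'task': task['id'], 'result': response['result'],
         'score': response.get('score')}
        for task, response in zip(tasks, responses)
    ])

# Example wiring (hypothetical callables):
#   dispatch_batch_predictions(
#       tasks=[{'id': 1}, {'id': 2}],
#       make_predictions=lambda ts: [{'result': [], 'score': 0.9} for _ in ts],
#       predict_one=lambda t: None,
#       save_bulk=print,
#   )
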
def _scan_and_create_links(self, link_class):
    tasks_created = 0
    maximum_annotations = self.project.maximum_annotations
    # continue the inner_id sequence from the last existing task
    task = self.project.tasks.order_by('-inner_id').first()
    max_inner_id = (task.inner_id + 1) if task else 1

    for key in self.iterkeys():
        logger.debug(f'Scanning key {key}')

        # skip if a task has already been created for this key
        if link_class.exists(key, self):
            logger.debug(f'{self.__class__.__name__} link {key} already exists')
            continue

        logger.debug(f'{self}: found new key {key}')
        try:
            data = self.get_data(key)
        except (UnicodeDecodeError, json.decoder.JSONDecodeError) as exc:
            logger.error(exc, exc_info=True)
            raise ValueError(
                f'Error loading JSON from file "{key}".\nIf you\'re trying to import non-JSON data '
                f'(images, audio, text, etc.), edit storage settings and enable '
                f'"Treat every bucket object as a source file"')

        # predictions
        predictions = data.get('predictions', [])
        if predictions and 'data' not in data:
            raise ValueError(
                'If you use the "predictions" field in the task, '
                'you must put the "data" field in the task too')

        # annotations
        annotations = data.get('annotations', [])
        if annotations and 'data' not in data:
            raise ValueError(
                'If you use the "annotations" field in the task, '
                'you must put the "data" field in the task too')

        if 'data' in data and isinstance(data['data'], dict):
            data = data['data']

        # create the task, its storage link, and any attached predictions
        # and annotations in a single transaction
        with transaction.atomic():
            task = Task.objects.create(
                data=data,
                project=self.project,
                overlap=maximum_annotations,
                is_labeled=len(annotations) >= maximum_annotations,
                inner_id=max_inner_id)
            max_inner_id += 1

            link_class.create(task, key, self)
            logger.debug(
                f'Create {self.__class__.__name__} link with key={key} for task={task}')
            tasks_created += 1

            # add predictions
            logger.debug(f'Create {len(predictions)} predictions for task={task}')
            for prediction in predictions:
                prediction['task'] = task.id
            prediction_ser = PredictionSerializer(data=predictions, many=True)
            prediction_ser.is_valid(raise_exception=True)
            prediction_ser.save()

            # add annotations
            logger.debug(f'Create {len(annotations)} annotations for task={task}')
            for annotation in annotations:
                annotation['task'] = task.id
            annotation_ser = AnnotationSerializer(data=annotations, many=True)
            annotation_ser.is_valid(raise_exception=True)
            annotation_ser.save()

    self.last_sync = timezone.now()
    self.last_sync_count = tasks_created
    self.save()

    self.project.update_tasks_states(
        maximum_annotations_changed=False,
        overlap_cohort_percentage_changed=False,
        tasks_number_changed=True)
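
# --- Illustrative sketch (not part of the original module) ---
# The scan loop above is an idempotent sync: every storage key is visited,
# keys that already have a link are skipped, and only new keys create tasks,
# so re-running the sync never duplicates work. A minimal in-memory version
# of the same pattern, with hypothetical stand-ins (existing_links, load_json,
# create_task) for the storage and ORM layers:
import json
from typing import Callable, Iterable


def sync_new_keys(
    keys: Iterable[str],
    existing_links: set,
    load_json: Callable[[str], str],
    create_task: Callable[[dict], None],
) -> int:
    created = 0
    for key in keys:
        if key in existing_links:
            continue  # already imported on a previous sync
        try:
            data = json.loads(load_json(key))
        except json.JSONDecodeError as exc:
            raise ValueError(f'Error loading JSON from file "{key}"') from exc
        # Attached predictions/annotations are only valid alongside "data".
        if ('predictions' in data or 'annotations' in data) and 'data' not in data:
            raise ValueError(
                'Tasks with "predictions" or "annotations" must also carry "data"')
        create_task(data)
        existing_links.add(key)  # record the link so the next sync skips this key
        created += 1
    return created

# Example run (hypothetical in-memory storage):
#   store = {'a.json': '{"data": {"text": "hi"}}'}
#   sync_new_keys(store, set(), store.__getitem__, print)  # -> 1 task created
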