def _migrate_tasks(project_path, project):
    """Migrate tasks from legacy JSON files into database objects.

    Reads ``<project_path>/tasks.json`` and creates one ``Task`` row per
    entry.  For each task id, annotations are loaded from
    ``<project_path>/completions/<task_id>.json`` and predictions from
    ``<project_path>/predictions/<task_id>.json`` when those files exist.

    Args:
        project_path: ``pathlib.Path`` of the legacy project directory.
        project: ``Project`` instance the migrated tasks are attached to;
            its ``created_by`` user is recorded as the annotation author.
    """
    # Resolve the local timezone once instead of recomputing
    # datetime.datetime.now().astimezone().tzinfo per annotation/prediction.
    local_tz = datetime.datetime.now().astimezone().tzinfo

    tasks_path = project_path / 'tasks.json'
    with io.open(os.path.abspath(tasks_path)) as t:
        tasks_data = json.load(t)
    for task_id, task_data in tasks_data.items():
        task = Task.objects.create(data=task_data.get('data', {}), project=project)

        # migrate annotations (legacy name: "completions")
        annotations_path = project_path / 'completions' / '{}.json'.format(task_id)
        if annotations_path.exists():
            with io.open(os.path.abspath(annotations_path)) as c:
                annotations_data = json.load(c)
            for annotation in annotations_data['completions']:
                task_annotation = Annotation(
                    result=annotation['result'],
                    task=task,
                    lead_time=annotation['lead_time'],
                    was_cancelled=annotation.get('was_cancelled', False),
                    completed_by=project.created_by,
                )
                # Preserve the original creation timestamp: created_at is an
                # auto-now field, so auto-timestamping is suppressed while
                # saving.
                with suppress_autotime(task_annotation, ['created_at']):
                    task_annotation.created_at = datetime.datetime.fromtimestamp(
                        annotation['created_at'], tz=local_tz)
                    task_annotation.save()

        # migrate predictions
        predictions_path = project_path / 'predictions' / '{}.json'.format(task_id)
        if predictions_path.exists():
            with io.open(os.path.abspath(predictions_path)) as c:
                predictions_data = json.load(c)
            for prediction in predictions_data['predictions']:
                task_prediction = Prediction(
                    result=prediction['result'], task=task)
                # Same timestamp preservation as for annotations above.
                with suppress_autotime(task_prediction, ['created_at']):
                    task_prediction.created_at = datetime.datetime.fromtimestamp(
                        prediction['created_at'], tz=local_tz)
                    task_prediction.save()
def perform_create(self, ser):
    """Create an annotation from validated serializer data.

    Side effects beyond saving the ``Annotation``:
    - checks object-level write permission before save,
    - releases the task lock held by the requesting user,
    - attaches the current prediction (if any) to the saved annotation,
    - updates the user's ``activity_at`` timestamp,
    - deletes the draft the annotation was created from (if ``draft_id``
      was supplied in the request body).

    Returns:
        The created ``Annotation`` instance.
    """
    task = get_object_with_check_and_log(self.request, Task, pk=self.kwargs['pk'])
    # annotator has write access only to annotations and it can't be checked it after serializer.save()
    check_object_permissions(self.request, Annotation(task=task), 'annotations.change_annotation')
    user = self.request.user

    # Release task if it has been taken at work (it should be taken by the same user, or it makes sentry error
    logger.debug(f'User={user} releases task={task}')
    task.release_lock(user)

    # NOTE: a dead local (`update_id = self.request.user.id`) was removed here;
    # nothing in this method consumed it.
    result = ser.validated_data.get('result')
    extra_args = {'task_id': self.kwargs['pk']}

    # save stats about how well annotator annotations coincide with current prediction
    # only for finished task annotations
    if result is not None:
        prediction = Prediction.objects.filter(
            task=task, model_version=task.project.model_version)
        if prediction.exists():
            prediction = prediction.first()
            prediction_ser = PredictionSerializer(prediction).data
        else:
            logger.debug(
                f'User={self.request.user}: there are no predictions for task={task}'
            )
            prediction_ser = {}
        # serialize annotation
        extra_args.update({
            'prediction': prediction_ser,
        })

    if 'was_cancelled' in self.request.GET:
        extra_args['was_cancelled'] = bool_from_request(
            self.request.GET, 'was_cancelled', False)

    if 'completed_by' not in ser.validated_data:
        extra_args['completed_by'] = self.request.user

    # create annotation
    logger.debug(f'User={self.request.user}: save annotation')
    annotation = ser.save(**extra_args)
    logger.debug(f'Save activity for user={self.request.user}')
    self.request.user.activity_at = timezone.now()
    self.request.user.save()

    # if annotation created from draft - remove this draft
    draft_id = self.request.data.get('draft_id')
    if draft_id is not None:
        logger.debug(
            f'Remove draft {draft_id} after creating annotation {annotation.id}'
        )
        AnnotationDraft.objects.filter(id=draft_id).delete()

    return annotation
def predictions_to_annotations(project, queryset, **kwargs):
    """Convert task predictions into annotations in bulk.

    Only predictions without existing child annotations are converted;
    an optional ``model_version`` (scalar or list) in the request body
    narrows the prediction set.  Created annotations are attributed to the
    requesting user and linked back to their parent prediction.

    Args:
        project: project the tasks belong to (used for webhooks).
        queryset: task queryset to convert predictions for.
        **kwargs: must contain ``request`` (the DRF request).

    Returns:
        dict with ``response_code`` and a human-readable ``detail`` message.
    """
    request = kwargs['request']
    user = request.user
    model_version = request.data.get('model_version')
    queryset = queryset.filter(predictions__isnull=False)
    # skip predictions that were already converted to annotations
    predictions = Prediction.objects.filter(task__in=queryset,
                                            child_annotations__isnull=True)

    # model version filter
    if model_version is not None:
        if isinstance(model_version, list):
            predictions = predictions.filter(
                model_version__in=model_version).distinct()
        else:
            predictions = predictions.filter(model_version=model_version)

    predictions_values = list(
        predictions.values_list('result', 'model_version', 'task_id', 'id'))

    # prepare annotations
    annotations = []
    tasks_ids = []
    for result, model_version, task_id, prediction_id in predictions_values:
        tasks_ids.append(task_id)
        annotations.append({
            'result': result,
            'completed_by_id': user.pk,
            'task_id': task_id,
            'parent_prediction_id': prediction_id
        })

    count = len(annotations)
    # fixed typo in log message: "converter" -> "converted"
    logger.debug(f'{count} predictions will be converted to annotations')
    db_annotations = [Annotation(**annotation) for annotation in annotations]
    db_annotations = Annotation.objects.bulk_create(db_annotations)
    Task.objects.filter(id__in=tasks_ids).update(updated_at=now(),
                                                 updated_by=request.user)

    if db_annotations:
        TaskSerializerBulk.post_process_annotations(db_annotations)
        # Execute webhook for created annotations
        emit_webhooks_for_instance(user.active_organization, project,
                                   WebhookAction.ANNOTATIONS_CREATED,
                                   db_annotations)
    return {'response_code': 200, 'detail': f'Created {count} annotations'}
def propagate_annotations(project, queryset, **kwargs):
    """Copy one source annotation onto every task in the queryset.

    Looks up the annotation given by ``source_annotation_id`` in the request
    body (it must belong to the current project), then bulk-creates a copy of
    its result for each selected task except the annotation's own task.

    Args:
        project: project the source annotation must belong to.
        queryset: task queryset to receive the copied annotations.
        **kwargs: must contain ``request`` (the DRF request).

    Raises:
        DataManagerException: if the source annotation is not found.

    Returns:
        dict with ``response_code`` and a human-readable ``detail`` message.
    """
    request = kwargs['request']
    user = request.user
    source_annotation_id = request.data.get('source_annotation_id')

    source_qs = Annotation.objects.filter(task__project=project,
                                          id=source_annotation_id)
    if not source_qs:
        raise DataManagerException(
            f'Source annotation {source_annotation_id} not found in the current project'
        )
    source_annotation = source_qs.first()

    task_ids = set(queryset.values_list('id', flat=True))
    # the source task must not receive a copy of its own annotation
    task_ids.discard(source_annotation.task.id)

    # copy source annotation to new annotations for each task
    db_annotations = [
        Annotation(task_id=task_id,
                   completed_by_id=user.id,
                   result=source_annotation.result,
                   result_count=source_annotation.result_count,
                   parent_annotation_id=source_annotation.id)
        for task_id in task_ids
    ]
    db_annotations = Annotation.objects.bulk_create(
        db_annotations, batch_size=settings.BATCH_SIZE)
    TaskSerializerBulk.post_process_annotations(db_annotations)
    return {
        'response_code': 200,
        'detail': f'Created {len(db_annotations)} annotations'
    }
def restore_objects(project):
    """Create a task and an annotation with fixed pk=1000 for URL tests.

    Pins the project, task, and annotation to pk 1000 so test URLs are
    stable across runs, creating the rows only when they do not already
    exist.

    Args:
        project: ``Project`` instance to pin and attach the task to.

    Returns:
        tuple ``(task_db, annotation_db)`` of the ensured objects.
    """
    if project.pk != 1000:
        project.pk = 1000
        # title suffix distinguishes the re-pinned copy from the original
        project.title += '2'
        project.save()

    try:
        task_db = Task.objects.get(pk=1000)
    except Task.DoesNotExist:
        task_db = Task()
        task_db.data = {"data": {"image": "kittens.jpg"}}
        task_db.project = project
        task_db.id = 1000  # we need to use id 1000 to avoid db last start
        task_db.save()

    try:
        annotation_db = Annotation.objects.get(pk=1000)
    except Annotation.DoesNotExist:
        # re-fetch to make sure the FK points at the DB-backed task row
        task_db = Task.objects.get(pk=1000)
        annotation_db = Annotation()
        annotation = [{
            "from_name": "some",
            "to_name": "x",
            "type": "none",
            "value": {
                "none": ["Opossum"]
            }
        }]
        annotation_db.result = annotation
        annotation_db.id = 1000  # we need to use id 1000 to avoid db last start
        annotation_db.task = task_db
        annotation_db.save()

    return task_db, annotation_db
def create(self, validated_data):
    """Create Tasks, Annotations and Predictions in bulk.

    Pops ``annotations`` and ``predictions`` out of each validated task,
    validates that every annotation author belongs to the project's
    organization, then bulk-creates tasks, annotations and predictions
    inside one transaction.  On SQLite, ids are assigned manually before
    ``bulk_create`` because SQLite's bulk_create does not return pks.

    Returns:
        list of the (unsaved) ``Task`` objects built from the payload;
        the saved instances are available on ``self.db_tasks``.
    """
    db_tasks, db_annotations, db_predictions, validated_tasks = [], [], [], validated_data
    logging.info(
        f'Try to serialize tasks with annotations, data len = {len(validated_data)}'
    )
    user = self.context.get('user', None)
    project = self.context.get('project')

    # to be sure we add tasks with annotations at the same time
    with transaction.atomic():

        # extract annotations and predictions
        task_annotations, task_predictions = [], []
        for task in validated_tasks:
            task_annotations.append(task.pop('annotations', []))
            task_predictions.append(task.pop('predictions', []))

        # check annotator permissions for completed_by: every author must be
        # a member of the project owner's organization (or the importer's)
        organization = user.active_organization \
            if not project.created_by.active_organization else project.created_by.active_organization
        project_user_ids = organization.members.values_list('user__id', flat=True)
        annotator_ids = set()
        for annotations in task_annotations:
            for annotation in annotations:
                annotator_ids.add(self.get_completed_by_id(annotation))

        for i in annotator_ids:
            if i not in project_user_ids and i is not None:
                raise ValidationError(
                    f'Annotations with "completed_by"={i} are produced by annotator '
                    f'who is not allowed for this project as invited annotator or team member'
                )

        # add tasks first
        for task in validated_tasks:
            t = Task(project=project,
                     data=task['data'],
                     meta=task.get('meta', {}),
                     overlap=project.maximum_annotations,
                     file_upload_id=task.get('file_upload_id'))
            db_tasks.append(t)

        # SQLite's bulk_create does not populate pks, so assign ids manually
        # starting from the current max id
        if settings.DJANGO_DB == settings.DJANGO_DB_SQLITE:
            self.db_tasks = []
            try:
                last_task = Task.objects.latest('id')
                current_id = last_task.id + 1
            except Task.DoesNotExist:
                current_id = 1

            for task in db_tasks:
                task.id = current_id
                current_id += 1
            self.db_tasks = Task.objects.bulk_create(
                db_tasks, batch_size=settings.BATCH_SIZE)
        else:
            self.db_tasks = Task.objects.bulk_create(
                db_tasks, batch_size=settings.BATCH_SIZE)
        logging.info(
            f'Tasks serialization success, len = {len(self.db_tasks)}')

        # add annotations
        for i, annotations in enumerate(task_annotations):
            for annotation in annotations:
                # pop optional flags from the raw annotation payload
                ground_truth = annotation.pop('ground_truth', True)
                # NOTE(review): this branch is dead — the pop above already
                # removed 'ground_truth', so the condition can never be true
                if 'ground_truth' in annotation:
                    ground_truth = annotation.pop('ground_truth', True)

                # get user id; fall back to the importing user when the
                # annotation carries no author
                completed_by_id = self.get_completed_by_id(
                    annotation, default=user.id if user else None)
                annotation.pop('completed_by', None)

                db_annotations.append(
                    Annotation(task=self.db_tasks[i],
                               ground_truth=ground_truth,
                               completed_by_id=completed_by_id,
                               result=annotation['result']))

        # add predictions
        last_model_version = None
        for i, predictions in enumerate(task_predictions):
            for prediction in predictions:
                # coerce score to float; invalid values degrade to None
                prediction_score = prediction.get('score')
                if prediction_score is not None:
                    try:
                        prediction_score = float(prediction_score)
                    except ValueError as exc:
                        logger.error(
                            f'Can\'t upload prediction score: should be in float format. Reason: {exc}.'
                            f'Fallback to score=None',
                            exc_info=True)
                        prediction_score = None

                last_model_version = prediction.get(
                    'model_version', 'undefined')
                db_predictions.append(
                    Prediction(task=self.db_tasks[i],
                               result=prediction['result'],
                               score=prediction_score,
                               model_version=last_model_version))

        # annotations: DB bulk create (same SQLite manual-id workaround)
        if settings.DJANGO_DB == settings.DJANGO_DB_SQLITE:
            self.db_annotations = []
            try:
                last_annotation = Annotation.objects.latest('id')
                current_id = last_annotation.id + 1
            except Annotation.DoesNotExist:
                current_id = 1

            for annotation in db_annotations:
                annotation.id = current_id
                current_id += 1
            self.db_annotations = Annotation.objects.bulk_create(
                db_annotations, batch_size=settings.BATCH_SIZE)
        else:
            self.db_annotations = Annotation.objects.bulk_create(
                db_annotations, batch_size=settings.BATCH_SIZE)
        logging.info(
            f'Annotations serialization success, len = {len(self.db_annotations)}'
        )

        # predictions: DB bulk create
        self.db_predictions = Prediction.objects.bulk_create(
            db_predictions, batch_size=settings.BATCH_SIZE)
        logging.info(
            f'Predictions serialization success, len = {len(self.db_predictions)}'
        )

        # renew project model version if it's empty
        if not project.model_version and last_model_version is not None:
            project.model_version = last_model_version
            project.save()

    return db_tasks
def create(self, validated_data):
    """Create Tasks, Annotations and Predictions in bulk.

    Pops ``annotations`` and ``predictions`` out of each validated task,
    resolves each annotation's ``completed_by`` to a member of the owning
    organization (raising on unknown authors), then bulk-creates tasks,
    annotations and predictions inside one transaction.  On SQLite, ids
    are assigned manually before ``bulk_create`` because SQLite's
    bulk_create does not return pks.

    Returns:
        list of the (unsaved) ``Task`` objects built from the payload;
        the saved instances are available on ``self.db_tasks``.
    """
    db_tasks, db_annotations, db_predictions, validated_tasks = [], [], [], validated_data
    logging.info(
        f'Try to serialize tasks with annotations, data len = {len(validated_data)}'
    )
    user = self.context.get('user', None)
    project = self.context.get('project')

    # build email -> user id map of the owning organization's members so
    # annotation authors can be validated and resolved up front
    organization = user.active_organization \
        if not project.created_by.active_organization else project.created_by.active_organization
    members_email_to_id = dict(
        organization.members.values_list('user__email', 'user__id'))
    members_ids = set(members_email_to_id.values())
    logger.debug(
        f"{len(members_email_to_id)} members found in organization {organization}"
    )

    # to be sure we add tasks with annotations at the same time
    with transaction.atomic():

        # extract annotations and predictions
        task_annotations, task_predictions = [], []
        for task in validated_tasks:
            annotations = task.pop('annotations', [])
            # insert a valid "completed_by_id" by existing member
            self._insert_valid_completed_by_id_or_raise(
                annotations, members_email_to_id, members_ids,
                user or project.created_by)
            predictions = task.pop('predictions', [])
            task_annotations.append(annotations)
            task_predictions.append(predictions)

        # add tasks first
        for task in validated_tasks:
            t = Task(project=project,
                     data=task['data'],
                     meta=task.get('meta', {}),
                     overlap=project.maximum_annotations,
                     file_upload_id=task.get('file_upload_id'))
            db_tasks.append(t)

            # deprecated meta warning
            if 'meta' in task:
                logger.warning(
                    'You task data has field "meta" which is deprecated and it will be removed in future'
                )

        # SQLite's bulk_create does not populate pks, so assign ids manually
        # starting from the current max id
        if settings.DJANGO_DB == settings.DJANGO_DB_SQLITE:
            self.db_tasks = []
            try:
                last_task = Task.objects.latest('id')
                current_id = last_task.id + 1
            except Task.DoesNotExist:
                current_id = 1

            for task in db_tasks:
                task.id = current_id
                current_id += 1
            self.db_tasks = Task.objects.bulk_create(
                db_tasks, batch_size=settings.BATCH_SIZE)
        else:
            self.db_tasks = Task.objects.bulk_create(
                db_tasks, batch_size=settings.BATCH_SIZE)
        logging.info(
            f'Tasks serialization success, len = {len(self.db_tasks)}')

        # add annotations
        for i, annotations in enumerate(task_annotations):
            for annotation in annotations:
                # pop optional flags from the raw annotation payload;
                # completed_by_id was resolved earlier by
                # _insert_valid_completed_by_id_or_raise
                ground_truth = annotation.pop('ground_truth', True)
                was_cancelled = annotation.pop('was_cancelled', False)

                db_annotations.append(
                    Annotation(
                        task=self.db_tasks[i],
                        ground_truth=ground_truth,
                        was_cancelled=was_cancelled,
                        completed_by_id=annotation['completed_by_id'],
                        result=annotation['result']))

        # add predictions
        last_model_version = None
        for i, predictions in enumerate(task_predictions):
            for prediction in predictions:
                # coerce score to float; invalid values degrade to None
                prediction_score = prediction.get('score')
                if prediction_score is not None:
                    try:
                        prediction_score = float(prediction_score)
                    except ValueError as exc:
                        logger.error(
                            f'Can\'t upload prediction score: should be in float format. Reason: {exc}.'
                            f'Fallback to score=None',
                            exc_info=True)
                        prediction_score = None

                last_model_version = prediction.get(
                    'model_version', 'undefined')
                db_predictions.append(
                    Prediction(task=self.db_tasks[i],
                               result=prediction['result'],
                               score=prediction_score,
                               model_version=last_model_version))

        # annotations: DB bulk create (same SQLite manual-id workaround)
        if settings.DJANGO_DB == settings.DJANGO_DB_SQLITE:
            self.db_annotations = []
            try:
                last_annotation = Annotation.objects.latest('id')
                current_id = last_annotation.id + 1
            except Annotation.DoesNotExist:
                current_id = 1

            for annotation in db_annotations:
                annotation.id = current_id
                current_id += 1
            self.db_annotations = Annotation.objects.bulk_create(
                db_annotations, batch_size=settings.BATCH_SIZE)
        else:
            self.db_annotations = Annotation.objects.bulk_create(
                db_annotations, batch_size=settings.BATCH_SIZE)
        logging.info(
            f'Annotations serialization success, len = {len(self.db_annotations)}'
        )

        # predictions: DB bulk create
        self.db_predictions = Prediction.objects.bulk_create(
            db_predictions, batch_size=settings.BATCH_SIZE)
        logging.info(
            f'Predictions serialization success, len = {len(self.db_predictions)}'
        )

        # renew project model version if it's empty
        if not project.model_version and last_model_version is not None:
            project.model_version = last_model_version
            project.save()

    return db_tasks
def create(self, validated_data):
    """Create Tasks, Annotations and Predictions in bulk.

    Pops ``annotations`` and ``predictions`` out of each validated task,
    resolves annotation authors against the owning organization's members,
    then bulk-creates tasks (with sequential ``inner_id`` and a precomputed
    ``is_labeled`` flag), annotations and predictions inside one
    transaction.  On SQLite, ids are assigned manually before
    ``bulk_create`` because SQLite's bulk_create does not return pks.

    Returns:
        list of the (unsaved) ``Task`` objects built from the payload;
        the saved instances are available on ``self.db_tasks``.
    """
    db_tasks, db_annotations, db_predictions, validated_tasks = [], [], [], validated_data
    logging.info(f'Try to serialize tasks with annotations, data len = {len(validated_data)}')
    user = self.context.get('user', None)

    # build email -> user id map of the owning organization's members so
    # annotation authors can be validated and resolved up front
    organization = user.active_organization \
        if not self.project.created_by.active_organization else self.project.created_by.active_organization
    members_email_to_id = dict(organization.members.values_list('user__email', 'user__id'))
    members_ids = set(members_email_to_id.values())
    logger.debug(f"{len(members_email_to_id)} members found in organization {organization}")

    # to be sure we add tasks with annotations at the same time
    with transaction.atomic():

        # extract annotations and predictions
        task_annotations, task_predictions = [], []
        for task in validated_tasks:
            annotations = task.pop('annotations', [])
            # insert a valid "completed_by_id" by existing member
            self._insert_valid_completed_by_id_or_raise(
                annotations, members_email_to_id, members_ids,
                user or self.project.created_by)
            predictions = task.pop('predictions', [])
            task_annotations.append(annotations)
            task_predictions.append(predictions)

        # add tasks first
        max_overlap = self.project.maximum_annotations

        # identify max inner id: new tasks continue the per-project sequence
        tasks = Task.objects.filter(project=self.project)
        max_inner_id = (tasks.order_by("-inner_id")[0].inner_id + 1) if tasks else 1

        for i, task in enumerate(validated_tasks):
            t = Task(
                project=self.project,
                data=task['data'],
                meta=task.get('meta', {}),
                overlap=max_overlap,
                # a task is labeled at import time when it already carries
                # at least `overlap` annotations
                is_labeled=len(task_annotations[i]) >= max_overlap,
                file_upload_id=task.get('file_upload_id'),
                inner_id=max_inner_id + i
            )
            db_tasks.append(t)

        # SQLite's bulk_create does not populate pks, so assign ids manually
        # starting from the current max id
        if settings.DJANGO_DB == settings.DJANGO_DB_SQLITE:
            self.db_tasks = []
            try:
                last_task = Task.objects.latest('id')
                current_id = last_task.id + 1
            except Task.DoesNotExist:
                current_id = 1

            for task in db_tasks:
                task.id = current_id
                current_id += 1
            self.db_tasks = Task.objects.bulk_create(db_tasks, batch_size=settings.BATCH_SIZE)
        else:
            self.db_tasks = Task.objects.bulk_create(db_tasks, batch_size=settings.BATCH_SIZE)
        logging.info(f'Tasks serialization success, len = {len(self.db_tasks)}')

        # add annotations
        for i, annotations in enumerate(task_annotations):
            for annotation in annotations:
                # skip malformed (non-dict) entries silently
                if not isinstance(annotation, dict):
                    continue

                # pop optional flags from the raw annotation payload;
                # completed_by_id was resolved earlier by
                # _insert_valid_completed_by_id_or_raise
                ground_truth = annotation.pop('ground_truth', True)
                was_cancelled = annotation.pop('was_cancelled', False)
                lead_time = annotation.pop('lead_time', None)

                db_annotations.append(Annotation(task=self.db_tasks[i],
                                                 ground_truth=ground_truth,
                                                 was_cancelled=was_cancelled,
                                                 completed_by_id=annotation['completed_by_id'],
                                                 result=annotation['result'],
                                                 lead_time=lead_time))

        # add predictions
        last_model_version = None
        for i, predictions in enumerate(task_predictions):
            for prediction in predictions:
                # skip malformed (non-dict) entries silently
                if not isinstance(prediction, dict):
                    continue

                # we need to call result normalizer here since "bulk_create" doesn't call save() method
                result = Prediction.prepare_prediction_result(prediction['result'], self.project)
                # coerce score to float; invalid values degrade to None
                prediction_score = prediction.get('score')
                if prediction_score is not None:
                    try:
                        prediction_score = float(prediction_score)
                    except ValueError as exc:
                        logger.error(
                            f'Can\'t upload prediction score: should be in float format. Reason: {exc}.'
                            f'Fallback to score=None', exc_info=True)
                        prediction_score = None

                last_model_version = prediction.get('model_version', 'undefined')
                db_predictions.append(Prediction(task=self.db_tasks[i],
                                                 result=result,
                                                 score=prediction_score,
                                                 model_version=last_model_version))

        # annotations: DB bulk create (same SQLite manual-id workaround)
        if settings.DJANGO_DB == settings.DJANGO_DB_SQLITE:
            self.db_annotations = []
            try:
                last_annotation = Annotation.objects.latest('id')
                current_id = last_annotation.id + 1
            except Annotation.DoesNotExist:
                current_id = 1

            for annotation in db_annotations:
                annotation.id = current_id
                current_id += 1
            self.db_annotations = Annotation.objects.bulk_create(db_annotations, batch_size=settings.BATCH_SIZE)
        else:
            self.db_annotations = Annotation.objects.bulk_create(db_annotations, batch_size=settings.BATCH_SIZE)
        logging.info(f'Annotations serialization success, len = {len(self.db_annotations)}')

        # predictions: DB bulk create
        self.db_predictions = Prediction.objects.bulk_create(db_predictions, batch_size=settings.BATCH_SIZE)
        logging.info(f'Predictions serialization success, len = {len(self.db_predictions)}')

        # renew project model version if it's empty
        if not self.project.model_version and last_model_version is not None:
            self.project.model_version = last_model_version
            self.project.save()

    self.post_process_annotations(self.db_annotations)
    return db_tasks