Exemple #1
0
    def predict_many_tasks(self, tasks):
        """Request predictions for a batch of tasks and save them.

        The tasks are serialized and sent to the ML backend in a single call.
        Every returned result is stored through ``PredictionSerializer`` with
        the current ``self.model_version``.

        If the backend answers with a single result for a batch whose size is
        not one, the method falls back to ``self.predict_one_task`` for each
        task.  A single result for a single task is a complete answer and is
        saved directly, without a second backend call.

        :param tasks: a list of task objects (re-fetched as a queryset by id)
            or any other iterable of tasks accepted by
            ``TaskSimpleSerializer(..., many=True)``.
        :return: ``None`` in every case; backend failures are logged, not
            raised.  Serializer validation errors propagate because
            ``is_valid`` is called with ``raise_exception=True``.
        """
        self.update_state()
        if self.not_ready:
            logger.debug(f'ML backend {self} is not ready')
            return

        if isinstance(tasks, list):
            # NOTE(review): function-scope import, presumably to avoid a
            # circular import at module load time -- confirm before moving.
            from tasks.models import Task
            tasks = Task.objects.filter(id__in=[task.id for task in tasks])

        tasks_ser = TaskSimpleSerializer(tasks, many=True).data
        ml_api_result = self.api.make_predictions(tasks_ser,
                                                  self.model_version,
                                                  self.project)
        if ml_api_result.is_error:
            logger.error(
                f'Prediction not created for project {self}: {ml_api_result.error_message}'
            )
            return

        responses = ml_api_result.response['results']
        response_count = len(responses)
        task_count = len(tasks_ser)

        if response_count == 0:
            logger.error(
                f'ML backend returned empty prediction for project {self}')
            return

        # One response for a batch of a different size: the ML backend doesn't
        # support batches of tasks, so do it one by one.  One response for one
        # task is a valid batch answer and must not trigger a second request.
        if response_count == 1 and task_count != 1:
            logger.warning(
                f"'ML backend '{self.title}' doesn't support batch processing of tasks, "
                f"switched to one-by-one task retrieving")
            for task in tasks:
                self.predict_one_task(task)
            return

        # Wrong result number: reported, then processing continues.  zip()
        # below stops at the shorter sequence, so only the leading pairs are
        # saved and they are matched purely by position.
        if response_count != task_count:
            logger.error(
                f'ML backend returned response number {response_count} != task number {task_count}'
            )

        predictions = [
            {
                'task': task['id'],
                'result': response['result'],
                'score': response.get('score'),
                'model_version': self.model_version,
            }
            for task, response in zip(tasks_ser, responses)
        ]
        with conditional_atomic():
            prediction_ser = PredictionSerializer(data=predictions, many=True)
            prediction_ser.is_valid(raise_exception=True)
            prediction_ser.save()
Exemple #2
0
    def __predict_one_task(self, task):
        """Create a prediction for a single task via the ML backend.

        No request is made when the backend is not ready or when the task
        already has a prediction for ``self.model_version``.

        :param task: task object exposing a ``predictions`` manager and
            serializable by ``TaskSimpleSerializer``.
        :return: the created ``Prediction``; ``None`` when the call is
            skipped, the backend reports an error, or it returns no results.
        """
        self.update_state()
        if self.not_ready:
            logger.debug(f'ML backend {self} is not ready to predict {task}')
            return

        has_current_prediction = task.predictions.filter(
            model_version=self.model_version).exists()
        if has_current_prediction:
            # nothing to do: a prediction for this model version is present
            logger.info(
                f'Skip creating prediction with ML backend {self} for task {task}: model version '
                f'{self.model_version} is up-to-date')
            return

        backend_api = self.api
        serialized_task = TaskSimpleSerializer(task).data
        api_result = backend_api.make_predictions(
            [serialized_task], self.model_version, self.project)
        if api_result.is_error:
            logger.warning(
                f'Prediction not created for project {self}: {api_result.error_message}'
            )
            return

        results = api_result.response['results']
        if len(results) == 0:
            logger.error(
                f'ML backend returned empty prediction for project {self}',
                extra={'sentry_skip': True})
            return

        # Only the first returned result is used for the single task.
        first_result = results[0]
        task_id = serialized_task['id']
        result_payload = first_result['result']
        raw_score = first_result.get('score')

        with conditional_atomic():
            fields = {
                'result': result_payload,
                'score': safe_float(raw_score),
                'model_version': self.model_version,
                'task_id': task_id,
                'cluster': first_result.get('cluster'),
                'neighbors': first_result.get('neighbors'),
                'mislabeling': safe_float(first_result.get('mislabeling', 0)),
            }
            prediction = Prediction.objects.create(**fields)
            logger.debug(f'Prediction {prediction} created')

        return prediction
Exemple #3
0
    def get(self, request, *args, **kwargs):
        """Choose the next task for the requesting user to annotate.

        Candidates are the project's prepared tasks that are not labeled and
        for which the user has no non-ground-truth annotation.  Strategies
        are tried in this order, the first one yielding a task wins:

        1. externally prepared tasks (``self.prepared_tasks``): first one;
        2. a task the user already holds a lock on (lock is not set again);
        3. ground truth first, when enabled on the project;
        4. overlap first, when enabled -- only narrows the candidates;
        5. breadth first;
        6. the project's sampling mode (uncertainty, uniform or sequence).

        :return: whatever ``self._make_response`` builds for the chosen task.
        :raises NotFound: when there are no candidates, or none of the
            strategies produced a task.
        """
        project = get_object_with_check_and_log(
            request, Project, pk=self.kwargs['pk'])
        # TODO: LSE option -- refuse unpublished projects with
        # PermissionDenied('Project is not published.')
        self.check_object_permissions(request, project)
        user = request.user

        # actions/next_task.py may attach an already prepared queryset;
        # otherwise build it from request params (filters, selected items)
        external_prepared_tasks_used = hasattr(self, 'prepared_tasks')
        if external_prepared_tasks_used:
            project.prepared_tasks = self.prepared_tasks
        else:
            project.prepared_tasks = get_prepared_queryset(self.request, project)

        # ids of tasks this user has already annotated (ground truth excluded)
        user_annotations = user.annotations.filter(ground_truth=False)
        user_annotations = user_annotations.filter(Q(task__isnull=False))
        solved_task_ids = user_annotations.values_list('task__pk', flat=True)

        with conditional_atomic():
            candidates = project.prepared_tasks.exclude(pk__in=solved_task_ids)
            candidates = candidates.filter(is_labeled=False)
            remaining = candidates.count()

            # nothing left for this user
            if remaining == 0:
                raise NotFound(
                    f'There are no tasks remaining to be annotated by the user={user}'
                )
            logger.debug(
                f'{remaining} tasks that still need to be annotated for user={user}'
            )

            # ordering already decided by the data manager
            if external_prepared_tasks_used:
                chosen = candidates.first()
                if not chosen:
                    raise NotFound('No more tasks found')
                return self._make_response(chosen, request)

            # a task already locked by this user is returned as is
            chosen = Task.get_locked_by(user, project)
            if chosen:
                return self._make_response(chosen, request, use_task_lock=False)

            if project.show_ground_truth_first:
                logger.debug(
                    f'User={user} tries ground truth from {remaining} tasks')
                chosen = self._try_ground_truth(candidates, project)
                if chosen:
                    return self._make_response(chosen, request)

            if project.show_overlap_first:
                # no task is returned here, the candidate set is just narrowed
                logger.debug(
                    f'User={user} tries overlap first from {remaining} tasks')
                _, candidates = self._try_tasks_with_overlap(candidates)

            # prefer tasks that are already in progress
            logger.debug(f'User={user} tries depth first from {remaining} tasks')
            chosen = self._try_breadth_first(candidates)
            if chosen:
                return self._make_response(chosen, request)

            sampling = project.sampling
            if sampling == project.UNCERTAINTY:
                logger.debug(
                    f'User={user} tries uncertainty sampling from {remaining} tasks')
                chosen = self._try_uncertainty_sampling(
                    candidates, project, solved_task_ids)
            elif sampling == project.UNIFORM:
                logger.debug(
                    f'User={user} tries random sampling from {remaining} tasks')
                chosen = self._get_random_unlocked(candidates)
            elif sampling == project.SEQUENCE:
                logger.debug(
                    f'User={user} tries sequence sampling from {remaining} tasks')
                chosen = self._get_first_unlocked(candidates.all().order_by('id'))

            if not chosen:
                raise NotFound(
                    f'There exist some unsolved tasks for the user={user}, but they seem to be locked by another users'
                )
            return self._make_response(chosen, request)
Exemple #4
0
    def get(self, request, *args, **kwargs):
        """Choose the next task for the requesting user to annotate.

        Candidates are the project's prepared tasks for which the user has no
        non-ground-truth annotation.  Unless ``self.assignee_flag`` is set
        and truthy, candidates are further limited to tasks whose annotation
        count is at most ``project.maximum_annotations``.

        Strategies are tried in order: manual assignment (first candidate),
        a task already locked by the user, ground truth first, overlap first
        (narrows candidates only), breadth first (skipped for sequence
        sampling), then the project's sampling mode.

        :return: whatever ``self._make_response`` builds for the chosen task.
        :raises NotFound: when there are no candidates, or none of the
            strategies produced a task.
        """
        project = get_object_with_check_and_log(
            request, Project, pk=self.kwargs['pk'])
        self.check_object_permissions(request, project)
        user = request.user

        # actions/next_task.py may attach an already prepared queryset;
        # otherwise build it from request params (filters, selected items)
        if hasattr(self, 'prepared_tasks'):
            prepared = self.prepared_tasks
        else:
            prepared = get_prepared_queryset(self.request, project)
        project.prepared_tasks = prepared

        # ids of tasks this user has already annotated (ground truth excluded)
        user_annotations = user.annotations.filter(ground_truth=False)
        user_annotations = user_annotations.filter(Q(task__isnull=False))
        solved_task_ids = user_annotations.values_list('task__pk', flat=True)

        with conditional_atomic():
            candidates = project.prepared_tasks.exclude(pk__in=solved_task_ids)

            # an assigned annotator has to work on the task regardless of how
            # many annotations it already has
            assigned_flag = getattr(self, 'assignee_flag', False)
            if not assigned_flag:
                with_counts = candidates.annotate(
                    annotation_number=Count('annotations'))
                candidates = with_counts.filter(
                    annotation_number__lte=project.maximum_annotations)

            remaining = candidates.count()

            # nothing left for this user
            if remaining == 0:
                raise NotFound(
                    f'There are no tasks remaining to be annotated by the user={user}'
                )
            logger.debug(
                f'{remaining} tasks that still need to be annotated for user={user}'
            )

            # ordering already decided by the data manager
            if assigned_flag:
                chosen = candidates.first()
                if not chosen:
                    raise NotFound('No more tasks found')
                return self._make_response(chosen, request, use_task_lock=False)

            # a task already locked by this user is returned as is
            chosen = Task.get_locked_by(user, tasks=candidates)
            if chosen:
                return self._make_response(chosen, request, use_task_lock=False)

            if project.show_ground_truth_first:
                logger.debug(
                    f'User={user} tries ground truth from {remaining} tasks')
                chosen = self._try_ground_truth(candidates, project)
                if chosen:
                    return self._make_response(chosen, request)

            if project.show_overlap_first:
                # no task is returned here, the candidate set is just narrowed
                logger.debug(
                    f'User={user} tries overlap first from {remaining} tasks')
                _, candidates = self._try_tasks_with_overlap(candidates)

            sampling = project.sampling

            # skipped for sequence sampling so that data manager sorting
            # stays predictable
            if sampling != project.SEQUENCE:
                # prefer tasks that are already in progress
                logger.debug(
                    f'User={user} tries depth first from {remaining} tasks')
                chosen = self._try_breadth_first(candidates)
                if chosen:
                    return self._make_response(chosen, request)

            if sampling == project.UNCERTAINTY:
                logger.debug(
                    f'User={user} tries uncertainty sampling from {remaining} tasks')
                chosen = self._try_uncertainty_sampling(
                    candidates, project, solved_task_ids)
            elif sampling == project.UNIFORM:
                logger.debug(
                    f'User={user} tries random sampling from {remaining} tasks')
                chosen = self._get_random_unlocked(candidates)
            elif sampling == project.SEQUENCE:
                logger.debug(
                    f'User={user} tries sequence sampling from {remaining} tasks')
                chosen = self._get_first_unlocked(candidates)

            if not chosen:
                raise NotFound(
                    f'There are still some tasks to complete for the user={user}, but they seem to be locked by another user.'
                )
            return self._make_response(chosen, request)
Exemple #5
0
    def get(self, request, *args, **kwargs):
        """Choose the next task for the requesting user and name its queue.

        Candidates are the project's prepared tasks for which the user has no
        non-ground-truth annotation; unless ``self.assignee_flag`` is set and
        truthy they are also restricted to ``is_labeled=False``.

        ``dm_queue`` is the result of
        ``filters_ordering_selected_items_exist(request.data)``.  When it is
        truthy the project-level strategies (manual assignment, task lock,
        ground truth first, overlap first, breadth first) are bypassed and
        the first candidate is taken.  Otherwise those strategies are tried
        in that order, followed by the project's sampling mode.

        The ``queue`` argument passed to ``self._make_response`` is a label
        for the strategy that produced the task; labels accumulated after the
        ground-truth step are joined with ``' & '``.

        :return: whatever ``self._make_response`` builds for the chosen task.
        :raises NotFound: when none of the strategies produced a task.
        """
        project = get_object_with_check_and_log(
            request, Project, pk=self.kwargs['pk'])
        self.check_object_permissions(request, project)
        user = request.user
        self.current_user = user
        dm_queue = filters_ordering_selected_items_exist(request.data)

        # actions/next_task.py may attach an already prepared queryset;
        # otherwise build it from request params (filters, selected items)
        if hasattr(self, 'prepared_tasks'):
            prepared = self.prepared_tasks
        else:
            prepared = get_prepared_queryset(self.request, project)
        project.prepared_tasks = prepared

        assigned_flag = getattr(self, 'assignee_flag', False)

        # ids of tasks this user has already annotated (ground truth excluded)
        solved_task_ids = (
            user.annotations
            .filter(ground_truth=False)
            .filter(task__isnull=False)
            .distinct()
            .values_list('task__pk', flat=True)
        )

        with conditional_atomic():
            candidates = project.prepared_tasks.exclude(pk__in=solved_task_ids)

            # an assigned annotator has to work on the task even when it is
            # already marked as labeled
            if not assigned_flag:
                candidates = candidates.filter(is_labeled=False)

            # only interpolated into debug messages; the real count is not
            # computed for performance reasons
            remaining = 'unknown'

            chosen = None

            # manually assigned tasks keep the data manager ordering
            if assigned_flag and not dm_queue:
                chosen = candidates.first()
                if not chosen:
                    raise NotFound('No more tasks found')
                return self._make_response(
                    chosen,
                    request,
                    use_task_lock=False,
                    queue='Manually assigned queue',
                )

            # a task already locked by this user is returned as is
            chosen = Task.get_locked_by(user, tasks=candidates)
            if chosen and not dm_queue:
                return self._make_response(
                    chosen,
                    request,
                    use_task_lock=False,
                    queue='Task lock',
                )

            if project.show_ground_truth_first and not dm_queue:
                logger.debug(
                    f'User={user} tries ground truth from {remaining} tasks')
                chosen = self._try_ground_truth(candidates, project)
                if chosen:
                    return self._make_response(
                        chosen, request, queue='Ground truth queue')

            # labels of the strategies applied from here on
            queue_labels = []

            # tasks with overlap > 1 go first
            if project.show_overlap_first and not dm_queue:
                # no task is returned here, the candidate set is just narrowed
                logger.debug(
                    f'User={user} tries overlap first from {remaining} tasks')
                _, candidates = self._try_tasks_with_overlap(candidates)
                queue_labels.append('Show overlap first')

            # prefer tasks that are already in progress
            logger.debug(f'User={user} tries depth first from {remaining} tasks')

            if project.maximum_annotations > 1 and not dm_queue:
                chosen = self._try_breadth_first(candidates)
                if chosen:
                    queue_labels.append('Breadth first queue')
                    return self._make_response(
                        chosen, request, queue=' & '.join(queue_labels))

            if dm_queue:
                # data manager queue: take candidates in their given order
                queue_labels.append('Data manager queue')
                logger.debug(
                    f'User={user} tries sequence sampling from {remaining} tasks')
                chosen = candidates.first()
            elif project.sampling == project.SEQUENCE:
                queue_labels.append('Sequence queue')
                logger.debug(
                    f'User={user} tries sequence sampling from {remaining} tasks')
                chosen = self._get_first_unlocked(candidates)
            elif project.sampling == project.UNCERTAINTY:
                queue_labels.append('Active learning or random queue')
                logger.debug(
                    f'User={user} tries uncertainty sampling from {remaining} tasks')
                chosen = self._try_uncertainty_sampling(
                    candidates, project, solved_task_ids)
            elif project.sampling == project.UNIFORM:
                queue_labels.append('Uniform random queue')
                logger.debug(
                    f'User={user} tries random sampling from {remaining} tasks')
                chosen = self._get_random_unlocked(candidates)

            if not chosen:
                raise NotFound(
                    f'There are still some tasks to complete for the user={user}, '
                    f'but they seem to be locked by another user.')
            return self._make_response(
                chosen, request, queue=' & '.join(queue_labels))