Example #1
0
def run_task_predictions(ml_backend_id, batch_size=100):
    """
    Run predictions for a batch of tasks and store the results in the database.

    The ML backend is set up first; if the setup response reports a model version
    that differs from the stored one, the stored version is updated. Then up to
    ``batch_size`` tasks selected as lacking a prediction for the current model
    version are sent to the backend one by one, and any existing prediction of
    that version for the task is replaced by the new one.

    :param ml_backend_id: primary key of the MLBackend used for predictions
    :param batch_size: maximum number of tasks handled in a single call
    :return: ``{'status': 'ok'}`` when no task needs a prediction; otherwise a dict
        with ``status``, ``processed_num`` (number of tasks attempted, including
        failed ones) and ``failed`` (the serialized tasks whose prediction failed)
    :raises ValueError: if the backend setup response is an error
    """
    ml_backend = MLBackend.objects.get(id=ml_backend_id)
    response = ml_backend.setup()
    if response.is_error:
        raise ValueError(response.error_message)

    # keep the stored model version in sync with what the backend reports
    reported_version = response.response['model_version']
    if reported_version != ml_backend.model_version:
        ml_backend.model_version = reported_version
        ml_backend.save()

    # collect tasks without predictions for current model version
    # NOTE(review): the annotation joins predictions, so a task that has predictions from
    # several model versions may still match (and may show up more than once) — confirm
    # whether `.exclude(predictions__model_version=...)` was the intent.
    tasks_without_predictions = ml_backend.project.tasks.annotate(
        model_version=F('predictions__model_version'),
        num_predictions=Count('predictions')
    ).filter(~Q(model_version=ml_backend.model_version) | Q(num_predictions=0))

    if not tasks_without_predictions.exists():
        logger.info(f'Predictions for project {ml_backend.project} with version {ml_backend.model_version} already exist, '
                       f'update is not needed')
        return {'status': 'ok'}

    logger.info(f'Found {tasks_without_predictions.count()} tasks without predictions '
                   f'from model version {ml_backend.model_version} in project {ml_backend.project}')

    # TODO: randomize tasks selection so that taken tasks don't clash with each other with high probability
    tasks = TaskSerializer(tasks_without_predictions[:batch_size], many=True).data

    failed_tasks = []
    for task in tasks:
        # `task` is serialized data (a mapping), so the id is read by key, never by attribute
        task_id = task['id']
        ml_api_result = ml_backend.api.make_predictions([task], ml_backend.model_version, ml_backend.project)
        if not _validate_ml_api_result(ml_api_result, [task], logger):
            logger.warning(f'Project {ml_backend.project}: task {task_id} failed')
            failed_tasks.append(task)
            continue

        prediction_result = ml_api_result.response['results'][0]

        # replace any prediction of the same model version atomically, so the task never
        # ends up with zero or two predictions for this version
        with transaction.atomic():
            Prediction.objects.filter(task_id=task_id, model_version=ml_backend.model_version).delete()
            Prediction.objects.create(
                task_id=task_id,
                model_version=ml_backend.model_version,
                result=prediction_result['result'],
                score=safe_float(prediction_result.get('score', 0)),
                cluster=prediction_result.get('cluster'),
                neighbors=prediction_result.get('neighbors'),
                mislabeling=safe_float(prediction_result.get('mislabeling', 0))
            )
        logger.info(f'Project {ml_backend.project}: task {task_id} processed with model version {ml_backend.model_version}')

    # the job record is removed once the batch has been handled
    MLBackendPredictionJob.objects.filter(job_id=get_current_job().id).delete()
    logger.info(f'Total task processes: {len(tasks)}, failed: {len(failed_tasks)}')
    return {'status': 'ok', 'processed_num': len(tasks), 'failed': failed_tasks}
Example #2
0
    def predict_one_task(self, task):
        """
        Ask the ML backend for a prediction on a single task and store it.

        Nothing is created (and ``None`` is returned) when the backend is not ready,
        when the task already has a prediction for the current model version, when
        the backend call reports an error, or when it returns no results.

        After a prediction is stored, the backend's model version is updated if the
        response reports a different one.

        :param task: task object to predict; it is serialized with TaskSerializer
        :return: the created Prediction, or ``None`` if no prediction was created
        """
        if self.not_ready:
            logger.debug(f'ML backend {self} is not ready to predict {task}')
            return
        if task.predictions.filter(model_version=self.model_version).exists():
            # prediction already exists
            logger.info(
                f'Skip creating prediction with ML backend {self} for task {task}: model version is up-to-date'
            )
            return
        ml_api = self.api

        task_ser = TaskSerializer(task).data
        ml_api_result = ml_api.make_predictions([task_ser], self.model_version,
                                                self.project)
        if ml_api_result.is_error:
            logger.warning(
                f'Prediction not created for project {self}: {ml_api_result.error_message}'
            )
            return
        results = ml_api_result.response['results']
        if len(results) == 0:
            logger.error(
                f'ML backend returned empty prediction for project {self}')
            return
        # only one task was sent, so only the first result is relevant
        prediction_response = results[0]
        task_id = task_ser['id']
        r = prediction_response['result']
        score = prediction_response.get('score')
        prediction = Prediction.objects.create(
            result=r,
            score=safe_float(score),
            model_version=self.model_version,
            task_id=task_id,
            cluster=prediction_response.get('cluster'),
            neighbors=prediction_response.get('neighbors'),
            mislabeling=safe_float(prediction_response.get('mislabeling', 0)))
        logger.info(
            f'Prediction created: result={r}, score={score}, id={prediction.id}'
        )

        # A response without 'model_version' must not wipe the stored version:
        # only adopt a version the backend actually reported.
        model_version = ml_api_result.response.get('model_version')
        if model_version is not None and model_version != self.model_version:
            self.model_version = model_version
            self.save()
            logger.info(
                f'Project {self} updates model version to {model_version}')

        return prediction
Example #3
0
    def __predict_one_task(self, task):
        """
        Refresh backend state, request a prediction for ``task`` and persist it.

        Returns ``None`` after logging when the backend is not ready, when the task
        already has a prediction for the current model version, when the backend
        call reports an error, or when the backend returns no results. Otherwise
        returns the newly created Prediction.
        """
        self.update_state()

        if self.not_ready:
            logger.debug(f'ML backend {self} is not ready to predict {task}')
            return None

        already_predicted = task.predictions.filter(
            model_version=self.model_version).exists()
        if already_predicted:
            # nothing to do: this model version has already produced a prediction
            logger.info(
                f'Skip creating prediction with ML backend {self} for task {task}: model version '
                f'{self.model_version} is up-to-date')
            return None

        backend_api = self.api
        serialized_task = TaskSimpleSerializer(task).data
        ml_api_result = backend_api.make_predictions(
            [serialized_task], self.model_version, self.project)

        if ml_api_result.is_error:
            logger.warning(
                f'Prediction not created for project {self}: {ml_api_result.error_message}'
            )
            return None

        predicted_items = ml_api_result.response['results']
        if len(predicted_items) == 0:
            logger.error(
                f'ML backend returned empty prediction for project {self}',
                extra={'sentry_skip': True})
            return None

        # a single task was submitted, so the first item is its prediction
        first_item = predicted_items[0]
        task_id = serialized_task['id']
        result_payload = first_item['result']
        raw_score = first_item.get('score')

        with conditional_atomic():
            prediction_fields = {
                'result': result_payload,
                'score': safe_float(raw_score),
                'model_version': self.model_version,
                'task_id': task_id,
                'cluster': first_item.get('cluster'),
                'neighbors': first_item.get('neighbors'),
                'mislabeling': safe_float(first_item.get('mislabeling', 0)),
            }
            prediction = Prediction.objects.create(**prediction_fields)
            logger.debug(f'Prediction {prediction} created')

        return prediction