Exemple #1
0
    def _create_advisor(self, sub_train_job):
        """Start an advisor service for a sub train job and record its ID.

        The service is created from the docker image of the model that the
        sub train job refers to, with the model's dependency install command
        passed in through the ``WORKER_INSTALL_COMMAND`` environment variable.
        The install command is always built with GPU support disabled.

        Args:
            sub_train_job: Sub train job record; its ``model_id`` is used to
                look up the model in the meta store.

        Returns:
            The service object returned by ``self._create_service``.
        """
        model = self._meta_store.get_model(sub_train_job.model_id)
        advisor_type = ServiceType.ADVISOR

        # The install command is built without GPU support for advisors.
        env = {
            'WORKER_INSTALL_COMMAND':
                parse_model_install_command(model.dependencies,
                                            enable_gpu=False),
        }
        advisor = self._create_service(
            service_type=advisor_type,
            docker_image=model.docker_image,
            environment_vars=env,
        )

        # Link the new service to the sub train job and persist the change.
        self._meta_store.update_sub_train_job(
            sub_train_job, advisor_service_id=advisor.id)
        self._meta_store.commit()
        return advisor
    def _create_train_job_worker(self, sub_train_job, gpus=1):
        """Start a train worker service for a sub train job and register it.

        The service is created from the docker image of the model that the
        sub train job refers to. The model's dependency install command is
        passed in through the ``WORKER_INSTALL_COMMAND`` environment variable
        and is built with GPU support whenever ``gpus`` is greater than zero.

        Args:
            sub_train_job: Sub train job record; its ``model_id`` selects the
                model and its ``id`` is stored with the worker record.
            gpus: Value forwarded to ``self._create_service`` as ``gpus``.
                Defaults to 1.

        Returns:
            The service object returned by ``self._create_service``.
        """
        model = self._meta_store.get_model(sub_train_job.model_id)
        worker_type = ServiceType.TRAIN
        wants_gpu = gpus > 0

        env = {
            'WORKER_INSTALL_COMMAND':
                parse_model_install_command(model.dependencies,
                                            enable_gpu=wants_gpu),
        }
        worker = self._create_service(
            service_type=worker_type,
            docker_image=model.docker_image,
            environment_vars=env,
            gpus=gpus,
        )

        # Record the worker against its sub train job and persist the change.
        self._meta_store.create_train_job_worker(
            service_id=worker.id,
            sub_train_job_id=sub_train_job.id,
        )
        self._meta_store.commit()
        return worker
Exemple #3
0
    def _create_inference_job_worker(self,
                                     inference_job,
                                     trial=None,
                                     model_id=None,
                                     gpus=0):
        """Start an inference worker service and register it for a job.

        The model to serve is resolved from ``trial`` when one is given
        (through the trial's sub train job); otherwise it is looked up
        directly by ``model_id``. ``trial`` takes precedence when both are
        supplied.

        Args:
            inference_job: Inference job record; its ``id`` is stored with
                the worker record.
            trial: Optional trial record. When given, the worker record
                stores the trial's ``id`` and no checkpoint ID.
            model_id: Optional model ID, used only when ``trial`` is None.
                The worker record then stores the model's ``checkpoint_id``
                and no trial ID.
            gpus: Value forwarded to ``self._create_service`` as ``gpus``.
                The install command is built with GPU support when this is
                greater than zero. Defaults to 0.

        Returns:
            The service object returned by ``self._create_service``.

        Raises:
            ServiceDeploymentError: If both ``trial`` and ``model_id`` are
                None.
        """
        # Without a trial or a model ID there is nothing to resolve.
        if trial is None and model_id is None:
            raise ServiceDeploymentError("No model found")

        if trial is not None:
            parent_job = self._meta_store.get_sub_train_job(
                trial.sub_train_job_id)
            model = self._meta_store.get_model(parent_job.model_id)
            trial_id, checkpoint_id = trial.id, None
        else:
            model = self._meta_store.get_model(model_id)
            trial_id, checkpoint_id = None, model.checkpoint_id

        worker_type = ServiceType.INFERENCE
        wants_gpu = gpus > 0
        env = {
            'WORKER_INSTALL_COMMAND':
                parse_model_install_command(model.dependencies,
                                            enable_gpu=wants_gpu),
        }
        worker = self._create_service(
            service_type=worker_type,
            docker_image=model.docker_image,
            environment_vars=env,
            gpus=gpus,
        )

        # Record the worker against the inference job and persist the change.
        self._meta_store.create_inference_job_worker(
            service_id=worker.id,
            inference_job_id=inference_job.id,
            trial_id=trial_id,
            checkpoint_id=checkpoint_id,
        )
        self._meta_store.commit()
        return worker