예제 #1
0
    def launch(self) -> Tuple[Dict[Any, Any], Optional[str]]:
        dbx_echo("Launching job via run now API")
        jobs_service = JobsService(self.api_client)

        all_jobs = jobs_service.list_jobs().get("jobs", [])

        matching_jobs = [
            j for j in all_jobs if j["settings"]["name"] == self.job
        ]

        if not matching_jobs:
            raise Exception(f"Job with name {self.job} not found")

        if len(matching_jobs) > 1:
            raise Exception(
                f"Job with name {self.job} is duplicated. Please make job name unique."
            )

        job_data = matching_jobs[0]
        job_id = job_data["job_id"]

        active_runs = jobs_service.list_runs(job_id,
                                             active_only=True).get("runs", [])

        for run in active_runs:
            if self.existing_runs == "pass":
                dbx_echo("Passing the existing runs status check")

            if self.existing_runs == "wait":
                dbx_echo(
                    f'Waiting for job run with id {run["run_id"]} to be finished'
                )
                _wait_run(self.api_client, run)

            if self.existing_runs == "cancel":
                dbx_echo(f'Cancelling run with id {run["run_id"]}')
                _cancel_run(self.api_client, run)

        if self.prepared_parameters:
            dbx_echo(
                f"Default launch parameters are overridden with the following: {self.prepared_parameters}"
            )
            # we don't do a null-check here since the job existence will be already done during listing above.
            job_settings = job_data.get("settings")

            # here we define the job type to correctly pass parameters
            extra_payload_key = _define_payload_key(job_settings)

            extra_payload = {extra_payload_key: self.prepared_parameters}

            run_data = jobs_service.run_now(job_id, **extra_payload)

        else:
            run_data = jobs_service.run_now(job_id)

        return run_data, job_id
예제 #2
0
파일: launch.py 프로젝트: eliasvakkuri/dbx
    def launch(self) -> Tuple[Dict[Any, Any], Optional[str]]:
        dbx_echo("Launching job via run now API")
        jobs_service = JobsService(self.api_client)

        all_jobs = jobs_service.list_jobs().get("jobs", [])

        matching_jobs = [
            j for j in all_jobs if j["settings"]["name"] == self.job
        ]

        if not matching_jobs:
            raise Exception(f"Job with name {self.job} not found")

        if len(matching_jobs) > 1:
            raise Exception(
                f"Job with name {self.job} is duplicated. Please make job name unique."
            )

        job_id = matching_jobs[0]["job_id"]

        active_runs = jobs_service.list_runs(job_id,
                                             active_only=True).get("runs", [])

        for run in active_runs:
            if self.existing_runs == "pass":
                dbx_echo("Passing the existing runs status check")

            if self.existing_runs == "wait":
                dbx_echo(
                    f'Waiting for job run with id {run["run_id"]} to be finished'
                )
                _wait_run(self.api_client, run)

            if self.existing_runs == "cancel":
                dbx_echo(f'Cancelling run with id {run["run_id"]}')
                _cancel_run(self.api_client, run)

        if self.override_parameters:
            _prepared_parameters = sum(
                [[k, v] for k, v in self.override_parameters.items()], [])
            dbx_echo(
                f"Default launch parameters are overridden with the following: {_prepared_parameters}"
            )
            run_data = jobs_service.run_now(job_id,
                                            python_params=_prepared_parameters)
        else:
            run_data = jobs_service.run_now(job_id)

        return run_data, job_id
예제 #3
0
def launch(
        environment: str,
        job: str,
        trace: bool,
        kill_on_sigterm: bool,
        existing_runs: str,
        tags: List[str],
        parameters: List[str],
):
    dbx_echo(f"Launching job {job} on environment {environment}")

    api_client = prepare_environment(environment)
    additional_tags = parse_multiple(tags)
    override_parameters = parse_multiple(parameters)

    filter_string = generate_filter_string(environment, additional_tags)

    runs = mlflow.search_runs(filter_string=filter_string, max_results=1)

    if runs.empty:
        raise EnvironmentError(
            f"""
        No runs provided per given set of filters:
            {filter_string}
        Please check experiment UI to verify current status of deployments.
        """
        )

    run_info = runs.iloc[0].to_dict()

    dbx_echo("Successfully found deployment per given job name")

    deployment_run_id = run_info["run_id"]

    with mlflow.start_run(run_id=deployment_run_id) as deployment_run:
        with mlflow.start_run(nested=True):

            artifact_base_uri = deployment_run.info.artifact_uri
            deployments = _load_deployments(api_client, artifact_base_uri)
            job_id = deployments.get(job)

            if not job_id:
                raise Exception(
                    f"Job with name {job} not found in the latest deployment" % job
                )

            jobs_service = JobsService(api_client)
            active_runs = jobs_service.list_runs(job_id, active_only=True).get(
                "runs", []
            )

            for run in active_runs:
                if existing_runs == "pass":
                    dbx_echo("Passing the existing runs status check")

                if existing_runs == "wait":
                    dbx_echo(
                        f'Waiting for job run with id {run["run_id"]} to be finished'
                    )
                    _wait_run(api_client, run)

                if existing_runs == "cancel":
                    dbx_echo(f'Cancelling run with id {run["run_id"]}')
                    _cancel_run(api_client, run)

            if override_parameters:
                _prepared_parameters = sum(
                    [[k, v] for k, v in override_parameters.items()], []
                )
                dbx_echo(
                    f"Default launch parameters are overridden with the following: {_prepared_parameters}"
                )
                run_data = jobs_service.run_now(
                    job_id, python_params=_prepared_parameters
                )
            else:
                run_data = jobs_service.run_now(job_id)

            if trace:
                dbx_echo("Tracing job run")
                if kill_on_sigterm:
                    dbx_echo("Click Ctrl+C to stop the job run")
                    try:
                        dbx_status = _trace_run(api_client, run_data)
                    except KeyboardInterrupt:
                        dbx_status = "CANCELLED"
                        dbx_echo("Cancelling the run gracefully")
                        _cancel_run(api_client, run_data)
                        dbx_echo("Run cancelled successfully")
                else:
                    dbx_status = _trace_run(api_client, run_data)

                if dbx_status == "ERROR":
                    raise Exception(
                        "Tracked job failed during execution. "
                        "Please check Databricks UI for job logs"
                    )
                dbx_echo("Launch command finished")

            else:
                dbx_status = "NOT_TRACKED"
                dbx_echo(
                    "Job successfully launched in non-tracking mode. Please check Databricks UI for job status"
                )

            deployment_tags = {
                "job_id": job_id,
                "run_id": run_data["run_id"],
                "dbx_action_type": "launch",
                "dbx_status": dbx_status,
                "dbx_environment": environment,
            }

            mlflow.set_tags(deployment_tags)