Example #1
def test_execute_run_fail_pipeline():
    with get_bar_repo_handle() as repo_handle:
        pipeline_handle = PipelineHandle("fail", repo_handle)
        runner = CliRunner()

        with instance_for_test(
                overrides={
                    "compute_logs": {
                        "module":
                        "dagster.core.storage.noop_compute_log_manager",
                        "class": "NoOpComputeLogManager",
                    }
                }) as instance:
            run = create_run_for_test(instance,
                                      pipeline_name="foo",
                                      run_id="new_run")

            input_json = serialize_dagster_namedtuple(
                ExecuteRunArgs(
                    pipeline_origin=pipeline_handle.get_python_origin(),
                    pipeline_run_id=run.run_id,
                    instance_ref=instance.get_ref(),
                ))

            result = runner_execute_run(
                runner,
                [input_json],
            )
            assert result.exit_code == 0

            assert "RUN_FAILURE" in result.stdout, "no match, result: {}".format(
                result)

            run = create_run_for_test(instance,
                                      pipeline_name="foo",
                                      run_id="new_run_raise_on_error")

            input_json_raise_on_failure = serialize_dagster_namedtuple(
                ExecuteRunArgs(
                    pipeline_origin=pipeline_handle.get_python_origin(),
                    pipeline_run_id=run.run_id,
                    instance_ref=instance.get_ref(),
                    set_exit_code_on_failure=True,
                ))

            result = runner.invoke(api.execute_run_command,
                                   [input_json_raise_on_failure])

            assert result.exit_code != 0, str(result.stdout)

            assert "RUN_FAILURE" in result.stdout, "no match, result: {}".format(
                result)

            # Framework errors (e.g. running a run that has already run) also result in a non-zero error code
            result = runner.invoke(api.execute_run_command,
                                   [input_json_raise_on_failure])
            assert result.exit_code != 0, str(result.stdout)
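
These tests lean on a runner_execute_run helper that the listing never shows. Below is a minimal sketch of what such a helper plausibly looks like, assuming it wraps CliRunner.invoke and raises on a non-zero exit code (a hypothetical reconstruction, not code from this page):

def runner_execute_run(runner, cli_args):
    # Hypothetical helper: invoke the `dagster api execute_run` command
    # through the Click test runner and surface the full output on failure.
    result = runner.invoke(api.execute_run_command, cli_args)
    if result.exit_code != 0:
        raise Exception(
            "dagster api execute_run with args {cli_args} returned exit code "
            "{exit_code}; stdout: {stdout}; exception: {exception}".format(
                cli_args=cli_args,
                exit_code=result.exit_code,
                stdout=result.stdout,
                exception=result.exception,
            ))
    return result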
Example #2
def test_execute_run():
    with get_foo_pipeline_handle() as pipeline_handle:
        runner = CliRunner()

        with instance_for_test(
                overrides={
                    "compute_logs": {
                        "module":
                        "dagster.core.storage.noop_compute_log_manager",
                        "class": "NoOpComputeLogManager",
                    }
                }) as instance:
            run = create_run_for_test(instance,
                                      pipeline_name="foo",
                                      run_id="new_run")

            input_json = serialize_dagster_namedtuple(
                ExecuteRunArgs(
                    pipeline_origin=pipeline_handle.get_python_origin(),
                    pipeline_run_id=run.run_id,
                    instance_ref=instance.get_ref(),
                ))

            result = runner_execute_run(
                runner,
                [input_json],
            )

            assert "PIPELINE_SUCCESS" in result.stdout, "no match, result: {}".format(
                result.stdout)

            # Without set_exit_code_on_failure, framework errors (e.g. running a run that has already run) still exit with code 0
            result = runner.invoke(api.execute_run_command, [input_json])
            assert result.exit_code == 0
Example #3
def test_execute_run_with_structured_logs(pipeline_handle):
    runner = CliRunner()

    with instance_for_test(
            overrides={
                "compute_logs": {
                    "module": "dagster.core.storage.noop_compute_log_manager",
                    "class": "NoOpComputeLogManager",
                }
            }) as instance:
        run = create_run_for_test(instance,
                                  pipeline_name="foo",
                                  run_id="new_run")

        input_json = serialize_dagster_namedtuple(
            ExecuteRunArgs(
                pipeline_origin=pipeline_handle.get_origin(),
                pipeline_run_id=run.run_id,
                instance_ref=instance.get_ref(),
            ))

        result = runner_execute_run_with_structured_logs(
            runner,
            [input_json],
        )

    assert "PIPELINE_SUCCESS" in result.stdout, "no match, result: {}".format(
        result)
Example #4
def test_execute_run_cannot_load():
    with get_foo_pipeline_handle() as pipeline_handle:
        runner = CliRunner()

        with instance_for_test(
                overrides={
                    "compute_logs": {
                        "module":
                        "dagster.core.storage.noop_compute_log_manager",
                        "class": "NoOpComputeLogManager",
                    }
                }) as instance:

            input_json = serialize_dagster_namedtuple(
                ExecuteRunArgs(
                    pipeline_origin=pipeline_handle.get_python_origin(),
                    pipeline_run_id="FOOBAR",
                    instance_ref=instance.get_ref(),
                ))

            result = runner.invoke(
                api.execute_run_command,
                [input_json],
            )

            assert result.exit_code != 0

            assert "Pipeline run with id 'FOOBAR' not found for run execution" in str(
                result.exception), "no match, result: {}".format(result.stdout)
Example #5
    def launch_run(self, run, external_pipeline):
        check.inst_param(run, "run", PipelineRun)
        check.inst_param(external_pipeline, "external_pipeline",
                         ExternalPipeline)

        job_name = "dagster-run-{}".format(run.run_id)
        pod_name = job_name

        user_defined_k8s_config = get_user_defined_k8s_config(
            frozentags(run.tags))

        pipeline_origin = external_pipeline.get_python_origin()
        repository_origin = pipeline_origin.repository_origin

        job_config = (self._get_grpc_job_config(
            repository_origin.container_image)
                      if repository_origin.container_image else
                      self.get_static_job_config())

        self._instance.add_run_tags(
            run.run_id,
            {DOCKER_IMAGE_TAG: job_config.job_image},
        )

        input_json = serialize_dagster_namedtuple(
            ExecuteRunArgs(
                pipeline_origin=pipeline_origin,
                pipeline_run_id=run.run_id,
                instance_ref=None,
            ))

        job = construct_dagster_k8s_job(
            job_config=job_config,
            args=["dagster", "api", "execute_run", input_json],
            job_name=job_name,
            pod_name=pod_name,
            component="run_coordinator",
            user_defined_k8s_config=user_defined_k8s_config,
        )

        self._batch_api.create_namespaced_job(body=job,
                                              namespace=self.job_namespace)
        self._instance.report_engine_event(
            "Kubernetes run worker job launched",
            run,
            EngineEventData([
                EventMetadataEntry.text(job_name, "Kubernetes Job name"),
                EventMetadataEntry.text(self.job_namespace,
                                        "Kubernetes Namespace"),
                EventMetadataEntry.text(run.run_id, "Run ID"),
            ]),
            cls=self.__class__,
        )
        return run
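
On the receiving end, the serialized input_json round-trips back into an ExecuteRunArgs tuple inside the job container. An illustrative sketch of that decoding step using dagster's serdes helpers (the variable names are assumptions, not code from the CLI):

from dagster.serdes import deserialize_json_to_dagster_namedtuple

# Illustrative sketch: the `dagster api execute_run` entry point receives
# input_json as its CLI argument and decodes it back into ExecuteRunArgs.
args = deserialize_json_to_dagster_namedtuple(input_json)
assert isinstance(args, ExecuteRunArgs)
run_id = args.pipeline_run_id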
Example #6
    def launch_run(self, context: LaunchRunContext) -> None:
        run = context.pipeline_run
        job_name = get_job_name_from_run_id(run.run_id)
        pipeline_origin = context.pipeline_code_origin

        args = ExecuteRunArgs(
            pipeline_origin=pipeline_origin,
            pipeline_run_id=run.run_id,
            instance_ref=self._instance.get_ref(),
        ).get_command_args()

        self._launch_k8s_job_with_args(job_name, args, run, pipeline_origin)
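
Here get_command_args() replaces the manual serialize-and-build pattern used by the other launchers on this page; the invocation it produces should be roughly equivalent to the sketch below (an approximation of the method's behavior, not a verbatim quote):

# Approximate equivalent of ExecuteRunArgs.get_command_args(), where
# run_args stands for the ExecuteRunArgs instance built above:
args = ["dagster", "api", "execute_run",
        serialize_dagster_namedtuple(run_args)]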
Example #7
    def launch_run(self, instance, run, external_pipeline):
        if isinstance(
                external_pipeline.get_external_origin().
                external_repository_origin.repository_location_origin,
                GrpcServerRepositoryLocationOrigin,
        ):
            repository_location_handle = (
                external_pipeline.repository_handle.repository_location_handle)

            if not isinstance(repository_location_handle,
                              GrpcServerRepositoryLocationHandle):
                raise DagsterInvariantViolationError(
                    "Expected RepositoryLocationHandle to be of type "
                    "GrpcServerRepositoryLocationHandle but found type {}".
                    format(type(repository_location_handle)))

            repository_name = external_pipeline.repository_handle.repository_name
            location_name = external_pipeline.repository_handle.repository_location_handle.location_name
            pipeline_origin = PipelinePythonOrigin(
                pipeline_name=external_pipeline.name,
                repository_origin=repository_location_handle.
                get_repository_python_origin(repository_name),
            )
        else:
            location_name = "local"
            pipeline_origin = external_pipeline.get_python_origin()

        input_json = serialize_dagster_namedtuple(
            ExecuteRunArgs(
                pipeline_origin=pipeline_origin,
                pipeline_run_id=run.run_id,
                instance_ref=None,
            ))

        app = self._get_app(location_name)
        sig = app.signature('launch_run',
                            args=(input_json, ),
                            queue=f"{location_name}-pipelines")
        result = sig.delay()
        instance.report_engine_event(
            "Started Celery task for pipeline (task id: {result.id}).".format(
                result=result),
            run,
            EngineEventData(metadata_entries=[
                EventMetadataEntry.text(result.id, "task_id"),
            ]),
        )

        return run
Example #8
    def launch_run(self, instance, run, external_pipeline):
        check.inst_param(run, "run", PipelineRun)
        input_json = serialize_dagster_namedtuple(
            ExecuteRunArgs(
                pipeline_origin=external_pipeline.get_python_origin(),
                pipeline_run_id=run.run_id,
                instance_ref=instance.get_ref(),
            ))
        # Map the run configuration onto task overrides, so parameters the
        # user entered in the Dagit UI can be passed through to the task.
        overrides = self.generate_task_overrides(run)
        self.client.run_task(
            command=["api", "execute_run_with_structured_logs", input_json],
            overrides=overrides,
        )
        self.run_id_to_task_offset[run.run_id] = self.client.offset
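
The generate_task_overrides helper referenced in the comment is not shown in this listing. Here is a hypothetical sketch of how run tags entered in the Dagit UI might be mapped onto container-level task overrides (the tag keys and override shape are invented for illustration):

def generate_task_overrides(self, run):
    # Hypothetical sketch: translate user-entered run tags into container
    # overrides. The tag keys and structure here are illustrative only.
    overrides = {}
    cpu = run.tags.get("ecs/cpu")
    memory = run.tags.get("ecs/memory")
    if cpu or memory:
        container = {"name": "run"}
        if cpu:
            container["cpu"] = int(cpu)
        if memory:
            container["memory"] = int(memory)
        overrides["containerOverrides"] = [container]
    return overrides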
Example #9
def cli_api_execute_run(output_file, instance, pipeline_origin, pipeline_run):
    check.str_param(output_file, 'output_file')
    check.inst_param(instance, 'instance', DagsterInstance)
    check.inst_param(pipeline_origin, 'pipeline_origin', PipelinePythonOrigin)
    check.inst_param(pipeline_run, 'pipeline_run', PipelineRun)

    from dagster.cli.api import ExecuteRunArgs, ExecuteRunArgsLoadComplete

    with safe_tempfile_path() as input_file:
        write_unary_input(
            input_file,
            ExecuteRunArgs(
                pipeline_origin=pipeline_origin,
                pipeline_run_id=pipeline_run.run_id,
                instance_ref=instance.get_ref(),
            ),
        )

        parts = [
            pipeline_origin.executable_path,
            '-m',
            'dagster',
            'api',
            'execute_run',
            input_file,
            output_file,
        ]

        instance.report_engine_event(
            'About to start process for pipeline "{pipeline_name}" (run_id: {run_id}).'
            .format(pipeline_name=pipeline_run.pipeline_name,
                    run_id=pipeline_run.run_id),
            pipeline_run,
            engine_event_data=EngineEventData(
                marker_start='cli_api_subprocess_init'),
        )

        process = open_ipc_subprocess(parts)

        # Consume the first event to ensure the child process has loaded its
        # input before the input temp file is cleaned up.
        event = next(ipc_read_event_stream(output_file))

        check.inst(event, ExecuteRunArgsLoadComplete)

        return process
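
The handshake above matters because input_file lives inside a safe_tempfile_path context: the parent must not return, and let the temp file be cleaned up, until the child signals that it has loaded the input. A hedged usage sketch, assuming the caller allocates the output file with safe_tempfile_path as well:

# Hypothetical usage sketch; cli_api_execute_run has already consumed the
# ExecuteRunArgsLoadComplete handshake event before returning.
with safe_tempfile_path() as output_file:
    process = cli_api_execute_run(
        output_file, instance, pipeline_origin, pipeline_run)
    process.wait()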
Example #10
def test_execute_run_with_structured_logs(pipeline_handle):
    runner = CliRunner()

    with seven.TemporaryDirectory() as temp_dir:
        with environ({'DAGSTER_HOME': temp_dir}):
            instance = DagsterInstance.get()
            run = create_run_for_test(instance,
                                      pipeline_name='foo',
                                      run_id='new_run')

            input_json = serialize_dagster_namedtuple(
                ExecuteRunArgs(
                    pipeline_origin=pipeline_handle.get_origin(),
                    pipeline_run_id=run.run_id,
                    instance_ref=instance.get_ref(),
                ))

            result = runner_execute_run_with_structured_logs(
                runner,
                [input_json],
            )

    assert 'PIPELINE_SUCCESS' in result.stdout, 'no match, result: {}'.format(
        result)
Example #11
    def launch_run(self, context: LaunchRunContext) -> None:
        run = context.pipeline_run

        job_name = get_job_name_from_run_id(run.run_id)
        pod_name = job_name
        exc_config = _get_validated_celery_k8s_executor_config(run.run_config)
        env_vars = None

        job_image_from_executor_config = exc_config.get("job_image")

        pipeline_origin = context.pipeline_code_origin
        repository_origin = pipeline_origin.repository_origin

        job_image = repository_origin.container_image

        if job_image:
            if job_image_from_executor_config:
                self._instance.report_engine_event(
                    f"You have specified a job_image {job_image_from_executor_config} in your executor configuration, "
                    f"but also {job_image} in your user-code deployment. Using the job image {job_image_from_executor_config} "
                    f"from executor configuration as it takes precedence.",
                    run,
                    cls=self.__class__,
                )
                job_image = job_image_from_executor_config
        else:
            if not job_image_from_executor_config:
                raise DagsterInvariantViolationError(
                    "You have not specified a job_image in your executor configuration. "
                    "To resolve this error, specify the job_image configuration in the executor "
                    "config section in your run config. \n"
                    "Note: You may also be seeing this error because you are using the configured API. "
                    "Using configured with the celery-k8s executor is not supported at this time, "
                    "and the job_image must be configured at the top-level executor config without "
                    "using configured.")

            job_image = job_image_from_executor_config

        job_config = self.get_k8s_job_config(job_image, exc_config)

        self._instance.add_run_tags(
            run.run_id,
            {DOCKER_IMAGE_TAG: job_config.job_image},
        )

        user_defined_k8s_config = get_user_defined_k8s_config(
            frozentags(run.tags))

        from dagster.cli.api import ExecuteRunArgs

        run_args = ExecuteRunArgs(
            pipeline_origin=pipeline_origin,
            pipeline_run_id=run.run_id,
            instance_ref=self._instance.get_ref(),
        )

        job = construct_dagster_k8s_job(
            job_config,
            args=run_args.get_command_args(),
            job_name=job_name,
            pod_name=pod_name,
            component="run_worker",
            user_defined_k8s_config=user_defined_k8s_config,
            env_vars=env_vars,
            labels={
                "dagster/job": pipeline_origin.pipeline_name,
            },
        )

        job_namespace = exc_config.get("job_namespace")

        self._instance.report_engine_event(
            "Creating Kubernetes run worker job",
            run,
            EngineEventData([
                EventMetadataEntry.text(job_name, "Kubernetes Job name"),
                EventMetadataEntry.text(job_namespace, "Kubernetes Namespace"),
                EventMetadataEntry.text(run.run_id, "Run ID"),
            ]),
            cls=self.__class__,
        )

        self._batch_api.create_namespaced_job(body=job,
                                              namespace=job_namespace)
        self._instance.report_engine_event(
            "Kubernetes run worker job created",
            run,
            EngineEventData([
                EventMetadataEntry.text(job_name, "Kubernetes Job name"),
                EventMetadataEntry.text(job_namespace, "Kubernetes Namespace"),
                EventMetadataEntry.text(run.run_id, "Run ID"),
            ]),
            cls=self.__class__,
        )
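
The branch above encodes a small precedence rule for the job image. Extracted as a standalone helper for clarity (a sketch of the logic only, not code from the launcher):

def resolve_job_image(deployment_image, executor_image):
    # Sketch of the precedence above: when both images are set, the
    # executor config wins; at least one of the two must be provided.
    if executor_image:
        return executor_image
    if deployment_image:
        return deployment_image
    raise DagsterInvariantViolationError(
        "You have not specified a job_image in your executor configuration.")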
Example #12
    def launch_run(self, instance, run, external_pipeline):
        check.inst_param(instance, "instance", DagsterInstance)
        check.inst_param(run, "run", PipelineRun)
        check.inst_param(external_pipeline, "external_pipeline", ExternalPipeline)

        job_name = get_job_name_from_run_id(run.run_id)
        pod_name = job_name
        exc_config = _get_validated_celery_k8s_executor_config(run.run_config)

        job_image = None
        pipeline_origin = None
        env_vars = None
        if isinstance(external_pipeline.get_origin(), PipelineGrpcServerOrigin):
            if exc_config.get("job_image"):
                raise DagsterInvariantViolationError(
                    "Cannot specify job_image in executor config when loading pipeline "
                    "from GRPC server."
                )

            repository_location_handle = (
                external_pipeline.repository_handle.repository_location_handle
            )

            if not isinstance(repository_location_handle, GrpcServerRepositoryLocationHandle):
                raise DagsterInvariantViolationError(
                    "Expected RepositoryLocationHandle to be of type "
                    "GrpcServerRepositoryLocationHandle but found type {}".format(
                        type(repository_location_handle)
                    )
                )

            job_image = repository_location_handle.get_current_image()
            env_vars = {"DAGSTER_CURRENT_IMAGE": job_image}

            repository_name = external_pipeline.repository_handle.repository_name
            pipeline_origin = PipelinePythonOrigin(
                pipeline_name=external_pipeline.name,
                repository_origin=repository_location_handle.get_repository_python_origin(
                    repository_name
                ),
            )

        else:
            job_image = exc_config.get("job_image")
            if not job_image:
                raise DagsterInvariantViolationError(
                    "Cannot find job_image in celery-k8s executor config."
                )
            pipeline_origin = external_pipeline.get_origin()

        job_config = DagsterK8sJobConfig(
            dagster_home=self.dagster_home,
            instance_config_map=self.instance_config_map,
            postgres_password_secret=self.postgres_password_secret,
            job_image=check.str_param(job_image, "job_image"),
            image_pull_policy=exc_config.get("image_pull_policy"),
            image_pull_secrets=exc_config.get("image_pull_secrets"),
            service_account_name=exc_config.get("service_account_name"),
            env_config_maps=exc_config.get("env_config_maps"),
            env_secrets=exc_config.get("env_secrets"),
        )

        user_defined_k8s_config = get_user_defined_k8s_config(frozentags(external_pipeline.tags))

        from dagster.cli.api import ExecuteRunArgs

        input_json = serialize_dagster_namedtuple(
            # depends on DagsterInstance.get() returning the same instance
            # https://github.com/dagster-io/dagster/issues/2757
            ExecuteRunArgs(
                pipeline_origin=pipeline_origin, pipeline_run_id=run.run_id, instance_ref=None,
            )
        )

        job = construct_dagster_k8s_job(
            job_config,
            command=["dagster"],
            args=["api", "execute_run_with_structured_logs", input_json],
            job_name=job_name,
            pod_name=pod_name,
            component="run_coordinator",
            user_defined_k8s_config=user_defined_k8s_config,
            env_vars=env_vars,
        )

        job_namespace = exc_config.get("job_namespace")

        api = kubernetes.client.BatchV1Api()
        api.create_namespaced_job(body=job, namespace=job_namespace)

        self._instance.report_engine_event(
            "Kubernetes run_coordinator job launched",
            run,
            EngineEventData(
                [
                    EventMetadataEntry.text(job_name, "Kubernetes Job name"),
                    EventMetadataEntry.text(pod_name, "Kubernetes Pod name"),
                    EventMetadataEntry.text(job_namespace, "Kubernetes Namespace"),
                    EventMetadataEntry.text(run.run_id, "Run ID"),
                ]
            ),
            cls=self.__class__,
        )
        return run
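
The instance_ref=None comment points at a real constraint: the worker cannot be handed a live instance, so it reconstructs one from its own environment. A sketch of the fallback the CLI side presumably performs once the args are deserialized (illustrative, not quoted from dagster):

# Illustrative sketch: with instance_ref=None the worker falls back to
# DagsterInstance.get(), which reads DAGSTER_HOME inside the job pod.
instance = (DagsterInstance.from_ref(args.instance_ref)
            if args.instance_ref else DagsterInstance.get())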
Example #13
    def launch_run(self, instance, run, external_pipeline):
        check.inst_param(instance, "instance", DagsterInstance)
        check.inst_param(run, "run", PipelineRun)
        check.inst_param(external_pipeline, "external_pipeline",
                         ExternalPipeline)

        job_name = get_job_name_from_run_id(run.run_id)
        pod_name = job_name
        exc_config = _get_validated_celery_k8s_executor_config(run.run_config)

        job_image = None
        pipeline_origin = None
        env_vars = None

        job_image_from_executor_config = exc_config.get("job_image")

        # If the user is using user-code deployments, we grab the image from the gRPC server.
        if isinstance(
                external_pipeline.get_external_origin().
                external_repository_origin.repository_location_origin,
                GrpcServerRepositoryLocationOrigin,
        ):

            repository_location_handle = (
                external_pipeline.repository_handle.repository_location_handle)

            if not isinstance(repository_location_handle,
                              GrpcServerRepositoryLocationHandle):
                raise DagsterInvariantViolationError(
                    "Expected RepositoryLocationHandle to be of type "
                    "GrpcServerRepositoryLocationHandle but found type {}".
                    format(type(repository_location_handle)))

            repository_name = external_pipeline.repository_handle.repository_name
            repository_origin = repository_location_handle.reload_repository_python_origin(
                repository_name)
            pipeline_origin = PipelinePythonOrigin(
                pipeline_name=external_pipeline.name,
                repository_origin=repository_origin)

            job_image = repository_origin.container_image
            env_vars = {"DAGSTER_CURRENT_IMAGE": job_image}

            if job_image_from_executor_config:
                raise DagsterInvariantViolationError(
                    f"You have specified a job_image {job_image_from_executor_config} in your executor configuration, "
                    f"but also {job_image} in your user-code deployment. You cannot specify a job_image "
                    "in your executor config when using user-code deployments because the job image is "
                    "pulled from the deployment. To resolve this error, remove the job_image "
                    "configuration from your executor configuration (which is a part of your run configuration)."
                )

        else:
            if not job_image_from_executor_config:
                raise DagsterInvariantViolationError(
                    "You have not specified a job_image in your executor configuration. "
                    "To resolve this error, specify the job_image configuration in the executor "
                    "config section in your run config. \n"
                    "Note: You may also be seeing this error because you are using the configured API. "
                    "Using configured with the celery-k8s executor is not supported at this time, "
                    "and the job_image must be configured at the top-level executor config without "
                    "using configured.")

            job_image = job_image_from_executor_config
            pipeline_origin = external_pipeline.get_python_origin()

        job_config = DagsterK8sJobConfig(
            dagster_home=self.dagster_home,
            instance_config_map=self.instance_config_map,
            postgres_password_secret=self.postgres_password_secret,
            job_image=check.str_param(job_image, "job_image"),
            image_pull_policy=exc_config.get("image_pull_policy"),
            image_pull_secrets=exc_config.get("image_pull_secrets"),
            service_account_name=exc_config.get("service_account_name"),
            env_config_maps=exc_config.get("env_config_maps"),
            env_secrets=exc_config.get("env_secrets"),
        )

        user_defined_k8s_config = get_user_defined_k8s_config(
            frozentags(run.tags))

        from dagster.cli.api import ExecuteRunArgs

        input_json = serialize_dagster_namedtuple(
            # depends on DagsterInstance.get() returning the same instance
            # https://github.com/dagster-io/dagster/issues/2757
            ExecuteRunArgs(
                pipeline_origin=pipeline_origin,
                pipeline_run_id=run.run_id,
                instance_ref=None,
            ))

        job = construct_dagster_k8s_job(
            job_config,
            args=["dagster", "api", "execute_run", input_json],
            job_name=job_name,
            pod_name=pod_name,
            component="run_coordinator",
            user_defined_k8s_config=user_defined_k8s_config,
            env_vars=env_vars,
        )

        job_namespace = exc_config.get("job_namespace")

        self._batch_api.create_namespaced_job(body=job,
                                              namespace=job_namespace)
        self._instance.report_engine_event(
            "Kubernetes run_coordinator job launched",
            run,
            EngineEventData([
                EventMetadataEntry.text(job_name, "Kubernetes Job name"),
                EventMetadataEntry.text(job_namespace, "Kubernetes Namespace"),
                EventMetadataEntry.text(run.run_id, "Run ID"),
            ]),
            cls=self.__class__,
        )
        return run
Example #14
    def launch_run(self, instance, run, external_pipeline):
        check.inst_param(run, "run", PipelineRun)
        check.inst_param(external_pipeline, "external_pipeline", ExternalPipeline)

        job_name = "dagster-run-{}".format(run.run_id)
        pod_name = job_name

        user_defined_k8s_config = get_user_defined_k8s_config(frozentags(run.tags))

        pipeline_origin = None
        job_config = None
        if isinstance(
            external_pipeline.get_external_origin().external_repository_origin.repository_location_origin,
            GrpcServerRepositoryLocationOrigin,
        ):
            if self._job_image:
                raise DagsterInvariantViolationError(
                    "Cannot specify job_image in run launcher config when loading pipeline "
                    "from GRPC server."
                )

            repository_location_handle = (
                external_pipeline.repository_handle.repository_location_handle
            )

            if not isinstance(repository_location_handle, GrpcServerRepositoryLocationHandle):
                raise DagsterInvariantViolationError(
                    "Expected RepositoryLocationHandle to be of type "
                    "GrpcServerRepositoryLocationHandle but found type {}".format(
                        type(repository_location_handle)
                    )
                )

            repository_name = external_pipeline.repository_handle.repository_name

            repository_origin = repository_location_handle.reload_repository_python_origin(
                repository_name
            )

            job_image = repository_origin.container_image

            pipeline_origin = PipelinePythonOrigin(
                pipeline_name=external_pipeline.name, repository_origin=repository_origin
            )

            job_config = self._get_grpc_job_config(job_image)
        else:
            pipeline_origin = external_pipeline.get_python_origin()
            job_config = self._get_static_job_config()

        input_json = serialize_dagster_namedtuple(
            ExecuteRunArgs(
                pipeline_origin=pipeline_origin,
                pipeline_run_id=run.run_id,
                instance_ref=None,
            )
        )

        job = construct_dagster_k8s_job(
            job_config=job_config,
            args=["dagster", "api", "execute_run", input_json],
            job_name=job_name,
            pod_name=pod_name,
            component="run_coordinator",
            user_defined_k8s_config=user_defined_k8s_config,
        )

        self._batch_api.create_namespaced_job(body=job, namespace=self.job_namespace)
        self._instance.report_engine_event(
            "Kubernetes run worker job launched",
            run,
            EngineEventData(
                [
                    EventMetadataEntry.text(job_name, "Kubernetes Job name"),
                    EventMetadataEntry.text(self.job_namespace, "Kubernetes Namespace"),
                    EventMetadataEntry.text(run.run_id, "Run ID"),
                ]
            ),
            cls=self.__class__,
        )
        return run
Example #15
    def launch_run(self, instance, run, external_pipeline):
        check.inst_param(run, 'run', PipelineRun)

        job_name = get_job_name_from_run_id(run.run_id)
        pod_name = job_name

        exc_config = _get_validated_celery_k8s_executor_config(run.run_config)

        job_config = DagsterK8sJobConfig(
            dagster_home=self.dagster_home,
            instance_config_map=self.instance_config_map,
            postgres_password_secret=self.postgres_password_secret,
            job_image=exc_config.get('job_image'),
            image_pull_policy=exc_config.get('image_pull_policy'),
            image_pull_secrets=exc_config.get('image_pull_secrets'),
            service_account_name=exc_config.get('service_account_name'),
            env_config_maps=exc_config.get('env_config_maps'),
            env_secrets=exc_config.get('env_secrets'),
        )

        resources = get_k8s_resource_requirements(frozentags(external_pipeline.tags))

        from dagster.cli.api import ExecuteRunArgs

        input_json = serialize_dagster_namedtuple(
            # depends on DagsterInstance.get() returning the same instance
            # https://github.com/dagster-io/dagster/issues/2757
            ExecuteRunArgs(
                pipeline_origin=external_pipeline.get_origin(),
                pipeline_run_id=run.run_id,
                instance_ref=None,
            )
        )

        job = construct_dagster_k8s_job(
            job_config,
            command=['dagster'],
            args=['api', 'execute_run_with_structured_logs', input_json],
            job_name=job_name,
            pod_name=pod_name,
            component='runmaster',
            resources=resources,
        )

        job_namespace = exc_config.get('job_namespace')

        api = kubernetes.client.BatchV1Api()
        api.create_namespaced_job(body=job, namespace=job_namespace)

        self._instance.report_engine_event(
            'Kubernetes runmaster job launched',
            run,
            EngineEventData(
                [
                    EventMetadataEntry.text(job_name, 'Kubernetes Job name'),
                    EventMetadataEntry.text(pod_name, 'Kubernetes Pod name'),
                    EventMetadataEntry.text(job_namespace, 'Kubernetes Namespace'),
                    EventMetadataEntry.text(run.run_id, 'Run ID'),
                ]
            ),
            cls=CeleryK8sRunLauncher,
        )
        return run
Example #16
    def launch_run(self, run, external_pipeline):
        check.inst_param(run, "run", PipelineRun)
        check.inst_param(external_pipeline, "external_pipeline", ExternalPipeline)

        job_name = get_job_name_from_run_id(run.run_id)
        pod_name = job_name
        exc_config = _get_validated_celery_k8s_executor_config(run.run_config)
        env_vars = None

        job_image_from_executor_config = exc_config.get("job_image")

        pipeline_origin = external_pipeline.get_python_origin()
        repository_origin = pipeline_origin.repository_origin

        job_image = repository_origin.container_image

        if job_image:
            if job_image_from_executor_config:
                self._instance.report_engine_event(
                    f"You have specified a job_image {job_image_from_executor_config} in your executor configuration, "
                    f"but also {job_image} in your user-code deployment. Using the job image {job_image_from_executor_config} "
                    f"from executor configuration as it takes precedence.",
                    run,
                    cls=self.__class__,
                )
                job_image = job_image_from_executor_config
        else:
            if not job_image_from_executor_config:
                raise DagsterInvariantViolationError(
                    "You have not specified a job_image in your executor configuration. "
                    "To resolve this error, specify the job_image configuration in the executor "
                    "config section in your run config. \n"
                    "Note: You may also be seeing this error because you are using the configured API. "
                    "Using configured with the celery-k8s executor is not supported at this time, "
                    "and the job_image must be configured at the top-level executor config without "
                    "using configured."
                )

            job_image = job_image_from_executor_config

        job_config = DagsterK8sJobConfig(
            dagster_home=self.dagster_home,
            instance_config_map=self.instance_config_map,
            postgres_password_secret=self.postgres_password_secret,
            job_image=check.str_param(job_image, "job_image"),
            image_pull_policy=exc_config.get("image_pull_policy"),
            image_pull_secrets=exc_config.get("image_pull_secrets"),
            service_account_name=exc_config.get("service_account_name"),
            env_config_maps=exc_config.get("env_config_maps"),
            env_secrets=exc_config.get("env_secrets"),
        )

        self._instance.add_run_tags(
            run.run_id,
            {DOCKER_IMAGE_TAG: job_config.job_image},
        )

        user_defined_k8s_config = get_user_defined_k8s_config(frozentags(run.tags))

        from dagster.cli.api import ExecuteRunArgs

        input_json = serialize_dagster_namedtuple(
            # depends on DagsterInstance.get() returning the same instance
            # https://github.com/dagster-io/dagster/issues/2757
            ExecuteRunArgs(
                pipeline_origin=pipeline_origin,
                pipeline_run_id=run.run_id,
                instance_ref=None,
            )
        )

        job = construct_dagster_k8s_job(
            job_config,
            args=["dagster", "api", "execute_run", input_json],
            job_name=job_name,
            pod_name=pod_name,
            component="run_coordinator",
            user_defined_k8s_config=user_defined_k8s_config,
            env_vars=env_vars,
        )

        job_namespace = exc_config.get("job_namespace")

        self._instance.report_engine_event(
            "Creating Kubernetes run worker job",
            run,
            EngineEventData(
                [
                    EventMetadataEntry.text(job_name, "Kubernetes Job name"),
                    EventMetadataEntry.text(job_namespace, "Kubernetes Namespace"),
                    EventMetadataEntry.text(run.run_id, "Run ID"),
                ]
            ),
            cls=self.__class__,
        )

        self._batch_api.create_namespaced_job(body=job, namespace=job_namespace)
        self._instance.report_engine_event(
            "Kubernetes run worker job created",
            run,
            EngineEventData(
                [
                    EventMetadataEntry.text(job_name, "Kubernetes Job name"),
                    EventMetadataEntry.text(job_namespace, "Kubernetes Namespace"),
                    EventMetadataEntry.text(run.run_id, "Run ID"),
                ]
            ),
            cls=self.__class__,
        )
        return run
Example #17
    def launch_run(self, instance, run, external_pipeline):
        check.inst_param(run, 'run', PipelineRun)

        job_name = get_job_name_from_run_id(run.run_id)
        pod_name = job_name
        exc_config = _get_validated_celery_k8s_executor_config(run.run_config)

        job_image = None
        pipeline_origin = None
        env_vars = None
        if isinstance(external_pipeline.get_origin(),
                      PipelineGrpcServerOrigin):
            if exc_config.get('job_image'):
                raise DagsterInvariantViolationError(
                    'Cannot specify job_image in executor config when loading pipeline '
                    'from GRPC server.')

            repository_location_handle = (
                external_pipeline.repository_handle.repository_location_handle)

            if not isinstance(repository_location_handle,
                              GrpcServerRepositoryLocationHandle):
                raise DagsterInvariantViolationError(
                    'Expected RepositoryLocationHandle to be of type '
                    'GrpcServerRepositoryLocationHandle but found type {}'.
                    format(type(repository_location_handle)))

            job_image = repository_location_handle.get_current_image()
            env_vars = {'DAGSTER_CURRENT_IMAGE': job_image}

            repository_name = external_pipeline.repository_handle.repository_name
            pipeline_origin = PipelinePythonOrigin(
                pipeline_name=external_pipeline.name,
                repository_origin=repository_location_handle.
                get_repository_python_origin(repository_name),
            )

        else:
            job_image = exc_config.get('job_image')
            if not job_image:
                raise DagsterInvariantViolationError(
                    'Cannot find job_image in celery-k8s executor config.')
            pipeline_origin = external_pipeline.get_origin()

        job_config = DagsterK8sJobConfig(
            dagster_home=self.dagster_home,
            instance_config_map=self.instance_config_map,
            postgres_password_secret=self.postgres_password_secret,
            job_image=check.str_param(job_image, 'job_image'),
            image_pull_policy=exc_config.get('image_pull_policy'),
            image_pull_secrets=exc_config.get('image_pull_secrets'),
            service_account_name=exc_config.get('service_account_name'),
            env_config_maps=exc_config.get('env_config_maps'),
            env_secrets=exc_config.get('env_secrets'),
        )

        resources = get_k8s_resource_requirements(
            frozentags(external_pipeline.tags))

        from dagster.cli.api import ExecuteRunArgs

        input_json = serialize_dagster_namedtuple(
            # depends on DagsterInstance.get() returning the same instance
            # https://github.com/dagster-io/dagster/issues/2757
            ExecuteRunArgs(
                pipeline_origin=pipeline_origin,
                pipeline_run_id=run.run_id,
                instance_ref=None,
            ))

        job = construct_dagster_k8s_job(
            job_config,
            command=['dagster'],
            args=['api', 'execute_run_with_structured_logs', input_json],
            job_name=job_name,
            pod_name=pod_name,
            component='run_coordinator',
            resources=resources,
            env_vars=env_vars,
        )

        job_namespace = exc_config.get('job_namespace')

        api = kubernetes.client.BatchV1Api()
        api.create_namespaced_job(body=job, namespace=job_namespace)

        self._instance.report_engine_event(
            'Kubernetes run_coordinator job launched',
            run,
            EngineEventData([
                EventMetadataEntry.text(job_name, 'Kubernetes Job name'),
                EventMetadataEntry.text(pod_name, 'Kubernetes Pod name'),
                EventMetadataEntry.text(job_namespace, 'Kubernetes Namespace'),
                EventMetadataEntry.text(run.run_id, 'Run ID'),
            ]),
            cls=CeleryK8sRunLauncher,
        )
        return run
Example #18
    def launch_run(self, instance, run, external_pipeline):
        check.inst_param(run, 'run', PipelineRun)
        check.inst_param(external_pipeline, 'external_pipeline', ExternalPipeline)

        job_name = 'dagster-run-{}'.format(run.run_id)
        pod_name = job_name

        resources = get_k8s_resource_requirements(frozentags(external_pipeline.tags))

        pipeline_origin = None
        job_config = None
        if isinstance(external_pipeline.get_origin(), PipelineGrpcServerOrigin):
            if self._job_image:
                raise DagsterInvariantViolationError(
                    'Cannot specify job_image in run launcher config when loading pipeline '
                    'from GRPC server.'
                )

            repository_location_handle = (
                external_pipeline.repository_handle.repository_location_handle
            )

            if not isinstance(repository_location_handle, GrpcServerRepositoryLocationHandle):
                raise DagsterInvariantViolationError(
                    'Expected RepositoryLocationHandle to be of type '
                    'GrpcServerRepositoryLocationHandle but found type {}'.format(
                        type(repository_location_handle)
                    )
                )

            job_image = repository_location_handle.get_current_image()

            job_config = self._get_grpc_job_config(job_image)

            repository_name = external_pipeline.repository_handle.repository_name
            pipeline_origin = PipelinePythonOrigin(
                pipeline_name=external_pipeline.name,
                repository_origin=repository_location_handle.get_repository_python_origin(
                    repository_name
                ),
            )
        else:
            pipeline_origin = external_pipeline.get_origin()
            job_config = self._get_static_job_config()

        input_json = serialize_dagster_namedtuple(
            ExecuteRunArgs(
                pipeline_origin=pipeline_origin, pipeline_run_id=run.run_id, instance_ref=None,
            )
        )

        job = construct_dagster_k8s_job(
            job_config=job_config,
            command=['dagster'],
            args=['api', 'execute_run_with_structured_logs', input_json],
            job_name=job_name,
            pod_name=pod_name,
            component='runmaster',
            resources=resources,
        )

        self._batch_api.create_namespaced_job(body=job, namespace=self.job_namespace)
        self._instance.report_engine_event(
            'Kubernetes runmaster job launched',
            run,
            EngineEventData(
                [
                    EventMetadataEntry.text(job_name, 'Kubernetes Job name'),
                    EventMetadataEntry.text(pod_name, 'Kubernetes Pod name'),
                    EventMetadataEntry.text(self.job_namespace, 'Kubernetes Namespace'),
                    EventMetadataEntry.text(run.run_id, 'Run ID'),
                ]
            ),
            cls=K8sRunLauncher,
        )
        return run