예제 #1
0
def validate_tags(tags):
    """Normalize a tag dictionary so that every value is a string.

    String values are kept as-is. Any other value is replaced by its JSON
    encoding, provided that decoding the encoded text yields a value equal
    to the original.

    Args:
        tags: Optional dict with string keys (checked via
            ``check.opt_dict_param``).

    Returns:
        A ``frozentags`` built from the normalized key/value pairs.

    Raises:
        DagsterInvalidDefinitionError: if a non-string value cannot be JSON
            encoded, or does not survive a dumps/loads round trip.
    """
    checked_tags = check.opt_dict_param(tags, "tags", key_type=str)
    normalized = {}

    for key, value in checked_tags.items():
        # Strings need no conversion.
        if isinstance(value, str):
            normalized[key] = value
            continue

        round_trips = False
        # Default reason; replaced below once the encoding step has succeeded.
        err_reason = 'Could not JSON encode value "{}"'.format(value)
        try:
            str_val = seven.json.dumps(value)
            err_reason = 'JSON encoding "{json}" of value "{val}" is not equivalent to original value'.format(
                json=str_val, val=value)
            round_trips = seven.json.loads(str_val) == value
        except Exception:  # pylint: disable=broad-except
            # Any failure leaves round_trips False and is reported below.
            pass

        if not round_trips:
            message = (
                'Invalid value for tag "{key}", {err_reason}. Tag values must be strings '
                "or meet the constraint that json.loads(json.dumps(value)) == value."
            ).format(key=key, err_reason=err_reason)
            raise DagsterInvalidDefinitionError(message)

        normalized[key] = str_val

    return frozentags(normalized)
예제 #2
0
    def launch_step(self, step_handler_context: StepHandlerContext):
        """Create the Kubernetes job for a single step and return the launch event.

        Exactly one step key is expected in
        ``step_handler_context.execute_step_args.step_keys_to_execute``. The
        job image is taken from ``self._job_config``; when that has no image,
        the container image of the pipeline's repository origin is used.

        Returns:
            A one-element list holding the ENGINE_EVENT that announces the job.

        Raises:
            AssertionError: if the number of step keys is not exactly one.
            Exception: if no job image is available from either source.
        """
        step_args = step_handler_context.execute_step_args
        step_keys = step_args.step_keys_to_execute
        assert (len(step_keys) == 1
                ), "Launching multiple steps is not currently supported"
        step_key = step_keys[0]

        # The job and its pod share one generated name.
        job_name = "dagster-job-%s" % (get_k8s_job_name(
            step_args.pipeline_run_id,
            step_key,
        ))
        pod_name = job_name

        args = [
            "dagster",
            "api",
            "execute_step",
            serialize_dagster_namedtuple(step_args),
        ]

        job_config = self._job_config
        if not job_config.job_image:
            # Fall back to the image recorded on the repository origin.
            origin_image = step_args.pipeline_origin.repository_origin.container_image
            job_config = job_config.with_image(origin_image)

        if not job_config.job_image:
            raise Exception(
                "No image included in either executor config or the pipeline")

        step_tags = frozentags(step_handler_context.step_tags[step_key])

        job = construct_dagster_k8s_job(
            job_config=job_config,
            args=args,
            job_name=job_name,
            pod_name=pod_name,
            component="step_worker",
            user_defined_k8s_config=get_user_defined_k8s_config(step_tags),
        )

        metadata_entries = [
            EventMetadataEntry.text(step_key, "Step key"),
            EventMetadataEntry.text(job_name, "Kubernetes Job name"),
        ]
        launch_event = DagsterEvent(
            event_type_value=DagsterEventType.ENGINE_EVENT.value,
            pipeline_name=step_args.pipeline_origin.pipeline_name,
            step_key=step_key,
            message=f"Executing step {step_key} in Kubernetes job {job_name}",
            event_specific_data=EngineEventData(metadata_entries),
        )

        self._batch_api.create_namespaced_job(
            body=job, namespace=self._job_namespace)

        return [launch_event]
예제 #3
0
파일: dependency.py 프로젝트: zkan/dagster
    def __new__(cls, name, alias=None, tags=None):
        """Validate the arguments and construct the invocation.

        Args:
            name: Required string (checked with ``check.str_param``).
            alias: Optional string (checked with ``check.opt_str_param``).
            tags: Optional dict whose keys and values are strings; stored as
                ``frozentags``.
        """
        name = check.str_param(name, 'name')
        alias = check.opt_str_param(alias, 'alias')
        tags = frozentags(
            check.opt_dict_param(tags, 'tags', value_type=str, key_type=str))

        # super() takes (class, subclass); the reversed order only worked when
        # cls was exactly SolidInvocation and raised TypeError for subclasses.
        return super(SolidInvocation, cls).__new__(cls, name, alias, tags)
예제 #4
0
파일: composition.py 프로젝트: zkan/dagster
 def tag(self, tags):
     """Return a new CallableSolidNode carrying the given tags.

     The tags are validated with ``validate_tags``. When this node has no
     tags yet they are wrapped in ``frozentags``; otherwise the existing tags
     are combined with them via ``updated_with``.
     """
     validated = validate_tags(tags)
     if self.tags is None:
         combined = frozentags(validated)
     else:
         combined = self.tags.updated_with(validated)
     return CallableSolidNode(self.solid_def, self.given_alias, combined)
예제 #5
0
 def tag(self, tags):
     """Return a new PendingNodeInvocation carrying the given tags.

     The tags are validated with ``validate_tags``. When this invocation has
     no tags yet they are wrapped in ``frozentags``; otherwise the existing
     tags are combined with them via ``updated_with``.
     """
     validated = validate_tags(tags)
     if self.tags is None:
         combined = frozentags(validated)
     else:
         combined = self.tags.updated_with(validated)
     return PendingNodeInvocation(self.node_def, self.given_alias, combined)
예제 #6
0
 def tag(self, tags):
     """Return a new PendingNodeInvocation carrying the given tags.

     The tags are validated with ``validate_tags``. When this invocation has
     no tags yet they are wrapped in ``frozentags``; otherwise the existing
     tags are combined with them via ``updated_with``. The node definition,
     alias and hook definitions are passed through unchanged.
     """
     validated = validate_tags(tags)
     if self.tags is None:
         combined = frozentags(validated)
     else:
         combined = self.tags.updated_with(validated)
     return PendingNodeInvocation(
         node_def=self.node_def,
         given_alias=self.given_alias,
         tags=combined,
         hook_defs=self.hook_defs,
     )
예제 #7
0
 def __new__(cls, name, alias=None, tags=None, hook_defs=None):
     """Validate the arguments and construct the invocation.

     Args:
         name: Required string (checked with ``check.str_param``).
         alias: Optional string (checked with ``check.opt_str_param``).
         tags: Optional dict whose keys and values are strings; stored as
             ``frozentags``.
         hook_defs: Optional set of ``HookDefinition``; stored as a
             ``frozenset``.
     """
     name = check.str_param(name, "name")
     alias = check.opt_str_param(alias, "alias")
     tags = frozentags(
         check.opt_dict_param(tags, "tags", value_type=str, key_type=str))
     hook_defs = frozenset(
         check.opt_set_param(hook_defs, "hook_defs",
                             of_type=HookDefinition))
     # super() takes (class, subclass); the reversed order only worked when
     # cls was exactly SolidInvocation and raised TypeError for subclasses.
     return super(SolidInvocation, cls).__new__(cls, name, alias, tags,
                                                hook_defs)
예제 #8
0
    def _launch_k8s_job_with_args(self, job_name, args, run, pipeline_origin):
        """Build and submit the "run_worker" Kubernetes job for ``run``.

        The job config comes from ``_get_grpc_job_config`` when the repository
        origin carries a container image, otherwise from
        ``get_static_job_config``. The chosen image is recorded on the run
        under ``DOCKER_IMAGE_TAG``, and an engine event is reported both
        before and after the job is created.
        """
        pod_name = job_name

        user_defined_k8s_config = get_user_defined_k8s_config(frozentags(run.tags))
        container_image = pipeline_origin.repository_origin.container_image

        if container_image:
            job_config = self._get_grpc_job_config(container_image)
        else:
            job_config = self.get_static_job_config()

        self._instance.add_run_tags(
            run.run_id, {DOCKER_IMAGE_TAG: job_config.job_image}
        )

        job = construct_dagster_k8s_job(
            job_config=job_config,
            args=args,
            job_name=job_name,
            pod_name=pod_name,
            component="run_worker",
            user_defined_k8s_config=user_defined_k8s_config,
            labels={"dagster/job": pipeline_origin.pipeline_name},
        )

        def build_event_data():
            # Each engine event gets its own freshly built metadata.
            return EngineEventData(
                [
                    EventMetadataEntry.text(job_name, "Kubernetes Job name"),
                    EventMetadataEntry.text(self.job_namespace, "Kubernetes Namespace"),
                    EventMetadataEntry.text(run.run_id, "Run ID"),
                ]
            )

        self._instance.report_engine_event(
            "Creating Kubernetes run worker job",
            run,
            build_event_data(),
            cls=self.__class__,
        )

        self._batch_api.create_namespaced_job(body=job, namespace=self.job_namespace)

        self._instance.report_engine_event(
            "Kubernetes run worker job created",
            run,
            build_event_data(),
            cls=self.__class__,
        )
예제 #9
0
    def __new__(cls, name, alias=None, resource_mapper_fn=None, tags=None):
        """Validate the arguments and construct the invocation.

        Args:
            name: Required string (checked with ``check.str_param``).
            alias: Optional string (checked with ``check.opt_str_param``).
            resource_mapper_fn: Optional callable; defaults to
                ``SolidInvocation.default_resource_mapper_fn``.
            tags: Optional dict whose keys and values are strings; stored as
                ``frozentags``.
        """
        name = check.str_param(name, 'name')
        alias = check.opt_str_param(alias, 'alias')
        resource_mapper_fn = check.opt_callable_param(
            resource_mapper_fn, 'resource_mapper_fn',
            SolidInvocation.default_resource_mapper_fn)
        tags = frozentags(
            check.opt_dict_param(tags, 'tags', value_type=str, key_type=str))

        # super() takes (class, subclass); the reversed order only worked when
        # cls was exactly SolidInvocation and raised TypeError for subclasses.
        return super(SolidInvocation, cls).__new__(cls, name, alias,
                                                   resource_mapper_fn, tags)
예제 #10
0
    def launch_run(self, run, external_pipeline):
        """Submit a Kubernetes job that executes ``run`` and return the run.

        Args:
            run: The ``PipelineRun`` to execute.
            external_pipeline: The ``ExternalPipeline`` whose Python origin is
                passed to the ``execute_run`` command.

        Returns:
            The same ``run`` object that was passed in.
        """
        check.inst_param(run, "run", PipelineRun)
        check.inst_param(external_pipeline, "external_pipeline", ExternalPipeline)

        run_id = run.run_id
        job_name = "dagster-run-{}".format(run_id)
        pod_name = job_name

        user_defined_k8s_config = get_user_defined_k8s_config(frozentags(run.tags))

        pipeline_origin = external_pipeline.get_python_origin()
        container_image = pipeline_origin.repository_origin.container_image

        # Prefer a job config built around the repository's own image.
        if container_image:
            job_config = self._get_grpc_job_config(container_image)
        else:
            job_config = self.get_static_job_config()

        self._instance.add_run_tags(
            run.run_id, {DOCKER_IMAGE_TAG: job_config.job_image})

        execute_run_args = ExecuteRunArgs(
            pipeline_origin=pipeline_origin,
            pipeline_run_id=run.run_id,
            instance_ref=None,
        )
        input_json = serialize_dagster_namedtuple(execute_run_args)

        job = construct_dagster_k8s_job(
            job_config=job_config,
            args=["dagster", "api", "execute_run", input_json],
            job_name=job_name,
            pod_name=pod_name,
            component="run_coordinator",
            user_defined_k8s_config=user_defined_k8s_config,
        )

        self._batch_api.create_namespaced_job(
            body=job, namespace=self.job_namespace)

        event_data = EngineEventData([
            EventMetadataEntry.text(job_name, "Kubernetes Job name"),
            EventMetadataEntry.text(self.job_namespace, "Kubernetes Namespace"),
            EventMetadataEntry.text(run.run_id, "Run ID"),
        ])
        self._instance.report_engine_event(
            "Kubernetes run worker job launched",
            run,
            event_data,
            cls=self.__class__,
        )
        return run
예제 #11
0
    def launch_steps(
        self,
        step_contexts: List[IStepContext],
        known_state: KnownExecutionState,
    ):
        """Create a Kubernetes job that executes the single given step.

        Exactly one step context is expected. The job image is taken from
        ``self._job_config``; when that has no image, the container image of
        the pipeline's repository origin is used. The in-cluster Kubernetes
        configuration is loaded immediately before the job is created.

        Raises:
            AssertionError: if ``step_contexts`` does not hold exactly one item.
            Exception: if no job image is available from either source.
        """
        assert len(step_contexts) == 1, "Launching multiple steps is not currently supported"
        step_key = step_contexts[0].step.key

        # The job and its pod share one generated name.
        job_name = "dagster-job-%s" % (get_k8s_job_name(
            self.pipeline_context.plan_data.pipeline_run.run_id,
            step_key,
        ))
        pod_name = job_name

        pipeline_origin = self.pipeline_context.reconstructable_pipeline.get_python_origin()

        execute_step_args = ExecuteStepArgs(
            pipeline_origin=pipeline_origin,
            pipeline_run_id=self.pipeline_context.pipeline_run.run_id,
            step_keys_to_execute=[step_key],
            instance_ref=self.pipeline_context.instance.get_ref(),
            retry_mode=self.retries.for_inner_plan(),
            known_state=known_state,
            should_verify_step=True,
        )
        args = [
            "dagster",
            "api",
            "execute_step",
            serialize_dagster_namedtuple(execute_step_args),
        ]

        job_config = self._job_config
        if not job_config.job_image:
            # Fall back to the image recorded on the repository origin.
            origin_image = pipeline_origin.repository_origin.container_image
            job_config = job_config.with_image(origin_image)

        if not job_config.job_image:
            raise Exception(
                "No image included in either executor config or the pipeline")

        # An empty tag set is passed to the user-defined config lookup.
        user_defined_k8s_config = get_user_defined_k8s_config(frozentags())
        job = construct_dagster_k8s_job(
            job_config,
            args,
            job_name,
            user_defined_k8s_config,
            pod_name,
        )

        kubernetes.config.load_incluster_config()
        batch_api = kubernetes.client.BatchV1Api()
        batch_api.create_namespaced_job(body=job, namespace=self._job_namespace)
예제 #12
0
    def _launch_k8s_job_with_args(self, job_name, args, run):
        """Build and submit the "run_worker" Kubernetes job for ``run``.

        The job config and namespace come from the run's container context.
        The chosen image is recorded on the run under ``DOCKER_IMAGE_TAG``,
        and an engine event is reported both before and after the job is
        created (only the first carries metadata).
        """
        container_context = self.get_container_context_for_run(run)
        pod_name = job_name

        pipeline_origin = run.pipeline_code_origin
        user_defined_k8s_config = get_user_defined_k8s_config(frozentags(run.tags))

        job_config = container_context.get_k8s_job_config(
            job_image=pipeline_origin.repository_origin.container_image,
            run_launcher=self,
        )

        self._instance.add_run_tags(
            run.run_id, {DOCKER_IMAGE_TAG: job_config.job_image})

        job_labels = {
            "dagster/job": pipeline_origin.pipeline_name,
            "dagster/run-id": run.run_id,
        }
        job = construct_dagster_k8s_job(
            job_config=job_config,
            args=args,
            job_name=job_name,
            pod_name=pod_name,
            component="run_worker",
            user_defined_k8s_config=user_defined_k8s_config,
            labels=job_labels,
        )

        creating_event_data = EngineEventData([
            MetadataEntry("Kubernetes Job name", value=job_name),
            MetadataEntry("Kubernetes Namespace", value=container_context.namespace),
            MetadataEntry("Run ID", value=run.run_id),
        ])
        self._instance.report_engine_event(
            "Creating Kubernetes run worker job",
            run,
            creating_event_data,
            cls=self.__class__,
        )

        self._batch_api.create_namespaced_job(
            body=job, namespace=container_context.namespace)

        self._instance.report_engine_event(
            "Kubernetes run worker job created", run, cls=self.__class__)
예제 #13
0
    def launch_run(self, instance, run, external_pipeline):
        """Submit a "runmaster" Kubernetes job that runs ``dagster-graphql``.

        Args:
            instance: Not referenced in this method; events are reported
                through ``self._instance``.
            run: The ``PipelineRun`` to execute.
            external_pipeline: The ``ExternalPipeline`` supplying the tags and
                the repository / location names.

        Returns:
            The same ``run`` object that was passed in.
        """
        check.inst_param(run, 'run', PipelineRun)
        check.inst_param(external_pipeline, 'external_pipeline', ExternalPipeline)

        job_name = 'dagster-run-{}'.format(run.run_id)
        pod_name = job_name

        resources = get_k8s_resource_requirements(frozentags(external_pipeline.tags))

        # Variables handed to the executeRunInProcess GraphQL operation.
        graphql_variables = seven.json.dumps({
            'runId': run.run_id,
            'repositoryName': external_pipeline.handle.repository_name,
            'repositoryLocationName': external_pipeline.handle.location_name,
        })
        graphql_args = [
            '-p',
            'executeRunInProcess',
            '-v',
            graphql_variables,
            '--remap-sigterm',
        ]

        job = construct_dagster_k8s_job(
            job_config=self.job_config,
            command=['dagster-graphql'],
            args=graphql_args,
            job_name=job_name,
            pod_name=pod_name,
            component='runmaster',
            resources=resources,
        )

        self._batch_api.create_namespaced_job(
            body=job, namespace=self.job_namespace)

        event_data = EngineEventData([
            EventMetadataEntry.text(job_name, 'Kubernetes Job name'),
            EventMetadataEntry.text(pod_name, 'Kubernetes Pod name'),
            EventMetadataEntry.text(self.job_namespace, 'Kubernetes Namespace'),
            EventMetadataEntry.text(run.run_id, 'Run ID'),
        ])
        self._instance.report_engine_event(
            'Kubernetes runmaster job launched',
            run,
            event_data,
            cls=K8sRunLauncher,
        )
        return run
예제 #14
0
파일: dependency.py 프로젝트: keyz/dagster
 def __new__(
     cls,
     name: str,
     alias: Optional[str] = None,
     tags: Dict[str, str] = None,
     hook_defs: AbstractSet[HookDefinition] = None,
     retry_policy: Optional[RetryPolicy] = None,
 ):
     """Validate the arguments and construct the instance.

     Args:
         name: Required string.
         alias: Optional string.
         tags: Optional dict whose keys and values are strings; stored as
             ``frozentags``.
         hook_defs: Optional set of ``HookDefinition``; stored as a
             ``frozenset``.
         retry_policy: Optional ``RetryPolicy``.
     """
     checked_name = check.str_param(name, "name")
     checked_alias = check.opt_str_param(alias, "alias")
     frozen_tags = frozentags(
         check.opt_dict_param(tags, "tags", value_type=str, key_type=str)
     )
     frozen_hooks = frozenset(
         check.opt_set_param(hook_defs, "hook_defs", of_type=HookDefinition)
     )
     checked_policy = check.opt_inst_param(retry_policy, "retry_policy", RetryPolicy)

     return super().__new__(
         cls,
         name=checked_name,
         alias=checked_alias,
         tags=frozen_tags,
         hook_defs=frozen_hooks,
         retry_policy=checked_policy,
     )
예제 #15
0
    def launch_run(self, instance, run, external_pipeline):
        """Submit a "runmaster" Kubernetes job configured from the run's executor config.

        Image, pull settings, service account, env sources and namespace are
        all read from the validated celery-k8s executor config of the run.

        Args:
            instance: Not referenced in this method; events are reported
                through ``self._instance``.
            run: The ``PipelineRun`` to execute.
            external_pipeline: Supplies the tags and the pipeline origin.

        Returns:
            The same ``run`` object that was passed in.
        """
        check.inst_param(run, 'run', PipelineRun)

        job_name = get_job_name_from_run_id(run.run_id)
        pod_name = job_name

        exc_config = _get_validated_celery_k8s_executor_config(run.run_config)

        job_config = DagsterK8sJobConfig(
            dagster_home=self.dagster_home,
            instance_config_map=self.instance_config_map,
            postgres_password_secret=self.postgres_password_secret,
            job_image=exc_config.get('job_image'),
            image_pull_policy=exc_config.get('image_pull_policy'),
            image_pull_secrets=exc_config.get('image_pull_secrets'),
            service_account_name=exc_config.get('service_account_name'),
            env_config_maps=exc_config.get('env_config_maps'),
            env_secrets=exc_config.get('env_secrets'),
        )

        pipeline_tags = frozentags(external_pipeline.tags)
        resources = get_k8s_resource_requirements(pipeline_tags)

        from dagster.cli.api import ExecuteRunArgs

        # depends on DagsterInstance.get() returning the same instance
        # https://github.com/dagster-io/dagster/issues/2757
        execute_run_args = ExecuteRunArgs(
            pipeline_origin=external_pipeline.get_origin(),
            pipeline_run_id=run.run_id,
            instance_ref=None,
        )
        input_json = serialize_dagster_namedtuple(execute_run_args)

        job = construct_dagster_k8s_job(
            job_config,
            command=['dagster'],
            args=['api', 'execute_run_with_structured_logs', input_json],
            job_name=job_name,
            pod_name=pod_name,
            component='runmaster',
            resources=resources,
        )

        job_namespace = exc_config.get('job_namespace')

        kubernetes.client.BatchV1Api().create_namespaced_job(
            body=job, namespace=job_namespace
        )

        event_data = EngineEventData(
            [
                EventMetadataEntry.text(job_name, 'Kubernetes Job name'),
                EventMetadataEntry.text(pod_name, 'Kubernetes Pod name'),
                EventMetadataEntry.text(job_namespace, 'Kubernetes Namespace'),
                EventMetadataEntry.text(run.run_id, 'Run ID'),
            ]
        )
        self._instance.report_engine_event(
            'Kubernetes runmaster job launched',
            run,
            event_data,
            cls=CeleryK8sRunLauncher,
        )
        return run
예제 #16
0
    def launch_run(self, instance, run, external_pipeline):
        """Submit a "run_coordinator" Kubernetes job for ``run``.

        When the pipeline's origin is a ``PipelineGrpcServerOrigin`` the job
        image is taken from the repository location handle and exported to the
        job as ``DAGSTER_CURRENT_IMAGE``; otherwise the image must be present
        in the celery-k8s executor config.

        Args:
            instance: Not referenced in this method; events are reported
                through ``self._instance``.
            run: The ``PipelineRun`` to execute.
            external_pipeline: Supplies the origin, repository handle and tags.

        Returns:
            The same ``run`` object that was passed in.

        Raises:
            DagsterInvariantViolationError: if ``job_image`` is configured
                while loading from a gRPC server, if the location handle has
                an unexpected type, or if no ``job_image`` is configured in
                the non-gRPC case.
        """
        check.inst_param(run, 'run', PipelineRun)

        job_name = get_job_name_from_run_id(run.run_id)
        pod_name = job_name
        exc_config = _get_validated_celery_k8s_executor_config(run.run_config)

        # Only the gRPC branch sets extra environment variables.
        env_vars = None
        loaded_from_grpc = isinstance(external_pipeline.get_origin(),
                                      PipelineGrpcServerOrigin)

        if not loaded_from_grpc:
            job_image = exc_config.get('job_image')
            if not job_image:
                raise DagsterInvariantViolationError(
                    'Cannot find job_image in celery-k8s executor config.')
            pipeline_origin = external_pipeline.get_origin()
        else:
            if exc_config.get('job_image'):
                raise DagsterInvariantViolationError(
                    'Cannot specify job_image in executor config when loading pipeline '
                    'from GRPC server.')

            location_handle = external_pipeline.repository_handle.repository_location_handle
            if not isinstance(location_handle, GrpcServerRepositoryLocationHandle):
                raise DagsterInvariantViolationError(
                    'Expected RepositoryLocationHandle to be of type '
                    'GrpcServerRepositoryLocationHandle but found type {}'.
                    format(type(location_handle)))

            job_image = location_handle.get_current_image()
            env_vars = {'DAGSTER_CURRENT_IMAGE': job_image}

            repository_name = external_pipeline.repository_handle.repository_name
            repository_origin = location_handle.get_repository_python_origin(
                repository_name)
            pipeline_origin = PipelinePythonOrigin(
                pipeline_name=external_pipeline.name,
                repository_origin=repository_origin,
            )

        job_config = DagsterK8sJobConfig(
            dagster_home=self.dagster_home,
            instance_config_map=self.instance_config_map,
            postgres_password_secret=self.postgres_password_secret,
            job_image=check.str_param(job_image, 'job_image'),
            image_pull_policy=exc_config.get('image_pull_policy'),
            image_pull_secrets=exc_config.get('image_pull_secrets'),
            service_account_name=exc_config.get('service_account_name'),
            env_config_maps=exc_config.get('env_config_maps'),
            env_secrets=exc_config.get('env_secrets'),
        )

        pipeline_tags = frozentags(external_pipeline.tags)
        resources = get_k8s_resource_requirements(pipeline_tags)

        from dagster.cli.api import ExecuteRunArgs

        # depends on DagsterInstance.get() returning the same instance
        # https://github.com/dagster-io/dagster/issues/2757
        execute_run_args = ExecuteRunArgs(
            pipeline_origin=pipeline_origin,
            pipeline_run_id=run.run_id,
            instance_ref=None,
        )
        input_json = serialize_dagster_namedtuple(execute_run_args)

        job = construct_dagster_k8s_job(
            job_config,
            command=['dagster'],
            args=['api', 'execute_run_with_structured_logs', input_json],
            job_name=job_name,
            pod_name=pod_name,
            component='run_coordinator',
            resources=resources,
            env_vars=env_vars,
        )

        job_namespace = exc_config.get('job_namespace')

        kubernetes.client.BatchV1Api().create_namespaced_job(
            body=job, namespace=job_namespace)

        event_data = EngineEventData([
            EventMetadataEntry.text(job_name, 'Kubernetes Job name'),
            EventMetadataEntry.text(pod_name, 'Kubernetes Pod name'),
            EventMetadataEntry.text(job_namespace, 'Kubernetes Namespace'),
            EventMetadataEntry.text(run.run_id, 'Run ID'),
        ])
        self._instance.report_engine_event(
            'Kubernetes run_coordinator job launched',
            run,
            event_data,
            cls=CeleryK8sRunLauncher,
        )
        return run
예제 #17
0
 def tags(self):
     """Return the graph definition's tags combined with this object's own tags.

     The two dictionaries are combined with ``merge_dicts`` and the result
     is expanded as keyword arguments into ``frozentags``.
     """
     combined = merge_dicts(self._graph_def.tags, self._tags)
     return frozentags(**combined)
예제 #18
0
파일: launcher.py 프로젝트: zuodh/dagster
    def launch_run(self, instance, run, external_pipeline):
        """Submit a "runmaster" GraphQL Kubernetes job configured from the run's executor config.

        Image, pull settings, service account, env sources and namespace are
        all read from the validated celery-k8s executor config of the run.

        Args:
            instance: Not referenced in this method; events are reported
                through ``self._instance``.
            run: The ``PipelineRun`` to execute.
            external_pipeline: Supplies the tags and the repository /
                location names.

        Returns:
            The same ``run`` object that was passed in.
        """
        check.inst_param(run, 'run', PipelineRun)

        job_name = get_job_name_from_run_id(run.run_id)
        pod_name = job_name

        exc_config = _get_validated_celery_k8s_executor_config(run.run_config)

        job_config = DagsterK8sJobConfig(
            dagster_home=self.dagster_home,
            instance_config_map=self.instance_config_map,
            postgres_password_secret=self.postgres_password_secret,
            job_image=exc_config.get('job_image'),
            image_pull_policy=exc_config.get('image_pull_policy'),
            image_pull_secrets=exc_config.get('image_pull_secrets'),
            service_account_name=exc_config.get('service_account_name'),
            env_config_maps=exc_config.get('env_config_maps'),
            env_secrets=exc_config.get('env_secrets'),
        )

        pipeline_tags = frozentags(external_pipeline.tags)
        resources = get_k8s_resource_requirements(pipeline_tags)

        # Variables handed to the executeRunInProcess GraphQL operation.
        graphql_variables = seven.json.dumps({
            'runId': run.run_id,
            'repositoryName': external_pipeline.handle.repository_name,
            'repositoryLocationName': external_pipeline.handle.location_name,
        })
        graphql_args = [
            '-p',
            'executeRunInProcess',
            '-v',
            graphql_variables,
            '--remap-sigterm',
        ]

        job = construct_dagster_graphql_k8s_job(
            job_config,
            args=graphql_args,
            job_name=job_name,
            pod_name=pod_name,
            component='runmaster',
            resources=resources,
        )

        job_namespace = exc_config.get('job_namespace')

        kubernetes.client.BatchV1Api().create_namespaced_job(
            body=job, namespace=job_namespace)

        event_data = EngineEventData([
            EventMetadataEntry.text(job_name, 'Kubernetes Job name'),
            EventMetadataEntry.text(pod_name, 'Kubernetes Pod name'),
            EventMetadataEntry.text(job_namespace, 'Kubernetes Namespace'),
            EventMetadataEntry.text(run.run_id, 'Run ID'),
        ])
        self._instance.report_engine_event(
            'Kubernetes runmaster job launched',
            run,
            event_data,
            cls=CeleryK8sRunLauncher,
        )
        return run
예제 #19
0
    def launch_run(self, instance, run, external_pipeline):
        """Submit a "run_coordinator" Kubernetes job for ``run``.

        When the pipeline comes from a gRPC server repository location, the
        job image is read from the reloaded repository origin and exported to
        the job as ``DAGSTER_CURRENT_IMAGE``; a ``job_image`` in the executor
        config is then rejected. Otherwise ``job_image`` must be present in
        the executor config.

        Args:
            instance: The ``DagsterInstance`` (type-checked only; events are
                reported through ``self._instance``).
            run: The ``PipelineRun`` to execute.
            external_pipeline: The ``ExternalPipeline`` supplying the origin
                and repository handle.

        Returns:
            The same ``run`` object that was passed in.

        Raises:
            DagsterInvariantViolationError: if the location handle has an
                unexpected type, if ``job_image`` is configured alongside a
                user-code deployment, or if no ``job_image`` is configured in
                the non-gRPC case.
        """
        check.inst_param(instance, "instance", DagsterInstance)
        check.inst_param(run, "run", PipelineRun)
        check.inst_param(external_pipeline, "external_pipeline",
                         ExternalPipeline)

        job_name = get_job_name_from_run_id(run.run_id)
        pod_name = job_name
        exc_config = _get_validated_celery_k8s_executor_config(run.run_config)

        job_image = None
        pipeline_origin = None
        env_vars = None

        job_image_from_executor_config = exc_config.get("job_image")

        # If the user is using user-code deployments, we grab the image from the gRPC server.
        if isinstance(
                external_pipeline.get_external_origin().
                external_repository_origin.repository_location_origin,
                GrpcServerRepositoryLocationOrigin,
        ):

            repository_location_handle = (
                external_pipeline.repository_handle.repository_location_handle)

            if not isinstance(repository_location_handle,
                              GrpcServerRepositoryLocationHandle):
                raise DagsterInvariantViolationError(
                    "Expected RepositoryLocationHandle to be of type "
                    "GrpcServerRepositoryLocationHandle but found type {}".
                    format(type(repository_location_handle)))

            repository_name = external_pipeline.repository_handle.repository_name
            repository_origin = repository_location_handle.reload_repository_python_origin(
                repository_name)
            pipeline_origin = PipelinePythonOrigin(
                pipeline_name=external_pipeline.name,
                repository_origin=repository_origin)

            job_image = repository_origin.container_image
            env_vars = {"DAGSTER_CURRENT_IMAGE": job_image}

            if job_image_from_executor_config:
                # The placeholders must be filled in explicitly; without the
                # format() call the message showed the raw "{...}" names.
                raise DagsterInvariantViolationError((
                    "You have specified a job_image {job_image_from_executor_config} in your executor configuration, "
                    "but also {job_image} in your user-code deployment. You cannot specify a job_image "
                    "in your executor config when using user-code deployments because the job image is "
                    "pulled from the deployment. To resolve this error, remove the job_image "
                    "configuration from your executor configuration (which is a part of your run configuration)"
                ).format(
                    job_image_from_executor_config=job_image_from_executor_config,
                    job_image=job_image,
                ))

        else:
            if not job_image_from_executor_config:
                raise DagsterInvariantViolationError(
                    "You have not specified a job_image in your executor configuration. "
                    "To resolve this error, specify the job_image configuration in the executor "
                    "config section in your run config. \n"
                    "Note: You may also be seeing this error because you are using the configured API. "
                    "Using configured with the celery-k8s executor is not supported at this time, "
                    "and the job_image must be configured at the top-level executor config without "
                    "using configured.")

            job_image = job_image_from_executor_config
            pipeline_origin = external_pipeline.get_python_origin()

        job_config = DagsterK8sJobConfig(
            dagster_home=self.dagster_home,
            instance_config_map=self.instance_config_map,
            postgres_password_secret=self.postgres_password_secret,
            job_image=check.str_param(job_image, "job_image"),
            image_pull_policy=exc_config.get("image_pull_policy"),
            image_pull_secrets=exc_config.get("image_pull_secrets"),
            service_account_name=exc_config.get("service_account_name"),
            env_config_maps=exc_config.get("env_config_maps"),
            env_secrets=exc_config.get("env_secrets"),
        )

        user_defined_k8s_config = get_user_defined_k8s_config(
            frozentags(run.tags))

        from dagster.cli.api import ExecuteRunArgs

        input_json = serialize_dagster_namedtuple(
            # depends on DagsterInstance.get() returning the same instance
            # https://github.com/dagster-io/dagster/issues/2757
            ExecuteRunArgs(
                pipeline_origin=pipeline_origin,
                pipeline_run_id=run.run_id,
                instance_ref=None,
            ))

        job = construct_dagster_k8s_job(
            job_config,
            args=["dagster", "api", "execute_run", input_json],
            job_name=job_name,
            pod_name=pod_name,
            component="run_coordinator",
            user_defined_k8s_config=user_defined_k8s_config,
            env_vars=env_vars,
        )

        job_namespace = exc_config.get("job_namespace")

        self._batch_api.create_namespaced_job(body=job,
                                              namespace=job_namespace)
        self._instance.report_engine_event(
            "Kubernetes run_coordinator job launched",
            run,
            EngineEventData([
                EventMetadataEntry.text(job_name, "Kubernetes Job name"),
                EventMetadataEntry.text(job_namespace, "Kubernetes Namespace"),
                EventMetadataEntry.text(run.run_id, "Run ID"),
            ]),
            cls=self.__class__,
        )
        return run
예제 #20
0
    def launch_run(self, instance, run, external_pipeline):
        """Submit a "run_coordinator" Kubernetes job for ``run``.

        When the pipeline comes from a gRPC server repository location, the
        job config is built from the image on the reloaded repository origin,
        and a launcher-level ``_job_image`` is rejected. Otherwise the static
        job config and the pipeline's own Python origin are used.

        Args:
            instance: Not referenced in this method; events are reported
                through ``self._instance``.
            run: The ``PipelineRun`` to execute.
            external_pipeline: The ``ExternalPipeline`` supplying the origin
                and repository handle.

        Returns:
            The same ``run`` object that was passed in.

        Raises:
            DagsterInvariantViolationError: if ``_job_image`` is set while
                loading from a gRPC server, or if the location handle has an
                unexpected type.
        """
        check.inst_param(run, "run", PipelineRun)
        check.inst_param(external_pipeline, "external_pipeline", ExternalPipeline)

        job_name = "dagster-run-{}".format(run.run_id)
        pod_name = job_name

        user_defined_k8s_config = get_user_defined_k8s_config(frozentags(run.tags))

        external_origin = external_pipeline.get_external_origin()
        location_origin = external_origin.external_repository_origin.repository_location_origin

        if not isinstance(location_origin, GrpcServerRepositoryLocationOrigin):
            pipeline_origin = external_pipeline.get_python_origin()
            job_config = self._get_static_job_config()
        else:
            if self._job_image:
                raise DagsterInvariantViolationError(
                    "Cannot specify job_image in run launcher config when loading pipeline "
                    "from GRPC server."
                )

            location_handle = external_pipeline.repository_handle.repository_location_handle
            if not isinstance(location_handle, GrpcServerRepositoryLocationHandle):
                raise DagsterInvariantViolationError(
                    "Expected RepositoryLocationHandle to be of type "
                    "GrpcServerRepositoryLocationHandle but found type {}".format(
                        type(location_handle)
                    )
                )

            repository_origin = location_handle.reload_repository_python_origin(
                external_pipeline.repository_handle.repository_name
            )
            job_image = repository_origin.container_image

            pipeline_origin = PipelinePythonOrigin(
                pipeline_name=external_pipeline.name,
                repository_origin=repository_origin,
            )
            job_config = self._get_grpc_job_config(job_image)

        execute_run_args = ExecuteRunArgs(
            pipeline_origin=pipeline_origin,
            pipeline_run_id=run.run_id,
            instance_ref=None,
        )
        input_json = serialize_dagster_namedtuple(execute_run_args)

        job = construct_dagster_k8s_job(
            job_config=job_config,
            args=["dagster", "api", "execute_run", input_json],
            job_name=job_name,
            pod_name=pod_name,
            component="run_coordinator",
            user_defined_k8s_config=user_defined_k8s_config,
        )

        self._batch_api.create_namespaced_job(body=job, namespace=self.job_namespace)

        event_data = EngineEventData(
            [
                EventMetadataEntry.text(job_name, "Kubernetes Job name"),
                EventMetadataEntry.text(self.job_namespace, "Kubernetes Namespace"),
                EventMetadataEntry.text(run.run_id, "Run ID"),
            ]
        )
        self._instance.report_engine_event(
            "Kubernetes run worker job launched",
            run,
            event_data,
            cls=self.__class__,
        )
        return run
예제 #21
0
    def launch_step(self, step_handler_context: StepHandlerContext):
        """Create a Kubernetes Job that executes a single step.

        Args:
            step_handler_context: Carries the ``execute_step_args`` (exactly one step key
                is supported), the per-step tags and the instance whose run launcher is
                consulted when building the job config.

        Returns:
            A one-element list holding the engine ``DagsterEvent`` that announces the step
            is executing in the named Kubernetes job.

        Raises:
            AssertionError: If more than one (or zero) step keys were requested.
            Exception: If neither the job config nor the pipeline's repository origin
                provides a container image.
        """
        step_args = step_handler_context.execute_step_args

        assert (
            len(step_args.step_keys_to_execute) == 1
        ), "Launching multiple steps is not currently supported"
        step_key = step_args.step_keys_to_execute[0]

        # The job and its pod share the same name.
        job_name = self._get_k8s_step_job_name(step_handler_context)

        args = step_args.get_command_args()

        container_context = self._get_container_context(step_handler_context)

        job_config = container_context.get_k8s_job_config(
            self._executor_image, step_handler_context.instance.run_launcher
        )
        if not job_config.job_image:
            # Fall back to the image recorded on the repository origin.
            job_config = job_config.with_image(
                step_args.pipeline_origin.repository_origin.container_image
            )
            if not job_config.job_image:
                raise Exception("No image included in either executor config or the job")

        step_tags = frozentags(step_handler_context.step_tags[step_key])
        user_defined_k8s_config = get_user_defined_k8s_config(step_tags)

        job_labels = {
            "dagster/job": step_args.pipeline_origin.pipeline_name,
            "dagster/op": step_key,
            "dagster/run-id": step_args.pipeline_run_id,
        }
        job = construct_dagster_k8s_job(
            job_config=job_config,
            args=args,
            job_name=job_name,
            pod_name=job_name,
            component="step_worker",
            user_defined_k8s_config=user_defined_k8s_config,
            labels=job_labels,
        )

        # The event is built before the job is created; it is only returned if the
        # creation call below does not raise.
        launch_event = DagsterEvent(
            event_type_value=DagsterEventType.ENGINE_EVENT.value,
            pipeline_name=step_args.pipeline_origin.pipeline_name,
            step_key=step_key,
            message=f"Executing step {step_key} in Kubernetes job {job_name}",
            event_specific_data=EngineEventData(
                [
                    MetadataEntry("Step key", value=step_key),
                    MetadataEntry("Kubernetes Job name", value=job_name),
                ],
            ),
        )

        self._batch_api.create_namespaced_job(body=job, namespace=container_context.namespace)

        return [launch_event]
예제 #22
0
    def launch_run(self, instance, run, external_pipeline):
        """Create the Kubernetes run-coordinator Job for a celery-k8s run.

        Args:
            instance (DagsterInstance): Type-checked only; the engine event is reported
                through ``self._instance``.
            run (PipelineRun): The run to execute. Its ``run_config`` supplies the
                validated celery-k8s executor config and its id names the job.
            external_pipeline (ExternalPipeline): The pipeline to run. Its origin decides
                where the job image comes from, and its tags supply the user-defined
                Kubernetes config.

        Returns:
            PipelineRun: The ``run`` that was passed in.

        Raises:
            DagsterInvariantViolationError: If the pipeline comes from a gRPC server and
                the executor config also sets ``job_image``; if the repository location
                handle is not a ``GrpcServerRepositoryLocationHandle``; or if the pipeline
                is not from a gRPC server and the executor config has no ``job_image``.
        """
        check.inst_param(instance, "instance", DagsterInstance)
        check.inst_param(run, "run", PipelineRun)
        check.inst_param(external_pipeline, "external_pipeline", ExternalPipeline)

        # The job and its pod share one name derived from the run id.
        k8s_name = get_job_name_from_run_id(run.run_id)
        exc_config = _get_validated_celery_k8s_executor_config(run.run_config)

        if not isinstance(external_pipeline.get_origin(), PipelineGrpcServerOrigin):
            env_vars = None
            job_image = exc_config.get("job_image")
            if not job_image:
                raise DagsterInvariantViolationError(
                    "Cannot find job_image in celery-k8s executor config."
                )
            pipeline_origin = external_pipeline.get_origin()
        else:
            # With a gRPC server the image is taken from the server, so an
            # executor-level job_image is rejected.
            if exc_config.get("job_image"):
                raise DagsterInvariantViolationError(
                    "Cannot specify job_image in executor config when loading pipeline "
                    "from GRPC server."
                )

            location_handle = external_pipeline.repository_handle.repository_location_handle
            if not isinstance(location_handle, GrpcServerRepositoryLocationHandle):
                raise DagsterInvariantViolationError(
                    "Expected RepositoryLocationHandle to be of type "
                    "GrpcServerRepositoryLocationHandle but found type {}".format(
                        type(location_handle)
                    )
                )

            job_image = location_handle.get_current_image()
            env_vars = {"DAGSTER_CURRENT_IMAGE": job_image}

            repository_name = external_pipeline.repository_handle.repository_name
            pipeline_origin = PipelinePythonOrigin(
                pipeline_name=external_pipeline.name,
                repository_origin=location_handle.get_repository_python_origin(repository_name),
            )

        job_config = DagsterK8sJobConfig(
            dagster_home=self.dagster_home,
            instance_config_map=self.instance_config_map,
            postgres_password_secret=self.postgres_password_secret,
            job_image=check.str_param(job_image, "job_image"),
            image_pull_policy=exc_config.get("image_pull_policy"),
            image_pull_secrets=exc_config.get("image_pull_secrets"),
            service_account_name=exc_config.get("service_account_name"),
            env_config_maps=exc_config.get("env_config_maps"),
            env_secrets=exc_config.get("env_secrets"),
        )

        user_defined_k8s_config = get_user_defined_k8s_config(frozentags(external_pipeline.tags))

        from dagster.cli.api import ExecuteRunArgs

        # depends on DagsterInstance.get() returning the same instance
        # https://github.com/dagster-io/dagster/issues/2757
        run_args = ExecuteRunArgs(
            pipeline_origin=pipeline_origin, pipeline_run_id=run.run_id, instance_ref=None,
        )
        input_json = serialize_dagster_namedtuple(run_args)

        job = construct_dagster_k8s_job(
            job_config,
            command=["dagster"],
            args=["api", "execute_run_with_structured_logs", input_json],
            job_name=k8s_name,
            pod_name=k8s_name,
            component="run_coordinator",
            user_defined_k8s_config=user_defined_k8s_config,
            env_vars=env_vars,
        )

        job_namespace = exc_config.get("job_namespace")

        # A fresh batch API client is created for every launch.
        batch_api = kubernetes.client.BatchV1Api()
        batch_api.create_namespaced_job(body=job, namespace=job_namespace)

        launch_metadata = [
            EventMetadataEntry.text(k8s_name, "Kubernetes Job name"),
            EventMetadataEntry.text(k8s_name, "Kubernetes Pod name"),
            EventMetadataEntry.text(job_namespace, "Kubernetes Namespace"),
            EventMetadataEntry.text(run.run_id, "Run ID"),
        ]
        self._instance.report_engine_event(
            "Kubernetes run_coordinator job launched",
            run,
            EngineEventData(launch_metadata),
            cls=self.__class__,
        )
        return run
예제 #23
0
    def launch_run(self, run, external_pipeline):
        """Create the Kubernetes Job that executes a celery-k8s run.

        The job image is taken from the executor config's ``job_image`` when present,
        otherwise from the container image on the pipeline's repository origin. When both
        are set, the executor config wins and an engine event naming both images is
        reported.

        Args:
            run (PipelineRun): The run to execute. Its ``run_config`` supplies the
                validated celery-k8s executor config, its tags supply the user-defined
                Kubernetes config and its id names the job.
            external_pipeline (ExternalPipeline): The pipeline to run; its Python origin
                is passed to the job and provides the repository's container image.

        Returns:
            PipelineRun: The ``run`` that was passed in.

        Raises:
            DagsterInvariantViolationError: If neither the executor config nor the
                repository origin provides a job image.
        """
        check.inst_param(run, "run", PipelineRun)
        check.inst_param(external_pipeline, "external_pipeline", ExternalPipeline)

        job_name = get_job_name_from_run_id(run.run_id)
        pod_name = job_name
        exc_config = _get_validated_celery_k8s_executor_config(run.run_config)
        env_vars = None

        job_image_from_executor_config = exc_config.get("job_image")

        pipeline_origin = external_pipeline.get_python_origin()
        repository_origin = pipeline_origin.repository_origin

        # Keep the repository image in its own variable so the precedence message can
        # name both images; reusing one variable made the message print the executor
        # image twice.
        job_image_from_repository = repository_origin.container_image

        if job_image_from_repository and job_image_from_executor_config:
            self._instance.report_engine_event(
                f"You have specified a job_image {job_image_from_executor_config} in your executor configuration, "
                f"but also {job_image_from_repository} in your user-code deployment. Using the job image {job_image_from_executor_config} "
                f"from executor configuration as it takes precedence.",
                run,
                cls=self.__class__,
            )

        # Executor config takes precedence over the user-code deployment image.
        job_image = job_image_from_executor_config or job_image_from_repository

        if not job_image:
            raise DagsterInvariantViolationError(
                "You have not specified a job_image in your executor configuration. "
                "To resolve this error, specify the job_image configuration in the executor "
                "config section in your run config. \n"
                "Note: You may also be seeing this error because you are using the configured API. "
                "Using configured with the celery-k8s executor is not supported at this time, "
                "and the job_image must be configured at the top-level executor config without "
                "using configured."
            )

        job_config = DagsterK8sJobConfig(
            dagster_home=self.dagster_home,
            instance_config_map=self.instance_config_map,
            postgres_password_secret=self.postgres_password_secret,
            job_image=check.str_param(job_image, "job_image"),
            image_pull_policy=exc_config.get("image_pull_policy"),
            image_pull_secrets=exc_config.get("image_pull_secrets"),
            service_account_name=exc_config.get("service_account_name"),
            env_config_maps=exc_config.get("env_config_maps"),
            env_secrets=exc_config.get("env_secrets"),
        )

        # Record the image that will actually be used on the run itself.
        self._instance.add_run_tags(
            run.run_id,
            {DOCKER_IMAGE_TAG: job_config.job_image},
        )

        user_defined_k8s_config = get_user_defined_k8s_config(frozentags(run.tags))

        from dagster.cli.api import ExecuteRunArgs

        input_json = serialize_dagster_namedtuple(
            # depends on DagsterInstance.get() returning the same instance
            # https://github.com/dagster-io/dagster/issues/2757
            ExecuteRunArgs(
                pipeline_origin=pipeline_origin,
                pipeline_run_id=run.run_id,
                instance_ref=None,
            )
        )

        job = construct_dagster_k8s_job(
            job_config,
            args=["dagster", "api", "execute_run", input_json],
            job_name=job_name,
            pod_name=pod_name,
            component="run_coordinator",
            user_defined_k8s_config=user_defined_k8s_config,
            env_vars=env_vars,
        )

        job_namespace = exc_config.get("job_namespace")

        # Report before and after the API call so a failed creation still leaves a trace
        # of what was attempted.
        self._instance.report_engine_event(
            "Creating Kubernetes run worker job",
            run,
            EngineEventData(
                [
                    EventMetadataEntry.text(job_name, "Kubernetes Job name"),
                    EventMetadataEntry.text(job_namespace, "Kubernetes Namespace"),
                    EventMetadataEntry.text(run.run_id, "Run ID"),
                ]
            ),
            cls=self.__class__,
        )

        self._batch_api.create_namespaced_job(body=job, namespace=job_namespace)
        self._instance.report_engine_event(
            "Kubernetes run worker job created",
            run,
            EngineEventData(
                [
                    EventMetadataEntry.text(job_name, "Kubernetes Job name"),
                    EventMetadataEntry.text(job_namespace, "Kubernetes Namespace"),
                    EventMetadataEntry.text(run.run_id, "Run ID"),
                ]
            ),
            cls=self.__class__,
        )
        return run
예제 #24
0
    def launch_run(self, context: LaunchRunContext) -> None:
        """Create the Kubernetes Job that executes the run described by ``context``.

        The job image is taken from the executor config's ``job_image`` when present,
        otherwise from the container image on the pipeline code origin's repository
        origin. When both are set, the executor config wins and an engine event naming
        both images is reported.

        Args:
            context: Supplies the pipeline run (``pipeline_run``) and the pipeline code
                origin (``pipeline_code_origin``) to execute.

        Raises:
            DagsterInvariantViolationError: If neither the executor config nor the
                repository origin provides a job image.
        """
        run = context.pipeline_run

        job_name = get_job_name_from_run_id(run.run_id)
        pod_name = job_name
        exc_config = _get_validated_celery_k8s_executor_config(run.run_config)
        env_vars = None

        job_image_from_executor_config = exc_config.get("job_image")

        pipeline_origin = context.pipeline_code_origin
        repository_origin = pipeline_origin.repository_origin

        # Keep the repository image in its own variable so the precedence message can
        # name both images; reusing one variable made the message print the executor
        # image twice.
        job_image_from_repository = repository_origin.container_image

        if job_image_from_repository and job_image_from_executor_config:
            self._instance.report_engine_event(
                f"You have specified a job_image {job_image_from_executor_config} in your executor configuration, "
                f"but also {job_image_from_repository} in your user-code deployment. Using the job image {job_image_from_executor_config} "
                f"from executor configuration as it takes precedence.",
                run,
                cls=self.__class__,
            )

        # Executor config takes precedence over the user-code deployment image.
        job_image = job_image_from_executor_config or job_image_from_repository

        if not job_image:
            raise DagsterInvariantViolationError(
                "You have not specified a job_image in your executor configuration. "
                "To resolve this error, specify the job_image configuration in the executor "
                "config section in your run config. \n"
                "Note: You may also be seeing this error because you are using the configured API. "
                "Using configured with the celery-k8s executor is not supported at this time, "
                "and the job_image must be configured at the top-level executor config without "
                "using configured.")

        job_config = self.get_k8s_job_config(job_image, exc_config)

        # Record the image that will actually be used on the run itself.
        self._instance.add_run_tags(
            run.run_id,
            {DOCKER_IMAGE_TAG: job_config.job_image},
        )

        user_defined_k8s_config = get_user_defined_k8s_config(
            frozentags(run.tags))

        from dagster.cli.api import ExecuteRunArgs

        run_args = ExecuteRunArgs(
            pipeline_origin=pipeline_origin,
            pipeline_run_id=run.run_id,
            instance_ref=self._instance.get_ref(),
        )

        job = construct_dagster_k8s_job(
            job_config,
            args=run_args.get_command_args(),
            job_name=job_name,
            pod_name=pod_name,
            component="run_worker",
            user_defined_k8s_config=user_defined_k8s_config,
            env_vars=env_vars,
            labels={
                "dagster/job": pipeline_origin.pipeline_name,
            },
        )

        job_namespace = exc_config.get("job_namespace")

        # Report before and after the API call so a failed creation still leaves a trace
        # of what was attempted.
        self._instance.report_engine_event(
            "Creating Kubernetes run worker job",
            run,
            EngineEventData([
                EventMetadataEntry.text(job_name, "Kubernetes Job name"),
                EventMetadataEntry.text(job_namespace, "Kubernetes Namespace"),
                EventMetadataEntry.text(run.run_id, "Run ID"),
            ]),
            cls=self.__class__,
        )

        self._batch_api.create_namespaced_job(body=job,
                                              namespace=job_namespace)
        self._instance.report_engine_event(
            "Kubernetes run worker job created",
            run,
            EngineEventData([
                EventMetadataEntry.text(job_name, "Kubernetes Job name"),
                EventMetadataEntry.text(job_namespace, "Kubernetes Namespace"),
                EventMetadataEntry.text(run.run_id, "Run ID"),
            ]),
            cls=self.__class__,
        )
예제 #25
0
    def launch_run(self, instance, run, external_pipeline):
        """Create the Kubernetes "runmaster" Job that executes ``run``.

        Args:
            instance: Not read by this method; the engine event is reported through
                ``self._instance``.
            run (PipelineRun): The run to execute; its id names the job and pod.
            external_pipeline (ExternalPipeline): The pipeline to run. Its tags supply
                the Kubernetes resource requirements, and its origin decides whether the
                job config is built from the gRPC server's current image or from the
                launcher's static config.

        Returns:
            PipelineRun: The ``run`` that was passed in.

        Raises:
            DagsterInvariantViolationError: If the pipeline is loaded from a gRPC server
                and either the launcher has its own ``job_image`` configured or the
                repository location handle is not a
                ``GrpcServerRepositoryLocationHandle``.
        """
        check.inst_param(run, 'run', PipelineRun)
        check.inst_param(external_pipeline, 'external_pipeline', ExternalPipeline)

        # The job and its pod share one name derived from the run id.
        k8s_name = 'dagster-run-{}'.format(run.run_id)

        resources = get_k8s_resource_requirements(frozentags(external_pipeline.tags))

        if not isinstance(external_pipeline.get_origin(), PipelineGrpcServerOrigin):
            pipeline_origin = external_pipeline.get_origin()
            job_config = self._get_static_job_config()
        else:
            # With a gRPC server the image is taken from the server, so a
            # launcher-level job_image is rejected.
            if self._job_image:
                raise DagsterInvariantViolationError(
                    'Cannot specify job_image in run launcher config when loading pipeline '
                    'from GRPC server.'
                )

            location_handle = external_pipeline.repository_handle.repository_location_handle
            if not isinstance(location_handle, GrpcServerRepositoryLocationHandle):
                raise DagsterInvariantViolationError(
                    'Expected RepositoryLocationHandle to be of type '
                    'GrpcServerRepositoryLocationHandle but found type {}'.format(
                        type(location_handle)
                    )
                )

            current_image = location_handle.get_current_image()
            job_config = self._get_grpc_job_config(current_image)

            repository_name = external_pipeline.repository_handle.repository_name
            pipeline_origin = PipelinePythonOrigin(
                pipeline_name=external_pipeline.name,
                repository_origin=location_handle.get_repository_python_origin(repository_name),
            )

        run_args = ExecuteRunArgs(
            pipeline_origin=pipeline_origin, pipeline_run_id=run.run_id, instance_ref=None,
        )
        input_json = serialize_dagster_namedtuple(run_args)

        job = construct_dagster_k8s_job(
            job_config=job_config,
            command=['dagster'],
            args=['api', 'execute_run_with_structured_logs', input_json],
            job_name=k8s_name,
            pod_name=k8s_name,
            component='runmaster',
            resources=resources,
        )

        self._batch_api.create_namespaced_job(body=job, namespace=self.job_namespace)

        launch_metadata = [
            EventMetadataEntry.text(k8s_name, 'Kubernetes Job name'),
            EventMetadataEntry.text(k8s_name, 'Kubernetes Pod name'),
            EventMetadataEntry.text(self.job_namespace, 'Kubernetes Namespace'),
            EventMetadataEntry.text(run.run_id, 'Run ID'),
        ]
        # Unlike the other launchers, the event is attributed to K8sRunLauncher
        # explicitly rather than to self.__class__.
        self._instance.report_engine_event(
            'Kubernetes runmaster job launched',
            run,
            EngineEventData(launch_metadata),
            cls=K8sRunLauncher,
        )
        return run