def test_job_serialization():
    """A DagsterK8sJobConfig must survive a to_dict/from_dict round trip unchanged."""
    original = DagsterK8sJobConfig(
        job_image="test/foo:latest",
        dagster_home="/opt/dagster/dagster_home",
        image_pull_policy="Always",
        image_pull_secrets=[{"name": "my_secret"}],
        service_account_name=None,
        instance_config_map="some-instance-configmap",
        postgres_password_secret="some-secret-name",
        env_config_maps=None,
        env_secrets=None,
    )
    round_tripped = DagsterK8sJobConfig.from_dict(original.to_dict())
    assert round_tripped == original
def test_job_serialization():
    """Serializing then deserializing a job config yields an equal config."""
    kwargs = {
        'job_image': 'test/foo:latest',
        'dagster_home': '/opt/dagster/dagster_home',
        'image_pull_policy': 'Always',
        'image_pull_secrets': [{'name': 'my_secret'}],
        'service_account_name': None,
        'instance_config_map': 'some-instance-configmap',
        'postgres_password_secret': 'some-secret-name',
        'env_config_maps': None,
        'env_secrets': None,
    }
    cfg = DagsterK8sJobConfig(**kwargs)
    serialized = cfg.to_dict()
    assert DagsterK8sJobConfig.from_dict(serialized) == cfg
def test_construct_dagster_k8s_job_with_user_defined_env_from():
    """User-defined ``envFrom`` entries (config map / secret refs) from job tags are
    merged with the ``env_config_maps``/``env_secrets`` set on DagsterK8sJobConfig."""

    @graph
    def user_defined_k8s_env_from_tags_graph():
        pass

    # These fields still work even when using underscore keys
    user_defined_k8s_config = get_user_defined_k8s_config(
        user_defined_k8s_env_from_tags_graph.to_job(
            tags={
                USER_DEFINED_K8S_CONFIG_KEY: {
                    "container_config": {
                        "envFrom": [
                            {
                                "configMapRef": {
                                    "name": "user_config_map_ref",
                                    "optional": "True",
                                }
                            },
                            {"secretRef": {"name": "user_secret_ref_one", "optional": "True"}},
                            {
                                "secretRef": {
                                    "name": "user_secret_ref_two",
                                    "optional": "False",
                                },
                                "prefix": "with_prefix",
                            },
                        ]
                    }
                }
            }
        ).tags
    )

    cfg = DagsterK8sJobConfig(
        job_image="test/foo:latest",
        dagster_home="/opt/dagster/dagster_home",
        instance_config_map="some-instance-configmap",
        env_config_maps=["config_map"],
        env_secrets=["secret"],
    )
    job = construct_dagster_k8s_job(
        cfg, ["foo", "bar"], "job", user_defined_k8s_config=user_defined_k8s_config
    ).to_dict()

    env_from = job["spec"]["template"]["spec"]["containers"][0]["env_from"]
    # Key each entry by the name of whichever ref it carries (config map or secret).
    env_from_mapping = {
        (env_var.get("config_map_ref") or env_var.get("secret_ref")).get("name"): env_var
        for env_var in env_from
    }
    # 2 entries from the job config (config_map + secret) plus the 3 user-defined refs.
    assert len(env_from_mapping) == 5
    assert env_from_mapping["config_map"]
    assert env_from_mapping["user_config_map_ref"]
    assert env_from_mapping["secret"]
    assert env_from_mapping["user_secret_ref_one"]
    assert env_from_mapping["user_secret_ref_two"]
def test_construct_dagster_k8s_job_with_user_defined_service_account_name():
    """A service_account_name in user-defined pod_spec_config wins over the job config's."""

    @graph
    def user_defined_k8s_service_account_name_tags_graph():
        pass

    tagged_job = user_defined_k8s_service_account_name_tags_graph.to_job(
        tags={
            USER_DEFINED_K8S_CONFIG_KEY: {
                "pod_spec_config": {
                    "service_account_name": "this-should-take-precedence",
                },
            },
        },
    )
    user_defined_k8s_config = get_user_defined_k8s_config(tagged_job.tags)

    cfg = DagsterK8sJobConfig(
        job_image="test/foo:latest",
        dagster_home="/opt/dagster/dagster_home",
        instance_config_map="some-instance-configmap",
        service_account_name="this-should-be-overriden",
    )
    job_dict = construct_dagster_k8s_job(
        cfg, ["foo", "bar"], "job", user_defined_k8s_config=user_defined_k8s_config
    ).to_dict()

    assert (
        job_dict["spec"]["template"]["spec"]["service_account_name"]
        == "this-should-take-precedence"
    )
def celery_k8s_config(): from dagster_k8s import DagsterK8sJobConfig # DagsterK8sJobConfig provides config schema for specifying Dagster K8s Jobs job_config = DagsterK8sJobConfig.config_type_pipeline_run() additional_config = { 'load_incluster_config': Field( bool, is_required=False, default_value=True, description='''Set this value if you are running the launcher within a k8s cluster. If ``True``, we assume the launcher is running within the target cluster and load config using ``kubernetes.config.load_incluster_config``. Otherwise, we will use the k8s config specified in ``kubeconfig_file`` (using ``kubernetes.config.load_kube_config``) or fall back to the default kubeconfig. Default: ``True``.''', ), 'kubeconfig_file': Field( Noneable(str), is_required=False, description='Path to a kubeconfig file to use, if not using default kubeconfig.', ), 'job_namespace': Field( StringSource, is_required=False, default_value='default', description='The namespace into which to launch new jobs. Note that any ' 'other Kubernetes resources the Job requires (such as the service account) must be ' 'present in this namespace. Default: ``"default"``', ), } cfg = merge_dicts(CELERY_CONFIG, job_config) cfg = merge_dicts(cfg, additional_config) return cfg
def test_construct_dagster_k8s_job_with_sidecar_container():
    """Containers listed in user-defined pod_spec_config are appended as sidecars."""
    cfg = DagsterK8sJobConfig(
        job_image="test/foo:latest",
        dagster_home="/opt/dagster/dagster_home",
        instance_config_map="test",
    )

    # Without user-defined config the default TTL is applied.
    job = construct_dagster_k8s_job(cfg, [], "job123").to_dict()
    assert job["spec"]["ttl_seconds_after_finished"] == DEFAULT_K8S_JOB_TTL_SECONDS_AFTER_FINISHED

    sidecar_spec = {"command": ["echo", "HI"], "image": "sidecar:bar", "name": "sidecar"}
    user_defined_cfg = UserDefinedDagsterK8sConfig(
        pod_spec_config={"containers": [sidecar_spec]},
    )
    job = construct_dagster_k8s_job(
        cfg, [], "job123", user_defined_k8s_config=user_defined_cfg
    ).to_dict()

    containers = job["spec"]["template"]["spec"]["containers"]
    assert len(containers) == 2
    main_container, sidecar = containers
    assert main_container["image"] == "test/foo:latest"
    assert sidecar["image"] == "sidecar:bar"
    assert sidecar["command"] == ["echo", "HI"]
    assert sidecar["name"] == "sidecar"
def test_construct_dagster_k8s_job_with_mounts():
    """volume_mounts on DagsterK8sJobConfig produce a matching volume (backed by a
    configmap or a secret) plus a container volume mount on the generated job."""
    cfg = DagsterK8sJobConfig(
        job_image="test/foo:latest",
        dagster_home="/opt/dagster/dagster_home",
        image_pull_policy="Always",
        image_pull_secrets=[{"name": "my_secret"}],
        service_account_name=None,
        instance_config_map="some-instance-configmap",
        postgres_password_secret=None,
        env_config_maps=None,
        env_secrets=None,
        volume_mounts=[
            {"name": "foo", "path": "biz/buz", "sub_path": "file.txt", "configmap": "settings-cm"}
        ],
    )
    job = construct_dagster_k8s_job(cfg, ["foo", "bar"], "job123").to_dict()

    # The instance volume plus the user-specified "foo" volume.
    assert len(job["spec"]["template"]["spec"]["volumes"]) == 2
    foo_volumes = [
        volume for volume in job["spec"]["template"]["spec"]["volumes"] if volume["name"] == "foo"
    ]
    assert len(foo_volumes) == 1

    # And the corresponding container mounts: instance mount plus "foo".
    assert len(job["spec"]["template"]["spec"]["containers"][0]["volume_mounts"]) == 2
    foo_volumes_mounts = [
        volume
        for volume in job["spec"]["template"]["spec"]["containers"][0]["volume_mounts"]
        if volume["name"] == "foo"
    ]
    assert len(foo_volumes_mounts) == 1

    # A secret-backed mount must also construct without error.
    cfg = DagsterK8sJobConfig(
        job_image="test/foo:latest",
        dagster_home="/opt/dagster/dagster_home",
        image_pull_policy="Always",
        image_pull_secrets=[{"name": "my_secret"}],
        service_account_name=None,
        instance_config_map="some-instance-configmap",
        postgres_password_secret=None,
        env_config_maps=None,
        env_secrets=None,
        volume_mounts=[
            {"name": "foo", "path": "biz/buz", "sub_path": "file.txt", "secret": "settings-secret"}
        ],
    )
    construct_dagster_k8s_job(cfg, ["foo", "bar"], "job123").to_dict()
def test_construct_dagster_k8s_job_with_user_defined_env():
    """User-defined container env vars (including ``valueFrom`` field refs) from
    job tags are merged into the job container's env."""

    @graph
    def user_defined_k8s_env_tags_graph():
        pass

    user_defined_k8s_config = get_user_defined_k8s_config(
        user_defined_k8s_env_tags_graph.to_job(
            tags={
                USER_DEFINED_K8S_CONFIG_KEY: {
                    "container_config": {
                        "env": [
                            {"name": "ENV_VAR_1", "value": "one"},
                            {"name": "ENV_VAR_2", "value": "two"},
                            {
                                "name": "DD_AGENT_HOST",
                                "valueFrom": {"fieldRef": {"fieldPath": "status.hostIP"}},
                            },
                        ]
                    }
                }
            }
        ).tags
    )

    cfg = DagsterK8sJobConfig(
        job_image="test/foo:latest",
        dagster_home="/opt/dagster/dagster_home",
        instance_config_map="some-instance-configmap",
    )
    job = construct_dagster_k8s_job(
        cfg, ["foo", "bar"], "job", user_defined_k8s_config=user_defined_k8s_config
    ).to_dict()

    env = job["spec"]["template"]["spec"]["containers"][0]["env"]
    # Strip None-valued keys so env entries compare cleanly against plain dicts.
    env_mapping = remove_none_recursively({env_var["name"]: env_var for env_var in env})

    # Has DAGSTER_HOME and three additional env vars
    assert len(env_mapping) == 4
    assert env_mapping["ENV_VAR_1"]["value"] == "one"
    assert env_mapping["ENV_VAR_2"]["value"] == "two"
    assert env_mapping["DD_AGENT_HOST"]["value_from"] == {
        "field_ref": {"field_path": "status.hostIP"}
    }
def test_construct_dagster_k8s_job_with_job_op_labels():
    """dagster/job and dagster/op labels are merged with the standard
    app.kubernetes.io labels, and over-long values are truncated (64 -> 63 chars
    here, matching the k8s label-value length limit)."""
    common_labels = {
        "app.kubernetes.io/name": "dagster",
        "app.kubernetes.io/instance": "dagster",
        "app.kubernetes.io/version": dagster_version,
        "app.kubernetes.io/part-of": "dagster",
    }

    cfg = DagsterK8sJobConfig(
        job_image="test/foo:latest",
        dagster_home="/opt/dagster/dagster_home",
        instance_config_map="test",
    )
    job1 = construct_dagster_k8s_job(
        cfg,
        [],
        "job123",
        labels={
            "dagster/job": "some_job",
            "dagster/op": "some_op",
        },
    ).to_dict()
    expected_labels1 = dict(
        **common_labels,
        **{
            "dagster/job": "some_job",
            "dagster/op": "some_op",
        },
    )
    # Labels appear on both the Job metadata and the pod template metadata.
    assert job1["metadata"]["labels"] == expected_labels1
    assert job1["spec"]["template"]["metadata"]["labels"] == expected_labels1

    job2 = construct_dagster_k8s_job(
        cfg,
        [],
        "job456",
        labels={
            "dagster/job": "long_job_name_64____01234567890123456789012345678901234567890123",
            "dagster/op": "long_op_name_64_____01234567890123456789012345678901234567890123",
        },
    ).to_dict()
    expected_labels2 = dict(
        **common_labels,
        **{
            # The last character should be truncated.
            "dagster/job": "long_job_name_64____0123456789012345678901234567890123456789012",
            "dagster/op": "long_op_name_64_____0123456789012345678901234567890123456789012",
        },
    )
    assert job2["metadata"]["labels"] == expected_labels2
    assert job2["spec"]["template"]["metadata"]["labels"] == expected_labels2
def celery_k8s_config():
    """Build the config schema for the celery-k8s executor.

    Returns the Celery base config merged with the Dagster K8s Job config and
    the Kubernetes-connection / run-location fields below (later merges win).
    """
    # DagsterK8sJobConfig provides config schema for specifying Dagster K8s Jobs
    job_config = DagsterK8sJobConfig.config_type_pipeline_run()

    additional_config = {
        "load_incluster_config": Field(
            bool,
            is_required=False,
            default_value=True,
            description="""Set this value if you are running the launcher within a k8s cluster. If ``True``, we assume the launcher is running within the target cluster and load config using ``kubernetes.config.load_incluster_config``. Otherwise, we will use the k8s config specified in ``kubeconfig_file`` (using ``kubernetes.config.load_kube_config``) or fall back to the default kubeconfig. Default: ``True``.""",
        ),
        "kubeconfig_file": Field(
            Noneable(str),
            is_required=False,
            description="Path to a kubeconfig file to use, if not using default kubeconfig.",
        ),
        "job_namespace": Field(
            StringSource,
            is_required=False,
            default_value="default",
            description="The namespace into which to launch new jobs. Note that any "
            "other Kubernetes resources the Job requires (such as the service account) must be "
            'present in this namespace. Default: ``"default"``',
        ),
        "repo_location_name": Field(
            StringSource,
            is_required=False,
            default_value=IN_PROCESS_NAME,
            description="The repository location name to use for execution.",
        ),
        "job_wait_timeout": Field(
            Float,
            is_required=False,
            default_value=DEFAULT_WAIT_TIMEOUT,
            description=f"Wait this many seconds for a job to complete before marking the run as failed. Defaults to {DEFAULT_WAIT_TIMEOUT} seconds.",
        ),
    }

    # Layer the schemas: Celery settings first, then job config, then the fields above.
    cfg = merge_dicts(CELERY_CONFIG, job_config)
    cfg = merge_dicts(cfg, additional_config)
    return cfg
def test_construct_dagster_k8s_job_with_user_defined_volume_mounts_snake_case():
    """camelCase mount fields are accepted under the snake_case ``volume_mounts``
    key of container_config and merged with the default instance mount."""

    @graph
    def user_defined_k8s_volume_mounts_tags_graph():
        pass

    # volume_mounts still work even when using underscore keys
    user_defined_k8s_config = get_user_defined_k8s_config(
        user_defined_k8s_volume_mounts_tags_graph.to_job(
            tags={
                USER_DEFINED_K8S_CONFIG_KEY: {
                    "container_config": {
                        "volume_mounts": [
                            {
                                "mountPath": "mount_path",
                                "mountPropagation": "mount_propagation",
                                "name": "a_volume_mount_one",
                                "readOnly": "False",
                                "subPath": "path/",
                            },
                            {
                                "mountPath": "mount_path",
                                "mountPropagation": "mount_propagation",
                                "name": "a_volume_mount_two",
                                "readOnly": "False",
                                "subPathExpr": "path/",
                            },
                        ]
                    }
                }
            }
        ).tags
    )

    cfg = DagsterK8sJobConfig(
        job_image="test/foo:latest",
        dagster_home="/opt/dagster/dagster_home",
        instance_config_map="some-instance-configmap",
    )
    job = construct_dagster_k8s_job(
        cfg, ["foo", "bar"], "job", user_defined_k8s_config=user_defined_k8s_config
    ).to_dict()

    volume_mounts = job["spec"]["template"]["spec"]["containers"][0]["volume_mounts"]
    volume_mounts_mapping = {volume_mount["name"]: volume_mount for volume_mount in volume_mounts}

    # The dagster-instance mount plus the two user-defined mounts.
    assert len(volume_mounts_mapping) == 3
    assert volume_mounts_mapping["dagster-instance"]
    assert volume_mounts_mapping["a_volume_mount_one"]
    assert volume_mounts_mapping["a_volume_mount_two"]
def test_construct_dagster_k8s_job_with_ttl():
    """ttl_seconds_after_finished defaults, and can be overridden via job_spec_config."""
    cfg = DagsterK8sJobConfig(
        job_image="test/foo:latest",
        dagster_home="/opt/dagster/dagster_home",
        instance_config_map="test",
    )

    # Default TTL applies when no user-defined config is given.
    default_job = construct_dagster_k8s_job(cfg, [], "job123").to_dict()
    assert (
        default_job["spec"]["ttl_seconds_after_finished"]
        == DEFAULT_K8S_JOB_TTL_SECONDS_AFTER_FINISHED
    )

    # A user-defined job_spec_config replaces the default TTL.
    override_cfg = UserDefinedDagsterK8sConfig(
        job_spec_config={"ttl_seconds_after_finished": 0},
    )
    overridden_job = construct_dagster_k8s_job(
        cfg, [], "job123", user_defined_k8s_config=override_cfg
    ).to_dict()
    assert overridden_job["spec"]["ttl_seconds_after_finished"] == 0
def test_construct_dagster_k8s_job_with_env():
    """env_vars listed on the config are resolved from the process environment."""
    with environ({"ENV_VAR_1": "one", "ENV_VAR_2": "two"}):
        cfg = DagsterK8sJobConfig(
            job_image="test/foo:latest",
            dagster_home="/opt/dagster/dagster_home",
            instance_config_map="some-instance-configmap",
            env_vars=["ENV_VAR_1", "ENV_VAR_2"],
        )

        job_dict = construct_dagster_k8s_job(cfg, ["foo", "bar"], "job").to_dict()
        container_env = job_dict["spec"]["template"]["spec"]["containers"][0]["env"]
        by_name = {entry["name"]: entry for entry in container_env}

        # DAGSTER_HOME plus the two env vars requested above.
        assert len(by_name) == 3
        assert by_name["ENV_VAR_1"]["value"] == "one"
        assert by_name["ENV_VAR_2"]["value"] == "two"
def test_sanitize_labels():
    """Labels with characters invalid for k8s are sanitized before being applied."""
    cfg = DagsterK8sJobConfig(
        job_image="test/foo:latest",
        dagster_home="/opt/dagster/dagster_home",
        instance_config_map="test",
    )

    job = construct_dagster_k8s_job(
        cfg,
        [],
        "job456",
        labels={
            # Raw string: "\o" is not a valid escape sequence, and un-escaped
            # backslashes in ordinary string literals are deprecated in Python
            # (the old form needed a pylint anomalous-backslash suppression).
            # The value is byte-identical to the previous "-get_f\o.o[bar-0]-".
            "dagster/op": r"-get_f\o.o[bar-0]-",
            "my_label": "_WhatsUP",
        },
    ).to_dict()

    # Leading/trailing invalid characters stripped, interior ones replaced.
    assert job["metadata"]["labels"]["dagster/op"] == "get_f-o.o-bar-0"
    assert job["metadata"]["labels"]["my_label"] == "WhatsUP"
def test_construct_dagster_k8s_job_no_postgres():
    """Without a postgres password secret, no PG password env var is injected."""
    cfg = DagsterK8sJobConfig(
        job_image="test/foo:latest",
        dagster_home="/opt/dagster/dagster_home",
        image_pull_policy="Always",
        image_pull_secrets=[{"name": "my_secret"}],
        service_account_name=None,
        instance_config_map="some-instance-configmap",
        postgres_password_secret=None,
        env_config_maps=None,
        env_secrets=None,
    )
    job_dict = construct_dagster_k8s_job(cfg, ["foo", "bar"], "job123").to_dict()

    assert job_dict["kind"] == "Job"
    assert job_dict["metadata"]["name"] == "job123"

    container = job_dict["spec"]["template"]["spec"]["containers"][0]
    assert container["image"] == "test/foo:latest"

    env_names = [env["name"] for env in container["env"]]
    assert DAGSTER_PG_PASSWORD_ENV_VAR not in env_names
def test_k8s_tag_op():
    """Resource requests declared in op tags land on the job's main container."""
    assert my_op

    user_defined_cfg = get_user_defined_k8s_config(my_op.tags)
    cfg = DagsterK8sJobConfig(
        job_image="test/foo:latest",
        dagster_home="/opt/dagster/dagster_home",
        instance_config_map="test",
    )
    job = construct_dagster_k8s_job(cfg, [], "job123", user_defined_k8s_config=user_defined_cfg)

    expected_resources = {
        "requests": {"cpu": "200m", "memory": "32Mi"},
        "limits": None,
    }
    container = job.to_dict()["spec"]["template"]["spec"]["containers"][0]
    assert container["resources"] == expected_resources
def celery_k8s_job_executor(init_context):
    '''Celery-based executor which launches tasks as Kubernetes Jobs.

    The Celery executor exposes config settings for the underlying Celery app under
    the ``config_source`` key. This config corresponds to the "new lowercase settings"
    introduced in Celery version 4.0 and the object constructed from config will be
    passed to the :py:class:`celery.Celery` constructor as its ``config_source``
    argument. (See https://docs.celeryproject.org/en/latest/userguide/configuration.html
    for details.)

    The executor also exposes the ``broker``, ``backend``, and ``include`` arguments
    to the :py:class:`celery.Celery` constructor.

    In the most common case, you may want to modify the ``broker`` and ``backend``
    (e.g., to use Redis instead of RabbitMQ). We expect that ``config_source`` will
    be less frequently modified, but that when solid executions are especially fast
    or slow, or when there are different requirements around idempotence or retry,
    it may make sense to execute pipelines with variations on these settings.

    If you'd like to configure a Celery Kubernetes Job executor in addition to the
    :py:class:`~dagster.default_executors`, you should add it to the ``executor_defs``
    defined on a :py:class:`~dagster.ModeDefinition` as follows:

    .. code-block:: python

        from dagster import ModeDefinition, default_executors, pipeline
        from dagster_celery.executor_k8s import celery_k8s_job_executor

        @pipeline(mode_defs=[
            ModeDefinition(executor_defs=default_executors + [celery_k8s_job_executor])
        ])
        def celery_enabled_pipeline():
            pass

    Then you can configure the executor as follows:

    .. code-block:: YAML

        execution:
          celery-k8s:
            config:
              job_image: 'my_repo.com/image_name:latest'
              job_namespace: 'some-namespace'
              broker: 'pyamqp://guest@localhost//'  # Optional[str]: The URL of the Celery broker
              backend: 'rpc://'  # Optional[str]: The URL of the Celery results backend
              include: ['my_module']  # Optional[List[str]]: Modules every worker should import
              config_source:  # Dict[str, Any]: Any additional parameters to pass to the
                  #...  # Celery workers. This dict will be passed as the `config_source`
                  #...  # argument of celery.Celery().

    Note that the YAML you provide here must align with the configuration with which the
    Celery workers on which you hope to run were started. If, for example, you point the
    executor at a different broker than the one your workers are listening to, the workers
    will never be able to pick up tasks for execution.
    '''
    from dagster_k8s import DagsterK8sJobConfig, CeleryK8sRunLauncher

    check_cross_process_constraints(init_context)

    run_launcher = init_context.instance.run_launcher
    exc_cfg = init_context.executor_config

    # Hard requirement: this executor delegates job creation to the run launcher.
    check.inst(
        run_launcher,
        CeleryK8sRunLauncher,
        'This engine is only compatible with a CeleryK8sRunLauncher; configure the '
        'CeleryK8sRunLauncher on your instance to use it.',
    )

    # Instance-level settings come from the launcher; per-run settings from executor config.
    job_config = DagsterK8sJobConfig(
        dagster_home=run_launcher.dagster_home,
        instance_config_map=run_launcher.instance_config_map,
        postgres_password_secret=run_launcher.postgres_password_secret,
        job_image=exc_cfg.get('job_image'),
        image_pull_policy=exc_cfg.get('image_pull_policy'),
        image_pull_secrets=exc_cfg.get('image_pull_secrets'),
        service_account_name=exc_cfg.get('service_account_name'),
        env_config_maps=exc_cfg.get('env_config_maps'),
        env_secrets=exc_cfg.get('env_secrets'),
    )

    # Set on the instance but overrideable here
    broker = run_launcher.broker or exc_cfg.get('broker')
    backend = run_launcher.backend or exc_cfg.get('backend')
    config_source = run_launcher.config_source or exc_cfg.get('config_source')
    include = run_launcher.include or exc_cfg.get('include')
    retries = run_launcher.retries or Retries.from_config(exc_cfg.get('retries'))

    # NOTE(review): this returns CeleryK8sJobConfig while the sibling implementation
    # of this executor returns CeleryK8sJobExecutor — confirm the return type here
    # is intended and not a misnamed constructor.
    return CeleryK8sJobConfig(
        broker=broker,
        backend=backend,
        config_source=config_source,
        include=include,
        retries=retries,
        job_config=job_config,
        job_namespace=exc_cfg.get('job_namespace'),
        load_incluster_config=exc_cfg.get('load_incluster_config'),
        kubeconfig_file=exc_cfg.get('kubeconfig_file'),
    )
def celery_k8s_job_executor(init_context):
    """Celery-based executor which launches tasks as Kubernetes Jobs.

    The Celery executor exposes config settings for the underlying Celery app under
    the ``config_source`` key. This config corresponds to the "new lowercase settings"
    introduced in Celery version 4.0 and the object constructed from config will be
    passed to the :py:class:`celery.Celery` constructor as its ``config_source``
    argument. (See https://docs.celeryproject.org/en/latest/userguide/configuration.html
    for details.)

    The executor also exposes the ``broker``, ``backend``, and ``include`` arguments
    to the :py:class:`celery.Celery` constructor.

    In the most common case, you may want to modify the ``broker`` and ``backend``
    (e.g., to use Redis instead of RabbitMQ). We expect that ``config_source`` will
    be less frequently modified, but that when solid executions are especially fast
    or slow, or when there are different requirements around idempotence or retry,
    it may make sense to execute pipelines with variations on these settings.

    If you'd like to configure a Celery Kubernetes Job executor in addition to the
    :py:class:`~dagster.default_executors`, you should add it to the ``executor_defs``
    defined on a :py:class:`~dagster.ModeDefinition` as follows:

    .. literalinclude:: ../../../../../../python_modules/libraries/dagster-celery-k8s/dagster_celery_k8s_tests/example_celery_mode_def.py
       :language: python

    Then you can configure the executor as follows:

    .. code-block:: YAML

        execution:
          celery-k8s:
            config:
              job_image: 'my_repo.com/image_name:latest'
              job_namespace: 'some-namespace'
              broker: 'pyamqp://guest@localhost//'  # Optional[str]: The URL of the Celery broker
              backend: 'rpc://'  # Optional[str]: The URL of the Celery results backend
              include: ['my_module']  # Optional[List[str]]: Modules every worker should import
              config_source:  # Dict[str, Any]: Any additional parameters to pass to the
                  #...  # Celery workers. This dict will be passed as the `config_source`
                  #...  # argument of celery.Celery().

    Note that the YAML you provide here must align with the configuration with which the
    Celery workers on which you hope to run were started. If, for example, you point the
    executor at a different broker than the one your workers are listening to, the workers
    will never be able to pick up tasks for execution.

    In deployments where the celery_k8s_job_executor is used all appropriate celery and
    dagster_celery commands must be invoked with the `-A dagster_celery_k8s.app` argument.
    """
    run_launcher = init_context.instance.run_launcher
    exc_cfg = init_context.executor_config

    # Hard requirement: this executor delegates job creation to the run launcher.
    if not isinstance(run_launcher, CeleryK8sRunLauncher):
        raise DagsterUnmetExecutorRequirementsError(
            "This engine is only compatible with a CeleryK8sRunLauncher; configure the "
            "CeleryK8sRunLauncher on your instance to use it.",
        )

    # Instance-level settings come from the launcher; per-run settings from executor
    # config, with DAGSTER_CURRENT_IMAGE as the job-image fallback.
    job_config = DagsterK8sJobConfig(
        dagster_home=run_launcher.dagster_home,
        instance_config_map=run_launcher.instance_config_map,
        postgres_password_secret=run_launcher.postgres_password_secret,
        job_image=exc_cfg.get("job_image") or os.getenv("DAGSTER_CURRENT_IMAGE"),
        image_pull_policy=exc_cfg.get("image_pull_policy"),
        image_pull_secrets=exc_cfg.get("image_pull_secrets"),
        service_account_name=exc_cfg.get("service_account_name"),
        env_config_maps=exc_cfg.get("env_config_maps"),
        env_secrets=exc_cfg.get("env_secrets"),
    )

    # Set on the instance but overrideable here
    broker = run_launcher.broker or exc_cfg.get("broker")
    backend = run_launcher.backend or exc_cfg.get("backend")
    config_source = run_launcher.config_source or exc_cfg.get("config_source")
    include = run_launcher.include or exc_cfg.get("include")
    retries = run_launcher.retries or RetryMode.from_config(exc_cfg.get("retries"))

    return CeleryK8sJobExecutor(
        broker=broker,
        backend=backend,
        config_source=config_source,
        include=include,
        retries=retries,
        job_config=job_config,
        job_namespace=exc_cfg.get("job_namespace"),
        load_incluster_config=exc_cfg.get("load_incluster_config"),
        kubeconfig_file=exc_cfg.get("kubeconfig_file"),
        repo_location_name=exc_cfg.get("repo_location_name"),
    )
def _execute_step_k8s_job(
    self,
    execute_step_args_packed,
    job_config_dict,
    job_namespace,
    load_incluster_config,
    user_defined_k8s_config_dict=None,
    kubeconfig_file=None,
):
    """Run step execution in a K8s job pod.

    Validates the packed arguments, creates a Kubernetes Job for the single step,
    waits for it to finish, collects Dagster events from the pod logs, and returns
    the serialized events. Errors at each stage are reported as engine events and
    cause an early ``return []``.
    """
    execute_step_args = unpack_value(
        check.dict_param(
            execute_step_args_packed,
            "execute_step_args_packed",
        )
    )
    check.inst_param(execute_step_args, "execute_step_args", ExecuteStepArgs)
    check.invariant(
        len(execute_step_args.step_keys_to_execute) == 1,
        "Celery K8s task executor can only execute 1 step at a time",
    )

    # Celery will serialize this as a list
    job_config = DagsterK8sJobConfig.from_dict(job_config_dict)
    check.inst_param(job_config, "job_config", DagsterK8sJobConfig)
    check.str_param(job_namespace, "job_namespace")

    check.bool_param(load_incluster_config, "load_incluster_config")

    user_defined_k8s_config = UserDefinedDagsterK8sConfig.from_dict(
        user_defined_k8s_config_dict
    )
    check.opt_inst_param(
        user_defined_k8s_config,
        "user_defined_k8s_config",
        UserDefinedDagsterK8sConfig,
    )
    check.opt_str_param(kubeconfig_file, "kubeconfig_file")

    # For when launched via DinD or running the cluster
    if load_incluster_config:
        kubernetes.config.load_incluster_config()
    else:
        kubernetes.config.load_kube_config(kubeconfig_file)

    instance = DagsterInstance.from_ref(execute_step_args.instance_ref)
    pipeline_run = instance.get_run_by_id(execute_step_args.pipeline_run_id)

    check.inst(
        pipeline_run,
        PipelineRun,
        "Could not load run {}".format(execute_step_args.pipeline_run_id),
    )
    step_key = execute_step_args.step_keys_to_execute[0]

    celery_worker_name = self.request.hostname
    celery_pod_name = os.environ.get("HOSTNAME")
    instance.report_engine_event(
        "Task for step {step_key} picked up by Celery".format(step_key=step_key),
        pipeline_run,
        EngineEventData(
            [
                EventMetadataEntry.text(celery_worker_name, "Celery worker name"),
                EventMetadataEntry.text(celery_pod_name, "Celery worker Kubernetes Pod name"),
            ]
        ),
        CeleryK8sJobExecutor,
        step_key=step_key,
    )

    # Bail out early if the run was terminated/errored before this task was picked up.
    if pipeline_run.status != PipelineRunStatus.STARTED:
        instance.report_engine_event(
            "Not scheduling step because pipeline run status is not STARTED",
            pipeline_run,
            EngineEventData(
                [
                    EventMetadataEntry.text(step_key, "Step key"),
                ]
            ),
            CeleryK8sJobExecutor,
            step_key=step_key,
        )
        return []

    # Ensure we stay below k8s name length limits
    k8s_name_key = get_k8s_job_name(execute_step_args.pipeline_run_id, step_key)

    retry_state = execute_step_args.known_state.get_retry_state()

    # Suffix the attempt number so a retried step gets a fresh job/pod name.
    if retry_state.get_attempt_count(step_key):
        attempt_number = retry_state.get_attempt_count(step_key)
        job_name = "dagster-job-%s-%d" % (k8s_name_key, attempt_number)
        pod_name = "dagster-job-%s-%d" % (k8s_name_key, attempt_number)
    else:
        job_name = "dagster-job-%s" % (k8s_name_key)
        pod_name = "dagster-job-%s" % (k8s_name_key)

    input_json = serialize_dagster_namedtuple(execute_step_args)
    args = ["dagster", "api", "execute_step", input_json]

    job = construct_dagster_k8s_job(job_config, args, job_name, user_defined_k8s_config, pod_name)

    # Running list of events generated from this task execution
    events = []

    # Post event for starting execution
    job_name = job.metadata.name
    engine_event = instance.report_engine_event(
        "Executing step {} in Kubernetes job {}".format(step_key, job_name),
        pipeline_run,
        EngineEventData(
            [
                EventMetadataEntry.text(step_key, "Step key"),
                EventMetadataEntry.text(job_name, "Kubernetes Job name"),
                EventMetadataEntry.text(job_config.job_image, "Job image"),
                EventMetadataEntry.text(job_config.image_pull_policy, "Image pull policy"),
                EventMetadataEntry.text(str(job_config.image_pull_secrets), "Image pull secrets"),
                EventMetadataEntry.text(
                    str(job_config.service_account_name), "Service account name"
                ),
            ],
            marker_end=DELEGATE_MARKER,
        ),
        CeleryK8sJobExecutor,
        # validated above that step_keys is length 1, and it is not possible to use ETH or
        # execution plan in this function (Celery K8s workers should not access to user code)
        step_key=step_key,
    )
    events.append(engine_event)

    try:
        kubernetes.client.BatchV1Api().create_namespaced_job(body=job, namespace=job_namespace)
    except kubernetes.client.rest.ApiException as e:
        if e.reason == "Conflict":
            # There is an existing job with the same name so proceed and see if the
            # existing job succeeded
            instance.report_engine_event(
                "Did not create Kubernetes job {} for step {} since job name already "
                "exists, proceeding with existing job.".format(job_name, step_key),
                pipeline_run,
                EngineEventData(
                    [
                        EventMetadataEntry.text(step_key, "Step key"),
                        EventMetadataEntry.text(job_name, "Kubernetes Job name"),
                    ],
                    marker_end=DELEGATE_MARKER,
                ),
                CeleryK8sJobExecutor,
                step_key=step_key,
            )
        else:
            instance.report_engine_event(
                "Encountered unexpected error while creating Kubernetes job {} for step {}, "
                "exiting.".format(job_name, step_key),
                pipeline_run,
                EngineEventData(
                    [
                        EventMetadataEntry.text(step_key, "Step key"),
                    ],
                    error=serializable_error_info_from_exc_info(sys.exc_info()),
                ),
                CeleryK8sJobExecutor,
                step_key=step_key,
            )
            return []

    try:
        wait_for_job_success(
            job_name=job_name,
            namespace=job_namespace,
            instance=instance,
            run_id=execute_step_args.pipeline_run_id,
        )
    except (DagsterK8sError, DagsterK8sTimeoutError) as err:
        # Job-level failure/timeout: convert into a step failure event and continue
        # so log collection below still happens.
        step_failure_event = construct_step_failure_event_and_handle(
            pipeline_run, step_key, err, instance=instance
        )
        events.append(step_failure_event)
    except DagsterK8sPipelineStatusException:
        # Run left the STARTED state while we were waiting — clean up the job.
        instance.report_engine_event(
            "Terminating Kubernetes Job because pipeline run status is not STARTED",
            pipeline_run,
            EngineEventData(
                [
                    EventMetadataEntry.text(step_key, "Step key"),
                    EventMetadataEntry.text(job_name, "Kubernetes Job name"),
                    EventMetadataEntry.text(job_namespace, "Kubernetes Job namespace"),
                ]
            ),
            CeleryK8sJobExecutor,
            step_key=step_key,
        )
        delete_job(job_name=job_name, namespace=job_namespace)
        return []
    except (
        DagsterK8sUnrecoverableAPIError,
        DagsterK8sAPIRetryLimitExceeded,
        # We shouldn't see unwrapped APIExceptions anymore, as they should all be wrapped in
        # a retry boundary. We still catch it here just in case we missed one so that we can
        # report it to the event log
        kubernetes.client.rest.ApiException,
    ) as err:
        instance.report_engine_event(
            "Encountered unexpected error while waiting on Kubernetes job {} for step {}, "
            "exiting.".format(job_name, step_key),
            pipeline_run,
            EngineEventData(
                [
                    EventMetadataEntry.text(step_key, "Step key"),
                ],
                error=serializable_error_info_from_exc_info(sys.exc_info()),
            ),
            CeleryK8sJobExecutor,
            step_key=step_key,
        )
        return []

    try:
        pod_names = get_pod_names_in_job(job_name, namespace=job_namespace)
    except kubernetes.client.rest.ApiException as e:
        instance.report_engine_event(
            "Encountered unexpected error retreiving Pods for Kubernetes job {} for step {}, "
            "exiting.".format(job_name, step_key),
            pipeline_run,
            EngineEventData(
                [
                    EventMetadataEntry.text(step_key, "Step key"),
                ],
                error=serializable_error_info_from_exc_info(sys.exc_info()),
            ),
            CeleryK8sJobExecutor,
            step_key=step_key,
        )
        return []

    # Post engine event for log retrieval
    engine_event = instance.report_engine_event(
        "Retrieving logs from Kubernetes Job pods",
        pipeline_run,
        EngineEventData([EventMetadataEntry.text("\n".join(pod_names), "Pod names")]),
        CeleryK8sJobExecutor,
        step_key=step_key,
    )
    events.append(engine_event)

    logs = []
    for pod_name in pod_names:
        try:
            raw_logs = retrieve_pod_logs(pod_name, namespace=job_namespace)
            logs += raw_logs.split("\n")
        except kubernetes.client.rest.ApiException as e:
            # Best-effort: report and keep fetching logs from the remaining pods.
            instance.report_engine_event(
                "Encountered unexpected error while fetching pod logs for Kubernetes job {}, "
                "Pod name {} for step {}. Will attempt to continue with other pods.".format(
                    job_name, pod_name, step_key
                ),
                pipeline_run,
                EngineEventData(
                    [
                        EventMetadataEntry.text(step_key, "Step key"),
                    ],
                    error=serializable_error_info_from_exc_info(sys.exc_info()),
                ),
                CeleryK8sJobExecutor,
                step_key=step_key,
            )

    # Dagster events emitted by the step process are embedded in the pod logs.
    events += filter_dagster_events_from_pod_logs(logs)
    serialized_events = [serialize_dagster_namedtuple(event) for event in events]
    return serialized_events
def _execute_step_k8s_job(
    self,
    instance_ref_dict,
    step_keys,
    run_config,
    mode,
    repo_name,
    repo_location_name,
    run_id,
    job_config_dict,
    job_namespace,
    load_incluster_config,
    retries_dict,
    pipeline_origin_packed,
    user_defined_k8s_config_dict=None,
    kubeconfig_file=None,
):
    """Run step execution in a K8s job pod.

    Celery task body: validates its (Celery-serialized) arguments, creates a
    Kubernetes Job that runs ``dagster api execute_step_with_structured_logs``
    for exactly one step, waits for that Job to finish, and returns the
    Dagster events recovered from the Job's pod logs.

    Returns:
        Optional[List[str]]: Serialized Dagster events generated during
        execution, or ``None`` when the run is no longer STARTED or Job
        creation failed with an unexpected error.
    """
    check.dict_param(instance_ref_dict, "instance_ref_dict")
    check.list_param(step_keys, "step_keys", of_type=str)
    check.invariant(
        len(step_keys) == 1, "Celery K8s task executor can only execute 1 step at a time"
    )
    check.dict_param(run_config, "run_config")
    check.str_param(mode, "mode")
    check.str_param(repo_name, "repo_name")
    check.str_param(repo_location_name, "repo_location_name")
    check.str_param(run_id, "run_id")

    # Celery will serialize this as a list
    job_config = DagsterK8sJobConfig.from_dict(job_config_dict)
    check.inst_param(job_config, "job_config", DagsterK8sJobConfig)
    check.str_param(job_namespace, "job_namespace")
    check.bool_param(load_incluster_config, "load_incluster_config")
    check.dict_param(retries_dict, "retries_dict")

    pipeline_origin = unpack_value(
        check.dict_param(
            pipeline_origin_packed, "pipeline_origin_packed"
        )  # TODO: make part of args
    )
    check.inst(pipeline_origin, PipelineOrigin)

    # NOTE(review): from_dict is called even when user_defined_k8s_config_dict
    # is None (the parameter default) — assumes from_dict tolerates a falsy
    # argument; confirm against UserDefinedDagsterK8sConfig.
    user_defined_k8s_config = UserDefinedDagsterK8sConfig.from_dict(
        user_defined_k8s_config_dict
    )
    check.opt_inst_param(
        user_defined_k8s_config,
        "user_defined_k8s_config",
        UserDefinedDagsterK8sConfig,
    )
    check.opt_str_param(kubeconfig_file, "kubeconfig_file")

    # For when launched via DinD or running the cluster
    if load_incluster_config:
        kubernetes.config.load_incluster_config()
    else:
        kubernetes.config.load_kube_config(kubeconfig_file)

    instance_ref = InstanceRef.from_dict(instance_ref_dict)
    instance = DagsterInstance.from_ref(instance_ref)
    pipeline_run = instance.get_run_by_id(run_id)
    check.invariant(pipeline_run, "Could not load run {}".format(run_id))

    step_key = step_keys[0]

    # Record which Celery worker / pod picked up this step for debuggability.
    celery_worker_name = self.request.hostname
    celery_pod_name = os.environ.get("HOSTNAME")
    instance.report_engine_event(
        "Task for step {step_key} picked up by Celery".format(step_key=step_key),
        pipeline_run,
        EngineEventData(
            [
                EventMetadataEntry.text(celery_worker_name, "Celery worker name"),
                EventMetadataEntry.text(celery_pod_name, "Celery worker Kubernetes Pod name"),
            ]
        ),
        CeleryK8sJobExecutor,
        step_key=step_key,
    )

    # Skip scheduling when the run has already moved out of STARTED
    # (e.g. it was terminated while this task sat in the queue).
    if pipeline_run.status != PipelineRunStatus.STARTED:
        instance.report_engine_event(
            "Not scheduling step because pipeline run status is not STARTED",
            pipeline_run,
            EngineEventData([EventMetadataEntry.text(step_key, "Step keys"),]),
            CeleryK8sJobExecutor,
            step_key=step_key,
        )
        return

    # Ensure we stay below k8s name length limits
    k8s_name_key = get_k8s_job_name(run_id, step_key)

    retries = Retries.from_config(retries_dict)

    # Retried steps get a distinct Job/Pod name suffixed with the attempt number.
    if retries.get_attempt_count(step_key):
        attempt_number = retries.get_attempt_count(step_key)
        job_name = "dagster-job-%s-%d" % (k8s_name_key, attempt_number)
        pod_name = "dagster-job-%s-%d" % (k8s_name_key, attempt_number)
    else:
        job_name = "dagster-job-%s" % (k8s_name_key)
        pod_name = "dagster-job-%s" % (k8s_name_key)

    # CLI payload executed inside the Job's container.
    input_json = serialize_dagster_namedtuple(
        ExecuteStepArgs(
            pipeline_origin=pipeline_origin,
            pipeline_run_id=run_id,
            instance_ref=None,
            mode=mode,
            step_keys_to_execute=step_keys,
            run_config=run_config,
            retries_dict=retries_dict,
        )
    )
    command = ["dagster"]
    args = ["api", "execute_step_with_structured_logs", input_json]

    job = construct_dagster_k8s_job(
        job_config, command, args, job_name, user_defined_k8s_config, pod_name
    )

    # Running list of events generated from this task execution
    events = []

    # Post event for starting execution
    job_name = job.metadata.name
    engine_event = instance.report_engine_event(
        "Executing step {} in Kubernetes job {}".format(step_key, job_name),
        pipeline_run,
        EngineEventData(
            [
                EventMetadataEntry.text(step_key, "Step keys"),
                EventMetadataEntry.text(job_name, "Kubernetes Job name"),
                EventMetadataEntry.text(pod_name, "Kubernetes Pod name"),
                EventMetadataEntry.text(job_config.job_image, "Job image"),
                EventMetadataEntry.text(job_config.image_pull_policy, "Image pull policy"),
                EventMetadataEntry.text(
                    str(job_config.image_pull_secrets), "Image pull secrets"
                ),
                EventMetadataEntry.text(
                    str(job_config.service_account_name), "Service account name"
                ),
            ],
            marker_end=DELEGATE_MARKER,
        ),
        CeleryK8sJobExecutor,
        # validated above that step_keys is length 1, and it is not possible to use ETH or
        # execution plan in this function (Celery K8s workers should not access to user code)
        step_key=step_key,
    )
    events.append(engine_event)

    try:
        kubernetes.client.BatchV1Api().create_namespaced_job(body=job, namespace=job_namespace)
    except kubernetes.client.rest.ApiException as e:
        if e.reason == "Conflict":
            # There is an existing job with the same name so do not proceed
            # with creation. NOTE(review): no return here — control falls
            # through and waits on the pre-existing Job below, despite the
            # "exiting" wording in the event message.
            instance.report_engine_event(
                "Did not create Kubernetes job {} for step {} since job name already "
                "exists, exiting.".format(job_name, step_key),
                pipeline_run,
                EngineEventData(
                    [
                        EventMetadataEntry.text(step_key, "Step keys"),
                        EventMetadataEntry.text(job_name, "Kubernetes Job name"),
                        EventMetadataEntry.text(pod_name, "Kubernetes Pod name"),
                    ],
                    marker_end=DELEGATE_MARKER,
                ),
                CeleryK8sJobExecutor,
                step_key=step_key,
            )
        else:
            instance.report_engine_event(
                "Encountered unexpected error while creating Kubernetes job {} for step {}, "
                "exiting.".format(job_name, step_key),
                pipeline_run,
                EngineEventData(
                    [
                        EventMetadataEntry.text(step_key, "Step keys"),
                        EventMetadataEntry.text(e, "Error"),
                    ]
                ),
                CeleryK8sJobExecutor,
                step_key=step_key,
            )
            return

    try:
        wait_for_job_success(
            job_name=job_name,
            namespace=job_namespace,
            instance=instance,
            run_id=run_id,
        )
    except DagsterK8sPipelineStatusException:
        # Run was moved out of STARTED while we were waiting: tear the Job down.
        instance.report_engine_event(
            "Terminating Kubernetes Job because pipeline run status is not STARTED",
            pipeline_run,
            EngineEventData(
                [
                    EventMetadataEntry.text(step_key, "Step keys"),
                    EventMetadataEntry.text(job_name, "Kubernetes Job name"),
                    EventMetadataEntry.text(job_namespace, "Kubernetes Job namespace"),
                ]
            ),
            CeleryK8sJobExecutor,
            step_key=step_key,
        )
        delete_job(job_name=job_name, namespace=job_namespace)
        return

    pod_names = get_pod_names_in_job(job_name, namespace=job_namespace)

    # Post engine event for log retrieval
    engine_event = instance.report_engine_event(
        "Retrieving logs from Kubernetes Job pods",
        pipeline_run,
        EngineEventData([EventMetadataEntry.text("\n".join(pod_names), "Pod names")]),
        CeleryK8sJobExecutor,
        step_key=step_key,
    )
    events.append(engine_event)

    logs = []
    for pod_name in pod_names:
        raw_logs = retrieve_pod_logs(pod_name, namespace=job_namespace)
        logs += raw_logs.split("\n")

    # The step subprocess emits serialized Dagster events to stdout; recover
    # them from the interleaved pod log lines.
    events += filter_dagster_events_from_pod_logs(logs)
    serialized_events = [serialize_dagster_namedtuple(event) for event in events]
    return serialized_events
def _execute_step_k8s_job(
    _self,
    instance_ref_dict,
    step_keys,
    run_config,
    mode,
    repo_name,
    repo_location_name,
    run_id,
    job_config_dict,
    job_namespace,
    load_incluster_config,
    retries_dict,
    resources=None,
    kubeconfig_file=None,
):
    '''Run step execution in a K8s job pod.

    Celery task body (dagster-graphql variant): validates its arguments,
    launches a Kubernetes Job that invokes the GraphQL ``executePlan``
    mutation for exactly one step, waits for it to succeed, then parses the
    GraphQL result out of the pod logs and returns the serialized events.

    Returns:
        List[str]: Serialized Dagster events generated during execution.
    '''
    check.dict_param(instance_ref_dict, 'instance_ref_dict')
    check.list_param(step_keys, 'step_keys', of_type=str)
    check.invariant(
        len(step_keys) == 1, 'Celery K8s task executor can only execute 1 step at a time'
    )
    check.dict_param(run_config, 'run_config')
    check.str_param(mode, 'mode')
    check.str_param(repo_name, 'repo_name')
    check.str_param(repo_location_name, 'repo_location_name')
    check.str_param(run_id, 'run_id')

    # Celery will serialize this as a list
    job_config = DagsterK8sJobConfig.from_dict(job_config_dict)
    check.inst_param(job_config, 'job_config', DagsterK8sJobConfig)
    check.str_param(job_namespace, 'job_namespace')
    check.bool_param(load_incluster_config, 'load_incluster_config')
    check.dict_param(retries_dict, 'retries_dict')
    check.opt_dict_param(resources, 'resources', key_type=str, value_type=dict)
    check.opt_str_param(kubeconfig_file, 'kubeconfig_file')

    # For when launched via DinD or running the cluster
    if load_incluster_config:
        kubernetes.config.load_incluster_config()
    else:
        kubernetes.config.load_kube_config(kubeconfig_file)

    instance_ref = InstanceRef.from_dict(instance_ref_dict)
    instance = DagsterInstance.from_ref(instance_ref)
    pipeline_run = instance.get_run_by_id(run_id)
    check.invariant(pipeline_run, 'Could not load run {}'.format(run_id))

    step_keys_str = ", ".join(step_keys)

    # Ensure we stay below k8s name length limits
    k8s_name_key = _get_k8s_name_key(run_id, step_keys)

    retries = Retries.from_config(retries_dict)

    # Retried steps get a distinct Job/Pod name suffixed with the attempt number.
    if retries.get_attempt_count(step_keys[0]):
        attempt_number = retries.get_attempt_count(step_keys[0])
        job_name = 'dagster-job-%s-%d' % (k8s_name_key, attempt_number)
        pod_name = 'dagster-job-%s-%d' % (k8s_name_key, attempt_number)
    else:
        job_name = 'dagster-job-%s' % (k8s_name_key)
        pod_name = 'dagster-job-%s' % (k8s_name_key)

    # Input for the dagster-graphql `executePlan` invocation run in the Job.
    variables = {
        'executionParams': {
            'runConfigData': run_config,
            'mode': mode,
            'selector': {
                'repositoryLocationName': repo_location_name,
                'repositoryName': repo_name,
                'pipelineName': pipeline_run.pipeline_name,
                'solidSelection': pipeline_run.solid_selection,
            },
            'executionMetadata': {'runId': run_id},
            'stepKeys': step_keys,
        },
        'retries': retries.to_graphql_input(),
    }
    args = ['-p', 'executePlan', '-v', seven.json.dumps(variables)]

    job = construct_dagster_graphql_k8s_job(job_config, args, job_name, resources, pod_name)

    # Running list of events generated from this task execution
    events = []

    # Post event for starting execution
    engine_event = instance.report_engine_event(
        'Executing steps {} in Kubernetes job {}'.format(step_keys_str, job.metadata.name),
        pipeline_run,
        EngineEventData(
            [
                EventMetadataEntry.text(step_keys_str, 'Step keys'),
                EventMetadataEntry.text(job.metadata.name, 'Kubernetes Job name'),
                EventMetadataEntry.text(pod_name, 'Kubernetes Pod name'),
                EventMetadataEntry.text(job_config.job_image, 'Job image'),
                EventMetadataEntry.text(job_config.image_pull_policy, 'Image pull policy'),
                EventMetadataEntry.text(
                    str(job_config.image_pull_secrets), 'Image pull secrets'
                ),
                EventMetadataEntry.text(
                    str(job_config.service_account_name), 'Service account name'
                ),
            ],
            marker_end=DELEGATE_MARKER,
        ),
        CeleryK8sJobExecutor,
        # validated above that step_keys is length 1, and it is not possible to use ETH or
        # execution plan in this function (Celery K8s workers should not access to user code)
        step_key=step_keys[0],
    )
    events.append(engine_event)

    # NOTE(review): unlike the other task variants, Job creation and waiting
    # are unguarded here — an ApiException (e.g. name conflict) propagates to
    # Celery instead of being reported as an engine event.
    kubernetes.client.BatchV1Api().create_namespaced_job(body=job, namespace=job_namespace)

    wait_for_job_success(job.metadata.name, namespace=job_namespace)
    pod_names = get_pod_names_in_job(job.metadata.name, namespace=job_namespace)

    # Post engine event for log retrieval
    engine_event = instance.report_engine_event(
        'Retrieving logs from Kubernetes Job pods',
        pipeline_run,
        EngineEventData([EventMetadataEntry.text('\n'.join(pod_names), 'Pod names')]),
        CeleryK8sJobExecutor,
        step_key=step_keys[0],
    )
    events.append(engine_event)

    logs = []
    for pod_name in pod_names:
        raw_logs = retrieve_pod_logs(pod_name, namespace=job_namespace)
        logs += raw_logs.split('\n')

    # Parse the GraphQL response out of the pod log lines and surface the
    # per-step events from the executePlan result.
    res = parse_raw_log_lines(logs)
    handle_execution_errors(res, 'executePlan')
    step_events = handle_execute_plan_result(res)

    events += step_events

    serialized_events = [serialize_dagster_namedtuple(event) for event in events]
    return serialized_events
def _execute_step_k8s_job(
    _self,
    instance_ref_dict,
    step_keys,
    run_config,
    mode,
    repo_name,
    repo_location_name,
    run_id,
    job_config_dict,
    job_namespace,
    load_incluster_config,
    retries_dict,
    pipeline_origin_packed,
    resources=None,
    kubeconfig_file=None,
):
    '''Run step execution in a K8s job pod.

    Celery task body: validates its (Celery-serialized) arguments, creates a
    Kubernetes Job that runs ``dagster api execute_step_with_structured_logs``
    for exactly one step, waits for the Job, and returns the Dagster events
    recovered from the Job's pod logs.

    Returns:
        Optional[List[str]]: Serialized Dagster events, or ``None`` when the
        run is no longer STARTED or Job creation failed unexpectedly.
    '''
    check.dict_param(instance_ref_dict, 'instance_ref_dict')
    check.list_param(step_keys, 'step_keys', of_type=str)
    check.invariant(
        len(step_keys) == 1, 'Celery K8s task executor can only execute 1 step at a time')
    check.dict_param(run_config, 'run_config')
    check.str_param(mode, 'mode')
    check.str_param(repo_name, 'repo_name')
    check.str_param(repo_location_name, 'repo_location_name')
    check.str_param(run_id, 'run_id')

    # Celery will serialize this as a list
    job_config = DagsterK8sJobConfig.from_dict(job_config_dict)
    check.inst_param(job_config, 'job_config', DagsterK8sJobConfig)
    check.str_param(job_namespace, 'job_namespace')
    check.bool_param(load_incluster_config, 'load_incluster_config')
    check.dict_param(retries_dict, 'retries_dict')

    pipeline_origin = unpack_value(
        check.dict_param(
            pipeline_origin_packed, 'pipeline_origin_packed')  # TODO: make part of args
    )
    check.inst(pipeline_origin, PipelineOrigin)

    check.opt_dict_param(resources, 'resources', key_type=str, value_type=dict)
    check.opt_str_param(kubeconfig_file, 'kubeconfig_file')

    # For when launched via DinD or running the cluster
    if load_incluster_config:
        kubernetes.config.load_incluster_config()
    else:
        kubernetes.config.load_kube_config(kubeconfig_file)

    instance_ref = InstanceRef.from_dict(instance_ref_dict)
    instance = DagsterInstance.from_ref(instance_ref)
    pipeline_run = instance.get_run_by_id(run_id)
    check.invariant(pipeline_run, 'Could not load run {}'.format(run_id))

    step_key = step_keys[0]

    # Skip scheduling when the run has already moved out of STARTED
    # (e.g. it was terminated while this task sat in the queue).
    if pipeline_run.status != PipelineRunStatus.STARTED:
        instance.report_engine_event(
            'Not scheduling step because pipeline run status is not STARTED',
            pipeline_run,
            EngineEventData([
                EventMetadataEntry.text(step_key, 'Step keys'),
            ]),
            CeleryK8sJobExecutor,
            step_key=step_key,
        )
        return

    # Ensure we stay below k8s name length limits
    k8s_name_key = get_k8s_job_name(run_id, step_key)

    retries = Retries.from_config(retries_dict)

    # Retried steps get a distinct Job/Pod name suffixed with the attempt number.
    if retries.get_attempt_count(step_key):
        attempt_number = retries.get_attempt_count(step_key)
        job_name = 'dagster-job-%s-%d' % (k8s_name_key, attempt_number)
        pod_name = 'dagster-job-%s-%d' % (k8s_name_key, attempt_number)
    else:
        job_name = 'dagster-job-%s' % (k8s_name_key)
        pod_name = 'dagster-job-%s' % (k8s_name_key)

    # CLI payload executed inside the Job's container.
    input_json = serialize_dagster_namedtuple(
        ExecuteStepArgs(
            pipeline_origin=pipeline_origin,
            pipeline_run_id=run_id,
            instance_ref=None,
            mode=mode,
            step_keys_to_execute=step_keys,
            run_config=run_config,
            retries_dict=retries_dict,
        ))

    command = ['dagster']
    args = ['api', 'execute_step_with_structured_logs', input_json]

    job = construct_dagster_k8s_job(job_config, command, args, job_name, resources, pod_name)

    # Running list of events generated from this task execution
    events = []

    # Post event for starting execution
    job_name = job.metadata.name
    engine_event = instance.report_engine_event(
        'Executing step {} in Kubernetes job {}'.format(
            step_key, job_name),
        pipeline_run,
        EngineEventData(
            [
                EventMetadataEntry.text(step_key, 'Step keys'),
                EventMetadataEntry.text(job_name, 'Kubernetes Job name'),
                EventMetadataEntry.text(pod_name, 'Kubernetes Pod name'),
                EventMetadataEntry.text(job_config.job_image, 'Job image'),
                EventMetadataEntry.text(job_config.image_pull_policy, 'Image pull policy'),
                EventMetadataEntry.text(str(job_config.image_pull_secrets), 'Image pull secrets'),
                EventMetadataEntry.text(
                    str(job_config.service_account_name), 'Service account name'),
            ],
            marker_end=DELEGATE_MARKER,
        ),
        CeleryK8sJobExecutor,
        # validated above that step_keys is length 1, and it is not possible to use ETH or
        # execution plan in this function (Celery K8s workers should not access to user code)
        step_key=step_key,
    )
    events.append(engine_event)

    try:
        kubernetes.client.BatchV1Api().create_namespaced_job(
            body=job, namespace=job_namespace)
    except kubernetes.client.rest.ApiException as e:
        if e.reason == 'Conflict':
            # There is an existing job with the same name so do not proceed
            # with creation. NOTE(review): no return here — control falls
            # through and waits on the pre-existing Job below, despite the
            # "exiting" wording in the event message.
            instance.report_engine_event(
                'Did not create Kubernetes job {} for step {} since job name already '
                'exists, exiting.'.format(job_name, step_key),
                pipeline_run,
                EngineEventData(
                    [
                        EventMetadataEntry.text(step_key, 'Step keys'),
                        EventMetadataEntry.text(job_name, 'Kubernetes Job name'),
                        EventMetadataEntry.text(pod_name, 'Kubernetes Pod name'),
                    ],
                    marker_end=DELEGATE_MARKER,
                ),
                CeleryK8sJobExecutor,
                step_key=step_key,
            )
        else:
            instance.report_engine_event(
                'Encountered unexpected error while creating Kubernetes job {} for step {}, '
                'exiting.'.format(job_name, step_key),
                pipeline_run,
                EngineEventData([
                    EventMetadataEntry.text(step_key, 'Step keys'),
                    EventMetadataEntry.text(e, 'Error'),
                ]),
                CeleryK8sJobExecutor,
                step_key=step_key,
            )
            return

    try:
        wait_for_job_success(
            job_name=job_name,
            namespace=job_namespace,
            instance=instance,
            run_id=run_id,
        )
    except DagsterK8sPipelineStatusException:
        # Run was moved out of STARTED while we were waiting: tear the Job down.
        instance.report_engine_event(
            'Terminating Kubernetes Job because pipeline run status is not STARTED',
            pipeline_run,
            EngineEventData([
                EventMetadataEntry.text(step_key, 'Step keys'),
                EventMetadataEntry.text(job_name, 'Kubernetes Job name'),
                EventMetadataEntry.text(job_namespace, 'Kubernetes Job namespace'),
            ]),
            CeleryK8sJobExecutor,
            step_key=step_key,
        )
        delete_job(job_name=job_name, namespace=job_namespace)
        return

    pod_names = get_pod_names_in_job(job_name, namespace=job_namespace)

    # Post engine event for log retrieval
    engine_event = instance.report_engine_event(
        'Retrieving logs from Kubernetes Job pods',
        pipeline_run,
        EngineEventData(
            [EventMetadataEntry.text('\n'.join(pod_names), 'Pod names')]),
        CeleryK8sJobExecutor,
        step_key=step_key,
    )
    events.append(engine_event)

    logs = []
    for pod_name in pod_names:
        raw_logs = retrieve_pod_logs(pod_name, namespace=job_namespace)
        logs += raw_logs.split('\n')

    # The step subprocess emits serialized Dagster events to stdout; recover
    # them from the interleaved pod log lines.
    events += filter_dagster_events_from_pod_logs(logs)
    serialized_events = [
        serialize_dagster_namedtuple(event) for event in events
    ]
    return serialized_events
def test_construct_dagster_k8s_job_with_mounts():
    """User-supplied volumes/volume_mounts land in the rendered Job spec;
    unknown volume keys raise at render time."""

    def _config_with_volumes(volumes):
        # All three scenarios share the same config except for `volumes`.
        return DagsterK8sJobConfig(
            job_image="test/foo:latest",
            dagster_home="/opt/dagster/dagster_home",
            image_pull_policy="Always",
            image_pull_secrets=[{"name": "my_secret"}],
            service_account_name=None,
            instance_config_map="some-instance-configmap",
            postgres_password_secret=None,
            env_config_maps=None,
            env_secrets=None,
            volume_mounts=[{"name": "foo", "mountPath": "biz/buz", "subPath": "file.txt"}],
            volumes=volumes,
        )

    # Volume backed by a config map.
    cfg = _config_with_volumes([{"name": "foo", "configMap": {"name": "settings-cm"}}])
    job = construct_dagster_k8s_job(cfg, ["foo", "bar"], "job123").to_dict()
    pod_spec = job["spec"]["template"]["spec"]
    assert len(pod_spec["volumes"]) == 2
    foo_volumes = [v for v in pod_spec["volumes"] if v["name"] == "foo"]
    assert len(foo_volumes) == 1
    assert foo_volumes[0]["config_map"]["name"] == "settings-cm"

    assert len(pod_spec["containers"][0]["volume_mounts"]) == 2
    foo_volumes_mounts = [
        m for m in pod_spec["containers"][0]["volume_mounts"] if m["name"] == "foo"
    ]
    assert len(foo_volumes_mounts) == 1

    # Volume backed by a secret.
    cfg = _config_with_volumes([{"name": "foo", "secret": {"secretName": "settings-secret"}}])
    job = construct_dagster_k8s_job(cfg, ["foo", "bar"], "job123").to_dict()
    pod_spec = job["spec"]["template"]["spec"]
    assert len(pod_spec["volumes"]) == 2
    foo_volumes = [v for v in pod_spec["volumes"] if v["name"] == "foo"]
    assert len(foo_volumes) == 1
    assert foo_volumes[0]["secret"]["secret_name"] == "settings-secret"

    # An unrecognized key inside a volume definition is rejected by the
    # kubernetes model layer when the Job is rendered.
    cfg_with_invalid_volume_key = _config_with_volumes(
        [{"name": "foo", "invalidKey": "settings-secret"}]
    )
    with pytest.raises(
        Exception, match="Unexpected keys in model class V1Volume: {'invalidKey'}"
    ):
        construct_dagster_k8s_job(cfg_with_invalid_volume_key, ["foo", "bar"], "job123").to_dict()