Example #1
def test_construct_dagster_k8s_job_with_sidecar_container():
    cfg = DagsterK8sJobConfig(
        job_image="test/foo:latest",
        dagster_home="/opt/dagster/dagster_home",
        instance_config_map="test",
    )
    job = construct_dagster_k8s_job(cfg, [], "job123").to_dict()

    assert job["spec"][
        "ttl_seconds_after_finished"] == DEFAULT_K8S_JOB_TTL_SECONDS_AFTER_FINISHED

    user_defined_cfg = UserDefinedDagsterK8sConfig(pod_spec_config={
        "containers": [{
            "command": ["echo", "HI"],
            "image": "sidecar:bar",
            "name": "sidecar"
        }]
    }, )
    job = construct_dagster_k8s_job(
        cfg, [], "job123", user_defined_k8s_config=user_defined_cfg).to_dict()

    containers = job["spec"]["template"]["spec"]["containers"]

    assert len(containers) == 2

    assert containers[0]["image"] == "test/foo:latest"

    assert containers[1]["image"] == "sidecar:bar"
    assert containers[1]["command"] == ["echo", "HI"]
    assert containers[1]["name"] == "sidecar"
Example #2
def test_construct_dagster_k8s_job_with_job_op_labels():
    common_labels = {
        "app.kubernetes.io/name": "dagster",
        "app.kubernetes.io/instance": "dagster",
        "app.kubernetes.io/version": dagster_version,
        "app.kubernetes.io/part-of": "dagster",
    }

    cfg = DagsterK8sJobConfig(
        job_image="test/foo:latest",
        dagster_home="/opt/dagster/dagster_home",
        instance_config_map="test",
    )
    job1 = construct_dagster_k8s_job(
        cfg,
        [],
        "job123",
        labels={
            "dagster/job": "some_job",
            "dagster/op": "some_op",
        },
    ).to_dict()
    expected_labels1 = dict(
        **common_labels,
        **{
            "dagster/job": "some_job",
            "dagster/op": "some_op",
        },
    )
    assert job1["metadata"]["labels"] == expected_labels1
    assert job1["spec"]["template"]["metadata"]["labels"] == expected_labels1

    job2 = construct_dagster_k8s_job(
        cfg,
        [],
        "job456",
        labels={
            "dagster/job":
            "long_job_name_64____01234567890123456789012345678901234567890123",
            "dagster/op":
            "long_op_name_64_____01234567890123456789012345678901234567890123",
        },
    ).to_dict()
    expected_labels2 = dict(
        **common_labels,
        **{
            # The last character should be truncated.
            "dagster/job":
            "long_job_name_64____0123456789012345678901234567890123456789012",
            "dagster/op":
            "long_op_name_64_____0123456789012345678901234567890123456789012",
        },
    )
    assert job2["metadata"]["labels"] == expected_labels2
    assert job2["spec"]["template"]["metadata"]["labels"] == expected_labels2
Example #3
def test_construct_dagster_k8s_job_with_ttl():
    cfg = DagsterK8sJobConfig(
        job_image="test/foo:latest",
        dagster_home="/opt/dagster/dagster_home",
        instance_config_map="test",
    )
    job = construct_dagster_k8s_job(cfg, [], "job123").to_dict()
    assert job["spec"][
        "ttl_seconds_after_finished"] == DEFAULT_K8S_JOB_TTL_SECONDS_AFTER_FINISHED

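    # A TTL of 0 makes the finished Job eligible for cleanup immediately.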
    user_defined_cfg = UserDefinedDagsterK8sConfig(
        job_spec_config={"ttl_seconds_after_finished": 0}, )
    job = construct_dagster_k8s_job(
        cfg, [], "job123", user_defined_k8s_config=user_defined_cfg).to_dict()
    assert job["spec"]["ttl_seconds_after_finished"] == 0
Example #4
def test_construct_dagster_k8s_job_with_user_defined_env_from():
    @graph
    def user_defined_k8s_env_from_tags_graph():
        pass

    # These fields still work even when using underscore keys
    user_defined_k8s_config = get_user_defined_k8s_config(
        user_defined_k8s_env_from_tags_graph.to_job(
            tags={
                USER_DEFINED_K8S_CONFIG_KEY: {
                    "container_config": {
                        "envFrom": [
                            {
                                "configMapRef": {
                                    "name": "user_config_map_ref",
                                    "optional": "True",
                                }
                            },
                            {
                                "secretRef": {
                                    "name": "user_secret_ref_one",
                                    "optional": "True"
                                }
                            },
                            {
                                "secretRef": {
                                    "name": "user_secret_ref_two",
                                    "optional": "False",
                                },
                                "prefix": "with_prefix",
                            },
                        ]
                    }
                }
            }).tags)

    cfg = DagsterK8sJobConfig(
        job_image="test/foo:latest",
        dagster_home="/opt/dagster/dagster_home",
        instance_config_map="some-instance-configmap",
        env_config_maps=["config_map"],
        env_secrets=["secret"],
    )

    job = construct_dagster_k8s_job(
        cfg, ["foo", "bar"],
        "job",
        user_defined_k8s_config=user_defined_k8s_config).to_dict()

    env_from = job["spec"]["template"]["spec"]["containers"][0]["env_from"]
    env_from_mapping = {(env_var.get("config_map_ref")
                         or env_var.get("secret_ref")).get("name"): env_var
                        for env_var in env_from}

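    # Two entries come from cfg (env_config_maps and env_secrets), three from the user-defined config.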
    assert len(env_from_mapping) == 5
    assert env_from_mapping["config_map"]
    assert env_from_mapping["user_config_map_ref"]
    assert env_from_mapping["secret"]
    assert env_from_mapping["user_secret_ref_one"]
    assert env_from_mapping["user_secret_ref_two"]
Example #5
def test_valid_job_format(run_launcher):
    docker_image = test_project_docker_image()

    run_config = load_yaml_from_path(
        os.path.join(test_project_environments_path(), 'env.yaml'))
    pipeline_name = 'demo_pipeline'
    run = PipelineRun(pipeline_name=pipeline_name, run_config=run_config)

    job_name = 'dagster-run-%s' % run.run_id
    pod_name = 'dagster-run-%s' % run.run_id
    job = construct_dagster_k8s_job(
        job_config=run_launcher.job_config,
        command=['dagster-graphql'],
        args=[
            '-p',
            'executeRunInProcess',
            '-v',
            seven.json.dumps({'runId': run.run_id}),
        ],
        job_name=job_name,
        pod_name=pod_name,
        component='runmaster',
    )

    assert (yaml.dump(
        remove_none_recursively(job.to_dict()),
        default_flow_style=False).strip() == EXPECTED_JOB_SPEC.format(
            run_id=run.run_id,
            job_image=docker_image,
            image_pull_policy=image_pull_policy(),
            dagster_version=dagster_version,
            resources='',
        ).strip())
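
These format tests pass the job dict through remove_none_recursively before dumping to YAML. A sketch of the assumed behavior (drop None entries at every level; not the library's actual implementation):

def remove_none_recursively_sketch(obj):
    if isinstance(obj, dict):
        return {
            key: remove_none_recursively_sketch(value)
            for key, value in obj.items() if value is not None
        }
    if isinstance(obj, (list, tuple)):
        return type(obj)(remove_none_recursively_sketch(item) for item in obj)
    return obj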
Example #6
def test_construct_dagster_k8s_job_with_user_defined_service_account_name():
    @graph
    def user_defined_k8s_service_account_name_tags_graph():
        pass

    user_defined_k8s_config = get_user_defined_k8s_config(
        user_defined_k8s_service_account_name_tags_graph.to_job(tags={
            USER_DEFINED_K8S_CONFIG_KEY: {
                "pod_spec_config": {
                    "service_account_name": "this-should-take-precedence",
                },
            },
        }, ).tags)

    cfg = DagsterK8sJobConfig(
        job_image="test/foo:latest",
        dagster_home="/opt/dagster/dagster_home",
        instance_config_map="some-instance-configmap",
        service_account_name="this-should-be-overridden",
    )

    job = construct_dagster_k8s_job(
        cfg, ["foo", "bar"],
        "job",
        user_defined_k8s_config=user_defined_k8s_config).to_dict()

    service_account_name = job["spec"]["template"]["spec"][
        "service_account_name"]
    assert service_account_name == "this-should-take-precedence"
Example #7
def test_valid_job_format(run_launcher):
    docker_image = test_project_docker_image()

    run_config = load_yaml_from_path(
        os.path.join(test_project_environments_path(), "env.yaml"))
    pipeline_name = "demo_pipeline"
    run = PipelineRun(pipeline_name=pipeline_name, run_config=run_config)

    job_name = "dagster-run-%s" % run.run_id
    pod_name = "dagster-run-%s" % run.run_id
    job = construct_dagster_k8s_job(
        job_config=run_launcher.job_config,
        command=["dagster"],
        args=["api", "execute_run_with_structured_logs"],
        job_name=job_name,
        pod_name=pod_name,
        component="run_coordinator",
    )

    assert (yaml.dump(
        remove_none_recursively(job.to_dict()),
        default_flow_style=False).strip() == EXPECTED_JOB_SPEC.format(
            run_id=run.run_id,
            job_image=docker_image,
            image_pull_policy=image_pull_policy(),
            dagster_version=dagster_version,
            resources="",
        ).strip())
Example #8
def test_construct_dagster_k8s_job_with_mounts():
    cfg = DagsterK8sJobConfig(
        job_image="test/foo:latest",
        dagster_home="/opt/dagster/dagster_home",
        image_pull_policy="Always",
        image_pull_secrets=[{"name": "my_secret"}],
        service_account_name=None,
        instance_config_map="some-instance-configmap",
        postgres_password_secret=None,
        env_config_maps=None,
        env_secrets=None,
        volume_mounts=[
            {"name": "foo", "path": "biz/buz", "sub_path": "file.txt", "configmap": "settings-cm"}
        ],
    )
    job = construct_dagster_k8s_job(cfg, ["foo", "bar"], "job123").to_dict()

    assert len(job["spec"]["template"]["spec"]["volumes"]) == 2
    foo_volumes = [
        volume for volume in job["spec"]["template"]["spec"]["volumes"] if volume["name"] == "foo"
    ]
    assert len(foo_volumes) == 1

    assert len(job["spec"]["template"]["spec"]["containers"][0]["volume_mounts"]) == 2
    foo_volumes_mounts = [
        volume
        for volume in job["spec"]["template"]["spec"]["containers"][0]["volume_mounts"]
        if volume["name"] == "foo"
    ]
    assert len(foo_volumes_mounts) == 1

    cfg = DagsterK8sJobConfig(
        job_image="test/foo:latest",
        dagster_home="/opt/dagster/dagster_home",
        image_pull_policy="Always",
        image_pull_secrets=[{"name": "my_secret"}],
        service_account_name=None,
        instance_config_map="some-instance-configmap",
        postgres_password_secret=None,
        env_config_maps=None,
        env_secrets=None,
        volume_mounts=[
            {"name": "foo", "path": "biz/buz", "sub_path": "file.txt", "secret": "settings-secret"}
        ],
    )
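    # The secret-backed variant only needs to construct without raising; the result is discarded.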
    construct_dagster_k8s_job(cfg, ["foo", "bar"], "job123").to_dict()
Example #9
def test_construct_dagster_k8s_job_with_user_defined_env():
    @graph
    def user_defined_k8s_env_tags_graph():
        pass

    user_defined_k8s_config = get_user_defined_k8s_config(
        user_defined_k8s_env_tags_graph.to_job(
            tags={
                USER_DEFINED_K8S_CONFIG_KEY: {
                    "container_config": {
                        "env": [
                            {
                                "name": "ENV_VAR_1",
                                "value": "one"
                            },
                            {
                                "name": "ENV_VAR_2",
                                "value": "two"
                            },
                            {
                                "name": "DD_AGENT_HOST",
                                "valueFrom": {
                                    "fieldRef": {
                                        "fieldPath": "status.hostIP"
                                    }
                                },
                            },
                        ]
                    }
                }
            }).tags)

    cfg = DagsterK8sJobConfig(
        job_image="test/foo:latest",
        dagster_home="/opt/dagster/dagster_home",
        instance_config_map="some-instance-configmap",
    )

    job = construct_dagster_k8s_job(
        cfg, ["foo", "bar"],
        "job",
        user_defined_k8s_config=user_defined_k8s_config).to_dict()

    env = job["spec"]["template"]["spec"]["containers"][0]["env"]
    env_mapping = remove_none_recursively(
        {env_var["name"]: env_var
         for env_var in env})

    # Has DAGSTER_HOME and three additional env vars
    assert len(env_mapping) == 4
    assert env_mapping["ENV_VAR_1"]["value"] == "one"
    assert env_mapping["ENV_VAR_2"]["value"] == "two"
    assert env_mapping["DD_AGENT_HOST"]["value_from"] == {
        "field_ref": {
            "field_path": "status.hostIP"
        }
    }
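
Note the asymmetry above: the tag input uses the Kubernetes camelCase spelling (valueFrom, fieldRef, fieldPath), while the assertions use snake_case, since the Kubernetes Python client's to_dict() renders attribute names that way. A hypothetical converter illustrating the mapping:

import re

def camel_to_snake(name):
    # "fieldPath" -> "field_path"
    return re.sub(r"(?<=[a-z0-9])([A-Z])",
                  lambda match: "_" + match.group(1).lower(), name)

assert camel_to_snake("valueFrom") == "value_from"
assert camel_to_snake("fieldPath") == "field_path"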
Example #10
def test_valid_job_format_with_backcompat_resources(run_launcher):
    docker_image = test_project_docker_image()

    run_config = load_yaml_from_path(
        os.path.join(test_project_environments_path(), "env.yaml"))
    pipeline_name = "demo_pipeline"
    run = PipelineRun(pipeline_name=pipeline_name, run_config=run_config)

    tags = validate_tags({
        K8S_RESOURCE_REQUIREMENTS_KEY: ({
            "requests": {
                "cpu": "250m",
                "memory": "64Mi"
            },
            "limits": {
                "cpu": "500m",
                "memory": "2560Mi"
            },
        })
    })
    user_defined_k8s_config = get_user_defined_k8s_config(tags)
    job_name = "dagster-run-%s" % run.run_id
    pod_name = "dagster-run-%s" % run.run_id
    job = construct_dagster_k8s_job(
        job_config=run_launcher.job_config,
        command=["dagster-graphql"],
        args=[
            "-p",
            "executeRunInProcess",
            "-v",
            seven.json.dumps({"runId": run.run_id}),
        ],
        job_name=job_name,
        user_defined_k8s_config=user_defined_k8s_config,
        pod_name=pod_name,
        component="run_coordinator",
    )

    assert (yaml.dump(
        remove_none_recursively(job.to_dict()),
        default_flow_style=False).strip() == EXPECTED_JOB_SPEC.format(
            run_id=run.run_id,
            job_image=docker_image,
            image_pull_policy=image_pull_policy(),
            dagster_version=dagster_version,
            resources="""
        resources:
          limits:
            cpu: 500m
            memory: 2560Mi
          requests:
            cpu: 250m
            memory: 64Mi""",
        ).strip())
Example #11
def test_construct_dagster_k8s_job_with_user_defined_volume_mounts_snake_case():
    @graph
    def user_defined_k8s_volume_mounts_tags_graph():
        pass

    # volume_mounts still work even when using underscore keys
    user_defined_k8s_config = get_user_defined_k8s_config(
        user_defined_k8s_volume_mounts_tags_graph.to_job(
            tags={
                USER_DEFINED_K8S_CONFIG_KEY: {
                    "container_config": {
                        "volume_mounts": [
                            {
                                "mountPath": "mount_path",
                                "mountPropagation": "mount_propagation",
                                "name": "a_volume_mount_one",
                                "readOnly": "False",
                                "subPath": "path/",
                            },
                            {
                                "mountPath": "mount_path",
                                "mountPropagation": "mount_propagation",
                                "name": "a_volume_mount_two",
                                "readOnly": "False",
                                "subPathExpr": "path/",
                            },
                        ]
                    }
                }
            }).tags)

    cfg = DagsterK8sJobConfig(
        job_image="test/foo:latest",
        dagster_home="/opt/dagster/dagster_home",
        instance_config_map="some-instance-configmap",
    )

    job = construct_dagster_k8s_job(
        cfg, ["foo", "bar"],
        "job",
        user_defined_k8s_config=user_defined_k8s_config).to_dict()

    volume_mounts = job["spec"]["template"]["spec"]["containers"][0][
        "volume_mounts"]
    volume_mounts_mapping = {
        volume_mount["name"]: volume_mount
        for volume_mount in volume_mounts
    }

    assert len(volume_mounts_mapping) == 3
    assert volume_mounts_mapping["dagster-instance"]
    assert volume_mounts_mapping["a_volume_mount_one"]
    assert volume_mounts_mapping["a_volume_mount_two"]
Example #12
def test_valid_job_format_with_resources(run_launcher):
    docker_image = test_project_docker_image()

    run_config = load_yaml_from_path(
        os.path.join(test_project_environments_path(), 'env.yaml'))
    pipeline_name = 'demo_pipeline'
    run = PipelineRun(pipeline_name=pipeline_name, run_config=run_config)

    tags = validate_tags({
        K8S_RESOURCE_REQUIREMENTS_KEY: ({
            'requests': {
                'cpu': '250m',
                'memory': '64Mi'
            },
            'limits': {
                'cpu': '500m',
                'memory': '2560Mi'
            },
        })
    })
    resources = get_k8s_resource_requirements(tags)
    job_name = 'dagster-run-%s' % run.run_id
    pod_name = 'dagster-run-%s' % run.run_id
    job = construct_dagster_k8s_job(
        job_config=run_launcher.job_config,
        command=['dagster-graphql'],
        args=[
            '-p',
            'executeRunInProcess',
            '-v',
            seven.json.dumps({'runId': run.run_id}),
        ],
        job_name=job_name,
        resources=resources,
        pod_name=pod_name,
        component='runmaster',
    )

    assert (yaml.dump(
        remove_none_recursively(job.to_dict()),
        default_flow_style=False).strip() == EXPECTED_JOB_SPEC.format(
            run_id=run.run_id,
            job_image=docker_image,
            image_pull_policy=image_pull_policy(),
            dagster_version=dagster_version,
            resources='''
        resources:
          limits:
            cpu: 500m
            memory: 2560Mi
          requests:
            cpu: 250m
            memory: 64Mi''',
        ).strip())
Example #13
def test_construct_dagster_k8s_job_with_env():
    with environ({"ENV_VAR_1": "one", "ENV_VAR_2": "two"}):
        cfg = DagsterK8sJobConfig(
            job_image="test/foo:latest",
            dagster_home="/opt/dagster/dagster_home",
            instance_config_map="some-instance-configmap",
            env_vars=["ENV_VAR_1", "ENV_VAR_2"],
        )

        job = construct_dagster_k8s_job(cfg, ["foo", "bar"], "job").to_dict()

        env = job["spec"]["template"]["spec"]["containers"][0]["env"]
        env_mapping = {env_var["name"]: env_var for env_var in env}

        # Has DAGSTER_HOME and two additional env vars
        assert len(env_mapping) == 3
        assert env_mapping["ENV_VAR_1"]["value"] == "one"
        assert env_mapping["ENV_VAR_2"]["value"] == "two"
Example #14
def test_construct_dagster_k8s_job_no_postgres():
    cfg = DagsterK8sJobConfig(
        job_image="test/foo:latest",
        dagster_home="/opt/dagster/dagster_home",
        image_pull_policy="Always",
        image_pull_secrets=[{"name": "my_secret"}],
        service_account_name=None,
        instance_config_map="some-instance-configmap",
        postgres_password_secret=None,
        env_config_maps=None,
        env_secrets=None,
    )
    job = construct_dagster_k8s_job(cfg, ["foo", "bar"], "job123").to_dict()
    assert job["kind"] == "Job"
    assert job["metadata"]["name"] == "job123"
    assert job["spec"]["template"]["spec"]["containers"][0]["image"] == "test/foo:latest"
    assert DAGSTER_PG_PASSWORD_ENV_VAR not in [
        env["name"] for env in job["spec"]["template"]["spec"]["containers"][0]["env"]
    ]
Example #15
def test_sanitize_labels():
    cfg = DagsterK8sJobConfig(
        job_image="test/foo:latest",
        dagster_home="/opt/dagster/dagster_home",
        instance_config_map="test",
    )

    job = construct_dagster_k8s_job(
        cfg,
        [],
        "job456",
        labels={
            "dagster/op": "-get_f\o.o[bar-0]-",  # pylint: disable=anomalous-backslash-in-string
            "my_label": "_WhatsUP",
        },
    ).to_dict()

    assert job["metadata"]["labels"]["dagster/op"] == "get_f-o.o-bar-0"
    assert job["metadata"]["labels"]["my_label"] == "WhatsUP"
Example #16
def test_k8s_tag_op():
    assert my_op
    user_defined_cfg = get_user_defined_k8s_config(my_op.tags)

    cfg = DagsterK8sJobConfig(
        job_image="test/foo:latest",
        dagster_home="/opt/dagster/dagster_home",
        instance_config_map="test",
    )
    job = construct_dagster_k8s_job(cfg, [],
                                    "job123",
                                    user_defined_k8s_config=user_defined_cfg)

    assert job.to_dict(
    )["spec"]["template"]["spec"]["containers"][0]["resources"] == {
        "requests": {
            "cpu": "200m",
            "memory": "32Mi"
        },
        "limits": None,
    }
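
my_op is defined elsewhere in the test module; given the asserted resources, its tag presumably looks roughly like this (a sketch, not the actual definition; assumes from dagster import op):

@op(
    tags={
        USER_DEFINED_K8S_CONFIG_KEY: {
            "container_config": {
                "resources": {
                    "requests": {
                        "cpu": "200m",
                        "memory": "32Mi"
                    },
                },
            },
        },
    })
def my_op():
    pass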
Example #17
    def _execute_step_k8s_job(
        self,
        execute_step_args_packed,
        job_config_dict,
        job_namespace,
        load_incluster_config,
        user_defined_k8s_config_dict=None,
        kubeconfig_file=None,
    ):
        """Run step execution in a K8s job pod."""
        execute_step_args = unpack_value(
            check.dict_param(
                execute_step_args_packed,
                "execute_step_args_packed",
            ))
        check.inst_param(execute_step_args, "execute_step_args",
                         ExecuteStepArgs)
        check.invariant(
            len(execute_step_args.step_keys_to_execute) == 1,
            "Celery K8s task executor can only execute 1 step at a time",
        )

        # Celery will serialize this as a list
        job_config = DagsterK8sJobConfig.from_dict(job_config_dict)
        check.inst_param(job_config, "job_config", DagsterK8sJobConfig)
        check.str_param(job_namespace, "job_namespace")

        check.bool_param(load_incluster_config, "load_incluster_config")

        user_defined_k8s_config = UserDefinedDagsterK8sConfig.from_dict(
            user_defined_k8s_config_dict)
        check.opt_inst_param(
            user_defined_k8s_config,
            "user_defined_k8s_config",
            UserDefinedDagsterK8sConfig,
        )
        check.opt_str_param(kubeconfig_file, "kubeconfig_file")

        # For when launched via DinD or running within the cluster
        if load_incluster_config:
            kubernetes.config.load_incluster_config()
        else:
            kubernetes.config.load_kube_config(kubeconfig_file)

        instance = DagsterInstance.from_ref(execute_step_args.instance_ref)
        pipeline_run = instance.get_run_by_id(
            execute_step_args.pipeline_run_id)

        check.inst(
            pipeline_run,
            PipelineRun,
            "Could not load run {}".format(execute_step_args.pipeline_run_id),
        )
        step_key = execute_step_args.step_keys_to_execute[0]

        celery_worker_name = self.request.hostname
        celery_pod_name = os.environ.get("HOSTNAME")
        instance.report_engine_event(
            "Task for step {step_key} picked up by Celery".format(
                step_key=step_key),
            pipeline_run,
            EngineEventData([
                EventMetadataEntry.text(celery_worker_name,
                                        "Celery worker name"),
                EventMetadataEntry.text(celery_pod_name,
                                        "Celery worker Kubernetes Pod name"),
            ]),
            CeleryK8sJobExecutor,
            step_key=step_key,
        )

        if pipeline_run.status != PipelineRunStatus.STARTED:
            instance.report_engine_event(
                "Not scheduling step because pipeline run status is not STARTED",
                pipeline_run,
                EngineEventData([
                    EventMetadataEntry.text(step_key, "Step key"),
                ]),
                CeleryK8sJobExecutor,
                step_key=step_key,
            )
            return []

        # Ensure we stay below k8s name length limits
        k8s_name_key = get_k8s_job_name(execute_step_args.pipeline_run_id,
                                        step_key)

        retry_state = execute_step_args.known_state.get_retry_state()

        if retry_state.get_attempt_count(step_key):
            attempt_number = retry_state.get_attempt_count(step_key)
            job_name = "dagster-job-%s-%d" % (k8s_name_key, attempt_number)
            pod_name = "dagster-job-%s-%d" % (k8s_name_key, attempt_number)
        else:
            job_name = "dagster-job-%s" % (k8s_name_key)
            pod_name = "dagster-job-%s" % (k8s_name_key)

        input_json = serialize_dagster_namedtuple(execute_step_args)
        args = ["dagster", "api", "execute_step", input_json]

        job = construct_dagster_k8s_job(job_config, args, job_name,
                                        user_defined_k8s_config, pod_name)

        # Running list of events generated from this task execution
        events = []

        # Post event for starting execution
        job_name = job.metadata.name
        engine_event = instance.report_engine_event(
            "Executing step {} in Kubernetes job {}".format(
                step_key, job_name),
            pipeline_run,
            EngineEventData(
                [
                    EventMetadataEntry.text(step_key, "Step key"),
                    EventMetadataEntry.text(job_name, "Kubernetes Job name"),
                    EventMetadataEntry.text(job_config.job_image, "Job image"),
                    EventMetadataEntry.text(job_config.image_pull_policy,
                                            "Image pull policy"),
                    EventMetadataEntry.text(str(job_config.image_pull_secrets),
                                            "Image pull secrets"),
                    EventMetadataEntry.text(
                        str(job_config.service_account_name),
                        "Service account name"),
                ],
                marker_end=DELEGATE_MARKER,
            ),
            CeleryK8sJobExecutor,
            # validated above that step_keys is length 1, and it is not possible to use ETH or
            # execution plan in this function (Celery K8s workers should not have access to user code)
            step_key=step_key,
        )
        events.append(engine_event)
        try:
            kubernetes.client.BatchV1Api().create_namespaced_job(
                body=job, namespace=job_namespace)
        except kubernetes.client.rest.ApiException as e:
            if e.reason == "Conflict":
                # There is an existing job with the same name so proceed and see if the existing job succeeded
                instance.report_engine_event(
                    "Did not create Kubernetes job {} for step {} since job name already "
                    "exists, proceeding with existing job.".format(
                        job_name, step_key),
                    pipeline_run,
                    EngineEventData(
                        [
                            EventMetadataEntry.text(step_key, "Step key"),
                            EventMetadataEntry.text(job_name,
                                                    "Kubernetes Job name"),
                        ],
                        marker_end=DELEGATE_MARKER,
                    ),
                    CeleryK8sJobExecutor,
                    step_key=step_key,
                )
            else:
                instance.report_engine_event(
                    "Encountered unexpected error while creating Kubernetes job {} for step {}, "
                    "exiting.".format(job_name, step_key),
                    pipeline_run,
                    EngineEventData(
                        [
                            EventMetadataEntry.text(step_key, "Step key"),
                        ],
                        error=serializable_error_info_from_exc_info(
                            sys.exc_info()),
                    ),
                    CeleryK8sJobExecutor,
                    step_key=step_key,
                )
                return []

        try:
            wait_for_job_success(
                job_name=job_name,
                namespace=job_namespace,
                instance=instance,
                run_id=execute_step_args.pipeline_run_id,
            )
        except (DagsterK8sError, DagsterK8sTimeoutError) as err:
            step_failure_event = construct_step_failure_event_and_handle(
                pipeline_run, step_key, err, instance=instance)
            events.append(step_failure_event)
        except DagsterK8sPipelineStatusException:
            instance.report_engine_event(
                "Terminating Kubernetes Job because pipeline run status is not STARTED",
                pipeline_run,
                EngineEventData([
                    EventMetadataEntry.text(step_key, "Step key"),
                    EventMetadataEntry.text(job_name, "Kubernetes Job name"),
                    EventMetadataEntry.text(job_namespace,
                                            "Kubernetes Job namespace"),
                ]),
                CeleryK8sJobExecutor,
                step_key=step_key,
            )
            delete_job(job_name=job_name, namespace=job_namespace)
            return []
        except (
                DagsterK8sUnrecoverableAPIError,
                DagsterK8sAPIRetryLimitExceeded,
                # We shouldn't see unwrapped APIExceptions anymore, as they should all be wrapped in
                # a retry boundary. We still catch it here just in case we missed one so that we can
                # report it to the event log
                kubernetes.client.rest.ApiException,
        ) as err:
            instance.report_engine_event(
                "Encountered unexpected error while waiting on Kubernetes job {} for step {}, "
                "exiting.".format(job_name, step_key),
                pipeline_run,
                EngineEventData(
                    [
                        EventMetadataEntry.text(step_key, "Step key"),
                    ],
                    error=serializable_error_info_from_exc_info(
                        sys.exc_info()),
                ),
                CeleryK8sJobExecutor,
                step_key=step_key,
            )
            return []

        try:
            pod_names = get_pod_names_in_job(job_name, namespace=job_namespace)
        except kubernetes.client.rest.ApiException as e:
            instance.report_engine_event(
                "Encountered unexpected error retreiving Pods for Kubernetes job {} for step {}, "
                "exiting.".format(job_name, step_key),
                pipeline_run,
                EngineEventData(
                    [
                        EventMetadataEntry.text(step_key, "Step key"),
                    ],
                    error=serializable_error_info_from_exc_info(
                        sys.exc_info()),
                ),
                CeleryK8sJobExecutor,
                step_key=step_key,
            )
            return []

        # Post engine event for log retrieval
        engine_event = instance.report_engine_event(
            "Retrieving logs from Kubernetes Job pods",
            pipeline_run,
            EngineEventData(
                [EventMetadataEntry.text("\n".join(pod_names), "Pod names")]),
            CeleryK8sJobExecutor,
            step_key=step_key,
        )
        events.append(engine_event)

        logs = []
        for pod_name in pod_names:
            try:
                raw_logs = retrieve_pod_logs(pod_name, namespace=job_namespace)
                logs += raw_logs.split("\n")
            except kubernetes.client.rest.ApiException as e:
                instance.report_engine_event(
                    "Encountered unexpected error while fetching pod logs for Kubernetes job {}, "
                    "Pod name {} for step {}. Will attempt to continue with other pods."
                    .format(job_name, pod_name, step_key),
                    pipeline_run,
                    EngineEventData(
                        [
                            EventMetadataEntry.text(step_key, "Step key"),
                        ],
                        error=serializable_error_info_from_exc_info(
                            sys.exc_info()),
                    ),
                    CeleryK8sJobExecutor,
                    step_key=step_key,
                )

        events += filter_dagster_events_from_pod_logs(logs)
        serialized_events = [
            serialize_dagster_namedtuple(event) for event in events
        ]
        return serialized_events
Example #18
def test_valid_job_format_with_user_defined_k8s_config(run_launcher):
    docker_image = test_project_docker_image()

    run_config = load_yaml_from_path(
        os.path.join(test_project_environments_path(), "env.yaml"))
    pipeline_name = "demo_pipeline"
    run = PipelineRun(pipeline_name=pipeline_name, run_config=run_config)

    tags = validate_tags({
        USER_DEFINED_K8S_CONFIG_KEY: ({
            "container_config": {
                "resources": {
                    "requests": {
                        "cpu": "250m",
                        "memory": "64Mi"
                    },
                    "limits": {
                        "cpu": "500m",
                        "memory": "2560Mi"
                    },
                }
            },
            "pod_template_spec_metadata": {
                "annotations": {
                    "cluster-autoscaler.kubernetes.io/safe-to-evict": "true"
                },
                "labels": {
                    "spotinst.io/restrict-scale-down": "true"
                },
            },
            "pod_spec_config": {
                "affinity": {
                    "nodeAffinity": {
                        "requiredDuringSchedulingIgnoredDuringExecution": {
                            "nodeSelectorTerms": [{
                                "matchExpressions": [{
                                    "key":
                                    "kubernetes.io/e2e-az-name",
                                    "operator":
                                    "In",
                                    "values": ["e2e-az1", "e2e-az2"],
                                }]
                            }]
                        }
                    }
                }
            },
        })
    })
    user_defined_k8s_config = get_user_defined_k8s_config(tags)
    job_name = "dagster-run-%s" % run.run_id
    pod_name = "dagster-run-%s" % run.run_id
    job = construct_dagster_k8s_job(
        job_config=run_launcher.job_config,
        command=["dagster"],
        args=["api", "execute_run_with_structured_logs"],
        job_name=job_name,
        user_defined_k8s_config=user_defined_k8s_config,
        pod_name=pod_name,
        component="run_coordinator",
    )

    assert (yaml.dump(remove_none_recursively(job.to_dict()),
                      default_flow_style=False).strip() ==
            EXPECTED_CONFIGURED_JOB_SPEC.format(
                run_id=run.run_id,
                job_image=docker_image,
                image_pull_policy=image_pull_policy(),
                dagster_version=dagster_version,
                labels="spotinst.io/restrict-scale-down: 'true'",
                resources="""
        resources:
          limits:
            cpu: 500m
            memory: 2560Mi
          requests:
            cpu: 250m
            memory: 64Mi""",
                annotations="""annotations:
        cluster-autoscaler.kubernetes.io/safe-to-evict: \'true\'""",
                affinity="""affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
            - matchExpressions:
              - key: kubernetes.io/e2e-az-name
                operator: In
                values:
                - e2e-az1
                - e2e-az2""",
            ).strip())
Example #19
def test_construct_dagster_k8s_job_with_mounts():
    cfg = DagsterK8sJobConfig(
        job_image="test/foo:latest",
        dagster_home="/opt/dagster/dagster_home",
        image_pull_policy="Always",
        image_pull_secrets=[{
            "name": "my_secret"
        }],
        service_account_name=None,
        instance_config_map="some-instance-configmap",
        postgres_password_secret=None,
        env_config_maps=None,
        env_secrets=None,
        volume_mounts=[{
            "name": "foo",
            "mountPath": "biz/buz",
            "subPath": "file.txt"
        }],
        volumes=[
            {
                "name": "foo",
                "configMap": {
                    "name": "settings-cm"
                }
            },
        ],
    )
    job = construct_dagster_k8s_job(cfg, ["foo", "bar"], "job123").to_dict()

    assert len(job["spec"]["template"]["spec"]["volumes"]) == 2
    foo_volumes = [
        volume for volume in job["spec"]["template"]["spec"]["volumes"]
        if volume["name"] == "foo"
    ]
    assert len(foo_volumes) == 1
    assert foo_volumes[0]["config_map"]["name"] == "settings-cm"

    assert len(
        job["spec"]["template"]["spec"]["containers"][0]["volume_mounts"]) == 2
    foo_volumes_mounts = [
        volume for volume in job["spec"]["template"]["spec"]["containers"][0]
        ["volume_mounts"] if volume["name"] == "foo"
    ]
    assert len(foo_volumes_mounts) == 1

    cfg = DagsterK8sJobConfig(
        job_image="test/foo:latest",
        dagster_home="/opt/dagster/dagster_home",
        image_pull_policy="Always",
        image_pull_secrets=[{
            "name": "my_secret"
        }],
        service_account_name=None,
        instance_config_map="some-instance-configmap",
        postgres_password_secret=None,
        env_config_maps=None,
        env_secrets=None,
        volume_mounts=[{
            "name": "foo",
            "mountPath": "biz/buz",
            "subPath": "file.txt"
        }],
        volumes=[
            {
                "name": "foo",
                "secret": {
                    "secretName": "settings-secret"
                }
            },
        ],
    )
    job = construct_dagster_k8s_job(cfg, ["foo", "bar"], "job123").to_dict()
    assert len(job["spec"]["template"]["spec"]["volumes"]) == 2
    foo_volumes = [
        volume for volume in job["spec"]["template"]["spec"]["volumes"]
        if volume["name"] == "foo"
    ]
    assert len(foo_volumes) == 1
    assert foo_volumes[0]["secret"]["secret_name"] == "settings-secret"

    cfg_with_invalid_volume_key = DagsterK8sJobConfig(
        job_image="test/foo:latest",
        dagster_home="/opt/dagster/dagster_home",
        image_pull_policy="Always",
        image_pull_secrets=[{
            "name": "my_secret"
        }],
        service_account_name=None,
        instance_config_map="some-instance-configmap",
        postgres_password_secret=None,
        env_config_maps=None,
        env_secrets=None,
        volume_mounts=[{
            "name": "foo",
            "mountPath": "biz/buz",
            "subPath": "file.txt"
        }],
        volumes=[
            {
                "name": "foo",
                "invalidKey": "settings-secret"
            },
        ],
    )
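    # Unknown keys are rejected when the dict is deserialized into the k8s V1Volume model.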
    with pytest.raises(
            Exception,
            match="Unexpected keys in model class V1Volume: {'invalidKey'}"):
        construct_dagster_k8s_job(cfg_with_invalid_volume_key, ["foo", "bar"],
                                  "job123").to_dict()
Example #20
    def _execute_step_k8s_job(
        self,
        instance_ref_dict,
        step_keys,
        run_config,
        mode,
        repo_name,
        repo_location_name,
        run_id,
        job_config_dict,
        job_namespace,
        load_incluster_config,
        retries_dict,
        pipeline_origin_packed,
        user_defined_k8s_config_dict=None,
        kubeconfig_file=None,
    ):
        """Run step execution in a K8s job pod.
        """

        check.dict_param(instance_ref_dict, "instance_ref_dict")
        check.list_param(step_keys, "step_keys", of_type=str)
        check.invariant(
            len(step_keys) == 1, "Celery K8s task executor can only execute 1 step at a time"
        )
        check.dict_param(run_config, "run_config")
        check.str_param(mode, "mode")
        check.str_param(repo_name, "repo_name")
        check.str_param(repo_location_name, "repo_location_name")
        check.str_param(run_id, "run_id")

        # Celery will serialize this as a list
        job_config = DagsterK8sJobConfig.from_dict(job_config_dict)
        check.inst_param(job_config, "job_config", DagsterK8sJobConfig)
        check.str_param(job_namespace, "job_namespace")

        check.bool_param(load_incluster_config, "load_incluster_config")
        check.dict_param(retries_dict, "retries_dict")

        pipeline_origin = unpack_value(
            check.dict_param(
                pipeline_origin_packed, "pipeline_origin_packed"
            )  # TODO: make part of args
        )
        check.inst(pipeline_origin, PipelineOrigin)

        user_defined_k8s_config = UserDefinedDagsterK8sConfig.from_dict(
            user_defined_k8s_config_dict
        )
        check.opt_inst_param(
            user_defined_k8s_config, "user_defined_k8s_config", UserDefinedDagsterK8sConfig,
        )
        check.opt_str_param(kubeconfig_file, "kubeconfig_file")

        # For when launched via DinD or running within the cluster
        if load_incluster_config:
            kubernetes.config.load_incluster_config()
        else:
            kubernetes.config.load_kube_config(kubeconfig_file)

        instance_ref = InstanceRef.from_dict(instance_ref_dict)
        instance = DagsterInstance.from_ref(instance_ref)
        pipeline_run = instance.get_run_by_id(run_id)

        check.invariant(pipeline_run, "Could not load run {}".format(run_id))
        step_key = step_keys[0]

        celery_worker_name = self.request.hostname
        celery_pod_name = os.environ.get("HOSTNAME")
        instance.report_engine_event(
            "Task for step {step_key} picked up by Celery".format(step_key=step_key),
            pipeline_run,
            EngineEventData(
                [
                    EventMetadataEntry.text(celery_worker_name, "Celery worker name"),
                    EventMetadataEntry.text(celery_pod_name, "Celery worker Kubernetes Pod name"),
                ]
            ),
            CeleryK8sJobExecutor,
            step_key=step_key,
        )

        if pipeline_run.status != PipelineRunStatus.STARTED:
            instance.report_engine_event(
                "Not scheduling step because pipeline run status is not STARTED",
                pipeline_run,
                EngineEventData([EventMetadataEntry.text(step_key, "Step keys"),]),
                CeleryK8sJobExecutor,
                step_key=step_key,
            )
            return

        # Ensure we stay below k8s name length limits
        k8s_name_key = get_k8s_job_name(run_id, step_key)

        retries = Retries.from_config(retries_dict)

        if retries.get_attempt_count(step_key):
            attempt_number = retries.get_attempt_count(step_key)
            job_name = "dagster-job-%s-%d" % (k8s_name_key, attempt_number)
            pod_name = "dagster-job-%s-%d" % (k8s_name_key, attempt_number)
        else:
            job_name = "dagster-job-%s" % (k8s_name_key)
            pod_name = "dagster-job-%s" % (k8s_name_key)

        input_json = serialize_dagster_namedtuple(
            ExecuteStepArgs(
                pipeline_origin=pipeline_origin,
                pipeline_run_id=run_id,
                instance_ref=None,
                mode=mode,
                step_keys_to_execute=step_keys,
                run_config=run_config,
                retries_dict=retries_dict,
            )
        )
        command = ["dagster"]
        args = ["api", "execute_step_with_structured_logs", input_json]

        job = construct_dagster_k8s_job(
            job_config, command, args, job_name, user_defined_k8s_config, pod_name
        )

        # Running list of events generated from this task execution
        events = []

        # Post event for starting execution
        job_name = job.metadata.name
        engine_event = instance.report_engine_event(
            "Executing step {} in Kubernetes job {}".format(step_key, job_name),
            pipeline_run,
            EngineEventData(
                [
                    EventMetadataEntry.text(step_key, "Step keys"),
                    EventMetadataEntry.text(job_name, "Kubernetes Job name"),
                    EventMetadataEntry.text(pod_name, "Kubernetes Pod name"),
                    EventMetadataEntry.text(job_config.job_image, "Job image"),
                    EventMetadataEntry.text(job_config.image_pull_policy, "Image pull policy"),
                    EventMetadataEntry.text(
                        str(job_config.image_pull_secrets), "Image pull secrets"
                    ),
                    EventMetadataEntry.text(
                        str(job_config.service_account_name), "Service account name"
                    ),
                ],
                marker_end=DELEGATE_MARKER,
            ),
            CeleryK8sJobExecutor,
            # validated above that step_keys is length 1, and it is not possible to use ETH or
            # execution plan in this function (Celery K8s workers should not have access to user code)
            step_key=step_key,
        )
        events.append(engine_event)

        try:
            kubernetes.client.BatchV1Api().create_namespaced_job(body=job, namespace=job_namespace)
        except kubernetes.client.rest.ApiException as e:
            if e.reason == "Conflict":
                # There is an existing job with the same name so do not proceed.
                instance.report_engine_event(
                    "Did not create Kubernetes job {} for step {} since job name already "
                    "exists, exiting.".format(job_name, step_key),
                    pipeline_run,
                    EngineEventData(
                        [
                            EventMetadataEntry.text(step_key, "Step keys"),
                            EventMetadataEntry.text(job_name, "Kubernetes Job name"),
                            EventMetadataEntry.text(pod_name, "Kubernetes Pod name"),
                        ],
                        marker_end=DELEGATE_MARKER,
                    ),
                    CeleryK8sJobExecutor,
                    step_key=step_key,
                )
            else:
                instance.report_engine_event(
                    "Encountered unexpected error while creating Kubernetes job {} for step {}, "
                    "exiting.".format(job_name, step_key),
                    pipeline_run,
                    EngineEventData(
                        [
                            EventMetadataEntry.text(step_key, "Step keys"),
                            EventMetadataEntry.text(e, "Error"),
                        ]
                    ),
                    CeleryK8sJobExecutor,
                    step_key=step_key,
                )
            return

        try:
            wait_for_job_success(
                job_name=job_name, namespace=job_namespace, instance=instance, run_id=run_id,
            )
        except DagsterK8sPipelineStatusException:
            instance.report_engine_event(
                "Terminating Kubernetes Job because pipeline run status is not STARTED",
                pipeline_run,
                EngineEventData(
                    [
                        EventMetadataEntry.text(step_key, "Step keys"),
                        EventMetadataEntry.text(job_name, "Kubernetes Job name"),
                        EventMetadataEntry.text(job_namespace, "Kubernetes Job namespace"),
                    ]
                ),
                CeleryK8sJobExecutor,
                step_key=step_key,
            )
            delete_job(job_name=job_name, namespace=job_namespace)
            return

        pod_names = get_pod_names_in_job(job_name, namespace=job_namespace)

        # Post engine event for log retrieval
        engine_event = instance.report_engine_event(
            "Retrieving logs from Kubernetes Job pods",
            pipeline_run,
            EngineEventData([EventMetadataEntry.text("\n".join(pod_names), "Pod names")]),
            CeleryK8sJobExecutor,
            step_key=step_key,
        )
        events.append(engine_event)

        logs = []
        for pod_name in pod_names:
            raw_logs = retrieve_pod_logs(pod_name, namespace=job_namespace)
            logs += raw_logs.split("\n")

        events += filter_dagster_events_from_pod_logs(logs)
        serialized_events = [serialize_dagster_namedtuple(event) for event in events]
        return serialized_events
Example #21
    def _execute_step_k8s_job(
        _self,
        instance_ref_dict,
        step_keys,
        run_config,
        mode,
        repo_name,
        repo_location_name,
        run_id,
        job_config_dict,
        job_namespace,
        load_incluster_config,
        retries_dict,
        pipeline_origin_packed,
        resources=None,
        kubeconfig_file=None,
    ):
        '''Run step execution in a K8s job pod.
        '''

        check.dict_param(instance_ref_dict, 'instance_ref_dict')
        check.list_param(step_keys, 'step_keys', of_type=str)
        check.invariant(
            len(step_keys) == 1,
            'Celery K8s task executor can only execute 1 step at a time')
        check.dict_param(run_config, 'run_config')
        check.str_param(mode, 'mode')
        check.str_param(repo_name, 'repo_name')
        check.str_param(repo_location_name, 'repo_location_name')
        check.str_param(run_id, 'run_id')

        # Celery will serialize this as a list
        job_config = DagsterK8sJobConfig.from_dict(job_config_dict)
        check.inst_param(job_config, 'job_config', DagsterK8sJobConfig)
        check.str_param(job_namespace, 'job_namespace')

        check.bool_param(load_incluster_config, 'load_incluster_config')
        check.dict_param(retries_dict, 'retries_dict')

        pipeline_origin = unpack_value(
            check.dict_param(
                pipeline_origin_packed,
                'pipeline_origin_packed')  # TODO: make part of args
        )
        check.inst(pipeline_origin, PipelineOrigin)

        check.opt_dict_param(resources,
                             'resources',
                             key_type=str,
                             value_type=dict)
        check.opt_str_param(kubeconfig_file, 'kubeconfig_file')

        # For when launched via DinD or running within the cluster
        if load_incluster_config:
            kubernetes.config.load_incluster_config()
        else:
            kubernetes.config.load_kube_config(kubeconfig_file)

        instance_ref = InstanceRef.from_dict(instance_ref_dict)
        instance = DagsterInstance.from_ref(instance_ref)
        pipeline_run = instance.get_run_by_id(run_id)

        check.invariant(pipeline_run, 'Could not load run {}'.format(run_id))

        step_key = step_keys[0]
        if pipeline_run.status != PipelineRunStatus.STARTED:
            instance.report_engine_event(
                'Not scheduling step because pipeline run status is not STARTED',
                pipeline_run,
                EngineEventData([
                    EventMetadataEntry.text(step_key, 'Step keys'),
                ]),
                CeleryK8sJobExecutor,
                step_key=step_key,
            )
            return

        # Ensure we stay below k8s name length limits
        k8s_name_key = get_k8s_job_name(run_id, step_key)

        retries = Retries.from_config(retries_dict)

        if retries.get_attempt_count(step_key):
            attempt_number = retries.get_attempt_count(step_key)
            job_name = 'dagster-job-%s-%d' % (k8s_name_key, attempt_number)
            pod_name = 'dagster-job-%s-%d' % (k8s_name_key, attempt_number)
        else:
            job_name = 'dagster-job-%s' % (k8s_name_key)
            pod_name = 'dagster-job-%s' % (k8s_name_key)

        input_json = serialize_dagster_namedtuple(
            ExecuteStepArgs(
                pipeline_origin=pipeline_origin,
                pipeline_run_id=run_id,
                instance_ref=None,
                mode=mode,
                step_keys_to_execute=step_keys,
                run_config=run_config,
                retries_dict=retries_dict,
            ))
        command = ['dagster']
        args = ['api', 'execute_step_with_structured_logs', input_json]

        job = construct_dagster_k8s_job(job_config, command, args, job_name,
                                        resources, pod_name)

        # Running list of events generated from this task execution
        events = []

        # Post event for starting execution
        job_name = job.metadata.name
        engine_event = instance.report_engine_event(
            'Executing step {} in Kubernetes job {}'.format(
                step_key, job_name),
            pipeline_run,
            EngineEventData(
                [
                    EventMetadataEntry.text(step_key, 'Step keys'),
                    EventMetadataEntry.text(job_name, 'Kubernetes Job name'),
                    EventMetadataEntry.text(pod_name, 'Kubernetes Pod name'),
                    EventMetadataEntry.text(job_config.job_image, 'Job image'),
                    EventMetadataEntry.text(job_config.image_pull_policy,
                                            'Image pull policy'),
                    EventMetadataEntry.text(str(job_config.image_pull_secrets),
                                            'Image pull secrets'),
                    EventMetadataEntry.text(
                        str(job_config.service_account_name),
                        'Service account name'),
                ],
                marker_end=DELEGATE_MARKER,
            ),
            CeleryK8sJobExecutor,
            # validated above that step_keys is length 1, and it is not possible to use ETH or
            # execution plan in this function (Celery K8s workers should not have access to user code)
            step_key=step_key,
        )
        events.append(engine_event)

        try:
            kubernetes.client.BatchV1Api().create_namespaced_job(
                body=job, namespace=job_namespace)
        except kubernetes.client.rest.ApiException as e:
            if e.reason == 'Conflict':
                # There is an existing job with the same name so do not proceed.
                instance.report_engine_event(
                    'Did not create Kubernetes job {} for step {} since job name already '
                    'exists, exiting.'.format(job_name, step_key),
                    pipeline_run,
                    EngineEventData(
                        [
                            EventMetadataEntry.text(step_key, 'Step keys'),
                            EventMetadataEntry.text(job_name,
                                                    'Kubernetes Job name'),
                            EventMetadataEntry.text(pod_name,
                                                    'Kubernetes Pod name'),
                        ],
                        marker_end=DELEGATE_MARKER,
                    ),
                    CeleryK8sJobExecutor,
                    step_key=step_key,
                )
            else:
                instance.report_engine_event(
                    'Encountered unexpected error while creating Kubernetes job {} for step {}, '
                    'exiting.'.format(job_name, step_key),
                    pipeline_run,
                    EngineEventData([
                        EventMetadataEntry.text(step_key, 'Step keys'),
                        EventMetadataEntry.text(e, 'Error'),
                    ]),
                    CeleryK8sJobExecutor,
                    step_key=step_key,
                )
            return

        try:
            wait_for_job_success(
                job_name=job_name,
                namespace=job_namespace,
                instance=instance,
                run_id=run_id,
            )
        except DagsterK8sPipelineStatusException:
            instance.report_engine_event(
                'Terminating Kubernetes Job because pipeline run status is not STARTED',
                pipeline_run,
                EngineEventData([
                    EventMetadataEntry.text(step_key, 'Step keys'),
                    EventMetadataEntry.text(job_name, 'Kubernetes Job name'),
                    EventMetadataEntry.text(job_namespace,
                                            'Kubernetes Job namespace'),
                ]),
                CeleryK8sJobExecutor,
                step_key=step_key,
            )
            delete_job(job_name=job_name, namespace=job_namespace)
            return

        pod_names = get_pod_names_in_job(job_name, namespace=job_namespace)

        # Post engine event for log retrieval
        engine_event = instance.report_engine_event(
            'Retrieving logs from Kubernetes Job pods',
            pipeline_run,
            EngineEventData(
                [EventMetadataEntry.text('\n'.join(pod_names), 'Pod names')]),
            CeleryK8sJobExecutor,
            step_key=step_key,
        )
        events.append(engine_event)

        logs = []
        for pod_name in pod_names:
            raw_logs = retrieve_pod_logs(pod_name, namespace=job_namespace)
            logs += raw_logs.split('\n')

        events += filter_dagster_events_from_pod_logs(logs)
        serialized_events = [
            serialize_dagster_namedtuple(event) for event in events
        ]
        return serialized_events
Example #22
def test_valid_job_format_with_user_defined_k8s_config(run_launcher):
    docker_image = test_project_docker_image()

    run_config = load_yaml_from_path(
        os.path.join(test_project_environments_path(), 'env.yaml'))
    pipeline_name = 'demo_pipeline'
    run = PipelineRun(pipeline_name=pipeline_name, run_config=run_config)

    tags = validate_tags({
        USER_DEFINED_K8S_CONFIG_KEY: ({
            'container_config': {
                'resources': {
                    'requests': {
                        'cpu': '250m',
                        'memory': '64Mi'
                    },
                    'limits': {
                        'cpu': '500m',
                        'memory': '2560Mi'
                    },
                }
            },
            'pod_template_spec_metadata': {
                'annotations': {
                    "cluster-autoscaler.kubernetes.io/safe-to-evict": "true"
                }
            },
            'pod_spec_config': {
                'affinity': {
                    'nodeAffinity': {
                        'requiredDuringSchedulingIgnoredDuringExecution': {
                            'nodeSelectorTerms': [{
                                'matchExpressions': [{
                                    'key':
                                    'kubernetes.io/e2e-az-name',
                                    'operator':
                                    'In',
                                    'values': ['e2e-az1', 'e2e-az2'],
                                }]
                            }]
                        }
                    }
                }
            },
        })
    })
    user_defined_k8s_config = get_user_defined_k8s_config(tags)
    job_name = 'dagster-run-%s' % run.run_id
    pod_name = 'dagster-run-%s' % run.run_id
    job = construct_dagster_k8s_job(
        job_config=run_launcher.job_config,
        command=['dagster-graphql'],
        args=[
            '-p',
            'executeRunInProcess',
            '-v',
            seven.json.dumps({'runId': run.run_id}),
        ],
        job_name=job_name,
        user_defined_k8s_config=user_defined_k8s_config,
        pod_name=pod_name,
        component='run_coordinator',
    )

    assert (yaml.dump(remove_none_recursively(job.to_dict()),
                      default_flow_style=False).strip() ==
            EXPECTED_CONFIGURED_JOB_SPEC.format(
                run_id=run.run_id,
                job_image=docker_image,
                image_pull_policy=image_pull_policy(),
                dagster_version=dagster_version,
                resources='''
        resources:
          limits:
            cpu: 500m
            memory: 2560Mi
          requests:
            cpu: 250m
            memory: 64Mi''',
                annotations='''annotations:
        cluster-autoscaler.kubernetes.io/safe-to-evict: \'true\'''',
                affinity='''affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
            - matchExpressions:
              - key: kubernetes.io/e2e-az-name
                operator: In
                values:
                - e2e-az1
                - e2e-az2''',
            ).strip())