def create_run(instance, **kwargs):  # pylint: disable=redefined-outer-name
    with get_foo_pipeline_handle() as pipeline_handle:
        create_run_for_test(
            instance,
            external_pipeline_origin=pipeline_handle.get_external_origin(),
            pipeline_name="foo",
            **kwargs)
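
The snippets on this page share one pattern: create a run record with create_run_for_test on a test instance, then hand it to a launcher, coordinator, or compute log manager. A minimal sketch of that pattern, assuming the dagster.core.test_utils helpers used throughout these examples and a hypothetical pipeline name "foo":

from dagster.core.test_utils import create_run_for_test, instance_for_test

def minimal_example():
    # Spin up a throwaway DagsterInstance backed by a temporary directory.
    with instance_for_test() as instance:
        # Register a run record on the instance without launching it anywhere.
        run = create_run_for_test(instance, pipeline_name="foo")
        assert instance.get_run_by_id(run.run_id) is not None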
Example 2
def create_run(instance, **kwargs):
    with get_foo_pipeline_handle() as pipeline_handle:
        create_run_for_test(
            instance,
            external_pipeline_origin=pipeline_handle.get_external_origin(),
            pipeline_name="foo",
            **kwargs,
        )
Example 3
def test_execute_run_fail_pipeline():
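    # The "fail" pipeline raises when executed, so the CLI output should contain RUN_FAILURE.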
    with get_bar_repo_handle() as repo_handle:
        pipeline_handle = PipelineHandle("fail", repo_handle)
        runner = CliRunner()

        with instance_for_test(
                overrides={
                    "compute_logs": {
                        "module":
                        "dagster.core.storage.noop_compute_log_manager",
                        "class": "NoOpComputeLogManager",
                    }
                }) as instance:
            run = create_run_for_test(instance,
                                      pipeline_name="foo",
                                      run_id="new_run")

            input_json = serialize_dagster_namedtuple(
                ExecuteRunArgs(
                    pipeline_origin=pipeline_handle.get_python_origin(),
                    pipeline_run_id=run.run_id,
                    instance_ref=instance.get_ref(),
                ))

            result = runner_execute_run(
                runner,
                [input_json],
            )
            assert result.exit_code == 0

            assert "RUN_FAILURE" in result.stdout, "no match, result: {}".format(
                result)

            run = create_run_for_test(instance,
                                      pipeline_name="foo",
                                      run_id="new_run_raise_on_error")

            input_json_raise_on_failure = serialize_dagster_namedtuple(
                ExecuteRunArgs(
                    pipeline_origin=pipeline_handle.get_python_origin(),
                    pipeline_run_id=run.run_id,
                    instance_ref=instance.get_ref(),
                    set_exit_code_on_failure=True,
                ))

            result = runner.invoke(api.execute_run_command,
                                   [input_json_raise_on_failure])

            assert result.exit_code != 0, str(result.stdout)

            assert "RUN_FAILURE" in result.stdout, "no match, result: {}".format(
                result)

            # Framework errors (e.g. running a run that has already run) also result in a non-zero error code
            result = runner.invoke(api.execute_run_command,
                                   [input_json_raise_on_failure])
            assert result.exit_code != 0, str(result.stdout)
Example 4
def test_k8s_executor_config_override(kubeconfig_file):
    # Construct a K8s run launcher in a fake k8s environment.
    mock_k8s_client_batch_api = mock.MagicMock()
    celery_k8s_run_launcher = CeleryK8sRunLauncher(
        instance_config_map="dagster-instance",
        postgres_password_secret="dagster-postgresql-secret",
        dagster_home="/opt/dagster/dagster_home",
        load_incluster_config=False,
        kubeconfig_file=kubeconfig_file,
        k8s_client_batch_api=mock_k8s_client_batch_api,
    )

    external_pipeline = get_test_project_external_pipeline(
        "demo_pipeline", "my_image:tag")

    # Launch the run in a fake Dagster instance.
    with instance_for_test() as instance:
        celery_k8s_run_launcher.register_instance(instance)
        pipeline_name = "demo_pipeline"

        # Launch without custom job_image
        run = create_run_for_test(
            instance,
            pipeline_name=pipeline_name,
            run_config={"execution": {
                "celery-k8s": {}
            }},
        )
        celery_k8s_run_launcher.launch_run(run, external_pipeline)

        # Launch with custom job_image
        run = create_run_for_test(
            instance,
            pipeline_name=pipeline_name,
            run_config={
                "execution": {
                    "celery-k8s": {
                        "config": {
                            "job_image": "fake-image-name"
                        }
                    }
                }
            },
        )
        celery_k8s_run_launcher.launch_run(run, external_pipeline)

    # Check that user defined k8s config was passed down to the k8s job.
    mock_method_calls = mock_k8s_client_batch_api.method_calls
    assert len(mock_method_calls) > 0

    _, _args, kwargs = mock_method_calls[0]
    assert kwargs["body"].spec.template.spec.containers[0].image == "my_image:tag"

    _, _args, kwargs = mock_method_calls[1]
    assert kwargs["body"].spec.template.spec.containers[0].image == "fake-image-name"
Example 5
def create_invalid_run(instance, **kwargs):
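    # The external origin points at a nonexistent module and pipeline, so this run can never resolve.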
    create_run_for_test(
        instance,
        external_pipeline_origin=ExternalPipelineOrigin(
            ExternalRepositoryOrigin(
                InProcessRepositoryLocationOrigin(
                    ReconstructableRepository(ModuleCodePointer("fake", "fake"))),
                "foo",
            ),
            "wrong-pipeline",
        ),
        pipeline_name="wrong-pipeline",
        **kwargs,
    )
Example 6
def test_raise_on_error(kubeconfig_file):
    # Construct a K8s run launcher in a fake k8s environment.
    mock_k8s_client_batch_api = mock.MagicMock()
    k8s_run_launcher = K8sRunLauncher(
        service_account_name="dagit-admin",
        instance_config_map="dagster-instance",
        postgres_password_secret="dagster-postgresql-secret",
        dagster_home="/opt/dagster/dagster_home",
        job_image="fake_job_image",
        load_incluster_config=False,
        kubeconfig_file=kubeconfig_file,
        k8s_client_batch_api=mock_k8s_client_batch_api,
        fail_pod_on_run_failure=True,
    )
    # Create fake external pipeline.
    recon_pipeline = reconstructable(fake_pipeline)
    recon_repo = recon_pipeline.repository
    repo_def = recon_repo.get_definition()
    loadable_target_origin = LoadableTargetOrigin(python_file=__file__)

    with instance_for_test() as instance:
        with in_process_test_workspace(instance, loadable_target_origin) as workspace:
            location = workspace.get_repository_location(workspace.repository_location_names[0])
            repo_handle = RepositoryHandle(
                repository_name=repo_def.name,
                repository_location=location,
            )
            fake_external_pipeline = external_pipeline_from_recon_pipeline(
                recon_pipeline,
                solid_selection=None,
                repository_handle=repo_handle,
            )

            # Launch the run in a fake Dagster instance.
            pipeline_name = "demo_pipeline"
            run = create_run_for_test(
                instance,
                pipeline_name=pipeline_name,
                external_pipeline_origin=fake_external_pipeline.get_external_origin(),
                pipeline_code_origin=fake_external_pipeline.get_python_origin(),
            )
            k8s_run_launcher.register_instance(instance)
            k8s_run_launcher.launch_run(LaunchRunContext(run, workspace))

        mock_method_calls = mock_k8s_client_batch_api.method_calls
        assert len(mock_method_calls) > 0
        method_name, _args, kwargs = mock_method_calls[0]
        assert method_name == "create_namespaced_job"
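        # fail_pod_on_run_failure should surface as set_exit_code_on_failure in the job's command args.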

        container = kwargs["body"].spec.template.spec.containers[0]
        args = container.args
        assert (
            args
            == ExecuteRunArgs(
                pipeline_origin=run.pipeline_code_origin,
                pipeline_run_id=run.run_id,
                instance_ref=instance.get_ref(),
                set_exit_code_on_failure=True,
            ).get_command_args()
        )
Example 7
def test_execute_on_celery(  # pylint: disable=redefined-outer-name
        dagster_docker_image, dagster_instance, helm_namespace):
    run_config = merge_dicts(
        merge_yamls([
            os.path.join(test_project_environments_path(), 'env.yaml'),
            os.path.join(test_project_environments_path(), 'env_s3.yaml'),
        ]),
        get_celery_engine_config(dagster_docker_image=dagster_docker_image,
                                 helm_namespace=helm_namespace),
    )

    pipeline_name = 'demo_pipeline_celery'
    run = create_run_for_test(
        dagster_instance,
        pipeline_name=pipeline_name,
        run_config=run_config,
        mode='default',
    )

    dagster_instance.launch_run(
        run.run_id, get_test_project_external_pipeline(pipeline_name))

    result = wait_for_job_and_get_logs(job_name='dagster-run-%s' % run.run_id,
                                       namespace=helm_namespace)

    assert not result.get('errors')
    assert result['data']
    assert (
        result['data']['executeRunInProcess']['__typename'] ==
        'ExecuteRunInProcessSuccess'), 'no match, result: {}'.format(result)
Example 8
def test_execute_on_celery_k8s_default(  # pylint: disable=redefined-outer-name
        dagster_docker_image, dagster_instance, helm_namespace):
    run_config = merge_dicts(
        merge_yamls([
            os.path.join(get_test_project_environments_path(), "env.yaml"),
            os.path.join(get_test_project_environments_path(), "env_s3.yaml"),
        ]),
        get_celery_engine_config(dagster_docker_image=dagster_docker_image,
                                 job_namespace=helm_namespace),
    )

    pipeline_name = "demo_pipeline_celery"
    run = create_run_for_test(
        dagster_instance,
        pipeline_name=pipeline_name,
        run_config=run_config,
        mode="default",
    )

    with get_test_project_external_pipeline(
            pipeline_name) as external_pipeline:
        dagster_instance.launch_run(
            run.run_id,
            ReOriginatedExternalPipelineForTest(external_pipeline),
        )

        result = wait_for_job_and_get_raw_logs(job_name="dagster-run-%s" %
                                               run.run_id,
                                               namespace=helm_namespace)

        assert "PIPELINE_SUCCESS" in result, "no match, result: {}".format(
            result)

        updated_run = dagster_instance.get_run_by_id(run.run_id)
        assert updated_run.tags[DOCKER_IMAGE_TAG] == dagster_docker_image
Example 9
def test_single():
    instance = DagsterInstance.local_temp()
    pipeline_name = 'foo_pipeline'
    pipeline_run = create_run_for_test(instance, pipeline_name=pipeline_name)

    step_keys = ['A', 'B', 'C']

    with instance.compute_log_manager.watch(pipeline_run):
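        # Everything printed inside this block is captured as the run's stdout by the compute log manager.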
        print('outer 1')
        print('outer 2')
        print('outer 3')

        for step_key in step_keys:
            inner_step(instance, pipeline_run, step_key)

    for step_key in step_keys:
        stdout = instance.compute_log_manager.read_logs_file(
            pipeline_run.run_id, step_key, ComputeIOType.STDOUT)
        assert normalize_file_content(
            stdout.data) == expected_inner_output(step_key)

    full_out = instance.compute_log_manager.read_logs_file(
        pipeline_run.run_id, pipeline_name, ComputeIOType.STDOUT)

    assert normalize_file_content(full_out.data).startswith(
        expected_outer_prefix())
Example 10
def test_sync_run_launcher_run():
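    # SyncInMemoryRunLauncher executes the run synchronously inside launch_run, so it is complete afterwards.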
    with seven.TemporaryDirectory() as temp_dir:
        instance = DagsterInstance.local_temp(
            temp_dir,
            overrides={
                "run_launcher": {
                    "module":
                    "dagster.core.launcher.sync_in_memory_run_launcher",
                    "class": "SyncInMemoryRunLauncher",
                }
            },
        )

        external_repo = get_main_external_repo(instance)
        external_pipeline = external_repo.get_full_external_pipeline(
            "noop_pipeline")

        run = create_run_for_test(instance=instance,
                                  pipeline_name=external_pipeline.name)

        run = instance.run_launcher.launch_run(
            instance=instance, run=run, external_pipeline=external_pipeline)

        completed_run = instance.get_run_by_id(run.run_id)
        assert completed_run.is_success
Example 11
def test_k8s_run_launcher(dagster_instance_for_k8s_run_launcher,
                          helm_namespace_for_k8s_run_launcher):
    run_config = load_yaml_from_path(
        os.path.join(get_test_project_environments_path(), "env.yaml"))
    pipeline_name = "demo_pipeline"
    with get_test_project_workspace_and_external_pipeline(
            dagster_instance_for_k8s_run_launcher, pipeline_name) as (
                workspace,
                external_pipeline,
            ):
        reoriginated_pipeline = ReOriginatedExternalPipelineForTest(
            external_pipeline)
        run = create_run_for_test(
            dagster_instance_for_k8s_run_launcher,
            pipeline_name=pipeline_name,
            run_config=run_config,
            mode="default",
            external_pipeline_origin=reoriginated_pipeline.get_external_origin(),
            pipeline_code_origin=reoriginated_pipeline.get_python_origin(),
        )

        dagster_instance_for_k8s_run_launcher.launch_run(run.run_id, workspace)
        result = wait_for_job_and_get_raw_logs(
            job_name="dagster-run-%s" % run.run_id,
            namespace=helm_namespace_for_k8s_run_launcher)

        assert "PIPELINE_SUCCESS" in result, "no match, result: {}".format(
            result)
Example 12
def test_k8s_run_launcher_default(
    dagster_instance_for_k8s_run_launcher, helm_namespace_for_k8s_run_launcher, dagster_docker_image
):
    # sanity check that we have a K8sRunLauncher
    check.inst(dagster_instance_for_k8s_run_launcher.run_launcher, K8sRunLauncher)
    pods = DagsterKubernetesClient.production_client().core_api.list_namespaced_pod(
        namespace=helm_namespace_for_k8s_run_launcher
    )
    celery_pod_names = [p.metadata.name for p in pods.items if "celery-workers" in p.metadata.name]
    check.invariant(not celery_pod_names)

    run_config = merge_dicts(
        load_yaml_from_path(os.path.join(get_test_project_environments_path(), "env.yaml")),
        load_yaml_from_path(os.path.join(get_test_project_environments_path(), "env_s3.yaml")),
        {
            "execution": {
                "k8s": {
                    "config": {
                        "job_namespace": helm_namespace_for_k8s_run_launcher,
                        "job_image": dagster_docker_image,
                        "image_pull_policy": image_pull_policy(),
                        "env_config_maps": ["dagster-pipeline-env"]
                        + ([TEST_AWS_CONFIGMAP_NAME] if not IS_BUILDKITE else []),
                    }
                }
            },
        },
    )

    pipeline_name = "demo_k8s_executor_pipeline"
    tags = {"key": "value"}

    with get_test_project_location_and_external_pipeline(pipeline_name) as (
        location,
        external_pipeline,
    ):
        run = create_run_for_test(
            dagster_instance_for_k8s_run_launcher,
            pipeline_name=pipeline_name,
            run_config=run_config,
            tags=tags,
            mode="default",
            pipeline_snapshot=external_pipeline.pipeline_snapshot,
            execution_plan_snapshot=location.get_external_execution_plan(
                external_pipeline, run_config, "default", None, None
            ).execution_plan_snapshot,
        )
        dagster_instance_for_k8s_run_launcher.launch_run(
            run.run_id,
            ReOriginatedExternalPipelineForTest(external_pipeline),
        )

        result = wait_for_job_and_get_raw_logs(
            job_name="dagster-run-%s" % run.run_id, namespace=helm_namespace_for_k8s_run_launcher
        )

        assert "PIPELINE_SUCCESS" in result, "no match, result: {}".format(result)

        updated_run = dagster_instance_for_k8s_run_launcher.get_run_by_id(run.run_id)
        assert updated_run.tags[DOCKER_IMAGE_TAG] == get_test_project_docker_image()
Example 13
def test_multi():
    instance = DagsterInstance.local_temp()
    pipeline_name = 'foo_pipeline'
    pipeline_run = create_run_for_test(instance, pipeline_name=pipeline_name)
    context = get_multiprocessing_context()

    step_keys = ['A', 'B', 'C']

    with instance.compute_log_manager.watch(pipeline_run):
        print('outer 1')
        print('outer 2')
        print('outer 3')

        for step_key in step_keys:
            process = context.Process(target=execute_inner,
                                      args=(step_key, pipeline_run,
                                            instance.get_ref()))
            process.start()
            process.join()

    for step_key in step_keys:
        stdout = instance.compute_log_manager.read_logs_file(
            pipeline_run.run_id, step_key, ComputeIOType.STDOUT)
        assert normalize_file_content(
            stdout.data) == expected_inner_output(step_key)

    full_out = instance.compute_log_manager.read_logs_file(
        pipeline_run.run_id, pipeline_name, ComputeIOType.STDOUT)

    # The way that the multiprocess compute-logging interacts with pytest (which stubs out the
    # sys.stdout fileno) makes this difficult to test.  The pytest-captured stdout only captures
    # the stdout from the outer process, not also the inner process
    assert normalize_file_content(full_out.data).startswith(
        expected_outer_prefix())
Example 14
def test_k8s_run_launcher_default(
    dagster_instance_for_k8s_run_launcher, helm_namespace_for_k8s_run_launcher
):
    # sanity check that we have a K8sRunLauncher
    check.inst(dagster_instance_for_k8s_run_launcher.run_launcher, K8sRunLauncher)
    pods = DagsterKubernetesClient.production_client().core_api.list_namespaced_pod(
        namespace=helm_namespace_for_k8s_run_launcher
    )
    celery_pod_names = [p.metadata.name for p in pods.items if "celery-workers" in p.metadata.name]
    check.invariant(not celery_pod_names)

    run_config = load_yaml_from_path(os.path.join(test_project_environments_path(), "env.yaml"))
    pipeline_name = "demo_pipeline"
    tags = {"key": "value"}
    run = create_run_for_test(
        dagster_instance_for_k8s_run_launcher,
        pipeline_name=pipeline_name,
        run_config=run_config,
        tags=tags,
        mode="default",
    )

    dagster_instance_for_k8s_run_launcher.launch_run(
        run.run_id,
        ReOriginatedExternalPipelineForTest(get_test_project_external_pipeline(pipeline_name)),
    )

    result = wait_for_job_and_get_raw_logs(
        job_name="dagster-run-%s" % run.run_id, namespace=helm_namespace_for_k8s_run_launcher
    )

    assert "PIPELINE_SUCCESS" in result, "no match, result: {}".format(result)
Example 15
def test_k8s_run_launcher_celery(dagster_instance, helm_namespace):

    environment_dict = merge_dicts(
        merge_yamls([
            os.path.join(test_project_environments_path(), 'env.yaml'),
            os.path.join(test_project_environments_path(), 'env_s3.yaml'),
        ]),
        get_celery_engine_config(),
    )

    assert 'celery-k8s' in environment_dict['execution']

    pipeline_name = 'demo_pipeline_celery'
    tags = {'key': 'value'}
    run = create_run_for_test(
        dagster_instance,
        pipeline_name=pipeline_name,
        environment_dict=environment_dict,
        tags=tags,
        mode='default',
    )

    dagster_instance.launch_run(run.run_id)
    result = wait_for_job_and_get_logs(job_name='dagster-run-%s' % run.run_id,
                                       namespace=helm_namespace)

    assert not result.get('errors')
    assert result['data']
    assert (result['data']['startPipelineExecutionForCreatedRun']['__typename']
            == 'StartPipelineRunSuccess')
Example 16
def test_submit_run():
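    # With the MockedRunCoordinator override, submitted runs are only queued, never launched.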
    with instance_for_test(
        overrides={
            "run_coordinator": {
                "module": "dagster.core.test_utils",
                "class": "MockedRunCoordinator",
            }
        }
    ) as instance:
        with get_bar_workspace(instance) as workspace:
            external_pipeline = (
                workspace.get_repository_location("bar_repo_location")
                .get_repository("bar_repo")
                .get_full_external_pipeline("foo")
            )

            run = create_run_for_test(
                instance=instance,
                pipeline_name=external_pipeline.name,
                run_id="foo-bar",
                external_pipeline_origin=external_pipeline.get_external_origin(),
                pipeline_code_origin=external_pipeline.get_python_origin(),
            )

            instance.submit_run(run.run_id, workspace)

            assert len(instance.run_coordinator.queue()) == 1
            assert instance.run_coordinator.queue()[0].run_id == "foo-bar"
Example 17
def test_execute_run():
    with get_foo_pipeline_handle() as pipeline_handle:
        runner = CliRunner()

        with instance_for_test(
                overrides={
                    "compute_logs": {
                        "module":
                        "dagster.core.storage.noop_compute_log_manager",
                        "class": "NoOpComputeLogManager",
                    }
                }) as instance:
            run = create_run_for_test(instance,
                                      pipeline_name="foo",
                                      run_id="new_run")

            input_json = serialize_dagster_namedtuple(
                ExecuteRunArgs(
                    pipeline_origin=pipeline_handle.get_python_origin(),
                    pipeline_run_id=run.run_id,
                    instance_ref=instance.get_ref(),
                ))

            result = runner_execute_run(
                runner,
                [input_json],
            )

            assert "PIPELINE_SUCCESS" in result.stdout, "no match, result: {}".format(
                result.stdout)

            # Framework errors (e.g. running a run that has already run) still result in a non-zero error code
            result = runner.invoke(api.execute_run_command, [input_json])
            assert result.exit_code != 0, str(result.stdout)
Example 18
def test_execute_run_with_structured_logs():
    with get_foo_pipeline_handle() as pipeline_handle:
        runner = CliRunner()

        with instance_for_test(
            overrides={
                "compute_logs": {
                    "module": "dagster.core.storage.noop_compute_log_manager",
                    "class": "NoOpComputeLogManager",
                }
            }
        ) as instance:
            run = create_run_for_test(instance, pipeline_name="foo", run_id="new_run")

            input_json = serialize_dagster_namedtuple(
                ExecuteRunArgs(
                    pipeline_origin=pipeline_handle.get_origin(),
                    pipeline_run_id=run.run_id,
                    instance_ref=instance.get_ref(),
                )
            )

            result = runner_execute_run_with_structured_logs(runner, [input_json])

        assert "PIPELINE_SUCCESS" in result.stdout, "no match, result: {}".format(result)
Example 19
def test_execute_on_celery_resource_requirements(  # pylint: disable=redefined-outer-name
    dagster_docker_image, dagster_instance, helm_namespace
):
    run_config = merge_dicts(
        merge_yamls([os.path.join(test_project_environments_path(), 'env_s3.yaml')]),
        {
            'execution': {
                'celery-k8s': {
                    'config': {
                        'job_image': dagster_docker_image,
                        'job_namespace': helm_namespace,
                        'image_pull_policy': 'Always',
                        'env_config_maps': ['dagster-pipeline-env'],
                    }
                }
            },
        },
    )

    pipeline_name = 'resources_limit_pipeline_celery'
    run = create_run_for_test(
        dagster_instance, pipeline_name=pipeline_name, run_config=run_config, mode='default',
    )

    dagster_instance.launch_run(run.run_id, get_test_project_external_pipeline(pipeline_name))

    result = wait_for_job_and_get_logs(
        job_name='dagster-run-%s' % run.run_id, namespace=helm_namespace
    )

    assert not result.get('errors')
    assert result['data']
    assert (
        result['data']['executeRunInProcess']['__typename'] == 'ExecuteRunInProcessSuccess'
    ), 'no match, result: {}'.format(result)
Example 20
def test_sync_run_launcher_run():
    with instance_for_test(
            overrides={
                "run_launcher": {
                    "module": "dagster.core.launcher.sync_in_memory_run_launcher",
                    "class": "SyncInMemoryRunLauncher",
                }
            }) as instance:
        with get_main_workspace(instance) as workspace:
            location = workspace.get_repository_location(
                main_repo_location_name())
            external_repo = location.get_repository(main_repo_name())
            external_pipeline = external_repo.get_full_external_pipeline(
                "noop_pipeline")

            run = create_run_for_test(
                instance=instance,
                pipeline_name=external_pipeline.name,
                external_pipeline_origin=external_pipeline.get_external_origin(),
                pipeline_code_origin=external_pipeline.get_python_origin(),
            )

            run = instance.launch_run(run_id=run.run_id, workspace=workspace)

            completed_run = instance.get_run_by_id(run.run_id)
            assert completed_run.is_success
Example 21
def test_execute_subset_on_celery_k8s(  # pylint: disable=redefined-outer-name
        dagster_docker_image, dagster_instance, helm_namespace):
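    # Executes only the "count_letters" solid subset via solids_to_execute.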
    run_config = merge_dicts(
        merge_yamls([
            os.path.join(get_test_project_environments_path(),
                         "env_subset.yaml"),
            os.path.join(get_test_project_environments_path(), "env_s3.yaml"),
        ]),
        get_celery_engine_config(dagster_docker_image=dagster_docker_image,
                                 job_namespace=helm_namespace),
    )

    pipeline_name = "demo_pipeline_celery"
    run = create_run_for_test(
        dagster_instance,
        pipeline_name=pipeline_name,
        run_config=run_config,
        mode="default",
        solids_to_execute={"count_letters"},
    )

    dagster_instance.launch_run(
        run.run_id,
        ReOriginatedExternalPipelineForTest(
            get_test_project_external_pipeline(pipeline_name)),
    )

    result = wait_for_job_and_get_raw_logs(job_name="dagster-run-%s" %
                                           run.run_id,
                                           namespace=helm_namespace)

    assert "PIPELINE_SUCCESS" in result, "no match, result: {}".format(result)
Example 22
def test_failing_k8s_run_launcher(dagster_instance, helm_namespace):
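    # A run config with undefined fields should fail validation and never reach PIPELINE_SUCCESS.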
    run_config = {"blah blah this is wrong": {}}
    pipeline_name = "demo_pipeline"
    run = create_run_for_test(dagster_instance,
                              pipeline_name=pipeline_name,
                              run_config=run_config)
    dagster_instance.launch_run(
        run.run_id,
        ReOriginatedExternalPipelineForTest(
            get_test_project_external_pipeline(pipeline_name)),
    )
    result = wait_for_job_and_get_raw_logs(job_name="dagster-run-%s" %
                                           run.run_id,
                                           namespace=helm_namespace)

    assert "PIPELINE_SUCCESS" not in result, "no match, result: {}".format(
        result)

    event_records = dagster_instance.all_logs(run.run_id)

    assert any(
        'Undefined field "blah blah this is wrong"' in str(event)
        for event in event_records
    )
    assert any(
        'Missing required field "solids"' in str(event)
        for event in event_records
    )
Example 23
def test_execute_step_with_structured_logs(pipeline_handle):
    runner = CliRunner()

    with instance_for_test(
            overrides={
                "compute_logs": {
                    "module": "dagster.core.storage.noop_compute_log_manager",
                    "class": "NoOpComputeLogManager",
                }
            }) as instance:
        run = create_run_for_test(instance,
                                  pipeline_name="foo",
                                  run_id="new_run")

        input_json = serialize_dagster_namedtuple(
            ExecuteStepArgs(
                pipeline_origin=pipeline_handle.get_origin(),
                pipeline_run_id=run.run_id,
                instance_ref=instance.get_ref(),
            ))

        result = runner_execute_step_with_structured_logs(
            runner,
            [input_json],
        )

    assert "STEP_SUCCESS" in result.stdout
Example 24
def test_step_handler(kubeconfig_file):

    mock_k8s_client_batch_api = mock.MagicMock()
    handler = K8sStepHandler(
        job_config=DagsterK8sJobConfig(instance_config_map="foobar", job_image="bizbuz"),
        job_namespace="foo",
        load_incluster_config=False,
        kubeconfig_file=kubeconfig_file,
        k8s_client_batch_api=mock_k8s_client_batch_api,
    )

    with instance_for_test() as instance:
        run = create_run_for_test(
            instance,
            pipeline_name="bar",
        )
        handler.launch_step(
            StepHandlerContext(
                instance,
                ExecuteStepArgs(
                    reconstructable(bar).get_python_origin(), run.run_id, ["foo_solid"]
                ),
                {"foo_solid": {}},
            )
        )

        # Check that user defined k8s config was passed down to the k8s job.
        mock_method_calls = mock_k8s_client_batch_api.method_calls
        assert len(mock_method_calls) > 0
        method_name, _args, kwargs = mock_method_calls[0]
        assert method_name == "create_namespaced_job"
        assert kwargs["body"].spec.template.spec.containers[0].image == "bizbuz"
Example 25
def test_execute_on_celery_k8s(  # pylint: disable=redefined-outer-name
        dagster_docker_image, dagster_instance, helm_namespace):
    run_config = merge_dicts(
        merge_yamls([
            os.path.join(test_project_environments_path(), 'env.yaml'),
            os.path.join(test_project_environments_path(), 'env_s3.yaml'),
        ]),
        get_celery_engine_config(dagster_docker_image=dagster_docker_image,
                                 job_namespace=helm_namespace),
    )

    pipeline_name = 'demo_pipeline_celery'
    run = create_run_for_test(
        dagster_instance,
        pipeline_name=pipeline_name,
        run_config=run_config,
        mode='default',
    )

    dagster_instance.launch_run(
        run.run_id,
        ReOriginatedExternalPipelineForTest(
            get_test_project_external_pipeline(pipeline_name)),
    )

    result = wait_for_job_and_get_raw_logs(job_name='dagster-run-%s' %
                                           run.run_id,
                                           namespace=helm_namespace)

    assert 'PIPELINE_SUCCESS' in result, 'no match, result: {}'.format(result)
Example 26
def test_multi():
    with instance_for_test() as instance:
        pipeline_name = "foo_pipeline"
        pipeline_run = create_run_for_test(instance,
                                           pipeline_name=pipeline_name)

        step_keys = ["A", "B", "C"]

        with instance.compute_log_manager.watch(pipeline_run):
            print("outer 1")  # pylint: disable=print-call
            print("outer 2")  # pylint: disable=print-call
            print("outer 3")  # pylint: disable=print-call

            for step_key in step_keys:
                process = multiprocessing.Process(target=execute_inner,
                                                  args=(step_key, pipeline_run,
                                                        instance.get_ref()))
                process.start()
                process.join()

        for step_key in step_keys:
            stdout = instance.compute_log_manager.read_logs_file(
                pipeline_run.run_id, step_key, ComputeIOType.STDOUT)
            assert normalize_file_content(
                stdout.data) == expected_inner_output(step_key)

        full_out = instance.compute_log_manager.read_logs_file(
            pipeline_run.run_id, pipeline_name, ComputeIOType.STDOUT)

        # The way that the multiprocess compute-logging interacts with pytest (which stubs out the
        # sys.stdout fileno) makes this difficult to test.  The pytest-captured stdout only captures
        # the stdout from the outer process, not also the inner process
        assert normalize_file_content(full_out.data).startswith(
            expected_outer_prefix())
Example 27
def test_single():
    with instance_for_test() as instance:
        pipeline_name = "foo_pipeline"
        pipeline_run = create_run_for_test(instance,
                                           pipeline_name=pipeline_name)

        step_keys = ["A", "B", "C"]

        with instance.compute_log_manager.watch(pipeline_run):
            print("outer 1")  # pylint: disable=print-call
            print("outer 2")  # pylint: disable=print-call
            print("outer 3")  # pylint: disable=print-call

            for step_key in step_keys:
                inner_step(instance, pipeline_run, step_key)

        for step_key in step_keys:
            stdout = instance.compute_log_manager.read_logs_file(
                pipeline_run.run_id, step_key, ComputeIOType.STDOUT)
            assert normalize_file_content(
                stdout.data) == expected_inner_output(step_key)

        full_out = instance.compute_log_manager.read_logs_file(
            pipeline_run.run_id, pipeline_name, ComputeIOType.STDOUT)

        assert normalize_file_content(full_out.data).startswith(
            expected_outer_prefix())
Example 28
def test_k8s_run_launcher_celery(dagster_instance, helm_namespace):

    run_config = merge_dicts(
        merge_yamls([
            os.path.join(test_project_environments_path(), 'env.yaml'),
            os.path.join(test_project_environments_path(), 'env_s3.yaml'),
        ]),
        get_celery_engine_config(),
    )

    assert 'celery-k8s' in run_config['execution']

    pipeline_name = 'demo_pipeline_celery'
    tags = {'key': 'value'}
    run = create_run_for_test(
        dagster_instance,
        pipeline_name=pipeline_name,
        run_config=run_config,
        tags=tags,
        mode='default',
    )

    dagster_instance.launch_run(
        run.run_id, get_test_project_external_pipeline(pipeline_name))
    result = wait_for_job_and_get_logs(job_name='dagster-run-%s' % run.run_id,
                                       namespace=helm_namespace)

    assert not result.get('errors')
    assert result['data']
    assert (
        result['data']['executeRunInProcess']['__typename'] ==
        'ExecuteRunInProcessSuccess'), 'no match, result: {}'.format(result)
Example 29
def test_k8s_run_launcher_with_celery_executor_fails(
        dagster_docker_image, dagster_instance_for_k8s_run_launcher,
        helm_namespace_for_k8s_run_launcher):
    run_config = merge_dicts(
        merge_yamls([
            os.path.join(get_test_project_environments_path(), "env.yaml"),
            os.path.join(get_test_project_environments_path(), "env_s3.yaml"),
        ]),
        get_celery_engine_config(
            dagster_docker_image=dagster_docker_image,
            job_namespace=helm_namespace_for_k8s_run_launcher,
        ),
    )

    pipeline_name = "demo_pipeline_celery"

    with get_test_project_location_and_external_pipeline(pipeline_name) as (
            location,
            external_pipeline,
    ):
        run = create_run_for_test(
            dagster_instance_for_k8s_run_launcher,
            pipeline_name=pipeline_name,
            run_config=run_config,
            mode="default",
            pipeline_snapshot=external_pipeline.pipeline_snapshot,
            execution_plan_snapshot=location.get_external_execution_plan(
                external_pipeline, run_config, "default", None,
                None).execution_plan_snapshot,
        )
        dagster_instance_for_k8s_run_launcher.launch_run(
            run.run_id,
            ReOriginatedExternalPipelineForTest(external_pipeline),
        )

        timeout = datetime.timedelta(0, 120)
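        # The loop below polls the event log for up to this timeout, looking for PIPELINE_INIT_FAILURE.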

        found_pipeline_failure = False

        start_time = datetime.datetime.now()

        while datetime.datetime.now() < start_time + timeout:
            event_records = dagster_instance_for_k8s_run_launcher.all_logs(
                run.run_id)

            for event_record in event_records:
                if event_record.dagster_event:
                    if (event_record.dagster_event.event_type ==
                            DagsterEventType.PIPELINE_INIT_FAILURE):
                        found_pipeline_failure = True

            if found_pipeline_failure:
                break

            time.sleep(5)

        assert found_pipeline_failure
        assert (dagster_instance_for_k8s_run_launcher.get_run_by_id(
            run.run_id).status == PipelineRunStatus.FAILURE)
Example 30
def test_user_defined_k8s_config_in_run_tags(kubeconfig_file):
    # Construct a K8s run launcher in a fake k8s environment.
    mock_k8s_client_batch_api = mock.MagicMock()
    celery_k8s_run_launcher = CeleryK8sRunLauncher(
        instance_config_map="dagster-instance",
        postgres_password_secret="dagster-postgresql-secret",
        dagster_home="/opt/dagster/dagster_home",
        load_incluster_config=False,
        kubeconfig_file=kubeconfig_file,
        k8s_client_batch_api=mock_k8s_client_batch_api,
    )

    # Construct Dagster run tags with user defined k8s config.
    expected_resources = {
        "requests": {"cpu": "250m", "memory": "64Mi"},
        "limits": {"cpu": "500m", "memory": "2560Mi"},
    }
    user_defined_k8s_config = UserDefinedDagsterK8sConfig(
        container_config={"resources": expected_resources},
    )
    user_defined_k8s_config_json = json.dumps(user_defined_k8s_config.to_dict())
    tags = {"dagster-k8s/config": user_defined_k8s_config_json}

    # Create fake external pipeline.
    recon_pipeline = reconstructable(fake_pipeline)
    recon_repo = recon_pipeline.repository
    location_origin = InProcessRepositoryLocationOrigin(recon_repo)
    location_handle = location_origin.create_handle()
    repo_def = recon_repo.get_definition()
    repo_handle = RepositoryHandle(
        repository_name=repo_def.name,
        repository_location_handle=location_handle,
    )
    fake_external_pipeline = external_pipeline_from_recon_pipeline(
        recon_pipeline,
        solid_selection=None,
        repository_handle=repo_handle,
    )

    # Launch the run in a fake Dagster instance.
    with instance_for_test() as instance:
        celery_k8s_run_launcher.initialize(instance)
        pipeline_name = "demo_pipeline"
        run_config = {"execution": {"celery-k8s": {"config": {"job_image": "fake-image-name"}}}}
        run = create_run_for_test(
            instance,
            pipeline_name=pipeline_name,
            run_config=run_config,
            tags=tags,
        )
        celery_k8s_run_launcher.launch_run(instance, run, fake_external_pipeline)

    # Check that user defined k8s config was passed down to the k8s job.
    mock_method_calls = mock_k8s_client_batch_api.method_calls
    assert len(mock_method_calls) > 0
    method_name, _args, kwargs = mock_method_calls[0]
    assert method_name == "create_namespaced_job"
    job_resources = kwargs["body"].spec.template.spec.containers[0].resources
    assert job_resources == expected_resources