Example #1
def test_memoization_multiprocess_execution():

    with instance_for_test() as instance:
        result = execute_pipeline(
            reconstructable(get_version_strategy_pipeline),
            instance=instance,
            run_config={"execution": {"multiprocess": {}}},
        )

        assert result.success

        memoized_plan = create_execution_plan(
            get_version_strategy_pipeline(), instance_ref=instance.get_ref()
        )
        assert len(memoized_plan.step_keys_to_execute) == 0
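
Memoization only kicks in when the pipeline declares a version strategy and stores outputs through an IO manager that can detect previously materialized results. get_version_strategy_pipeline is not defined in this collection; a minimal sketch of the idea, assuming the VersionStrategy API of this Dagster era (the class and its get_solid_version hook are real, the pipeline body is a guess):

from dagster import VersionStrategy, pipeline, solid

class ConstantVersionStrategy(VersionStrategy):
    def get_solid_version(self, context):
        # any stable string: identical versions let the second execution
        # plan treat every step as already satisfied
        return "v1"

@solid
def noop(_):
    return 1

# the real pipeline must also wire in a versioned/memoizable IO manager
# (omitted here) for step_keys_to_execute to come back empty
@pipeline(version_strategy=ConstantVersionStrategy())
def get_version_strategy_pipeline():
    noop()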
Example #2
def test_do_it_live_emr():
    sync_code()

    result = execute_pipeline(
        reconstructable(define_pyspark_pipe),
        mode='prod',
        run_config={
            'solids': {'blah': {'config': {'foo': 'a string', 'bar': 123}}},
            'resources': {
                'pyspark_step_launcher': {'config': BASE_EMR_PYSPARK_STEP_LAUNCHER_CONFIG},
            },
            'storage': {'s3': {'config': {'s3_bucket': S3_BUCKET, 's3_prefix': 'test_pyspark'}}},
        },
    )
    assert result.success
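
BASE_EMR_PYSPARK_STEP_LAUNCHER_CONFIG lives elsewhere in the test module. A plausible shape, using keys from the emr_pyspark_step_launcher config schema in dagster_aws; treat every key and value as an assumption that may differ by version:

# hypothetical values only; the real dict sits alongside the test
BASE_EMR_PYSPARK_STEP_LAUNCHER_CONFIG = {
    "cluster_id": "j-XXXXXXXXXXXXX",  # existing EMR cluster that receives the steps
    "region_name": "us-west-1",
    "staging_bucket": "my-staging-bucket",  # S3 bucket for shipping code and step events
    "deploy_local_pipeline_package": True,
    "local_pipeline_package_path": ".",
}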
Example #3
def test_hello_world_graph():
    from dagstermill.examples.repository import build_hello_world_job
    from dagster import reconstructable

    with instance_for_test() as instance:
        result = None
        try:
            result = execute_pipeline(
                reconstructable(build_hello_world_job),
                instance=instance,
            )
            assert result.success
        finally:
            if result:
                cleanup_result_notebook(result)
Example #4
def test_map_basic_fail():
    with instance_for_test() as instance:
        result = execute_pipeline(
            reconstructable(dynamic_pipeline),
            instance=instance,
            run_config={"solids": {
                "emit": {
                    "config": {
                        "fail": True
                    }
                }
            }},
            raise_on_error=False,
        )
        assert not result.success
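
dynamic_pipeline, used here and in several later examples, is not shown. A sketch of the shape the tests imply, assuming the dynamic-output API importable from the top-level dagster package in 0.11+. The fail/range config keys and the solid names emit, multiply_by_two, sum_numbers, and double_total come from the tests; the real pipeline evidently also accepts an emit input and has an emit_ten solid (see the selection test at the end), both omitted here:

from dagster import DynamicOutput, DynamicOutputDefinition, Field, pipeline, solid

@solid(
    config_schema={
        "range": Field(int, is_required=False, default_value=3),
        "fail": Field(bool, is_required=False, default_value=False),
    },
    output_defs=[DynamicOutputDefinition(int)],
)
def emit(context):
    if context.solid_config["fail"]:
        raise Exception("emit was configured to fail")
    for i in range(context.solid_config["range"]):
        yield DynamicOutput(value=i, mapping_key=str(i))

@solid
def multiply_by_two(_, num):
    return num * 2

@solid
def sum_numbers(_, nums):
    return sum(nums)

@solid
def double_total(_, total):
    return total * 2

@pipeline
def dynamic_pipeline():
    double_total(sum_numbers(emit().map(multiply_by_two).collect()))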
Example #5
def test_pyspark_databricks(mock_wait, mock_get_step_events, mock_put_file, mock_submit_run):
    mock_get_step_events.return_value = execute_pipeline(
        pipeline=reconstructable(define_do_nothing_pipe), mode="local"
    ).events_by_step_key["do_nothing_solid.compute"]

    result = execute_pipeline(
        pipeline=reconstructable(define_do_nothing_pipe),
        mode="prod_s3",
        run_config={
            "resources": {
                "pyspark_step_launcher": {
                    "config": deep_merge_dicts(
                        BASE_DATABRICKS_PYSPARK_STEP_LAUNCHER_CONFIG,
                        {"databricks_host": "", "databricks_token": ""},
                    ),
                },
            },
        },
    )
    assert result.success
    assert mock_wait.call_count == 1
    assert mock_get_step_events.call_count == 1
    assert mock_put_file.call_count == 4
    assert mock_submit_run.call_count == 1
Example #6
def test_mem_storage_error_pipeline_multiprocess():
    with instance_for_test() as instance:
        with pytest.raises(
            DagsterUnmetExecutorRequirementsError,
            match="your pipeline includes solid outputs that will not be stored somewhere where other processes can retrieve them.",
        ):
            execute_pipeline(
                reconstructable(define_in_mem_pipeline),
                run_config={"execution": {"multiprocess": {}}},
                instance=instance,
                raise_on_error=False,
            )
Example #7
def test_diamond_multi_execution():
    with instance_for_test() as instance:
        pipe = reconstructable(define_diamond_pipeline)
        result = execute_pipeline(
            pipe,
            run_config={"execution": {"multiprocess": {}}},
            instance=instance,
        )
        assert result.success

        assert result.result_for_solid("adder").output_value() == 11
Example #8
def test_priorities_mp():
    with instance_for_test() as instance:
        pipe = reconstructable(priority_test)
        result = execute_pipeline(
            pipe,
            {
                "execution": {"multiprocess": {"config": {"max_concurrent": 1}}},
                "storage": {"filesystem": {}},
            },
            instance=instance,
        )
        assert result.success
        assert [
            str(event.solid_handle) for event in result.step_event_list if event.is_step_success
        ] == ["high", "high_2", "none", "none_2", "low", "low_2"]
Example #9
def test_pipeline_failure_sensor_has_request():
    with instance_for_test() as instance:
        with suppress(Exception):
            execute_pipeline(
                reconstructable(your_pipeline_name),
                run_config={"solids": {"foo": {"config": {"fail": True}}}},
                instance=instance,
            )

        context = SensorExecutionContext(
            instance_ref=instance.get_ref(), last_run_key=None, last_completion_time=None
        )

        requests = pipeline_failure_sensor.get_execution_data(context)
        assert len(requests) == 1
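
pipeline_failure_sensor here is an ordinary sensor definition whose evaluation function inspects the instance for failed runs. A sketch of one way to write it against this era's API; the target pipeline name and the import paths are assumptions (the run-filter classes lived under dagster.core.storage.pipeline_run in some versions):

from dagster import RunRequest, sensor
from dagster.core.storage.pipeline_run import PipelineRunStatus, PipelineRunsFilter

@sensor(pipeline_name="cleanup_pipeline")  # hypothetical downstream pipeline
def pipeline_failure_sensor(context):
    failed_runs = context.instance.get_runs(
        filters=PipelineRunsFilter(statuses=[PipelineRunStatus.FAILURE])
    )
    for run in failed_runs:
        # one request per failed run, keyed by run id so requests are not duplicated
        yield RunRequest(run_key=run.run_id, run_config={})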
Example #10
def test_map_empty():
    with instance_for_test() as instance:
        result = execute_pipeline(
            reconstructable(dynamic_pipeline),
            instance=instance,
            run_config={"solids": {
                "emit": {
                    "config": {
                        "range": 0
                    }
                }
            }},
        )
        assert result.success
        assert result.result_for_solid("double_total").output_value() == 0
Example #11
def test_fan_out_in_out_in(run_config):
    with instance_for_test() as instance:
        result = execute_pipeline(
            reconstructable(fan_repeat),
            instance=instance,
            run_config=run_config,
        )
        assert result.success
        # each of (0, 1, 2) is doubled three times -> (0, 8, 16), which sums to 24
        assert result.result_for_solid("sum_numbers").output_value() == 24

        empty_result = execute_pipeline(
            reconstructable(fan_repeat),
            instance=instance,
            run_config={"solids": {
                "emit": {
                    "config": {
                        "range": 0
                    }
                }
            }},
        )
        assert empty_result.success
        assert empty_result.result_for_solid("sum_numbers").output_value() == 0
Example #12
def test_error_pipeline_multiprocess():
    with instance_for_test() as instance:
        result = execute_pipeline(
            reconstructable(define_error_pipeline),
            run_config={"storage": {"filesystem": {}}, "execution": {"multiprocess": {}}},
            instance=instance,
        )
        assert not result.success
Example #13
def test_optional_outputs():
    with instance_for_test() as instance:
        single_result = execute_pipeline(optional_stuff)
        assert single_result.success
        assert not [event for event in single_result.step_event_list if event.is_step_failure]
        assert len([event for event in single_result.step_event_list if event.is_step_skipped]) == 2

        multi_result = execute_pipeline(
            reconstructable(optional_stuff),
            run_config={"storage": {"filesystem": {}}, "execution": {"multiprocess": {}}},
            instance=instance,
        )
        assert multi_result.success
        assert not [event for event in multi_result.step_event_list if event.is_step_failure]
        assert len([event for event in multi_result.step_event_list if event.is_step_skipped]) == 2
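
optional_stuff relies on skippable outputs: an OutputDefinition(is_required=False) that is never yielded causes every step downstream of it to be skipped rather than failed. A sketch consistent with the two skipped steps the test expects (all names hypothetical):

from dagster import Output, OutputDefinition, pipeline, solid

@solid(
    output_defs=[
        OutputDefinition(int, "taken", is_required=False),
        OutputDefinition(int, "skipped", is_required=False),
    ]
)
def branch(_):
    # only "taken" is ever yielded; consumers of "skipped" are skipped
    yield Output(1, "taken")

@solid
def consume(_, num):
    return num

@pipeline
def optional_stuff():
    taken, skipped = branch()
    consume.alias("use_taken")(taken)
    # a two-step chain hanging off the missing output -> two skipped steps
    consume.alias("downstream")(consume.alias("use_skipped")(skipped))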
Example #14
def test_do_it_live_databricks_s3():
    result = execute_pipeline(
        reconstructable(define_pyspark_pipe),
        mode="prod_s3",
        run_config={
            "solids": {"blah": {"config": {"foo": "a string", "bar": 123}}},
            "resources": {
                "pyspark_step_launcher": {"config": BASE_DATABRICKS_PYSPARK_STEP_LAUNCHER_CONFIG},
                "io_manager": {
                    "config": {"s3_bucket": "elementl-databricks", "s3_prefix": "dagster-test"}
                },
            },
        },
    )
    assert result.success
Example #15
def test_invalid_instance():
    result = execute_pipeline(
        reconstructable(define_diamond_pipeline),
        run_config={"storage": {"filesystem": {}}, "execution": {"multiprocess": {}}},
        instance=DagsterInstance.ephemeral(),
        raise_on_error=False,
    )
    assert not result.success
    assert len(result.event_list) == 1
    assert result.event_list[0].is_failure
    assert (
        result.event_list[0].pipeline_init_failure_data.error.cls_name
        == "DagsterUnmetExecutorRequirementsError"
    )
    assert "non-ephemeral instance" in result.event_list[0].pipeline_init_failure_data.error.message
Example #16
def test_partial_reexecute_multiproc():
    with instance_for_test() as instance:
        result_1 = execute_pipeline(
            reconstructable(dynamic_pipeline),
            run_config={"execution": {"multiprocess": {}}},
            instance=instance,
        )
        assert result_1.success

        result_2 = reexecute_pipeline(
            reconstructable(dynamic_pipeline),
            parent_run_id=result_1.run_id,
            instance=instance,
            step_selection=["sum_numbers*"],
        )
        assert result_2.success

        result_3 = reexecute_pipeline(
            reconstructable(dynamic_pipeline),
            parent_run_id=result_1.run_id,
            instance=instance,
            step_selection=["multiply_by_two[1]*"],
        )
        assert result_3.success
Example #17
def test_multiprocessing_resource_teardown_failure():
    pipeline = reconstructable(define_resource_teardown_failure_pipeline)
    result = execute_pipeline(
        pipeline,
        run_config={"storage": {"filesystem": {}}, "execution": {"multiprocess": {}}},
        instance=DagsterInstance.local_temp(),
        raise_on_error=False,
    )
    assert result.success
    error_events = [
        event
        for event in result.event_list
        if event.is_engine_event and event.event_specific_data.error
    ]
    assert len(error_events) > 1
Example #18
def test_logging():
    with safe_tempfile_path() as test_file_path:
        with safe_tempfile_path() as critical_file_path:
            with instance_for_test() as instance:
                execute_pipeline(
                    reconstructable(hello_logging_pipeline),
                    {
                        "loggers": {
                            "test": {
                                "config": {
                                    "name": "test",
                                    "file_path": test_file_path,
                                    "log_level": "DEBUG",
                                }
                            },
                            "critical": {
                                "config": {
                                    "name": "critical",
                                    "file_path": critical_file_path,
                                    "log_level": "CRITICAL",
                                }
                            },
                        }
                    },
                    instance=instance,
                )

                with open(test_file_path, "r") as test_file:
                    records = [
                        json.loads(line)
                        for line in test_file.read().strip("\n").split("\n")
                        if line
                    ]

                with open(critical_file_path, "r") as critical_file:
                    critical_records = [
                        json.loads(line)
                        for line in critical_file.read().strip("\n").split("\n")
                        if line
                    ]

    messages = [x["dagster_meta"]["orig_message"] for x in records]

    assert "Hello, there!" in messages

    critical_messages = [x["dagster_meta"]["orig_message"] for x in critical_records]

    assert "Hello, there!" not in critical_messages
Example #19
def test_no_postgres(kubeconfig_file):
    # Construct a K8s run launcher in a fake k8s environment.
    mock_k8s_client_batch_api = mock.MagicMock()
    k8s_run_launcher = K8sRunLauncher(
        service_account_name="dagit-admin",
        instance_config_map="dagster-instance",
        dagster_home="/opt/dagster/dagster_home",
        job_image="fake_job_image",
        load_incluster_config=False,
        kubeconfig_file=kubeconfig_file,
        k8s_client_batch_api=mock_k8s_client_batch_api,
    )

    # Create fake external pipeline.
    recon_pipeline = reconstructable(fake_pipeline)
    recon_repo = recon_pipeline.repository
    repo_def = recon_repo.get_definition()
    location_origin = InProcessRepositoryLocationOrigin(recon_repo)
    with location_origin.create_location() as location:
        repo_handle = RepositoryHandle(
            repository_name=repo_def.name,
            repository_location=location,
        )
        fake_external_pipeline = external_pipeline_from_recon_pipeline(
            recon_pipeline,
            solid_selection=None,
            repository_handle=repo_handle,
        )

        # Launch the run in a fake Dagster instance.
        with instance_for_test() as instance:
            pipeline_name = "demo_pipeline"
            run = create_run_for_test(instance, pipeline_name=pipeline_name)
            k8s_run_launcher.register_instance(instance)
            run = k8s_run_launcher.launch_run(run, fake_external_pipeline)

            updated_run = instance.get_run_by_id(run.run_id)
            assert updated_run.tags[DOCKER_IMAGE_TAG] == "fake_job_image"

        # Check that user defined k8s config was passed down to the k8s job.
        mock_method_calls = mock_k8s_client_batch_api.method_calls
        assert len(mock_method_calls) > 0
        method_name, _args, kwargs = mock_method_calls[0]
        assert method_name == "create_namespaced_job"
        assert DAGSTER_PG_PASSWORD_ENV_VAR not in [
            env.name
            for env in kwargs["body"].spec.template.spec.containers[0].env
        ]
Example #20
def test_custom_executor_fn():
    _explode_pid["pid"] = os.getpid()

    with instance_for_test() as instance:
        run_config = {
            "solids": {"solid_that_uses_adder_resource": {"inputs": {"number": {"value": 4}}}},
            "intermediate_storage": {"filesystem": {}},
        }
        execution_plan = create_execution_plan(
            pipeline_with_mode,
            run_config,
        )

        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=pipeline_with_mode,
            execution_plan=execution_plan,
            run_config=run_config,
        )

        recon_pipeline = reconstructable(pipeline_with_mode)

        execute_run_host_mode(
            ExplodingTestPipeline(recon_pipeline.repository, recon_pipeline.pipeline_name),
            pipeline_run,
            instance,
            executor_defs=[test_executor],
            raise_on_error=True,
        )

        assert instance.get_run_by_id(pipeline_run.run_id).status == PipelineRunStatus.SUCCESS

        logs = instance.all_logs(pipeline_run.run_id)
        assert any(
            e.is_dagster_event and "Executing steps using multiprocess executor" in e.message
            for e in logs
        )
Example #21
def test_local():
    result = execute_pipeline(
        pipeline=reconstructable(define_pyspark_pipe),
        mode="local",
        run_config={"solids": {"blah": {"config": {"foo": "a string", "bar": 123}}}},
    )
    assert result.success
Example #22
def test_dynamic_execute():
    from .test_jobs import define_dynamic_job

    TestStepHandler.reset()
    with instance_for_test() as instance:
        result = execute_pipeline(
            reconstructable(define_dynamic_job),
            instance=instance,
        )
        TestStepHandler.wait_for_processes()

    assert result.success
    assert (
        len([e for e in result.event_list if e.event_type_value == DagsterEventType.STEP_START.value])
        == 11
    )
Example #23
def test_multiproc():

    with instance_for_test() as instance:

        result = execute_pipeline(
            reconstructable(multiproc_test),
            run_config={
                "resources": {
                    "io_manager": {
                        "config": {"base_dir": path.join(instance.root_directory, "storage")}
                    }
                },
            },
            instance=instance,
        )
        assert result.success
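
The io_manager resource key configured above is presumably Dagster's built-in fs_io_manager, whose base_dir the run_config overrides so outputs land inside the test instance's directory. A sketch of the wiring under that assumption:

from dagster import ModeDefinition, fs_io_manager, pipeline, solid

@solid
def return_one(_):
    return 1

@pipeline(mode_defs=[ModeDefinition(resource_defs={"io_manager": fs_io_manager})])
def multiproc_test():
    return_one()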
Example #24
def test_launcher_requests_retry():
    mode = "request_retry"
    with tempfile.TemporaryDirectory() as tmpdir:
        result = execute_pipeline(
            pipeline=reconstructable(define_basic_pipeline),
            mode=mode,
            run_config=make_run_config(tmpdir, mode),
        )
        assert result.success
        assert result.result_for_solid("return_two").output_value() == 2
        assert result.result_for_solid("add_one").output_value() == 3
        for step_key, events in result.events_by_step_key.items():
            if step_key:
                event_types = [event.event_type for event in events]
                assert DagsterEventType.STEP_UP_FOR_RETRY in event_types
                assert DagsterEventType.STEP_RESTARTED in event_types
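
STEP_UP_FOR_RETRY and STEP_RESTARTED are emitted when a step raises RetryRequested; in this test the step launcher behind the request_retry mode presumably does so on each step's first attempt. A minimal illustration of the mechanism at the solid level (the on-disk attempt marker is hypothetical, chosen so a retried step in a fresh process can see it):

import os
import tempfile

from dagster import RetryRequested, solid

@solid
def flaky_return_two(context):
    marker = os.path.join(tempfile.gettempdir(), context.run_id + "_flaky_marker")
    if not os.path.exists(marker):
        open(marker, "w").close()  # record the first attempt
        raise RetryRequested(max_retries=1)
    return 2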
Example #25
def test_reexec_dynamic_with_optional_output_job_2():
    with instance_for_test() as instance:
        result = dynamic_with_optional_output_job().execute_in_process(
            instance=instance)

        # re-execute the step where the source yielded an output
        re_result = reexecute_pipeline(
            reconstructable(dynamic_with_optional_output_job),
            parent_run_id=result.run_id,
            instance=instance,
            step_selection=["echo[1]"],
        )
        assert re_result.success
        assert re_result.result_for_solid("echo").output_value() == {
            "1": 1,
        }
Example #26
def test_map_selection(run_config):
    with instance_for_test() as instance:
        result = execute_pipeline(
            reconstructable(dynamic_pipeline),
            instance=instance,
            run_config=merge_dicts({"solids": {"emit": {"inputs": {"num": 2}}}}, run_config),
            solid_selection=["emit*", "emit_ten"],
        )
        assert result.success
        assert result.result_for_solid("double_total").output_value() == 40