def test_memoization_multiprocess_execution():
    with instance_for_test() as instance:
        result = execute_pipeline(
            reconstructable(get_version_strategy_pipeline),
            instance=instance,
            run_config={"execution": {"multiprocess": {}}},
        )
        assert result.success

        memoized_plan = create_execution_plan(
            get_version_strategy_pipeline(), instance_ref=instance.get_ref()
        )
        assert len(memoized_plan.step_keys_to_execute) == 0

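# For context, a hedged sketch of what get_version_strategy_pipeline could look
# like, assuming the 0.12-era memoization API (VersionStrategy, a versioned
# filesystem io_manager, and the memoized-run tag). The real definition lives
# elsewhere in the suite; the names and constant version string are assumptions.
from dagster import ModeDefinition, VersionStrategy, pipeline, solid
from dagster.core.storage.memoizable_io_manager import versioned_filesystem_io_manager
from dagster.core.storage.tags import MEMOIZED_RUN_TAG


def get_version_strategy_pipeline():
    @solid
    def my_solid():
        return 5

    class ConstantVersionStrategy(VersionStrategy):
        def get_solid_version(self, _):
            # A constant version means every step is memoized after one run.
            return "foo"

    @pipeline(
        version_strategy=ConstantVersionStrategy(),
        mode_defs=[ModeDefinition(resource_defs={"io_manager": versioned_filesystem_io_manager})],
        tags={MEMOIZED_RUN_TAG: "true"},
    )
    def version_strategy_pipeline():
        my_solid()

    return version_strategy_pipeline
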
def test_do_it_live_emr():
    sync_code()

    result = execute_pipeline(
        reconstructable(define_pyspark_pipe),
        mode='prod',
        run_config={
            'solids': {'blah': {'config': {'foo': 'a string', 'bar': 123}}},
            'resources': {
                'pyspark_step_launcher': {'config': BASE_EMR_PYSPARK_STEP_LAUNCHER_CONFIG},
            },
            'storage': {'s3': {'config': {'s3_bucket': S3_BUCKET, 's3_prefix': 'test_pyspark'}}},
        },
    )
    assert result.success

def test_hello_world_graph():
    from dagstermill.examples.repository import build_hello_world_job
    from dagster import reconstructable

    with instance_for_test() as instance:
        result = None
        try:
            result = execute_pipeline(
                reconstructable(build_hello_world_job),
                instance=instance,
            )
            assert result.success
        finally:
            if result:
                cleanup_result_notebook(result)

def test_map_basic_fail():
    with instance_for_test() as instance:
        result = execute_pipeline(
            reconstructable(dynamic_pipeline),
            instance=instance,
            run_config={"solids": {"emit": {"config": {"fail": True}}}},
            raise_on_error=False,
        )
        assert not result.success

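# A hedged sketch of the dynamic_pipeline these map/reexecution tests share,
# assuming the 0.12-era dynamic orchestration API. The "fail"/"range" config
# keys mirror the run_configs used in these tests; the real definition is
# richer (a later test also gives emit a "num" input and selects an emit_ten
# solid, which this sketch omits).
from dagster import DynamicOutput, DynamicOutputDefinition, Field, pipeline, solid


@solid(
    config_schema={
        "range": Field(int, default_value=3),
        "fail": Field(bool, default_value=False),
    },
    output_defs=[DynamicOutputDefinition(int)],
)
def emit(context):
    if context.solid_config["fail"]:
        raise Exception("emit was configured to fail")
    for i in range(context.solid_config["range"]):
        yield DynamicOutput(value=i, mapping_key=str(i))


@solid
def multiply_by_two(_, num):
    return num * 2


@solid
def sum_numbers(_, nums):
    return sum(nums)


@solid
def double_total(_, total):
    return total * 2


def dynamic_pipeline():
    @pipeline
    def _dynamic_pipeline():
        # Fan out over the dynamic outputs, then fan back in to sum them.
        double_total(sum_numbers(emit().map(multiply_by_two).collect()))

    return _dynamic_pipeline
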
def test_pyspark_databricks(mock_wait, mock_get_step_events, mock_put_file, mock_submit_run):
    mock_get_step_events.return_value = execute_pipeline(
        pipeline=reconstructable(define_do_nothing_pipe), mode="local"
    ).events_by_step_key["do_nothing_solid.compute"]

    result = execute_pipeline(
        pipeline=reconstructable(define_do_nothing_pipe),
        mode="prod_s3",
        run_config={
            "resources": {
                "pyspark_step_launcher": {
                    "config": deep_merge_dicts(
                        BASE_DATABRICKS_PYSPARK_STEP_LAUNCHER_CONFIG,
                        {"databricks_host": "", "databricks_token": ""},
                    ),
                },
            },
        },
    )
    assert result.success
    assert mock_wait.call_count == 1
    assert mock_get_step_events.call_count == 1
    assert mock_put_file.call_count == 4
    assert mock_submit_run.call_count == 1

def test_invalid_instance():
    result = execute_pipeline(
        reconstructable(define_diamond_pipeline),
        environment_dict={'storage': {'filesystem': {}}, 'execution': {'multiprocess': {}}},
        instance=DagsterInstance.ephemeral(),
        raise_on_error=False,
    )
    assert not result.success
    assert len(result.event_list) == 1
    assert result.event_list[0].is_failure
    assert (
        result.event_list[0].pipeline_init_failure_data.error.cls_name
        == 'DagsterUnmetExecutorRequirementsError'
    )
    assert 'non-ephemeral instance' in result.event_list[0].pipeline_init_failure_data.error.message

def test_mem_storage_error_pipeline_multiprocess():
    with instance_for_test() as instance:
        with pytest.raises(
            DagsterUnmetExecutorRequirementsError,
            match="your pipeline includes solid outputs that will not be stored somewhere where other processes can retrieve them.",
        ):
            execute_pipeline(
                reconstructable(define_in_mem_pipeline),
                run_config={"execution": {"multiprocess": {}}},
                instance=instance,
                raise_on_error=False,
            )

def test_diamond_multi_execution():
    with instance_for_test() as instance:
        pipe = reconstructable(define_diamond_pipeline)
        result = execute_pipeline(
            pipe,
            run_config={"execution": {"multiprocess": {}}},
            instance=instance,
        )
        assert result.success
        assert result.result_for_solid("adder").output_value() == 11

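# One hypothetical shape for define_diamond_pipeline that satisfies the
# assertion above: (2 + 3) + (2 * 3) == 11. The real solid bodies may differ.
from dagster import pipeline, solid


@solid
def return_two(_):
    return 2


@solid
def add_three(_, num):
    return num + 3


@solid
def mult_three(_, num):
    return num * 3


@solid
def adder(_, left, right):
    return left + right


def define_diamond_pipeline():
    @pipeline
    def diamond_pipeline():
        two = return_two()
        adder(left=add_three(two), right=mult_three(two))

    return diamond_pipeline
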
def test_priorities_mp():
    with instance_for_test() as instance:
        pipe = reconstructable(priority_test)
        result = execute_pipeline(
            pipe,
            {
                "execution": {"multiprocess": {"config": {"max_concurrent": 1}}},
                "storage": {"filesystem": {}},
            },
            instance=instance,
        )
        assert result.success
        assert [
            str(event.solid_handle) for event in result.step_event_list if event.is_step_success
        ] == ["high", "high_2", "none", "none_2", "low", "low_2"]

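# A hedged sketch of priority_test: independent solids whose "dagster/priority"
# tags drive the order asserted above once max_concurrent=1 serializes the
# steps. The tag values and solid bodies are assumptions; only the names are
# given by the test.
from dagster import pipeline, solid


def make_tagged_solid(name, priority=None):
    tags = {"dagster/priority": str(priority)} if priority is not None else None

    @solid(name=name, tags=tags)
    def tagged_solid(_):
        pass

    return tagged_solid


def priority_test():
    @pipeline
    def _priority_test():
        make_tagged_solid("high", 3)()
        make_tagged_solid("high_2", 3)()
        make_tagged_solid("none")()
        make_tagged_solid("none_2")()
        make_tagged_solid("low", -3)()
        make_tagged_solid("low_2", -3)()

    return _priority_test
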
def test_pipeline_failure_sensor_has_request():
    with instance_for_test() as instance:
        with suppress(Exception):
            execute_pipeline(
                reconstructable(your_pipeline_name),
                run_config={"solids": {"foo": {"config": {"fail": True}}}},
                instance=instance,
            )
        context = SensorExecutionContext(
            instance_ref=instance.get_ref(), last_run_key=None, last_completion_time=None
        )
        requests = pipeline_failure_sensor.get_execution_data(context)
        assert len(requests) == 1

def test_map_empty():
    with instance_for_test() as instance:
        result = execute_pipeline(
            reconstructable(dynamic_pipeline),
            instance=instance,
            run_config={"solids": {"emit": {"config": {"range": 0}}}},
        )
        assert result.success
        assert result.result_for_solid("double_total").output_value() == 0

def test_fan_out_in_out_in(run_config):
    with instance_for_test() as instance:
        result = execute_pipeline(
            reconstructable(fan_repeat),
            instance=instance,
            run_config=run_config,
        )
        assert result.success
        # (0, 1, 2) x 2 x 2 x 2 = (0, 8, 16)
        assert result.result_for_solid("sum_numbers").output_value() == 24

        empty_result = execute_pipeline(
            reconstructable(fan_repeat),
            instance=instance,
            run_config={"solids": {"emit": {"config": {"range": 0}}}},
        )
        assert empty_result.success
        assert empty_result.result_for_solid("sum_numbers").output_value() == 0

def test_multiprocessing_resource_teardown_failure():
    pipeline = reconstructable(define_resource_teardown_failure_pipeline)
    result = execute_pipeline(
        pipeline,
        environment_dict={'storage': {'filesystem': {}}, 'execution': {'multiprocess': {}}},
        instance=DagsterInstance.local_temp(),
        raise_on_error=False,
    )
    assert result.success
    error_events = [
        event
        for event in result.event_list
        if event.is_engine_event and event.event_specific_data.error
    ]
    assert len(error_events) > 1

def test_error_pipeline_multiprocess():
    with instance_for_test() as instance:
        result = execute_pipeline(
            reconstructable(define_error_pipeline),
            run_config={"storage": {"filesystem": {}}, "execution": {"multiprocess": {}}},
            instance=instance,
        )
        assert not result.success

def test_optional_outputs():
    with instance_for_test() as instance:
        single_result = execute_pipeline(optional_stuff)
        assert single_result.success
        assert not [event for event in single_result.step_event_list if event.is_step_failure]
        assert len([event for event in single_result.step_event_list if event.is_step_skipped]) == 2

        multi_result = execute_pipeline(
            reconstructable(optional_stuff),
            run_config={"storage": {"filesystem": {}}, "execution": {"multiprocess": {}}},
            instance=instance,
        )
        assert multi_result.success
        assert not [event for event in multi_result.step_event_list if event.is_step_failure]
        assert len([event for event in multi_result.step_event_list if event.is_step_skipped]) == 2

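# A plausible sketch of optional_stuff: a solid with two optional outputs that
# emits only one, so the two solids downstream of the missing output are the
# two skipped steps asserted above. Names and shapes here are assumptions.
from dagster import Output, OutputDefinition, pipeline, solid


@solid(
    output_defs=[
        OutputDefinition(int, "present", is_required=False),
        OutputDefinition(int, "missing", is_required=False),
    ]
)
def emit_optional(_):
    yield Output(1, "present")  # "missing" is never yielded


@solid
def echo(_, num):
    return num


@pipeline
def optional_stuff():
    present, missing = emit_optional()
    echo(present)
    # Both of these are skipped because their upstream output never arrives.
    echo.alias("skipped")(echo.alias("also_skipped")(missing))
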
def test_do_it_live_databricks_s3():
    result = execute_pipeline(
        reconstructable(define_pyspark_pipe),
        mode="prod_s3",
        run_config={
            "solids": {"blah": {"config": {"foo": "a string", "bar": 123}}},
            "resources": {
                "pyspark_step_launcher": {"config": BASE_DATABRICKS_PYSPARK_STEP_LAUNCHER_CONFIG},
                "io_manager": {
                    "config": {"s3_bucket": "elementl-databricks", "s3_prefix": "dagster-test"}
                },
            },
        },
    )
    assert result.success

def test_invalid_instance():
    result = execute_pipeline(
        reconstructable(define_diamond_pipeline),
        run_config={"storage": {"filesystem": {}}, "execution": {"multiprocess": {}}},
        instance=DagsterInstance.ephemeral(),
        raise_on_error=False,
    )
    assert not result.success
    assert len(result.event_list) == 1
    assert result.event_list[0].is_failure
    assert (
        result.event_list[0].pipeline_init_failure_data.error.cls_name
        == "DagsterUnmetExecutorRequirementsError"
    )
    assert "non-ephemeral instance" in result.event_list[0].pipeline_init_failure_data.error.message

def test_launcher_requests_retry():
    mode = 'request_retry'
    with seven.TemporaryDirectory() as tmpdir:
        result = execute_pipeline(
            pipeline=reconstructable(define_basic_pipeline),
            mode=mode,
            environment_dict=make_environment_dict(tmpdir, mode),
        )
        assert result.result_for_solid('return_two').output_value() == 2
        assert result.result_for_solid('add_one').output_value() == 3
        for step_key, events in result.events_by_step_key.items():
            if step_key:
                event_types = [event.event_type for event in events]
                assert DagsterEventType.STEP_UP_FOR_RETRY in event_types
                assert DagsterEventType.STEP_RESTARTED in event_types

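# For reference, a minimal sketch of define_basic_pipeline consistent with the
# assertions above (return_two -> add_one). The 'request_retry' mode and the
# step launcher resource that raises RetryRequested are elided assumptions.
from dagster import pipeline, solid


@solid
def return_two(_):
    return 2


@solid
def add_one(_, num):
    return num + 1


def define_basic_pipeline():
    @pipeline  # the real definition also carries mode_defs for 'request_retry'
    def basic_pipeline():
        add_one(return_two())

    return basic_pipeline
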
def test_partial_reexecute_multiproc():
    with instance_for_test() as instance:
        result_1 = execute_pipeline(
            reconstructable(dynamic_pipeline),
            run_config={"execution": {"multiprocess": {}}},
            instance=instance,
        )
        assert result_1.success

        result_2 = reexecute_pipeline(
            reconstructable(dynamic_pipeline),
            parent_run_id=result_1.run_id,
            instance=instance,
            step_selection=["sum_numbers*"],
        )
        assert result_2.success

        result_3 = reexecute_pipeline(
            reconstructable(dynamic_pipeline),
            parent_run_id=result_1.run_id,
            instance=instance,
            step_selection=["multiply_by_two[1]*"],
        )
        assert result_3.success

def test_multiprocessing_resource_teardown_failure():
    pipeline = reconstructable(define_resource_teardown_failure_pipeline)
    result = execute_pipeline(
        pipeline,
        run_config={"storage": {"filesystem": {}}, "execution": {"multiprocess": {}}},
        instance=DagsterInstance.local_temp(),
        raise_on_error=False,
    )
    assert result.success
    error_events = [
        event
        for event in result.event_list
        if event.is_engine_event and event.event_specific_data.error
    ]
    assert len(error_events) > 1

def test_logging():
    with safe_tempfile_path() as test_file_path:
        with safe_tempfile_path() as critical_file_path:
            with instance_for_test() as instance:
                execute_pipeline(
                    reconstructable(hello_logging_pipeline),
                    {
                        "loggers": {
                            "test": {
                                "config": {
                                    "name": "test",
                                    "file_path": test_file_path,
                                    "log_level": "DEBUG",
                                }
                            },
                            "critical": {
                                "config": {
                                    "name": "critical",
                                    "file_path": critical_file_path,
                                    "log_level": "CRITICAL",
                                }
                            },
                        }
                    },
                    instance=instance,
                )

                with open(test_file_path, "r") as test_file:
                    records = [
                        json.loads(line)
                        for line in test_file.read().strip("\n").split("\n")
                        if line
                    ]
                with open(critical_file_path, "r") as critical_file:
                    critical_records = [
                        json.loads(line)
                        for line in critical_file.read().strip("\n").split("\n")
                        if line
                    ]

                messages = [x["dagster_meta"]["orig_message"] for x in records]
                assert "Hello, there!" in messages

                critical_messages = [x["dagster_meta"]["orig_message"] for x in critical_records]
                assert "Hello, there!" not in critical_messages

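# A hedged sketch of the "test"/"critical" loggers configured above: a @logger
# that writes one JSON line per record, carrying the dagster_meta dict the
# assertions read back. How hello_logging_pipeline wires these in (presumably
# via ModeDefinition(logger_defs=...)) is an assumption, not shown in the test.
import json
import logging

from dagster import Field, logger


@logger(
    config_schema={
        "name": Field(str),
        "file_path": Field(str),
        "log_level": Field(str, default_value="INFO"),
    }
)
def json_file_logger(init_context):
    config = init_context.logger_config

    class JsonFileHandler(logging.Handler):
        def emit(self, record):
            # Dagster attaches structured metadata to each record as dagster_meta.
            with open(config["file_path"], "a") as f:
                f.write(
                    json.dumps({"dagster_meta": getattr(record, "dagster_meta", {})}, default=str)
                    + "\n"
                )

    instance = logging.Logger(config["name"], level=config["log_level"])
    instance.addHandler(JsonFileHandler())
    return instance
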
def test_no_postgres(kubeconfig_file):
    # Construct a K8s run launcher in a fake k8s environment.
    mock_k8s_client_batch_api = mock.MagicMock()
    k8s_run_launcher = K8sRunLauncher(
        service_account_name="dagit-admin",
        instance_config_map="dagster-instance",
        dagster_home="/opt/dagster/dagster_home",
        job_image="fake_job_image",
        load_incluster_config=False,
        kubeconfig_file=kubeconfig_file,
        k8s_client_batch_api=mock_k8s_client_batch_api,
    )

    # Create a fake external pipeline.
    recon_pipeline = reconstructable(fake_pipeline)
    recon_repo = recon_pipeline.repository
    repo_def = recon_repo.get_definition()
    location_origin = InProcessRepositoryLocationOrigin(recon_repo)
    with location_origin.create_location() as location:
        repo_handle = RepositoryHandle(
            repository_name=repo_def.name,
            repository_location=location,
        )
        fake_external_pipeline = external_pipeline_from_recon_pipeline(
            recon_pipeline,
            solid_selection=None,
            repository_handle=repo_handle,
        )

        # Launch the run in a fake Dagster instance.
        with instance_for_test() as instance:
            pipeline_name = "demo_pipeline"
            run = create_run_for_test(instance, pipeline_name=pipeline_name)
            k8s_run_launcher.register_instance(instance)
            run = k8s_run_launcher.launch_run(run, fake_external_pipeline)

            updated_run = instance.get_run_by_id(run.run_id)
            assert updated_run.tags[DOCKER_IMAGE_TAG] == "fake_job_image"

        # Check that the created k8s job omits the Postgres password env var.
        mock_method_calls = mock_k8s_client_batch_api.method_calls
        assert len(mock_method_calls) > 0
        method_name, _args, kwargs = mock_method_calls[0]
        assert method_name == "create_namespaced_job"
        assert DAGSTER_PG_PASSWORD_ENV_VAR not in [
            env.name for env in kwargs["body"].spec.template.spec.containers[0].env
        ]

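# fake_pipeline above only needs to exist so it can be reconstructed; a trivial
# stand-in like the following (an assumption) would do.
from dagster import pipeline, solid


@solid
def fake_solid(_):
    pass


@pipeline
def fake_pipeline():
    fake_solid()
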
def test_custom_executor_fn():
    _explode_pid["pid"] = os.getpid()
    with instance_for_test() as instance:
        run_config = {
            "solids": {"solid_that_uses_adder_resource": {"inputs": {"number": {"value": 4}}}},
            "intermediate_storage": {"filesystem": {}},
        }
        execution_plan = create_execution_plan(pipeline_with_mode, run_config)
        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=pipeline_with_mode,
            execution_plan=execution_plan,
            run_config=run_config,
        )
        recon_pipeline = reconstructable(pipeline_with_mode)
        execute_run_host_mode(
            ExplodingTestPipeline(recon_pipeline.repository, recon_pipeline.pipeline_name),
            pipeline_run,
            instance,
            executor_defs=[test_executor],
            raise_on_error=True,
        )

        assert instance.get_run_by_id(pipeline_run.run_id).status == PipelineRunStatus.SUCCESS

        logs = instance.all_logs(pipeline_run.run_id)
        assert any(
            e.is_dagster_event and "Executing steps using multiprocess executor" in e.message
            for e in logs
        )

def test_local():
    result = execute_pipeline(
        pipeline=reconstructable(define_pyspark_pipe),
        mode="local",
        run_config={"solids": {"blah": {"config": {"foo": "a string", "bar": 123}}}},
    )
    assert result.success

def test_dynamic_execute():
    from .test_jobs import define_dynamic_job

    TestStepHandler.reset()
    with instance_for_test() as instance:
        result = execute_pipeline(
            reconstructable(define_dynamic_job),
            instance=instance,
        )
        TestStepHandler.wait_for_processes()
        assert result.success
        assert (
            len(
                [
                    e
                    for e in result.event_list
                    if e.event_type_value == DagsterEventType.STEP_START.value
                ]
            )
            == 11
        )

def test_multiproc():
    with instance_for_test() as instance:
        result = execute_pipeline(
            reconstructable(multiproc_test),
            run_config={
                "resources": {
                    "io_manager": {
                        "config": {"base_dir": path.join(instance.root_directory, "storage")}
                    }
                },
            },
            instance=instance,
        )
        assert result.success

def test_launcher_requests_retry():
    mode = "request_retry"
    with tempfile.TemporaryDirectory() as tmpdir:
        result = execute_pipeline(
            pipeline=reconstructable(define_basic_pipeline),
            mode=mode,
            run_config=make_run_config(tmpdir, mode),
        )
        assert result.success
        assert result.result_for_solid("return_two").output_value() == 2
        assert result.result_for_solid("add_one").output_value() == 3
        for step_key, events in result.events_by_step_key.items():
            if step_key:
                event_types = [event.event_type for event in events]
                assert DagsterEventType.STEP_UP_FOR_RETRY in event_types
                assert DagsterEventType.STEP_RESTARTED in event_types

def test_reexec_dynamic_with_optional_output_job_2():
    with instance_for_test() as instance:
        result = dynamic_with_optional_output_job().execute_in_process(instance=instance)

        # Re-execute the step where the source yielded an output.
        re_result = reexecute_pipeline(
            reconstructable(dynamic_with_optional_output_job),
            parent_run_id=result.run_id,
            instance=instance,
            step_selection=["echo[1]"],
        )
        assert re_result.success
        assert re_result.result_for_solid("echo").output_value() == {"1": 1}

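# A hedged sketch of dynamic_with_optional_output_job in the op/job API the
# test exercises: a DynamicOut marked is_required=False that only yields some
# mapping keys, so re-executing "echo[1]" recovers {"1": 1}. The real
# definition may vary which keys it emits between the root run and re-execution.
from dagster import DynamicOut, DynamicOutput, job, op


@op(out=DynamicOut(int, is_required=False))
def emit_some():
    for i in range(3):
        if i % 2:  # only odd mapping keys get an output
            yield DynamicOutput(value=i, mapping_key=str(i))


@op
def echo(num):
    return num


def dynamic_with_optional_output_job():
    @job
    def _dynamic_with_optional_output_job():
        emit_some().map(echo)

    return _dynamic_with_optional_output_job
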
def test_local():
    result = execute_pipeline(
        pipeline=reconstructable(define_pyspark_pipe),
        mode='local',
        environment_dict={'solids': {'blah': {'config': {'foo': 'a string', 'bar': 123}}}},
    )
    assert result.success

def test_map_selection(run_config):
    with instance_for_test() as instance:
        result = execute_pipeline(
            reconstructable(dynamic_pipeline),
            instance=instance,
            run_config=merge_dicts({"solids": {"emit": {"inputs": {"num": 2}}}}, run_config),
            solid_selection=["emit*", "emit_ten"],
        )
        assert result.success
        assert result.result_for_solid("double_total").output_value() == 40