Example #1
def test_ecs_run_launcher_inits():
    DagsterInstance.local_temp(
        overrides={
            "run_launcher": {
                "module": "dagster_aws.ecs.launcher",
                "class": "ECSRunLauncher"
            }
        })
Example #2
def test_yielded_results_config():
    run_config = {
        'resources': {
            'ge_data_context': {
                'config': {
                    'ge_root_dir':
                    file_relative_path(__file__, "./great_expectations")
                }
            }
        }
    }
    result = execute_pipeline(
        reconstructable(hello_world_pipeline),
        run_config=run_config,
        mode='basic',
        instance=DagsterInstance.local_temp(),
    )
    assert result.result_for_solid(
        "reyielder").output_value()[0]["success_percent"] == 100
    expectations = result.result_for_solid(
        "ge_validation_solid").expectation_results_during_compute
    assert len(expectations) == 1
    mainexpect = expectations[0]
    assert mainexpect.success
    metadata = mainexpect.metadata_entries[0].entry_data.data
    assert metadata['overall'] == {
        'evaluated_expectations': 11,
        'success_percent': 100.0,
        'successful_expectations': 11,
        'unsuccessful_expectations': 0,
    }
Example #3
def temp_instance():
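    # Yield an ephemeral DagsterInstance backed by a temporary directory and
    # wait for the run launcher to finish before the directory is cleaned up.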
    with seven.TemporaryDirectory() as temp_dir:
        instance = DagsterInstance.local_temp(temp_dir)
        try:
            yield instance
        finally:
            instance.run_launcher.join()
Example #4
def test_multiprocess_executor():
    result = execute_pipeline(
        run_config={
            # This section controls how the run will be executed.
            # The multiprocess executor runs each step in its own sub process.
            "execution": {
                "multiprocess": {}
            },
            # This section controls how values will be passed from one solid to the next.
            # The default is in memory, so here we set it to filesystem to allow the
            # separate subprocess to get the values
            "intermediate_storage": {
                "filesystem": {}
            },
        },
        # The default instance for this API is an in memory ephemeral one.
        # To allow the multiple processes to coordinate we use one here
        # backed by a temporary directory.
        instance=DagsterInstance.local_temp(),
        # A ReconstructablePipeline is necessary to load the pipeline in child processes.
        # reconstructable() is a utility function that captures where the
        # PipelineDefinition came from.
        pipeline=reconstructable(predict_color),
    )
    assert result.success
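Example #5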
def test_pipeline_run_creation_race():
    with seven.TemporaryDirectory() as tempdir:
        instance = DagsterInstance.local_temp(tempdir)
        run_id = 'run_id'

        # Spy on the result of add_run
        add_run_spy = Spy(instance._run_storage.add_run)  # pylint: disable=protected-access
        add_run_mock = mock.MagicMock(side_effect=add_run_spy)
        instance._run_storage.add_run = add_run_mock  # pylint: disable=protected-access

        # This invocation should successfully add the run to run storage
        pipeline_run = register_managed_run_for_test(instance, run_id=run_id)
        assert len(add_run_mock.call_args_list) == 1
        assert instance.has_run(run_id)

        # Check that add_run did not receive DagsterRunAlreadyExists exception and that
        # it successfully returned
        assert add_run_spy.exceptions == []
        assert len(add_run_spy.return_values) == 1

        # (*) Simulate a race where second invocation receives has_run() is False
        fetched_pipeline_run = ''
        with mock.patch.object(instance, 'has_run', mock.MagicMock(return_value=False)):
            fetched_pipeline_run = register_managed_run_for_test(instance, run_id=run_id)

        # Check that add_run received DagsterRunAlreadyExists exception and did not return value
        assert len(add_run_mock.call_args_list) == 2
        assert add_run_spy.exceptions == [DagsterRunAlreadyExists]
        assert len(add_run_spy.return_values) == 1

        assert pipeline_run == fetched_pipeline_run
        assert instance.has_run(run_id)
        assert len(instance.get_runs()) == 1
Example #6
def test_multiple_local_cluster():
    cluster_configs = [
        {
            "n_workers": 1,
            "threads_per_worker": 2,
            "dashboard_address": None,
        },
        {
            "n_workers": 2,
            "threads_per_worker": 1,
            "dashboard_address": None,
        },
    ]

    for cluster_config in cluster_configs:
        run_config = {
            "resources": {
                "dask": {
                    "config": {
                        "cluster": {
                            "local": cluster_config
                        }
                    }
                }
            }
        }
        result = execute_pipeline(
            scheduler_info_pipeline,
            run_config=run_config,
            instance=DagsterInstance.local_temp(),
        )
        _assert_scheduler_info_result(result, cluster_config)
Example #7
def test_execute_pipeline_iterator():
    records = []

    def event_callback(record):
        assert isinstance(record, EventRecord)
        records.append(record)

    pipeline = PipelineDefinition(
        name='basic_resource_pipeline',
        solid_defs=[resource_solid],
        mode_defs=[
            ModeDefinition(
                resource_defs={'a': resource_a, 'b': resource_b},
                logger_defs={'callback': construct_event_logger(event_callback)},
            )
        ],
    )
    iterator = execute_pipeline_iterator(
        pipeline,
        environment_dict={'loggers': {'callback': {}}},
        instance=DagsterInstance.local_temp(),
    )

    event_type = None
    while event_type != 'STEP_START':
        event = next(iterator)
        event_type = event.event_type_value

    iterator.close()
    events = [record.dagster_event for record in records if record.is_dagster_event]
    messages = [record.user_message for record in records if not record.is_dagster_event]
    assert len([event for event in events if event.is_pipeline_failure]) > 0
    assert len([message for message in messages if message == 'CLEANING A']) > 0
    assert len([message for message in messages if message == 'CLEANING B']) > 0
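Example #8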
def test_multiple_outputs_only_emit_one_multiproc():
    pipe = ExecutionTargetHandle.for_pipeline_python_file(
        __file__, 'define_multi_out'
    ).build_pipeline_definition()
    result = execute_pipeline(
        pipe,
        environment_dict={'storage': {'filesystem': {}}, 'execution': {'multiprocess': {}}},
        instance=DagsterInstance.local_temp(),
    )
    assert result.success

    solid_result = result.result_for_solid('multiple_outputs')
    assert set(solid_result.output_values.keys()) == set(['output_one'])

    with pytest.raises(
        DagsterInvariantViolationError,
        match="Output 'not_defined' not defined in solid 'multiple_outputs'",
    ):
        solid_result.output_value('not_defined')

    with pytest.raises(DagsterInvariantViolationError, match='Did not find result output_two'):
        solid_result.output_value('output_two')

    with pytest.raises(
        DagsterInvariantViolationError,
        match=(
            'Tried to get result for solid not_present in multiple_outputs_only_emit_one_pipeline. '
            'No such top level solid.'
        ),
    ):
        result.result_for_solid('not_present')

    assert result.result_for_solid('downstream_two').skipped
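Example #9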
def test_sync_run_launcher_run():
    with seven.TemporaryDirectory() as temp_dir:
        instance = DagsterInstance.local_temp(
            temp_dir,
            overrides={
                "run_launcher": {
                    "module":
                    "dagster.core.launcher.sync_in_memory_run_launcher",
                    "class": "SyncInMemoryRunLauncher",
                }
            },
        )

        external_repo = get_main_external_repo(instance)
        external_pipeline = external_repo.get_full_external_pipeline(
            "noop_pipeline")

        run = create_run_for_test(instance=instance,
                                  pipeline_name=external_pipeline.name)

        run = instance.run_launcher.launch_run(
            instance=instance, run=run, external_pipeline=external_pipeline)

        completed_run = instance.get_run_by_id(run.run_id)
        assert completed_run.is_success
Example #10
def test_multiproc_markers():
    pipe = ExecutionTargetHandle.for_pipeline_python_file(
        __file__, 'define_pipeline'
    ).build_pipeline_definition()
    instance = DagsterInstance.local_temp()
    result = execute_pipeline(
        pipe,
        instance=instance,
        environment_dict={'execution': {'multiprocess': {}}, 'storage': {'filesystem': {}}},
    )
    assert result.success
    events = instance.all_logs(result.run_id)
    start_markers = {}
    end_markers = {}
    for event in events:
        dagster_event = event.dagster_event
        if dagster_event.is_engine_event:
            if dagster_event.engine_event_data.marker_start:
                key = '{step}.{marker}'.format(
                    step=event.step_key, marker=dagster_event.engine_event_data.marker_start
                )
                start_markers[key] = event.timestamp
            if dagster_event.engine_event_data.marker_end:
                key = '{step}.{marker}'.format(
                    step=event.step_key, marker=dagster_event.engine_event_data.marker_end
                )
                end_markers[key] = event.timestamp

    seen = set()
    assert set(start_markers.keys()) == set(end_markers.keys())
    for key in end_markers:
        assert end_markers[key] - start_markers[key] > 0
        seen.add(key)

    assert 'ping.compute.multiprocess_subprocess_init' in end_markers
Example #11
def test_yielded_results_config_pandas(snapshot):
    run_config = {
        "resources": {
            "ge_data_context": {
                "config": {
                    "ge_root_dir":
                    file_relative_path(__file__, "./great_expectations")
                }
            }
        }
    }
    result = execute_pipeline(
        reconstructable(hello_world_pandas_pipeline),
        run_config=run_config,
        mode="basic",
        instance=DagsterInstance.local_temp(),
    )
    assert result.result_for_solid(
        "reyielder").output_value()[0]["success_percent"] == 100
    expectations = result.result_for_solid(
        "ge_validation_solid").expectation_results_during_compute
    assert len(expectations) == 1
    mainexpect = expectations[0]
    assert mainexpect.success
    # purge system specific metadata for testing
    metadata = mainexpect.metadata_entries[0].entry_data.md_str.split(
        "### Info")[0]
    snapshot.assert_match(metadata)
Example #12
def test_execute_celery_docker():
    docker_image = test_project_docker_image()
    docker_config = {
        "image": docker_image,
        "env_vars": ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"],
    }

    if IS_BUILDKITE:
        ecr_client = boto3.client("ecr", region_name="us-west-1")
        token = ecr_client.get_authorization_token()
        username, password = (base64.b64decode(
            token["authorizationData"][0]
            ["authorizationToken"]).decode().split(":"))
        registry = token["authorizationData"][0]["proxyEndpoint"]

        docker_config["registry"] = {
            "url": registry,
            "username": username,
            "password": password,
        }

    else:
        try:
            client = docker.from_env()
            client.images.get(docker_image)
            print(  # pylint: disable=print-call
                "Found existing image tagged {image}, skipping image build. To rebuild, first run: "
                "docker rmi {image}".format(image=docker_image))
        except docker.errors.ImageNotFound:
            build_and_tag_test_image(docker_image)

    with seven.TemporaryDirectory() as temp_dir:

        run_config = merge_dicts(
            merge_yamls([
                os.path.join(test_project_environments_path(), "env.yaml"),
                os.path.join(test_project_environments_path(), "env_s3.yaml"),
            ]),
            {
                "execution": {
                    "celery-docker": {
                        "config": {
                            "docker": docker_config,
                            "config_source": {
                                "task_always_eager": True
                            },
                        }
                    }
                },
            },
        )

        result = execute_pipeline(
            get_test_project_recon_pipeline("docker_celery_pipeline"),
            run_config=run_config,
            instance=DagsterInstance.local_temp(temp_dir),
        )
        assert result.success
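Example #13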
def test_execute_celery_docker():
    docker_image = test_project_docker_image()
    docker_config = {
        'image': docker_image,
        'env_vars': ['AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY'],
    }

    if IS_BUILDKITE:
        ecr_client = boto3.client('ecr', region_name='us-west-1')
        token = ecr_client.get_authorization_token()
        username, password = (base64.b64decode(
            token['authorizationData'][0]
            ['authorizationToken']).decode().split(':'))
        registry = token['authorizationData'][0]['proxyEndpoint']

        docker_config['registry'] = {
            'url': registry,
            'username': username,
            'password': password,
        }

    else:
        try:
            client = docker.from_env()
            client.images.get(docker_image)
            print(  # pylint: disable=print-call
                'Found existing image tagged {image}, skipping image build. To rebuild, first run: '
                'docker rmi {image}'.format(image=docker_image))
        except docker.errors.ImageNotFound:
            build_and_tag_test_image(docker_image)

    with seven.TemporaryDirectory() as temp_dir:

        run_config = merge_dicts(
            merge_yamls([
                os.path.join(test_project_environments_path(), 'env.yaml'),
                os.path.join(test_project_environments_path(), 'env_s3.yaml'),
            ]),
            {
                'execution': {
                    'celery-docker': {
                        'config': {
                            'docker': docker_config,
                            'config_source': {
                                'task_always_eager': True
                            },
                        }
                    }
                },
            },
        )

        result = execute_pipeline(
            get_test_project_recon_pipeline('docker_celery_pipeline'),
            run_config=run_config,
            instance=DagsterInstance.local_temp(temp_dir),
        )
        assert result.success
Example #14
def test_execute_plan_iterator():
    records = []

    def event_callback(record):
        assert isinstance(record, EventRecord)
        records.append(record)

    instance = DagsterInstance.local_temp()

    pipeline = PipelineDefinition(
        name='basic_resource_pipeline',
        solid_defs=[resource_solid],
        mode_defs=[
            ModeDefinition(
                resource_defs={
                    'a': resource_a,
                    'b': resource_b
                },
                logger_defs={
                    'callback': construct_event_logger(event_callback)
                },
            )
        ],
    )
    environment_dict = {'loggers': {'callback': {}}}
    pipeline_run = instance.create_run(
        PipelineRun(
            pipeline_name=pipeline.name,
            run_id=make_new_run_id(),
            environment_dict={'loggers': {
                'callback': {}
            }},
            mode='default',
            status=PipelineRunStatus.NOT_STARTED,
        ))

    execution_plan = create_execution_plan(pipeline, environment_dict)
    iterator = execute_plan_iterator(execution_plan,
                                     pipeline_run,
                                     instance,
                                     environment_dict=environment_dict)

    event_type = None
    while event_type != 'STEP_START':
        event = next(iterator)
        event_type = event.event_type_value

    iterator.close()
    messages = [
        record.user_message for record in records
        if not record.is_dagster_event
    ]
    assert len([message
                for message in messages if message == 'CLEANING A']) > 0
    assert len([message
                for message in messages if message == 'CLEANING B']) > 0
Example #15
def test_execute_plan_iterator():
    records = []

    def event_callback(record):
        assert isinstance(record, EventRecord)
        records.append(record)

    instance = DagsterInstance.local_temp()

    pipeline = PipelineDefinition(
        name='basic_resource_pipeline',
        solid_defs=[resource_solid],
        mode_defs=[
            ModeDefinition(
                resource_defs={
                    'a': resource_a,
                    'b': resource_b
                },
                logger_defs={
                    'callback': construct_event_logger(event_callback)
                },
            )
        ],
    )
    run_config = {'loggers': {'callback': {}}}

    execution_plan = create_execution_plan(pipeline, run_config=run_config)
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline,
        run_config={'loggers': {
            'callback': {}
        }},
        execution_plan=execution_plan,
    )

    iterator = execute_plan_iterator(execution_plan,
                                     pipeline_run,
                                     instance,
                                     run_config=run_config)

    event_type = None
    while event_type != 'STEP_START':
        event = next(iterator)
        event_type = event.event_type_value

    iterator.close()
    messages = [
        record.user_message for record in records
        if not record.is_dagster_event
    ]
    assert len([message
                for message in messages if message == 'CLEANING A']) > 0
    assert len([message
                for message in messages if message == 'CLEANING B']) > 0
Example #16
def test_execute_run_iterator():
    records = []

    def event_callback(record):
        assert isinstance(record, EventRecord)
        records.append(record)

    instance = DagsterInstance.local_temp()

    pipeline_def = PipelineDefinition(
        name="basic_resource_pipeline",
        solid_defs=[resource_solid],
        mode_defs=[
            ModeDefinition(
                resource_defs={
                    "a": resource_a,
                    "b": resource_b
                },
                logger_defs={
                    "callback": construct_event_logger(event_callback)
                },
            )
        ],
    )
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline_def,
        run_config={"loggers": {
            "callback": {}
        }},
        mode="default",
    )

    iterator = execute_run_iterator(InMemoryExecutablePipeline(pipeline_def),
                                    pipeline_run,
                                    instance=instance)

    event_type = None
    while event_type != "STEP_START":
        event = next(iterator)
        event_type = event.event_type_value

    iterator.close()
    events = [
        record.dagster_event for record in records if record.is_dagster_event
    ]
    messages = [
        record.user_message for record in records
        if not record.is_dagster_event
    ]
    assert len([event for event in events if event.is_pipeline_failure]) > 0
    assert len([message
                for message in messages if message == "CLEANING A"]) > 0
    assert len([message
                for message in messages if message == "CLEANING B"]) > 0
Example #17
def test_dask_pipeline():
    run_config = load_yaml_from_globs(
        file_relative_path(__file__, "../../docs_snippets/deploying/dask_hello_world.yaml")
    )
    result = execute_pipeline(
        reconstructable(dask_pipeline),
        run_config=run_config,
        instance=DagsterInstance.local_temp(),
    )
    assert result.success
    assert result.result_for_solid("hello_world").output_value() == "Hello, World!"
def test_execute_pipeline_iterator():
    records = []

    def event_callback(record):
        assert isinstance(record, EventRecord)
        records.append(record)

    pipeline = PipelineDefinition(
        name="basic_resource_pipeline",
        solid_defs=[resource_solid],
        mode_defs=[
            ModeDefinition(
                resource_defs={
                    "a": resource_a,
                    "b": resource_b
                },
                logger_defs={
                    "callback": construct_event_logger(event_callback)
                },
            )
        ],
    )
    iterator = execute_pipeline_iterator(
        pipeline,
        run_config={"loggers": {
            "callback": {}
        }},
        instance=DagsterInstance.local_temp(),
    )

    event_type = None
    while event_type != "STEP_START":
        event = next(iterator)
        event_type = event.event_type_value

    iterator.close()
    events = [
        record.dagster_event for record in records if record.is_dagster_event
    ]
    messages = [
        record.user_message for record in records
        if not record.is_dagster_event
    ]
    pipeline_failure_events = [
        event for event in events if event.is_pipeline_failure
    ]
    assert len(pipeline_failure_events) == 1
    assert "GeneratorExit" in pipeline_failure_events[
        0].pipeline_failure_data.error.message
    assert len([message
                for message in messages if message == "CLEANING A"]) > 0
    assert len([message
                for message in messages if message == "CLEANING B"]) > 0
Example #19
def test_pipeline(pg_hostname, postgres):  # pylint: disable=unused-argument
    reconstructable_pipeline = ReconstructablePipeline.for_module(
        "dbt_example", "dbt_example_pipeline")
    assert set([
        solid.name
        for solid in reconstructable_pipeline.get_definition().solids
    ]) == {
        "download_file",
        "load_cereals_from_csv",
        "run_cereals_models",
        "test_cereals_models",
        "analyze_cereals",
        "post_plot_to_slack",
    }
    with TemporaryDirectory() as tempdir:

        res = execute_pipeline(
            ReconstructablePipeline.for_module("dbt_example",
                                               "dbt_example_pipeline"),
            instance=DagsterInstance.local_temp(tempdir=tempdir),
            mode="dev",
            run_config={
                "solids": {
                    "download_file": {
                        "config": {
                            "url": CEREALS_DATASET_URL,
                            "target_path": "cereals.csv",
                        }
                    },
                    "post_plot_to_slack": {
                        "config": {
                            "channels": ["foo_channel"]
                        }
                    },
                },
                "resources": {
                    "db": {
                        "config": {
                            "db_url":
                            (f"postgresql://*****:*****@{pg_hostname}"
                             ":5432/dbt_example")
                        }
                    },
                    "slack": {
                        "config": {
                            "token": "nonce"
                        }
                    },
                },
            },
            raise_on_error=False,
        )
        assert res.success
Example #20
def test_pipeline_tags():
    dag = get_dag()

    instance = DagsterInstance.local_temp()
    manager = instance.compute_log_manager

    # When mode is default and tags are set, run with tags
    result = execute_pipeline(
        pipeline=make_dagster_pipeline_from_airflow_dag(
            dag, {AIRFLOW_EXECUTION_DATE_STR: EXECUTION_DATE_MINUS_WEEK_FMT}),
        instance=instance,
    )
    check_compute_logs(manager, result, EXECUTION_DATE_MINUS_WEEK_FMT)
Example #21
def test_pipelines_success(file_path, run_config_path):

    with pushd(
            file_relative_path(__file__,
                               '../../../docs_snippets/legacy/data_science/')):
        instance = DagsterInstance.local_temp()
        run_config = load_yaml_from_path(
            run_config_path) if run_config_path else None
        recon_pipeline = ReconstructablePipeline.for_file(
            file_path, 'iris_pipeline')

        pipeline_result = execute_pipeline(recon_pipeline,
                                           run_config=run_config,
                                           instance=instance)
        assert pipeline_result.success
Example #22
def test_sync_run_launcher_from_configurable_class():
    with seven.TemporaryDirectory() as temp_dir:
        instance_no_hijack = DagsterInstance.local_temp(
            temp_dir,
            overrides={
                "run_launcher": {
                    "module":
                    "dagster.core.launcher.sync_in_memory_run_launcher",
                    "class": "SyncInMemoryRunLauncher",
                }
            },
        )

        assert isinstance(instance_no_hijack.run_launcher,
                          SyncInMemoryRunLauncher)
Example #23
def test_priorities_mp():
    pipe = ExecutionTargetHandle.for_pipeline_python_file(
        __file__, 'priority_test'
    ).build_pipeline_definition()
    result = execute_pipeline(
        pipe,
        {
            'execution': {'multiprocess': {'config': {'max_concurrent': 1}}},
            'storage': {'filesystem': {}},
        },
        instance=DagsterInstance.local_temp(),
    )
    assert result.success
    assert [
        str(event.solid_handle) for event in result.step_event_list if event.is_step_success
    ] == ['high', 'high_2', 'none', 'none_2', 'low', 'low_2']
Example #24
def test_pipelines_success(file_path, run_config_path):

    with pushd(
            file_relative_path(__file__,
                               "../../../docs_snippets/legacy/data_science/")):
        instance = DagsterInstance.local_temp()
        run_config = load_yaml_from_path(
            run_config_path) if run_config_path else None
        recon_pipeline = ReconstructablePipeline.for_file(
            file_path, "iris_pipeline")

        pipeline_result = execute_pipeline(
            recon_pipeline,
            run_config=run_config,
            instance=instance,
            solid_selection=["k_means_iris"],  # skip download_file in tests
        )
        assert pipeline_result.success
Example #25
def test_yielded_results_config():
    run_config = {
        "resources": {
            "ge_data_context": {
                "config": {"ge_root_dir": file_relative_path(__file__, "./great_expectations")}
            }
        }
    }
    result = execute_pipeline(
        reconstructable(hello_world_pipeline),
        run_config=run_config,
        mode="basic",
        instance=DagsterInstance.local_temp(),
    )
    assert result.result_for_solid("reyielder").output_value()[0]["success_percent"] == 100
    expectations = result.result_for_solid("ge_validation_solid").expectation_results_during_compute
    assert len(expectations) == 1
    mainexpect = expectations[0]
    assert mainexpect.success
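Example #26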
def test_execute_run_bad_state():
    records = []

    def event_callback(record):
        assert isinstance(record, EventRecord)
        records.append(record)

    instance = DagsterInstance.local_temp()

    pipeline_def = PipelineDefinition(
        name="basic_resource_pipeline",
        solid_defs=[resource_solid],
        mode_defs=[
            ModeDefinition(
                resource_defs={
                    "a": resource_a,
                    "b": resource_b
                },
                logger_defs={
                    "callback": construct_event_logger(event_callback)
                },
            )
        ],
    )
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline_def,
        run_config={
            "loggers": {
                "callback": {}
            }
        },
        mode="default",
    ).with_status(PipelineRunStatus.SUCCESS)

    with pytest.raises(
            check.CheckError,
            match=r"Pipeline run basic_resource_pipeline \({}\) in state"
            r" PipelineRunStatus.SUCCESS, expected PipelineRunStatus.NOT_STARTED"
            .format(pipeline_run.run_id),
    ):
        execute_run(InMemoryPipeline(pipeline_def),
                    pipeline_run,
                    instance=instance)
Example #27
def execute_multiprocessing():
    from dagster import reconstructable, DagsterInstance

    execute_pipeline(
        # A ReconstructablePipeline is necessary to load the pipeline in child processes.
        # reconstructable() is a utility function that captures where the
        # PipelineDefinition came from.
        reconstructable(parallel_pipeline),
        run_config={
            # This section controls how the run will be executed.
            # The multiprocess executor runs each solid in its own sub process.
            "execution": {
                "multiprocess": {}
            },
        },
        # The default instance for this API is an in memory ephemeral one.
        # To allow the multiple processes to coordinate we use one here
        # backed by a temporary directory.
        instance=DagsterInstance.local_temp(),
    )
Example #28
def test_multiple_outputs_only_emit_one_multiproc():
    pipe = reconstructable(define_multi_out)
    result = execute_pipeline(
        pipe,
        run_config={
            "storage": {
                "filesystem": {}
            },
            "execution": {
                "multiprocess": {}
            }
        },
        instance=DagsterInstance.local_temp(),
    )
    assert result.success

    solid_result = result.result_for_solid("multiple_outputs")
    assert set(solid_result.output_values.keys()) == set(["output_one"])

    with pytest.raises(
            DagsterInvariantViolationError,
            match=
            "Output 'not_defined' not defined in solid 'multiple_outputs'",
    ):
        solid_result.output_value("not_defined")

    with pytest.raises(DagsterInvariantViolationError,
                       match="Did not find result output_two"):
        solid_result.output_value("output_two")

    with pytest.raises(
            DagsterInvariantViolationError,
            match=re.escape(
                "Tried to get result for solid 'not_present' in "
                "'multiple_outputs_only_emit_one_pipeline'. No such top level solid."
            ),
    ):
        result.result_for_solid("not_present")

    assert result.result_for_solid("downstream_two").skipped
def test_multiple_outputs_only_emit_one_multiproc():
    pipe = reconstructable(define_multi_out)
    result = execute_pipeline(
        pipe,
        environment_dict={
            'storage': {
                'filesystem': {}
            },
            'execution': {
                'multiprocess': {}
            }
        },
        instance=DagsterInstance.local_temp(),
    )
    assert result.success

    solid_result = result.result_for_solid('multiple_outputs')
    assert set(solid_result.output_values.keys()) == set(['output_one'])

    with pytest.raises(
            DagsterInvariantViolationError,
            match=
            "Output 'not_defined' not defined in solid 'multiple_outputs'",
    ):
        solid_result.output_value('not_defined')

    with pytest.raises(DagsterInvariantViolationError,
                       match='Did not find result output_two'):
        solid_result.output_value('output_two')

    with pytest.raises(
            DagsterInvariantViolationError,
            match=re.escape(
                'Tried to get result for solid \'not_present\' in '
                '\'multiple_outputs_only_emit_one_pipeline\'. No such top level solid.'
            ),
    ):
        result.result_for_solid('not_present')

    assert result.result_for_solid('downstream_two').skipped
Example #30
def test_multiproc_markers():
    instance = DagsterInstance.local_temp()
    result = execute_pipeline(
        reconstructable(define_pipeline),
        instance=instance,
        run_config={
            "execution": {
                "multiprocess": {}
            },
            "storage": {
                "filesystem": {}
            }
        },
    )
    assert result.success
    events = instance.all_logs(result.run_id)
    start_markers = {}
    end_markers = {}
    for event in events:
        dagster_event = event.dagster_event
        if dagster_event.is_engine_event:
            if dagster_event.engine_event_data.marker_start:
                key = "{step}.{marker}".format(
                    step=event.step_key,
                    marker=dagster_event.engine_event_data.marker_start)
                start_markers[key] = event.timestamp
            if dagster_event.engine_event_data.marker_end:
                key = "{step}.{marker}".format(
                    step=event.step_key,
                    marker=dagster_event.engine_event_data.marker_end)
                end_markers[key] = event.timestamp

    seen = set()
    assert set(start_markers.keys()) == set(end_markers.keys())
    for key in end_markers:
        assert end_markers[key] - start_markers[key] > 0
        seen.add(key)

    assert "ping.compute.multiprocess_subprocess_init" in end_markers