def test_in_pipeline_manager_solid_config():
    '''Check ``manager.context.solid_config`` with and without solid run config.'''

    def _config_pipeline_kwargs():
        # Rebuilt for each use so every manager receives its own handle and dict.
        return dict(
            pipeline_name='hello_world_config_pipeline',
            solid_handle=SolidHandle('hello_world_config', None),
            executable_dict=ReconstructablePipeline.for_module(
                'dagstermill.examples.repository',
                'define_hello_world_config_pipeline',
            ).to_dict(),
        )

    # Default manager arguments: no solid config is reported.
    with in_pipeline_manager() as manager:
        assert manager.context.solid_config is None

    # Config pipeline without run config: the 'hello' greeting is expected.
    with in_pipeline_manager(**_config_pipeline_kwargs()) as manager:
        assert manager.context.solid_config == {'greeting': 'hello'}

    # Config pipeline with run config: the supplied greeting is expected.
    greeting_override = {
        'solids': {'hello_world_config': {'config': {'greeting': 'bonjour'}}}
    }
    with in_pipeline_manager(
        run_config=greeting_override, **_config_pipeline_kwargs()
    ) as manager:
        assert manager.context.solid_config == {'greeting': 'bonjour'}
def test_pipeline(pg_hostname, postgres):  # pylint: disable=unused-argument
    """Check the dbt example pipeline's solid names, then run it in ``dev`` mode.

    Args:
        pg_hostname: Host name interpolated into the ``db`` resource URL.
        postgres: Unused directly; requested only for the fixture's side effects.
    """
    reconstructable_pipeline = ReconstructablePipeline.for_module(
        "dbt_example", "dbt_example_pipeline"
    )

    # Set comprehension: no intermediate list is built just to be discarded.
    solid_names = {
        solid.name for solid in reconstructable_pipeline.get_definition().solids
    }
    assert solid_names == {
        "download_file",
        "load_cereals_from_csv",
        "run_cereals_models",
        "test_cereals_models",
        "analyze_cereals",
        "post_plot_to_slack",
    }

    with instance_for_test() as instance, tempfile.TemporaryDirectory() as temp_dir:
        run_config = {
            "solids": {
                "download_file": {
                    "config": {
                        "url": CEREALS_DATASET_URL,
                        "target_path": "cereals.csv",
                    }
                },
                "post_plot_to_slack": {"config": {"channels": ["foo_channel"]}},
            },
            "resources": {
                "db": {
                    "config": {
                        "db_url": (
                            f"postgresql://*****:*****@{pg_hostname}"
                            ":5432/dbt_example"
                        )
                    }
                },
                "slack": {"config": {"token": "nonce"}},
                # The io_manager writes under the temporary directory.
                "io_manager": {"config": {"base_dir": temp_dir}},
            },
        }
        # Reuse the pipeline reference built above instead of constructing an
        # identical one a second time.
        res = execute_pipeline(
            reconstructable_pipeline,
            instance=instance,
            mode="dev",
            run_config=run_config,
        )
        assert res.success
def test_in_pipeline_manager_solid_config():
    """Check ``manager.context.solid_config`` for each solid of the config pipeline."""
    config_pipeline_name = "hello_world_config_pipeline"

    def _executable_dict():
        # Built fresh per manager, mirroring one for_module(...).to_dict() per call.
        return ReconstructablePipeline.for_module(
            "dagstermill.examples.repository",
            "hello_world_config_pipeline",
        ).to_dict()

    def _both_solids_run_config():
        return {
            "solids": {
                "hello_world_config": {"config": {"greeting": "bonjour"}},
                "goodbye_config": {"config": {"farewell": "goodbye"}},
            }
        }

    # Default manager arguments: no solid config is reported.
    with in_pipeline_manager() as manager:
        assert manager.context.solid_config is None

    # No run config supplied: the 'hello' greeting is expected.
    with in_pipeline_manager(
        pipeline_name=config_pipeline_name,
        solid_handle=NodeHandle("hello_world_config", None),
        executable_dict=_executable_dict(),
        step_key="hello_world_config",
    ) as manager:
        assert manager.context.solid_config == {"greeting": "hello"}

    # Run config for both solids, viewed from the hello_world_config step.
    with in_pipeline_manager(
        pipeline_name=config_pipeline_name,
        solid_handle=NodeHandle("hello_world_config", None),
        run_config=_both_solids_run_config(),
        executable_dict=_executable_dict(),
        step_key="hello_world_config",
    ) as manager:
        assert manager.context.solid_config == {"greeting": "bonjour"}

    # Same run config, viewed from the goodbye_config step.
    with in_pipeline_manager(
        pipeline_name=config_pipeline_name,
        solid_handle=NodeHandle("goodbye_config", None),
        run_config=_both_solids_run_config(),
        executable_dict=_executable_dict(),
        step_key="goodbye_config",
    ) as manager:
        assert manager.context.solid_config == {"farewell": "goodbye"}
def test_execute_execute_plan_mutation():
    '''Run the execute-plan mutation for the sleepy pipeline and compare its events.'''
    pipeline_name = 'sleepy_pipeline'
    pipeline = ReconstructablePipeline.for_module(
        'dagster_examples.toys.sleepy', pipeline_name
    )

    instance = DagsterInstance.local_temp()
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline.get_definition()
    )

    execution_params = {
        'environmentConfigData': {},
        'mode': 'default',
        'selector': {'name': pipeline_name},
        'executionMetadata': {'runId': pipeline_run.run_id},
    }
    result = execute_execute_plan_mutation(
        pipeline.get_reconstructable_repository(),
        {'executionParams': execution_params},
        instance_ref=instance.get_ref(),
    )

    # Collapse the event stream to its distinct (event type, step key) pairs.
    seen_events = {(event.event_type_value, event.step_key) for event in result}
    assert seen_events == EXPECTED_EVENTS
def test_event_callback_logging():
    """Watch a run's event log and expect a pipeline-success event within the poll window."""
    events_by_type = defaultdict(list)

    def _event_callback(record):
        assert isinstance(record, EventRecord)
        if not record.is_dagster_event:
            return
        events_by_type[record.dagster_event.event_type].append(record)

    pipeline = ReconstructablePipeline.for_module(
        "dagstermill.examples.repository",
        "define_hello_logging_pipeline",
    )
    instance = DagsterInstance.local_temp()
    pipeline_run = instance.create_run_for_pipeline(pipeline.get_definition())

    # Register the watcher before executing so the callback sees the run's events.
    instance.watch_event_logs(pipeline_run.run_id, -1, _event_callback)
    execute_run(pipeline, pipeline_run, instance)

    # Poll up to five times; stop as soon as a terminal pipeline event shows up.
    passed_before_timeout = False
    for _attempt in range(5):
        time.sleep(0.333)
        if DagsterEventType.PIPELINE_FAILURE in events_by_type:
            break
        if DagsterEventType.PIPELINE_SUCCESS in events_by_type:
            passed_before_timeout = True
            break

    assert passed_before_timeout
def test_cli_execute_failure():
    """Execute the failing pandas pipeline via the CLI helper; expect one step failure."""
    # Paths in env files currently have to be relative to where the script was
    # launched, so simulate that by changing directory for the duration.
    launch_dir = os.getcwd()
    try:
        os.chdir(file_relative_path(__file__, "../.."))
        config_globs = [
            file_relative_path(
                __file__, "../../dagster_pandas/examples/pandas_hello_world/*.yaml"
            )
        ]
        with instance_for_test() as instance:
            result = do_execute_command(
                pipeline=ReconstructablePipeline.for_module(
                    "dagster_pandas.examples",
                    "pandas_hello_world_fails_test",
                ),
                instance=instance,
                config=config_globs,
            )
            failures = [event for event in result.step_event_list if event.is_failure]
    finally:
        # Always restore the working directory.
        os.chdir(launch_dir)

    assert len(failures) == 1
    failure_cause = failures[0].step_failure_data.error.cause
    assert "I am a programmer and I make error" in failure_cause.message
def test_papermill_pandas_hello_world_pipeline():
    """Run the papermill pandas pipeline and expect the input frame plus one."""
    csv_path = file_relative_path(__file__, "num_prod.csv")
    run_config = {
        "solids": {
            "papermill_pandas_hello_world": {
                "inputs": {"df": {"csv": {"path": csv_path}}}
            }
        }
    }
    pipeline = ReconstructablePipeline.for_module(
        "dagster_pandas.examples", "papermill_pandas_hello_world_pipeline"
    )

    with instance_for_test() as instance:
        pipeline_result = execute_pipeline(pipeline, run_config, instance=instance)
        assert pipeline_result.success

        solid_result = pipeline_result.result_for_solid("papermill_pandas_hello_world")
        expected = pd.read_csv(csv_path) + 1
        assert solid_result.output_value().equals(expected)
def test_cli_execute_failure():
    '''Execute the failing pandas pipeline via the CLI helper; expect one step failure.'''
    # Paths in env files currently have to be relative to where the script was
    # launched, so simulate that by changing directory for the duration.
    launch_dir = os.getcwd()
    try:
        os.chdir(file_relative_path(__file__, '../..'))
        env_globs = [
            file_relative_path(
                __file__, '../../dagster_pandas/examples/pandas_hello_world/*.yaml'
            )
        ]
        failing_pipeline = ReconstructablePipeline.for_module(
            'dagster_pandas.examples.pandas_hello_world.pipeline',
            'pandas_hello_world_fails',
        )
        result = do_execute_command(pipeline=failing_pipeline, env_file_list=env_globs)
        failures = [event for event in result.step_event_list if event.is_failure]
    finally:
        # Always restore the working directory.
        os.chdir(launch_dir)

    assert len(failures) == 1
    failure_error = failures[0].step_failure_data.error
    assert 'I am a programmer and I make error' in failure_error.message
def test_papermill_pandas_hello_world_pipeline():
    '''Run the papermill pandas pipeline and expect the input frame plus one.'''
    csv_path = file_relative_path(__file__, 'num_prod.csv')
    run_config = {
        'solids': {
            'papermill_pandas_hello_world': {
                'inputs': {'df': {'csv': {'path': csv_path}}}
            }
        }
    }
    pipeline = ReconstructablePipeline.for_module(
        'dagster_pandas.examples', 'papermill_pandas_hello_world_pipeline'
    )

    pipeline_result = execute_pipeline(
        pipeline, run_config, instance=DagsterInstance.local_temp()
    )
    assert pipeline_result.success

    solid_result = pipeline_result.result_for_solid('papermill_pandas_hello_world')
    expected = pd.read_csv(csv_path) + 1
    assert solid_result.output_value().equals(expected)
def test_papermill_pandas_hello_world_pipeline():
    """Run the papermill pandas job and expect the input frame plus one."""
    csv_path = file_relative_path(__file__, "num_prod.csv")
    job = ReconstructablePipeline.for_module(
        "dagster_pandas.examples", "papermill_pandas_hello_world_test"
    )

    with tempfile.TemporaryDirectory() as temp_dir, instance_for_test() as instance:
        run_config = {
            "ops": {
                "papermill_pandas_hello_world": {
                    "inputs": {"df": {"csv": {"path": csv_path}}},
                }
            },
            # The io_manager writes under the temporary directory.
            "resources": {"io_manager": {"config": {"base_dir": temp_dir}}},
        }
        pipeline_result = execute_pipeline(job, run_config, instance=instance)
        assert pipeline_result.success

        solid_result = pipeline_result.result_for_solid("papermill_pandas_hello_world")
        expected = pd.read_csv(csv_path) + 1
        assert solid_result.output_value().equals(expected)
def test_execute_pipeline():
    '''Run pandas_hello_world on num.csv and check both solids' output frames.'''
    csv_input = {'csv': {'path': file_relative_path(__file__, 'num.csv')}}
    environment = {'solids': {'sum_solid': {'inputs': {'num': csv_input}}}}

    pipeline = ReconstructablePipeline.for_module(
        'dagster_pandas.examples.pandas_hello_world.pipeline', 'pandas_hello_world'
    )
    result = execute_pipeline(pipeline, run_config=environment)
    assert result.success

    sum_frame = result.result_for_solid('sum_solid').output_value()
    assert sum_frame.to_dict('list') == {
        'num1': [1, 3],
        'num2': [2, 4],
        'sum': [3, 7],
    }

    sum_sq_frame = result.result_for_solid('sum_sq_solid').output_value()
    assert sum_sq_frame.to_dict('list') == {
        'num1': [1, 3],
        'num2': [2, 4],
        'sum': [3, 7],
        'sum_sq': [9, 49],
    }
def test_execute_execute_plan_mutation_raw():
    '''Run the raw execute-plan mutation through a workspace and compare its events.'''
    pipeline_name = 'sleepy_pipeline'
    module_name = 'dagster_examples.toys.sleepy'

    workspace = workspace_from_load_target(ModuleTarget(module_name, pipeline_name))
    pipeline = ReconstructablePipeline.for_module(module_name, pipeline_name)

    instance = DagsterInstance.local_temp()
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline.get_definition()
    )

    execution_params = {
        'runConfigData': {},
        'mode': 'default',
        'selector': {
            'repositoryLocationName': pipeline_name,
            'repositoryName': '<<unnamed>>',
            'pipelineName': pipeline_name,
        },
        'executionMetadata': {'runId': pipeline_run.run_id},
    }
    result = execute_execute_plan_mutation_raw(
        workspace,
        {'executionParams': execution_params},
        instance_ref=instance.get_ref(),
    )

    # Collapse the event stream to its distinct (event type, step key) pairs.
    seen_events = {
        (event.dagster_event.event_type_value, event.step_key) for event in result
    }
    assert seen_events == EXPECTED_EVENTS
def exec_for_test(fn_name, env=None, raise_on_error=True, **kwargs):
    """Execute a dagstermill example pipeline, yield its result, then clean up.

    This is a generator that yields exactly once.
    NOTE(review): presumably wrapped as a context manager by a decorator that is
    not visible in this block -- confirm at the definition site.

    Args:
        fn_name: Attribute name in ``dagstermill.examples.repository`` to load.
        env: Passed positionally to ``execute_pipeline`` after the pipeline.
        raise_on_error: Forwarded to ``execute_pipeline``.
        **kwargs: Forwarded unchanged to ``execute_pipeline``.
    """
    result = None
    target = ReconstructablePipeline.for_module(
        "dagstermill.examples.repository", fn_name
    )

    with instance_for_test() as instance:
        try:
            result = execute_pipeline(
                target,
                env,
                instance=instance,
                raise_on_error=raise_on_error,
                **kwargs
            )
            yield result
        finally:
            # Clean up only when execution produced a truthy result.
            if result:
                cleanup_result_notebook(result)
def test_yield_unserializable_result():
    """Yielding a lock succeeds on a bare Manager but raises inside a pipeline context."""
    # A freshly constructed Manager returns a truthy value for the lock.
    bare_manager = Manager()
    assert bare_manager.yield_result(threading.Lock())

    output_pipeline_dict = ReconstructablePipeline.for_module(
        "dagstermill.examples.repository",
        "define_hello_world_with_output_pipeline",
    ).to_dict()

    # Inside a reconstituted pipeline context the same call must raise TypeError.
    with in_pipeline_manager(
        pipeline_name="hello_world_with_output_pipeline",
        solid_handle=SolidHandle("hello_world_output", None),
        executable_dict=output_pipeline_dict,
    ) as pipeline_manager:
        with pytest.raises(TypeError):
            pipeline_manager.yield_result(threading.Lock())
def exec_for_test(fn_name, env=None, raise_on_error=True, **kwargs):
    '''Execute a dagstermill example pipeline, yield its result, then clean up.

    This is a generator that yields exactly once.
    NOTE(review): presumably wrapped as a context manager by a decorator that is
    not visible in this block -- confirm at the definition site.

    Args:
        fn_name: Attribute name in ``dagstermill.examples.repository`` to load.
        env: Passed positionally to ``execute_pipeline`` after the pipeline.
        raise_on_error: Forwarded to ``execute_pipeline``.
        **kwargs: Forwarded unchanged to ``execute_pipeline``.
    '''
    result = None
    target = ReconstructablePipeline.for_module(
        'dagstermill.examples.repository', fn_name
    )

    try:
        result = execute_pipeline(
            target,
            env,
            instance=DagsterInstance.local_temp(),
            raise_on_error=raise_on_error,
            **kwargs
        )
        yield result
    finally:
        # Clean up only when execution produced a truthy result.
        if result:
            cleanup_result_notebook(result)
def in_pipeline_manager(
    pipeline_name="hello_world_pipeline",
    solid_handle=NodeHandle("hello_world", None),
    step_key="hello_world",
    executable_dict=None,
    mode=None,
    **kwargs,
):
    """Yield a ``Manager`` whose pipeline context has been reconstituted for a test.

    This is a generator that yields exactly once.
    NOTE(review): presumably wrapped as a context manager by a decorator that is
    not visible in this block -- confirm at the definition site.

    Args:
        pipeline_name: Name recorded on the ``PipelineRun`` built for the context.
        solid_handle: Handle whose ``_asdict()`` is passed as ``solid_handle_kwargs``.
        step_key: Passed through as the ``step_key`` context entry.
        executable_dict: Serialized pipeline reference. When falsy, the
            ``hello_world_pipeline`` from ``dagstermill.examples.repository`` is used.
        mode: Run mode; ``"default"`` when falsy.
        **kwargs: Extra or overriding entries for the context dict handed to
            ``Manager.reconstitute_pipeline_context`` (for example ``run_config``).

    Yields:
        The ``Manager`` after ``reconstitute_pipeline_context`` has been called.
    """
    manager = Manager()
    run_id = make_new_run_id()

    with instance_for_test() as instance:
        if not executable_dict:
            executable_dict = ReconstructablePipeline.for_module(
                "dagstermill.examples.repository", "hello_world_pipeline"
            ).to_dict()

        pipeline_run_dict = pack_value(
            PipelineRun(
                pipeline_name=pipeline_name,
                run_id=run_id,
                mode=mode or "default",
                run_config=None,
                step_keys_to_execute=None,
                status=PipelineRunStatus.NOT_STARTED,
            )
        )

        # Create the marshal directory last, immediately before the try/finally
        # that removes it, so a failure in the setup above cannot leak it.
        marshal_dir = tempfile.mkdtemp()
        try:
            with safe_tempfile_path() as output_log_file_path:
                context_dict = {
                    "pipeline_run_dict": pipeline_run_dict,
                    "solid_handle_kwargs": solid_handle._asdict(),
                    "executable_dict": executable_dict,
                    "marshal_dir": marshal_dir,
                    "run_config": {},
                    "output_log_path": output_log_file_path,
                    "instance_ref_dict": pack_value(instance.get_ref()),
                    "step_key": step_key,
                }
                # Caller-supplied kwargs take precedence over the defaults above.
                manager.reconstitute_pipeline_context(**dict(context_dict, **kwargs))
                yield manager
        finally:
            shutil.rmtree(marshal_dir)
def in_pipeline_manager(pipeline_name='hello_world_pipeline',
                        solid_handle=SolidHandle('hello_world', None),
                        executable_dict=None,
                        mode=None,
                        **kwargs):
    '''Yield a ``Manager`` whose pipeline context has been reconstituted for a test.

    This is a generator that yields exactly once.
    NOTE(review): presumably wrapped as a context manager by a decorator that is
    not visible in this block -- confirm at the definition site.

    Args:
        pipeline_name: Name recorded on the ``PipelineRun`` built for the context.
        solid_handle: Handle whose ``_asdict()`` is passed as ``solid_handle_kwargs``.
        executable_dict: Serialized pipeline reference. When falsy,
            ``define_hello_world_pipeline`` from ``dagstermill.examples.repository``
            is used.
        mode: Run mode; ``'default'`` when falsy.
        **kwargs: Extra or overriding entries for the context dict handed to
            ``Manager.reconstitute_pipeline_context`` (for example ``run_config``).

    Yields:
        The ``Manager`` after ``reconstitute_pipeline_context`` has been called.
    '''
    manager = Manager()
    run_id = make_new_run_id()
    instance = DagsterInstance.local_temp()

    if not executable_dict:
        executable_dict = ReconstructablePipeline.for_module(
            'dagstermill.examples.repository', 'define_hello_world_pipeline'
        ).to_dict()

    pipeline_run_dict = pack_value(
        PipelineRun(
            pipeline_name=pipeline_name,
            run_id=run_id,
            mode=mode or 'default',
            run_config=None,
            step_keys_to_execute=None,
            status=PipelineRunStatus.NOT_STARTED,
        )
    )

    # Create the marshal directory last, immediately before the try/finally
    # that removes it, so a failure in the setup above cannot leak it.
    marshal_dir = tempfile.mkdtemp()
    try:
        with safe_tempfile_path() as output_log_file_path:
            context_dict = {
                'pipeline_run_dict': pipeline_run_dict,
                'solid_handle_kwargs': solid_handle._asdict(),
                'executable_dict': executable_dict,
                'marshal_dir': marshal_dir,
                'run_config': {},
                'output_log_path': output_log_file_path,
                'instance_ref_dict': pack_value(instance.get_ref()),
            }
            # Caller-supplied kwargs take precedence over the defaults above.
            manager.reconstitute_pipeline_context(**dict(context_dict, **kwargs))
            yield manager
    finally:
        shutil.rmtree(marshal_dir)
def test_in_pipeline_manager_with_resources():
    """Check that the ``list`` resource logs "Opened" on setup and "Closed" on teardown."""
    # Only the name is needed here; the handle is closed as soon as the block exits.
    with tempfile.NamedTemporaryFile() as fd:
        path = fd.name

    def _read_messages():
        # The file holds a pickled list of strings; split each one on ": ".
        with open(path, "rb") as message_file:
            raw_messages = pickle.load(message_file)
        return [message.split(": ") for message in raw_messages]

    resource_pipeline_dict = ReconstructablePipeline.for_module(
        "dagstermill.examples.repository",
        "resource_pipeline",
    ).to_dict()

    try:
        with in_pipeline_manager(
            pipeline_name="resource_pipeline",
            executable_dict=resource_pipeline_dict,
            solid_handle=NodeHandle("hello_world_resource", None),
            run_config={"resources": {"list": {"config": path}}},
            mode="prod",
        ) as manager:
            assert "list" in manager.context.resources._asdict()

            messages = _read_messages()
            assert len(messages) == 1
            assert messages[0][1] == "Opened"

            manager.teardown_resources()

            messages = _read_messages()
            assert len(messages) == 2
            assert messages[1][1] == "Closed"
    finally:
        if os.path.exists(path):
            os.unlink(path)
def test_in_pipeline_manager_with_resources():
    '''Check that the ``list`` resource logs 'Opened' on setup and 'Closed' on teardown.'''
    # Only the name is needed here; the handle is closed as soon as the block exits.
    with tempfile.NamedTemporaryFile() as fd:
        path = fd.name

    def _read_messages():
        # The file holds a pickled list of strings; split each one on ': '.
        with open(path, 'rb') as message_file:
            raw_messages = pickle.load(message_file)
        return [message.split(': ') for message in raw_messages]

    resource_pipeline_dict = ReconstructablePipeline.for_module(
        'dagstermill.examples.repository',
        'define_resource_pipeline',
    ).to_dict()

    try:
        with in_pipeline_manager(
            pipeline_name='resource_pipeline',
            executable_dict=resource_pipeline_dict,
            solid_handle=SolidHandle('hello_world_resource', None),
            run_config={'resources': {'list': {'config': path}}},
            mode='prod',
        ) as manager:
            assert len(manager.context.resources._asdict()) == 1

            messages = _read_messages()
            assert len(messages) == 1
            assert messages[0][1] == 'Opened'

            manager.teardown_resources()

            messages = _read_messages()
            assert len(messages) == 2
            assert messages[1][1] == 'Closed'
    finally:
        if os.path.exists(path):
            os.unlink(path)
def test_cli_execute():
    '''Execute pandas_hello_world through the CLI helper using the example env files.'''
    # Paths in env files currently have to be relative to where the script was
    # launched, so simulate that by changing directory for the duration.
    launch_dir = os.getcwd()
    try:
        os.chdir(file_relative_path(__file__, '../..'))
        env_globs = [
            file_relative_path(
                __file__, '../../dagster_pandas/examples/pandas_hello_world/*.yaml'
            )
        ]
        hello_world = ReconstructablePipeline.for_module(
            'dagster_pandas.examples.pandas_hello_world.pipeline', 'pandas_hello_world'
        )
        do_execute_command(pipeline=hello_world, env_file_list=env_globs)
    finally:
        # Always restore the working directory.
        os.chdir(launch_dir)
def test_execute_pipeline():
    """Run pandas_hello_world on num.csv and check both solids' output frames."""
    csv_input = {"csv": {"path": file_relative_path(__file__, "num.csv")}}
    environment = {"solids": {"sum_solid": {"inputs": {"num": csv_input}}}}

    pipeline = ReconstructablePipeline.for_module(
        "dagster_pandas.examples.pandas_hello_world.pipeline", "pandas_hello_world"
    )
    result = execute_pipeline(pipeline, run_config=environment)
    assert result.success

    sum_frame = result.result_for_solid("sum_solid").output_value()
    assert sum_frame.to_dict("list") == {
        "num1": [1, 3],
        "num2": [2, 4],
        "sum": [3, 7],
    }

    sum_sq_frame = result.result_for_solid("sum_sq_solid").output_value()
    assert sum_sq_frame.to_dict("list") == {
        "num1": [1, 3],
        "num2": [2, 4],
        "sum": [3, 7],
        "sum_sq": [9, 49],
    }
def test_cli_execute():
    """Execute pandas_hello_world through the CLI helper using the example env files."""
    # Paths in env files currently have to be relative to where the script was
    # launched, so simulate that by changing directory for the duration.
    launch_dir = os.getcwd()
    try:
        os.chdir(file_relative_path(__file__, "../.."))
        hello_world = ReconstructablePipeline.for_module(
            "dagster_pandas.examples.pandas_hello_world.pipeline", "pandas_hello_world"
        )
        do_execute_command(
            pipeline=hello_world,
            instance=DagsterInstance.get(),
            config=[
                file_relative_path(
                    __file__, "../../dagster_pandas/examples/pandas_hello_world/*.yaml"
                )
            ],
        )
    finally:
        # Always restore the working directory.
        os.chdir(launch_dir)
import os # pylint: disable=unused-argument import pytest from dagster import execute_pipeline, file_relative_path from dagster.core.definitions.reconstructable import ReconstructablePipeline from dagster.core.test_utils import instance_for_test from dagster.utils import load_yaml_from_globs ingest_pipeline = ReconstructablePipeline.for_module( "airline_demo.pipelines", "define_airline_demo_ingest_pipeline", ) warehouse_pipeline = ReconstructablePipeline.for_module( "airline_demo.pipelines", "define_airline_demo_warehouse_pipeline", ) def config_path(relative_path): return file_relative_path( __file__, os.path.join("../airline_demo/environments/", relative_path) ) @pytest.mark.db @pytest.mark.nettest @pytest.mark.py3 @pytest.mark.spark def test_ingest_pipeline_fast(postgres, pg_hostname): with instance_for_test() as instance: ingest_config_dict = load_yaml_from_globs(
import os # pylint: disable=unused-argument import pytest from dagster import execute_pipeline, file_relative_path from dagster.core.definitions.reconstructable import ReconstructablePipeline from dagster.core.instance import DagsterInstance from dagster.utils import load_yaml_from_globs ingest_pipeline = ReconstructablePipeline.for_module( 'airline_demo.pipelines', 'define_airline_demo_ingest_pipeline', ) warehouse_pipeline = ReconstructablePipeline.for_module( 'airline_demo.pipelines', 'define_airline_demo_warehouse_pipeline', ) def config_path(relative_path): return file_relative_path( __file__, os.path.join('../airline_demo/environments/', relative_path) ) @pytest.mark.db @pytest.mark.nettest @pytest.mark.py3 @pytest.mark.spark def test_ingest_pipeline_fast(postgres, pg_hostname): ingest_config_dict = load_yaml_from_globs(
def recon_pipeline_for_cli_args(kwargs):
    '''Resolve CLI keyword arguments to a reconstructable pipeline reference.

    The supported argument combinations, tried in this order, are:

    1. ``repository_yaml`` + ``pipeline_name`` (no module, file or ``fn_name``)
    2. ``python_file`` + ``fn_name``: a repository when ``pipeline_name`` is
       given, otherwise a pipeline
    3. ``module_name`` + ``fn_name``: a repository when ``pipeline_name`` is
       given, otherwise a pipeline

    Any other combination is rejected through ``_cli_load_invariant``.

    Args:
        kwargs (dict): Parsed CLI arguments. ``pipeline_name`` may be a string or
            a sequence holding at most one name.
    '''
    check.dict_param(kwargs, 'kwargs')

    pipeline_name = kwargs.get('pipeline_name')

    # A non-string pipeline_name is a sequence of names; unwrap a single entry.
    if pipeline_name and not isinstance(pipeline_name, six.string_types):
        if len(pipeline_name) == 1:
            pipeline_name = pipeline_name[0]
        else:
            check.failed(
                'Can only handle zero or one pipeline args. Got {pipeline_name}'.format(
                    pipeline_name=repr(pipeline_name)
                )
            )

    module_name = kwargs.get('module_name')
    python_file = kwargs.get('python_file')
    fn_name = kwargs.get('fn_name')
    repository_yaml = kwargs.get('repository_yaml')

    # Pipeline from repository YAML and pipeline_name
    if (
        pipeline_name
        and module_name is None
        and python_file is None
        and repository_yaml is not None
    ):
        _cli_load_invariant(fn_name is None)
        if repository_yaml:
            repo_yaml = os.path.abspath(repository_yaml)
        else:
            repo_yaml = DEFAULT_REPOSITORY_YAML_FILENAME
        _cli_load_invariant(
            os.path.exists(repo_yaml),
            'Expected to use file "{}" to load repository but it does not exist. '
            'Verify your current working directory or CLI arguments.'.format(repo_yaml),
        )
        return ReconstructableRepository.from_yaml(repo_yaml).get_reconstructable_pipeline(
            pipeline_name
        )

    # Python file target: a repository when a pipeline name is given, else a pipeline
    if python_file and fn_name:
        _cli_load_invariant(repository_yaml is None)
        _cli_load_invariant(module_name is None)
        file_path = os.path.abspath(kwargs['python_file'])
        if pipeline_name:
            return ReconstructableRepository.for_file(
                file_path, kwargs['fn_name']
            ).get_reconstructable_pipeline(pipeline_name)
        return ReconstructablePipeline.for_file(file_path, kwargs['fn_name'])

    # Module target: a repository when a pipeline name is given, else a pipeline
    if module_name and fn_name:
        _cli_load_invariant(repository_yaml is None)
        _cli_load_invariant(python_file is None)
        if pipeline_name:
            return ReconstructableRepository.for_module(
                kwargs['module_name'], kwargs['fn_name']
            ).get_reconstructable_pipeline(pipeline_name)
        return ReconstructablePipeline.for_module(kwargs['module_name'], kwargs['fn_name'])

    # No supported combination matched.
    _cli_load_invariant(False)