def test_pandas_dask():
    run_config = {
        "solids": {
            "pandas_solid": {
                "inputs": {"df": {"csv": {"path": file_relative_path(__file__, "ex.csv")}}}
            }
        }
    }

    result = execute_pipeline(
        ReconstructablePipeline.for_file(__file__, pandas_pipeline.name),
        run_config={
            "storage": {"filesystem": {}},
            "execution": {"dask": {"config": {"cluster": {"local": {"timeout": 30}}}}},
            **run_config,
        },
        instance=DagsterInstance.local_temp(),
    )

    assert result.success
# Older variant of the test above, written against the earlier dagster API that
# took ``environment_dict`` instead of ``run_config``.
def test_pandas_dask():
    environment_dict = {
        'solids': {
            'pandas_solid': {
                'inputs': {'df': {'csv': {'path': file_relative_path(__file__, 'ex.csv')}}}
            }
        }
    }

    result = execute_pipeline(
        ReconstructablePipeline.for_file(__file__, pandas_pipeline.name),
        environment_dict={
            'storage': {'filesystem': {}},
            'execution': {'dask': {'config': {'cluster': {'local': {'timeout': 30}}}}},
            **environment_dict,
        },
        instance=DagsterInstance.local_temp(),
    )

    assert result.success
def test_engine_error():
    with mock.patch(
        "dagster.core.execution.context.system.PlanData.raise_on_error",
        return_value=True,
    ):
        with pytest.raises(DagsterSubprocessError):
            with tempfile.TemporaryDirectory() as tempdir:
                with instance_for_test(temp_dir=tempdir) as instance:
                    storage = os.path.join(tempdir, "flakey_storage")
                    execute_pipeline(
                        ReconstructablePipeline.for_file(REPO_FILE, "engine_error"),
                        run_config={
                            "intermediate_storage": {
                                "filesystem": {"config": {"base_dir": storage}}
                            },
                            "execution": {
                                "celery": {
                                    "config": {"config_source": {"task_always_eager": True}}
                                }
                            },
                            "solids": {"destroy": {"config": storage}},
                        },
                        instance=instance,
                    )
def test_hello_world_reexecution():
    with exec_for_test("hello_world_pipeline") as result:
        assert result.success

        output_notebook_path = get_path(
            [x for x in result.step_event_list if x.event_type_value == "ASSET_MATERIALIZATION"][0]
        )

        with tempfile.NamedTemporaryFile("w+", suffix=".py") as reexecution_notebook_file:
            reexecution_notebook_file.write(
                (
                    "from dagster import pipeline\n"
                    "from dagstermill import define_dagstermill_solid\n\n\n"
                    "reexecution_solid = define_dagstermill_solid(\n"
                    "    'hello_world_reexecution', '{output_notebook_path}'\n"
                    ")\n\n"
                    "@pipeline\n"
                    "def reexecution_pipeline():\n"
                    "    reexecution_solid()\n"
                ).format(output_notebook_path=output_notebook_path)
            )
            reexecution_notebook_file.flush()

            result = None
            reexecution_pipeline = ReconstructablePipeline.for_file(
                reexecution_notebook_file.name, "reexecution_pipeline"
            )
            reexecution_result = None

            with instance_for_test() as instance:
                try:
                    reexecution_result = execute_pipeline(reexecution_pipeline, instance=instance)
                    assert reexecution_result.success
                finally:
                    if reexecution_result:
                        cleanup_result_notebook(reexecution_result)
def execute_eagerly_on_celery(tempdir, pipeline_name, tags=None):
    return execute_pipeline(
        ReconstructablePipeline.for_file(__file__, pipeline_name),
        run_config={
            'storage': {'filesystem': {'config': {'base_dir': tempdir}}},
            'execution': {'celery': {'config': {'config_source': {'task_always_eager': True}}}},
        },
        instance=DagsterInstance.local_temp(tempdir=tempdir),
        tags=tags,
    )
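# A minimal sketch of how the helper above might be invoked from a test. The
# pipeline name 'simple_pipeline' is hypothetical; the helper loads the pipeline
# from this same file, so the named pipeline would have to be defined here.
import tempfile


def test_execute_eagerly():
    with tempfile.TemporaryDirectory() as tempdir:
        result = execute_eagerly_on_celery(tempdir, 'simple_pipeline')
        assert result.success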
def test_pandas_dask():
    run_config = {
        "solids": {
            "pandas_solid": {
                "inputs": {"df": {"csv": {"path": file_relative_path(__file__, "ex.csv")}}}
            }
        }
    }

    with instance_for_test() as instance:
        result = execute_pipeline(
            ReconstructablePipeline.for_file(__file__, pandas_pipeline.name),
            run_config={
                "execution": {"dask": {"config": {"cluster": {"local": {"timeout": 30}}}}},
                **run_config,
            },
            instance=instance,
        )

        assert result.success
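# The pandas/dask tests above reference `pandas_solid` and `pandas_pipeline`
# without showing them. A minimal sketch of what those definitions could look
# like, assuming the legacy dagster/dagster_pandas API; the trivial transform
# body is an assumption, not taken from the source.
from dagster import InputDefinition, OutputDefinition, pipeline, solid
from dagster_pandas import DataFrame


@solid(input_defs=[InputDefinition("df", DataFrame)], output_defs=[OutputDefinition(DataFrame)])
def pandas_solid(_, df):
    # Any pandas transform works here; doubling keeps the sketch trivial. The
    # {"csv": {"path": ...}} input config in the tests is handled by the
    # dagster_pandas DataFrame type's csv loader.
    return df + df


@pipeline
def pandas_pipeline():
    pandas_solid()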
def recon_pipeline_for_cli_args(kwargs):
    '''Builds a ReconstructablePipeline for CLI arguments, which can be any of the combinations
    for repo/pipeline loading above.
    '''
    check.dict_param(kwargs, 'kwargs')

    pipeline_name = kwargs.get('pipeline_name')

    if pipeline_name and not isinstance(pipeline_name, six.string_types):
        if len(pipeline_name) == 1:
            pipeline_name = pipeline_name[0]
        else:
            check.failed(
                'Can only handle zero or one pipeline args. Got {pipeline_name}'.format(
                    pipeline_name=repr(pipeline_name)
                )
            )

    # Pipeline from repository YAML and pipeline_name
    if (
        pipeline_name
        and kwargs.get('module_name') is None
        and kwargs.get('python_file') is None
        and kwargs.get('repository_yaml') is not None
    ):
        _cli_load_invariant(kwargs.get('fn_name') is None)
        repo_yaml = (
            os.path.abspath(kwargs.get('repository_yaml'))
            if kwargs.get('repository_yaml')
            else DEFAULT_REPOSITORY_YAML_FILENAME
        )
        _cli_load_invariant(
            os.path.exists(repo_yaml),
            'Expected to use file "{}" to load repository but it does not exist. '
            'Verify your current working directory or CLI arguments.'.format(repo_yaml),
        )
        return ReconstructableRepository.from_yaml(repo_yaml).get_reconstructable_pipeline(
            pipeline_name
        )

    # Pipeline from repository python file
    elif kwargs.get('python_file') and kwargs.get('fn_name') and pipeline_name:
        _cli_load_invariant(kwargs.get('repository_yaml') is None)
        _cli_load_invariant(kwargs.get('module_name') is None)
        return ReconstructableRepository.for_file(
            os.path.abspath(kwargs['python_file']), kwargs['fn_name']
        ).get_reconstructable_pipeline(pipeline_name)

    # Pipeline from repository module
    elif kwargs.get('module_name') and kwargs.get('fn_name') and pipeline_name:
        _cli_load_invariant(kwargs.get('repository_yaml') is None)
        _cli_load_invariant(kwargs.get('python_file') is None)
        return ReconstructableRepository.for_module(
            kwargs['module_name'], kwargs['fn_name']
        ).get_reconstructable_pipeline(pipeline_name)

    # Pipeline from pipeline python file
    elif kwargs.get('python_file') and kwargs.get('fn_name') and not pipeline_name:
        _cli_load_invariant(kwargs.get('repository_yaml') is None)
        _cli_load_invariant(kwargs.get('module_name') is None)
        return ReconstructablePipeline.for_file(
            os.path.abspath(kwargs['python_file']), kwargs['fn_name']
        )

    # Pipeline from pipeline module
    elif kwargs.get('module_name') and kwargs.get('fn_name') and not pipeline_name:
        _cli_load_invariant(kwargs.get('repository_yaml') is None)
        _cli_load_invariant(kwargs.get('python_file') is None)
        return ReconstructablePipeline.for_module(kwargs['module_name'], kwargs['fn_name'])

    else:
        _cli_load_invariant(False)
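# Usage sketch for recon_pipeline_for_cli_args: each branch above corresponds to
# one combination of CLI arguments. The file, module, and pipeline names below
# are hypothetical illustrations, not values from the source.
#
#   {'repository_yaml': 'repository.yaml', 'pipeline_name': ('my_pipeline',)}
#       -> named pipeline from a repository YAML file
#   {'python_file': 'repo.py', 'fn_name': 'define_repo', 'pipeline_name': ('my_pipeline',)}
#       -> named pipeline from a repository defined in a python file
#   {'module_name': 'my_pkg.repo', 'fn_name': 'define_repo', 'pipeline_name': ('my_pipeline',)}
#       -> named pipeline from a repository defined in a module
#   {'python_file': 'pipelines.py', 'fn_name': 'my_pipeline'}
#       -> pipeline function loaded directly from a python file
#   {'module_name': 'my_pkg.pipelines', 'fn_name': 'my_pipeline'}
#       -> pipeline function loaded directly from a module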
def test_terminate_pipeline_on_celery(rabbitmq):
    with start_celery_worker():
        with tempfile.TemporaryDirectory() as tempdir:
            pipeline_def = ReconstructablePipeline.for_file(REPO_FILE, "interrupt_pipeline")

            with instance_for_test_tempdir(tempdir) as instance:
                run_config = {
                    "intermediate_storage": {"filesystem": {"config": {"base_dir": tempdir}}},
                    "execution": {"celery": {}},
                }

                results = []
                result_types = []
                interrupt_thread = None
                received_interrupt = False

                try:
                    for result in execute_pipeline_iterator(
                        pipeline=pipeline_def,
                        run_config=run_config,
                        instance=instance,
                    ):
                        # Interrupt once the first step starts
                        if (
                            result.event_type == DagsterEventType.STEP_START
                            and not interrupt_thread
                        ):
                            interrupt_thread = Thread(target=send_interrupt, args=())
                            interrupt_thread.start()

                        results.append(result)
                        result_types.append(result.event_type)

                    assert False
                except DagsterExecutionInterruptedError:
                    received_interrupt = True

                interrupt_thread.join()

                assert received_interrupt

                # At least one step succeeded (the one that was running when the interrupt fired)
                assert DagsterEventType.STEP_SUCCESS in result_types

                # At least one step was revoked (and there were no step failure events)
                revoke_steps = [
                    result
                    for result in results
                    if result.event_type == DagsterEventType.ENGINE_EVENT
                    and "was revoked." in result.message
                ]

                assert len(revoke_steps) > 0

                # The overall pipeline failed
                assert DagsterEventType.PIPELINE_FAILURE in result_types
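# The test above relies on a `send_interrupt` helper that is not shown. A
# minimal sketch of what it could look like; the implementation is an
# assumption, not taken from the source (POSIX-style signal delivery).
import os
import signal


def send_interrupt():
    # Deliver SIGINT to the current process, as if the user pressed Ctrl-C, so
    # the in-flight execute_pipeline_iterator loop raises
    # DagsterExecutionInterruptedError.
    os.kill(os.getpid(), signal.SIGINT)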
def sleepy_recon_pipeline():
    return ReconstructablePipeline.for_file(
        file_relative_path(__file__, 'sleepy.py'), 'sleepy_pipeline'
    )
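# `sleepy_recon_pipeline` points at a sibling `sleepy.py` module that is not
# shown. A minimal sketch of what it could contain, assuming the legacy dagster
# API; the solid body and sleep duration are assumptions, not from the source.
import time

from dagster import pipeline, solid


@solid
def sleepy_solid(_):
    # Sleep long enough that tests can observe and interact with the run
    # while it is still in progress.
    time.sleep(60)


@pipeline
def sleepy_pipeline():
    sleepy_solid()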