def test_pipelines_success(file_path, run_config_path):
    with pushd(
        file_relative_path(__file__, "../../../docs_snippets/legacy/data_science/")
    ):
        with instance_for_test() as instance:
            run_config = load_yaml_from_path(run_config_path) if run_config_path else {}
            recon_pipeline = ReconstructablePipeline.for_file(file_path, "iris_classify")

            with tempfile.TemporaryDirectory() as temp_dir:
                # Point the io_manager at a temporary directory so test runs
                # don't leave artifacts behind
                run_config["resources"] = {
                    "io_manager": {"config": {"base_dir": temp_dir}}
                }

                pipeline_result = execute_pipeline(
                    recon_pipeline,
                    run_config=run_config,
                    instance=instance,
                    solid_selection=["k_means_iris"],  # skip download_file in tests
                )
                assert pipeline_result.success
def execute_pipeline_on_celery(
    pipeline_name, instance=None, run_config=None, tempdir=None, tags=None, subset=None
):
    with tempdir_wrapper(tempdir) as tempdir:
        pipeline_def = ReconstructablePipeline.for_file(
            REPO_FILE, pipeline_name
        ).subset_for_execution(subset)
        with _instance_wrapper(instance) as wrapped_instance:
            # Default to filesystem IO in the temp dir and the celery executor
            # unless the caller supplies an explicit run_config
            run_config = run_config or {
                "resources": {"io_manager": {"config": {"base_dir": tempdir}}},
                "execution": {"celery": {}},
            }
            result = execute_pipeline(
                pipeline_def,
                run_config=run_config,
                instance=wrapped_instance,
                tags=tags,
            )
            # Yield (rather than return) so the temp dir and wrapped instance
            # stay open while the caller inspects the result
            yield result
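# A minimal usage sketch for the generator helper above -- not part of the
# original suite. Names here are hypothetical: "test_pipeline" stands in for a
# pipeline defined in REPO_FILE, and the fixtures mirror the other celery
# tests. contextlib.contextmanager adapts the yielding helper so its temp dir
# and instance wrappers remain open for the duration of the `with` block.
import contextlib


def test_execute_on_celery_sketch(dagster_celery_worker, instance):
    with contextlib.contextmanager(execute_pipeline_on_celery)(
        "test_pipeline", instance=instance
    ) as result:
        assert result.success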
def test_engine_error(instance, tempdir):
    with mock.patch(
        "dagster.core.execution.context.system.PlanData.raise_on_error",
        return_value=True,
    ):
        with pytest.raises(DagsterSubprocessError):
            storage = os.path.join(tempdir, "flakey_storage")
            execute_pipeline(
                ReconstructablePipeline.for_file(REPO_FILE, "engine_error"),
                run_config={
                    "resources": {"io_manager": {"config": {"base_dir": storage}}},
                    # task_always_eager runs celery tasks in-process, so the
                    # failure surfaces directly in this test
                    "execution": {
                        "celery": {"config": {"config_source": {"task_always_eager": True}}}
                    },
                    "solids": {"destroy": {"config": storage}},
                },
                instance=instance,
            )
def test_dask_terminate():
    run_config = {
        "solids": {
            "sleepy_dask_solid": {
                "inputs": {
                    "df": {"read": {"csv": {"path": file_relative_path(__file__, "ex*.csv")}}}
                }
            }
        }
    }

    interrupt_thread = None
    result_types = []

    with instance_for_test() as instance:
        for result in execute_pipeline_iterator(
            pipeline=ReconstructablePipeline.for_file(__file__, sleepy_dask_pipeline.name),
            run_config=run_config,
            instance=instance,
        ):
            # Interrupt once the first step starts
            if result.event_type == DagsterEventType.STEP_START and not interrupt_thread:
                interrupt_thread = Thread(target=send_interrupt, args=())
                interrupt_thread.start()

            if result.event_type == DagsterEventType.STEP_FAILURE:
                assert (
                    "DagsterExecutionInterruptedError" in result.event_specific_data.error.message
                )

            result_types.append(result.event_type)

        interrupt_thread.join()

        assert DagsterEventType.STEP_FAILURE in result_types
        assert DagsterEventType.PIPELINE_FAILURE in result_types
def test_terminate_pipeline_on_celery(dagster_celery_worker, instance, tempdir):
    pipeline_def = ReconstructablePipeline.for_file(REPO_FILE, "interrupt_pipeline")

    run_config = {
        "resources": {"io_manager": {"config": {"base_dir": tempdir}}},
        "execution": {"celery": {}},
    }

    results = []
    result_types = []
    interrupt_thread = None

    for result in execute_pipeline_iterator(
        pipeline=pipeline_def,
        run_config=run_config,
        instance=instance,
    ):
        # Interrupt once the first step starts
        if result.event_type == DagsterEventType.STEP_START and not interrupt_thread:
            interrupt_thread = Thread(target=send_interrupt, args=())
            interrupt_thread.start()

        results.append(result)
        result_types.append(result.event_type)

    interrupt_thread.join()

    # At least one step succeeded (the one that was running when the interrupt fired)
    assert DagsterEventType.STEP_SUCCESS in result_types

    # At least one step was revoked (and there were no step failure events)
    revoke_steps = [
        result
        for result in results
        if result.event_type == DagsterEventType.ENGINE_EVENT and "was revoked." in result.message
    ]
    assert len(revoke_steps) > 0

    # The overall pipeline failed
    assert DagsterEventType.PIPELINE_FAILURE in result_types
def test_dask():
    run_config = {
        "solids": {
            "dask_solid": {
                "inputs": {
                    "df": {"read": {"csv": {"path": file_relative_path(__file__, "ex*.csv")}}}
                }
            }
        }
    }

    with instance_for_test() as instance:
        result = execute_pipeline(
            ReconstructablePipeline.for_file(__file__, dask_pipeline.name),
            run_config={
                "execution": {"dask": {"config": {"cluster": {"local": {"timeout": 30}}}}},
                **run_config,
            },
            instance=instance,
        )

        assert result.success
def test_hello_world_reexecution():
    with exec_for_test("hello_world_pipeline") as result:
        assert result.success

        output_notebook_path = get_path(
            [x for x in result.step_event_list if x.event_type_value == "ASSET_MATERIALIZATION"][0]
        )

        with tempfile.NamedTemporaryFile("w+", suffix=".py") as reexecution_notebook_file:
            # Generate a pipeline module that re-executes the materialized notebook
            reexecution_notebook_file.write(
                (
                    "from dagster import pipeline\n"
                    "from dagstermill import define_dagstermill_solid\n\n\n"
                    "reexecution_solid = define_dagstermill_solid(\n"
                    "    'hello_world_reexecution', '{output_notebook_path}'\n"
                    ")\n\n"
                    "@pipeline\n"
                    "def reexecution_pipeline():\n"
                    "    reexecution_solid()\n"
                ).format(output_notebook_path=output_notebook_path)
            )
            reexecution_notebook_file.flush()

            result = None
            reexecution_pipeline = ReconstructablePipeline.for_file(
                reexecution_notebook_file.name, "reexecution_pipeline"
            )
            reexecution_result = None
            with instance_for_test() as instance:
                try:
                    reexecution_result = execute_pipeline(reexecution_pipeline, instance=instance)
                    assert reexecution_result.success
                finally:
                    if reexecution_result:
                        cleanup_result_notebook(reexecution_result)