def test_execute_step_verify_step():
    """Exercise ``verify_step`` for the ``do_something`` step before and after it runs.

    A run named ``new_run`` is created for pipeline ``foo`` on an instance whose
    compute log manager is overridden with ``NoOpComputeLogManager``. The checks
    before ``runner_execute_step`` cover a fresh step, a retry with no recorded
    attempt, and a retry whose stats (patched in) already report two attempts.
    The final check runs after the step has been executed through the CLI runner.
    """
    with get_foo_pipeline_handle() as pipeline_handle:
        runner = CliRunner()

        with instance_for_test(
            overrides={
                "compute_logs": {
                    "module": "dagster.core.storage.noop_compute_log_manager",
                    "class": "NoOpComputeLogManager",
                }
            }
        ) as instance:
            run = create_run_for_test(
                instance,
                pipeline_name="foo",
                run_id="new_run",
                run_config={"storage": {"filesystem": {}}},
            )

            # Serialized arguments handed to the execute-step CLI entry point below.
            input_json = serialize_dagster_namedtuple(
                ExecuteStepArgs(
                    pipeline_origin=pipeline_handle.get_python_origin(),
                    pipeline_run_id=run.run_id,
                    step_keys_to_execute=None,
                    instance_ref=instance.get_ref(),
                )
            )

            # Check that verify succeeds for a step that hasn't been run (case 3)
            retries = Retries.from_config({"enabled": {}})
            assert verify_step(instance, run, retries, step_keys_to_execute=["do_something"])

            # Check that verify fails when trying to retry with no original attempt (case 3)
            retries = Retries.from_config({"enabled": {}})
            retries.mark_attempt("do_something")
            assert not verify_step(instance, run, retries, step_keys_to_execute=["do_something"])

            # Test trying to re-run a retry fails verify_step (case 2)
            with mock.patch("dagster.cli.api.get_step_stats_by_key") as _step_stats_by_key:
                # Pretend the event log already recorded two attempts for the step.
                _step_stats_by_key.return_value = {
                    "do_something": RunStepKeyStatsSnapshot(
                        run_id=run.run_id, step_key="do_something", attempts=2
                    )
                }

                retries = Retries.from_config({"enabled": {}})
                retries.mark_attempt("do_something")
                assert not verify_step(
                    instance, run, retries, step_keys_to_execute=["do_something"]
                )

            # Actually execute the step so the last check sees a completed step.
            runner_execute_step(
                runner,
                [input_json],
            )

            # Check that verify fails for step that has already run (case 1)
            retries = Retries.from_config({"enabled": {}})
            assert not verify_step(instance, run, retries, step_keys_to_execute=["do_something"])
def test_mock_start_worker(worker_patch):
    """Start ``dagster_test_worker`` in a test instance, then pass ``worker_patch`` to ``assert_called``."""
    worker_name = "dagster_test_worker"
    with instance_for_test():
        start_worker(worker_name)
        assert_called(worker_patch)
def test_partitions_for_hourly_schedule_decorators_with_timezone():
    """Check ``@hourly_schedule`` definitions that declare ``execution_timezone="US/Central"``.

    Covers a naive start date, a start date expressed in US/Pacific, and the
    ``partition_hours_offset`` values 0 and 2. Every schedule is evaluated at
    01:25 US/Central on 2019-01-27 while the clock is frozen via ``pendulum.test``.
    """
    with instance_for_test() as instance, pendulum.test(
        create_pendulum_time(2019, 2, 27, 0, 1, 1, tz="US/Central")
    ):

        def assert_single_run_request(schedule_def, expected_hourly_time):
            # Evaluate the schedule at the fixed tick and require exactly one
            # RunRequest whose run config carries the expected partition time.
            tick_time = create_pendulum_time(
                year=2019, month=1, day=27, hour=1, minute=25, tz="US/Central"
            )
            tick_context = build_schedule_context(instance, tick_time)
            results = schedule_def.get_execution_data(tick_context)
            assert len(results) == 1
            assert isinstance(results[0], RunRequest)
            assert results[0].run_config == {"hourly_time": expected_hourly_time.isoformat()}

        start_date = datetime(year=2019, month=1, day=1)

        # You can specify a start date with no timezone and it will be assumed to be
        # in the execution timezone

        @hourly_schedule(
            pipeline_name="foo_pipeline",
            start_date=start_date,
            execution_time=time(hour=0, minute=25),
            execution_timezone="US/Central",
        )
        def hourly_central_schedule(hourly_time):
            return {"hourly_time": hourly_time.isoformat()}

        assert hourly_central_schedule.execution_timezone == "US/Central"

        _check_partitions(
            hourly_central_schedule,
            HOURS_UNTIL_FEBRUARY_27,
            pendulum.instance(start_date, tz="US/Central"),
            DEFAULT_HOURLY_FORMAT_WITH_TIMEZONE,
            relativedelta(hours=1),
        )

        assert_single_run_request(
            hourly_central_schedule,
            create_pendulum_time(year=2019, month=1, day=27, hour=0, tz="US/Central"),
        )

        # You can specify a start date in a different timezone and it will be transformed into the
        # execution timezone
        start_date_with_different_timezone = create_pendulum_time(2019, 1, 1, 0, tz="US/Pacific")

        @hourly_schedule(
            pipeline_name="foo_pipeline",
            start_date=start_date_with_different_timezone,
            execution_time=time(hour=0, minute=25),
            execution_timezone="US/Central",
        )
        def hourly_central_schedule_with_timezone_start_time(hourly_time):
            return {"hourly_time": hourly_time.isoformat()}

        # start date is two hours later since it's in PT
        expected_partition_count = HOURS_UNTIL_FEBRUARY_27 - 2
        _check_partitions(
            hourly_central_schedule_with_timezone_start_time,
            expected_partition_count,
            to_timezone(start_date_with_different_timezone, "US/Central"),
            DEFAULT_HOURLY_FORMAT_WITH_TIMEZONE,
            relativedelta(hours=1),
        )

        # test partition_hours_offset=0
        @hourly_schedule(
            pipeline_name="foo_pipeline",
            start_date=start_date_with_different_timezone,
            execution_time=time(hour=0, minute=25),
            execution_timezone="US/Central",
            partition_hours_offset=0,
        )
        def hourly_schedule_for_current_hour(hourly_time):
            return {"hourly_time": hourly_time.isoformat()}

        assert_single_run_request(
            hourly_schedule_for_current_hour,
            create_pendulum_time(year=2019, month=1, day=27, hour=1, tz="US/Central"),
        )

        # test partition_hours_offset=2
        @hourly_schedule(
            pipeline_name="foo_pipeline",
            start_date=start_date_with_different_timezone,
            execution_time=time(hour=0, minute=25),
            execution_timezone="US/Central",
            partition_hours_offset=2,
        )
        def hourly_schedule_for_two_hours_ago(hourly_time):
            return {"hourly_time": hourly_time.isoformat()}

        assert_single_run_request(
            hourly_schedule_for_two_hours_ago,
            create_pendulum_time(year=2019, month=1, day=26, hour=23, tz="US/Central"),
        )
def test_engine_events(get_external_pipeline, run_config):  # pylint: disable=redefined-outer-name
    """Launch ``math_diamond`` and compare its engine events with the expected sequence.

    The expected messages depend on whether ``run_config`` selects the
    multiprocess executor. Each expected message must be a substring of the
    engine event at the same position.
    """
    multiprocess_messages = [
        "Started process for pipeline",
        "Starting initialization of resources",
        "Finished initialization of resources",
        "Executing steps using multiprocess executor",
        "Launching subprocess for return_one",
        "Executing step return_one in subprocess",
        "Starting initialization of resources",
        "Finished initialization of resources",
        # multiply_by_2 and multiply_by_3 launch and execute in non-deterministic order
        "",
        "",
        "",
        "",
        "",
        "",
        "",
        "",
        "Launching subprocess for add",
        "Executing step add in subprocess",
        "Starting initialization of resources",
        "Finished initialization of resources",
        "Multiprocess executor: parent process exiting",
        "Process for pipeline exited",
    ]
    in_process_messages = [
        "Started process for pipeline",
        "Starting initialization of resources",
        "Finished initialization of resources",
        "Executing steps in process",
        "Finished steps in process",
        "Process for pipeline exited",
    ]

    with instance_for_test() as instance:
        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=math_diamond, run_config=run_config
        )
        run_id = pipeline_run.run_id

        initial_status = instance.get_run_by_id(run_id).status
        assert initial_status == PipelineRunStatus.NOT_STARTED

        with get_external_pipeline(pipeline_run.pipeline_name) as external_pipeline:
            instance.launch_run(pipeline_run.run_id, external_pipeline)

            finished_pipeline_run = poll_for_finished_run(instance, run_id)
            assert finished_pipeline_run
            assert finished_pipeline_run.run_id == run_id
            assert finished_pipeline_run.status == PipelineRunStatus.SUCCESS

            # Wait for the final engine event before reading the log.
            poll_for_event(
                instance,
                run_id,
                event_type="ENGINE_EVENT",
                message="Process for pipeline exited",
            )
            engine_events = _get_engine_events(instance.all_logs(run_id))

            expected_messages = (
                multiprocess_messages if _is_multiprocess(run_config) else in_process_messages
            )

            assert len(engine_events) == len(expected_messages)
            for expected, engine_event in zip(expected_messages, engine_events):
                assert expected in engine_event.message
def test_partitions_for_hourly_schedule_decorators_with_timezone():
    """Check ``@hourly_schedule`` with ``execution_timezone="US/Central"``.

    Runs with the clock frozen through ``pendulum.test`` and verifies partitions,
    run config and ``should_execute`` for a naive start date, then partitions for
    a start date given in US/Pacific.
    """
    with instance_for_test() as instance:
        with pendulum.test(pendulum.create(2019, 2, 27, 0, 1, 1, tz="US/Central")):
            start_date = datetime(year=2019, month=1, day=1)

            # You can specify a start date with no timezone and it will be assumed to be
            # in the execution timezone

            @hourly_schedule(
                pipeline_name="foo_pipeline",
                start_date=start_date,
                execution_time=time(hour=0, minute=25),
                execution_timezone="US/Central",
            )
            def hourly_central_schedule(hourly_time):
                return {"hourly_time": hourly_time.isoformat()}

            assert hourly_central_schedule.execution_timezone == "US/Central"

            _check_partitions(
                hourly_central_schedule,
                HOURS_UNTIL_FEBRUARY_27,
                pendulum.instance(start_date, tz="US/Central"),
                DEFAULT_HOURLY_FORMAT_WITH_TIMEZONE,
                relativedelta(hours=1),
            )

            # A tick at 01:25 is expected to produce run config for the 00:00 partition.
            valid_time = pendulum.create(year=2019, month=1, day=27, hour=1, minute=25, tz="US/Central")
            context_with_valid_time = ScheduleExecutionContext(instance, valid_time)

            assert hourly_central_schedule.get_run_config(context_with_valid_time) == {
                "hourly_time": pendulum.create(year=2019, month=1, day=27, hour=0, tz="US/Central").isoformat()
            }

            assert hourly_central_schedule.should_execute(context_with_valid_time)

            # You can specify a start date in a different timezone and it will be transformed into the
            # execution timezone
            start_date_with_different_timezone = pendulum.create(2019, 1, 1, 0, tz="US/Pacific")

            @hourly_schedule(
                pipeline_name="foo_pipeline",
                start_date=start_date_with_different_timezone,
                execution_time=time(hour=0, minute=25),
                execution_timezone="US/Central",
            )
            def hourly_central_schedule_with_timezone_start_time(hourly_time):
                return {"hourly_time": hourly_time.isoformat()}

            _check_partitions(
                hourly_central_schedule_with_timezone_start_time,
                HOURS_UNTIL_FEBRUARY_27 - 2,  # start date is two hours later since it's in PT
                start_date_with_different_timezone.in_tz("US/Central"),
                DEFAULT_HOURLY_FORMAT_WITH_TIMEZONE,
                relativedelta(hours=1),
            )
def test_execute_canceled_state():
    """Runs already in the CANCELED state must not begin executing.

    ``execute_run`` is expected to raise ``DagsterInvariantViolationError`` and
    leave a single log entry. ``execute_run_iterator`` is expected to yield a
    single event. Both must contain the "Not starting execution" message.
    """
    not_started_message = (
        "Not starting execution since the run was canceled before execution could start"
    )

    def event_callback(_record):
        pass

    with instance_for_test() as instance:
        pipeline_def = PipelineDefinition(
            name="basic_resource_pipeline",
            solid_defs=[resource_solid],
            mode_defs=[
                ModeDefinition(
                    resource_defs={"a": resource_a, "b": resource_b},
                    logger_defs={"callback": construct_event_logger(event_callback)},
                )
            ],
        )

        def make_canceled_run():
            # Create a fresh run and mark the returned object as CANCELED.
            created_run = instance.create_run_for_pipeline(
                pipeline_def=pipeline_def,
                run_config={"loggers": {"callback": {}}},
                mode="default",
            )
            return created_run.with_status(PipelineRunStatus.CANCELED)

        # Non-iterator entry point: raises and logs the message once.
        pipeline_run = make_canceled_run()
        with pytest.raises(DagsterInvariantViolationError):
            execute_run(
                InMemoryPipeline(pipeline_def),
                pipeline_run,
                instance=instance,
            )

        run_logs = instance.all_logs(pipeline_run.run_id)
        assert len(run_logs) == 1
        assert not_started_message in run_logs[0].message

        # Iterator entry point: yields the message as its only event.
        iter_run = make_canceled_run()
        iter_events = list(
            execute_run_iterator(InMemoryPipeline(pipeline_def), iter_run, instance=instance)
        )
        assert len(iter_events) == 1
        assert not_started_message in iter_events[0].message
def test_filesystem_persist_one_run(tmpdir):
    """Run ``do_test_single_write_read`` against an instance rooted at ``tmpdir``."""
    instance_dir = str(tmpdir)
    with instance_for_test(temp_dir=instance_dir) as test_instance:
        do_test_single_write_read(test_instance)
def instance_fixture():
    """Yield an instance from ``instance_for_test``; the context exits when the generator resumes."""
    with instance_for_test() as test_instance:
        yield test_instance
def test_run_list():
    """Invoke ``run_list_command`` inside a test instance and expect exit code 0."""
    with instance_for_test():
        cli_runner = CliRunner()
        invocation = cli_runner.invoke(run_list_command)
        assert invocation.exit_code == 0
def test_memoized_plan_inits_resources_once():
    """Count resource initializations while building a memoized execution plan.

    Each resource function appends to a capture list when it is entered. After
    ``create_execution_plan`` the ``foo`` and ``bar`` IO managers and
    ``my_resource`` must each have been entered once, and the default
    ``io_manager`` not at all.
    """
    foo_entries = []
    bar_entries = []
    resource_dep_entries = []
    default_entries = []

    @solid(output_defs=[OutputDefinition(io_manager_key="foo")], version="foo")
    def foo_solid():
        pass

    @solid(output_defs=[OutputDefinition(io_manager_key="bar")], version="bar")
    def bar_solid():
        pass

    @io_manager(required_resource_keys={"my_resource"})
    def foo_manager():
        foo_entries.append("entered")
        return VersionedInMemoryIOManager()

    @io_manager(required_resource_keys={"my_resource"})
    def bar_manager():
        bar_entries.append("entered")
        return VersionedInMemoryIOManager()

    @io_manager
    def default_manager():
        default_entries.append("entered")
        return VersionedInMemoryIOManager()

    @resource
    def my_resource():
        resource_dep_entries.append("entered")
        return None

    fake_mode = ModeDefinition(
        name="fakemode",
        resource_defs={
            "foo": foo_manager,
            "bar": bar_manager,
            "my_resource": my_resource,
            "io_manager": default_manager,
        },
    )

    @pipeline(
        mode_defs=[fake_mode],
        tags={MEMOIZED_RUN_TAG: "true"},
    )
    def wrap_pipeline():
        # Two solids per manager, so a per-solid init would be counted twice.
        foo_solid()
        foo_solid.alias("another_foo")()
        bar_solid()
        bar_solid.alias("another_bar")()

    with instance_for_test() as instance:
        create_execution_plan(wrap_pipeline, instance_ref=instance.get_ref())

    assert len(foo_entries) == 1
    assert len(bar_entries) == 1
    assert len(resource_dep_entries) == 1
    assert not default_entries
def test_execute_run_iterator():
    """Exercise ``execute_run_iterator`` for an interrupted, a finished and a canceled run.

    Closing the iterator after STEP_START must produce one pipeline failure
    mentioning ``GeneratorExit`` and log both resource cleanup messages.
    A run already in SUCCESS must be rejected with ``check.CheckError``.
    A CANCELED run must yield a single "Not starting execution" event.
    """
    records = []

    def event_callback(record):
        assert isinstance(record, EventLogEntry)
        records.append(record)

    with instance_for_test() as instance:
        pipeline_def = PipelineDefinition(
            name="basic_resource_pipeline",
            solid_defs=[resource_solid],
            mode_defs=[
                ModeDefinition(
                    resource_defs={"a": resource_a, "b": resource_b},
                    logger_defs={"callback": construct_event_logger(event_callback)},
                )
            ],
        )

        def new_run():
            # Every scenario starts from a freshly created run with the callback logger.
            return instance.create_run_for_pipeline(
                pipeline_def=pipeline_def,
                run_config={"loggers": {"callback": {}}},
                mode="default",
            )

        # Scenario 1: abandon the iterator right after the first STEP_START event.
        iterator = execute_run_iterator(
            InMemoryPipeline(pipeline_def), new_run(), instance=instance
        )
        while next(iterator).event_type_value != "STEP_START":
            pass
        iterator.close()

        dagster_events = [entry.dagster_event for entry in records if entry.is_dagster_event]
        user_messages = [entry.user_message for entry in records if not entry.is_dagster_event]
        failures = [evt for evt in dagster_events if evt.is_pipeline_failure]

        assert len(failures) == 1
        assert "GeneratorExit" in failures[0].pipeline_failure_data.error.message
        assert any(text == "CLEANING A" for text in user_messages)
        assert any(text == "CLEANING B" for text in user_messages)

        # Scenario 2: a run already marked SUCCESS is rejected up front.
        succeeded_run = new_run().with_status(PipelineRunStatus.SUCCESS)
        expected_error = (
            r"Pipeline run basic_resource_pipeline \({}\) in state"
            r" PipelineRunStatus.SUCCESS, expected NOT_STARTED or STARTING".format(
                succeeded_run.run_id
            )
        )
        with pytest.raises(check.CheckError, match=expected_error):
            execute_run_iterator(
                InMemoryPipeline(pipeline_def), succeeded_run, instance=instance
            )

        # Scenario 3: a CANCELED run yields only the "not starting" event.
        canceled_run = new_run().with_status(PipelineRunStatus.CANCELED)
        canceled_events = list(
            execute_run_iterator(InMemoryPipeline(pipeline_def), canceled_run, instance=instance)
        )

        assert len(canceled_events) == 1
        assert (
            canceled_events[0].message
            == "Not starting execution since the run was canceled before execution could start"
        )
def test_memoized_plan_affected_by_resource_config():
    """Changing resource config must invalidate a previously memoized step.

    The plan is built three times: before any value is stored (step must run),
    after a value is stored under the computed version (nothing to run), and
    after the resource config is mutated (step must run again).
    """

    @solid(required_resource_keys={"my_resource"}, version="39")
    def solid_reqs_resource():
        pass

    @resource(version="42", config_schema={"foo": str})
    def basic():
        pass

    manager = VersionedInMemoryIOManager()
    mode = ModeDefinition(
        resource_defs={
            "my_resource": basic,
            "io_manager": IOManagerDefinition.hardcoded_io_manager(manager),
        },
    )

    @pipeline(
        mode_defs=[mode],
        tags={MEMOIZED_RUN_TAG: "true"},
    )
    def my_pipeline():
        solid_reqs_resource()

    with instance_for_test() as instance:
        # run_config holds a reference to this dict, so mutating it later
        # changes the config seen by subsequent plan builds.
        my_resource_config = {"foo": "bar"}
        run_config = {"resources": {"my_resource": {"config": my_resource_config}}}

        def build_plan():
            return create_execution_plan(
                my_pipeline, run_config=run_config, instance_ref=instance.get_ref()
            )

        unmemoized_plan = build_plan()
        assert unmemoized_plan.step_keys_to_execute == ["solid_reqs_resource"]

        # Store a value under the version the plan computed for the step output.
        output_handle = StepOutputHandle("solid_reqs_resource", "result")
        output_version = unmemoized_plan.get_version_for_step_output_handle(output_handle)
        storage_key = (output_handle.step_key, output_handle.output_name, output_version)
        manager.values[storage_key] = 5

        memoized_plan = build_plan()
        assert len(memoized_plan.step_keys_to_execute) == 0

        my_resource_config["foo"] = "baz"

        changed_config_plan = build_plan()
        assert changed_config_plan.step_keys_to_execute == ["solid_reqs_resource"]
def test_template_task_dag():
    """Run a three-task Airflow DAG as a Dagster pipeline and inspect each step's stdout log.

    ``print_hello`` runs first and fans out to ``sleep`` and a Jinja-templated
    bash task. After execution, the captured stdout of every started step is
    read from the compute log manager and checked for the expected log lines.
    """
    dag = DAG(
        dag_id="dag",
        default_args=default_args,
        schedule_interval=None,
    )

    t1 = BashOperator(
        task_id="print_hello",
        bash_command="echo hello dagsir",
        dag=dag,
    )

    t2 = BashOperator(
        task_id="sleep",
        bash_command="sleep 2",
        dag=dag,
    )

    # NOTE(review): the template's internal whitespace is reproduced exactly as it
    # appears in this file; confirm its line breaks against the expected
    # "Running command" output asserted below.
    templated_command = """ {% for i in range(5) %} echo '{{ ds }}' echo '{{ macros.ds_add(ds, 7)}}' echo '{{ params.my_param }}' {% endfor %} """

    t3 = BashOperator(
        task_id="templated",
        depends_on_past=False,
        bash_command=templated_command,
        params={"my_param": "Parameter I passed in"},
        dag=dag,
    )

    # pylint: disable=pointless-statement
    t1 >> [t2, t3]

    with instance_for_test() as instance:
        manager = instance.compute_log_manager

        execution_date = get_current_datetime_in_utc()
        execution_date_add_one_week = execution_date + datetime.timedelta(days=7)
        execution_date_iso = execution_date.strftime("%Y-%m-%d")
        execution_date_add_one_week_iso = execution_date_add_one_week.strftime("%Y-%m-%d")

        result = execute_pipeline(
            make_dagster_pipeline_from_airflow_dag(
                dag=dag, tags={AIRFLOW_EXECUTION_DATE_STR: execution_date_iso}
            ),
            instance=instance,
        )

        compute_steps = [
            event.step_key
            for event in result.step_event_list
            if event.event_type == DagsterEventType.STEP_START
        ]

        assert compute_steps == [
            "airflow_print_hello",
            "airflow_sleep",
            "airflow_templated",
        ]

        for step_key in compute_steps:
            compute_io_path = manager.get_local_path(
                result.run_id, step_key, ComputeIOType.STDOUT
            )
            assert os.path.exists(compute_io_path)

            # Context manager guarantees the handle is closed even if reading
            # or normalizing the content raises.
            with open(compute_io_path, "r") as stdout_file:
                file_contents = normalize_file_content(stdout_file.read())

            if step_key == "airflow_print_hello":
                assert file_contents.count("INFO - Running command: echo hello dagsir\n") == 1
                assert file_contents.count("INFO - Command exited with return code 0") == 1

            elif step_key == "airflow_sleep":
                assert file_contents.count("INFO - Running command: sleep 2\n") == 1
                assert file_contents.count("INFO - Output:\n") == 1
                assert file_contents.count("INFO - Command exited with return code 0") == 1

            elif step_key == "airflow_templated":
                # The rendered command repeats the three echo lines five times.
                assert (
                    file_contents.count(
                        "INFO - Running command: \n \n "
                        "echo '{execution_date_iso}'\n "
                        "echo '{execution_date_add_one_week_iso}'\n "
                        "echo 'Parameter I passed in'\n \n "
                        "echo '{execution_date_iso}'\n "
                        "echo '{execution_date_add_one_week_iso}'\n "
                        "echo 'Parameter I passed in'\n \n "
                        "echo '{execution_date_iso}'\n "
                        "echo '{execution_date_add_one_week_iso}'\n "
                        "echo 'Parameter I passed in'\n \n "
                        "echo '{execution_date_iso}'\n "
                        "echo '{execution_date_add_one_week_iso}'\n "
                        "echo 'Parameter I passed in'\n \n "
                        "echo '{execution_date_iso}'\n "
                        "echo '{execution_date_add_one_week_iso}'\n "
                        "echo 'Parameter I passed in'\n \n \n".format(
                            execution_date_iso=execution_date_iso,
                            execution_date_add_one_week_iso=execution_date_add_one_week_iso,
                        )
                    )
                    == 1
                )
                assert (
                    file_contents.count(
                        "INFO - {execution_date_iso}\n".format(
                            execution_date_iso=execution_date_iso
                        )
                    )
                    == 5
                )
                assert (
                    file_contents.count(
                        "INFO - {execution_date_add_one_week_iso}\n".format(
                            execution_date_add_one_week_iso=execution_date_add_one_week_iso
                        )
                    )
                    == 5
                )
                assert file_contents.count("INFO - Parameter I passed in\n") == 5
                assert file_contents.count("INFO - Command exited with return code 0") == 1
def test_terminated_run(get_external_pipeline, in_process):  # pylint: disable=redefined-outer-name
    """Terminate ``sleepy_pipeline`` through the run launcher and check the event sequence.

    The run is launched, terminated once a step has started, and must end in
    FAILURE. The closing engine-event message and the exact list of event types
    depend on ``in_process``.
    """
    with instance_for_test() as instance:
        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=sleepy_pipeline, run_config=None
        )

        with get_external_pipeline(pipeline_run.pipeline_name) as external_pipeline:
            run_id = pipeline_run.run_id

            status_before_launch = instance.get_run_by_id(run_id).status
            assert status_before_launch == PipelineRunStatus.NOT_STARTED

            launcher = instance.run_launcher
            launcher.launch_run(instance, pipeline_run, external_pipeline)

            poll_for_step_start(instance, run_id)

            assert launcher.can_terminate(run_id)
            assert launcher.terminate(run_id)

            # Wait for the run to finish, then re-read its stored state.
            poll_for_finished_run(instance, run_id, timeout=30)
            terminated_pipeline_run = instance.get_run_by_id(run_id)
            assert terminated_pipeline_run.status == PipelineRunStatus.FAILURE

            for awaited_message in (
                "Process for pipeline exited",
                "Received pipeline termination request",
            ):
                poll_for_event(
                    instance,
                    run_id,
                    event_type="ENGINE_EVENT",
                    message=awaited_message,
                )

            run_logs = instance.all_logs(run_id)
            event_types = [entry.dagster_event.event_type_value for entry in run_logs]

            if in_process:
                final_message = "Pipeline was terminated successfully"
                expected_event_types = [
                    "ENGINE_EVENT",
                    "ENGINE_EVENT",
                    "PIPELINE_START",
                    "ENGINE_EVENT",
                    "STEP_START",
                    "ENGINE_EVENT",
                    "STEP_FAILURE",
                    "PIPELINE_FAILURE",
                    "ENGINE_EVENT",
                    "ENGINE_EVENT",
                ]
            else:
                final_message = "Pipeline execution terminated by interrupt"
                expected_event_types = [
                    "ENGINE_EVENT",
                    "PIPELINE_START",
                    "ENGINE_EVENT",
                    "STEP_START",
                    "ENGINE_EVENT",
                    "STEP_FAILURE",
                    "PIPELINE_FAILURE",
                    "ENGINE_EVENT",
                    "ENGINE_EVENT",
                ]

            poll_for_event(
                instance,
                run_id,
                event_type="ENGINE_EVENT",
                message=final_message,
            )
            assert event_types == expected_event_types
def test_filtered_runs():
    """Query runs through GraphQL using run-id and tag filters.

    Two runs of ``foo_pipeline`` are executed with tags ``run=one`` and
    ``run=two``. Filtering by the first run id, by the ``run=one`` tag, and by
    both run ids must return exactly the matching runs.
    """
    with instance_for_test() as instance:
        repo = get_repo_at_time_1()
        first_result = execute_pipeline(
            repo.get_pipeline("foo_pipeline"), instance=instance, tags={"run": "one"}
        )
        run_id_1 = first_result.run_id
        second_result = execute_pipeline(
            repo.get_pipeline("foo_pipeline"), instance=instance, tags={"run": "two"}
        )
        run_id_2 = second_result.run_id

        with define_out_of_process_context(
            __file__, "get_repo_at_time_1", instance
        ) as context:

            def fetch_run_ids(run_filter):
                # Execute the filtered query and return the run ids it reports.
                response = execute_dagster_graphql(
                    context, FILTERED_RUN_QUERY, variables={"filter": run_filter}
                )
                assert response.data
                matched_runs = response.data["pipelineRunsOrError"]["results"]
                return [matched["runId"] for matched in matched_runs]

            by_single_id = fetch_run_ids({"runIds": [run_id_1]})
            assert len(by_single_id) == 1
            assert by_single_id[0] == run_id_1

            by_tag = fetch_run_ids({"tags": [{"key": "run", "value": "one"}]})
            assert len(by_tag) == 1
            assert by_tag[0] == run_id_1

            # test multiple run ids
            by_both_ids = fetch_run_ids({"runIds": [run_id_1, run_id_2]})
            assert len(by_both_ids) == 2
            assert set(by_both_ids) == {run_id_1, run_id_2}
def test_run_wipe_incorrect_delete_message():
    """Feed ``WRONG`` to ``run_wipe_command`` and expect the exit message with exit code 0."""
    with instance_for_test():
        cli_runner = CliRunner()
        invocation = cli_runner.invoke(run_wipe_command, input="WRONG\n")
        expected_notice = "Exiting without deleting all run history and event logs"
        assert expected_notice in invocation.output
        assert invocation.exit_code == 0
def test_execute_mode_command():
    """Execute ``multi_mode_with_resources`` through the CLI once per mode.

    The modes run in the order ``add_mode``, ``mult_mode``,
    ``double_adder_mode``, each with its own config file, and every invocation
    must return a truthy result.
    """
    runner = CliRunner()

    # (mode name, config file relative to this test file), in execution order.
    modes_and_configs = (
        ("add_mode", "../../environments/multi_mode_with_resources/add_mode.yaml"),
        ("mult_mode", "../../environments/multi_mode_with_resources/mult_mode.yaml"),
        (
            "double_adder_mode",
            "../../environments/multi_mode_with_resources/double_adder_mode.yaml",
        ),
    )

    with instance_for_test():
        for mode_name, config_relative_path in modes_and_configs:
            cli_args = [
                "-f",
                file_relative_path(__file__, "../../general_tests/test_repository.py"),
                "-a",
                "dagster_test_repository",
                "--config",
                file_relative_path(__file__, config_relative_path),
                "--mode",
                mode_name,
                "-p",
                "multi_mode_with_resources",  # pipeline name
            ]
            mode_result = runner_pipeline_execute(runner, cli_args)
            assert mode_result
def test_grpc_default_settings():
    """Without overrides the startup timeout equals ``DEFAULT_LOCAL_CODE_SERVER_STARTUP_TIMEOUT``."""
    with instance_for_test() as test_instance:
        startup_timeout = test_instance.code_server_process_startup_timeout
        assert startup_timeout == DEFAULT_LOCAL_CODE_SERVER_STARTUP_TIMEOUT
def test_execute_run_iterator():
    """Exercise ``execute_run_iterator`` across interrupted, finished, monitored and canceled runs.

    Closing the iterator after STEP_START must yield one pipeline failure
    mentioning ``GeneratorExit`` plus both cleanup messages. A SUCCESS run must
    be rejected on the plain instance. On an instance with run monitoring
    enabled the same run is ignored as a duplicate, and ``resume_from_failure``
    is rejected. A CANCELED run yields a single "Not starting execution" event.
    """
    records = []

    def event_callback(record):
        assert isinstance(record, EventLogEntry)
        records.append(record)

    with instance_for_test() as instance:
        pipeline_def = PipelineDefinition(
            name="basic_resource_pipeline",
            solid_defs=[resource_solid],
            mode_defs=[
                ModeDefinition(
                    resource_defs={"a": resource_a, "b": resource_b},
                    logger_defs={"callback": construct_event_logger(event_callback)},
                )
            ],
        )
        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=pipeline_def,
            run_config={"loggers": {"callback": {}}},
            mode="default",
        )

        iterator = execute_run_iterator(InMemoryPipeline(pipeline_def), pipeline_run, instance=instance)

        # Advance to the first STEP_START event, then abandon the generator.
        event_type = None
        while event_type != "STEP_START":
            event = next(iterator)
            event_type = event.event_type_value

        iterator.close()
        events = [record.dagster_event for record in records if record.is_dagster_event]
        messages = [record.user_message for record in records if not record.is_dagster_event]
        pipeline_failure_events = [event for event in events if event.is_pipeline_failure]
        assert len(pipeline_failure_events) == 1
        assert "GeneratorExit" in pipeline_failure_events[0].pipeline_failure_data.error.message
        assert len([message for message in messages if message == "CLEANING A"]) > 0
        assert len([message for message in messages if message == "CLEANING B"]) > 0

        # A run already marked SUCCESS must not be started again.
        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=pipeline_def,
            run_config={"loggers": {"callback": {}}},
            mode="default",
        ).with_status(PipelineRunStatus.SUCCESS)

        with pytest.raises(
            Exception,
            match=r"basic_resource_pipeline \({}\) started a new "
            r"run while the run was already in state DagsterRunStatus.SUCCESS."
            .format(pipeline_run.run_id),
        ):
            execute_run_iterator(InMemoryPipeline(pipeline_def), pipeline_run, instance=instance)

        # Same SUCCESS run, but against a second instance with run monitoring enabled.
        with instance_for_test(
            overrides={
                "run_launcher": {
                    "module": "dagster_tests.daemon_tests.test_monitoring_daemon",
                    "class": "TestRunLauncher",
                },
                "run_monitoring": {"enabled": True},
            }
        ) as run_monitoring_instance:
            event = next(
                execute_run_iterator(InMemoryPipeline(pipeline_def), pipeline_run, instance=run_monitoring_instance)
            )
            assert (
                "Ignoring a duplicate run that was started from somewhere other than the run monitor daemon"
                in event.message
            )

            with pytest.raises(
                check.CheckError,
                match=
                r"in state DagsterRunStatus.SUCCESS, expected STARTED or STARTING "
                r"because it's resuming from a run worker failure",
            ):
                execute_run_iterator(
                    InMemoryPipeline(pipeline_def),
                    pipeline_run,
                    instance=run_monitoring_instance,
                    resume_from_failure=True,
                )

        # A CANCELED run yields only the "not starting" event.
        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=pipeline_def,
            run_config={"loggers": {"callback": {}}},
            mode="default",
        ).with_status(PipelineRunStatus.CANCELED)

        events = list(
            execute_run_iterator(InMemoryPipeline(pipeline_def), pipeline_run, instance=instance)
        )

        assert len(events) == 1
        assert (
            events[0].message
            == "Not starting execution since the run was canceled before execution could start"
        )
def test_grpc_override_settings():
    """Overriding ``code_servers.local_startup_timeout`` with 60 must show up on the instance."""
    timeout_override = {"code_servers": {"local_startup_timeout": 60}}
    with instance_for_test(overrides=timeout_override) as test_instance:
        startup_timeout = test_instance.code_server_process_startup_timeout
        assert startup_timeout == 60
def test_terminated_run(get_external_pipeline, run_config):  # pylint: disable=redefined-outer-name
    """Terminate ``sleepy_pipeline`` after launch and check the run ends up CANCELED.

    Once the process-exit engine event is observed, the run's event log is
    checked against an expected (event type, message) sequence, which depends
    on whether ``run_config`` selects the multiprocess executor.
    """
    with instance_for_test() as instance:
        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=sleepy_pipeline,
            run_config=run_config,
        )

        with get_external_pipeline(pipeline_run.pipeline_name) as external_pipeline:
            run_id = pipeline_run.run_id

            status_before_launch = instance.get_run_by_id(run_id).status
            assert status_before_launch == PipelineRunStatus.NOT_STARTED

            instance.launch_run(pipeline_run.run_id, external_pipeline)

            poll_for_step_start(instance, run_id)

            launcher = instance.run_launcher
            assert launcher.can_terminate(run_id)
            assert launcher.terminate(run_id)

            # Wait for the run to finish, then re-read its stored state.
            poll_for_finished_run(instance, run_id, timeout=30)
            terminated_pipeline_run = instance.get_run_by_id(run_id)
            assert terminated_pipeline_run.status == PipelineRunStatus.CANCELED

            poll_for_event(
                instance,
                run_id,
                event_type="ENGINE_EVENT",
                message="Process for pipeline exited",
            )

            run_logs = instance.all_logs(run_id)

            if _is_multiprocess(run_config):
                expected_events = [
                    ("PIPELINE_CANCELING", "Sending pipeline termination request."),
                    (
                        "ENGINE_EVENT",
                        "Multiprocess executor: received termination signal - forwarding to active child process",
                    ),
                    (
                        "ENGINE_EVENT",
                        "Multiprocess executor: interrupted all active child processes",
                    ),
                    ("STEP_FAILURE", 'Execution of step "sleepy_solid" failed.'),
                    (
                        "PIPELINE_CANCELED",
                        'Execution of pipeline "sleepy_pipeline" canceled.',
                    ),
                    ("ENGINE_EVENT", "Process for pipeline exited"),
                ]
            else:
                expected_events = [
                    ("PIPELINE_CANCELING", "Sending pipeline termination request."),
                    ("STEP_FAILURE", 'Execution of step "sleepy_solid" failed.'),
                    (
                        "PIPELINE_CANCELED",
                        'Execution of pipeline "sleepy_pipeline" canceled.',
                    ),
                    ("ENGINE_EVENT", "Pipeline execution terminated by interrupt"),
                    ("ENGINE_EVENT", "Process for pipeline exited"),
                ]

            _check_event_log_contains(run_logs, expected_events)
def test_partitions_for_hourly_schedule_decorators_without_timezone():
    """Check an ``@hourly_schedule`` that declares no execution timezone.

    With the clock frozen via ``pendulum.test``, the schedule is evaluated with
    no scheduled time, with a time before the start date (expects a
    ``SkipReason``), and with a valid time (expects a ``RunRequest``).
    """
    with instance_for_test() as instance, pendulum.test(
        create_pendulum_time(2019, 2, 27, 0, 1, 1, tz="US/Central")
    ):
        context_without_time = ScheduleExecutionContext(instance, None)

        start_date = datetime(year=2019, month=1, day=1)

        @hourly_schedule(
            pipeline_name="foo_pipeline",
            start_date=start_date,
            execution_time=time(hour=0, minute=25),
        )
        def hourly_foo_schedule(hourly_time):
            return {"hourly_time": hourly_time.isoformat()}

        def sole_result(schedule_context):
            # The schedule must produce exactly one item per evaluation.
            produced = hourly_foo_schedule.get_execution_data(schedule_context)
            assert len(produced) == 1
            return produced[0]

        _check_partitions(
            hourly_foo_schedule,
            HOURS_UNTIL_FEBRUARY_27,
            pendulum.instance(start_date, tz="US/Central"),
            DEFAULT_HOURLY_FORMAT_WITHOUT_TIMEZONE,
            relativedelta(hours=1),
        )

        # No scheduled time: expects the 23:00 partition of 2019-02-26.
        latest_request = sole_result(context_without_time)
        assert isinstance(latest_request, RunRequest)
        assert latest_request.run_config == {
            "hourly_time": create_pendulum_time(
                year=2019, month=2, day=26, hour=23, tz="US/Central"
            ).isoformat()
        }

        # time that's invalid since it corresponds to a partition before the start date
        # should not execute and should yield a SkipReason if it tries to generate run config
        execution_time_with_invalid_partition = create_pendulum_time(
            year=2018, month=12, day=30, hour=3, minute=25, tz="US/Central"
        )
        context_with_invalid_time = ScheduleExecutionContext(
            instance, execution_time_with_invalid_partition
        )

        skip_data = sole_result(context_with_invalid_time)
        assert isinstance(skip_data, SkipReason)
        assert (
            "Partition selector did not return a partition. "
            "Make sure that the timezone on your partition set matches your execution timezone."
            in skip_data.skip_message
        )

        valid_time = create_pendulum_time(
            year=2019, month=1, day=27, hour=1, minute=25, tz="US/Central"
        )
        context_with_valid_time = ScheduleExecutionContext(instance, valid_time)

        valid_request = sole_result(context_with_valid_time)
        assert isinstance(valid_request, RunRequest)
        assert valid_request.run_config == {
            "hourly_time": create_pendulum_time(
                year=2019, month=1, day=27, hour=0, tz="US/Central"
            ).isoformat()
        }
def test_partitions_for_hourly_schedule_decorators_without_timezone():
    """Check an ``@hourly_schedule`` that declares no execution timezone.

    With the clock frozen via ``pendulum.test``, verifies partitions, run config
    and ``should_execute`` for a context with no scheduled time, a time with no
    matching partition, and a valid time.
    """
    with instance_for_test() as instance:
        with pendulum.test(pendulum.create(2019, 2, 27, 0, 1, 1, tz="US/Central")):
            context_without_time = ScheduleExecutionContext(instance, None)

            start_date = datetime(year=2019, month=1, day=1)

            @hourly_schedule(
                pipeline_name="foo_pipeline",
                start_date=start_date,
                execution_time=time(hour=0, minute=25),
            )
            def hourly_foo_schedule(hourly_time):
                return {"hourly_time": hourly_time.isoformat()}

            _check_partitions(
                hourly_foo_schedule,
                HOURS_UNTIL_FEBRUARY_27,
                start_date,
                DEFAULT_HOURLY_FORMAT_WITHOUT_TIMEZONE,
                relativedelta(hours=1),
            )

            # No scheduled time: expects the 23:00 partition of 2019-02-26.
            assert hourly_foo_schedule.get_run_config(context_without_time) == {
                "hourly_time": pendulum.create(year=2019, month=2, day=26, hour=23, tz="US/Central").isoformat()
            }

            assert hourly_foo_schedule.should_execute(context_without_time)

            # time that's invalid since it corresponds to a partition that hasn't happened yet
            # should not execute and should throw if it tries to generate run config
            execution_time_with_invalid_partition = datetime(year=2019, month=2, day=27, hour=3, minute=25)
            context_with_invalid_time = ScheduleExecutionContext(instance, execution_time_with_invalid_partition)

            assert not hourly_foo_schedule.should_execute(context_with_invalid_time)

            with pytest.raises(
                DagsterInvariantViolationError,
                match=
                "The partition selection function `default_partition_selector` did not return a partition from PartitionSet hourly_foo_schedule_partitions",
            ):
                hourly_foo_schedule.get_run_config(context_with_invalid_time)

            # A tick at 01:25 is expected to produce run config for the 00:00 partition.
            valid_time = datetime(year=2019, month=1, day=27, hour=1, minute=25)
            context_with_valid_time = ScheduleExecutionContext(instance, valid_time)

            assert hourly_foo_schedule.get_run_config(context_with_valid_time) == {
                "hourly_time": pendulum.create(year=2019, month=1, day=27, hour=0, tz="US/Central").isoformat()
            }

            assert hourly_foo_schedule.should_execute(context_with_valid_time)
def test_partitions_for_weekly_schedule_decorators_without_timezone():
    """Check ``@weekly_schedule`` run requests and partitions with no execution timezone.

    The clock is frozen at 2019-02-27 00:01:01 US/Central. Both the default
    schedule and one declared with ``partition_weeks_offset=0`` are checked.
    """
    with instance_for_test() as instance, pendulum.test(
        create_pendulum_time(2019, 2, 27, 0, 1, 1, tz="US/Central")
    ):

        def assert_single_run_request(schedule, context, expected_weekly_time):
            # Each evaluation must yield exactly one RunRequest with the expected config.
            requests = schedule.get_execution_data(context)
            assert len(requests) == 1
            request = requests[0]
            assert isinstance(request, RunRequest)
            assert request.run_config == {"weekly_time": expected_weekly_time.isoformat()}

        ctx_without_time = ScheduleExecutionContext(instance, None)
        start_date = datetime(year=2019, month=1, day=1)

        @weekly_schedule(
            pipeline_name="foo_pipeline",
            execution_day_of_week=3,
            start_date=start_date,
            execution_time=time(9, 30),
        )
        def weekly_foo_schedule(weekly_time):
            return {"weekly_time": weekly_time.isoformat()}

        jan_30_execution = create_pendulum_time(
            year=2019, month=1, day=30, hour=9, minute=30, tz="US/Central"
        )
        assert_single_run_request(
            weekly_foo_schedule,
            ScheduleExecutionContext(instance, jan_30_execution),
            create_pendulum_time(year=2019, month=1, day=22, tz="US/Central"),
        )

        assert_single_run_request(
            weekly_foo_schedule,
            ctx_without_time,
            create_pendulum_time(year=2019, month=2, day=19, tz="US/Central"),
        )

        _check_partitions(
            weekly_foo_schedule,
            8,
            pendulum.instance(start_date, tz="US/Central"),
            DEFAULT_DATE_FORMAT,
            relativedelta(weeks=1),
        )

        # Test partition_weeks_offset=0
        @weekly_schedule(
            pipeline_name="foo_pipeline",
            execution_day_of_week=3,
            start_date=start_date,
            execution_time=time(9, 30),
            partition_weeks_offset=0,
        )
        def weekly_foo_same_week_schedule(weekly_time):
            return {"weekly_time": weekly_time.isoformat()}

        same_week_execution = create_pendulum_time(
            year=2019, month=1, day=30, hour=9, minute=30, tz="US/Central"
        )
        assert_single_run_request(
            weekly_foo_same_week_schedule,
            ScheduleExecutionContext(instance, same_week_execution),
            create_pendulum_time(year=2019, month=1, day=29, tz="US/Central"),
        )
def test_terminated_run(get_external_pipeline, run_config):  # pylint: disable=redefined-outer-name
    """Launch ``sleepy_pipeline``, terminate it once a step has started, and check the result.

    After termination the run is expected to have status ``FAILURE`` and the
    event log is compared against the expected sequence for either the
    multiprocess or the in-process case, as decided by ``_is_multiprocess``.

    Args:
        get_external_pipeline: callable returning a context manager that yields
            the external pipeline for a pipeline name.
        run_config: run config used to create the run and to pick the expected
            event sequence.
    """
    with instance_for_test() as instance:
        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=sleepy_pipeline,
            run_config=run_config,
        )

        with get_external_pipeline(pipeline_run.pipeline_name) as external_pipeline:
            run_id = pipeline_run.run_id

            assert instance.get_run_by_id(run_id).status == PipelineRunStatus.NOT_STARTED

            launcher = instance.run_launcher
            launcher.launch_run(instance, pipeline_run, external_pipeline)

            # Terminate only after the step is actually running.
            poll_for_step_start(instance, run_id)

            assert launcher.can_terminate(run_id)
            assert launcher.terminate(run_id)

            # The poll is used purely to wait; its return value is not bound because
            # the status is asserted on a fresh read from the instance.
            poll_for_finished_run(instance, run_id, timeout=30)
            terminated_pipeline_run = instance.get_run_by_id(run_id)
            assert terminated_pipeline_run.status == PipelineRunStatus.FAILURE

            # Wait for the final engine event before snapshotting the logs.
            poll_for_event(
                instance,
                run_id,
                event_type="ENGINE_EVENT",
                message="Process for pipeline exited",
            )

            run_logs = instance.all_logs(run_id)

            if _is_multiprocess(run_config):
                expected_events = [
                    ("ENGINE_EVENT", "Started process for pipeline"),
                    ("PIPELINE_START", 'Started execution of pipeline "sleepy_pipeline".'),
                    ("ENGINE_EVENT", "Executing steps using multiprocess executor"),
                    ("ENGINE_EVENT", "Launching subprocess for sleepy_solid.compute"),
                    ("ENGINE_EVENT", "Executing step sleepy_solid.compute in subprocess"),
                    ("STEP_START", 'Started execution of step "sleepy_solid.compute".'),
                    ("ENGINE_EVENT", "Received pipeline termination request"),
                    (
                        "ENGINE_EVENT",
                        "Multiprocess executor: received termination signal - forwarding to active child process",
                    ),
                    ("STEP_FAILURE", 'Execution of step "sleepy_solid.compute" failed.'),
                    (
                        "PIPELINE_FAILURE",
                        'Execution of pipeline "sleepy_pipeline" failed. An exception was thrown during execution.',
                    ),
                    ("ENGINE_EVENT", "Process for pipeline exited"),
                ]
            else:
                expected_events = [
                    ("ENGINE_EVENT", "Started process for pipeline"),
                    ("PIPELINE_START", 'Started execution of pipeline "sleepy_pipeline".'),
                    ("ENGINE_EVENT", "Executing steps in process"),
                    ("STEP_START", 'Started execution of step "sleepy_solid.compute".'),
                    ("ENGINE_EVENT", "Received pipeline termination request"),
                    ("STEP_FAILURE", 'Execution of step "sleepy_solid.compute" failed.'),
                    ("PIPELINE_FAILURE", 'Execution of pipeline "sleepy_pipeline" failed.'),
                    ("ENGINE_EVENT", "Pipeline execution terminated by interrupt"),
                    ("ENGINE_EVENT", "Process for pipeline exited"),
                ]

            _check_event_log(run_logs, expected_events)
def test_list_command_cli():
    """Invoke ``pipeline_list_command`` with several ``-f``/``-m``/``-a``/``-d``/``-w`` combinations.

    Most invocations must produce the "bar" repository output; stacking two
    workspace files must produce the "extra" repository output; passing both
    ``-f`` and ``-m`` must exit with code 2.
    """
    with instance_for_test():
        cli = CliRunner()

        def list_pipelines(*cli_args):
            # Thin wrapper so each case below reads as just its argument list.
            return cli.invoke(pipeline_list_command, list(cli_args))

        this_file = file_relative_path(__file__, "test_cli_commands.py")
        this_module = "dagster_tests.cli_tests.command_tests.test_cli_commands"
        workspace_yaml = file_relative_path(__file__, "workspace.yaml")
        override_yaml = file_relative_path(__file__, "override.yaml")

        outcome = list_pipelines("-f", this_file, "-a", "bar")
        assert_correct_bar_repository_output(outcome)

        outcome = list_pipelines(
            "-f", this_file, "-a", "bar", "-d", os.path.dirname(__file__)
        )
        assert_correct_bar_repository_output(outcome)

        outcome = list_pipelines("-m", this_module, "-a", "bar")
        assert_correct_bar_repository_output(outcome)

        outcome = list_pipelines("-w", workspace_yaml)
        assert_correct_bar_repository_output(outcome)

        outcome = list_pipelines("-w", workspace_yaml, "-w", override_yaml)
        assert_correct_extra_repository_output(outcome)

        # Supplying both a file and a module is rejected.
        outcome = list_pipelines("-f", "foo.py", "-m", this_module, "-a", "bar")
        assert outcome.exit_code == 2

        outcome = list_pipelines("-m", this_module)
        assert_correct_bar_repository_output(outcome)

        outcome = list_pipelines("-f", this_file)
        assert_correct_bar_repository_output(outcome)
def test_start_mock_worker_config_from_yaml(worker_patch):
    """Start ``dagster_test_worker`` with a ``-y`` YAML config argument.

    Passes the path of ``engine_config.yaml`` (next to this file) and then
    runs ``assert_called`` on ``worker_patch``.
    """
    with instance_for_test():
        yaml_path = file_relative_path(__file__, "engine_config.yaml")
        start_worker("dagster_test_worker", args=["-y", yaml_path])
        assert_called(worker_patch)
def test_run_groups_over_time():
    """Query run groups against two versions of a repository.

    Four runs are executed against the repository returned by
    ``get_repo_at_time_1``. ``ALL_RUN_GROUPS_QUERY`` is then issued through
    contexts built for ``get_repo_at_time_1`` and ``get_repo_at_time_2``, and
    the pipeline snapshot data reported for each run is checked in both.
    """

    def _runs_by_id(query_result):
        # Flatten every group's runs into a single {runId: run} mapping.
        indexed = {}
        for group in query_result.data["runGroupsOrError"]["results"]:
            for run in group["runs"]:
                indexed[run["runId"]] = run
        return indexed

    def _snapshot(name, solid_selection):
        # Expected shape of the "pipeline" field on a run.
        return {
            "__typename": "PipelineSnapshot",
            "name": name,
            "solidSelection": solid_selection,
        }

    with instance_for_test() as instance:

        def _run(pipeline_def):
            return execute_pipeline(pipeline_def, instance=instance).run_id

        repo_1 = get_repo_at_time_1()

        full_evolve_run_id = _run(repo_1.get_pipeline("evolving_pipeline"))
        foo_run_id = _run(repo_1.get_pipeline("foo_pipeline"))
        evolve_a_run_id = _run(
            repo_1.get_pipeline("evolving_pipeline").get_pipeline_subset_def({"solid_A"})
        )
        evolve_b_run_id = _run(
            repo_1.get_pipeline("evolving_pipeline").get_pipeline_subset_def({"solid_B"})
        )

        with define_out_of_process_context(
            __file__, "get_repo_at_time_1", instance
        ) as context_at_time_1:
            result = execute_dagster_graphql(context_at_time_1, ALL_RUN_GROUPS_QUERY)
            assert result.data
            assert "runGroupsOrError" in result.data
            groups_payload = result.data["runGroupsOrError"]
            assert "results" in groups_payload
            assert len(groups_payload["results"]) == 4

            t1_runs = _runs_by_id(result)

            # test full_evolve_run_id
            assert t1_runs[full_evolve_run_id]["pipeline"] == _snapshot(
                "evolving_pipeline", None
            )

            # test foo_run_id
            assert t1_runs[foo_run_id]["pipeline"] == _snapshot("foo_pipeline", None)

            # test evolve_a_run_id
            assert t1_runs[evolve_a_run_id]["pipeline"] == _snapshot(
                "evolving_pipeline", ["solid_A"]
            )
            assert t1_runs[evolve_a_run_id]["pipelineSnapshotId"]

            # test evolve_b_run_id
            assert t1_runs[evolve_b_run_id]["pipeline"] == _snapshot(
                "evolving_pipeline", ["solid_B"]
            )

        with define_out_of_process_context(
            __file__, "get_repo_at_time_2", instance
        ) as context_at_time_2:
            result = execute_dagster_graphql(context_at_time_2, ALL_RUN_GROUPS_QUERY)
            assert "runGroupsOrError" in result.data
            groups_payload = result.data["runGroupsOrError"]
            assert "results" in groups_payload
            assert len(groups_payload["results"]) == 4

            t2_runs = _runs_by_id(result)

            # test full_evolve_run_id
            assert t2_runs[full_evolve_run_id]["pipeline"] == _snapshot(
                "evolving_pipeline", None
            )

            # test evolve_a_run_id
            assert t2_runs[evolve_a_run_id]["pipeline"] == _snapshot(
                "evolving_pipeline", ["solid_A"]
            )
            assert t2_runs[evolve_a_run_id]["pipelineSnapshotId"]

            # names same
            t1_full = t1_runs[full_evolve_run_id]
            t2_subset_a = t2_runs[evolve_a_run_id]
            assert t1_full["pipeline"]["name"] == t2_subset_a["pipeline"]["name"]

            # snapshots differ
            assert t1_full["pipelineSnapshotId"] != t2_subset_a["pipelineSnapshotId"]

            # pipeline name changed
            assert t2_runs[foo_run_id]["pipeline"] == _snapshot("foo_pipeline", None)

            # subset no longer valid - b renamed
            assert t2_runs[evolve_b_run_id]["pipeline"] == _snapshot(
                "evolving_pipeline", ["solid_B"]
            )
def test_partitions_for_monthly_schedule_decorators_without_timezone():
    """Check ``@monthly_schedule`` run requests and partitions with no execution timezone.

    The clock is frozen at 2019-02-27 00:01:01 UTC, converted to US/Eastern.
    Both the default schedule and one declared with
    ``partition_months_offset=0`` are checked.
    """
    with instance_for_test() as instance:
        frozen_now = to_timezone(
            create_pendulum_time(2019, 2, 27, 0, 1, 1, tz="UTC"), "US/Eastern"
        )
        with pendulum.test(frozen_now):
            ctx_without_time = build_schedule_context(instance)

            start_date = datetime(year=2019, month=1, day=1)

            @monthly_schedule(
                pipeline_name="foo_pipeline",
                execution_day_of_month=3,
                start_date=start_date,
                execution_time=time(9, 30),
            )
            def monthly_foo_schedule(monthly_time):
                return {"monthly_time": monthly_time.isoformat()}

            feb_third = create_pendulum_time(
                year=2019, month=2, day=3, hour=9, minute=30, tz="UTC"
            )
            ctx_feb_third = build_schedule_context(instance, feb_third)

            # With and without an explicit execution time, the January partition is expected.
            for ctx in (ctx_feb_third, ctx_without_time):
                requests = monthly_foo_schedule.get_execution_data(ctx)
                assert len(requests) == 1
                first_request = requests[0]
                assert isinstance(first_request, RunRequest)
                january_start = create_pendulum_time(year=2019, month=1, day=1, tz="UTC")
                assert first_request.run_config == {
                    "monthly_time": january_start.isoformat()
                }

            _check_partitions(
                monthly_foo_schedule,
                1,
                pendulum.instance(start_date, tz="UTC"),
                DEFAULT_MONTHLY_FORMAT,
                relativedelta(months=1),
            )

            # test partition_months_offset=0
            @monthly_schedule(
                pipeline_name="foo_pipeline",
                execution_day_of_month=3,
                start_date=start_date,
                execution_time=time(9, 30),
                partition_months_offset=0,
            )
            def monthly_foo_schedule_same_month(monthly_time):
                return {"monthly_time": monthly_time.isoformat()}

            same_month_execution = create_pendulum_time(
                year=2019, month=2, day=3, hour=9, minute=30, tz="UTC"
            )
            ctx_same_month = build_schedule_context(instance, same_month_execution)

            requests = monthly_foo_schedule_same_month.get_execution_data(ctx_same_month)
            assert len(requests) == 1
            first_request = requests[0]
            assert isinstance(first_request, RunRequest)
            february_start = create_pendulum_time(year=2019, month=2, day=1, tz="UTC")
            assert first_request.run_config == {
                "monthly_time": february_start.isoformat()
            }
def instance():
    """Yield a test instance whose run launcher is overridden.

    The ``run_launcher`` override points at ``MockedRunLauncher`` in
    ``dagster.core.test_utils``; the instance is yielded from inside the
    ``instance_for_test`` context manager.
    """
    with instance_for_test(
        overrides={
            "run_launcher": {
                "module": "dagster.core.test_utils",
                "class": "MockedRunLauncher",
            },
        }
    ) as test_instance:
        yield test_instance