def test_engine_error():
    with pytest.raises(DagsterSubprocessError):
        with seven.TemporaryDirectory() as tempdir:
            with instance_for_test_tempdir(tempdir) as instance:
                storage = os.path.join(tempdir, "flakey_storage")
                execute_pipeline(
                    ReconstructablePipeline.for_file(REPO_FILE, "engine_error"),
                    run_config={
                        "storage": {"filesystem": {"config": {"base_dir": storage}}},
                        "execution": {
                            "celery": {"config": {"config_source": {"task_always_eager": True}}}
                        },
                        "solids": {"destroy": {"config": storage}},
                    },
                    instance=instance,
                )
def test_execute_on_dask_local():
    with seven.TemporaryDirectory() as tempdir:
        with instance_for_test_tempdir(tempdir) as instance:
            result = execute_pipeline(
                reconstructable(dask_engine_pipeline),
                run_config={
                    "intermediate_storage": {"filesystem": {"config": {"base_dir": tempdir}}},
                    "execution": {"dask": {"config": {"cluster": {"local": {"timeout": 30}}}}},
                },
                instance=instance,
            )
            assert result.result_for_solid("simple").output_value() == 1
def graphql_postgres_instance(overrides):
    with tempfile.TemporaryDirectory() as temp_dir:
        with TestPostgresInstance.docker_service_up_or_skip(
            file_relative_path(__file__, "docker-compose.yml"),
            "test-postgres-db-graphql",
        ) as pg_conn_string:
            TestPostgresInstance.clean_run_storage(pg_conn_string)
            TestPostgresInstance.clean_event_log_storage(pg_conn_string)
            TestPostgresInstance.clean_schedule_storage(pg_conn_string)
            with instance_for_test_tempdir(
                temp_dir,
                overrides=merge_dicts(
                    {
                        "run_storage": {
                            "module": "dagster_postgres.run_storage.run_storage",
                            "class": "PostgresRunStorage",
                            "config": {"postgres_url": pg_conn_string},
                        },
                        "event_log_storage": {
                            "module": "dagster_postgres.event_log.event_log",
                            "class": "PostgresEventLogStorage",
                            "config": {"postgres_url": pg_conn_string},
                        },
                        "schedule_storage": {
                            "module": "dagster_postgres.schedule_storage.schedule_storage",
                            "class": "PostgresScheduleStorage",
                            "config": {"postgres_url": pg_conn_string},
                        },
                    },
                    overrides if overrides else {},
                ),
            ) as instance:
                yield instance
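# graphql_postgres_instance above is a generator, so it is presumably registered as a
# pytest fixture (or wrapped with contextlib.contextmanager) before use. A minimal
# sketch of the fixture approach, assuming pytest is the harness; the fixture name
# here is a hypothetical, not taken from the source.
import pytest

@pytest.fixture
def postgres_instance():  # hypothetical fixture name
    # Delegate to the generator above; tests get a Postgres-backed DagsterInstance
    # simply by naming this fixture as a test argument.
    yield from graphql_postgres_instance(overrides=None)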
def test_dagster_telemetry_unset(caplog):
    with seven.TemporaryDirectory() as temp_dir:
        with instance_for_test_tempdir(temp_dir):
            runner = CliRunner(env={"DAGSTER_HOME": temp_dir})
            with pushd(path_to_file("")):
                pipeline_attribute = "foo_pipeline"
                pipeline_name = "foo"
                result = runner.invoke(
                    pipeline_execute_command,
                    ["-f", path_to_file("test_cli_commands.py"), "-a", pipeline_attribute],
                )

                for record in caplog.records:
                    message = json.loads(record.getMessage())
                    if message.get("action") == UPDATE_REPO_STATS:
                        assert message.get("pipeline_name_hash") == hash_name(pipeline_name)
                        assert message.get("num_pipelines_in_repo") == str(1)
                        assert message.get("repo_hash") == hash_name(
                            get_ephemeral_repository_name(pipeline_name)
                        )
                    assert set(message.keys()) == EXPECTED_KEYS

                assert len(caplog.records) == 5
                assert result.exit_code == 0
def test_terminate_pipeline_on_celery(rabbitmq):
    with start_celery_worker():
        with seven.TemporaryDirectory() as tempdir:
            pipeline_def = ReconstructablePipeline.for_file(REPO_FILE, "interrupt_pipeline")
            with instance_for_test_tempdir(tempdir) as instance:
                run_config = {
                    "intermediate_storage": {"filesystem": {"config": {"base_dir": tempdir}}},
                    "execution": {"celery": {}},
                }

                results = []
                result_types = []
                interrupt_thread = None

                try:
                    for result in execute_pipeline_iterator(
                        pipeline=pipeline_def,
                        run_config=run_config,
                        instance=instance,
                    ):
                        # Interrupt once the first step starts
                        if (
                            result.event_type == DagsterEventType.STEP_START
                            and not interrupt_thread
                        ):
                            interrupt_thread = Thread(target=send_interrupt, args=())
                            interrupt_thread.start()
                        results.append(result)
                        result_types.append(result.event_type)
                    assert False  # should never reach: the iterator must be interrupted
                except DagsterExecutionInterruptedError:
                    pass

                interrupt_thread.join()

                # At least one step succeeded (the one that was running when the interrupt fired)
                assert DagsterEventType.STEP_SUCCESS in result_types

                # At least one step was revoked (and there were no step failure events)
                revoke_steps = [
                    result
                    for result in results
                    if result.event_type == DagsterEventType.ENGINE_EVENT
                    and "was revoked." in result.message
                ]
                assert len(revoke_steps) > 0

                # The overall pipeline failed
                assert DagsterEventType.PIPELINE_FAILURE in result_types
def test_logs_in_start_execution_predefined():
    variables = seven.json.dumps(
        {
            "executionParams": {
                "selector": {
                    "repositoryLocationName": "test_cli_location",
                    "repositoryName": "test",
                    "pipelineName": "math",
                },
                "runConfigData": {"solids": {"add_one": {"inputs": {"num": {"value": 123}}}}},
                "mode": "default",
            }
        }
    )

    workspace_path = file_relative_path(__file__, "./cli_test_workspace.yaml")
    with tempfile.TemporaryDirectory() as temp_dir:
        with instance_for_test_tempdir(
            temp_dir,
            overrides={
                "run_launcher": {
                    "module": "dagster.core.launcher.sync_in_memory_run_launcher",
                    "class": "SyncInMemoryRunLauncher",
                }
            },
        ) as instance:
            runner = CliRunner(env={"DAGSTER_HOME": temp_dir})
            result = runner.invoke(
                ui, ["-w", workspace_path, "-v", variables, "-p", "launchPipelineExecution"]
            )
            assert result.exit_code == 0

            result_data = json.loads(result.output.strip("\n").split("\n")[-1])
            assert (
                result_data["data"]["launchPipelineExecution"]["__typename"]
                == "LaunchPipelineRunSuccess"
            )
            run_id = result_data["data"]["launchPipelineExecution"]["run"]["runId"]

            # allow FS events to flush
            retries = 5
            while retries != 0 and not _is_done(instance, run_id):
                time.sleep(0.333)
                retries -= 1

            # assert that the watching run storage captured the run correctly from the other process
            run = instance.get_run_by_id(run_id)
            assert run.status == PipelineRunStatus.SUCCESS
def test_run_priority_pipeline(rabbitmq):
    with seven.TemporaryDirectory() as tempdir:
        with instance_for_test_tempdir(tempdir) as instance:
            low_done = threading.Event()
            hi_done = threading.Event()

            # enqueue low-priority tasks
            low_thread = threading.Thread(
                target=execute_on_thread,
                args=("low_pipeline", low_done, instance.get_ref()),
                kwargs={"tempdir": tempdir, "tags": {DAGSTER_CELERY_RUN_PRIORITY_TAG: "-3"}},
            )
            low_thread.daemon = True
            low_thread.start()

            time.sleep(1)  # sleep so that we don't hit any sqlite concurrency issues

            # enqueue hi-priority tasks
            hi_thread = threading.Thread(
                target=execute_on_thread,
                args=("hi_pipeline", hi_done, instance.get_ref()),
                kwargs={"tempdir": tempdir, "tags": {DAGSTER_CELERY_RUN_PRIORITY_TAG: "3"}},
            )
            hi_thread.daemon = True
            hi_thread.start()

            time.sleep(5)  # sleep to give queue time to prioritize tasks

            with start_celery_worker():
                while not low_done.is_set() or not hi_done.is_set():
                    time.sleep(1)

                low_runs = instance.get_runs(
                    filters=PipelineRunsFilter(pipeline_name="low_pipeline")
                )
                assert len(low_runs) == 1
                low_run = low_runs[0]
                lowstats = instance.get_run_stats(low_run.run_id)

                hi_runs = instance.get_runs(
                    filters=PipelineRunsFilter(pipeline_name="hi_pipeline")
                )
                assert len(hi_runs) == 1
                hi_run = hi_runs[0]
                histats = instance.get_run_stats(hi_run.run_id)

                assert lowstats.start_time < histats.start_time
                assert lowstats.end_time > histats.end_time
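# execute_on_thread is referenced above but not shown. A minimal sketch of what it
# plausibly does, assuming it re-hydrates the instance from its serializable ref (a
# DagsterInstance should not be shared across threads) and signals the Event when the
# run completes; the body and run_config below are assumptions, not the source's code.
def execute_on_thread(pipeline_name, done_event, instance_ref, tempdir=None, tags=None):
    # Re-hydrate the instance inside the worker thread from the ref.
    instance = DagsterInstance.from_ref(instance_ref)
    try:
        execute_pipeline(
            ReconstructablePipeline.for_file(REPO_FILE, pipeline_name),
            run_config={
                "intermediate_storage": {"filesystem": {"config": {"base_dir": tempdir}}},
                "execution": {"celery": {}},
            },
            tags=tags,
            instance=instance,
        )
    finally:
        # Always unblock the polling loop in the test, even if the run raises.
        done_event.set()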
def _default_cli_test_instance_tempdir(temp_dir, overrides=None):
    default_overrides = {
        "run_launcher": {"module": "dagster.core.test_utils", "class": "MockedRunLauncher"}
    }
    with instance_for_test_tempdir(
        temp_dir, overrides=merge_dicts(default_overrides, (overrides if overrides else {}))
    ) as instance:
        with mock.patch("dagster.core.instance.DagsterInstance.get") as _instance:
            _instance.return_value = instance
            yield instance
def _sqlite_instance_with_default_hijack():
    with tempfile.TemporaryDirectory() as temp_dir:
        with instance_for_test_tempdir(
            temp_dir,
            overrides={"backfill": {"daemon_enabled": True}},
        ) as instance:
            yield instance
def _sqlite_instance():
    with tempfile.TemporaryDirectory() as temp_dir:
        with instance_for_test_tempdir(
            temp_dir,
            overrides={
                "run_coordinator": {
                    "module": "dagster.core.run_coordinator.queued_run_coordinator",
                    "class": "QueuedRunCoordinator",
                },
            },
        ) as instance:
            yield instance
def dagster_cli_runner():
    with tempfile.TemporaryDirectory() as dagster_home_temp:
        with instance_for_test_tempdir(
            dagster_home_temp,
            overrides={
                "run_launcher": {
                    "module": "dagster.core.launcher.sync_in_memory_run_launcher",
                    "class": "SyncInMemoryRunLauncher",
                }
            },
        ):
            yield CliRunner(env={"DAGSTER_HOME": dagster_home_temp})
def _sqlite_instance_with_default_hijack():
    with seven.TemporaryDirectory() as temp_dir:
        with instance_for_test_tempdir(
            temp_dir,
            overrides={
                "scheduler": {
                    "module": "dagster.utils.test",
                    "class": "FilesystemTestScheduler",
                    "config": {"base_dir": temp_dir},
                },
                "run_launcher": {"module": "dagster", "class": "DefaultRunLauncher"},
            },
        ) as instance:
            yield instance
def test_dagit_logs(server_mock, caplog):
    with tempfile.TemporaryDirectory() as temp_dir:
        with instance_for_test_tempdir(temp_dir):
            runner = CliRunner(env={"DAGSTER_HOME": temp_dir})
            workspace_path = file_relative_path(__file__, "telemetry_repository.yaml")
            result = runner.invoke(ui, ["-w", workspace_path])
            assert result.exit_code == 0, str(result.exception)

            expected_repo_stats = {
                hash_name("test_repository"): 1,
                hash_name("dagster_test_repository"): 4,
            }
            actions = set()
            for record in caplog.records:
                message = json.loads(record.getMessage())
                actions.add(message.get("action"))
                if message.get("action") == UPDATE_REPO_STATS:
                    assert message.get("pipeline_name_hash") == ""
                    repo_hash = message.get("repo_hash")
                    assert repo_hash in expected_repo_stats
                    expected_num_pipelines_in_repo = expected_repo_stats.get(repo_hash)
                    assert message.get("num_pipelines_in_repo") == str(
                        expected_num_pipelines_in_repo
                    )
                assert set(message.keys()) == set(
                    [
                        "action",
                        "client_time",
                        "elapsed_time",
                        "event_id",
                        "instance_id",
                        "pipeline_name_hash",
                        "num_pipelines_in_repo",
                        "repo_hash",
                        "python_version",
                        "metadata",
                        "version",
                    ]
                )

            assert actions == set([START_DAGIT_WEBSERVER, UPDATE_REPO_STATS])
            assert len(caplog.records) == 3
            assert server_mock.call_args_list == [mock.call()]
def graphql_in_process_context():
    with tempfile.TemporaryDirectory() as temp_dir:
        with instance_for_test_tempdir(
            temp_dir,
            overrides={
                "scheduler": {
                    "module": "dagster.utils.test",
                    "class": "FilesystemTestScheduler",
                    "config": {"base_dir": temp_dir},
                }
            },
        ) as instance:
            yield define_test_in_process_context(instance)
def _sqlite_instance():
    with tempfile.TemporaryDirectory() as temp_dir:
        with instance_for_test_tempdir(
            temp_dir,
            overrides={
                "scheduler": {
                    "module": "dagster.utils.test",
                    "class": "FilesystemTestScheduler",
                    "config": {"base_dir": temp_dir},
                },
                "run_launcher": {
                    "module": "dagster.core.launcher.sync_in_memory_run_launcher",
                    "class": "SyncInMemoryRunLauncher",
                },
            },
        ) as instance:
            yield instance
def create_test_instance_and_storage():
    with tempfile.TemporaryDirectory() as tmpdir_path:
        with instance_for_test_tempdir(
            tmpdir_path,
            overrides={
                "event_log_storage": {
                    "module": "dagster_tests.core_tests.storage_tests.test_polling_event_watcher",
                    "class": "SqlitePollingEventLogStorage",
                    "config": {"base_dir": tmpdir_path},
                }
            },
        ) as instance:
            yield (instance, instance._event_storage)  # pylint: disable=protected-access
def _readonly_sqlite_instance():
    with tempfile.TemporaryDirectory() as temp_dir:
        with instance_for_test_tempdir(
            temp_dir,
            overrides={
                "scheduler": {
                    "module": "dagster.utils.test",
                    "class": "FilesystemTestScheduler",
                    "config": {"base_dir": temp_dir},
                },
                "run_launcher": {
                    "module": "dagster.core.test_utils",
                    "class": "ExplodingRunLauncher",
                },
            },
        ) as instance:
            yield instance
def test_create_app_with_workspace_and_scheduler():
    with load_workspace_from_yaml_paths(
        [file_relative_path(__file__, "./workspace.yaml")]
    ) as workspace:
        with tempfile.TemporaryDirectory() as temp_dir:
            with instance_for_test_tempdir(
                temp_dir,
                overrides={
                    "scheduler": {
                        "module": "dagster.utils.test",
                        "class": "FilesystemTestScheduler",
                        "config": {"base_dir": temp_dir},
                    }
                },
            ) as instance:
                assert create_app_from_workspace(workspace, instance)
def _sqlite_instance():
    with tempfile.TemporaryDirectory() as temp_dir:
        with instance_for_test_tempdir(
            temp_dir,
            overrides={
                "scheduler": {
                    "module": "dagster.utils.test",
                    "class": "FilesystemTestScheduler",
                    "config": {"base_dir": temp_dir},
                },
                "run_coordinator": {
                    "module": "dagster.core.run_coordinator.queued_run_coordinator",
                    "class": "QueuedRunCoordinator",
                },
            },
        ) as instance:
            yield instance
def test_invalid_instance_run(get_external_pipeline):
    with seven.TemporaryDirectory() as temp_dir:
        correct_run_storage_dir = os.path.join(temp_dir, "history", "")
        wrong_run_storage_dir = os.path.join(temp_dir, "wrong", "")

        with environ({"RUN_STORAGE_ENV": correct_run_storage_dir}):
            with instance_for_test_tempdir(
                temp_dir,
                overrides={
                    "run_storage": {
                        "module": "dagster.core.storage.runs",
                        "class": "SqliteRunStorage",
                        "config": {"base_dir": {"env": "RUN_STORAGE_ENV"}},
                    }
                },
            ) as instance:
                pipeline_run = instance.create_run_for_pipeline(
                    pipeline_def=noop_pipeline,
                )

                # Server won't be able to load the run from run storage
                with environ({"RUN_STORAGE_ENV": wrong_run_storage_dir}):
                    with get_external_pipeline(pipeline_run.pipeline_name) as external_pipeline:
                        with pytest.raises(
                            DagsterLaunchFailedError,
                            match=re.escape(
                                "gRPC server could not load run {run_id} in order to execute it".format(
                                    run_id=pipeline_run.run_id
                                )
                            ),
                        ):
                            instance.launch_run(
                                run_id=pipeline_run.run_id,
                                external_pipeline=external_pipeline,
                            )

                        failed_run = instance.get_run_by_id(pipeline_run.run_id)
                        assert failed_run.status == PipelineRunStatus.FAILURE
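# The environ helper used above swaps environment variables for the duration of a
# block (dagster ships a similar utility in dagster.core.test_utils). A minimal
# sketch of such a helper, assuming that restore-on-exit behavior is all it does:
import os
from contextlib import contextmanager

@contextmanager
def environ(env):
    # Remember prior values (None means the key was unset).
    old = {key: os.environ.get(key) for key in env}
    os.environ.update(env)
    try:
        yield
    finally:
        # Restore or remove each key exactly as it was before the block.
        for key, value in old.items():
            if value is None:
                os.environ.pop(key, None)
            else:
                os.environ[key] = value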
def mysql_instance_for_test(dunder_file, container_name, overrides=None):
    with TemporaryDirectory() as temp_dir:
        with TestMySQLInstance.docker_service_up_or_skip(
            file_relative_path(dunder_file, "docker-compose.yml"),
            container_name,
        ) as mysql_conn_string:
            TestMySQLInstance.clean_run_storage(mysql_conn_string)
            TestMySQLInstance.clean_event_log_storage(mysql_conn_string)
            TestMySQLInstance.clean_schedule_storage(mysql_conn_string)
            with instance_for_test_tempdir(
                temp_dir,
                overrides=merge_dicts(
                    {
                        "run_storage": {
                            "module": "dagster_mysql.run_storage.run_storage",
                            "class": "MySQLRunStorage",
                            "config": {"mysql_url": mysql_conn_string},
                        },
                        "event_log_storage": {
                            "module": "dagster_mysql.event_log.event_log",
                            "class": "MySQLEventLogStorage",
                            "config": {"mysql_url": mysql_conn_string},
                        },
                        "schedule_storage": {
                            "module": "dagster_mysql.schedule_storage.schedule_storage",
                            "class": "MySQLScheduleStorage",
                            "config": {"mysql_url": mysql_conn_string},
                        },
                    },
                    overrides if overrides else {},
                ),
            ) as instance:
                yield instance
def test_compute_log_base_with_spaces():
    with tempfile.TemporaryDirectory() as temp_dir:
        with instance_for_test_tempdir(
            temp_dir,
            {
                "compute_logs": {
                    "module": "dagster.core.storage.local_compute_log_manager",
                    "class": "LocalComputeLogManager",
                    "config": {"base_dir": os.path.join(temp_dir, "base with spaces")},
                }
            },
        ) as instance:
            pipeline_name = "foo_pipeline"
            pipeline_run = create_run_for_test(instance, pipeline_name=pipeline_name)
            step_keys = ["A", "B", "C"]

            with instance.compute_log_manager.watch(pipeline_run):
                print("outer 1")  # pylint: disable=print-call
                print("outer 2")  # pylint: disable=print-call
                print("outer 3")  # pylint: disable=print-call
                for step_key in step_keys:
                    inner_step(instance, pipeline_run, step_key)

            for step_key in step_keys:
                stdout = instance.compute_log_manager.read_logs_file(
                    pipeline_run.run_id, step_key, ComputeIOType.STDOUT
                )
                assert normalize_file_content(stdout.data) == expected_inner_output(step_key)

            full_out = instance.compute_log_manager.read_logs_file(
                pipeline_run.run_id, pipeline_name, ComputeIOType.STDOUT
            )
            assert normalize_file_content(full_out.data).startswith(expected_outer_prefix())
def test_engine_error():
    with seven.mock.patch(
        "dagster.core.execution.context.system.SystemExecutionContextData.raise_on_error",
        return_value=True,
    ):
        with pytest.raises(DagsterSubprocessError):
            with tempfile.TemporaryDirectory() as tempdir:
                with instance_for_test_tempdir(tempdir) as instance:
                    storage = os.path.join(tempdir, "flakey_storage")
                    execute_pipeline(
                        ReconstructablePipeline.for_file(REPO_FILE, "engine_error"),
                        run_config={
                            "intermediate_storage": {
                                "filesystem": {"config": {"base_dir": storage}}
                            },
                            "execution": {
                                "celery": {
                                    "config": {"config_source": {"task_always_eager": True}}
                                }
                            },
                            "solids": {"destroy": {"config": storage}},
                        },
                        instance=instance,
                    )
def test_repo_stats(caplog):
    with seven.TemporaryDirectory() as temp_dir:
        with instance_for_test_tempdir(temp_dir):
            runner = CliRunner(env={"DAGSTER_HOME": temp_dir})
            with pushd(path_to_file("")):
                pipeline_name = "multi_mode_with_resources"
                result = runner.invoke(
                    pipeline_execute_command,
                    [
                        "-f",
                        file_relative_path(__file__, "../../general_tests/test_repository.py"),
                        "-a",
                        "dagster_test_repository",
                        "-p",
                        pipeline_name,
                        "--preset",
                        "add",
                        "--tags",
                        '{ "foo": "bar" }',
                    ],
                )
                assert result.exit_code == 0, result.stdout

                for record in caplog.records:
                    message = json.loads(record.getMessage())
                    if message.get("action") == UPDATE_REPO_STATS:
                        assert message.get("pipeline_name_hash") == hash_name(pipeline_name)
                        assert message.get("num_pipelines_in_repo") == str(4)
                        assert message.get("repo_hash") == hash_name("dagster_test_repository")
                    assert set(message.keys()) == EXPECTED_KEYS

                assert len(caplog.records) == 5
                assert result.exit_code == 0
def test_filesystem_persist_one_run(tmpdir):
    with instance_for_test_tempdir(str(tmpdir)) as instance:
        do_test_single_write_read(instance)
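# Every snippet in this section leans on instance_for_test_tempdir from
# dagster.core.test_utils. A minimal sketch of such a helper, assuming all it must do
# is point DAGSTER_HOME at the tempdir, persist the overrides as dagster.yaml, and
# yield the resulting DagsterInstance; the real implementation may differ, so the
# _sketch suffix marks this as an illustration rather than the library's code.
import os
from contextlib import contextmanager

import yaml

from dagster import DagsterInstance

@contextmanager
def instance_for_test_tempdir_sketch(temp_dir, overrides=None):
    # Persist the overrides where DagsterInstance config loading expects them.
    with open(os.path.join(temp_dir, "dagster.yaml"), "w") as fd:
        yaml.dump(overrides or {}, fd, default_flow_style=False)
    old_home = os.environ.get("DAGSTER_HOME")
    os.environ["DAGSTER_HOME"] = temp_dir
    try:
        yield DagsterInstance.get()
    finally:
        if old_home is not None:
            os.environ["DAGSTER_HOME"] = old_home
        else:
            os.environ.pop("DAGSTER_HOME", None)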
def dagster_cli_runner():
    with seven.TemporaryDirectory() as dagster_home_temp:
        with instance_for_test_tempdir(dagster_home_temp):
            yield CliRunner(env={"DAGSTER_HOME": dagster_home_temp})
def test_interrupt_multiproc():
    with seven.TemporaryDirectory() as tempdir:
        with instance_for_test_tempdir(tempdir) as instance:
            file_1 = os.path.join(tempdir, "file_1")
            file_2 = os.path.join(tempdir, "file_2")
            file_3 = os.path.join(tempdir, "file_3")
            file_4 = os.path.join(tempdir, "file_4")

            # launch a thread that waits until the file is written to launch an interrupt
            Thread(target=_send_kbd_int, args=([file_1, file_2, file_3, file_4],)).start()

            results = []
            received_interrupt = False
            try:
                # launch a pipeline that writes a file and loops infinitely;
                # next time the launched thread wakes up it will send a keyboard interrupt
                for result in execute_pipeline_iterator(
                    reconstructable(write_files_pipeline),
                    run_config={
                        "solids": {
                            "write_1": {"config": {"tempfile": file_1}},
                            "write_2": {"config": {"tempfile": file_2}},
                            "write_3": {"config": {"tempfile": file_3}},
                            "write_4": {"config": {"tempfile": file_4}},
                        },
                        "execution": {"multiprocess": {"config": {"max_concurrent": 4}}},
                        "intermediate_storage": {"filesystem": {}},
                    },
                    instance=instance,
                ):
                    results.append(result)
                assert False  # should never reach
            except DagsterExecutionInterruptedError:
                received_interrupt = True

            assert received_interrupt
            assert [result.event_type for result in results].count(
                DagsterEventType.STEP_FAILURE
            ) == 4
            assert DagsterEventType.PIPELINE_FAILURE in [
                result.event_type for result in results
            ]
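# _send_kbd_int is referenced above but not defined in this section. A minimal
# sketch, assuming it polls until every marker file exists (i.e. all four solids are
# running) and then delivers SIGINT to the test process, which Python surfaces as the
# interrupt the test expects; the real helper may differ.
import os
import signal
import time

def _send_kbd_int(temp_files):
    # Wait until every solid has written its marker file.
    while not all(os.path.exists(path) for path in temp_files):
        time.sleep(0.1)
    # Deliver SIGINT to our own process to simulate Ctrl-C mid-run.
    os.kill(os.getpid(), signal.SIGINT)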