def test_engine_events(get_external_pipeline, in_process):  # pylint: disable=redefined-outer-name
    with instance_for_test() as instance:
        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=math_diamond, run_config=None
        )
        run_id = pipeline_run.run_id

        assert instance.get_run_by_id(run_id).status == PipelineRunStatus.NOT_STARTED

        with get_external_pipeline(pipeline_run.pipeline_name) as external_pipeline:
            launcher = instance.run_launcher
            launcher.launch_run(instance, pipeline_run, external_pipeline)
            finished_pipeline_run = poll_for_finished_run(instance, run_id)

            assert finished_pipeline_run
            assert finished_pipeline_run.run_id == run_id
            assert finished_pipeline_run.status == PipelineRunStatus.SUCCESS

            poll_for_event(
                instance, run_id, event_type="ENGINE_EVENT", message="Process for pipeline exited"
            )
            event_records = instance.all_logs(run_id)

            if in_process:
                (
                    about_to_start,
                    started_process,
                    executing_steps,
                    finished_steps,
                    process_exited,
                ) = tuple(_get_engine_events(event_records))

                assert "About to start process" in about_to_start.message
                assert "Started process for pipeline" in started_process.message
                assert "Executing steps in process" in executing_steps.message
                assert "Finished steps in process" in finished_steps.message
                assert "Process for pipeline exited" in process_exited.message
            else:
                (
                    started_process,
                    executing_steps,
                    finished_steps,
                    process_exited,
                ) = tuple(_get_engine_events(event_records))

                assert "Started process for pipeline" in started_process.message
                assert "Executing steps in process" in executing_steps.message
                assert "Finished steps in process" in finished_steps.message
                assert "Process for pipeline exited" in process_exited.message
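
# The engine-event assertions above and below rely on a module-level helper,
# `_get_engine_events`, that is not shown here. A minimal sketch of what it
# might look like, assuming event log records expose an optional
# `dagster_event` with an `event_type_value` string (as the tests themselves
# assume); the actual helper in the test module may differ:
def _get_engine_events(event_records):
    # Keep only records carrying an ENGINE_EVENT, preserving log order so
    # callers can unpack them positionally.
    return [
        record
        for record in event_records
        if record.dagster_event and record.dagster_event.event_type_value == "ENGINE_EVENT"
    ]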
def test_execute_schedule_on_celery_k8s(  # pylint: disable=redefined-outer-name, unused-argument
    dagster_instance_for_daemon, helm_namespace_for_daemon
):
    schedule_name = "frequent_celery"
    with get_test_project_external_schedule(schedule_name) as external_schedule:
        reoriginated_schedule = ReOriginatedExternalScheduleForTest(external_schedule)
        dagster_instance_for_daemon.start_schedule_and_update_storage_state(
            reoriginated_schedule
        )

        schedule_runs = dagster_instance_for_daemon.get_runs(
            PipelineRunsFilter(tags=PipelineRun.tags_for_schedule(reoriginated_schedule))
        )

        assert len(schedule_runs) == 0

        try:
            start_time = time.time()
            while True:
                schedule_runs = dagster_instance_for_daemon.get_runs(
                    PipelineRunsFilter(
                        tags=PipelineRun.tags_for_schedule(reoriginated_schedule)
                    )
                )

                if len(schedule_runs) > 0:
                    break

                if time.time() - start_time > 120:
                    raise Exception(
                        "Timed out waiting for schedule to start a run. "
                        "Check the dagster-daemon pod logs to see why it didn't start."
                    )

                time.sleep(1)
        finally:
            dagster_instance_for_daemon.stop_schedule_and_update_storage_state(
                reoriginated_schedule.get_external_origin_id()
            )

        last_run = schedule_runs[0]

        finished_pipeline_run = poll_for_finished_run(
            dagster_instance_for_daemon, last_run.run_id, timeout=120
        )

        assert finished_pipeline_run.is_success
def test_run_always_finishes():  # pylint: disable=redefined-outer-name
    with instance_for_test() as instance:
        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=slow_pipeline, run_config=None
        )
        run_id = pipeline_run.run_id

        loadable_target_origin = LoadableTargetOrigin(
            executable_path=sys.executable,
            attribute="nope",
            python_file=file_relative_path(__file__, "test_default_run_launcher.py"),
        )
        server_process = GrpcServerProcess(
            loadable_target_origin=loadable_target_origin, max_workers=4
        )
        with server_process.create_ephemeral_client() as api_client:
            with GrpcServerRepositoryLocationOrigin(
                location_name="test",
                port=api_client.port,
                socket=api_client.socket,
                host=api_client.host,
            ).create_location() as repository_location:
                external_pipeline = repository_location.get_repository(
                    "nope"
                ).get_full_external_pipeline("slow_pipeline")

                assert instance.get_run_by_id(run_id).status == PipelineRunStatus.NOT_STARTED

                instance.launch_run(
                    run_id=pipeline_run.run_id, external_pipeline=external_pipeline
                )

        # Server process now receives shutdown event, run has not finished yet
        pipeline_run = instance.get_run_by_id(run_id)
        assert not pipeline_run.is_finished
        assert server_process.server_process.poll() is None

        # Server should wait until run finishes, then shutdown
        pipeline_run = poll_for_finished_run(instance, run_id)
        assert pipeline_run.status == PipelineRunStatus.SUCCESS

        start_time = time.time()
        while server_process.server_process.poll() is None:
            time.sleep(0.05)
            # Verify server process cleans up eventually
            assert time.time() - start_time < 5

        server_process.wait()
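
# The polling helpers used throughout (`poll_for_finished_run`,
# `poll_for_step_start`, `poll_for_event`) come from Dagster's test utilities.
# A simplified sketch of the shared polling pattern, with the default timeout
# assumed; the real helpers may differ in their details:
import time


def poll_for_finished_run(instance, run_id, timeout=20):
    # Spin until the run reaches a terminal status, failing loudly on timeout.
    start_time = time.time()
    while time.time() - start_time < timeout:
        run = instance.get_run_by_id(run_id)
        if run.is_finished:
            return run
        time.sleep(0.1)
    raise Exception("Timed out waiting for run {} to finish".format(run_id))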
def test_terminated_run(get_external_pipeline):  # pylint: disable=redefined-outer-name
    with grpc_instance() as instance:
        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=sleepy_pipeline, run_config=None
        )
        with get_external_pipeline(pipeline_run.pipeline_name) as external_pipeline:
            run_id = pipeline_run.run_id

            assert instance.get_run_by_id(run_id).status == PipelineRunStatus.NOT_STARTED

            launcher = instance.run_launcher
            launcher.launch_run(instance, pipeline_run, external_pipeline)

            poll_for_step_start(instance, run_id)

            assert launcher.can_terminate(run_id)
            assert launcher.terminate(run_id)

            poll_for_finished_run(instance, run_id, timeout=30)
            terminated_pipeline_run = instance.get_run_by_id(run_id)
            assert terminated_pipeline_run.status == PipelineRunStatus.FAILURE

            poll_for_event(
                instance, run_id, event_type="ENGINE_EVENT", message="Process for pipeline exited"
            )

            run_logs = instance.all_logs(run_id)
            event_types = [event.dagster_event.event_type_value for event in run_logs]
            assert event_types == [
                "ENGINE_EVENT",
                "PIPELINE_START",
                "ENGINE_EVENT",
                "STEP_START",
                "STEP_FAILURE",
                "PIPELINE_FAILURE",
                "ENGINE_EVENT",
                "ENGINE_EVENT",
            ]
def test_crashy_run(get_workspace, run_config):  # pylint: disable=redefined-outer-name
    with instance_for_test() as instance:
        with get_workspace(instance) as workspace:
            external_pipeline = (
                workspace.get_repository_location("test")
                .get_repository("nope")
                .get_full_external_pipeline("crashy_pipeline")
            )

            pipeline_run = instance.create_run_for_pipeline(
                pipeline_def=crashy_pipeline,
                run_config=run_config,
                external_pipeline_origin=external_pipeline.get_external_origin(),
                pipeline_code_origin=external_pipeline.get_python_origin(),
            )

            run_id = pipeline_run.run_id

            assert instance.get_run_by_id(run_id).status == PipelineRunStatus.NOT_STARTED

            instance.launch_run(pipeline_run.run_id, workspace)

            failed_pipeline_run = instance.get_run_by_id(run_id)

            assert failed_pipeline_run
            assert failed_pipeline_run.run_id == run_id

            failed_pipeline_run = poll_for_finished_run(instance, run_id, timeout=5)
            assert failed_pipeline_run.status == PipelineRunStatus.FAILURE

            event_records = instance.all_logs(run_id)

            if _is_multiprocess(run_config):
                message = (
                    "Multiprocess executor: child process for "
                    "step crashy_solid unexpectedly exited"
                )
            else:
                message = "Pipeline execution process for {run_id} unexpectedly exited".format(
                    run_id=run_id
                )

            assert _message_exists(event_records, message)
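
# `_is_multiprocess` and `_message_exists` are small test-module helpers used
# by several of these tests. Hedged sketches, assuming run configs opt into
# the multiprocess executor under an "execution" key and that log records
# expose a `message` string (consistent with their use above); the real
# helpers may differ:
def _is_multiprocess(run_config):
    # Treat any run config that explicitly configures the multiprocess
    # executor as a multiprocess run.
    return bool(run_config) and "multiprocess" in run_config.get("execution", {})


def _message_exists(event_records, message_text):
    # True if any event log record contains the given message fragment.
    return any(message_text in record.message for record in event_records)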
def test_terminated_run():
    with instance_for_test() as instance:
        repo_yaml = file_relative_path(__file__, "repo.yaml")
        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=sleepy_pipeline, run_config=None
        )
        run_id = pipeline_run.run_id

        assert instance.get_run_by_id(run_id).status == PipelineRunStatus.NOT_STARTED

        external_pipeline = get_full_external_pipeline(repo_yaml, pipeline_run.pipeline_name)
        launcher = instance.run_launcher
        launcher.launch_run(instance, pipeline_run, external_pipeline)

        poll_for_step_start(instance, run_id)

        assert launcher.can_terminate(run_id)
        assert launcher.terminate(run_id)

        # Terminate returns False if the run has already been terminated
        assert not launcher.terminate(run_id)

        poll_for_finished_run(instance, run_id, timeout=2)
        terminated_pipeline_run = instance.get_run_by_id(run_id)
        assert terminated_pipeline_run.status == PipelineRunStatus.FAILURE

        events = [event.dagster_event.event_type_value for event in instance.all_logs(run_id)]
        assert events == [
            "ENGINE_EVENT",
            "ENGINE_EVENT",
            "PIPELINE_START",
            "ENGINE_EVENT",
            "STEP_START",
            "STEP_FAILURE",
            "PIPELINE_FAILURE",
            "ENGINE_EVENT",
        ]
def test_successful_run(get_external_pipeline, run_config):  # pylint: disable=redefined-outer-name
    with instance_for_test() as instance:
        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=noop_pipeline, run_config=run_config
        )

        with get_external_pipeline(pipeline_run.pipeline_name) as external_pipeline:
            run_id = pipeline_run.run_id

            assert instance.get_run_by_id(run_id).status == PipelineRunStatus.NOT_STARTED

            instance.launch_run(run_id=pipeline_run.run_id, external_pipeline=external_pipeline)

            pipeline_run = instance.get_run_by_id(run_id)
            assert pipeline_run
            assert pipeline_run.run_id == run_id

            pipeline_run = poll_for_finished_run(instance, run_id)
            assert pipeline_run.status == PipelineRunStatus.SUCCESS
def test_terminate_kills_subproc():
    with instance_for_test() as instance:
        with get_managed_grpc_server_workspace(instance) as workspace:
            external_pipeline = (
                workspace.get_repository_location("test")
                .get_repository("sleepy_repo")
                .get_full_external_pipeline("sleepy_pipeline")
            )
            pipeline_run = instance.create_run_for_pipeline(
                pipeline_def=sleepy_pipeline,
                external_pipeline_origin=external_pipeline.get_external_origin(),
                pipeline_code_origin=external_pipeline.get_python_origin(),
            )
            run_id = pipeline_run.run_id

            assert instance.get_run_by_id(run_id).status == PipelineRunStatus.NOT_STARTED

            instance.launch_run(pipeline_run.run_id, workspace)

            poll_for_step_start(instance, run_id)

            # find pid of subprocess
            subproc_pid = poll_for_pid(instance, run_id)
            assert psutil.pid_exists(subproc_pid)

            # simulate waiting a bit to terminate the pipeline
            time.sleep(0.5)

            launcher = instance.run_launcher
            assert launcher.can_terminate(run_id)
            assert launcher.terminate(run_id)

            poll_for_finished_run(instance, run_id, timeout=30)
            terminated_pipeline_run = instance.get_run_by_id(run_id)
            assert terminated_pipeline_run.status == PipelineRunStatus.CANCELED

            # make sure the subprocess is killed after a short delay
            time.sleep(0.5)
            assert not psutil.pid_exists(subproc_pid)
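
# `poll_for_pid` is another helper not shown here. A hedged sketch, assuming
# the launcher's engine events include the subprocess pid in a message of the
# form "(pid: <N>)"; the real helper may read structured event metadata
# instead:
import re
import time


def poll_for_pid(instance, run_id, timeout=20):
    # Scan the run's event log until a pid shows up in some message.
    start_time = time.time()
    while time.time() - start_time < timeout:
        for record in instance.all_logs(run_id):
            match = re.search(r"\(pid: (\d+)\)", record.message)
            if match:
                return int(match.group(1))
        time.sleep(0.1)
    raise Exception("Timed out waiting for pid of run {}".format(run_id))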
def test_exity_run(run_config):  # pylint: disable=redefined-outer-name
    with instance_for_test() as instance:
        with get_managed_grpc_server_workspace(instance) as workspace:
            external_pipeline = (
                workspace.get_repository_location("test")
                .get_repository("nope")
                .get_full_external_pipeline("exity_pipeline")
            )
            pipeline_run = instance.create_run_for_pipeline(
                pipeline_def=exity_pipeline,
                run_config=run_config,
                external_pipeline_origin=external_pipeline.get_external_origin(),
                pipeline_code_origin=external_pipeline.get_python_origin(),
            )

            run_id = pipeline_run.run_id

            assert instance.get_run_by_id(run_id).status == PipelineRunStatus.NOT_STARTED

            instance.launch_run(pipeline_run.run_id, workspace)

            failed_pipeline_run = instance.get_run_by_id(run_id)

            assert failed_pipeline_run
            assert failed_pipeline_run.run_id == run_id

            failed_pipeline_run = poll_for_finished_run(instance, run_id, timeout=5)
            assert failed_pipeline_run.status == PipelineRunStatus.FAILURE

            event_records = instance.all_logs(run_id)

            assert _message_exists(event_records, 'Execution of step "exity_solid" failed.')
            assert _message_exists(
                event_records,
                'Execution of run for "exity_pipeline" failed. An exception was thrown during execution.',
            )
def test_single_solid_selection_execution(
    get_external_pipeline, run_config
):  # pylint: disable=redefined-outer-name
    with instance_for_test() as instance:
        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=math_diamond, run_config=run_config, solids_to_execute={"return_one"}
        )
        run_id = pipeline_run.run_id

        assert instance.get_run_by_id(run_id).status == PipelineRunStatus.NOT_STARTED

        with get_external_pipeline(pipeline_run.pipeline_name) as external_pipeline:
            instance.launch_run(pipeline_run.run_id, external_pipeline)
            finished_pipeline_run = poll_for_finished_run(instance, run_id)

            event_records = instance.all_logs(run_id)

            assert finished_pipeline_run
            assert finished_pipeline_run.run_id == run_id
            assert finished_pipeline_run.status == PipelineRunStatus.SUCCESS

            assert _get_successful_step_keys(event_records) == {"return_one"}
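
# `_get_successful_step_keys` backs the solid-selection assertions above and
# below. A minimal sketch, assuming STEP_SUCCESS events carry the step key on
# the dagster event; the actual helper may differ:
def _get_successful_step_keys(event_records):
    return {
        record.dagster_event.step_key
        for record in event_records
        if record.dagster_event
        and record.dagster_event.event_type_value == "STEP_SUCCESS"
    }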
def test_crashy_run(get_external_pipeline, run_config):  # pylint: disable=redefined-outer-name
    with instance_for_test() as instance:
        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=crashy_pipeline,
            run_config=run_config,
        )
        with get_external_pipeline(pipeline_run.pipeline_name) as external_pipeline:
            run_id = pipeline_run.run_id

            assert instance.get_run_by_id(run_id).status == PipelineRunStatus.NOT_STARTED

            launcher = instance.run_launcher
            launcher.launch_run(instance, pipeline_run, external_pipeline)

            failed_pipeline_run = instance.get_run_by_id(run_id)

            assert failed_pipeline_run
            assert failed_pipeline_run.run_id == run_id

            failed_pipeline_run = poll_for_finished_run(instance, run_id, timeout=5)
            assert failed_pipeline_run.status == PipelineRunStatus.FAILURE

            event_records = instance.all_logs(run_id)

            if _is_multiprocess(run_config):
                message = (
                    "Multiprocess executor: child process for "
                    "step crashy_solid.compute unexpectedly exited"
                )
            else:
                message = "Pipeline execution process for {run_id} unexpectedly exited.".format(
                    run_id=run_id
                )

            assert _message_exists(event_records, message)
def test_multi_solid_selection_execution(
    get_workspace, run_config
):  # pylint: disable=redefined-outer-name
    with instance_for_test() as instance:
        with get_workspace(instance) as workspace:
            external_pipeline = (
                workspace.get_repository_location("test")
                .get_repository("nope")
                .get_full_external_pipeline("math_diamond")
            )

            pipeline_run = instance.create_run_for_pipeline(
                pipeline_def=math_diamond,
                run_config=run_config,
                solids_to_execute={"return_one", "multiply_by_2"},
                external_pipeline_origin=external_pipeline.get_external_origin(),
                pipeline_code_origin=external_pipeline.get_python_origin(),
            )
            run_id = pipeline_run.run_id

            assert instance.get_run_by_id(run_id).status == PipelineRunStatus.NOT_STARTED

            instance.launch_run(pipeline_run.run_id, workspace)
            finished_pipeline_run = poll_for_finished_run(instance, run_id)

            event_records = instance.all_logs(run_id)

            assert finished_pipeline_run
            assert finished_pipeline_run.run_id == run_id
            assert finished_pipeline_run.status == PipelineRunStatus.SUCCESS

            assert _get_successful_step_keys(event_records) == {
                "return_one",
                "multiply_by_2",
            }
def test_engine_events(get_external_pipeline, run_config):  # pylint: disable=redefined-outer-name
    with instance_for_test() as instance:
        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=math_diamond, run_config=run_config
        )
        run_id = pipeline_run.run_id

        assert instance.get_run_by_id(run_id).status == PipelineRunStatus.NOT_STARTED

        with get_external_pipeline(pipeline_run.pipeline_name) as external_pipeline:
            instance.launch_run(pipeline_run.run_id, external_pipeline)
            finished_pipeline_run = poll_for_finished_run(instance, run_id)

            assert finished_pipeline_run
            assert finished_pipeline_run.run_id == run_id
            assert finished_pipeline_run.status == PipelineRunStatus.SUCCESS

            poll_for_event(
                instance, run_id, event_type="ENGINE_EVENT", message="Process for pipeline exited"
            )

            event_records = instance.all_logs(run_id)
            engine_events = _get_engine_events(event_records)

            if _is_multiprocess(run_config):
                messages = [
                    "Started process for pipeline",
                    "Starting initialization of resources",
                    "Finished initialization of resources",
                    "Executing steps using multiprocess executor",
                    "Launching subprocess for return_one",
                    "Executing step return_one in subprocess",
                    "Starting initialization of resources",
                    "Finished initialization of resources",
                    # multiply_by_2 and multiply_by_3 launch and execute in
                    # non-deterministic order, so only assert that eight engine
                    # events occur here (the empty string matches any message)
                    "",
                    "",
                    "",
                    "",
                    "",
                    "",
                    "",
                    "",
                    "Launching subprocess for add",
                    "Executing step add in subprocess",
                    "Starting initialization of resources",
                    "Finished initialization of resources",
                    "Multiprocess executor: parent process exiting",
                    "Process for pipeline exited",
                ]
            else:
                messages = [
                    "Started process for pipeline",
                    "Starting initialization of resources",
                    "Finished initialization of resources",
                    "Executing steps in process",
                    "Finished steps in process",
                    "Process for pipeline exited",
                ]

            events_iter = iter(engine_events)
            assert len(engine_events) == len(messages)

            for message in messages:
                next_log = next(events_iter)
                assert message in next_log.message
def test_terminated_run(get_external_pipeline, run_config):  # pylint: disable=redefined-outer-name
    with instance_for_test() as instance:
        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=sleepy_pipeline,
            run_config=run_config,
        )
        with get_external_pipeline(pipeline_run.pipeline_name) as external_pipeline:
            run_id = pipeline_run.run_id

            assert instance.get_run_by_id(run_id).status == PipelineRunStatus.NOT_STARTED

            instance.launch_run(pipeline_run.run_id, external_pipeline)

            poll_for_step_start(instance, run_id)

            launcher = instance.run_launcher
            assert launcher.can_terminate(run_id)
            assert launcher.terminate(run_id)

            poll_for_finished_run(instance, run_id, timeout=30)
            terminated_pipeline_run = instance.get_run_by_id(run_id)
            assert terminated_pipeline_run.status == PipelineRunStatus.CANCELED

            poll_for_event(
                instance,
                run_id,
                event_type="ENGINE_EVENT",
                message="Process for pipeline exited",
            )

            run_logs = instance.all_logs(run_id)

            if _is_multiprocess(run_config):
                _check_event_log_contains(
                    run_logs,
                    [
                        ("PIPELINE_CANCELING", "Sending pipeline termination request."),
                        (
                            "ENGINE_EVENT",
                            "Multiprocess executor: received termination signal - forwarding to active child process",
                        ),
                        (
                            "ENGINE_EVENT",
                            "Multiprocess executor: interrupted all active child processes",
                        ),
                        ("STEP_FAILURE", 'Execution of step "sleepy_solid" failed.'),
                        (
                            "PIPELINE_CANCELED",
                            'Execution of pipeline "sleepy_pipeline" canceled.',
                        ),
                        ("ENGINE_EVENT", "Process for pipeline exited"),
                    ],
                )
            else:
                _check_event_log_contains(
                    run_logs,
                    [
                        ("PIPELINE_CANCELING", "Sending pipeline termination request."),
                        ("STEP_FAILURE", 'Execution of step "sleepy_solid" failed.'),
                        (
                            "PIPELINE_CANCELED",
                            'Execution of pipeline "sleepy_pipeline" canceled.',
                        ),
                        ("ENGINE_EVENT", "Pipeline execution terminated by interrupt"),
                        ("ENGINE_EVENT", "Process for pipeline exited"),
                    ],
                )
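
# `_check_event_log_contains` asserts that each (event type, message fragment)
# pair appears somewhere in the log, without requiring that the log contain
# nothing else and without enforcing an ordering. A hedged sketch of that
# contract; the real helper may be stricter:
def _check_event_log_contains(event_log, expected_type_and_message):
    types_and_messages = [
        (record.dagster_event.event_type_value, record.message)
        for record in event_log
        if record.dagster_event
    ]
    for expected_type, expected_fragment in expected_type_and_message:
        assert any(
            event_type == expected_type and expected_fragment in message
            for event_type, message in types_and_messages
        ), "Missing event {}: {}".format(expected_type, expected_fragment)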
def test_terminate_launched_docker_run():
    docker_image = get_test_project_docker_image()
    launcher_config = {
        "env_vars": [
            "AWS_ACCESS_KEY_ID",
            "AWS_SECRET_ACCESS_KEY",
        ],
        "network": "container:test-postgres-db-docker",
    }

    if IS_BUILDKITE:
        launcher_config["registry"] = get_buildkite_registry_config()
    else:
        find_local_test_image(docker_image)

    run_config = merge_yamls(
        [
            os.path.join(get_test_project_environments_path(), "env_s3.yaml"),
        ]
    )

    with docker_postgres_instance(
        overrides={
            "run_launcher": {
                "class": "DockerRunLauncher",
                "module": "dagster_docker",
                "config": launcher_config,
            }
        }
    ) as instance:
        recon_pipeline = get_test_project_recon_pipeline("hanging_pipeline", docker_image)
        run = instance.create_run_for_pipeline(
            pipeline_def=recon_pipeline.get_definition(),
            run_config=run_config,
        )

        run_id = run.run_id

        external_pipeline = ReOriginatedExternalPipelineForTest(
            get_test_project_external_pipeline("hanging_pipeline", container_image=docker_image),
            container_image=docker_image,
        )
        instance.launch_run(run_id, external_pipeline)

        poll_for_step_start(instance, run_id)

        assert instance.run_launcher.can_terminate(run_id)
        assert instance.run_launcher.terminate(run_id)

        poll_for_finished_run(instance, run_id, timeout=30)
        terminated_pipeline_run = instance.get_run_by_id(run_id)
        assert terminated_pipeline_run.status == PipelineRunStatus.CANCELED

        run_logs = instance.all_logs(run_id)

        _check_event_log_contains(
            run_logs,
            [
                ("PIPELINE_CANCELING", "Sending pipeline termination request"),
                ("STEP_FAILURE", 'Execution of step "hanging_solid" failed.'),
                ("PIPELINE_CANCELED", 'Execution of pipeline "hanging_pipeline" canceled.'),
                ("ENGINE_EVENT", "Pipeline execution terminated by interrupt"),
                ("ENGINE_EVENT", "Process for pipeline exited"),
            ],
        )
def test_terminated_run(get_external_pipeline, run_config):  # pylint: disable=redefined-outer-name
    with instance_for_test() as instance:
        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=sleepy_pipeline,
            run_config=run_config,
        )
        with get_external_pipeline(pipeline_run.pipeline_name) as external_pipeline:
            run_id = pipeline_run.run_id

            assert instance.get_run_by_id(run_id).status == PipelineRunStatus.NOT_STARTED

            instance.launch_run(pipeline_run.run_id, external_pipeline)

            poll_for_step_start(instance, run_id)

            launcher = instance.run_launcher
            assert launcher.can_terminate(run_id)
            assert launcher.terminate(run_id)

            poll_for_finished_run(instance, run_id, timeout=30)
            terminated_pipeline_run = instance.get_run_by_id(run_id)
            assert terminated_pipeline_run.status == PipelineRunStatus.FAILURE

            poll_for_event(
                instance,
                run_id,
                event_type="ENGINE_EVENT",
                message="Process for pipeline exited",
            )

            run_logs = instance.all_logs(run_id)

            if _is_multiprocess(run_config):
                _check_event_log(
                    run_logs,
                    [
                        ("ENGINE_EVENT", "Started process for pipeline"),
                        ("PIPELINE_START", 'Started execution of pipeline "sleepy_pipeline".'),
                        ("ENGINE_EVENT", "Executing steps using multiprocess executor"),
                        ("ENGINE_EVENT", "Launching subprocess for sleepy_solid.compute"),
                        ("ENGINE_EVENT", "Executing step sleepy_solid.compute in subprocess"),
                        ("STEP_START", 'Started execution of step "sleepy_solid.compute".'),
                        ("ENGINE_EVENT", "Received pipeline termination request"),
                        (
                            "ENGINE_EVENT",
                            "Multiprocess executor: received termination signal - forwarding to active child process",
                        ),
                        ("STEP_FAILURE", 'Execution of step "sleepy_solid.compute" failed.'),
                        (
                            "PIPELINE_FAILURE",
                            'Execution of pipeline "sleepy_pipeline" failed. An exception was thrown during execution.',
                        ),
                        ("ENGINE_EVENT", "Process for pipeline exited"),
                    ],
                )
            else:
                _check_event_log(
                    run_logs,
                    [
                        ("ENGINE_EVENT", "Started process for pipeline"),
                        ("PIPELINE_START", 'Started execution of pipeline "sleepy_pipeline".'),
                        ("ENGINE_EVENT", "Executing steps in process"),
                        ("STEP_START", 'Started execution of step "sleepy_solid.compute".'),
                        ("ENGINE_EVENT", "Received pipeline termination request"),
                        ("STEP_FAILURE", 'Execution of step "sleepy_solid.compute" failed.'),
                        ("PIPELINE_FAILURE", 'Execution of pipeline "sleepy_pipeline" failed.'),
                        ("ENGINE_EVENT", "Pipeline execution terminated by interrupt"),
                        ("ENGINE_EVENT", "Process for pipeline exited"),
                    ],
                )
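
# By contrast, `_check_event_log` (used just above) reads as exact: it asserts
# the full sequence of dagster events, one (type, fragment) pair per event, in
# order. A hedged sketch under that reading; the real helper may differ:
def _check_event_log(event_log, expected_type_and_message):
    events = [
        (record.dagster_event.event_type_value, record.message)
        for record in event_log
        if record.dagster_event
    ]
    assert len(events) == len(expected_type_and_message)
    for (event_type, message), (expected_type, expected_fragment) in zip(
        events, expected_type_and_message
    ):
        assert event_type == expected_type
        assert expected_fragment in message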
def _test_launch(docker_image, launcher_config, terminate=False):
    if IS_BUILDKITE:
        launcher_config["registry"] = get_buildkite_registry_config()
    else:
        find_local_test_image(docker_image)

    run_config = merge_yamls(
        [
            os.path.join(get_test_project_environments_path(), "env.yaml"),
            os.path.join(get_test_project_environments_path(), "env_s3.yaml"),
        ]
    )

    with docker_postgres_instance(
        overrides={
            "run_launcher": {
                "class": "DockerRunLauncher",
                "module": "dagster_docker",
                "config": launcher_config,
            }
        }
    ) as instance:
        recon_pipeline = get_test_project_recon_pipeline("demo_pipeline_s3")
        with get_test_project_workspace_and_external_pipeline(instance, "demo_pipeline_s3") as (
            workspace,
            orig_pipeline,
        ):
            external_pipeline = ReOriginatedExternalPipelineForTest(orig_pipeline)

            run = instance.create_run_for_pipeline(
                pipeline_def=recon_pipeline.get_definition(),
                run_config=run_config,
                external_pipeline_origin=external_pipeline.get_external_origin(),
                pipeline_code_origin=external_pipeline.get_python_origin(),
            )

            instance.launch_run(run.run_id, workspace)

            if not terminate:
                poll_for_finished_run(instance, run.run_id, timeout=60)
                assert instance.get_run_by_id(run.run_id).status == PipelineRunStatus.SUCCESS
            else:
                start_time = time.time()

                filters = PipelineRunsFilter(
                    run_ids=[run.run_id],
                    statuses=[
                        PipelineRunStatus.STARTED,
                    ],
                )

                while True:
                    runs = instance.get_runs(filters, limit=1)
                    if runs:
                        break
                    else:
                        time.sleep(0.1)

                    if time.time() - start_time > 60:
                        raise Exception("Timed out waiting for run to start")

                launcher = instance.run_launcher
                assert launcher.can_terminate(run.run_id)
                assert launcher.terminate(run.run_id)

                poll_for_finished_run(instance, run.run_id, timeout=60)
                assert instance.get_run_by_id(run.run_id).status == PipelineRunStatus.CANCELED
def test_launch_docker_image_on_pipeline_config():
    # Docker image name to use for launch specified as part of the pipeline origin
    # rather than in the run launcher instance config
    docker_image = get_test_project_docker_image()
    launcher_config = {
        "env_vars": [
            "AWS_ACCESS_KEY_ID",
            "AWS_SECRET_ACCESS_KEY",
            "DOCKER_LAUNCHER_NETWORK",
        ],
        "network": {"env": "DOCKER_LAUNCHER_NETWORK"},
        "container_kwargs": {
            "auto_remove": True,
        },
    }

    if IS_BUILDKITE:
        launcher_config["registry"] = get_buildkite_registry_config()
    else:
        find_local_test_image(docker_image)

    run_config = merge_yamls(
        [
            os.path.join(get_test_project_environments_path(), "env.yaml"),
            os.path.join(get_test_project_environments_path(), "env_s3.yaml"),
        ]
    )

    with environ({"DOCKER_LAUNCHER_NETWORK": "container:test-postgres-db-docker"}):
        with docker_postgres_instance(
            overrides={
                "run_launcher": {
                    "class": "DockerRunLauncher",
                    "module": "dagster_docker",
                    "config": launcher_config,
                }
            }
        ) as instance:
            recon_pipeline = get_test_project_recon_pipeline("demo_pipeline_s3", docker_image)
            with get_test_project_workspace_and_external_pipeline(
                instance, "demo_pipeline_s3", container_image=docker_image
            ) as (workspace, orig_pipeline):
                external_pipeline = ReOriginatedExternalPipelineForTest(
                    orig_pipeline,
                    container_image=docker_image,
                )
                run = instance.create_run_for_pipeline(
                    pipeline_def=recon_pipeline.get_definition(),
                    run_config=run_config,
                    external_pipeline_origin=external_pipeline.get_external_origin(),
                    pipeline_code_origin=external_pipeline.get_python_origin(),
                )
                instance.launch_run(run.run_id, workspace)

                poll_for_finished_run(instance, run.run_id, timeout=60)

                run = instance.get_run_by_id(run.run_id)
                assert run.status == PipelineRunStatus.SUCCESS
                assert run.tags[DOCKER_IMAGE_TAG] == docker_image
def test_docker_monitoring():
    docker_image = get_test_project_docker_image()

    launcher_config = {
        "env_vars": [
            "AWS_ACCESS_KEY_ID",
            "AWS_SECRET_ACCESS_KEY",
        ],
        "networks": ["container:test-postgres-db-docker"],
        "container_kwargs": {
            # "auto_remove": True,
            "volumes": ["/var/run/docker.sock:/var/run/docker.sock"],
        },
    }

    if IS_BUILDKITE:
        launcher_config["registry"] = get_buildkite_registry_config()
    else:
        find_local_test_image(docker_image)

    run_config = merge_dicts(
        load_yaml_from_path(os.path.join(get_test_project_environments_path(), "env_s3.yaml")),
        {
            "solids": {
                "multiply_the_word_slow": {
                    "inputs": {"word": "bar"},
                    "config": {"factor": 2, "sleep_time": 20},
                }
            },
            "execution": {"docker": {"config": {}}},
        },
    )

    with docker_postgres_instance(
        {
            "run_monitoring": {"enabled": True},
            "run_launcher": {
                "class": "DockerRunLauncher",
                "module": "dagster_docker",
                "config": launcher_config,
            },
        }
    ) as instance:
        recon_pipeline = get_test_project_recon_pipeline("demo_pipeline_docker_slow", docker_image)
        with get_test_project_workspace_and_external_pipeline(
            instance, "demo_pipeline_docker_slow", container_image=docker_image
        ) as (
            workspace,
            orig_pipeline,
        ):
            with start_daemon():
                external_pipeline = ReOriginatedExternalPipelineForTest(
                    orig_pipeline, container_image=docker_image
                )

                run = instance.create_run_for_pipeline(
                    pipeline_def=recon_pipeline.get_definition(),
                    run_config=run_config,
                    external_pipeline_origin=external_pipeline.get_external_origin(),
                    pipeline_code_origin=external_pipeline.get_python_origin(),
                )

                with log_run_events(instance, run.run_id):
                    instance.launch_run(run.run_id, workspace)

                    start_time = time.time()
                    while time.time() - start_time < 60:
                        run = instance.get_run_by_id(run.run_id)
                        if run.status == PipelineRunStatus.STARTED:
                            break
                        assert run.status == PipelineRunStatus.STARTING
                        time.sleep(1)

                    time.sleep(3)

                    instance.run_launcher._get_container(  # pylint: disable=protected-access
                        instance.get_run_by_id(run.run_id)
                    ).stop()

                    # daemon resumes the run
                    poll_for_finished_run(instance, run.run_id, timeout=90)
                    assert instance.get_run_by_id(run.run_id).status == PipelineRunStatus.SUCCESS
def test_launch_run_with_unloadable_pipeline_grpc():
    with instance_for_test() as instance:
        with get_bar_repo_repository_location(instance) as repository_location:
            pipeline_handle = PipelineHandle(
                "foo", repository_location.get_repository("bar_repo").handle
            )
            api_client = repository_location.client

            pipeline_run = instance.create_run(
                pipeline_name="foo",
                run_id=None,
                run_config={},
                mode="default",
                solids_to_execute=None,
                step_keys_to_execute=None,
                status=None,
                tags=None,
                root_run_id=None,
                parent_run_id=None,
                pipeline_snapshot=None,
                execution_plan_snapshot=None,
                parent_pipeline_snapshot=None,
            )
            run_id = pipeline_run.run_id

            original_origin = pipeline_handle.get_external_origin()

            # point the api to a pipeline that cannot be loaded
            res = deserialize_json_to_dagster_namedtuple(
                api_client.start_run(
                    ExecuteExternalPipelineArgs(
                        pipeline_origin=original_origin._replace(
                            pipeline_name="i_am_fake_pipeline"
                        ),
                        pipeline_run_id=run_id,
                        instance_ref=instance.get_ref(),
                    )
                )
            )
            assert res.success

            finished_pipeline_run = poll_for_finished_run(instance, run_id)

            assert finished_pipeline_run
            assert finished_pipeline_run.run_id == run_id
            assert finished_pipeline_run.status == PipelineRunStatus.FAILURE

            poll_for_event(
                instance, run_id, event_type="ENGINE_EVENT", message="Process for run exited"
            )

            event_records = instance.all_logs(run_id)

            _check_event_log_contains(
                event_records,
                [
                    ("ENGINE_EVENT", "Started process for run"),
                    ("ENGINE_EVENT", "Could not load pipeline definition"),
                    (
                        "PIPELINE_FAILURE",
                        "This run has been marked as failed from outside the execution context",
                    ),
                    ("ENGINE_EVENT", "Process for run exited"),
                ],
            )
def test_retry_early_terminate(self, graphql_context):
    instance = graphql_context.instance
    selector = infer_pipeline_selector(
        graphql_context, "retry_multi_input_early_terminate_pipeline"
    )
    run_id = make_new_run_id()
    execute_dagster_graphql(
        graphql_context,
        LAUNCH_PIPELINE_EXECUTION_MUTATION,
        variables={
            "executionParams": {
                "mode": "default",
                "selector": selector,
                "runConfigData": {
                    "solids": {
                        "get_input_one": {"config": {"wait_to_terminate": True}},
                        "get_input_two": {"config": {"wait_to_terminate": True}},
                    },
                },
                "executionMetadata": {"runId": run_id},
            }
        },
    )

    # Wait until the first step succeeded
    while instance.get_run_stats(run_id).steps_succeeded < 1:
        sleep(0.1)

    # Terminate the current pipeline run at the second step
    graphql_context.instance.run_launcher.terminate(run_id)

    records = instance.all_logs(run_id)

    # The first step should succeed, the second should fail or not start,
    # and the following steps should not appear in records
    assert step_did_succeed_in_records(records, "return_one")
    assert not step_did_fail_in_records(records, "return_one")
    assert any(
        [
            step_did_fail_in_records(records, "get_input_one"),
            step_did_not_run_in_records(records, "get_input_one"),
        ]
    )
    assert step_did_not_run_in_records(records, "get_input_two")
    assert step_did_not_run_in_records(records, "sum_inputs")

    # Wait for the original run to finish
    poll_for_finished_run(instance, run_id, timeout=30)
    assert instance.get_run_by_id(run_id).status == PipelineRunStatus.CANCELED

    # Start retry
    new_run_id = make_new_run_id()

    execute_dagster_graphql_and_finish_runs(
        graphql_context,
        LAUNCH_PIPELINE_REEXECUTION_MUTATION,
        variables={
            "executionParams": {
                "mode": "default",
                "selector": selector,
                "runConfigData": {
                    "solids": {
                        "get_input_one": {"config": {"wait_to_terminate": False}},
                        "get_input_two": {"config": {"wait_to_terminate": False}},
                    },
                },
                "executionMetadata": {
                    "runId": new_run_id,
                    "rootRunId": run_id,
                    "parentRunId": run_id,
                    "tags": [{"key": RESUME_RETRY_TAG, "value": "true"}],
                },
            }
        },
    )

    retry_records = instance.all_logs(new_run_id)
    # The first step should not run and the other three steps should succeed in retry
    assert step_did_not_run_in_records(retry_records, "return_one")
    assert step_did_succeed_in_records(retry_records, "get_input_one")
    assert step_did_succeed_in_records(retry_records, "get_input_two")
    assert step_did_succeed_in_records(retry_records, "sum_inputs")
def test_cancel_run():
    with instance_for_test() as instance:
        loadable_target_origin = LoadableTargetOrigin(
            executable_path=sys.executable,
            python_file=__file__,
            working_directory=None,
        )

        server_process = GrpcServerProcess(loadable_target_origin, max_workers=10)

        with server_process.create_ephemeral_client() as api_client:
            streaming_results = []

            pipeline_run = instance.create_run_for_pipeline(
                streaming_pipeline,
                run_config={"solids": {"streamer": {"config": {"length": 20}}}},
            )
            execute_run_args = ExecuteRunArgs(
                pipeline_origin=PipelineGrpcServerOrigin(
                    pipeline_name="streaming_pipeline",
                    repository_origin=RepositoryGrpcServerOrigin(
                        host="localhost",
                        socket=api_client.socket,
                        port=api_client.port,
                        repository_name="test_repository",
                    ),
                ),
                pipeline_run_id=pipeline_run.run_id,
                instance_ref=instance.get_ref(),
            )
            stream_events_result_thread = threading.Thread(
                target=_stream_events_target,
                args=[streaming_results, api_client, execute_run_args],
            )
            stream_events_result_thread.daemon = True
            stream_events_result_thread.start()

            poll_for_step_start(instance, pipeline_run.run_id)

            res = api_client.cancel_execution(
                cancel_execution_request=CancelExecutionRequest(run_id=pipeline_run.run_id)
            )
            assert res.success is True

            poll_for_finished_run(instance, pipeline_run.run_id)

            logs = instance.all_logs(pipeline_run.run_id)
            assert (
                len(
                    [
                        ev
                        for ev in logs
                        if ev.dagster_event.event_type_value == "STEP_MATERIALIZATION"
                    ]
                )
                < 20
            )

            # soft termination
            assert [ev for ev in logs if ev.dagster_event.event_type_value == "STEP_FAILURE"]

        server_process.wait()
def test_container_context_on_pipeline():
    docker_image = get_test_project_docker_image()

    launcher_config = {}

    if IS_BUILDKITE:
        launcher_config["registry"] = get_buildkite_registry_config()
    else:
        find_local_test_image(docker_image)

    executor_config = {
        "execution": {"docker": {"config": {}}},
    }

    run_config = merge_dicts(
        merge_yamls(
            [
                os.path.join(get_test_project_environments_path(), "env.yaml"),
                os.path.join(get_test_project_environments_path(), "env_s3.yaml"),
            ]
        ),
        executor_config,
    )

    with docker_postgres_instance(
        overrides={
            "run_launcher": {
                "class": "DockerRunLauncher",
                "module": "dagster_docker",
                "config": launcher_config,
            }
        }
    ) as instance:
        recon_pipeline = get_test_project_recon_pipeline(
            "demo_pipeline_docker",
            docker_image,
            container_context={
                "docker": {
                    "env_vars": [
                        "AWS_ACCESS_KEY_ID",
                        "AWS_SECRET_ACCESS_KEY",
                    ],
                    "networks": ["container:test-postgres-db-docker"],
                    "container_kwargs": {
                        "auto_remove": True,
                        "volumes": ["/var/run/docker.sock:/var/run/docker.sock"],
                    },
                }
            },
        )
        with get_test_project_workspace_and_external_pipeline(
            instance, "demo_pipeline_docker", container_image=docker_image
        ) as (
            workspace,
            orig_pipeline,
        ):
            external_pipeline = ReOriginatedExternalPipelineForTest(
                orig_pipeline, container_image=docker_image
            )
            run = instance.create_run_for_pipeline(
                pipeline_def=recon_pipeline.get_definition(),
                run_config=run_config,
                external_pipeline_origin=external_pipeline.get_external_origin(),
                pipeline_code_origin=recon_pipeline.get_python_origin(),
            )
            instance.launch_run(run.run_id, workspace)

            poll_for_finished_run(instance, run.run_id, timeout=60)

            for log in instance.all_logs(run.run_id):
                print(log)  # pylint: disable=print-call

            assert instance.get_run_by_id(run.run_id).status == PipelineRunStatus.SUCCESS