def test_server_down():
    """Check ``can_terminate``/``terminate`` when the run's gRPC tag is unreachable.

    Launches ``sleepy_pipeline`` against an ephemeral gRPC server, overwrites
    the run's ``GRPC_INFO_TAG`` with ``localhost`` plus a port returned by
    ``find_free_port()``, asserts the launcher then reports it cannot
    terminate, restores the original tag and finally terminates the run.
    """
    with instance_for_test() as instance:
        target_file = file_relative_path(__file__, "test_default_run_launcher.py")
        target_origin = LoadableTargetOrigin(
            executable_path=sys.executable,
            attribute="nope",
            python_file=target_file,
        )
        server_process = GrpcServerProcess(
            loadable_target_origin=target_origin, max_workers=4, force_port=True
        )

        with server_process.create_ephemeral_client() as api_client:
            location_origin = GrpcServerRepositoryLocationOrigin(
                location_name="test",
                port=api_client.port,
                socket=api_client.socket,
                host=api_client.host,
            )
            location_handle = RepositoryLocationHandle.create_from_repository_location_origin(
                location_origin
            )
            repository_location = GrpcServerRepositoryLocation(location_handle)

            external_repository = repository_location.get_repository("nope")
            external_pipeline = external_repository.get_full_external_pipeline("sleepy_pipeline")

            pipeline_run = instance.create_run_for_pipeline(
                pipeline_def=sleepy_pipeline, run_config=None
            )

            instance.launch_run(pipeline_run.run_id, external_pipeline)
            poll_for_step_start(instance, pipeline_run.run_id)

            launcher = instance.run_launcher
            assert launcher.can_terminate(pipeline_run.run_id)

            # Remember the real connection info so it can be put back later.
            stored_run = instance.get_run_by_id(pipeline_run.run_id)
            original_run_tags = stored_run.tags[GRPC_INFO_TAG]

            # Replace run tags with an invalid port
            bogus_grpc_info = merge_dicts({"host": "localhost"}, {"port": find_free_port()})
            instance.add_run_tags(
                pipeline_run.run_id,
                {GRPC_INFO_TAG: seven.json.dumps(bogus_grpc_info)},
            )
            assert not launcher.can_terminate(pipeline_run.run_id)

            # Restore the original tag; termination must work again.
            instance.add_run_tags(
                pipeline_run.run_id,
                {GRPC_INFO_TAG: original_run_tags},
            )
            assert launcher.terminate(pipeline_run.run_id)

        server_process.wait()
def get_external_pipeline_from_grpc_server_repository(pipeline_name):
    """Yield the external pipeline ``pipeline_name`` served by an ephemeral gRPC server.

    A ``GrpcServerProcess`` is started for the ``nope`` attribute of
    ``test_default_run_launcher.py``; the pipeline is looked up in the
    ``nope`` repository through an ephemeral client. ``server_process.wait()``
    always runs once the generator is finished or closed.
    """
    target_origin = LoadableTargetOrigin(
        executable_path=sys.executable,
        attribute="nope",
        python_file=file_relative_path(__file__, "test_default_run_launcher.py"),
    )
    server_process = GrpcServerProcess(loadable_target_origin=target_origin)

    try:
        with server_process.create_ephemeral_client() as api_client:
            location_origin = GrpcServerRepositoryLocationOrigin(
                location_name="test",
                port=api_client.port,
                socket=api_client.socket,
                host=api_client.host,
            )
            location_handle = RepositoryLocationHandle.create_from_repository_location_origin(
                location_origin
            )
            repository_location = GrpcServerRepositoryLocation(location_handle)
            external_repository = repository_location.get_repository("nope")

            yield external_repository.get_full_external_pipeline(pipeline_name)
    finally:
        server_process.wait()
def test_server_down():
    """Check ``can_terminate``/``terminate`` when the run's gRPC tag is unreachable.

    The loadable target comes from ``repo.yaml``. After launching
    ``sleepy_pipeline`` the run's ``GRPC_INFO_TAG`` is replaced with
    ``localhost`` plus a port from ``find_free_port()``; the launcher must then
    report it cannot terminate. Once the original tag is restored the run is
    terminated.
    """
    with grpc_instance() as instance:
        repo_yaml = file_relative_path(__file__, "repo.yaml")
        recon_repo = ReconstructableRepository.from_legacy_repository_yaml(repo_yaml)
        target_origin = recon_repo.get_origin().loadable_target_origin

        server_process = GrpcServerProcess(
            loadable_target_origin=target_origin, max_workers=4, force_port=True
        )

        with server_process.create_ephemeral_client() as api_client:
            location_handle = RepositoryLocationHandle.create_grpc_server_location(
                location_name="test",
                port=api_client.port,
                socket=api_client.socket,
                host=api_client.host,
            )
            repository_location = GrpcServerRepositoryLocation(location_handle)

            external_repository = repository_location.get_repository("nope")
            external_pipeline = external_repository.get_full_external_pipeline("sleepy_pipeline")

            pipeline_run = instance.create_run_for_pipeline(
                pipeline_def=sleepy_pipeline, run_config=None
            )

            launcher = instance.run_launcher
            launcher.launch_run(instance, pipeline_run, external_pipeline)
            poll_for_step_start(instance, pipeline_run.run_id)

            assert launcher.can_terminate(pipeline_run.run_id)

            # Remember the real connection info so it can be put back later.
            stored_run = instance.get_run_by_id(pipeline_run.run_id)
            original_run_tags = stored_run.tags[GRPC_INFO_TAG]

            # Replace run tags with an invalid port
            bogus_grpc_info = merge_dicts({"host": "localhost"}, {"port": find_free_port()})
            instance.add_run_tags(
                pipeline_run.run_id,
                {GRPC_INFO_TAG: seven.json.dumps(bogus_grpc_info)},
            )
            assert not launcher.can_terminate(pipeline_run.run_id)

            # Restore the original tag; termination must work again.
            instance.add_run_tags(
                pipeline_run.run_id,
                {GRPC_INFO_TAG: original_run_tags},
            )
            assert launcher.terminate(pipeline_run.run_id)

        server_process.wait()
def test_error_repo_in_registry():
    """Check that a broken repository errors on load, not on endpoint lookup.

    ``registry.get_grpc_endpoint`` must succeed for ``error_repo``; building a
    ``GrpcServerRepositoryLocation`` from that endpoint must raise
    ``DagsterUserCodeProcessError`` matching "object is not callable", and must
    do so again on a second attempt.
    """
    target_origin = LoadableTargetOrigin(
        executable_path=sys.executable,
        attribute="error_repo",
        python_file=file_relative_path(__file__, "error_repo.py"),
    )
    error_origin = ManagedGrpcPythonEnvRepositoryLocationOrigin(
        loadable_target_origin=target_origin,
    )

    with ProcessGrpcServerRegistry(reload_interval=5, heartbeat_ttl=10) as registry:
        # Repository with a loading error does not raise an exception
        endpoint = registry.get_grpc_endpoint(error_origin)

        # But using that endpoint to load a location results in an error.
        # The second pass checks that the error is idempotent.
        for _attempt in range(2):
            with pytest.raises(DagsterUserCodeProcessError, match="object is not callable"):
                with GrpcServerRepositoryLocation(
                    origin=error_origin,
                    server_id=endpoint.server_id,
                    port=endpoint.port,
                    socket=endpoint.socket,
                    host=endpoint.host,
                    watch_server=False,
                ):
                    pass
def test_run_always_finishes():  # pylint: disable=redefined-outer-name
    """Check that a launched run finishes after the ephemeral client is released.

    Uses a temporary ``DagsterInstance`` configured with ``GrpcRunLauncher``.
    ``slow_pipeline`` is launched inside the ephemeral client context; after
    leaving it the run must still be unfinished and the server process still
    alive. The run must then succeed and the server process must exit within
    5 seconds of that.
    """
    with seven.TemporaryDirectory() as temp_dir:
        launcher_override = {
            "run_launcher": {
                "module": "dagster.core.launcher.grpc_run_launcher",
                "class": "GrpcRunLauncher",
            }
        }
        instance = DagsterInstance.local_temp(temp_dir, overrides=launcher_override)

        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=slow_pipeline, run_config=None
        )
        run_id = pipeline_run.run_id

        target_origin = LoadableTargetOrigin(
            attribute="nope",
            python_file=file_relative_path(__file__, "test_default_run_launcher.py"),
        )
        server_process = GrpcServerProcess(
            loadable_target_origin=target_origin, max_workers=4
        )

        with server_process.create_ephemeral_client() as api_client:
            location_handle = RepositoryLocationHandle.create_grpc_server_location(
                location_name="test",
                port=api_client.port,
                socket=api_client.socket,
                host=api_client.host,
            )
            repository_location = GrpcServerRepositoryLocation(location_handle)

            external_repository = repository_location.get_repository("nope")
            external_pipeline = external_repository.get_full_external_pipeline("slow_pipeline")

            stored_run = instance.get_run_by_id(run_id)
            assert stored_run.status == PipelineRunStatus.NOT_STARTED

            launcher = instance.run_launcher
            launcher.launch_run(
                instance=instance, run=pipeline_run, external_pipeline=external_pipeline
            )

        # Server process now receives shutdown event, run has not finished yet
        pipeline_run = instance.get_run_by_id(run_id)
        assert not pipeline_run.is_finished
        assert server_process.server_process.poll() is None

        # Server should wait until run finishes, then shutdown
        pipeline_run = poll_for_finished_run(instance, run_id)
        assert pipeline_run.status == PipelineRunStatus.SUCCESS

        wait_started = time.time()
        while server_process.server_process.poll() is None:
            time.sleep(0.05)
            # Verify server process cleans up eventually
            assert time.time() - wait_started < 5

        server_process.wait()
def get_external_pipeline_from_managed_grpc_python_env_repository(pipeline_name):
    """Yield the external pipeline ``pipeline_name`` from a managed gRPC location.

    The handle is obtained from ``create_handle()`` on a
    ``ManagedGrpcPythonEnvRepositoryLocationOrigin`` and is held open (as a
    context manager) for as long as the generator is suspended at the yield.
    """
    target_origin = LoadableTargetOrigin(
        executable_path=sys.executable,
        attribute="nope",
        python_file=file_relative_path(__file__, "test_default_run_launcher.py"),
    )
    location_origin = ManagedGrpcPythonEnvRepositoryLocationOrigin(
        loadable_target_origin=target_origin,
        location_name="nope",
    )

    with location_origin.create_handle() as repository_location_handle:
        repository_location = GrpcServerRepositoryLocation(repository_location_handle)
        external_repository = repository_location.get_repository("nope")
        yield external_repository.get_full_external_pipeline(pipeline_name)
def get_external_pipeline_from_managed_grpc_python_env_repository(pipeline_name):
    """Yield the external pipeline ``pipeline_name`` from a process-bound gRPC location.

    The location handle is created up front and its ``cleanup()`` is always
    called once the generator is finished or closed.
    """
    target_origin = LoadableTargetOrigin(
        attribute="nope",
        python_file=file_relative_path(__file__, "test_default_run_launcher.py"),
    )
    location_handle = RepositoryLocationHandle.create_process_bound_grpc_server_location(
        loadable_target_origin=target_origin,
        location_name="nope",
    )
    repository_location = GrpcServerRepositoryLocation(location_handle)

    try:
        external_repository = repository_location.get_repository("nope")
        yield external_repository.get_full_external_pipeline(pipeline_name)
    finally:
        location_handle.cleanup()
def get_external_pipeline_from_managed_grpc_python_env_repository(pipeline_name):
    """Yield the external pipeline ``pipeline_name`` from a process-bound gRPC location.

    Args:
        pipeline_name: Name of the pipeline to look up in the ``nope``
            repository loaded from ``test_cli_api_run_launcher.py``.

    Yields:
        The result of ``get_full_external_pipeline(pipeline_name)`` on the
        ``nope`` repository.

    The location handle is released with ``cleanup()`` once the generator is
    finished or closed, including when the consumer raises.
    """
    repository_location_handle = RepositoryLocationHandle.create_process_bound_grpc_server_location(
        loadable_target_origin=LoadableTargetOrigin(
            attribute='nope',
            python_file=file_relative_path(__file__, 'test_cli_api_run_launcher.py'),
        ),
        location_name='nope',
    )

    # Everything after the handle exists is guarded so the handle is never
    # leaked (it presumably owns a gRPC server process -- confirm against
    # RepositoryLocationHandle).
    try:
        repository_location = GrpcServerRepositoryLocation(repository_location_handle)
        yield repository_location.get_repository('nope').get_full_external_pipeline(pipeline_name)
    finally:
        repository_location_handle.cleanup()
def get_external_pipeline_from_grpc_server_repository(pipeline_name):
    """Yield the external pipeline ``pipeline_name`` served by an ephemeral gRPC server.

    The loadable target is derived from the legacy ``repo.yaml``. The
    ephemeral client context stays open while the generator is suspended at
    the yield.
    """
    recon_repo = ReconstructableRepository.from_legacy_repository_yaml(
        file_relative_path(__file__, 'repo.yaml')
    )
    target_origin = LoadableTargetOrigin.from_python_origin(recon_repo.get_origin())

    # The GrpcServerProcess is intentionally not bound to a local name, so its
    # object lifetime is exactly that of the temporary in this expression.
    with GrpcServerProcess(
        loadable_target_origin=target_origin
    ).create_ephemeral_client() as api_client:
        location_handle = RepositoryLocationHandle.create_grpc_server_location(
            location_name='test',
            port=api_client.port,
            socket=api_client.socket,
            host='localhost',
        )
        repository_location = GrpcServerRepositoryLocation(location_handle)
        external_repository = repository_location.get_repository('nope')

        yield external_repository.get_full_external_pipeline(pipeline_name)
def test_run_always_finishes():  # pylint: disable=redefined-outer-name
    """Check that a launched run finishes after the ephemeral client is released.

    ``slow_pipeline`` is launched through ``instance.launch_run`` inside the
    ephemeral client context; after leaving it the run must still be
    unfinished and the server process still alive. The run must then succeed
    and the server process must exit within 5 seconds of that.
    """
    with instance_for_test() as instance:
        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=slow_pipeline, run_config=None
        )
        run_id = pipeline_run.run_id

        target_origin = LoadableTargetOrigin(
            executable_path=sys.executable,
            attribute="nope",
            python_file=file_relative_path(__file__, "test_default_run_launcher.py"),
        )
        server_process = GrpcServerProcess(
            loadable_target_origin=target_origin, max_workers=4
        )

        with server_process.create_ephemeral_client() as api_client:
            location_origin = GrpcServerRepositoryLocationOrigin(
                location_name="test",
                port=api_client.port,
                socket=api_client.socket,
                host=api_client.host,
            )
            location_handle = RepositoryLocationHandle.create_from_repository_location_origin(
                location_origin
            )
            repository_location = GrpcServerRepositoryLocation(location_handle)

            external_repository = repository_location.get_repository("nope")
            external_pipeline = external_repository.get_full_external_pipeline("slow_pipeline")

            stored_run = instance.get_run_by_id(run_id)
            assert stored_run.status == PipelineRunStatus.NOT_STARTED

            instance.launch_run(
                run_id=pipeline_run.run_id, external_pipeline=external_pipeline
            )

        # Server process now receives shutdown event, run has not finished yet
        pipeline_run = instance.get_run_by_id(run_id)
        assert not pipeline_run.is_finished
        assert server_process.server_process.poll() is None

        # Server should wait until run finishes, then shutdown
        pipeline_run = poll_for_finished_run(instance, run_id)
        assert pipeline_run.status == PipelineRunStatus.SUCCESS

        wait_started = time.time()
        while server_process.server_process.poll() is None:
            time.sleep(0.05)
            # Verify server process cleans up eventually
            assert time.time() - wait_started < 5

        server_process.wait()
def create_single_location(self):
    """Yield a ``GrpcServerRepositoryLocation`` for this origin.

    A ``ProcessGrpcServerRegistry`` is opened for the duration of the
    generator, an endpoint is requested for ``self``, and a location built
    from that endpoint is yielded. Both the location and the registry are
    context-managed, so they are exited when the generator finishes or is
    closed.
    """
    from .repository_location import GrpcServerRepositoryLocation
    from .grpc_server_registry import ProcessGrpcServerRegistry
    from dagster.core.workspace.context import (
        DAGIT_GRPC_SERVER_HEARTBEAT_TTL,
        DAGIT_GRPC_SERVER_STARTUP_TIMEOUT,
    )

    registry = ProcessGrpcServerRegistry(
        reload_interval=0,
        heartbeat_ttl=DAGIT_GRPC_SERVER_HEARTBEAT_TTL,
        startup_timeout=DAGIT_GRPC_SERVER_STARTUP_TIMEOUT,
    )
    with registry as grpc_server_registry:
        endpoint = grpc_server_registry.get_grpc_endpoint(self)

        with GrpcServerRepositoryLocation(
            origin=self,
            server_id=endpoint.server_id,
            port=endpoint.port,
            socket=endpoint.socket,
            host=endpoint.host,
            heartbeat=True,
            watch_server=False,
            grpc_server_registry=grpc_server_registry,
        ) as single_location:
            yield single_location
def get_location(self, origin):
    """Return the location for ``origin``, creating or refreshing it as needed.

    For origins the gRPC server registry does not support, a cached location
    is reused or ``origin.create_location()`` is called. For supported
    origins, the cached location is reused only while its ``server_id``
    matches the registry's current endpoint; otherwise it is cleaned up and a
    new ``GrpcServerRepositoryLocation`` is built. The result is stored in
    ``self._locations`` under the origin id.
    """
    check.inst_param(origin, "origin", RepositoryLocationOrigin)

    origin_id = origin.get_id()
    cached = self._locations.get(origin_id)

    if self._grpc_server_registry.supports_origin(origin):
        endpoint = self._grpc_server_registry.get_grpc_endpoint(origin)

        # Registry may periodically reload the endpoint, at which point the server ID will
        # change and we should reload the location
        if cached and cached.server_id != endpoint.server_id:
            cached.cleanup()
            cached = None

        if not cached:
            cached = GrpcServerRepositoryLocation(
                origin=origin,
                server_id=endpoint.server_id,
                port=endpoint.port,
                socket=endpoint.socket,
                host=endpoint.host,
                heartbeat=True,
                watch_server=False,
                grpc_server_registry=self._grpc_server_registry,
            )
    elif not cached:
        cached = origin.create_location()

    self._locations[origin_id] = cached
    return self._locations[origin_id]
def create_single_location(
    self, instance: "DagsterInstance"
) -> Generator["RepositoryLocation", None, None]:
    """Yield a ``GrpcServerRepositoryLocation`` for this origin.

    Args:
        instance: Supplies ``code_server_process_startup_timeout``, used as
            the registry's ``startup_timeout``.

    Yields:
        A location built from the endpoint the registry returns for ``self``.
        The location and the registry are both context-managed, so they are
        exited when the generator finishes or is closed.
    """
    from dagster.core.workspace.context import DAGIT_GRPC_SERVER_HEARTBEAT_TTL

    from .grpc_server_registry import ProcessGrpcServerRegistry
    from .repository_location import GrpcServerRepositoryLocation

    registry = ProcessGrpcServerRegistry(
        reload_interval=0,
        heartbeat_ttl=DAGIT_GRPC_SERVER_HEARTBEAT_TTL,
        startup_timeout=instance.code_server_process_startup_timeout,
    )
    with registry as grpc_server_registry:
        endpoint = grpc_server_registry.get_grpc_endpoint(self)

        with GrpcServerRepositoryLocation(
            origin=self,
            server_id=endpoint.server_id,
            port=endpoint.port,
            socket=endpoint.socket,
            host=endpoint.host,
            heartbeat=True,
            watch_server=False,
            grpc_server_registry=grpc_server_registry,
        ) as single_location:
            yield single_location
def test_terminate_after_shutdown():
    """Check that a run can still be terminated after the server is told to shut down.

    ``sleepy_pipeline`` is launched, then ``cleanup_server()`` is called on the
    handle's client. Launching ``math_diamond`` afterwards must raise
    ``DagsterLaunchFailedError``, while the first run must still be
    terminable.
    """
    with instance_for_test() as instance:
        location_origin = ManagedGrpcPythonEnvRepositoryLocationOrigin(
            loadable_target_origin=LoadableTargetOrigin(
                executable_path=sys.executable,
                attribute="nope",
                python_file=file_relative_path(__file__, "test_default_run_launcher.py"),
            ),
            location_name="nope",
        )

        with RepositoryLocationHandle.create_from_repository_location_origin(
            location_origin
        ) as repository_location_handle:
            repository_location = GrpcServerRepositoryLocation(repository_location_handle)

            running_repository = repository_location.get_repository("nope")
            external_pipeline = running_repository.get_full_external_pipeline("sleepy_pipeline")

            pipeline_run = instance.create_run_for_pipeline(
                pipeline_def=sleepy_pipeline, run_config=None
            )

            instance.launch_run(pipeline_run.run_id, external_pipeline)
            poll_for_step_start(instance, pipeline_run.run_id)

            # Tell the server to shut down once executions finish
            repository_location_handle.client.cleanup_server()

            # Trying to start another run fails
            doomed_repository = repository_location.get_repository("nope")
            doomed_to_fail_external_pipeline = doomed_repository.get_full_external_pipeline(
                "math_diamond"
            )
            doomed_to_fail_pipeline_run = instance.create_run_for_pipeline(
                pipeline_def=math_diamond, run_config=None
            )

            with pytest.raises(DagsterLaunchFailedError):
                instance.launch_run(
                    doomed_to_fail_pipeline_run.run_id, doomed_to_fail_external_pipeline
                )

            launcher = instance.run_launcher

            # Can terminate the run even after the shutdown event has been received
            assert launcher.can_terminate(pipeline_run.run_id)
            assert launcher.terminate(pipeline_run.run_id)
def test_run_always_finishes(temp_instance):  # pylint: disable=redefined-outer-name
    """Check that a launched run finishes after the ephemeral client is released.

    ``slow_pipeline`` is launched inside the ephemeral client context; after
    leaving it the run must still be unfinished and the server process still
    alive. The run must then succeed and the server process must exit within
    5 seconds of that.
    """
    instance = temp_instance

    pipeline_run = instance.create_run_for_pipeline(pipeline_def=slow_pipeline, run_config=None)
    run_id = pipeline_run.run_id

    recon_repo = ReconstructableRepository.for_file(__file__, 'nope')
    target_origin = LoadableTargetOrigin.from_python_origin(recon_repo.get_origin())

    server_process = GrpcServerProcess(loadable_target_origin=target_origin, max_workers=4)

    with server_process.create_ephemeral_client() as api_client:
        location_handle = RepositoryLocationHandle.create_grpc_server_location(
            location_name='test',
            port=api_client.port,
            socket=api_client.socket,
            host=api_client.host,
        )
        repository_location = GrpcServerRepositoryLocation(location_handle)

        external_repository = repository_location.get_repository('nope')
        external_pipeline = external_repository.get_full_external_pipeline('slow_pipeline')

        stored_run = instance.get_run_by_id(run_id)
        assert stored_run.status == PipelineRunStatus.NOT_STARTED

        launcher = instance.run_launcher
        launcher.launch_run(
            instance=instance, run=pipeline_run, external_pipeline=external_pipeline
        )

    # Server process now receives shutdown event, run has not finished yet
    pipeline_run = instance.get_run_by_id(run_id)
    assert not pipeline_run.is_finished
    assert server_process.server_process.poll() is None

    # Server should wait until run finishes, then shutdown
    pipeline_run = poll_for_run(instance, run_id)
    assert pipeline_run.status == PipelineRunStatus.SUCCESS

    wait_started = time.time()
    while server_process.server_process.poll() is None:
        time.sleep(0.05)
        # Verify server process cleans up eventually
        assert time.time() - wait_started < 5
def _can_connect(origin, endpoint):
    """Return True if a ``GrpcServerRepositoryLocation`` can be opened and closed.

    Any ``Exception`` raised while reading the endpoint fields, constructing
    the location, or entering/exiting it is swallowed and reported as False.
    """
    try:
        with GrpcServerRepositoryLocation(
            origin=origin,
            server_id=endpoint.server_id,
            port=endpoint.port,
            socket=endpoint.socket,
            host=endpoint.host,
            watch_server=False,
        ):
            # Nothing to do with the location itself; opening it is the probe.
            pass
    except Exception:  # pylint: disable=broad-except
        return False
    return True
def get_external_pipeline_from_grpc_server_repository(pipeline_name):
    """Yield the external pipeline ``pipeline_name`` served by an ephemeral gRPC server.

    The loadable target is read from ``repo.yaml``. ``server_process.wait()``
    always runs once the generator is finished or closed.
    """
    recon_repo = ReconstructableRepository.from_legacy_repository_yaml(
        file_relative_path(__file__, "repo.yaml")
    )
    target_origin = recon_repo.get_origin().loadable_target_origin
    server_process = GrpcServerProcess(loadable_target_origin=target_origin)

    try:
        with server_process.create_ephemeral_client() as api_client:
            location_handle = RepositoryLocationHandle.create_grpc_server_location(
                location_name="test",
                port=api_client.port,
                socket=api_client.socket,
                host=api_client.host,
            )
            repository_location = GrpcServerRepositoryLocation(location_handle)
            external_repository = repository_location.get_repository("nope")

            yield external_repository.get_full_external_pipeline(pipeline_name)
    finally:
        server_process.wait()
def _create_location_from_origin(self, origin) -> RepositoryLocation:
    """Build a new repository location for ``origin``.

    Origins the gRPC server registry does not support are delegated to
    ``origin.create_location()``. Supported origins get a
    ``GrpcServerRepositoryLocation`` wired to the endpoint the registry
    returns.
    """
    check.inst_param(origin, "origin", RepositoryLocationOrigin)

    registry = self._grpc_server_registry
    if not registry.supports_origin(origin):
        return origin.create_location()

    endpoint = registry.get_grpc_endpoint(origin)
    return GrpcServerRepositoryLocation(
        origin=origin,
        server_id=endpoint.server_id,
        port=endpoint.port,
        socket=endpoint.socket,
        host=endpoint.host,
        heartbeat=True,
        watch_server=False,
        grpc_server_registry=self._grpc_server_registry,
    )
def create_location(self):
    """Return a ``GrpcServerRepositoryLocation`` constructed from ``self``."""
    # Imported inside the function, as in the original (presumably to avoid a
    # circular import -- confirm before hoisting to module level).
    from dagster.core.host_representation.repository_location import (
        GrpcServerRepositoryLocation,
    )

    location = GrpcServerRepositoryLocation(self)
    return location
def get_bar_grpc_repo_handle():
    """Yield the ``handle`` of the ``bar_repo`` repository from a gRPC location.

    The location handle context stays open while the generator is suspended
    at the yield.
    """
    with get_bar_repo_grpc_repository_location_handle() as location_handle:
        repository_location = GrpcServerRepositoryLocation(location_handle)
        bar_repo = repository_location.get_repository("bar_repo")
        yield bar_repo.handle
def get_bar_grpc_repo_handle():
    """Return the ``handle`` of the ``bar_repo`` repository from a gRPC location."""
    location_handle = get_bar_repo_grpc_repository_location_handle()
    repository_location = GrpcServerRepositoryLocation(location_handle)
    bar_repo = repository_location.get_repository('bar_repo')
    return bar_repo.handle