class GrpcServerRepositoryLocationHandle(RepositoryLocationHandle):
    """
    Represents a gRPC server that Dagster is not responsible for managing.

    Connects a DagsterGrpcClient to an externally managed server, loads the
    server's repository metadata eagerly in ``__init__``, and optionally starts
    two background daemon threads: a client heartbeat and a server watcher that
    notifies state subscribers when the server changes or becomes unreachable.

    NOTE(review): this file contains several definitions of this class name
    (apparently concatenated versions); at import time the last one wins.
    """

    def __init__(
        self,
        origin,
        host=None,
        port=None,
        socket=None,
        server_id=None,
        heartbeat=False,
        watch_server=True,
    ):
        # Local imports, presumably to avoid import cycles — TODO confirm.
        from dagster.grpc.client import DagsterGrpcClient, client_heartbeat_thread
        from dagster.grpc.server_watcher import create_grpc_watch_thread

        self._origin = check.inst_param(origin, "origin", RepositoryLocationOrigin)

        # A GrpcServerRepositoryLocationOrigin carries its own connection info;
        # otherwise fall back to the explicit host/port/socket arguments.
        if isinstance(self._origin, GrpcServerRepositoryLocationOrigin):
            self._port = self.origin.port
            self._socket = self.origin.socket
            self._host = self.origin.host
            self._use_ssl = bool(self.origin.use_ssl)
        else:
            self._port = check.opt_int_param(port, "port")
            self._socket = check.opt_str_param(socket, "socket")
            self._host = check.str_param(host, "host")
            self._use_ssl = False

        # Thread/event handles are initialized to None first so that cleanup()
        # is safe to call even if __init__ fails before the threads start.
        self._watch_thread_shutdown_event = None
        self._watch_thread = None
        self._heartbeat_shutdown_event = None
        self._heartbeat_thread = None
        self._heartbeat = check.bool_param(heartbeat, "heartbeat")
        self._watch_server = check.bool_param(watch_server, "watch_server")
        self.server_id = None
        self._external_repositories_data = None

        try:
            self.client = DagsterGrpcClient(
                port=self._port,
                socket=self._socket,
                host=self._host,
                use_ssl=self._use_ssl,
            )
            # Synchronous RPCs: enumerate repositories and fetch the server id
            # (unless a known server_id was passed in by the caller).
            list_repositories_response = sync_list_repositories_grpc(self.client)
            self.server_id = server_id if server_id else sync_get_server_id(self.client)
            self.repository_names = set(
                symbol.repository_name
                for symbol in list_repositories_response.repository_symbols
            )

            if self._heartbeat:
                # Daemon heartbeat thread keeps the server aware this client is
                # alive; signaled to stop via the shutdown event in cleanup().
                self._heartbeat_shutdown_event = threading.Event()
                self._heartbeat_thread = threading.Thread(
                    target=client_heartbeat_thread,
                    args=(
                        self.client,
                        self._heartbeat_shutdown_event,
                    ),
                    name="grpc-client-heartbeat",
                )
                self._heartbeat_thread.daemon = True
                self._heartbeat_thread.start()

            if self._watch_server:
                # Watch thread pushes LOCATION_UPDATED / LOCATION_ERROR events
                # to registered state subscribers.
                self._state_subscribers = []
                self._watch_thread_shutdown_event, self._watch_thread = create_grpc_watch_thread(
                    self.client,
                    on_updated=lambda new_server_id: self._send_state_event_to_subscribers(
                        LocationStateChangeEvent(
                            LocationStateChangeEventType.LOCATION_UPDATED,
                            location_name=self.location_name,
                            message="Server has been updated.",
                            server_id=new_server_id,
                        )
                    ),
                    on_error=lambda: self._send_state_event_to_subscribers(
                        LocationStateChangeEvent(
                            LocationStateChangeEventType.LOCATION_ERROR,
                            location_name=self.location_name,
                            message="Unable to reconnect to server. You can reload the server once it is "
                            "reachable again",
                        )
                    ),
                )
                self._watch_thread.start()

            self.executable_path = list_repositories_response.executable_path
            self.repository_code_pointer_dict = (
                list_repositories_response.repository_code_pointer_dict
            )
            self.container_image = self._reload_current_image()

            # Eagerly stream down all external repository data for this server.
            self._external_repositories_data = sync_get_streaming_external_repositories_data_grpc(
                self.client,
                self,
            )
        except:
            # Best-effort teardown of any threads already started, then re-raise
            # so callers see the original failure.
            self.cleanup()
            raise

    @property
    def origin(self):
        """The RepositoryLocationOrigin this handle was constructed from."""
        return self._origin

    def add_state_subscriber(self, subscriber):
        """Register a subscriber for watch-thread state events (no-op unless
        watch_server was enabled)."""
        if self._watch_server:
            self._state_subscribers.append(subscriber)

    def _send_state_event_to_subscribers(self, event):
        # Fan a single LocationStateChangeEvent out to all subscribers.
        check.inst_param(event, "event", LocationStateChangeEvent)
        for subscriber in self._state_subscribers:
            subscriber.handle_event(event)

    def cleanup(self):
        """Signal and join the heartbeat/watch threads. Idempotent: handles are
        set to None after use, and None handles are skipped."""
        # Signal both events first so the threads can wind down concurrently
        # before we block on join().
        if self._heartbeat_shutdown_event:
            self._heartbeat_shutdown_event.set()
            self._heartbeat_shutdown_event = None
        if self._watch_thread_shutdown_event:
            self._watch_thread_shutdown_event.set()
            self._watch_thread_shutdown_event = None
        if self._heartbeat_thread:
            self._heartbeat_thread.join()
            self._heartbeat_thread = None
        if self._watch_thread:
            self._watch_thread.join()
            self._watch_thread = None

    @property
    def port(self):
        return self._port

    @property
    def socket(self):
        return self._socket

    @property
    def host(self):
        return self._host

    @property
    def use_ssl(self):
        return self._use_ssl

    @property
    def location_name(self):
        return self.origin.location_name

    def _reload_current_image(self):
        # Asks the server which user-code image it is currently running.
        return self.client.get_current_image().current_image

    def get_repository_python_origin(self, repository_name):
        """Build a python origin for the named repository from the metadata
        captured at construction time."""
        return _get_repository_python_origin(
            self.executable_path,
            self.repository_code_pointer_dict,
            repository_name,
            self.container_image,
        )

    def create_location(self):
        # Local import, presumably to avoid an import cycle — TODO confirm.
        from dagster.core.host_representation.repository_location import (
            GrpcServerRepositoryLocation,
        )

        return GrpcServerRepositoryLocation(self)

    def create_external_repositories(self):
        """Materialize ExternalRepository objects from the repository data
        streamed down during __init__."""
        from dagster.core.host_representation.external import ExternalRepository

        return {
            repo_name: ExternalRepository(
                repo_data,
                RepositoryHandle(
                    repository_name=repo_name,
                    repository_location_handle=self,
                ),
            )
            for repo_name, repo_data in self._external_repositories_data.items()
        }

    def get_display_metadata(self):
        """Origin display metadata, plus the container image when known."""
        return merge_dicts(
            self.origin.get_display_metadata(),
            ({"image": self.container_image} if self.container_image else {}),
        )
class GrpcServerRepositoryLocation(RepositoryLocation):
    """
    A RepositoryLocation backed by a running Dagster gRPC server.

    Eagerly connects in ``__init__``, loads repository metadata and external
    repository data over gRPC, and optionally starts a daemon heartbeat thread.

    NOTE(review): ``watch_server`` is validated and stored here, but this
    variant never starts a watch thread (no ``create_grpc_watch_thread``
    import); ``_watch_thread`` stays None and cleanup() simply skips it.

    NOTE(review): this file defines this class name more than once (apparently
    concatenated versions); at import time the last definition wins.
    """

    def __init__(
        self,
        origin: RepositoryLocationOrigin,
        host: Optional[str] = None,
        port: Optional[int] = None,
        socket: Optional[str] = None,
        server_id: Optional[str] = None,
        heartbeat: Optional[bool] = False,
        watch_server: Optional[bool] = True,
        grpc_server_registry: Optional[GrpcServerRegistry] = None,
    ):
        # Local import, presumably to avoid an import cycle — TODO confirm.
        from dagster.grpc.client import DagsterGrpcClient, client_heartbeat_thread

        self._origin = check.inst_param(origin, "origin", RepositoryLocationOrigin)
        self.grpc_server_registry = check.opt_inst_param(
            grpc_server_registry, "grpc_server_registry", GrpcServerRegistry
        )

        # A GrpcServerRepositoryLocationOrigin carries its own connection info;
        # otherwise fall back to the explicit host/port/socket arguments.
        if isinstance(self.origin, GrpcServerRepositoryLocationOrigin):
            self._port = self.origin.port
            self._socket = self.origin.socket
            self._host = self.origin.host
            self._use_ssl = bool(self.origin.use_ssl)
        else:
            self._port = check.opt_int_param(port, "port")
            self._socket = check.opt_str_param(socket, "socket")
            self._host = check.str_param(host, "host")
            self._use_ssl = False

        # Initialize all handles to None first so cleanup() is safe to call
        # even when __init__ fails partway through.
        self._watch_thread_shutdown_event = None
        self._watch_thread = None
        self._heartbeat_shutdown_event = None
        self._heartbeat_thread = None
        self._heartbeat = check.bool_param(heartbeat, "heartbeat")
        self._watch_server = check.bool_param(watch_server, "watch_server")
        self.server_id = None
        self._external_repositories_data = None
        self._executable_path = None
        self._container_image = None
        self._repository_code_pointer_dict = None

        try:
            self.client = DagsterGrpcClient(
                port=self._port,
                socket=self._socket,
                host=self._host,
                use_ssl=self._use_ssl,
            )
            # Synchronous RPCs: enumerate repositories and fetch the server id
            # (unless a known server_id was supplied by the caller).
            list_repositories_response = sync_list_repositories_grpc(self.client)
            self.server_id = server_id if server_id else sync_get_server_id(self.client)
            self.repository_names = set(
                symbol.repository_name
                for symbol in list_repositories_response.repository_symbols
            )

            if self._heartbeat:
                # Daemon heartbeat thread keeps the server aware this client is
                # alive; signaled to stop via the shutdown event in cleanup().
                self._heartbeat_shutdown_event = threading.Event()
                self._heartbeat_thread = threading.Thread(
                    target=client_heartbeat_thread,
                    args=(
                        self.client,
                        self._heartbeat_shutdown_event,
                    ),
                    name="grpc-client-heartbeat",
                )
                self._heartbeat_thread.daemon = True
                self._heartbeat_thread.start()

            self._executable_path = list_repositories_response.executable_path
            self._repository_code_pointer_dict = (
                list_repositories_response.repository_code_pointer_dict
            )
            self._container_image = self._reload_current_image()

            # Eagerly stream down all repository data and materialize
            # ExternalRepository objects keyed by repository name.
            self._external_repositories_data = sync_get_streaming_external_repositories_data_grpc(
                self.client,
                self,
            )
            self.external_repositories = {
                repo_name: ExternalRepository(
                    repo_data,
                    RepositoryHandle(
                        repository_name=repo_name,
                        repository_location=self,
                    ),
                )
                for repo_name, repo_data in self._external_repositories_data.items()
            }
        except:
            # Best-effort teardown of any started threads, then re-raise.
            self.cleanup()
            raise

    @property
    def origin(self) -> RepositoryLocationOrigin:
        return self._origin

    @property
    def container_image(self) -> str:
        # cast: populated in __init__ before any caller can observe it.
        return cast(str, self._container_image)

    @property
    def repository_code_pointer_dict(self) -> Dict[str, CodePointer]:
        return cast(Dict[str, CodePointer], self._repository_code_pointer_dict)

    @property
    def executable_path(self) -> Optional[str]:
        return self._executable_path

    @property
    def port(self) -> Optional[int]:
        return self._port

    @property
    def socket(self) -> Optional[str]:
        return self._socket

    @property
    def host(self) -> str:
        return self._host

    @property
    def use_ssl(self) -> bool:
        return self._use_ssl

    def _reload_current_image(self) -> str:
        # Asks the server which user-code image it is currently running.
        return self.client.get_current_image().current_image

    def cleanup(self) -> None:
        """Signal and join the heartbeat/watch threads. Idempotent: handles are
        set to None after use, and None handles are skipped."""
        # Signal both events first so the threads can wind down concurrently
        # before we block on join().
        if self._heartbeat_shutdown_event:
            self._heartbeat_shutdown_event.set()
            self._heartbeat_shutdown_event = None
        if self._watch_thread_shutdown_event:
            self._watch_thread_shutdown_event.set()
            self._watch_thread_shutdown_event = None
        if self._heartbeat_thread:
            self._heartbeat_thread.join()
            self._heartbeat_thread = None
        if self._watch_thread:
            self._watch_thread.join()
            self._watch_thread = None

    @property
    def is_reload_supported(self) -> bool:
        return True

    def get_repository(self, name: str) -> ExternalRepository:
        """Return the named ExternalRepository; raises KeyError if unknown."""
        check.str_param(name, "name")
        return self.get_repositories()[name]

    def has_repository(self, name: str) -> bool:
        return name in self.get_repositories()

    def get_repositories(self) -> Dict[str, ExternalRepository]:
        return self.external_repositories

    def get_external_execution_plan(
        self,
        external_pipeline: ExternalPipeline,
        run_config: Dict[str, Any],
        mode: str,
        step_keys_to_execute: Optional[List[str]],
        known_state: Optional[KnownExecutionState],
    ) -> ExternalExecutionPlan:
        """Ask the server to build an execution plan snapshot and wrap it in an
        ExternalExecutionPlan."""
        check.inst_param(external_pipeline, "external_pipeline", ExternalPipeline)
        check.dict_param(run_config, "run_config")
        check.str_param(mode, "mode")
        check.opt_list_param(step_keys_to_execute, "step_keys_to_execute", of_type=str)
        check.opt_inst_param(known_state, "known_state", KnownExecutionState)

        execution_plan_snapshot_or_error = sync_get_external_execution_plan_grpc(
            api_client=self.client,
            pipeline_origin=external_pipeline.get_external_origin(),
            run_config=run_config,
            mode=mode,
            pipeline_snapshot_id=external_pipeline.identifying_pipeline_snapshot_id,
            solid_selection=external_pipeline.solid_selection,
            step_keys_to_execute=step_keys_to_execute,
            known_state=known_state,
        )

        return ExternalExecutionPlan(
            execution_plan_snapshot=execution_plan_snapshot_or_error,
            represented_pipeline=external_pipeline,
        )

    def get_subset_external_pipeline_result(
        self, selector: PipelineSelector
    ) -> "ExternalPipelineSubsetResult":
        """Ask the server for the pipeline subset selected by ``selector``.

        The selector must target this location (invariant-checked).
        """
        check.inst_param(selector, "selector", PipelineSelector)
        check.invariant(
            selector.location_name == self.name,
            "PipelineSelector location_name mismatch, got {selector.location_name} expected {self.name}"
            .format(self=self, selector=selector),
        )

        external_repository = self.get_repository(selector.repository_name)
        pipeline_handle = PipelineHandle(selector.pipeline_name, external_repository.handle)
        return sync_get_external_pipeline_subset_grpc(
            self.client, pipeline_handle.get_external_origin(), selector.solid_selection
        )

    def get_external_partition_config(
        self, repository_handle: RepositoryHandle, partition_set_name: str,
        partition_name: str
    ) -> "ExternalPartitionConfigData":
        """Fetch run config for one partition of a partition set."""
        check.inst_param(repository_handle, "repository_handle", RepositoryHandle)
        check.str_param(partition_set_name, "partition_set_name")
        check.str_param(partition_name, "partition_name")

        return sync_get_external_partition_config_grpc(
            self.client, repository_handle, partition_set_name, partition_name
        )

    def get_external_partition_tags(
        self, repository_handle: RepositoryHandle, partition_set_name: str,
        partition_name: str
    ) -> "ExternalPartitionTagsData":
        """Fetch tags for one partition of a partition set."""
        check.inst_param(repository_handle, "repository_handle", RepositoryHandle)
        check.str_param(partition_set_name, "partition_set_name")
        check.str_param(partition_name, "partition_name")

        return sync_get_external_partition_tags_grpc(
            self.client, repository_handle, partition_set_name, partition_name
        )

    def get_external_partition_names(
        self, repository_handle: RepositoryHandle, partition_set_name: str
    ) -> "ExternalPartitionNamesData":
        """Fetch the partition names for a partition set."""
        check.inst_param(repository_handle, "repository_handle", RepositoryHandle)
        check.str_param(partition_set_name, "partition_set_name")

        return sync_get_external_partition_names_grpc(
            self.client, repository_handle, partition_set_name
        )

    def get_external_schedule_execution_data(
        self,
        instance: DagsterInstance,
        repository_handle: RepositoryHandle,
        schedule_name: str,
        scheduled_execution_time: Optional[datetime.datetime],
    ) -> "ExternalScheduleExecutionData":
        """Evaluate a schedule on the server for the given execution time."""
        check.inst_param(instance, "instance", DagsterInstance)
        check.inst_param(repository_handle, "repository_handle", RepositoryHandle)
        check.str_param(schedule_name, "schedule_name")
        # NOTE(review): annotation says datetime.datetime but the runtime check
        # requires PendulumDateTime — confirm which is intended.
        check.opt_inst_param(
            scheduled_execution_time, "scheduled_execution_time", PendulumDateTime
        )

        return sync_get_external_schedule_execution_data_grpc(
            self.client,
            instance,
            repository_handle,
            schedule_name,
            scheduled_execution_time,
        )

    def get_external_sensor_execution_data(
        self,
        instance: DagsterInstance,
        repository_handle: RepositoryHandle,
        name: str,
        last_completion_time: Optional[float],
        last_run_key: Optional[str],
        cursor: Optional[str],
    ) -> "SensorExecutionData":
        """Evaluate a sensor on the server with the given cursor state."""
        return sync_get_external_sensor_execution_data_grpc(
            self.client,
            instance,
            repository_handle,
            name,
            last_completion_time,
            last_run_key,
            cursor,
        )

    def get_external_partition_set_execution_param_data(
        self,
        repository_handle: RepositoryHandle,
        partition_set_name: str,
        partition_names: List[str],
    ) -> "ExternalPartitionSetExecutionParamData":
        """Fetch execution params for a set of named partitions."""
        check.inst_param(repository_handle, "repository_handle", RepositoryHandle)
        check.str_param(partition_set_name, "partition_set_name")
        check.list_param(partition_names, "partition_names", of_type=str)

        return sync_get_external_partition_set_execution_param_data_grpc(
            self.client, repository_handle, partition_set_name, partition_names
        )
class GrpcServerRepositoryLocationHandle(RepositoryLocationHandle):
    """
    Represents a gRPC server that Dagster is not responsible for managing.

    This variant requires a GrpcServerRepositoryLocationOrigin, always starts a
    watch thread (no heartbeat), and exposes connection details via the origin.

    NOTE(review): this file contains several definitions of this class name
    (apparently concatenated versions); at import time the last one wins.
    """

    def __init__(self, origin):
        # Local imports, presumably to avoid import cycles — TODO confirm.
        from dagster.grpc.client import DagsterGrpcClient
        from dagster.grpc.server_watcher import create_grpc_watch_thread

        self.origin = check.inst_param(origin, "origin", GrpcServerRepositoryLocationOrigin)

        port = self.origin.port
        socket = self.origin.socket
        host = self.origin.host

        # Initialized to None first so cleanup() is safe to call even if
        # __init__ fails before the watch thread starts.
        self._watch_thread_shutdown_event = None
        self._watch_thread = None

        try:
            self.client = DagsterGrpcClient(port=port, socket=socket, host=host)
            # Synchronous RPCs to enumerate repositories and get the server id.
            list_repositories_response = sync_list_repositories_grpc(self.client)

            self.server_id = sync_get_server_id(self.client)
            self.repository_names = set(
                symbol.repository_name
                for symbol in list_repositories_response.repository_symbols
            )

            # Watch thread pushes LOCATION_UPDATED / LOCATION_ERROR events to
            # registered state subscribers.
            self._state_subscribers = []
            self._watch_thread_shutdown_event, self._watch_thread = create_grpc_watch_thread(
                self.client,
                on_updated=lambda new_server_id: self._send_state_event_to_subscribers(
                    LocationStateChangeEvent(
                        LocationStateChangeEventType.LOCATION_UPDATED,
                        location_name=self.location_name,
                        message="Server has been updated.",
                        server_id=new_server_id,
                    )
                ),
                on_error=lambda: self._send_state_event_to_subscribers(
                    LocationStateChangeEvent(
                        LocationStateChangeEventType.LOCATION_ERROR,
                        location_name=self.location_name,
                        message="Unable to reconnect to server. You can reload the server once it is "
                        "reachable again",
                    )
                ),
            )
            self._watch_thread.start()

            self.executable_path = list_repositories_response.executable_path
            self.repository_code_pointer_dict = (
                list_repositories_response.repository_code_pointer_dict
            )
            self.container_image = self._reload_current_image()
        except:
            # Best-effort teardown of the watch thread, then re-raise.
            self.cleanup()
            raise

    def add_state_subscriber(self, subscriber):
        """Register a subscriber for watch-thread state events."""
        self._state_subscribers.append(subscriber)

    def _send_state_event_to_subscribers(self, event):
        # Fan a single LocationStateChangeEvent out to all subscribers.
        check.inst_param(event, "event", LocationStateChangeEvent)
        for subscriber in self._state_subscribers:
            subscriber.handle_event(event)

    def cleanup(self):
        """Signal and join the watch thread. Idempotent: handles are set to
        None after use, and None handles are skipped."""
        if self._watch_thread_shutdown_event:
            self._watch_thread_shutdown_event.set()
            self._watch_thread_shutdown_event = None

        if self._watch_thread:
            self._watch_thread.join()
            self._watch_thread = None

    @property
    def port(self):
        return self.origin.port

    @property
    def socket(self):
        return self.origin.socket

    @property
    def host(self):
        return self.origin.host

    @property
    def location_name(self):
        return self.origin.location_name

    def _reload_current_image(self):
        # Asks the server which user-code image it is currently running.
        return self.client.get_current_image().current_image

    def get_repository_python_origin(self, repository_name):
        """Build a python origin for the named repository from the metadata
        captured at construction time."""
        return _get_repository_python_origin(
            self.executable_path,
            self.repository_code_pointer_dict,
            repository_name,
            self.container_image,
        )

    def reload_repository_python_origin(self, repository_name):
        """Like get_repository_python_origin, but re-queries the server for
        fresh executable path, code pointers, and image rather than using the
        values cached at construction time."""
        check.str_param(repository_name, "repository_name")

        list_repositories_response = sync_list_repositories_grpc(self.client)

        return _get_repository_python_origin(
            list_repositories_response.executable_path,
            list_repositories_response.repository_code_pointer_dict,
            repository_name,
            self._reload_current_image(),
        )
class GrpcServerRepositoryLocationHandle(RepositoryLocationHandle):
    """
    Represents a gRPC server that Dagster is not responsible for managing.

    Minimal variant: no heartbeat or watch threads; just connects and caches
    repository metadata from a single list-repositories RPC.

    NOTE(review): this file contains several definitions of this class name
    (apparently concatenated versions); at import time the last one wins.
    """

    def __init__(self, origin):
        # Local import, presumably to avoid an import cycle — TODO confirm.
        from dagster.grpc.client import DagsterGrpcClient

        self.origin = check.inst_param(origin, "origin", GrpcServerRepositoryLocationOrigin)

        port = self.origin.port
        socket = self.origin.socket
        host = self.origin.host

        self.client = DagsterGrpcClient(port=port, socket=socket, host=host)
        # Single synchronous RPC; results are cached on the instance.
        list_repositories_response = sync_list_repositories_grpc(self.client)

        self.repository_names = set(
            symbol.repository_name
            for symbol in list_repositories_response.repository_symbols
        )
        self.executable_path = list_repositories_response.executable_path
        self.repository_code_pointer_dict = list_repositories_response.repository_code_pointer_dict

    @property
    def port(self):
        return self.origin.port

    @property
    def socket(self):
        return self.origin.socket

    @property
    def host(self):
        return self.origin.host

    @property
    def location_name(self):
        return self.origin.location_name

    def get_current_image(self):
        """Return the user-code image the server reports it is running.

        Raises DagsterInvariantViolationError when the server has no image set
        (i.e. DAGSTER_CURRENT_IMAGE is not configured on the server).
        """
        job_image = self.client.get_current_image().current_image
        if not job_image:
            raise DagsterInvariantViolationError(
                "Unable to get current image that GRPC server is running. Please make sure that "
                "env var DAGSTER_CURRENT_IMAGE is set in the GRPC server and contains the most "
                "up-to-date user code image and tag. Exiting."
            )
        return job_image

    def get_repository_python_origin(self, repository_name):
        """Build a python origin for the named repository from the metadata
        cached at construction time."""
        return _get_repository_python_origin(
            self.executable_path, self.repository_code_pointer_dict, repository_name
        )

    def reload_repository_python_origin(self, repository_name):
        """Like get_repository_python_origin, but re-queries the server for a
        fresh executable path and code pointers instead of using the cache."""
        check.str_param(repository_name, "repository_name")

        list_repositories_response = sync_list_repositories_grpc(self.client)

        return _get_repository_python_origin(
            list_repositories_response.executable_path,
            list_repositories_response.repository_code_pointer_dict,
            repository_name,
        )
class GrpcServerRepositoryLocation(RepositoryLocation):
    """
    A RepositoryLocation backed by a running Dagster gRPC server.

    Untyped variant: eagerly connects in ``__init__``, loads repository
    metadata and external repository data over gRPC, and optionally starts a
    heartbeat thread and a server watch thread.

    NOTE(review): this file defines this class name more than once (apparently
    concatenated versions); at import time the last definition wins.
    """

    def __init__(
        self,
        origin,
        host=None,
        port=None,
        socket=None,
        server_id=None,
        heartbeat=False,
        watch_server=True,
        grpc_server_registry=None,
    ):
        # Local imports, presumably to avoid import cycles — TODO confirm.
        from dagster.grpc.client import DagsterGrpcClient, client_heartbeat_thread
        from dagster.grpc.server_watcher import create_grpc_watch_thread

        from .grpc_server_registry import GrpcServerRegistry

        self._origin = check.inst_param(origin, "origin", RepositoryLocationOrigin)

        self.grpc_server_registry = check.opt_inst_param(
            grpc_server_registry, "grpc_server_registry", GrpcServerRegistry
        )

        # A GrpcServerRepositoryLocationOrigin carries its own connection info;
        # otherwise fall back to the explicit host/port/socket arguments.
        if isinstance(self._origin, GrpcServerRepositoryLocationOrigin):
            self._port = self.origin.port
            self._socket = self.origin.socket
            self._host = self.origin.host
            self._use_ssl = bool(self.origin.use_ssl)
        else:
            self._port = check.opt_int_param(port, "port")
            self._socket = check.opt_str_param(socket, "socket")
            self._host = check.str_param(host, "host")
            self._use_ssl = False

        # Initialize all handles to None first so cleanup() is safe to call
        # even when __init__ fails partway through.
        self._watch_thread_shutdown_event = None
        self._watch_thread = None
        self._heartbeat_shutdown_event = None
        self._heartbeat_thread = None
        self._heartbeat = check.bool_param(heartbeat, "heartbeat")
        self._watch_server = check.bool_param(watch_server, "watch_server")
        self.server_id = None
        self._external_repositories_data = None
        self._executable_path = None
        self._container_image = None
        self._repository_code_pointer_dict = None

        try:
            self.client = DagsterGrpcClient(
                port=self._port,
                socket=self._socket,
                host=self._host,
                use_ssl=self._use_ssl,
            )
            # Synchronous RPCs: enumerate repositories and fetch the server id
            # (unless a known server_id was supplied by the caller).
            list_repositories_response = sync_list_repositories_grpc(self.client)
            self.server_id = server_id if server_id else sync_get_server_id(self.client)
            self.repository_names = set(
                symbol.repository_name
                for symbol in list_repositories_response.repository_symbols
            )

            if self._heartbeat:
                # Daemon heartbeat thread keeps the server aware this client is
                # alive; signaled to stop via the shutdown event in cleanup().
                self._heartbeat_shutdown_event = threading.Event()
                self._heartbeat_thread = threading.Thread(
                    target=client_heartbeat_thread,
                    args=(
                        self.client,
                        self._heartbeat_shutdown_event,
                    ),
                    name="grpc-client-heartbeat",
                )
                self._heartbeat_thread.daemon = True
                self._heartbeat_thread.start()

            if self._watch_server:
                # Watch thread pushes LOCATION_UPDATED / LOCATION_ERROR events
                # to registered state subscribers. Note this variant reports
                # self.name (not self.location_name) as the location name.
                self._state_subscribers = []
                self._watch_thread_shutdown_event, self._watch_thread = create_grpc_watch_thread(
                    self.client,
                    on_updated=lambda new_server_id: self._send_state_event_to_subscribers(
                        LocationStateChangeEvent(
                            LocationStateChangeEventType.LOCATION_UPDATED,
                            location_name=self.name,
                            message="Server has been updated.",
                            server_id=new_server_id,
                        )
                    ),
                    on_error=lambda: self._send_state_event_to_subscribers(
                        LocationStateChangeEvent(
                            LocationStateChangeEventType.LOCATION_ERROR,
                            location_name=self.name,
                            message="Unable to reconnect to server. You can reload the server once it is "
                            "reachable again",
                        )
                    ),
                )
                self._watch_thread.start()

            self._executable_path = list_repositories_response.executable_path
            self._repository_code_pointer_dict = (
                list_repositories_response.repository_code_pointer_dict
            )
            self._container_image = self._reload_current_image()

            # Eagerly stream down all repository data and materialize
            # ExternalRepository objects keyed by repository name.
            self._external_repositories_data = sync_get_streaming_external_repositories_data_grpc(
                self.client,
                self,
            )
            self.external_repositories = {
                repo_name: ExternalRepository(
                    repo_data,
                    RepositoryHandle(
                        repository_name=repo_name,
                        repository_location=self,
                    ),
                )
                for repo_name, repo_data in self._external_repositories_data.items()
            }
        except:
            # Best-effort teardown of any started threads, then re-raise.
            self.cleanup()
            raise

    def add_state_subscriber(self, subscriber):
        """Register a subscriber for watch-thread state events (no-op unless
        watch_server was enabled)."""
        if self._watch_server:
            self._state_subscribers.append(subscriber)

    def _send_state_event_to_subscribers(self, event):
        # Fan a single LocationStateChangeEvent out to all subscribers.
        check.inst_param(event, "event", LocationStateChangeEvent)
        for subscriber in self._state_subscribers:
            subscriber.handle_event(event)

    @property
    def origin(self):
        return self._origin

    @property
    def container_image(self):
        return self._container_image

    @property
    def repository_code_pointer_dict(self):
        return self._repository_code_pointer_dict

    @property
    def executable_path(self):
        return self._executable_path

    @property
    def port(self):
        return self._port

    @property
    def socket(self):
        return self._socket

    @property
    def host(self):
        return self._host

    @property
    def use_ssl(self):
        return self._use_ssl

    def _reload_current_image(self):
        # Asks the server which user-code image it is currently running.
        return self.client.get_current_image().current_image

    def cleanup(self):
        """Signal and join the heartbeat/watch threads. Idempotent: handles are
        set to None after use, and None handles are skipped."""
        # Signal both events first so the threads can wind down concurrently
        # before we block on join().
        if self._heartbeat_shutdown_event:
            self._heartbeat_shutdown_event.set()
            self._heartbeat_shutdown_event = None
        if self._watch_thread_shutdown_event:
            self._watch_thread_shutdown_event.set()
            self._watch_thread_shutdown_event = None
        if self._heartbeat_thread:
            self._heartbeat_thread.join()
            self._heartbeat_thread = None
        if self._watch_thread:
            self._watch_thread.join()
            self._watch_thread = None

    @property
    def is_reload_supported(self):
        return True

    def get_repository(self, name):
        """Return the named ExternalRepository; raises KeyError if unknown."""
        check.str_param(name, "name")
        return self.get_repositories()[name]

    def has_repository(self, name):
        return name in self.get_repositories()

    def get_repositories(self):
        return self.external_repositories

    def get_external_execution_plan(
        self,
        external_pipeline,
        run_config,
        mode,
        step_keys_to_execute,
        known_state,
    ):
        """Ask the server to build an execution plan snapshot and wrap it in an
        ExternalExecutionPlan."""
        check.inst_param(external_pipeline, "external_pipeline", ExternalPipeline)
        check.dict_param(run_config, "run_config")
        check.str_param(mode, "mode")
        check.opt_list_param(step_keys_to_execute, "step_keys_to_execute", of_type=str)
        check.opt_inst_param(known_state, "known_state", KnownExecutionState)

        execution_plan_snapshot_or_error = sync_get_external_execution_plan_grpc(
            api_client=self.client,
            pipeline_origin=external_pipeline.get_external_origin(),
            run_config=run_config,
            mode=mode,
            pipeline_snapshot_id=external_pipeline.identifying_pipeline_snapshot_id,
            solid_selection=external_pipeline.solid_selection,
            step_keys_to_execute=step_keys_to_execute,
            known_state=known_state,
        )

        return ExternalExecutionPlan(
            execution_plan_snapshot=execution_plan_snapshot_or_error,
            represented_pipeline=external_pipeline,
        )

    def get_subset_external_pipeline_result(self, selector):
        """Ask the server for the pipeline subset selected by ``selector``.

        The selector must target this location (invariant-checked).
        """
        check.inst_param(selector, "selector", PipelineSelector)
        check.invariant(
            selector.location_name == self.name,
            "PipelineSelector location_name mismatch, got {selector.location_name} expected {self.name}"
            .format(self=self, selector=selector),
        )

        external_repository = self.get_repository(selector.repository_name)
        pipeline_handle = PipelineHandle(selector.pipeline_name, external_repository.handle)
        return sync_get_external_pipeline_subset_grpc(
            self.client, pipeline_handle.get_external_origin(), selector.solid_selection
        )

    def get_external_partition_config(self, repository_handle, partition_set_name,
                                      partition_name):
        """Fetch run config for one partition of a partition set."""
        check.inst_param(repository_handle, "repository_handle", RepositoryHandle)
        check.str_param(partition_set_name, "partition_set_name")
        check.str_param(partition_name, "partition_name")

        return sync_get_external_partition_config_grpc(
            self.client, repository_handle, partition_set_name, partition_name
        )

    def get_external_partition_tags(self, repository_handle, partition_set_name,
                                    partition_name):
        """Fetch tags for one partition of a partition set."""
        check.inst_param(repository_handle, "repository_handle", RepositoryHandle)
        check.str_param(partition_set_name, "partition_set_name")
        check.str_param(partition_name, "partition_name")

        return sync_get_external_partition_tags_grpc(
            self.client, repository_handle, partition_set_name, partition_name
        )

    def get_external_partition_names(self, repository_handle, partition_set_name):
        """Fetch the partition names for a partition set."""
        check.inst_param(repository_handle, "repository_handle", RepositoryHandle)
        check.str_param(partition_set_name, "partition_set_name")

        return sync_get_external_partition_names_grpc(
            self.client, repository_handle, partition_set_name
        )

    def get_external_schedule_execution_data(
        self,
        instance,
        repository_handle,
        schedule_name,
        scheduled_execution_time,
    ):
        """Evaluate a schedule on the server for the given execution time.

        NOTE(review): this variant checks scheduled_execution_time against
        datetime.datetime, while the typed variant in this file requires
        PendulumDateTime — confirm which is intended.
        """
        check.inst_param(instance, "instance", DagsterInstance)
        check.inst_param(repository_handle, "repository_handle", RepositoryHandle)
        check.str_param(schedule_name, "schedule_name")
        check.opt_inst_param(
            scheduled_execution_time, "scheduled_execution_time", datetime.datetime
        )

        return sync_get_external_schedule_execution_data_grpc(
            self.client,
            instance,
            repository_handle,
            schedule_name,
            scheduled_execution_time,
        )

    def get_external_sensor_execution_data(self, instance, repository_handle, name,
                                           last_completion_time, last_run_key):
        """Evaluate a sensor on the server (no cursor in this variant)."""
        return sync_get_external_sensor_execution_data_grpc(
            self.client,
            instance,
            repository_handle,
            name,
            last_completion_time,
            last_run_key,
        )

    def get_external_partition_set_execution_param_data(
        self, repository_handle, partition_set_name, partition_names
    ):
        """Fetch execution params for a set of named partitions."""
        check.inst_param(repository_handle, "repository_handle", RepositoryHandle)
        check.str_param(partition_set_name, "partition_set_name")
        check.list_param(partition_names, "partition_names", of_type=str)

        return sync_get_external_partition_set_execution_param_data_grpc(
            self.client, repository_handle, partition_set_name, partition_names
        )