def __init__(self, server_uri=_SERVER_URI, notification_service_uri=None):
    """Initialise every client base class of this object.

    :param server_uri: Address handed to the metadata, model center, deploy
                       and metric client initialisers.
    :param notification_service_uri: Address handed to the notification
                                     client initialiser; when ``None`` the
                                     notification client receives
                                     ``server_uri`` instead.
    """
    # Same initialisation order as before: metadata, model center, deploy,
    # metric, and finally the notification client.
    for client_base in (MetadataClient, ModelCenterClient, DeployClient,
                        MetricClient):
        client_base.__init__(self, server_uri)

    notification_target = notification_service_uri
    if notification_target is None:
        notification_target = server_uri
    NotificationClient.__init__(self, notification_target)
def __init__(self, server_uri=_SERVER_URI, notification_service_uri=None,
             project_config: ProjectConfig = None):
    """Initialise every client base class and, optionally, HA fail-over.

    :param server_uri: Address of the AIFlow server. When high availability
                       is enabled this is split on ``","`` and each entry is
                       tried in order. If it is ``None`` and
                       ``project_config`` is given, the value of
                       ``project_config.get_master_uri()`` is used.
    :param notification_service_uri: Address passed to the notification
                                     client. If it is ``None`` the value
                                     from ``project_config`` is used when
                                     available, otherwise ``server_uri``.
    :param project_config: Optional project configuration supplying the
                           URIs above plus the HA switch and the
                           list-member / retry timing settings.
    :raises Exception: When HA is enabled and a member answers
                       ``listMembers`` with a non-success return code, or
                       when no listed member can be reached at all (the
                       last ``grpc.RpcError`` is chained as the cause).
    """
    # Defaults that apply when no project_config is supplied.
    enable_ha = False
    list_member_interval_ms = 5000
    retry_interval_ms = 1000
    retry_timeout_ms = 10000

    # Resolve the configuration BEFORE initialising the base clients so that
    # a server_uri of None is replaced by the configured master URI for all
    # of them, not only for the notification client.
    if project_config is not None:
        if server_uri is None:
            server_uri = project_config.get_master_uri()
        if notification_service_uri is None:
            notification_service_uri = \
                project_config.get_notification_service_uri()
        enable_ha = project_config.get_enable_ha()
        list_member_interval_ms = \
            project_config.get_list_member_interval_ms()
        retry_interval_ms = project_config.get_retry_interval_ms()
        retry_timeout_ms = project_config.get_retry_timeout_ms()

    for client_base in (MetadataClient, ModelCenterClient, DeployClient,
                        MetricClient):
        client_base.__init__(self, server_uri)

    self.enable_ha = enable_ha
    self.list_member_interval_ms = list_member_interval_ms
    self.retry_interval_ms = retry_interval_ms
    self.retry_timeout_ms = retry_timeout_ms

    # The notification client talks to its own URI when one is known and
    # falls back to the AIFlow server URI otherwise.
    if notification_service_uri is None:
        notification_target = server_uri
    else:
        notification_target = notification_service_uri
    NotificationClient.__init__(
        self,
        notification_target,
        enable_ha=self.enable_ha,
        list_member_interval_ms=self.list_member_interval_ms,
        retry_interval_ms=self.retry_interval_ms,
        retry_timeout_ms=self.retry_timeout_ms)

    if self.enable_ha:
        self.living_aiflow_members = []
        self.current_aiflow_uri = None
        last_error = None
        # Use a dedicated loop variable so the server_uri parameter is not
        # silently rebound while probing the candidates.
        for candidate_uri in server_uri.split(","):
            channel = grpc.insecure_channel(candidate_uri)
            high_availability_stub = HighAvailabilityManagerStub(channel)
            try:
                response = high_availability_stub.listMembers(
                    ListMembersRequest(timeout_seconds=0))
            except grpc.RpcError as e:
                # This member is unreachable: release its channel, remember
                # the error for chaining and try the next candidate.
                channel.close()
                last_error = e
                continue
            if response.return_code != ReturnStatus.CALL_SUCCESS:
                # Do not leak the channel on the error path.
                channel.close()
                raise Exception(response.return_msg)
            self.living_aiflow_members = [
                proto_to_member(proto).server_uri
                for proto in response.members
            ]
            self.current_aiflow_uri = candidate_uri
            self.high_availability_stub = high_availability_stub
            break
        if self.current_aiflow_uri is None:
            raise Exception(
                "No available aiflow server uri!") from last_error
        self.aiflow_ha_change_lock = threading.Lock()
        self.aiflow_ha_running = True
        self._replace_aiflow_stubs(self.current_aiflow_uri)
        # Daemon thread: it must not keep the interpreter alive on exit.
        self.list_aiflow_member_thread = threading.Thread(
            target=self._list_aiflow_members, daemon=True)
        self.list_aiflow_member_thread.start()