class IdGenerator(Proxy):
    """Id generator that hands out ids in blocks of ``BLOCK_SIZE``.

    A block number is reserved through the wrapped atomic long; the ids
    inside the reserved block are then served from local counters.
    ``_local`` holds the current block number and ``_residue`` the next
    offset inside that block.
    """

    def __init__(self, client, service_name, name, atomic_long):
        super(IdGenerator, self).__init__(client, service_name, name)
        self._atomic_long = atomic_long
        # Starting at BLOCK_SIZE forces the first new_id() call to reserve a block.
        self._residue = AtomicInteger(BLOCK_SIZE)
        self._local = AtomicInteger(-1)
        self._lock = threading.RLock()

    def _on_destroy(self):
        """Destroy the backing atomic long together with this proxy."""
        self._atomic_long.destroy()

    def init(self, initial):
        """Try to initialize the generator so that ids continue after ``initial``.

        Args:
            initial: The value to initialize the generator with.

        Returns:
            ``False`` if ``initial`` is not positive; otherwise the result of
            the compare-and-set performed on the backing atomic long.
        """
        # The argument must be validated here; the original compared the
        # builtin ``id`` function, which never rejected anything.
        if initial <= 0:
            return False

        # Floor division keeps the block number an integer on Python 3 too.
        step = initial // BLOCK_SIZE
        with self._lock:
            initialized = self._atomic_long.compare_and_set(0, step + 1)
            if initialized:
                self._local.set(step)
                self._residue.set((initial % BLOCK_SIZE) + 1)
            return initialized

    def new_id(self):
        """Return the next id, reserving a new block when the current one is used up."""
        val = self._residue.get_and_increment()
        if val >= BLOCK_SIZE:
            with self._lock:
                # Re-check under the lock: another caller may already have
                # reserved a fresh block.
                val = self._residue.get()
                if val >= BLOCK_SIZE:
                    increment = self._atomic_long.get_and_increment()
                    self._local.set(increment)
                    self._residue.set(0)
                # The lock is re-entrant, so retrying from inside it is safe.
                return self.new_id()

        block = self._local.get()
        return block * BLOCK_SIZE + val
class ConnectionManager:
    """ConnectionManager is responsible for managing ``Connection`` objects."""

    def __init__(
        self,
        client,
        config,
        reactor,
        address_provider,
        lifecycle_service,
        partition_service,
        cluster_service,
        invocation_service,
        near_cache_manager,
        send_state_to_cluster_fn,
    ):
        self.live = False
        self.active_connections = {}  # uuid to connection, must be modified under the _lock
        self.client_uuid = uuid.uuid4()
        self._client = client
        self._config = config
        self._reactor = reactor
        self._address_provider = address_provider
        self._lifecycle_service = lifecycle_service
        self._partition_service = partition_service
        self._cluster_service = cluster_service
        self._invocation_service = invocation_service
        self._near_cache_manager = near_cache_manager
        self._send_state_to_cluster_fn = send_state_to_cluster_fn
        self._client_state = _ClientState.INITIAL  # must be modified under the _lock
        self._smart_routing_enabled = config.smart_routing
        self._wait_strategy = self._init_wait_strategy(config)
        self._reconnect_mode = config.reconnect_mode
        self._heartbeat_manager = _HeartbeatManager(
            self, self._client, config, reactor, invocation_service
        )
        self._connection_listeners = []
        self._connect_all_members_timer = None
        self._async_start = config.async_start
        self._connect_to_cluster_thread_running = False
        self._shuffle_member_list = config.shuffle_member_list
        self._lock = threading.RLock()
        self._connection_id_generator = AtomicInteger()
        self._labels = frozenset(config.labels)
        self._cluster_id = None
        self._load_balancer = None
        self._use_public_ip = (
            isinstance(address_provider, DefaultAddressProvider) and config.use_public_ip
        )

    def add_listener(self, on_connection_opened=None, on_connection_closed=None):
        """Registers a ConnectionListener.

        If the same listener is registered multiple times, it will be notified multiple times.

        Args:
            on_connection_opened (function): Function to be called when a connection is opened.
                (Default value = None)
            on_connection_closed (function): Function to be called when a connection is removed.
                (Default value = None)
        """
        self._connection_listeners.append((on_connection_opened, on_connection_closed))

    def get_connection(self, member_uuid):
        """Returns the active connection registered for ``member_uuid``, or ``None``."""
        return self.active_connections.get(member_uuid, None)

    def get_random_connection(self):
        """Returns a connection picked via the load balancer, or any active one.

        Returns:
            Connection: A connection, or ``None`` if there is no active connection.
        """
        # Try getting the connection from the load balancer, if smart routing is enabled
        if self._smart_routing_enabled:
            member = self._load_balancer.next()
            if member:
                connection = self.get_connection(member.uuid)
                if connection:
                    return connection

        # Otherwise iterate over connections and return the first one
        for connection in list(self.active_connections.values()):
            return connection

        # Failed to get a connection
        return None

    def get_random_connection_for_sql(self):
        """Returns a random connection for SQL.

        The connection is tried to be selected in the following order.

        - Random connection to a data member from the larger same-version group.
        - Random connection to a data member.
        - Any random connection
        - ``None``, if there is no connection.

        Returns:
            Connection: A random connection for SQL.
        """
        if self._smart_routing_enabled:
            # There might be a race - the chosen member might be just connected or disconnected.
            # Try a couple of times, the member_of_larger_same_version_group returns a random
            # connection, we might be lucky...
            for _ in range(_SQL_CONNECTION_RANDOM_ATTEMPTS):
                members = self._cluster_service.get_members()
                member = member_of_larger_same_version_group(members)
                if not member:
                    break

                connection = self.get_connection(member.uuid)
                if connection:
                    return connection

        # Otherwise iterate over connections and return the first one
        # that's not to a lite member.
        first_connection = None
        for member_uuid, connection in list(self.active_connections.items()):
            if not first_connection:
                first_connection = connection

            member = self._cluster_service.get_member(member_uuid)
            if not member or member.lite_member:
                continue

            return connection

        # Failed to get a connection to a data member.
        return first_connection

    def start(self, load_balancer):
        """Marks the manager live, starts the heartbeat manager and connects to the cluster.

        Does nothing if the manager is already live.
        """
        if self.live:
            return

        self.live = True
        self._load_balancer = load_balancer
        self._heartbeat_manager.start()
        self._connect_to_cluster()

    def shutdown(self):
        """Stops timers and heartbeats, closes every active connection and drops listeners.

        Does nothing if the manager is not live.
        """
        if not self.live:
            return

        self.live = False
        if self._connect_all_members_timer:
            self._connect_all_members_timer.cancel()

        self._heartbeat_manager.shutdown()

        # Need to create copy of connection values to avoid modification errors on runtime
        for connection in list(self.active_connections.values()):
            connection.close_connection("Hazelcast client is shutting down", None)

        self.active_connections.clear()
        del self._connection_listeners[:]

    def connect_to_all_cluster_members(self, sync_start):
        """Starts the periodic task that connects to all members (smart routing only).

        Args:
            sync_start (bool): When true, first try to connect to every known
                member and wait for each attempt; failures are ignored.
        """
        if not self._smart_routing_enabled:
            return

        if sync_start:
            for member in self._cluster_service.get_members():
                try:
                    self._get_or_connect_to_member(member).result()
                except:
                    # Best effort: the periodic timer below retries these members.
                    pass

        self._start_connect_all_members_timer()

    def on_connection_close(self, closed_connection):
        """Removes a closed connection from the registry and notifies interested parties.

        The registry is updated under the lock; lifecycle events, the
        reconnection trigger and the listener callbacks run after it is released.
        """
        remote_uuid = closed_connection.remote_uuid
        remote_address = closed_connection.remote_address

        if not remote_address:
            _logger.debug(
                "Destroying %s, but it has no remote address, hence nothing is "
                "removed from the connection dictionary",
                closed_connection,
            )
            return

        disconnected = False
        removed = False
        trigger_reconnection = False
        with self._lock:
            connection = self.active_connections.get(remote_uuid, None)
            if connection == closed_connection:
                self.active_connections.pop(remote_uuid, None)
                removed = True
                _logger.info(
                    "Removed connection to %s:%s, connection: %s",
                    remote_address,
                    remote_uuid,
                    connection,
                )

                if not self.active_connections:
                    trigger_reconnection = True
                    if self._client_state == _ClientState.INITIALIZED_ON_CLUSTER:
                        disconnected = True

        if disconnected:
            self._lifecycle_service.fire_lifecycle_event(LifecycleState.DISCONNECTED)

        if trigger_reconnection:
            self._trigger_cluster_reconnection()

        if removed:
            for _, on_connection_closed in self._connection_listeners:
                if on_connection_closed:
                    try:
                        on_connection_closed(closed_connection)
                    except:
                        _logger.exception("Exception in connection listener")
        else:
            _logger.debug(
                "Destroying %s, but there is no mapping for %s in the connection dictionary",
                closed_connection,
                remote_uuid,
            )

    def check_invocation_allowed(self):
        """Raises unless the client is initialized on the cluster with a live connection.

        Raises:
            ClientOfflineError: If the client is still starting with async start
                enabled, or is disconnected with the ``ASYNC`` reconnect mode.
            IOError: In the remaining not-connected cases.
        """
        state = self._client_state
        if state == _ClientState.INITIALIZED_ON_CLUSTER and self.active_connections:
            return

        if state == _ClientState.INITIAL:
            if self._async_start:
                raise ClientOfflineError()
            else:
                raise IOError("No connection found to cluster since the client is starting.")
        elif self._reconnect_mode == ReconnectMode.ASYNC:
            raise ClientOfflineError()
        else:
            raise IOError("No connection found to cluster")

    def _get_or_connect_to_address(self, address):
        """Returns a future of the connection to ``address``, opening one if needed."""
        for connection in list(self.active_connections.values()):
            if connection.remote_address == address:
                return ImmediateFuture(connection)

        try:
            translated = self._translate(address)
            connection = self._create_connection(translated)
            return self._authenticate(connection).continue_with(self._on_auth, connection)
        except Exception as e:
            return ImmediateExceptionFuture(e)

    def _get_or_connect_to_member(self, member):
        """Returns a future of the connection to ``member``, opening one if needed."""
        connection = self.active_connections.get(member.uuid, None)
        if connection:
            return ImmediateFuture(connection)

        try:
            translated = self._translate_member_address(member)
            connection = self._create_connection(translated)
            return self._authenticate(connection).continue_with(self._on_auth, connection)
        except Exception as e:
            return ImmediateExceptionFuture(e)

    def _create_connection(self, address):
        """Builds a new connection to ``address`` through the reactor's connection factory."""
        factory = self._reactor.connection_factory
        return factory(
            self,
            self._connection_id_generator.get_and_increment(),
            address,
            self._config,
            self._invocation_service.handle_client_message,
        )

    def _translate(self, address):
        """Translates ``address`` with the address provider.

        Raises:
            ValueError: If the provider returns a falsy translation.
        """
        translated = self._address_provider.translate(address)
        if not translated:
            raise ValueError(
                "Address provider %s could not translate address %s"
                % (self._address_provider.__class__.__name__, address)
            )

        return translated

    def _translate_member_address(self, member):
        """Picks the address to use for ``member``.

        With public IP usage enabled, the member's public client endpoint is
        preferred and its plain address is the fallback; otherwise the address
        goes through the address provider.
        """
        if self._use_public_ip:
            public_address = member.address_map.get(_CLIENT_PUBLIC_ENDPOINT_QUALIFIER, None)
            if public_address:
                return public_address

            return member.address

        return self._translate(member.address)

    def _trigger_cluster_reconnection(self):
        """Shuts the client down when reconnection is off, otherwise starts reconnecting."""
        if self._reconnect_mode == ReconnectMode.OFF:
            _logger.info("Reconnect mode is OFF. Shutting down the client")
            self._shutdown_client()
            return

        if self._lifecycle_service.running:
            self._start_connect_to_cluster_thread()

    def _init_wait_strategy(self, config):
        """Builds the retry/backoff wait strategy from ``config``."""
        cluster_connect_timeout = config.cluster_connect_timeout
        if cluster_connect_timeout == -1:
            # If the no timeout is specified by the
            # user, or set to -1 explicitly, set
            # the timeout to infinite.
            cluster_connect_timeout = _INF

        return _WaitStrategy(
            config.retry_initial_backoff,
            config.retry_max_backoff,
            config.retry_multiplier,
            cluster_connect_timeout,
            config.retry_jitter,
        )

    def _start_connect_all_members_timer(self):
        """Schedules a self-rescheduling task that connects to members lacking a connection."""
        connecting_uuids = set()

        def run():
            if not self._lifecycle_service.running:
                return

            for member in self._cluster_service.get_members():
                member_uuid = member.uuid

                if self.active_connections.get(member_uuid, None):
                    continue

                if member_uuid in connecting_uuids:
                    continue

                connecting_uuids.add(member_uuid)
                if not self._lifecycle_service.running:
                    break

                # Bind the bound_member_uuid to the value
                # in this loop iteration
                def cb(_, bound_member_uuid=member_uuid):
                    connecting_uuids.discard(bound_member_uuid)

                self._get_or_connect_to_member(member).add_done_callback(cb)

            self._connect_all_members_timer = self._reactor.add_timer(1, run)

        self._connect_all_members_timer = self._reactor.add_timer(1, run)

    def _connect_to_cluster(self):
        """Connects in a background thread for async start, otherwise in the calling thread."""
        if self._async_start:
            self._start_connect_to_cluster_thread()
        else:
            self._sync_connect_to_cluster()

    def _start_connect_to_cluster_thread(self):
        """Starts the daemon thread that connects to the cluster, unless one is running."""
        with self._lock:
            if self._connect_to_cluster_thread_running:
                return

            self._connect_to_cluster_thread_running = True

        def run():
            try:
                while True:
                    self._sync_connect_to_cluster()
                    with self._lock:
                        if self.active_connections:
                            self._connect_to_cluster_thread_running = False
                            return
            except:
                _logger.exception("Could not connect to any cluster, shutting down the client")
                self._shutdown_client()

        t = threading.Thread(target=run, name="hazelcast_async_connection")
        t.daemon = True
        t.start()

    def _shutdown_client(self):
        """Shuts the client down, logging instead of propagating any failure."""
        try:
            self._client.shutdown()
        except:
            _logger.exception("Exception during client shutdown")

    def _sync_connect_to_cluster(self):
        """Tries known members and configured addresses until one connection succeeds.

        Returns normally as soon as a connection is established.

        Raises:
            IllegalStateError: If the wait strategy gives up, or the cluster
                rejects the client or its configuration.
        """
        tried_addresses = set()
        self._wait_strategy.reset()
        try:
            while True:
                tried_addresses_per_attempt = set()
                members = self._cluster_service.get_members()
                if self._shuffle_member_list:
                    random.shuffle(members)

                for member in members:
                    self._check_client_active()
                    tried_addresses_per_attempt.add(member.address)
                    connection = self._connect(member, self._get_or_connect_to_member)
                    if connection:
                        return

                for address in self._get_possible_addresses():
                    self._check_client_active()
                    if address in tried_addresses_per_attempt:
                        # We already tried this address on from the member list
                        continue

                    tried_addresses_per_attempt.add(address)
                    connection = self._connect(address, self._get_or_connect_to_address)
                    if connection:
                        return

                tried_addresses.update(tried_addresses_per_attempt)

                # If the address providers load no addresses (which seems to be possible),
                # then the above loop is not entered and the lifecycle check is missing,
                # hence we need to repeat the same check at this point.
                if not tried_addresses_per_attempt:
                    self._check_client_active()

                if not self._wait_strategy.sleep():
                    break
        except (ClientNotAllowedInClusterError, InvalidConfigurationError):
            cluster_name = self._config.cluster_name
            _logger.exception("Stopped trying on cluster %s", cluster_name)

        cluster_name = self._config.cluster_name
        _logger.info(
            "Unable to connect to any address from the cluster with name: %s. "
            "The following addresses were tried: %s",
            cluster_name,
            tried_addresses,
        )
        if self._lifecycle_service.running:
            msg = "Unable to connect to any cluster"
        else:
            msg = "Client is being shutdown"
        raise IllegalStateError(msg)

    def _connect(self, target, get_or_connect_func):
        """Waits for a connection to ``target``.

        Returns:
            The connection, or ``None`` on failure.

        Raises:
            ClientNotAllowedInClusterError, InvalidConfigurationError: Re-raised
                after being logged.
        """
        _logger.info("Trying to connect to %s", target)
        try:
            return get_or_connect_func(target).result()
        except (ClientNotAllowedInClusterError, InvalidConfigurationError) as e:
            _logger.warning("Error during initial connection to %s", target, exc_info=True)
            raise e
        except:
            _logger.warning("Error during initial connection to %s", target, exc_info=True)
            return None

    def _authenticate(self, connection):
        """Sends an urgent authentication request over ``connection``.

        Uses the custom (token) codec when a token provider is configured and
        the username/password codec otherwise.

        Returns:
            The future of the authentication invocation.
        """
        client = self._client
        cluster_name = self._config.cluster_name
        client_name = client.name
        if self._config.token_provider:
            token = self._config.token_provider.token(connection.connected_address)
            request = client_authentication_custom_codec.encode_request(
                cluster_name,
                token,
                self.client_uuid,
                CLIENT_TYPE,
                SERIALIZATION_VERSION,
                __version__,
                client_name,
                self._labels,
            )
        else:
            request = client_authentication_codec.encode_request(
                cluster_name,
                self._config.creds_username,
                self._config.creds_password,
                self.client_uuid,
                CLIENT_TYPE,
                SERIALIZATION_VERSION,
                __version__,
                client_name,
                self._labels,
            )

        invocation = Invocation(
            request, connection=connection, urgent=True, response_handler=lambda m: m
        )
        self._invocation_service.invoke(invocation)
        return invocation.future

    def _on_auth(self, response, connection):
        """Handles the authentication response future for ``connection``.

        On any failure the connection is closed and the error is raised.

        Returns:
            The connection to use, as returned by ``_handle_successful_auth``.
        """
        try:
            response = client_authentication_codec.decode_response(response.result())
        except Exception as e:
            connection.close_connection("Failed to authenticate connection", e)
            raise e

        status = response["status"]
        if status == _AuthenticationStatus.AUTHENTICATED:
            return self._handle_successful_auth(response, connection)

        if status == _AuthenticationStatus.CREDENTIALS_FAILED:
            err = AuthenticationError("Authentication failed. Check cluster name and credentials.")
        elif status == _AuthenticationStatus.NOT_ALLOWED_IN_CLUSTER:
            err = ClientNotAllowedInClusterError("Client is not allowed in the cluster")
        elif status == _AuthenticationStatus.SERIALIZATION_VERSION_MISMATCH:
            err = IllegalStateError("Server serialization version does not match to client")
        else:
            err = AuthenticationError(
                "Authentication status code not supported. status: %s" % status
            )

        connection.close_connection("Failed to authenticate connection", err)
        raise err

    def _handle_successful_auth(self, response, connection):
        """Registers an authenticated connection and updates the client state.

        Returns:
            The registered connection, or the already existing connection to
            the same member, in which case the new one is closed as a duplicate.
        """
        with self._lock:
            self._check_partition_count(response["partition_count"])

            server_version_str = response["server_hazelcast_version"]
            remote_address = response["address"]
            remote_uuid = response["member_uuid"]

            connection.remote_address = remote_address
            connection.server_version = calculate_version(server_version_str)
            connection.remote_uuid = remote_uuid

            existing = self.active_connections.get(remote_uuid, None)
            if existing:
                connection.close_connection(
                    "Duplicate connection to same member with UUID: %s" % remote_uuid, None
                )
                return existing

            new_cluster_id = response["cluster_id"]
            changed_cluster = self._cluster_id is not None and self._cluster_id != new_cluster_id
            if changed_cluster:
                self._check_client_state_on_cluster_change(connection)
                _logger.warning(
                    "Switching from current cluster: %s to new cluster: %s",
                    self._cluster_id,
                    new_cluster_id,
                )
                self._on_cluster_restart()

            is_initial_connection = not self.active_connections
            self.active_connections[remote_uuid] = connection
            if is_initial_connection:
                self._cluster_id = new_cluster_id
                if changed_cluster:
                    self._client_state = _ClientState.CONNECTED_TO_CLUSTER
                    self._initialize_on_cluster(new_cluster_id)
                else:
                    self._client_state = _ClientState.INITIALIZED_ON_CLUSTER

        # Events and listener callbacks are fired after the lock is released.
        if is_initial_connection and not changed_cluster:
            self._lifecycle_service.fire_lifecycle_event(LifecycleState.CONNECTED)

        _logger.info(
            "Authenticated with server %s:%s, server version: %s, local address: %s",
            remote_address,
            remote_uuid,
            server_version_str,
            connection.local_address,
        )

        for on_connection_opened, _ in self._connection_listeners:
            if on_connection_opened:
                try:
                    on_connection_opened(connection)
                except:
                    _logger.exception("Exception in connection listener")

        # The connection may have died while it was being registered.
        if not connection.live:
            self.on_connection_close(connection)

        return connection

    def _initialize_on_cluster(self, cluster_id) -> None:
        """Sends the client state to the cluster, retrying while ``cluster_id`` is current."""
        # This method is only called in the reactor thread
        if cluster_id != self._cluster_id:
            _logger.warning(
                f"Client won't send the state to the cluster: {cluster_id} "
                f"because it switched to a new cluster: {self._cluster_id}"
            )
            return

        def callback(future):
            try:
                future.result()

                if cluster_id == self._cluster_id:
                    _logger.debug("The client state is sent to the cluster %s", cluster_id)
                    self._client_state = _ClientState.INITIALIZED_ON_CLUSTER
                    self._lifecycle_service.fire_lifecycle_event(LifecycleState.CONNECTED)
                elif _logger.isEnabledFor(logging.DEBUG):
                    _logger.warning(
                        "Cannot set client state to 'INITIALIZED_ON_CLUSTER' "
                        f"because current cluster id: {self._cluster_id} "
                        f"is different than the expected cluster id: {cluster_id}"
                    )
            except:
                retry_on_error()

        def retry_on_error():
            _logger.exception(f"Failure during sending client state to the cluster {cluster_id}")

            if cluster_id != self._cluster_id:
                return

            if _logger.isEnabledFor(logging.DEBUG):
                _logger.warning(f"Retrying sending client state to the cluster: {cluster_id}")

            self._initialize_on_cluster(cluster_id)

        try:
            self._send_state_to_cluster_fn().add_done_callback(callback)
        except:
            retry_on_error()

    def _check_client_state_on_cluster_change(self, connection):
        """Rejects ``connection`` if other connections to the previous cluster still exist.

        Raises:
            ValueError: If there are active connections; ``connection`` is closed first.
        """
        if self.active_connections:
            # If there are other connections, we must be connected to the wrong cluster.
            # We should not stay connected to this new connection.
            # Note that, in some racy scenarios, we might close a connection that
            # we can operate on. In those scenarios, we rely on the fact that we will
            # reopen the connections.
            reason = "Connection does not belong to the cluster %s" % self._cluster_id
            connection.close_connection(reason, None)
            raise ValueError(reason)

    def _on_cluster_restart(self):
        """Clears near caches and the member list after switching clusters."""
        self._near_cache_manager.clear_near_caches()
        self._cluster_service.clear_member_list()

    def _check_partition_count(self, partition_count):
        """Raises ``ClientNotAllowedInClusterError`` if the partition service rejects the count."""
        if not self._partition_service.check_and_set_partition_count(partition_count):
            raise ClientNotAllowedInClusterError(
                "Client can not work with this cluster because it has a "
                "different partition count. Expected partition count: %d, "
                "Member partition count: %d"
                % (self._partition_service.partition_count, partition_count)
            )

    def _check_client_active(self):
        """Raises ``HazelcastClientNotActiveError`` if the lifecycle service is not running."""
        if not self._lifecycle_service.running:
            raise HazelcastClientNotActiveError()

    def _get_possible_addresses(self):
        """Returns the provider's primary addresses followed by its secondary ones."""
        primaries, secondaries = self._address_provider.load_addresses()
        if self._shuffle_member_list:
            # The relative order between primary and secondary addresses should
            # not be changed. So we shuffle the lists separately and then add
            # them to the final list so that secondary addresses are not tried
            # before all primary addresses have been tried. Otherwise we can get
            # startup delays
            random.shuffle(primaries)
            random.shuffle(secondaries)

        addresses = []
        addresses.extend(primaries)
        addresses.extend(secondaries)
        return addresses
class InvocationService:
    """Dispatches invocations to cluster connections and completes their futures.

    Invocations that were sent are tracked in ``_pending`` by correlation id
    until they complete, either with a response or with an error.
    """

    _CLEAN_RESOURCES_PERIOD = 0.1

    def __init__(self, client, config, reactor):
        smart_routing = config.smart_routing
        # Pick the dispatch strategy once instead of branching on every invoke.
        self._do_invoke = self._invoke_smart if smart_routing else self._invoke_non_smart

        self._client = client
        self._reactor = reactor

        # Collaborators are wired in later through init().
        self._partition_service = None
        self._connection_manager = None
        self._listener_service = None
        self._check_invocation_allowed_fn = None

        self._pending = {}
        self._next_correlation_id = AtomicInteger(1)
        self._is_redo_operation = config.redo_operation
        self._invocation_timeout = config.invocation_timeout
        self._invocation_retry_pause = config.invocation_retry_pause
        self._backup_ack_to_client_enabled = smart_routing and config.backup_ack_to_client_enabled
        self._fail_on_indeterminate_state = config.fail_on_indeterminate_operation_state
        self._backup_timeout = config.operation_backup_timeout
        self._clean_resources_timer = None
        self._shutdown = False
        self._compact_schema_service = None

    def init(self, partition_service, connection_manager, listener_service, compact_schema_service):
        """Wires in the services this one collaborates with."""
        self._partition_service = partition_service
        self._connection_manager = connection_manager
        self._listener_service = listener_service
        self._check_invocation_allowed_fn = connection_manager.check_invocation_allowed
        self._compact_schema_service = compact_schema_service

    def start(self):
        """Starts the periodic resource-cleaning task."""
        self._start_clean_resources_timer()

    def add_backup_listener(self):
        """Registers the backup listener when backup acks to the client are enabled."""
        if self._backup_ack_to_client_enabled:
            self._register_backup_listener()

    def handle_client_message(self, message):
        """Routes an inbound message to the listener service or to its pending invocation."""
        correlation_id = message.get_correlation_id()

        frame = message.start_frame
        if frame.has_event_flag() or frame.has_backup_event_flag():
            self._listener_service.handle_client_message(message, correlation_id)
            return

        pending = self._pending.get(correlation_id)
        if not pending:
            _logger.warning("Got message with unknown correlation id: %s", message)
            return

        if message.get_message_type() != EXCEPTION_MESSAGE_TYPE:
            self._notify(pending, message)
            return

        return self._notify_error(pending, create_error_from_message(message))

    def invoke(self, invocation):
        """Stamps the invocation with a deadline and correlation id, then dispatches it."""
        if not invocation.timeout:
            invocation.timeout = self._invocation_timeout + time.time()

        next_id = self._next_correlation_id.get_and_increment()
        outbound = invocation.request
        outbound.set_correlation_id(next_id)
        outbound.set_partition_id(invocation.partition_id)
        self._do_invoke(invocation)

    def shutdown(self):
        """Stops the cleaning timer and fails every pending invocation. Runs only once."""
        if self._shutdown:
            return
        self._shutdown = True

        timer = self._clean_resources_timer
        if timer:
            timer.cancel()

        # Iterate over a snapshot: completing an invocation removes it from _pending.
        for pending in list(self._pending.values()):
            self._notify_error(pending, HazelcastClientNotActiveError())

    def _invoke_on_partition_owner(self, invocation, partition_id):
        """Sends to the owner of ``partition_id``; returns whether it was sent."""
        owner_uuid = self._partition_service.get_partition_owner(partition_id)
        if owner_uuid:
            return self._invoke_on_target(invocation, owner_uuid)

        _logger.debug("Partition owner is not assigned yet")
        return False

    def _invoke_on_target(self, invocation, owner_uuid):
        """Sends over the connection to member ``owner_uuid``; returns whether it was sent."""
        target = self._connection_manager.get_connection(owner_uuid)
        if target:
            return self._send(invocation, target)

        _logger.debug("Client is not connected to target: %s", owner_uuid)
        return False

    def _invoke_on_random_connection(self, invocation):
        """Sends over a connection chosen by the connection manager; returns whether it was sent."""
        chosen = self._connection_manager.get_random_connection()
        if chosen:
            return self._send(invocation, chosen)

        _logger.debug("No connection found to invoke")
        return False

    def _invoke_smart(self, invocation):
        """Dispatches with routing: pinned connection, partition owner, member, then random."""
        try:
            if not invocation.urgent:
                self._check_invocation_allowed_fn()

            pinned = invocation.connection
            if pinned:
                # A pinned invocation never falls back to another connection.
                if not self._send(invocation, pinned):
                    self._notify_error(
                        invocation, IOError("Could not invoke on connection %s" % pinned)
                    )
                return

            if invocation.partition_id != -1:
                sent = self._invoke_on_partition_owner(invocation, invocation.partition_id)
            elif invocation.uuid:
                sent = self._invoke_on_target(invocation, invocation.uuid)
            else:
                sent = self._invoke_on_random_connection(invocation)

            # One more attempt on a random connection before giving up.
            sent = sent or self._invoke_on_random_connection(invocation)
            if not sent:
                self._notify_error(invocation, IOError("No connection found to invoke"))
        except Exception as e:
            self._notify_error(invocation, e)

    def _invoke_non_smart(self, invocation):
        """Dispatches over the pinned connection if there is one, else over a random one."""
        try:
            if not invocation.urgent:
                self._check_invocation_allowed_fn()

            pinned = invocation.connection
            if pinned:
                if not self._send(invocation, pinned):
                    self._notify_error(
                        invocation, IOError("Could not invoke on connection %s" % pinned)
                    )
                return

            sent = self._invoke_on_random_connection(invocation)
            if not sent:
                self._notify_error(invocation, IOError("No connection found to invoke"))
        except Exception as e:
            self._notify_error(invocation, e)

    def _send(self, invocation, connection):
        """Registers the invocation as pending and writes its request to ``connection``.

        Returns:
            ``True`` if the connection accepted the message, ``False`` otherwise.

        Raises:
            HazelcastClientNotActiveError: If the service has been shut down.
        """
        if self._shutdown:
            raise HazelcastClientNotActiveError()

        if self._backup_ack_to_client_enabled:
            invocation.request.set_backup_aware_flag()

        outbound = invocation.request
        correlation_id = outbound.get_correlation_id()
        # Register before writing, so the entry exists before the message goes out.
        self._pending[correlation_id] = invocation

        if invocation.event_handler:
            self._listener_service.add_event_handler(correlation_id, invocation.event_handler)

        if connection.send_message(outbound):
            invocation.sent_connection = connection
            return True

        if invocation.event_handler:
            self._listener_service.remove_event_handler(correlation_id)
        return False

    def _complete(self, invocation: Invocation, client_message: InboundMessage) -> None:
        """Decodes the response into the invocation's future and forgets the invocation.

        If decoding needs a schema that is not known yet, the schema is fetched
        and completion is attempted again later; the invocation stays pending.
        """
        try:
            decoded = invocation.response_handler(client_message)
            invocation.future.set_result(decoded)
        except SchemaNotFoundError as e:
            self._fetch_schema_and_complete_again(e, invocation, client_message)
            return
        except Exception as e:
            invocation.future.set_exception(e)

        self._pending.pop(invocation.request.get_correlation_id(), None)

    def _complete_with_error(self, invocation, error):
        """Fails the invocation's future with ``error`` and forgets the invocation."""
        invocation.future.set_exception(error, None)
        self._pending.pop(invocation.request.get_correlation_id(), None)

    def _fetch_schema_and_complete_again(self, error: SchemaNotFoundError, invocation: Invocation, message: InboundMessage) -> None:
        """Fetches the schema named by ``error`` and retries completing the invocation."""
        schema_id = error.schema_id

        def callback(future):
            try:
                fetched = future.result()
                self._compact_schema_service.register_fetched_schema(schema_id, fetched)
            except Exception as e:
                self._complete_with_error(invocation, e)
                return

            # Rewind the message so the response handler reads it from the start.
            message.reset_next_frame()
            self._complete(invocation, message)

        self._compact_schema_service.fetch_schema(schema_id).add_done_callback(callback)

    def _notify_error(self, invocation, error):
        """Fails the invocation with ``error`` or schedules a retry, as appropriate."""
        _logger.debug("Got exception for request %s, error: %s", invocation.request, error)

        if not self._client.lifecycle_service.is_running():
            self._complete_with_error(invocation, HazelcastClientNotActiveError())
            return

        if not self._should_retry(invocation, error):
            self._complete_with_error(invocation, error)
            return

        if invocation.timeout < time.time():
            _logger.debug("Error will not be retried because invocation timed out: %s", error)
            timeout_error = OperationTimeoutError(
                "Request timed out because an error occurred "
                "after invocation timeout: %s" % error
            )
            self._complete_with_error(invocation, timeout_error)
            return

        invocation.sent_connection = None
        retry = functools.partial(self._retry_if_not_done, invocation)
        self._reactor.add_timer(self._invocation_retry_pause, retry)

    def _retry_if_not_done(self, invocation):
        """Dispatches the invocation again unless its future is already done."""
        if invocation.future.done():
            return
        self._do_invoke(invocation)

    def _should_retry(self, invocation, error):
        """Decides whether an invocation that failed with ``error`` may be retried."""
        io_failure = isinstance(error, IOError)
        disconnected = isinstance(error, TargetDisconnectedError)

        # Invocations pinned to a connection are not retried on connection-level errors.
        if invocation.connection and (io_failure or disconnected):
            return False

        if invocation.uuid and isinstance(error, TargetNotMemberError):
            return False

        if (
            io_failure
            or isinstance(error, HazelcastInstanceNotActiveError)
            or is_retryable_error(error)
        ):
            return True

        if not disconnected:
            return False

        return invocation.request.retryable or self._is_redo_operation

    def _register_backup_listener(self):
        """Registers the local backup listener and waits for the registration to finish."""
        codec = client_local_backup_listener_codec

        def encode_deregistration(reg_id):
            return None

        def handle_event(m):
            return codec.handle(m, self._backup_event_handler)

        registration = self._listener_service.register_listener(
            codec.encode_request(),
            codec.decode_response,
            encode_deregistration,
            handle_event,
        )
        registration.result()

    def _backup_event_handler(self, correlation_id):
        """Records a backup ack for the pending invocation with ``correlation_id``."""
        pending = self._pending.get(correlation_id)
        if pending:
            self._notify_backup_complete(pending)
            return

        _logger.debug("Invocation not found for backup event, invocation id %s", correlation_id)

    def _notify(self, invocation, client_message):
        """Completes the invocation, or parks the response until its backup acks arrive."""
        expected_backups = client_message.get_number_of_backup_acks()
        if expected_backups <= invocation.backup_acks_received:
            self._complete(invocation, client_message)
            return

        invocation.pending_response_received_time = time.time()
        invocation.backup_acks_expected = expected_backups
        invocation.pending_response = client_message

    def _notify_backup_complete(self, invocation):
        """Counts one backup ack and completes the invocation once all have arrived."""
        invocation.backup_acks_received += 1

        all_acked = (
            invocation.pending_response
            and invocation.backup_acks_expected == invocation.backup_acks_received
        )
        if all_acked:
            self._complete(invocation, invocation.pending_response)

    def _start_clean_resources_timer(self):
        """Schedules the self-rescheduling task that sweeps pending invocations."""
        period = self._CLEAN_RESOURCES_PERIOD

        def run():
            if self._shutdown:
                return

            now = time.time()
            # Snapshot the values: handling an invocation may remove it from _pending.
            for pending in list(self._pending.values()):
                sent_over = pending.sent_connection
                if not sent_over:
                    continue

                if not sent_over.live:
                    self._notify_error(pending, TargetDisconnectedError(sent_over.close_reason))
                elif self._backup_ack_to_client_enabled:
                    self._detect_and_handle_backup_timeout(pending, now)

            self._clean_resources_timer = self._reactor.add_timer(period, run)

        self._clean_resources_timer = self._reactor.add_timer(period, run)

    def _detect_and_handle_backup_timeout(self, invocation, now):
        """Resolves an invocation whose backup acks did not arrive within the backup timeout."""
        if (
            not invocation.pending_response
            or invocation.backup_acks_expected == invocation.backup_acks_received
        ):
            return

        expiration_time = invocation.pending_response_received_time + self._backup_timeout
        if not 0 < expiration_time < now:
            return

        if not self._fail_on_indeterminate_state:
            self._complete(invocation, invocation.pending_response)
            return

        self._complete_with_error(
            invocation,
            IndeterminateOperationStateError(
                "Invocation failed because the backup acks are missed"
            ),
        )
class InvocationService(object):
    """Sends client requests over connections and dispatches their replies.

    Every request sent gets a fresh correlation id and is stored in
    ``_pending`` under that id; listener invocations are additionally stored
    in ``_event_handlers`` so that event messages can be routed to them.
    ``invoke`` is bound in ``__init__`` to :meth:`invoke_smart` or
    :meth:`invoke_non_smart` depending on the ``smart_routing`` setting.
    """

    logger = logging.getLogger("InvocationService")

    def __init__(self, client):
        self._pending = {}  # correlation id -> invocation
        self._event_handlers = {}  # correlation id -> listener invocation
        self._next_correlation_id = AtomicInteger(1)
        self._client = client
        self._event_queue = Queue()
        self._is_redo_operation = client.config.network_config.redo_operation
        if client.config.network_config.smart_routing:
            self.invoke = self.invoke_smart
        else:
            self.invoke = self.invoke_non_smart
        self._client.connection_manager.add_listener(
            on_connection_closed=self.cleanup_connection)
        client.heartbeat.add_listener(
            on_heartbeat_stopped=self._heartbeat_stopped)

    def invoke_on_connection(self, message, connection, ignore_heartbeat=False):
        """Invoke ``message`` on the given connection and return its future."""
        return self.invoke(Invocation(message, connection=connection),
                           ignore_heartbeat)

    def invoke_on_partition(self, message, partition_id):
        """Invoke ``message`` on the owner of ``partition_id``."""
        return self.invoke(Invocation(message, partition_id=partition_id))

    def invoke_on_random_target(self, message):
        """Invoke ``message`` without a specific target."""
        return self.invoke(Invocation(message))

    def invoke_on_target(self, message, address):
        """Invoke ``message`` on the member at ``address``."""
        return self.invoke(Invocation(message, address=address))

    def invoke_smart(self, invocation, ignore_heartbeat=False):
        """Route the invocation by connection, partition owner, address or load balancer.

        :return: the invocation's future.
        """
        if invocation.has_connection():
            self._send(invocation, invocation.connection, ignore_heartbeat)
        elif invocation.has_partition_id():
            addr = self._client.partition_service.get_partition_owner(
                invocation.partition_id)
            self._send_to_address(invocation, addr)
        elif invocation.has_address():
            self._send_to_address(invocation, invocation.address)
        else:
            # send to random address
            addr = self._client.load_balancer.next_address()
            self._send_to_address(invocation, addr)
        return invocation.future

    def invoke_non_smart(self, invocation, ignore_heartbeat=False):
        """Send over the invocation's own connection, else to the owner connection address.

        :return: the invocation's future.
        """
        if invocation.has_connection():
            self._send(invocation, invocation.connection, ignore_heartbeat)
        else:
            addr = self._client.cluster.owner_connection_address
            self._send_to_address(invocation, addr)
        return invocation.future

    def cleanup_connection(self, connection, cause):
        """Fail invocations sent over ``connection`` and re-register its listeners.

        :param connection: the connection that was closed.
        :param cause: the error handed to the affected invocations.
        """
        # Snapshot the values: handling an exception may touch ``_pending``.
        # ``list(d.values())`` works on both Python 2 and 3, unlike ``iteritems``.
        for invocation in list(self._pending.values()):
            if invocation.sent_connection == connection:
                self._handle_exception(invocation, cause)

        if self._client.lifecycle.is_live:
            for invocation in list(self._event_handlers.values()):
                # Only listeners that were not pinned to a specific connection
                # are re-registered.
                if invocation.sent_connection == connection and invocation.connection is None:
                    self._client.listener.re_register_listener(invocation)

    def _heartbeat_stopped(self, connection):
        """Fail every pending invocation that was sent over ``connection``."""
        for invocation in list(self._pending.values()):
            if invocation.sent_connection == connection:
                self._handle_exception(
                    invocation,
                    TargetDisconnectedError("%s has stopped heart beating." % connection))

    def _remove_event_handler(self, correlation_id):
        """Forget the listener invocation stored under ``correlation_id``.

        :raises KeyError: if no handler is stored under that id.
        """
        self._event_handlers.pop(correlation_id)

    def _send_to_address(self, invocation, address, ignore_heartbeat=False):
        """Send over the existing connection to ``address`` or connect first."""
        try:
            conn = self._client.connection_manager.connections[address]
            self._send(invocation, conn, ignore_heartbeat)
        except KeyError:
            if self._client.lifecycle.state != LIFECYCLE_STATE_CONNECTED:
                self._handle_exception(
                    invocation, IOError("Client is not in connected state"))
            else:
                self._client.connection_manager.get_or_connect(address).continue_with(
                    self.on_connect, invocation, ignore_heartbeat)

    def on_connect(self, f, invocation, ignore_heartbeat):
        """Continuation of ``get_or_connect``: send on success, fail otherwise."""
        if f.is_success():
            self._send(invocation, f.result(), ignore_heartbeat)
        else:
            self._handle_exception(invocation, f.exception(), f.traceback())

    def _send(self, invocation, connection, ignore_heartbeat):
        """Assign a correlation id, register the invocation and write the message."""
        correlation_id = self._next_correlation_id.get_and_increment()
        message = invocation.request
        message.set_correlation_id(correlation_id)
        message.set_partition_id(invocation.partition_id)
        # NOTE(review): the entry registered here is not removed in this method
        # when the send fails below; confirm whether stale ids are cleaned up
        # elsewhere.
        self._pending[correlation_id] = invocation
        if not invocation.timer:
            invocation.timer = self._client.reactor.add_timer_absolute(
                invocation.timeout, invocation.on_timeout)

        if isinstance(invocation, ListenerInvocation):
            self._event_handlers[correlation_id] = invocation

        self.logger.debug("Sending %s to %s", message, connection)

        if not ignore_heartbeat and not connection.heartbeating:
            self._handle_exception(
                invocation,
                TargetDisconnectedError("%s has stopped heart beating." % connection))
            return

        invocation.sent_connection = connection
        try:
            connection.send_message(message)
        except IOError as e:
            self._handle_exception(invocation, e)

    def _handle_client_message(self, message):
        """Dispatch an incoming message to its listener or pending invocation."""
        correlation_id = message.get_correlation_id()
        if message.has_flags(LISTENER_FLAG):
            if correlation_id not in self._event_handlers:
                self.logger.warning(
                    "Got event message with unknown correlation id: %s", message)
                return
            invocation = self._event_handlers[correlation_id]
            self._handle_event(invocation, message)
            return

        if correlation_id not in self._pending:
            self.logger.warning(
                "Got message with unknown correlation id: %s", message)
            return

        invocation = self._pending.pop(correlation_id)
        if message.get_message_type() == EXCEPTION_MESSAGE_TYPE:
            error = create_exception(ErrorCodec(message))
            return self._handle_exception(invocation, error)

        invocation.set_response(message)

    def _handle_event(self, invocation, message):
        """Run the invocation's event handler; log (do not propagate) its errors."""
        try:
            invocation.event_handler(message)
        except Exception:
            # A failing user handler must not break message processing, but
            # SystemExit/KeyboardInterrupt are allowed to propagate.
            self.logger.warning("Error handling event %s", message, exc_info=True)

    def _handle_exception(self, invocation, error, traceback=None):
        """Retry the invocation if the error allows it, otherwise fail its future."""
        if self.logger.isEnabledFor(logging.DEBUG):
            self.logger.debug("Got exception for request %s: %s: %s",
                              invocation.request, type(error).__name__, error)

        if isinstance(error, (AuthenticationError, IOError, HazelcastInstanceNotActiveError)):
            if self._try_retry(invocation):
                return

        if is_retryable_error(error):
            if invocation.request.is_retryable() or self._is_redo_operation:
                if self._try_retry(invocation):
                    return

        invocation.set_exception(error, traceback)

    def _try_retry(self, invocation):
        """Schedule a re-invocation after ``RETRY_WAIT_TIME_IN_SECONDS``.

        :return: ``True`` if a retry was scheduled; ``False`` if the invocation
            is bound to a connection or its timeout has already passed.
        """
        if invocation.connection:
            return False
        if invocation.timeout < time.time():
            return False

        invoke_func = functools.partial(self.invoke, invocation)
        self.logger.debug("Rescheduling request %s to be retried in %s seconds",
                          invocation.request, RETRY_WAIT_TIME_IN_SECONDS)
        self._client.reactor.add_timer(RETRY_WAIT_TIME_IN_SECONDS, invoke_func)
        return True
class InvocationService(object):
    """Sends client requests over connections and dispatches their replies.

    Every request sent gets a fresh correlation id and is stored in
    ``_pending`` under that id until a response arrives. ``invoke`` is bound
    in ``__init__`` to :meth:`invoke_smart` or :meth:`invoke_non_smart`
    depending on the ``smart_routing`` setting. Event messages are delegated
    to the listener service obtained in :meth:`start`.
    """

    logger = logging.getLogger("HazelcastClient.InvocationService")

    def __init__(self, client):
        self._pending = {}  # correlation id -> invocation
        self._next_correlation_id = AtomicInteger(1)
        self._client = client
        self._logger_extras = {
            "client_name": client.name,
            "group_name": client.config.group_config.name
        }
        self._event_queue = queue.Queue()
        self._is_redo_operation = client.config.network_config.redo_operation
        self.invocation_retry_pause = self._init_invocation_retry_pause()
        self.invocation_timeout = self._init_invocation_timeout()
        self._listener_service = None  # set in start()

        if client.config.network_config.smart_routing:
            self.invoke = self.invoke_smart
        else:
            self.invoke = self.invoke_non_smart

        self._client.connection_manager.add_listener(
            on_connection_closed=self.cleanup_connection)
        client.heartbeat.add_listener(
            on_heartbeat_stopped=self._heartbeat_stopped)

    def start(self):
        """Pick up the client's listener service."""
        self._listener_service = self._client.listener

    def invoke_on_connection(self, message, connection, ignore_heartbeat=False,
                             event_handler=None):
        """Invoke ``message`` on the given connection and return its future."""
        return self.invoke(
            Invocation(self, message, connection=connection,
                       event_handler=event_handler),
            ignore_heartbeat)

    def invoke_on_partition(self, message, partition_id, invocation_timeout=None):
        """Invoke ``message`` on the owner of ``partition_id``.

        A truthy ``invocation_timeout`` overrides the invocation's timeout.
        """
        invocation = Invocation(self, message, partition_id=partition_id)
        if invocation_timeout:
            invocation.set_timeout(invocation_timeout)
        return self.invoke(invocation)

    def invoke_on_random_target(self, message):
        """Invoke ``message`` without a specific target."""
        return self.invoke(Invocation(self, message))

    def invoke_on_target(self, message, address):
        """Invoke ``message`` on the member at ``address``."""
        return self.invoke(Invocation(self, message, address=address))

    def invoke_smart(self, invocation, ignore_heartbeat=False):
        """Route the invocation by connection, partition owner, address or load balancer.

        Routing problems (no owner, target not a member, no address) are
        reported through :meth:`_handle_exception` instead of being raised.

        :return: the invocation's future.
        """
        if invocation.has_connection():
            self._send(invocation, invocation.connection, ignore_heartbeat)
        elif invocation.has_partition_id():
            addr = self._client.partition_service.get_partition_owner(
                invocation.partition_id)
            if addr is None:
                # The message previously lacked a placeholder, so the partition
                # id was silently dropped from it.
                self._handle_exception(
                    invocation,
                    IOError("Partition does not have an owner. "
                            "partition Id: {}".format(invocation.partition_id)))
            elif not self._is_member(addr):
                self._handle_exception(
                    invocation,
                    TargetNotMemberError("Partition owner '{}' "
                                         "is not a member.".format(addr)))
            else:
                self._send_to_address(invocation, addr)
        elif invocation.has_address():
            if not self._is_member(invocation.address):
                self._handle_exception(
                    invocation,
                    TargetNotMemberError("Target '{}' is not a member.".format(
                        invocation.address)))
            else:
                self._send_to_address(invocation, invocation.address)
        else:
            # send to random address
            addr = self._client.load_balancer.next_address()
            if addr is None:
                self._handle_exception(invocation,
                                       IOError("No address found to invoke"))
            else:
                self._send_to_address(invocation, addr)
        return invocation.future

    def invoke_non_smart(self, invocation, ignore_heartbeat=False):
        """Send over the invocation's own connection, else to the owner connection address.

        :return: the invocation's future.
        """
        if invocation.has_connection():
            self._send(invocation, invocation.connection, ignore_heartbeat)
        else:
            addr = self._client.cluster.owner_connection_address
            self._send_to_address(invocation, addr)
        return invocation.future

    def cleanup_connection(self, connection, cause):
        """Hand ``cause`` to every pending invocation sent over ``connection``."""
        # Snapshot the values: handling an exception may touch ``_pending``.
        for invocation in list(self._pending.values()):
            if invocation.sent_connection == connection:
                self._handle_exception(invocation, cause)

    def _init_invocation_retry_pause(self):
        """Read the retry pause from the client properties."""
        invocation_retry_pause = self._client.properties.get_seconds_positive_or_default(
            self._client.properties.INVOCATION_RETRY_PAUSE_MILLIS)
        return invocation_retry_pause

    def _init_invocation_timeout(self):
        """Read the invocation timeout from the client properties."""
        invocation_timeout = self._client.properties.get_seconds_positive_or_default(
            self._client.properties.INVOCATION_TIMEOUT_SECONDS)
        return invocation_timeout

    def _heartbeat_stopped(self, connection):
        """Fail every pending invocation that was sent over ``connection``."""
        for invocation in list(self._pending.values()):
            if invocation.sent_connection == connection:
                self._handle_exception(
                    invocation,
                    TargetDisconnectedError("%s has stopped heart beating." % connection))

    def _send_to_address(self, invocation, address, ignore_heartbeat=False):
        """Send over the existing connection to ``address`` or connect first."""
        try:
            conn = self._client.connection_manager.connections[address]
            self._send(invocation, conn, ignore_heartbeat)
        except KeyError:
            if self._client.lifecycle.state != LIFECYCLE_STATE_CONNECTED:
                self._handle_exception(
                    invocation, IOError("Client is not in connected state"))
            else:
                self._client.connection_manager.get_or_connect(address).continue_with(
                    self.on_connect, invocation, ignore_heartbeat)

    def on_connect(self, f, invocation, ignore_heartbeat):
        """Continuation of ``get_or_connect``: send on success, fail otherwise."""
        if f.is_success():
            self._send(invocation, f.result(), ignore_heartbeat)
        else:
            self._handle_exception(invocation, f.exception(), f.traceback())

    def _send(self, invocation, connection, ignore_heartbeat):
        """Assign a correlation id, register the invocation and write the message."""
        correlation_id = self._next_correlation_id.get_and_increment()
        message = invocation.request
        message.set_correlation_id(correlation_id)
        message.set_partition_id(invocation.partition_id)
        self._pending[correlation_id] = invocation
        if not invocation.timer:
            invocation.timer = self._client.reactor.add_timer_absolute(
                invocation.timeout, invocation.on_timeout)

        if invocation.event_handler is not None:
            self._listener_service.add_event_handler(correlation_id,
                                                     invocation.event_handler)

        self.logger.debug("Sending %s to %s", message, connection,
                          extra=self._logger_extras)

        if not ignore_heartbeat and not connection.heartbeating:
            self._handle_exception(
                invocation,
                TargetDisconnectedError("%s has stopped heart beating." % connection))
            return

        invocation.sent_connection = connection
        try:
            connection.send_message(message)
        except IOError as e:
            # The request never went out, so its event handler is unregistered.
            if invocation.event_handler is not None:
                self._listener_service.remove_event_handler(correlation_id)
            self._handle_exception(invocation, e)

    def _handle_client_message(self, message):
        """Dispatch an incoming message to the listener service or its invocation."""
        correlation_id = message.get_correlation_id()
        if message.has_flags(LISTENER_FLAG):
            self._listener_service.handle_client_message(message)
            return

        if correlation_id not in self._pending:
            self.logger.warning("Got message with unknown correlation id: %s",
                                message, extra=self._logger_extras)
            return

        invocation = self._pending.pop(correlation_id)
        if message.get_message_type() == EXCEPTION_MESSAGE_TYPE:
            error = create_exception(ErrorCodec(message))
            return self._handle_exception(invocation, error)

        invocation.set_response(message)

    def _handle_event(self, invocation, message):
        """Run the invocation's event handler; log (do not propagate) its errors."""
        try:
            invocation.event_handler(message)
        except Exception:
            # A failing user handler must not break message processing, but
            # SystemExit/KeyboardInterrupt are allowed to propagate.
            self.logger.warning("Error handling event %s", message,
                                exc_info=True, extra=self._logger_extras)

    def _handle_exception(self, invocation, error, traceback=None):
        """Fail the invocation's future or schedule a retry after the retry pause.

        The invocation fails immediately when the client is not live, when the
        error is not retryable for this invocation, or when the invocation's
        timeout has already passed.
        """
        if self.logger.isEnabledFor(logging.DEBUG):
            self.logger.debug("Got exception for request %s: %s: %s",
                              invocation.request, type(error).__name__, error,
                              extra=self._logger_extras)

        if not self._client.lifecycle.is_live:
            # Errors constructed without arguments have an empty ``args``.
            message = error.args[0] if error.args else str(error)
            invocation.set_exception(
                HazelcastClientNotActiveException(message), traceback)
            return

        if self._is_not_allowed_to_retry_on_selection(invocation, error):
            invocation.set_exception(error, traceback)
            return

        if not self._should_retry(invocation, error):
            invocation.set_exception(error, traceback)
            return

        if invocation.timeout < time.time():
            if self.logger.isEnabledFor(logging.DEBUG):
                self.logger.debug(
                    'Error will not be retried because invocation timed out: %s',
                    error, extra=self._logger_extras)
            # The traceback belongs to set_exception, not to the error's args.
            invocation.set_exception(
                TimeoutError(
                    '%s timed out because an error occurred after invocation timeout: %s'
                    % (invocation.request, error)),
                traceback)
            return

        invoke_func = functools.partial(self.invoke, invocation)
        self._client.reactor.add_timer(self.invocation_retry_pause, invoke_func)

    def _should_retry(self, invocation, error):
        """Return whether ``error`` permits retrying ``invocation``."""
        if isinstance(error, (IOError, HazelcastInstanceNotActiveError)) \
                or is_retryable_error(error):
            return True

        if isinstance(error, TargetDisconnectedError):
            return invocation.request.is_retryable() or self._is_redo_operation

        return False

    def _is_not_allowed_to_retry_on_selection(self, invocation, error):
        """Return whether the invocation's fixed target forbids a retry."""
        if invocation.connection is not None and isinstance(error, IOError):
            return True

        # When invocation is sent over an address, error is the TargetNotMemberError and the
        # member is not in the member list, we should not retry
        return invocation.address is not None and isinstance(error, TargetNotMemberError) \
            and not self._is_member(invocation.address)

    def _is_member(self, address):
        """Return whether the cluster service knows a member at ``address``."""
        return self._client.cluster.get_member_by_address(address) is not None
class ConnectionManager(object):
    """
    ConnectionManager is responsible for managing :mod:`Connection` objects.

    Authenticated connections are kept in ``active_connections`` keyed by the
    remote member UUID; connection attempts that are still authenticating are
    kept in ``_pending_connections`` keyed by address.
    """
    logger = logging.getLogger("HazelcastClient.ConnectionManager")

    def __init__(self, client, reactor, address_provider, lifecycle_service,
                 partition_service, cluster_service, invocation_service,
                 near_cache_manager, logger_extras):
        self.live = False
        self.active_connections = dict()  # member uuid -> connection
        self.client_uuid = uuid.uuid4()

        self._client = client
        self._reactor = reactor
        self._address_provider = address_provider
        self._lifecycle_service = lifecycle_service
        self._partition_service = partition_service
        self._cluster_service = cluster_service
        self._invocation_service = invocation_service
        self._near_cache_manager = near_cache_manager
        self._logger_extras = logger_extras
        config = self._client.config
        self._smart_routing_enabled = config.network.smart_routing
        self._wait_strategy = self._init_wait_strategy(config)
        self._reconnect_mode = config.connection_strategy.reconnect_mode
        self._heartbeat_manager = _HeartbeatManager(self, self._client, reactor,
                                                    invocation_service, logger_extras)
        self._connection_listeners = []
        self._connect_all_members_timer = None
        self._async_start = config.connection_strategy.async_start
        self._connect_to_cluster_thread_running = False
        self._pending_connections = dict()  # address -> authentication future
        props = self._client.properties
        self._shuffle_member_list = props.get_bool(props.SHUFFLE_MEMBER_LIST)
        self._lock = threading.RLock()
        self._connection_id_generator = AtomicInteger()
        self._labels = config.labels
        self._cluster_id = None
        self._load_balancer = None

    def add_listener(self, on_connection_opened=None, on_connection_closed=None):
        """
        Registers a ConnectionListener.

        If the same listener is registered multiple times, it will be notified multiple times.

        :param on_connection_opened: (Function), function to be called when a connection is opened.
        :param on_connection_closed: (Function), function to be called when a connection is removed.
        """
        self._connection_listeners.append(
            (on_connection_opened, on_connection_closed))

    def get_connection(self, member_uuid):
        """Return the active connection to ``member_uuid`` or ``None``."""
        return self.active_connections.get(member_uuid, None)

    def get_connection_from_address(self, address):
        """Return the active connection whose remote address is ``address`` or ``None``."""
        for connection in six.itervalues(self.active_connections):
            if address == connection.remote_address:
                return connection
        return None

    def get_random_connection(self):
        """Return a connection chosen by the load balancer, else any active one.

        :return: a connection, or ``None`` when there is no active connection.
        """
        if self._smart_routing_enabled:
            member = self._load_balancer.next()
            if member:
                connection = self.get_connection(member.uuid)
                if connection:
                    return connection

        # Fall back to the first active connection, if any.
        for connection in six.itervalues(self.active_connections):
            return connection

        return None

    def start(self, load_balancer):
        """Start the heartbeat manager and connect to the cluster (no-op if live)."""
        if self.live:
            return

        self.live = True
        self._load_balancer = load_balancer
        self._heartbeat_manager.start()
        self._connect_to_cluster()
        if self._smart_routing_enabled:
            self._start_connect_all_members_timer()

    def shutdown(self):
        """Stop timers and heartbeats, fail pending connects and close connections."""
        if not self.live:
            return

        self.live = False
        if self._connect_all_members_timer:
            self._connect_all_members_timer.cancel()

        self._heartbeat_manager.shutdown()

        # Iterate over a snapshot: failing a future may run callbacks that
        # modify the pending connections dictionary.
        for connection_future in list(six.itervalues(self._pending_connections)):
            connection_future.set_exception(
                HazelcastClientNotActiveError("Hazelcast client is shutting down"))

        # Need to create copy of connection values to avoid modification errors on runtime
        for connection in list(six.itervalues(self.active_connections)):
            connection.close("Hazelcast client is shutting down", None)

        self._connection_listeners = []
        self.active_connections.clear()
        self._pending_connections.clear()

    def connect_to_all_cluster_members(self):
        """Try to connect to every known member (smart routing only).

        This is best effort: a failure to connect to one member is logged at
        debug level and does not stop the remaining attempts.
        """
        if not self._smart_routing_enabled:
            return

        for member in self._cluster_service.get_members():
            try:
                self._get_or_connect(member.address).result()
            except Exception:
                self.logger.debug("Could not connect to member %s" % member,
                                  exc_info=True, extra=self._logger_extras)

    def on_connection_close(self, closed_connection, cause):
        """Remove ``closed_connection`` from the bookkeeping and notify listeners.

        Fires ``DISCONNECTED`` and triggers cluster reconnection when the last
        active connection is removed.
        """
        connected_address = closed_connection.connected_address
        remote_uuid = closed_connection.remote_uuid

        if not connected_address:
            self.logger.debug(
                "Destroying %s, but it has no remote address, hence nothing is "
                "removed from the connection dictionary" % closed_connection,
                extra=self._logger_extras)

        with self._lock:
            pending = self._pending_connections.pop(connected_address, None)
            connection = self.active_connections.pop(remote_uuid, None)

            if pending:
                pending.set_exception(cause)

            if connection:
                self.logger.info(
                    "Removed connection to %s:%s, connection: %s"
                    % (connected_address, remote_uuid, connection),
                    extra=self._logger_extras)
                if not self.active_connections:
                    self._lifecycle_service.fire_lifecycle_event(
                        LifecycleState.DISCONNECTED)
                    self._trigger_cluster_reconnection()

        if connection:
            for _, on_connection_closed in self._connection_listeners:
                if on_connection_closed:
                    try:
                        on_connection_closed(connection, cause)
                    except Exception:
                        # One failing listener must not prevent the others
                        # from being notified.
                        self.logger.exception(
                            "Exception in connection listener",
                            extra=self._logger_extras)
        else:
            if remote_uuid:
                self.logger.debug(
                    "Destroying %s, but there is no mapping for %s in the connection dictionary"
                    % (closed_connection, remote_uuid),
                    extra=self._logger_extras)

    def check_invocation_allowed(self):
        """Raise if there is no active connection.

        :raises ClientOfflineError: with async start or ASYNC reconnect mode.
        :raises IOError: otherwise.
        """
        if self.active_connections:
            return

        if self._async_start or self._reconnect_mode == RECONNECT_MODE.ASYNC:
            raise ClientOfflineError()
        else:
            raise IOError("No connection found to cluster")

    def _trigger_cluster_reconnection(self):
        """Shut the client down (reconnect mode OFF) or start reconnecting."""
        if self._reconnect_mode == RECONNECT_MODE.OFF:
            self.logger.info("Reconnect mode is OFF. Shutting down the client",
                             extra=self._logger_extras)
            self._shutdown_client()
            return

        if self._lifecycle_service.running:
            self._start_connect_to_cluster_thread()

    def _init_wait_strategy(self, config):
        """Build the wait strategy from the connection retry configuration."""
        retry_config = config.connection_strategy.connection_retry
        return _WaitStrategy(retry_config.initial_backoff,
                             retry_config.max_backoff,
                             retry_config.multiplier,
                             retry_config.cluster_connect_timeout,
                             retry_config.jitter,
                             self._logger_extras)

    def _start_connect_all_members_timer(self):
        """Schedule the recurring task that connects to not-yet-connected members."""
        # Addresses with a connection attempt in flight.
        connecting_addresses = set()

        def run():
            if not self._lifecycle_service.running:
                return

            for member in self._cluster_service.get_members():
                address = member.address

                if not self.get_connection_from_address(address) \
                        and address not in connecting_addresses:
                    connecting_addresses.add(address)
                    if not self._lifecycle_service.running:
                        break

                    if not self.get_connection(member.uuid):
                        # Bind ``address`` as a default argument: a plain closure
                        # would see the loop variable's last value when the
                        # callback finally runs and discard the wrong address.
                        self._get_or_connect(address).add_done_callback(
                            lambda f, address=address: connecting_addresses.discard(address))

            self._connect_all_members_timer = self._reactor.add_timer(1, run)

        self._connect_all_members_timer = self._reactor.add_timer(1, run)

    def _connect_to_cluster(self):
        """Connect in a background thread (async start) or synchronously."""
        if self._async_start:
            self._start_connect_to_cluster_thread()
        else:
            self._sync_connect_to_cluster()

    def _start_connect_to_cluster_thread(self):
        """Start the daemon thread that connects to the cluster, unless one is running."""
        with self._lock:
            if self._connect_to_cluster_thread_running:
                return

            self._connect_to_cluster_thread_running = True

        def run():
            try:
                while True:
                    self._sync_connect_to_cluster()
                    with self._lock:
                        if self.active_connections:
                            self._connect_to_cluster_thread_running = False
                            return
            except Exception:
                self.logger.exception(
                    "Could not connect to any cluster, shutting down the client",
                    extra=self._logger_extras)
                self._shutdown_client()

        t = threading.Thread(target=run, name='hazelcast_async_connection')
        t.daemon = True
        t.start()

    def _shutdown_client(self):
        """Shut the client down, logging (not propagating) any failure."""
        try:
            self._client.shutdown()
        except Exception:
            self.logger.exception("Exception during client shutdown",
                                  extra=self._logger_extras)

    def _sync_connect_to_cluster(self):
        """Try the possible addresses until one connects or the wait strategy gives up.

        :raises IllegalStateError: if no connection could be established.
        """
        tried_addresses = set()
        self._wait_strategy.reset()
        try:
            while True:
                for address in self._get_possible_addresses():
                    self._check_client_active()
                    tried_addresses.add(address)
                    connection = self._connect(address)
                    if connection:
                        return
                # If the address providers load no addresses (which seems to be possible),
                # then the above loop is not entered and the lifecycle check is missing,
                # hence we need to repeat the same check at this point.
                self._check_client_active()
                if not self._wait_strategy.sleep():
                    break
        except (ClientNotAllowedInClusterError, InvalidConfigurationError):
            cluster_name = self._client.config.cluster_name
            self.logger.exception("Stopped trying on cluster %s" % cluster_name,
                                  extra=self._logger_extras)

        cluster_name = self._client.config.cluster_name
        self.logger.info(
            "Unable to connect to any address from the cluster with name: %s. "
            "The following addresses were tried: %s" % (cluster_name, tried_addresses),
            extra=self._logger_extras)
        if self._lifecycle_service.running:
            msg = "Unable to connect to any cluster"
        else:
            msg = "Client is being shutdown"
        raise IllegalStateError(msg)

    def _connect(self, address):
        """Connect to ``address`` and wait for the result.

        :return: the connection, or ``None`` if the attempt failed with an
            error other than the two re-raised below.
        :raises ClientNotAllowedInClusterError: re-raised after logging.
        :raises InvalidConfigurationError: re-raised after logging.
        """
        self.logger.info("Trying to connect to %s" % address,
                         extra=self._logger_extras)
        try:
            return self._get_or_connect(address).result()
        except (ClientNotAllowedInClusterError, InvalidConfigurationError) as e:
            self.logger.warning("Error during initial connection to %s: %s" % (address, e),
                                extra=self._logger_extras)
            # Bare raise keeps the original traceback.
            raise
        except Exception as e:
            self.logger.warning("Error during initial connection to %s: %s" % (address, e),
                                extra=self._logger_extras)
            return None

    def _get_or_connect(self, address):
        """Return a future for the connection to ``address``.

        Reuses an active connection or a pending attempt when there is one;
        otherwise creates a connection and starts authenticating it.
        """
        connection = self.get_connection_from_address(address)
        if connection:
            return ImmediateFuture(connection)

        with self._lock:
            # Check again now that the lock is held.
            connection = self.get_connection_from_address(address)
            if connection:
                return ImmediateFuture(connection)
            else:
                pending = self._pending_connections.get(address, None)
                if pending:
                    return pending
                else:
                    try:
                        translated = self._address_provider.translate(address)
                        if not translated:
                            return ImmediateExceptionFuture(
                                ValueError(
                                    "Address translator could not translate address %s"
                                    % address))

                        factory = self._reactor.connection_factory
                        connection = factory(
                            self,
                            self._connection_id_generator.get_and_increment(),
                            translated,
                            self._client.config.network,
                            self._invocation_service.handle_client_message)
                    except IOError:
                        return ImmediateExceptionFuture(
                            sys.exc_info()[1], sys.exc_info()[2])

                    future = self._authenticate(connection).continue_with(
                        self._on_auth, connection, address)
                    self._pending_connections[address] = future
                    return future

    def _authenticate(self, connection):
        """Send the authentication request over ``connection`` and return its future."""
        client = self._client
        cluster_name = client.config.cluster_name
        client_name = client.name
        request = client_authentication_codec.encode_request(
            cluster_name, None, None, self.client_uuid, CLIENT_TYPE,
            SERIALIZATION_VERSION, CLIENT_VERSION, client_name, self._labels)

        invocation = Invocation(request, connection=connection, urgent=True,
                                response_handler=lambda m: m)
        self._invocation_service.invoke(invocation)
        return invocation.future

    def _on_auth(self, response, connection, address):
        """Continuation of the authentication request.

        :return: the connection when the status is ``AUTHENTICATED``.
        :raises: an error describing the failed status, or the original error
            of the request; in both cases the connection is closed first.
        """
        if response.is_success():
            response = client_authentication_codec.decode_response(
                response.result())
            status = response["status"]
            if status == _AuthenticationStatus.AUTHENTICATED:
                return self._handle_successful_auth(response, connection, address)

            if status == _AuthenticationStatus.CREDENTIALS_FAILED:
                err = AuthenticationError(
                    "Authentication failed. The configured cluster name on "
                    "the client does not match the one configured in the cluster."
                )
            elif status == _AuthenticationStatus.NOT_ALLOWED_IN_CLUSTER:
                err = ClientNotAllowedInClusterError(
                    "Client is not allowed in the cluster")
            elif status == _AuthenticationStatus.SERIALIZATION_VERSION_MISMATCH:
                err = IllegalStateError(
                    "Server serialization version does not match to client")
            else:
                err = AuthenticationError(
                    "Authentication status code not supported. status: %s" % status)

            connection.close("Failed to authenticate connection", err)
            raise err
        else:
            e = response.exception()
            connection.close("Failed to authenticate connection", e)
            self._pending_connections.pop(address, None)
            six.reraise(e.__class__, e, response.traceback())

    def _handle_successful_auth(self, response, connection, address):
        """Register the authenticated connection and notify listeners.

        :return: the connection.
        """
        self._check_partition_count(response["partition_count"])

        server_version_str = response["server_hazelcast_version"]
        remote_address = response["address"]
        remote_uuid = response["member_uuid"]

        connection.remote_address = remote_address
        connection.server_version = calculate_version(server_version_str)
        connection.remote_uuid = remote_uuid

        new_cluster_id = response["cluster_id"]

        is_initial_connection = not self.active_connections
        changed_cluster = is_initial_connection and self._cluster_id is not None \
            and self._cluster_id != new_cluster_id
        if changed_cluster:
            self.logger.warning(
                "Switching from current cluster: %s to new cluster: %s"
                % (self._cluster_id, new_cluster_id),
                extra=self._logger_extras)
            self._on_cluster_restart()

        with self._lock:
            self.active_connections[response["member_uuid"]] = connection
            self._pending_connections.pop(address, None)

        if is_initial_connection:
            self._cluster_id = new_cluster_id
            self._lifecycle_service.fire_lifecycle_event(LifecycleState.CONNECTED)

        self.logger.info(
            "Authenticated with server %s:%s, server version: %s, local address: %s"
            % (remote_address, remote_uuid, server_version_str, connection.local_address),
            extra=self._logger_extras)

        for on_connection_opened, _ in self._connection_listeners:
            if on_connection_opened:
                try:
                    on_connection_opened(connection)
                except Exception:
                    # One failing listener must not prevent the others
                    # from being notified.
                    self.logger.exception("Exception in connection listener",
                                          extra=self._logger_extras)

        if not connection.live:
            # The connection is already dead at this point; run the close
            # handling so it is removed from the bookkeeping again.
            self.on_connection_close(connection, None)

        return connection

    def _on_cluster_restart(self):
        """Clear the near caches and the member list version."""
        self._near_cache_manager.clear_near_caches()
        self._cluster_service.clear_member_list_version()

    def _check_partition_count(self, partition_count):
        """Raise if the partition service rejects the member's partition count.

        :raises ClientNotAllowedInClusterError: on a partition count mismatch.
        """
        if not self._partition_service.check_and_set_partition_count(partition_count):
            raise ClientNotAllowedInClusterError(
                "Client can not work with this cluster because it has a "
                "different partition count. Expected partition count: %d, "
                "Member partition count: %d"
                % (self._partition_service.partition_count, partition_count))

    def _check_client_active(self):
        """Raise ``HazelcastClientNotActiveError`` if the lifecycle service is not running."""
        if not self._lifecycle_service.running:
            raise HazelcastClientNotActiveError()

    def _get_possible_addresses(self):
        """Return an iterator over candidate addresses without duplicates.

        Known member addresses come first, followed by the primary and then
        the secondary addresses of the address provider; each group is
        shuffled when member list shuffling is enabled.
        """
        member_addresses = list(
            map(lambda m: (m.address, None), self._cluster_service.get_members()))

        if self._shuffle_member_list:
            random.shuffle(member_addresses)

        # An OrderedDict is used as an insertion-ordered set of addresses.
        addresses = OrderedDict(member_addresses)
        primaries, secondaries = self._address_provider.load_addresses()
        if self._shuffle_member_list:
            random.shuffle(primaries)
            random.shuffle(secondaries)

        for address in primaries:
            addresses[address] = None

        for address in secondaries:
            addresses[address] = None

        return six.iterkeys(addresses)
class InvocationService(object):
    """Sends invocations over client connections and completes them.

    Every invocation that is handed to a connection is registered in
    ``_pending`` under the correlation id written into its request, so that
    ``handle_client_message`` can match an incoming message to the invocation
    that is waiting for it. Failures are funnelled through
    ``_handle_exception``, which either completes the invocation with an
    error or schedules another attempt on the reactor.
    """

    logger = logging.getLogger("HazelcastClient.InvocationService")

    def __init__(self, client, reactor, logger_extras):
        """Creates the service; ``start`` must be called before invoking.

        Args:
            client: Owning client; its ``config``, ``properties`` and
                ``lifecycle_service`` are read by this service.
            reactor: Object whose ``add_timer`` is used to schedule retries.
            logger_extras: Value passed as ``extra`` to every log call.
        """
        config = client.config
        # ``invoke`` is bound once to the routing strategy selected by the
        # configuration, so callers always go through ``self.invoke``.
        if config.network.smart_routing:
            self.invoke = self._invoke_smart
        else:
            self.invoke = self._invoke_non_smart

        self._client = client
        self._reactor = reactor
        self._logger_extras = logger_extras
        # Collaborators are injected later by ``start``.
        self._partition_service = None
        self._connection_manager = None
        self._listener_service = None
        self._check_invocation_allowed_fn = None
        # correlation id -> invocation that was handed to a connection
        self._pending = {}
        self._next_correlation_id = AtomicInteger(1)
        self._is_redo_operation = config.network.redo_operation
        self._invocation_timeout = self._init_invocation_timeout()
        self._invocation_retry_pause = self._init_invocation_retry_pause()
        self._shutdown = False

    def start(self, partition_service, connection_manager, listener_service):
        """Wires in the services this one depends on.

        Args:
            partition_service: Used to look up partition owners.
            connection_manager: Used to look up connections; its
                ``check_invocation_allowed`` is called before non-urgent
                invocations.
            listener_service: Receives event messages and event handlers.
        """
        self._partition_service = partition_service
        self._connection_manager = connection_manager
        self._listener_service = listener_service
        self._check_invocation_allowed_fn = connection_manager.check_invocation_allowed

    def handle_client_message(self, message):
        """Routes an incoming message to the listener service or an invocation.

        Messages carrying the event flag are forwarded to the listener
        service. Any other message completes (with a response or an error)
        the pending invocation registered under its correlation id.

        Args:
            message: Incoming client message.
        """
        correlation_id = message.get_correlation_id()

        if message.start_frame.has_event_flag():
            self._listener_service.handle_client_message(message, correlation_id)
            return

        invocation = self._pending.pop(correlation_id, None)
        if not invocation:
            self.logger.warning(
                "Got message with unknown correlation id: %s",
                message,
                extra=self._logger_extras,
            )
            return

        if message.get_message_type() == EXCEPTION_MESSAGE_TYPE:
            error = create_error_from_message(message)
            return self._handle_exception(invocation, error)

        invocation.set_response(message)

    def shutdown(self):
        """Marks the service as shut down and fails the pending invocations."""
        self._shutdown = True
        # Iterate over a snapshot: handling an error removes entries from
        # ``_pending`` while we are looping.
        for invocation in list(self._pending.values()):
            self._handle_exception(invocation, HazelcastClientNotActiveError())

    def _invoke_on_partition_owner(self, invocation, partition_id):
        """Sends the invocation to the owner of ``partition_id``.

        Returns:
            bool: ``False`` when the partition has no owner yet or the send
            did not succeed, ``True`` otherwise.
        """
        owner_uuid = self._partition_service.get_partition_owner(partition_id)
        if not owner_uuid:
            self.logger.debug(
                "Partition owner is not assigned yet", extra=self._logger_extras
            )
            return False
        return self._invoke_on_target(invocation, owner_uuid)

    def _invoke_on_target(self, invocation, owner_uuid):
        """Sends the invocation over the connection registered for ``owner_uuid``.

        Returns:
            bool: ``False`` when there is no such connection or the send did
            not succeed, ``True`` otherwise.
        """
        connection = self._connection_manager.get_connection(owner_uuid)
        if not connection:
            self.logger.debug(
                "Client is not connected to target: %s",
                owner_uuid,
                extra=self._logger_extras,
            )
            return False
        return self._send(invocation, connection)

    def _invoke_on_random_connection(self, invocation):
        """Sends the invocation over a connection picked by the connection manager.

        Returns:
            bool: ``False`` when no connection is available or the send did
            not succeed, ``True`` otherwise.
        """
        connection = self._connection_manager.get_random_connection()
        if not connection:
            self.logger.debug("No connection found to invoke", extra=self._logger_extras)
            return False
        return self._send(invocation, connection)

    def _invoke_on_bound_connection(self, invocation):
        """Handles an invocation that is bound to a specific connection.

        Returns:
            bool: ``True`` when ``invocation.connection`` is set, meaning the
            invocation was fully handled here (sent, or failed through
            ``_handle_exception``); ``False`` when it is not bound and the
            caller has to route it.
        """
        connection = invocation.connection
        if not connection:
            return False

        if not self._send(invocation, connection):
            self._handle_exception(
                invocation, IOError("Could not invoke on connection %s" % connection)
            )
        return True

    def _invoke_smart(self, invocation):
        """Invokes with smart routing.

        The target is, in order of preference: the bound connection, the
        owner of the invocation's partition, the member with the invocation's
        uuid, or a random connection. When the preferred target cannot be
        used, a random connection is tried once before failing.
        """
        if not invocation.timeout:
            invocation.timeout = self._invocation_timeout + time.time()

        try:
            if not invocation.urgent:
                self._check_invocation_allowed_fn()

            if self._invoke_on_bound_connection(invocation):
                return

            if invocation.partition_id != -1:
                invoked = self._invoke_on_partition_owner(
                    invocation, invocation.partition_id
                )
            elif invocation.uuid:
                invoked = self._invoke_on_target(invocation, invocation.uuid)
            else:
                invoked = self._invoke_on_random_connection(invocation)

            # Fall back to any connection when the preferred target failed.
            if not invoked:
                invoked = self._invoke_on_random_connection(invocation)

            if not invoked:
                self._handle_exception(
                    invocation, IOError("No connection found to invoke")
                )
        except Exception as e:
            self._handle_exception(invocation, e)

    def _invoke_non_smart(self, invocation):
        """Invokes without smart routing.

        The invocation goes to its bound connection when it has one and to a
        random connection otherwise.
        """
        if not invocation.timeout:
            invocation.timeout = self._invocation_timeout + time.time()

        try:
            if not invocation.urgent:
                self._check_invocation_allowed_fn()

            if self._invoke_on_bound_connection(invocation):
                return

            if not self._invoke_on_random_connection(invocation):
                self._handle_exception(
                    invocation, IOError("No connection found to invoke")
                )
        except Exception as e:
            self._handle_exception(invocation, e)

    def _init_invocation_retry_pause(self):
        """Reads the pause between retries from the client properties."""
        properties = self._client.properties
        return properties.get_seconds_positive_or_default(
            properties.INVOCATION_RETRY_PAUSE_MILLIS
        )

    def _init_invocation_timeout(self):
        """Reads the invocation timeout from the client properties."""
        properties = self._client.properties
        return properties.get_seconds_positive_or_default(
            properties.INVOCATION_TIMEOUT_SECONDS
        )

    def _send(self, invocation, connection):
        """Registers the invocation as pending and writes it to ``connection``.

        Args:
            invocation: Invocation to send.
            connection: Connection to send the request over.

        Returns:
            bool: ``True`` if the connection accepted the message. On
            ``False`` the registrations made for this attempt have been
            rolled back.

        Raises:
            HazelcastClientNotActiveError: If the service has been shut down.
        """
        if self._shutdown:
            raise HazelcastClientNotActiveError()

        correlation_id = self._next_correlation_id.get_and_increment()
        message = invocation.request
        message.set_correlation_id(correlation_id)
        message.set_partition_id(invocation.partition_id)
        # Register before sending so the entry is already in place when the
        # response is handled.
        self._pending[correlation_id] = invocation

        if invocation.event_handler:
            self._listener_service.add_event_handler(
                correlation_id, invocation.event_handler
            )

        self.logger.debug(
            "Sending %s to %s", message, connection, extra=self._logger_extras
        )

        if connection.send_message(message):
            return True

        # The message was not sent, so nothing will ever arrive for this
        # correlation id. Drop both registrations; the next attempt gets a
        # fresh id and would otherwise leave this entry behind forever.
        self._pending.pop(correlation_id, None)
        if invocation.event_handler:
            self._listener_service.remove_event_handler(correlation_id)
        return False

    def _complete_with_error(self, invocation, error, traceback):
        """Fails the invocation with ``error`` and forgets its pending entry."""
        invocation.set_exception(error, traceback)
        self._pending.pop(invocation.request.get_correlation_id(), None)

    def _handle_exception(self, invocation, error, traceback=None):
        """Fails the invocation or schedules another attempt.

        The invocation is completed with an error when the client is no
        longer running, when ``_should_retry`` rejects the error, or when the
        invocation's deadline has passed. Otherwise ``self.invoke`` is
        scheduled on the reactor after the retry pause.

        Args:
            invocation: Invocation that failed.
            error: The failure.
            traceback: Optional traceback handed to ``set_exception``.
        """
        self.logger.debug(
            "Got exception for request %s, error: %s",
            invocation.request,
            error,
            extra=self._logger_extras,
        )

        if not self._client.lifecycle_service.is_running():
            self._complete_with_error(
                invocation, HazelcastClientNotActiveError(), traceback
            )
            return

        if not self._should_retry(invocation, error):
            self._complete_with_error(invocation, error, traceback)
            return

        if invocation.timeout < time.time():
            self.logger.debug(
                "Error will not be retried because invocation timed out: %s",
                error,
                extra=self._logger_extras,
            )
            # The traceback goes to ``set_exception`` like in the branches
            # above, not into the exception's constructor.
            timeout_error = HazelcastTimeoutError(
                "Request timed out because an error occurred after "
                "invocation timeout: %s" % error
            )
            self._complete_with_error(invocation, timeout_error, traceback)
            return

        invoke_func = functools.partial(self.invoke, invocation)
        self._reactor.add_timer(self._invocation_retry_pause, invoke_func)

    def _should_retry(self, invocation, error):
        """Decides whether a failed invocation may be attempted again.

        Args:
            invocation: Invocation that failed.
            error: The failure.

        Returns:
            bool: ``True`` if the invocation should be retried.
        """
        io_failure = isinstance(error, IOError)

        # An invocation bound to a connection is only ever re-sent on that
        # same connection, so an I/O failure or a disconnect on it cannot be
        # cured by retrying.
        if invocation.connection and (
            io_failure or isinstance(error, TargetDisconnectedError)
        ):
            return False

        # The invocation targets a specific member that is no longer part of
        # the cluster.
        if invocation.uuid and isinstance(error, TargetNotMemberError):
            return False

        if (
            io_failure
            or isinstance(error, HazelcastInstanceNotActiveError)
            or is_retryable_error(error)
        ):
            return True

        # After a disconnect the request may already have been executed, so
        # it is only re-sent when the request is marked retryable or redo is
        # enabled in the configuration.
        if isinstance(error, TargetDisconnectedError):
            return invocation.request.retryable or self._is_redo_operation

        return False