class IdGenerator(Proxy):
    """ID generator that hands out IDs in blocks of ``BLOCK_SIZE``.

    The backing atomic long supplies the number of the current block (kept in
    ``_local``), while ``_residue`` counts how many IDs of that block have
    already been handed out locally.
    """

    def __init__(self, client, service_name, name, atomic_long):
        super(IdGenerator, self).__init__(client, service_name, name)
        self._atomic_long = atomic_long
        # Starting the residue at BLOCK_SIZE forces the first new_id() call
        # to reserve a block from the atomic long.
        self._residue = AtomicInteger(BLOCK_SIZE)
        self._local = AtomicInteger(-1)
        # Re-entrant, because new_id() calls itself while holding the lock.
        self._lock = threading.RLock()

    def _on_destroy(self):
        """Destroy the backing atomic long."""
        self._atomic_long.destroy()

    def init(self, initial):
        """Try to initialize the generator so that IDs continue after ``initial``.

        Args:
            initial: The value after which new IDs should start. Must be
                greater than zero.

        Returns:
            ``False`` if ``initial`` is not positive; otherwise the result of
            the compare-and-set on the backing atomic long (truthy only when
            the atomic long was still ``0``).
        """
        # The original compared the builtin ``id`` instead of the argument.
        if initial <= 0:
            return False
        # Floor division keeps the block number an integer on Python 3 too.
        step = initial // BLOCK_SIZE
        with self._lock:
            init = self._atomic_long.compare_and_set(0, step + 1)
            if init:
                self._local.set(step)
                self._residue.set((initial % BLOCK_SIZE) + 1)
            return init

    def new_id(self):
        """Return the next ID, reserving a new block when the current one is used up.

        Returns:
            ``block_number * BLOCK_SIZE + offset_in_block``.
        """
        val = self._residue.get_and_increment()
        if val >= BLOCK_SIZE:
            with self._lock:
                # Re-check under the lock: another thread may already have
                # reserved a fresh block.
                val = self._residue.get()
                if val >= BLOCK_SIZE:
                    increment = self._atomic_long.get_and_increment()
                    self._local.set(increment)
                    self._residue.set(0)
                return self.new_id()
        get = self._local.get()
        return get * BLOCK_SIZE + val
# Example no. 2
# 0
class ConnectionManager:
    """ConnectionManager is responsible for managing ``Connection`` objects."""
    def __init__(
        self,
        client,
        config,
        reactor,
        address_provider,
        lifecycle_service,
        partition_service,
        cluster_service,
        invocation_service,
        near_cache_manager,
        send_state_to_cluster_fn,
    ):
        """Store the collaborators and read the connection settings from ``config``.

        Args:
            client: The owning client; its ``name`` is sent during authentication
                and its ``shutdown()`` is called when reconnection is not possible.
            config: Client configuration object the settings below are read from.
            reactor: Provides ``connection_factory`` and ``add_timer``.
            address_provider: Translates addresses and loads candidate addresses.
            lifecycle_service: Used to check ``running`` and to fire lifecycle events.
            partition_service: Used to validate the partition count of the cluster.
            cluster_service: Source of the current member list.
            invocation_service: Used to send the authentication invocations.
            near_cache_manager: Cleared when the client switches clusters.
            send_state_to_cluster_fn: Callable taking no arguments that returns a
                future; invoked after switching to a different cluster.
        """
        self.live = False
        self.active_connections = {
        }  # uuid to connection, must be modified under the _lock
        # Random identifier of this client, passed to the authentication codecs.
        self.client_uuid = uuid.uuid4()

        self._client = client
        self._config = config
        self._reactor = reactor
        self._address_provider = address_provider
        self._lifecycle_service = lifecycle_service
        self._partition_service = partition_service
        self._cluster_service = cluster_service
        self._invocation_service = invocation_service
        self._near_cache_manager = near_cache_manager
        self._send_state_to_cluster_fn = send_state_to_cluster_fn
        self._client_state = _ClientState.INITIAL  # must be modified under the _lock
        self._smart_routing_enabled = config.smart_routing
        self._wait_strategy = self._init_wait_strategy(config)
        self._reconnect_mode = config.reconnect_mode
        # The heartbeat manager receives this (still partially initialized)
        # instance, so the statement order here should not be changed lightly.
        self._heartbeat_manager = _HeartbeatManager(self, self._client, config,
                                                    reactor,
                                                    invocation_service)
        # List of (on_connection_opened, on_connection_closed) tuples.
        self._connection_listeners = []
        self._connect_all_members_timer = None
        self._async_start = config.async_start
        # Guards against starting more than one connect-to-cluster thread.
        self._connect_to_cluster_thread_running = False
        self._shuffle_member_list = config.shuffle_member_list
        self._lock = threading.RLock()
        self._connection_id_generator = AtomicInteger()
        self._labels = frozenset(config.labels)
        # Id of the cluster last connected to; None until the first successful auth.
        self._cluster_id = None
        # Set in start().
        self._load_balancer = None
        # Public addresses are only used with the default address provider.
        self._use_public_ip = (isinstance(
            address_provider, DefaultAddressProvider) and config.use_public_ip)

    def add_listener(self,
                     on_connection_opened=None,
                     on_connection_closed=None):
        """Registers a ConnectionListener.

        If the same listener is registered multiple times, it will be notified multiple times.

        Args:
            on_connection_opened (function): Function to be called when a connection is opened. (Default value = None)
            on_connection_closed (function): Function to be called when a connection is removed. (Default value = None)
        """
        self._connection_listeners.append(
            (on_connection_opened, on_connection_closed))

    def get_connection(self, member_uuid):
        return self.active_connections.get(member_uuid, None)

    def get_random_connection(self):
        # Try getting the connection from the load balancer, if smart routing is enabled
        if self._smart_routing_enabled:
            member = self._load_balancer.next()
            if member:
                connection = self.get_connection(member.uuid)
                if connection:
                    return connection

        # Otherwise iterate over connections and return the first one
        for connection in list(self.active_connections.values()):
            return connection

        # Failed to get a connection
        return None

    def get_random_connection_for_sql(self):
        """Returns a random connection for SQL.

        The connection is tried to be selected in the following order.

            - Random connection to a data member from the larger same-version
              group.
            - Random connection to a data member.
            - Any random connection
            - ``None``, if there is no connection.

        Returns:
            Connection: A random connection for SQL.
        """
        if self._smart_routing_enabled:
            # There might be a race - the chosen member might be just connected or disconnected.
            # Try a couple of times, the member_of_larger_same_version_group returns a random
            # connection, we might be lucky...
            for _ in range(_SQL_CONNECTION_RANDOM_ATTEMPTS):
                members = self._cluster_service.get_members()
                member = member_of_larger_same_version_group(members)
                if not member:
                    break

                connection = self.get_connection(member.uuid)
                if connection:
                    return connection

        # Otherwise iterate over connections and return the first one
        # that's not to a lite member.
        first_connection = None
        for member_uuid, connection in list(self.active_connections.items()):
            if not first_connection:
                first_connection = connection

            member = self._cluster_service.get_member(member_uuid)
            if not member or member.lite_member:
                continue

            return connection

        # Failed to get a connection to a data member.
        return first_connection

    def start(self, load_balancer):
        if self.live:
            return

        self.live = True
        self._load_balancer = load_balancer
        self._heartbeat_manager.start()
        self._connect_to_cluster()

    def shutdown(self):
        if not self.live:
            return

        self.live = False
        if self._connect_all_members_timer:
            self._connect_all_members_timer.cancel()

        self._heartbeat_manager.shutdown()

        # Need to create copy of connection values to avoid modification errors on runtime
        for connection in list(self.active_connections.values()):
            connection.close_connection("Hazelcast client is shutting down",
                                        None)

        self.active_connections.clear()
        del self._connection_listeners[:]

    def connect_to_all_cluster_members(self, sync_start):
        if not self._smart_routing_enabled:
            return

        if sync_start:
            for member in self._cluster_service.get_members():
                try:
                    self._get_or_connect_to_member(member).result()
                except:
                    pass

        self._start_connect_all_members_timer()

    def on_connection_close(self, closed_connection):
        """Remove a closed connection from the registry and react to the loss.

        If the connection was the registered one for its member UUID it is
        removed and the ``on_connection_closed`` listeners are notified. When
        it was the last active connection, a cluster reconnection is
        triggered, and a ``DISCONNECTED`` lifecycle event is fired if the
        client had been initialized on the cluster.

        Args:
            closed_connection: The connection that has been closed.
        """
        remote_uuid = closed_connection.remote_uuid
        remote_address = closed_connection.remote_address

        # remote_address is assigned in _handle_successful_auth; without it
        # there is nothing to remove.
        if not remote_address:
            _logger.debug(
                "Destroying %s, but it has no remote address, hence nothing is "
                "removed from the connection dictionary",
                closed_connection,
            )
            return

        # Decisions are taken under the lock; the resulting side effects are
        # executed after the lock has been released.
        disconnected = False
        removed = False
        trigger_reconnection = False
        with self._lock:
            connection = self.active_connections.get(remote_uuid, None)
            # Remove the entry only if it still refers to the connection
            # that is being closed.
            if connection == closed_connection:
                self.active_connections.pop(remote_uuid, None)
                removed = True
                _logger.info(
                    "Removed connection to %s:%s, connection: %s",
                    remote_address,
                    remote_uuid,
                    connection,
                )

                # That was the last connection.
                if not self.active_connections:
                    trigger_reconnection = True
                    if self._client_state == _ClientState.INITIALIZED_ON_CLUSTER:
                        disconnected = True

        if disconnected:
            self._lifecycle_service.fire_lifecycle_event(
                LifecycleState.DISCONNECTED)

        if trigger_reconnection:
            self._trigger_cluster_reconnection()

        if removed:
            for _, on_connection_closed in self._connection_listeners:
                if on_connection_closed:
                    # A failing listener must not prevent the others from
                    # being notified.
                    try:
                        on_connection_closed(closed_connection)
                    except:
                        _logger.exception("Exception in connection listener")
        else:
            _logger.debug(
                "Destroying %s, but there is no mapping for %s in the connection dictionary",
                closed_connection,
                remote_uuid,
            )

    def check_invocation_allowed(self):
        """Raise if an invocation cannot be sent to the cluster right now.

        Returns normally only when the client is initialized on the cluster
        and has at least one active connection.

        Raises:
            ClientOfflineError: If the client is still starting with async
                start enabled, or it is disconnected and the reconnect mode
                is ``ASYNC``.
            IOError: In every other case where there is no usable connection.
        """
        current = self._client_state
        if current == _ClientState.INITIALIZED_ON_CLUSTER and self.active_connections:
            return

        if current == _ClientState.INITIAL:
            # The client has not finished its first connection attempt yet.
            if self._async_start:
                raise ClientOfflineError()
            raise IOError(
                "No connection found to cluster since the client is starting."
            )

        if self._reconnect_mode == ReconnectMode.ASYNC:
            raise ClientOfflineError()

        raise IOError("No connection found to cluster")

    def _get_or_connect_to_address(self, address):
        for connection in list(self.active_connections.values()):
            if connection.remote_address == address:
                return ImmediateFuture(connection)

        try:
            translated = self._translate(address)
            connection = self._create_connection(translated)
            return self._authenticate(connection).continue_with(
                self._on_auth, connection)
        except Exception as e:
            return ImmediateExceptionFuture(e)

    def _get_or_connect_to_member(self, member):
        connection = self.active_connections.get(member.uuid, None)
        if connection:
            return ImmediateFuture(connection)

        try:
            translated = self._translate_member_address(member)
            connection = self._create_connection(translated)
            return self._authenticate(connection).continue_with(
                self._on_auth, connection)
        except Exception as e:
            return ImmediateExceptionFuture(e)

    def _create_connection(self, address):
        factory = self._reactor.connection_factory
        return factory(
            self,
            self._connection_id_generator.get_and_increment(),
            address,
            self._config,
            self._invocation_service.handle_client_message,
        )

    def _translate(self, address):
        translated = self._address_provider.translate(address)
        if not translated:
            raise ValueError(
                "Address provider %s could not translate address %s" %
                (self._address_provider.__class__.__name__, address))

        return translated

    def _translate_member_address(self, member):
        if self._use_public_ip:
            public_address = member.address_map.get(
                _CLIENT_PUBLIC_ENDPOINT_QUALIFIER, None)
            if public_address:
                return public_address

            return member.address

        return self._translate(member.address)

    def _trigger_cluster_reconnection(self):
        """React to the loss of the last connection.

        With reconnect mode ``OFF`` the client is shut down. In any other
        mode the connect-to-cluster thread is started, provided the client
        is still running.
        """
        if self._reconnect_mode != ReconnectMode.OFF:
            if self._lifecycle_service.running:
                self._start_connect_to_cluster_thread()
            return

        _logger.info("Reconnect mode is OFF. Shutting down the client")
        self._shutdown_client()

    def _init_wait_strategy(self, config):
        """Build the wait strategy used between cluster connection attempts.

        Args:
            config: Client configuration providing the retry settings.

        Returns:
            _WaitStrategy: Strategy configured from the retry settings.
        """
        # A timeout of -1 means "no timeout", which is represented by an
        # infinite cluster connect timeout.
        timeout = config.cluster_connect_timeout
        timeout = _INF if timeout == -1 else timeout

        initial_backoff = config.retry_initial_backoff
        max_backoff = config.retry_max_backoff
        multiplier = config.retry_multiplier
        jitter = config.retry_jitter
        return _WaitStrategy(
            initial_backoff, max_backoff, multiplier, timeout, jitter)

    def _start_connect_all_members_timer(self):
        connecting_uuids = set()

        def run():
            if not self._lifecycle_service.running:
                return

            for member in self._cluster_service.get_members():
                member_uuid = member.uuid

                if self.active_connections.get(member_uuid, None):
                    continue

                if member_uuid in connecting_uuids:
                    continue

                connecting_uuids.add(member_uuid)
                if not self._lifecycle_service.running:
                    break

                # Bind the bound_member_uuid to the value
                # in this loop iteration
                def cb(_, bound_member_uuid=member_uuid):
                    connecting_uuids.discard(bound_member_uuid)

                self._get_or_connect_to_member(member).add_done_callback(cb)

            self._connect_all_members_timer = self._reactor.add_timer(1, run)

        self._connect_all_members_timer = self._reactor.add_timer(1, run)

    def _connect_to_cluster(self):
        if self._async_start:
            self._start_connect_to_cluster_thread()
        else:
            self._sync_connect_to_cluster()

    def _start_connect_to_cluster_thread(self):
        """Start a daemon thread that connects to the cluster, unless one is running.

        The thread repeats ``_sync_connect_to_cluster`` until there is at
        least one active connection. If connecting raises, the client is shut
        down.
        """
        # Check-and-set under the lock so that only one thread is started.
        with self._lock:
            if self._connect_to_cluster_thread_running:
                return

            self._connect_to_cluster_thread_running = True

        def run():
            try:
                while True:
                    self._sync_connect_to_cluster()
                    # The connection established above may already be gone
                    # again; only stop once a connection is registered.
                    with self._lock:
                        if self.active_connections:
                            self._connect_to_cluster_thread_running = False
                            return
            except:
                # Note that the running flag is not reset on this path.
                _logger.exception(
                    "Could not connect to any cluster, shutting down the client"
                )
                self._shutdown_client()

        t = threading.Thread(target=run, name="hazelcast_async_connection")
        t.daemon = True
        t.start()

    def _shutdown_client(self):
        """Shut the owning client down, logging instead of raising on failure."""
        try:
            self._client.shutdown()
        except:
            _logger.exception("Exception during client shutdown")

    def _sync_connect_to_cluster(self):
        """Block until a connection to the cluster is established.

        Each round first tries the known members and then the addresses
        loaded from the address provider, returning as soon as one attempt
        yields a connection. Between rounds the wait strategy decides whether
        to sleep and try again or to give up.

        Raises:
            IllegalStateError: If no connection could be established, either
                because the wait strategy gave up or because the client is
                not allowed in the cluster / the configuration is invalid.
            HazelcastClientNotActiveError: Propagated from
                ``_check_client_active`` when the client stops running.
        """
        # Every address tried over all rounds, used for the final log message.
        tried_addresses = set()
        self._wait_strategy.reset()
        try:
            while True:
                tried_addresses_per_attempt = set()
                members = self._cluster_service.get_members()
                if self._shuffle_member_list:
                    random.shuffle(members)

                for member in members:
                    self._check_client_active()
                    tried_addresses_per_attempt.add(member.address)
                    connection = self._connect(member,
                                               self._get_or_connect_to_member)
                    if connection:
                        return

                for address in self._get_possible_addresses():
                    self._check_client_active()
                    if address in tried_addresses_per_attempt:
                        # We already tried this address from the member list
                        continue

                    tried_addresses_per_attempt.add(address)
                    connection = self._connect(address,
                                               self._get_or_connect_to_address)
                    if connection:
                        return

                tried_addresses.update(tried_addresses_per_attempt)

                # If the address providers load no addresses (which seems to be possible),
                # then the above loop is not entered and the lifecycle check is missing,
                # hence we need to repeat the same check at this point.
                if not tried_addresses_per_attempt:
                    self._check_client_active()

                # A falsy result from sleep() ends the retry loop.
                if not self._wait_strategy.sleep():
                    break
        except (ClientNotAllowedInClusterError, InvalidConfigurationError):
            # Retrying cannot help with these errors; fall through to the
            # failure handling below.
            cluster_name = self._config.cluster_name
            _logger.exception("Stopped trying on cluster %s", cluster_name)

        cluster_name = self._config.cluster_name
        _logger.info(
            "Unable to connect to any address from the cluster with name: %s. "
            "The following addresses were tried: %s",
            cluster_name,
            tried_addresses,
        )
        if self._lifecycle_service.running:
            msg = "Unable to connect to any cluster"
        else:
            msg = "Client is being shutdown"
        raise IllegalStateError(msg)

    def _connect(self, target, get_or_connect_func):
        """Try to connect to ``target`` and wait for the outcome.

        Args:
            target: The member or address to connect to.
            get_or_connect_func: Function that takes ``target`` and returns a
                future of the connection.

        Returns:
            The connection, or ``None`` if the attempt failed with an error
            that allows trying other targets.

        Raises:
            ClientNotAllowedInClusterError: Re-raised, further attempts are
                pointless.
            InvalidConfigurationError: Re-raised, further attempts are
                pointless.
        """
        _logger.info("Trying to connect to %s", target)
        try:
            return get_or_connect_func(target).result()
        except (ClientNotAllowedInClusterError, InvalidConfigurationError):
            _logger.warning("Error during initial connection to %s",
                            target,
                            exc_info=True)
            # Bare raise keeps the original traceback intact.
            raise
        except Exception:
            # Only ``Exception`` is swallowed: KeyboardInterrupt and
            # SystemExit must be able to break out of the caller's retry loop.
            _logger.warning("Error during initial connection to %s",
                            target,
                            exc_info=True)
            return None

    def _authenticate(self, connection):
        """Send the authentication request over ``connection``.

        The custom (token based) codec is used when a token provider is
        configured, the username/password codec otherwise.

        Returns:
            The future of the authentication invocation.
        """
        config = self._config
        cluster_name = config.cluster_name
        client_name = self._client.name

        # Trailing arguments shared by both authentication codecs.
        identity = (
            self.client_uuid,
            CLIENT_TYPE,
            SERIALIZATION_VERSION,
            __version__,
            client_name,
            self._labels,
        )

        token_provider = config.token_provider
        if token_provider:
            token = token_provider.token(connection.connected_address)
            request = client_authentication_custom_codec.encode_request(
                cluster_name, token, *identity)
        else:
            request = client_authentication_codec.encode_request(
                cluster_name,
                config.creds_username,
                config.creds_password,
                *identity)

        # The response handler hands the raw message over unchanged; it is
        # decoded later in _on_auth.
        auth_invocation = Invocation(request,
                                     connection=connection,
                                     urgent=True,
                                     response_handler=lambda m: m)
        self._invocation_service.invoke(auth_invocation)
        return auth_invocation.future

    def _on_auth(self, response, connection):
        """Handle the completed authentication future of ``connection``.

        Args:
            response: Completed future holding the authentication response.
            connection: The connection that was being authenticated.

        Returns:
            The connection to use, as returned by ``_handle_successful_auth``.

        Raises:
            AuthenticationError: If the credentials were rejected or the
                status code is unknown.
            ClientNotAllowedInClusterError: If the client is not allowed in
                the cluster.
            IllegalStateError: If the serialization versions do not match.
            Exception: Whatever the future or the decoding raised.

        In every failure case the connection is closed before raising.
        """
        close_reason = "Failed to authenticate connection"
        try:
            raw_message = response.result()
            decoded = client_authentication_codec.decode_response(raw_message)
        except Exception as e:
            connection.close_connection(close_reason, e)
            raise e

        auth_status = decoded["status"]
        if auth_status == _AuthenticationStatus.AUTHENTICATED:
            return self._handle_successful_auth(decoded, connection)

        if auth_status == _AuthenticationStatus.CREDENTIALS_FAILED:
            failure = AuthenticationError(
                "Authentication failed. Check cluster name and credentials.")
        elif auth_status == _AuthenticationStatus.NOT_ALLOWED_IN_CLUSTER:
            failure = ClientNotAllowedInClusterError(
                "Client is not allowed in the cluster")
        elif auth_status == _AuthenticationStatus.SERIALIZATION_VERSION_MISMATCH:
            failure = IllegalStateError(
                "Server serialization version does not match to client")
        else:
            failure = AuthenticationError(
                "Authentication status code not supported. status: %s" %
                auth_status)

        connection.close_connection(close_reason, failure)
        raise failure

    def _handle_successful_auth(self, response, connection):
        """Register an authenticated connection and update the client state.

        Args:
            response: Decoded authentication response (a mapping).
            connection: The connection that has just been authenticated.

        Returns:
            ``connection``, or the already registered connection to the same
            member, in which case ``connection`` is closed as a duplicate.

        Raises:
            ClientNotAllowedInClusterError: If the partition count does not
                match (see ``_check_partition_count``).
            ValueError: If the connection belongs to a different cluster while
                other connections are still active.
        """
        with self._lock:
            self._check_partition_count(response["partition_count"])

            server_version_str = response["server_hazelcast_version"]
            remote_address = response["address"]
            remote_uuid = response["member_uuid"]

            connection.remote_address = remote_address
            connection.server_version = calculate_version(server_version_str)
            connection.remote_uuid = remote_uuid

            # Keep the connection that is already registered for this member.
            existing = self.active_connections.get(remote_uuid, None)
            if existing:
                connection.close_connection(
                    "Duplicate connection to same member with UUID: %s" %
                    remote_uuid, None)
                return existing

            new_cluster_id = response["cluster_id"]
            changed_cluster = self._cluster_id is not None and self._cluster_id != new_cluster_id
            if changed_cluster:
                # Raises if there are still connections to the old cluster.
                self._check_client_state_on_cluster_change(connection)
                _logger.warning(
                    "Switching from current cluster: %s to new cluster: %s",
                    self._cluster_id,
                    new_cluster_id,
                )
                self._on_cluster_restart()

            is_initial_connection = not self.active_connections
            self.active_connections[remote_uuid] = connection
            if is_initial_connection:
                self._cluster_id = new_cluster_id
                if changed_cluster:
                    # On a different cluster the client state has to be sent
                    # first; _initialize_on_cluster marks the client as
                    # initialized and fires CONNECTED once that succeeds.
                    self._client_state = _ClientState.CONNECTED_TO_CLUSTER
                    self._initialize_on_cluster(new_cluster_id)
                else:
                    self._client_state = _ClientState.INITIALIZED_ON_CLUSTER

        # Events and listeners are invoked after the lock has been released.
        if is_initial_connection and not changed_cluster:
            self._lifecycle_service.fire_lifecycle_event(
                LifecycleState.CONNECTED)

        _logger.info(
            "Authenticated with server %s:%s, server version: %s, local address: %s",
            remote_address,
            remote_uuid,
            server_version_str,
            connection.local_address,
        )

        for on_connection_opened, _ in self._connection_listeners:
            if on_connection_opened:
                # A failing listener must not prevent the others from being
                # notified.
                try:
                    on_connection_opened(connection)
                except:
                    _logger.exception("Exception in connection listener")

        # If the connection is no longer live at this point, run the close
        # handling for it, which removes it from active_connections.
        if not connection.live:
            self.on_connection_close(connection)

        return connection

    def _initialize_on_cluster(self, cluster_id) -> None:
        # This method is only called in the reactor thread
        if cluster_id != self._cluster_id:
            _logger.warning(
                f"Client won't send the state to the cluster: {cluster_id}"
                f"because it switched to a new cluster: {self._cluster_id}")
            return

        def callback(future):
            try:
                future.result()
                if cluster_id == self._cluster_id:
                    _logger.debug("The client state is sent to the cluster %s",
                                  cluster_id)

                    self._client_state = _ClientState.INITIALIZED_ON_CLUSTER

                    self._lifecycle_service.fire_lifecycle_event(
                        LifecycleState.CONNECTED)

                elif _logger.isEnabledFor(logging.DEBUG):
                    _logger.warning(
                        "Cannot set client state to 'INITIALIZED_ON_CLUSTER'"
                        f"because current cluster id: {self._cluster_id}"
                        f"is different than the expected cluster id: {cluster_id}"
                    )
            except:
                retry_on_error()

        def retry_on_error():
            _logger.exception(
                f"Failure during sending client state to the cluster {cluster_id}"
            )

            if cluster_id != self._cluster_id:
                return

            if _logger.isEnabledFor(logging.DEBUG):
                _logger.warning(
                    f"Retrying sending client state to the cluster: {cluster_id}"
                )

            self._initialize_on_cluster(cluster_id)

        try:
            self._send_state_to_cluster_fn().add_done_callback(callback)
        except:
            retry_on_error()

    def _check_client_state_on_cluster_change(self, connection):
        if self.active_connections:
            # If there are other connections, we must be connected to the wrong cluster.
            # We should not stay connected to this new connection.
            # Note that, in some racy scenarios, we might close a connection that
            # we can operate on. In those scenarios, we rely on the fact that we will
            # reopen the connections.
            reason = "Connection does not belong to the cluster %s" % self._cluster_id
            connection.close_connection(reason, None)
            raise ValueError(reason)

    def _on_cluster_restart(self):
        """Clear the near caches and the member list after a cluster switch."""
        self._near_cache_manager.clear_near_caches()
        self._cluster_service.clear_member_list()

    def _check_partition_count(self, partition_count):
        if not self._partition_service.check_and_set_partition_count(
                partition_count):
            raise ClientNotAllowedInClusterError(
                "Client can not work with this cluster because it has a "
                "different partition count. Expected partition count: %d, "
                "Member partition count: %d" %
                (self._partition_service.partition_count, partition_count))

    def _check_client_active(self):
        if not self._lifecycle_service.running:
            raise HazelcastClientNotActiveError()

    def _get_possible_addresses(self):
        primaries, secondaries = self._address_provider.load_addresses()
        if self._shuffle_member_list:
            # The relative order between primary and secondary addresses should
            # not be changed. So we shuffle the lists separately and then add
            # them to the final list so that secondary addresses are not tried
            # before all primary addresses have been tried. Otherwise we can get
            # startup delays
            random.shuffle(primaries)
            random.shuffle(secondaries)

        addresses = []
        addresses.extend(primaries)
        addresses.extend(secondaries)
        return addresses
# Example no. 3
# 0
class InvocationService:
    _CLEAN_RESOURCES_PERIOD = 0.1

    def __init__(self, client, config, reactor):
        smart_routing = config.smart_routing
        if smart_routing:
            self._do_invoke = self._invoke_smart
        else:
            self._do_invoke = self._invoke_non_smart

        self._client = client
        self._reactor = reactor
        self._partition_service = None
        self._connection_manager = None
        self._listener_service = None
        self._check_invocation_allowed_fn = None
        self._pending = {}
        self._next_correlation_id = AtomicInteger(1)
        self._is_redo_operation = config.redo_operation
        self._invocation_timeout = config.invocation_timeout
        self._invocation_retry_pause = config.invocation_retry_pause
        self._backup_ack_to_client_enabled = smart_routing and config.backup_ack_to_client_enabled
        self._fail_on_indeterminate_state = config.fail_on_indeterminate_operation_state
        self._backup_timeout = config.operation_backup_timeout
        self._clean_resources_timer = None
        self._shutdown = False
        self._compact_schema_service = None

    def init(self, partition_service, connection_manager, listener_service,
             compact_schema_service):
        self._partition_service = partition_service
        self._connection_manager = connection_manager
        self._listener_service = listener_service
        self._check_invocation_allowed_fn = connection_manager.check_invocation_allowed
        self._compact_schema_service = compact_schema_service

    def start(self):
        self._start_clean_resources_timer()

    def add_backup_listener(self):
        if self._backup_ack_to_client_enabled:
            self._register_backup_listener()

    def handle_client_message(self, message):
        correlation_id = message.get_correlation_id()

        start_frame = message.start_frame
        if start_frame.has_event_flag() or start_frame.has_backup_event_flag():
            self._listener_service.handle_client_message(
                message, correlation_id)
            return

        invocation = self._pending.get(correlation_id, None)
        if not invocation:
            _logger.warning("Got message with unknown correlation id: %s",
                            message)
            return

        if message.get_message_type() == EXCEPTION_MESSAGE_TYPE:
            error = create_error_from_message(message)
            return self._notify_error(invocation, error)

        self._notify(invocation, message)

    def invoke(self, invocation):
        if not invocation.timeout:
            invocation.timeout = self._invocation_timeout + time.time()

        correlation_id = self._next_correlation_id.get_and_increment()
        request = invocation.request
        request.set_correlation_id(correlation_id)
        request.set_partition_id(invocation.partition_id)
        self._do_invoke(invocation)

    def shutdown(self):
        if self._shutdown:
            return

        self._shutdown = True
        if self._clean_resources_timer:
            self._clean_resources_timer.cancel()
        for invocation in list(self._pending.values()):
            self._notify_error(invocation, HazelcastClientNotActiveError())

    def _invoke_on_partition_owner(self, invocation, partition_id):
        owner_uuid = self._partition_service.get_partition_owner(partition_id)
        if not owner_uuid:
            _logger.debug("Partition owner is not assigned yet")
            return False
        return self._invoke_on_target(invocation, owner_uuid)

    def _invoke_on_target(self, invocation, owner_uuid):
        connection = self._connection_manager.get_connection(owner_uuid)
        if not connection:
            _logger.debug("Client is not connected to target: %s", owner_uuid)
            return False
        return self._send(invocation, connection)

    def _invoke_on_random_connection(self, invocation):
        connection = self._connection_manager.get_random_connection()
        if not connection:
            _logger.debug("No connection found to invoke")
            return False
        return self._send(invocation, connection)

    def _invoke_smart(self, invocation):
        try:
            if not invocation.urgent:
                self._check_invocation_allowed_fn()

            connection = invocation.connection
            if connection:
                invoked = self._send(invocation, connection)
                if not invoked:
                    self._notify_error(
                        invocation,
                        IOError("Could not invoke on connection %s" %
                                connection))
                return

            if invocation.partition_id != -1:
                invoked = self._invoke_on_partition_owner(
                    invocation, invocation.partition_id)
            elif invocation.uuid:
                invoked = self._invoke_on_target(invocation, invocation.uuid)
            else:
                invoked = self._invoke_on_random_connection(invocation)

            if not invoked:
                invoked = self._invoke_on_random_connection(invocation)

            if not invoked:
                self._notify_error(invocation,
                                   IOError("No connection found to invoke"))
        except Exception as e:
            self._notify_error(invocation, e)

    def _invoke_non_smart(self, invocation):
        try:
            if not invocation.urgent:
                self._check_invocation_allowed_fn()

            connection = invocation.connection
            if connection:
                invoked = self._send(invocation, connection)
                if not invoked:
                    self._notify_error(
                        invocation,
                        IOError("Could not invoke on connection %s" %
                                connection))
                return

            if not self._invoke_on_random_connection(invocation):
                self._notify_error(invocation,
                                   IOError("No connection found to invoke"))
        except Exception as e:
            self._notify_error(invocation, e)

    def _send(self, invocation, connection):
        if self._shutdown:
            raise HazelcastClientNotActiveError()

        if self._backup_ack_to_client_enabled:
            invocation.request.set_backup_aware_flag()

        message = invocation.request
        correlation_id = message.get_correlation_id()
        self._pending[correlation_id] = invocation

        if invocation.event_handler:
            self._listener_service.add_event_handler(correlation_id,
                                                     invocation.event_handler)

        if not connection.send_message(message):
            if invocation.event_handler:
                self._listener_service.remove_event_handler(correlation_id)
            return False

        invocation.sent_connection = connection
        return True

    def _complete(self, invocation: Invocation,
                  client_message: InboundMessage) -> None:
        try:
            result = invocation.response_handler(client_message)
            invocation.future.set_result(result)
        except SchemaNotFoundError as e:
            self._fetch_schema_and_complete_again(e, invocation,
                                                  client_message)
            return
        except Exception as e:
            invocation.future.set_exception(e)

        correlation_id = invocation.request.get_correlation_id()
        self._pending.pop(correlation_id, None)

    def _complete_with_error(self, invocation, error):
        invocation.future.set_exception(error, None)
        correlation_id = invocation.request.get_correlation_id()
        self._pending.pop(correlation_id, None)

    def _fetch_schema_and_complete_again(self, error: SchemaNotFoundError,
                                         invocation: Invocation,
                                         message: InboundMessage) -> None:
        schema_id = error.schema_id

        def callback(future):
            try:
                schema = future.result()
                self._compact_schema_service.register_fetched_schema(
                    schema_id, schema)
            except Exception as e:
                self._complete_with_error(invocation, e)
                return

            message.reset_next_frame()
            self._complete(invocation, message)

        fetch_schema_future = self._compact_schema_service.fetch_schema(
            schema_id)
        fetch_schema_future.add_done_callback(callback)

    def _notify_error(self, invocation, error):
        _logger.debug("Got exception for request %s, error: %s",
                      invocation.request, error)

        if not self._client.lifecycle_service.is_running():
            self._complete_with_error(invocation,
                                      HazelcastClientNotActiveError())
            return

        if not self._should_retry(invocation, error):
            self._complete_with_error(invocation, error)
            return

        if invocation.timeout < time.time():
            _logger.debug(
                "Error will not be retried because invocation timed out: %s",
                error)
            error = OperationTimeoutError(
                "Request timed out because an error occurred "
                "after invocation timeout: %s" % error)
            self._complete_with_error(invocation, error)
            return

        invocation.sent_connection = None
        invoke_func = functools.partial(self._retry_if_not_done, invocation)
        self._reactor.add_timer(self._invocation_retry_pause, invoke_func)

    def _retry_if_not_done(self, invocation):
        if not invocation.future.done():
            self._do_invoke(invocation)

    def _should_retry(self, invocation, error):
        if invocation.connection and isinstance(
                error, (IOError, TargetDisconnectedError)):
            return False

        if invocation.uuid and isinstance(error, TargetNotMemberError):
            return False

        if isinstance(
                error,
            (IOError,
             HazelcastInstanceNotActiveError)) or is_retryable_error(error):
            return True

        if isinstance(error, TargetDisconnectedError):
            return invocation.request.retryable or self._is_redo_operation

        return False

    def _register_backup_listener(self):
        codec = client_local_backup_listener_codec
        request = codec.encode_request()
        self._listener_service.register_listener(
            request,
            codec.decode_response,
            lambda reg_id: None,
            lambda m: codec.handle(m, self._backup_event_handler),
        ).result()

    def _backup_event_handler(self, correlation_id):
        invocation = self._pending.get(correlation_id, None)
        if not invocation:
            _logger.debug(
                "Invocation not found for backup event, invocation id %s",
                correlation_id)
            return
        self._notify_backup_complete(invocation)

    def _notify(self, invocation, client_message):
        expected_backups = client_message.get_number_of_backup_acks()
        if expected_backups > invocation.backup_acks_received:
            invocation.pending_response_received_time = time.time()
            invocation.backup_acks_expected = expected_backups
            invocation.pending_response = client_message
            return

        self._complete(invocation, client_message)

    def _notify_backup_complete(self, invocation):
        invocation.backup_acks_received += 1
        if not invocation.pending_response:
            return

        if invocation.backup_acks_expected != invocation.backup_acks_received:
            return

        self._complete(invocation, invocation.pending_response)

    def _start_clean_resources_timer(self):
        def run():
            if self._shutdown:
                return

            now = time.time()
            for invocation in list(self._pending.values()):
                connection = invocation.sent_connection
                if not connection:
                    continue

                if not connection.live:
                    error = TargetDisconnectedError(connection.close_reason)
                    self._notify_error(invocation, error)
                    continue

                if self._backup_ack_to_client_enabled:
                    self._detect_and_handle_backup_timeout(invocation, now)

            self._clean_resources_timer = self._reactor.add_timer(
                self._CLEAN_RESOURCES_PERIOD, run)

        self._clean_resources_timer = self._reactor.add_timer(
            self._CLEAN_RESOURCES_PERIOD, run)

    def _detect_and_handle_backup_timeout(self, invocation, now):
        if not invocation.pending_response:
            return

        if invocation.backup_acks_expected == invocation.backup_acks_received:
            return

        expiration_time = invocation.pending_response_received_time + self._backup_timeout
        timeout_reached = 0 < expiration_time < now
        if not timeout_reached:
            return

        if self._fail_on_indeterminate_state:
            error = IndeterminateOperationStateError(
                "Invocation failed because the backup acks are missed")
            self._complete_with_error(invocation, error)
            return

        self._complete(invocation, invocation.pending_response)
class InvocationService(object):
    """Sends invocations to cluster members and tracks their completion.

    Sent invocations are stored in ``_pending`` by correlation id; listener
    invocations are additionally stored in ``_event_handlers`` so that event
    messages can be dispatched to them. ``invoke`` is bound in ``__init__``
    to the smart or non-smart routing strategy according to the network
    configuration.
    """
    logger = logging.getLogger("InvocationService")

    def __init__(self, client):
        """Bind the routing strategy and subscribe to connection events."""
        self._pending = {}
        self._event_handlers = {}
        self._next_correlation_id = AtomicInteger(1)
        self._client = client
        self._event_queue = Queue()
        self._is_redo_operation = client.config.network_config.redo_operation

        if client.config.network_config.smart_routing:
            self.invoke = self.invoke_smart
        else:
            self.invoke = self.invoke_non_smart

        self._client.connection_manager.add_listener(on_connection_closed=self.cleanup_connection)
        client.heartbeat.add_listener(on_heartbeat_stopped=self._heartbeat_stopped)

    def invoke_on_connection(self, message, connection, ignore_heartbeat=False):
        """Invoke ``message`` on the given connection; return its future."""
        return self.invoke(Invocation(message, connection=connection), ignore_heartbeat)

    def invoke_on_partition(self, message, partition_id):
        """Invoke ``message`` on the owner of ``partition_id``."""
        return self.invoke(Invocation(message, partition_id=partition_id))

    def invoke_on_random_target(self, message):
        """Invoke ``message`` without a specific target."""
        return self.invoke(Invocation(message))

    def invoke_on_target(self, message, address):
        """Invoke ``message`` on the member at ``address``."""
        return self.invoke(Invocation(message, address=address))

    def invoke_smart(self, invocation, ignore_heartbeat=False):
        """Route by connection, partition owner, address or load balancer.

        Returns:
            The invocation's future.
        """
        if invocation.has_connection():
            self._send(invocation, invocation.connection, ignore_heartbeat)
        elif invocation.has_partition_id():
            addr = self._client.partition_service.get_partition_owner(invocation.partition_id)
            self._send_to_address(invocation, addr)
        elif invocation.has_address():
            self._send_to_address(invocation, invocation.address)
        else:  # send to random address
            addr = self._client.load_balancer.next_address()
            self._send_to_address(invocation, addr)

        return invocation.future

    def invoke_non_smart(self, invocation, ignore_heartbeat=False):
        """Route over the bound connection or the owner connection address.

        Returns:
            The invocation's future.
        """
        if invocation.has_connection():
            self._send(invocation, invocation.connection, ignore_heartbeat)
        else:
            addr = self._client.cluster.owner_connection_address
            self._send_to_address(invocation, addr)
        return invocation.future

    def cleanup_connection(self, connection, cause):
        """Handle a closed connection.

        Every pending invocation sent over ``connection`` is passed to
        ``_handle_exception`` with ``cause``. While the client is live,
        listener invocations that were sent over it and are not bound to a
        specific connection are re-registered.
        """
        # Snapshots: handling an exception may add or remove entries.
        for invocation in list(self._pending.values()):
            if invocation.sent_connection == connection:
                self._handle_exception(invocation, cause)

        if self._client.lifecycle.is_live:
            for invocation in list(self._event_handlers.values()):
                if invocation.sent_connection == connection and invocation.connection is None:
                    self._client.listener.re_register_listener(invocation)

    def _heartbeat_stopped(self, connection):
        """Fail or retry pending invocations sent over a silent connection."""
        for invocation in list(self._pending.values()):
            if invocation.sent_connection == connection:
                self._handle_exception(invocation,
                                       TargetDisconnectedError("%s has stopped heart beating." % connection))

    def _remove_event_handler(self, correlation_id):
        """Forget the listener invocation registered under ``correlation_id``.

        Raises:
            KeyError: If no handler is registered under that id.
        """
        self._event_handlers.pop(correlation_id)

    def _send_to_address(self, invocation, address, ignore_heartbeat=False):
        """Send over the connection to ``address``, connecting if needed.

        When no connection exists, the invocation fails with ``IOError``
        unless the client is in the connected state, in which case a
        connection is requested and the send continues in ``on_connect``.
        """
        try:
            conn = self._client.connection_manager.connections[address]
            self._send(invocation, conn, ignore_heartbeat)
        except KeyError:
            if self._client.lifecycle.state != LIFECYCLE_STATE_CONNECTED:
                self._handle_exception(invocation, IOError("Client is not in connected state"))
            else:
                self._client.connection_manager.get_or_connect(address).continue_with(self.on_connect, invocation, ignore_heartbeat)

    def on_connect(self, f, invocation, ignore_heartbeat):
        """Continuation of ``get_or_connect``: send or report the failure."""
        if f.is_success():
            self._send(invocation, f.result(), ignore_heartbeat)
        else:
            self._handle_exception(invocation, f.exception(), f.traceback())

    def _send(self, invocation, connection, ignore_heartbeat):
        """Assign a fresh correlation id, register and send the invocation.

        The timeout timer is created on the first send only. If the
        connection is not heart beating (and the check is not ignored) the
        invocation is routed to ``_handle_exception`` without being sent.
        """
        correlation_id = self._next_correlation_id.get_and_increment()
        message = invocation.request
        message.set_correlation_id(correlation_id)
        message.set_partition_id(invocation.partition_id)
        self._pending[correlation_id] = invocation
        if not invocation.timer:
            invocation.timer = self._client.reactor.add_timer_absolute(invocation.timeout, invocation.on_timeout)

        if isinstance(invocation, ListenerInvocation):
            self._event_handlers[correlation_id] = invocation

        self.logger.debug("Sending %s to %s", message, connection)

        if not ignore_heartbeat and not connection.heartbeating:
            self._handle_exception(invocation, TargetDisconnectedError("%s has stopped heart beating." % connection))
            return

        invocation.sent_connection = connection
        try:
            connection.send_message(message)
        except IOError as e:
            self._handle_exception(invocation, e)

    def _handle_client_message(self, message):
        """Dispatch an inbound message to its event handler or invocation.

        Messages with unknown correlation ids are logged and dropped.
        """
        correlation_id = message.get_correlation_id()
        if message.has_flags(LISTENER_FLAG):
            if correlation_id not in self._event_handlers:
                self.logger.warning("Got event message with unknown correlation id: %s", message)
                return
            invocation = self._event_handlers[correlation_id]
            self._handle_event(invocation, message)
            return
        if correlation_id not in self._pending:
            self.logger.warning("Got message with unknown correlation id: %s", message)
            return
        invocation = self._pending.pop(correlation_id)

        if message.get_message_type() == EXCEPTION_MESSAGE_TYPE:
            error = create_exception(ErrorCodec(message))
            return self._handle_exception(invocation, error)

        invocation.set_response(message)

    def _handle_event(self, invocation, message):
        """Run the invocation's event handler, logging any failure."""
        try:
            invocation.event_handler(message)
        except Exception:
            # A failing user handler must not break message dispatch, but
            # SystemExit/KeyboardInterrupt are allowed to propagate.
            self.logger.warning("Error handling event %s", message, exc_info=True)

    def _handle_exception(self, invocation, error, traceback=None):
        """Retry the invocation if ``error`` allows it, else fail it."""
        if self.logger.isEnabledFor(logging.DEBUG):
            self.logger.debug("Got exception for request %s: %s: %s", invocation.request,
                              type(error).__name__, error)
        if isinstance(error, (AuthenticationError, IOError, HazelcastInstanceNotActiveError)):
            if self._try_retry(invocation):
                return

        if is_retryable_error(error):
            if invocation.request.is_retryable() or self._is_redo_operation:
                if self._try_retry(invocation):
                    return

        invocation.set_exception(error, traceback)

    def _try_retry(self, invocation):
        """Schedule a retry and return ``True``, or return ``False``.

        No retry happens for invocations bound to a connection or whose
        timeout has already passed.
        """
        if invocation.connection:
            return False
        if invocation.timeout < time.time():
            return False

        invoke_func = functools.partial(self.invoke, invocation)
        self.logger.debug("Rescheduling request %s to be retried in %s seconds", invocation.request,
                          RETRY_WAIT_TIME_IN_SECONDS)
        self._client.reactor.add_timer(RETRY_WAIT_TIME_IN_SECONDS, invoke_func)
        return True
Ejemplo n.º 5
0
class InvocationService(object):
    """Sends invocations to cluster members and tracks their completion.

    Sent invocations are stored in ``_pending`` by correlation id until a
    response or an error arrives. ``invoke`` is bound in ``__init__`` to the
    smart or non-smart routing strategy according to the network
    configuration. Event messages are delegated to the listener service,
    which is resolved in ``start``.
    """
    logger = logging.getLogger("HazelcastClient.InvocationService")

    def __init__(self, client):
        """Bind the routing strategy and subscribe to connection events."""
        self._pending = {}
        self._next_correlation_id = AtomicInteger(1)
        self._client = client
        self._logger_extras = {
            "client_name": client.name,
            "group_name": client.config.group_config.name
        }
        self._event_queue = queue.Queue()
        self._is_redo_operation = client.config.network_config.redo_operation
        self.invocation_retry_pause = self._init_invocation_retry_pause()
        self.invocation_timeout = self._init_invocation_timeout()
        self._listener_service = None

        if client.config.network_config.smart_routing:
            self.invoke = self.invoke_smart
        else:
            self.invoke = self.invoke_non_smart

        self._client.connection_manager.add_listener(
            on_connection_closed=self.cleanup_connection)
        client.heartbeat.add_listener(
            on_heartbeat_stopped=self._heartbeat_stopped)

    def start(self):
        """Resolve the listener service from the client."""
        self._listener_service = self._client.listener

    def invoke_on_connection(self,
                             message,
                             connection,
                             ignore_heartbeat=False,
                             event_handler=None):
        """Invoke ``message`` on the given connection; return its future."""
        return self.invoke(
            Invocation(self,
                       message,
                       connection=connection,
                       event_handler=event_handler), ignore_heartbeat)

    def invoke_on_partition(self,
                            message,
                            partition_id,
                            invocation_timeout=None):
        """Invoke ``message`` on the owner of ``partition_id``.

        A truthy ``invocation_timeout`` overrides the invocation's timeout.
        """
        invocation = Invocation(self, message, partition_id=partition_id)
        if invocation_timeout:
            invocation.set_timeout(invocation_timeout)
        return self.invoke(invocation)

    def invoke_on_random_target(self, message):
        """Invoke ``message`` without a specific target."""
        return self.invoke(Invocation(self, message))

    def invoke_on_target(self, message, address):
        """Invoke ``message`` on the member at ``address``."""
        return self.invoke(Invocation(self, message, address=address))

    def invoke_smart(self, invocation, ignore_heartbeat=False):
        """Route by connection, partition owner, address or load balancer.

        Missing partition owners, non-member targets and a load balancer
        without addresses are reported through ``_handle_exception``.

        Returns:
            The invocation's future.
        """
        if invocation.has_connection():
            self._send(invocation, invocation.connection, ignore_heartbeat)
        elif invocation.has_partition_id():
            addr = self._client.partition_service.get_partition_owner(
                invocation.partition_id)
            if addr is None:
                # The placeholder was missing, so the id never made it into
                # the message.
                self._handle_exception(
                    invocation,
                    IOError("Partition does not have an owner. "
                            "partition Id: {}".format(invocation.partition_id)))
            elif not self._is_member(addr):
                self._handle_exception(
                    invocation,
                    TargetNotMemberError("Partition owner '{}' "
                                         "is not a member.".format(addr)))
            else:
                self._send_to_address(invocation, addr)
        elif invocation.has_address():
            if not self._is_member(invocation.address):
                self._handle_exception(
                    invocation,
                    TargetNotMemberError("Target '{}' is not a member.".format(
                        invocation.address)))
            else:
                self._send_to_address(invocation, invocation.address)
        else:  # send to random address
            addr = self._client.load_balancer.next_address()
            if addr is None:
                self._handle_exception(invocation,
                                       IOError("No address found to invoke"))
            else:
                self._send_to_address(invocation, addr)
        return invocation.future

    def invoke_non_smart(self, invocation, ignore_heartbeat=False):
        """Route over the bound connection or the owner connection address.

        Returns:
            The invocation's future.
        """
        if invocation.has_connection():
            self._send(invocation, invocation.connection, ignore_heartbeat)
        else:
            addr = self._client.cluster.owner_connection_address
            self._send_to_address(invocation, addr)
        return invocation.future

    def cleanup_connection(self, connection, cause):
        """Pass ``cause`` to every pending invocation sent over ``connection``."""
        # Snapshot: handling an exception may add or remove entries.
        for invocation in list(self._pending.values()):
            if invocation.sent_connection == connection:
                self._handle_exception(invocation, cause)

    def _init_invocation_retry_pause(self):
        """Read the retry pause from the client properties."""
        invocation_retry_pause = self._client.properties.get_seconds_positive_or_default(
            self._client.properties.INVOCATION_RETRY_PAUSE_MILLIS)
        return invocation_retry_pause

    def _init_invocation_timeout(self):
        """Read the invocation timeout from the client properties."""
        invocation_timeout = self._client.properties.get_seconds_positive_or_default(
            self._client.properties.INVOCATION_TIMEOUT_SECONDS)
        return invocation_timeout

    def _heartbeat_stopped(self, connection):
        """Fail or retry pending invocations sent over a silent connection."""
        for invocation in list(self._pending.values()):
            if invocation.sent_connection == connection:
                self._handle_exception(
                    invocation,
                    TargetDisconnectedError("%s has stopped heart beating." %
                                            connection))

    def _send_to_address(self, invocation, address, ignore_heartbeat=False):
        """Send over the connection to ``address``, connecting if needed.

        When no connection exists, the invocation fails with ``IOError``
        unless the client is in the connected state, in which case a
        connection is requested and the send continues in ``on_connect``.
        """
        try:
            conn = self._client.connection_manager.connections[address]
            self._send(invocation, conn, ignore_heartbeat)
        except KeyError:
            if self._client.lifecycle.state != LIFECYCLE_STATE_CONNECTED:
                self._handle_exception(
                    invocation, IOError("Client is not in connected state"))
            else:
                self._client.connection_manager.get_or_connect(
                    address).continue_with(self.on_connect, invocation,
                                           ignore_heartbeat)

    def on_connect(self, f, invocation, ignore_heartbeat):
        """Continuation of ``get_or_connect``: send or report the failure."""
        if f.is_success():
            self._send(invocation, f.result(), ignore_heartbeat)
        else:
            self._handle_exception(invocation, f.exception(), f.traceback())

    def _send(self, invocation, connection, ignore_heartbeat):
        """Assign a fresh correlation id, register and send the invocation.

        The timeout timer is created on the first send only. If the
        connection is not heart beating (and the check is not ignored) the
        invocation is routed to ``_handle_exception`` without being sent.
        An ``IOError`` while sending unregisters the event handler before
        the error is handled.
        """
        correlation_id = self._next_correlation_id.get_and_increment()
        message = invocation.request
        message.set_correlation_id(correlation_id)
        message.set_partition_id(invocation.partition_id)
        self._pending[correlation_id] = invocation
        if not invocation.timer:
            invocation.timer = self._client.reactor.add_timer_absolute(
                invocation.timeout, invocation.on_timeout)

        if invocation.event_handler is not None:
            self._listener_service.add_event_handler(correlation_id,
                                                     invocation.event_handler)

        self.logger.debug("Sending %s to %s",
                          message,
                          connection,
                          extra=self._logger_extras)

        if not ignore_heartbeat and not connection.heartbeating:
            self._handle_exception(
                invocation,
                TargetDisconnectedError("%s has stopped heart beating." %
                                        connection))
            return

        invocation.sent_connection = connection
        try:
            connection.send_message(message)
        except IOError as e:
            if invocation.event_handler is not None:
                self._listener_service.remove_event_handler(correlation_id)
            self._handle_exception(invocation, e)

    def _handle_client_message(self, message):
        """Dispatch an inbound message to the listener service or invocation.

        Messages with unknown correlation ids are logged and dropped.
        """
        correlation_id = message.get_correlation_id()
        if message.has_flags(LISTENER_FLAG):
            self._listener_service.handle_client_message(message)
            return
        if correlation_id not in self._pending:
            self.logger.warning("Got message with unknown correlation id: %s",
                                message,
                                extra=self._logger_extras)
            return
        invocation = self._pending.pop(correlation_id)

        if message.get_message_type() == EXCEPTION_MESSAGE_TYPE:
            error = create_exception(ErrorCodec(message))
            return self._handle_exception(invocation, error)

        invocation.set_response(message)

    def _handle_event(self, invocation, message):
        """Run the invocation's event handler, logging any failure."""
        try:
            invocation.event_handler(message)
        except Exception:
            # A failing user handler must not break message dispatch, but
            # SystemExit/KeyboardInterrupt are allowed to propagate.
            self.logger.warning("Error handling event %s",
                                message,
                                exc_info=True,
                                extra=self._logger_extras)

    def _handle_exception(self, invocation, error, traceback=None):
        """Fail the invocation with ``error`` or schedule a retry.

        The invocation fails when the client is not live, when the error is
        not retryable for this invocation, or when the invocation timeout
        has passed. Otherwise ``invoke`` is rescheduled after
        ``invocation_retry_pause``.
        """
        if self.logger.isEnabledFor(logging.DEBUG):
            self.logger.debug("Got exception for request %s: %s: %s",
                              invocation.request,
                              type(error).__name__,
                              error,
                              extra=self._logger_extras)

        if not self._client.lifecycle.is_live:
            # Errors created without arguments have an empty ``args`` tuple.
            message = error.args[0] if error.args else None
            invocation.set_exception(
                HazelcastClientNotActiveException(message), traceback)
            return

        if self._is_not_allowed_to_retry_on_selection(invocation, error):
            invocation.set_exception(error, traceback)
            return

        if not self._should_retry(invocation, error):
            invocation.set_exception(error, traceback)
            return

        if invocation.timeout < time.time():
            if self.logger.isEnabledFor(logging.DEBUG):
                self.logger.debug(
                    'Error will not be retried because invocation timed out: %s',
                    error,
                    extra=self._logger_extras)
            # The traceback belongs to set_exception, as in the branches
            # above, not to the TimeoutError constructor.
            invocation.set_exception(
                TimeoutError(
                    '%s timed out because an error occurred after invocation timeout: %s'
                    % (invocation.request, error)), traceback)
            return

        invoke_func = functools.partial(self.invoke, invocation)
        self._client.reactor.add_timer(self.invocation_retry_pause,
                                       invoke_func)

    def _should_retry(self, invocation, error):
        """Return whether ``error`` permits another attempt."""
        if isinstance(
                error,
            (IOError,
             HazelcastInstanceNotActiveError)) or is_retryable_error(error):
            return True

        if isinstance(error, TargetDisconnectedError):
            return invocation.request.is_retryable() or self._is_redo_operation

        return False

    def _is_not_allowed_to_retry_on_selection(self, invocation, error):
        """Return whether the invocation's fixed target forbids a retry."""
        if invocation.connection is not None and isinstance(error, IOError):
            return True

        # When invocation is sent over an address, error is the TargetNotMemberError and the
        # member is not in the member list, we should not retry
        return invocation.address is not None and isinstance(error, TargetNotMemberError) \
               and not self._is_member(invocation.address)

    def _is_member(self, address):
        """Return whether the cluster knows a member at ``address``."""
        return self._client.cluster.get_member_by_address(address) is not None
Ejemplo n.º 6
0
class ConnectionManager(object):
    """
    ConnectionManager is responsible for managing :mod:`Connection` objects.
    """
    logger = logging.getLogger("HazelcastClient.ConnectionManager")

    def __init__(self, client, reactor, address_provider, lifecycle_service,
                 partition_service, cluster_service, invocation_service,
                 near_cache_manager, logger_extras):
        """
        Stores the collaborating services and reads the connection related settings from the client.

        :param client: client instance; its ``config`` and ``properties`` are read here.
        :param reactor: reactor that provides timers and the connection factory.
        :param address_provider: provider used to load and translate addresses.
        :param lifecycle_service: service used to check the running state and to fire lifecycle events.
        :param partition_service: service used to validate the partition count of the cluster.
        :param cluster_service: service that supplies the current member list.
        :param invocation_service: service used to send the authentication request.
        :param near_cache_manager: manager whose near caches are cleared when the cluster changes.
        :param logger_extras: value passed as ``extra`` to every log call.
        """
        # Publicly visible state.
        self.live = False
        self.active_connections = {}
        self.client_uuid = uuid.uuid4()

        # Collaborators handed in by the client.
        self._client = client
        self._reactor = reactor
        self._address_provider = address_provider
        self._lifecycle_service = lifecycle_service
        self._partition_service = partition_service
        self._cluster_service = cluster_service
        self._invocation_service = invocation_service
        self._near_cache_manager = near_cache_manager
        self._logger_extras = logger_extras

        # Settings derived from the client configuration.
        client_config = self._client.config
        strategy_config = client_config.connection_strategy
        self._smart_routing_enabled = client_config.network.smart_routing
        self._wait_strategy = self._init_wait_strategy(client_config)
        self._reconnect_mode = strategy_config.reconnect_mode
        self._heartbeat_manager = _HeartbeatManager(
            self, self._client, reactor, invocation_service, logger_extras)
        self._connection_listeners = []
        self._connect_all_members_timer = None
        self._async_start = strategy_config.async_start
        self._connect_to_cluster_thread_running = False
        self._pending_connections = {}

        properties = self._client.properties
        self._shuffle_member_list = properties.get_bool(
            properties.SHUFFLE_MEMBER_LIST)
        self._lock = threading.RLock()
        self._connection_id_generator = AtomicInteger()
        self._labels = client_config.labels
        self._cluster_id = None
        self._load_balancer = None

    def add_listener(self,
                     on_connection_opened=None,
                     on_connection_closed=None):
        """
        Registers a ConnectionListener. If the same listener is registered multiple times, it will be notified multiple
        times.

        :param on_connection_opened: (Function), function to be called when a connection is opened.
        :param on_connection_closed: (Function), function to be called when a connection is removed.
        """
        self._connection_listeners.append(
            (on_connection_opened, on_connection_closed))

    def get_connection(self, member_uuid):
        return self.active_connections.get(member_uuid, None)

    def get_connection_from_address(self, address):
        """
        Finds the first active connection whose ``remote_address`` equals the given address.

        :param address: address to search for.
        :return: the matching connection, or ``None`` if no active connection matches.
        """
        matching = (connection
                    for connection in six.itervalues(self.active_connections)
                    if address == connection.remote_address)
        return next(matching, None)

    def get_random_connection(self):
        if self._smart_routing_enabled:
            member = self._load_balancer.next()
            if member:
                connection = self.get_connection(member.uuid)
                if connection:
                    return connection

        for connection in six.itervalues(self.active_connections):
            return connection

        return None

    def start(self, load_balancer):
        if self.live:
            return

        self.live = True
        self._load_balancer = load_balancer
        self._heartbeat_manager.start()
        self._connect_to_cluster()
        if self._smart_routing_enabled:
            self._start_connect_all_members_timer()

    def shutdown(self):
        if not self.live:
            return

        self.live = False
        if self._connect_all_members_timer:
            self._connect_all_members_timer.cancel()

        self._heartbeat_manager.shutdown()
        for connection_future in six.itervalues(self._pending_connections):
            connection_future.set_exception(
                HazelcastClientNotActiveError(
                    "Hazelcast client is shutting down"))

        # Need to create copy of connection values to avoid modification errors on runtime
        for connection in list(six.itervalues(self.active_connections)):
            connection.close("Hazelcast client is shutting down", None)

        self._connection_listeners = []
        self.active_connections.clear()
        self._pending_connections.clear()

    def connect_to_all_cluster_members(self):
        if not self._smart_routing_enabled:
            return

        for member in self._cluster_service.get_members():
            try:
                self._get_or_connect(member.address).result()
            except:
                pass

    def on_connection_close(self, closed_connection, cause):
        connected_address = closed_connection.connected_address
        remote_uuid = closed_connection.remote_uuid

        if not connected_address:
            self.logger.debug(
                "Destroying %s, but it has no remote address, hence nothing is "
                "removed from the connection dictionary" % closed_connection,
                extra=self._logger_extras)

        with self._lock:
            pending = self._pending_connections.pop(connected_address, None)
            connection = self.active_connections.pop(remote_uuid, None)

            if pending:
                pending.set_exception(cause)

            if connection:
                self.logger.info(
                    "Removed connection to %s:%s, connection: %s" %
                    (connected_address, remote_uuid, connection),
                    extra=self._logger_extras)
                if not self.active_connections:
                    self._lifecycle_service.fire_lifecycle_event(
                        LifecycleState.DISCONNECTED)
                    self._trigger_cluster_reconnection()

        if connection:
            for _, on_connection_closed in self._connection_listeners:
                if on_connection_closed:
                    try:
                        on_connection_closed(connection, cause)
                    except:
                        self.logger.exception(
                            "Exception in connection listener",
                            extra=self._logger_extras)
        else:
            if remote_uuid:
                self.logger.debug(
                    "Destroying %s, but there is no mapping for %s in the connection dictionary"
                    % (closed_connection, remote_uuid),
                    extra=self._logger_extras)

    def check_invocation_allowed(self):
        if self.active_connections:
            return

        if self._async_start or self._reconnect_mode == RECONNECT_MODE.ASYNC:
            raise ClientOfflineError()
        else:
            raise IOError("No connection found to cluster")

    def _trigger_cluster_reconnection(self):
        """
        Reacts to the loss of the cluster connection: shuts the client down when the reconnect
        mode is OFF, otherwise starts the reconnection thread if the client is still running.
        """
        reconnect_disabled = self._reconnect_mode == RECONNECT_MODE.OFF
        if not reconnect_disabled:
            if self._lifecycle_service.running:
                self._start_connect_to_cluster_thread()
            return

        self.logger.info("Reconnect mode is OFF. Shutting down the client",
                         extra=self._logger_extras)
        self._shutdown_client()

    def _init_wait_strategy(self, config):
        """
        Builds the wait strategy from the connection retry section of the configuration.

        :param config: client configuration; ``connection_strategy.connection_retry`` is read.
        :return: the ``_WaitStrategy`` built from those values and the logger extras.
        """
        retry = config.connection_strategy.connection_retry
        strategy_args = (
            retry.initial_backoff,
            retry.max_backoff,
            retry.multiplier,
            retry.cluster_connect_timeout,
            retry.jitter,
            self._logger_extras,
        )
        return _WaitStrategy(*strategy_args)

    def _start_connect_all_members_timer(self):
        connecting_addresses = set()

        def run():
            if not self._lifecycle_service.running:
                return

            for member in self._cluster_service.get_members():
                address = member.address

                if not self.get_connection_from_address(
                        address) and address not in connecting_addresses:
                    connecting_addresses.add(address)
                    if not self._lifecycle_service.running:
                        break

                    if not self.get_connection(member.uuid):
                        self._get_or_connect(address).add_done_callback(
                            lambda f: connecting_addresses.discard(address))

            self._connect_all_members_timer = self._reactor.add_timer(1, run)

        self._connect_all_members_timer = self._reactor.add_timer(1, run)

    def _connect_to_cluster(self):
        if self._async_start:
            self._start_connect_to_cluster_thread()
        else:
            self._sync_connect_to_cluster()

    def _start_connect_to_cluster_thread(self):
        with self._lock:
            if self._connect_to_cluster_thread_running:
                return

            self._connect_to_cluster_thread_running = True

        def run():
            try:
                while True:
                    self._sync_connect_to_cluster()
                    with self._lock:
                        if self.active_connections:
                            self._connect_to_cluster_thread_running = False
                            return
            except:
                self.logger.exception(
                    "Could not connect to any cluster, shutting down the client",
                    extra=self._logger_extras)
                self._shutdown_client()

        t = threading.Thread(target=run, name='hazelcast_async_connection')
        t.daemon = True
        t.start()

    def _shutdown_client(self):
        try:
            self._client.shutdown()
        except:
            self.logger.exception("Exception during client shutdown",
                                  extra=self._logger_extras)

    def _sync_connect_to_cluster(self):
        tried_addresses = set()
        self._wait_strategy.reset()
        try:
            while True:
                for address in self._get_possible_addresses():
                    self._check_client_active()
                    tried_addresses.add(address)
                    connection = self._connect(address)
                    if connection:
                        return
                # If the address providers load no addresses (which seems to be possible),
                # then the above loop is not entered and the lifecycle check is missing,
                # hence we need to repeat the same check at this point.
                self._check_client_active()
                if not self._wait_strategy.sleep():
                    break
        except (ClientNotAllowedInClusterError, InvalidConfigurationError):
            cluster_name = self._client.config.cluster_name
            self.logger.exception("Stopped trying on cluster %s" %
                                  cluster_name,
                                  extra=self._logger_extras)

        cluster_name = self._client.config.cluster_name
        self.logger.info(
            "Unable to connect to any address from the cluster with name: %s. "
            "The following addresses were tried: %s" %
            (cluster_name, tried_addresses),
            extra=self._logger_extras)
        if self._lifecycle_service.running:
            msg = "Unable to connect to any cluster"
        else:
            msg = "Client is being shutdown"
        raise IllegalStateError(msg)

    def _connect(self, address):
        self.logger.info("Trying to connect to %s" % address,
                         extra=self._logger_extras)
        try:
            return self._get_or_connect(address).result()
        except (ClientNotAllowedInClusterError,
                InvalidConfigurationError) as e:
            self.logger.warning("Error during initial connection to %s: %s" %
                                (address, e),
                                extra=self._logger_extras)
            raise e
        except Exception as e:
            self.logger.warning("Error during initial connection to %s: %s" %
                                (address, e),
                                extra=self._logger_extras)
            return None

    def _get_or_connect(self, address):
        connection = self.get_connection_from_address(address)
        if connection:
            return ImmediateFuture(connection)

        with self._lock:
            connection = self.get_connection_from_address(address)
            if connection:
                return ImmediateFuture(connection)
            else:
                pending = self._pending_connections.get(address, None)
                if pending:
                    return pending
                else:
                    try:
                        translated = self._address_provider.translate(address)
                        if not translated:
                            return ImmediateExceptionFuture(
                                ValueError(
                                    "Address translator could not translate address %s"
                                    % address))

                        factory = self._reactor.connection_factory
                        connection = factory(
                            self,
                            self._connection_id_generator.get_and_increment(),
                            translated, self._client.config.network,
                            self._invocation_service.handle_client_message)
                    except IOError:
                        return ImmediateExceptionFuture(
                            sys.exc_info()[1],
                            sys.exc_info()[2])

                    future = self._authenticate(connection).continue_with(
                        self._on_auth, connection, address)
                    self._pending_connections[address] = future
                    return future

    def _authenticate(self, connection):
        """
        Sends the authentication request over the given connection.

        :param connection: connection to authenticate.
        :return: the future of the authentication invocation. It yields the raw response message.
        """
        client = self._client
        request = client_authentication_codec.encode_request(
            client.config.cluster_name, None, None, self.client_uuid,
            CLIENT_TYPE, SERIALIZATION_VERSION, CLIENT_VERSION, client.name,
            self._labels)

        def pass_through(m):
            # The response is decoded later, so it is handed over unchanged.
            return m

        auth_invocation = Invocation(request,
                                     connection=connection,
                                     urgent=True,
                                     response_handler=pass_through)
        self._invocation_service.invoke(auth_invocation)
        return auth_invocation.future

    def _on_auth(self, response, connection, address):
        """
        Handles the outcome of an authentication request.

        On every failure path the connection is closed and the pending entry registered under
        ``address`` is removed, so that a later attempt for the same address starts from scratch
        instead of getting the failed future back.

        :param response: completed future of the authentication invocation.
        :param connection: connection the request was sent over.
        :param address: address the pending connection attempt is registered under.
        :return: the authenticated connection.
        :raises AuthenticationError: if the credentials failed or the status is unknown.
        :raises ClientNotAllowedInClusterError: if the cluster does not allow the client.
        :raises IllegalStateError: if the serialization versions do not match.
        """
        if response.is_success():
            response = client_authentication_codec.decode_response(
                response.result())
            status = response["status"]
            if status == _AuthenticationStatus.AUTHENTICATED:
                return self._handle_successful_auth(response, connection,
                                                    address)

            if status == _AuthenticationStatus.CREDENTIALS_FAILED:
                err = AuthenticationError(
                    "Authentication failed. The configured cluster name on "
                    "the client does not match the one configured in the cluster."
                )
            elif status == _AuthenticationStatus.NOT_ALLOWED_IN_CLUSTER:
                err = ClientNotAllowedInClusterError(
                    "Client is not allowed in the cluster")
            elif status == _AuthenticationStatus.SERIALIZATION_VERSION_MISMATCH:
                err = IllegalStateError(
                    "Server serialization version does not match to client")
            else:
                err = AuthenticationError(
                    "Authentication status code not supported. status: %s" %
                    status)

            connection.close("Failed to authenticate connection", err)
            # The close handler removes the pending entry by the connection's own
            # connected address, which is not necessarily ``address``, so the entry
            # is removed here as well, just like in the error branch below.
            self._pending_connections.pop(address, None)
            raise err
        else:
            e = response.exception()
            connection.close("Failed to authenticate connection", e)
            self._pending_connections.pop(address, None)
            six.reraise(e.__class__, e, response.traceback())

    def _handle_successful_auth(self, response, connection, address):
        """
        Registers a connection that authenticated successfully.

        The remote details from the response are stored on the connection, the connection is
        added to the active connections, the ``CONNECTED`` lifecycle event is fired if this is
        the only connection, and the connection listeners are notified.

        :param response: decoded authentication response.
        :param connection: the authenticated connection.
        :param address: address the pending connection attempt is registered under.
        :return: the connection.
        :raises ClientNotAllowedInClusterError: if the partition count of the member is not
            accepted by the partition service.
        """
        self._check_partition_count(response["partition_count"])

        server_version_str = response["server_hazelcast_version"]
        remote_address = response["address"]
        remote_uuid = response["member_uuid"]

        connection.remote_address = remote_address
        connection.server_version = calculate_version(server_version_str)
        connection.remote_uuid = remote_uuid

        new_cluster_id = response["cluster_id"]

        is_initial_connection = not self.active_connections
        # A cluster id that differs from the one seen before means that the client
        # ended up in another cluster (or a restarted one).
        changed_cluster = (is_initial_connection
                           and self._cluster_id is not None
                           and self._cluster_id != new_cluster_id)
        if changed_cluster:
            self.logger.warning(
                "Switching from current cluster: %s to new cluster: %s" %
                (self._cluster_id, new_cluster_id),
                extra=self._logger_extras)
            self._on_cluster_restart()

        with self._lock:
            self.active_connections[remote_uuid] = connection
            self._pending_connections.pop(address, None)

        if is_initial_connection:
            self._cluster_id = new_cluster_id
            self._lifecycle_service.fire_lifecycle_event(
                LifecycleState.CONNECTED)

        self.logger.info(
            "Authenticated with server %s:%s, server version: %s, local address: %s"
            % (remote_address, remote_uuid, server_version_str,
               connection.local_address),
            extra=self._logger_extras)

        for on_connection_opened, _ in self._connection_listeners:
            if on_connection_opened:
                try:
                    on_connection_opened(connection)
                except Exception:
                    # A misbehaving listener must not prevent the others from being
                    # notified, but KeyboardInterrupt/SystemExit are not swallowed.
                    self.logger.exception("Exception in connection listener",
                                          extra=self._logger_extras)

        # The connection may have been closed while it was being registered. In that
        # case run the close handling again so that the bookkeeping added above is removed.
        if not connection.live:
            self.on_connection_close(connection, None)

        return connection

    def _on_cluster_restart(self):
        self._near_cache_manager.clear_near_caches()
        self._cluster_service.clear_member_list_version()

    def _check_partition_count(self, partition_count):
        if not self._partition_service.check_and_set_partition_count(
                partition_count):
            raise ClientNotAllowedInClusterError(
                "Client can not work with this cluster because it has a "
                "different partition count. Expected partition count: %d, "
                "Member partition count: %d" %
                (self._partition_service.partition_count, partition_count))

    def _check_client_active(self):
        if not self._lifecycle_service.running:
            raise HazelcastClientNotActiveError()

    def _get_possible_addresses(self):
        """
        Collects the addresses a cluster connection may be attempted on, without duplicates.

        Addresses of the known members come first, followed by the primary and then the secondary
        addresses loaded from the address provider. Each group is shuffled when member list
        shuffling is enabled.

        :return: an iterator over the candidate addresses.
        """
        known_members = [(member.address, None)
                         for member in self._cluster_service.get_members()]
        if self._shuffle_member_list:
            random.shuffle(known_members)

        # An ordered dict is used as an ordered set: keys are addresses, values are unused.
        candidates = OrderedDict(known_members)

        primaries, secondaries = self._address_provider.load_addresses()
        if self._shuffle_member_list:
            random.shuffle(primaries)
            random.shuffle(secondaries)

        for group in (primaries, secondaries):
            for address in group:
                candidates[address] = None

        return six.iterkeys(candidates)
Ejemplo n.º 7
0
class InvocationService(object):
    logger = logging.getLogger("HazelcastClient.InvocationService")

    def __init__(self, client, reactor, logger_extras):
        """
        Prepares the service; the collaborating services are supplied later through ``start``.

        :param client: client instance; its ``config`` and ``properties`` are read here.
        :param reactor: reactor used to schedule retries.
        :param logger_extras: value passed as ``extra`` to every log call.
        """
        config = client.config
        # The invoke strategy is fixed once, depending on the routing mode.
        self.invoke = (self._invoke_smart
                       if config.network.smart_routing else self._invoke_non_smart)

        self._client = client
        self._reactor = reactor
        self._logger_extras = logger_extras

        # Collaborators that are set in start().
        self._partition_service = None
        self._connection_manager = None
        self._listener_service = None
        self._check_invocation_allowed_fn = None

        # Invocations waiting for a response, keyed by correlation id.
        self._pending = {}
        self._next_correlation_id = AtomicInteger(1)
        self._is_redo_operation = config.network.redo_operation
        self._invocation_timeout = self._init_invocation_timeout()
        self._invocation_retry_pause = self._init_invocation_retry_pause()
        self._shutdown = False

    def start(self, partition_service, connection_manager, listener_service):
        self._partition_service = partition_service
        self._connection_manager = connection_manager
        self._listener_service = listener_service
        self._check_invocation_allowed_fn = connection_manager.check_invocation_allowed

    def handle_client_message(self, message):
        correlation_id = message.get_correlation_id()

        if message.start_frame.has_event_flag():
            self._listener_service.handle_client_message(
                message, correlation_id)
            return

        invocation = self._pending.pop(correlation_id, None)
        if not invocation:
            self.logger.warning("Got message with unknown correlation id: %s",
                                message,
                                extra=self._logger_extras)
            return

        if message.get_message_type() == EXCEPTION_MESSAGE_TYPE:
            error = create_error_from_message(message)
            return self._handle_exception(invocation, error)

        invocation.set_response(message)

    def shutdown(self):
        """
        Marks the service as shut down and fails every pending invocation with
        ``HazelcastClientNotActiveError``.
        """
        self._shutdown = True
        # Work on a snapshot, handling an error may modify the pending invocations.
        in_flight = list(six.itervalues(self._pending))
        for invocation in in_flight:
            self._handle_exception(invocation, HazelcastClientNotActiveError())

    def _invoke_on_partition_owner(self, invocation, partition_id):
        owner_uuid = self._partition_service.get_partition_owner(partition_id)
        if not owner_uuid:
            self.logger.debug("Partition owner is not assigned yet",
                              extra=self._logger_extras)
            return False
        return self._invoke_on_target(invocation, owner_uuid)

    def _invoke_on_target(self, invocation, owner_uuid):
        connection = self._connection_manager.get_connection(owner_uuid)
        if not connection:
            self.logger.debug("Client is not connected to target: %s" %
                              owner_uuid,
                              extra=self._logger_extras)
            return False
        return self._send(invocation, connection)

    def _invoke_on_random_connection(self, invocation):
        connection = self._connection_manager.get_random_connection()
        if not connection:
            self.logger.debug("No connection found to invoke",
                              extra=self._logger_extras)
            return False
        return self._send(invocation, connection)

    def _invoke_smart(self, invocation):
        if not invocation.timeout:
            invocation.timeout = self._invocation_timeout + time.time()

        try:
            if not invocation.urgent:
                self._check_invocation_allowed_fn()

            connection = invocation.connection
            if connection:
                invoked = self._send(invocation, connection)
                if not invoked:
                    self._handle_exception(
                        invocation,
                        IOError("Could not invoke on connection %s" %
                                connection))
                return

            if invocation.partition_id != -1:
                invoked = self._invoke_on_partition_owner(
                    invocation, invocation.partition_id)
            elif invocation.uuid:
                invoked = self._invoke_on_target(invocation, invocation.uuid)
            else:
                invoked = self._invoke_on_random_connection(invocation)

            if not invoked:
                invoked = self._invoke_on_random_connection(invocation)

            if not invoked:
                self._handle_exception(
                    invocation, IOError("No connection found to invoke"))
        except Exception as e:
            self._handle_exception(invocation, e)

    def _invoke_non_smart(self, invocation):
        if not invocation.timeout:
            invocation.timeout = self._invocation_timeout + time.time()

        try:
            if not invocation.urgent:
                self._check_invocation_allowed_fn()

            connection = invocation.connection
            if connection:
                invoked = self._send(invocation, connection)
                if not invoked:
                    self._handle_exception(
                        invocation,
                        IOError("Could not invoke on connection %s" %
                                connection))
                return

            if not self._invoke_on_random_connection(invocation):
                self._handle_exception(
                    invocation, IOError("No connection found to invoke"))
        except Exception as e:
            self._handle_exception(invocation, e)

    def _init_invocation_retry_pause(self):
        invocation_retry_pause = self._client.properties.get_seconds_positive_or_default(
            self._client.properties.INVOCATION_RETRY_PAUSE_MILLIS)
        return invocation_retry_pause

    def _init_invocation_timeout(self):
        invocation_timeout = self._client.properties.get_seconds_positive_or_default(
            self._client.properties.INVOCATION_TIMEOUT_SECONDS)
        return invocation_timeout

    def _send(self, invocation, connection):
        if self._shutdown:
            raise HazelcastClientNotActiveError()

        correlation_id = self._next_correlation_id.get_and_increment()
        message = invocation.request
        message.set_correlation_id(correlation_id)
        message.set_partition_id(invocation.partition_id)
        self._pending[correlation_id] = invocation

        if invocation.event_handler:
            self._listener_service.add_event_handler(correlation_id,
                                                     invocation.event_handler)

        self.logger.debug("Sending %s to %s",
                          message,
                          connection,
                          extra=self._logger_extras)

        if not connection.send_message(message):
            if invocation.event_handler:
                self._listener_service.remove_event_handler(correlation_id)
            return False
        return True

    def _handle_exception(self, invocation, error, traceback=None):
        if self.logger.isEnabledFor(logging.DEBUG):
            self.logger.debug("Got exception for request %s, error: %s" %
                              (invocation.request, error),
                              extra=self._logger_extras)

        if not self._client.lifecycle_service.is_running():
            invocation.set_exception(HazelcastClientNotActiveError(),
                                     traceback)
            self._pending.pop(invocation.request.get_correlation_id(), None)
            return

        if not self._should_retry(invocation, error):
            invocation.set_exception(error, traceback)
            self._pending.pop(invocation.request.get_correlation_id(), None)
            return

        if invocation.timeout < time.time():
            self.logger.debug(
                "Error will not be retried because invocation timed out: %s",
                error,
                extra=self._logger_extras)
            invocation.set_exception(
                HazelcastTimeoutError(
                    "Request timed out because an error occurred after "
                    "invocation timeout: %s" % error, traceback))
            self._pending.pop(invocation.request.get_correlation_id(), None)
            return

        invoke_func = functools.partial(self.invoke, invocation)
        self._reactor.add_timer(self._invocation_retry_pause, invoke_func)

    def _should_retry(self, invocation, error):
        """
        Decides whether a failed invocation may be attempted again.

        :param invocation: the invocation that failed.
        :param error: the error the invocation failed with.
        :return: a truthy value if the invocation should be retried, a falsy value otherwise.
        """
        # Invocations bound to a connection are retried on connection level errors.
        connection_errors = (IOError, TargetDisconnectedError)
        if invocation.connection and isinstance(error, connection_errors):
            return True

        # Invocations aimed at a specific member are not retried once the target
        # reports that it is not a member.
        if invocation.uuid and isinstance(error, TargetNotMemberError):
            return False

        always_retried = (IOError, HazelcastInstanceNotActiveError)
        if isinstance(error, always_retried) or is_retryable_error(error):
            return True

        if not isinstance(error, TargetDisconnectedError):
            return False

        return invocation.request.retryable or self._is_redo_operation