Ejemplo n.º 1
0
    def _gather_node_ip_addresses(self, nodes, lapse, ssh_timeout, remake=False):
        """
        Try to open an SSH connection to every node in `nodes`.

        Nodes are polled in rounds, sleeping ``self.polling_interval``
        seconds between rounds, until every node has answered or a
        ``TimeoutError`` interrupts the loop (the ``timeout`` helper is
        armed with `lapse` for that purpose).  Each node that answers is
        removed from `nodes` *in place*, and the host keys seen on its
        connection are handed to ``self._save_keys_to_known_hosts_file``.

        :param nodes: mutable collection of nodes still to be contacted
        :param lapse: overall time budget, in seconds
        :param ssh_timeout: forwarded as ``timeout`` to ``node.connect``
        :param remake: if true, delete an existing "known hosts" file
          before starting, so that it gets rebuilt
        :return: `nodes`, now holding only the nodes that were not reached
        """
        # Work on a local copy of the path: it is reset to ``None`` below
        # when the file cannot be opened, but the cluster-wide setting
        # must be kept in case that failure is only transient.
        hosts_file = self.known_hosts_file

        # With ``remake=True`` start from a clean slate; this prevents
        # "Invalid host key" errors caused by stale entries.
        if remake and os.path.isfile(hosts_file):
            os.remove(hosts_file)

        # Opening in append mode creates the file when it is missing and
        # leaves existing contents alone; loading the host keys below
        # would otherwise raise an error.
        try:
            with open(hosts_file, 'a'):
                pass
        except IOError as err:
            log.warning("Error opening SSH 'known hosts' file `%s`: %s",
                        hosts_file, err)
            hosts_file = None

        host_keys = paramiko.hostkeys.HostKeys(hosts_file)

        with timeout(lapse, raise_timeout_error):
            try:
                while nodes:
                    # Iterate over a snapshot, since reachable nodes are
                    # dropped from `nodes` while looping.
                    for candidate in copy(nodes):
                        client = candidate.connect(
                            keyfile=hosts_file,
                            timeout=ssh_timeout)
                        if not client:
                            # no connection this time; retry next round
                            continue
                        log.info("Connection to node `%s` successful,"
                                 " using IP address %s to connect.",
                                 candidate.name, candidate.connection_ip())
                        # Collect every host key seen on this connection
                        # and persist the accumulated set right away.
                        for hostname, entries in client.get_host_keys().items():
                            for key_type, key_data in entries.items():
                                host_keys.add(hostname, key_type, key_data)
                        self._save_keys_to_known_hosts_file(host_keys)
                        nodes.remove(candidate)
                    if not nodes:
                        break
                    time.sleep(self.polling_interval)

            except TimeoutError:
                log.error(
                    "Some nodes of the cluster were unreachable"
                    " within the given %d-seconds timeout: %s",
                    lapse, ', '.join(missing.name for missing in nodes))

        # whatever is left in `nodes` could not be reached
        return nodes
Ejemplo n.º 2
0
    def _gather_node_ip_addresses(self, nodes, lapse):
        """
        Try to open an SSH connection to every node in `nodes`.

        Nodes are polled in rounds, sleeping ``self.polling_interval``
        seconds between rounds, until every node has answered or a
        ``TimeoutError`` interrupts the loop (the ``timeout`` helper is
        armed with `lapse` for that purpose).  Each node that answers is
        removed from `nodes` *in place*, and the host keys seen on its
        connection are handed to ``self._save_keys_to_known_hosts_file``.

        :param nodes: mutable collection of nodes still to be contacted
        :param lapse: overall time budget, in seconds
        :return: `nodes`, now holding only the nodes that were not reached
        """
        # Use a local variable for the path: it becomes ``None`` below if
        # the file cannot be opened, while the cluster-wide setting is
        # preserved in case the error turns out to be transient.
        hosts_file = self.known_hosts_file

        # "Touch" the file (append mode creates it if absent and never
        # truncates it), otherwise reading the host keys would fail.
        try:
            with open(hosts_file, 'a'):
                pass
        except IOError as err:
            log.warning("Error opening SSH 'known hosts' file `%s`: %s",
                        hosts_file, err)
            hosts_file = None

        host_keys = paramiko.hostkeys.HostKeys(hosts_file)

        with timeout(lapse, raise_timeout_error):
            try:
                while nodes:
                    # Loop over a snapshot: nodes that answer are removed
                    # from `nodes` as we go.
                    for candidate in copy(nodes):
                        client = candidate.connect(keyfile=hosts_file)
                        if not client:
                            # still unreachable; try again next round
                            continue
                        log.info("Connection to node `%s` successful,"
                                 " using IP address %s to connect.",
                                 candidate.name, candidate.connection_ip())
                        # Merge the host keys seen on this connection and
                        # write the accumulated set out immediately.
                        for hostname, entries in client.get_host_keys().items():
                            for key_type, key_data in entries.items():
                                host_keys.add(hostname, key_type, key_data)
                        self._save_keys_to_known_hosts_file(host_keys)
                        nodes.remove(candidate)
                    if not nodes:
                        break
                    time.sleep(self.polling_interval)

            except TimeoutError:
                log.error(
                    "Some nodes of the cluster were unreachable"
                    " within the given %d-seconds timeout: %s",
                    lapse, ', '.join(missing.name for missing in nodes))

        # whatever is left in `nodes` could not be reached
        return nodes
Ejemplo n.º 3
0
 def _check_starting_nodes(self, nodes, lapse):
     """
     Poll `nodes` until each one reports being alive.

     Every round keeps only the nodes whose ``is_alive()`` is false,
     then sleeps ``self.polling_interval`` seconds before asking again.
     The loop ends when no node is left or when a ``TimeoutError``
     interrupts it (the ``timeout`` helper is armed with `lapse`
     seconds for that purpose); in the latter case the stragglers are
     logged as an error.

     :param nodes: collection of nodes to wait for
     :param lapse: maximum time to wait, in seconds
     :return: the nodes not (yet) alive, so they can be excluded from
       later rounds; this is `nodes` itself when it was empty on
       entry, otherwise a ``set``
     """
     pending = nodes
     with timeout(lapse, raise_timeout_error):
         try:
             while pending:
                 pending = {candidate for candidate in pending
                            if not candidate.is_alive()}
                 if not pending:
                     # everybody is up, no need to wait any longer
                     break
                 log.debug("Waiting for %d more nodes to come up ...", len(pending))
                 time.sleep(self.polling_interval)
         except TimeoutError:
             log.error("Some nodes did not start correctly"
                       " within the given %d-seconds timeout: %s",
                       lapse, ', '.join(late.name for late in pending))
     return pending
Ejemplo n.º 4
0
 def _check_starting_nodes(self, nodes, lapse):
     """
     Wait for the given nodes to come up, checking them periodically.

     On each pass the nodes whose ``is_alive()`` returns true are
     dropped; if any remain, the method sleeps for
     ``self.polling_interval`` seconds and checks again.  Waiting stops
     once nothing remains or once a ``TimeoutError`` is raised inside
     the ``timeout(lapse, ...)`` context, in which case the names of
     the remaining nodes are logged as an error.

     :param nodes: collection of nodes to wait for
     :param lapse: maximum time to wait, in seconds
     :return: nodes that have not started; the very object passed in
       if it was empty, otherwise a ``set`` of the remaining nodes
     """
     not_started = nodes
     with timeout(lapse, raise_timeout_error):
         try:
             while not_started:
                 still_down = set()
                 still_down.update(
                     member for member in not_started
                     if not member.is_alive())
                 not_started = still_down
                 if not not_started:
                     # all nodes answered; skip the final sleep
                     break
                 log.debug("Waiting for %d more nodes to come up ...", len(not_started))
                 time.sleep(self.polling_interval)
         except TimeoutError:
             log.error("Some nodes did not start correctly"
                       " within the given %d-seconds timeout: %s",
                       lapse, ', '.join(member.name for member in not_started))
     # callers use this to exclude dead nodes from coming rounds
     return not_started