Example #1
0
    def start(self):
        """
        Start this manager and all remote managers.

        A listener is opened on this machine's fully qualified host name and
        ``self._start_hosts`` is run in a daemon thread with the listener's
        address and the current credentials. For every host in
        ``self._hostlist`` found in the ``started`` state, one callback
        connection is accepted and an ``(index, address, pubkey_text)`` tuple
        is read from it; ``index`` selects the reporting host in
        ``self._hostlist``, which need not be the host being examined.

        A reporting host with an address gets a :class:`HostManager`, is
        marked ``up`` and is appended to ``self._up``. A reporting host whose
        address is ``None`` is logged as dead (``pubkey_text`` then carries
        the exception text) and marked ``failed``.

        Waiting stops when no host is left in the ``init`` or ``started``
        state, or after 600 passes in which no host came up (about 60 seconds
        of sleeping), in which case the pending hosts are logged as warnings.
        ``self._up`` ends up sorted by host name.
        """
        super(Cluster, self).start()
        hostname = socket.getfqdn()
        listener = connection.Listener(address=(hostname, 0),
                                       authkey=self._authkey,
                                       backlog=5)  # Default is 1.
        # TODO: support multiple addresses if multiple networks are attached.

        # Start managers in separate thread to avoid losing connections.
        starter = threading.Thread(target=self._start_hosts,
                                   args=(listener.address, get_credentials()))
        starter.daemon = True
        starter.start()

        # Accept callback connections from started managers.
        waiting = ['']  # Placeholder so the loop body runs at least once.
        retry = 0
        while waiting:
            host_processed = False
            for host in self._hostlist:
                host.poll()
                if host.state != 'started':
                    continue

                # Accept connection from *any* host.
                _LOGGER.debug('waiting for a connection, host %s',
                              host.hostname)
                # This will hang if server doesn't receive our address.
                conn = listener.accept()
                try:
                    i, address, pubkey_text = conn.recv()
                finally:
                    # Release the connection even if the reply can't be read.
                    conn.close()

                other_host = self._hostlist[i]
                if address is None:
                    _LOGGER.error('Host %s died: %s', other_host.hostname,
                                  pubkey_text)  # Exception text.
                    # Take the host out of the 'started' state so later
                    # passes do not call the blocking accept() on its behalf.
                    other_host.state = 'failed'
                    continue

                other_host.manager = HostManager.from_address(
                    address, self._authkey)
                other_host.state = 'up'
                if pubkey_text:
                    other_host.manager._pubkey = \
                        decode_public_key(pubkey_text)
                host_processed = True
                _LOGGER.debug('Host %s is now up', other_host.hostname)
                self._up.append(other_host)

            # See if there are still hosts to wait for.
            waiting = []
            for host in self._hostlist:
                host.poll()
                if host.state in ('init', 'started'):
                    waiting.append(host)
            if not waiting:
                break

            # Only passes that brought no host up count against the timeout.
            if not host_processed:
                retry += 1
                if retry < 600:  # ~60 seconds.
                    time.sleep(0.1)
                else:
                    _LOGGER.warning('Cluster startup timeout,'
                                    ' hosts not started:')
                    for host in waiting:
                        _LOGGER.warning('    %s (%s) in dir %s',
                                        host.hostname, host.state,
                                        host.tempdir)
                    break

        self._up = sorted(self._up, key=lambda host: host.hostname)

        # Keep a reference to the instance-level shutdown attribute, then
        # delete it so that self.shutdown resolves through the class again.
        self._base_shutdown = self.shutdown
        del self.shutdown
    def start(self):
        """
        Start this manager and all remote managers.

        A listener is opened on this machine's fully qualified host name and
        ``self._start_hosts`` is run in a daemon thread with the listener's
        address and the current credentials. For every host in
        ``self._hostlist`` found in the ``started`` state, one callback
        connection is accepted and an ``(index, address, pubkey_text)`` tuple
        is read from it; ``index`` selects the reporting host in
        ``self._hostlist``, which need not be the host being examined.

        A reporting host with an address gets a :class:`HostManager`, is
        marked ``up`` and is appended to ``self._up``. A reporting host whose
        address is ``None`` is logged as dead (``pubkey_text`` then carries
        the exception text) and marked ``failed``.

        Waiting stops when no host is left in the ``init`` or ``started``
        state, or after 300 passes in which no host came up (about 60 seconds
        of sleeping), in which case the pending hosts are logged as warnings.
        ``self._up`` ends up sorted by host name.
        """
        super(Cluster, self).start()
        hostname = socket.getfqdn()
        listener = connection.Listener(address=(hostname, 0),
                                       authkey=self._authkey,
                                       backlog=5)  # Default is 1.
        # TODO: support multiple addresses if multiple networks are attached.

        # Start managers in separate thread to avoid losing connections.
        starter = threading.Thread(target=self._start_hosts,
                                   args=(listener.address, get_credentials()))
        starter.daemon = True
        starter.start()

        # Accept callback connections from started managers.
        waiting = ['']  # Placeholder so the loop body runs at least once.
        retry = 0
        while waiting:
            host_processed = False
            for host in self._hostlist:
                host.poll()
                if host.state != 'started':
                    continue

                # Accept connection from *any* host.
                _LOGGER.debug('waiting for a connection, host %s',
                              host.hostname)
                # This will hang if server doesn't receive our address.
                conn = listener.accept()
                try:
                    i, address, pubkey_text = conn.recv()
                finally:
                    # Release the connection even if the reply can't be read.
                    conn.close()

                other_host = self._hostlist[i]
                if address is None:
                    _LOGGER.error('Host %s died: %s', other_host.hostname,
                                  pubkey_text)  # Exception text.
                    # Take the host out of the 'started' state so later
                    # passes do not call the blocking accept() on its behalf.
                    other_host.state = 'failed'
                    continue

                other_host.manager = HostManager.from_address(address,
                                                              self._authkey)
                other_host.state = 'up'
                if pubkey_text:
                    other_host.manager._pubkey = \
                        decode_public_key(pubkey_text)
                host_processed = True
                _LOGGER.debug('Host %s is now up', other_host.hostname)
                self._up.append(other_host)

            # See if there are still hosts to wait for.
            waiting = []
            for host in self._hostlist:
                host.poll()
                if host.state in ('init', 'started'):
                    waiting.append(host)
            if not waiting:
                break

            # Only passes that brought no host up count against the timeout.
            if not host_processed:
                retry += 1
                if retry < 300:  # ~60 seconds.
                    time.sleep(0.2)
                else:
                    _LOGGER.warning('Cluster startup timeout,'
                                    ' hosts not started:')
                    for host in waiting:
                        _LOGGER.warning('    %s (%s) in dir %s',
                                        host.hostname, host.state,
                                        host.tempdir)
                    break

        self._up = sorted(self._up, key=lambda host: host.hostname)

        # Keep a reference to the instance-level shutdown attribute, then
        # delete it so that self.shutdown resolves through the class again.
        self._base_shutdown = self.shutdown
        del self.shutdown
    def start(self):
        """
        Start this manager and all remote managers. If some managers fail to
        start, errors are logged and the corresponding host's state is set to
        ``failed``. You can use ``len(cluster)`` to determine how many remote
        managers are available.

        For every host found in the ``started`` state, a callback connection
        is awaited for up to 30 seconds in a helper thread. On timeout every
        host still in the ``started`` state is terminated, cleaned up and
        marked ``failed``. Otherwise an ``(index, address, pubkey_text)``
        tuple is read from the connection; ``index`` selects the reporting
        host in ``self._hostlist``, which need not be the host being examined.

        Waiting stops when no host is left in the ``init`` or ``started``
        state, or after 300 passes in which no host came up (about 60 seconds
        of sleeping). ``self._up`` ends up sorted by host name.

        A :class:`RuntimeError` will be raised if no managers were successfully
        started.
        """
        super(Cluster, self).start()
        listener = connection.Listener(address=(self._hostname, 0),
                                       authkey=self._authkey,
                                       backlog=5)  # Default is 1.

        # Start managers in separate thread to avoid losing connections.
        starter = threading.Thread(target=self._start_hosts,
                                   args=(listener.address, get_credentials()))
        starter.daemon = True
        starter.start()

        # Accept callback connections from started managers.
        waiting = ['']  # Placeholder so the loop body runs at least once.
        retry = 0
        while waiting:
            host_processed = False
            for host in self._hostlist:
                host.poll()
                if host.state != 'started':
                    continue

                # Accept connection from *any* host.
                _LOGGER.debug('waiting for a connection, host %s',
                              host.hostname)
                # Normal accept() can hang.
                retval = []
                accepter = threading.Thread(target=self._accept,
                                            args=(listener, retval),
                                            name='ClusterAccepter')
                accepter.daemon = True
                accepter.start()
                accepter.join(30)
                if accepter.is_alive():
                    # NOTE(review): the timed-out accepter thread is left
                    # running; presumably it is still blocked accepting on
                    # the listener and could take a later host's connection
                    # -- confirm against _accept().
                    stalled_names = [entry.hostname
                                     for entry in self._hostlist
                                     if entry.state == 'started']
                    _LOGGER.error('timeout waiting for reply from %s',
                                  stalled_names)
                    # Distinct loop name: do not clobber the outer 'host'.
                    for stalled in self._hostlist:
                        if stalled.state != 'started':
                            continue
                        if stalled.proc is not None:
                            stalled.proc.terminate()
                        if stalled.reverse_cleanup is not None:
                            stalled.reverse_cleanup[0](
                                *stalled.reverse_cleanup[1:])
                        stalled.state = 'failed'
                    continue

                if not retval:
                    # The accepter ended without handing back a connection
                    # (presumably _accept() raised). Log it instead of
                    # failing with a bare IndexError; the retry counter
                    # below still bounds the overall wait.
                    _LOGGER.error('accept failed while waiting for reply,'
                                  ' host %s', host.hostname)
                    continue

                conn = retval[0]
                try:
                    i, address, pubkey_text = conn.recv()
                finally:
                    # Release the connection even if the reply can't be read.
                    conn.close()

                other_host = self._hostlist[i]
                if address is None:
                    _LOGGER.error('Host %s died: %s', other_host.hostname,
                                  pubkey_text)  # Exception text.
                    other_host.state = 'failed'
                    continue

                try:
                    other_host.manager = \
                        HostManager.from_address(address, self._authkey,
                                                 other_host)
                except Exception as exc:
                    _LOGGER.error("Can't start manager for %s: %s",
                                  other_host.hostname,
                                  str(exc) or repr(exc))
                    if other_host.proc is not None:
                        other_host.proc.terminate()
                    other_host.state = 'failed'
                    continue

                other_host.state = 'up'
                if pubkey_text:
                    other_host.manager._pubkey = \
                        decode_public_key(pubkey_text)
                host_processed = True
                _LOGGER.debug('Host %s is now up', other_host.hostname)
                self._up.append(other_host)

            # See if there are still hosts to wait for.
            waiting = []
            for host in self._hostlist:
                host.poll()
                if host.state in ('init', 'started'):
                    waiting.append(host)
            if not waiting:
                break

            # Only passes that brought no host up count against the timeout.
            if not host_processed:
                retry += 1
                if retry < 300:  # ~60 seconds.
                    time.sleep(0.2)
                else:
                    _LOGGER.warning('Cluster startup timeout,'
                                    ' hosts not started:')
                    for host in waiting:
                        _LOGGER.warning('    %s (%s) in dir %s',
                                        host.hostname, host.state,
                                        host.tempdir)
                    break

        self._up = sorted(self._up, key=lambda host: host.hostname)

        # So our class defined shutdown() is called before the superclass
        # installed shutdown().
        self._base_shutdown = self.shutdown
        del self.shutdown

        if not self._up:
            raise RuntimeError('No hosts successfully started')
Example #4
0
    def start(self):
        """
        Start this manager and all remote managers. If some managers fail to
        start, errors are logged and the corresponding host's state is set to
        ``failed``. You can use ``len(cluster)`` to determine how many remote
        managers are available.

        For every host found in the ``started`` state, a callback connection
        is awaited for up to 30 seconds in a helper thread. On timeout every
        host still in the ``started`` state is terminated, cleaned up and
        marked ``failed``. Otherwise an ``(index, address, pubkey_text)``
        tuple is read from the connection; ``index`` selects the reporting
        host in ``self._hostlist``, which need not be the host being examined.

        Waiting stops when no host is left in the ``init`` or ``started``
        state, or after 300 passes in which no host came up (about 60 seconds
        of sleeping). ``self._up`` ends up sorted by host name.

        A :class:`RuntimeError` will be raised if no managers were successfully
        started.
        """
        super(Cluster, self).start()
        listener = connection.Listener(address=(self._hostname, 0),
                                       authkey=self._authkey,
                                       backlog=5)  # Default is 1.

        # Start managers in separate thread to avoid losing connections.
        starter = threading.Thread(target=self._start_hosts,
                                   args=(listener.address, get_credentials()))
        starter.daemon = True
        starter.start()

        # Accept callback connections from started managers.
        waiting = ['']  # Placeholder so the loop body runs at least once.
        retry = 0
        while waiting:
            host_processed = False
            for host in self._hostlist:
                host.poll()
                if host.state != 'started':
                    continue

                # Accept connection from *any* host.
                _LOGGER.debug('waiting for a connection, host %s',
                              host.hostname)
                # Normal accept() can hang.
                retval = []
                accepter = threading.Thread(target=self._accept,
                                            args=(listener, retval),
                                            name='ClusterAccepter')
                accepter.daemon = True
                accepter.start()
                accepter.join(30)
                if accepter.is_alive():
                    # NOTE(review): the timed-out accepter thread is left
                    # running; presumably it is still blocked accepting on
                    # the listener and could take a later host's connection
                    # -- confirm against _accept().
                    stalled_names = [entry.hostname
                                     for entry in self._hostlist
                                     if entry.state == 'started']
                    _LOGGER.error('timeout waiting for reply from %s',
                                  stalled_names)
                    # Distinct loop name: do not clobber the outer 'host'.
                    for stalled in self._hostlist:
                        if stalled.state != 'started':
                            continue
                        if stalled.proc is not None:
                            stalled.proc.terminate()
                        if stalled.reverse_cleanup is not None:
                            stalled.reverse_cleanup[0](
                                *stalled.reverse_cleanup[1:])
                        stalled.state = 'failed'
                    continue

                if not retval:
                    # The accepter ended without handing back a connection
                    # (presumably _accept() raised). Log it instead of
                    # failing with a bare IndexError; the retry counter
                    # below still bounds the overall wait.
                    _LOGGER.error('accept failed while waiting for reply,'
                                  ' host %s', host.hostname)
                    continue

                conn = retval[0]
                try:
                    i, address, pubkey_text = conn.recv()
                finally:
                    # Release the connection even if the reply can't be read.
                    conn.close()

                other_host = self._hostlist[i]
                if address is None:
                    _LOGGER.error('Host %s died: %s', other_host.hostname,
                                  pubkey_text)  # Exception text.
                    other_host.state = 'failed'
                    continue

                try:
                    other_host.manager = \
                        HostManager.from_address(address, self._authkey,
                                                 other_host)
                except Exception as exc:
                    _LOGGER.error("Can't start manager for %s: %s",
                                  other_host.hostname,
                                  str(exc) or repr(exc))
                    if other_host.proc is not None:
                        other_host.proc.terminate()
                    other_host.state = 'failed'
                    continue

                other_host.state = 'up'
                if pubkey_text:
                    other_host.manager._pubkey = \
                        decode_public_key(pubkey_text)
                host_processed = True
                _LOGGER.debug('Host %s is now up', other_host.hostname)
                self._up.append(other_host)

            # See if there are still hosts to wait for.
            waiting = []
            for host in self._hostlist:
                host.poll()
                if host.state in ('init', 'started'):
                    waiting.append(host)
            if not waiting:
                break

            # Only passes that brought no host up count against the timeout.
            if not host_processed:
                retry += 1
                if retry < 300:  # ~60 seconds.
                    time.sleep(0.2)
                else:
                    _LOGGER.warning('Cluster startup timeout,'
                                    ' hosts not started:')
                    for host in waiting:
                        _LOGGER.warning('    %s (%s) in dir %s',
                                        host.hostname, host.state,
                                        host.tempdir)
                    break

        self._up = sorted(self._up, key=lambda host: host.hostname)

        # So our class defined shutdown() is called before the superclass
        # installed shutdown().
        self._base_shutdown = self.shutdown
        del self.shutdown

        if not self._up:
            raise RuntimeError('No hosts successfully started')