Exemple #1
0
    def init_host(self, admin_context=None):
        """Initialize the conductor host.

        :param admin_context: the admin context to pass to periodic tasks.
        :raises: RuntimeError when conductor is already running.
        :raises: NoDriversLoaded when no drivers are enabled on the conductor.
        :raises: DriverNotFound if a driver is enabled that does not exist.
        :raises: DriverLoadError if an enabled driver cannot be loaded.
        """
        if self._started:
            raise RuntimeError(_('Attempt to start an already running '
                                 'conductor manager'))

        self.dbapi = dbapi.get_instance()

        self._keepalive_evt = threading.Event()
        """Event for the keepalive thread."""

        # TODO(dtantsur): make the threshold configurable?
        rejection_func = rejection.reject_when_reached(
            CONF.conductor.workers_pool_size)
        self._executor = futurist.GreenThreadPoolExecutor(
            max_workers=CONF.conductor.workers_pool_size,
            check_and_reject=rejection_func)
        """Executor for performing tasks async."""

        self.ring_manager = hash.HashRingManager()
        """Consistent hash ring which maps drivers to conductors."""

        # NOTE(deva): this call may raise DriverLoadError or DriverNotFound
        drivers = driver_factory.drivers()
        if not drivers:
            msg = _LE("Conductor %s cannot be started because no drivers "
                      "were loaded.  This could be because no drivers were "
                      "specified in 'enabled_drivers' config option.")
            LOG.error(msg, self.host)
            raise exception.NoDriversLoaded(conductor=self.host)

        # NOTE(jroll) this is passed to the dbapi, which requires a list, not
        # a generator (which keys() returns in py3)
        driver_names = list(drivers)

        # Collect driver-specific periodic tasks.
        # Conductor periodic tasks accept context argument, driver periodic
        # tasks accept this manager and context. We have to ensure that the
        # same driver interface class is not traversed twice, otherwise
        # we'll have several instances of the same task.
        LOG.debug('Collecting periodic tasks')
        self._periodic_task_callables = []
        periodic_task_classes = set()
        self._collect_periodic_tasks(self, (admin_context,))
        for driver_obj in drivers.values():
            self._collect_periodic_tasks(driver_obj, (self, admin_context))
            for iface_name in driver_obj.all_interfaces:
                iface = getattr(driver_obj, iface_name, None)
                if iface and iface.__class__ not in periodic_task_classes:
                    self._collect_periodic_tasks(iface, (self, admin_context))
                    periodic_task_classes.add(iface.__class__)

        if (len(self._periodic_task_callables) >
                CONF.conductor.workers_pool_size):
            LOG.warning(_LW('This conductor has %(tasks)d periodic tasks '
                            'enabled, but only %(workers)d task workers '
                            'allowed by [conductor]workers_pool_size option'),
                        {'tasks': len(self._periodic_task_callables),
                         'workers': CONF.conductor.workers_pool_size})

        self._periodic_tasks = periodics.PeriodicWorker(
            self._periodic_task_callables,
            executor_factory=periodics.ExistingExecutor(self._executor))

        # clear all locks held by this conductor before registering
        self.dbapi.clear_node_reservations_for_conductor(self.host)
        try:
            # Register this conductor with the cluster
            cdr = self.dbapi.register_conductor({'hostname': self.host,
                                                 'drivers': driver_names})
        except exception.ConductorAlreadyRegistered:
            # This conductor was already registered and did not shut down
            # properly, so log a warning and update the record.
            LOG.warning(
                _LW("A conductor with hostname %(hostname)s "
                    "was previously registered. Updating registration"),
                {'hostname': self.host})
            cdr = self.dbapi.register_conductor({'hostname': self.host,
                                                 'drivers': driver_names},
                                                update_existing=True)
        self.conductor = cdr

        # Start periodic tasks
        self._periodic_tasks_worker = self._executor.submit(
            self._periodic_tasks.start, allow_empty=True)
        self._periodic_tasks_worker.add_done_callback(
            self._on_periodic_tasks_stop)

        # NOTE(lucasagomes): If the conductor server dies abruptly
        # mid deployment (OMM Killer, power outage, etc...) we
        # can not resume the deployment even if the conductor
        # comes back online. Cleaning the reservation of the nodes
        # (dbapi.clear_node_reservations_for_conductor) is not enough to
        # unstick it, so let's gracefully fail the deployment so the node
        # can go through the steps (deleting & cleaning) to make itself
        # available again.
        filters = {'reserved': False,
                   'provision_state': states.DEPLOYING}
        last_error = (_("The deployment can't be resumed by conductor "
                        "%s. Moving to fail state.") % self.host)
        self._fail_if_in_state(ironic_context.get_admin_context(), filters,
                               states.DEPLOYING, 'provision_updated_at',
                               last_error=last_error)

        # Spawn a dedicated greenthread for the keepalive
        try:
            self._spawn_worker(self._conductor_service_record_keepalive)
            LOG.info(_LI('Successfully started conductor with hostname '
                         '%(hostname)s.'),
                     {'hostname': self.host})
        except exception.NoFreeConductorWorker:
            with excutils.save_and_reraise_exception():
                LOG.critical(_LC('Failed to start keepalive'))
                self.del_host()

        self._started = True
Exemple #2
0
    def init_host(self, admin_context=None):
        """Initialize the conductor host.

        :param admin_context: the admin context to pass to periodic tasks.
        :raises: RuntimeError when conductor is already running.
        :raises: NoDriversLoaded when no drivers are enabled on the conductor.
        :raises: DriverNotFound if a driver is enabled that does not exist.
        :raises: DriverLoadError if an enabled driver cannot be loaded.
        :raises: DriverNameConflict if a classic driver and a dynamic driver
                 are both enabled and have the same name.
        :raises: ConfigInvalid if required config options for connection with
                 radosgw are missing while storing config drive.
        """
        if self._started:
            raise RuntimeError(_('Attempt to start an already running '
                                 'conductor manager'))
        self._shutdown = False

        self.dbapi = dbapi.get_instance()

        self._keepalive_evt = threading.Event()
        """Event for the keepalive thread."""

        # TODO(dtantsur): make the threshold configurable?
        rejection_func = rejection.reject_when_reached(
            CONF.conductor.workers_pool_size)
        self._executor = futurist.GreenThreadPoolExecutor(
            max_workers=CONF.conductor.workers_pool_size,
            check_and_reject=rejection_func)
        """Executor for performing tasks async."""

        self.ring_manager = hash_ring.HashRingManager()
        """Consistent hash ring which maps drivers to conductors."""

        _check_enabled_interfaces()

        # NOTE(deva): these calls may raise DriverLoadError or DriverNotFound
        # NOTE(vdrok): Instantiate network and storage interface factory on
        # startup so that all the interfaces are loaded at the very
        # beginning, and failures prevent the conductor from starting.
        drivers = driver_factory.drivers()
        hardware_types = driver_factory.hardware_types()
        driver_factory.NetworkInterfaceFactory()
        driver_factory.StorageInterfaceFactory()

        # NOTE(jroll) this is passed to the dbapi, which requires a list, not
        # a generator (which keys() returns in py3)
        driver_names = list(drivers)
        hardware_type_names = list(hardware_types)

        # check that at least one driver is loaded, whether classic or dynamic
        if not driver_names and not hardware_type_names:
            msg = ("Conductor %s cannot be started because no drivers "
                   "were loaded. This could be because no classic drivers "
                   "were specified in the 'enabled_drivers' config option "
                   "and no dynamic drivers were specified in the "
                   "'enabled_hardware_types' config option.")
            LOG.error(msg, self.host)
            raise exception.NoDriversLoaded(conductor=self.host)

        # check for name clashes between classic and dynamic drivers
        name_clashes = set(driver_names).intersection(hardware_type_names)
        if name_clashes:
            name_clashes = ', '.join(name_clashes)
            msg = ("Conductor %(host)s cannot be started because there is "
                   "one or more name conflicts between classic drivers and "
                   "dynamic drivers (%(names)s). Check any external driver "
                   "plugins and the 'enabled_drivers' and "
                   "'enabled_hardware_types' config options.")
            LOG.error(msg, {'host': self.host, 'names': name_clashes})
            raise exception.DriverNameConflict(names=name_clashes)

        # Collect driver-specific periodic tasks.
        # Conductor periodic tasks accept context argument, driver periodic
        # tasks accept this manager and context. We have to ensure that the
        # same driver interface class is not traversed twice, otherwise
        # we'll have several instances of the same task.
        LOG.debug('Collecting periodic tasks')
        self._periodic_task_callables = []
        periodic_task_classes = set()
        self._collect_periodic_tasks(self, (admin_context,))
        for driver_obj in drivers.values():
            for iface_name in driver_obj.all_interfaces:
                iface = getattr(driver_obj, iface_name, None)
                if iface and iface.__class__ not in periodic_task_classes:
                    self._collect_periodic_tasks(iface, (self, admin_context))
                    periodic_task_classes.add(iface.__class__)

        if (len(self._periodic_task_callables) >
                CONF.conductor.workers_pool_size):
            LOG.warning('This conductor has %(tasks)d periodic tasks '
                        'enabled, but only %(workers)d task workers '
                        'allowed by [conductor]workers_pool_size option',
                        {'tasks': len(self._periodic_task_callables),
                         'workers': CONF.conductor.workers_pool_size})

        self._periodic_tasks = periodics.PeriodicWorker(
            self._periodic_task_callables,
            executor_factory=periodics.ExistingExecutor(self._executor))

        # Check for required config options if object_store_endpoint_type is
        # radosgw
        if (CONF.deploy.configdrive_use_object_store and
            CONF.deploy.object_store_endpoint_type == "radosgw"):
            if (None in (CONF.swift.auth_url, CONF.swift.username,
                         CONF.swift.password)):
                msg = _("Parameters missing to make a connection with "
                        "radosgw. Ensure that [swift]/auth_url, "
                        "[swift]/username, and [swift]/password are all "
                        "configured.")
                raise exception.ConfigInvalid(msg)

        # clear all target_power_state with locks by this conductor
        self.dbapi.clear_node_target_power_state(self.host)
        # clear all locks held by this conductor before registering
        self.dbapi.clear_node_reservations_for_conductor(self.host)
        try:
            # Register this conductor with the cluster
            self.conductor = objects.Conductor.register(
                admin_context, self.host, driver_names)
        except exception.ConductorAlreadyRegistered:
            # This conductor was already registered and did not shut down
            # properly, so log a warning and update the record.
            LOG.warning("A conductor with hostname %(hostname)s was "
                        "previously registered. Updating registration",
                        {'hostname': self.host})
            self.conductor = objects.Conductor.register(
                admin_context, self.host, driver_names, update_existing=True)

        # register hardware types and interfaces supported by this conductor
        # and validate them against other conductors
        try:
            self._register_and_validate_hardware_interfaces(hardware_types)
        except (exception.DriverLoadError, exception.DriverNotFound,
                exception.ConductorHardwareInterfacesAlreadyRegistered,
                exception.InterfaceNotFoundInEntrypoint,
                exception.NoValidDefaultForInterface) as e:
            with excutils.save_and_reraise_exception():
                LOG.error('Failed to register hardware types. %s', e)
                self.del_host()

        # Start periodic tasks
        self._periodic_tasks_worker = self._executor.submit(
            self._periodic_tasks.start, allow_empty=True)
        self._periodic_tasks_worker.add_done_callback(
            self._on_periodic_tasks_stop)

        # NOTE(lucasagomes): If the conductor server dies abruptly
        # mid deployment (OMM Killer, power outage, etc...) we
        # can not resume the deployment even if the conductor
        # comes back online. Cleaning the reservation of the nodes
        # (dbapi.clear_node_reservations_for_conductor) is not enough to
        # unstick it, so let's gracefully fail the deployment so the node
        # can go through the steps (deleting & cleaning) to make itself
        # available again.
        filters = {'reserved': False,
                   'provision_state': states.DEPLOYING}
        last_error = (_("The deployment can't be resumed by conductor "
                        "%s. Moving to fail state.") % self.host)
        self._fail_if_in_state(ironic_context.get_admin_context(), filters,
                               states.DEPLOYING, 'provision_updated_at',
                               last_error=last_error)

        # Start consoles if it set enabled in a greenthread.
        try:
            self._spawn_worker(self._start_consoles,
                               ironic_context.get_admin_context())
        except exception.NoFreeConductorWorker:
            LOG.warning('Failed to start worker for restarting consoles.')

        # Spawn a dedicated greenthread for the keepalive
        try:
            self._spawn_worker(self._conductor_service_record_keepalive)
            LOG.info('Successfully started conductor with hostname '
                     '%(hostname)s.',
                     {'hostname': self.host})
        except exception.NoFreeConductorWorker:
            with excutils.save_and_reraise_exception():
                LOG.critical('Failed to start keepalive')
                self.del_host()

        self._started = True
Exemple #3
0
    def init_host(self):
        self.dbapi = dbapi.get_instance()

        self._keepalive_evt = threading.Event()
        """Event for the keepalive thread."""

        self._worker_pool = greenpool.GreenPool(
            size=CONF.conductor.workers_pool_size)
        """GreenPool of background workers for performing tasks async."""

        self.ring_manager = hash.HashRingManager()
        """Consistent hash ring which maps drivers to conductors."""

        # NOTE(deva): instantiating DriverFactory may raise DriverLoadError
        #             or DriverNotFound
        self._driver_factory = driver_factory.DriverFactory()
        """Driver factory loads all enabled drivers."""

        self.drivers = self._driver_factory.names
        """List of driver names which this conductor supports."""

        if not self.drivers:
            msg = _LE("Conductor %s cannot be started because no drivers "
                      "were loaded.  This could be because no drivers were "
                      "specified in 'enabled_drivers' config option.")
            LOG.error(msg, self.host)
            raise exception.NoDriversLoaded(conductor=self.host)

        # Collect driver-specific periodic tasks
        for driver_obj in driver_factory.drivers().values():
            self._collect_periodic_tasks(driver_obj)
            for iface_name in (driver_obj.core_interfaces +
                               driver_obj.standard_interfaces + ['vendor']):
                iface = getattr(driver_obj, iface_name, None)
                if iface:
                    self._collect_periodic_tasks(iface)

        # clear all locks held by this conductor before registering
        self.dbapi.clear_node_reservations_for_conductor(self.host)
        try:
            # Register this conductor with the cluster
            cdr = self.dbapi.register_conductor({
                'hostname': self.host,
                'drivers': self.drivers
            })
        except exception.ConductorAlreadyRegistered:
            # This conductor was already registered and did not shut down
            # properly, so log a warning and update the record.
            LOG.warning(
                _LW("A conductor with hostname %(hostname)s "
                    "was previously registered. Updating registration"),
                {'hostname': self.host})
            cdr = self.dbapi.register_conductor(
                {
                    'hostname': self.host,
                    'drivers': self.drivers
                },
                update_existing=True)
        self.conductor = cdr

        # NOTE(lucasagomes): If the conductor server dies abruptly
        # mid deployment (OMM Killer, power outage, etc...) we
        # can not resume the deployment even if the conductor
        # comes back online. Cleaning the reservation of the nodes
        # (dbapi.clear_node_reservations_for_conductor) is not enough to
        # unstick it, so let's gracefully fail the deployment so the node
        # can go through the steps (deleting & cleaning) to make itself
        # available again.
        filters = {'reserved': False, 'provision_state': states.DEPLOYING}
        last_error = (_("The deployment can't be resumed by conductor "
                        "%s. Moving to fail state.") % self.host)
        self._fail_if_in_state(ironic_context.get_admin_context(),
                               filters,
                               states.DEPLOYING,
                               'provision_updated_at',
                               last_error=last_error)

        # Spawn a dedicated greenthread for the keepalive
        try:
            self._spawn_worker(self._conductor_service_record_keepalive)
            LOG.info(
                _LI('Successfully started conductor with hostname '
                    '%(hostname)s.'), {'hostname': self.host})
        except exception.NoFreeConductorWorker:
            with excutils.save_and_reraise_exception():
                LOG.critical(_LC('Failed to start keepalive'))
                self.del_host()
    def init_host(self, admin_context=None):
        """Initialize the conductor host.

        :param admin_context: the admin context to pass to periodic tasks.
        :raises: RuntimeError when conductor is already running.
        :raises: NoDriversLoaded when no drivers are enabled on the conductor.
        :raises: DriverNotFound if a driver is enabled that does not exist.
        :raises: DriverLoadError if an enabled driver cannot be loaded.
        :raises: DriverNameConflict if a classic driver and a dynamic driver
                 are both enabled and have the same name.
        :raises: ConfigInvalid if required config options for connection with
                 radosgw are missing while storing config drive.
        """
        if self._started:
            raise RuntimeError(
                _('Attempt to start an already running '
                  'conductor manager'))
        self._shutdown = False

        self.dbapi = dbapi.get_instance()

        self._keepalive_evt = threading.Event()
        """Event for the keepalive thread."""

        # TODO(dtantsur): make the threshold configurable?
        rejection_func = rejection.reject_when_reached(
            CONF.conductor.workers_pool_size)
        self._executor = futurist.GreenThreadPoolExecutor(
            max_workers=CONF.conductor.workers_pool_size,
            check_and_reject=rejection_func)
        """Executor for performing tasks async."""

        self.ring_manager = hash_ring.HashRingManager()
        """Consistent hash ring which maps drivers to conductors."""

        _check_enabled_interfaces()

        # NOTE(deva): these calls may raise DriverLoadError or DriverNotFound
        # NOTE(vdrok): Instantiate network and storage interface factory on
        # startup so that all the interfaces are loaded at the very
        # beginning, and failures prevent the conductor from starting.
        drivers = driver_factory.drivers()
        hardware_types = driver_factory.hardware_types()
        driver_factory.NetworkInterfaceFactory()
        driver_factory.StorageInterfaceFactory()

        # NOTE(jroll) this is passed to the dbapi, which requires a list, not
        # a generator (which keys() returns in py3)
        driver_names = list(drivers)
        hardware_type_names = list(hardware_types)

        # check that at least one driver is loaded, whether classic or dynamic
        if not driver_names and not hardware_type_names:
            msg = ("Conductor %s cannot be started because no drivers "
                   "were loaded. This could be because no classic drivers "
                   "were specified in the 'enabled_drivers' config option "
                   "and no dynamic drivers were specified in the "
                   "'enabled_hardware_types' config option.")
            LOG.error(msg, self.host)
            raise exception.NoDriversLoaded(conductor=self.host)

        # check for name clashes between classic and dynamic drivers
        name_clashes = set(driver_names).intersection(hardware_type_names)
        if name_clashes:
            name_clashes = ', '.join(name_clashes)
            msg = ("Conductor %(host)s cannot be started because there is "
                   "one or more name conflicts between classic drivers and "
                   "dynamic drivers (%(names)s). Check any external driver "
                   "plugins and the 'enabled_drivers' and "
                   "'enabled_hardware_types' config options.")
            LOG.error(msg, {'host': self.host, 'names': name_clashes})
            raise exception.DriverNameConflict(names=name_clashes)

        # Collect driver-specific periodic tasks.
        # Conductor periodic tasks accept context argument, driver periodic
        # tasks accept this manager and context. We have to ensure that the
        # same driver interface class is not traversed twice, otherwise
        # we'll have several instances of the same task.
        LOG.debug('Collecting periodic tasks')
        self._periodic_task_callables = []
        periodic_task_classes = set()
        self._collect_periodic_tasks(self, (admin_context, ))
        for driver_obj in drivers.values():
            for iface_name in driver_obj.all_interfaces:
                iface = getattr(driver_obj, iface_name, None)
                if iface and iface.__class__ not in periodic_task_classes:
                    self._collect_periodic_tasks(iface, (self, admin_context))
                    periodic_task_classes.add(iface.__class__)

        if (len(self._periodic_task_callables) >
                CONF.conductor.workers_pool_size):
            LOG.warning(
                'This conductor has %(tasks)d periodic tasks '
                'enabled, but only %(workers)d task workers '
                'allowed by [conductor]workers_pool_size option', {
                    'tasks': len(self._periodic_task_callables),
                    'workers': CONF.conductor.workers_pool_size
                })

        self._periodic_tasks = periodics.PeriodicWorker(
            self._periodic_task_callables,
            executor_factory=periodics.ExistingExecutor(self._executor))

        # Check for required config options if object_store_endpoint_type is
        # radosgw
        if (CONF.deploy.configdrive_use_object_store
                and CONF.deploy.object_store_endpoint_type == "radosgw"):
            if (None in (CONF.swift.auth_url, CONF.swift.username,
                         CONF.swift.password)):
                msg = _("Parameters missing to make a connection with "
                        "radosgw. Ensure that [swift]/auth_url, "
                        "[swift]/username, and [swift]/password are all "
                        "configured.")
                raise exception.ConfigInvalid(msg)

        # clear all target_power_state with locks by this conductor
        self.dbapi.clear_node_target_power_state(self.host)
        # clear all locks held by this conductor before registering
        self.dbapi.clear_node_reservations_for_conductor(self.host)
        try:
            # Register this conductor with the cluster
            self.conductor = objects.Conductor.register(
                admin_context, self.host, driver_names)
        except exception.ConductorAlreadyRegistered:
            # This conductor was already registered and did not shut down
            # properly, so log a warning and update the record.
            LOG.warning(
                "A conductor with hostname %(hostname)s was "
                "previously registered. Updating registration",
                {'hostname': self.host})
            self.conductor = objects.Conductor.register(admin_context,
                                                        self.host,
                                                        driver_names,
                                                        update_existing=True)

        # register hardware types and interfaces supported by this conductor
        # and validate them against other conductors
        try:
            self._register_and_validate_hardware_interfaces(hardware_types)
        except (exception.DriverLoadError, exception.DriverNotFound,
                exception.ConductorHardwareInterfacesAlreadyRegistered,
                exception.InterfaceNotFoundInEntrypoint,
                exception.NoValidDefaultForInterface) as e:
            with excutils.save_and_reraise_exception():
                LOG.error('Failed to register hardware types. %s', e)
                self.del_host()

        # Start periodic tasks
        self._periodic_tasks_worker = self._executor.submit(
            self._periodic_tasks.start, allow_empty=True)
        self._periodic_tasks_worker.add_done_callback(
            self._on_periodic_tasks_stop)

        # NOTE(lucasagomes): If the conductor server dies abruptly
        # mid deployment (OMM Killer, power outage, etc...) we
        # can not resume the deployment even if the conductor
        # comes back online. Cleaning the reservation of the nodes
        # (dbapi.clear_node_reservations_for_conductor) is not enough to
        # unstick it, so let's gracefully fail the deployment so the node
        # can go through the steps (deleting & cleaning) to make itself
        # available again.
        filters = {'reserved': False, 'provision_state': states.DEPLOYING}
        last_error = (_("The deployment can't be resumed by conductor "
                        "%s. Moving to fail state.") % self.host)
        self._fail_if_in_state(ironic_context.get_admin_context(),
                               filters,
                               states.DEPLOYING,
                               'provision_updated_at',
                               last_error=last_error)

        # Start consoles if it set enabled in a greenthread.
        try:
            self._spawn_worker(self._start_consoles,
                               ironic_context.get_admin_context())
        except exception.NoFreeConductorWorker:
            LOG.warning('Failed to start worker for restarting consoles.')

        # Spawn a dedicated greenthread for the keepalive
        try:
            self._spawn_worker(self._conductor_service_record_keepalive)
            LOG.info(
                'Successfully started conductor with hostname '
                '%(hostname)s.', {'hostname': self.host})
        except exception.NoFreeConductorWorker:
            with excutils.save_and_reraise_exception():
                LOG.critical('Failed to start keepalive')
                self.del_host()

        self._started = True
Exemple #5
0
    def init_host(self):
        self.dbapi = dbapi.get_instance()

        self._keepalive_evt = threading.Event()
        """Event for the keepalive thread."""

        self._worker_pool = greenpool.GreenPool(
            size=CONF.conductor.workers_pool_size)
        """GreenPool of background workers for performing tasks async."""

        self.ring_manager = hash.HashRingManager()
        """Consistent hash ring which maps drivers to conductors."""

        # NOTE(deva): instantiating DriverFactory may raise DriverLoadError
        #             or DriverNotFound
        self._driver_factory = driver_factory.DriverFactory()
        """Driver factory loads all enabled drivers."""

        self.drivers = self._driver_factory.names
        """List of driver names which this conductor supports."""

        if not self.drivers:
            msg = _LE("Conductor %s cannot be started because no drivers "
                      "were loaded.  This could be because no drivers were "
                      "specified in 'enabled_drivers' config option.")
            LOG.error(msg, self.host)
            raise exception.NoDriversLoaded(conductor=self.host)

        # Collect driver-specific periodic tasks
        for driver_obj in driver_factory.drivers().values():
            self._collect_periodic_tasks(driver_obj)
            for iface_name in (driver_obj.core_interfaces +
                               driver_obj.standard_interfaces +
                               ['vendor']):
                iface = getattr(driver_obj, iface_name, None)
                if iface:
                    self._collect_periodic_tasks(iface)

        # clear all locks held by this conductor before registering
        self.dbapi.clear_node_reservations_for_conductor(self.host)
        try:
            # Register this conductor with the cluster
            cdr = self.dbapi.register_conductor({'hostname': self.host,
                                                 'drivers': self.drivers})
        except exception.ConductorAlreadyRegistered:
            # This conductor was already registered and did not shut down
            # properly, so log a warning and update the record.
            LOG.warning(
                _LW("A conductor with hostname %(hostname)s "
                    "was previously registered. Updating registration"),
                {'hostname': self.host})
            cdr = self.dbapi.register_conductor({'hostname': self.host,
                                                 'drivers': self.drivers},
                                                update_existing=True)
        self.conductor = cdr

        # NOTE(lucasagomes): If the conductor server dies abruptly
        # mid deployment (OMM Killer, power outage, etc...) we
        # can not resume the deployment even if the conductor
        # comes back online. Cleaning the reservation of the nodes
        # (dbapi.clear_node_reservations_for_conductor) is not enough to
        # unstick it, so let's gracefully fail the deployment so the node
        # can go through the steps (deleting & cleaning) to make itself
        # available again.
        filters = {'reserved': False,
                   'provision_state': states.DEPLOYING}
        last_error = (_("The deployment can't be resumed by conductor "
                        "%s. Moving to fail state.") % self.host)
        self._fail_if_in_state(ironic_context.get_admin_context(), filters,
                               states.DEPLOYING, 'provision_updated_at',
                               last_error=last_error)

        # Spawn a dedicated greenthread for the keepalive
        try:
            self._spawn_worker(self._conductor_service_record_keepalive)
            LOG.info(_LI('Successfully started conductor with hostname '
                         '%(hostname)s.'),
                     {'hostname': self.host})
        except exception.NoFreeConductorWorker:
            with excutils.save_and_reraise_exception():
                LOG.critical(_LC('Failed to start keepalive'))
                self.del_host()
Exemple #6
0
    def init_host(self, admin_context=None):
        """Initialize the conductor host.

        :param admin_context: the admin context to pass to periodic tasks.
        :raises: RuntimeError when conductor is already running.
        :raises: NoDriversLoaded when no drivers are enabled on the conductor.
        :raises: DriverNotFound if a driver is enabled that does not exist.
        :raises: DriverLoadError if an enabled driver cannot be loaded.
        """
        if self._started:
            raise RuntimeError(
                _('Attempt to start an already running '
                  'conductor manager'))

        self.dbapi = dbapi.get_instance()

        self._keepalive_evt = threading.Event()
        """Event for the keepalive thread."""

        # TODO(dtantsur): make the threshold configurable?
        rejection_func = rejection.reject_when_reached(
            CONF.conductor.workers_pool_size)
        self._executor = futurist.GreenThreadPoolExecutor(
            max_workers=CONF.conductor.workers_pool_size,
            check_and_reject=rejection_func)
        """Executor for performing tasks async."""

        self.ring_manager = hash.HashRingManager()
        """Consistent hash ring which maps drivers to conductors."""

        # NOTE(deva): these calls may raise DriverLoadError or DriverNotFound
        # NOTE(vdrok): Instantiate network and storage interface factory on
        # startup so that all the interfaces are loaded at the very
        # beginning, and failures prevent the conductor from starting.
        drivers = driver_factory.drivers()
        driver_factory.NetworkInterfaceFactory()
        driver_factory.StorageInterfaceFactory()
        if not drivers:
            msg = _LE("Conductor %s cannot be started because no drivers "
                      "were loaded.  This could be because no drivers were "
                      "specified in 'enabled_drivers' config option.")
            LOG.error(msg, self.host)
            raise exception.NoDriversLoaded(conductor=self.host)

        # NOTE(jroll) this is passed to the dbapi, which requires a list, not
        # a generator (which keys() returns in py3)
        driver_names = list(drivers)

        # Collect driver-specific periodic tasks.
        # Conductor periodic tasks accept context argument, driver periodic
        # tasks accept this manager and context. We have to ensure that the
        # same driver interface class is not traversed twice, otherwise
        # we'll have several instances of the same task.
        LOG.debug('Collecting periodic tasks')
        self._periodic_task_callables = []
        periodic_task_classes = set()
        self._collect_periodic_tasks(self, (admin_context, ))
        for driver_obj in drivers.values():
            # TODO(dtantsur): collecting tasks from driver objects is
            # deprecated and should be removed in Ocata.
            self._collect_periodic_tasks(driver_obj, (self, admin_context))
            for iface_name in driver_obj.all_interfaces:
                iface = getattr(driver_obj, iface_name, None)
                if iface and iface.__class__ not in periodic_task_classes:
                    self._collect_periodic_tasks(iface, (self, admin_context))
                    periodic_task_classes.add(iface.__class__)

        if (len(self._periodic_task_callables) >
                CONF.conductor.workers_pool_size):
            LOG.warning(
                _LW('This conductor has %(tasks)d periodic tasks '
                    'enabled, but only %(workers)d task workers '
                    'allowed by [conductor]workers_pool_size option'), {
                        'tasks': len(self._periodic_task_callables),
                        'workers': CONF.conductor.workers_pool_size
                    })

        self._periodic_tasks = periodics.PeriodicWorker(
            self._periodic_task_callables,
            executor_factory=periodics.ExistingExecutor(self._executor))

        # clear all target_power_state with locks by this conductor
        self.dbapi.clear_node_target_power_state(self.host)
        # clear all locks held by this conductor before registering
        self.dbapi.clear_node_reservations_for_conductor(self.host)
        try:
            # Register this conductor with the cluster
            self.conductor = objects.Conductor.register(
                admin_context, self.host, driver_names)
        except exception.ConductorAlreadyRegistered:
            # This conductor was already registered and did not shut down
            # properly, so log a warning and update the record.
            LOG.warning(
                _LW("A conductor with hostname %(hostname)s "
                    "was previously registered. Updating registration"),
                {'hostname': self.host})
            self.conductor = objects.Conductor.register(admin_context,
                                                        self.host,
                                                        driver_names,
                                                        update_existing=True)

        # Start periodic tasks
        self._periodic_tasks_worker = self._executor.submit(
            self._periodic_tasks.start, allow_empty=True)
        self._periodic_tasks_worker.add_done_callback(
            self._on_periodic_tasks_stop)

        # NOTE(lucasagomes): If the conductor server dies abruptly
        # mid deployment (OMM Killer, power outage, etc...) we
        # can not resume the deployment even if the conductor
        # comes back online. Cleaning the reservation of the nodes
        # (dbapi.clear_node_reservations_for_conductor) is not enough to
        # unstick it, so let's gracefully fail the deployment so the node
        # can go through the steps (deleting & cleaning) to make itself
        # available again.
        filters = {'reserved': False, 'provision_state': states.DEPLOYING}
        last_error = (_("The deployment can't be resumed by conductor "
                        "%s. Moving to fail state.") % self.host)
        self._fail_if_in_state(ironic_context.get_admin_context(),
                               filters,
                               states.DEPLOYING,
                               'provision_updated_at',
                               last_error=last_error)

        # Start consoles if it set enabled in a greenthread.
        try:
            self._spawn_worker(self._start_consoles,
                               ironic_context.get_admin_context())
        except exception.NoFreeConductorWorker:
            LOG.warning(_LW('Failed to start worker for restarting consoles.'))

        # Spawn a dedicated greenthread for the keepalive
        try:
            self._spawn_worker(self._conductor_service_record_keepalive)
            LOG.info(
                _LI('Successfully started conductor with hostname '
                    '%(hostname)s.'), {'hostname': self.host})
        except exception.NoFreeConductorWorker:
            with excutils.save_and_reraise_exception():
                LOG.critical(_LC('Failed to start keepalive'))
                self.del_host()

        self._started = True
Exemple #7
0
    def _collect_periodic_tasks(self, admin_context):
        """Collect driver-specific periodic tasks.

        Conductor periodic tasks accept context argument, driver periodic
        tasks accept this manager and context. We have to ensure that the
        same driver interface class is not traversed twice, otherwise
        we'll have several instances of the same task.

        :param admin_context: Administrator context to pass to tasks.
        """
        LOG.debug('Collecting periodic tasks')
        # collected callables
        periodic_task_callables = []
        # list of visited classes to avoid adding the same tasks twice
        periodic_task_classes = set()

        def _collect_from(obj, args):
            """Collect tasks from the given object.

            :param obj: the object to collect tasks from.
            :param args: a tuple of arguments to pass to tasks.
            """
            if obj and obj.__class__ not in periodic_task_classes:
                for name, member in inspect.getmembers(obj):
                    if periodics.is_periodic(member):
                        LOG.debug('Found periodic task %(owner)s.%(member)s', {
                            'owner': obj.__class__.__name__,
                            'member': name
                        })
                        periodic_task_callables.append((member, args, {}))
                periodic_task_classes.add(obj.__class__)

        # First, collect tasks from the conductor itself
        _collect_from(self, (admin_context, ))

        # Second, collect tasks from hardware interfaces
        for ifaces in driver_factory.all_interfaces().values():
            for iface in ifaces.values():
                _collect_from(iface, args=(self, admin_context))
        # TODO(dtantsur): allow periodics on hardware types themselves?

        # Finally, collect tasks from interfaces of classic drivers, since they
        # are not necessary registered as new-style hardware interfaces.
        for driver_obj in driver_factory.drivers().values():
            for iface_name in driver_obj.all_interfaces:
                iface = getattr(driver_obj, iface_name, None)
                _collect_from(iface, args=(self, admin_context))

        if len(periodic_task_callables) > CONF.conductor.workers_pool_size:
            LOG.warning(
                'This conductor has %(tasks)d periodic tasks '
                'enabled, but only %(workers)d task workers '
                'allowed by [conductor]workers_pool_size option', {
                    'tasks': len(periodic_task_callables),
                    'workers': CONF.conductor.workers_pool_size
                })

        self._periodic_tasks = periodics.PeriodicWorker(
            periodic_task_callables,
            executor_factory=periodics.ExistingExecutor(self._executor))
        # This is only used in tests currently. Delete it?
        self._periodic_task_callables = periodic_task_callables