Exemple #1
0
    def init_host(self):
        self.dbapi = dbapi.get_instance()

        self.driver_factory = driver_factory.DriverFactory()
        self.drivers = self.driver_factory.names
        """List of driver names which this conductor supports."""

        try:
            self.dbapi.register_conductor({'hostname': self.host,
                                           'drivers': self.drivers})
        except exception.ConductorAlreadyRegistered:
            LOG.warn(_("A conductor with hostname %(hostname)s "
                       "was previously registered. Updating registration")
                       % {'hostname': self.host})
            self.dbapi.unregister_conductor(self.host)
            self.dbapi.register_conductor({'hostname': self.host,
                                           'drivers': self.drivers})

        self.ring_manager = hash.HashRingManager()
        """Consistent hash ring which maps drivers to conductors."""

        self._worker_pool = greenpool.GreenPool(
                                size=CONF.conductor.workers_pool_size)
        """GreenPool of background workers for performing tasks async."""

        # Spawn a dedicated greenthread for the keepalive
        try:
            self._keepalive_evt = threading.Event()
            self._spawn_worker(self._conductor_service_record_keepalive)
        except exception.NoFreeConductorWorker:
            with excutils.save_and_reraise_exception():
                LOG.critical(_('Failed to start keepalive'))
                self.del_host()
Exemple #2
0
    def __init__(self, topic=None):
        super(ConductorAPI, self).__init__()
        self.topic = topic
        if self.topic is None:
            self.topic = manager.MANAGER_TOPIC

        target = messaging.Target(topic=self.topic, version='1.0')
        serializer = objects_base.IronicObjectSerializer()
        self.client = rpc.get_client(target,
                                     version_cap=self.RPC_API_VERSION,
                                     serializer=serializer)
        self.ring_manager = hash.HashRingManager()
Exemple #3
0
    def setUp(self):
        """Run before each test method to initialize test environment."""
        super(TestCase, self).setUp()
        test_timeout = os.environ.get('OS_TEST_TIMEOUT', 0)
        try:
            test_timeout = int(test_timeout)
        except ValueError:
            # If timeout value is invalid do not set a timeout.
            test_timeout = 0
        if test_timeout > 0:
            self.useFixture(fixtures.Timeout(test_timeout, gentle=True))
        self.useFixture(fixtures.NestedTempfile())
        self.useFixture(fixtures.TempHomeDir())

        if (os.environ.get('OS_STDOUT_CAPTURE') == 'True'
                or os.environ.get('OS_STDOUT_CAPTURE') == '1'):
            stdout = self.useFixture(fixtures.StringStream('stdout')).stream
            self.useFixture(fixtures.MonkeyPatch('sys.stdout', stdout))
        if (os.environ.get('OS_STDERR_CAPTURE') == 'True'
                or os.environ.get('OS_STDERR_CAPTURE') == '1'):
            stderr = self.useFixture(fixtures.StringStream('stderr')).stream
            self.useFixture(fixtures.MonkeyPatch('sys.stderr', stderr))

        self.log_fixture = self.useFixture(fixtures.FakeLogger())
        self.useFixture(conf_fixture.ConfFixture(CONF))

        global _DB_CACHE
        if not _DB_CACHE:
            _DB_CACHE = Database(sqla_api,
                                 migration,
                                 sql_connection=CONF.database.connection,
                                 sqlite_db=CONF.database.sqlite_db,
                                 sqlite_clean_db=CONF.sqlite_clean_db)
        self.useFixture(_DB_CACHE)

        # NOTE(danms): Make sure to reset us back to non-remote objects
        # for each test to avoid interactions. Also, backup the object
        # registry
        objects_base.IronicObject.indirection_api = None
        self._base_test_obj_backup = copy.copy(
            objects_base.IronicObject._obj_classes)
        self.addCleanup(self._restore_obj_registry)

        self.addCleanup(self._clear_attrs)
        self.addCleanup(hash_ring.HashRingManager().reset)
        self.useFixture(fixtures.EnvironmentVariable('http_proxy'))
        self.policy = self.useFixture(policy_fixture.PolicyFixture())
        CONF.set_override('fatal_exception_format_errors', True)
Exemple #4
0
    def __init__(self, topic=None):
        super(ConductorAPI, self).__init__()
        self.topic = topic
        if self.topic is None:
            self.topic = manager.MANAGER_TOPIC

        target = messaging.Target(topic=self.topic,
                                  version='1.0')
        serializer = objects_base.IronicObjectSerializer()
        release_ver = versions.RELEASE_MAPPING.get(CONF.pin_release_version)
        version_cap = (release_ver['rpc'] if release_ver
                       else self.RPC_API_VERSION)
        self.client = rpc.get_client(target, version_cap=version_cap,
                                     serializer=serializer)
        # NOTE(deva): this is going to be buggy
        self.ring_manager = hash_ring.HashRingManager()
Exemple #5
0
    def setUp(self):
        """Run before each test method to initialize test environment."""
        super(TestCase, self).setUp()
        self.context = ironic_context.get_admin_context()
        test_timeout = os.environ.get('OS_TEST_TIMEOUT', 0)
        try:
            test_timeout = int(test_timeout)
        except ValueError:
            # If timeout value is invalid do not set a timeout.
            test_timeout = 0
        if test_timeout > 0:
            self.useFixture(fixtures.Timeout(test_timeout, gentle=True))
        self.useFixture(fixtures.NestedTempfile())
        self.useFixture(fixtures.TempHomeDir())

        if (os.environ.get('OS_STDOUT_CAPTURE') == 'True' or
                os.environ.get('OS_STDOUT_CAPTURE') == '1'):
            stdout = self.useFixture(fixtures.StringStream('stdout')).stream
            self.useFixture(fixtures.MonkeyPatch('sys.stdout', stdout))
        if (os.environ.get('OS_STDERR_CAPTURE') == 'True' or
                os.environ.get('OS_STDERR_CAPTURE') == '1'):
            stderr = self.useFixture(fixtures.StringStream('stderr')).stream
            self.useFixture(fixtures.MonkeyPatch('sys.stderr', stderr))

        self.log_fixture = self.useFixture(fixtures.FakeLogger())
        self._set_config()
        # NOTE(danms): Make sure to reset us back to non-remote objects
        # for each test to avoid interactions. Also, backup the object
        # registry
        objects_base.IronicObject.indirection_api = None
        self._base_test_obj_backup = copy.copy(
            objects_base.IronicObjectRegistry.obj_classes())
        self.addCleanup(self._restore_obj_registry)

        self.addCleanup(self._clear_attrs)
        self.addCleanup(hash_ring.HashRingManager().reset)
        self.useFixture(fixtures.EnvironmentVariable('http_proxy'))
        self.policy = self.useFixture(policy_fixture.PolicyFixture())

        driver_factory.DriverFactory._extension_manager = None
        driver_factory.HardwareTypesFactory._extension_manager = None
        for factory in driver_factory._INTERFACE_LOADERS.values():
            factory._extension_manager = None
Exemple #6
0
    def setUp(self):
        """Run before each test method to initialize test environment."""
        super(TestCase, self).setUp()
        self.context = ironic_context.get_admin_context()
        self._set_config()
        # NOTE(danms): Make sure to reset us back to non-remote objects
        # for each test to avoid interactions. Also, backup the object
        # registry
        self._base_test_obj_backup = copy.copy(
            objects_base.IronicObjectRegistry.obj_classes())
        self.addCleanup(self._restore_obj_registry)

        self.addCleanup(self._clear_attrs)
        self.addCleanup(hash_ring.HashRingManager().reset)
        self.useFixture(fixtures.EnvironmentVariable('http_proxy'))
        self.policy = self.useFixture(policy_fixture.PolicyFixture())

        driver_factory.HardwareTypesFactory._extension_manager = None
        for factory in driver_factory._INTERFACE_LOADERS.values():
            factory._extension_manager = None

        # Ban running external processes via 'execute' like functions. If the
        # patched function is called, an exception is raised to warn the
        # tester.
        if self.block_execute:
            # NOTE(jlvillal): Intentionally not using mock as if you mock a
            # mock it causes things to not work correctly. As doing an
            # autospec=True causes strangeness. By using a simple function we
            # can then mock it without issue.
            self.patch(processutils, 'execute', do_not_call)
            self.patch(subprocess, 'call', do_not_call)
            self.patch(subprocess, 'check_call', do_not_call)
            self.patch(subprocess, 'check_output', do_not_call)
            self.patch(utils, 'execute', do_not_call)
            # subprocess.Popen is a class
            self.patch(subprocess, 'Popen', DoNotCallPopen)

        if sys.version_info < (3, 7):
            _patch_mock_callable._old_func = mock._callable
            mock._callable = _patch_mock_callable
Exemple #7
0
    def setUp(self):
        """Run before each test method to initialize test environment."""
        super(TestCase, self).setUp()
        self.context = ironic_context.get_admin_context()
        self._set_config()
        # NOTE(danms): Make sure to reset us back to non-remote objects
        # for each test to avoid interactions. Also, backup the object
        # registry
        objects_base.IronicObject.indirection_api = None
        self._base_test_obj_backup = copy.copy(
            objects_base.IronicObjectRegistry.obj_classes())
        self.addCleanup(self._restore_obj_registry)

        self.addCleanup(self._clear_attrs)
        self.addCleanup(hash_ring.HashRingManager().reset)
        self.useFixture(fixtures.EnvironmentVariable('http_proxy'))
        self.policy = self.useFixture(policy_fixture.PolicyFixture())

        driver_factory.DriverFactory._extension_manager = None
        driver_factory.HardwareTypesFactory._extension_manager = None
        for factory in driver_factory._INTERFACE_LOADERS.values():
            factory._extension_manager = None

        # Block access to utils.execute() and related functions.
        # NOTE(bigjools): Not using a decorator on tests because I don't
        # want to force every test method to accept a new arg. Instead, they
        # can override or examine this self._exec_patch Mock as needed.
        if self.block_execute:
            self._exec_patch = mock.Mock()
            self._exec_patch.side_effect = Exception(
                "Don't call ironic_lib.utils.execute() / "
                "processutils.execute() or similar functions in tests!")

            self.patch(processutils, 'execute', self._exec_patch)
            self.patch(subprocess, 'Popen', self._exec_patch)
            self.patch(subprocess, 'call', self._exec_patch)
            self.patch(subprocess, 'check_call', self._exec_patch)
            self.patch(subprocess, 'check_output', self._exec_patch)
            self.patch(utils, 'execute', self._exec_patch)
Exemple #8
0
    def init_host(self):
        self.dbapi = dbapi.get_instance()

        self.driver_factory = driver_factory.DriverFactory()
        self.drivers = self.driver_factory.names
        """List of driver names which this conductor supports."""

        try:
            self.dbapi.register_conductor({'hostname': self.host,
                                           'drivers': self.drivers})
        except exception.ConductorAlreadyRegistered:
            LOG.warn(_("A conductor with hostname %(hostname)s "
                       "was previously registered. Updating registration")
                       % {'hostname': self.host})
            self.dbapi.unregister_conductor(self.host)
            self.dbapi.register_conductor({'hostname': self.host,
                                           'drivers': self.drivers})

        self.ring_manager = hash.HashRingManager()
        """Consistent hash ring which maps drivers to conductors."""

        self._worker_pool = greenpool.GreenPool(
                                size=CONF.conductor.workers_pool_size)
        """GreenPool of background workers for performing tasks async."""
 def setUp(self):
     super(HashRingManagerTestCase, self).setUp()
     self.ring_manager = hash_ring.HashRingManager(
         use_groups=self.use_groups)
 def test_hash_ring_manager_uncached(self):
     ring_mgr = hash_ring.HashRingManager(cache=False,
                                          use_groups=self.use_groups)
     ring = ring_mgr.ring
     self.assertIsNotNone(ring)
     self.assertEqual((None, 0), hash_ring.HashRingManager._hash_rings)
Exemple #11
0
    def init_host(self):
        self.dbapi = dbapi.get_instance()

        self._keepalive_evt = threading.Event()
        """Event for the keepalive thread."""

        self._worker_pool = greenpool.GreenPool(
            size=CONF.conductor.workers_pool_size)
        """GreenPool of background workers for performing tasks async."""

        self.ring_manager = hash.HashRingManager()
        """Consistent hash ring which maps drivers to conductors."""

        # NOTE(deva): instantiating DriverFactory may raise DriverLoadError
        #             or DriverNotFound
        self._driver_factory = driver_factory.DriverFactory()
        """Driver factory loads all enabled drivers."""

        self.drivers = self._driver_factory.names
        """List of driver names which this conductor supports."""

        if not self.drivers:
            msg = _LE("Conductor %s cannot be started because no drivers "
                      "were loaded.  This could be because no drivers were "
                      "specified in 'enabled_drivers' config option.")
            LOG.error(msg, self.host)
            raise exception.NoDriversLoaded(conductor=self.host)

        # Collect driver-specific periodic tasks
        for driver_obj in driver_factory.drivers().values():
            self._collect_periodic_tasks(driver_obj)
            for iface_name in (driver_obj.core_interfaces +
                               driver_obj.standard_interfaces + ['vendor']):
                iface = getattr(driver_obj, iface_name, None)
                if iface:
                    self._collect_periodic_tasks(iface)

        # clear all locks held by this conductor before registering
        self.dbapi.clear_node_reservations_for_conductor(self.host)
        try:
            # Register this conductor with the cluster
            cdr = self.dbapi.register_conductor({
                'hostname': self.host,
                'drivers': self.drivers
            })
        except exception.ConductorAlreadyRegistered:
            # This conductor was already registered and did not shut down
            # properly, so log a warning and update the record.
            LOG.warning(
                _LW("A conductor with hostname %(hostname)s "
                    "was previously registered. Updating registration"),
                {'hostname': self.host})
            cdr = self.dbapi.register_conductor(
                {
                    'hostname': self.host,
                    'drivers': self.drivers
                },
                update_existing=True)
        self.conductor = cdr

        # NOTE(lucasagomes): If the conductor server dies abruptly
        # mid deployment (OMM Killer, power outage, etc...) we
        # can not resume the deployment even if the conductor
        # comes back online. Cleaning the reservation of the nodes
        # (dbapi.clear_node_reservations_for_conductor) is not enough to
        # unstick it, so let's gracefully fail the deployment so the node
        # can go through the steps (deleting & cleaning) to make itself
        # available again.
        filters = {'reserved': False, 'provision_state': states.DEPLOYING}
        last_error = (_("The deployment can't be resumed by conductor "
                        "%s. Moving to fail state.") % self.host)
        self._fail_if_in_state(ironic_context.get_admin_context(),
                               filters,
                               states.DEPLOYING,
                               'provision_updated_at',
                               last_error=last_error)

        # Spawn a dedicated greenthread for the keepalive
        try:
            self._spawn_worker(self._conductor_service_record_keepalive)
            LOG.info(
                _LI('Successfully started conductor with hostname '
                    '%(hostname)s.'), {'hostname': self.host})
        except exception.NoFreeConductorWorker:
            with excutils.save_and_reraise_exception():
                LOG.critical(_LC('Failed to start keepalive'))
                self.del_host()
    def init_host(self, admin_context=None):
        """Initialize the conductor host.

        :param admin_context: the admin context to pass to periodic tasks.
        :raises: RuntimeError when conductor is already running.
        :raises: NoDriversLoaded when no drivers are enabled on the conductor.
        :raises: DriverNotFound if a driver is enabled that does not exist.
        :raises: DriverLoadError if an enabled driver cannot be loaded.
        :raises: DriverNameConflict if a classic driver and a dynamic driver
                 are both enabled and have the same name.
        :raises: ConfigInvalid if required config options for connection with
                 radosgw are missing while storing config drive.
        """
        if self._started:
            raise RuntimeError(
                _('Attempt to start an already running '
                  'conductor manager'))
        self._shutdown = False

        self.dbapi = dbapi.get_instance()

        self._keepalive_evt = threading.Event()
        """Event for the keepalive thread."""

        # TODO(dtantsur): make the threshold configurable?
        rejection_func = rejection.reject_when_reached(
            CONF.conductor.workers_pool_size)
        self._executor = futurist.GreenThreadPoolExecutor(
            max_workers=CONF.conductor.workers_pool_size,
            check_and_reject=rejection_func)
        """Executor for performing tasks async."""

        self.ring_manager = hash_ring.HashRingManager()
        """Consistent hash ring which maps drivers to conductors."""

        _check_enabled_interfaces()

        # NOTE(deva): these calls may raise DriverLoadError or DriverNotFound
        # NOTE(vdrok): Instantiate network and storage interface factory on
        # startup so that all the interfaces are loaded at the very
        # beginning, and failures prevent the conductor from starting.
        drivers = driver_factory.drivers()
        hardware_types = driver_factory.hardware_types()
        driver_factory.NetworkInterfaceFactory()
        driver_factory.StorageInterfaceFactory()

        # NOTE(jroll) this is passed to the dbapi, which requires a list, not
        # a generator (which keys() returns in py3)
        driver_names = list(drivers)
        hardware_type_names = list(hardware_types)

        # check that at least one driver is loaded, whether classic or dynamic
        if not driver_names and not hardware_type_names:
            msg = ("Conductor %s cannot be started because no drivers "
                   "were loaded. This could be because no classic drivers "
                   "were specified in the 'enabled_drivers' config option "
                   "and no dynamic drivers were specified in the "
                   "'enabled_hardware_types' config option.")
            LOG.error(msg, self.host)
            raise exception.NoDriversLoaded(conductor=self.host)

        # check for name clashes between classic and dynamic drivers
        name_clashes = set(driver_names).intersection(hardware_type_names)
        if name_clashes:
            name_clashes = ', '.join(name_clashes)
            msg = ("Conductor %(host)s cannot be started because there is "
                   "one or more name conflicts between classic drivers and "
                   "dynamic drivers (%(names)s). Check any external driver "
                   "plugins and the 'enabled_drivers' and "
                   "'enabled_hardware_types' config options.")
            LOG.error(msg, {'host': self.host, 'names': name_clashes})
            raise exception.DriverNameConflict(names=name_clashes)

        # Collect driver-specific periodic tasks.
        # Conductor periodic tasks accept context argument, driver periodic
        # tasks accept this manager and context. We have to ensure that the
        # same driver interface class is not traversed twice, otherwise
        # we'll have several instances of the same task.
        LOG.debug('Collecting periodic tasks')
        self._periodic_task_callables = []
        periodic_task_classes = set()
        self._collect_periodic_tasks(self, (admin_context, ))
        for driver_obj in drivers.values():
            for iface_name in driver_obj.all_interfaces:
                iface = getattr(driver_obj, iface_name, None)
                if iface and iface.__class__ not in periodic_task_classes:
                    self._collect_periodic_tasks(iface, (self, admin_context))
                    periodic_task_classes.add(iface.__class__)

        if (len(self._periodic_task_callables) >
                CONF.conductor.workers_pool_size):
            LOG.warning(
                'This conductor has %(tasks)d periodic tasks '
                'enabled, but only %(workers)d task workers '
                'allowed by [conductor]workers_pool_size option', {
                    'tasks': len(self._periodic_task_callables),
                    'workers': CONF.conductor.workers_pool_size
                })

        self._periodic_tasks = periodics.PeriodicWorker(
            self._periodic_task_callables,
            executor_factory=periodics.ExistingExecutor(self._executor))

        # Check for required config options if object_store_endpoint_type is
        # radosgw
        if (CONF.deploy.configdrive_use_object_store
                and CONF.deploy.object_store_endpoint_type == "radosgw"):
            if (None in (CONF.swift.auth_url, CONF.swift.username,
                         CONF.swift.password)):
                msg = _("Parameters missing to make a connection with "
                        "radosgw. Ensure that [swift]/auth_url, "
                        "[swift]/username, and [swift]/password are all "
                        "configured.")
                raise exception.ConfigInvalid(msg)

        # clear all target_power_state with locks by this conductor
        self.dbapi.clear_node_target_power_state(self.host)
        # clear all locks held by this conductor before registering
        self.dbapi.clear_node_reservations_for_conductor(self.host)
        try:
            # Register this conductor with the cluster
            self.conductor = objects.Conductor.register(
                admin_context, self.host, driver_names)
        except exception.ConductorAlreadyRegistered:
            # This conductor was already registered and did not shut down
            # properly, so log a warning and update the record.
            LOG.warning(
                "A conductor with hostname %(hostname)s was "
                "previously registered. Updating registration",
                {'hostname': self.host})
            self.conductor = objects.Conductor.register(admin_context,
                                                        self.host,
                                                        driver_names,
                                                        update_existing=True)

        # register hardware types and interfaces supported by this conductor
        # and validate them against other conductors
        try:
            self._register_and_validate_hardware_interfaces(hardware_types)
        except (exception.DriverLoadError, exception.DriverNotFound,
                exception.ConductorHardwareInterfacesAlreadyRegistered,
                exception.InterfaceNotFoundInEntrypoint,
                exception.NoValidDefaultForInterface) as e:
            with excutils.save_and_reraise_exception():
                LOG.error('Failed to register hardware types. %s', e)
                self.del_host()

        # Start periodic tasks
        self._periodic_tasks_worker = self._executor.submit(
            self._periodic_tasks.start, allow_empty=True)
        self._periodic_tasks_worker.add_done_callback(
            self._on_periodic_tasks_stop)

        # NOTE(lucasagomes): If the conductor server dies abruptly
        # mid deployment (OMM Killer, power outage, etc...) we
        # can not resume the deployment even if the conductor
        # comes back online. Cleaning the reservation of the nodes
        # (dbapi.clear_node_reservations_for_conductor) is not enough to
        # unstick it, so let's gracefully fail the deployment so the node
        # can go through the steps (deleting & cleaning) to make itself
        # available again.
        filters = {'reserved': False, 'provision_state': states.DEPLOYING}
        last_error = (_("The deployment can't be resumed by conductor "
                        "%s. Moving to fail state.") % self.host)
        self._fail_if_in_state(ironic_context.get_admin_context(),
                               filters,
                               states.DEPLOYING,
                               'provision_updated_at',
                               last_error=last_error)

        # Start consoles if it set enabled in a greenthread.
        try:
            self._spawn_worker(self._start_consoles,
                               ironic_context.get_admin_context())
        except exception.NoFreeConductorWorker:
            LOG.warning('Failed to start worker for restarting consoles.')

        # Spawn a dedicated greenthread for the keepalive
        try:
            self._spawn_worker(self._conductor_service_record_keepalive)
            LOG.info(
                'Successfully started conductor with hostname '
                '%(hostname)s.', {'hostname': self.host})
        except exception.NoFreeConductorWorker:
            with excutils.save_and_reraise_exception():
                LOG.critical('Failed to start keepalive')
                self.del_host()

        self._started = True
Exemple #13
0
    def init_host(self, admin_context=None):
        """Initialize the conductor host.

        :param admin_context: the admin context to pass to periodic tasks.
        :raises: RuntimeError when conductor is already running.
        :raises: NoDriversLoaded when no drivers are enabled on the conductor.
        :raises: DriverNotFound if a driver is enabled that does not exist.
        :raises: DriverLoadError if an enabled driver cannot be loaded.
        :raises: DriverNameConflict if a classic driver and a dynamic driver
                 are both enabled and have the same name.
        """
        if self._started:
            raise RuntimeError(
                _('Attempt to start an already running '
                  'conductor manager'))
        self._shutdown = False

        self.dbapi = dbapi.get_instance()

        self._keepalive_evt = threading.Event()
        """Event for the keepalive thread."""

        # TODO(dtantsur): make the threshold configurable?
        rejection_func = rejection.reject_when_reached(
            CONF.conductor.workers_pool_size)
        self._executor = futurist.GreenThreadPoolExecutor(
            max_workers=CONF.conductor.workers_pool_size,
            check_and_reject=rejection_func)
        """Executor for performing tasks async."""

        # TODO(jroll) delete the use_groups argument and use the default
        # in Stein.
        self.ring_manager = hash_ring.HashRingManager(
            use_groups=self._use_groups())
        """Consistent hash ring which maps drivers to conductors."""

        _check_enabled_interfaces()

        # NOTE(deva): these calls may raise DriverLoadError or DriverNotFound
        # NOTE(vdrok): Instantiate network and storage interface factory on
        # startup so that all the interfaces are loaded at the very
        # beginning, and failures prevent the conductor from starting.
        hardware_types = driver_factory.hardware_types()
        driver_factory.NetworkInterfaceFactory()
        driver_factory.StorageInterfaceFactory()

        # NOTE(jroll) this is passed to the dbapi, which requires a list, not
        # a generator (which keys() returns in py3)
        hardware_type_names = list(hardware_types)

        # check that at least one driver is loaded, whether classic or dynamic
        if not hardware_type_names:
            msg = ("Conductor %s cannot be started because no hardware types "
                   "were specified in the 'enabled_hardware_types' config "
                   "option.")
            LOG.error(msg, self.host)
            raise exception.NoDriversLoaded(conductor=self.host)

        self._collect_periodic_tasks(admin_context)

        # clear all target_power_state with locks by this conductor
        self.dbapi.clear_node_target_power_state(self.host)
        # clear all locks held by this conductor before registering
        self.dbapi.clear_node_reservations_for_conductor(self.host)
        try:
            # Register this conductor with the cluster
            self.conductor = objects.Conductor.register(
                admin_context, self.host, hardware_type_names,
                CONF.conductor.conductor_group)
        except exception.ConductorAlreadyRegistered:
            # This conductor was already registered and did not shut down
            # properly, so log a warning and update the record.
            LOG.warning(
                "A conductor with hostname %(hostname)s was "
                "previously registered. Updating registration",
                {'hostname': self.host})
            self.conductor = objects.Conductor.register(
                admin_context,
                self.host,
                hardware_type_names,
                CONF.conductor.conductor_group,
                update_existing=True)

        # register hardware types and interfaces supported by this conductor
        # and validate them against other conductors
        try:
            self._register_and_validate_hardware_interfaces(hardware_types)
        except (exception.DriverLoadError, exception.DriverNotFound,
                exception.ConductorHardwareInterfacesAlreadyRegistered,
                exception.InterfaceNotFoundInEntrypoint,
                exception.NoValidDefaultForInterface) as e:
            with excutils.save_and_reraise_exception():
                LOG.error('Failed to register hardware types. %s', e)
                self.del_host()

        # Start periodic tasks
        self._periodic_tasks_worker = self._executor.submit(
            self._periodic_tasks.start, allow_empty=True)
        self._periodic_tasks_worker.add_done_callback(
            self._on_periodic_tasks_stop)

        for state in states.STUCK_STATES_TREATED_AS_FAIL:
            self._fail_transient_state(
                state,
                _("The %(state)s state can't be resumed by conductor "
                  "%(host)s. Moving to fail state.") % {
                      'state': state,
                      'host': self.host
                  })

        # Start consoles if it set enabled in a greenthread.
        try:
            self._spawn_worker(self._start_consoles,
                               ironic_context.get_admin_context())
        except exception.NoFreeConductorWorker:
            LOG.warning('Failed to start worker for restarting consoles.')

        # Spawn a dedicated greenthread for the keepalive
        try:
            self._spawn_worker(self._conductor_service_record_keepalive)
            LOG.info(
                'Successfully started conductor with hostname '
                '%(hostname)s.', {'hostname': self.host})
        except exception.NoFreeConductorWorker:
            with excutils.save_and_reraise_exception():
                LOG.critical('Failed to start keepalive')
                self.del_host()

        # Resume allocations that started before the restart.
        try:
            self._spawn_worker(self._resume_allocations,
                               ironic_context.get_admin_context())
        except exception.NoFreeConductorWorker:
            LOG.warning('Failed to start worker for resuming allocations.')

        if CONF.conductor.enable_mdns:
            self._publish_endpoint()

        self._started = True
Exemple #14
0
    def init_host(self, admin_context=None):
        """Initialize the conductor host.

        :param admin_context: the admin context to pass to periodic tasks.
        :raises: RuntimeError when conductor is already running.
        :raises: NoDriversLoaded when no drivers are enabled on the conductor.
        :raises: DriverNotFound if a driver is enabled that does not exist.
        :raises: DriverLoadError if an enabled driver cannot be loaded.
        """
        if self._started:
            raise RuntimeError(
                _('Attempt to start an already running '
                  'conductor manager'))

        self.dbapi = dbapi.get_instance()

        self._keepalive_evt = threading.Event()
        """Event for the keepalive thread."""

        # TODO(dtantsur): make the threshold configurable?
        rejection_func = rejection.reject_when_reached(
            CONF.conductor.workers_pool_size)
        self._executor = futurist.GreenThreadPoolExecutor(
            max_workers=CONF.conductor.workers_pool_size,
            check_and_reject=rejection_func)
        """Executor for performing tasks async."""

        self.ring_manager = hash.HashRingManager()
        """Consistent hash ring which maps drivers to conductors."""

        # NOTE(deva): these calls may raise DriverLoadError or DriverNotFound
        # NOTE(vdrok): Instantiate network and storage interface factory on
        # startup so that all the interfaces are loaded at the very
        # beginning, and failures prevent the conductor from starting.
        drivers = driver_factory.drivers()
        driver_factory.NetworkInterfaceFactory()
        driver_factory.StorageInterfaceFactory()
        if not drivers:
            msg = _LE("Conductor %s cannot be started because no drivers "
                      "were loaded.  This could be because no drivers were "
                      "specified in 'enabled_drivers' config option.")
            LOG.error(msg, self.host)
            raise exception.NoDriversLoaded(conductor=self.host)

        # NOTE(jroll) this is passed to the dbapi, which requires a list, not
        # a generator (which keys() returns in py3)
        driver_names = list(drivers)

        # Collect driver-specific periodic tasks.
        # Conductor periodic tasks accept context argument, driver periodic
        # tasks accept this manager and context. We have to ensure that the
        # same driver interface class is not traversed twice, otherwise
        # we'll have several instances of the same task.
        LOG.debug('Collecting periodic tasks')
        self._periodic_task_callables = []
        periodic_task_classes = set()
        self._collect_periodic_tasks(self, (admin_context, ))
        for driver_obj in drivers.values():
            # TODO(dtantsur): collecting tasks from driver objects is
            # deprecated and should be removed in Ocata.
            self._collect_periodic_tasks(driver_obj, (self, admin_context))
            for iface_name in driver_obj.all_interfaces:
                iface = getattr(driver_obj, iface_name, None)
                if iface and iface.__class__ not in periodic_task_classes:
                    self._collect_periodic_tasks(iface, (self, admin_context))
                    periodic_task_classes.add(iface.__class__)

        if (len(self._periodic_task_callables) >
                CONF.conductor.workers_pool_size):
            LOG.warning(
                _LW('This conductor has %(tasks)d periodic tasks '
                    'enabled, but only %(workers)d task workers '
                    'allowed by [conductor]workers_pool_size option'), {
                        'tasks': len(self._periodic_task_callables),
                        'workers': CONF.conductor.workers_pool_size
                    })

        self._periodic_tasks = periodics.PeriodicWorker(
            self._periodic_task_callables,
            executor_factory=periodics.ExistingExecutor(self._executor))

        # clear all target_power_state with locks by this conductor
        self.dbapi.clear_node_target_power_state(self.host)
        # clear all locks held by this conductor before registering
        self.dbapi.clear_node_reservations_for_conductor(self.host)
        try:
            # Register this conductor with the cluster
            self.conductor = objects.Conductor.register(
                admin_context, self.host, driver_names)
        except exception.ConductorAlreadyRegistered:
            # This conductor was already registered and did not shut down
            # properly, so log a warning and update the record.
            LOG.warning(
                _LW("A conductor with hostname %(hostname)s "
                    "was previously registered. Updating registration"),
                {'hostname': self.host})
            self.conductor = objects.Conductor.register(admin_context,
                                                        self.host,
                                                        driver_names,
                                                        update_existing=True)

        # Start periodic tasks
        self._periodic_tasks_worker = self._executor.submit(
            self._periodic_tasks.start, allow_empty=True)
        self._periodic_tasks_worker.add_done_callback(
            self._on_periodic_tasks_stop)

        # NOTE(lucasagomes): If the conductor server dies abruptly
        # mid deployment (OMM Killer, power outage, etc...) we
        # can not resume the deployment even if the conductor
        # comes back online. Cleaning the reservation of the nodes
        # (dbapi.clear_node_reservations_for_conductor) is not enough to
        # unstick it, so let's gracefully fail the deployment so the node
        # can go through the steps (deleting & cleaning) to make itself
        # available again.
        filters = {'reserved': False, 'provision_state': states.DEPLOYING}
        last_error = (_("The deployment can't be resumed by conductor "
                        "%s. Moving to fail state.") % self.host)
        self._fail_if_in_state(ironic_context.get_admin_context(),
                               filters,
                               states.DEPLOYING,
                               'provision_updated_at',
                               last_error=last_error)

        # Start consoles if it set enabled in a greenthread.
        try:
            self._spawn_worker(self._start_consoles,
                               ironic_context.get_admin_context())
        except exception.NoFreeConductorWorker:
            LOG.warning(_LW('Failed to start worker for restarting consoles.'))

        # Spawn a dedicated greenthread for the keepalive
        try:
            self._spawn_worker(self._conductor_service_record_keepalive)
            LOG.info(
                _LI('Successfully started conductor with hostname '
                    '%(hostname)s.'), {'hostname': self.host})
        except exception.NoFreeConductorWorker:
            with excutils.save_and_reraise_exception():
                LOG.critical(_LC('Failed to start keepalive'))
                self.del_host()

        self._started = True
Exemple #15
0
 def setUp(self):
     super(HashRingManagerTestCase, self).setUp()
     self.ring_manager = hash.HashRingManager()
     self.context = context.get_admin_context()
     self.dbapi = dbapi.get_instance()
Exemple #16
0
 def setUp(self):
     super(HashRingManagerTestCase, self).setUp()
     self.ring_manager = hash_ring.HashRingManager()
     self.dbapi = dbapi.get_instance()