Example #1
0
def start_up(master=False):
    """Run this MAAS server's start-up tasks, retrying until they succeed.

    The tasks are there to:
    - make sure the singletons required by the application are created
    - sync the configuration of the external systems driven by MAAS

    Several processes may each execute this; database locking is relied on
    so that the tasks called from here are not run concurrently.

    Every failed attempt is logged and followed by a 3 second pause before
    the next one. `SystemExit` and `KeyboardInterrupt` are never retried.

    :param master: Passed through unchanged to `inner_start_up`.
    """
    pending = True
    while pending:
        try:
            # Make sure a shared secret exists, so that a cluster on the
            # same host as this region can authenticate.
            yield security.get_shared_secret()
            # The remaining tasks must not run concurrently with other
            # invocations of themselves anywhere in this MAAS installation.
            yield deferToDatabase(inner_start_up, master=master)
        except (SystemExit, KeyboardInterrupt):
            raise
        except DatabaseError as error:
            pg_error = get_psycopg2_exception(error)
            if pg_error is None:
                maaslog.warning(
                    "Database error during start-up; pausing for 3 seconds."
                )
            elif pg_error.pgcode is not None:
                maaslog.warning(
                    "Database error during start-up (PostgreSQL error %s); "
                    "pausing for 3 seconds.",
                    pg_error.pgcode,
                )
            else:
                maaslog.warning(
                    "Database error during start-up (PostgreSQL error "
                    "not reported); pausing for 3 seconds."
                )
            logger.error("Database error during start-up", exc_info=True)
            # Give it 3 seconds, then have another go.
            yield pause(3.0)
        except Exception:
            maaslog.warning("Error during start-up; pausing for 3 seconds.")
            logger.error("Error during start-up.", exc_info=True)
            # Give it 3 seconds, then have another go.
            yield pause(3.0)
        else:
            pending = False
Example #2
0
    def perform_power(self, power_func, state_desired, system_id, context):
        """Provides the logic to perform the power actions.

        For each delay in `self.wait_time`: call `power_func` through
        `deferToThread`, wait for the delay, then call `self.power_query`
        through `deferToThread` and finish as soon as the reported state
        equals `state_desired`.

        :param power_func: Function used to change the power state of the
            node. Typically this will be `self.power_on` or `self.power_off`.
        :param state_desired: The desired state for this node to be in,
            typically "on" or "off".
        :param system_id: The node's system ID.
        :param context: Passed, along with `system_id`, to `power_func` and
            to `self.power_query`.
        :raises PowerFatalError: Re-raised immediately, without retrying.
        :raises PowerError: Once `self.wait_time` is exhausted: the most
            recently caught `PowerError` if there was one, otherwise a new
            one reporting that the state never changed.
        """

        state = "unknown"
        # The most recent non-fatal error. The instance itself is kept so
        # that it can be re-raised as-is (same type, arguments and traceback)
        # instead of being wrapped inside a newly constructed exception.
        last_error = None

        for waiting_time in self.wait_time:
            # Try to change state.
            try:
                yield deferToThread(
                    power_func, system_id, context)
            except PowerFatalError:
                raise  # Don't retry.
            except PowerError as error:
                last_error = error
                # Wait before retrying.
                yield pause(waiting_time, self.clock)
            else:
                # Wait before checking state.
                yield pause(waiting_time, self.clock)
                # Try to get power state.
                try:
                    state = yield deferToThread(
                        self.power_query, system_id, context)
                except PowerFatalError:
                    raise  # Don't retry.
                except PowerError as error:
                    last_error = error
                else:
                    # If state is now the correct state, done.
                    if state == state_desired:
                        return

        if last_error is None:
            # No error found, so communication to the BMC is good, state must
            # have not changed in the elapsed time. That is the only reason we
            # should make it this far.
            raise PowerError(
                "Failed to power %s. BMC never transitioned from %s to %s."
                % (system_id, state, state_desired))
        # Report the last error; its traceback is still attached to it.
        raise last_error
Example #3
0
def commission_node(system_id, user):
    """Ask the region to commission a node.

    A region client is looked up first, going through
    `retries(15, 5, reactor)` while `NoConnectionsAvailable` is raised. If
    no client is obtained an error is logged and nothing else is done.

    :param system_id: system_id of node to commission.
    :param user: user for the node.
    """
    # Imported here to avoid circular dependencies.
    from provisioningserver.rpc.region import CommissionNode

    for _elapsed, _remaining, delay in retries(15, 5, reactor):
        try:
            client = getRegionClient()
        except NoConnectionsAvailable:
            yield pause(delay, reactor)
        else:
            break
    else:
        maaslog.error("Can't commission node, no RPC connection to region.")
        return

    try:
        yield client(CommissionNode, system_id=system_id, user=user)
    except CommissionNodeFailed as error:
        # Commissioning is not possible for this node; log it and give up.
        maaslog.error(
            "Could not commission with system_id %s because %s.",
            system_id, error.args[0])
    except UnhandledCommand:
        # The region has not been upgraded to support this method yet;
        # log it and give up.
        maaslog.error(
            "Unable to commission node on region: Region does not "
            "support the CommissionNode RPC method.")
    finally:
        # NOTE(review): this runs on every exit from the try block,
        # including when an exception not handled above is propagating;
        # presumably returnValue() then takes precedence over it. Confirm
        # that is intended.
        returnValue(None)
Example #4
0
 def query(self, system_id, context):
     """Performs the power query action for `system_id`.

     One attempt is made per delay in `self.wait_time`; after a failed
     attempt that delay is waited out before the next one.

     NOTE(review): assumes `self.wait_time` is never empty; with no
     attempts there is no error to report at the end. Confirm.

     :param system_id: Passed to `self.power_query`.
     :param context: Passed to `self.power_query`.
     :return: The state given by `self.power_query`, via `returnValue`.
     :raises PowerFatalError: Re-raised immediately, without retrying.
     :raises PowerError: The most recently caught error, once every
         attempt has failed.
     """
     # The most recent non-fatal error. The instance itself is kept so that
     # it can be re-raised as-is (same type, arguments and traceback)
     # instead of being wrapped inside a newly constructed exception.
     last_error = None
     for waiting_time in self.wait_time:
         try:
             # Power queries are predominantly transactional and thus
             # blocking/synchronous. Genuinely non-blocking/asynchronous
             # methods must out themselves explicitly.
             if IAsynchronous.providedBy(self.power_query):
                 # The @asynchronous decorator will DTRT.
                 state = yield self.power_query(system_id, context)
             else:
                 state = yield deferToThread(
                     self.power_query, system_id, context
                 )
         except PowerFatalError:
             raise  # Don't retry.
         except PowerError as error:
             last_error = error
             # Wait before retrying.
             yield pause(waiting_time, self.clock)
         else:
             returnValue(state)
     else:
         # Every attempt failed: report the last error, whose traceback is
         # still attached to it.
         raise last_error
    def test_notification_gets_added_to_notifications(self):
        socket_path = self.patch_socket_path()
        service = LeaseSocketService(sentinel.service, reactor)
        service.startService()
        self.addCleanup(service.stopService)

        # Stop the looping call and swap the processor for a MagicMock, so
        # that this test can check that the notification gets added to
        # notifications.
        process_done = service.done
        service.processor.stop()
        yield process_done
        service.processor = MagicMock()

        # Create test payload to send.
        packet = {"test": factory.make_name("test")}

        # A notification sent to the socket should appear in notifications.
        yield deferToThread(self.send_notification, socket_path, packet)

        # Loop until the notifications has a notification.
        for elapsed, remaining, wait in retries(5, 0.1, reactor):
            if len(service.notifications) > 0:
                break
            else:
                yield pause(wait, reactor)

        # Should have one notification. `assertEqual` is used because the
        # `assertEquals` spelling is a deprecated alias.
        self.assertEqual([packet], list(service.notifications))
Example #6
0
    def _start_download(self):
        """Get boot sources and proxies from the region and import images.

        A client is obtained first, going through
        `retries(15, 5, self.clock)` while `NoConnectionsAvailable` is
        raised. If no client is obtained an error is logged and nothing is
        imported.
        """
        region_client = None
        # This service usually comes up before the RPC service, hence the
        # handful of attempts.
        for _elapsed, _remaining, delay in retries(15, 5, self.clock):
            try:
                region_client = yield self.client_service.getClientNow()
            except NoConnectionsAvailable:
                yield pause(delay, self.clock)
            else:
                break
        else:
            maaslog.error(
                "Can't initiate image download, no RPC connection to region.")
            return

        # The region supplies both the sources and the proxies.
        sources = yield self._get_boot_sources(region_client)
        proxies = yield region_client(GetProxies)

        def proxy_url_for(scheme):
            # Each entry is a ParsedResult; a missing scheme gives None.
            parsed = proxies.get(scheme)
            if parsed is None:
                return None
            return parsed.geturl()

        boot_sources = sources.get("sources")
        http_proxy = proxy_url_for("http")
        https_proxy = proxy_url_for("https")
        yield import_boot_images(boot_sources, http_proxy, https_proxy)
Example #7
0
 def _checkSerial(self, result):
     """Check that the serial of the domain is updated."""
     if result is None:
         return None
     serial, reloaded, domain_names = result
     if not reloaded:
         raise DNSReloadError(
             "Failed to reload DNS; timeout or rdnc command failed.")
     not_matching_domains = set(domain_names)
     loop = 0
     while len(not_matching_domains) > 0 and loop != 30:
         for domain in list(not_matching_domains):
             try:
                 answers, _, _ = yield self.dnsResolver.lookupAuthority(
                     domain)
             except (ValueError, TimeoutError):
                 answers = []
             if len(answers) > 0:
                 if int(answers[0].payload.serial) == int(serial):
                     not_matching_domains.remove(domain)
         loop += 1
         yield pause(2)
     # 30 retries with 2 second pauses (aka. 60 seconds) has passed and
     # there still is a domain that has the wrong serial. For now just
     # raise the error, in the future we should take action and force
     # restart bind.
     if len(not_matching_domains) > 0:
         raise DNSReloadError("Failed to reload DNS; serial mismatch "
                              "on domains %s" %
                              ', '.join(not_matching_domains))
     return result
 def _getRPCClient(self):
     """Keep asking for an RPC client for as long as `self.running` holds.

     A one second pause follows each `NoConnectionsAvailable`. The client
     is handed back through `returnValue`; if `self.running` becomes false
     first, the loop just ends without one.
     """
     while self.running:
         try:
             rpc_client = yield self.clientService.getClientNow()
         except NoConnectionsAvailable:
             # No connection yet; wait a moment, then go round again.
             yield pause(1.0)
             continue
         returnValue(rpc_client)
Example #9
0
 def _tryGetClient(self):
     """Try to get a client, going through `retries(15, 5, self.clock)`.

     Returns the client, or `None` if every attempt raised
     `NoConnectionsAvailable`.
     """
     found = None
     for _elapsed, _remaining, delay in retries(15, 5, self.clock):
         try:
             found = yield self.client_service.getClientNow()
         except NoConnectionsAvailable:
             yield pause(delay, self.clock)
         else:
             break
     return found
Example #10
0
    def test_takes_lock_when_running(self):
        # `deferToThread` in `boot_images` is patched to return a one second
        # pause on a clock that this test controls.
        fake_clock = Clock()
        patched_defer = self.patch(boot_images, "deferToThread")
        patched_defer.return_value = pause(1, fake_clock)

        # Starting the import acquires the lock.
        import_boot_images(sentinel.sources, factory.make_simple_http_url())
        self.assertTrue(concurrency.boot_images.locked)

        # When the clock reaches the end of the pause, the lock is released.
        fake_clock.advance(1)
        self.assertFalse(concurrency.boot_images.locked)
Example #11
0
 def _getConnection(self):
     """Get a connection to the region.

     Goes through `retries(30, 10, self.clock)` while
     `NoConnectionsAvailable` is raised. Returns the client, or `None`
     (after logging an error) if no attempt succeeded.
     """
     connection = None
     for _elapsed, _remaining, delay in retries(30, 10, self.clock):
         try:
             connection = yield self.client_service.getClientNow()
         except NoConnectionsAvailable:
             yield pause(delay, self.clock)
         else:
             break
     else:
         maaslog.error(
             "Can't update service statuses, no RPC "
             "connection to region.")
     return connection
 def recordInterfaces(self, interfaces, hints=None):
     """Record the interfaces information to the region.

     While `self.running` holds, a client is requested, with a one second
     pause after each `NoConnectionsAvailable`. Once a client is obtained,
     `UpdateInterfaces` is sent, preceded by `RequestRackRefresh` when
     `self._recorded` is `None`.

     :param interfaces: Sent as `interfaces` in `UpdateInterfaces`.
     :param hints: Sent as `topology_hints` in `UpdateInterfaces`.
     """
     while self.running:
         try:
             region = yield self.clientService.getClientNow()
         except NoConnectionsAvailable:
             # No connection yet; wait a moment, then go round again.
             yield pause(1.0)
         else:
             if self._recorded is None:
                 yield region(
                     RequestRackRefresh, system_id=region.localIdent)
             yield region(
                 UpdateInterfaces,
                 system_id=region.localIdent,
                 interfaces=interfaces,
                 topology_hints=hints)
             break
Example #13
0
    def test_garbage_is_collected(self):
        service = publication.DNSPublicationGarbageService()

        # The expected cut-off is seven days before the patched "now".
        frozen_now = patch_utcnow(self)
        one_week = timedelta(days=7)
        expected_cutoff = frozen_now.replace(tzinfo=UTC) - one_week

        interval = self.patch(service, "_getInterval")
        interval.side_effect = [0, 999]
        self.patch(DNSPublication.objects, "collect_garbage")

        yield service.startService()
        # Give the reactor a tick between starting and stopping.
        yield pause(0.0)
        yield service.stopService()

        self.assertThat(
            DNSPublication.objects.collect_garbage,
            MockCalledOnceWith(expected_cutoff))
Example #14
0
    def start_services():
        # Register the RPC service under `services` as "rpc", then start
        # everything.
        rpc_service = ClusterClientService(reactor)
        rpc_service.setName("rpc")
        rpc_service.setServiceParent(services)

        yield services.startService()

        # Wait for a region connection, going through
        # `retries(15, 1, reactor)`; exit with status 1 if none appears.
        for _elapsed, _remaining, delay in retries(15, 1, reactor):
            try:
                yield getRegionClient()
                break
            except NoConnectionsAvailable:
                yield pause(delay, reactor)
        else:
            print("Can't connect to the region.", file=stderr)
            raise SystemExit(1)
Example #15
0
 def _updateRegion(self, services):
     """Update region about services status.

     A client is obtained first, going through
     `retries(30, 10, self.clock)` while `NoConnectionsAvailable` is
     raised; if none is obtained an error is logged and nothing is sent.

     :param services: Passed to `self._buildServices`; the result of that
         is what gets sent in `UpdateServices`.
     """
     region = None
     for _elapsed, _remaining, delay in retries(30, 10, self.clock):
         try:
             region = yield self.client_service.getClientNow()
         except NoConnectionsAvailable:
             yield pause(delay, self.clock)
         else:
             break
     else:
         maaslog.error(
             "Can't update service statuses, no RPC "
             "connection to region.")
         return
     built = yield self._buildServices(services)
     yield region(
         UpdateServices, system_id=region.localIdent, services=built)
Example #16
0
 def query(self, system_id, context):
     """Performs the power query action for `system_id`.

     One attempt, through `deferToThread`, is made per delay in
     `self.wait_time`; after a failed attempt that delay is waited out
     before the next one.

     NOTE(review): assumes `self.wait_time` is never empty; with no
     attempts there is no error to report at the end. Confirm.

     :param system_id: Passed to `self.power_query`.
     :param context: Passed to `self.power_query`.
     :return: The state given by `self.power_query`, via `returnValue`.
     :raises PowerFatalError: Re-raised immediately, without retrying.
     :raises PowerError: The most recently caught error, once every
         attempt has failed.
     """
     # The most recent non-fatal error. The instance itself is kept so that
     # it can be re-raised as-is (same type, arguments and traceback)
     # instead of being wrapped inside a newly constructed exception.
     last_error = None
     for waiting_time in self.wait_time:
         try:
             state = yield deferToThread(
                 self.power_query, system_id, context)
         except PowerFatalError:
             raise  # Don't retry.
         except PowerError as error:
             last_error = error
             # Wait before retrying.
             yield pause(waiting_time, self.clock)
         else:
             returnValue(state)
     else:
         # Every attempt failed: report the last error, whose traceback is
         # still attached to it.
         raise last_error
Example #17
0
    def processNotification(self, notification, clock=reactor):
        """Send a notification to the region.

        A client is obtained first, going through `retries(30, 10, clock)`
        while `NoConnectionsAvailable` is raised; if none is obtained an
        error is logged and the notification is not sent.

        :param notification: A dict of keyword arguments for `UpdateLease`.
            It is modified in place: "cluster_uuid" is added.
        :param clock: Used both to drive the retries and to pause between
            them.
        """
        client = None
        for elapsed, remaining, wait in retries(30, 10, clock):
            try:
                client = yield self.client_service.getClientNow()
                break
            except NoConnectionsAvailable:
                # Pause on the same clock that drives the retries, so that
                # a caller-supplied clock governs the whole wait.
                yield pause(wait, clock)
        else:
            maaslog.error("Can't send DHCP lease information, no RPC "
                          "connection to region.")
            return

        # Notification contains all the required data except for the cluster
        # UUID. Add that into the notification and send the information to
        # the region for processing.
        notification["cluster_uuid"] = client.localIdent
        yield client(UpdateLease, **notification)
Example #18
0
 def _updateRegion(self, services):
     """Update region about services status.

     Nothing is sent when the status built from `services` equals the one
     remembered in `self._services`. Otherwise a client is obtained, going
     through `retries(30, 10, self.clock)` while `NoConnectionsAvailable`
     is raised, and `UpdateServices` is sent. If no client is obtained an
     error is logged and nothing is sent.

     :param services: Passed to `self._buildServices`; the result of that
         is what gets compared, sent and remembered.
     """
     services = yield self._buildServices(services)
     if self._services is not None and self._services == services:
         # The updated status to the region hasn't changed no reason
         # to update the region controller.
         return None
     client = None
     for elapsed, remaining, wait in retries(30, 10, self.clock):
         try:
             client = yield self.client_service.getClientNow()
             break
         except NoConnectionsAvailable:
             yield pause(wait, self.clock)
     else:
         maaslog.error("Can't update service statuses, no RPC "
                       "connection to region.")
         return
     yield client(UpdateServices,
                  system_id=client.localIdent,
                  services=services)
     # Remember the status only once the region has actually been sent it;
     # remembering it any earlier would make a failed update look like "no
     # change" on the next call, and the region would never be told.
     self._services = services
Example #19
0
 def _rbacFailure(failure, delay):
     """Log a failed RBAC sync; return `pause(delay)` if `delay` is truthy.

     :param failure: Passed to `log.err` together with the message.
     :param delay: Passed to `pause` when truthy; otherwise nothing is
         paused and `None` is returned.
     """
     log.err(failure, "Failed syncing resources to RBAC.")
     return pause(delay) if delay else None
Example #20
0
def create_node(
        macs, arch, power_type, power_parameters, domain=None,
        hostname=None):
    """Create a Node on the region and return its system_id.

    A region client is looked up first, going through
    `retries(15, 5, reactor)` while `NoConnectionsAvailable` is raised. If
    no client is obtained an error is logged and nothing is created.

    The result is delivered through `returnValue`: the new node's system_id,
    or `None` when the region reports `NodeAlreadyExists`,
    `UnhandledCommand` or `UnknownRemoteError` (each of which is logged).

    :param macs: A list of MAC addresses belonging to the node.
    :param arch: The node's architecture, in the form 'arch/subarch'.
    :param power_type: The node's power type as a string.
    :param power_parameters: The power parameters for the node, as a
        dict.
    :param domain: The domain the node should join.
    :param hostname: Optional hostname; when given it is passed through
        `coerce_to_valid_hostname` before being sent.
    """
    if hostname is not None:
        hostname = coerce_to_valid_hostname(hostname)

    for _elapsed, _remaining, delay in retries(15, 5, reactor):
        try:
            client = getRegionClient()
        except NoConnectionsAvailable:
            yield pause(delay, reactor)
        else:
            break
    else:
        maaslog.error("Can't create node, no RPC connection to region.")
        return

    # Duplicate MAC addresses are dropped before sending. Sorting is there
    # to avoid test failures.
    macs = sorted(set(macs))
    system_id = None
    try:
        response = yield client(
            CreateNode,
            architecture=arch,
            power_type=power_type,
            power_parameters=json.dumps(power_parameters),
            mac_addresses=macs,
            hostname=hostname,
            domain=domain)
    except NodeAlreadyExists:
        # The region already has this node: log the error and give up.
        maaslog.error(
            "A node with one of the mac addresses in %s already exists.", macs)
    except UnhandledCommand:
        # The region has not been upgraded to support this method yet:
        # log the error and give up.
        maaslog.error(
            "Unable to create node on region: Region does not "
            "support the CreateNode RPC method.")
    except UnknownRemoteError as error:
        # Seen, for example, when a ValidationError occurs on the region
        # (in particular when the hostname is a duplicate). Specific
        # exceptions for these cases would allow acting on them
        # appropriately.
        maaslog.error(
            "Unknown error while creating node %s: %s (see regiond.log)", macs,
            error.description)
    else:
        system_id = response['system_id']
    returnValue(system_id)
Example #21
0
    def perform_power(self, power_func, state_desired, system_id, context):
        """Provides the logic to perform the power actions.

        For each delay in `self.wait_time`: call `power_func`, and, unless
        `self.queryable` is false (in which case a successful call ends the
        method), wait for the delay, call `self.power_query` and finish as
        soon as the reported state equals `state_desired`.

        :param power_func: Function used to change the power state of the
            node. Typically this will be `self.power_on` or `self.power_off`.
        :param state_desired: The desired state for this node to be in,
            typically "on" or "off".
        :param system_id: The node's system ID.
        :param context: Passed, along with `system_id`, to `power_func` and
            to `self.power_query`.
        :raises PowerFatalError: Re-raised immediately, without retrying.
        :raises PowerError: Once `self.wait_time` is exhausted: the most
            recently caught `PowerError` if there was one, otherwise a new
            one reporting that the state never changed.
        """

        state = "unknown"
        # The most recent non-fatal error. The instance itself is kept so
        # that it can be re-raised as-is (same type, arguments and traceback)
        # instead of being wrapped inside a newly constructed exception.
        last_error = None

        for waiting_time in self.wait_time:
            # Try to change state.
            try:
                # Power methods are predominantly transactional and thus
                # blocking/synchronous. Genuinely non-blocking/asynchronous
                # methods must out themselves explicitly.
                if IAsynchronous.providedBy(power_func):
                    # The @asynchronous decorator will DTRT.
                    yield power_func(system_id, context)
                else:
                    yield deferToThread(power_func, system_id, context)
            except PowerFatalError:
                raise  # Don't retry.
            except PowerError as error:
                last_error = error
                # Wait before retrying.
                yield pause(waiting_time, self.clock)
            else:
                # LP:1768659 - If the power driver isn't queryable(manual)
                # checking the power state will always fail.
                if not self.queryable:
                    return
                # Wait before checking state.
                yield pause(waiting_time, self.clock)
                # Try to get power state.
                try:
                    # Power queries are predominantly transactional and thus
                    # blocking/synchronous. Genuinely non-blocking/asynchronous
                    # methods must out themselves explicitly.
                    if IAsynchronous.providedBy(self.power_query):
                        # The @asynchronous decorator will DTRT.
                        state = yield self.power_query(system_id, context)
                    else:
                        state = yield deferToThread(self.power_query,
                                                    system_id, context)
                except PowerFatalError:
                    raise  # Don't retry.
                except PowerError as error:
                    last_error = error
                else:
                    # If state is now the correct state, done.
                    if state == state_desired:
                        return

        if last_error is None:
            # No error found, so communication to the BMC is good, state must
            # have not changed in the elapsed time. That is the only reason we
            # should make it this far.
            raise PowerError(
                "Failed to power %s. BMC never transitioned from %s to %s." %
                (system_id, state, state_desired))
        # Report the last error; its traceback is still attached to it.
        raise last_error