def testAddRoleToPrimarySuccess(self):
        """Check that adding a role to a primary server triggers its actions.

        The drone role is added to the primary scheduler with action=True,
        so besides the database write a follow-up command (e.g., restarting
        the scheduler so the new drone is picked up) is expected to run.
        """
        drone_role = server_models.ServerRole.ROLE.DRONE
        scheduler_role = server_models.ServerRole.ROLE.SCHEDULER
        primary_status = server_models.Server.STATUS.PRIMARY
        target = self.PRIMARY_SCHEDULER

        # Record phase: the calls _add_role is expected to make.
        server_models.validate(role=drone_role)
        server_manager_utils.check_server(
            mox.IgnoreArg(), mox.IgnoreArg()).AndReturn(True)
        server_manager_utils.use_server_db().MultipleTimes().AndReturn(True)
        self.mox.StubOutWithMock(target, 'get_role_names')
        target.get_role_names().AndReturn([scheduler_role])
        # The new role is expected to be created in the database.
        server_models.ServerRole.objects.create(
            server=target, role=drone_role).AndReturn(self.DRONE_ROLE)
        # The primary scheduler is expected to be looked up and a command
        # executed, since action=True on a primary server.
        server_models.Server.objects.filter(
            roles__role=scheduler_role,
            status=primary_status).AndReturn([target])
        infra.execute_command(mox.IgnoreArg(), mox.IgnoreArg())

        # Replay phase: run the code under test against the recording.
        self.mox.ReplayAll()
        server_manager._add_role(target, drone_role, action=True)
    def testDeleteRoleFromPrimarySuccess(self):
        """Check that removing a role from a primary server triggers actions.

        The drone role is removed from the primary drone with action=True,
        so the role lookup, the missing-role warning and a follow-up command
        (e.g., restarting the scheduler to drop the drone) are all expected.
        """
        drone_role = server_models.ServerRole.ROLE.DRONE
        scheduler_role = server_models.ServerRole.ROLE.SCHEDULER
        primary_status = server_models.Server.STATUS.PRIMARY
        target = self.PRIMARY_DRONE

        # Record phase: the calls _delete_role is expected to make.
        server_manager_utils.use_server_db().MultipleTimes().AndReturn(True)
        server_models.validate(role=drone_role)
        self.mox.StubOutWithMock(target, 'get_role_names')
        target.get_role_names().MultipleTimes().AndReturn([drone_role])

        # The role object to remove is fetched from the server's roles.
        self.mox.StubOutWithMock(target.roles, 'get')
        target.roles.get(role=drone_role).AndReturn(self.DRONE_ROLE)

        # The primary scheduler is looked up so a command can be executed.
        server_models.Server.objects.filter(
            roles__role=scheduler_role,
            status=primary_status).AndReturn([self.PRIMARY_SCHEDULER])
        server_manager.server_manager_utils.warn_missing_role(
            drone_role, target)
        infra.execute_command(mox.IgnoreArg(), mox.IgnoreArg())

        # Replay phase: run the code under test against the recording.
        self.mox.ReplayAll()
        server_manager._delete_role(target, drone_role, action=True)
 def testChangeStatusSuccess_PrimaryToRepairFailed(self):
     """Check that a primary server can be moved to repair_required.

     The primary drone is switched to repair_required with action=True, so
     the missing-role warning and a follow-up command are expected.
     """
     drone_role = server_models.ServerRole.ROLE.DRONE
     repair_required = server_models.Server.STATUS.REPAIR_REQUIRED
     target = self.PRIMARY_DRONE

     # Record phase: the calls _change_status is expected to make.
     server_models.validate(status=repair_required)
     self.mox.StubOutWithMock(target.roles, 'filter')
     self.mox.StubOutWithMock(target, 'get_role_names')
     target.get_role_names().MultipleTimes().AndReturn([drone_role])
     # The query for roles requiring a unique instance yields nothing.
     target.roles.filter(
         role__in=server_models.ServerRole.ROLES_REQUIRE_UNIQUE_INSTANCE
     ).AndReturn(None)
     server_manager_utils.use_server_db().MultipleTimes().AndReturn(True)
     server_manager_utils.warn_missing_role(drone_role, target)
     # The primary scheduler is looked up so a command can be executed.
     server_models.Server.objects.filter(
         roles__role=server_models.ServerRole.ROLE.SCHEDULER,
         status=server_models.Server.STATUS.PRIMARY).AndReturn(
             [self.PRIMARY_SCHEDULER])
     infra.execute_command(mox.IgnoreArg(), mox.IgnoreArg())

     # Replay phase: run the code under test against the recording.
     self.mox.ReplayAll()
     server_manager._change_status(
         server=target, status=repair_required, action=True)
def get_drones():
    """Return the drones known to this deployment.

    When the server database is in use, the result of
    `server_manager_utils.get_drones()` is returned unchanged. Otherwise the
    comma-separated `drones` option of the scheduler config section is
    parsed; it defaults to 'localhost'.

    @return: A list of drones. In the global config case, a list of
             hostnames with surrounding whitespace stripped.
    """
    if not server_manager_utils.use_server_db():
        configured = global_config.global_config.get_config_value(
            scheduler_config.CONFIG_SECTION, 'drones', default='localhost')
        return [name.strip() for name in configured.split(',')]
    return server_manager_utils.get_drones()
def get_shards():
    """Get a list of shards from server database or global config.

    When the server database is in use, the result of
    `server_manager_utils.get_shards()` is returned unchanged. Otherwise the
    comma-separated `shards` option of the `SERVER` config section is
    parsed; it defaults to an empty string.

    @return: A list of shards. In the global config case, a list of
             non-empty hostnames with surrounding whitespace stripped; the
             list is empty when no shard is configured.
    """
    if server_manager_utils.use_server_db():
        return server_manager_utils.get_shards()

    config = global_config.global_config
    shards = config.get_config_value('SERVER', 'shards', default='')
    # ''.split(',') yields [''], so without filtering an unconfigured
    # deployment would report a single shard with an empty hostname. Blank
    # entries (also produced by stray or trailing commas) are dropped.
    stripped = (hostname.strip() for hostname in shards.split(','))
    return [hostname for hostname in stripped if hostname]
    def refresh_drone_configs(self):
        """Reload the per-drone settings of every drone in `self._drones`.

        The global config file is re-parsed first. For each drone, `enabled`,
        `max_processes` and `allowed_users` are then refreshed, either from
        the server's attributes in the server database (when it is in use)
        or from the `<hostname>_disabled`, `<hostname>_max_processes` and
        `<hostname>_users` options of the scheduler config section.
        Afterwards the drone queue is reordered and the record of
        max_processes notifications is cleared.
        """
        # server_manager_utils is imported here instead of at module level.
        # test_that imports drone_manager (via autoserv_utils) before django
        # is set up; test_that only sets django up in setup_local_afe, as it
        # is not needed when running a test on lab DUTs through the :lab:
        # option. A module-level import would therefore make test_that fail.
        from autotest_lib.site_utils import server_manager_utils
        config = global_config.global_config
        section = scheduler_config.CONFIG_SECTION
        config.parse_config_file()
        for hostname, drone in self._drones.iteritems():
            if server_manager_utils.use_server_db():
                # Settings are stored as attributes of the server record.
                server = server_manager_utils.get_servers(hostname=hostname)[0]
                attributes = dict(
                    (attr.attribute, attr.value)
                    for attr in server.attributes.all())
                disabled_flag = int(attributes.get('disabled', 0))
                drone.enabled = (disabled_flag == 0)
                process_limit = attributes.get(
                    'max_processes',
                    scheduler_config.config.max_processes_per_drone)
                drone.max_processes = int(process_limit)
                allowed_users = attributes.get('users', None)
            else:
                # Settings are hostname-prefixed options in the global config;
                # any non-empty `_disabled` value disables the drone.
                disabled = config.get_config_value(
                    section, '%s_disabled' % hostname, default='')
                drone.enabled = not bool(disabled)
                drone.max_processes = config.get_config_value(
                    section,
                    '%s_max_processes' % hostname,
                    type=int,
                    default=scheduler_config.config.max_processes_per_drone)
                allowed_users = config.get_config_value(
                    section, '%s_users' % hostname, default=None)

            # The user list is whitespace separated; no list means None.
            drone.allowed_users = (
                set(allowed_users.split()) if allowed_users else None)

            logging.info('Drone %s.max_processes: %s', hostname,
                         drone.max_processes)
            logging.info('Drone %s.enabled: %s', hostname, drone.enabled)
            logging.info('Drone %s.allowed_users: %s', hostname,
                         drone.allowed_users)
            logging.info('Drone %s.support_ssp: %s', hostname,
                         drone.support_ssp)

        self._reorder_drone_queue()  # max_processes may have changed
        # Clear notification record about reaching max_processes limit.
        self._notify_record = {}
    def testDeleteRoleFromBackupSuccess(self):
        """Check that removing a role from a backup server takes no action.

        The drone role is removed from the backup drone. Only the role
        lookup is recorded: no scheduler lookup and no command execution
        are expected, even though action=True.
        """
        drone_role = server_models.ServerRole.ROLE.DRONE
        target = self.BACKUP_DRONE

        # Record phase: the calls _delete_role is expected to make.
        server_models.validate(role=drone_role)
        server_manager_utils.use_server_db().MultipleTimes().AndReturn(True)
        self.mox.StubOutWithMock(target, 'get_role_names')
        target.get_role_names().MultipleTimes().AndReturn([drone_role])
        self.mox.StubOutWithMock(target.roles, 'get')
        target.roles.get(role=drone_role).AndReturn(self.DRONE_ROLE)

        # Replay phase: run the code under test against the recording.
        self.mox.ReplayAll()
        server_manager._delete_role(
            server=target, role=drone_role, action=True)
    def testAddRoleToBackupSuccess(self):
        """Check that adding a role to a backup server takes no action.

        The devserver role is added to the backup drone. Only the database
        write is recorded: no scheduler lookup and no command execution are
        expected, even though action=True.
        """
        devserver_role = server_models.ServerRole.ROLE.DEVSERVER
        target = self.BACKUP_DRONE

        # Record phase: the calls _add_role is expected to make.
        server_models.validate(role=devserver_role)
        server_manager_utils.check_server(
            mox.IgnoreArg(), mox.IgnoreArg()).AndReturn(True)
        server_manager_utils.use_server_db().MultipleTimes().AndReturn(True)
        self.mox.StubOutWithMock(target, 'get_role_names')
        target.get_role_names().AndReturn(
            [server_models.ServerRole.ROLE.DRONE])
        # The new role is expected to be created in the database.
        server_models.ServerRole.objects.create(
            server=mox.IgnoreArg(),
            role=devserver_role).AndReturn(self.DRONE_ROLE)

        # Replay phase: run the code under test against the recording.
        self.mox.ReplayAll()
        server_manager._add_role(
            server=target, role=devserver_role, action=True)
def try_execute(server,
                roles,
                enable,
                post_change,
                prev_status=server_models.Server.STATUS.REPAIR_REQUIRED,
                do_action=False):
    """Execute, or warn about, the actions implied by a server's role changes.

    @param server: Server whose roles changed.
    @param roles: A list of the roles that changed.
    @param enable: True if the roles are enabled, i.e., added to the server.
                   False if the roles are removed from the server.
    @param post_change: True if the actions to consider are those applied
                        after the role change, False for those applied
                        before it.
    @param prev_status: The status of the server before a status change, if
                        any. Actions are still considered when the server
                        was primary before the change. Default to
                        repair_required.
    @param do_action: Set to True to execute actions, otherwise, post a
                      warning on stderr for each skipped action.
    """
    # Nothing to do when the server database is not enabled.
    if not server_manager_utils.use_server_db():
        return

    # Actions only matter for a server that is, or just was, in primary
    # status. Note that no action is needed before a server is changed to
    # primary status. If that assumption is no longer valid, this method
    # needs to be updated accordingly.
    primary = server_models.Server.STATUS.PRIMARY
    if server.status != primary and prev_status != primary:
        return

    # Pick the action table for this phase; there are no actions to run
    # before a role is added.
    if enable:
        possible_actions = ACTIONS_AFTER_ROLE_APPLIED if post_change else {}
    elif post_change:
        possible_actions = ACTIONS_AFTER_ROLE_REMOVED
    else:
        possible_actions = ACTIONS_BEFORE_ROLE_REMOVED

    all_actions = [candidate
                   for role in roles
                   for candidate in possible_actions.get(role, [])]
    # Several roles may map to the same action; each one is handled once.
    for action in set(all_actions):
        if not do_action:
            message = ('WARNING! Action %s is skipped. Please manually '
                       'execute the action to make your change effective.' %
                       str(action))
            print >> sys.stderr, message
            continue
        # NOTE(review): `apply` is presumably this module's own action
        # runner rather than the Python 2 builtin, since actions are
        # formatted with str() above and never called -- confirm.
        apply(action)
 def testChangeStatusSuccess_BackupToPrimary(self):
     """Check that a backup server can be promoted to primary.

     The backup drone is switched to primary with action=True, so a lookup
     of the primary scheduler and a follow-up command are expected.
     """
     primary_status = server_models.Server.STATUS.PRIMARY
     target = self.BACKUP_DRONE

     # Record phase: the calls _change_status is expected to make.
     server_models.validate(status=primary_status)
     server_manager_utils.use_server_db().MultipleTimes().AndReturn(True)
     self.mox.StubOutWithMock(target, 'get_role_names')
     target.get_role_names().MultipleTimes().AndReturn(
         [server_models.ServerRole.ROLE.DRONE])
     # The query for roles requiring a unique instance yields nothing.
     self.mox.StubOutWithMock(target.roles, 'filter')
     target.roles.filter(
         role__in=server_models.ServerRole.ROLES_REQUIRE_UNIQUE_INSTANCE
     ).AndReturn(None)
     # The primary scheduler is looked up so a command can be executed.
     server_models.Server.objects.filter(
         roles__role=server_models.ServerRole.ROLE.SCHEDULER,
         status=primary_status).AndReturn([self.PRIMARY_SCHEDULER])
     infra.execute_command(mox.IgnoreArg(), mox.IgnoreArg())

     # Replay phase: run the code under test against the recording.
     self.mox.ReplayAll()
     server_manager._change_status(
         server=target, status=primary_status, action=True)
Ejemplo n.º 11
0
def main():
    """Run the host scheduler until shutdown is requested or lifetime expires.

    Exits with status 0 right away when `_monitor_db_host_acquisition` is
    set. Otherwise the arguments are parsed, production settings are
    checked and, when the server database is enabled, the local server is
    confirmed to have the `host_scheduler` role. The scheduler then ticks
    in a loop, pacing itself by `host_scheduler_minimum_tick_sec`.

    Queued emails are always sent and the database connection, if any, is
    always closed on the way out. Exceptions are re-raised to the caller.
    """
    if _monitor_db_host_acquisition:
        logging.info('Please set inline_host_acquisition=False in the shadow '
                     'config before starting the host scheduler.')
        sys.exit(0)

    try:
        options = parse_arguments(sys.argv[1:])
        scheduler_lib.check_production_settings(options)

        # With the server database enabled, the server must have role
        # `host_scheduler`. If it does not, an exception is raised and the
        # host scheduler does not continue to run.
        if server_manager_utils.use_server_db():
            server_manager_utils.confirm_server_has_role(
                hostname='localhost', role='host_scheduler')

        initialize(options.testing)

        with ts_mon_config.SetupTsMonGlobalState(
                'autotest_host_scheduler',
                indirect=True,
                debug_file=options.metrics_file,
        ):
            metrics.Counter('%s/start' % _METRICS_PREFIX).increment()
            process_start_time = time.time()
            host_scheduler = HostScheduler()
            minimum_tick_sec = global_config.global_config.get_config_value(
                'SCHEDULER', 'host_scheduler_minimum_tick_sec', type=float)
            while not _shutdown:
                if _lifetime_expired(options.lifetime_hours,
                                     process_start_time):
                    break
                tick_started = time.time()
                host_scheduler.tick()
                tick_duration = time.time() - tick_started
                # Sleep out the rest of the minimum tick; a tick that ran
                # long still gets a very short sleep.
                if minimum_tick_sec > tick_duration:
                    pause_sec = minimum_tick_sec - tick_duration
                else:
                    pause_sec = 0.0001
                time.sleep(pause_sec)
            logging.info('Shutdown request recieved. Bye! Bye!')
    except server_manager_utils.ServerActionError:
        # This error is expected when the server is not in primary status
        # for host-scheduler role. Thus do not send email for it.
        raise
    except Exception:
        metrics.Counter('%s/uncaught_exception' % _METRICS_PREFIX).increment()
        raise
    finally:
        email_manager.manager.send_queued_emails()
        if _db_manager:
            _db_manager.disconnect()
    def testDeleteRoleFromPrimarySuccess_NoAction(self):
        """Check that action=False removes a role without running actions.

        The drone role is removed from the primary drone with action=False.
        The role lookup and the missing-role warning are recorded, but no
        scheduler lookup and no command execution are expected.
        """
        drone_role = server_models.ServerRole.ROLE.DRONE
        target = self.PRIMARY_DRONE

        # Record phase: the calls _delete_role is expected to make.
        server_manager_utils.use_server_db().MultipleTimes().AndReturn(True)
        server_models.validate(role=drone_role)
        self.mox.StubOutWithMock(target, 'get_role_names')
        target.get_role_names().MultipleTimes().AndReturn([drone_role])

        # The role object to remove is fetched from the server's roles.
        self.mox.StubOutWithMock(target.roles, 'get')
        target.roles.get(role=drone_role).AndReturn(self.DRONE_ROLE)

        server_manager.server_manager_utils.warn_missing_role(
            drone_role, target)

        # Replay phase: run the code under test against the recording.
        self.mox.ReplayAll()
        server_manager._delete_role(target, drone_role, action=False)
def main():
    """Run the host scheduler until shutdown is requested.

    Exits with status 0 right away when `_monitor_db_host_acquisition` is
    set. Otherwise the arguments are parsed, production settings are
    checked and, when the server database is enabled, the local server is
    confirmed to have the `host_scheduler` role. The metadata reporter is
    started and the scheduler ticks in a loop, pacing itself by
    `minimum_tick_sec`.

    Queued emails are always sent, the database connection, if any, is
    closed and the metadata reporter is aborted on the way out. Exceptions
    are re-raised to the caller.
    """
    if _monitor_db_host_acquisition:
        logging.info('Please set inline_host_acquisition=False in the shadow '
                     'config before starting the host scheduler.')
        # The upstart job for the host scheduler understands exit(0) to mean
        # 'don't respawn'. This is desirable when the job scheduler is
        # acquiring hosts inline.
        sys.exit(0)

    try:
        options = parse_arguments(sys.argv[1:])
        scheduler_lib.check_production_settings(options)

        # With the server database enabled, the server must have role
        # `host_scheduler`. If it does not, an exception is raised and the
        # host scheduler does not continue to run.
        if server_manager_utils.use_server_db():
            server_manager_utils.confirm_server_has_role(
                hostname='localhost', role='host_scheduler')

        initialize(options.testing)

        # Start the thread to report metadata.
        metadata_reporter.start()

        ts_mon_config.SetupTsMonGlobalState('autotest_host_scheduler')

        host_scheduler = HostScheduler()
        minimum_tick_sec = global_config.global_config.get_config_value(
            'SCHEDULER', 'minimum_tick_sec', type=float)
        while not _shutdown:
            tick_started = time.time()
            host_scheduler.tick()
            tick_duration = time.time() - tick_started
            # Sleep out the rest of the minimum tick; a tick that ran long
            # still gets a very short sleep.
            if minimum_tick_sec > tick_duration:
                pause_sec = minimum_tick_sec - tick_duration
            else:
                pause_sec = 0.0001
            time.sleep(pause_sec)
    except server_manager_utils.ServerActionError:
        # This error is expected when the server is not in primary status
        # for host-scheduler role. Thus do not send email for it.
        raise
    except Exception:
        email_manager.manager.log_stacktrace(
            'Uncaught exception; terminating host_scheduler.')
        raise
    finally:
        email_manager.manager.send_queued_emails()
        if _db_manager:
            _db_manager.disconnect()
        metadata_reporter.abort()
Ejemplo n.º 14
0
def _email_alert():
    """Email an alert that metadata upload has been failing.

    The alert is skipped when the server database is not enabled, or when
    the local server is not confirmed to have the `scheduler` role. The
    subject reports `_MAX_UPLOAD_FAIL_DURATION` seconds of failure and the
    email body is empty; the email is queued and sent immediately.
    """
    if not server_manager_utils.use_server_db():
        # The message used to read "not emailed", a typo for "not enabled".
        logging.debug('Server database not enabled, email alert is skipped.')
        return
    try:
        server_manager_utils.confirm_server_has_role(hostname='localhost',
                                                     role='scheduler')
    except server_manager_utils.ServerActionError:
        # Only email alert if the server is a scheduler, not shard.
        return
    subject = ('Metadata upload has been failing for %d seconds' %
               _MAX_UPLOAD_FAIL_DURATION)
    email_manager.manager.enqueue_notify_email(subject, '')
    email_manager.manager.send_queued_emails()
Ejemplo n.º 15
0
def get_servers(hostname=None, role=None, status=None):
    """Look up servers in the server database, filtered by the arguments.

    @param hostname: FQDN of the server.
    @param role: Name of the server role, e.g., drone, scheduler. Default to
                 None to match any role.
    @param status: Status of the server, e.g., primary, backup,
                   repair_required. Default to None to match any server
                   status.

    @raises error.RPCException: If server database is not used.
    @return: A list with the result of `get_details()` for each matching
             server.
    """
    if server_manager_utils.use_server_db():
        matches = server_manager_utils.get_servers(
            hostname=hostname, role=role, status=status)
        return [match.get_details() for match in matches]
    raise error.RPCException('Server database is not enabled. Please try '
                             'retrieve servers from global config.')
Ejemplo n.º 16
0
def delete(hostname, server=None):
    """Remove the given server from the server database.

    When the server database is in use and the server is in primary
    status, each of its current roles is removed through `_delete_role`
    before the server itself is deleted.

    @param hostname: hostname of the server to be deleted.
    @param server: Server object from database query, this argument should be
                   injected by the verify_server_exists decorator.

    @raise ServerActionError: If delete server action failed, e.g., server is
            not found in database.
    """
    print('Deleting server %s from server database.' % hostname)

    roles_need_disabling = (
        server_manager_utils.use_server_db() and
        server.status == server_models.Server.STATUS.PRIMARY)
    if roles_need_disabling:
        print('Server %s is in status primary, need to disable its '
              'current roles first.' % hostname)
        for server_role in server.roles.all():
            _delete_role(server, server_role.role)

    server.delete()
    print('Server %s is deleted from server database.' % hostname)
Ejemplo n.º 17
0
def main():
    """Entry point for suite_scheduler.py"""
    signal.signal(signal.SIGINT, signal_handler)
    signal.signal(signal.SIGHUP, signal_handler)
    signal.signal(signal.SIGTERM, signal_handler)

    parser, options, args = parse_options()
    if args or options.events and not options.build:
        parser.print_help()
        return 1

    if options.config_file and not os.path.exists(options.config_file):
        logging.error('Specified config file %s does not exist.',
                      options.config_file)
        return 1

    config = forgiving_config_parser.ForgivingConfigParser()
    config.read(options.config_file)

    if options.list:
        print 'Supported events:'
        for event_class in driver.Driver.EVENT_CLASSES:
            print '  ', event_class.KEYWORD
        return 0

    # If we're just sanity checking, we can stop after we've parsed the
    # config file.
    if options.sanity:
        # config_file_getter generates a high amount of noise at DEBUG level
        logging.getLogger().setLevel(logging.WARNING)
        d = driver.Driver(None, None, True)
        d.SetUpEventsAndTasks(config, None)
        tasks_per_event = d.TasksFromConfig(config)
        # flatten [[a]] -> [a]
        tasks = [x for y in tasks_per_event.values() for x in y]
        control_files_exist = sanity.CheckControlFileExistence(tasks)
        return control_files_exist

    logging_manager.configure_logging(SchedulerLoggingConfig(),
                                      log_dir=options.log_dir)
    if not options.log_dir:
        logging.info('Not logging to a file, as --log_dir was not passed.')

    # If server database is enabled, check if the server has role
    # `suite_scheduler`. If the server does not have suite_scheduler role,
    # exception will be raised and suite scheduler will not continue to run.
    if not server_manager_utils:
        raise ImportError(
            'Could not import autotest_lib.site_utils.server_manager_utils')
    if server_manager_utils.use_server_db():
        server_manager_utils.confirm_server_has_role(hostname='localhost',
                                                     role='suite_scheduler')

    afe_server = global_config.global_config.get_config_value(
        CONFIG_SECTION_SERVER, "suite_scheduler_afe", default=None)

    afe = frontend_wrappers.RetryingAFE(server=afe_server,
                                        timeout_min=10,
                                        delay_sec=5,
                                        debug=False)
    logging.info('Connecting to: %s', afe.server)
    enumerator = board_enumerator.BoardEnumerator(afe)
    scheduler = deduping_scheduler.DedupingScheduler(afe, options.file_bug)
    mv = manifest_versions.ManifestVersions(options.tmp_repo_dir)
    d = driver.Driver(scheduler, enumerator)
    d.SetUpEventsAndTasks(config, mv)

    # Set up metrics upload for Monarch.
    ts_mon_config.SetupTsMonGlobalState('autotest_suite_scheduler')

    try:
        if options.events:
            # Act as though listed events have just happened.
            keywords = re.split('\s*,\s*', options.events)
            if not options.tmp_repo_dir:
                logging.warn('To run a list of events, you may need to use '
                             '--repo_dir to specify a folder that already has '
                             'manifest repo set up. This is needed for suites '
                             'requiring firmware update.')
            logging.info('Forcing events: %r', keywords)
            d.ForceEventsOnceForBuild(keywords, options.build, options.os_type)
        else:
            if not options.tmp_repo_dir:
                mv.Initialize()
            d.RunForever(config, mv)
    except Exception as e:
        logging.error('Fatal exception in suite_scheduler: %r\n%s', e,
                      traceback.format_exc())
        return 1