Beispiel #1
0
def increase_quota(cluster, role, cpu_str, ram_str, disk_str):
    """usage: increase_quota cluster role cpu ram[unit] disk[unit]

    Increases the amount of production quota allocated to a user.

    The cpu/ram/disk increments are added to the quota currently reported
    for ``role``; the current and the target values are logged before the
    new quota is submitted and the response is checked.
    """
    # Parse all three increments before contacting the scheduler.
    cpu_delta = float(cpu_str)
    ram_delta = parse_data(ram_str)
    disk_delta = parse_data(disk_str)

    opts = app.get_options()
    api = AuroraClientAPI(CLUSTERS[cluster], opts.verbosity == 'verbose')
    current = api.get_quota(role).result.getQuotaResult.quota
    log.info('Current quota for %s:\n\tCPU\t%s\n\tRAM\t%s MB\n\tDisk\t%s MB' %
             (role, current.numCpus, current.ramMb, current.diskMb))

    # ramMb/diskMb are wrapped as Amount(..., Data.MB) before being added to
    # the parsed increments, then converted back to MB for logging/submission.
    target_cpu = cpu_delta + current.numCpus
    target_ram = ram_delta + Amount(current.ramMb, Data.MB)
    target_disk = disk_delta + Amount(current.diskMb, Data.MB)
    target_ram_mb = target_ram.as_(Data.MB)
    target_disk_mb = target_disk.as_(Data.MB)

    log.info(
        'Attempting to update quota for %s to\n\tCPU\t%s\n\tRAM\t%s MB\n\tDisk\t%s MB'
        % (role, target_cpu, target_ram_mb, target_disk_mb))

    check_and_log_response(
        api.set_quota(role, target_cpu, target_ram_mb, target_disk_mb))
Beispiel #2
0
def increase_quota(cluster, role, cpu_str, ram_str, disk_str):
    """usage: increase_quota cluster role cpu ram[unit] disk[unit]

    Increases the amount of production quota allocated to a user.

    Reads the quota currently held by ``role``, adds the given cpu/ram/disk
    increments to it, logs both the old and the new figures, and submits the
    summed quota; the scheduler response is passed to check_and_log_response.
    """
    # All increments are parsed first, ahead of any scheduler call.
    extra_cpu = float(cpu_str)
    extra_ram = parse_data(ram_str)
    extra_disk = parse_data(disk_str)

    options = app.get_options()
    scheduler = AuroraClientAPI(CLUSTERS[cluster], options.verbosity == "verbose")
    existing = scheduler.get_quota(role).result.getQuotaResult.quota
    log.info(
        "Current quota for %s:\n\tCPU\t%s\n\tRAM\t%s MB\n\tDisk\t%s MB"
        % (role, existing.numCpus, existing.ramMb, existing.diskMb)
    )

    total_cpu = extra_cpu + existing.numCpus
    total_ram = extra_ram + Amount(existing.ramMb, Data.MB)
    total_disk = extra_disk + Amount(existing.diskMb, Data.MB)

    # Both the log line and the request use the totals expressed in MB.
    total_ram_mb = total_ram.as_(Data.MB)
    total_disk_mb = total_disk.as_(Data.MB)
    log.info(
        "Attempting to update quota for %s to\n\tCPU\t%s\n\tRAM\t%s MB\n\tDisk\t%s MB"
        % (role, total_cpu, total_ram_mb, total_disk_mb)
    )

    update_resp = scheduler.set_quota(role, total_cpu, total_ram_mb, total_disk_mb)
    check_and_log_response(update_resp)
Beispiel #3
0
 def __init__(self, cluster, role, env, jobs, ssh_user=None):
     """Store the job coordinates and create the scheduler client.

     ssh_user falls back to ``role`` when it is not given (or is falsy).
     """
     self._cluster = cluster
     self._api = AuroraClientAPI(cluster=cluster)
     self._role, self._env, self._jobs = role, env, jobs
     self._ssh_user = ssh_user or self._role
Beispiel #4
0
def scheduler_backup_now(cluster):
    """usage: scheduler_backup_now cluster

    Immediately initiates a full storage backup.

    Looks ``cluster`` up in CLUSTERS, calls ``perform_backup()`` on an
    AuroraClientAPI for it and passes the response to check_and_log_response.
    """
    options = app.get_options()
    # AuroraClientAPI's second argument is used as a verbose flag (other call
    # sites in this file pass ``verbosity == 'verbose'``), so compare instead
    # of passing the raw option string, which is truthy for any non-empty level.
    verbose = options.verbosity == 'verbose'
    client = AuroraClientAPI(CLUSTERS[cluster], verbose)
    check_and_log_response(client.perform_backup())
Beispiel #5
0
def scheduler_snapshot(cluster):
    """usage: scheduler_snapshot cluster

    Request that the scheduler perform a storage snapshot and block until complete.

    Looks ``cluster`` up in CLUSTERS, calls ``snapshot()`` on an
    AuroraClientAPI for it and passes the response to check_and_log_response.
    """
    options = app.get_options()
    # AuroraClientAPI's second argument is used as a verbose flag (other call
    # sites in this file pass ``verbosity == 'verbose'``), so compare instead
    # of passing the raw option string, which is truthy for any non-empty level.
    verbose = options.verbosity == 'verbose'
    # Index by the ``cluster`` argument; the literal key 'cluster' ignored the
    # caller's choice entirely.
    client = AuroraClientAPI(CLUSTERS[cluster], verbose)
    check_and_log_response(client.snapshot())
Beispiel #6
0
def scheduler_unload_recovery(cluster):
    """usage: scheduler_unload_recovery cluster

    Unloads a staged recovery.

    Looks ``cluster`` up in CLUSTERS, calls ``unload_recovery()`` on an
    AuroraClientAPI for it and passes the response to check_and_log_response.
    """
    options = app.get_options()
    # AuroraClientAPI's second argument is used as a verbose flag (other call
    # sites in this file pass ``verbosity == 'verbose'``), so compare instead
    # of passing the raw option string, which is truthy for any non-empty level.
    verbose = options.verbosity == 'verbose'
    client = AuroraClientAPI(CLUSTERS[cluster], verbose)
    check_and_log_response(client.unload_recovery())
Beispiel #7
0
def scheduler_stage_recovery(cluster, backup_id):
    """usage: scheduler_stage_recovery cluster backup_id

    Stages a backup for recovery.

    Looks ``cluster`` up in CLUSTERS, calls ``stage_recovery(backup_id)`` on
    an AuroraClientAPI for it and passes the response to
    check_and_log_response.
    """
    options = app.get_options()
    # AuroraClientAPI's second argument is used as a verbose flag (other call
    # sites in this file pass ``verbosity == 'verbose'``), so compare instead
    # of passing the raw option string, which is truthy for any non-empty level.
    verbose = options.verbosity == 'verbose'
    client = AuroraClientAPI(CLUSTERS[cluster], verbose)
    check_and_log_response(client.stage_recovery(backup_id))
Beispiel #8
0
def scheduler_delete_recovery_tasks(cluster, task_ids):
    """usage: scheduler_delete_recovery_tasks cluster task_ids

    Deletes a comma-separated list of task IDs from a staged recovery.

    ``task_ids`` is split on ',' and de-duplicated into a set, which is sent
    as ``TaskQuery(taskIds=...)`` to ``delete_recovery_tasks``; the response
    is passed to check_and_log_response.
    """
    ids = set(task_ids.split(','))
    options = app.get_options()
    # AuroraClientAPI's second argument is used as a verbose flag (other call
    # sites in this file pass ``verbosity == 'verbose'``), so compare instead
    # of passing the raw option string, which is truthy for any non-empty level.
    verbose = options.verbosity == 'verbose'
    client = AuroraClientAPI(CLUSTERS[cluster], verbose)
    check_and_log_response(
        client.delete_recovery_tasks(TaskQuery(taskIds=ids)))
Beispiel #9
0
def scheduler_list_backups(cluster):
    """usage: scheduler_list_backups cluster

    Lists backups available for recovery.

    Prints the number of backups returned by ``list_backups()`` followed by
    one backup per line, after the response has been passed to
    check_and_log_response.
    """
    options = app.get_options()
    # AuroraClientAPI's second argument is used as a verbose flag (other call
    # sites in this file pass ``verbosity == 'verbose'``), so compare instead
    # of passing the raw option string, which is truthy for any non-empty level.
    verbose = options.verbosity == 'verbose'
    resp = AuroraClientAPI(CLUSTERS[cluster], verbose).list_backups()
    check_and_log_response(resp)
    backups = resp.result.listBackupsResult.backups
    print('%s available backups:' % len(backups))
    for backup in backups:
        print(backup)
Beispiel #10
0
def scheduler_list_job_updates(cluster):
    """usage: scheduler_list_job_updates cluster

    Lists in-flight job updates.

    Prints a tab-separated ``Role/Env/Job`` table. When an update carries a
    ``jobKey`` its role, environment and name are printed; otherwise the
    ``roleDeprecated`` / ``jobDeprecated`` fields are used and the
    environment column is ``None``.
    """
    options = app.get_options()
    # AuroraClientAPI's second argument is used as a verbose flag (other call
    # sites in this file pass ``verbosity == 'verbose'``), so compare instead
    # of passing the raw option string, which is truthy for any non-empty level.
    verbose = options.verbosity == 'verbose'
    resp = AuroraClientAPI(CLUSTERS[cluster], verbose).get_job_updates()
    check_and_log_response(resp)
    print('Role\tEnv\tJob')
    for update in resp.jobUpdates:
        key = update.jobKey
        if key:
            row = (key.role, key.environment, key.name)
        else:
            # No job key on this update: fall back to the deprecated fields.
            row = (update.roleDeprecated, None, update.jobDeprecated)
        print('%s\t%s\t%s' % row)
Beispiel #11
0
def set_quota(cluster, role, cpu_str, ram_mb_str, disk_mb_str):
    """usage: set_quota cluster role cpu ramMb diskMb

    Alters the amount of production quota allocated to a user.

    ``cpu`` is parsed as a float, ``ramMb`` and ``diskMb`` as integers; the
    parsed values are sent via ``set_quota`` and the response is passed to
    check_and_log_response.

    Raises:
      ValueError: if any of the three numeric arguments cannot be parsed.
    """
    try:
        cpu = float(cpu_str)
        ram_mb = int(ram_mb_str)
        disk_mb = int(disk_mb_str)
    except ValueError:
        log.error('Invalid value')
        # Re-raise instead of falling through: continuing would only fail a
        # few lines later on the unbound cpu/ram_mb/disk_mb names.
        raise

    options = app.get_options()
    # AuroraClientAPI's second argument is used as a verbose flag (other call
    # sites in this file pass ``verbosity == 'verbose'``), so compare instead
    # of passing the raw option string, which is truthy for any non-empty level.
    verbose = options.verbosity == 'verbose'
    resp = AuroraClientAPI(CLUSTERS[cluster], verbose).set_quota(
        role, cpu, ram_mb, disk_mb)
    check_and_log_response(resp)
Beispiel #12
0
def scheduler_print_recovery_tasks(cluster):
    """usage: scheduler_print_recovery_tasks cluster

    Prints all active tasks in a staged recovery.

    Queries the staged recovery for tasks whose status is in ACTIVE_STATES
    and logs one tab-separated line per task (role, job, instance id, status
    name, task id) under a header line.
    """
    options = app.get_options()
    # AuroraClientAPI's second argument is used as a verbose flag (other call
    # sites in this file pass ``verbosity == 'verbose'``), so compare instead
    # of passing the raw option string, which is truthy for any non-empty level.
    verbose = options.verbosity == 'verbose'
    client = AuroraClientAPI(CLUSTERS[cluster], verbose)
    resp = client.query_recovery(TaskQuery(statuses=ACTIVE_STATES))
    check_and_log_response(resp)
    log.info('Role\tJob\tShard\tStatus\tTask ID')
    for task in resp.tasks:
        assigned = task.assignedTask
        conf = assigned.task
        columns = (conf.owner.role, conf.jobName, str(assigned.instanceId),
                   ScheduleStatus._VALUES_TO_NAMES[task.status],
                   assigned.taskId)
        log.info('\t'.join(columns))
 def __init__(self, cluster, verbosity):
   """Create the scheduler client; it is verbose only when verbosity is 'verbose'."""
   is_verbose = verbosity == 'verbose'
   self._client = AuroraClientAPI(cluster, is_verbose)
class MesosMaintenance(object):
  """Drives host maintenance on a mesos cluster through the scheduler client.

  Hosts are grouped (see GROUPING_FUNCTIONS), split into batches, drained,
  optionally handed to a callback, and then taken back out of maintenance.
  """

  DEFAULT_GROUPING = 'by_host'
  GROUPING_FUNCTIONS = {
    'by_host': group_by_host,
  }
  # Pause between successive maintenance-status polls while draining.
  START_MAINTENANCE_DELAY = Amount(30, Time.SECONDS)

  @classmethod
  def group_hosts(cls, hostnames, grouping_function=DEFAULT_GROUPING):
    """Bucket hostnames by the key the named grouping function returns.

    Returns a defaultdict mapping group key -> set of hostnames.
    Raises ValueError if grouping_function is not a key of GROUPING_FUNCTIONS.
    """
    try:
      key_for = cls.GROUPING_FUNCTIONS[grouping_function]
    except KeyError:
      raise ValueError('Unknown grouping function %s!' % grouping_function)
    groups = defaultdict(set)
    for hostname in hostnames:
      groups[key_for(hostname)].add(hostname)
    return groups

  @classmethod
  def iter_batches(cls, hostnames, batch_size, grouping_function=DEFAULT_GROUPING):
    """Yield Hosts objects, each the union of up to batch_size host groups.

    Groups are visited in sorted key order.
    Raises ValueError if batch_size is not positive.
    """
    if batch_size <= 0:
      raise ValueError('Batch size must be > 0!')
    groups = cls.group_hosts(hostnames, grouping_function)
    ordered = sorted(groups.items(), key=lambda item: item[0])
    for start in range(0, len(ordered), batch_size):
      batch = ordered[start:start + batch_size]
      yield Hosts(set.union(*(hostset for (_, hostset) in batch)))

  def __init__(self, cluster, verbosity):
    """Create the scheduler client; it is verbose only when verbosity is 'verbose'."""
    self._client = AuroraClientAPI(cluster, verbosity == 'verbose')

  def _drain_hosts(self, drainable_hosts, clock=time):
    """This will actively turn down tasks running on hosts.

    After requesting the drain, polls maintenance status every
    START_MAINTENANCE_DELAY until every host reports DRAINED or the scheduler
    returns no statuses at all. ``clock`` only needs a ``sleep(seconds)``
    method.
    """
    check_and_log_response(self._client.drain_hosts(drainable_hosts))
    not_ready_hosts = list(drainable_hosts.hostNames)
    while not_ready_hosts:
      log.info("Sleeping for %s." % self.START_MAINTENANCE_DELAY)
      clock.sleep(self.START_MAINTENANCE_DELAY.as_(Time.SECONDS))
      resp = self._client.maintenance_status(Hosts(not_ready_hosts))
      statuses = resp.result.maintenanceStatusResult.statuses
      #TODO(jsmith): Workaround until scheduler responds with unknown slaves in MESOS-3454
      if not statuses:
        # Stop polling outright; iterating a None/empty status list (as the
        # previous code went on to do) is pointless and fails on None.
        break
      for host_status in statuses:
        if host_status.mode != MaintenanceMode.DRAINED:
          log.warning('%s is currently in status %s' %
              (host_status.host, MaintenanceMode._VALUES_TO_NAMES[host_status.mode]))
        elif host_status.host in not_ready_hosts:
          # Guarded so a repeated or unexpected host cannot raise ValueError.
          not_ready_hosts.remove(host_status.host)

  def _complete_maintenance(self, drained_hosts):
    """End the maintenance status for a given set of hosts.

    Logs a warning for every host whose mode is not NONE afterwards.
    """
    check_and_log_response(self._client.end_maintenance(drained_hosts))
    resp = self._client.maintenance_status(drained_hosts)
    for host_status in resp.result.maintenanceStatusResult.statuses:
      if host_status.mode != MaintenanceMode.NONE:
        log.warning('%s is DRAINING or in DRAINED' % host_status.host)

  def _operate_on_hosts(self, drained_hosts, callback):
    """Perform a given operation on a list of hosts that are ready for maintenance."""
    for host in drained_hosts.hostNames:
      callback(host)

  def end_maintenance(self, hosts):
    """Pull a list of hosts out of maintenance mode."""
    self._complete_maintenance(Hosts(set(hosts)))

  def start_maintenance(self, hosts):
    """Put a list of hosts into maintenance mode, to de-prioritize scheduling."""
    check_and_log_response(self._client.start_maintenance(Hosts(set(hosts))))

  def perform_maintenance(self, hosts, batch_size=1, grouping_function=DEFAULT_GROUPING,
                          callback=None):
    """Wrap a callback in between sending hosts into maintenance mode and back.

    Walk through the process of putting hosts into maintenance, draining them of tasks,
    performing an action on them once drained, then removing them from maintenance mode
    so tasks can schedule.
    """
    self._complete_maintenance(Hosts(set(hosts)))
    self.start_maintenance(hosts)

    # Distinct loop name: the batches must not shadow the ``hosts`` parameter.
    for batch in self.iter_batches(hosts, batch_size, grouping_function):
      self._drain_hosts(batch)
      if callback:
        self._operate_on_hosts(batch, callback)
      self._complete_maintenance(batch)

  def check_status(self, hosts):
    """Return a list of (host, maintenance mode name) tuples for the given hosts."""
    resp = self._client.maintenance_status(Hosts(set(hosts)))
    check_and_log_response(resp)
    return [
      (host_status.host, MaintenanceMode._VALUES_TO_NAMES[host_status.mode])
      for host_status in resp.result.maintenanceStatusResult.statuses
    ]
Beispiel #15
0
def query(args, options):
    """usage: query [--shards=N[,N,...]]
                  [--states=State[,State,...]]
                  cluster [role [job]]

    Query Mesos about jobs and tasks.

    ``args`` holds the positional cluster, role and job; ``options`` supplies
    ``shards``, ``states``, ``listformat``, ``force`` and ``verbosity``. Each
    matching task is flattened into a dict and printed using the list format,
    in which ``%name%`` placeholders refer to flattened task fields. Queries
    without a role or job, or with states outside ACTIVE_STATES, require
    ``--force``.
    """

    def _convert_fmt_string(fmtstr):
        """Rewrite ``%name%`` placeholders as ``%(name)s`` mapping keys."""
        import re

        def convert(match):
            return "%%(%s)s" % match.group(1)

        return re.sub(r"%(\w+)%", convert, fmtstr)

    def flatten_task(t, d=None):
        """Collapse the nested attribute objects of ``t`` into one flat dict."""
        # A fresh dict per top-level call: a shared mutable default would let
        # fields of an earlier task leak into the output of later ones.
        if d is None:
            d = {}
        for key in t.__dict__.keys():
            val = getattr(t, key)
            if hasattr(val, "__dict__"):
                flatten_task(val, d)
            else:
                d[key] = val
        return d

    def map_values(d):
        """Translate raw field values into display values (status -> name)."""
        default_value = lambda v: v
        mapping = {"status": lambda v: ScheduleStatus._VALUES_TO_NAMES[v]}
        return dict((k, mapping.get(k, default_value)(v)) for (k, v) in d.items())

    # Only validate states when some were given; splitting an unset option
    # would fail before the default below could ever apply.
    requested_states = options.states.split(",") if options.states else []
    for state in requested_states:
        if state not in ScheduleStatus._NAMES_TO_VALUES:
            msg = "Unknown state '%s' specified.  Valid states are:\n" % state
            msg += ",".join(ScheduleStatus._NAMES_TO_VALUES.keys())
            die(msg)

    # Role, Job, Instances, States, and the listformat
    if not args:
        die("Must specify at least cluster.")

    cluster = args[0]
    role = args[1] if len(args) > 1 else None
    job = args[2] if len(args) > 2 else None
    instances = set(map(int, options.shards.split(","))) if options.shards else set()

    if requested_states:
        states = set(map(ScheduleStatus._NAMES_TO_VALUES.get, requested_states))
    else:
        states = ACTIVE_STATES | TERMINAL_STATES
    listformat = _convert_fmt_string(options.listformat)

    #  Figure out "expensive" queries here and bail if they do not have --force
    #  - Does not specify role
    if role is None and not options.force:
        die("--force is required for expensive queries (no role specified)")

    #  - Does not specify job
    if job is None and not options.force:
        die("--force is required for expensive queries (no job specified)")

    #  - Specifies status outside of ACTIVE_STATES
    if not (states <= ACTIVE_STATES) and not options.force:
        die("--force is required for expensive queries (states outside ACTIVE states)")

    # AuroraClientAPI's second argument is used as a verbose flag (other call
    # sites in this file pass ``verbosity == 'verbose'``), so compare instead
    # of passing the raw option string, which is truthy for any non-empty level.
    api = AuroraClientAPI(CLUSTERS[cluster], options.verbosity == "verbose")
    query_info = api.query(api.build_query(role, job, instances=instances, statuses=states))
    # Check the response code before touching the result payload, so a failed
    # query reports its message instead of failing on a missing result.
    if query_info.responseCode != ResponseCode.OK:
        die("Failed to query scheduler: %s" % query_info.message)
    tasks = query_info.result.scheduleStatusResult.tasks
    if tasks is None:
        return

    d = {}
    try:
        for task in tasks:
            d = flatten_task(task)
            print(listformat % map_values(d))
    except KeyError:
        msg = "Unknown key in format string.  Valid keys are:\n"
        msg += ",".join(d.keys())
        die(msg)
Beispiel #16
0
class DistributedCommandRunner(object):
    """Runs a shell command over ssh on the hosts of a role's live tasks.

    The command is prefixed with a ``cd`` into the task sandbox and has its
    task-specific placeholders substituted before it is executed.
    """

    @staticmethod
    def execute(args):
        """Run one command over ssh and return its output, host-prefixed.

        ``args`` is a ``(hostname, role, command)`` tuple; ``role`` is used as
        the ssh login name. stderr is merged into stdout, and every output
        line is returned prefixed with ``"<hostname>:  "``.
        """
        hostname, role, command = args
        ssh_command = ['ssh', '-n', '-q', '%s@%s' % (role, hostname), command]
        po = subprocess.Popen(ssh_command,
                              stdout=subprocess.PIPE,
                              stderr=subprocess.STDOUT)
        output = po.communicate()
        return '\n'.join('%s:  %s' % (hostname, line)
                         for line in output[0].splitlines())

    @classmethod
    def make_executor_path(cls, cluster, executor_name):
        """Build the glob path to an executor's run directory on a slave.

        The path is assembled from the cluster's ``slave_root`` and
        ``slave_run_directory`` (see sandbox_args) and ``executor_name``.
        """
        parameters = cls.sandbox_args(cluster)
        parameters.update(executor_name=executor_name)
        return posixpath.join(
            '%(slave_root)s',
            'slaves/*/frameworks/*/executors/%(executor_name)s/runs',
            '%(slave_run_directory)s') % parameters

    @classmethod
    def thermos_sandbox(cls, cluster, executor_sandbox=False):
        """Return the thermos executor path, or its ``sandbox`` subdirectory.

        The ``{{thermos.task_id}}`` placeholder is left in place for later
        interpolation (see substitute_thermos).
        """
        sandbox = cls.make_executor_path(cluster,
                                         'thermos-{{thermos.task_id}}')
        return sandbox if executor_sandbox else posixpath.join(
            sandbox, 'sandbox')

    @classmethod
    def sandbox_args(cls, cluster):
        """Return the slave path settings of ``cluster`` as a dict."""
        cluster = cluster.with_trait(CommandRunnerTrait)
        return {
            'slave_root': cluster.slave_root,
            'slave_run_directory': cluster.slave_run_directory
        }

    @classmethod
    def substitute_thermos(cls, command, task, cluster, **kw):
        """Prefix ``command`` with a cd into the thermos sandbox and interpolate it.

        The command is bound against ``thermos`` (task id, ports) and
        ``mesos`` (instance) namespaces built from ``task.assignedTask``.
        """
        prefix_command = 'cd %s;' % cls.thermos_sandbox(cluster, **kw)
        thermos_namespace = ThermosContext(
            task_id=task.assignedTask.taskId,
            ports=task.assignedTask.assignedPorts)
        mesos_namespace = MesosContext(instance=task.assignedTask.instanceId)
        command = String(prefix_command + command) % Environment(
            thermos=thermos_namespace, mesos=mesos_namespace)
        return command.get()

    @classmethod
    def aurora_sandbox(cls, cluster, executor_sandbox=False):
        """Return the sandbox path template used for non-thermos tasks."""
        if executor_sandbox:
            return cls.make_executor_path(cluster, 'twitter')
        else:
            return '/var/run/nexus/%task_id%/sandbox'

    @classmethod
    def substitute_aurora(cls, command, task, cluster, **kw):
        """Prefix ``command`` with a cd into the sandbox and fill placeholders.

        Replaces ``%shard_id%``, ``%task_id%`` and ``%port:<name>%`` with the
        values found on ``task.assignedTask``.
        """
        command = ('cd %s;' % cls.aurora_sandbox(cluster, **kw)) + command
        command = command.replace('%shard_id%',
                                  str(task.assignedTask.instanceId))
        command = command.replace('%task_id%', task.assignedTask.taskId)
        for name, port in task.assignedTask.assignedPorts.items():
            command = command.replace('%port:' + name + '%', str(port))
        return command

    @classmethod
    def substitute(cls, command, task, cluster, **kw):
        """Dispatch to the thermos or aurora substitution for ``task``.

        Tasks with a truthy ``executorConfig`` use the thermos variant.
        """
        if task.assignedTask.task.executorConfig:
            return cls.substitute_thermos(command, task, cluster, **kw)
        else:
            return cls.substitute_aurora(command, task, cluster, **kw)

    @classmethod
    def query_from(cls, role, env, job):
        """Build a TaskQuery for the LIVE_STATES tasks of role/env/job."""
        return TaskQuery(statuses=LIVE_STATES,
                         owner=Identity(role),
                         jobName=job,
                         environment=env)

    def __init__(self, cluster, role, env, jobs, ssh_user=None):
        """Store the job coordinates and create the scheduler client.

        ssh_user falls back to ``role`` when it is not given (or is falsy).
        """
        self._cluster = cluster
        self._api = AuroraClientAPI(cluster=cluster)
        self._role = role
        self._env = env
        self._jobs = jobs
        self._ssh_user = ssh_user if ssh_user else self._role

    def resolve(self):
        """Yield the live tasks of every configured job.

        Jobs whose query does not return ResponseCode.OK are logged and
        skipped.
        """
        for job in self._jobs:
            resp = self._api.query(self.query_from(self._role, self._env, job))
            if resp.responseCode != ResponseCode.OK:
                log.error('Failed to query job: %s' % job)
                continue
            for task in resp.result.scheduleStatusResult.tasks:
                yield task

    def process_arguments(self, command, **kw):
        """Yield one ``(host, ssh_user, command)`` tuple per resolved task."""
        for task in self.resolve():
            host = task.assignedTask.slaveHost
            yield (host, self._ssh_user,
                   self.substitute(command, task, self._cluster, **kw))

    def run(self, command, parallelism=1, **kw):
        """Execute ``command`` on every task host and print each result.

        Up to ``parallelism`` commands run concurrently; results are printed
        in completion order, not task order.
        """
        threadpool = ThreadPool(processes=parallelism)
        for result in threadpool.imap_unordered(
                self.execute, self.process_arguments(command, **kw)):
            # Parenthesised form matches the print calls used elsewhere in
            # this file and is valid under both Python 2 and Python 3.
            print(result)
Beispiel #17
0
def query(args, options):
    """usage: query [--shards=N[,N,...]]
                  [--states=State[,State,...]]
                  cluster [role [job]]

    Query Mesos about jobs and tasks.

    ``args`` holds the positional cluster, role and job; ``options`` supplies
    ``shards``, ``states``, ``listformat``, ``force`` and ``verbosity``. Each
    matching task is flattened into a dict and printed using the list format,
    in which ``%name%`` placeholders refer to flattened task fields. Queries
    without a role or job, or with states outside ACTIVE_STATES, require
    ``--force``.
    """
    def _convert_fmt_string(fmtstr):
        """Rewrite ``%name%`` placeholders as ``%(name)s`` mapping keys."""
        import re

        def convert(match):
            return "%%(%s)s" % match.group(1)

        return re.sub(r'%(\w+)%', convert, fmtstr)

    def flatten_task(t, d=None):
        """Collapse the nested attribute objects of ``t`` into one flat dict."""
        # A fresh dict per top-level call: a shared mutable default would let
        # fields of an earlier task leak into the output of later ones.
        if d is None:
            d = {}
        for key in t.__dict__.keys():
            val = getattr(t, key)
            if hasattr(val, '__dict__'):
                flatten_task(val, d)
            else:
                d[key] = val
        return d

    def map_values(d):
        """Translate raw field values into display values (status -> name)."""
        default_value = lambda v: v
        mapping = {
            'status': lambda v: ScheduleStatus._VALUES_TO_NAMES[v],
        }
        return dict(
            (k, mapping.get(k, default_value)(v)) for (k, v) in d.items())

    # Only validate states when some were given; splitting an unset option
    # would fail before the default below could ever apply.
    requested_states = options.states.split(',') if options.states else []
    for state in requested_states:
        if state not in ScheduleStatus._NAMES_TO_VALUES:
            msg = "Unknown state '%s' specified.  Valid states are:\n" % state
            msg += ','.join(ScheduleStatus._NAMES_TO_VALUES.keys())
            die(msg)

    # Role, Job, Instances, States, and the listformat
    if not args:
        die('Must specify at least cluster.')

    cluster = args[0]
    role = args[1] if len(args) > 1 else None
    job = args[2] if len(args) > 2 else None
    instances = set(map(
        int, options.shards.split(','))) if options.shards else set()

    if requested_states:
        states = set(
            map(ScheduleStatus._NAMES_TO_VALUES.get, requested_states))
    else:
        states = ACTIVE_STATES | TERMINAL_STATES
    listformat = _convert_fmt_string(options.listformat)

    #  Figure out "expensive" queries here and bail if they do not have --force
    #  - Does not specify role
    if role is None and not options.force:
        die('--force is required for expensive queries (no role specified)')

    #  - Does not specify job
    if job is None and not options.force:
        die('--force is required for expensive queries (no job specified)')

    #  - Specifies status outside of ACTIVE_STATES
    if not (states <= ACTIVE_STATES) and not options.force:
        die('--force is required for expensive queries '
            '(states outside ACTIVE states)')

    # AuroraClientAPI's second argument is used as a verbose flag (other call
    # sites in this file pass ``verbosity == 'verbose'``), so compare instead
    # of passing the raw option string, which is truthy for any non-empty level.
    api = AuroraClientAPI(CLUSTERS[cluster], options.verbosity == 'verbose')
    query_info = api.query(
        api.build_query(role, job, instances=instances, statuses=states))
    # Check the response code before touching the result payload, so a failed
    # query reports its message instead of failing on a missing result.
    if query_info.responseCode != ResponseCode.OK:
        die('Failed to query scheduler: %s' % query_info.message)
    tasks = query_info.result.scheduleStatusResult.tasks
    if tasks is None:
        return

    d = {}
    try:
        for task in tasks:
            d = flatten_task(task)
            print(listformat % map_values(d))
    except KeyError:
        msg = "Unknown key in format string.  Valid keys are:\n"
        msg += ','.join(d.keys())
        die(msg)
 def __init__(self, cluster, verbosity):
     """Create the scheduler client; it is verbose only when verbosity is 'verbose'."""
     wants_verbose = verbosity == 'verbose'
     self._client = AuroraClientAPI(cluster, wants_verbose)
class MesosMaintenance(object):
    """Drives host maintenance on a mesos cluster through the scheduler client.

    Hosts are grouped (see GROUPING_FUNCTIONS), split into batches, drained,
    optionally handed to a callback, and then taken back out of maintenance.
    """

    DEFAULT_GROUPING = 'by_host'
    GROUPING_FUNCTIONS = {
        'by_host': group_by_host,
    }
    # Pause between successive maintenance-status polls while draining.
    START_MAINTENANCE_DELAY = Amount(30, Time.SECONDS)

    @classmethod
    def group_hosts(cls, hostnames, grouping_function=DEFAULT_GROUPING):
        """Bucket hostnames by the key the named grouping function returns.

        Returns a defaultdict mapping group key -> set of hostnames.
        Raises ValueError if grouping_function is not a key of
        GROUPING_FUNCTIONS.
        """
        try:
            key_for = cls.GROUPING_FUNCTIONS[grouping_function]
        except KeyError:
            raise ValueError('Unknown grouping function %s!' %
                             grouping_function)
        groups = defaultdict(set)
        for hostname in hostnames:
            groups[key_for(hostname)].add(hostname)
        return groups

    @classmethod
    def iter_batches(cls,
                     hostnames,
                     batch_size,
                     grouping_function=DEFAULT_GROUPING):
        """Yield Hosts objects, each the union of up to batch_size host groups.

        Groups are visited in sorted key order.
        Raises ValueError if batch_size is not positive.
        """
        if batch_size <= 0:
            raise ValueError('Batch size must be > 0!')
        groups = cls.group_hosts(hostnames, grouping_function)
        ordered = sorted(groups.items(), key=lambda item: item[0])
        for start in range(0, len(ordered), batch_size):
            batch = ordered[start:start + batch_size]
            yield Hosts(set.union(*(hostset for (_, hostset) in batch)))

    def __init__(self, cluster, verbosity):
        """Create the scheduler client; it is verbose only when verbosity is 'verbose'."""
        self._client = AuroraClientAPI(cluster, verbosity == 'verbose')

    def _drain_hosts(self, drainable_hosts, clock=time):
        """This will actively turn down tasks running on hosts.

        After requesting the drain, polls maintenance status every
        START_MAINTENANCE_DELAY until every host reports DRAINED or the
        scheduler returns no statuses at all. ``clock`` only needs a
        ``sleep(seconds)`` method.
        """
        check_and_log_response(self._client.drain_hosts(drainable_hosts))
        not_ready_hosts = list(drainable_hosts.hostNames)
        while not_ready_hosts:
            log.info("Sleeping for %s." % self.START_MAINTENANCE_DELAY)
            clock.sleep(self.START_MAINTENANCE_DELAY.as_(Time.SECONDS))
            resp = self._client.maintenance_status(Hosts(not_ready_hosts))
            statuses = resp.result.maintenanceStatusResult.statuses
            #TODO(jsmith): Workaround until scheduler responds with unknown slaves in MESOS-3454
            if not statuses:
                # Stop polling outright; iterating a None/empty status list
                # (as the previous code went on to do) is pointless and fails
                # on None.
                break
            for host_status in statuses:
                if host_status.mode != MaintenanceMode.DRAINED:
                    log.warning(
                        '%s is currently in status %s' %
                        (host_status.host,
                         MaintenanceMode._VALUES_TO_NAMES[host_status.mode]))
                elif host_status.host in not_ready_hosts:
                    # Guarded so a repeated or unexpected host cannot raise
                    # ValueError.
                    not_ready_hosts.remove(host_status.host)

    def _complete_maintenance(self, drained_hosts):
        """End the maintenance status for a given set of hosts.

        Logs a warning for every host whose mode is not NONE afterwards.
        """
        check_and_log_response(self._client.end_maintenance(drained_hosts))
        resp = self._client.maintenance_status(drained_hosts)
        for host_status in resp.result.maintenanceStatusResult.statuses:
            if host_status.mode != MaintenanceMode.NONE:
                log.warning('%s is DRAINING or in DRAINED' % host_status.host)

    def _operate_on_hosts(self, drained_hosts, callback):
        """Perform a given operation on a list of hosts that are ready for maintenance."""
        for host in drained_hosts.hostNames:
            callback(host)

    def end_maintenance(self, hosts):
        """Pull a list of hosts out of maintenance mode."""
        self._complete_maintenance(Hosts(set(hosts)))

    def start_maintenance(self, hosts):
        """Put a list of hosts into maintenance mode, to de-prioritize scheduling."""
        check_and_log_response(
            self._client.start_maintenance(Hosts(set(hosts))))

    def perform_maintenance(self,
                            hosts,
                            batch_size=1,
                            grouping_function=DEFAULT_GROUPING,
                            callback=None):
        """Wrap a callback in between sending hosts into maintenance mode and back.

        Walk through the process of putting hosts into maintenance, draining
        them of tasks, performing an action on them once drained, then
        removing them from maintenance mode so tasks can schedule.
        """
        self._complete_maintenance(Hosts(set(hosts)))
        self.start_maintenance(hosts)

        # Distinct loop name: the batches must not shadow the ``hosts``
        # parameter.
        for batch in self.iter_batches(hosts, batch_size, grouping_function):
            self._drain_hosts(batch)
            if callback:
                self._operate_on_hosts(batch, callback)
            self._complete_maintenance(batch)

    def check_status(self, hosts):
        """Return a list of (host, maintenance mode name) tuples for the given hosts."""
        resp = self._client.maintenance_status(Hosts(set(hosts)))
        check_and_log_response(resp)
        return [
            (host_status.host,
             MaintenanceMode._VALUES_TO_NAMES[host_status.mode])
            for host_status in resp.result.maintenanceStatusResult.statuses
        ]