def test_rack_grouping():
  """Check iter_batches when a custom 'by_rack' grouping function is registered.

  The assertions expect the two 'aaa' hosts to form the first batch and the
  three 'xyz' hosts to form the second when one group is taken per batch, and
  every host to land in the first batch once the batch size covers all groups.
  The registry of grouping functions is restored afterwards.
  """
  aaa_hosts = [
    'west-aaa-001.example.com',
    'west-aaa-002.example.com',
  ]
  xyz_hosts = [
    'west-xyz-002.example.com',
    'east-xyz-003.example.com',
    'east-xyz-004.example.com',
  ]
  all_hosts = aaa_hosts + xyz_hosts

  # Snapshot the registry so the temporary 'by_rack' entry can be rolled back.
  saved_functions = MesosMaintenance.GROUPING_FUNCTIONS.copy()
  MesosMaintenance.GROUPING_FUNCTIONS['by_rack'] = rack_grouping

  def rack_batches(size):
    return list(MesosMaintenance.iter_batches(all_hosts, size, 'by_rack'))

  try:
    one_group_per_batch = rack_batches(1)
    assert one_group_per_batch[0] == Hosts(set(aaa_hosts))
    assert one_group_per_batch[1] == Hosts(set(xyz_hosts))

    # Two or more groups per batch: everything fits in the first batch.
    for size in (2, 3):
      assert rack_batches(size)[0] == Hosts(set(all_hosts))

    # A non-positive batch size is rejected once the generator is consumed.
    with pytest.raises(ValueError):
      list(MesosMaintenance.iter_batches(all_hosts, 0))

  finally:
    MesosMaintenance.GROUPING_FUNCTIONS = saved_functions
def test_default_grouping():
  """Check iter_batches with the default grouping function.

  The assertions expect each host to be its own group and the batches to come
  back in sorted hostname order, regardless of the input order.
  """
  unsorted_hosts = [
    'xyz321.example.com',
    'bar337.example.com',
    'foo001.example.com',
  ]

  expectations = (
    (1, [
      ['bar337.example.com'],
      ['foo001.example.com'],
      ['xyz321.example.com'],
    ]),
    (2, [
      ['bar337.example.com', 'foo001.example.com'],
      ['xyz321.example.com'],
    ]),
  )

  for size, expected_batches in expectations:
    produced = list(MesosMaintenance.iter_batches(unsorted_hosts, size))
    for position, expected_names in enumerate(expected_batches):
      assert produced[position] == Hosts(set(expected_names))
 def check_status(self, hosts):
     """Look up the maintenance mode of each of the given hosts.

     The hosts are sent to the client as a Hosts struct wrapping a set, and the
     response is passed through check_and_log_response before being read.

     Returns a list of (host, mode name) tuples, one per status entry in the
     response, where the mode name comes from MaintenanceMode._VALUES_TO_NAMES.
     """
     response = self._client.maintenance_status(Hosts(set(hosts)))
     check_and_log_response(response)
     mode_names = MaintenanceMode._VALUES_TO_NAMES
     return [
         (entry.host, mode_names[entry.mode])
         for entry in response.result.maintenanceStatusResult.statuses
     ]
 def iter_batches(cls,
                  hostnames,
                  batch_size,
                  grouping_function=DEFAULT_GROUPING):
     """Yield the given hosts as Hosts structs, batch_size groups at a time.

     Hosts are first grouped via cls.group_hosts(hostnames, grouping_function);
     the groups are then walked in sorted key order and every run of
     batch_size consecutive groups is merged into a single Hosts struct.

     Raises ValueError if batch_size is not positive. Because this is a
     generator, the error surfaces when iteration starts, not at call time.
     """
     if not batch_size > 0:
         raise ValueError('Batch size must be > 0!')

     grouped = cls.group_hosts(hostnames, grouping_function)
     # Group keys are unique, so ordering the keys orders the groups.
     ordered_hostsets = [grouped[name] for name in sorted(grouped)]

     start = 0
     while start < len(ordered_hostsets):
         chunk = ordered_hostsets[start:start + batch_size]
         yield Hosts(set.union(*chunk))
         start += batch_size
    def perform_maintenance(self,
                            hosts,
                            batch_size=1,
                            grouping_function=DEFAULT_GROUPING,
                            callback=None):
        """Wrap a callback between sending hosts into maintenance mode and back.

        All hosts are first taken out of maintenance and then put into it
        together. They are then processed in batches (see iter_batches): each
        batch is drained of tasks, handed to the callback if one was given,
        and finally removed from maintenance mode so tasks can schedule again.
        """
        all_hosts = Hosts(set(hosts))
        self._complete_maintenance(all_hosts)
        self.start_maintenance(hosts)

        batches = self.iter_batches(hosts, batch_size, grouping_function)
        for batch in batches:
            self._drain_hosts(batch)
            if callback:
                self._operate_on_hosts(batch, callback)
            self._complete_maintenance(batch)
 def _drain_hosts(self, drainable_hosts, clock=time):
     """Actively turn down tasks running on hosts and wait until they are drained.

     Sends the drain request, then repeatedly sleeps for
     START_MAINTENANCE_DELAY and polls the maintenance status of the hosts
     that have not yet reported DRAINED. Returns once every host has reported
     DRAINED or the scheduler returns no statuses at all.

     :param drainable_hosts: Hosts struct whose hostNames are to be drained.
     :param clock: object providing sleep(seconds); defaults to the time
         module and is injectable so callers can avoid real sleeping.
     """
     check_and_log_response(self._client.drain_hosts(drainable_hosts))

     # Tracked as a set: membership updates are cheap and a host that is
     # reported twice (or was never requested) cannot raise on removal.
     not_ready_hosts = set(drainable_hosts.hostNames)
     while not_ready_hosts:
         log.info("Sleeping for %s." % self.START_MAINTENANCE_DELAY)
         clock.sleep(self.START_MAINTENANCE_DELAY.as_(Time.SECONDS))

         # Send a copy wrapped in a set, matching the other Hosts(set(...))
         # call sites, so later mutation here never alters the request.
         resp = self._client.maintenance_status(Hosts(set(not_ready_hosts)))
         statuses = resp.result.maintenanceStatusResult.statuses

         # TODO(jsmith): Workaround until scheduler responds with unknown slaves in MESOS-3454
         if not statuses:
             break

         for host_status in statuses:
             if host_status.mode != MaintenanceMode.DRAINED:
                 log.warning(
                     '%s is currently in status %s' %
                     (host_status.host,
                      MaintenanceMode._VALUES_TO_NAMES[host_status.mode]))
             else:
                 not_ready_hosts.discard(host_status.host)
 def start_maintenance(self, hosts):
     """Put the given hosts into maintenance mode, to de-prioritize scheduling.

     The hosts are de-duplicated into a Hosts struct before being sent, and
     the client's response is passed to check_and_log_response.
     """
     request = Hosts(set(hosts))
     response = self._client.start_maintenance(request)
     check_and_log_response(response)
 def end_maintenance(self, hosts):
     """Take the given hosts out of maintenance mode.

     The hosts are de-duplicated into a Hosts struct and handed to
     _complete_maintenance.
     """
     finished_hosts = Hosts(set(hosts))
     self._complete_maintenance(finished_hosts)