def test_perform_maintenance_partial_sla_failure(self, mock_check_sla, mock_start_maintenance,
                                                 mock_drain_hosts):
    """Drain with 'by_rack' grouping while one host fails the SLA check.

    The SLA check mock flags a single host. The test expects that host to
    show up in the output file, to be missing from the returned hosts, and
    to be excluded from the one drain request that is issued.
    """
    sla_violator = 'us-west-001.example.com'
    expected_drain_set = set(TEST_HOSTNAMES).difference([sla_violator])
    mock_start_maintenance.return_value = TEST_HOSTNAMES
    mock_check_sla.return_value = set([sla_violator])
    maintenance = HostMaintenance(DEFAULT_CLUSTER, 'quiet')

    with temporary_file() as fp:
        with group_by_rack():
            drained = maintenance.perform_maintenance(
                TEST_HOSTNAMES,
                grouping_function='by_rack',
                output_file=fp.name)

            # The flagged host must have been written to the output file.
            with open(fp.name, 'r') as report:
                assert sla_violator in report.read()

            mock_start_maintenance.assert_called_once_with(TEST_HOSTNAMES)
            assert len(drained) == 2
            assert sla_violator not in drained
            assert mock_check_sla.call_count == 1
            assert mock_drain_hosts.call_count == 1
            expected_drain_calls = [mock.call(Hosts(expected_drain_set))]
            assert mock_drain_hosts.call_args_list == expected_drain_calls
    def test_check_status(self, mock_maintenance_status):
        """check_status should yield a (hostname, mode name) pair per reported host.

        The mocked status call reports DRAINING, DRAINED and NONE for the
        first three test hostnames, in that order.
        """
        reported_modes = (
            MaintenanceMode.DRAINING,
            MaintenanceMode.DRAINED,
            MaintenanceMode.NONE,
        )
        host_statuses = set(
            HostStatus(host=TEST_HOSTNAMES[index], mode=mode)
            for index, mode in enumerate(reported_modes))
        mock_maintenance_status.return_value = Response(
            responseCode=ResponseCode.OK,
            result=Result(
                maintenanceStatusResult=MaintenanceStatusResult(host_statuses)))

        checker = HostMaintenance(DEFAULT_CLUSTER, 'quiet')
        statuses = checker.check_status(TEST_HOSTNAMES)

        mock_maintenance_status.assert_called_once_with(
            Hosts(set(TEST_HOSTNAMES)))
        assert len(statuses) == 3
        # Each host must be paired with the *name* of its mode, not the enum value.
        for index, mode in enumerate(reported_modes):
            expected_pair = (TEST_HOSTNAMES[index],
                             MaintenanceMode._VALUES_TO_NAMES[mode])
            assert expected_pair in statuses
예제 #3
0
  def test_drain_hosts_timed_out_wait(self, _, mock_drain_hosts, mock_maintenance_status, mock_log):
    """_drain_hosts should report every host and log a message when none reaches DRAINED.

    The status mock keeps answering SCHEDULED for the first three test hosts
    and the wait limit is cut to 1 ms. `_` is an injected argument this test
    never uses (presumably a patched wait/sleep -- confirm at the decorator).
    """
    scheduled_statuses = set(
        HostStatus(host=TEST_HOSTNAMES[index], mode=MaintenanceMode.SCHEDULED)
        for index in range(3))
    mock_maintenance_status.return_value = Response(
        responseCode=ResponseCode.OK,
        result=Result(maintenanceStatusResult=MaintenanceStatusResult(scheduled_statuses)))
    mock_drain_hosts.return_value = Response(responseCode=ResponseCode.OK)

    drain_request = Hosts(set(TEST_HOSTNAMES))
    maintenance = HostMaintenance(DEFAULT_CLUSTER, 'quiet')
    # Cap the wait at 1 ms; the assertions below expect exactly one status poll.
    maintenance.MAX_STATUS_WAIT = Amount(1, Time.MILLISECONDS)

    still_undrained = maintenance._drain_hosts(drain_request)

    assert TEST_HOSTNAMES == sorted(still_undrained)
    assert mock_maintenance_status.call_count == 1
    mock_drain_hosts.assert_called_once_with(drain_request)
    mock_maintenance_status.assert_called_once_with(Hosts(set(TEST_HOSTNAMES)))
    expected_message = textwrap.dedent("""\
        Failed to move all hosts into DRAINED within 1 ms:
        \tHost:us-west-001.example.com\tStatus:SCHEDULED
        \tHost:us-west-002.example.com\tStatus:SCHEDULED
        \tHost:us-west-003.example.com\tStatus:SCHEDULED""")
    assert mock_log.mock_calls == [mock.call(expected_message)]
    def test_drain_hosts_timed_out_wait(self, _, mock_drain_hosts,
                                        mock_maintenance_status):
        """_drain_hosts should return every host when none reaches DRAINED in time.

        The status mock always answers SCHEDULED for the first three test
        hosts and the wait limit is cut to 1 ms. `_` is an injected argument
        this test never uses (presumably a patched wait -- confirm at the
        decorator).
        """
        stuck_statuses = set(
            HostStatus(host=TEST_HOSTNAMES[index],
                       mode=MaintenanceMode.SCHEDULED)
            for index in range(3))
        status_result = MaintenanceStatusResult(stuck_statuses)
        mock_maintenance_status.return_value = Response(
            responseCode=ResponseCode.OK,
            result=Result(maintenanceStatusResult=status_result))
        mock_drain_hosts.return_value = Response(responseCode=ResponseCode.OK)

        drain_request = Hosts(set(TEST_HOSTNAMES))
        maintenance = HostMaintenance(DEFAULT_CLUSTER, 'quiet')
        # Cap the wait at 1 ms; exactly one status poll is expected below.
        maintenance.MAX_STATUS_WAIT = Amount(1, Time.MILLISECONDS)

        still_undrained = maintenance._drain_hosts(drain_request)

        assert TEST_HOSTNAMES == sorted(still_undrained)
        assert mock_maintenance_status.call_count == 1
        mock_drain_hosts.assert_called_once_with(drain_request)
        expected_status_query = Hosts(set(TEST_HOSTNAMES))
        mock_maintenance_status.assert_called_once_with(expected_status_query)
 def test_start_maintenance(self, mock_api):
     """start_maintenance should call the API once with the hostnames wrapped in Hosts."""
     start_result = StartMaintenanceResult(statuses=set([HostStatus()]))
     mock_api.return_value = Response(
         responseCode=ResponseCode.OK,
         result=Result(startMaintenanceResult=start_result))

     HostMaintenance(DEFAULT_CLUSTER, 'quiet').start_maintenance(TEST_HOSTNAMES)

     expected_request = Hosts(set(TEST_HOSTNAMES))
     mock_api.assert_called_once_with(expected_request)
 def test_perform_maintenance(self, mock_check_sla, mock_start_maintenance, mock_drain_hosts):
   """perform_maintenance with default arguments should drain each host on its own.

   With the SLA check reporting no unsafe hosts, the test expects three SLA
   checks and three drain requests, one single-host request per test host in
   order.
   """
   mock_start_maintenance.return_value = TEST_HOSTNAMES
   mock_check_sla.return_value = set()

   HostMaintenance(DEFAULT_CLUSTER, 'quiet').perform_maintenance(TEST_HOSTNAMES)

   mock_start_maintenance.assert_called_once_with(TEST_HOSTNAMES)
   assert mock_check_sla.call_count == 3
   assert mock_drain_hosts.call_count == 3
   single_host_drains = [mock.call(Hosts(set([hostname]))) for hostname in TEST_HOSTNAMES]
   assert mock_drain_hosts.call_args_list == single_host_drains
예제 #7
0
def host_activate(cluster):
    """usage: host_activate {--filename=filename | --hosts=hosts}
                          cluster

  Removes maintenance mode from hosts.

  The list of hosts is marked as not in a drained state anymore. This will
  allow normal scheduling to resume on the given list of hosts.
  """
    # NOTE(review): the docstring reads like CLI usage text that may be shown
    # to users, so it is kept verbatim -- confirm before reformatting it.
    opts = app.get_options()
    # Build the maintenance client first, then resolve the hostnames from
    # either --filename or --hosts and end maintenance on them.
    end_maintenance = HostMaintenance(CLUSTERS[cluster], opts.verbosity).end_maintenance
    end_maintenance(parse_hostnames(opts.filename, opts.hosts))
예제 #8
0
def host_status(cluster):
    """usage: host_status {--filename=filename | --hosts=hosts}
                        cluster

  Print the drain status of each supplied host.
  """
    # NOTE(review): the docstring reads like CLI usage text that may be shown
    # to users, so it is kept verbatim -- confirm before reformatting it.
    opts = app.get_options()
    hostnames = parse_hostnames(opts.filename, opts.hosts)
    maintenance = HostMaintenance(CLUSTERS[cluster], opts.verbosity)
    # Each item is interpolated into a two-slot format string, so it must be a
    # 2-tuple (presumably (hostname, state name) -- verify against check_status).
    for host_and_state in maintenance.check_status(hostnames):
        log.info("%s is in state: %s" % host_and_state)
예제 #9
0
  def test_drain_hosts(self, mock_event_wait, mock_drain_hosts, mock_maintenance_status):
    """_drain_hosts should keep polling status until every host is reported DRAINED.

    Four canned status responses are served in order: all SCHEDULED, all
    DRAINING, one DRAINING plus two DRAINED, and finally the remaining host
    DRAINED. The test expects four polls, four waits, and a last poll that
    asks only about the host that was still draining.
    """
    def status_response(*modes):
      # Pair each mode with the test hostname at the same position.
      statuses = set(
          HostStatus(host=TEST_HOSTNAMES[position], mode=mode)
          for position, mode in enumerate(modes))
      return Response(
          responseCode=ResponseCode.OK,
          result=Result(maintenanceStatusResult=MaintenanceStatusResult(statuses)))

    scheduled = MaintenanceMode.SCHEDULED
    draining = MaintenanceMode.DRAINING
    drained = MaintenanceMode.DRAINED
    queued_responses = [
        status_response(scheduled, scheduled, scheduled),
        status_response(draining, draining, draining),
        status_response(draining, drained, drained),
        status_response(drained)]

    observed_requests = []

    def record_and_reply(hosts):
      # Store a deep copy of the argument rather than a reference; presumably
      # _drain_hosts mutates the same Hosts object between polls -- confirm.
      observed_requests.append(copy.deepcopy(hosts))
      return queued_responses.pop(0)

    mock_maintenance_status.side_effect = record_and_reply
    mock_drain_hosts.return_value = Response(responseCode=ResponseCode.OK)
    drain_request = Hosts(set(TEST_HOSTNAMES))
    maintenance = HostMaintenance(DEFAULT_CLUSTER, 'quiet')

    leftover_hostnames = maintenance._drain_hosts(drain_request)

    assert len(leftover_hostnames) == 0
    mock_drain_hosts.assert_called_once_with(drain_request)
    assert mock_maintenance_status.call_count == 4
    assert mock_event_wait.call_count == 4
    expected_requests = [Hosts(set(TEST_HOSTNAMES)) for _ in range(3)]
    expected_requests.append(Hosts(set([TEST_HOSTNAMES[0]])))
    assert observed_requests == expected_requests
 def test_complete_maintenance(self, mock_end_maintenance, mock_maintenance_status, mock_warning):
   """_complete_maintenance should end maintenance and warn about a host left DRAINED.

   The status mock reports NONE for the first two test hosts and DRAINED for
   the third; exactly one warning, naming the third host, is expected.
   """
   final_modes = (MaintenanceMode.NONE, MaintenanceMode.NONE, MaintenanceMode.DRAINED)
   final_statuses = set(
       HostStatus(host=TEST_HOSTNAMES[index], mode=mode)
       for index, mode in enumerate(final_modes))
   mock_maintenance_status.return_value = Response(
       result=Result(maintenanceStatusResult=MaintenanceStatusResult(final_statuses)))
   mock_end_maintenance.return_value = Response(responseCode=ResponseCode.OK)

   hosts_under_test = Hosts(set(TEST_HOSTNAMES))
   HostMaintenance(DEFAULT_CLUSTER, 'quiet')._complete_maintenance(hosts_under_test)

   mock_end_maintenance.assert_called_once_with(hosts_under_test)
   mock_maintenance_status.assert_called_once_with(hosts_under_test)
   expected_warning = '%s is DRAINING or in DRAINED' % TEST_HOSTNAMES[2]
   mock_warning.assert_called_once_with(expected_warning)
예제 #11
0
def host_deactivate(cluster):
    """usage: host_deactivate {--filename=filename | --hosts=hosts}
                            cluster

  Puts hosts into maintenance mode.

  The list of hosts is marked for maintenance, and will be de-prioritized
  from consideration for scheduling.  Note, they are not removed from
  consideration, and may still schedule tasks if resources are very scarce.
  Usually you would mark a larger set of machines for drain, and then do
  them in batches within the larger set, to help drained tasks not land on
  future hosts that will be drained shortly in subsequent batches.
  """
    # NOTE(review): the docstring reads like CLI usage text that may be shown
    # to users, so it is kept verbatim -- confirm before reformatting it.
    opts = app.get_options()
    # Build the maintenance client first, then resolve the hostnames from
    # either --filename or --hosts and start maintenance on them.
    start_maintenance = HostMaintenance(CLUSTERS[cluster], opts.verbosity).start_maintenance
    start_maintenance(parse_hostnames(opts.filename, opts.hosts))
예제 #12
0
 def test_perform_maintenance(self, mock_operate_on_hosts, mock_check_sla, mock_start_maintenance,
                              mock_drain_hosts):
   """perform_maintenance should drain hosts one at a time and hand results to the callback path.

   The drain mock returns the first test host for the first call and an empty
   set afterwards. Judging by the expected _operate_on_hosts calls, that
   return value is the set of hosts that did not drain -- confirm against
   _drain_hosts.
   """
   post_drain_callback = mock.Mock()
   mock_start_maintenance.return_value = TEST_HOSTNAMES
   mock_check_sla.return_value = set()

   # One canned drain result per host: only the first call reports a host.
   canned_drain_results = [set([TEST_HOSTNAMES[0]])] + [set() for _ in TEST_HOSTNAMES[1:]]
   mock_drain_hosts.side_effect = lambda *args: canned_drain_results.pop(0)

   maintenance = HostMaintenance(DEFAULT_CLUSTER, 'quiet')
   maintenance.perform_maintenance(TEST_HOSTNAMES, callback=post_drain_callback)

   mock_start_maintenance.assert_called_once_with(TEST_HOSTNAMES)
   assert mock_check_sla.call_count == 3
   assert mock_drain_hosts.call_count == 3
   expected_drain_calls = [mock.call(Hosts(set([hostname]))) for hostname in TEST_HOSTNAMES]
   assert mock_drain_hosts.call_args_list == expected_drain_calls

   assert mock_operate_on_hosts.call_count == 3
   # The first group is operated on with an empty set; the rest with their own host.
   expected_operate_calls = [mock.call(set(), post_drain_callback)]
   expected_operate_calls.extend(
       mock.call(set([hostname]), post_drain_callback) for hostname in TEST_HOSTNAMES[1:])
   assert mock_operate_on_hosts.call_args_list == expected_operate_calls
예제 #13
0
def host_drain(cluster):
    """usage: host_drain {--filename=filename | --hosts=hosts}
                       [--post_drain_script=path]
                       [--grouping=function]
                       [--override_percentage=percentage]
                       [--override_duration=duration]
                       [--override_reason=reason]
                       [--unsafe_hosts_file=unsafe_hosts_filename]
                       cluster

  Asks the scheduler to start maintenance on the list of provided hosts (see host_deactivate
  for more details) and drains any active tasks on them.

  The list of hosts is drained and marked in a drained state.  This will kill
  off any tasks currently running on these hosts, as well as prevent future
  tasks from scheduling on these hosts while they are drained.

  The hosts are left in maintenance mode upon completion. Use host_activate to
  return hosts back to service and allow scheduling tasks on them.
  """
    # NOTE(review): the docstring reads like CLI usage text that may be shown
    # to users, so it is kept verbatim -- confirm before reformatting it.
    opts = app.get_options()
    hosts_to_drain = parse_hostnames(opts.filename, opts.hosts)

    # The return value is discarded; the call is made only for its effect
    # (presumably aborting on an unknown grouping name -- verify).
    get_grouping_or_die(opts.grouping)

    sla_percentage, sla_duration = parse_and_validate_sla_overrides(
        opts, hosts_to_drain)
    run_post_drain = parse_script(opts.post_drain_script)

    maintenance = HostMaintenance(CLUSTERS[cluster], opts.verbosity)
    drained = maintenance.perform_maintenance(
        hosts_to_drain,
        grouping_function=opts.grouping,
        percentage=sla_percentage,
        duration=sla_duration,
        output_file=opts.unsafe_hosts_filename)

    # Nothing more to do when no post-drain script was supplied.
    if not run_post_drain:
        return

    for drained_host in drained:
        run_post_drain(drained_host)
 def test_end_maintenance(self, mock_complete_maintenance):
   """end_maintenance should pass the hostnames, wrapped in Hosts, to _complete_maintenance once."""
   HostMaintenance(DEFAULT_CLUSTER, 'quiet').end_maintenance(TEST_HOSTNAMES)

   expected_hosts = Hosts(set(TEST_HOSTNAMES))
   mock_complete_maintenance.assert_called_once_with(expected_hosts)
예제 #15
0
 def test_operate_on_hosts(self):
   """_operate_on_hosts should invoke the callback three times for the three test hosts."""
   mock_callback = mock.Mock()
   # Wrap the hostnames in a set, as the other tests in this file do when
   # building Hosts, instead of handing it the raw list.
   test_hosts = Hosts(set(TEST_HOSTNAMES))
   maintenance = HostMaintenance(DEFAULT_CLUSTER, 'quiet')
   maintenance._operate_on_hosts(test_hosts, mock_callback)
   assert mock_callback.call_count == 3