def end_maintenance_hosts(cluster):
  """usage: end_maintenance_hosts {--filename=filename | --hosts=hosts}
                                  cluster

  Ends maintenance for the hosts selected with --filename or --hosts in the
  named cluster.
  """
  options = app.get_options()
  maintenance = HostMaintenance(CLUSTERS[cluster], options.verbosity)
  # Hosts come from either the file or the inline list; parse_hosts resolves
  # whichever option was supplied.
  target_hosts = parse_hosts(options.filename, options.hosts)
  maintenance.end_maintenance(target_hosts)
def perform_maintenance_hosts(cluster):
  """usage: perform_maintenance_hosts {--filename=filename | --hosts=hosts}
                                      [--groups_per_batch=num]
                                      [--post_drain_script=path]
                                      [--grouping=function]
                                      cluster

  Asks the scheduler to remove any running tasks from the machines and remove
  them from service temporarily, perform some action on them, then return the
  machines to service.

  When --post_drain_script is given the script must exist; it is passed to the
  maintenance run as a per-host callback that launches "<script> <host>".
  """
  options = app.get_options()
  hosts_to_drain = parse_hosts(options.filename, options.hosts)

  script = options.post_drain_script
  if not script:
    post_drain = None
  else:
    if not os.path.exists(script):
      die("No such file: %s" % script)
    # Resolve to an absolute path once so the callback does not depend on the
    # working directory at the time it is invoked.
    script_path = os.path.abspath(script)

    def post_drain(host):
      # The process is started but not waited on here; the Popen handle is
      # returned to whoever invokes the callback.
      return subprocess.Popen([script_path, host])

  maintenance = HostMaintenance(CLUSTERS[cluster], options.verbosity)
  maintenance.perform_maintenance(
      hosts_to_drain,
      groups_per_batch=int(options.groups_per_batch),
      callback=post_drain,
      grouping_function=options.grouping)
def host_maintenance_status(cluster):
  """usage: host_maintenance_status {--filename=filename | --hosts=hosts}
                                    cluster

  Check on the scheduler's maintenance status for a list of hosts in the cluster.

  Logs one "<host> is in state: <state>" line per entry returned by
  HostMaintenance.check_status.
  """
  options = app.get_options()
  hosts_to_check = parse_hosts(options.filename, options.hosts)
  maintenance = HostMaintenance(CLUSTERS[cluster], options.verbosity)
  # Each entry is fed straight into a two-placeholder format string, so it is
  # expected to be a 2-tuple (presumably host, state -- confirm in check_status).
  for host_and_state in maintenance.check_status(hosts_to_check):
    log.info("%s is in state: %s" % host_and_state)
# Example no. 4
# 0
def sla_probe_hosts(cluster, percentage, duration):
    """usage: sla_probe_hosts
            [--filename=filename]
            [--hosts=hosts]
            cluster percentage duration

    Probes individual hosts with respect to their job SLA.
    Specifically, given a host, outputs all affected jobs with their projected SLAs
    if the host goes down. In addition, if a job's projected SLA does not clear
    the specified limits, suggests the approximate time when that job reaches its SLA.

    Output format:
    HOST  JOB  PREDICTED_SLA  SAFE?  PREDICTED_SAFE_IN

    where:
    HOST - host being probed.
    JOB - job that has tasks running on the host being probed.
    PREDICTED_SLA - predicted effective percentage of up tasks if the host is shut down.
    SAFE? - PREDICTED_SLA >= percentage
    PREDICTED_SAFE_IN - expected wait time in seconds for the job to reach requested SLA threshold.
    """
    options = app.get_options()

    sla_percentage = parse_sla_percentage(percentage)
    sla_duration = parse_time(duration)
    hosts = parse_hosts(options.filename, options.hosts)

    api = AuroraClientAPI(CLUSTERS[cluster], options.verbosity)
    domain_vector = api.sla_get_safe_domain_vector(hosts)
    probed = domain_vector.probe_hosts(sla_percentage, sla_duration.as_(Time.SECONDS), hosts)

    # One tab-separated row per (host, job); columns follow the docstring's
    # "Output format" line.
    row_format = "%s\t%s\t%.2f\t%s\t%s"

    report = []
    for host, job_details in sorted(probed.items()):
        rows = []
        for detail in sorted(job_details):
            # A missing wait-time estimate is rendered as "n/a".
            if detail.safe_in_secs is None:
                safe_in = "n/a"
            else:
                safe_in = detail.safe_in_secs
            rows.append(
                row_format
                % (
                    host,
                    detail.job.to_path(),
                    detail.predicted_percentage,
                    detail.safe,
                    safe_in,
                )
            )
        # All rows for one host are emitted together as a single result entry.
        report.append("\n".join(rows))

    print_results(report)