Code Example #1
File: admin.py Project: isenhome-zxg/aurora
def increase_quota(cluster, role, cpu_str, ram_str, disk_str):
    """usage: increase_quota cluster role cpu ram[unit] disk[unit]

  Increases the amount of production quota allocated to a user.
  """
    cpu = float(cpu_str)
    ram = parse_data(ram_str)
    disk = parse_data(disk_str)

    client = make_admin_client(cluster)
    resp = client.get_quota(role)
    quota = resp.result.getQuotaResult.quota
    log.info('Current quota for %s:\n\tCPU\t%s\n\tRAM\t%s MB\n\tDisk\t%s MB' %
             (role, quota.numCpus, quota.ramMb, quota.diskMb))

    new_cpu = float(cpu + quota.numCpus)
    new_ram = int((ram + Amount(quota.ramMb, Data.MB)).as_(Data.MB))
    new_disk = int((disk + Amount(quota.diskMb, Data.MB)).as_(Data.MB))

    log.info(
        'Attempting to update quota for %s to\n\tCPU\t%s\n\tRAM\t%s MB\n\tDisk\t%s MB'
        % (role, new_cpu, new_ram, new_disk))

    resp = client.set_quota(role, new_cpu, new_ram, new_disk)
    check_and_log_response(resp)
Code Example #2
  def _drain_hosts(self, drainable_hosts):
    """"Drains tasks from the specified hosts.

    This will move active tasks on these hosts to the DRAINING state, causing them to be
    rescheduled elsewhere.

    :param drainable_hosts: Hosts that are in maintenance mode and ready to be drained
    :type drainable_hosts: gen.apache.aurora.ttypes.Hosts
    :rtype: set of host names that failed to drain
    """
    check_and_log_response(self._client.drain_hosts(drainable_hosts))
    drainable_hostnames = [hostname for hostname in drainable_hosts.hostNames]

    total_wait = self.STATUS_POLL_INTERVAL
    not_drained_hostnames = set(drainable_hostnames)
    while not self._wait_event.is_set() and not_drained_hostnames:
      self._wait_event.wait(self.STATUS_POLL_INTERVAL.as_(Time.SECONDS))

      not_drained_hostnames = self.check_if_drained(drainable_hostnames)

      total_wait += self.STATUS_POLL_INTERVAL
      if not_drained_hostnames and total_wait > self.MAX_STATUS_WAIT:
        log.warning('Failed to move all hosts into DRAINED within %s' % self.MAX_STATUS_WAIT)
        break

    return not_drained_hostnames
Code Example #3
    def _drain_hosts(self, drainable_hosts):
        """"Drains tasks from the specified hosts.

    This will move active tasks on these hosts to the DRAINING state, causing them to be
    rescheduled elsewhere.

    :param drainable_hosts: Hosts that are in maintenance mode and ready to be drained
    :type drainable_hosts: gen.apache.aurora.ttypes.Hosts
    """
        check_and_log_response(self._client.drain_hosts(drainable_hosts))
        not_ready_hostnames = [
            hostname for hostname in drainable_hosts.hostNames
        ]
        while not_ready_hostnames:
            resp = self._client.maintenance_status(
                Hosts(set(not_ready_hostnames)))
            if not resp.result.maintenanceStatusResult.statuses:
                not_ready_hostnames = None
            for host_status in resp.result.maintenanceStatusResult.statuses:
                if host_status.mode != MaintenanceMode.DRAINED:
                    log.warning(
                        '%s is currently in status %s' %
                        (host_status.host,
                         MaintenanceMode._VALUES_TO_NAMES[host_status.mode]))
                else:
                    not_ready_hostnames.remove(host_status.host)
Code Example #4
File: admin.py Project: bhuvan/incubator-aurora
def scheduler_backup_now(cluster):
  """usage: scheduler_backup_now cluster

  Immediately initiates a full storage backup.
  """
  options = app.get_options()
  check_and_log_response(AuroraClientAPI(CLUSTERS[cluster], options.verbosity).perform_backup())
Code Example #5
File: admin.py Project: isabella232/client-3
def scheduler_commit_recovery(cluster):
    """usage: scheduler_commit_recovery cluster

  Commits a staged recovery.
  """
    check_and_log_response(
        make_admin_client_with_options(cluster).commit_recovery())
Code Example #6
def status(args, options):
    """usage: status cluster/role/env/job

  Fetches and prints information about the active tasks in a job.
  """
    def is_active(task):
        return task.status in ACTIVE_STATES

    def print_task(scheduled_task):
        assigned_task = scheduled_task.assignedTask
        taskInfo = assigned_task.task
        taskString = ''
        if taskInfo:
            taskString += '''cpus: %s, ram: %s MB, disk: %s MB''' % (
                taskInfo.numCpus, taskInfo.ramMb, taskInfo.diskMb)
        if assigned_task.assignedPorts:
            taskString += '\n\tports: %s' % assigned_task.assignedPorts
        taskString += '\n\tfailure count: %s (max %s)' % (
            scheduled_task.failureCount, taskInfo.maxTaskFailures)
        taskString += '\n\tevents:'
        for event in scheduled_task.taskEvents:
            taskString += '\n\t\t %s %s: %s' % (
                datetime.fromtimestamp(event.timestamp / 1000),
                ScheduleStatus._VALUES_TO_NAMES[event.status], event.message)
        taskString += '\n\tmetadata:'
        if assigned_task.task.metadata is not None:
            for md in assigned_task.task.metadata:
                taskString += ('\n\t\t%s: %s' % (md.key, md.value))

        return taskString

    def print_tasks(tasks):
        for task in tasks:
            taskString = print_task(task)

            log.info(
                'role: %s, env: %s, name: %s, shard: %s, status: %s on %s\n%s'
                %
                (task.assignedTask.task.owner.role,
                 task.assignedTask.task.environment,
                 task.assignedTask.task.jobName, task.assignedTask.instanceId,
                 ScheduleStatus._VALUES_TO_NAMES[task.status],
                 task.assignedTask.slaveHost, taskString))

    api, job_key, _ = LiveJobDisambiguator.disambiguate_args_or_die(
        args, options, make_client_factory())
    v1_deprecation_warning("status", ["job", "status", args[0]])
    resp = api.check_status(job_key)
    check_and_log_response(resp)

    tasks = resp.result.scheduleStatusResult.tasks
    if tasks:
        active_tasks = filter(is_active, tasks)
        log.info('Active Tasks (%s)' % len(active_tasks))
        print_tasks(active_tasks)
        inactive_tasks = filter(lambda x: not is_active(x), tasks)
        log.info('Inactive Tasks (%s)' % len(inactive_tasks))
        print_tasks(inactive_tasks)
    else:
        log.info('No tasks found.')
Code Example #7
def really_killall(args, options):
    """Helper for testing purposes: make it easier to mock out the actual kill process,
  while testing hooks in the command dispatch process.
  """
    maybe_disable_hooks(options)
    job_key = AuroraJobKey.from_path(args[0])
    config_file = args[1] if len(args) > 1 else None  # the config for hooks
    new_cmd = ["job", "killall", args[0]]
    if config_file is not None:
        new_cmd.append("--config=%s" % config_file)
    if options.open_browser:
        new_cmd.append("--open-browser")
    if options.batch_size is not None:
        new_cmd.append("--batch-size=%s" % options.batch_size)
    if options.max_total_failures is not None:
        new_cmd.append("--max-total-failures=%s" % options.max_total_failures)
    v1_deprecation_warning("killall", new_cmd)

    config = get_job_config(job_key.to_path(), config_file,
                            options) if config_file else None
    api = make_client(job_key.cluster)
    if options.batch_size is not None:
        kill_in_batches(api, job_key, None, options.batch_size,
                        options.max_failures_option)
    else:
        resp = api.kill_job(job_key, None, config=config)
        check_and_log_response(resp)
    handle_open(api.scheduler_proxy.scheduler_client().url, job_key.role,
                job_key.env, job_key.name)
    wait_kill_tasks(api.scheduler_proxy, job_key)
Code Example #8
  def _drain_hosts(self, drainable_hosts, clock=time):
    """"Drains tasks from the specified hosts.

    This will move active tasks on these hosts to the DRAINING state, causing them to be
    rescheduled elsewhere.

    :param drainable_hosts: Hosts that are in maintenance mode and ready to be drained
    :type drainable_hosts: gen.apache.aurora.ttypes.Hosts
    :param clock: time module for testing
    :type clock: time
    """
    check_and_log_response(self._client.drain_hosts(drainable_hosts))
    not_ready_hostnames = [hostname for hostname in drainable_hosts.hostNames]
    while not_ready_hostnames:
      log.info("Sleeping for %s." % self.START_MAINTENANCE_DELAY)
      clock.sleep(self.START_MAINTENANCE_DELAY.as_(Time.SECONDS))
      resp = self._client.maintenance_status(Hosts(set(not_ready_hostnames)))
      if not resp.result.maintenanceStatusResult.statuses:
        not_ready_hostnames = None
      for host_status in resp.result.maintenanceStatusResult.statuses:
        if host_status.mode != MaintenanceMode.DRAINED:
          log.warning('%s is currently in status %s' %
              (host_status.host, MaintenanceMode._VALUES_TO_NAMES[host_status.mode]))
        else:
          not_ready_hostnames.remove(host_status.host)
Code Example #9
File: admin.py Project: isabella232/client-3
def scheduler_stage_recovery(cluster, backup_id):
    """usage: scheduler_stage_recovery cluster backup_id

  Stages a backup for recovery.
  """
    check_and_log_response(
        make_admin_client_with_options(cluster).stage_recovery(backup_id))
Code Example #10
File: admin.py Project: isabella232/client-3
def increase_quota(cluster, role, cpu_str, ram_str, disk_str):
    """usage: increase_quota cluster role cpu ram[unit] disk[unit]

  Increases the amount of production quota allocated to a user.
  """
    cpu = float(cpu_str)
    ram = parse_data(ram_str).as_(Data.MB)
    disk = parse_data(disk_str).as_(Data.MB)

    client = make_admin_client_with_options(cluster)
    resp = client.get_quota(role)
    quota = resp.result.getQuotaResult.quota
    resource_details = ResourceManager.resource_details_from_quota(quota)
    log.info('Current quota for %s:\n\t%s' % (role, '\n\t'.join(
        '%s\t%s%s' %
        (r.resource_type.display_name, r.value, r.resource_type.display_unit)
        for r in resource_details)))

    new_cpu = ResourceType.CPUS.value_type(
        cpu + ResourceManager.quantity_of(resource_details, ResourceType.CPUS))
    new_ram = ResourceType.RAM_MB.value_type(
        ram +
        ResourceManager.quantity_of(resource_details, ResourceType.RAM_MB))
    new_disk = ResourceType.DISK_MB.value_type(
        disk +
        ResourceManager.quantity_of(resource_details, ResourceType.DISK_MB))

    log.info(
        'Attempting to update quota for %s to\n\tCPU\t%s\n\tRAM\t%s MB\n\tDisk\t%s MB'
        % (role, new_cpu, new_ram, new_disk))

    resp = client.set_quota(role, new_cpu, new_ram, new_disk)
    check_and_log_response(resp)
Code Example #11
File: core.py Project: kevinburg/incubator-aurora
def really_start_cron(args, options):
  api, job_key, config_file = LiveJobDisambiguator.disambiguate_args_or_die(
      args, options, make_client_factory())
  config = get_job_config(job_key.to_path(), config_file, options) if config_file else None
  resp = api.start_cronjob(job_key, config=config)
  check_and_log_response(resp)
  handle_open(api.scheduler_proxy.scheduler_client().url, job_key.role, job_key.env, job_key.name)
Code Example #12
File: admin.py Project: mkacik/incubator-aurora
def increase_quota(cluster, role, cpu_str, ram_str, disk_str):
    """usage: increase_quota cluster role cpu ram[unit] disk[unit]

  Increases the amount of production quota allocated to a user.
  """
    cpu = float(cpu_str)
    ram = parse_data(ram_str)
    disk = parse_data(disk_str)

    options = app.get_options()
    client = AuroraClientAPI(CLUSTERS[cluster], options.verbosity == "verbose")
    resp = client.get_quota(role)
    quota = resp.result.getQuotaResult.quota
    log.info(
        "Current quota for %s:\n\tCPU\t%s\n\tRAM\t%s MB\n\tDisk\t%s MB"
        % (role, quota.numCpus, quota.ramMb, quota.diskMb)
    )

    new_cpu = float(cpu + quota.numCpus)
    new_ram = int((ram + Amount(quota.ramMb, Data.MB)).as_(Data.MB))
    new_disk = int((disk + Amount(quota.diskMb, Data.MB)).as_(Data.MB))

    log.info(
        "Attempting to update quota for %s to\n\tCPU\t%s\n\tRAM\t%s MB\n\tDisk\t%s MB"
        % (role, new_cpu, new_ram, new_disk)
    )

    resp = client.set_quota(role, new_cpu, new_ram, new_disk)
    check_and_log_response(resp)
Code Example #13
File: admin.py Project: isabella232/client-3
def scheduler_unload_recovery(cluster):
    """usage: scheduler_unload_recovery cluster

  Unloads a staged recovery.
  """
    check_and_log_response(
        make_admin_client_with_options(cluster).unload_recovery())
Code Example #14
File: admin.py Project: mkacik/incubator-aurora
def scheduler_unload_recovery(cluster):
    """usage: scheduler_unload_recovery cluster

  Unloads a staged recovery.
  """
    options = app.get_options()
    check_and_log_response(AuroraClientAPI(CLUSTERS[cluster], options.verbosity).unload_recovery())
Code Example #15
File: admin.py Project: mkacik/incubator-aurora
def scheduler_snapshot(cluster):
    """usage: scheduler_snapshot cluster

  Request that the scheduler perform a storage snapshot and block until complete.
  """
    options = app.get_options()
    check_and_log_response(AuroraClientAPI(CLUSTERS[cluster], options.verbosity).snapshot())
Code Example #16
File: core.py Project: kevinburg/incubator-aurora
def really_killall(args, options):
  """Helper for testing purposes: make it easier to mock out the actual kill process,
  while testing hooks in the command dispatch process.
  """
  maybe_disable_hooks(options)
  job_key = AuroraJobKey.from_path(args[0])
  config_file = args[1] if len(args) > 1 else None  # the config for hooks
  new_cmd = ["job", "killall", args[0]]
  if config_file is not None:
    new_cmd.append("--config=%s" % config_file)
  if options.open_browser:
    new_cmd.append("--open-browser")
  if options.batch_size is not None:
    new_cmd.append("--batch-size=%s" % options.batch_size)
  if options.max_total_failures is not None:
    new_cmd.append("--max-total-failures=%s" % options.max_total_failures)
  v1_deprecation_warning("killall", new_cmd)

  config = get_job_config(job_key.to_path(), config_file, options) if config_file else None
  api = make_client(job_key.cluster)
  if options.batch_size is not None:
    kill_in_batches(api, job_key, None, options.batch_size, options.max_failures_option)
  else:
    resp = api.kill_job(job_key, None, config=config)
    check_and_log_response(resp)
  handle_open(api.scheduler_proxy.scheduler_client().url, job_key.role, job_key.env, job_key.name)
  wait_kill_tasks(api.scheduler_proxy, job_key)
Code Example #17
File: admin.py Project: mkacik/incubator-aurora
def scheduler_stage_recovery(cluster, backup_id):
    """usage: scheduler_stage_recovery cluster backup_id

  Stages a backup for recovery.
  """
    options = app.get_options()
    check_and_log_response(AuroraClientAPI(CLUSTERS[cluster], options.verbosity).stage_recovery(backup_id))
Code Example #18
File: core.py Project: kpfell/incubator-aurora
def status(args, options):
  """usage: status cluster/role/env/job

  Fetches and prints information about the active tasks in a job.
  """
  def is_active(task):
    return task.status in ACTIVE_STATES

  def print_task(scheduled_task):
    assigned_task = scheduled_task.assignedTask
    taskInfo = assigned_task.task
    taskString = ''
    if taskInfo:
      taskString += '''cpus: %s, ram: %s MB, disk: %s MB''' % (taskInfo.numCpus,
                                                               taskInfo.ramMb,
                                                               taskInfo.diskMb)
    if assigned_task.assignedPorts:
      taskString += '\n\tports: %s' % assigned_task.assignedPorts
    taskString += '\n\tfailure count: %s (max %s)' % (scheduled_task.failureCount,
                                                      taskInfo.maxTaskFailures)
    taskString += '\n\tevents:'
    for event in scheduled_task.taskEvents:
      taskString += '\n\t\t %s %s: %s' % (datetime.fromtimestamp(event.timestamp / 1000),
                                          ScheduleStatus._VALUES_TO_NAMES[event.status],
                                          event.message)
    taskString += '\n\tmetadata:'
    if assigned_task.task.metadata is not None:
      for md in assigned_task.task.metadata:
        taskString += ('\n\t\t%s: %s' % (md.key, md.value))

    return taskString

  def print_tasks(tasks):
    for task in tasks:
      taskString = print_task(task)

      log.info('role: %s, env: %s, name: %s, shard: %s, status: %s on %s\n%s' %
             (task.assignedTask.task.owner.role,
              task.assignedTask.task.environment,
              task.assignedTask.task.jobName,
              task.assignedTask.instanceId,
              ScheduleStatus._VALUES_TO_NAMES[task.status],
              task.assignedTask.slaveHost,
              taskString))

  api, job_key, _ = LiveJobDisambiguator.disambiguate_args_or_die(
      args, options, make_client_factory())
  resp = api.check_status(job_key)
  check_and_log_response(resp)

  tasks = resp.result.scheduleStatusResult.tasks
  if tasks:
    active_tasks = filter(is_active, tasks)
    log.info('Active Tasks (%s)' % len(active_tasks))
    print_tasks(active_tasks)
    inactive_tasks = filter(lambda x: not is_active(x), tasks)
    log.info('Inactive Tasks (%s)' % len(inactive_tasks))
    print_tasks(inactive_tasks)
  else:
    log.info('No tasks found.')
Code Example #19
File: admin.py Project: bhuvan/incubator-aurora
def scheduler_snapshot(cluster):
  """usage: scheduler_snapshot cluster

  Request that the scheduler perform a storage snapshot and block until complete.
  """
  options = app.get_options()
  check_and_log_response(AuroraClientAPI(CLUSTERS[cluster], options.verbosity).snapshot())
Code Example #20
File: host_maintenance.py Project: rosmo/aurora
    def _drain_hosts(self, drainable_hosts):
        """"Drains tasks from the specified hosts.

    This will move active tasks on these hosts to the DRAINING state, causing them to be
    rescheduled elsewhere.

    :param drainable_hosts: Hosts that are in maintenance mode and ready to be drained
    :type drainable_hosts: gen.apache.aurora.ttypes.Hosts
    :rtype: set of host names that failed to drain
    """
        check_and_log_response(self._client.drain_hosts(drainable_hosts))
        drainable_hostnames = [hostname for hostname in drainable_hosts.hostNames]

        total_wait = self.STATUS_POLL_INTERVAL
        not_drained_hostnames = set(drainable_hostnames)
        while not self._wait_event.is_set() and not_drained_hostnames:
            log.info("Waiting for hosts to be in DRAINED: %s" % not_drained_hostnames)
            self._wait_event.wait(self.STATUS_POLL_INTERVAL.as_(Time.SECONDS))

            statuses = self.check_status(list(not_drained_hostnames))
            not_drained_hostnames = set(h[0] for h in statuses if h[1] != "DRAINED")

            total_wait += self.STATUS_POLL_INTERVAL
            if not_drained_hostnames and total_wait > self.MAX_STATUS_WAIT:
                log.warning(
                    "Failed to move all hosts into DRAINED within %s:\n%s"
                    % (
                        self.MAX_STATUS_WAIT,
                        "\n".join("\tHost:%s\tStatus:%s" % h for h in sorted(statuses) if h[1] != "DRAINED"),
                    )
                )
                break

        return not_drained_hostnames
Code Example #21
  def check_status(self, hosts):
    resp = self._client.maintenance_status(Hosts(set(hosts)))
    check_and_log_response(resp)
    statuses = []
    for host_status in resp.result.maintenanceStatusResult.statuses:
      statuses.append((host_status.host, MaintenanceMode._VALUES_TO_NAMES[host_status.mode]))
    return statuses
Code Example #22
File: admin.py Project: isabella232/client-3
def scheduler_backup_now(cluster):
    """usage: scheduler_backup_now cluster

  Immediately initiates a full storage backup.
  """
    check_and_log_response(
        make_admin_client_with_options(cluster).perform_backup())
Code Example #23
  def _drain_hosts(self, drainable_hosts):
    """"Drains tasks from the specified hosts.

    This will move active tasks on these hosts to the DRAINING state, causing them to be
    rescheduled elsewhere.

    :param drainable_hosts: Hosts that are in maintenance mode and ready to be drained
    :type drainable_hosts: gen.apache.aurora.ttypes.Hosts
    :rtype: set of host names that failed to drain
    """
    check_and_log_response(self._client.drain_hosts(drainable_hosts))
    drainable_hostnames = [hostname for hostname in drainable_hosts.hostNames]

    total_wait = self.STATUS_POLL_INTERVAL
    not_drained_hostnames = set(drainable_hostnames)
    while not self._wait_event.is_set() and not_drained_hostnames:
      log.info('Waiting for hosts to be in DRAINED: %s' % not_drained_hostnames)
      self._wait_event.wait(self.STATUS_POLL_INTERVAL.as_(Time.SECONDS))

      statuses = self.check_status(list(not_drained_hostnames))
      not_drained_hostnames = set(h[0] for h in statuses if h[1] != 'DRAINED')

      total_wait += self.STATUS_POLL_INTERVAL
      if not_drained_hostnames and total_wait > self.MAX_STATUS_WAIT:
        log.warning('Failed to move all hosts into DRAINED within %s:\n%s' %
            (self.MAX_STATUS_WAIT,
            '\n'.join("\tHost:%s\tStatus:%s" % h for h in sorted(statuses) if h[1] != 'DRAINED')))
        break

    return not_drained_hostnames
Code Example #24
File: admin.py Project: bmhatfield/aurora
def increase_quota(cluster, role, cpu_str, ram_str, disk_str):
  """usage: increase_quota cluster role cpu ram[unit] disk[unit]

  Increases the amount of production quota allocated to a user.
  """
  cpu = float(cpu_str)
  ram = parse_data(ram_str).as_(Data.MB)
  disk = parse_data(disk_str).as_(Data.MB)

  client = make_admin_client_with_options(cluster)
  resp = client.get_quota(role)
  quota = resp.result.getQuotaResult.quota
  resource_details = ResourceManager.resource_details_from_quota(quota)
  log.info('Current quota for %s:\n\t%s' % (
      role,
      '\n\t'.join('%s\t%s%s' % (
          r.resource_type.display_name,
          r.value,
          r.resource_type.display_unit) for r in resource_details)))

  new_cpu = ResourceType.CPUS.value_type(
    cpu + ResourceManager.quantity_of(resource_details, ResourceType.CPUS))
  new_ram = ResourceType.RAM_MB.value_type(
    ram + ResourceManager.quantity_of(resource_details, ResourceType.RAM_MB))
  new_disk = ResourceType.DISK_MB.value_type(
    disk + ResourceManager.quantity_of(resource_details, ResourceType.DISK_MB))

  log.info('Attempting to update quota for %s to\n\tCPU\t%s\n\tRAM\t%s MB\n\tDisk\t%s MB' %
           (role, new_cpu, new_ram, new_disk))

  resp = client.set_quota(role, new_cpu, new_ram, new_disk)
  check_and_log_response(resp)
Code Example #25
File: core.py Project: sumanau7/incubator-aurora
def list_jobs(cluster_and_role):
  """usage: list_jobs [--show-cron] cluster/role/env/job

  Shows all jobs that match the job-spec known by the scheduler.
  If --show-cron is specified, then also shows the registered cron schedule.
  """
  def show_job_simple(job):
    if options.show_cron_schedule:
      print(('{0}/{1.key.role}/{1.key.environment}/{1.key.name}' +
          '\t\'{1.cronSchedule}\'\t{1.cronCollisionPolicy}').format(cluster, job))
    else:
      print('{0}/{1.key.role}/{1.key.environment}/{1.key.name}'.format(cluster, job))

  def show_job_pretty(job):
    print("Job %s/%s/%s/%s:" %
        (cluster, job.key.role, job.key.environment, job.key.name))
    print('\tcron schedule: %s' % job.cronSchedule)
    print('\tcron policy:   %s' % job.cronCollisionPolicy)

  options = app.get_options()
  if options.show_cron_schedule and options.pretty:
    print_fn = show_job_pretty
  else:
    print_fn = show_job_simple
  # Take the cluster_and_role parameter, and split it into its two components.
  if cluster_and_role.count('/') != 1:
    die('list_jobs parameter must be in cluster/role format')
  (cluster, role) = cluster_and_role.split('/')
  api = make_client(cluster)
  resp = api.get_jobs(role)
  check_and_log_response(resp)
  for job in resp.result.getJobsResult.configs:
    print_fn(job)
Code Example #26
File: core.py Project: sumanau7/incubator-aurora
def restart(args, options):
  """usage: restart cluster/role/env/job
               [--shards=SHARDS]
               [--batch_size=INT]
               [--updater_health_check_interval_seconds=SECONDS]
               [--max_per_shard_failures=INT]
               [--max_total_failures=INT]
               [--restart_threshold=INT]
               [--watch_secs=SECONDS]

  Performs a rolling restart of shards within a job.

  Restarts are fully controlled client-side, so aborting halts the restart.
  """
  api, job_key, config_file = LiveJobDisambiguator.disambiguate_args_or_die(
      args, options, make_client_factory())
  config = get_job_config(job_key.to_path(), config_file, options) if config_file else None
  updater_config = UpdaterConfig(
      options.batch_size,
      options.restart_threshold,
      options.watch_secs,
      options.max_per_shard_failures,
      options.max_total_failures)
  resp = api.restart(job_key, options.shards, updater_config,
      options.health_check_interval_seconds, config=config)
  check_and_log_response(resp)
  handle_open(api.scheduler_proxy.scheduler_client().url, job_key.role, job_key.env, job_key.name)
Code Example #27
File: core.py Project: kevinburg/incubator-aurora
def really_kill(args, options):
  if options.shards is None:
    print('Shards option is required for kill; use killall to kill all shards', file=sys.stderr)
    exit(1)
  api, job_key, config_file = LiveJobDisambiguator.disambiguate_args_or_die(
      args, options, make_client_factory())
  instance_key = str(job_key)
  if options.shards is not None:
    instance_key = "%s/%s" % (instance_key, ",".join(map(str, options.shards)))
  new_cmd = ["job", "kill", instance_key]
  if config_file is not None:
    new_cmd.append("--config=%s" % config_file)
  if options.open_browser:
    new_cmd.append("--open-browser")
  if options.batch_size is not None:
    new_cmd.append("--batch-size=%s" % options.batch_size)
  if options.max_total_failures is not None:
    new_cmd.append("--max-total-failures=%s" % options.max_total_failures)
  v1_deprecation_warning("kill", new_cmd)

  config = get_job_config(job_key.to_path(), config_file, options) if config_file else None
  if options.batch_size is not None:
    kill_in_batches(api, job_key, options.shards, options.batch_size, options.max_failures_option)
  else:
    resp = api.kill_job(job_key, options.shards, config=config)
    check_and_log_response(resp)
  handle_open(api.scheduler_proxy.scheduler_client().url, job_key.role, job_key.env, job_key.name)
  wait_kill_tasks(api.scheduler_proxy, job_key, options.shards)
Code Example #28
File: core.py Project: kevinburg/incubator-aurora
def really_update(job_spec, config_file, options):
  def warn_if_dangerous_change(api, job_spec, config):
    # Get the current job status, so that we can check if there's anything
    # dangerous about this update.
    resp = api.query_no_configs(api.build_query(config.role(), config.name(),
        statuses=ACTIVE_STATES, env=config.environment()))
    if resp.responseCode != ResponseCode.OK:
      die('Could not get job status from server for comparison: %s' % resp.messageDEPRECATED)
    remote_tasks = [t.assignedTask.task for t in resp.result.scheduleStatusResult.tasks]
    resp = api.populate_job_config(config)
    if resp.responseCode != ResponseCode.OK:
      die('Server could not populate job config for comparison: %s' % resp.messageDEPRECATED)
    local_task_count = len(resp.result.populateJobResult.populated)
    remote_task_count = len(remote_tasks)
    if (local_task_count >= 4 * remote_task_count or local_task_count <= remote_task_count / 4
        or local_task_count == 0):
      print('Warning: this update is a large change. Press ^c within 5 seconds to abort')
      time.sleep(5)

  maybe_disable_hooks(options)
  config = get_job_config(job_spec, config_file, options)
  api = make_client(config.cluster())
  if not options.force:
    warn_if_dangerous_change(api, job_spec, config)
  resp = api.update_job(config, options.health_check_interval_seconds, options.shards)
  check_and_log_response(resp)
Code Example #29
def really_kill(args, options):
    if options.shards is None:
        print(
            'Shards option is required for kill; use killall to kill all shards',
            file=sys.stderr)
        exit(1)
    api, job_key, config_file = LiveJobDisambiguator.disambiguate_args_or_die(
        args, options, make_client_factory())
    instance_key = str(job_key)
    if options.shards is not None:
        instance_key = "%s/%s" % (instance_key, ",".join(
            map(str, options.shards)))
    new_cmd = ["job", "kill", instance_key]
    if config_file is not None:
        new_cmd.append("--config=%s" % config_file)
    if options.open_browser:
        new_cmd.append("--open-browser")
    if options.batch_size is not None:
        new_cmd.append("--batch-size=%s" % options.batch_size)
    if options.max_total_failures is not None:
        new_cmd.append("--max-total-failures=%s" % options.max_total_failures)
    v1_deprecation_warning("kill", new_cmd)

    config = get_job_config(job_key.to_path(), config_file,
                            options) if config_file else None
    if options.batch_size is not None:
        kill_in_batches(api, job_key, options.shards, options.batch_size,
                        options.max_failures_option)
    else:
        resp = api.kill_job(job_key, options.shards, config=config)
        check_and_log_response(resp)
    handle_open(api.scheduler_proxy.scheduler_client().url, job_key.role,
                job_key.env, job_key.name)
    wait_kill_tasks(api.scheduler_proxy, job_key, options.shards)
Code Example #30
def scheduler_delete_recovery_tasks(cluster, task_ids):
  """usage: scheduler_delete_recovery_tasks cluster task_ids

  Deletes a comma-separated list of task IDs from a staged recovery.
  """
  ids = set(task_ids.split(','))
  check_and_log_response(make_admin_client(cluster).delete_recovery_tasks(TaskQuery(taskIds=ids)))
Code Example #31
  def _complete_maintenance(self, drained_hosts):
    """End the maintenance status for a given set of hosts."""
    check_and_log_response(self._client.end_maintenance(drained_hosts))
    resp = self._client.maintenance_status(drained_hosts)
    for host_status in resp.result.maintenanceStatusResult.statuses:
      if host_status.mode != MaintenanceMode.NONE:
        log.warning('%s is DRAINING or in DRAINED' % host_status.host)
Code Example #32
File: admin.py Project: mkacik/incubator-aurora
def scheduler_backup_now(cluster):
    """usage: scheduler_backup_now cluster

  Immediately initiates a full storage backup.
  """
    options = app.get_options()
    check_and_log_response(AuroraClientAPI(CLUSTERS[cluster], options.verbosity).perform_backup())
Code Example #33
File: admin.py Project: bhuvan/incubator-aurora
def scheduler_stage_recovery(cluster, backup_id):
  """usage: scheduler_stage_recovery cluster backup_id

  Stages a backup for recovery.
  """
  options = app.get_options()
  check_and_log_response(
      AuroraClientAPI(CLUSTERS[cluster], options.verbosity).stage_recovery(backup_id))
Code Example #34
File: admin.py Project: bhuvan/incubator-aurora
def scheduler_unload_recovery(cluster):
  """usage: scheduler_unload_recovery cluster

  Unloads a staged recovery.
  """
  options = app.get_options()
  check_and_log_response(AuroraClientAPI(CLUSTERS[cluster], options.verbosity)
      .unload_recovery())
Code Example #35
File: core.py Project: aalzabarah/incubator-aurora
def really_cancel_update(args, options):
  api, job_key, config_file = LiveJobDisambiguator.disambiguate_args_or_die(
      args, options, make_client_factory())
  new_cmd = ["job", "cancel-update", str(job_key)]
  v1_deprecation_warning("cancel_update", new_cmd)
  config = get_job_config(job_key.to_path(), config_file, options) if config_file else None
  resp = api.cancel_update(job_key, config=config)
  check_and_log_response(resp)
Code Example #36
def ssh(args, options):
  """usage: ssh cluster/role/env/job shard [args...]

  Initiate an SSH session on the machine that a shard is running on.
  """
  if not args:
    die('Job path is required')
  job_path = args.pop(0)
  try:
    cluster_name, role, env, name = AuroraJobKey.from_path(job_path)
  except AuroraJobKey.Error as e:
    die('Invalid job path "%s": %s' % (job_path, e))
  if not args:
    die('Shard is required')
  try:
    shard = int(args.pop(0))
  except ValueError:
    die('Shard must be an integer')

  newcmd = ["task", "ssh", "%s/%s" % (job_path, shard)]
  if len(options.tunnels) > 0:
    newcmd.append("--tunnels=%s" % options.tunnels)
  if options.ssh_user is not None:
    newcmd.append("--ssh-user=%s" % options.ssh_user)
  if options.executor_sandbox:
    newcmd.append("--executor-sandbox")
  if len(args) > 0:
    newcmd.append("--command=\"%s\"" % " ".join(args))
  v1_deprecation_warning("ssh", newcmd)

  api = make_client(cluster_name)
  resp = api.query(api.build_query(role, name, set([int(shard)]), env=env))
  check_and_log_response(resp)

  first_task = resp.result.scheduleStatusResult.tasks[0]
  remote_cmd = 'bash' if not args else ' '.join(args)
  command = DistributedCommandRunner.substitute(remote_cmd, first_task,
      api.cluster, executor_sandbox=options.executor_sandbox)

  ssh_command = ['ssh', '-t']

  role = first_task.assignedTask.task.owner.role
  slave_host = first_task.assignedTask.slaveHost

  for tunnel in options.tunnels:
    try:
      port, name = tunnel.split(':')
      port = int(port)
    except ValueError:
      die('Could not parse tunnel: %s.  Must be of form PORT:NAME' % tunnel)
    if name not in first_task.assignedTask.assignedPorts:
      die('Task %s has no port named %s' % (first_task.assignedTask.taskId, name))
    ssh_command += [
        '-L', '%d:%s:%d' % (port, slave_host, first_task.assignedTask.assignedPorts[name])]

  ssh_command += ['%s@%s' % (options.ssh_user or role, slave_host), command]
  return subprocess.call(ssh_command)
Code Example #37
File: admin.py Project: sumanau7/incubator-aurora
def scheduler_delete_recovery_tasks(cluster, task_ids):
  """usage: scheduler_delete_recovery_tasks cluster task_ids

  Deletes a comma-separated list of task IDs from a staged recovery.
  """
  ids = set(task_ids.split(','))
  options = app.get_options()
  check_and_log_response(AuroraClientAPI(CLUSTERS[cluster], options.verbosity)
      .delete_recovery_tasks(TaskQuery(taskIds=ids)))
Code Example #38
def really_cancel_update(args, options):
    api, job_key, config_file = LiveJobDisambiguator.disambiguate_args_or_die(
        args, options, make_client_factory())
    new_cmd = ["job", "cancel-update", str(job_key)]
    v1_deprecation_warning("cancel_update", new_cmd)
    config = get_job_config(job_key.to_path(), config_file,
                            options) if config_file else None
    resp = api.cancel_update(job_key, config=config)
    check_and_log_response(resp)
Code Example #39
File: admin.py Project: bhuvan/incubator-aurora
def scheduler_delete_recovery_tasks(cluster, task_ids):
  """usage: scheduler_delete_recovery_tasks cluster task_ids

  Deletes a comma-separated list of task IDs from a staged recovery.
  """
  ids = set(task_ids.split(','))
  options = app.get_options()
  check_and_log_response(AuroraClientAPI(CLUSTERS[cluster], options.verbosity)
      .delete_recovery_tasks(TaskQuery(taskIds=ids)))
Code Example #40
def really_start_cron(args, options):
    api, job_key, config_file = LiveJobDisambiguator.disambiguate_args_or_die(
        args, options, make_client_factory())
    config = get_job_config(job_key.to_path(), config_file,
                            options) if config_file else None
    resp = api.start_cronjob(job_key, config=config)
    check_and_log_response(resp)
    handle_open(api.scheduler_proxy.scheduler_client().url, job_key.role,
                job_key.env, job_key.name)
Code Example #41
  def start_maintenance(self, hostnames):
    """Put a list of hostnames into maintenance mode, to de-prioritize scheduling.

    This is part of two-phase draining: tasks will still be running on these hosts until
    drain_hosts is called upon them.

    :param hostnames: List of hosts to set for initial maintenance
    :type hostnames: list of strings
    """
    check_and_log_response(self._client.start_maintenance(Hosts(set(hostnames))))
Code Example #42
def scheduler_list_backups(cluster):
  """usage: scheduler_list_backups cluster

  Lists backups available for recovery.
  """
  resp = make_admin_client(cluster).list_backups()
  check_and_log_response(resp)
  backups = resp.result.listBackupsResult.backups
  print('%s available backups:' % len(backups))
  for backup in backups:
    print(backup)
Code Example #43
  def _complete_maintenance(self, drained_hosts):
    """End the maintenance status for a given set of hosts.

    :param drained_hosts: Hosts that are drained and finished being operated upon
    :type drained_hosts: gen.apache.aurora.ttypes.Hosts
    """
    check_and_log_response(self._client.end_maintenance(drained_hosts))
    resp = self._client.maintenance_status(drained_hosts)
    for host_status in resp.result.maintenanceStatusResult.statuses:
      if host_status.mode != MaintenanceMode.NONE:
        log.warning('%s is DRAINING or in DRAINED' % host_status.host)
Code Example #44
    def _complete_maintenance(self, drained_hosts):
        """End the maintenance status for a given set of hosts.

    :param drained_hosts: Hosts that are drained and finished being operated upon
    :type drained_hosts: gen.apache.aurora.ttypes.Hosts
    """
        check_and_log_response(self._client.end_maintenance(drained_hosts))
        resp = self._client.maintenance_status(drained_hosts)
        for host_status in resp.result.maintenanceStatusResult.statuses:
            if host_status.mode != MaintenanceMode.NONE:
                log.warning('%s is DRAINING or in DRAINED' % host_status.host)
Code Example #45
File: admin.py Project: bhuvan/incubator-aurora
def scheduler_list_backups(cluster):
  """usage: scheduler_list_backups cluster

  Lists backups available for recovery.
  """
  options = app.get_options()
  resp = AuroraClientAPI(CLUSTERS[cluster], options.verbosity).list_backups()
  check_and_log_response(resp)
  backups = resp.result.listBackupsResult.backups
  print('%s available backups:' % len(backups))
  for backup in backups:
    print(backup)
Code Example #46
def killall(args, options):
  """usage: killall cluster/role/env/job
  Kills all tasks in a running job, blocking until all specified tasks have been terminated.
  """

  job_key = AuroraJobKey.from_path(args[0])
  config_file = args[1] if len(args) > 1 else None  # the config for hooks
  config = get_job_config(job_key.to_path(), config_file, options) if config_file else None
  api = make_client(job_key.cluster)
  resp = api.kill_job(job_key, None, config=config)
  check_and_log_response(resp)
  handle_open(api.scheduler_proxy.scheduler_client().url, job_key.role, job_key.env, job_key.name)
Code Example #47
File: admin.py Project: mkacik/incubator-aurora
def scheduler_list_backups(cluster):
    """usage: scheduler_list_backups cluster

  Lists backups available for recovery.
  """
    options = app.get_options()
    resp = AuroraClientAPI(CLUSTERS[cluster], options.verbosity).list_backups()
    check_and_log_response(resp)
    backups = resp.result.listBackupsResult.backups
    print("%s available backups:" % len(backups))
    for backup in backups:
        print(backup)
Code Example #48
File: core.py Project: sumanau7/incubator-aurora
def cancel_update(args, options):
  """usage: cancel_update cluster/role/env/job

  Unlocks a job for updates.
  A job may be locked if a client's update session terminated abnormally,
  or if another user is actively updating the job.  This command should only
  be used when the user is confident that they are not conflicting with another user.
  """
  api, job_key, config_file = LiveJobDisambiguator.disambiguate_args_or_die(
      args, options, make_client_factory())
  config = get_job_config(job_key.to_path(), config_file, options) if config_file else None
  resp = api.cancel_update(job_key, config=config)
  check_and_log_response(resp)
Code Example #49
  def check_status(self, hostnames):
    """Query the scheduler to determine the maintenance status for a list of hostnames

    :param hostnames: Hosts to query for
    :type hostnames: list of strings
    :rtype: list of 2-tuples, hostname and MaintenanceMode
    """
    resp = self._client.maintenance_status(Hosts(set(hostnames)))
    check_and_log_response(resp)
    statuses = []
    for host_status in resp.result.maintenanceStatusResult.statuses:
      statuses.append((host_status.host, MaintenanceMode._VALUES_TO_NAMES[host_status.mode]))
    return statuses
Code Example #50
  def check_status(self, hostnames):
    """Query the scheduler to determine the maintenance status for a list of hostnames

    :param hostnames: Hosts to query for
    :type hostnames: list of strings
    :rtype: list of 2-tuples, hostname and MaintenanceMode
    """
    resp = self._client.maintenance_status(Hosts(set(hostnames)))
    check_and_log_response(resp)
    statuses = []
    for host_status in resp.result.maintenanceStatusResult.statuses:
      statuses.append((host_status.host, MaintenanceMode._VALUES_TO_NAMES[host_status.mode]))
    return statuses
Code Example #51
def get_locks(cluster):
  """usage: get_locks cluster

  Prints all context/operation locks in the scheduler.
  """
  resp = make_admin_client(cluster).get_locks()
  check_and_log_response(resp)

  pp = pprint.PrettyPrinter(indent=2)
  def pretty_print_lock(lock):
    return pp.pformat(vars(lock))

  print_results([',\n'.join(pretty_print_lock(t) for t in resp.result.getLocksResult.locks)])
Code Example #52
File: admin.py Project: bhuvan/incubator-aurora
def get_locks(cluster):
  """usage: get_locks cluster

  Prints all context/operation locks in the scheduler.
  """
  options = app.get_options()
  resp = AuroraClientAPI(CLUSTERS[cluster], options.verbosity).get_locks()
  check_and_log_response(resp)

  pp = pprint.PrettyPrinter(indent=2)
  def pretty_print_lock(lock):
    return pp.pformat(vars(lock))

  print_results([',\n'.join(pretty_print_lock(t) for t in resp.result.getLocksResult.locks)])
Code Example #53
File: admin.py Project: isabella232/client-3
def scheduler_print_recovery_tasks(cluster):
    """usage: scheduler_print_recovery_tasks cluster

  Prints all active tasks in a staged recovery.
  """
    resp = make_admin_client_with_options(cluster).query_recovery(
        TaskQuery(statuses=ACTIVE_STATES))
    check_and_log_response(resp)
    log.info('Role\tJob\tShard\tStatus\tTask ID')
    for task in resp.result.queryRecoveryResult.tasks:
        assigned = task.assignedTask
        conf = assigned.task
        log.info('\t'.join(
            (conf.job.role, conf.job.name, str(assigned.instanceId),
             ScheduleStatus._VALUES_TO_NAMES[task.status], assigned.taskId)))
Code Example #54
  def test_check_and_log_response(self, mock_sys_exit, mock_log):
    resp = Response(responseCode=ResponseCode.LOCK_ERROR)
    out = base.check_and_log_response(resp)
    self.assertIsNone(out)
    mock_sys_exit.assert_called_once_with(1)
    mock_log.assert_any_call('Response from scheduler: LOCK_ERROR (message: )')
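
The test above pins down the contract of check_and_log_response, the helper every example on this page calls: it logs the scheduler's response code and message, returns None, and exits with status 1 when the response is not OK. Below is a minimal sketch of a helper with that behavior; it is not the project's actual implementation, and the names ResponseCode, log, and messageDEPRECATED are simply reused from the surrounding snippets.

import sys

def check_and_log_response(resp):
  # Log the response code name and message in the format the test above asserts.
  log.info('Response from scheduler: %s (message: %s)' % (
      ResponseCode._VALUES_TO_NAMES[resp.responseCode], resp.messageDEPRECATED or ''))
  # Abort the command on any non-OK response, matching the LOCK_ERROR expectation.
  if resp.responseCode != ResponseCode.OK:
    sys.exit(1)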
Code Example #55
  def start_maintenance(self, hostnames):
    """Put a list of hostnames into maintenance mode, to de-prioritize scheduling.

    This is part of two-phase draining: tasks will still be running on these hosts until
    drain_hosts is called upon them.

    :param hostnames: List of hosts to set for initial maintenance
    :type hostnames: list of strings
    :rtype: list of hostnames with the maintenance mode set
    """
    resp = self._client.start_maintenance(Hosts(set(hostnames)))
    check_and_log_response(resp)
    result = [host_status.host for host_status in resp.result.startMaintenanceResult.statuses]
    if len(result) != len(hostnames):
      log.warning('Skipping maintenance for unknown hosts: %s' % (set(hostnames) - set(result)))

    return result
Code Example #56
def really_create(job_spec, config_file, options):
    try:
        config = get_job_config(job_spec, config_file, options)
    except ValueError as v:
        print("Error: %s" % v)
        sys.exit(1)
    api = make_client(config.cluster())
    resp = api.create_job(config)
    check_and_log_response(resp)
    handle_open(api.scheduler_proxy.scheduler_client().url, config.role(),
                config.environment(), config.name())
    if options.wait_until == 'RUNNING':
        JobMonitor(api.scheduler_proxy,
                   config.job_key()).wait_until(JobMonitor.running_or_finished)
    elif options.wait_until == 'FINISHED':
        JobMonitor(api.scheduler_proxy,
                   config.job_key()).wait_until(JobMonitor.terminal)
Code Example #57
File: admin.py Project: bhuvan/incubator-aurora
def scheduler_print_recovery_tasks(cluster):
  """usage: scheduler_print_recovery_tasks cluster

  Prints all active tasks in a staged recovery.
  """
  options = app.get_options()
  resp = AuroraClientAPI(CLUSTERS[cluster], options.verbosity).query_recovery(
      TaskQuery(statuses=ACTIVE_STATES))
  check_and_log_response(resp)
  log.info('Role\tJob\tShard\tStatus\tTask ID')
  for task in resp.result.queryRecoveryResult.tasks:
    assigned = task.assignedTask
    conf = assigned.task
    log.info('\t'.join((conf.owner.role,
                        conf.jobName,
                        str(assigned.instanceId),
                        ScheduleStatus._VALUES_TO_NAMES[task.status],
                        assigned.taskId)))