Example #1
0
def gc(args, options):
  """Garbage collect task(s) and task metadata.

    Usage: thermos gc [options] [task_id1 task_id2 ...]

    If tasks specified, restrict garbage collection to only those tasks,
    otherwise all tasks are considered.  The optional constraints are still
    honored.
  """
  # NOTE(review): the docstring above presumably doubles as CLI help text, so
  # it is kept verbatim -- confirm before rewording it.
  #
  # args:    task id patterns handed to tasks_from_re; when empty, the
  #          GarbageCollectionPolicy chooses the tasks instead.
  # options: reads root, max_age, max_space, max_tasks, keep_data, keep_logs,
  #          keep_metadata, dryrun and force.
  print('Analyzing root at %s' % options.root)

  # Each optional limit is converted by its own parser and only forwarded to
  # the policy when the user actually supplied it.
  limit_parsers = (
      ('max_age', parse_time),
      ('max_space', parse_data),
      ('max_tasks', int),
  )
  gc_options = {}
  for limit_name, parser in limit_parsers:
    raw_limit = getattr(options, limit_name)
    if raw_limit is not None:
      gc_options[limit_name] = parser(raw_limit)
  gc_options['include_metadata'] = not options.keep_metadata
  gc_options['include_logs'] = not options.keep_logs
  gc_options['verbose'] = True
  gc_options['logger'] = print

  if not args:
    print('No task ids specified, using default collector.')
    policy = GarbageCollectionPolicy(get_path_detector(), **gc_options)
    gc_tasks = [(task.checkpoint_root, task.task_id) for task in policy.run()]
  else:
    gc_tasks = list(tasks_from_re(args, state='finished'))

  if not gc_tasks:
    print('No tasks to garbage collect.  Exiting')
    return

  def run_or_report(action, *params):
    # In dry-run mode only describe the call that would have been made.
    if not options.dryrun:
      action(*params)
    else:
      print('    would run %s%r' % (action.__name__, params))

  def disposition(keep):
    return 'keeping' if keep else 'deleting'

  # --force skips the interactive prompt; an empty reply counts as "no".
  if options.force:
    answer = 'y'
  else:
    answer = raw_input("Continue [y/N]? ") or 'N'

  if answer.lower() != 'y':
    print('Cancelling gc.')
    return

  print('Running gc...')
  for checkpoint_root, task_id in gc_tasks:
    collector = TaskGarbageCollector(checkpoint_root, task_id)
    # One summary line per task, assembled from several prints.
    print('  Task %s ' % task_id, end='')
    print('data (%s) ' % disposition(options.keep_data), end='')
    print('logs (%s) ' % disposition(options.keep_logs), end='')
    print('metadata (%s) ' % disposition(options.keep_metadata))
    if not options.keep_data:
      run_or_report(collector.erase_data)
    if not options.keep_logs:
      run_or_report(collector.erase_logs)
    if not options.keep_metadata:
      run_or_report(collector.erase_metadata)
    print('done.')
Example #2
0
def tail(args, options):
  """Tail the logs of a task process.

    Usage: thermos tail task_name [process_name]
  """
  # args:    [task_id] or [task_id, process_name].
  # options: use_stderr selects the process's 'stderr' log instead of 'stdout'.
  # Exits with status 1 (message on stderr) when the task cannot be found, no
  # process run matches, or more than one process matches.
  if not args:
    app.error('Expected a task to tail, got nothing!')
  if len(args) not in (1, 2):
    app.error('Expected at most two arguments (task and optional process), got %d' % len(args))

  task_id = args[0]
  path_detector = get_path_detector()
  # Use the first root that yields a checkpoint for this task; `root`,
  # `detector` and `checkpoint` keep the values of that iteration.
  for root in path_detector.get_paths():
    detector = TaskDetector(root=root)
    checkpoint = CheckpointDispatcher.from_file(detector.get_checkpoint(task_id))
    if checkpoint:
      break
  else:
    # Reported on stderr like the other error exits below.
    print('ERROR: Could not find task.', file=sys.stderr)
    sys.exit(1)

  log_dir = checkpoint.header.log_dir
  process_runs = list(detector.get_process_runs(task_id, log_dir))
  if len(args) == 2:
    process_runs = [(process, run) for (process, run) in process_runs if process == args[1]]

  if not process_runs:
    print('ERROR: No processes found.', file=sys.stderr)
    sys.exit(1)

  processes = {process for process, _ in process_runs}
  if len(processes) != 1:
    print('ERROR: More than one process matches query.', file=sys.stderr)
    sys.exit(1)

  # Exactly one process remains; tail its highest-numbered run.
  process = processes.pop()
  run = max(run for _, run in process_runs)

  logdir = TaskPath(root=root, task_id=task_id, process=process,
     run=run, log_dir=log_dir).getpath('process_logdir')
  logfile = os.path.join(logdir, 'stderr' if options.use_stderr else 'stdout')

  monitor = TaskMonitor(root, task_id)

  def log_is_active():
    # True while the monitor still lists the selected process/run as active.
    for process_status, process_run in monitor.get_active_processes():
      if process_status.process == process and process_run == run:
        return True
    return False

  if not log_is_active():
    print('Tail of terminal log %s' % logfile)
    for line in tail_closed(logfile):
      print(line.rstrip())
    return

  # While following a live log, re-check liveness at most once per interval.
  # The check only runs after a line has been printed.
  check_interval = 5.0
  next_check = time.time() + check_interval
  print('Tail of active log %s' % logfile)
  for line in tail_f(logfile, include_last=True, forever=False):
    print(line.rstrip())
    if time.time() > next_check:
      if not log_is_active():
        break
      next_check = time.time() + check_interval
Example #3
0
def status(args, options):
    """Get the status of task(s).

    Usage: thermos status [options] [task_name(s) or task_regexp(s)]
  """
    # args:    optional regular expressions applied to task ids with
    #          re.match (anchored at the start); with none given, '.*'
    #          matches every task.
    # options: verbose (detail level, see format_task) and only (None,
    #          'active' or 'finished') are read.
    # Exits with status 1 after printing 'No tasks found.' when no section
    # was printed.
    path_detector = get_path_detector()

    def format_task(detector, task_id):
        # Print one task. verbose == 0: id and checkpoint owner only;
        # > 0 adds state and start time; > 1 adds user, ports and sandbox;
        # > 2 adds the per-process table.
        checkpoint_filename = detector.get_checkpoint(task_id)
        checkpoint_stat = os.stat(checkpoint_filename)
        try:
            checkpoint_owner = pwd.getpwuid(checkpoint_stat.st_uid).pw_name
        except KeyError:
            # No passwd entry for this uid: fall back to the numeric id.
            checkpoint_owner = 'uid:%s' % checkpoint_stat.st_uid
        print('  %-20s [owner: %8s]' % (task_id, checkpoint_owner), end='')
        if options.verbose == 0:
            print()
        if options.verbose > 0:
            state = CheckpointDispatcher.from_file(checkpoint_filename)
            if state is None or state.header is None:
                print(' - checkpoint stream CORRUPT or outdated format')
                return
            print('  state: %8s' % TaskState._VALUES_TO_NAMES.get(
                state.statuses[-1].state, 'Unknown'),
                  end='')
            # launch_time_ms is divided by 1000 to get seconds for localtime.
            print(' start: %25s' % time.asctime(
                time.localtime(state.header.launch_time_ms / 1000.0)))
        if options.verbose > 1:
            print('    user: %s' % state.header.user, end='')
            if state.header.ports:
                print(' ports: %s' %
                      ' '.join('%s -> %s' % (key, val)
                               for key, val in state.header.ports.items()))
            else:
                print(' ports: None')
            print('    sandbox: %s' % state.header.sandbox)
        if options.verbose > 2:
            print('    process table:')
            for process, process_history in state.processes.items():
                print('      - %s runs: %s' % (process, len(process_history)),
                      end='')
                last_run = process_history[-1]
                print(' last: pid=%s, rc=%s, finish:%s, state:%s' %
                      (last_run.pid or 'None', last_run.return_code
                       if last_run.return_code is not None else '',
                       time.asctime(time.localtime(last_run.stop_time))
                       if last_run.stop_time else 'None',
                       ProcessState._VALUES_TO_NAMES.get(
                           last_run.state, 'Unknown')))
            print()

    # Must be a real list: the patterns are reused for every task id under
    # every root. A lazy map() object (Python 3) would be exhausted after the
    # first id, so all later tasks would silently fail to match.
    matchers = [re.compile(pattern) for pattern in (args or ['.*'])]

    def matches(task_id):
        return any(pattern.match(task_id) for pattern in matchers)

    active = []
    finished = []

    for root in path_detector.get_paths():
        detector = TaskDetector(root)
        active.extend((detector, t_id)
                      for _, t_id in detector.get_task_ids(state='active')
                      if matches(t_id))
        finished.extend((detector, t_id)
                        for _, t_id in detector.get_task_ids(state='finished')
                        if matches(t_id))

    # Print the sections in a fixed order; options.only restricts the output
    # to a single section.
    sections = (
        ('active', 'Active tasks:', active),
        ('finished', 'Finished tasks:', finished),
    )
    found = False
    for section_state, heading, tasks in sections:
        if options.only is None or options.only == section_state:
            if tasks:
                print(heading)
                found = True
                for detector, task_id in tasks:
                    format_task(detector, task_id)
                print()

    if not found:
        print('No tasks found.')
        sys.exit(1)
Example #4
0
def status(args, options):
  """Get the status of task(s).

    Usage: thermos status [options] [task_name(s) or task_regexp(s)]
  """
  # args:    optional regular expressions applied to task ids with re.match
  #          (anchored at the start); with none given, '.*' matches all.
  # options: verbose (detail level, see format_task) and only (None,
  #          'active' or 'finished') are read.
  # Exits with status 1 after printing 'No tasks found.' when no section was
  # printed.
  path_detector = get_path_detector()

  def format_task(detector, task_id):
    # Print one task. verbose == 0: id and checkpoint owner only; > 0 adds
    # state and start time; > 1 adds user, ports and sandbox; > 2 adds the
    # per-process table.
    checkpoint_filename = detector.get_checkpoint(task_id)
    checkpoint_stat = os.stat(checkpoint_filename)
    try:
      checkpoint_owner = pwd.getpwuid(checkpoint_stat.st_uid).pw_name
    except KeyError:
      # No passwd entry for this uid: fall back to the numeric id.
      checkpoint_owner = 'uid:%s' % checkpoint_stat.st_uid
    print('  %-20s [owner: %8s]' % (task_id, checkpoint_owner), end='')
    if options.verbose == 0:
      print()
    if options.verbose > 0:
      state = CheckpointDispatcher.from_file(checkpoint_filename)
      if state is None or state.header is None:
        print(' - checkpoint stream CORRUPT or outdated format')
        return
      print('  state: %8s' % TaskState._VALUES_TO_NAMES.get(state.statuses[-1].state, 'Unknown'),
        end='')
      # launch_time_ms is divided by 1000 to get seconds for localtime.
      print(' start: %25s' % time.asctime(time.localtime(state.header.launch_time_ms / 1000.0)))
    if options.verbose > 1:
      print('    user: %s' % state.header.user, end='')
      if state.header.ports:
        print(' ports: %s' % ' '.join('%s -> %s' % (key, val)
                                         for key, val in state.header.ports.items()))
      else:
        print(' ports: None')
      print('    sandbox: %s' % state.header.sandbox)
    if options.verbose > 2:
      print('    process table:')
      for process, process_history in state.processes.items():
        print('      - %s runs: %s' % (process, len(process_history)), end='')
        last_run = process_history[-1]
        print(' last: pid=%s, rc=%s, finish:%s, state:%s' % (
          last_run.pid or 'None',
          last_run.return_code if last_run.return_code is not None else '',
          time.asctime(time.localtime(last_run.stop_time)) if last_run.stop_time else 'None',
          ProcessState._VALUES_TO_NAMES.get(last_run.state, 'Unknown')))
      print()

  # Must be a real list: the patterns are reused for every task id under every
  # root. A lazy map() object (Python 3) would be exhausted after the first
  # id, so all later tasks would silently fail to match.
  matchers = [re.compile(pattern) for pattern in (args or ['.*'])]

  def matches(task_id):
    return any(pattern.match(task_id) for pattern in matchers)

  active = []
  finished = []

  for root in path_detector.get_paths():
    detector = TaskDetector(root)
    active.extend((detector, t_id) for _, t_id in detector.get_task_ids(state='active')
        if matches(t_id))
    finished.extend((detector, t_id) for _, t_id in detector.get_task_ids(state='finished')
        if matches(t_id))

  # Print the sections in a fixed order; options.only restricts the output to
  # a single section.
  sections = (
    ('active', 'Active tasks:', active),
    ('finished', 'Finished tasks:', finished),
  )
  found = False
  for section_state, heading, tasks in sections:
    if options.only is None or options.only == section_state:
      if tasks:
        print(heading)
        found = True
        for detector, task_id in tasks:
          format_task(detector, task_id)
        print()

  if not found:
    print('No tasks found.')
    sys.exit(1)