def tasks_from_re(expressions, root, state=None):
  """Return the set of task ids under `root` that match any of `expressions`.

  Every expression is compiled with re.compile and applied with match(), so a
  pattern has to match from the beginning of the task id.  `state` is handed
  straight to TaskDetector.get_task_ids to select the candidate tasks.
  """
  detector = TaskDetector(root=root)
  candidates = [task_id for _, task_id in detector.get_task_ids(state=state)]
  patterns = [re.compile(expression) for expression in expressions]
  return {task_id for task_id in candidates
          if any(pattern.match(task_id) for pattern in patterns)}
 def partition_tasks(self):
     """Discover the tasks under the checkpoint root, split by state.

     Returns a pair (active_task_ids, finished_task_ids), each a set.
     """
     detector = TaskDetector(root=self._checkpoint_root)

     def task_ids_in(state):
         # get_task_ids yields (state, task_id) pairs; only the id is kept.
         return {task_id for _, task_id in detector.get_task_ids(state=state)}

     return task_ids_in('active'), task_ids_in('finished')
def tail(args, options):
  """Tail the logs of a task process.

    Usage: thermos tail task_name [process_name]
  """
  # Argument validation: exactly one task, optionally followed by a process.
  if not args:
    app.error('Expected a task to tail, got nothing!')
  if len(args) not in (1, 2):
    app.error('Expected at most two arguments (task and optional process), got %d' % len(args))

  task_id = args[0]
  detector = TaskDetector(root=options.root)
  checkpoint = CheckpointDispatcher.from_file(detector.get_checkpoint(task_id))
  log_dir = checkpoint.header.log_dir

  # Collect (process, run) pairs, narrowed to the requested process if given.
  candidates = list(detector.get_process_runs(task_id, log_dir))
  if len(args) == 2:
    wanted = args[1]
    candidates = [(name, number) for name, number in candidates if name == wanted]

  if not candidates:
    print('ERROR: No processes found.', file=sys.stderr)
    sys.exit(1)

  # Exactly one distinct process must remain, otherwise the query is ambiguous.
  names = {name for name, _ in candidates}
  if len(names) != 1:
    print('ERROR: More than one process matches query.', file=sys.stderr)
    sys.exit(1)

  process = names.pop()
  # Follow the highest-numbered run of that process.
  run = max(number for _, number in candidates)

  task_path = TaskPath(
      root=options.root, task_id=task_id, process=process, run=run, log_dir=log_dir)
  logdir = task_path.getpath('process_logdir')
  stream = 'stderr' if options.use_stderr else 'stdout'
  logfile = os.path.join(logdir, stream)

  monitor = TaskMonitor(TaskPath(root=options.root), task_id)

  def log_is_active():
    # True while the monitor still lists the selected process/run as active.
    return any(status.process == process and active_run == run
               for status, active_run in monitor.get_active_processes())

  if not log_is_active():
    print('Tail of terminal log %s' % logfile)
    for line in tail_closed(logfile):
      print(line.rstrip())
    return

  check_interval = 5.0
  next_check = time.time() + check_interval
  print('Tail of active log %s' % logfile)
  for line in tail_f(logfile, include_last=True, forever=False):
    print(line.rstrip())
    # Re-check liveness at most once per interval, and only after a line arrives.
    if time.time() <= next_check:
      continue
    if not log_is_active():
      break
    next_check = time.time() + check_interval
Exemple #4
0
  def _refresh_detectors(self):
    """Bring self._detectors in line with the roots the path detector reports.

    Entries whose root is no longer reported are removed, and a TaskDetector is
    created for every newly reported root.  Roots present on both sides keep
    their existing detector.
    """
    reported = set(self._path_detector.get_paths())
    tracked = set(self._detectors)

    for stale_root in tracked.difference(reported):
      self._detectors.pop(stale_root)

    for fresh_root in reported.difference(tracked):
      self._detectors[fresh_root] = TaskDetector(root=fresh_root)
 def __init__(self, root, resource_monitor_class=TaskResourceMonitor):
   """Initialize the instance for the checkpoint root `root`.

   `resource_monitor_class` must be a subclass of ResourceMonitorBase.  Only
   the class itself is stored here; it is not instantiated in this method.

   Raises ValueError if `resource_monitor_class` is not a ResourceMonitorBase
   subclass.
   """
   self._pathspec = TaskPath(root=root)
   self._detector = TaskDetector(root)
   # Validate the monitor class up front so a bad argument fails at construction.
   if not issubclass(resource_monitor_class, ResourceMonitorBase):
     raise ValueError("resource monitor class must implement ResourceMonitorBase!")
   self._resource_monitor = resource_monitor_class
   self._active_tasks = {}    # task_id => ActiveObservedTask
   self._finished_tasks = {}  # task_id => FinishedObservedTask
   self._stop_event = threading.Event()
   # Both bases are initialized explicitly, after the attributes above are set.
   ExceptionalThread.__init__(self)
   Lockable.__init__(self)
   # NOTE(review): presumably marks the thread as a daemon so it does not block
   # interpreter exit -- confirm against ExceptionalThread.
   self.daemon = True
Exemple #6
0
  def partition_tasks(self):
    """Discover tasks under every checkpoint root, split by state.

    Returns a pair (active, finished) of sets of RootedTask(root, task_id).
    """
    active, finished = set(), set()

    for root in self._path_detector.get_paths():
      detector = TaskDetector(root=root)

      # get_task_ids yields (state, task_id) pairs; only the id is needed.
      for _, task_id in detector.get_task_ids(state='active'):
        active.add(RootedTask(root, task_id))
      for _, task_id in detector.get_task_ids(state='finished'):
        finished.add(RootedTask(root, task_id))

    return active, finished
Exemple #7
0
def tasks_from_re(expressions, state=None):
    """Return the (root, task_id) pairs whose task id matches any expression.

    Every root reported by get_path_detector() is scanned with a TaskDetector.
    `state` is passed through to TaskDetector.get_task_ids to select the
    candidate tasks.  Expressions are applied with match(), so a pattern must
    match from the beginning of the task id.

    Raises re.error if any expression is not a valid regular expression.
    """
    path_detector = get_path_detector()

    # Compile once, before the root loop.  Compiling inside the loop repeated
    # the work for every root, and a one-shot iterable passed as `expressions`
    # would have been exhausted after the first root.
    patterns = [re.compile(expression) for expression in expressions]

    matched_tasks = set()
    for root in path_detector.get_paths():
        for _, task_id in TaskDetector(root).get_task_ids(state=state):
            if any(pattern.match(task_id) for pattern in patterns):
                matched_tasks.add((root, task_id))

    return matched_tasks
def status(args, options):
  """Get the status of task(s).

    Usage: thermos status [options] [task_name(s) or task_regexp(s)]

    Options:
      --verbosity=LEVEL     Verbosity level for logging. [default: 0]
      --only=TYPE           Only print tasks of TYPE (options: active finished)
  """
  detector = TaskDetector(root=options.root)

  def format_task(task_id):
    # Print one task; the amount of detail grows with options.verbose.
    checkpoint_filename = detector.get_checkpoint(task_id)
    checkpoint_stat = os.stat(checkpoint_filename)
    try:
      checkpoint_owner = pwd.getpwuid(checkpoint_stat.st_uid).pw_name
    except KeyError:
      # No passwd entry for this uid: fall back to the numeric id.
      checkpoint_owner = 'uid:%s' % checkpoint_stat.st_uid
    print('  %-20s [owner: %8s]' % (task_id, checkpoint_owner), end='')
    if options.verbose == 0:
      print()
    if options.verbose > 0:
      state = CheckpointDispatcher.from_file(checkpoint_filename)
      if state is None or state.header is None:
        print(' - checkpoint stream CORRUPT or outdated format')
        return
      print('  state: %8s' % TaskState._VALUES_TO_NAMES.get(state.statuses[-1].state, 'Unknown'),
        end='')
      print(' start: %25s' % time.asctime(time.localtime(state.header.launch_time_ms / 1000.0)))
    # The higher verbosity levels reuse `state`, which was loaded above.
    if options.verbose > 1:
      print('    user: %s' % state.header.user, end='')
      if state.header.ports:
        print(' ports: %s' % ' '.join('%s -> %s' % (key, val)
                                         for key, val in state.header.ports.items()))
      else:
        print(' ports: None')
      print('    sandbox: %s' % state.header.sandbox)
    if options.verbose > 2:
      print('    process table:')
      for process, process_history in state.processes.items():
        print('      - %s runs: %s' % (process, len(process_history)), end='')
        last_run = process_history[-1]
        print(' last: pid=%s, rc=%s, finish:%s, state:%s' % (
          last_run.pid or 'None',
          last_run.return_code if last_run.return_code is not None else '',
          time.asctime(time.localtime(last_run.stop_time)) if last_run.stop_time else 'None',
          ProcessState._VALUES_TO_NAMES.get(last_run.state, 'Unknown')))
      print()

  # Materialize the compiled patterns in a list.  On Python 3 map() returns a
  # one-shot iterator, which the first any() below would consume, so every
  # later task id (and the whole "finished" list) would be filtered against an
  # exhausted iterator and silently dropped.
  matchers = [re.compile(expression) for expression in (args or ['.*'])]

  def matching_task_ids(state):
    # Task ids in `state` whose id matches at least one requested pattern.
    return [t_id for _, t_id in detector.get_task_ids(state=state)
            if any(pattern.match(t_id) for pattern in matchers)]

  active = matching_task_ids('active')
  finished = matching_task_ids('finished')

  found = False
  if options.only is None or options.only == 'active':
    if active:
      print('Active tasks:')
      found = True
      for task_id in active:
        format_task(task_id)
      print()

  if options.only is None or options.only == 'finished':
    if finished:
      print('Finished tasks:')
      found = True
      for task_id in finished:
        format_task(task_id)
      print()

  if not found:
    print('No tasks found in root [%s]' % options.root)
    sys.exit(1)
Exemple #9
0
def test_task_detector():
  """Check TaskDetector against an empty root, then against a populated one."""
  with temporary_dir() as root:
    active_log_dir = os.path.join(root, 'active_log')
    finished_log_dir = os.path.join(root, 'finished_log')

    path = TaskPath(root=root)

    # Expected locations, computed once and reused for creation and assertions.
    active_checkpoint = path.given(task_id='active_task').getpath('runner_checkpoint')
    finished_checkpoint = path.given(task_id='finished_task').getpath('runner_checkpoint')
    hello_checkpoint = path.given(
        task_id='active_task', process='hello_world').getpath('process_checkpoint')
    goodbye_checkpoint = path.given(
        task_id='finished_task', process='goodbye_world').getpath('process_checkpoint')
    hello_stdout = path.given(
        task_id='active_task',
        process='hello_world',
        run='0',
        log_dir=active_log_dir
    ).with_filename('stdout').getpath('process_logdir')
    goodbye_stderr = path.given(
        task_id='finished_task',
        process='goodbye_world',
        run='1',
        log_dir=finished_log_dir
    ).with_filename('stderr').getpath('process_logdir')

    # Nothing exists on disk yet, so every query must come back empty.
    empty_detector = TaskDetector(root)

    assert list(empty_detector.get_task_ids(state='active')) == []
    assert list(empty_detector.get_task_ids(state='finished')) == []
    assert set(empty_detector.get_task_ids()) == set()

    assert empty_detector.get_checkpoint(task_id='active_task') == active_checkpoint
    assert empty_detector.get_checkpoint(task_id='finished_task') == finished_checkpoint

    assert set(empty_detector.get_process_checkpoints('active_task')) == set()
    assert set(empty_detector.get_process_checkpoints('finished_task')) == set()
    assert set(empty_detector.get_process_runs('active_task', active_log_dir)) == set()
    assert set(empty_detector.get_process_runs('finished_task', finished_log_dir)) == set()
    assert set(empty_detector.get_process_logs('active_task', active_log_dir)) == set()
    assert set(empty_detector.get_process_logs('finished_task', finished_log_dir)) == set()

    # Populate the root with one active and one finished task.
    created = [
        path.given(state='active', task_id='active_task').getpath('task_path'),
        path.given(state='finished', task_id='finished_task').getpath('task_path'),
        active_checkpoint,
        finished_checkpoint,
        hello_stdout,
        goodbye_stderr,
        hello_checkpoint,
        goodbye_checkpoint,
    ]
    for filename in created:
      touch(filename)

    detector = TaskDetector(root)

    assert list(detector.get_task_ids(state='active')) == [('active', 'active_task')]
    assert list(detector.get_task_ids(state='finished')) == [('finished', 'finished_task')]
    assert set(detector.get_task_ids()) == {
        ('active', 'active_task'), ('finished', 'finished_task')}

    assert list(detector.get_process_checkpoints('active_task')) == [hello_checkpoint]
    assert list(detector.get_process_checkpoints('finished_task')) == [goodbye_checkpoint]

    assert list(detector.get_process_runs('active_task', active_log_dir)) == [
        ('hello_world', 0)]
    assert list(detector.get_process_runs('finished_task', finished_log_dir)) == [
        ('goodbye_world', 1)]

    assert list(detector.get_process_logs('active_task', active_log_dir)) == [hello_stdout]
    assert list(detector.get_process_logs('finished_task', finished_log_dir)) == [
        goodbye_stderr]