예제 #1
0
    async def filter_grep(self,
                          params,
                          filter_expr,
                          ignore_case=False,
                          invert_match=False,
                          line_regexp=False):
        """Select parameters by matching a regular expression against their names.

        Args:
            params: Iterable of parameters; each name is obtained via get_name().
            filter_expr: Regular expression source, compiled with re.compile().
            ignore_case: When true, compile the pattern with re.I.
            invert_match: When true, keep the parameters that do NOT match.
            line_regexp: When true, the whole name must match (fullmatch);
                otherwise a match anywhere in the name suffices (search).

        Returns:
            A new list with the selected parameters in their original order.
        """
        regex = re.compile(filter_expr, re.I if ignore_case else 0)
        matcher = regex.fullmatch if line_regexp else regex.search

        # Keep matches normally, or non-matches when the selection is inverted.
        keep_on_match = not invert_match

        return [
            param for param in params
            if (matcher(get_name(param)) is not None) == keep_on_match
        ]
예제 #2
0
  async def _handle_du(self):
    """summarize parameters with time information"""
    # NOTE(review): docstring left verbatim; its lowercase phrasing suggests it
    # may be surfaced as CLI help text -- confirm before rewording.
    #
    # Prints one row per parameter: padded parameter ID, hash ID, finish time
    # (shown through format_local when self.args.local is set), duration,
    # success state, and the parameter name.
    params = await self._execute_chain('params')
    id_width = self._get_param_id_max_len(params)

    for param in params:
      meta = param['_']

      # Columns are collected in display order and joined once per row.
      columns = [
        f'{get_param_id(param):{id_width}} ',
        f'{get_hash_id(param)} ',
      ]

      finished = meta['finished']
      if finished is None:
        # Blank filler as wide as a bracketed 19-character timestamp column.
        columns.append(' ' * (19 + 3))
      else:
        if self.args.local:
          finished = format_local(parse_utc(finished))
        # Only the text before the first '.' is displayed.
        columns.append(f"[{finished.partition('.')[0]:>19}] ")

      duration = meta['duration']
      if duration is None:
        # Blank filler as wide as a bracketed 7-character duration column.
        columns.append(' ' * (7 + 3))
      else:
        columns.append(f"[{format_sec_short(duration):>7}] ")

      succeeded = meta['succeeded']
      if succeeded is None:
        columns.append('           ')
      elif succeeded:
        columns.append('succeeded  ')
      else:
        columns.append('FAILED     ')

      columns.append(get_name(param))

      print(''.join(columns))
예제 #3
0
파일: queue.py 프로젝트: hyeontaek/exptools
  def _get_state_fast(self):
    """Return a copy of the queue state with each job's parameter reduced to a stub.

    The caller must already hold self.lock (checked by an assertion).

    Returns:
      A dict with 'finished_jobs', 'started_jobs' and 'queued_jobs' as lists of
      shallow job copies, plus 'concurrency' and 'next_job_id' taken from
      self._state. In every job copy, 'param' is replaced by a small dict that
      carries only the parameter ID, hash ID and name.
    """
    assert self.lock.locked()

    def strip_param(job):
      """Shallow-copy a job and swap its parameter for an identifier-only stub."""
      slim_job = dict(job)
      full_param = slim_job['param']
      slim_job['param'] = {
        '_': {
          'param_id': get_param_id(full_param),
          'hash_id': get_hash_id(full_param),
        },
        '_name': get_name(full_param),
      }
      return slim_job

    concurrency = self._state['concurrency']
    next_job_id = self._state['next_job_id']

    # Job lists come first so the resulting key order matches the usual layout.
    state = {
      key: [strip_param(job) for job in self._state[key].values()]
      for key in ('finished_jobs', 'started_jobs', 'queued_jobs')
    }
    state['concurrency'] = concurrency
    state['next_job_id'] = next_job_id

    return state
예제 #4
0
  async def _handle_d(self):
    """summarize parameters"""
    # NOTE(review): docstring left verbatim; its lowercase phrasing suggests it
    # may be surfaced as CLI help text -- confirm before rewording.
    #
    # Prints one row per parameter: padded parameter ID, hash ID, and name.
    params = await self._execute_chain('params')
    id_width = self._get_param_id_max_len(params)

    for param in params:
      id_column = f'{get_param_id(param):{id_width}} '
      hash_column = f'{get_hash_id(param)}  '
      print(id_column + hash_column + get_name(param))
예제 #5
0
  def _construct_env(job, job_paths):
    """Build the environment mapping for a job's process.

    Starts from a copy of the current process environment and adds EXPTOOLS_*
    entries describing the job, its parameter and the paths of its job files.

    NOTE(review): takes no self although it sits at method indentation --
    presumably decorated as a staticmethod outside the lines shown; confirm.

    Args:
      job: Mapping with at least 'param' and 'job_id'.
      job_paths: Mapping with 'job_dir', 'job.json', 'param.json',
        'resources.json' and 'status.json' entries.

    Returns:
      A new dict; os.environ itself is not modified.
    """
    param = job['param']

    env = dict(os.environ)
    env.update({
      # Job and parameter identity
      'EXPTOOLS_JOB_DIR': job_paths['job_dir'],
      'EXPTOOLS_JOB_ID': job['job_id'],
      'EXPTOOLS_PARAM_ID': get_param_id(param),
      'EXPTOOLS_HASH_ID': get_hash_id(param),
      'EXPTOOLS_NAME': get_name(param),
      # Execution settings; falls back to the current directory when the
      # parameter does not provide a working directory
      'EXPTOOLS_CWD': get_cwd(param) or os.getcwd(),
      'EXPTOOLS_RETRY': str(get_retry(param)),
      'EXPTOOLS_RETRY_DELAY': str(get_retry_delay(param)),
      'EXPTOOLS_TIME_LIMIT': str(get_time_limit(param)),
      # Paths of the per-job JSON files
      'EXPTOOLS_JOB_JSON_PATH': job_paths['job.json'],
      'EXPTOOLS_PARAM_JSON_PATH': job_paths['param.json'],
      'EXPTOOLS_RESOURCES_JSON_PATH': job_paths['resources.json'],
      'EXPTOOLS_STATUS_JSON_PATH': job_paths['status.json'],
    })
    return env
예제 #6
0
  async def _handle_status(self):
    """show the queue state"""
    # NOTE(review): docstring left verbatim; its lowercase phrasing suggests it
    # may be surfaced as CLI help text -- confirm before rewording.
    #
    # For every queue state yielded by self._get_queue_state(), this builds one
    # text report (finished / started / queued sections, as selected by
    # self.args.job_types, followed by an estimated-time line) and prints it.

    # A truthy limit caps how many jobs are listed in each section.
    limit = self.args.limit
    use_similar = self.args.similar

    if self.args.job_types == 'all':
      job_types = {'finished', 'started', 'queued'}
    else:
      job_types = set(self.args.job_types)

    estimator = Estimator(self.client.registry, self.client.history)
    use_color = self.common_args.color == 'yes'

    # Choose the colorizer once; the fallback accepts and ignores the styling
    # arguments so call sites stay identical in both modes.
    if use_color:
      colored = termcolor.colored
    else:
      def colored(s, *args, **kwargs):  # pylint: disable=invalid-name,unused-argument
        """Use no color."""
        return s

    async for queue_state in self._get_queue_state():
      oneshot = await self.client.scheduler.is_oneshot()

      output = ''

      # Column widths are computed over every job, including sections that are
      # not displayed, so the columns line up across sections.
      all_jobs = (queue_state['finished_jobs'] +
                  queue_state['started_jobs'] +
                  queue_state['queued_jobs'])
      all_params = [job['param'] for job in all_jobs]

      job_id_max_len = self._get_job_id_max_len(all_jobs)
      param_id_max_len = self._get_param_id_max_len(all_params)

      if 'finished' in job_types:
        succeeded_count = len([job for job in queue_state['finished_jobs'] if job['succeeded']])
        failed_count = len(queue_state['finished_jobs']) - succeeded_count
        # The section header is red as soon as any finished job has failed.
        finished_jobs_color = 'red' if failed_count else 'green'
        output += colored(
          f"Finished jobs (S:{succeeded_count} / F:{failed_count})",
          finished_jobs_color, attrs=['reverse']) + '\n'

        # The ellipsis goes first because the hidden jobs precede the shown ones.
        if limit and len(queue_state['finished_jobs']) > limit:
          line = colored('  ', finished_jobs_color, attrs=['reverse'])
          output += line + ' ...\n'

        jobs = queue_state['finished_jobs']
        if limit:
          # Keep only the last `limit` finished jobs.
          jobs = jobs[-limit:]

        for job in jobs:
          if job['succeeded']:
            line = colored('  ', 'green', attrs=['reverse'])
          else:
            line = colored('  ', 'red', attrs=['reverse'])

          param_id = get_param_id(job['param'])
          hash_id = get_hash_id(job['param'])
          name = get_name(job['param'])

          line += f" {job['job_id']:{job_id_max_len}} {param_id:{param_id_max_len}} {hash_id}"
          line += f' [{format_sec_short(job_elapsed_time(job)):>7}]'
          if job['succeeded']:
            line += ' succeeded  '
          else:
            line += ' FAILED     '
          line += f"{name}"
          output += line + '\n'

        output += '\n'

      # rem_map is indexed by job ID below. The meaning of the second positional
      # flag (False here, True in oneshot mode further down) is defined by
      # Estimator and is not visible from this method.
      remaining_time, rem_map = (
        await estimator.estimate_remaining_time(queue_state, False, use_similar))
      # last_rem carries over from the started section into the queued section,
      # so each row shows the difference from the previously listed row's value.
      last_rem = 0.

      if 'started' in job_types:
        output += colored(
          f"Started jobs (A:{len(queue_state['started_jobs'])})",
          'cyan', attrs=['reverse']) + '\n'

        if limit and len(queue_state['started_jobs']) > limit:
          line = colored('  ', 'cyan', attrs=['reverse'])
          output += line + ' ...\n'

        jobs = queue_state['started_jobs']
        if limit:
          # Keep only the last `limit` started jobs.
          jobs = jobs[-limit:]

        for job in jobs:
          rem = rem_map[job['job_id']]

          param_id = get_param_id(job['param'])
          hash_id = get_hash_id(job['param'])
          name = get_name(job['param'])

          line = colored('  ', 'cyan', attrs=['reverse'])
          line += f" {job['job_id']:{job_id_max_len}} {param_id:{param_id_max_len}} {hash_id}"
          # Elapsed time, then the increment over the previous row (never negative).
          line += f' [{format_sec_short(job_elapsed_time(job)):>7}]'
          line += f'+[{format_sec_short(max(rem - last_rem, 0)):>7}]'
          line += '  '
          last_rem = rem
          line += f"{name}"
          output += line + '\n'

        output += '\n'

      if 'queued' in job_types:
        output += colored(
          f"Queued jobs (Q:{len(queue_state['queued_jobs'])})",
          'blue', attrs=['reverse']) + '  '

        # The scheduler status shares the line with the queued-section header.
        output += 'Scheduler: '
        if oneshot:
          output += colored('Oneshot', 'blue')
        elif await self.client.scheduler.is_running():
          output += colored('Running', 'cyan')
        else:
          output += colored('Stopped', 'red')
        output += '\n'

        jobs = queue_state['queued_jobs']
        if limit:
          # Keep only the first `limit` queued jobs.
          jobs = jobs[:limit]

        for job in jobs:
          rem = rem_map[job['job_id']]

          param_id = get_param_id(job['param'])
          hash_id = get_hash_id(job['param'])
          name = get_name(job['param'])

          line = colored('  ', 'blue', attrs=['reverse'])
          line += f" {job['job_id']:{job_id_max_len}} {param_id:{param_id_max_len}} {hash_id}"
          # Blank padding occupies the elapsed-time column used by started jobs.
          line += f'           [{format_sec_short(max(rem - last_rem, 0)):>7}]'
          line += '  '
          last_rem = rem
          line += f"{name}"
          output += line + '\n'

        # The ellipsis goes last because the hidden jobs follow the shown ones.
        if limit and len(queue_state['queued_jobs']) > limit:
          line = colored('  ', 'blue', attrs=['reverse'])
          output += line + ' ...\n'

        output += '\n'

      # output += f"Concurrency: {queue_state['concurrency']}"

      if not oneshot:
        # reuse remaining_time
        pass
      else:
        # In oneshot mode the estimate is recomputed with the flag set to True.
        remaining_time, _ = await estimator.estimate_remaining_time(queue_state, True, use_similar)
      output += await format_estimated_time(remaining_time, queue_state, use_color) + '\n'

      if self.args.clear_screen:
        os.system('clear')
      print(output)

      # With stop_empty set, stop once nothing is running and either the
      # scheduler is in oneshot mode or nothing is left in the queue.
      if (self.args.stop_empty and
          not queue_state['started_jobs'] and (oneshot or not queue_state['queued_jobs'])):
        break
예제 #7
0
  async def _try(self, job, job_id, param, current_retry):
    """Run one attempt of a job and report whether it succeeded.

    Prepares the job directory and files, launches the parameter's command as
    a subprocess with stdin disabled and stdout/stderr taken from
    self.output.open_job_stdio(), watches status changes while it runs, and
    enforces the parameter's time limit (a limit <= 0 means no limit).

    Whatever happens, a process that is still alive is sent SIGTERM, given up
    to 10 seconds to exit, and then sent SIGKILL.

    Args:
      job: Job record passed on to the output helpers and _construct_env().
      job_id: Job identifier used for logging and queue/status updates.
      param: Parameter of the job; also supplies the fields substituted into
        the command arguments via str.format().
      current_retry: Retry counter passed to make_job_directory().

    Returns:
      True only if the process exited with return code 0 and the result
      symlinks were created; False otherwise (non-zero exit, timeout, or an
      exception that was logged).

    Raises:
      asyncio.CancelledError: Passed through when this coroutine is cancelled.
    """

    param_id = get_param_id(param)
    hash_id = get_hash_id(param)

    name = get_name(param)
    expanded_command = [arg.format(**param) for arg in get_command(param)]
    cwd = get_cwd(param) or os.getcwd()
    time_limit = get_time_limit(param)

    succeeded = False

    try:
      self.logger.info(f'Launching job {job_id}: {name}')

      job_paths = await self.output.make_job_directory(job, current_retry)
      job_paths = await self.output.create_job_files(job, job_paths)

      env = self._construct_env(job, job_paths)

      with self.output.open_job_stdio(job_paths) as stdio:
        stdout, stderr = stdio

        await self.output.make_tmp_symlinks(param_id, hash_id, job_paths)

        # Launch process.
        # No explicit loop argument: it was removed from this API in
        # Python 3.10, and inside a coroutine the running loop is used.
        proc = await asyncio.create_subprocess_exec(
          *expanded_command,
          cwd=cwd,
          stdin=asyncio.subprocess.DEVNULL,
          stdout=stdout,
          stderr=stderr,
          env=env)

        await self.queue.set_pid(job_id, proc.pid)

        # Watch status changes
        status_task = asyncio.ensure_future(
          self._watch_status(job_id, job_paths), loop=self.loop)

        try:
          if time_limit <= 0:
            await proc.communicate()
          else:
            # The loop argument of wait_for was removed in Python 3.10 as well.
            await asyncio.wait_for(proc.communicate(), time_limit)

        except asyncio.TimeoutError:
          self.logger.error(f'Timeout while waiting for job {job_id}')

        finally:
          status_task.cancel()
          try:
            await status_task
          except asyncio.CancelledError:
            # Ignore CancelledError because we caused it. This must be the
            # asyncio exception class: since Python 3.8 it is a BaseException
            # distinct from concurrent.futures.CancelledError, and missing it
            # here would skip the process termination below.
            pass

          # Graceful termination first, with a bounded wait.
          if proc.returncode is None:
            try:
              proc.send_signal(signal.SIGTERM)
            except Exception:  # pylint: disable=broad-except
              self.logger.exception('Exception while killing process')

            try:
              await asyncio.wait_for(proc.wait(), 10)
            except Exception:  # pylint: disable=broad-except
              self.logger.exception('Exception while waiting for process')

          # Still alive after SIGTERM: force-kill and wait without a limit.
          if proc.returncode is None:
            try:
              proc.send_signal(signal.SIGKILL)
            except Exception:  # pylint: disable=broad-except
              self.logger.exception('Exception while killing process')

            try:
              await proc.wait()
            except Exception:  # pylint: disable=broad-except
              self.logger.exception('Exception while waiting for process')

      # Read status before making the job finished
      await self._read_status(job_id, job_paths)

      if proc.returncode == 0:
        await self.output.make_symlinks(param_id, hash_id, job_paths)

        succeeded = True

    except asyncio.CancelledError:
      # Pass through
      raise

    except Exception:  # pylint: disable=broad-except
      self.logger.exception(f'Exception while running job {job_id}')

    finally:
      await self.output.remove_tmp_symlinks(param_id, hash_id)

    return succeeded