Esempio n. 1
0
 def start(self):
   """Starts the local Swarming bot."""
   assert not self._proc
   bot_zip = os.path.join(self._tmpdir, 'swarming_bot.zip')
   urllib.urlretrieve(self._swarming_server_url + '/bot_code', bot_zip)
   cmd = [sys.executable, bot_zip, 'start_slave']
   if self._redirect:
     logs = os.path.join(self._tmpdir, 'logs')
     if not os.path.isdir(logs):
       os.mkdir(logs)
     with open(os.path.join(logs, 'bot_stdout.log'), 'wb') as f:
       self._proc = subprocess42.Popen(
           cmd, cwd=self._tmpdir, stdout=f, stderr=f, detached=True)
   else:
     self._proc = subprocess42.Popen(cmd, cwd=self._tmpdir, detached=True)
Esempio n. 2
0
    def test_run_bot_signal(self):
        # Test SIGTERM signal handling. Run it as an external process to not mess
        # things up.
        proc = subprocess42.Popen(
            [sys.executable, self._zip_file, 'start_slave'],
            stdout=subprocess42.PIPE,
            stderr=subprocess42.PIPE,
            detached=True)

        # Wait for the grand-child process to poll the server.
        self._server.has_polled.wait(60)
        self.assertEqual(True, self._server.has_polled.is_set())
        proc.terminate()
        out, _ = proc.communicate()
        self.assertEqual(0, proc.returncode)
        self.assertEqual('', out)
        events = self._server.get_events()
        for event in events:
            event.pop('dimensions')
            event.pop('state')
            event.pop('version')
        expected = [
            {
                u'event': u'bot_shutdown',
                u'message': u'Signal was received',
            },
        ]
        if sys.platform == 'win32':
            # Sadly, the signal handler generate an error.
            # TODO(maruel): Fix one day.
            self.assertEqual(u'bot_error', events.pop(0)['event'])
        self.assertEqual(expected, events)
Esempio n. 3
0
def exec_python(args):
    """Executes a python process, replacing the current process if possible.

  On Windows, it returns the child process code. The caller must exit at the
  earliest opportunity.
  """
    cmd = [sys.executable] + args
    if sys.platform not in ('cygwin', 'win32'):
        os.execv(cmd[0], cmd)
        return 1

    try:
        # On Windows, we cannot sanely exec() so shell out the child process
        # instead. But we need to forward any signal received that the bot may care
        # about. This means processes accumulate, sadly.
        # TODO(maruel): If stdin closes, it tells the child process that the parent
        # process died.
        proc = subprocess42.Popen(cmd, detached=True, stdin=subprocess42.PIPE)

        def handler(sig, _):
            logging.info('Got signal %s', sig)
            # Always send SIGTERM, which is properly translated.
            proc.send_signal(signal.SIGTERM)

        sig = signal.SIGBREAK if sys.platform == 'win32' else signal.SIGTERM
        with subprocess42.set_signal_handler([sig], handler):
            proc.wait()
            return proc.returncode
    except Exception as e:
        logging.exception('failed to start: %s', e)
        # Swallow the exception.
        return 1
Esempio n. 4
0
    def _run(self, command, args):
        """Runs swarming.py and capture the stdout to a log file.

    The log file will be printed by the test framework in case of failure or
    verbose mode.

    Returns:
      The process exit code.
    """
        name = os.path.join(self._tmpdir, 'client_%d.log' % self._index)
        self._index += 1
        cmd = [
            sys.executable,
            'swarming.py',
            command,
            '-S',
            self._swarming_server,
            '--verbose',
        ] + args
        with open(name, 'wb') as f:
            f.write('\nRunning: %s\n' % ' '.join(cmd))
            f.flush()
            p = subprocess42.Popen(cmd,
                                   stdout=f,
                                   stderr=subprocess42.STDOUT,
                                   cwd=CLIENT_DIR)
            p.communicate()
            return p.returncode
    def test_containment_auto_limit_process(self):
        # Process creates a children process. It should fail, throwing not enough
        # quota.
        cmd = self._cmd_print_good()
        containment = subprocess42.Containment(
            containment_type=subprocess42.Containment.JOB_OBJECT,
            limit_processes=1)
        start = lambda: subprocess42.Popen(cmd,
                                           stdout=subprocess42.PIPE,
                                           stderr=subprocess42.PIPE,
                                           containment=containment)

        if sys.platform == 'win32':
            p = start()
            out, err = p.communicate()
            self.assertEqual(1, p.returncode)
            self.assertEqual('', out)
            self.assertIn('WindowsError', err)
            # Value for ERROR_NOT_ENOUGH_QUOTA. See
            # https://docs.microsoft.com/windows/desktop/debug/system-error-codes--1700-3999-
            self.assertIn('1816', err)
        else:
            # JOB_OBJECT is not usable on non-Windows.
            with self.assertRaises(NotImplementedError):
                start()
Esempio n. 6
0
  def test_kill_and_wait(self):
    # Test the case where the script swallows the SIGTERM/SIGBREAK signal and
    # hangs.
    script = os.path.join(self.root_dir, 'ignore_sigterm.py')
    with open(script, 'wb') as f:
      # The warning signal is received as SIGTERM on posix and SIGBREAK on
      # Windows.
      sig = 'SIGBREAK' if sys.platform == 'win32' else 'SIGTERM'
      f.write((
          'import signal, sys, time\n'
          'def handler(_signum, _frame):\n'
          '  sys.stdout.write("got it\\n")\n'
          'signal.signal(signal.%s, handler)\n'
          'sys.stdout.write("ok\\n")\n'
          'while True:\n'
          '  try:\n'
          '    time.sleep(1)\n'
          '  except IOError:\n'
          '    pass\n') % sig)
    cmd = [sys.executable, '-u', script]
    # detached=True is required on Windows for SIGBREAK to propagate properly.
    p = subprocess42.Popen(cmd, detached=True, stdout=subprocess42.PIPE)

    # Wait for it to write 'ok', so we know it's handling signals. It's
    # important because otherwise SIGTERM/SIGBREAK could be sent before the
    # signal handler is installed, and this is not what we're testing here.
    self.assertEqual('ok\n', p.stdout.readline())

    # Send a SIGTERM/SIGBREAK, the process ignores it, send a SIGKILL.
    exit_code = task_runner.kill_and_wait(p, 0.1, 'testing purposes')
    expected = 1 if sys.platform == 'win32' else -signal.SIGKILL
    self.assertEqual(expected, exit_code)
    self.assertEqual('got it\n', p.stdout.readline())
Esempio n. 7
0
  def test_attached(self):
    is_win = (sys.platform == 'win32')
    cmd = [sys.executable, '-c', SCRIPT]
    # TODO(maruel): Make universal_newlines=True work and not hang.
    proc = subprocess42.Popen(cmd, detached=False, stdout=subprocess42.PIPE)
    try:
      if is_win:
        # There's something extremely weird happening on the Swarming bots in
        # the sense that they return 'hi\n' on Windows, while running the script
        # locally on Windows results in 'hi\r\n'. Author raises fist.
        self.assertIn(proc.recv_out(), ('hi\r\n', 'hi\n'))
      else:
        self.assertEqual('hi\n', proc.recv_out())

      proc.terminate()

      if is_win:
        # If attached, it's hard killed.
        self.assertEqual(1, proc.wait())
        self.assertEqual((None, None), proc.recv_any())
      else:
        self.assertEqual(0, proc.wait())
        self.assertEqual(('stdout', 'got signal 15\nbye\n'), proc.recv_any())
    finally:
      # In case the test fails.
      proc.kill()
      proc.wait()
Esempio n. 8
0
def get_output_sleep_proc(flush, unbuffered, sleep_duration):
    """Returns process with universal_newlines=True that prints to stdout before
  after a sleep.

  It also optionally sys.stdout.flush() before the sleep and optionally enable
  unbuffered output in python.
  """
    command = [
        'import sys,time',
        'print(\'A\')',
    ]
    if flush:
        # Sadly, this doesn't work otherwise in some combination.
        command.append('sys.stdout.flush()')
    command.extend((
        'time.sleep(%s)' % sleep_duration,
        'print(\'B\')',
    ))
    cmd = [sys.executable, '-c', ';'.join(command)]
    if unbuffered:
        cmd.append('-u')
    return subprocess42.Popen(cmd,
                              env=ENV,
                              stdout=subprocess42.PIPE,
                              universal_newlines=True)
Esempio n. 9
0
 def _Start(self):
     logging.info('Starting process %s', self)
     self.proc = subprocess42.Popen(self.cmd,
                                    stdout=subprocess42.PIPE,
                                    stderr=subprocess42.PIPE,
                                    detached=self.detached,
                                    cwd=self.cwd)
     threading.Thread(target=self._reader).start()
Esempio n. 10
0
 def test_communicate_input(self):
   cmd = [
     sys.executable, '-u', '-c',
     'import sys; sys.stdout.write(sys.stdin.read(5))',
   ]
   proc = subprocess42.Popen(
       cmd, stdin=subprocess42.PIPE, stdout=subprocess42.PIPE)
   out, err = proc.communicate(input='12345')
   self.assertEqual('12345', out)
   self.assertEqual(None, err)
Esempio n. 11
0
 def test_communicate_input_timeout(self):
   cmd = [sys.executable, '-u', '-c', 'import time; time.sleep(60)']
   proc = subprocess42.Popen(cmd, stdin=subprocess42.PIPE)
   try:
     proc.communicate(input='12345', timeout=0.5)
     self.fail()
   except subprocess42.TimeoutExpired as e:
     self.assertEqual(None, e.output)
     self.assertEqual(None, e.stderr)
     self.assertTrue(proc.kill())
     proc.wait()
     self.assertLess(0.5, proc.duration())
Esempio n. 12
0
 def _proc(self, err, **kwargs):
     # Do not use the -u flag here, we want to test when it is buffered by
     # default. See reference above about PYTHONUNBUFFERED.
     # That's why the two scripts uses .flush(). Sadly, the flush() call is
     # needed on Windows even for sys.stderr (!)
     cmd = [sys.executable, '-c', SCRIPT_ERR if err else SCRIPT_OUT]
     # TODO(maruel): Make universal_newlines=True work and not hang.
     if err:
         kwargs['stderr'] = subprocess42.PIPE
     else:
         kwargs['stdout'] = subprocess42.PIPE
     return subprocess42.Popen(cmd, **kwargs)
Esempio n. 13
0
 def test_communicate_timeout_no_pipe(self):
   # In this case, it's effectively a wait() call.
   cmd = [sys.executable, '-u', '-c', 'import time; time.sleep(60)']
   proc = subprocess42.Popen(cmd)
   try:
     proc.communicate(timeout=0.5)
     self.fail()
   except subprocess42.TimeoutExpired as e:
     self.assertEqual(None, e.output)
     self.assertEqual(None, e.stderr)
     self.assertTrue(proc.kill())
     proc.wait()
     self.assertLess(0.5, proc.duration())
Esempio n. 14
0
def get_output_sleep_proc_err(sleep_duration):
  """Returns process with universal_newlines=True that prints to stderr before
  and after a sleep.
  """
  command = [
    'import sys,time',
    'sys.stderr.write(\'A\\n\')',
  ]
  command.extend((
    'time.sleep(%s)' % sleep_duration,
    'sys.stderr.write(\'B\\n\')',
  ))
  cmd = [sys.executable, '-c', ';'.join(command)]
  return subprocess42.Popen(
      cmd, env=ENV, stderr=subprocess42.PIPE, universal_newlines=True)
Esempio n. 15
0
 def test_yield_any_returncode(self):
   proc = subprocess42.Popen(
       [sys.executable, '-c', 'import sys;sys.stdout.write("yo");sys.exit(1)'],
       stdout=subprocess42.PIPE)
   for p, d in proc.yield_any():
     self.assertEqual('stdout', p)
     self.assertEqual('yo', d)
   # There was a bug where the second call to wait() would overwrite
   # proc.returncode with 0 when timeout is not None.
   self.assertEqual(1, proc.wait())
   self.assertEqual(1, proc.wait(timeout=0))
   self.assertEqual(1, proc.poll())
   self.assertEqual(1, proc.returncode)
   # On Windows, the clock resolution is 15ms so Popen.duration() will likely
   # be 0.
   self.assertLessEqual(0, proc.duration())
 def _test_lower_priority(self, lower_priority):
   if sys.platform == 'win32':
     cmd = [
       sys.executable, '-u', '-c',
       'import ctypes,sys; v=ctypes.windll.kernel32.GetPriorityClass(-1);'
       'sys.stdout.write(hex(v))'
     ]
   else:
     cmd = [
       sys.executable, '-u', '-c',
       'import os,sys;sys.stdout.write(str(os.nice(0)))',
     ]
   proc = subprocess42.Popen(
       cmd, stdout=subprocess42.PIPE, lower_priority=lower_priority)
   out, err = proc.communicate()
   self.assertEqual(None, err)
   return out
Esempio n. 17
0
def clean_cache(botobj):
    """Asks run_isolated to clean its cache.

  This may take a while but it ensures that in the case of a run_isolated run
  failed and it temporarily used more space than min_free_disk, it can cleans up
  the mess properly.

  It will remove unexpected files, remove corrupted files, trim the cache size
  based on the policies and update state.json.
  """
    cmd = [
        sys.executable,
        THIS_FILE,
        'run_isolated',
        '--clean',
        '--log-file',
        os.path.join(botobj.base_dir, 'logs', 'run_isolated.log'),
        '--cache',
        os.path.join(botobj.base_dir, 'isolated_cache'),
        '--named-cache-root',
        os.path.join(botobj.base_dir, 'c'),
        '--min-free-space',
        str(get_min_free_space(botobj)),
    ]
    logging.info('Running: %s', cmd)
    try:
        # Intentionally do not use a timeout, it can take a while to hash 50gb but
        # better be safe than sorry.
        proc = subprocess42.Popen(cmd,
                                  stdin=subprocess42.PIPE,
                                  stdout=subprocess42.PIPE,
                                  stderr=subprocess42.STDOUT,
                                  cwd=botobj.base_dir,
                                  detached=True,
                                  close_fds=sys.platform != 'win32')
        output, _ = proc.communicate(None)
        logging.info('Result:\n%s', output)
        if proc.returncode:
            botobj.post_error(
                'swarming_bot.zip failure during run_isolated --clean:\n%s' %
                output)
    except OSError:
        botobj.post_error(
            'swarming_bot.zip internal failure during run_isolated --clean')
Esempio n. 18
0
    def test_kill_background(self):
        # Test process group killing.

        # Leaking a pipe through to the grandchild process is the only way to be
        # sure that we have a way to detect that this grandchild process is still
        # running or not. When all handles to the `w` end of the pipe have been
        # dropped, the `r` end will unblock.
        r, w = os.pipe()

        # Use fcntl instead of os.set_blocking because of python2
        import fcntl
        fcntl.fcntl(r, fcntl.F_SETFL,
                    fcntl.fcntl(r, fcntl.F_GETFL) | os.O_NONBLOCK)

        p = subprocess42.Popen(
            [sys.executable, self.output_script, 'out_leak'],
            stdout=w,
            detached=True)
        os.close(w)  # close so we don't think that a child is hanging onto it.

        self.assertEqual(p.wait(), 0)  # our immediate child has exited!

        with self.assertRaises(OSError):
            # oops, something still has a handle to this pipe! it's the grandchild!
            os.read(r, 1)

        # kill the group! That'll show 'em (unless the child actually daemonized, in
        # which case we're hosed).
        p.kill()

        # sleepy-loop until the pipe is closed, should take O(ms) but we generously
        # wait up to 5s. The sub-child will wait 30s and should outlive this loop if
        # somehow it survived the kill.
        now = time.time()
        while True:
            try:
                self.assertEqual(os.read(r, 1), b'')  # i.e. EOF
                return
            except OSError as ex:
                if ex.errno != errno.EWOULDBLOCK:
                    raise
                time.sleep(0.1)
                if time.time() - now > 5:
                    raise Exception('pipe not unblocked after 5s, bailing')
Esempio n. 19
0
def _start_task_runner(args, work_dir, ctx_file):
  """Starts task_runner process and returns its handle.

  Raises:
    _FailureOnStart if a problem occurred.
  """
  cmd = get_run_isolated()
  args_path = os.path.join(work_dir, 'run_isolated_args.json')
  cmd.extend(['-a', args_path])
  env = os.environ.copy()
  if ctx_file:
    env['LUCI_CONTEXT'] = ctx_file
  logging.info('cmd=%s', cmd)
  logging.info('cwd=%s', work_dir)
  logging.info('args=%s', args)

  # We write args to a file since there may be more of them than the OS
  # can handle. Since it is inside work_dir, it'll be automatically be deleted
  # upon task termination.
  try:
    with open(args_path, 'wb') as f:
      json.dump(args, f)
  except (IOError, OSError) as e:
    raise _FailureOnStart(
        e.errno or -1,
        'Could not write args to %s: %s' % (args_path, e))

  try:
    # TODO(maruel): Support separate streams for stdout and stderr.
    proc = subprocess42.Popen(
        cmd,
        env=env,
        cwd=work_dir,
        detached=True,
        stdout=subprocess42.PIPE,
        stderr=subprocess42.STDOUT,
        stdin=subprocess42.PIPE)
  except OSError as e:
    raise _FailureOnStart(
        e.errno or -1,
        'Command "%s" failed to start.\nError: %s' % (' '.join(cmd), e))
  return proc
Esempio n. 20
0
 def _get_output_sleep_proc(flush, env, duration):
     command = [
         'import sys,time',
         'print(\'A\')',
     ]
     if flush:
         # Sadly, this doesn't work otherwise in some combination.
         command.append('sys.stdout.flush()')
     command.extend((
         'time.sleep(%s)' % duration,
         'print(\'B\')',
     ))
     return subprocess42.Popen([
         sys.executable,
         '-c',
         ';'.join(command),
     ],
                               stdout=subprocess42.PIPE,
                               universal_newlines=True,
                               env=env)
 def test_containment_auto_kill(self):
   # Test process killing.
   cmd = [
     sys.executable, '-u', '-c',
     'import sys,time; print("hi");time.sleep(60)',
   ]
   containment = subprocess42.Containment(
       containment_type=subprocess42.Containment.AUTO,
       limit_processes=1,
       limit_total_committed_memory=1024*1024*1024)
   p = subprocess42.Popen(
       cmd, stdout=subprocess42.PIPE, containment=containment)
   itr = p.yield_any_line()
   self.assertEqual(('stdout', 'hi'), next(itr))
   p.kill()
   p.wait()
   if sys.platform != 'win32':
     # signal.SIGKILL is not defined on Windows. Validate our assumption here.
     self.assertEqual(9, signal.SIGKILL)
   self.assertEqual(-9, p.returncode)
  def test_containment_auto_limit_memory(self):
    # Process allocates a lot of memory. It should fail due to quota.
    cmd = self._cmd_large_memory()
    containment = subprocess42.Containment(
        containment_type=subprocess42.Containment.JOB_OBJECT,
        # 20 MiB.
        limit_total_committed_memory=20*1024*1024)
    start = lambda: subprocess42.Popen(
        cmd, stdout=subprocess42.PIPE, stderr=subprocess42.PIPE,
        containment=containment)

    if sys.platform == 'win32':
      p = start()
      out, err = p.communicate()
      self.assertEqual(1, p.returncode)
      self.assertEqual('', out)
      self.assertIn('MemoryError', err)
    else:
      # JOB_OBJECT is not usable on non-Windows.
      with self.assertRaises(NotImplementedError):
        start()
Esempio n. 23
0
    def test_communicate_input_stdout_timeout(self):
        cmd = [
            sys.executable,
            '-u',
            '-c',
            """
import sys, time
sys.stdout.write(sys.stdin.read(5))
sys.stdout.flush()
time.sleep(60)
        """,
        ]
        proc = subprocess42.Popen(cmd,
                                  stdin=subprocess42.PIPE,
                                  stdout=subprocess42.PIPE)
        try:
            proc.communicate(input=b'12345', timeout=2)
            self.fail()
        except subprocess42.TimeoutExpired as e:
            self.assertEqual(b'12345', e.output)
            self.assertEqual(None, e.stderr)
            self.assertTrue(proc.kill())
            proc.wait()
            self.assertLessEqual(0.5, proc.duration())
Esempio n. 24
0
 def test_signal(self):
     # Tests when task_runner gets a SIGTERM.
     os.mkdir(os.path.join(self.root_dir, 'work'))
     signal_file = os.path.join(self.root_dir, 'work', 'signal')
     open(signal_file, 'wb').close()
     manifest = get_manifest(
         script='import os,time;os.remove(%r);time.sleep(60)' % signal_file,
         hard_timeout=60.,
         io_timeout=60.)
     task_in_file = os.path.join(self.root_dir, 'task_runner_in.json')
     task_result_file = os.path.join(self.root_dir, 'task_runner_out.json')
     with open(task_in_file, 'wb') as f:
         json.dump(manifest, f)
     bot = os.path.join(self.root_dir, 'swarming_bot.1.zip')
     code, _ = fake_swarming.gen_zip(self._server.url)
     with open(bot, 'wb') as f:
         f.write(code)
     cmd = [
         sys.executable,
         bot,
         'task_runner',
         '--swarming-server',
         self._server.url,
         '--in-file',
         task_in_file,
         '--out-file',
         task_result_file,
         '--cost-usd-hour',
         '1',
         # Include the time taken to poll the task in the cost.
         '--start',
         str(time.time()),
     ]
     logging.info('%s', cmd)
     proc = subprocess42.Popen(cmd, cwd=self.root_dir, detached=True)
     # Wait for the child process to be alive.
     while os.path.isfile(signal_file):
         time.sleep(0.01)
     # Send SIGTERM to task_runner itself. Ensure the right thing happen.
     # Note that on Windows, this is actually sending a SIGBREAK since there's no
     # such thing as SIGTERM.
     proc.send_signal(signal.SIGTERM)
     proc.wait()
     task_runner_log = os.path.join(self.root_dir, 'logs',
                                    'task_runner.log')
     with open(task_runner_log, 'rb') as f:
         logging.info('task_runner.log:\n---\n%s---', f.read())
     expected = {
         u'exit_code': 0,
         u'hard_timeout': False,
         u'io_timeout': False,
         u'must_signal_internal_failure': None,
         u'version': task_runner.OUT_VERSION,
     }
     self.assertEqual([], self._server.get_events())
     tasks = self._server.get_tasks()
     for task in tasks.itervalues():
         for event in task:
             event.pop('cost_usd')
             event.pop('duration', None)
     expected = {
         '23': [
             {
                 u'id': u'localhost',
                 u'task_id': 23,
             },
             {
                 u'exit_code':
                 1 if sys.platform == 'win32' else -signal.SIGTERM,
                 u'hard_timeout': False,
                 u'id': u'localhost',
                 u'io_timeout': False,
                 u'task_id': 23,
             },
         ],
     }
     self.assertEqual(expected, tasks)
     expected = {
         'swarming_bot.1.zip',
         '092b5bd4562f579711823f61e311de37247c853a-cacert.pem',
         'work',
         'logs',
         # TODO(maruel): Move inside work.
         'task_runner_in.json',
         'task_runner_out.json',
     }
     self.assertEqual(expected, set(os.listdir(self.root_dir)))
     expected = {
         u'exit_code':
         1 if sys.platform == 'win32' else -signal.SIGTERM,
         u'hard_timeout':
         False,
         u'io_timeout':
         False,
         u'must_signal_internal_failure':
         u'task_runner received signal %d' % task_runner.SIG_BREAK_OR_TERM,
         u'version':
         3,
     }
     with open(task_result_file, 'rb') as f:
         self.assertEqual(expected, json.load(f))
     self.assertEqual(0, proc.returncode)
Esempio n. 25
0
    def ensure(self,
               site_root,
               packages,
               cache_dir=None,
               tmp_dir=None,
               timeout=None):
        """Ensures that packages installed in |site_root| equals |packages| set.

    Blocking call.

    Args:
      site_root (str): where to install packages.
      packages: dict of subdir -> list of (package_template, version) tuples.
      cache_dir (str): if set, cache dir for cipd binary own cache.
        Typically contains packages and tags.
      tmp_dir (str): if not None, dir for temp files.
      timeout (int): if not None, timeout in seconds for this function to run.

    Returns:
      Pinned packages in the form of {subdir: [(package_name, package_id)]},
      which correspond 1:1 with the input packages argument.

    Raises:
      Error if could not install packages or timed out.
    """
        timeoutfn = tools.sliding_timeout(timeout)
        logging.info('Installing packages %r into %s', packages, site_root)

        ensure_file_handle, ensure_file_path = tempfile.mkstemp(
            dir=tmp_dir, prefix=u'cipd-ensure-file-', suffix='.txt')
        json_out_file_handle, json_file_path = tempfile.mkstemp(
            dir=tmp_dir, prefix=u'cipd-ensure-result-', suffix='.json')
        os.close(json_out_file_handle)

        try:
            try:
                for subdir, pkgs in sorted(packages.iteritems()):
                    if '\n' in subdir:
                        raise Error(
                            'Could not install packages; subdir %r contains newline'
                            % subdir)
                    os.write(ensure_file_handle, '@Subdir %s\n' % (subdir, ))
                    for pkg, version in pkgs:
                        os.write(ensure_file_handle,
                                 '%s %s\n' % (pkg, version))
            finally:
                os.close(ensure_file_handle)

            cmd = [
                self.binary_path,
                'ensure',
                '-root',
                site_root,
                '-ensure-file',
                ensure_file_path,
                '-verbose',  # this is safe because cipd-ensure does not print a lot
                '-json-output',
                json_file_path,
            ]
            if cache_dir:
                cmd += ['-cache-dir', cache_dir]
            if self.service_url:
                cmd += ['-service-url', self.service_url]

            logging.debug('Running %r', cmd)
            process = subprocess42.Popen(cmd,
                                         stdout=subprocess42.PIPE,
                                         stderr=subprocess42.PIPE)
            output = []
            for pipe_name, line in process.yield_any_line(timeout=0.1):
                to = timeoutfn()
                if to is not None and to <= 0:
                    raise Error(
                        'Could not install packages; took more than %d seconds'
                        % timeout)
                if not pipe_name:
                    # stdout or stderr was closed, but yield_any_line still may have
                    # something to yield.
                    continue
                output.append(line)
                if pipe_name == 'stderr':
                    logging.debug('cipd client: %s', line)
                else:
                    logging.info('cipd client: %s', line)

            exit_code = process.wait(timeout=timeoutfn())
            if exit_code != 0:
                raise Error(
                    'Could not install packages; exit code %d\noutput:%s' %
                    (exit_code, '\n'.join(output)))
            with open(json_file_path) as jfile:
                result_json = json.load(jfile)
            return {
                subdir: [(x['package'], x['instance_id']) for x in pins]
                for subdir, pins in result_json['result'].iteritems()
            }
        finally:
            fs.remove(ensure_file_path)
            fs.remove(json_file_path)
Esempio n. 26
0
def run_command(swarming_server, task_details, root_dir, cost_usd_hour,
                task_start, json_file):
    """Runs a command and sends packets to the server to stream results back.

  Implements both I/O and hard timeouts. Sends the packets numbered, so the
  server can ensure they are processed in order.

  Returns:
    Child process exit code.
  """
    # Signal the command is about to be started.
    last_packet = start = now = monotonic_time()
    params = {
        'cost_usd': cost_usd_hour * (now - task_start) / 60. / 60.,
        'id': task_details.bot_id,
        'task_id': task_details.task_id,
    }
    post_update(swarming_server, params, None, '', 0)

    logging.info('Executing: %s', task_details.command)
    # TODO(maruel): Support both channels independently and display stderr in red.
    env = None
    if task_details.env:
        env = os.environ.copy()
        env.update(task_details.env)
    try:
        proc = subprocess42.Popen(task_details.command,
                                  env=env,
                                  cwd=root_dir,
                                  detached=True,
                                  stdout=subprocess.PIPE,
                                  stderr=subprocess.STDOUT,
                                  stdin=subprocess.PIPE)
    except OSError as e:
        stdout = 'Command "%s" failed to start.\nError: %s' % (' '.join(
            task_details.command), e)
        now = monotonic_time()
        params['cost_usd'] = cost_usd_hour * (now - task_start) / 60. / 60.
        params['duration'] = now - start
        params['io_timeout'] = False
        params['hard_timeout'] = False
        post_update(swarming_server, params, 1, stdout, 0)
        return 1

    output_chunk_start = 0
    stdout = ''
    exit_code = None
    had_hard_timeout = False
    had_io_timeout = False
    timed_out = None
    try:
        calc = lambda: calc_yield_wait(task_details, start, last_io, timed_out,
                                       stdout)
        maxsize = lambda: MAX_CHUNK_SIZE - len(stdout)
        last_io = monotonic_time()
        for _, new_data in proc.yield_any(maxsize=maxsize, soft_timeout=calc):
            now = monotonic_time()
            if new_data:
                stdout += new_data
                last_io = now

            # Post update if necessary.
            if should_post_update(stdout, now, last_packet):
                last_packet = monotonic_time()
                params['cost_usd'] = (cost_usd_hour *
                                      (last_packet - task_start) / 60. / 60.)
                post_update(swarming_server, params, None, stdout,
                            output_chunk_start)
                output_chunk_start += len(stdout)
                stdout = ''

            # Send signal on timeout if necessary. Both are failures, not
            # internal_failures.
            # Eventually kill but return 0 so bot_main.py doesn't cancel the task.
            if not timed_out:
                if now - last_io > task_details.io_timeout:
                    had_io_timeout = True
                    logging.warning('I/O timeout')
                    proc.terminate()
                    timed_out = monotonic_time()
                elif now - start > task_details.hard_timeout:
                    had_hard_timeout = True
                    logging.warning('Hard timeout')
                    proc.terminate()
                    timed_out = monotonic_time()
            else:
                # During grace period.
                if now >= timed_out + task_details.grace_period:
                    # Now kill for real. The user can distinguish between the following
                    # states:
                    # - signal but process exited within grace period,
                    #   (hard_|io_)_timed_out will be set but the process exit code will
                    #   be script provided.
                    # - processed exited late, exit code will be -9 on posix.
                    try:
                        logging.warning('proc.kill() after grace')
                        proc.kill()
                    except OSError:
                        pass
        logging.info('Waiting for proces exit')
        exit_code = proc.wait()
        logging.info('Waiting for proces exit - done')
    finally:
        # Something wrong happened, try to kill the child process.
        if exit_code is None:
            had_hard_timeout = True
            try:
                logging.warning('proc.kill() in finally')
                proc.kill()
            except OSError:
                # The process has already exited.
                pass

            # TODO(maruel): We'd wait only for X seconds.
            logging.info('Waiting for proces exit in finally')
            exit_code = proc.wait()
            logging.info('Waiting for proces exit in finally - done')

        # This is the very last packet for this command.
        now = monotonic_time()
        params['cost_usd'] = cost_usd_hour * (now - task_start) / 60. / 60.
        params['duration'] = now - start
        params['io_timeout'] = had_io_timeout
        params['hard_timeout'] = had_hard_timeout
        # At worst, it'll re-throw, which will be caught by bot_main.py.
        post_update(swarming_server, params, exit_code, stdout,
                    output_chunk_start)
        output_chunk_start += len(stdout)
        stdout = ''

        with open(json_file, 'w') as fd:
            json.dump({'exit_code': exit_code, 'version': 1}, fd)

    logging.info('run_command() = %s', exit_code)
    assert not stdout
    return exit_code
Esempio n. 27
0
def run_command(command, cwd, env, hard_timeout, grace_period):
    """Runs the command.

  Returns:
    tuple(process exit code, bool if had a hard timeout)
  """
    logging.info('run_command(%s, %s)' % (command, cwd))

    exit_code = None
    had_hard_timeout = False
    with tools.Profiler('RunTest'):
        proc = None
        had_signal = []
        try:
            # TODO(maruel): This code is imperfect. It doesn't handle well signals
            # during the download phase and there's short windows were things can go
            # wrong.
            def handler(signum, _frame):
                if proc and not had_signal:
                    logging.info('Received signal %d', signum)
                    had_signal.append(True)
                    raise subprocess42.TimeoutExpired(command, None)

            proc = subprocess42.Popen(command, cwd=cwd, env=env, detached=True)
            with subprocess42.set_signal_handler(subprocess42.STOP_SIGNALS,
                                                 handler):
                try:
                    exit_code = proc.wait(hard_timeout or None)
                except subprocess42.TimeoutExpired:
                    if not had_signal:
                        logging.warning('Hard timeout')
                        had_hard_timeout = True
                    logging.warning('Sending SIGTERM')
                    proc.terminate()

            # Ignore signals in grace period. Forcibly give the grace period to the
            # child process.
            if exit_code is None:
                ignore = lambda *_: None
                with subprocess42.set_signal_handler(subprocess42.STOP_SIGNALS,
                                                     ignore):
                    try:
                        exit_code = proc.wait(grace_period or None)
                    except subprocess42.TimeoutExpired:
                        # Now kill for real. The user can distinguish between the
                        # following states:
                        # - signal but process exited within grace period,
                        #   hard_timed_out will be set but the process exit code will be
                        #   script provided.
                        # - processed exited late, exit code will be -9 on posix.
                        logging.warning('Grace exhausted; sending SIGKILL')
                        proc.kill()
            logging.info('Waiting for proces exit')
            exit_code = proc.wait()
        except OSError:
            # This is not considered to be an internal error. The executable simply
            # does not exit.
            sys.stderr.write(
                '<The executable does not exist or a dependent library is missing>\n'
                '<Check for missing .so/.dll in the .isolate or GN file>\n'
                '<Command: %s>\n' % command)
            if os.environ.get('SWARMING_TASK_ID'):
                # Give an additional hint when running as a swarming task.
                sys.stderr.write(
                    '<See the task\'s page for commands to help diagnose this issue '
                    'by reproducing the task locally>\n')
            exit_code = 1
    logging.info('Command finished with exit code %d (%s)', exit_code,
                 hex(0xffffffff & exit_code))
    return exit_code, had_hard_timeout
Esempio n. 28
0
  def test_recv_any(self):
    # Test all pipe direction and output scenarios.
    combinations = [
      {
        'cmd': ['out_print', 'err_print'],
        'stdout': None,
        'stderr': None,
        'expected': {},
      },
      {
        'cmd': ['out_print', 'err_print'],
        'stdout': None,
        'stderr': subprocess42.STDOUT,
        'expected': {},
      },

      {
        'cmd': ['out_print'],
        'stdout': subprocess42.PIPE,
        'stderr': subprocess42.PIPE,
        'expected': {'stdout': 'printing'},
      },
      {
        'cmd': ['out_print'],
        'stdout': subprocess42.PIPE,
        'stderr': None,
        'expected': {'stdout': 'printing'},
      },
      {
        'cmd': ['out_print'],
        'stdout': subprocess42.PIPE,
        'stderr': subprocess42.STDOUT,
        'expected': {'stdout': 'printing'},
      },

      {
        'cmd': ['err_print'],
        'stdout': subprocess42.PIPE,
        'stderr': subprocess42.PIPE,
        'expected': {'stderr': 'printing'},
      },
      {
        'cmd': ['err_print'],
        'stdout': None,
        'stderr': subprocess42.PIPE,
        'expected': {'stderr': 'printing'},
      },
      {
        'cmd': ['err_print'],
        'stdout': subprocess42.PIPE,
        'stderr': subprocess42.STDOUT,
        'expected': {'stdout': 'printing'},
      },

      {
        'cmd': ['out_print', 'err_print'],
        'stdout': subprocess42.PIPE,
        'stderr': subprocess42.PIPE,
        'expected': {'stderr': 'printing', 'stdout': 'printing'},
      },
      {
        'cmd': ['out_print', 'err_print'],
        'stdout': subprocess42.PIPE,
        'stderr': subprocess42.STDOUT,
        'expected': {'stdout': 'printingprinting'},
      },
    ]
    for i, testcase in enumerate(combinations):
      cmd = [sys.executable, self.output_script] + testcase['cmd']
      p = subprocess42.Popen(
          cmd, env=ENV, stdout=testcase['stdout'], stderr=testcase['stderr'])
      actual = {}
      while p.poll() is None:
        pipe, data = p.recv_any()
        if data:
          actual.setdefault(pipe, '')
          actual[pipe] += data

      # The process exited, read any remaining data in the pipes.
      while True:
        pipe, data = p.recv_any()
        if pipe is None:
          break
        actual.setdefault(pipe, '')
        actual[pipe] += data
      self.assertEqual(
          testcase['expected'],
          actual,
          (i, testcase['cmd'], testcase['expected'], actual))
      self.assertEqual((None, None), p.recv_any())
      self.assertEqual(0, p.returncode)
Esempio n. 29
0
  def test_communicate_timeout(self):
    timedout = 1 if sys.platform == 'win32' else -9
    # Format is:
    # ( (cmd, stderr_pipe, timeout), (stdout, stderr, returncode) ), ...
    # See OUTPUT script for the meaning of the commands.
    test_data = [
      # 0 means no timeout, like None.
      (
        (['out_sleeping', '0.001', 'out_slept', 'err_print'], None, 0),
        ('Sleeping.\nSlept.\n', None, 0),
      ),
      (
        (['err_print'], subprocess42.STDOUT, 0),
        ('printing', None, 0),
      ),
      (
        (['err_print'], subprocess42.PIPE, 0),
        ('', 'printing', 0),
      ),

      # On a loaded system, this can be tight.
      (
        (['out_sleeping', 'out_flush', '60', 'out_slept'], None, 0.5),
        ('Sleeping.\n', None, timedout),
      ),
      (
        (
          # Note that err_flush is necessary on Windows but not on the other
          # OSes. This means the likelihood of missing stderr output from a
          # killed child process on Windows is much higher than on other OSes.
          [
            'out_sleeping', 'out_flush', 'err_print', 'err_flush', '60',
            'out_slept',
          ],
          subprocess42.PIPE,
          0.5),
        ('Sleeping.\n', 'printing', timedout),
      ),

      (
        (['out_sleeping', '0.001', 'out_slept'], None, 60),
        ('Sleeping.\nSlept.\n', None, 0),
      ),
    ]
    for i, ((args, errpipe, timeout), expected) in enumerate(test_data):
      proc = subprocess42.Popen(
          [sys.executable, self.output_script] + args,
          env=ENV,
          stdout=subprocess42.PIPE,
          stderr=errpipe)
      try:
        stdout, stderr = proc.communicate(timeout=timeout)
        code = proc.returncode
      except subprocess42.TimeoutExpired as e:
        stdout = e.output
        stderr = e.stderr
        self.assertTrue(proc.kill())
        code = proc.wait()
      finally:
        duration = proc.duration()
      expected_duration = 0.0001 if not timeout or timeout == 60 else timeout
      self.assertTrue(duration > expected_duration, (i, expected_duration))
      self.assertEqual(
          (i, stdout, stderr, code),
          (i,
            to_native_eol(expected[0]),
            to_native_eol(expected[1]),
            expected[2]))

      # Try again with universal_newlines=True.
      proc = subprocess42.Popen(
          [sys.executable, self.output_script] + args,
          env=ENV,
          stdout=subprocess42.PIPE,
          stderr=errpipe,
          universal_newlines=True)
      try:
        stdout, stderr = proc.communicate(timeout=timeout)
        code = proc.returncode
      except subprocess42.TimeoutExpired as e:
        stdout = e.output
        stderr = e.stderr
        self.assertTrue(proc.kill())
        code = proc.wait()
      finally:
        duration = proc.duration()
      self.assertTrue(duration > expected_duration, (i, expected_duration))
      self.assertEqual(
          (i, stdout, stderr, code),
          (i,) + expected)
Esempio n. 30
0
def run_command(command, cwd, tmp_dir, hard_timeout, grace_period):
  """Runs the command.

  Returns:
    tuple(process exit code, bool if had a hard timeout)
  """
  logging.info('run_command(%s, %s)' % (command, cwd))
  sys.stdout.flush()

  env = os.environ.copy()
  if sys.platform == 'darwin':
    env['TMPDIR'] = tmp_dir.encode('ascii')
  elif sys.platform == 'win32':
    # Temporarily disable this behavior on Windows while investigating
    # https://crbug.com/533552.
    # env['TEMP'] = tmp_dir.encode('ascii')
    pass
  else:
    env['TMP'] = tmp_dir.encode('ascii')
  exit_code = None
  had_hard_timeout = False
  with tools.Profiler('RunTest'):
    proc = None
    had_signal = []
    try:
      # TODO(maruel): This code is imperfect. It doesn't handle well signals
      # during the download phase and there's short windows were things can go
      # wrong.
      def handler(signum, _frame):
        if proc and not had_signal:
          logging.info('Received signal %d', signum)
          had_signal.append(True)
          raise subprocess42.TimeoutExpired(command, None)

      proc = subprocess42.Popen(command, cwd=cwd, env=env, detached=True)
      with subprocess42.set_signal_handler(subprocess42.STOP_SIGNALS, handler):
        try:
          exit_code = proc.wait(hard_timeout or None)
        except subprocess42.TimeoutExpired:
          if not had_signal:
            logging.warning('Hard timeout')
            had_hard_timeout = True
          logging.warning('Sending SIGTERM')
          proc.terminate()

      # Ignore signals in grace period. Forcibly give the grace period to the
      # child process.
      if exit_code is None:
        ignore = lambda *_: None
        with subprocess42.set_signal_handler(subprocess42.STOP_SIGNALS, ignore):
          try:
            exit_code = proc.wait(grace_period or None)
          except subprocess42.TimeoutExpired:
            # Now kill for real. The user can distinguish between the
            # following states:
            # - signal but process exited within grace period,
            #   hard_timed_out will be set but the process exit code will be
            #   script provided.
            # - processed exited late, exit code will be -9 on posix.
            logging.warning('Grace exhausted; sending SIGKILL')
            proc.kill()
      logging.info('Waiting for proces exit')
      exit_code = proc.wait()
    except OSError:
      # This is not considered to be an internal error. The executable simply
      # does not exit.
      exit_code = 1
  logging.info(
      'Command finished with exit code %d (%s)',
      exit_code, hex(0xffffffff & exit_code))
  return exit_code, had_hard_timeout