Ejemplo n.º 1
0
def isolate_and_archive(trees, server_ref):
  """Isolates each requested tree, then uploads all of them in one batch.

  Args:
    trees: list of pairs (Options, working directory), one per tree to
        isolate. Each pair is handed to prepare_for_archival().
    server_ref: isolate_storage.ServerRef instance naming the upload target.

  Returns:
    A dict {target name -> isolated hash or None}. The target name is the base
    name of the *.isolated file without its extension (e.g. 'base_unittests').

    * An empty dict is returned when |trees| is empty.
    * A target whose isolation raised maps to None; the other targets are
      still uploaded. If every target failed, nothing is uploaded.
    * None is returned (instead of a dict) when the upload step raises.
  """
  if not trees:
    return {}

  def with_root(root_dir, files):
    # Generator: the (potentially huge) list of files is only walked when
    # upload_items() consumes it.
    for relpath, metadata in files.iteritems():
      yield (os.path.join(root_dir, relpath), metadata)

  # Isolating involves parsing, file system traversal and hashing. Collect one
  # lazy file generator per successful target along with its hash.
  pending = []
  hashes_by_target = {}
  with tools.Profiler('Isolate'):
    for options, workdir in trees:
      name, _ = os.path.splitext(os.path.basename(options.isolated))
      try:
        state, files, digests = prepare_for_archival(options, workdir)
        pending.append(with_root(state.root_dir, files))
        digest = digests[0]
        hashes_by_target[name] = digest
        print('%s  %s' % (digest, name))
      except Exception:
        logging.exception('Exception when isolating %s', name)
        hashes_by_target[name] = None

  # Every target failed: there is nothing worth uploading.
  if not any(h is not None for h in hashes_by_target.itervalues()):
    return hashes_by_target

  # Push the files of all targets in a single upload.
  with tools.Profiler('Upload'):
    try:
      items = _process_infiles(itertools.chain.from_iterable(pending))
      with isolateserver.get_storage(server_ref) as storage:
        storage.upload_items(items)
    except Exception:
      logging.exception('Exception while uploading files')
      return None

  return hashes_by_target
Ejemplo n.º 2
0
def delete_and_upload(storage, out_dir, leak_temp_dir):
    """Archives the content of out_dir to storage, then removes out_dir.

    Arguments:
      storage: storage object handed to
          isolateserver.archive_files_to_storage(); its location and namespace
          attributes are copied into outputs_ref.
      out_dir: directory holding the task outputs.
      leak_temp_dir: if true, out_dir is not deleted.

    Returns:
      tuple(outputs_ref, success, stats)
      - outputs_ref: a dict referring to the results archived back to the
            isolated server, or None when out_dir is missing or empty.
      - success: False if something occurred that means that the task must
            forcibly be considered a failure, e.g. out_dir could not be
            removed.
      - stats: uploading stats (duration and packed sizes of cold/hot items).

    Raises:
      isolateserver.Aborted: re-raised when the upload was aborted.
    """
    began = time.time()
    archived_ref = None
    cold_sizes = []
    hot_sizes = []

    # Only archive when out_dir exists and something was written into it.
    has_outputs = fs.isdir(out_dir) and fs.listdir(out_dir)
    if has_outputs:
        with tools.Profiler('ArchiveOutput'):
            try:
                archived = isolateserver.archive_files_to_storage(
                    storage, [out_dir], None)
                results, f_cold, f_hot = archived
                archived_ref = {
                    'isolated': results[0][0],
                    'isolatedserver': storage.location,
                    'namespace': storage.namespace,
                }
                cold_sizes = sorted(item.size for item in f_cold)
                hot_sizes = sorted(item.size for item in f_hot)
            except isolateserver.Aborted:
                # Raised when SIGTERM was received while uploading data. There
                # are two possible causes:
                # - The task was too slow and was about to be killed anyway for
                #   exceeding the hard timeout.
                # - The amount of data to upload back is very large and took
                #   too long to archive.
                sys.stderr.write('Received SIGTERM while uploading')
                # Propagate so that it is treated as an internal failure.
                raise

    removed_cleanly = False
    try:
        # Nothing to delete counts as a success; so does a deliberate leak.
        if (leak_temp_dir or not fs.isdir(out_dir)
                or file_path.rmtree(out_dir)):
            removed_cleanly = True
        else:
            logging.error('Had difficulties removing out_dir %s', out_dir)
    except OSError as e:
        # When this happens, it means there's a process error.
        logging.exception('Had difficulties removing out_dir %s: %s', out_dir,
                          e)

    stats = {
        'duration': time.time() - began,
        'items_cold': base64.b64encode(large.pack(cold_sizes)),
        'items_hot': base64.b64encode(large.pack(hot_sizes)),
    }
    return archived_ref, removed_cleanly, stats
Ejemplo n.º 3
0
 def __init__(self, cache_dir, policies, hash_algo, trim, time_fn=None):
     """Sets up the on-disk cache under |cache_dir| and loads its state.

     Arguments:
       cache_dir: directory where to place the cache.
       policies: CachePolicies instance, cache retention policies.
       hash_algo: hashing algorithm used.
       trim: if True, enforce |policies| right away. It can be done later by
           calling trim() explicitly. Forwarded to self._load().
       time_fn: forwarded as-is to self._load().
     """
     # Locking convention: every protected method (name starting with '_'),
     # with the exception of _path, expects self._lock to be held.
     super(DiskContentAddressedCache, self).__init__(cache_dir)

     # Configuration.
     self.hash_algo = hash_algo
     self.policies = policies
     self.state_file = os.path.join(cache_dir, self.STATE_FILE)

     # LRU lookup dict(digest: size) of the cached items.
     self._lru = lru.LRUDict()
     # First item in the LRU cache that must not be evicted during this run
     # because it was referenced. Everything more recent than _protected in the
     # LRU cache is inherently protected too. A set() of all referenced items
     # would work as well but costs memory without a use case.
     self._protected = None
     # Cleanup operations done by self._load(), if any.
     self._operations = []

     # Cached free disk space; it is updated by self._trim().
     file_path.ensure_tree(self.cache_dir)
     self._free_disk = file_path.get_free_space(self.cache_dir)

     with tools.Profiler('Setup'), self._lock:
         self._load(trim, time_fn)
Ejemplo n.º 4
0
  def __init__(self, cache_dir, policies, hash_algo):
    """Records the cache settings and loads the cache state.

    Arguments:
      cache_dir: directory where to place the cache.
      policies: cache retention policies.
      hash_algo: hashing algorithm used.
    """
    super(DiskCache, self).__init__()

    # Settings.
    self.cache_dir = cache_dir
    self.hash_algo = hash_algo
    self.policies = policies
    self.state_file = os.path.join(cache_dir, self.STATE_FILE)

    # Values used for profiling.
    self._free_disk = 0
    self._added = []
    self._removed = []

    self._lru = lru.LRUDict()
    # Locking convention: every protected method (name starting with '_'),
    # with the exception of _path, expects this lock to be held.
    self._lock = threading_utils.LockWithAssert()

    with tools.Profiler('Setup'), self._lock:
      self._load()
Ejemplo n.º 5
0
def run_command(command, cwd, tmp_dir):
    """Runs |command| in |cwd| and returns the process exit code.

    The child inherits a copy of os.environ in which the temporary directory
    variable (TMPDIR on OSX, TMP elsewhere except Windows) points to |tmp_dir|.
    An OSError raised while running the command yields exit code 1.
    """
    logging.info('run_command(%s, %s)' % (command, cwd))
    sys.stdout.flush()

    # Pick the environment variable that carries the temp directory.
    if sys.platform == 'win32':
        # Overriding TEMP is temporarily disabled on Windows while investigating
        # https://crbug.com/533552.
        tmp_var = None
    elif sys.platform == 'darwin':
        tmp_var = 'TMPDIR'
    else:
        tmp_var = 'TMP'

    child_env = os.environ.copy()
    if tmp_var is not None:
        child_env[tmp_var] = tmp_dir.encode('ascii')

    with tools.Profiler('RunTest'):
        try:
            launcher = subprocess42.Popen_with_handler(
                command, cwd=cwd, env=child_env)
            with launcher as proc:
                proc.communicate()
                exit_code = proc.returncode
        except OSError:
            # This is not considered to be an internal error. The executable
            # simply does not exist.
            exit_code = 1
    logging.info('Command finished with exit code %d (%s)', exit_code,
                 hex(0xffffffff & exit_code))
    return exit_code
Ejemplo n.º 6
0
    def __exit__(self, _exc_type, _exec_value, _traceback):
        """Trims the cache and logs usage statistics when the context exits.

        Always returns False, so an exception raised inside the 'with' body is
        not suppressed.
        """
        with tools.Profiler('CleanupTrimming'), self._lock:
            self._trim()

            # Sizes are logged in kb.
            added_kb = sum(self._added) / 1024
            logging.info('%5d (%8dkb) added', len(self._added), added_kb)

            current_kb = sum(self._lru.itervalues()) / 1024
            logging.info('%5d (%8dkb) current', len(self._lru), current_kb)

            evicted_kb = sum(self._evicted) / 1024
            logging.info('%5d (%8dkb) evicted', len(self._evicted), evicted_kb)

            free_kb = self._free_disk / 1024
            logging.info('       %8dkb free', free_kb)
        return False
Ejemplo n.º 7
0
def run_tha_test(isolated_hash, storage, cache, algo, outdir):
    """Fetches the isolated tree into |outdir| and runs its command there.

    The dependencies are fetched through isolateserver.fetch_isolated(), then
    the command of the .isolated file is executed with subprocess.call().
    |outdir| is deleted before returning when it is not empty/None.

    Returns:
      The exit code of the command, or 1 if the .isolated file could not be
      processed (isolateserver.ConfigError) or the command could not be
      started (OSError).
    """
    try:
        try:
            bundle = isolateserver.fetch_isolated(
                isolated_hash=isolated_hash,
                storage=storage,
                cache=cache,
                algo=algo,
                outdir=outdir,
                os_flavor=get_flavor(),
                require_command=True)
        except isolateserver.ConfigError as err:
            print >> sys.stderr, str(err)
            return 1

        if bundle.read_only:
            logging.info('Making files read only')
            make_writable(outdir, True)
        run_cwd = os.path.normpath(os.path.join(outdir, bundle.relative_cwd))
        logging.info('Running %s, cwd=%s' % (bundle.command, run_cwd))

        # TODO(csharp): This should be specified somewhere else.
        # TODO(vadimsh): Pass it via 'env_vars' in manifest.
        # Point the child at a rotating log file unless one is already set.
        child_env = os.environ.copy()
        if MAIN_DIR:
            log_path = os.path.join(MAIN_DIR, RUN_TEST_CASES_LOG)
            child_env.setdefault('RUN_TEST_CASES_LOG_FILE', log_path)
        try:
            with tools.Profiler('RunTest'):
                return subprocess.call(
                    bundle.command, cwd=run_cwd, env=child_env)
        except OSError:
            print >> sys.stderr, 'Failed to run %s; cwd=%s' % (
                bundle.command, run_cwd)
            return 1
    finally:
        # Runs on every exit path, including the early returns above.
        if outdir:
            rmtree(outdir)
Ejemplo n.º 8
0
def run_command(command, cwd, env, hard_timeout, grace_period):
    """Runs the command, enforcing a hard timeout followed by a grace period.

    The child is started with subprocess42.Popen(detached=True). If waiting on
    it raises subprocess42.TimeoutExpired (raised by proc.wait() itself or by
    the handler installed for subprocess42.STOP_SIGNALS), proc.terminate() is
    called and the child gets |grace_period| to exit before proc.kill().

    Arguments:
      command: command to run; passed as-is to subprocess42.Popen.
      cwd: working directory of the child process.
      env: environment of the child process.
      hard_timeout: timeout given to the first proc.wait(). A falsy value is
          passed as None (presumably meaning "no timeout" -- confirm against
          subprocess42).
      grace_period: timeout given to proc.wait() after terminate(). A falsy
          value is passed as None.

    Returns:
      tuple(process exit code, bool if had a hard timeout). The exit code is 1
      when OSError is raised while starting or waiting on the process.
    """
    logging.info('run_command(%s, %s)' % (command, cwd))

    exit_code = None
    had_hard_timeout = False
    with tools.Profiler('RunTest'):
        proc = None
        # A list is used as a mutable flag so that handler() below can record
        # that a signal was received without rebinding a name in this scope.
        had_signal = []
        try:
            # TODO(maruel): This code is imperfect. It doesn't handle signals well
            # during the download phase and there are short windows where things
            # can go wrong.
            def handler(signum, _frame):
                # Only the first signal received once the process was started is
                # acted upon. It is converted into TimeoutExpired, the same
                # exception the hard timeout path below catches.
                if proc and not had_signal:
                    logging.info('Received signal %d', signum)
                    had_signal.append(True)
                    raise subprocess42.TimeoutExpired(command, None)

            proc = subprocess42.Popen(command, cwd=cwd, env=env, detached=True)
            with subprocess42.set_signal_handler(subprocess42.STOP_SIGNALS,
                                                 handler):
                try:
                    exit_code = proc.wait(hard_timeout or None)
                except subprocess42.TimeoutExpired:
                    # had_signal is still empty when the exception did not come
                    # from handler(), i.e. it is a real hard timeout.
                    if not had_signal:
                        logging.warning('Hard timeout')
                        had_hard_timeout = True
                    logging.warning('Sending SIGTERM')
                    proc.terminate()

            # Ignore signals in grace period. Forcibly give the grace period to the
            # child process.
            if exit_code is None:
                ignore = lambda *_: None
                with subprocess42.set_signal_handler(subprocess42.STOP_SIGNALS,
                                                     ignore):
                    try:
                        exit_code = proc.wait(grace_period or None)
                    except subprocess42.TimeoutExpired:
                        # Now kill for real. The user can distinguish between the
                        # following states:
                        # - signal but process exited within grace period,
                        #   had_hard_timeout will be set but the process exit code
                        #   will be script provided.
                        # - process exited late, exit code will be -9 on posix.
                        logging.warning('Grace exhausted; sending SIGKILL')
                        proc.kill()
            # Final wait; it runs on every path that did not raise OSError and
            # its result is the exit code that gets returned.
            logging.info('Waiting for proces exit')
            exit_code = proc.wait()
        except OSError:
            # This is not considered to be an internal error. The executable simply
            # does not exist.
            sys.stderr.write(
                '<The executable does not exist or a dependent library is missing>\n'
                '<Check for missing .so/.dll in the .isolate or GN file>\n'
                '<Command: %s>\n' % command)
            if os.environ.get('SWARMING_TASK_ID'):
                # Give an additional hint when running as a swarming task.
                sys.stderr.write(
                    '<See the task\'s page for commands to help diagnose this issue '
                    'by reproducing the task locally>\n')
            exit_code = 1
    # The mask renders a negative exit code as an unsigned 32-bit hex value.
    logging.info('Command finished with exit code %d (%s)', exit_code,
                 hex(0xffffffff & exit_code))
    return exit_code, had_hard_timeout
Ejemplo n.º 9
0
def run_command(command, cwd, tmp_dir, hard_timeout, grace_period):
  """Runs the command, enforcing a hard timeout followed by a grace period.

  The child gets a copy of os.environ in which the temporary directory variable
  (TMPDIR on OSX, TMP elsewhere except Windows) points to |tmp_dir|. It is
  started with subprocess42.Popen(detached=True). If waiting on it raises
  subprocess42.TimeoutExpired (raised by proc.wait() itself or by the handler
  installed for subprocess42.STOP_SIGNALS), proc.terminate() is called and the
  child gets |grace_period| to exit before proc.kill().

  Arguments:
    command: command to run; passed as-is to subprocess42.Popen.
    cwd: working directory of the child process.
    tmp_dir: temporary directory exported to the child process.
    hard_timeout: timeout given to the first proc.wait(). A falsy value is
        passed as None (presumably meaning "no timeout" -- confirm against
        subprocess42).
    grace_period: timeout given to proc.wait() after terminate(). A falsy
        value is passed as None.

  Returns:
    tuple(process exit code, bool if had a hard timeout). The exit code is 1
    when OSError is raised while starting or waiting on the process.
  """
  logging.info('run_command(%s, %s)' % (command, cwd))
  sys.stdout.flush()

  env = os.environ.copy()
  if sys.platform == 'darwin':
    env['TMPDIR'] = tmp_dir.encode('ascii')
  elif sys.platform == 'win32':
    # Temporarily disable this behavior on Windows while investigating
    # https://crbug.com/533552.
    # env['TEMP'] = tmp_dir.encode('ascii')
    pass
  else:
    env['TMP'] = tmp_dir.encode('ascii')
  exit_code = None
  had_hard_timeout = False
  with tools.Profiler('RunTest'):
    proc = None
    # A list is used as a mutable flag so that handler() below can record that
    # a signal was received without rebinding a name in this scope.
    had_signal = []
    try:
      # TODO(maruel): This code is imperfect. It doesn't handle signals well
      # during the download phase and there are short windows where things can
      # go wrong.
      def handler(signum, _frame):
        # Only the first signal received once the process was started is acted
        # upon. It is converted into TimeoutExpired, the same exception the
        # hard timeout path below catches.
        if proc and not had_signal:
          logging.info('Received signal %d', signum)
          had_signal.append(True)
          raise subprocess42.TimeoutExpired(command, None)

      proc = subprocess42.Popen(command, cwd=cwd, env=env, detached=True)
      with subprocess42.set_signal_handler(subprocess42.STOP_SIGNALS, handler):
        try:
          exit_code = proc.wait(hard_timeout or None)
        except subprocess42.TimeoutExpired:
          # had_signal is still empty when the exception did not come from
          # handler(), i.e. it is a real hard timeout.
          if not had_signal:
            logging.warning('Hard timeout')
            had_hard_timeout = True
          logging.warning('Sending SIGTERM')
          proc.terminate()

      # Ignore signals in grace period. Forcibly give the grace period to the
      # child process.
      if exit_code is None:
        ignore = lambda *_: None
        with subprocess42.set_signal_handler(subprocess42.STOP_SIGNALS, ignore):
          try:
            exit_code = proc.wait(grace_period or None)
          except subprocess42.TimeoutExpired:
            # Now kill for real. The user can distinguish between the
            # following states:
            # - signal but process exited within grace period,
            #   had_hard_timeout will be set but the process exit code will be
            #   script provided.
            # - process exited late, exit code will be -9 on posix.
            logging.warning('Grace exhausted; sending SIGKILL')
            proc.kill()
      # Final wait; it runs on every path that did not raise OSError and its
      # result is the exit code that gets returned.
      logging.info('Waiting for proces exit')
      exit_code = proc.wait()
    except OSError:
      # This is not considered to be an internal error. The executable simply
      # does not exist.
      exit_code = 1
  # The mask renders a negative exit code as an unsigned 32-bit hex value.
  logging.info(
      'Command finished with exit code %d (%s)',
      exit_code, hex(0xffffffff & exit_code))
  return exit_code, had_hard_timeout
Ejemplo n.º 10
0
def run_tha_test(isolated_hash, storage, cache, leak_temp_dir, extra_args):
  """Downloads the dependencies in the cache, hardlinks them into a temporary
  directory and runs the executable from there.

  A temporary directory is created to hold the output files. The content inside
  this directory will be uploaded back to |storage| packaged as a .isolated
  file.

  Arguments:
    isolated_hash: the SHA-1 of the .isolated file that must be retrieved to
                   recreate the tree of files to run the target executable.
    storage: an isolateserver.Storage object to retrieve remote objects. This
             object has a reference to an isolateserver.StorageApi, which does
             the actual I/O.
    cache: an isolateserver.LocalCache to keep from retrieving the same objects
           constantly by caching the objects retrieved. Can be on-disk or
           in-memory.
    leak_temp_dir: if true, the temporary directory will be deliberately leaked
                   for later examination.
    extra_args: optional arguments to add to the command stated in the .isolate
                file.

  Returns:
    The exit code of the command. It is 1 when the .isolated file could not be
    fetched (isolated_format.IsolatedError) or the command could not be started
    (OSError), and it is forced to a non-zero value when one of the temporary
    directories could not be deleted.
  """
  # Both temporary directories are created under the cache directory.
  run_dir = make_temp_dir(u'run_tha_test', cache.cache_dir)
  out_dir = unicode(make_temp_dir(u'isolated_out', cache.cache_dir))
  result = 0
  try:
    try:
      bundle = isolateserver.fetch_isolated(
          isolated_hash=isolated_hash,
          storage=storage,
          cache=cache,
          outdir=run_dir,
          require_command=True)
    except isolated_format.IsolatedError:
      on_error.report(None)
      # The cleanup in the 'finally' clause below still runs.
      return 1

    change_tree_read_only(run_dir, bundle.read_only)
    cwd = os.path.normpath(os.path.join(run_dir, bundle.relative_cwd))
    command = bundle.command + extra_args

    file_path.ensure_command_has_abs_path(command, cwd)
    command = process_command(command, out_dir)
    logging.info('Running %s, cwd=%s' % (command, cwd))

    # TODO(csharp): This should be specified somewhere else.
    # TODO(vadimsh): Pass it via 'env_vars' in manifest.
    # Add a rotating log file if one doesn't already exist.
    env = os.environ.copy()
    if MAIN_DIR:
      env.setdefault('RUN_TEST_CASES_LOG_FILE',
          os.path.join(MAIN_DIR, RUN_TEST_CASES_LOG))
    sys.stdout.flush()
    with tools.Profiler('RunTest'):
      try:
        with subprocess42.Popen_with_handler(command, cwd=cwd, env=env) as p:
          p.communicate()
          result = p.returncode
      except OSError:
        on_error.report('Failed to run %s; cwd=%s' % (command, cwd))
        result = 1
    # The mask renders a negative exit code as an unsigned 32-bit hex value.
    logging.info(
        'Command finished with exit code %d (%s)',
        result, hex(0xffffffff & result))
  finally:
    # Cleanup order matters: run_dir is removed first, then out_dir is
    # archived, and out_dir is removed last (see the HACK note below).
    try:
      if leak_temp_dir:
        logging.warning('Deliberately leaking %s for later examination',
                        run_dir)
      else:
        try:
          if not file_path.rmtree(run_dir):
            print >> sys.stderr, (
                'Failed to delete the temporary directory, forcibly failing\n'
                'the task because of it. No zombie process can outlive a\n'
                'successful task run and still be marked as successful.\n'
                'Fix your stuff.')
            # Keep an existing non-zero exit code, otherwise force a failure.
            result = result or 1
        except OSError:
          logging.warning('Leaking %s', run_dir)
          result = 1

      # HACK(vadimsh): On Windows rmtree(run_dir) call above has
      # a synchronization effect: it finishes only when all task child processes
      # terminate (since a running process locks *.exe file). Examine out_dir
      # only after that call completes (since child processes may
      # write to out_dir too and we need to wait for them to finish).

      # Upload out_dir and generate a .isolated file out of this directory.
      # It is only done if files were written in the directory.
      if os.path.isdir(out_dir) and os.listdir(out_dir):
        with tools.Profiler('ArchiveOutput'):
          results = isolateserver.archive_files_to_storage(
              storage, [out_dir], None)
        # TODO(maruel): Implement side-channel to publish this information.
        output_data = {
          'hash': results[0][0],
          'namespace': storage.namespace,
          'storage': storage.location,
        }
        sys.stdout.flush()
        print(
            '[run_isolated_out_hack]%s[/run_isolated_out_hack]' %
            tools.format_json(output_data, dense=True))

    finally:
      # out_dir is removed even if the archival above raised.
      try:
        if os.path.isdir(out_dir) and not file_path.rmtree(out_dir):
          result = result or 1
      except OSError:
        # The error was already printed out. Report it but that's it. Only
        # report on non-Windows or on Windows when the process had succeeded.
        # Due to the way file sharing works on Windows, it's sadly expected that
        # file deletion may fail when a test failed.
        if sys.platform != 'win32' or not result:
          on_error.report(None)
        result = 1

  return result
Ejemplo n.º 11
0
def run_tha_test(isolated_hash, storage, cache, extra_args):
  """Downloads the dependencies in the cache, hardlinks them into a temporary
  directory and runs the executable from there.

  A temporary directory is created to hold the output files. The content inside
  this directory will be uploaded back to |storage| packaged as a .isolated
  file.

  Arguments:
    isolated_hash: the sha-1 of the .isolated file that must be retrieved to
                   recreate the tree of files to run the target executable.
    storage: an isolateserver.Storage object to retrieve remote objects. This
             object has a reference to an isolateserver.StorageApi, which does
             the actual I/O.
    cache: an isolateserver.LocalCache to keep from retrieving the same objects
           constantly by caching the objects retrieved. Can be on-disk or
           in-memory.
    extra_args: optional arguments to add to the command stated in the .isolate
                file.

  Returns:
    The exit code of the command, or 1 when the .isolated file could not be
    processed (isolateserver.ConfigError) or the command could not be started
    (OSError).

  Raises:
    OSError: when deleting the run directory fails while the result is 0.
  """
  # run_dir lives under the cache directory; out_dir is created by
  # tempfile.mkdtemp() in the default temporary location.
  run_dir = make_temp_dir('run_tha_test', cache.cache_dir)
  out_dir = unicode(tempfile.mkdtemp(prefix='run_tha_test'))
  result = 0
  try:
    try:
      settings = isolateserver.fetch_isolated(
          isolated_hash=isolated_hash,
          storage=storage,
          cache=cache,
          outdir=run_dir,
          require_command=True)
    except isolateserver.ConfigError as e:
      tools.report_error(e)
      # result is set before returning so that the 'finally' clause below sees
      # the failure and does not re-raise a cleanup error.
      result = 1
      return result

    change_tree_read_only(run_dir, settings.read_only)
    cwd = os.path.normpath(os.path.join(run_dir, settings.relative_cwd))
    command = settings.command + extra_args

    # subprocess.call doesn't consider 'cwd' when searching for executable.
    # Yet isolate can specify command relative to 'cwd'. Convert it to absolute
    # path if necessary.
    if not os.path.isabs(command[0]):
      command[0] = os.path.abspath(os.path.join(cwd, command[0]))
    command = process_command(command, out_dir)
    logging.info('Running %s, cwd=%s' % (command, cwd))

    # TODO(csharp): This should be specified somewhere else.
    # TODO(vadimsh): Pass it via 'env_vars' in manifest.
    # Add a rotating log file if one doesn't already exist.
    env = os.environ.copy()
    if MAIN_DIR:
      env.setdefault('RUN_TEST_CASES_LOG_FILE',
          os.path.join(MAIN_DIR, RUN_TEST_CASES_LOG))
    try:
      with tools.Profiler('RunTest'):
        result = subprocess.call(command, cwd=cwd, env=env)
    except OSError as e:
      tools.report_error('Failed to run %s; cwd=%s: %s' % (command, cwd, e))
      result = 1

    # Upload out_dir and generate a .isolated file out of this directory. It is
    # only done if files were written in the directory.
    if os.listdir(out_dir):
      with tools.Profiler('ArchiveOutput'):
        results = isolateserver.archive_files_to_storage(
            storage, [out_dir], None)
      # TODO(maruel): Implement side-channel to publish this information.
      output_data = {
        'hash': results[0][0],
        'namespace': storage.namespace,
        'storage': storage.location,
      }
      sys.stdout.flush()
      sys.stderr.flush()
      print(
          '[run_isolated_out_hack]%s[/run_isolated_out_hack]' %
          tools.format_json(output_data, dense=True))

  finally:
    # Both directories are removed on every exit path; run_dir is removed even
    # if removing out_dir raised.
    try:
      rmtree(out_dir)
    finally:
      try:
        rmtree(run_dir)
      except OSError:
        logging.warning('Leaking %s', run_dir)
        # Swallow the exception so it doesn't generate an infrastructure error.
        #
        # It usually happens on Windows when a child process is not properly
        # terminated, usually because of a test case starting child processes
        # that time out. This causes files to be locked and it becomes
        # impossible to delete them.
        #
        # Only report an infrastructure error if the test didn't fail. This is
        # because a swarming bot will likely not reboot. This situation will
        # cause accumulation of temporary hardlink trees.
        if not result:
          raise
  return result