Example #1
def delete_and_upload(storage, out_dir, leak_temp_dir):
  """Deletes the temporary run directory and uploads results back.

  Returns:
    tuple(outputs_ref, success, cold, hot)
    - outputs_ref: a dict referring to the results archived back to the isolate
          server, if applicable.
    - success: False if something occurred that means the task must forcibly be
          considered a failure, e.g. zombie processes were left behind.
    - cold: sorted list of the sizes of the cold items, i.e. the ones that had
          to be uploaded.
    - hot: sorted list of the sizes of the hot items, i.e. the ones that didn't
          have to be uploaded.
  """

  # Upload out_dir and generate a .isolated file out of this directory. This is
  # only done if files were written to the directory.
  outputs_ref = None
  cold = []
  hot = []
  if fs.isdir(out_dir) and fs.listdir(out_dir):
    with tools.Profiler('ArchiveOutput'):
      try:
        results, f_cold, f_hot = isolateserver.archive_files_to_storage(
            storage, [out_dir], None)
        outputs_ref = {
          'isolated': results[0][0],
          'isolatedserver': storage.location,
          'namespace': storage.namespace,
        }
        cold = sorted(i.size for i in f_cold)
        hot = sorted(i.size for i in f_hot)
      except isolateserver.Aborted:
        # This happens when a SIGTERM signal was received while uploading data.
        # There are two causes:
        # - The task was too slow and was about to be killed anyway due to
        #   exceeding the hard timeout.
        # - The amount of data uploaded back is very large and took too much
        #   time to archive.
        sys.stderr.write('Received SIGTERM while uploading\n')
        # Re-raise, so it will be treated as an internal failure.
        raise
  try:
    if (not leak_temp_dir and fs.isdir(out_dir) and
        not file_path.rmtree(out_dir)):
      logging.error('Had difficulties removing out_dir %s', out_dir)
      return outputs_ref, False, cold, hot
  except OSError as e:
    # When this happens, it means there's a process error.
    logging.exception('Had difficulties removing out_dir %s: %s', out_dir, e)
    return outputs_ref, False, cold, hot
  return outputs_ref, True, cold, hot
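
A minimal usage sketch of the tuple above (hedged: storage, out_dir and
exit_code are assumed to be provided by the caller; the base64/large packing
mirrors the stats code in Example #20):

import base64

from utils import large  # Import path assumed from the run_isolated examples.

outputs_ref, success, cold, hot = delete_and_upload(
    storage, out_dir, leak_temp_dir=False)
if not success and exit_code == 0:
  # A failed cleanup forcibly fails the task.
  exit_code = 1
upload_stats = {
  'items_cold': base64.b64encode(large.pack(cold)),
  'items_hot': base64.b64encode(large.pack(hot)),
}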
Example #2
def CMDremap(parser, args):
  """Creates a directory with all the dependencies mapped into it.

  Useful to test manually why a test is failing. The target executable is not
  run.
  """
  add_isolate_options(parser)
  add_outdir_options(parser)
  add_skip_refresh_option(parser)
  options, args = parser.parse_args(args)
  if args:
    parser.error('Unsupported argument: %s' % args)
  cwd = os.getcwd()
  process_isolate_options(parser, options, cwd, require_isolated=False)
  process_outdir_options(parser, options, cwd)
  complete_state = load_complete_state(options, cwd, None, options.skip_refresh)

  if not fs.isdir(options.outdir):
    fs.makedirs(options.outdir)
  print('Remapping into %s' % options.outdir)
  if fs.listdir(options.outdir):
    raise ExecutionError('Can\'t remap in a non-empty directory')

  create_isolate_tree(
      options.outdir, complete_state.root_dir, complete_state.saved_state.files,
      complete_state.saved_state.relative_cwd,
      complete_state.saved_state.read_only)
  if complete_state.isolated_filepath:
    complete_state.save_files()
  return 0
Example #3
def create_isolate_tree(outdir, root_dir, files, relative_cwd, read_only):
  """Creates a isolated tree usable for test execution.

  Returns the working directory that the isolated command should be started
  in.
  """
  # Forcibly copy when the tree has to be read-only. Otherwise the inode is
  # modified, and this causes real problems because the user's source tree
  # becomes read-only. On the other hand, the cost of copying files is huge.
  if read_only not in (0, None):
    action = file_path.COPY
  else:
    action = file_path.HARDLINK_WITH_FALLBACK

  recreate_tree(
      outdir=outdir,
      indir=root_dir,
      infiles=files,
      action=action,
      as_hash=False)
  cwd = os.path.normpath(os.path.join(outdir, relative_cwd))
  if not fs.isdir(cwd):
    # This can happen when no files are mapped from the directory containing
    # the .isolate file, but the directory must still exist to be used as the
    # current working directory.
    fs.makedirs(cwd)
  run_isolated.change_tree_read_only(outdir, read_only)
  return cwd
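
A hedged sketch of a typical caller, modeled on CMDremap in Example #2;
options, complete_state and command are assumed to come from the surrounding
isolate tooling:

import subprocess

cwd = create_isolate_tree(
    options.outdir, complete_state.root_dir,
    complete_state.saved_state.files,
    complete_state.saved_state.relative_cwd,
    complete_state.saved_state.read_only)
# The isolated command must be started from the returned directory.
returncode = subprocess.call(command, cwd=cwd)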
Example #4
def read_tree(path):
    """Returns a dict with {filepath: content}."""
    if not fs.isdir(path):
        return None
    out = {}
    for root, _, filenames in fs.walk(path):
        for filename in filenames:
            p = os.path.join(root, filename)
            out[os.path.relpath(p, path)] = read_file(p)
    return out
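
A self-contained sketch of using read_tree() in a test, assuming read_file()
returns the raw file content, as the docstring above implies:

import os
import shutil
import tempfile

def _read_tree_example():
    # Hypothetical test helper: snapshot a directory and compare it to an
    # expected {relative path: content} dict.
    tmp = tempfile.mkdtemp()
    try:
        with open(os.path.join(tmp, 'hello.txt'), 'wb') as f:
            f.write('world')
        assert read_tree(tmp) == {'hello.txt': 'world'}
    finally:
        shutil.rmtree(tmp)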
    def trim(self):
        evicted = []
        with self._lock:
            if not fs.isdir(self.cache_dir):
                return evicted

            # Trim according to maximum number of items.
            if self._policies.max_items:
                while len(self._lru) > self._policies.max_items:
                    name, size = self._remove_lru_item()
                    evicted.append(size)
                    logging.info(
                        'NamedCache.trim(): Removed %r(%d) due to max_items(%d)',
                        name, size, self._policies.max_items)

            # Trim according to maximum age.
            if self._policies.max_age_secs:
                cutoff = self._lru.time_fn() - self._policies.max_age_secs
                while self._lru:
                    _name, (_data, ts) = self._lru.get_oldest()
                    if ts >= cutoff:
                        break
                    name, size = self._remove_lru_item()
                    evicted.append(size)
                    logging.info(
                        'NamedCache.trim(): Removed %r(%d) due to max_age_secs(%d)',
                        name, size, self._policies.max_age_secs)

            # Trim according to minimum free space.
            if self._policies.min_free_space:
                while self._lru:
                    free_space = file_path.get_free_space(self.cache_dir)
                    if free_space >= self._policies.min_free_space:
                        break
                    name, size = self._remove_lru_item()
                    evicted.append(size)
                    logging.info(
                        'NamedCache.trim(): Removed %r(%d) due to min_free_space(%d)',
                        name, size, self._policies.min_free_space)

            # Trim according to maximum total size.
            if self._policies.max_cache_size:
                while self._lru:
                    total = sum(size
                                for _rel_cache, size in self._lru.values())
                    if total <= self._policies.max_cache_size:
                        break
                    name, size = self._remove_lru_item()
                    evicted.append(size)
                    logging.info(
                        'NamedCache.trim(): Removed %r(%d) due to max_cache_size(%d)',
                        name, size, self._policies.max_cache_size)

            self._save()
        return evicted
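
trim() only reads four attributes off self._policies, so any container
exposing them works; the CachePolicies namedtuple below is an illustrative
stand-in, not necessarily the real class, and 'cache' is assumed to be the
instance owning trim():

import collections
import logging

CachePolicies = collections.namedtuple(
    'CachePolicies',
    ['max_cache_size', 'min_free_space', 'max_items', 'max_age_secs'])

policies = CachePolicies(
    max_cache_size=10 * 1024 * 1024 * 1024,  # Keep at most 10 GiB in total.
    min_free_space=2 * 1024 * 1024 * 1024,   # Keep at least 2 GiB free.
    max_items=50,                            # Keep at most 50 entries.
    max_age_secs=30 * 24 * 60 * 60)          # Evict entries older than 30 days.

evicted = cache.trim()  # Returns the list of evicted item sizes.
logging.info('Reclaimed %d bytes', sum(evicted))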
Example #6
def _save(self):
    """Saves the LRU ordering."""
    self._lock.assert_locked()
    if sys.platform != 'win32':
        d = os.path.dirname(self.state_file)
        if fs.isdir(d):
            # Necessary otherwise the file can't be created.
            file_path.set_read_only(d, False)
    if fs.isfile(self.state_file):
        file_path.set_read_only(self.state_file, False)
    self._lru.save(self.state_file)
Example #7
def make_tree(out, contents):
  for relpath, content in sorted(contents.iteritems()):
    filepath = os.path.join(out, relpath.replace('/', os.path.sep))
    dirpath = os.path.dirname(filepath)
    if not fs.isdir(dirpath):
      fs.makedirs(dirpath, 0700)
    if isinstance(content, SymLink):
      fs.symlink(content, filepath)
    else:
      mode = 0700 if relpath.endswith('.py') else 0600
      flags = os.O_WRONLY | os.O_CREAT
      if sys.platform == 'win32':
        # pylint: disable=no-member
        flags |= os.O_BINARY
      with os.fdopen(os.open(filepath, flags, mode), 'wb') as f:
        f.write(content)
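
A hedged usage sketch; judging by the isinstance() check above, SymLink is
assumed to be a str subclass whose value is the link target, and 'out' is an
existing output directory:

make_tree(out, {
  'data/input.txt': 'hello world\n',           # Plain file, created mode 0600.
  'bin/runner.py': '#!/usr/bin/env python\n',  # .py files get mode 0700.
  'current': SymLink('data'),                  # Symlink 'current' -> 'data'.
})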
Example #8
  def __init__(self, task_output_dir, shard_count):
    """Initializes TaskOutputCollector, ensures |task_output_dir| exists.

    Args:
      task_output_dir: (optional) local directory to put fetched files into.
      shard_count: expected number of task shards.
    """
    self.task_output_dir = (
        unicode(os.path.abspath(task_output_dir))
        if task_output_dir else task_output_dir)
    self.shard_count = shard_count

    self._lock = threading.Lock()
    self._per_shard_results = {}
    self._storage = None

    if self.task_output_dir and not fs.isdir(self.task_output_dir):
      fs.makedirs(self.task_output_dir)
Example #9
    def cleanup(self):
        """Cleans up the cache directory.

    Ensures there is no unknown files in cache_dir.
    Ensures the read-only bits are set correctly.

    At that point, the cache was already loaded, trimmed to respect cache
    policies.
    """
        with self._lock:
            fs.chmod(self.cache_dir, 0700)
            # Ensure that all files listed in the state still exist and add new ones.
            previous = set(self._lru)
            # It'd be faster if there were a readdir() function.
            for filename in fs.listdir(self.cache_dir):
                if filename == self.STATE_FILE:
                    fs.chmod(os.path.join(self.cache_dir, filename), 0600)
                    continue
                if filename in previous:
                    fs.chmod(os.path.join(self.cache_dir, filename), 0400)
                    previous.remove(filename)
                    continue

                # An untracked file. Delete it.
                logging.warning('Removing unknown file %s from cache',
                                filename)
                p = self._path(filename)
                if fs.isdir(p):
                    try:
                        file_path.rmtree(p)
                    except OSError:
                        pass
                else:
                    file_path.try_remove(p)

            if previous:
                # Filter out entries that were not found.
                logging.warning('Removed %d lost files', len(previous))
                for filename in previous:
                    self._lru.pop(filename)
                self._save()
    def _remove(self, name):
        """Removes a cache directory and entry.

    Returns:
      Number of caches deleted.
    """
        self._lock.assert_locked()
        # First try to remove the alias if it exists.
        named_dir = self._get_named_path(name)
        if fs.islink(named_dir):
            fs.unlink(named_dir)

        # Then remove the actual data.
        if name not in self._lru:
            return
        rel_path, _size = self._lru.get(name)
        abs_path = os.path.join(self.cache_dir, rel_path)
        if fs.isdir(abs_path):
            file_path.rmtree(abs_path)
        self._lru.pop(name)
Example #11
    def _load(self, trim, time_fn):
        """Loads state of the cache from json file.

    If cache_dir does not exist on disk, it is created.
    """
        self._lock.assert_locked()

        if not fs.isfile(self.state_file):
            if not fs.isdir(self.cache_dir):
                fs.makedirs(self.cache_dir)
        else:
            # Load state of the cache.
            try:
                self._lru = lru.LRUDict.load(self.state_file)
            except ValueError as err:
                logging.error('Failed to load cache state: %s', err)
                # Don't want to keep broken state file.
                file_path.try_remove(self.state_file)
        if time_fn:
            self._lru.time_fn = time_fn
        if trim:
            self._trim()
    def __init__(self, cache_dir, policies, time_fn=None):
        """Initializes NamedCaches.

    Arguments:
    - cache_dir is a directory for persistent cache storage.
    - policies is a CachePolicies instance.
    - time_fn is a function that returns timestamp (float) and used to take
      timestamps when new caches are requested. Used in unit tests.
    """
        super(NamedCache, self).__init__(cache_dir)
        self._policies = policies
        # LRU {cache_name -> tuple(cache_location, size)}
        self.state_file = os.path.join(cache_dir, self.STATE_FILE)
        self._lru = lru.LRUDict()
        if not fs.isdir(self.cache_dir):
            fs.makedirs(self.cache_dir)
        elif fs.isfile(self.state_file):
            try:
                self._lru = lru.LRUDict.load(self.state_file)
                for _, size in self._lru.values():
                    if not isinstance(size, six.integer_types):
                        with open(self.state_file, 'r') as f:
                            logging.info('named cache state file: %s\n%s',
                                         self.state_file, f.read())
                        raise ValueError("size is not integer: %s" % size)

            except ValueError:
                logging.exception(
                    'NamedCache: failed to load named cache state file; obliterating'
                )
                file_path.rmtree(self.cache_dir)
                fs.makedirs(self.cache_dir)
                self._lru = lru.LRUDict()
            with self._lock:
                self._try_upgrade()
        if time_fn:
            self._lru.time_fn = time_fn
Example #13
def map_and_run(command, isolated_hash, storage, isolate_cache, outputs,
                init_named_caches, leak_temp_dir, root_dir, hard_timeout,
                grace_period, bot_file, install_packages_fn, use_symlinks,
                constant_run_path):
    """Runs a command with optional isolated input/output.

  See run_tha_test for argument documentation.

  Returns metadata about the result.
  """
    assert isinstance(command, list), command
    assert root_dir or root_dir is None
    result = {
        'duration': None,
        'exit_code': None,
        'had_hard_timeout': False,
        'internal_failure': None,
        'stats': {
            # 'isolated': {
            #    'cipd': {
            #      'duration': 0.,
            #      'get_client_duration': 0.,
            #    },
            #    'download': {
            #      'duration': 0.,
            #      'initial_number_items': 0,
            #      'initial_size': 0,
            #      'items_cold': '<large.pack()>',
            #      'items_hot': '<large.pack()>',
            #    },
            #    'upload': {
            #      'duration': 0.,
            #      'items_cold': '<large.pack()>',
            #      'items_hot': '<large.pack()>',
            #    },
            #  },
        },
        # 'cipd_pins': {
        #   'packages': [
        #     {'package_name': ..., 'version': ..., 'path': ...},
        #     ...
        #   ],
        #  'client_package': {'package_name': ..., 'version': ...},
        # },
        'outputs_ref': None,
        'version': 5,
    }

    if root_dir:
        file_path.ensure_tree(root_dir, 0700)
    elif isolate_cache.cache_dir:
        root_dir = os.path.dirname(isolate_cache.cache_dir)
    # See the comment for these constants.
    # If root_dir is not specified, the run path is not constant.
    # TODO(maruel): This is not obvious. Change this to become an error once we
    # make constant_run_path an exposed flag.
    if constant_run_path and root_dir:
        run_dir = os.path.join(root_dir, ISOLATED_RUN_DIR)
        os.mkdir(run_dir)
    else:
        run_dir = make_temp_dir(ISOLATED_RUN_DIR, root_dir)
    # storage should normally be set, but don't crash if it is not. This can
    # happen since a Swarming task can run without an isolate server.
    out_dir = make_temp_dir(ISOLATED_OUT_DIR, root_dir) if storage else None
    tmp_dir = make_temp_dir(ISOLATED_TMP_DIR, root_dir)
    cwd = run_dir

    try:
        with install_packages_fn(run_dir) as cipd_info:
            if cipd_info:
                result['stats']['cipd'] = cipd_info.stats
                result['cipd_pins'] = cipd_info.pins

            if isolated_hash:
                isolated_stats = result['stats'].setdefault('isolated', {})
                bundle, isolated_stats['download'] = fetch_and_map(
                    isolated_hash=isolated_hash,
                    storage=storage,
                    cache=isolate_cache,
                    outdir=run_dir,
                    use_symlinks=use_symlinks)
                change_tree_read_only(run_dir, bundle.read_only)
                cwd = os.path.normpath(os.path.join(cwd, bundle.relative_cwd))
                # Inject the command
                if bundle.command:
                    command = bundle.command + command

            if not command:
                # Handle this as a task failure, not an internal failure.
                sys.stderr.write(
                    '<No command was specified!>\n'
                    '<Please specify a command when triggering your Swarming task>\n'
                )
                result['exit_code'] = 1
                return result

            # If we have an explicit list of files to return, make sure their
            # directories exist now.
            if storage and outputs:
                isolateserver.create_directories(run_dir, outputs)

            command = tools.fix_python_path(command)
            command = process_command(command, out_dir, bot_file)
            file_path.ensure_command_has_abs_path(command, cwd)

            with init_named_caches(run_dir):
                sys.stdout.flush()
                start = time.time()
                try:
                    result['exit_code'], result['had_hard_timeout'] = (
                        run_command(
                            command, cwd, get_command_env(tmp_dir, cipd_info),
                            hard_timeout, grace_period))
                finally:
                    result['duration'] = max(time.time() - start, 0)
    except Exception as e:
        # An internal error occurred. Report accordingly so the swarming task will
        # be retried automatically.
        logging.exception('internal failure: %s', e)
        result['internal_failure'] = str(e)
        on_error.report(None)

    # Clean up
    finally:
        try:
            # Try to link files to the output directory, if specified.
            if out_dir:
                link_outputs_to_outdir(run_dir, out_dir, outputs)

            success = False
            if leak_temp_dir:
                success = True
                logging.warning(
                    'Deliberately leaking %s for later examination', run_dir)
            else:
                # On Windows, the rmtree(run_dir) call below has a
                # synchronization effect: it finishes only when all task child
                # processes terminate (since a running process locks its *.exe
                # file). Examine out_dir only after that call completes (since
                # child processes may write to out_dir too and we need to wait
                # for them to finish).
                if fs.isdir(run_dir):
                    try:
                        success = file_path.rmtree(run_dir)
                    except OSError as e:
                        logging.error('Failure with %s', e)
                        success = False
                    if not success:
                        print >> sys.stderr, (
                            'Failed to delete the run directory, thus failing the task.\n'
                            'This may be due to a subprocess outliving the main task\n'
                            'process, holding on to resources. Please fix the task so\n'
                            'that it releases resources and cleans up subprocesses.'
                        )
                        if result['exit_code'] == 0:
                            result['exit_code'] = 1
                if fs.isdir(tmp_dir):
                    try:
                        success = file_path.rmtree(tmp_dir)
                    except OSError as e:
                        logging.error('Failure with %s', e)
                        success = False
                    if not success:
                        print >> sys.stderr, (
                            'Failed to delete the temp directory, thus failing the task.\n'
                            'This may be due to a subprocess outliving the main task\n'
                            'process, holding on to resources. Please fix the task so\n'
                            'that it releases resources and cleans up subprocesses.'
                        )
                        if result['exit_code'] == 0:
                            result['exit_code'] = 1

            # This deletes out_dir if leak_temp_dir is not set.
            if out_dir:
                isolated_stats = result['stats'].setdefault('isolated', {})
                result['outputs_ref'], success, isolated_stats['upload'] = (
                    delete_and_upload(storage, out_dir, leak_temp_dir))
            if not success and result['exit_code'] == 0:
                result['exit_code'] = 1
        except Exception as e:
            # Swallow any exception in the main finally clause.
            if out_dir:
                logging.exception('Leaking out_dir %s: %s', out_dir, e)
            result['internal_failure'] = str(e)
    return result
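
A hedged sketch of folding the returned metadata into a process exit code,
mirroring what run_tha_test() does in Example #14; the keyword arguments are
assumed to be the map_and_run() parameters prepared by the caller:

def _run_and_get_exit_code(**kwargs):
    result = map_and_run(**kwargs)
    if result['internal_failure']:
        # Internal failures ask Swarming to retry the task on another bot.
        return 1
    return result['exit_code'] or 0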
Example #14
def run_tha_test(isolated_hash, storage, cache, leak_temp_dir, result_json,
                 root_dir, hard_timeout, grace_period, extra_args):
    """Downloads the dependencies in the cache, hardlinks them into a temporary
  directory and runs the executable from there.

  A temporary directory is created to hold the output files. The content inside
  this directory will be uploaded back to |storage| packaged as a .isolated
  file.

  Arguments:
    isolated_hash: the SHA-1 of the .isolated file that must be retrieved to
                   recreate the tree of files to run the target executable.
    storage: an isolateserver.Storage object to retrieve remote objects. This
             object has a reference to an isolateserver.StorageApi, which does
             the actual I/O.
    cache: an isolateserver.LocalCache to keep from retrieving the same objects
           constantly by caching the objects retrieved. Can be on-disk or
           in-memory.
    leak_temp_dir: if true, the temporary directory will be deliberately leaked
                   for later examination.
    result_json: file path to dump result metadata into. If set, the process
                 exit code is always 0 unless an internal error occured.
    root_dir: directory to the path to use to create the temporary directory. If
              not specified, a random temporary directory is created.
    hard_timeout: kills the process if it lasts more than this amount of
                  seconds.
    grace_period: number of seconds to wait between SIGTERM and SIGKILL.
    extra_args: optional arguments to add to the command stated in the .isolate
                file.

  Returns:
    Process exit code that should be used.
  """
    if result_json:
        # Write a json output file right away in case we get killed.
        result = {
            'exit_code': None,
            'had_hard_timeout': False,
            'internal_failure': 'Was terminated before completion',
            'outputs_ref': None,
            'version': 2,
        }
        tools.write_json(result_json, result, dense=True)

    # run_isolated exit code. Depends on whether result_json is used or not.
    result = map_and_run(isolated_hash, storage, cache, leak_temp_dir,
                         root_dir, hard_timeout, grace_period, extra_args)
    logging.info('Result:\n%s', tools.format_json(result, dense=True))
    if result_json:
        # We've seen tests delete 'work' on exit, causing an exception here.
        # Try to recreate the directory if necessary.
        work_dir = os.path.dirname(result_json)
        if not fs.isdir(work_dir):
            fs.mkdir(work_dir)
        tools.write_json(result_json, result, dense=True)
        # Only return 1 if there was an internal error.
        return int(bool(result['internal_failure']))

    # Marshal into old-style inline output.
    if result['outputs_ref']:
        data = {
            'hash': result['outputs_ref']['isolated'],
            'namespace': result['outputs_ref']['namespace'],
            'storage': result['outputs_ref']['isolatedserver'],
        }
        sys.stdout.flush()
        print('[run_isolated_out_hack]%s[/run_isolated_out_hack]' %
              tools.format_json(data, dense=True))
        sys.stdout.flush()
    return result['exit_code'] or int(bool(result['internal_failure']))
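
A hedged sketch of a consumer for the inline marker printed above; the regex
mirrors the format string, and tools.format_json() is assumed to emit plain
JSON:

import json
import re

_OUT_HACK_RE = re.compile(
    r'\[run_isolated_out_hack\](.*?)\[/run_isolated_out_hack\]', re.DOTALL)

def extract_output_ref(stdout_text):
    """Returns the decoded output dict, or None if the marker is absent."""
    match = _OUT_HACK_RE.search(stdout_text)
    return json.loads(match.group(1)) if match else None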
Example #15
def map_and_run(isolated_hash, storage, cache, leak_temp_dir, root_dir,
                hard_timeout, grace_period, extra_args):
    """Maps and run the command. Returns metadata about the result."""
    # TODO(maruel): Include performance statistics.
    result = {
        'exit_code': None,
        'had_hard_timeout': False,
        'internal_failure': None,
        'outputs_ref': None,
        'version': 2,
    }
    if root_dir:
        if not fs.isdir(root_dir):
            fs.makedirs(root_dir, 0700)
        prefix = u''
    else:
        root_dir = os.path.dirname(
            cache.cache_dir) if cache.cache_dir else None
        prefix = u'isolated_'
    run_dir = make_temp_dir(prefix + u'run', root_dir)
    out_dir = make_temp_dir(prefix + u'out', root_dir)
    tmp_dir = make_temp_dir(prefix + u'tmp', root_dir)
    try:
        try:
            bundle = isolateserver.fetch_isolated(isolated_hash=isolated_hash,
                                                  storage=storage,
                                                  cache=cache,
                                                  outdir=run_dir,
                                                  require_command=True)
        except isolateserver.IsolatedErrorNoCommand:
            # Handle this as a task failure, not an internal failure.
            sys.stderr.write(
                '<The .isolated doesn\'t declare any command to run!>\n'
                '<Check your .isolate for missing \'command\' variable>\n')
            if os.environ.get('SWARMING_TASK_ID'):
                # Give an additional hint when running as a swarming task.
                sys.stderr.write('<This occurs at the \'isolate\' step>\n')
            result['exit_code'] = 1
            return result

        change_tree_read_only(run_dir, bundle.read_only)
        cwd = os.path.normpath(os.path.join(run_dir, bundle.relative_cwd))
        command = bundle.command + extra_args
        file_path.ensure_command_has_abs_path(command, cwd)
        result['exit_code'], result['had_hard_timeout'] = run_command(
            process_command(command, out_dir), cwd, tmp_dir, hard_timeout,
            grace_period)
    except Exception as e:
        # An internal error occurred. Report accordingly so the swarming task
        # will be retried automatically.
        logging.exception('internal failure: %s', e)
        result['internal_failure'] = str(e)
        on_error.report(None)
    finally:
        try:
            if leak_temp_dir:
                logging.warning(
                    'Deliberately leaking %s for later examination', run_dir)
            else:
                # On Windows, the rmtree(run_dir) call below has a
                # synchronization effect: it finishes only when all task child
                # processes terminate (since a running process locks its *.exe
                # file). Examine out_dir only after that call completes (since
                # child processes may write to out_dir too and we need to wait
                # for them to finish).
                if fs.isdir(run_dir):
                    try:
                        success = file_path.rmtree(run_dir)
                    except OSError as e:
                        logging.error('Failure with %s', e)
                        success = False
                    if not success:
                        print >> sys.stderr, (
                            'Failed to delete the run directory, forcibly failing\n'
                            'the task because of it. No zombie process can outlive a\n'
                            'successful task run and still be marked as successful.\n'
                            'Fix your stuff.')
                        if result['exit_code'] == 0:
                            result['exit_code'] = 1
                if fs.isdir(tmp_dir):
                    try:
                        success = file_path.rmtree(tmp_dir)
                    except OSError as e:
                        logging.error('Failure with %s', e)
                        success = False
                    if not success:
                        print >> sys.stderr, (
                            'Failed to delete the temporary directory, forcibly failing\n'
                            'the task because of it. No zombie process can outlive a\n'
                            'successful task run and still be marked as successful.\n'
                            'Fix your stuff.')
                        if result['exit_code'] == 0:
                            result['exit_code'] = 1

            # This deletes out_dir if leak_temp_dir is not set.
            result['outputs_ref'], success = delete_and_upload(
                storage, out_dir, leak_temp_dir)
            if not success and result['exit_code'] == 0:
                result['exit_code'] = 1
        except Exception as e:
            # Swallow any exception in the main finally clause.
            logging.exception('Leaking out_dir %s: %s', out_dir, e)
            result['internal_failure'] = str(e)
    return result
Example #16
def CMDbatcharchive(parser, args):
    """Archives multiple isolated trees at once.

  Using single command instead of multiple sequential invocations allows to cut
  redundant work when isolated trees share common files (e.g. file hashes are
  checked only once, their presence on the server is checked only once, and
  so on).

  Takes a list of paths to *.isolated.gen.json files that describe what trees to
  isolate. Format of files is:
  {
    "version": 1,
    "dir": <absolute path to a directory all other paths are relative to>,
    "args": [list of command line arguments for single 'archive' command]
  }
  """
    isolateserver.add_isolate_server_options(parser)
    isolateserver.add_archive_options(parser)
    auth.add_auth_options(parser)
    parser.add_option(
        '--dump-json',
        metavar='FILE',
        help='Write isolated hashes of archived trees to this file as JSON')
    options, args = parser.parse_args(args)
    auth.process_auth_options(parser, options)
    isolateserver.process_isolate_server_options(parser, options, True, True)

    # Validate all incoming options, prepare what needs to be archived as a list
    # of tuples (archival options, working directory).
    work_units = []
    for gen_json_path in args:
        # Validate JSON format of a *.isolated.gen.json file.
        try:
            data = tools.read_json(gen_json_path)
        except IOError as e:
            parser.error('Failed to open %s: %s' % (gen_json_path, e))
        if data.get('version') != ISOLATED_GEN_JSON_VERSION:
            parser.error('Invalid version in %s' % gen_json_path)
        cwd = data.get('dir')
        if not isinstance(cwd, unicode) or not fs.isdir(cwd):
            parser.error('Invalid dir in %s' % gen_json_path)
        args = data.get('args')
        if (not isinstance(args, list)
                or not all(isinstance(x, unicode) for x in args)):
            parser.error('Invalid args in %s' % gen_json_path)
        # Convert command line (embedded in JSON) to Options object.
        work_units.append((parse_archive_command_line(args, cwd), cwd))

    # Perform the archival, all at once.
    isolated_hashes = isolate_and_archive(work_units, options.isolate_server,
                                          options.namespace)

    # TODO(vadimsh): isolate_and_archive returns None on upload failure; there's
    # currently no way to figure out which *.isolated files from a batch were
    # successfully uploaded, so consider them all failed (and emit an empty dict
    # as the JSON result).
    if options.dump_json:
        tools.write_json(options.dump_json, isolated_hashes or {}, False)

    if isolated_hashes is None:
        return EXIT_CODE_UPLOAD_ERROR

    # isolated_hashes[x] is None if 'x.isolate' contains an error.
    if not all(isolated_hashes.itervalues()):
        return EXIT_CODE_ISOLATE_ERROR

    return 0
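
A hedged sketch of producing one work unit in the format the docstring
describes; the paths and the 'archive' command flags are illustrative only:

import json

gen = {
    'version': 1,  # Must match ISOLATED_GEN_JSON_VERSION.
    'dir': '/abs/path/to/checkout',
    'args': [
        '--isolate', 'foo.isolate',
        '--isolated', 'foo.isolated',
    ],
}
with open('foo.isolated.gen.json', 'w') as f:
    json.dump(gen, f)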
Example #17
    def cleanup(self):
        """Removes unknown directories.

    Does not recalculate the cache size since it's surprisingly slow on some
    OSes.
    """
        success = True
        with self._lock:
            try:
                actual = set(fs.listdir(self.cache_dir))
                actual.discard(self.NAMED_DIR)
                actual.discard(self.STATE_FILE)
                expected = {v[0]: k for k, v in self._lru.iteritems()}
                # First, handle the actual cache content.
                # Remove missing entries.
                for missing in (set(expected) - actual):
                    self._lru.pop(expected[missing])
                # Remove unexpected items.
                for unexpected in (actual - set(expected)):
                    try:
                        p = os.path.join(self.cache_dir, unexpected)
                        if fs.isdir(p) and not fs.islink(p):
                            file_path.rmtree(p)
                        else:
                            fs.remove(p)
                    except (IOError, OSError) as e:
                        logging.error('Failed to remove %s: %s', unexpected, e)
                        success = False

                # Second, fix named cache links.
                named = os.path.join(self.cache_dir, self.NAMED_DIR)
                if os.path.isdir(named):
                    actual = set(fs.listdir(named))
                    expected = set(self._lru)
                    # Confirm entries. Do not add missing ones for now.
                    for name in expected.intersection(actual):
                        p = os.path.join(self.cache_dir, self.NAMED_DIR, name)
                        expected_link = os.path.join(self.cache_dir,
                                                     self._lru[name][0])
                        if fs.islink(p):
                            if sys.platform == 'win32':
                                # TODO(maruel): Implement readlink() on Windows in fs.py, then
                                # remove this condition.
                                # https://crbug.com/853721
                                continue
                            link = fs.readlink(p)
                            if expected_link == link:
                                continue
                            logging.warning(
                                'Unexpected symlink for cache %s: %s, expected %s',
                                name, link, expected_link)
                        else:
                            logging.warning(
                                'Unexpected non symlink for cache %s', name)
                        if fs.isdir(p) and not fs.islink(p):
                            file_path.rmtree(p)
                        else:
                            fs.remove(p)
                    # Remove unexpected items.
                    for unexpected in (actual - expected):
                        try:
                            p = os.path.join(self.cache_dir, self.NAMED_DIR,
                                             unexpected)
                            if fs.isdir(p):
                                file_path.rmtree(p)
                            else:
                                fs.remove(p)
                        except (IOError, OSError) as e:
                            logging.error('Failed to remove %s: %s',
                                          unexpected, e)
                            success = False
            finally:
                self._save()
        return success
    def uninstall(self, src, name):
        """Moves the cache directory back into the named cache hive for an eventual
    reuse.

    The opposite of install().

    src must be absolute and unicode. Its content is moved back into the local
    named caches cache.

    Returns the named cache size in bytes.

    Raises NamedCacheError if cannot uninstall the cache.
    """
        logging.info('NamedCache.uninstall(%r, %r)', src, name)
        with self._lock:
            try:
                if not fs.isdir(src):
                    logging.warning(
                        'NamedCache: Directory %r does not exist anymore. Cache lost.',
                        src)
                    return

                if name in self._lru:
                    # This shouldn't happen but just remove the preexisting one and move
                    # on.
                    logging.error('- overwriting existing cache!')
                    self._remove(name)

                # Calculate the size of the named cache to keep. It's important
                # because if the size is zero (the cache is empty), we do not
                # want to add it back to the named caches cache.
                size = _get_recursive_size(src)
                logging.info('- Size is %d', size)
                if not size:
                    # Do not save empty named cache.
                    return size

                # Move the dir and create an entry for the named cache.
                rel_cache = self._allocate_dir()
                abs_cache = os.path.join(self.cache_dir, rel_cache)
                logging.info('- Moving to %r', rel_cache)
                file_path.ensure_tree(os.path.dirname(abs_cache))
                fs.rename(src, abs_cache)

                self._lru.add(name, (rel_cache, size))
                self._added.append(size)

                # Create symlink <cache_dir>/<named>/<name> -> <cache_dir>/<short name>
                # for user convenience.
                named_path = self._get_named_path(name)
                if fs.exists(named_path):
                    file_path.remove(named_path)
                else:
                    file_path.ensure_tree(os.path.dirname(named_path))

                try:
                    fs.symlink(os.path.join(u'..', rel_cache), named_path)
                    logging.info('NamedCache: Created symlink %r to %r',
                                 named_path, abs_cache)
                except OSError:
                    # Ignore on Windows. It happens when running as a normal user or when
                    # UAC is enabled and the user is a filtered administrator account.
                    if sys.platform != 'win32':
                        raise
                return size
            except (IOError, OSError) as ex:
                # Raise using the original traceback.
                exc = NamedCacheError(
                    'cannot uninstall cache named %r at %r: %s' %
                    (name, src, ex))
                six.reraise(exc, None, sys.exc_info()[2])
            finally:
                # Call save() at every uninstall. The assumptions are:
                # - The total number of named caches is low, so the state.json
                #   file is small, so the time it takes to write it to disk is
                #   short.
                # - The number of mapped named caches per task is low, so the number of
                #   times save() is called on tear-down isn't high enough to be
                #   significant.
                # - uninstall() sometimes throws due to file locking on Windows or
                #   access rights on Linux. We want to keep as many as possible.
                self._save()
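
uninstall() relies on a _get_recursive_size() helper that is not shown in this
excerpt; a plausible implementation (an assumption, not the real one) sums the
file sizes under src:

import os

def _get_recursive_size(path):
    # Hedged sketch: sums the sizes of all files under path. The real helper
    # may additionally use the fs.* wrappers and handle symlinks specially.
    total = 0
    for root, _dirs, filenames in os.walk(path):
        for filename in filenames:
            total += os.lstat(os.path.join(root, filename)).st_size
    return total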
Example #19
def map_and_run(
    isolated_hash, storage, cache, leak_temp_dir, root_dir, hard_timeout,
    grace_period, extra_args):
  """Maps and run the command. Returns metadata about the result."""
  # TODO(maruel): Include performance statistics.
  result = {
    'exit_code': None,
    'had_hard_timeout': False,
    'internal_failure': None,
    'outputs_ref': None,
    'version': 2,
  }
  if root_dir:
    if not fs.isdir(root_dir):
      fs.makedirs(root_dir, 0700)
    prefix = u''
  else:
    root_dir = os.path.dirname(cache.cache_dir) if cache.cache_dir else None
    prefix = u'isolated_'
  run_dir = make_temp_dir(prefix + u'run', root_dir)
  out_dir = make_temp_dir(prefix + u'out', root_dir)
  tmp_dir = make_temp_dir(prefix + u'tmp', root_dir)
  try:
    bundle = isolateserver.fetch_isolated(
        isolated_hash=isolated_hash,
        storage=storage,
        cache=cache,
        outdir=run_dir,
        require_command=True)

    change_tree_read_only(run_dir, bundle.read_only)
    cwd = os.path.normpath(os.path.join(run_dir, bundle.relative_cwd))
    command = bundle.command + extra_args
    file_path.ensure_command_has_abs_path(command, cwd)
    result['exit_code'], result['had_hard_timeout'] = run_command(
        process_command(command, out_dir), cwd, tmp_dir, hard_timeout,
        grace_period)
  except Exception as e:
    # An internal error occurred. Report accordingly so the swarming task will
    # be retried automatically.
    logging.exception('internal failure: %s', e)
    result['internal_failure'] = str(e)
    on_error.report(None)
  finally:
    try:
      if leak_temp_dir:
        logging.warning(
            'Deliberately leaking %s for later examination', run_dir)
      else:
        # On Windows, the rmtree(run_dir) call below has a synchronization
        # effect: it finishes only when all task child processes terminate
        # (since a running process locks its *.exe file). Examine out_dir only
        # after that call completes (since child processes may write to
        # out_dir too and we need to wait for them to finish).
        if fs.isdir(run_dir):
          try:
            success = file_path.rmtree(run_dir)
          except OSError as e:
            logging.error('Failure with %s', e)
            success = False
          if not success:
            print >> sys.stderr, (
                'Failed to delete the run directory, forcibly failing\n'
                'the task because of it. No zombie process can outlive a\n'
                'successful task run and still be marked as successful.\n'
                'Fix your stuff.')
            if result['exit_code'] == 0:
              result['exit_code'] = 1
        if fs.isdir(tmp_dir):
          try:
            success = file_path.rmtree(tmp_dir)
          except OSError as e:
            logging.error('Failure with %s', e)
            success = False
          if not success:
            print >> sys.stderr, (
                'Failed to delete the temporary directory, forcibly failing\n'
                'the task because of it. No zombie process can outlive a\n'
                'successful task run and still be marked as successful.\n'
                'Fix your stuff.')
            if result['exit_code'] == 0:
              result['exit_code'] = 1

      # This deletes out_dir if leak_temp_dir is not set.
      result['outputs_ref'], success = delete_and_upload(
          storage, out_dir, leak_temp_dir)
      if not success and result['exit_code'] == 0:
        result['exit_code'] = 1
    except Exception as e:
      # Swallow any exception in the main finally clause.
      logging.exception('Leaking out_dir %s: %s', out_dir, e)
      result['internal_failure'] = str(e)
  return result
Example #20
def map_and_run(
    isolated_hash, storage, cache, leak_temp_dir, root_dir, hard_timeout,
    grace_period, extra_args):
  """Maps and run the command. Returns metadata about the result."""
  result = {
    'duration': None,
    'exit_code': None,
    'had_hard_timeout': False,
    'internal_failure': None,
    'stats': {
    #  'download': {
    #    'duration': 0.,
    #    'initial_number_items': 0,
    #    'initial_size': 0,
    #    'items_cold': '<large.pack()>',
    #    'items_hot': '<large.pack()>',
    #  },
    #  'upload': {
    #    'duration': 0.,
    #    'items_cold': '<large.pack()>',
    #    'items_hot': '<large.pack()>',
    #  },
    },
    'outputs_ref': None,
    'version': 3,
  }
  if root_dir:
    if not fs.isdir(root_dir):
      fs.makedirs(root_dir, 0700)
    prefix = u''
  else:
    root_dir = os.path.dirname(cache.cache_dir) if cache.cache_dir else None
    prefix = u'isolated_'
  run_dir = make_temp_dir(prefix + u'run', root_dir)
  out_dir = make_temp_dir(prefix + u'out', root_dir)
  tmp_dir = make_temp_dir(prefix + u'tmp', root_dir)
  try:
    start = time.time()
    bundle = isolateserver.fetch_isolated(
        isolated_hash=isolated_hash,
        storage=storage,
        cache=cache,
        outdir=run_dir)
    if not bundle.command:
      # Handle this as a task failure, not an internal failure.
      sys.stderr.write(
          '<The .isolated doesn\'t declare any command to run!>\n'
          '<Check your .isolate for missing \'command\' variable>\n')
      if os.environ.get('SWARMING_TASK_ID'):
        # Give an additional hint when running as a swarming task.
        sys.stderr.write('<This occurs at the \'isolate\' step>\n')
      result['exit_code'] = 1
      return result
    result['stats']['download'] = {
      'duration': time.time() - start,
      'initial_number_items': cache.initial_number_items,
      'initial_size': cache.initial_size,
      'items_cold': base64.b64encode(large.pack(sorted(cache.added))),
      'items_hot': base64.b64encode(
          large.pack(sorted(set(cache.linked) - set(cache.added)))),
    }

    change_tree_read_only(run_dir, bundle.read_only)
    cwd = os.path.normpath(os.path.join(run_dir, bundle.relative_cwd))
    command = bundle.command + extra_args
    file_path.ensure_command_has_abs_path(command, cwd)
    sys.stdout.flush()
    start = time.time()
    try:
      result['exit_code'], result['had_hard_timeout'] = run_command(
          process_command(command, out_dir), cwd, tmp_dir, hard_timeout,
          grace_period)
    finally:
      result['duration'] = max(time.time() - start, 0)
  except Exception as e:
    # An internal error occurred. Report accordingly so the swarming task will
    # be retried automatically.
    logging.exception('internal failure: %s', e)
    result['internal_failure'] = str(e)
    on_error.report(None)
  finally:
    try:
      if leak_temp_dir:
        logging.warning(
            'Deliberately leaking %s for later examination', run_dir)
      else:
        # On Windows, the rmtree(run_dir) call below has a synchronization
        # effect: it finishes only when all task child processes terminate
        # (since a running process locks its *.exe file). Examine out_dir only
        # after that call completes (since child processes may write to
        # out_dir too and we need to wait for them to finish).
        if fs.isdir(run_dir):
          try:
            success = file_path.rmtree(run_dir)
          except OSError as e:
            logging.error('Failure with %s', e)
            success = False
          if not success:
            print >> sys.stderr, (
                'Failed to delete the run directory, forcibly failing\n'
                'the task because of it. No zombie process can outlive a\n'
                'successful task run and still be marked as successful.\n'
                'Fix your stuff.')
            if result['exit_code'] == 0:
              result['exit_code'] = 1
        if fs.isdir(tmp_dir):
          try:
            success = file_path.rmtree(tmp_dir)
          except OSError as e:
            logging.error('Failure with %s', e)
            success = False
          if not success:
            print >> sys.stderr, (
                'Failed to delete the temporary directory, forcibly failing\n'
                'the task because of it. No zombie process can outlive a\n'
                'successful task run and still be marked as successful.\n'
                'Fix your stuff.')
            if result['exit_code'] == 0:
              result['exit_code'] = 1

      # This deletes out_dir if leak_temp_dir is not set.
      start = time.time()
      result['outputs_ref'], success, cold, hot = delete_and_upload(
          storage, out_dir, leak_temp_dir)
      result['stats']['upload'] = {
        'duration': time.time() - start,
        'items_cold': base64.b64encode(large.pack(cold)),
        'items_hot': base64.b64encode(large.pack(hot)),
      }
      if not success and result['exit_code'] == 0:
        result['exit_code'] = 1
    except Exception as e:
      # Swallow any exception in the main finally clause.
      logging.exception('Leaking out_dir %s: %s', out_dir, e)
      result['internal_failure'] = str(e)
  return result
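
A hedged sketch of reading the upload stats back out of the result;
large.unpack() is assumed to be the inverse of the large.pack() call above:

upload_stats = result['stats']['upload']
cold_sizes = large.unpack(base64.b64decode(upload_stats['items_cold']))
logging.info(
    'Uploaded %d cold items (%d bytes total)', len(cold_sizes),
    sum(cold_sizes))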
Example #21
def CMDreproduce(parser, args):
  """Runs a task locally that was triggered on the server.

  This runs locally the same commands that were run on the bot. The data
  downloaded will be in a subdirectory named 'work' of the current working
  directory.

  You can pass further additional arguments to the target command by passing
  them after --.
  """
  parser.add_option(
      '--output-dir', metavar='DIR', default='',
      help='Directory to store the fetched results into')
  options, args = parser.parse_args(args)
  extra_args = []
  if not args:
    parser.error('Must specify exactly one task id.')
  if len(args) > 1:
    if args[1] == '--':
      if len(args) > 2:
        extra_args = args[2:]
    else:
      extra_args = args[1:]

  url = options.swarming + '/_ah/api/swarming/v1/task/%s/request' % args[0]
  request = net.url_read_json(url)
  if not request:
    print >> sys.stderr, 'Failed to retrieve request data for the task'
    return 1

  workdir = unicode(os.path.abspath('work'))
  if fs.isdir(workdir):
    parser.error('Please delete the directory \'work\' first')
  fs.mkdir(workdir)

  properties = request['properties']
  env = None
  if properties.get('env'):
    env = os.environ.copy()
    logging.info('env: %r', properties['env'])
    for i in properties['env']:
      key = i['key'].encode('utf-8')
      if not i['value']:
        env.pop(key, None)
      else:
        env[key] = i['value'].encode('utf-8')

  if properties.get('inputs_ref'):
    # Create the tree.
    with isolateserver.get_storage(
          properties['inputs_ref']['isolatedserver'],
          properties['inputs_ref']['namespace']) as storage:
      bundle = isolateserver.fetch_isolated(
          properties['inputs_ref']['isolated'],
          storage,
          isolateserver.MemoryCache(file_mode_mask=0700),
          workdir)
      command = bundle.command
      if bundle.relative_cwd:
        workdir = os.path.join(workdir, bundle.relative_cwd)
      command.extend(properties.get('extra_args') or [])
    # https://github.com/luci/luci-py/blob/master/appengine/swarming/doc/Magic-Values.md
    new_command = run_isolated.process_command(command, options.output_dir)
    if not options.output_dir and new_command != command:
      parser.error('The task has outputs, you must use --output-dir')
    command = new_command
  else:
    command = properties['command']
  try:
    return subprocess.call(command + extra_args, env=env, cwd=workdir)
  except OSError as e:
    print >> sys.stderr, 'Failed to run: %s' % ' '.join(command)
    print >> sys.stderr, str(e)
    return 1
Example #22
def CMDbatcharchive(parser, args):
  """Archives multiple isolated trees at once.

  Using single command instead of multiple sequential invocations allows to cut
  redundant work when isolated trees share common files (e.g. file hashes are
  checked only once, their presence on the server is checked only once, and
  so on).

  Takes a list of paths to *.isolated.gen.json files that describe what trees to
  isolate. Format of files is:
  {
    "version": 1,
    "dir": <absolute path to a directory all other paths are relative to>,
    "args": [list of command line arguments for single 'archive' command]
  }
  """
  isolateserver.add_isolate_server_options(parser)
  isolateserver.add_archive_options(parser)
  auth.add_auth_options(parser)
  parser.add_option(
      '--dump-json',
      metavar='FILE',
      help='Write isolated hashes of archived trees to this file as JSON')
  options, args = parser.parse_args(args)
  auth.process_auth_options(parser, options)
  isolateserver.process_isolate_server_options(parser, options, True)

  # Validate all incoming options, prepare what needs to be archived as a list
  # of tuples (archival options, working directory).
  work_units = []
  for gen_json_path in args:
    # Validate JSON format of a *.isolated.gen.json file.
    try:
      data = tools.read_json(gen_json_path)
    except IOError as e:
      parser.error('Failed to open %s: %s' % (gen_json_path, e))
    if data.get('version') != ISOLATED_GEN_JSON_VERSION:
      parser.error('Invalid version in %s' % gen_json_path)
    cwd = data.get('dir')
    if not isinstance(cwd, unicode) or not fs.isdir(cwd):
      parser.error('Invalid dir in %s' % gen_json_path)
    args = data.get('args')
    if (not isinstance(args, list) or
        not all(isinstance(x, unicode) for x in args)):
      parser.error('Invalid args in %s' % gen_json_path)
    # Convert command line (embedded in JSON) to Options object.
    work_units.append((parse_archive_command_line(args, cwd), cwd))

  # Perform the archival, all at once.
  isolated_hashes = isolate_and_archive(
      work_units, options.isolate_server, options.namespace)

  # TODO(vadimsh): isolate_and_archive returns None on upload failure; there's
  # currently no way to figure out which *.isolated files from a batch were
  # successfully uploaded, so consider them all failed (and emit an empty dict
  # as the JSON result).
  if options.dump_json:
    tools.write_json(options.dump_json, isolated_hashes or {}, False)

  if isolated_hashes is None:
    return EXIT_CODE_UPLOAD_ERROR

  # isolated_hashes[x] is None if 'x.isolate' contains an error.
  if not all(isolated_hashes.itervalues()):
    return EXIT_CODE_ISOLATE_ERROR

  return 0
Example #23
def map_and_run(data, constant_run_path):
  """Runs a command with optional isolated input/output.

  Arguments:
  - data: TaskData instance.
  - constant_run_path: TODO

  Returns metadata about the result.
  """
  result = {
    'duration': None,
    'exit_code': None,
    'had_hard_timeout': False,
    'internal_failure': 'run_isolated did not complete properly',
    'stats': {
    # 'isolated': {
    #    'cipd': {
    #      'duration': 0.,
    #      'get_client_duration': 0.,
    #    },
    #    'download': {
    #      'duration': 0.,
    #      'initial_number_items': 0,
    #      'initial_size': 0,
    #      'items_cold': '<large.pack()>',
    #      'items_hot': '<large.pack()>',
    #    },
    #    'upload': {
    #      'duration': 0.,
    #      'items_cold': '<large.pack()>',
    #      'items_hot': '<large.pack()>',
    #    },
    #  },
    },
    # 'cipd_pins': {
    #   'packages': [
    #     {'package_name': ..., 'version': ..., 'path': ...},
    #     ...
    #   ],
    #  'client_package': {'package_name': ..., 'version': ...},
    # },
    'outputs_ref': None,
    'version': 5,
  }

  if data.root_dir:
    file_path.ensure_tree(data.root_dir, 0700)
  elif data.isolate_cache.cache_dir:
    data = data._replace(
        root_dir=os.path.dirname(data.isolate_cache.cache_dir))
  # See the comment for these constants.
  # If root_dir is not specified, the run path is not constant.
  # TODO(maruel): This is not obvious. Change this to become an error once we
  # make constant_run_path an exposed flag.
  if constant_run_path and data.root_dir:
    run_dir = os.path.join(data.root_dir, ISOLATED_RUN_DIR)
    if os.path.isdir(run_dir):
      file_path.rmtree(run_dir)
    os.mkdir(run_dir, 0700)
  else:
    run_dir = make_temp_dir(ISOLATED_RUN_DIR, data.root_dir)
  # storage should normally be set, but don't crash if it is not. This can
  # happen since a Swarming task can run without an isolate server.
  out_dir = make_temp_dir(
      ISOLATED_OUT_DIR, data.root_dir) if data.storage else None
  tmp_dir = make_temp_dir(ISOLATED_TMP_DIR, data.root_dir)
  cwd = run_dir
  if data.relative_cwd:
    cwd = os.path.normpath(os.path.join(cwd, data.relative_cwd))
  command = data.command
  try:
    with data.install_packages_fn(run_dir) as cipd_info:
      if cipd_info:
        result['stats']['cipd'] = cipd_info.stats
        result['cipd_pins'] = cipd_info.pins

      if data.isolated_hash:
        isolated_stats = result['stats'].setdefault('isolated', {})
        bundle, isolated_stats['download'] = fetch_and_map(
            isolated_hash=data.isolated_hash,
            storage=data.storage,
            cache=data.isolate_cache,
            outdir=run_dir,
            use_symlinks=data.use_symlinks)
        change_tree_read_only(run_dir, bundle.read_only)
        # Inject the command
        if not command and bundle.command:
          command = bundle.command + data.extra_args
          # Only set the relative directory if the isolated file specified a
          # command, and no raw command was specified.
          if bundle.relative_cwd:
            cwd = os.path.normpath(os.path.join(cwd, bundle.relative_cwd))

      if not command:
        # Handle this as a task failure, not an internal failure.
        sys.stderr.write(
            '<No command was specified!>\n'
            '<Please specify a command when triggering your Swarming task>\n')
        result['exit_code'] = 1
        return result

      if not cwd.startswith(run_dir):
        # Handle this as a task failure, not an internal failure. This is a
        # 'last chance' way to gate against directory escape.
        sys.stderr.write('<Relative CWD is outside of run directory!>\n')
        result['exit_code'] = 1
        return result

      if not os.path.isdir(cwd):
        # Accepts relative_cwd that does not exist.
        os.makedirs(cwd, 0700)

      # If we have an explicit list of files to return, make sure their
      # directories exist now.
      if data.storage and data.outputs:
        isolateserver.create_directories(run_dir, data.outputs)

      command = tools.fix_python_path(command)
      command = process_command(command, out_dir, data.bot_file)
      file_path.ensure_command_has_abs_path(command, cwd)

      with data.install_named_caches(run_dir):
        sys.stdout.flush()
        start = time.time()
        try:
          # Need to switch the default account before 'get_command_env' call,
          # so it can grab correct value of LUCI_CONTEXT env var.
          with set_luci_context_account(data.switch_to_account, tmp_dir):
            env = get_command_env(
                tmp_dir, cipd_info, run_dir, data.env, data.env_prefix)
            result['exit_code'], result['had_hard_timeout'] = run_command(
                command, cwd, env, data.hard_timeout, data.grace_period)
        finally:
          result['duration'] = max(time.time() - start, 0)

    # We successfully ran the command, set internal_failure back to
    # None (even if the command failed, it's not an internal error).
    result['internal_failure'] = None
  except Exception as e:
    # An internal error occurred. Report accordingly so the swarming task will
    # be retried automatically.
    logging.exception('internal failure: %s', e)
    result['internal_failure'] = str(e)
    on_error.report(None)

  # Clean up
  finally:
    try:
      # Try to link files to the output directory, if specified.
      if out_dir:
        link_outputs_to_outdir(run_dir, out_dir, data.outputs)

      success = False
      if data.leak_temp_dir:
        success = True
        logging.warning(
            'Deliberately leaking %s for later examination', run_dir)
      else:
        # On Windows, the rmtree(run_dir) call below has a synchronization
        # effect: it finishes only when all task child processes have
        # terminated (since a running process locks its *.exe file). Examine
        # out_dir only after that call completes, since child processes may
        # write to out_dir too and we need to wait for them to finish.
        if fs.isdir(run_dir):
          try:
            success = file_path.rmtree(run_dir)
          except OSError as e:
            logging.error('Failure with %s', e)
            success = False
          if not success:
            sys.stderr.write(OUTLIVING_ZOMBIE_MSG % ('run', data.grace_period))
            if result['exit_code'] == 0:
              result['exit_code'] = 1
        if fs.isdir(tmp_dir):
          try:
            success = file_path.rmtree(tmp_dir)
          except OSError as e:
            logging.error('Failure with %s', e)
            success = False
          if not success:
            sys.stderr.write(OUTLIVING_ZOMBIE_MSG % ('temp', data.grace_period))
            if result['exit_code'] == 0:
              result['exit_code'] = 1

      # This deletes out_dir if leak_temp_dir is not set.
      if out_dir:
        isolated_stats = result['stats'].setdefault('isolated', {})
        result['outputs_ref'], success, isolated_stats['upload'] = (
            delete_and_upload(data.storage, out_dir, data.leak_temp_dir))
      if not success and result['exit_code'] == 0:
        result['exit_code'] = 1
    except Exception as e:
      # Swallow any exception in the main finally clause.
      if out_dir:
        logging.exception('Leaking out_dir %s: %s', out_dir, e)
      result['internal_failure'] = str(e)
  return result
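
A hedged sketch of how a caller might collapse the metadata dict returned above into a single process exit code; run_tha_test later in this document does essentially this, so the helper below is illustrative only.

def result_to_exit_code(result):
  # Hypothetical helper. A non-None internal_failure always marks the run as
  # failed, regardless of the command's own exit code.
  if result['internal_failure']:
    return 1
  return result['exit_code'] or 0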
Example #24
0
def expand_directory_and_symlink(indir, relfile, blacklist, follow_symlinks):
    """Expands a single input. It can result in multiple outputs.

  This function is recursive when relfile is a directory.

  Note: this code doesn't properly handle recursive symlink like one created
  with:
    ln -s .. foo

  Yields:
    tuple(Relative path, bool is_symlink) to files and symlinks inside |indir|.
  """
    if os.path.isabs(relfile):
        raise MappingError(u'Can\'t map absolute path %s' % relfile)

    infile = file_path.normpath(os.path.join(indir, relfile))
    if not infile.startswith(indir):
        raise MappingError(u'Can\'t map file %s outside %s' % (infile, indir))

    filepath = os.path.join(indir, relfile)
    native_filepath = file_path.get_native_path_case(filepath)
    if filepath != native_filepath:
        # Special case './'.
        if filepath != native_filepath + u'.' + os.path.sep:
            # While it'd be nice to enforce path casing on Windows, it's impractical.
            # Also give up enforcing strict path case on OSX. Really, it's that sad.
            # The case where it happens is very specific and hard to reproduce:
            # get_native_path_case(
            #    u'Foo.framework/Versions/A/Resources/Something.nib') will return
            # u'Foo.framework/Versions/A/resources/Something.nib', e.g. lowercase 'r'.
            #
            # Note that this is really something deep in OSX because running
            # ls Foo.framework/Versions/A
            # will print out 'Resources', while file_path.get_native_path_case()
            # returns a lower case 'r'.
            #
            # So *something* is happening under the hood resulting in the command 'ls'
            # and Carbon.File.FSPathMakeRef('path').FSRefMakePath() to disagree.  We
            # have no idea why.
            if sys.platform not in ('darwin', 'win32'):
                raise MappingError(
                    u'File path doesn\'t equal native file path\n%s != %s' %
                    (filepath, native_filepath))

    symlinks = []
    if follow_symlinks:
        try:
            relfile, symlinks = _expand_symlinks(indir, relfile)
        except OSError:
            # The file doesn't exist, it will throw below.
            pass

    # The symlinks need to be mapped in.
    for s in symlinks:
        yield s, True

    if relfile.endswith(os.path.sep):
        if not fs.isdir(infile):
            raise MappingError(u'%s is not a directory but ends with "%s"' %
                               (infile, os.path.sep))

        # Special case './'.
        if relfile.startswith(u'.' + os.path.sep):
            relfile = relfile[2:]
        try:
            for filename in fs.listdir(infile):
                inner_relfile = os.path.join(relfile, filename)
                if blacklist and blacklist(inner_relfile):
                    continue
                if fs.isdir(os.path.join(indir, inner_relfile)):
                    inner_relfile += os.path.sep
                # Apply recursively.
                for i, is_symlink in expand_directory_and_symlink(
                        indir, inner_relfile, blacklist, follow_symlinks):
                    yield i, is_symlink
        except OSError as e:
            raise MappingError(u'Unable to iterate over directory %s.\n%s' %
                               (infile, e))
    else:
        # Always add individual files even if they were blacklisted.
        if fs.isdir(infile):
            raise MappingError(
                u'Input directory %s must have a trailing slash' % infile)

        if not fs.isfile(infile):
            raise MappingError(u'Input file %s doesn\'t exist' % infile)

        yield relfile, False
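
A minimal usage sketch; per the checks above, directories must be passed with a trailing os.path.sep, and |indir| must be an absolute, native-case path. The u'data' subdirectory below is purely hypothetical.

import os

indir = file_path.get_native_path_case(unicode(os.getcwd()))
for relpath, is_symlink in expand_directory_and_symlink(
    indir, u'data' + os.path.sep, blacklist=None, follow_symlinks=True):
  print('%s%s' % (relpath, ' (symlink)' if is_symlink else ''))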
Example #25
0
    def cleanup(self):
        """Cleans up the cache directory.

    Ensures there is no unknown files in cache_dir.
    Ensures the read-only bits are set correctly.

    At that point, the cache was already loaded, trimmed to respect cache
    policies.
    """
        with self._lock:
            fs.chmod(self.cache_dir, 0o700)
            # Ensure that all files listed in the state still exist and add new ones.
            previous = set(self._lru)
            # It'd be faster if there were a readdir() function.
            for filename in fs.listdir(self.cache_dir):
                if filename == self.STATE_FILE:
                    fs.chmod(os.path.join(self.cache_dir, filename), 0o600)
                    continue
                if filename in previous:
                    fs.chmod(os.path.join(self.cache_dir, filename), 0o400)
                    previous.remove(filename)
                    continue

                # An untracked file. Delete it.
                logging.warning('Removing unknown file %s from cache',
                                filename)
                p = self._path(filename)
                if fs.isdir(p):
                    try:
                        file_path.rmtree(p)
                    except OSError:
                        pass
                else:
                    file_path.try_remove(p)
                continue

            if previous:
                # Filter out entries that were not found.
                logging.warning('Removed %d lost files', len(previous))
                for filename in previous:
                    self._lru.pop(filename)
                self._save()

        # Verify the hash of every single item to detect corruption. The
        # corrupted files will be evicted.
        with self._lock:
            for digest, (_, timestamp) in list(self._lru._items.items()):
                # Verify only if the mtime is greater than the timestamp in
                # state.json, to avoid taking too long.
                if self._get_mtime(digest) <= timestamp:
                    continue
                logging.warning('Item has been modified. item: %s', digest)
                if self._is_valid_hash(digest):
                    # Update timestamp in state.json
                    self._lru.touch(digest)
                    continue
                # Remove the corrupted file from the LRU and the file system.
                self._lru.pop(digest)
                self._delete_file(digest, UNKNOWN_FILE_SIZE)
                logging.error('Deleted corrupted item: %s', digest)
            self._save()
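
The mtime gate in the verification loop above is the cost control: re-hashing every entry on every cleanup would require reading the whole cache, so only entries whose file mtime is newer than the timestamp recorded in state.json get re-verified. A hedged sketch of the helper this relies on (the real _get_mtime is a method on the cache class; the standalone version below is an assumption):

import os

def get_mtime_sketch(cache_dir, digest):
  # Stand-in for self._get_mtime(digest): the entry's file modification time,
  # compared against the timestamp stored in state.json.
  return int(os.stat(os.path.join(cache_dir, digest)).st_mtime)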
Example #26
0
def run_tha_test(
    isolated_hash, storage, cache, leak_temp_dir, result_json, root_dir,
    hard_timeout, grace_period, extra_args):
  """Downloads the dependencies in the cache, hardlinks them into a temporary
  directory and runs the executable from there.

  A temporary directory is created to hold the output files. The content inside
  this directory will be uploaded back to |storage| packaged as a .isolated
  file.

  Arguments:
    isolated_hash: the SHA-1 of the .isolated file that must be retrieved to
                   recreate the tree of files to run the target executable.
    storage: an isolateserver.Storage object to retrieve remote objects. This
             object has a reference to an isolateserver.StorageApi, which does
             the actual I/O.
    cache: an isolateserver.LocalCache to keep from retrieving the same objects
           constantly by caching the objects retrieved. Can be on-disk or
           in-memory.
    leak_temp_dir: if true, the temporary directory will be deliberately leaked
                   for later examination.
    result_json: file path to dump result metadata into. If set, the process
                 exit code is always 0 unless an internal error occurred.
    root_dir: path to the directory in which to create the temporary
              directory. If not specified, a random temporary directory is
              created.
    hard_timeout: kills the process if it lasts more than this amount of
                  seconds.
    grace_period: number of seconds to wait between SIGTERM and SIGKILL.
    extra_args: optional arguments to add to the command stated in the .isolate
                file.

  Returns:
    Process exit code that should be used.
  """
  # run_isolated exit code. Depends on whether result_json is used.
  result = map_and_run(
      isolated_hash, storage, cache, leak_temp_dir, root_dir, hard_timeout,
      grace_period, extra_args)
  logging.info('Result:\n%s', tools.format_json(result, dense=True))
  if result_json:
    # We've seen tests delete 'work' when quitting, causing an exception
    # here. Try to recreate the directory if necessary.
    work_dir = os.path.dirname(result_json)
    if not fs.isdir(work_dir):
      fs.mkdir(work_dir)
    tools.write_json(result_json, result, dense=True)
    # Only return 1 if there was an internal error.
    return int(bool(result['internal_failure']))

  # Marshal into old-style inline output.
  if result['outputs_ref']:
    data = {
      'hash': result['outputs_ref']['isolated'],
      'namespace': result['outputs_ref']['namespace'],
      'storage': result['outputs_ref']['isolatedserver'],
    }
    sys.stdout.flush()
    print(
        '[run_isolated_out_hack]%s[/run_isolated_out_hack]' %
        tools.format_json(data, dense=True))
    sys.stdout.flush()
  return result['exit_code'] or int(bool(result['internal_failure']))
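
The [run_isolated_out_hack] block above is designed to be scraped from the task's stdout. A minimal sketch of the consumer side; the marker strings come from the print statement above, while the helper itself is an assumption:

import json
import re

def parse_out_hack(stdout_text):
  # Extracts the JSON payload between the inline output markers, if present.
  m = re.search(
      r'\[run_isolated_out_hack\](.*)\[/run_isolated_out_hack\]',
      stdout_text, re.DOTALL)
  return json.loads(m.group(1)) if m else None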
Example #27
0
def CMDreproduce(parser, args):
  """Runs a task locally that was triggered on the server.

  This runs locally the same commands that were run on the bot. The downloaded
  data will be in a subdirectory named 'work' under the current working
  directory.

  You can pass further additional arguments to the target command by passing
  them after --.
  """
  options, args = parser.parse_args(args)
  extra_args = []
  if not args:
    parser.error('Must specify exactly one task id.')
  if len(args) > 1:
    if args[1] == '--':
      if len(args) > 2:
        extra_args = args[2:]
    else:
      extra_args = args[1:]

  url = options.swarming + '/_ah/api/swarming/v1/task/%s/request' % args[0]
  request = net.url_read_json(url)
  if not request:
    print >> sys.stderr, 'Failed to retrieve request data for the task'
    return 1

  workdir = unicode(os.path.abspath('work'))
  if not fs.isdir(workdir):
    fs.mkdir(workdir)

  properties = request['properties']
  env = None
  if properties.get('env'):
    env = os.environ.copy()
    logging.info('env: %r', properties['env'])
    for i in properties['env']:
      key = i['key'].encode('utf-8')
      if not i['value']:
        env.pop(key, None)
      else:
        env[key] = i['value'].encode('utf-8')

  if properties.get('inputs_ref'):
    # Create the tree.
    with isolateserver.get_storage(
          properties['inputs_ref']['isolatedserver'],
          properties['inputs_ref']['namespace']) as storage:
      bundle = isolateserver.fetch_isolated(
          properties['inputs_ref']['isolated'],
          storage,
          isolateserver.MemoryCache(file_mode_mask=0700),
          workdir,
          False)
      command = bundle.command
      if bundle.relative_cwd:
        workdir = os.path.join(workdir, bundle.relative_cwd)
  else:
    command = properties['command']
  try:
    return subprocess.call(command + extra_args, env=env, cwd=workdir)
  except OSError as e:
    print >> sys.stderr, 'Failed to run: %s' % ' '.join(command)
    print >> sys.stderr, str(e)
    return 1
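
A hedged invocation sketch, assuming the standard subcommand dispatch and that -S/--swarming supplies the server URL used in the request fetch above; the server host and task id are placeholders:

  swarming.py reproduce -S https://example-swarm.appspot.com <task_id> -- --verbose

Everything after -- is forwarded to the reproduced command as extra_args; per the argument parsing above, arguments given directly after the task id (without --) are forwarded as well.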
Example #28
0
def ensure_tree(path, perm=0777):
  """Ensures a directory exists."""
  if not fs.isdir(path):
    fs.makedirs(path, perm)
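
A usage sketch; note that fs.makedirs, like os.makedirs, applies the process umask, so the default perm=0777 commonly ends up as 0755 on disk. The work_root below is created only for illustration.

import os
import tempfile

work_root = tempfile.mkdtemp()
ensure_tree(os.path.join(work_root, u'work'), 0700)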
Example #29
0
def map_and_run(command, isolated_hash, storage, isolate_cache, outputs,
                init_name_caches, leak_temp_dir, root_dir, hard_timeout,
                grace_period, bot_file, extra_args, install_packages_fn,
                use_symlinks):
    """Runs a command with optional isolated input/output.

  See run_tha_test for argument documentation.

  Returns metadata about the result.
  """
    assert root_dir or root_dir is None
    assert bool(command) ^ bool(isolated_hash)
    result = {
        'duration': None,
        'exit_code': None,
        'had_hard_timeout': False,
        'internal_failure': None,
        'stats': {
            # 'isolated': {
            #    'cipd': {
            #      'duration': 0.,
            #      'get_client_duration': 0.,
            #    },
            #    'download': {
            #      'duration': 0.,
            #      'initial_number_items': 0,
            #      'initial_size': 0,
            #      'items_cold': '<large.pack()>',
            #      'items_hot': '<large.pack()>',
            #    },
            #    'upload': {
            #      'duration': 0.,
            #      'items_cold': '<large.pack()>',
            #      'items_hot': '<large.pack()>',
            #    },
            #  },
        },
        # 'cipd_pins': {
        #   'packages': [
        #     {'package_name': ..., 'version': ..., 'path': ...},
        #     ...
        #   ],
        #  'client_package': {'package_name': ..., 'version': ...},
        # },
        'outputs_ref': None,
        'version': 5,
    }

    if root_dir:
        file_path.ensure_tree(root_dir, 0700)
    elif isolate_cache.cache_dir:
        root_dir = os.path.dirname(isolate_cache.cache_dir)
    # See comment for these constants.
    run_dir = make_temp_dir(ISOLATED_RUN_DIR, root_dir)
    # storage should normally be set, but don't crash if it is not. This can
    # happen since a Swarming task can run without an isolate server.
    out_dir = make_temp_dir(ISOLATED_OUT_DIR, root_dir) if storage else None
    tmp_dir = make_temp_dir(ISOLATED_TMP_DIR, root_dir)
    cwd = run_dir

    try:
        cipd_info = install_packages_fn(run_dir)
        if cipd_info:
            result['stats']['cipd'] = cipd_info['stats']
            result['cipd_pins'] = cipd_info['cipd_pins']

        if isolated_hash:
            isolated_stats = result['stats'].setdefault('isolated', {})
            bundle, isolated_stats['download'] = fetch_and_map(
                isolated_hash=isolated_hash,
                storage=storage,
                cache=isolate_cache,
                outdir=run_dir,
                use_symlinks=use_symlinks)
            if not bundle.command:
                # Handle this as a task failure, not an internal failure.
                sys.stderr.write(
                    '<The .isolated doesn\'t declare any command to run!>\n'
                    '<Check your .isolate for missing \'command\' variable>\n')
                if os.environ.get('SWARMING_TASK_ID'):
                    # Give an additional hint when running as a swarming task.
                    sys.stderr.write('<This occurs at the \'isolate\' step>\n')
                result['exit_code'] = 1
                return result

            change_tree_read_only(run_dir, bundle.read_only)
            cwd = os.path.normpath(os.path.join(cwd, bundle.relative_cwd))
            command = bundle.command + extra_args

        # If we have an explicit list of files to return, make sure their
        # directories exist now.
        if storage and outputs:
            isolateserver.create_directories(run_dir, outputs)

        command = tools.fix_python_path(command)
        command = process_command(command, out_dir, bot_file)
        file_path.ensure_command_has_abs_path(command, cwd)

        init_name_caches(run_dir)

        sys.stdout.flush()
        start = time.time()
        try:
            result['exit_code'], result['had_hard_timeout'] = run_command(
                command, cwd, tmp_dir, hard_timeout, grace_period)
        finally:
            result['duration'] = max(time.time() - start, 0)
    except Exception as e:
        # An internal error occurred. Report accordingly so the swarming task will
        # be retried automatically.
        logging.exception('internal failure: %s', e)
        result['internal_failure'] = str(e)
        on_error.report(None)

    # Clean up
    finally:
        try:
            # Try to link files to the output directory, if specified.
            if out_dir:
                link_outputs_to_outdir(run_dir, out_dir, outputs)

            success = False
            if leak_temp_dir:
                success = True
                logging.warning(
                    'Deliberately leaking %s for later examination', run_dir)
            else:
                # On Windows, the rmtree(run_dir) call below has a
                # synchronization effect: it finishes only when all task child
                # processes have terminated (since a running process locks its
                # *.exe file). Examine out_dir only after that call completes,
                # since child processes may write to out_dir too and we need
                # to wait for them to finish.
                if fs.isdir(run_dir):
                    try:
                        success = file_path.rmtree(run_dir)
                    except OSError as e:
                        logging.error('Failure with %s', e)
                        success = False
                    if not success:
                        print >> sys.stderr, (
                            'Failed to delete the run directory, forcibly failing\n'
                            'the task because of it. No zombie process can outlive a\n'
                            'successful task run and still be marked as successful.\n'
                            'Fix your stuff.')
                        if result['exit_code'] == 0:
                            result['exit_code'] = 1
                if fs.isdir(tmp_dir):
                    try:
                        success = file_path.rmtree(tmp_dir)
                    except OSError as e:
                        logging.error('Failure with %s', e)
                        success = False
                    if not success:
                        print >> sys.stderr, (
                            'Failed to delete the temporary directory, forcibly failing\n'
                            'the task because of it. No zombie process can outlive a\n'
                            'successful task run and still be marked as successful.\n'
                            'Fix your stuff.')
                        if result['exit_code'] == 0:
                            result['exit_code'] = 1

            # This deletes out_dir if leak_temp_dir is not set.
            if out_dir:
                isolated_stats = result['stats'].setdefault('isolated', {})
                result['outputs_ref'], success, isolated_stats['upload'] = (
                    delete_and_upload(storage, out_dir, leak_temp_dir))
            if not success and result['exit_code'] == 0:
                result['exit_code'] = 1
        except Exception as e:
            # Swallow any exception in the main finally clause.
            if out_dir:
                logging.exception('Leaking out_dir %s: %s', out_dir, e)
            result['internal_failure'] = str(e)
    return result