Beispiel #1
0
def link_file(outfile, infile, action):
  """Creates |outfile| from |infile| using the strategy named by |action|.

  Arguments:
    outfile: Path to create. It must not already be a file.
    infile: Existing file to link to or copy from.
    action: One of HARDLINK, HARDLINK_WITH_FALLBACK, SYMLINK or COPY. On
            Windows, SYMLINK is handled like a hardlink that falls back to a
            copy.

  Returns:
    True if the requested action was carried out, False if a copy was made
    because hardlinking failed.

  Raises:
    ValueError: |action| is not one of the supported values.
    OSError: |infile| is missing, |outfile| already exists, or hardlinking
             failed while |action| is HARDLINK.
  """
  supported = (HARDLINK, HARDLINK_WITH_FALLBACK, SYMLINK, COPY)
  if action not in supported:
    raise ValueError('Unknown mapping action %s' % action)
  if not fs.isfile(infile):
    raise OSError('%s is missing' % infile)
  if fs.isfile(outfile):
    details = (outfile, fs.stat(infile).st_size, fs.stat(outfile).st_size)
    raise OSError('%s already exist; insize:%d; outsize:%d' % details)

  if action == COPY:
    readable_copy(outfile, infile)
    return True

  if action == SYMLINK and sys.platform != 'win32':
    fs.symlink(infile, outfile)  # pylint: disable=E1101
    return True

  # Everything else goes through a hardlink: HARDLINK,
  # HARDLINK_WITH_FALLBACK, and SYMLINK when running on Windows.
  try:
    hardlink(infile, outfile)
  except OSError as e:
    if action == HARDLINK:
      # Strict mode: the caller did not ask for a fallback.
      raise OSError('Failed to hardlink %s to %s: %s' % (infile, outfile, e))
    # Probably a different file system.
    logging.warning(
        'Failed to hardlink, failing back to copy %s to %s' % (
          infile, outfile))
    readable_copy(outfile, infile)
    # Tell the caller that the fallback copy was used.
    return False
  return True
Beispiel #2
0
def link_file(outfile, infile, action):
  """Links or copies |infile| to |outfile|, as selected by |action|.

  Arguments:
    outfile: Destination path; an existing file there is an error.
    infile: Source file; it must exist.
    action: HARDLINK, HARDLINK_WITH_FALLBACK, SYMLINK or COPY.

  Returns:
    True when the requested action succeeded as-is, False when hardlinking
    failed and the file was copied instead.

  Raises:
    ValueError: unknown |action|.
    OSError: missing |infile|, pre-existing |outfile|, or a hardlink failure
             when |action| is HARDLINK.
  """
  if action not in (HARDLINK, HARDLINK_WITH_FALLBACK, SYMLINK, COPY):
    raise ValueError('Unknown mapping action %s' % action)
  if not fs.isfile(infile):
    raise OSError('%s is missing' % infile)
  if fs.isfile(outfile):
    message = '%s already exist; insize:%d; outsize:%d' % (
        outfile, fs.stat(infile).st_size, fs.stat(outfile).st_size)
    raise OSError(message)

  # Flipped to False only when the copy fallback is taken.
  carried_out = True
  if action == COPY:
    readable_copy(outfile, infile)
  elif action == SYMLINK and sys.platform != 'win32':
    fs.symlink(infile, outfile)  # pylint: disable=E1101
  else:
    # HARDLINK or HARDLINK_WITH_FALLBACK; also SYMLINK on Windows, where it is
    # converted to a hardlink that fails over to a copy.
    try:
      hardlink(infile, outfile)
    except OSError as e:
      if action == HARDLINK:
        raise OSError('Failed to hardlink %s to %s: %s' % (infile, outfile, e))
      # Probably a different file system.
      logging.warning(
          'Failed to hardlink, failing back to copy %s to %s' % (
            infile, outfile))
      readable_copy(outfile, infile)
      carried_out = False
  return carried_out
Beispiel #3
0
def link_file(outfile, infile, action):
  """Creates |outfile| from |infile| by copying, symlinking or hardlinking.

  Arguments:
    outfile: Path to create; it must not already be a file.
    infile: Existing source file.
    action: Numeric action between 1 and COPY inclusive. COPY, SYMLINK,
            SYMLINK_WITH_FALLBACK and HARDLINK are handled explicitly; any
            other accepted value is treated as a hardlink with copy fallback.

  Returns:
    True if the requested action was carried out, False if a copy was made as
    a fallback.

  Raises:
    ValueError: |action| is outside the accepted range.
    OSError: |infile| is missing, |outfile| already exists, or the link
             failed and |action| does not allow a fallback.
  """
  if action < 1 or action > COPY:
    raise ValueError('Unknown mapping action %s' % action)
  # TODO(maruel): Skip these checks.
  if not fs.isfile(infile):
    raise OSError('%s is missing' % infile)
  if fs.isfile(outfile):
    details = (outfile, fs.stat(infile).st_size, fs.stat(outfile).st_size)
    raise OSError('%s already exist; insize:%d; outsize:%d' % details)

  if action == COPY:
    readable_copy(outfile, infile)
    return True

  if action in (SYMLINK, SYMLINK_WITH_FALLBACK):
    try:
      fs.symlink(infile, outfile)  # pylint: disable=E1101
    except OSError:
      if action == SYMLINK:
        # Strict symlink: let the original error propagate.
        raise
      logging.warning(
          'Failed to symlink, falling back to copy %s to %s' % (
            infile, outfile))
      readable_copy(outfile, infile)
      # Tell the caller that the fallback copy was used.
      return False
    else:
      return True

  # HARDLINK or HARDLINK_WITH_FALLBACK.
  try:
    hardlink(infile, outfile)
  except OSError as e:
    if action == HARDLINK:
      raise OSError('Failed to hardlink %s to %s: %s' % (infile, outfile, e))
  else:
    return True

  # Only reached when the hardlink failed and a fallback is allowed. Probably
  # a different file system.
  logging.warning(
      'Failed to hardlink, falling back to copy %s to %s' % (
        infile, outfile))
  readable_copy(outfile, infile)
  return False
Beispiel #4
0
def is_same_filesystem(path1, path2):
  """Returns True if both paths are on the same filesystem.

  This is required to enable the use of hardlinks.

  Arguments:
    path1: Absolute path to an existing file system entry.
    path2: Absolute path to an existing file system entry.

  Returns:
    True when both paths report the same st_dev. On Windows, False is returned
    early when the drive letters differ.

  Raises:
    AssertionError: a path is not absolute or, on Windows, does not start with
                    a drive letter followed by ':\\'.
  """
  assert os.path.isabs(path1), path1
  assert os.path.isabs(path2), path2
  if sys.platform == 'win32':
    # If the drive letter mismatches, assume it's a separate partition.
    # TODO(maruel): It should look at the underlying drive, a drive letter could
    # be a mount point to a directory on another drive.
    # A plain raw string is used instead of a ur'' literal, which Python 3
    # rejects as a syntax error; the pattern itself is unchanged.
    assert re.match(r'^[a-zA-Z]\:\\.*', path1), path1
    assert re.match(r'^[a-zA-Z]\:\\.*', path2), path2
    if path1[0].lower() != path2[0].lower():
      return False
  return fs.stat(path1).st_dev == fs.stat(path2).st_dev
Beispiel #5
0
def is_same_filesystem(path1, path2):
  """Tells whether two absolute paths live on the same filesystem.

  Hardlinks can only be created within one filesystem, which is why callers
  need this check.

  Arguments:
    path1: Absolute path that can be stat'ed.
    path2: Absolute path that can be stat'ed.

  Returns:
    True if the st_dev of both paths match. On Windows, differing drive
    letters short-circuit to False.

  Raises:
    AssertionError: a path is not absolute or, on Windows, lacks a leading
                    drive letter followed by ':\\'.
  """
  assert os.path.isabs(path1), path1
  assert os.path.isabs(path2), path2
  if sys.platform == 'win32':
    # If the drive letter mismatches, assume it's a separate partition.
    # TODO(maruel): It should look at the underlying drive, a drive letter could
    # be a mount point to a directory on another drive.
    # The original ur'' prefix does not parse on Python 3; r'' keeps the same
    # pattern and works on both major versions.
    drive_prefix = r'^[a-zA-Z]\:\\.*'
    assert re.match(drive_prefix, path1), path1
    assert re.match(drive_prefix, path2), path2
    if path1[0].lower() != path2[0].lower():
      return False
  return fs.stat(path1).st_dev == fs.stat(path2).st_dev
Beispiel #6
0
 def assertFileMode(self, filepath, mode, umask=None):
   """Asserts that the st_mode of |filepath| equals |mode| with |umask| masked.

   When |umask| is None, test_utils.umask() supplies it. On failure the
   message carries the path plus the expected mode, actual mode and umask in
   octal.
   """
   if umask is None:
     umask = test_utils.umask()
   actual = fs.stat(filepath).st_mode
   # Bits set in the umask are not expected to be present on the file.
   expected = mode & ~umask
   failure_details = (filepath, oct(expected), oct(actual), oct(umask))
   self.assertEqual(expected, actual, failure_details)
 def assertFileMode(self, filepath, mode, umask=None):
   """Checks that |filepath| has st_mode equal to |mode| minus the umask bits.

   Arguments:
     filepath: File whose st_mode is read through fs.stat().
     mode: Mode before the umask is applied.
     umask: Mask to remove from |mode|; test_utils.umask() is used when None.
   """
   effective_umask = test_utils.umask() if umask is None else umask
   actual = fs.stat(filepath).st_mode
   expected = mode & ~effective_umask
   # Octal renderings make a failure readable as permission bits.
   self.assertEqual(
       expected,
       actual,
       (filepath, oct(expected), oct(actual), oct(effective_umask)))
def file_to_metadata(filepath, collapse_symlinks):
  """Collects metadata about an input file for an .isolated 'files' entry.

  Behaviors:
  - Stats the file, following symlinks only when |collapse_symlinks| is set.
  - For anything that is not a symlink, records the size and, except on
    Windows, a normalized file mode.
  - For a symlink, records its destination relative to the directory that
    contains the link.

  Arguments:
    filepath: File to act on.
    collapse_symlinks: True if symlinked files should be treated like they were
                       the normal underlying file.

  Returns:
    A dict with 's' (size) and, except on Windows, 'm' (mode) for non-links,
    or 'l' (link destination) for symlinks. The 'h' entry is not computed
    here.

  Raises:
    MappingError: the file could not be stat'ed.
  """
  # stat follows symbolic links; lstat does not, and thus preserves them.
  stat_func = fs.stat if collapse_symlinks else fs.lstat
  try:
    filestats = stat_func(filepath)
  except OSError:
    # The file is not present.
    raise MappingError('%s is missing' % filepath)

  out = {}
  is_link = stat.S_ISLNK(filestats.st_mode)

  # The file mode is ignored on Windows since it's not really useful there,
  # and it is never recorded for symlinks.
  if sys.platform != 'win32' and not is_link:
    filemode = stat.S_IMODE(filestats.st_mode)
    # Remove write access for group and all access to 'others'.
    filemode &= ~(stat.S_IWGRP | stat.S_IRWXO)
    # Only keep x group bit if both x user bit and group read bit are set.
    needed_for_group_exec = stat.S_IXUSR | stat.S_IRGRP
    if (filemode & needed_for_group_exec) == needed_for_group_exec:
      filemode |= stat.S_IXGRP
    else:
      filemode &= ~stat.S_IXGRP
    out['m'] = filemode

  if is_link:
    # The link could be in an incorrect path case. In practice, this only
    # happens on macOS on case insensitive HFS.
    # TODO(maruel): It'd be better if it was only done once, in
    # expand_directory_and_symlink(), so it would not be necessary to do again
    # here.
    symlink_value = fs.readlink(filepath)  # pylint: disable=no-member
    filedir = file_path.get_native_path_case(os.path.dirname(filepath))
    native_dest = file_path.fix_native_path_case(filedir, symlink_value)
    out['l'] = os.path.relpath(native_dest, filedir)
  else:
    out['s'] = filestats.st_size
  return out
Beispiel #9
0
def recreate_tree(outdir, indir, infiles, action, as_hash):
  """Creates a new tree with only the input files in it.

  Arguments:
    outdir:    Output directory to create the files in. Must be an absolute,
               normalized path; it is created when missing.
    indir:     Root directory the infiles are based in.
    infiles:   dict of files to map from |indir| to |outdir|. Keys are relative
               paths, values are metadata dicts ('h', 's' and 'l' are read).
    action:    One of accepted action of file_path.link_file().
    as_hash:   Output filename is the hash instead of relfile.

  Raises:
    isolated_format.MappingError: in |as_hash| mode, the output file already
        exists but the item's metadata has no 's' entry to compare against.
  """
  logging.info(
      'recreate_tree(outdir=%s, indir=%s, files=%d, action=%s, as_hash=%s)',
      outdir, indir, len(infiles), action, as_hash)

  assert os.path.isabs(outdir) and outdir == os.path.normpath(outdir), outdir
  if not os.path.isdir(outdir):
    logging.info('Creating %s', outdir)
    fs.makedirs(outdir)

  # items() instead of iteritems(): the latter does not exist on Python 3.
  for relfile, metadata in infiles.items():
    infile = os.path.join(indir, relfile)
    if as_hash:
      # Do the hashtable specific checks.
      if 'l' in metadata:
        # Skip links when storing a hashtable.
        continue
      outfile = os.path.join(outdir, metadata['h'])
      if os.path.isfile(outfile):
        # Just do a quick check that the file size matches. No need to stat()
        # again the input file, grab the value from the dict.
        if 's' not in metadata:
          raise isolated_format.MappingError(
              'Misconfigured item %s: %s' % (relfile, metadata))
        if metadata['s'] == fs.stat(outfile).st_size:
          continue
        # Size mismatch: drop the stale file so it gets recreated below.
        # logging.warning() replaces the deprecated logging.warn() alias.
        logging.warning('Overwritting %s', metadata['h'])
        fs.remove(outfile)
    else:
      outfile = os.path.join(outdir, relfile)
      outsubdir = os.path.dirname(outfile)
      if not os.path.isdir(outsubdir):
        fs.makedirs(outsubdir)

    if 'l' in metadata:
      pointed = metadata['l']
      logging.debug('Symlink: %s -> %s', outfile, pointed)
      # symlink doesn't exist on Windows.
      fs.symlink(pointed, outfile)  # pylint: disable=E1101
    else:
      file_path.link_file(outfile, infile, action)
Beispiel #10
0
def recreate_tree(outdir, indir, infiles, action, as_hash):
  """Creates a new tree with only the input files in it.

  Arguments:
    outdir:    Output directory to create the files in. It must be absolute
               and normalized, and is created if it does not exist yet.
    indir:     Root directory the infiles are based in.
    infiles:   dict of files to map from |indir| to |outdir|; maps a relative
               path to its metadata dict (keys 'h', 's' and 'l' are read).
    action:    One of accepted action of file_path.link_file().
    as_hash:   Output filename is the hash instead of relfile.

  Raises:
    isolated_format.MappingError: with |as_hash| set, an output file already
        exists while the item's metadata lacks the 's' size entry.
  """
  logging.info(
      'recreate_tree(outdir=%s, indir=%s, files=%d, action=%s, as_hash=%s)',
      outdir, indir, len(infiles), action, as_hash)

  assert os.path.isabs(outdir) and outdir == os.path.normpath(outdir), outdir
  if not os.path.isdir(outdir):
    logging.info('Creating %s', outdir)
    fs.makedirs(outdir)

  # dict.iteritems() was removed in Python 3; items() works on 2 and 3.
  for relfile, metadata in infiles.items():
    infile = os.path.join(indir, relfile)
    if as_hash:
      # Do the hashtable specific checks.
      if 'l' in metadata:
        # Skip links when storing a hashtable.
        continue
      outfile = os.path.join(outdir, metadata['h'])
      if os.path.isfile(outfile):
        # Just do a quick check that the file size matches. No need to stat()
        # again the input file, grab the value from the dict.
        if 's' not in metadata:
          raise isolated_format.MappingError(
              'Misconfigured item %s: %s' % (relfile, metadata))
        if metadata['s'] == fs.stat(outfile).st_size:
          continue
        # The existing file has the wrong size; remove it so that it is
        # recreated below. logging.warn() is a deprecated alias.
        logging.warning('Overwritting %s', metadata['h'])
        fs.remove(outfile)
    else:
      outfile = os.path.join(outdir, relfile)
      outsubdir = os.path.dirname(outfile)
      if not os.path.isdir(outsubdir):
        fs.makedirs(outsubdir)

    if 'l' in metadata:
      pointed = metadata['l']
      logging.debug('Symlink: %s -> %s', outfile, pointed)
      # symlink doesn't exist on Windows.
      fs.symlink(pointed, outfile)  # pylint: disable=E1101
    else:
      file_path.link_file(outfile, infile, action)
Beispiel #11
0
 def _delete_file(self, digest, size=UNKNOWN_FILE_SIZE):
     """Removes the cache file for |digest| and accounts for the freed bytes.

     When |size| is UNKNOWN_FILE_SIZE the size is read from disk, falling back
     to 0 if the file cannot be stat'ed. The size is appended to
     self._evicted and added to self._free_disk. An OSError other than ENOENT
     is logged rather than raised.
     """
     self._lock.assert_locked()

     def size_on_disk():
         # A file that cannot be stat'ed counts as zero bytes.
         try:
             return fs.stat(self._path(digest)).st_size
         except OSError:
             return 0

     try:
         freed = size_on_disk() if size == UNKNOWN_FILE_SIZE else size
         file_path.try_remove(self._path(digest))
         self._evicted.append(freed)
         self._free_disk += freed
     except OSError as err:
         # A file that is already gone is not worth reporting.
         if err.errno == errno.ENOENT:
             return
         logging.error('Error attempting to delete a file %s:\n%s' %
                       (digest, err))
Beispiel #12
0
def is_valid_file(path, size):
    """Returns whether the file at |path| appears valid.

    With |size| equal to UNKNOWN_FILE_SIZE only the existence of the file is
    checked. Otherwise the file must be stat'able and its size must equal
    |size|; a warning is logged when either check fails.
    """
    if size == UNKNOWN_FILE_SIZE:
        return fs.isfile(path)

    try:
        size_on_disk = fs.stat(path).st_size
    except OSError as e:
        logging.warning('Can\'t read item %s, assuming it\'s invalid: %s',
                        os.path.basename(path), e)
        return False

    mismatch = size != size_on_disk
    if mismatch:
        logging.warning('Found invalid item %s; %d != %d',
                        os.path.basename(path), size_on_disk, size)
    return not mismatch
Beispiel #13
0
 def _add(self, digest, size=UNKNOWN_FILE_SIZE):
     """Registers |digest| in the LRU as the newest item and tracks its size.

     When |size| is UNKNOWN_FILE_SIZE it is read from the file on disk. The
     size is appended to self._added and subtracted from self._free_disk.
     """
     self._lock.assert_locked()
     if size == UNKNOWN_FILE_SIZE:
         size = fs.stat(self._path(digest)).st_size
     self._added.append(size)
     self._lru.add(digest, size)
     self._free_disk -= size

     def needs_more_free_space():
         # True while a minimum is configured, the LRU still has items and the
         # tracked free space is below that minimum.
         return (self.policies.min_free_space and self._lru
                 and self._free_disk < self.policies.min_free_space)

     # Do a quicker version of self._trim(). It only enforces free disk space,
     # not cache size limits. It doesn't actually look at real free disk space,
     # only uses its cache values. self._trim() will be called later to enforce
     # real trimming but doing this quick version here makes it possible to map
     # an isolated that is larger than the current amount of free disk space when
     # the cache size is already large.
     while needs_more_free_space():
         if self._remove_lru_file(False) == -1:
             break
Beispiel #14
0
def readable_copy(outfile, infile):
  """Copies |infile| to |outfile| and adds read permission for everyone."""
  fs.copy2(infile, outfile)
  # OR the read bits into the mode the copy ended up with, so that no bit
  # already set is lost.
  read_for_all = stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH
  copied_mode = fs.stat(outfile).st_mode
  fs.chmod(outfile, copied_mode | read_for_all)
Beispiel #15
0
 def try_remove(p):
     """Adds the size of |p| to self._free_disk, then calls old_try_remove."""
     try:
         reclaimed = fs.stat(p).st_size
     except OSError:
         # The size is unknown, so the free space counter is left untouched.
         pass
     else:
         self._free_disk += reclaimed
     return old_try_remove(p)
Beispiel #16
0
def file_to_metadata(filepath, prevdict, read_only, algo, collapse_symlinks):
    """Builds the metadata dict describing |filepath| for an .isolated file.

    Behaviors:
    - Stats the file, following symlinks only when |collapse_symlinks| is set.
    - Records the rounded modification time; it decides whether values from
      |prevdict| can be reused. A timestamp manually reset to its previous
      value after the content changed is not detected.
    - For a non-link, records the size and the content hash, plus the
      normalized mode except on Windows.
    - For a symlink, records the destination relative to the link's directory.

    Arguments:
      filepath: File to act on.
      prevdict: The previous dictionary for this file, used to skip
                recalculating the hash or link destination. It is read with
                .get(), so pass an empty dict when there is no previous data.
      read_only: One of None, 0, 1 or 2. When truthy, the user write bit is
                 removed from the recorded mode.
      algo:      Hashing algorithm, passed on to hash_file().
      collapse_symlinks: True if symlinked files should be treated like they
                         were the normal underlying file.

    Returns:
      A dict with 't' always set; 's' and 'h' (and 'm' except on Windows) for
      non-links; 'l' for symlinks.

    Raises:
      MappingError: the file could not be stat'ed.
    """
    # TODO(maruel): None is not a valid value.
    assert read_only in (None, 0, 1, 2), read_only

    # stat follows symbolic links; lstat does not, and thus preserves them.
    stat_func = fs.stat if collapse_symlinks else fs.lstat
    try:
        filestats = stat_func(filepath)
    except OSError:
        # The file is not present.
        raise MappingError('%s is missing' % filepath)

    out = {}
    is_link = stat.S_ISLNK(filestats.st_mode)

    # The file mode is ignored on Windows since it's not really useful there,
    # and it is never recorded for symlinks.
    if sys.platform != 'win32' and not is_link:
        filemode = stat.S_IMODE(filestats.st_mode)
        # Remove write access for group and all access to 'others'.
        filemode &= ~(stat.S_IWGRP | stat.S_IRWXO)
        if read_only:
            filemode &= ~stat.S_IWUSR
        # Only keep x group bit if both x user bit and group read bit are set.
        needed_for_group_exec = stat.S_IXUSR | stat.S_IRGRP
        if (filemode & needed_for_group_exec) == needed_for_group_exec:
            filemode |= stat.S_IXGRP
        else:
            filemode &= ~stat.S_IXGRP
        out['m'] = filemode

    # Used to skip recalculating the hash or link destination. Use the most
    # recent update time.
    out['t'] = int(round(filestats.st_mtime))
    same_timestamp = prevdict.get('t') == out['t']

    if is_link:
        # If the timestamp wasn't updated, carry on the link destination.
        link_dest = prevdict.get('l') if same_timestamp else None
        if link_dest is None:
            # The link could be in an incorrect path case. In practice, this
            # only happen on OSX on case insensitive HFS.
            # TODO(maruel): It'd be better if it was only done once, in
            # expand_directory_and_symlink(), so it would not be necessary to
            # do again here.
            symlink_value = fs.readlink(filepath)  # pylint: disable=E1101
            filedir = file_path.get_native_path_case(os.path.dirname(filepath))
            native_dest = file_path.fix_native_path_case(
                filedir, symlink_value)
            link_dest = os.path.relpath(native_dest, filedir)
        out['l'] = link_dest
        return out

    out['s'] = filestats.st_size
    # If the timestamp wasn't updated and the file size is still the same,
    # carry on the previous hash when there is one.
    cached_hash = None
    if same_timestamp and prevdict.get('s') == out['s']:
        cached_hash = prevdict.get('h')
    out['h'] = cached_hash or hash_file(filepath, algo)
    return out
Beispiel #17
0
def readable_copy(outfile, infile):
  """Makes a copy of |infile| at |outfile| that is readable by everyone."""
  fs.copy2(infile, outfile)
  # Keep whatever mode the copy has and add read access for user, group and
  # others on top of it.
  mode_with_read = (
      fs.stat(outfile).st_mode | stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH)
  fs.chmod(outfile, mode_with_read)