def file_to_metadata(filepath, collapse_symlinks):
  """Builds the metadata dict describing a single input file.

  The file is stat'ed, following symlinks only when collapse_symlinks is set,
  and one of two shapes is produced:
  - symlink: only 'l', the link destination expressed relative to the
    directory containing the link.
  - anything else: 's', the file size, preceded by 'm', the normalized
    permission bits, on every platform except Windows.

  No timestamp or content hash is computed here.

  Arguments:
    filepath: File to act on.
    collapse_symlinks: True if symlinked files should be treated like they were
                       the normal underlying file.

  Returns:
    The necessary dict to create a entry in the 'files' section of an .isolated
    file *except* 'h' for files.

  Raises:
    MappingError: if stat'ing the file fails with OSError.
  """
  # fs.stat follows symbolic links while fs.lstat preserves them.
  stat_fn = fs.stat if collapse_symlinks else fs.lstat
  try:
    info = stat_fn(filepath)
  except OSError:
    # The file is not present.
    raise MappingError('%s is missing' % filepath)

  if stat.S_ISLNK(info.st_mode):
    # The link could be in an incorrect path case. In practice, this only
    # happens on macOS on case insensitive HFS.
    # TODO(maruel): It'd be better if it was only done once, in
    # expand_directory_and_symlink(), so it would not be necessary to do again
    # here.
    target = fs.readlink(filepath)  # pylint: disable=no-member
    parent = file_path.get_native_path_case(os.path.dirname(filepath))
    resolved = file_path.fix_native_path_case(parent, target)
    return {'l': os.path.relpath(resolved, parent)}

  metadata = {}
  if sys.platform != 'win32':
    # The file mode is ignored on Windows since it's not really useful there.
    # Drop write access for group and every access for 'others'.
    mode = stat.S_IMODE(info.st_mode) & ~(stat.S_IWGRP | stat.S_IRWXO)
    required = stat.S_IXUSR | stat.S_IRGRP
    if mode & required == required:
      # Group execute is kept only when both user execute and group read are
      # set.
      mode |= stat.S_IXGRP
    else:
      mode &= ~stat.S_IXGRP
    metadata['m'] = mode
  metadata['s'] = info.st_size
  return metadata
Example #2
0
def get_recursive_size(path):
  """Returns the cumulated size of every file found under the specified path.

  Each file is measured with fs.lstat(), so a symlink contributes the size of
  the link itself.

  Returns:
    The total in bytes, or -1 if the tree could not be fully inspected.
  """
  try:
    return sum(
        fs.lstat(os.path.join(dirpath, name)).st_size
        for dirpath, _, names in fs.walk(path)
        for name in names)
  except (IOError, OSError, UnicodeEncodeError) as exc:
    logging.warning('Exception while getting the size of %s:\n%s', path, exc)
    # A negative value makes it obvious to the caller that something is wrong.
    return -1
Example #3
0
def _get_recursive_size(path):
    """Returns the cumulated size of every file found under the specified path.

    This function can be surprisingly slow on OSX, so its output should be
    cached.

    Returns:
      The total in bytes, or None if an error occurred while walking the tree.
    """
    try:
        return sum(
            fs.lstat(os.path.join(dirpath, name)).st_size
            for dirpath, _, names in fs.walk(path)
            for name in names)
    except (IOError, OSError, UnicodeEncodeError) as exc:
        logging.warning('Exception while getting the size of %s:\n%s', path,
                        exc)
        return None
def _get_recursive_size(path):
    """Returns the cumulated size of the regular files under the specified path.

    Symlinks are never counted. When _use_scandir() is true the tree is
    traversed with scandir.scandir() and, on Windows, entries flagged as
    reparse points are skipped as well; otherwise fs.walk() and fs.lstat() are
    used.

    This function can be surprisingly slow on OSX, so its output should be
    cached.

    Returns:
      The total in bytes, or None if an error occurred during the traversal.
    """
    try:
        if not _use_scandir():
            # Fallback implementation based on fs.walk().
            size = 0
            for dirpath, _, names in fs.walk(path):
                for name in names:
                    info = fs.lstat(os.path.join(dirpath, name))
                    if not stat.S_ISLNK(info.st_mode):
                        size += info.st_size
            return size

        if sys.platform == 'win32':

            def is_junction(entry):
                # both st_file_attributes and FILE_ATTRIBUTE_REPARSE_POINT are
                # windows-only symbols.
                attributes = entry.stat().st_file_attributes
                return bool(attributes & scandir.FILE_ATTRIBUTE_REPARSE_POINT)
        else:

            def is_junction(_entry):
                return False

        size = 0
        # Explicit stack of directories left to visit, instead of recursion.
        pending = [path]
        while pending:
            current = pending.pop()
            for entry in scandir.scandir(current):
                if entry.is_symlink() or is_junction(entry):
                    continue
                if entry.is_file():
                    size += entry.stat().st_size
                elif entry.is_dir():
                    pending.append(entry.path)
                else:
                    logging.warning('non directory/file entry: %s', entry)
        return size
    except (IOError, OSError, UnicodeEncodeError) as exc:
        logging.warning('Exception while getting the size of %s:\n%s', path,
                        exc)
        return None
Example #5
0
def set_read_only(path, read_only):
    """Sets or resets the write bit on a file or directory.

    When making the path read only, access for 'group' and 'others' is zapped
    out: only the user read and execute bits are kept. When making it
    writable, the user write bit is added to the existing mode.

    Symlinks are modified through lchmod() when the platform provides it and
    are left untouched otherwise.

    Arguments:
      path: File, directory or symlink to act on.
      read_only: True to remove write access, False to grant user write access.
    """
    st_mode = fs.lstat(path).st_mode
    # Record the file type before masking: the read only mask drops the type
    # bits, so testing the masked value would never detect a symlink.
    is_link = stat.S_ISLNK(st_mode)
    # TODO(maruel): Stop removing GO bits.
    if read_only:
        mode = st_mode & (stat.S_IRUSR | stat.S_IXUSR)  # 0500
    else:
        mode = st_mode | stat.S_IWUSR  # 0200
    if hasattr(os, 'lchmod'):
        fs.lchmod(path, mode)  # pylint: disable=E1101
        return

    if is_link:
        # Skip symlink without lchmod() support; chmod() would follow the link
        # and modify its target instead.
        logging.debug('Can\'t change %sw bit on symlink %s',
                      '-' if read_only else '+', path)
        return

    # TODO(maruel): Implement proper DACL modification on Windows.
    fs.chmod(path, mode)
Example #6
0
def set_read_only(path, read_only):
  """Sets or resets the write bit on a file or directory.

  When making the path read only, access for 'group' and 'others' is zapped
  out: only the user read and execute bits are kept. When making it writable,
  the user write bit is added to the existing mode.

  Symlinks are modified through lchmod() when the platform provides it and are
  left untouched otherwise.

  Arguments:
    path: File, directory or symlink to act on.
    read_only: True to remove write access, False to grant user write access.
  """
  st_mode = fs.lstat(path).st_mode
  # Record the file type before masking: the read only mask drops the type
  # bits, so testing the masked value would never detect a symlink.
  is_link = stat.S_ISLNK(st_mode)
  # TODO(maruel): Stop removing GO bits.
  if read_only:
    mode = st_mode & (stat.S_IRUSR | stat.S_IXUSR)  # 0500
  else:
    mode = st_mode | stat.S_IWUSR  # 0200
  if hasattr(os, 'lchmod'):
    fs.lchmod(path, mode)  # pylint: disable=E1101
    return

  if is_link:
    # Skip symlink without lchmod() support; chmod() would follow the link and
    # modify its target instead.
    logging.debug(
        'Can\'t change %sw bit on symlink %s',
        '-' if read_only else '+', path)
    return

  # TODO(maruel): Implement proper DACL modification on Windows.
  fs.chmod(path, mode)
Example #7
0
def set_read_only(path, read_only):
    """Sets or resets the write bit on a file or directory.

    When making the path read only, access for 'group' and 'others' is zapped
    out: only the user read and execute bits are kept. When making it
    writable, user read and write are added to the existing mode, plus user
    execute for directories on non-Windows platforms.

    Symlinks are modified through lchmod() when the platform provides it and
    are left untouched otherwise.

    Arguments:
      path: File, directory or symlink to act on.
      read_only: True to remove write access, False to grant user write access.
    """
    st_mode = fs.lstat(path).st_mode
    # Record the file type before masking: the read only mask drops the type
    # bits, so testing the masked value would never detect a symlink.
    is_link = stat.S_ISLNK(st_mode)
    mode = st_mode
    # TODO(maruel): Stop removing GO bits.
    if read_only:
        mode &= stat.S_IRUSR | stat.S_IXUSR  # 0500
    else:
        mode |= stat.S_IRUSR | stat.S_IWUSR  # 0600
        if sys.platform != 'win32' and stat.S_ISDIR(st_mode):
            # A directory without the execute bit cannot be traversed.
            mode |= stat.S_IXUSR  # 0100
    if hasattr(os, 'lchmod'):
        fs.lchmod(path, mode)  # pylint: disable=E1101
        return

    if is_link:
        # Skip symlink without lchmod() support; chmod() would follow the link
        # and modify its target instead.
        return

    # TODO(maruel): Implement proper DACL modification on Windows.
    fs.chmod(path, mode)
Example #8
0
def file_to_metadata(filepath, prevdict, read_only, algo, collapse_symlinks):
    """Processes an input file, a dependency, and return meta data about it.

    Behaviors:
    - Retrieves the file mode, file size, file timestamp, file link
      destination if it is a file link and calculate the hash of the file's
      content if the path points to a file and not a symlink.

    Arguments:
      filepath: File to act on.
      prevdict: the previous dictionary. It is used to retrieve the cached hash
                to skip recalculating the hash. Optional: None is treated like
                an empty dictionary.
      read_only: If 1 or 2, the file mode is manipulated. In practice, only
                 save one of 4 modes: 0755 (rwx), 0644 (rw), 0555 (rx), 0444
                 (r). On windows, mode is not set since all files are
                 'executable' by default.
      algo:      Hashing algorithm used.
      collapse_symlinks: True if symlinked files should be treated like they
                         were the normal underlying file.

    Returns:
      The necessary dict to create a entry in the 'files' section of an
      .isolated file.

    Raises:
      MappingError: if stat'ing the file fails with OSError.
    """
    # TODO(maruel): None is not a valid value.
    assert read_only in (None, 0, 1, 2), read_only
    if prevdict is None:
        # prevdict is documented as optional; without this the lookups below
        # would fail with AttributeError.
        prevdict = {}
    out = {}
    # Always check the file stat and check if it is a link. The timestamp is
    # used to know if the file's content/symlink destination should be looked
    # into. E.g. only reuse from prevdict if the timestamp hasn't changed.
    # There is the risk of the file's timestamp being reset to its last value
    # manually while its content changed. We don't protect against that use
    # case.
    try:
        if collapse_symlinks:
            # os.stat follows symbolic links
            filestats = fs.stat(filepath)
        else:
            # os.lstat does not follow symbolic links, and thus preserves them.
            filestats = fs.lstat(filepath)
    except OSError:
        # The file is not present.
        raise MappingError('%s is missing' % filepath)
    is_link = stat.S_ISLNK(filestats.st_mode)

    if sys.platform != 'win32' and not is_link:
        # Ignore file mode on Windows since it's not really useful there, and
        # on symlinks since no mode is recorded for them.
        filemode = stat.S_IMODE(filestats.st_mode)
        # Remove write access for group and all access to 'others'.
        filemode &= ~(stat.S_IWGRP | stat.S_IRWXO)
        if read_only:
            filemode &= ~stat.S_IWUSR
        exec_and_group_read = stat.S_IXUSR | stat.S_IRGRP
        if filemode & exec_and_group_read == exec_and_group_read:
            # Only keep x group bit if both x user bit and group read bit are
            # set.
            filemode |= stat.S_IXGRP
        else:
            filemode &= ~stat.S_IXGRP
        out['m'] = filemode

    # Used to skip recalculating the hash or link destination. Use the most
    # recent update time.
    out['t'] = int(round(filestats.st_mtime))

    if not is_link:
        out['s'] = filestats.st_size
        # If the timestamp wasn't updated and the file size is still the same,
        # carry on the hash.
        if prevdict.get('t') == out['t'] and prevdict.get('s') == out['s']:
            # Reuse the previous hash if available.
            out['h'] = prevdict.get('h')
        if not out.get('h'):
            out['h'] = hash_file(filepath, algo)
    else:
        # If the timestamp wasn't updated, carry on the link destination.
        if prevdict.get('t') == out['t']:
            # Reuse the previous link destination if available.
            out['l'] = prevdict.get('l')
        if out.get('l') is None:
            # The link could be in an incorrect path case. In practice, this
            # only happen on OSX on case insensitive HFS.
            # TODO(maruel): It'd be better if it was only done once, in
            # expand_directory_and_symlink(), so it would not be necessary to
            # do again here.
            symlink_value = fs.readlink(filepath)  # pylint: disable=E1101
            filedir = file_path.get_native_path_case(os.path.dirname(filepath))
            native_dest = file_path.fix_native_path_case(
                filedir, symlink_value)
            out['l'] = os.path.relpath(native_dest, filedir)
    return out