Ejemplo n.º 1
0
    def test_clean_cache(self):
        """Installs then uninstalls two named caches, starting from an empty cache.

        After the installs the destination directories exist but the cache
        directory only holds the state file. After the uninstalls the written
        files are found back inside the cache directory and each cache name has
        a relative symlink in the named directory.
        """
        cache = self.get_cache(_get_policies())
        dest_dir = os.path.join(self.tempdir, 'dest')
        self.assertEqual([], fs.listdir(cache.cache_dir))

        dir_a = os.path.join(dest_dir, u'a')
        dir_b = os.path.join(dest_dir, u'b')

        # Nothing was cached before, so both installs are expected to return 0
        # and the named directory must not have been created.
        for dest, name in ((dir_a, u'1'), (dir_b, u'2')):
            self.assertEqual(0, cache.install(dest, name))
        self.assertEqual(
            False, fs.exists(os.path.join(cache.cache_dir, cache.NAMED_DIR)))

        self.assertEqual({u'a', u'b'}, set(fs.listdir(dest_dir)))
        self.assertFalse(cache.available)
        self.assertEqual([cache.STATE_FILE], fs.listdir(cache.cache_dir))

        # Put one single-character file in each installed directory.
        write_file(os.path.join(dir_a, u'x'), u'x')
        write_file(os.path.join(dir_b, u'y'), u'y')

        for dest, name in ((dir_a, u'1'), (dir_b, u'2')):
            self.assertEqual(1, cache.uninstall(dest, name))

        # Four entries are expected in the cache directory at this point.
        self.assertEqual(4, len(fs.listdir(cache.cache_dir)))
        for name, filename, content in (('1', u'x', 'x'), ('2', u'y', 'y')):
            cached = os.path.join(cache.cache_dir, cache._lru[name][0])
            self.assertEqual(content,
                             read_file(os.path.join(cached, filename)))
        # The named links point one level up, at the cache entries.
        for name in ('1', '2'):
            self.assertEqual(os.path.join(u'..', cache._lru[name][0]),
                             fs.readlink(cache._get_named_path(name)))
        self.assertEqual(
            [u'1', u'2'],
            sorted(fs.listdir(os.path.join(cache.cache_dir, cache.NAMED_DIR))))
Ejemplo n.º 2
0
    def test_symlink_missing_destination_abs(self):
        """A symlink to a missing absolute destination can be created and read."""
        target = os.path.join(self.tempdir, 'file')
        link = os.path.join(self.tempdir, 'lf')
        # The target is deliberately never created.
        fs.symlink(target, link)

        self.assertEqual(True, fs.islink(link))
        self.assertEqual(target, fs.readlink(link))
Ejemplo n.º 3
0
    def test_symlink_absolute(self):
        """Symlinks to absolute paths are valid and are honored by walk().

        Layout created under the temporary directory:
          dir/           real directory
          dir/file       real file
          ld -> dir      link to the directory
          lf -> ld/file  link to the file, going through the directory link
        """
        real_dir = os.path.join(self.tempdir, 'dir')
        real_file = os.path.join(real_dir, 'file')
        fs.mkdir(real_dir)
        write_content(real_file, b'hello')

        file_link = os.path.join(self.tempdir, 'lf')
        dir_link = os.path.join(self.tempdir, 'ld')
        via_dir_link = os.path.join(dir_link, 'file')
        # lf is created first, i.e. before the ld it goes through exists.
        fs.symlink(via_dir_link, file_link)
        fs.symlink(real_dir, dir_link)

        for link in (file_link, dir_link):
            self.assertEqual(True, fs.islink(link))
        for target, link in ((via_dir_link, file_link), (real_dir, dir_link)):
            self.assertEqual(target, fs.readlink(link))
        self.assertEqual(['file'], fs.listdir(dir_link))
        # lf resolves to dir/file.
        with fs.open(file_link) as f:
            self.assertEqual('hello', f.read())

        def walk(followlinks):
            """Returns the sorted fs.walk() output of the temporary directory."""
            return [
                (root, sorted(dirs), sorted(files))
                for root, dirs, files in sorted(
                    fs.walk(self.tempdir, followlinks=followlinks))
            ]

        # Ensures that followlinks is respected in walk(): ld is only
        # descended into when links are followed.
        top_entry = (self.tempdir, ['dir', 'ld'], ['lf'])
        real_dir_entry = (real_dir, [], ['file'])
        self.assertEqual([top_entry, real_dir_entry], walk(False))
        self.assertEqual(
            [top_entry, real_dir_entry, (dir_link, [], ['file'])], walk(True))
Ejemplo n.º 4
0
def file_to_metadata(filepath, collapse_symlinks):
  """Processes an input file, a dependency, and returns metadata about it.

  The path is stat'ed, following symlinks only when collapse_symlinks is set.
  For a symlink, only the link destination is recorded, expressed relative to
  the directory containing the link. For anything else the size is recorded
  and, except on Windows, a sanitized file mode. The content hash is not
  calculated here.

  Arguments:
    filepath: File to act on.
    collapse_symlinks: True if symlinked files should be treated like they were
                       the normal underlying file.

  Returns:
    The necessary dict to create an entry in the 'files' section of an
    .isolated file *except* 'h' for files: 'l' for a symlink, otherwise 's'
    and (not on Windows) 'm'.

  Raises:
    MappingError: if the path cannot be stat'ed.
  """
  # fs.stat follows symbolic links while fs.lstat does not, and thus preserves
  # them.
  stat_fn = fs.stat if collapse_symlinks else fs.lstat
  try:
    filestats = stat_fn(filepath)
  except OSError:
    # The file is not present.
    raise MappingError('%s is missing' % filepath)

  if stat.S_ISLNK(filestats.st_mode):
    # The link could be in an incorrect path case. In practice, this only
    # happens on macOS on case insensitive HFS.
    # TODO(maruel): It'd be better if it was only done once, in
    # expand_directory_and_symlink(), so it would not be necessary to do again
    # here.
    symlink_value = fs.readlink(filepath)  # pylint: disable=no-member
    filedir = file_path.get_native_path_case(os.path.dirname(filepath))
    native_dest = file_path.fix_native_path_case(filedir, symlink_value)
    return {'l': os.path.relpath(native_dest, filedir)}

  out = {}
  if sys.platform != 'win32':
    # The file mode is ignored on Windows since it's not really useful there.
    filemode = stat.S_IMODE(filestats.st_mode)
    # Remove write access for group and all access to 'others'.
    filemode &= ~(stat.S_IWGRP | stat.S_IRWXO)
    # Only keep the x group bit if both the x user bit and the group read bit
    # are set.
    user_exec_and_group_read = stat.S_IXUSR | stat.S_IRGRP
    if filemode & user_exec_and_group_read == user_exec_and_group_read:
      filemode |= stat.S_IXGRP
    else:
      filemode &= ~stat.S_IXGRP
    out['m'] = filemode
  out['s'] = filestats.st_size
  return out
Ejemplo n.º 5
0
    def test_existing_cache(self):
        """Ensures that install()/uninstall() behave as expected under normal use.

        One named cache is populated first, then it is reinstalled next to a
        brand new one, both are modified and both are uninstalled again.
        """
        cache = self.get_cache(_get_policies())
        dest_dir = os.path.join(self.tempdir, 'dest')
        # Assume test_clean passes.
        dir_a = os.path.join(dest_dir, u'a')
        dir_b = os.path.join(dest_dir, u'b')

        # Setup: leave cache '1' holding a single one-character file.
        self.assertEqual(0, cache.install(dir_a, u'1'))
        write_file(os.path.join(dest_dir, u'a', u'x'), u'x')
        self.assertEqual(1, cache.uninstall(dir_a, u'1'))

        # Test starts here: '1' already has content, '2' is new.
        for expected, dest, name in ((1, dir_a, u'1'), (0, dir_b, u'2')):
            self.assertEqual(expected, cache.install(dest, name))
        self.assertEqual({'a', 'b'}, set(fs.listdir(dest_dir)))
        self.assertFalse(cache.available)
        # While both caches are installed, the named directory exists but is
        # empty.
        self.assertEqual(sorted([cache.NAMED_DIR, cache.STATE_FILE]),
                         sorted(fs.listdir(cache.cache_dir)))
        self.assertEqual([],
                         fs.listdir(
                             os.path.join(cache.cache_dir, cache.NAMED_DIR)))

        # The previously cached file came back with the install.
        self.assertEqual('x', read_file(os.path.join(dest_dir, u'a', u'x')))
        write_file(os.path.join(dir_a, 'x'), 'x2')
        write_file(os.path.join(dir_b, 'y'), 'y')

        for expected, dest, name in ((2, dir_a, '1'), (1, dir_b, '2')):
            self.assertEqual(expected, cache.uninstall(dest, name))

        # Four entries are expected in the cache directory at this point.
        self.assertEqual(4, len(fs.listdir(cache.cache_dir)))
        for name, filename, content in (('1', 'x', 'x2'), ('2', 'y', 'y')):
            cached = os.path.join(cache.cache_dir, cache._lru[name][0])
            self.assertEqual(content,
                             read_file(os.path.join(cached, filename)))
        # The named links point one level up, at the cache entries.
        for name in ('1', '2'):
            self.assertEqual(os.path.join(u'..', cache._lru[name][0]),
                             fs.readlink(cache._get_named_path(name)))
        self.assertEqual(
            [u'1', u'2'],
            sorted(fs.listdir(os.path.join(cache.cache_dir, cache.NAMED_DIR))))
Ejemplo n.º 6
0
 def _prepare_named_cache(self, cache):
     """Prepares |cache| and returns the short name behind each named link.

     Returns:
       dict mapping each integer in 1..10 to the basename of the target of the
       named symlink with the same number.
     """
     self._prepare_cache(cache)
     items = range(1, 11)
     # Figure out the short names via the symlinks.
     short_names = {}
     for n in items:
         named_link = os.path.join(cache.cache_dir, cache.NAMED_DIR,
                                   unicode(n))
         short_names[n] = os.path.basename(fs.readlink(named_link))
     self._verify_named_cache(cache, short_names, items)
     return short_names
Ejemplo n.º 7
0
def copy_recursively(src, dst):
  """Efficiently copies a file or directory from src to dst.

  `src` may be a file, directory, or a symlink to a file or directory.
  All symlinks are replaced with their targets, so the resulting
  directory structure in dst will never have any symlinks.

  To increase speed, copy_recursively hardlinks individual files into the
  (newly created) directory structure if possible, unlike Python's
  shutil.copytree().

  Errors are never raised to the caller: an OSError hit while copying `src`
  (missing path, broken or cyclic symlink, ...) is logged and the copy of that
  item is abandoned.

  Arguments:
    src: path of the file, directory or symlink to copy.
    dst: path to create; intermediate directories are created as needed when
         src is a directory.
  """
  # Upper bound on the length of a symlink chain. A cyclic chain (a -> b -> a)
  # would otherwise be followed forever. 40 is the limit the Linux kernel
  # applies when it resolves a path.
  max_symlink_hops = 40
  orig_src = src
  try:
    # Replace symlinks with their final target.
    hops = 0
    while fs.islink(src):
      hops += 1
      if hops > max_symlink_hops:
        # Reported through the generic OSError handler below.
        raise OSError(
            errno.ELOOP, 'Too many levels of symbolic links', orig_src)
      res = fs.readlink(src)
      # A relative link is relative to the directory containing the link; an
      # absolute link replaces the path altogether.
      src = os.path.join(os.path.dirname(src), res)

    # Note that fs.isfile (which is a wrapper around os.path.isfile) throws
    # an exception if src does not exist. A warning will be logged in that case.
    if fs.isfile(src):
      file_path.link_file(dst, src, file_path.HARDLINK_WITH_FALLBACK)
      return

    if not fs.exists(dst):
      os.makedirs(dst)

    for child in fs.listdir(src):
      copy_recursively(os.path.join(src, child), os.path.join(dst, child))

  except OSError as e:
    if e.errno == errno.ENOENT:
      logging.warning('Path %s does not exist or %s is a broken symlink',
                      src, orig_src)
    else:
      logging.info("Couldn't collect output file %s: %s", src, e)
Ejemplo n.º 8
0
def _expand_symlinks(indir, relfile):
    """Finds symlinks in relfile.

    Follows symlinks in |relfile|, but treating symlinks that point outside the
    build tree as if they were ordinary directories/files. Returns the final
    symlink-free target and a list of paths to symlinks encountered in the
    process.

    The rule about symlinks outside the build tree is for the benefit of the
    Chromium OS ebuild, which symlinks the output directory to an unrelated
    path in the chroot.

    Fails when a directory loop is detected, although in theory we could
    support that case.

    Arguments:
    - indir: base directory; symlinks in indir are not processed; this is
      the base directory that is considered 'outside of the tree'.
    - relfile: part of the path to expand symlink.

    Returns:
      tuple(relfile, list(symlinks)): relfile is the real path of relfile,
      relative to indir, where all symlinks were evaluated. symlinks is the
      chain of symlinks found along the way, if any, each relative to indir.

    Raises:
      MappingError: if a symlink target doesn't exist or if a symlink points
      to one of its own parent directories.
    """
    # A trailing separator marks a directory; it is stripped for processing
    # and restored on the returned path.
    is_directory = relfile.endswith(os.path.sep)
    # |done| is the prefix already processed, |todo| is the remainder that
    # still has to be scanned for symlinks.
    done = indir
    todo = relfile.strip(os.path.sep)
    symlinks = []

    while todo:
        pre_symlink, symlink, post_symlink = file_path.split_at_symlink(
            done, todo)
        if not symlink:
            # No symlink left in the remainder: fix its case and finish.
            todo = file_path.fix_native_path_case(done, todo)
            done = os.path.join(done, todo)
            break
        symlink_path = os.path.join(done, pre_symlink, symlink)
        post_symlink = post_symlink.lstrip(os.path.sep)
        # readlink doesn't exist on Windows.
        # pylint: disable=E1101
        # Directory containing the symlink; used as the base for a relative
        # link value.
        target = os.path.normpath(os.path.join(done, pre_symlink))
        symlink_target = fs.readlink(symlink_path)
        if os.path.isabs(symlink_target):
            # Absolute paths are taken as-is. The use case is generally
            # someone who puts the output directory on a separate drive.
            target = symlink_target
        else:
            # The symlink itself could be using the wrong path case.
            # NOTE(review): presumably fix_native_path_case() returns the link
            # value joined to the containing directory - confirm.
            target = file_path.fix_native_path_case(target, symlink_target)

        if not fs.exists(target):
            raise MappingError('Symlink target doesn\'t exist: %s -> %s' %
                               (symlink_path, target))
        target = file_path.get_native_path_case(target)
        if not file_path.path_starts_with(indir, target):
            # The target is outside of indir: keep the symlink in the path as
            # if it was an ordinary directory/file and carry on after it.
            done = symlink_path
            todo = post_symlink
            continue
        if file_path.path_starts_with(target, symlink_path):
            # The symlink lives under its own target, i.e. it points to one
            # of its parents; following it would never end.
            raise MappingError(
                'Can\'t map recursive symlink reference %s -> %s' %
                (symlink_path, target))
        logging.info('Found symlink: %s -> %s', symlink_path, target)
        symlinks.append(os.path.relpath(symlink_path, indir))
        # Treat the common prefix of the old and new paths as done, and start
        # scanning again. Both names are rebound to lists of path components
        # from here on.
        target = target.split(os.path.sep)
        symlink_path = symlink_path.split(os.path.sep)
        prefix_length = 0
        for target_piece, symlink_path_piece in zip(target, symlink_path):
            if target_piece != symlink_path_piece:
                break
            prefix_length += 1
        done = os.path.sep.join(target[:prefix_length])
        todo = os.path.join(os.path.sep.join(target[prefix_length:]),
                            post_symlink)

    relfile = os.path.relpath(done, indir)
    # bool * str: appends the separator only when the input had one.
    relfile = relfile.rstrip(os.path.sep) + is_directory * os.path.sep
    return relfile, symlinks
Ejemplo n.º 9
0
def file_to_metadata(filepath, prevdict, read_only, algo, collapse_symlinks):
    """Processes an input file, a dependency, and returns metadata about it.

    Behaviors:
    - Retrieves the file mode, file size, file timestamp, file link
      destination if it is a file link and calculates the hash of the file's
      content if the path points to a file and not a symlink.

    Arguments:
      filepath: File to act on.
      prevdict: the previous dictionary. It is used to retrieve the cached
                hash to skip recalculating the hash. Optional: None is
                handled like an empty dict.
      read_only: If 1 or 2, the file mode is manipulated. In practice, only
                 save one of 4 modes: 0755 (rwx), 0644 (rw), 0555 (rx),
                 0444 (r). On windows, mode is not set since all files are
                 'executable' by default.
      algo:      Hashing algorithm used.
      collapse_symlinks: True if symlinked files should be treated like they
                         were the normal underlying file.

    Returns:
      The necessary dict to create an entry in the 'files' section of an
      .isolated file: 't' always; 's', 'h' and (not on Windows) 'm' for a
      file; 'l' for a symlink.

    Raises:
      MappingError: if the path cannot be stat'ed.
    """
    # TODO(maruel): None is not a valid value.
    assert read_only in (None, 0, 1, 2), read_only
    # prevdict is documented as optional; without this, a None value would
    # fail on the .get() calls below.
    prevdict = prevdict or {}
    out = {}
    # Always check the file stat and check if it is a link. The timestamp is
    # used to know if the file's content/symlink destination should be looked
    # into. E.g. only reuse from prevdict if the timestamp hasn't changed.
    # There is the risk of the file's timestamp being reset to its last value
    # manually while its content changed. We don't protect against that use
    # case.
    try:
        if collapse_symlinks:
            # os.stat follows symbolic links
            filestats = fs.stat(filepath)
        else:
            # os.lstat does not follow symbolic links, and thus preserves them.
            filestats = fs.lstat(filepath)
    except OSError:
        # The file is not present.
        raise MappingError('%s is missing' % filepath)
    is_link = stat.S_ISLNK(filestats.st_mode)

    if sys.platform != 'win32':
        # Ignore file mode on Windows since it's not really useful there.
        filemode = stat.S_IMODE(filestats.st_mode)
        # Remove write access for group and all access to 'others'.
        filemode &= ~(stat.S_IWGRP | stat.S_IRWXO)
        if read_only:
            filemode &= ~stat.S_IWUSR
        if filemode & (stat.S_IXUSR | stat.S_IRGRP) == (stat.S_IXUSR
                                                        | stat.S_IRGRP):
            # Only keep x group bit if both x user bit and group read bit are
            # set.
            filemode |= stat.S_IXGRP
        else:
            filemode &= ~stat.S_IXGRP
        if not is_link:
            out['m'] = filemode

    # Used to skip recalculating the hash or link destination. Use the most
    # recent update time.
    out['t'] = int(round(filestats.st_mtime))

    if not is_link:
        out['s'] = filestats.st_size
        # If the timestamp wasn't updated and the file size is still the same,
        # carry on the hash.
        if (prevdict.get('t') == out['t'] and prevdict.get('s') == out['s']):
            # Reuse the previous hash if available.
            out['h'] = prevdict.get('h')
        if not out.get('h'):
            out['h'] = hash_file(filepath, algo)
    else:
        # If the timestamp wasn't updated, carry on the link destination.
        if prevdict.get('t') == out['t']:
            # Reuse the previous link destination if available.
            out['l'] = prevdict.get('l')
        if out.get('l') is None:
            # The link could be in an incorrect path case. In practice, this
            # only happens on OSX on case insensitive HFS.
            # TODO(maruel): It'd be better if it was only done once, in
            # expand_directory_and_symlink(), so it would not be necessary to
            # do again here.
            symlink_value = fs.readlink(filepath)  # pylint: disable=E1101
            filedir = file_path.get_native_path_case(os.path.dirname(filepath))
            native_dest = file_path.fix_native_path_case(
                filedir, symlink_value)
            out['l'] = os.path.relpath(native_dest, filedir)
    return out
Ejemplo n.º 10
0
    def cleanup(self):
        """Removes unknown directories and stale named cache links.

        Does not recalculate the cache size since it's surprisingly slow on
        some OSes.

        Two passes are done while holding the lock; the state is saved
        afterwards even if a pass raised:
        1. The cache directory: entries known to the LRU but missing on disk
           are dropped from the LRU; items on disk that the LRU doesn't know
           are deleted.
        2. The named directory, if present: links that do not resolve to the
           expected cache entry, non-links, and names unknown to the LRU are
           deleted. Missing links are not recreated.

        Returns:
          True if everything that had to be deleted was deleted, False if at
          least one deletion failed; failures are logged.
        """
        def remove(path):
            """Deletes |path| without ever descending through a symlink."""
            if fs.isdir(path) and not fs.islink(path):
                file_path.rmtree(path)
            else:
                fs.remove(path)

        success = True
        with self._lock:
            try:
                actual = set(fs.listdir(self.cache_dir))
                actual.discard(self.NAMED_DIR)
                actual.discard(self.STATE_FILE)
                # Maps the on-disk entry name to the cache name.
                expected = {v[0]: k for k, v in self._lru.iteritems()}
                # First, handle the actual cache content.
                # Remove missing entries.
                for missing in (set(expected) - actual):
                    self._lru.pop(expected[missing])
                # Remove unexpected items.
                for unexpected in (actual - set(expected)):
                    try:
                        remove(os.path.join(self.cache_dir, unexpected))
                    except (IOError, OSError) as e:
                        logging.error('Failed to remove %s: %s', unexpected, e)
                        success = False

                # Second, fix named cache links.
                named = os.path.join(self.cache_dir, self.NAMED_DIR)
                if os.path.isdir(named):
                    actual = set(fs.listdir(named))
                    expected = set(self._lru)
                    # Confirm entries. Do not add missing ones for now.
                    for name in expected.intersection(actual):
                        p = os.path.join(self.cache_dir, self.NAMED_DIR, name)
                        expected_link = os.path.normpath(
                            os.path.join(self.cache_dir, self._lru[name][0]))
                        if fs.islink(p):
                            if sys.platform == 'win32':
                                # TODO(maruel): Implement readlink() on Windows in fs.py, then
                                # remove this condition.
                                # https://crbug.com/853721
                                continue
                            link = fs.readlink(p)
                            # The link may be stored relative to the named
                            # directory (e.g. '../<entry>'), so resolve it
                            # before comparing. An absolute link is left
                            # untouched by os.path.join().
                            resolved = os.path.normpath(
                                os.path.join(named, link))
                            if expected_link == resolved:
                                continue
                            logging.warning(
                                'Unexpected symlink for cache %s: %s, expected %s',
                                name, link, expected_link)
                        else:
                            logging.warning(
                                'Unexpected non symlink for cache %s', name)
                        # Same error handling as the other removal loops: log
                        # and report the failure instead of aborting.
                        try:
                            remove(p)
                        except (IOError, OSError) as e:
                            logging.error('Failed to remove %s: %s', name, e)
                            success = False
                    # Remove unexpected items.
                    for unexpected in (actual - expected):
                        try:
                            remove(
                                os.path.join(self.cache_dir, self.NAMED_DIR,
                                             unexpected))
                        except (IOError, OSError) as e:
                            logging.error('Failed to remove %s: %s',
                                          unexpected, e)
                            success = False
            finally:
                self._save()
        return success
Ejemplo n.º 11
0
 def test_readlink_fail(self):
     """Reading a symlink that doesn't exist raises OSError.

     Obvious, but it's to make sure the Windows part acts the same.
     """
     missing = os.path.join(self.tempdir, 'not_there')
     with self.assertRaises(OSError):
         fs.readlink(missing)