def test_cli_integrity_missing_file(test_dir, testname, monkeypatch):
    monkeypatch.chdir(str(test_dir))
    name = archive_name(tags=[testname])
    base = Path("base")
    missing = base / "data" / "not-present"
    with missing.open("wt") as f:
        f.write("Hello!")
    manifest = Manifest(paths=[base])
    with open("manifest.yaml", "wb") as f:
        manifest.write(f)
    mtime_parent = os.stat(str(missing.parent)).st_mtime
    missing.unlink()
    os.utime(str(missing.parent), times=(mtime_parent, mtime_parent))
    with tarfile.open(name, "w") as tarf:
        with open("manifest.yaml", "rb") as f:
            manifest_info = tarf.gettarinfo(arcname="base/.manifest.yaml", 
                                            fileobj=f)
            manifest_info.mode = stat.S_IFREG | 0o444
            tarf.addfile(manifest_info, f)
        tarf.add("base")
    with TemporaryFile(mode="w+t", dir=str(test_dir)) as f:
        args = ["verify", name]
        callscript("archive-tool.py", args, returncode=3, stderr=f)
        f.seek(0)
        line = f.readline()
        assert "%s:%s: missing" % (name, missing) in line
Example #2
def open(self, path):
    self.path = path
    try:
        self._file = tarfile.open(str(self.path), 'r')
    except OSError as e:
        raise ArchiveReadError(str(e))
    md = self.get_metadata(".manifest.yaml")
    self.basedir = md.path.parent
    self.manifest = Manifest(fileobj=md.fileobj)
    if not self.manifest.metadata:
        # Legacy: Manifest version 1.0 did not have metadata.
        self.manifest.add_metadata(self.basedir / ".manifest.yaml")
    return self
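As a point of reference, here is a minimal usage sketch for the open() method above; the archive file name is made up for illustration, and the context-manager support comes from the __enter__/__exit__ methods shown in Example #20:

from pathlib import Path

# Hypothetical archive created earlier with Archive.create().
with Archive().open(Path("backup.tar")) as archive:
    # open() has already read the leading ".manifest.yaml" member, so
    # the manifest and the base directory are available right away.
    print(archive.basedir)
    for fi in archive.manifest:
        print(fi.path)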
Example #3
def create(self,
           path,
           compression=None,
           paths=None,
           fileinfos=None,
           basedir=None,
           workdir=None,
           excludes=None,
           dedup=DedupMode.LINK,
           tags=None):
    if compression is None:
        try:
            compression = compression_map["".join(path.suffixes)]
        except KeyError:
            # Last resort default
            compression = 'gz'
    mode = 'x:' + compression
    save_wd = None
    try:
        if workdir:
            save_wd = os.getcwd()
            os.chdir(workdir)
        self.path = path.resolve()
        self._dedup = dedup
        self._dupindex = {}
        if fileinfos is not None:
            if not isinstance(fileinfos, Sequence):
                fileinfos = list(fileinfos)
            self._check_paths([fi.path for fi in fileinfos], basedir)
            try:
                self.manifest = Manifest(fileinfos=fileinfos, tags=tags)
            except ValueError as e:
                raise ArchiveCreateError("invalid fileinfos: %s" % e)
        else:
            self._check_paths(paths, basedir, excludes)
            self.manifest = Manifest(paths=paths,
                                     excludes=excludes,
                                     tags=tags)
        bd_fi = self.manifest.find(self.basedir)
        if bd_fi and not bd_fi.is_dir():
            raise ArchiveCreateError("base directory %s must "
                                     "be a directory" % self.basedir)
        self.manifest.add_metadata(self.basedir / ".manifest.yaml")
        for md in self._metadata:
            md.set_path(self.basedir)
            self.manifest.add_metadata(md.path)
        self._create(mode)
    finally:
        if save_wd:
            os.chdir(save_wd)
    return self
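A short, hedged usage sketch for the create() variant above: when compression is None, it is derived from the suffixes of the target path via compression_map, with 'gz' as the last-resort default. The archive name and the assumption that compression_map maps ".tar.bz2" to 'bz2' are illustrative only:

from pathlib import Path

# Compression is omitted, so create() looks up
# "".join(Path("backup.tar.bz2").suffixes) == ".tar.bz2" in
# compression_map (assumed here to yield 'bz2'); on a KeyError it
# would fall back to 'gz'.
archive = Archive().create(Path("backup.tar.bz2"), paths=[Path("base")])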
def test_manifest_sort(test_dir, monkeypatch):
    """Test the Manifest.sort() method.
    """
    monkeypatch.chdir(str(test_dir))
    manifest = Manifest(paths=[Path("base")])
    check_manifest(manifest, testdata)
    fileinfos = set(manifest)
    manifest.sort(key=lambda fi: getattr(fi, "size", 0), reverse=True)
    assert set(manifest) == fileinfos
    prev = None
    for fi in manifest:
        k = getattr(fi, "size", 0)
        if prev is not None:
            assert k <= prev
        prev = k
    manifest.sort(key=lambda fi: (fi.type, fi.path))
    assert set(manifest) == fileinfos
    prev = None
    for fi in manifest:
        if prev is not None:
            assert fi.type >= prev.type
            if fi.type == prev.type:
                assert fi.path >= prev.path
        prev = fi
    manifest.sort()
    assert set(manifest) == fileinfos
    prev = None
    for fi in manifest:
        if prev is not None:
            assert fi.path >= prev.path
        prev = fi
def test_manifest_exclude_samelevel(test_dir, monkeypatch):
    """Test excludes: exclude things explictely named in paths.
    """
    monkeypatch.chdir(str(test_dir))
    paths = [Path("base", "data"), Path("base", "empty")]
    excludes = [paths[1]]
    manifest = Manifest(paths=paths, excludes=excludes)
    data = sub_testdata(testdata, Path("base"), paths[0])
    check_manifest(manifest, data)
def test_manifest_exclude_subdir(test_dir, monkeypatch):
    """Test excludes: excluding a subdirectory.
    """
    monkeypatch.chdir(str(test_dir))
    paths = [Path("base")]
    excludes = [Path("base", "data")]
    manifest = Manifest(paths=paths, excludes=excludes)
    data = sub_testdata(testdata, excludes[0])
    check_manifest(manifest, data)
def test_manifest_exclude_file(test_dir, monkeypatch):
    """Test excludes: excluding one single file.
    """
    monkeypatch.chdir(str(test_dir))
    paths = [Path("base")]
    excludes = [Path("base", "msg.txt")]
    manifest = Manifest(paths=paths, excludes=excludes)
    data = sub_testdata(testdata, excludes[0])
    check_manifest(manifest, data)
def test_manifest_exclude_explicit_include(test_dir, monkeypatch):
    """Test excludes: it is possible to explicitely include files, even if
    their parent directory is excluded.
    """
    monkeypatch.chdir(str(test_dir))
    paths = [Path("base"), Path("base", "data", "rnd.dat")]
    excludes = [Path("base", "data")]
    manifest = Manifest(paths=paths, excludes=excludes)
    data = sub_testdata(testdata, excludes[0], paths[1])
    check_manifest(manifest, data)
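Condensing the docstring above into a small, hedged sketch (the base/data/rnd.dat path is taken from the test, the surrounding layout is assumed): a path listed explicitly in paths stays in the manifest even though its parent directory appears in excludes.

from pathlib import Path

# Assumed to run in a working directory that contains the test tree.
manifest = Manifest(
    paths=[Path("base"), Path("base", "data", "rnd.dat")],
    excludes=[Path("base", "data")],
)
# Everything below base/data is pruned except the explicitly listed file.
assert Path("base", "data", "rnd.dat") in {fi.path for fi in manifest}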
def test_create_fileinfos_manifest(test_dir, monkeypatch):
    """Create the archive from a Manifest.
    A Manifest is an iterable of FileInfo objects.
    """
    monkeypatch.chdir(test_dir)
    manifest = Manifest(paths=[Path("base")])
    archive_path = Path("archive-fi-manifest.tar")
    Archive().create(archive_path, "", fileinfos=manifest)
    with Archive().open(archive_path) as archive:
        check_manifest(archive.manifest, testdata)
        archive.verify()
Example #10
def test_manifest_exclude_nonexistent(test_dir, monkeypatch):
    """Test excludes argument to Manifest: excluding a nonexistent file.

    This is legal, but should have no effect.
    """
    monkeypatch.chdir(str(test_dir))
    paths = [Path("base")]
    excludes = [Path("base", "non-existent.dat")]
    manifest = Manifest(paths=paths, excludes=excludes)
    data = sub_testdata(testdata, excludes[0])
    check_manifest(manifest, data)
Example #11
def get_fileinfos(config, schedule):
    fileinfos = Manifest(paths=config.dirs, excludes=config.excludes)
    try:
        base_archives = schedule.get_base_archives(get_prev_backups(config))
    except NoFullBackupError:
        raise ArchiveCreateError("No previous full backup found, can not "
                                 "create %s archive" % schedule.name)
    for p in [i.path for i in base_archives]:
        log.debug("considering %s to create differential archive", p)
        with Archive().open(p) as base:
            fileinfos = filter_fileinfos(base.manifest, fileinfos)
    return fileinfos
Example #12
def test_manifest_from_fileobj():
    """Read a manifest from a YAML file.
    """
    with gettestdata("manifest.yaml").open("rt") as f:
        manifest = Manifest(fileobj=f)
    head = manifest.head
    assert set(head.keys()) == {
        "Checksums", "Date", "Generator", "Metadata", "Version"
    }
    assert manifest.version == "1.1"
    assert isinstance(manifest.date, datetime.datetime)
    assert manifest.checksums == ("sha256", )
    assert manifest.tags == ()
    check_manifest(manifest, testdata)
Example #13
def test_manifest_from_paths(test_dir, monkeypatch):
    """Create a manifest reading the files in test_dir.
    """
    monkeypatch.chdir(str(test_dir))
    manifest = Manifest(paths=[Path("base")])
    head = manifest.head
    assert set(head.keys()) == {
        "Checksums", "Date", "Generator", "Metadata", "Version"
    }
    assert manifest.version == Manifest.Version
    assert isinstance(manifest.date, datetime.datetime)
    assert manifest.checksums == tuple(FileInfo.Checksums)
    assert manifest.tags == ()
    check_manifest(manifest, testdata)
Example #14
def test_data(tmpdir, monkeypatch):
    monkeypatch.chdir(str(tmpdir))
    shutil.rmtree("base", ignore_errors=True)
    setup_testdata(tmpdir, testdata)
    manifest = Manifest(paths=[Path("base")])
    manifest.add_metadata(Path("base", ".manifest.yaml"))
    with open("manifest.yaml", "wb") as f:
        manifest.write(f)
    return tmpdir
Example #15
def test_manifest_from_fileinfos(test_dir, monkeypatch):
    """Create a manifest providing an iterable of fileinfos.
    """
    monkeypatch.chdir(test_dir)
    fileinfos = FileInfo.iterpaths([Path("base")], set())
    manifest = Manifest(fileinfos=fileinfos)
    head = manifest.head
    assert set(head.keys()) == {
        "Checksums", "Date", "Generator", "Metadata", "Version"
    }
    assert manifest.version == Manifest.Version
    assert isinstance(manifest.date, datetime.datetime)
    assert manifest.checksums == tuple(FileInfo.Checksums)
    assert manifest.tags == ()
    check_manifest(manifest, testdata)
Example #16
def _create(self, path, mode, paths, basedir, excludes, dedup, tags):
    self.path = path
    self._check_paths(paths, basedir, excludes)
    self.manifest = Manifest(paths=paths, excludes=excludes, tags=tags)
    self.manifest.add_metadata(self.basedir / ".manifest.yaml")
    for md in self._metadata:
        md.set_path(self.basedir)
        self.manifest.add_metadata(md.path)
    with tarfile.open(str(self.path), mode) as tarf:
        with tempfile.TemporaryFile() as tmpf:
            self.manifest.write(tmpf)
            tmpf.seek(0)
            self.add_metadata(".manifest.yaml", tmpf)
            md_names = self._add_metadata_files(tarf)
        dupindex = {}
        for fi in self.manifest:
            p = fi.path
            name = self._arcname(p)
            if name in md_names:
                raise ArchiveCreateError("cannot add %s: "
                                         "this filename is reserved" % p)
            if fi.is_file():
                ti = tarf.gettarinfo(str(p), arcname=name)
                dup = self._check_duplicate(fi, name, dedup, dupindex)
                if dup:
                    ti.type = tarfile.LNKTYPE
                    ti.linkname = dup
                    tarf.addfile(ti)
                else:
                    ti.size = fi.size
                    ti.type = tarfile.REGTYPE
                    ti.linkname = ''
                    with p.open("rb") as f:
                        tarf.addfile(ti, fileobj=f)
            else:
                tarf.add(str(p), arcname=name, recursive=False)
Example #17
def test_verify_missing_metadata_item(test_data, testname):
    name = archive_name(tags=[testname])
    manifest = Manifest(paths=[Path("base")])
    manifest.add_metadata(Path("base", ".manifest.yaml"))
    manifest.add_metadata(Path("base", ".msg.txt"))
    with tarfile.open(name, "w") as tarf:
        with tempfile.TemporaryFile(dir=str(test_data)) as tmpf:
            manifest.write(tmpf)
            tmpf.seek(0)
            ti = tarf.gettarinfo(arcname="base/.manifest.yaml", 
                                 fileobj=tmpf)
            ti.mode = stat.S_IFREG | stat.S_IMODE(0o444)
            tarf.addfile(ti, tmpf)
        tarf.add("base")
    with Archive().open(Path(name)) as archive:
        with pytest.raises(ArchiveIntegrityError) as err:
            archive.verify()
        assert "'base/.msg.txt' not found" in str(err.value)
Example #18
def test_create_manifest_missing_checksum(test_dir, testname, monkeypatch):
    """Same as last test, but now creating the archive from a precompiled
    manifest.
    """
    monkeypatch.chdir(test_dir)
    name = archive_name(tags=[testname])
    with monkeypatch.context() as m:
        m.setattr(FileInfo, "Checksums", ['md5'])
        manifest = Manifest(paths=[Path("base")])
        # Checksums are calculated lazily, so we must explicitly access
        # the attribute while the monkeypatching of FileInfo.Checksums
        # is still active.
        for fi in manifest:
            if fi.is_file():
                assert set(fi.checksum.keys()) == {'md5'}
    with pytest.raises(ArchiveCreateError) as err:
        Archive().create(Path(name), "", fileinfos=manifest)
    assert "Missing checksum" in str(err.value)
Example #19
def test_manifest_tags(test_dir, monkeypatch, tags, expected):
    """Set tags in a manifest reading the files in test_dir.
    """
    monkeypatch.chdir(str(test_dir))
    manifest = Manifest(paths=[Path("base")], tags=tags)
    assert manifest.tags == expected
Example #20
class Archive:
    def __init__(self):
        self.path = None
        self.basedir = None
        self.manifest = None
        self._file = None
        self._metadata = []

    def create(self,
               path,
               compression,
               paths,
               basedir=None,
               workdir=None,
               excludes=None,
               dedup=DedupMode.LINK,
               tags=None):
        if sys.version_info < (3, 5):
            # The 'x' (exclusive creation) mode was added to tarfile
            # in Python 3.5.
            mode = 'w:' + compression
        else:
            mode = 'x:' + compression
        if workdir:
            with tmp_chdir(workdir):
                self._create(workdir / path, mode, paths, basedir, excludes,
                             dedup, tags)
        else:
            self._create(path, mode, paths, basedir, excludes, dedup, tags)
        return self

    def _create(self, path, mode, paths, basedir, excludes, dedup, tags):
        self.path = path
        self._check_paths(paths, basedir, excludes)
        self.manifest = Manifest(paths=paths, excludes=excludes, tags=tags)
        self.manifest.add_metadata(self.basedir / ".manifest.yaml")
        for md in self._metadata:
            md.set_path(self.basedir)
            self.manifest.add_metadata(md.path)
        with tarfile.open(str(self.path), mode) as tarf:
            with tempfile.TemporaryFile() as tmpf:
                self.manifest.write(tmpf)
                tmpf.seek(0)
                self.add_metadata(".manifest.yaml", tmpf)
                md_names = self._add_metadata_files(tarf)
            dupindex = {}
            for fi in self.manifest:
                p = fi.path
                name = self._arcname(p)
                if name in md_names:
                    raise ArchiveCreateError("cannot add %s: "
                                             "this filename is reserved" % p)
                if fi.is_file():
                    ti = tarf.gettarinfo(str(p), arcname=name)
                    dup = self._check_duplicate(fi, name, dedup, dupindex)
                    if dup:
                        ti.type = tarfile.LNKTYPE
                        ti.linkname = dup
                        tarf.addfile(ti)
                    else:
                        ti.size = fi.size
                        ti.type = tarfile.REGTYPE
                        ti.linkname = ''
                        with p.open("rb") as f:
                            tarf.addfile(ti, fileobj=f)
                else:
                    tarf.add(str(p), arcname=name, recursive=False)

    def _check_paths(self, paths, basedir, excludes):
        """Check the paths to be added to an archive for several error
        conditions.  Accept a list of either strings or path-like
        objects.  Convert them to a list of Path objects.  Also sets
        self.basedir.
        """
        if not paths:
            raise ArchiveCreateError("refusing to create an empty archive")
        if not basedir:
            p = paths[0]
            if p.is_absolute():
                self.basedir = Path(self.path.name.split('.')[0])
            else:
                self.basedir = Path(p.parts[0])
        else:
            self.basedir = basedir
        if self.basedir.is_absolute():
            raise ArchiveCreateError("basedir must be relative")
        # We allow two different cases: either
        # - all paths are absolute, or
        # - all paths are relative and start with basedir.
        # The same rules for paths also apply to excludes, if
        # provided.  So we may just iterate over the chain of both
        # lists.
        abspath = None
        for p in itertools.chain(paths, excludes or ()):
            if not _is_normalized(p):
                raise ArchiveCreateError(
                    "invalid path %s: must be normalized" % p)
            if abspath is None:
                abspath = p.is_absolute()
            else:
                if abspath != p.is_absolute():
                    raise ArchiveCreateError("mixing of absolute and relative "
                                             "paths is not allowed")
            if not p.is_absolute():
                try:
                    # This will raise ValueError if p does not start
                    # with basedir:
                    p.relative_to(self.basedir)
                except ValueError as e:
                    raise ArchiveCreateError(str(e))
        if not abspath:
            if self.basedir.is_symlink() or not self.basedir.is_dir():
                raise ArchiveCreateError("basedir must be a directory")

    def _add_metadata_files(self, tarf):
        """Add the metadata files to the tar file.
        """
        md_names = set()
        for md in self._metadata:
            name = str(md.path)
            if name in md_names:
                raise ArchiveCreateError("duplicate metadata %s" % name)
            md_names.add(name)
            ti = tarf.gettarinfo(arcname=name, fileobj=md.fileobj)
            ti.mode = stat.S_IFREG | stat.S_IMODE(md.mode)
            tarf.addfile(ti, md.fileobj)
        return md_names

    def _check_duplicate(self, fileinfo, name, dedup, dupindex):
        """Check if the archive item fileinfo should be linked
        to another item already added to the archive.
        """
        assert fileinfo.is_file()
        if dedup == DedupMode.LINK:
            st = fileinfo.path.stat()
            if st.st_nlink == 1:
                return None
            idxkey = (st.st_dev, st.st_ino)
        elif dedup == DedupMode.CONTENT:
            try:
                hashalg = fileinfo.Checksums[0]
            except IndexError:
                return None
            idxkey = fileinfo.checksum[hashalg]
        else:
            return None
        if idxkey in dupindex:
            return dupindex[idxkey]
        else:
            dupindex[idxkey] = name
            return None

    def add_metadata(self, name, fileobj, mode=0o444):
        path = self.basedir / name if self.basedir else None
        md = MetadataItem(name=name, path=path, fileobj=fileobj, mode=mode)
        self._metadata.insert(0, md)

    def open(self, path):
        self.path = path
        try:
            self._file = tarfile.open(str(self.path), 'r')
        except OSError as e:
            raise ArchiveReadError(str(e))
        md = self.get_metadata(".manifest.yaml")
        self.basedir = md.path.parent
        self.manifest = Manifest(fileobj=md.fileobj)
        if not self.manifest.metadata:
            # Legacy: Manifest version 1.0 did not have metadata.
            self.manifest.add_metadata(self.basedir / ".manifest.yaml")
        return self

    def get_metadata(self, name):
        ti = self._file.next()
        path = Path(ti.path)
        if path.name != name:
            raise ArchiveIntegrityError("%s not found" % name)
        fileobj = self._file.extractfile(ti)
        md = MetadataItem(path=path, tarinfo=ti, fileobj=fileobj)
        self._metadata.append(md)
        return md

    def close(self):
        if self._file:
            self._file.close()
        self._file = None

    def __enter__(self):
        return self

    def __exit__(self, type, value, tb):
        self.close()

    def __del__(self):
        self.close()

    def _arcname(self, p):
        if p.is_absolute():
            return str(self.basedir / p.relative_to(p.root))
        else:
            return str(p)

    def verify(self):
        if not self._file:
            raise ValueError("archive is closed.")
        # Verify that all metadata items are present in the proper
        # order at the beginning of the tar file.  Start iterating over
        # the TarInfo objects in the tarfile from the beginning,
        # regardless of what has already been read:
        tarf_it = iter(self._file)
        for md in self.manifest.metadata:
            ti = next(tarf_it)
            if ti.name != md:
                raise ArchiveIntegrityError("Expected metadata item '%s' "
                                            "not found" % (md))
        # Check the content of the archive.
        for fileinfo in self.manifest:
            self._verify_item(fileinfo)

    def _verify_item(self, fileinfo):
        def _check_condition(cond, item, message):
            if not cond:
                raise ArchiveIntegrityError("%s: %s" % (item, message))

        itemname = "%s:%s" % (self.path, fileinfo.path)
        try:
            tarinfo = self._file.getmember(self._arcname(fileinfo.path))
        except KeyError:
            raise ArchiveIntegrityError("%s: missing" % itemname)
        _check_condition(tarinfo.mode == fileinfo.mode, itemname, "wrong mode")
        _check_condition(
            int(tarinfo.mtime) == int(fileinfo.mtime), itemname,
            "wrong modification time")
        if fileinfo.is_dir():
            _check_condition(tarinfo.isdir(), itemname,
                             "wrong type, expected directory")
        elif fileinfo.is_file():
            _check_condition(tarinfo.isfile() or tarinfo.islnk(), itemname,
                             "wrong type, expected regular file")
            if tarinfo.isfile():
                _check_condition(tarinfo.size == fileinfo.size, itemname,
                                 "wrong size")
            with self._file.extractfile(tarinfo) as f:
                cs = checksum(f, fileinfo.checksum.keys())
                _check_condition(cs == fileinfo.checksum, itemname,
                                 "checksum does not match")
        elif fileinfo.is_symlink():
            _check_condition(tarinfo.issym(), itemname,
                             "wrong type, expected symbolic link")
            _check_condition(tarinfo.linkname == str(fileinfo.target),
                             itemname, "wrong link target")
        else:
            raise ArchiveIntegrityError("%s: invalid type" % (itemname))

    def extract(self, targetdir, inclmeta=False):
        # We extract the directories last, in reverse order.  This way,
        # the directory attributes, in particular the file modification
        # time, are set correctly after the file content has been written
        # into the directory.
        dirstack = []
        if inclmeta:
            for mi in self.manifest.metadata:
                self._file.extract(mi, path=str(targetdir))
        for fi in self.manifest:
            if fi.is_dir():
                dirstack.append(fi.path)
            else:
                self._file.extract(self._arcname(fi.path), path=str(targetdir))
        while True:
            try:
                p = dirstack.pop()
            except IndexError:
                break
            self._file.extract(self._arcname(p), path=str(targetdir))
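Finally, a hedged end-to-end sketch that exercises the Archive class above using only the methods it defines; the directory layout ("base") and the archive name are assumptions for illustration:

from pathlib import Path

# Create a compressed archive from the relative "base" tree; with
# relative paths the base directory defaults to the first path
# component, i.e. "base".
Archive().create(Path("backup.tar.gz"), "gz", [Path("base")])

# Re-open the archive, verify it against the embedded manifest, and
# extract the content into a separate target directory.
with Archive().open(Path("backup.tar.gz")) as archive:
    archive.verify()
    archive.extract(Path("restore"))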