Ejemplo n.º 1
0
def _golevel(level, f, ename, newentry, metastore, tmax):
    # close nodes back up the tree
    assert (level)
    default_meta_ofs = metastore.store(metadata.Metadata())
    while ename[:len(level.ename)] != level.ename:
        n = BlankNewEntry(level.ename[-1], default_meta_ofs, tmax)
        n.flags |= IX_EXISTS
        (n.children_ofs, n.children_n) = level.write(f)
        level.parent.list.append(n)
        level = level.parent

    # create nodes down the tree
    while len(level.ename) < len(ename):
        level = Level(ename[:len(level.ename) + 1], level)

    # are we in precisely the right place?
    assert (ename == level.ename)
    n = newentry or \
        BlankNewEntry(ename and level.ename[-1] or None, default_meta_ofs, tmax)
    (n.children_ofs, n.children_n) = level.write(f)
    if level.parent:
        level.parent.list.append(n)
    level = level.parent

    return level
Ejemplo n.º 2
0
 def add(self, name, st, meta_ofs, hashgen=None):
     endswith = name.endswith(b'/')
     ename = pathsplit(name)
     basename = ename[-1]
     #log('add: %r %r\n' % (basename, name))
     flags = IX_EXISTS
     sha = None
     if hashgen:
         (gitmode, sha) = hashgen(name)
         flags |= IX_HASHVALID
     else:
         (gitmode, sha) = (0, EMPTY_SHA)
     if st:
         isdir = stat.S_ISDIR(st.st_mode)
         assert (isdir == endswith)
         e = NewEntry(basename, name, self.tmax, st.st_dev, st.st_ino,
                      st.st_nlink, st.st_ctime, st.st_mtime, st.st_atime,
                      st.st_size, st.st_mode, gitmode, sha, flags, meta_ofs,
                      0, 0)
     else:
         assert (endswith)
         meta_ofs = self.metastore.store(metadata.Metadata())
         e = BlankNewEntry(basename, meta_ofs, self.tmax)
         e.gitmode = gitmode
         e.sha = sha
         e.flags = flags
     self._add(ename, e)
Ejemplo n.º 3
0
Archivo: ls.py Proyecto: prodigeni/bup
def node_info(n, name,
              show_hash = False,
              long_fmt = False,
              classification = None,
              numeric_ids = False,
              human_readable = False):
    """Return a string containing the information to display for the node
    n.  Classification may be "all", "type", or None."""
    result = ''
    if show_hash:
        result += "%s " % n.hash.encode('hex')
    if long_fmt:
        meta = copy.copy(n.metadata())
        if meta:
            meta.path = name
            meta.size = n.size()
        else:
            # Fake it -- summary_str() is designed to handle a fake.
            meta = metadata.Metadata()
            meta.size = n.size()
            meta.mode = n.mode
            meta.path = name
            meta.atime, meta.mtime, meta.ctime = n.atime, n.mtime, n.ctime
            if stat.S_ISLNK(meta.mode):
                meta.symlink_target = n.readlink()
        result += metadata.summary_str(meta,
                                       numeric_ids = numeric_ids,
                                       classification = classification,
                                       human_readable = human_readable)
    else:
        result += name
        if classification:
            mode = n.metadata() and n.metadata().mode or n.mode
            result += xstat.classification_str(mode, classification == 'all')
    return result
Ejemplo n.º 4
0
    if first_root == None:
        first_root = dirp[0]
    elif first_root != dirp[0]:
        root_collision = True

    # If switching to a new sub-tree, finish the current sub-tree.
    while parts > [x[0] for x in dirp]:
        _pop(force_tree = None)

    # If switching to a new sub-tree, start a new sub-tree.
    for path_component in dirp[len(parts):]:
        dir_name, fs_path = path_component
        # Not indexed, so just grab the FS metadata or use empty metadata.
        try:
            meta = metadata.from_path(fs_path, normalized=True) \
                if fs_path else metadata.Metadata()
        except (OSError, IOError) as e:
            add_error(e)
            lastskip_name = dir_name
            meta = metadata.Metadata()
        _push(dir_name, meta)

    if not file:
        if len(parts) == 1:
            continue # We're at the top level -- keep the current root dir
        # Since there's no filename, this is a subdir -- finish it.
        oldtree = already_saved(ent) # may be None
        newtree = _pop(force_tree = oldtree)
        if not oldtree:
            if lastskip_name and lastskip_name.startswith(ent.name):
                ent.invalidate()
Ejemplo n.º 5
0
Archivo: tindex.py Proyecto: vipup/bup
def index_dirty():
    with no_lingering_errors():
        with test_tempdir('bup-tindex-') as tmpdir:
            orig_cwd = os.getcwd()
            try:
                os.chdir(tmpdir)
                default_meta = metadata.Metadata()
                ms1 = index.MetaStoreWriter('index.meta.tmp')
                ms2 = index.MetaStoreWriter('index2.meta.tmp')
                ms3 = index.MetaStoreWriter('index3.meta.tmp')
                meta_ofs1 = ms1.store(default_meta)
                meta_ofs2 = ms2.store(default_meta)
                meta_ofs3 = ms3.store(default_meta)

                ds = xstat.stat(lib_t_dir)
                fs = xstat.stat(lib_t_dir + '/tindex.py')
                tmax = (time.time() - 1) * 10**9

                w1 = index.Writer('index.tmp', ms1, tmax)
                w1.add('/a/b/x', fs, meta_ofs1)
                w1.add('/a/b/c', fs, meta_ofs1)
                w1.add('/a/b/', ds, meta_ofs1)
                w1.add('/a/', ds, meta_ofs1)
                #w1.close()
                WVPASS()

                w2 = index.Writer('index2.tmp', ms2, tmax)
                w2.add('/a/b/n/2', fs, meta_ofs2)
                #w2.close()
                WVPASS()

                w3 = index.Writer('index3.tmp', ms3, tmax)
                w3.add('/a/c/n/3', fs, meta_ofs3)
                #w3.close()
                WVPASS()

                r1 = w1.new_reader()
                r2 = w2.new_reader()
                r3 = w3.new_reader()
                WVPASS()

                r1all = [e.name for e in r1]
                WVPASSEQ(r1all, ['/a/b/x', '/a/b/c', '/a/b/', '/a/', '/'])
                r2all = [e.name for e in r2]
                WVPASSEQ(r2all, ['/a/b/n/2', '/a/b/n/', '/a/b/', '/a/', '/'])
                r3all = [e.name for e in r3]
                WVPASSEQ(r3all, ['/a/c/n/3', '/a/c/n/', '/a/c/', '/a/', '/'])
                all = [e.name for e in index.merge(r2, r1, r3)]
                WVPASSEQ(all, [
                    '/a/c/n/3', '/a/c/n/', '/a/c/', '/a/b/x', '/a/b/n/2',
                    '/a/b/n/', '/a/b/c', '/a/b/', '/a/', '/'
                ])
                fake_validate(r1)
                dump(r1)

                print[hex(e.flags) for e in r1]
                WVPASSEQ([e.name for e in r1 if e.is_valid()], r1all)
                WVPASSEQ([e.name for e in r1 if not e.is_valid()], [])
                WVPASSEQ([
                    e.name
                    for e in index.merge(r2, r1, r3) if not e.is_valid()
                ], [
                    '/a/c/n/3', '/a/c/n/', '/a/c/', '/a/b/n/2', '/a/b/n/',
                    '/a/b/', '/a/', '/'
                ])

                expect_invalid = ['/'] + r2all + r3all
                expect_real = (set(r1all) - set(r2all) - set(r3all)) \
                                | set(['/a/b/n/2', '/a/c/n/3'])
                dump(index.merge(r2, r1, r3))
                for e in index.merge(r2, r1, r3):
                    print e.name, hex(e.flags), e.ctime
                    eiv = e.name in expect_invalid
                    er = e.name in expect_real
                    WVPASSEQ(eiv, not e.is_valid())
                    WVPASSEQ(er, e.is_real())
                fake_validate(r2, r3)
                dump(index.merge(r2, r1, r3))
                WVPASSEQ([
                    e.name
                    for e in index.merge(r2, r1, r3) if not e.is_valid()
                ], [])

                e = eget(index.merge(r2, r1, r3), '/a/b/c')
                e.invalidate()
                e.repack()
                dump(index.merge(r2, r1, r3))
                WVPASSEQ([
                    e.name
                    for e in index.merge(r2, r1, r3) if not e.is_valid()
                ], ['/a/b/c', '/a/b/', '/a/', '/'])
                w1.close()
                w2.close()
                w3.close()
            finally:
                os.chdir(orig_cwd)
Ejemplo n.º 6
0
def main(argv):

    # Hack around lack of nonlocal vars in python 2
    _nonlocal = {}

    o = options.Options(optspec)
    opt, flags, extra = o.parse_bytes(argv[1:])

    if opt.indexfile:
        opt.indexfile = argv_bytes(opt.indexfile)
    if opt.name:
        opt.name = argv_bytes(opt.name)
    if opt.remote:
        opt.remote = argv_bytes(opt.remote)
    if opt.strip_path:
        opt.strip_path = argv_bytes(opt.strip_path)

    git.check_repo_or_die()
    if not (opt.tree or opt.commit or opt.name):
        o.fatal("use one or more of -t, -c, -n")
    if not extra:
        o.fatal("no filenames given")

    extra = [argv_bytes(x) for x in extra]

    opt.progress = (istty2 and not opt.quiet)
    opt.smaller = parse_num(opt.smaller or 0)
    if opt.bwlimit:
        client.bwlimit = parse_num(opt.bwlimit)

    if opt.date:
        date = parse_date_or_fatal(opt.date, o.fatal)
    else:
        date = time.time()

    if opt.strip and opt.strip_path:
        o.fatal("--strip is incompatible with --strip-path")

    graft_points = []
    if opt.graft:
        if opt.strip:
            o.fatal("--strip is incompatible with --graft")

        if opt.strip_path:
            o.fatal("--strip-path is incompatible with --graft")

        for (option, parameter) in flags:
            if option == "--graft":
                parameter = argv_bytes(parameter)
                splitted_parameter = parameter.split(b'=')
                if len(splitted_parameter) != 2:
                    o.fatal(
                        "a graft point must be of the form old_path=new_path")
                old_path, new_path = splitted_parameter
                if not (old_path and new_path):
                    o.fatal("a graft point cannot be empty")
                graft_points.append(
                    (resolve_parent(old_path), resolve_parent(new_path)))

    is_reverse = environ.get(b'BUP_SERVER_REVERSE')
    if is_reverse and opt.remote:
        o.fatal("don't use -r in reverse mode; it's automatic")

    name = opt.name
    if name and not valid_save_name(name):
        o.fatal("'%s' is not a valid branch name" % path_msg(name))
    refname = name and b'refs/heads/%s' % name or None
    if opt.remote or is_reverse:
        try:
            cli = client.Client(opt.remote)
        except client.ClientError as e:
            log('error: %s' % e)
            sys.exit(1)
        oldref = refname and cli.read_ref(refname) or None
        w = cli.new_packwriter(compression_level=opt.compress)
    else:
        cli = None
        oldref = refname and git.read_ref(refname) or None
        w = git.PackWriter(compression_level=opt.compress)

    handle_ctrl_c()

    # Metadata is stored in a file named .bupm in each directory.  The
    # first metadata entry will be the metadata for the current directory.
    # The remaining entries will be for each of the other directory
    # elements, in the order they're listed in the index.
    #
    # Since the git tree elements are sorted according to
    # git.shalist_item_sort_key, the metalist items are accumulated as
    # (sort_key, metadata) tuples, and then sorted when the .bupm file is
    # created.  The sort_key should have been computed using the element's
    # mangled name and git mode (after hashsplitting), but the code isn't
    # actually doing that but rather uses the element's real name and mode.
    # This makes things a bit more difficult when reading it back, see
    # vfs.ordered_tree_entries().

    # Maintain a stack of information representing the current location in
    # the archive being constructed.  The current path is recorded in
    # parts, which will be something like ['', 'home', 'someuser'], and
    # the accumulated content and metadata for of the dirs in parts is
    # stored in parallel stacks in shalists and metalists.

    parts = []  # Current archive position (stack of dir names).
    shalists = []  # Hashes for each dir in paths.
    metalists = []  # Metadata for each dir in paths.

    def _push(part, metadata):
        # Enter a new archive directory -- make it the current directory.
        parts.append(part)
        shalists.append([])
        metalists.append([(b'', metadata)])  # This dir's metadata (no name).

    def _pop(force_tree, dir_metadata=None):
        # Leave the current archive directory and add its tree to its parent.
        assert (len(parts) >= 1)
        part = parts.pop()
        shalist = shalists.pop()
        metalist = metalists.pop()
        # FIXME: only test if collision is possible (i.e. given --strip, etc.)?
        if force_tree:
            tree = force_tree
        else:
            names_seen = set()
            clean_list = []
            metaidx = 1  # entry at 0 is for the dir
            for x in shalist:
                name = x[1]
                if name in names_seen:
                    parent_path = b'/'.join(parts) + b'/'
                    add_error('error: ignoring duplicate path %s in %s' %
                              (path_msg(name), path_msg(parent_path)))
                    if not stat.S_ISDIR(x[0]):
                        del metalist[metaidx]
                else:
                    names_seen.add(name)
                    clean_list.append(x)
                    if not stat.S_ISDIR(x[0]):
                        metaidx += 1

            if dir_metadata:  # Override the original metadata pushed for this dir.
                metalist = [(b'', dir_metadata)] + metalist[1:]
            sorted_metalist = sorted(metalist, key=lambda x: x[0])
            metadata = b''.join([m[1].encode() for m in sorted_metalist])
            metadata_f = BytesIO(metadata)
            mode, id = hashsplit.split_to_blob_or_tree(w.new_blob,
                                                       w.new_tree,
                                                       [metadata_f],
                                                       keep_boundaries=False)
            clean_list.append((mode, b'.bupm', id))

            tree = w.new_tree(clean_list)
        if shalists:
            shalists[-1].append((GIT_MODE_TREE,
                                 git.mangle_name(part, GIT_MODE_TREE,
                                                 GIT_MODE_TREE), tree))
        return tree

    _nonlocal['count'] = 0
    _nonlocal['subcount'] = 0
    _nonlocal['lastremain'] = None

    def progress_report(n):
        _nonlocal['subcount'] += n
        cc = _nonlocal['count'] + _nonlocal['subcount']
        pct = total and (cc * 100.0 / total) or 0
        now = time.time()
        elapsed = now - tstart
        kps = elapsed and int(cc / 1024. / elapsed)
        kps_frac = 10**int(math.log(kps + 1, 10) - 1)
        kps = int(kps / kps_frac) * kps_frac
        if cc:
            remain = elapsed * 1.0 / cc * (total - cc)
        else:
            remain = 0.0
        if (_nonlocal['lastremain'] and (remain > _nonlocal['lastremain']) and
            ((remain - _nonlocal['lastremain']) / _nonlocal['lastremain'] <
             0.05)):
            remain = _nonlocal['lastremain']
        else:
            _nonlocal['lastremain'] = remain
        hours = int(remain / 60 / 60)
        mins = int(remain / 60 - hours * 60)
        secs = int(remain - hours * 60 * 60 - mins * 60)
        if elapsed < 30:
            remainstr = ''
            kpsstr = ''
        else:
            kpsstr = '%dk/s' % kps
            if hours:
                remainstr = '%dh%dm' % (hours, mins)
            elif mins:
                remainstr = '%dm%d' % (mins, secs)
            else:
                remainstr = '%ds' % secs
        qprogress(
            'Saving: %.2f%% (%d/%dk, %d/%d files) %s %s\r' %
            (pct, cc / 1024, total / 1024, fcount, ftotal, remainstr, kpsstr))

    indexfile = opt.indexfile or git.repo(b'bupindex')
    r = index.Reader(indexfile)
    try:
        msr = index.MetaStoreReader(indexfile + b'.meta')
    except IOError as ex:
        if ex.errno != EACCES:
            raise
        log('error: cannot access %r; have you run bup index?' %
            path_msg(indexfile))
        sys.exit(1)
    hlink_db = hlinkdb.HLinkDB(indexfile + b'.hlink')

    def already_saved(ent):
        return ent.is_valid() and w.exists(ent.sha) and ent.sha

    def wantrecurse_pre(ent):
        return not already_saved(ent)

    def wantrecurse_during(ent):
        return not already_saved(ent) or ent.sha_missing()

    def find_hardlink_target(hlink_db, ent):
        if hlink_db and not stat.S_ISDIR(ent.mode) and ent.nlink > 1:
            link_paths = hlink_db.node_paths(ent.dev, ent.ino)
            if link_paths:
                return link_paths[0]

    total = ftotal = 0
    if opt.progress:
        for (transname, ent) in r.filter(extra, wantrecurse=wantrecurse_pre):
            if not (ftotal % 10024):
                qprogress('Reading index: %d\r' % ftotal)
            exists = ent.exists()
            hashvalid = already_saved(ent)
            ent.set_sha_missing(not hashvalid)
            if not opt.smaller or ent.size < opt.smaller:
                if exists and not hashvalid:
                    total += ent.size
            ftotal += 1
        progress('Reading index: %d, done.\n' % ftotal)
        hashsplit.progress_callback = progress_report

    # Root collisions occur when strip or graft options map more than one
    # path to the same directory (paths which originally had separate
    # parents).  When that situation is detected, use empty metadata for
    # the parent.  Otherwise, use the metadata for the common parent.
    # Collision example: "bup save ... --strip /foo /foo/bar /bar".

    # FIXME: Add collision tests, or handle collisions some other way.

    # FIXME: Detect/handle strip/graft name collisions (other than root),
    # i.e. if '/foo/bar' and '/bar' both map to '/'.

    first_root = None
    root_collision = None
    tstart = time.time()
    fcount = 0
    lastskip_name = None
    lastdir = b''
    for (transname, ent) in r.filter(extra, wantrecurse=wantrecurse_during):
        (dir, file) = os.path.split(ent.name)
        exists = (ent.flags & index.IX_EXISTS)
        hashvalid = already_saved(ent)
        wasmissing = ent.sha_missing()
        oldsize = ent.size
        if opt.verbose:
            if not exists:
                status = 'D'
            elif not hashvalid:
                if ent.sha == index.EMPTY_SHA:
                    status = 'A'
                else:
                    status = 'M'
            else:
                status = ' '
            if opt.verbose >= 2:
                log('%s %-70s\n' % (status, path_msg(ent.name)))
            elif not stat.S_ISDIR(ent.mode) and lastdir != dir:
                if not lastdir.startswith(dir):
                    log('%s %-70s\n' %
                        (status, path_msg(os.path.join(dir, b''))))
                lastdir = dir

        if opt.progress:
            progress_report(0)
        fcount += 1

        if not exists:
            continue
        if opt.smaller and ent.size >= opt.smaller:
            if exists and not hashvalid:
                if opt.verbose:
                    log('skipping large file "%s"\n' % path_msg(ent.name))
                lastskip_name = ent.name
            continue

        assert (dir.startswith(b'/'))
        if opt.strip:
            dirp = stripped_path_components(dir, extra)
        elif opt.strip_path:
            dirp = stripped_path_components(dir, [opt.strip_path])
        elif graft_points:
            dirp = grafted_path_components(graft_points, dir)
        else:
            dirp = path_components(dir)

        # At this point, dirp contains a representation of the archive
        # path that looks like [(archive_dir_name, real_fs_path), ...].
        # So given "bup save ... --strip /foo/bar /foo/bar/baz", dirp
        # might look like this at some point:
        #   [('', '/foo/bar'), ('baz', '/foo/bar/baz'), ...].

        # This dual representation supports stripping/grafting, where the
        # archive path may not have a direct correspondence with the
        # filesystem.  The root directory is represented by an initial
        # component named '', and any component that doesn't have a
        # corresponding filesystem directory (due to grafting, for
        # example) will have a real_fs_path of None, i.e. [('', None),
        # ...].

        if first_root == None:
            first_root = dirp[0]
        elif first_root != dirp[0]:
            root_collision = True

        # If switching to a new sub-tree, finish the current sub-tree.
        while parts > [x[0] for x in dirp]:
            _pop(force_tree=None)

        # If switching to a new sub-tree, start a new sub-tree.
        for path_component in dirp[len(parts):]:
            dir_name, fs_path = path_component
            # Not indexed, so just grab the FS metadata or use empty metadata.
            try:
                meta = metadata.from_path(fs_path, normalized=True) \
                    if fs_path else metadata.Metadata()
            except (OSError, IOError) as e:
                add_error(e)
                lastskip_name = dir_name
                meta = metadata.Metadata()
            _push(dir_name, meta)

        if not file:
            if len(parts) == 1:
                continue  # We're at the top level -- keep the current root dir
            # Since there's no filename, this is a subdir -- finish it.
            oldtree = already_saved(ent)  # may be None
            newtree = _pop(force_tree=oldtree)
            if not oldtree:
                if lastskip_name and lastskip_name.startswith(ent.name):
                    ent.invalidate()
                else:
                    ent.validate(GIT_MODE_TREE, newtree)
                ent.repack()
            if exists and wasmissing:
                _nonlocal['count'] += oldsize
            continue

        # it's not a directory
        if hashvalid:
            id = ent.sha
            git_name = git.mangle_name(file, ent.mode, ent.gitmode)
            git_info = (ent.gitmode, git_name, id)
            shalists[-1].append(git_info)
            sort_key = git.shalist_item_sort_key((ent.mode, file, id))
            meta = msr.metadata_at(ent.meta_ofs)
            meta.hardlink_target = find_hardlink_target(hlink_db, ent)
            # Restore the times that were cleared to 0 in the metastore.
            (meta.atime, meta.mtime, meta.ctime) = (ent.atime, ent.mtime,
                                                    ent.ctime)
            metalists[-1].append((sort_key, meta))
        else:
            id = None
            hlink = find_hardlink_target(hlink_db, ent)
            try:
                meta = metadata.from_path(
                    ent.name,
                    hardlink_target=hlink,
                    normalized=True,
                    after_stat=after_nondir_metadata_stat)
            except (OSError, IOError) as e:
                add_error(e)
                lastskip_name = ent.name
                continue
            if stat.S_IFMT(ent.mode) != stat.S_IFMT(meta.mode):
                # The mode changed since we indexed the file, this is bad.
                # This can cause two issues:
                # 1) We e.g. think the file is a regular file, but now it's
                #    something else (a device, socket, FIFO or symlink, etc.)
                #    and _read_ from it when we shouldn't.
                # 2) We then record it as valid, but don't update the index
                #    metadata, and on a subsequent save it has 'hashvalid'
                #    but is recorded as the file type from the index, when
                #    the content is something else ...
                # Avoid all of these consistency issues by just skipping such
                # things - it really ought to not happen anyway.
                add_error("%s: mode changed since indexing, skipping." %
                          path_msg(ent.name))
                lastskip_name = ent.name
                continue
            if stat.S_ISREG(ent.mode):
                try:
                    # If the file changes while we're reading it, then our reading
                    # may stop at some point, but the stat() above may have gotten
                    # a different size already. Recalculate the meta size so that
                    # the repository records the accurate size in the metadata, even
                    # if the other stat() data might be slightly older than the file
                    # content (which we can't fix, this is inherently racy, but we
                    # can prevent the size mismatch.)
                    meta.size = 0

                    def new_blob(data):
                        meta.size += len(data)
                        return w.new_blob(data)

                    before_saving_regular_file(ent.name)
                    with hashsplit.open_noatime(ent.name) as f:
                        (mode, id) = hashsplit.split_to_blob_or_tree(
                            new_blob, w.new_tree, [f], keep_boundaries=False)
                except (IOError, OSError) as e:
                    add_error('%s: %s' % (ent.name, e))
                    lastskip_name = ent.name
            elif stat.S_ISDIR(ent.mode):
                assert (0)  # handled above
            elif stat.S_ISLNK(ent.mode):
                mode, id = (GIT_MODE_SYMLINK, w.new_blob(meta.symlink_target))
            else:
                # Everything else should be fully described by its
                # metadata, so just record an empty blob, so the paths
                # in the tree and .bupm will match up.
                (mode, id) = (GIT_MODE_FILE, w.new_blob(b''))

            if id:
                ent.validate(mode, id)
                ent.repack()
                git_name = git.mangle_name(file, ent.mode, ent.gitmode)
                git_info = (mode, git_name, id)
                shalists[-1].append(git_info)
                sort_key = git.shalist_item_sort_key((ent.mode, file, id))
                metalists[-1].append((sort_key, meta))

        if exists and wasmissing:
            _nonlocal['count'] += oldsize
            _nonlocal['subcount'] = 0

    if opt.progress:
        pct = total and _nonlocal['count'] * 100.0 / total or 100
        progress(
            'Saving: %.2f%% (%d/%dk, %d/%d files), done.    \n' %
            (pct, _nonlocal['count'] / 1024, total / 1024, fcount, ftotal))

    while len(parts) > 1:  # _pop() all the parts above the root
        _pop(force_tree=None)
    assert (len(shalists) == 1)
    assert (len(metalists) == 1)

    # Finish the root directory.
    tree = _pop(
        force_tree=None,
        # When there's a collision, use empty metadata for the root.
        dir_metadata=metadata.Metadata() if root_collision else None)

    sys.stdout.flush()
    out = byte_stream(sys.stdout)

    if opt.tree:
        out.write(hexlify(tree))
        out.write(b'\n')
    if opt.commit or name:
        if compat.py_maj > 2:
            # Strip b prefix from python 3 bytes reprs to preserve previous format
            msgcmd = b'[%s]' % b', '.join(
                [repr(argv_bytes(x))[1:].encode('ascii') for x in argv])
        else:
            msgcmd = repr(argv)
        msg = b'bup save\n\nGenerated by command:\n%s\n' % msgcmd
        userline = (b'%s <%s@%s>' % (userfullname(), username(), hostname()))
        commit = w.new_commit(tree, oldref, userline, date, None, userline,
                              date, None, msg)
        if opt.commit:
            out.write(hexlify(commit))
            out.write(b'\n')

    msr.close()
    w.close()  # must close before we can update the ref

    if opt.name:
        if cli:
            cli.update_ref(refname, commit, oldref)
        else:
            git.update_ref(refname, commit, oldref)

    if cli:
        cli.close()

    if saved_errors:
        log('WARNING: %d errors encountered while saving.\n' %
            len(saved_errors))
        sys.exit(1)
Ejemplo n.º 7
0
        dirp = stripped_path_components(dir, [opt.strip_path])
    elif graft_points:
        dirp = grafted_path_components(graft_points, dir)
    else:
        dirp = path_components(dir)

    while parts > [x[0] for x in dirp]:
        _pop(force_tree = None)

    if dir != '/':
        for path_component in dirp[len(parts):]:
            dir_name, fs_path = path_component
            if fs_path:
                meta = metadata.from_path(fs_path)
            else:
                meta = metadata.Metadata()
            _push(dir_name, meta)

    if not file:
        # no filename portion means this is a subdir.  But
        # sub/parentdirectories already handled in the pop/push() part above.
        oldtree = already_saved(ent) # may be None
        newtree = _pop(force_tree = oldtree)
        if not oldtree:
            if lastskip_name and lastskip_name.startswith(ent.name):
                ent.invalidate()
            else:
                ent.validate(GIT_MODE_TREE, newtree)
            ent.repack()
        if exists and wasmissing:
            count += oldsize
Ejemplo n.º 8
0
def test_index_dirty(tmpdir):
    orig_cwd = os.getcwd()
    try:
        os.chdir(tmpdir)
        default_meta = metadata.Metadata()

        with index.MetaStoreWriter(b'index.meta.tmp') as ms1, \
             index.MetaStoreWriter(b'index2.meta.tmp') as ms2, \
             index.MetaStoreWriter(b'index3.meta.tmp') as ms3:

            meta_ofs1 = ms1.store(default_meta)
            meta_ofs2 = ms2.store(default_meta)
            meta_ofs3 = ms3.store(default_meta)

            ds = xstat.stat(lib_t_dir)
            fs = xstat.stat(lib_t_dir + b'/test_index.py')
            tmax = (time.time() - 1) * 10**9

            with index.Writer(b'index.tmp', ms1, tmax) as w1, \
                 index.Writer(b'index2.tmp', ms2, tmax) as w2, \
                 index.Writer(b'index3.tmp', ms3, tmax) as w3:

                w1.add(b'/a/b/x', fs, meta_ofs1)
                w1.add(b'/a/b/c', fs, meta_ofs1)
                w1.add(b'/a/b/', ds, meta_ofs1)
                w1.add(b'/a/', ds, meta_ofs1)
                #w1.close()
                WVPASS()

                w2.add(b'/a/b/n/2', fs, meta_ofs2)
                #w2.close()
                WVPASS()

                w3.add(b'/a/c/n/3', fs, meta_ofs3)
                #w3.close()
                WVPASS()

                with w1.new_reader() as r1, \
                     w2.new_reader() as r2, \
                     w3.new_reader() as r3:
                    WVPASS()

                    r1all = [e.name for e in r1]
                    WVPASSEQ(r1all,
                             [b'/a/b/x', b'/a/b/c', b'/a/b/', b'/a/', b'/'])
                    r2all = [e.name for e in r2]
                    WVPASSEQ(r2all,
                             [b'/a/b/n/2', b'/a/b/n/', b'/a/b/', b'/a/', b'/'])
                    r3all = [e.name for e in r3]
                    WVPASSEQ(r3all,
                             [b'/a/c/n/3', b'/a/c/n/', b'/a/c/', b'/a/', b'/'])
                    all = [e.name for e in index.merge(r2, r1, r3)]
                    WVPASSEQ(all, [
                        b'/a/c/n/3', b'/a/c/n/', b'/a/c/', b'/a/b/x',
                        b'/a/b/n/2', b'/a/b/n/', b'/a/b/c', b'/a/b/', b'/a/',
                        b'/'
                    ])
                    fake_validate(r1)
                    dump(r1)

                    print([hex(e.flags) for e in r1])
                    WVPASSEQ([e.name for e in r1 if e.is_valid()], r1all)
                    WVPASSEQ([e.name for e in r1 if not e.is_valid()], [])
                    WVPASSEQ([
                        e.name
                        for e in index.merge(r2, r1, r3) if not e.is_valid()
                    ], [
                        b'/a/c/n/3', b'/a/c/n/', b'/a/c/', b'/a/b/n/2',
                        b'/a/b/n/', b'/a/b/', b'/a/', b'/'
                    ])

                    expect_invalid = [b'/'] + r2all + r3all
                    expect_real = (set(r1all) - set(r2all) - set(r3all)) \
                                    | set([b'/a/b/n/2', b'/a/c/n/3'])
                    dump(index.merge(r2, r1, r3))
                    for e in index.merge(r2, r1, r3):
                        print(e.name, hex(e.flags), e.ctime)
                        eiv = e.name in expect_invalid
                        er = e.name in expect_real
                        WVPASSEQ(eiv, not e.is_valid())
                        WVPASSEQ(er, e.is_real())
                    fake_validate(r2, r3)
                    dump(index.merge(r2, r1, r3))
                    WVPASSEQ([
                        e.name
                        for e in index.merge(r2, r1, r3) if not e.is_valid()
                    ], [])

                    e = eget(index.merge(r2, r1, r3), b'/a/b/c')
                    e.invalidate()
                    e.repack()
                    dump(index.merge(r2, r1, r3))
                    WVPASSEQ([
                        e.name
                        for e in index.merge(r2, r1, r3) if not e.is_valid()
                    ], [b'/a/b/c', b'/a/b/', b'/a/', b'/'])
    finally:
        os.chdir(orig_cwd)
Ejemplo n.º 9
0
Archivo: save.py Proyecto: fakegit/bup
def save_tree(opt, reader, hlink_db, msr, w):
    # Metadata is stored in a file named .bupm in each directory.  The
    # first metadata entry will be the metadata for the current directory.
    # The remaining entries will be for each of the other directory
    # elements, in the order they're listed in the index.
    #
    # Since the git tree elements are sorted according to
    # git.shalist_item_sort_key, the metalist items are accumulated as
    # (sort_key, metadata) tuples, and then sorted when the .bupm file is
    # created.  The sort_key should have been computed using the element's
    # mangled name and git mode (after hashsplitting), but the code isn't
    # actually doing that but rather uses the element's real name and mode.
    # This makes things a bit more difficult when reading it back, see
    # vfs.ordered_tree_entries().

    # Maintain a stack of information representing the current location in
    # the archive being constructed.  The current path is recorded in
    # parts, which will be something like
    #      [StackDir(name=''), StackDir(name='home'), StackDir(name='someuser')],
    # and the accumulated content and metadata for files in the dirs is stored
    # in the .items member of the StackDir.

    stack = []

    def _push(part, metadata):
        # Enter a new archive directory -- make it the current directory.
        item = StackDir(part, metadata)
        stack.append(item)


    def _pop(force_tree=None, dir_metadata=None):
        # Leave the current archive directory and add its tree to its parent.
        item = stack.pop()
        # FIXME: only test if collision is possible (i.e. given --strip, etc.)?
        if force_tree:
            tree = force_tree
        else:
            names_seen = set()
            clean_list = []
            for x in item.items:
                name = x.name
                if name in names_seen:
                    parent_path = b'/'.join(x.name for x in stack) + b'/'
                    add_error('error: ignoring duplicate path %s in %s'
                              % (path_msg(name), path_msg(parent_path)))
                else:
                    names_seen.add(name)
                    clean_list.append(x)

            # if set, overrides the original metadata pushed for this dir.
            if dir_metadata is None:
                dir_metadata = item.meta
            metalist = [(b'', dir_metadata)]
            metalist += [(git.shalist_item_sort_key((entry.mode, entry.name, None)),
                          entry.meta)
                         for entry in clean_list if entry.mode != GIT_MODE_TREE]
            metalist.sort(key = lambda x: x[0])
            metadata = BytesIO(b''.join(m[1].encode() for m in metalist))
            mode, id = hashsplit.split_to_blob_or_tree(w.new_blob, w.new_tree,
                                                       [metadata],
                                                       keep_boundaries=False)
            shalist = [(mode, b'.bupm', id)]
            shalist += [(entry.gitmode,
                         git.mangle_name(entry.name, entry.mode, entry.gitmode),
                         entry.oid)
                        for entry in clean_list]

            tree = w.new_tree(shalist)
        if stack:
            stack[-1].append(item.name, GIT_MODE_TREE, GIT_MODE_TREE, tree, None)
        return tree


    # Hack around lack of nonlocal vars in python 2
    _nonlocal = {}
    _nonlocal['count'] = 0
    _nonlocal['subcount'] = 0
    _nonlocal['lastremain'] = None

    def progress_report(n):
        _nonlocal['subcount'] += n
        cc = _nonlocal['count'] + _nonlocal['subcount']
        pct = total and (cc*100.0/total) or 0
        now = time.time()
        elapsed = now - tstart
        kps = elapsed and int(cc/1024./elapsed)
        kps_frac = 10 ** int(math.log(kps+1, 10) - 1)
        kps = int(kps/kps_frac)*kps_frac
        if cc:
            remain = elapsed*1.0/cc * (total-cc)
        else:
            remain = 0.0
        if (_nonlocal['lastremain'] and (remain > _nonlocal['lastremain'])
              and ((remain - _nonlocal['lastremain'])/_nonlocal['lastremain'] < 0.05)):
            remain = _nonlocal['lastremain']
        else:
            _nonlocal['lastremain'] = remain
        hours = int(remain/60/60)
        mins = int(remain/60 - hours*60)
        secs = int(remain - hours*60*60 - mins*60)
        if elapsed < 30:
            remainstr = ''
            kpsstr = ''
        else:
            kpsstr = '%dk/s' % kps
            if hours:
                remainstr = '%dh%dm' % (hours, mins)
            elif mins:
                remainstr = '%dm%d' % (mins, secs)
            else:
                remainstr = '%ds' % secs
        qprogress('Saving: %.2f%% (%d/%dk, %d/%d files) %s %s\r'
                  % (pct, cc/1024, total/1024, fcount, ftotal,
                     remainstr, kpsstr))


    def already_saved(ent):
        return ent.is_valid() and w.exists(ent.sha) and ent.sha

    def wantrecurse_pre(ent):
        return not already_saved(ent)

    def wantrecurse_during(ent):
        return not already_saved(ent) or ent.sha_missing()

    def find_hardlink_target(hlink_db, ent):
        if hlink_db and not stat.S_ISDIR(ent.mode) and ent.nlink > 1:
            link_paths = hlink_db.node_paths(ent.dev, ent.ino)
            if link_paths:
                return link_paths[0]
        return None

    total = ftotal = 0
    if opt.progress:
        for transname, ent in reader.filter(opt.sources,
                                            wantrecurse=wantrecurse_pre):
            if not (ftotal % 10024):
                qprogress('Reading index: %d\r' % ftotal)
            exists = ent.exists()
            hashvalid = already_saved(ent)
            ent.set_sha_missing(not hashvalid)
            if not opt.smaller or ent.size < opt.smaller:
                if exists and not hashvalid:
                    total += ent.size
            ftotal += 1
        progress('Reading index: %d, done.\n' % ftotal)
        hashsplit.progress_callback = progress_report

    # Root collisions occur when strip or graft options map more than one
    # path to the same directory (paths which originally had separate
    # parents).  When that situation is detected, use empty metadata for
    # the parent.  Otherwise, use the metadata for the common parent.
    # Collision example: "bup save ... --strip /foo /foo/bar /bar".

    # FIXME: Add collision tests, or handle collisions some other way.

    # FIXME: Detect/handle strip/graft name collisions (other than root),
    # i.e. if '/foo/bar' and '/bar' both map to '/'.

    first_root = None
    root_collision = None
    tstart = time.time()
    fcount = 0
    lastskip_name = None
    lastdir = b''
    for transname, ent in reader.filter(opt.sources,
                                        wantrecurse=wantrecurse_during):
        (dir, file) = os.path.split(ent.name)
        exists = (ent.flags & index.IX_EXISTS)
        hashvalid = already_saved(ent)
        wasmissing = ent.sha_missing()
        oldsize = ent.size
        if opt.verbose:
            if not exists:
                status = 'D'
            elif not hashvalid:
                if ent.sha == index.EMPTY_SHA:
                    status = 'A'
                else:
                    status = 'M'
            else:
                status = ' '
            if opt.verbose >= 2:
                log('%s %-70s\n' % (status, path_msg(ent.name)))
            elif not stat.S_ISDIR(ent.mode) and lastdir != dir:
                if not lastdir.startswith(dir):
                    log('%s %-70s\n' % (status, path_msg(os.path.join(dir, b''))))
                lastdir = dir

        if opt.progress:
            progress_report(0)
        fcount += 1

        if not exists:
            continue
        if opt.smaller and ent.size >= opt.smaller:
            if exists and not hashvalid:
                if opt.verbose:
                    log('skipping large file "%s"\n' % path_msg(ent.name))
                lastskip_name = ent.name
            continue

        assert(dir.startswith(b'/'))
        if opt.strip:
            dirp = stripped_path_components(dir, opt.sources)
        elif opt.strip_path:
            dirp = stripped_path_components(dir, [opt.strip_path])
        elif opt.grafts:
            dirp = grafted_path_components(opt.grafts, dir)
        else:
            dirp = path_components(dir)

        # At this point, dirp contains a representation of the archive
        # path that looks like [(archive_dir_name, real_fs_path), ...].
        # So given "bup save ... --strip /foo/bar /foo/bar/baz", dirp
        # might look like this at some point:
        #   [('', '/foo/bar'), ('baz', '/foo/bar/baz'), ...].

        # This dual representation supports stripping/grafting, where the
        # archive path may not have a direct correspondence with the
        # filesystem.  The root directory is represented by an initial
        # component named '', and any component that doesn't have a
        # corresponding filesystem directory (due to grafting, for
        # example) will have a real_fs_path of None, i.e. [('', None),
        # ...].

        if first_root == None:
            first_root = dirp[0]
        elif first_root != dirp[0]:
            root_collision = True

        # If switching to a new sub-tree, finish the current sub-tree.
        while [x.name for x in stack] > [x[0] for x in dirp]:
            _pop()

        # If switching to a new sub-tree, start a new sub-tree.
        for path_component in dirp[len(stack):]:
            dir_name, fs_path = path_component
            # Not indexed, so just grab the FS metadata or use empty metadata.
            try:
                meta = metadata.from_path(fs_path, normalized=True) \
                    if fs_path else metadata.Metadata()
            except (OSError, IOError) as e:
                add_error(e)
                lastskip_name = dir_name
                meta = metadata.Metadata()
            _push(dir_name, meta)

        if not file:
            if len(stack) == 1:
                continue # We're at the top level -- keep the current root dir
            # Since there's no filename, this is a subdir -- finish it.
            oldtree = already_saved(ent) # may be None
            newtree = _pop(force_tree = oldtree)
            if not oldtree:
                if lastskip_name and lastskip_name.startswith(ent.name):
                    ent.invalidate()
                else:
                    ent.validate(GIT_MODE_TREE, newtree)
                ent.repack()
            if exists and wasmissing:
                _nonlocal['count'] += oldsize
            continue

        # it's not a directory
        if hashvalid:
            meta = msr.metadata_at(ent.meta_ofs)
            meta.hardlink_target = find_hardlink_target(hlink_db, ent)
            # Restore the times that were cleared to 0 in the metastore.
            (meta.atime, meta.mtime, meta.ctime) = (ent.atime, ent.mtime, ent.ctime)
            stack[-1].append(file, ent.mode, ent.gitmode, ent.sha, meta)
        else:
            id = None
            hlink = find_hardlink_target(hlink_db, ent)
            try:
                meta = metadata.from_path(ent.name, hardlink_target=hlink,
                                          normalized=True,
                                          after_stat=after_nondir_metadata_stat)
            except (OSError, IOError) as e:
                add_error(e)
                lastskip_name = ent.name
                continue
            if stat.S_IFMT(ent.mode) != stat.S_IFMT(meta.mode):
                # The mode changed since we indexed the file, this is bad.
                # This can cause two issues:
                # 1) We e.g. think the file is a regular file, but now it's
                #    something else (a device, socket, FIFO or symlink, etc.)
                #    and _read_ from it when we shouldn't.
                # 2) We then record it as valid, but don't update the index
                #    metadata, and on a subsequent save it has 'hashvalid'
                #    but is recorded as the file type from the index, when
                #    the content is something else ...
                # Avoid all of these consistency issues by just skipping such
                # things - it really ought to not happen anyway.
                add_error("%s: mode changed since indexing, skipping." % path_msg(ent.name))
                lastskip_name = ent.name
                continue
            if stat.S_ISREG(ent.mode):
                try:
                    # If the file changes while we're reading it, then our reading
                    # may stop at some point, but the stat() above may have gotten
                    # a different size already. Recalculate the meta size so that
                    # the repository records the accurate size in the metadata, even
                    # if the other stat() data might be slightly older than the file
                    # content (which we can't fix, this is inherently racy, but we
                    # can prevent the size mismatch.)
                    meta.size = 0
                    def new_blob(data):
                        meta.size += len(data)
                        return w.new_blob(data)
                    before_saving_regular_file(ent.name)
                    with hashsplit.open_noatime(ent.name) as f:
                        (mode, id) = hashsplit.split_to_blob_or_tree(
                                                new_blob, w.new_tree, [f],
                                                keep_boundaries=False)
                except (IOError, OSError) as e:
                    add_error('%s: %s' % (ent.name, e))
                    lastskip_name = ent.name
            elif stat.S_ISDIR(ent.mode):
                assert(0)  # handled above
            elif stat.S_ISLNK(ent.mode):
                mode, id = (GIT_MODE_SYMLINK, w.new_blob(meta.symlink_target))
            else:
                # Everything else should be fully described by its
                # metadata, so just record an empty blob, so the paths
                # in the tree and .bupm will match up.
                (mode, id) = (GIT_MODE_FILE, w.new_blob(b''))

            if id:
                ent.validate(mode, id)
                ent.repack()
                stack[-1].append(file, ent.mode, ent.gitmode, id, meta)

        if exists and wasmissing:
            _nonlocal['count'] += oldsize
            _nonlocal['subcount'] = 0


    if opt.progress:
        pct = total and _nonlocal['count']*100.0/total or 100
        progress('Saving: %.2f%% (%d/%dk, %d/%d files), done.    \n'
                 % (pct, _nonlocal['count']/1024, total/1024, fcount, ftotal))

    while len(stack) > 1: # _pop() all the parts above the root
        _pop()

    # Finish the root directory.
    # When there's a collision, use empty metadata for the root.
    tree = _pop(dir_metadata = metadata.Metadata() if root_collision else None)

    return tree