예제 #1
0
 def _meta_write(self, objtype, content):
     """Write content through self.meta_writer unless it already exists.

     The object hash is computed with git.calc_hash() using the type
     name looked up in git._typermap[objtype].  When self.exists()
     already reports that hash, nothing is written.

     Returns the computed hash in either case.
     """
     obj_hash = git.calc_hash(git._typermap[objtype], content)
     if self.exists(obj_hash):
         # Already known: skip the write, but still report the hash.
         return obj_hash
     self._ensure_meta_writer()
     self.meta_writer.write(objtype, obj_hash, content)
     # Remember what went through the meta writer.
     self.meta_written_objs.add(obj_hash)
     return obj_hash
예제 #2
0
def walk_object(cat_pipe, id, verbose=None, parent_path=[], writer=None):
    """Yield (id, type, data) for everything reachable from id.

    Objects are fetched through cat_pipe.get().  The walk stops at any
    object that writer (when given) already has, according to
    writer.exists().  The id in each yielded tuple is the value returned
    by git.calc_hash() for the object's type and data.

    Commits yield themselves, then their tree, then each parent.  Trees
    yield themselves, then every entry.  When verbose > 1 the paths of
    tree entries are logged, built from parent_path plus the demangled
    entry name.

    Since maybe_write() can't accept an iterator, join()ing the data
    here doesn't hurt anything.

    Raises Exception for an object type other than blob, commit or tree.
    """
    obj_iter = cat_pipe.get(id)
    obj_type = next(obj_iter)  # first item is the type, the rest is data
    data = ''.join(obj_iter)
    oid = git.calc_hash(obj_type, data)
    if writer and writer.exists(oid):
        return
    if obj_type not in ('blob', 'commit', 'tree'):
        raise Exception('unexpected repository object type %r' % obj_type)

    # Every supported type reports itself before any of its children.
    yield (oid, obj_type, data)

    if obj_type == 'commit':
        commit = parse_commit(data)
        for item in walk_object(cat_pipe, commit.tree, verbose, parent_path,
                                writer):
            yield item
        for parent_id in commit.parents:
            for item in walk_object(cat_pipe, parent_id, verbose,
                                    parent_path, writer):
                yield item
    elif obj_type == 'tree':
        for (mode, name, ent_id) in git.tree_decode(data):
            if verbose > 1:
                demangled, bup_type = git.demangle_name(name)
                sub_path = parent_path + [demangled]
                # Don't print the sub-parts of chunked files.
                sub_v = verbose if bup_type == git.BUP_NORMAL else None
                for item in walk_object(cat_pipe, ent_id.encode('hex'),
                                        sub_v, sub_path, writer):
                    yield item
                # The path is logged only after the entry has been walked.
                if stat.S_ISDIR(mode):
                    if verbose > 1 and bup_type == git.BUP_NORMAL:
                        log('%s/\n' % '/'.join(sub_path))
                    elif verbose > 2:  # (and BUP_CHUNKED)
                        log('%s\n' % '/'.join(sub_path))
                elif verbose > 2:
                    log('%s\n' % '/'.join(sub_path))
            else:
                # Quiet walk: no path tracking, verbosity left at default.
                for item in walk_object(cat_pipe, ent_id.encode('hex'),
                                        writer=writer):
                    yield item
예제 #3
0
파일: split.py 프로젝트: fakegit/bup
 def new_tree(self, shalist):
     """Return git.calc_hash() of shalist encoded as a b'tree' object.

     The encoding is produced by git.tree_encode(shalist).
     """
     encoded = git.tree_encode(shalist)
     return git.calc_hash(b'tree', encoded)
예제 #4
0
파일: split.py 프로젝트: fakegit/bup
 def new_blob(self, content):
     """Return git.calc_hash() of content treated as a b'blob' object."""
     blob_hash = git.calc_hash(b'blob', content)
     return blob_hash
예제 #5
0
파일: split.py 프로젝트: gdt/bup
def main(argv):
    """Run the `bup split` command for the given argument vector.

    The input is stdin, the files named in the extra arguments, or
    (with --git-ids) the objects whose ids are read from stdin.  It is
    split with hashsplit and, depending on the options:

      -b      print the id of every blob produced
      -t      print the id of the resulting tree
      -c      create a commit and print its id
      -n      create a commit and update refs/heads/<name> to it
      --copy  write the split data back to stdout
      --noop  split (and hash) without a pack writer

    Objects go to a remote pack writer when a remote is given or
    BUP_SERVER_REVERSE is set, otherwise to a local git.PackWriter.

    argv is the full argument vector; argv[0] is skipped.

    Calls sys.exit(1) at the end when saved_errors is non-empty.
    Invalid option combinations are reported through o.fatal().
    """
    o = options.Options(optspec)
    opt, flags, extra = o.parse_bytes(argv[1:])
    if opt.name: opt.name = argv_bytes(opt.name)
    if opt.remote: opt.remote = argv_bytes(opt.remote)
    if opt.verbose is None: opt.verbose = 0

    # Reject missing or contradictory mode options up front.
    if not (opt.blobs or opt.tree or opt.commit or opt.name or opt.noop
            or opt.copy):
        o.fatal("use one or more of -b, -t, -c, -n, --noop, --copy")
    if opt.copy and (opt.blobs or opt.tree):
        o.fatal('--copy is incompatible with -b, -t')
    if (opt.noop or opt.copy) and (opt.commit or opt.name):
        o.fatal('--noop and --copy are incompatible with -c, -n')
    if opt.blobs and (opt.tree or opt.commit or opt.name):
        o.fatal('-b is incompatible with -t, -c, -n')
    if extra and opt.git_ids:
        o.fatal("don't provide filenames when using --git-ids")

    # Verbosity of 2 or more also turns on the timing summary.
    if opt.verbose >= 2:
        git.verbose = opt.verbose - 1
        opt.bench = 1

    max_pack_size = None
    if opt.max_pack_size:
        max_pack_size = parse_num(opt.max_pack_size)
    max_pack_objects = None
    if opt.max_pack_objects:
        max_pack_objects = parse_num(opt.max_pack_objects)

    if opt.fanout:
        hashsplit.fanout = parse_num(opt.fanout)
    if opt.blobs:
        # -b overrides any --fanout value.
        hashsplit.fanout = 0
    if opt.bwlimit:
        client.bwlimit = parse_num(opt.bwlimit)
    if opt.date:
        date = parse_date_or_fatal(opt.date, o.fatal)
    else:
        date = time.time()

    # Hack around lack of nonlocal vars in python 2
    total_bytes = [0]

    def prog(filenum, nbytes):
        # Progress callback: accumulate the byte count and report it.
        total_bytes[0] += nbytes
        if filenum > 0:
            qprogress('Splitting: file #%d, %d kbytes\r' %
                      (filenum + 1, total_bytes[0] // 1024))
        else:
            qprogress('Splitting: %d kbytes\r' % (total_bytes[0] // 1024))

    is_reverse = environ.get(b'BUP_SERVER_REVERSE')
    if is_reverse and opt.remote:
        o.fatal("don't use -r in reverse mode; it's automatic")
    start_time = time.time()

    if opt.name and not valid_save_name(opt.name):
        o.fatal("'%r' is not a valid branch name." % opt.name)
    refname = opt.name and b'refs/heads/%s' % opt.name or None

    # Pick the destination: none (--noop/--copy), remote, or local.
    if opt.noop or opt.copy:
        cli = pack_writer = oldref = None
    elif opt.remote or is_reverse:
        git.check_repo_or_die()
        cli = client.Client(opt.remote)
        oldref = refname and cli.read_ref(refname) or None
        pack_writer = cli.new_packwriter(compression_level=opt.compress,
                                         max_pack_size=max_pack_size,
                                         max_pack_objects=max_pack_objects)
    else:
        git.check_repo_or_die()
        cli = None
        oldref = refname and git.read_ref(refname) or None
        pack_writer = git.PackWriter(compression_level=opt.compress,
                                     max_pack_size=max_pack_size,
                                     max_pack_objects=max_pack_objects)

    input = byte_stream(sys.stdin)

    if opt.git_ids:
        # the input is actually a series of git object ids that we should retrieve
        # and split.
        #
        # This is a bit messy, but basically it converts from a series of
        # CatPipe.get() iterators into a series of file-type objects.
        # It would be less ugly if either CatPipe.get() returned a file-like object
        # (not very efficient), or split_to_shalist() expected an iterator instead
        # of a file.
        cp = git.CatPipe()

        class IterToFile:
            # Minimal file-like wrapper: each read() returns the next
            # item of the iterator (size is ignored), b'' at the end.
            def __init__(self, it):
                self.it = iter(it)

            def read(self, size):
                v = next(self.it, None)
                return v or b''

        def read_ids():
            # Yield one IterToFile per object id line read from input.
            while True:
                line = input.readline()
                if not line:
                    break
                oid = line.strip()
                try:
                    it = cp.get(oid)
                    next(it, None)  # skip the file info
                except KeyError as e:
                    # Unknown id: record the error and keep going.
                    add_error('error: %s' % e)
                    continue
                yield IterToFile(it)

        files = read_ids()
    else:
        # the input either comes from a series of files or from stdin.
        # NOTE(review): the files opened here are never explicitly closed.
        files = extra and (open(argv_bytes(fn), 'rb')
                           for fn in extra) or [input]

    if pack_writer:
        new_blob = pack_writer.new_blob
        new_tree = pack_writer.new_tree
    elif opt.blobs or opt.tree:
        # --noop mode
        new_blob = lambda content: git.calc_hash(b'blob', content)
        new_tree = lambda shalist: git.calc_hash(b'tree',
                                                 git.tree_encode(shalist))

    # Flush text-mode stdout before writing through the byte stream.
    sys.stdout.flush()
    out = byte_stream(sys.stdout)

    if opt.blobs:
        shalist = hashsplit.split_to_blobs(new_blob,
                                           files,
                                           keep_boundaries=opt.keep_boundaries,
                                           progress=prog)
        for (sha, size, level) in shalist:
            out.write(hexlify(sha) + b'\n')
            reprogress()
    elif opt.tree or opt.commit or opt.name:
        if opt.name:  # insert dummy_name which may be used as a restore target
            mode, sha = \
                hashsplit.split_to_blob_or_tree(new_blob, new_tree, files,
                                                keep_boundaries=opt.keep_boundaries,
                                                progress=prog)
            splitfile_name = git.mangle_name(b'data', hashsplit.GIT_MODE_FILE,
                                             mode)
            shalist = [(mode, splitfile_name, sha)]
        else:
            shalist = hashsplit.split_to_shalist(
                new_blob,
                new_tree,
                files,
                keep_boundaries=opt.keep_boundaries,
                progress=prog)
        tree = new_tree(shalist)
    else:
        # --noop / --copy without -b/-t: split only, optionally echoing.
        it = hashsplit.hashsplit_iter(files,
                                      keep_boundaries=opt.keep_boundaries,
                                      progress=prog)
        for (blob, level) in it:
            hashsplit.total_split += len(blob)
            if opt.copy:
                # Emit the raw data on the byte stream; str(blob) would
                # produce a textual representation under Python 3.
                out.write(blob)

    if opt.verbose:
        log('\n')
    if opt.tree:
        out.write(hexlify(tree) + b'\n')
    if opt.commit or opt.name:
        msg = b'bup split\n\nGenerated by command:\n%r\n' % compat.get_argvb()
        userline = b'%s <%s@%s>' % (userfullname(), username(), hostname())
        # The same userline and date are passed for both identity slots.
        commit = pack_writer.new_commit(tree, oldref, userline, date, None,
                                        userline, date, None, msg)
        if opt.commit:
            out.write(hexlify(commit) + b'\n')

    if pack_writer:
        pack_writer.close()  # must close before we can update the ref

    if opt.name:
        if cli:
            cli.update_ref(refname, commit, oldref)
        else:
            git.update_ref(refname, commit, oldref)

    if cli:
        cli.close()

    secs = time.time() - start_time
    size = hashsplit.total_split
    if opt.bench:
        log('bup: %.2f kbytes in %.2f secs = %.2f kbytes/sec\n' %
            (size / 1024, secs, size / 1024 / secs))

    if saved_errors:
        log('WARNING: %d errors encountered while saving.\n' %
            len(saved_errors))
        sys.exit(1)
예제 #6
0
                next(it, None)  # skip the file info
            except KeyError as e:
                add_error('error: %s' % e)
                continue
            yield IterToFile(it)
    files = read_ids()
else:
    # the input either comes from a series of files or from stdin.
    files = extra and (open(argv_bytes(fn), 'rb') for fn in extra) or [input]

# Decide how blobs and trees are produced: through the pack writer when
# there is one, otherwise (for -b/-t) by hashing only.
if pack_writer:
    new_blob, new_tree = pack_writer.new_blob, pack_writer.new_tree
elif opt.blobs or opt.tree:
    # --noop mode
    def new_blob(content):
        return git.calc_hash(b'blob', content)

    def new_tree(shalist):
        return git.calc_hash(b'tree', git.tree_encode(shalist))

# Flush text-mode stdout before writing through the byte stream wrapper.
sys.stdout.flush()
out = byte_stream(sys.stdout)

if opt.blobs:
    shalist = hashsplit.split_to_blobs(new_blob, files,
                                       keep_boundaries=opt.keep_boundaries,
                                       progress=prog)
    for (sha, size, level) in shalist:
        out.write(hexlify(sha) + b'\n')
        reprogress()
elif opt.tree or opt.commit or opt.name:
    if opt.name: # insert dummy_name which may be used as a restore target
        mode, sha = \
예제 #7
0
파일: split-cmd.py 프로젝트: bup/bup
                next(it, None)  # skip the file info
            except KeyError as e:
                add_error('error: %s' % e)
                continue
            yield IterToFile(it)
    files = read_ids()
else:
    # the input either comes from a series of files or from stdin.
    files = extra and (open(fn) for fn in extra) or [sys.stdin]

# Decide how blobs and trees are produced: through the pack writer when
# there is one, otherwise (for -b/-t) by hashing only.
if pack_writer:
    new_blob, new_tree = pack_writer.new_blob, pack_writer.new_tree
elif opt.blobs or opt.tree:
    # --noop mode
    def new_blob(content):
        return git.calc_hash('blob', content)

    def new_tree(shalist):
        return git.calc_hash('tree', git.tree_encode(shalist))

if opt.blobs:
    shalist = hashsplit.split_to_blobs(new_blob, files,
                                       keep_boundaries=opt.keep_boundaries,
                                       progress=prog)
    for (sha, size, level) in shalist:
        print(sha.encode('hex'))
        reprogress()
elif opt.tree or opt.commit or opt.name:
    if opt.name: # insert dummy_name which may be used as a restore target
        mode, sha = \
            hashsplit.split_to_blob_or_tree(new_blob, new_tree, files,
                                            keep_boundaries=opt.keep_boundaries,
                                            progress=prog)
예제 #8
0
def main(argv):
    """Run the `bup split` command for the given argument vector.

    The input is stdin, the files named in the extra arguments, or
    (with --git-ids) the objects whose ids are read from stdin.  It is
    split with hashsplit and, depending on the options:

      -b      print the id of every blob produced
      -t      print the id of the resulting tree
      -c      create a commit and print its id
      -n      create a commit and update refs/heads/<name> to it
      --copy  write the split data back to stdout
      --noop  split (and hash) without a repository

    Unless --noop or --copy is given, objects are written through the
    repository returned by from_opts(opt).

    argv is the full argument vector; argv[0] is skipped.

    Calls sys.exit(1) at the end when saved_errors is non-empty.
    Invalid option combinations are reported through o.fatal().
    """
    o = options.Options(optspec)
    opt, flags, extra = o.parse_bytes(argv[1:])
    if opt.name: opt.name = argv_bytes(opt.name)
    if opt.verbose is None: opt.verbose = 0

    # Reject missing or contradictory mode options up front.
    if not (opt.blobs or opt.tree or opt.commit or opt.name or opt.noop
            or opt.copy):
        o.fatal("use one or more of -b, -t, -c, -n, --noop, --copy")
    if opt.copy and (opt.blobs or opt.tree):
        o.fatal('--copy is incompatible with -b, -t')
    if (opt.noop or opt.copy) and (opt.commit or opt.name):
        o.fatal('--noop and --copy are incompatible with -c, -n')
    if opt.blobs and (opt.tree or opt.commit or opt.name):
        o.fatal('-b is incompatible with -t, -c, -n')
    if extra and opt.git_ids:
        o.fatal("don't provide filenames when using --git-ids")

    # Verbosity of 2 or more also turns on the timing summary.
    if opt.verbose >= 2:
        git.verbose = opt.verbose - 1
        opt.bench = 1

    fanout = None
    if opt.fanout:
        # This used to be in hashsplit, but that's just confusing;
        # hashsplit now defaults to the real default (16) if 0 (or
        # None) is passed, but keep the command-line compatible...
        fanout = parse_num(opt.fanout) or 128
    blobbits = None
    if opt.blobbits:
        blobbits = parse_num(opt.blobbits)
    if opt.bwlimit:
        client.bwlimit = parse_num(opt.bwlimit)
    if opt.date:
        date = parse_date_or_fatal(opt.date, o.fatal)
    else:
        date = time.time()

    # Hack around lack of nonlocal vars in python 2
    total_bytes = [0]

    def prog(filenum, nbytes):
        # Progress callback: accumulate the byte count and report it.
        total_bytes[0] += nbytes
        if filenum > 0:
            qprogress('Splitting: file #%d, %d kbytes\r' %
                      (filenum + 1, total_bytes[0] // 1024))
        else:
            qprogress('Splitting: %d kbytes\r' % (total_bytes[0] // 1024))

    start_time = time.time()

    if opt.name and not valid_save_name(opt.name):
        o.fatal("'%r' is not a valid branch name." % opt.name)
    refname = opt.name and b'refs/heads/%s' % opt.name or None

    if opt.noop or opt.copy:
        repo = oldref = None
    else:
        repo = from_opts(opt)
        oldref = refname and repo.read_ref(refname) or None
        # The repository's bup.blobbits setting applies unless the
        # command line supplied its own value.
        repobits = repo.config(b'bup.blobbits',
                               opttype='int') or hashsplit.BUP_BLOBBITS
        if not blobbits:
            blobbits = repobits
        else:
            # NOTE(review): print() goes to stdout, which also carries
            # the ids written below -- confirm this is intended.
            print("overriding repo blobbits %d from cmdline with %d" %
                  (repobits, blobbits))

    input = byte_stream(sys.stdin)

    if opt.git_ids:
        # the input is actually a series of git object ids that we should retrieve
        # and split.
        #
        # This is a bit messy, but basically it converts from a series of
        # repo.cat() iterators into a series of file-type objects.
        # It would be less ugly if either repo.cat() returned a file-like object
        # (not very efficient), or split_to_shalist() expected an iterator instead
        # of a file.
        class IterToFile:
            # Minimal file-like wrapper: each read() returns the next
            # item of the iterator (size is ignored), b'' at the end.
            def __init__(self, it):
                self.it = iter(it)

            def read(self, size):
                v = next(self.it, None)
                return v or b''

        def read_ids():
            # Yield one IterToFile per object id line read from input.
            while True:
                line = input.readline()
                if not line:
                    break
                oid = line.strip()
                try:
                    it = repo.cat(oid)
                    next(it, None)  # skip the file info
                except KeyError as e:
                    # Unknown id: record the error and keep going.
                    add_error('error: %s' % e)
                    continue
                yield IterToFile(it)

        files = read_ids()
    else:
        # the input either comes from a series of files or from stdin.
        # NOTE(review): the files opened here are never explicitly closed.
        files = extra and (open(argv_bytes(fn), 'rb')
                           for fn in extra) or [input]

    if repo:
        write_data = repo.write_data
        write_tree = repo.write_tree
    elif opt.blobs or opt.tree:
        # --noop mode
        write_data = lambda content: git.calc_hash(b'blob', content)
        write_tree = lambda shalist: git.calc_hash(b'tree',
                                                   git.tree_encode(shalist))

    # Flush text-mode stdout before writing through the byte stream.
    sys.stdout.flush()
    out = byte_stream(sys.stdout)

    if opt.blobs:
        shalist = hashsplit.split_to_blobs(write_data,
                                           files,
                                           keep_boundaries=opt.keep_boundaries,
                                           progress=prog,
                                           blobbits=blobbits)
        for (sha, size, level) in shalist:
            out.write(hexlify(sha) + b'\n')
            reprogress()
    elif opt.tree or opt.commit or opt.name:
        if opt.name:  # insert dummy_name which may be used as a restore target
            mode, sha = \
                hashsplit.split_to_blob_or_tree(write_data, write_tree, files,
                                                keep_boundaries=opt.keep_boundaries,
                                                progress=prog, fanout=fanout,
                                                blobbits=blobbits)
            splitfile_name = git.mangle_name(b'data', hashsplit.GIT_MODE_FILE,
                                             mode)
            shalist = [(mode, splitfile_name, sha)]
        else:
            shalist = hashsplit.split_to_shalist(
                write_data,
                write_tree,
                files,
                keep_boundaries=opt.keep_boundaries,
                progress=prog,
                fanout=fanout,
                blobbits=blobbits)
        tree = write_tree(shalist)
    else:
        # --noop / --copy without -b/-t: split only, optionally echoing.
        it = hashsplit.hashsplit_iter(files,
                                      keep_boundaries=opt.keep_boundaries,
                                      progress=prog,
                                      fanout=fanout,
                                      blobbits=blobbits)
        for (blob, level) in it:
            hashsplit.total_split += len(blob)
            if opt.copy:
                # Emit the raw data on the byte stream; str(blob) would
                # produce a textual representation under Python 3.
                out.write(blob)

    if opt.verbose:
        log('\n')
    if opt.tree:
        out.write(hexlify(tree) + b'\n')
    if opt.commit or opt.name:
        msg = b'bup split\n\nGenerated by command:\n%r\n' % compat.get_argvb()
        userline = b'%s <%s@%s>' % (userfullname(), username(), hostname())
        # The same userline and date are passed for both identity slots.
        commit = repo.write_commit(tree, oldref, userline, date, None,
                                   userline, date, None, msg)
        if opt.commit:
            out.write(hexlify(commit) + b'\n')

    if opt.name and repo:
        repo.update_ref(refname, commit, oldref)

    if repo:
        repo.close()

    secs = time.time() - start_time
    size = hashsplit.total_split
    if opt.bench:
        log('bup: %.2f kbytes in %.2f secs = %.2f kbytes/sec\n' %
            (size / 1024, secs, size / 1024 / secs))

    if saved_errors:
        log('WARNING: %d errors encountered while saving.\n' %
            len(saved_errors))
        sys.exit(1)
예제 #9
0
파일: split-cmd.py 프로젝트: presto8/bup
            except KeyError as e:
                add_error('error: %s' % e)
                continue
            yield IterToFile(it)

    files = read_ids()
else:
    # the input either comes from a series of files or from stdin.
    files = extra and (open(argv_bytes(fn), 'rb') for fn in extra) or [input]

# Decide how data and trees are produced: through the repository when
# there is one, otherwise (for -b/-t) by hashing only.
if repo:
    write_data, write_tree = repo.write_data, repo.write_tree
elif opt.blobs or opt.tree:
    # --noop mode
    def write_data(content):
        return git.calc_hash(b'blob', content)

    def write_tree(shalist):
        return git.calc_hash(b'tree', git.tree_encode(shalist))

# Flush text-mode stdout before writing through the byte stream wrapper.
sys.stdout.flush()
out = byte_stream(sys.stdout)

if opt.blobs:
    shalist = hashsplit.split_to_blobs(write_data,
                                       files,
                                       keep_boundaries=opt.keep_boundaries,
                                       progress=prog,
                                       blobbits=blobbits)
    for (sha, size, level) in shalist:
        out.write(hexlify(sha) + b'\n')
        reprogress()
예제 #10
0
파일: server-cmd.py 프로젝트: lkosewsk/bup
def receive_objects(conn, junk):
    """Receive length-prefixed pack objects from conn into a pack writer.

    Each record starts with a 4-byte unsigned length in network byte
    order ('!I'):

      0           end of stream: close the writer, send '<name>.idx' if
                  close() returned a path, acknowledge and return
      0xffffffff  suspend: stash the writer in the global suspended_w,
                  acknowledge and return
      otherwise   that many bytes of encoded object follow

    For an object the writer already has, the name of the index that
    contains it is suggested to the client (at most once per call)
    instead of storing the object; everything else is written raw.

    junk is unused.

    Raises Exception (after aborting the writer) when the connection
    ends before a length header or delivers fewer bytes than announced.
    """
    global suspended_w
    git.check_repo_or_die()
    suggested = {}
    # Pick up the writer of a previously suspended session, if any.
    if suspended_w:
        w, suspended_w = suspended_w, None
    else:
        w = git.PackWriter()

    while True:
        header = conn.read(4)
        if not header:
            w.abort()
            raise Exception('object read: expected length header, got EOF\n')
        (length,) = struct.unpack('!I', header)
        #log('expecting %d bytes\n' % length)

        if length == 0:
            plural = '' if w.count == 1 else 's'
            log('bup server: received %d object%s.\n' % (w.count, plural))
            pack_path = w.close()
            if pack_path:
                conn.write('%s.idx\n' % os.path.basename(pack_path))
            conn.ok()
            return

        if length == 0xffffffff:
            log('bup server: receive-objects suspended.\n')
            suspended_w = w
            conn.ok()
            return

        payload = conn.read(length)  # object sizes in bup are reasonably small
        #log('read %d bytes\n' % length)
        if len(payload) < length:
            w.abort()
            raise Exception('object read: expected %d bytes, got %d\n'
                            % (length, len(payload)))
        obj_type, content = git._decode_packobj(payload)
        sha = git.calc_hash(obj_type, content)
        oldpack = w.exists(sha)
        # FIXME: we only suggest a single index per cycle, because the client
        # is currently dumb to download more than one per cycle anyway.
        # Actually we should fix the client, but this is a minor optimization
        # on the server side.
        nothing_suggested_yet = not suggested
        if (nothing_suggested_yet and oldpack
                and (oldpack == True or oldpack.endswith('.midx'))):
            # FIXME: we shouldn't really have to know about midx files
            # at this layer.  But exists() on a midx doesn't return the
            # packname (since it doesn't know)... probably we should just
            # fix that deficiency of midx files eventually, although it'll
            # make the files bigger.  This method is certainly not very
            # efficient.
            w.objcache.refresh(skip_midx=True)
            oldpack = w.objcache.exists(sha)
            log('new suggestion: %r\n' % oldpack)
            assert(oldpack)
            assert(oldpack != True)
            assert(not oldpack.endswith('.midx'))
            w.objcache.refresh(skip_midx=False)
        if nothing_suggested_yet and oldpack:
            assert(oldpack.endswith('.idx'))
            idx_name = os.path.basename(oldpack)
            if idx_name not in suggested:
                log("bup server: suggesting index %s\n" % idx_name)
                conn.write('index %s\n' % idx_name)
                suggested[idx_name] = 1
        else:
            # Unknown object, or an index was already suggested this call.
            w._raw_write([payload])