Example #1
def save_tree(output_file, paths, recurse=False, write_paths=True, save_symlinks=True, xdev=False):

    # Issue top-level rewrite warnings.
    for path in paths:
        safe_path = _clean_up_path_for_archive(path)
        if safe_path != path:
            log('archiving "%s" as "%s"\n' % (path, safe_path))

    if not recurse:
        for p in paths:
            safe_path = _clean_up_path_for_archive(p)
            st = xstat.lstat(p)
            if stat.S_ISDIR(st.st_mode):
                safe_path += "/"
            m = from_path(p, statinfo=st, archive_path=safe_path, save_symlinks=save_symlinks)
            if verbose:
                print(m.path, file=sys.stderr)
            m.write(output_file, include_path=write_paths)
    else:
        start_dir = os.getcwd()
        try:
            for (p, st) in recursive_dirlist(paths, xdev=xdev):
                dirlist_dir = os.getcwd()
                os.chdir(start_dir)
                safe_path = _clean_up_path_for_archive(p)
                m = from_path(p, statinfo=st, archive_path=safe_path, save_symlinks=save_symlinks)
                if verbose:
                    print(m.path, file=sys.stderr)
                m.write(output_file, include_path=write_paths)
                os.chdir(dirlist_dir)
        finally:
            os.chdir(start_dir)
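
A note on the recursive branch above: recursive_dirlist may chdir as it walks, so the loop saves the walker's directory, hops back to the starting directory for the per-entry work, then restores it, with try/finally guaranteeing the process ends up back where it started. A minimal standalone sketch of that save/restore idiom (each_from and visit are hypothetical names, not bup API):

import os

def each_from(start_dir, entries, visit):
    """Call visit(entry) with the cwd reset to start_dir each time."""
    try:
        for entry in entries:
            walker_dir = os.getcwd()   # wherever the walker left us
            os.chdir(start_dir)        # resolve relative paths from the start
            visit(entry)               # per-entry work runs with cwd = start_dir
            os.chdir(walker_dir)       # hand the walker its directory back
    finally:
        os.chdir(start_dir)            # always restore, even on error
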
Example #2
def update_index(top):
    ri = index.Reader(indexfile)
    wi = index.Writer(indexfile)
    rig = IterHelper(ri.iter(name=top))
    tstart = int(time.time())

    hashgen = None
    if opt.fake_valid:
        def hashgen(name):
            return (0o100644, index.FAKE_SHA)

    total = 0
    for (path,pst) in drecurse.recursive_dirlist([top], xdev=opt.xdev):
        if opt.verbose>=2 or (opt.verbose==1 and stat.S_ISDIR(pst.st_mode)):
            sys.stdout.write('%s\n' % path)
            sys.stdout.flush()
            progress('Indexing: %d\r' % total)
        elif not (total % 128):
            progress('Indexing: %d\r' % total)
        total += 1
        while rig.cur and rig.cur.name > path:  # deleted paths
            if rig.cur.exists():
                rig.cur.set_deleted()
                rig.cur.repack()
            rig.next()
        if rig.cur and rig.cur.name == path:    # paths that already existed
            if pst:
                rig.cur.from_stat(pst, tstart)
            if not (rig.cur.flags & index.IX_HASHVALID):
                if hashgen:
                    (rig.cur.gitmode, rig.cur.sha) = hashgen(path)
                    rig.cur.flags |= index.IX_HASHVALID
            if opt.fake_invalid:
                rig.cur.invalidate()
            rig.cur.repack()
            rig.next()
        else:  # new paths
            wi.add(path, pst, hashgen = hashgen)
    progress('Indexing: %d, done.\n' % total)

    if ri.exists():
        ri.save()
        wi.flush()
        if wi.count:
            wr = wi.new_reader()
            if opt.check:
                log('check: before merging: oldfile\n')
                check_index(ri)
                log('check: before merging: newfile\n')
                check_index(wr)
            mi = index.Writer(indexfile)
            merge_indexes(mi, ri, wr)
            ri.close()
            mi.close()
            wr.close()
        wi.abort()
    else:
        wi.close()
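
The core of update_index() is a two-cursor merge: the filesystem scan and the existing index iterate in the same sort order, so index entries that compare ahead of the current path are deletions, an equal name is an update, and anything else is new. A minimal sketch of that classification over two name sequences sorted in the same order, matching the `>` test above (bup iterates names in descending order); classify() is a hypothetical helper:

def classify(scanned, indexed):
    """Yield (kind, name) from two sequences sorted in the same order."""
    i = 0
    for name in scanned:
        while i < len(indexed) and indexed[i] > name:
            yield ('deleted', indexed[i])   # in the index, gone from disk
            i += 1
        if i < len(indexed) and indexed[i] == name:
            yield ('existing', name)        # present in both
            i += 1
        else:
            yield ('new', name)             # on disk, not yet indexed
    while i < len(indexed):
        yield ('deleted', indexed[i])       # index tail past the scan
        i += 1

print(list(classify(['c', 'a'], ['d', 'c', 'b'])))
# [('deleted', 'd'), ('existing', 'c'), ('deleted', 'b'), ('new', 'a')]
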
Example #3
def save_tree(output_file,
              paths,
              recurse=False,
              write_paths=True,
              save_symlinks=True,
              xdev=False):

    # Issue top-level rewrite warnings.
    for path in paths:
        safe_path = _clean_up_path_for_archive(path)
        if safe_path != path:
            log('archiving "%s" as "%s"\n' % (path, safe_path))

    if not recurse:
        for p in paths:
            safe_path = _clean_up_path_for_archive(p)
            st = xstat.lstat(p)
            if stat.S_ISDIR(st.st_mode):
                safe_path += '/'
            m = from_path(p,
                          statinfo=st,
                          archive_path=safe_path,
                          save_symlinks=save_symlinks)
            if verbose:
                print(m.path, file=sys.stderr)
            m.write(output_file, include_path=write_paths)
    else:
        start_dir = os.getcwd()
        try:
            for (p, st) in recursive_dirlist(paths, xdev=xdev):
                dirlist_dir = os.getcwd()
                os.chdir(start_dir)
                safe_path = _clean_up_path_for_archive(p)
                m = from_path(p,
                              statinfo=st,
                              archive_path=safe_path,
                              save_symlinks=save_symlinks)
                if verbose:
                    print(m.path, file=sys.stderr)
                m.write(output_file, include_path=write_paths)
                os.chdir(dirlist_dir)
        finally:
            os.chdir(start_dir)
Example #4
def main(argv):
    o = options.Options(optspec)
    opt, flags, extra = o.parse_bytes(argv[1:])

    if len(extra) != 1:
        o.fatal("exactly one filename expected")

    drecurse_top = argv_bytes(extra[0])
    excluded_paths = parse_excludes(flags, o.fatal)
    if not drecurse_top.startswith(b'/'):
        excluded_paths = [relpath(x) for x in excluded_paths]
    exclude_rxs = parse_rx_excludes(flags, o.fatal)
    it = drecurse.recursive_dirlist([drecurse_top],
                                    opt.xdev,
                                    excluded_paths=excluded_paths,
                                    exclude_rxs=exclude_rxs)
    if opt.profile:
        import cProfile

        def do_it():
            for i in it:
                pass

        cProfile.run('do_it()')
    else:
        if opt.quiet:
            for i in it:
                pass
        else:
            sys.stdout.flush()
            out = byte_stream(sys.stdout)
            for (name, st) in it:
                out.write(name + b'\n')

    if saved_errors:
        log('WARNING: %d errors encountered.\n' % len(saved_errors))
        sys.exit(1)
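
This version is Python 3: paths are handled as bytes end to end (argv_bytes, the b'/' prefix check), and output goes through a byte stream rather than text-mode sys.stdout. A standalone sketch of the same idea using only the standard library (byte_stream itself is a bup helper; sys.stdout.buffer is the stock equivalent):

import sys

def write_paths(names):
    """Write raw byte paths to stdout without any text decoding."""
    sys.stdout.flush()            # drain buffered text output first
    out = sys.stdout.buffer       # the underlying binary stream
    for name in names:
        out.write(name + b'\n')   # byte-exact, no encoding applied

write_paths([b'/tmp/caf\xc3\xa9', b'/tmp/not-utf8-\xff'])
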
Example #5
def update_index(top, excluded_paths, exclude_rxs, xdev_exceptions):
    # tmax and start must be epoch nanoseconds.
    tmax = (time.time() - 1) * 10**9
    ri = index.Reader(indexfile)
    msw = index.MetaStoreWriter(indexfile + '.meta')
    wi = index.Writer(indexfile, msw, tmax)
    rig = IterHelper(ri.iter(name=top))
    tstart = int(time.time()) * 10**9

    hlinks = hlinkdb.HLinkDB(indexfile + '.hlink')

    fake_hash = None
    if opt.fake_valid:

        def fake_hash(name):
            return (GIT_MODE_FILE, index.FAKE_SHA)

    total = 0
    bup_dir = os.path.abspath(git.repo())
    index_start = time.time()
    for path, pst in recursive_dirlist([top],
                                       xdev=opt.xdev,
                                       bup_dir=bup_dir,
                                       excluded_paths=excluded_paths,
                                       exclude_rxs=exclude_rxs,
                                       xdev_exceptions=xdev_exceptions):
        if opt.verbose >= 2 or (opt.verbose == 1
                                and stat.S_ISDIR(pst.st_mode)):
            sys.stdout.write('%s\n' % path)
            sys.stdout.flush()
            elapsed = time.time() - index_start
            paths_per_sec = total / elapsed if elapsed else 0
            qprogress('Indexing: %d (%d paths/s)\r' % (total, paths_per_sec))
        elif not (total % 128):
            elapsed = time.time() - index_start
            paths_per_sec = total / elapsed if elapsed else 0
            qprogress('Indexing: %d (%d paths/s)\r' % (total, paths_per_sec))
        total += 1

        while rig.cur and rig.cur.name > path:  # deleted paths
            if rig.cur.exists():
                rig.cur.set_deleted()
                rig.cur.repack()
                if rig.cur.nlink > 1 and not stat.S_ISDIR(rig.cur.mode):
                    hlinks.del_path(rig.cur.name)
            rig.next()

        if rig.cur and rig.cur.name == path:  # paths that already existed
            need_repack = False
            if rig.cur.stale(pst, tstart, check_device=opt.check_device):
                try:
                    meta = metadata.from_path(path, statinfo=pst)
                except (OSError, IOError) as e:
                    add_error(e)
                    rig.next()
                    continue
                if not stat.S_ISDIR(rig.cur.mode) and rig.cur.nlink > 1:
                    hlinks.del_path(rig.cur.name)
                if not stat.S_ISDIR(pst.st_mode) and pst.st_nlink > 1:
                    hlinks.add_path(path, pst.st_dev, pst.st_ino)
                # Clear these so they don't bloat the store -- they're
                # already in the index (since they vary a lot and they're
                # fixed length).  If you've noticed "tmax", you might
                # wonder why it's OK to do this, since that code may
                # adjust (mangle) the index mtime and ctime -- producing
                # fake values which must not end up in a .bupm.  However,
                # it looks like that shouldn't be possible:  (1) When
                # "save" validates the index entry, it always reads the
                # metadata from the filesystem. (2) Metadata is only
                # read/used from the index if hashvalid is true. (3)
                # "faked" entries will be stale(), and so we'll invalidate
                # them below.
                meta.ctime = meta.mtime = meta.atime = 0
                meta_ofs = msw.store(meta)
                rig.cur.update_from_stat(pst, meta_ofs)
                rig.cur.invalidate()
                need_repack = True
            if not (rig.cur.flags & index.IX_HASHVALID):
                if fake_hash:
                    rig.cur.gitmode, rig.cur.sha = fake_hash(path)
                    rig.cur.flags |= index.IX_HASHVALID
                    need_repack = True
            if opt.fake_invalid:
                rig.cur.invalidate()
                need_repack = True
            if need_repack:
                rig.cur.repack()
            rig.next()
        else:  # new paths
            try:
                meta = metadata.from_path(path, statinfo=pst)
            except (OSError, IOError) as e:
                add_error(e)
                continue
            # See same assignment to 0, above, for rationale.
            meta.atime = meta.mtime = meta.ctime = 0
            meta_ofs = msw.store(meta)
            wi.add(path, pst, meta_ofs, hashgen=fake_hash)
            if not stat.S_ISDIR(pst.st_mode) and pst.st_nlink > 1:
                hlinks.add_path(path, pst.st_dev, pst.st_ino)

    elapsed = time.time() - index_start
    paths_per_sec = total / elapsed if elapsed else 0
    progress('Indexing: %d, done (%d paths/s).\n' % (total, paths_per_sec))

    hlinks.prepare_save()

    if ri.exists():
        ri.save()
        wi.flush()
        if wi.count:
            wr = wi.new_reader()
            if opt.check:
                log('check: before merging: oldfile\n')
                check_index(ri)
                log('check: before merging: newfile\n')
                check_index(wr)
            mi = index.Writer(indexfile, msw, tmax)

            for e in index.merge(ri, wr):
                # FIXME: shouldn't we remove deleted entries eventually?  When?
                mi.add_ixentry(e)

            ri.close()
            mi.close()
            wr.close()
        wi.abort()
    else:
        wi.close()

    msw.close()
    hlinks.commit_save()
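
New in this revision relative to Example #2: a hardlink database keyed by device and inode, updated as non-directory paths with st_nlink > 1 appear and disappear. A rough standalone sketch of that bookkeeping, assuming a plain dict in place of bup's hlinkdb (and passing the stat result to del_path, which the real API does not require):

import os, stat

hlinks = {}   # (st_dev, st_ino) -> set of paths sharing that inode

def add_path(path, st):
    """Track a possible hardlink, mirroring hlinks.add_path() above."""
    if not stat.S_ISDIR(st.st_mode) and st.st_nlink > 1:
        hlinks.setdefault((st.st_dev, st.st_ino), set()).add(path)

def del_path(path, st):
    """Forget a path, mirroring hlinks.del_path() above."""
    group = hlinks.get((st.st_dev, st.st_ino))
    if group:
        group.discard(path)
        if not group:
            del hlinks[(st.st_dev, st.st_ino)]

add_path(__file__, os.lstat(__file__))
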
Example #6
def update_index(top, excluded_paths, exclude_rxs):
    # tmax and start must be epoch nanoseconds.
    tmax = (time.time() - 1) * 10**9
    ri = index.Reader(indexfile)
    msw = index.MetaStoreWriter(indexfile + '.meta')
    wi = index.Writer(indexfile, msw, tmax)
    rig = IterHelper(ri.iter(name=top))
    tstart = int(time.time()) * 10**9

    hlinks = hlinkdb.HLinkDB(indexfile + '.hlink')

    hashgen = None
    if opt.fake_valid:
        def hashgen(name):
            return (GIT_MODE_FILE, index.FAKE_SHA)

    total = 0
    bup_dir = os.path.abspath(git.repo())
    for (path,pst) in drecurse.recursive_dirlist([top], xdev=opt.xdev,
                                                 bup_dir=bup_dir,
                                                 excluded_paths=excluded_paths,
                                                 exclude_rxs=exclude_rxs):
        if opt.verbose>=2 or (opt.verbose==1 and stat.S_ISDIR(pst.st_mode)):
            sys.stdout.write('%s\n' % path)
            sys.stdout.flush()
            qprogress('Indexing: %d\r' % total)
        elif not (total % 128):
            qprogress('Indexing: %d\r' % total)
        total += 1
        while rig.cur and rig.cur.name > path:  # deleted paths
            if rig.cur.exists():
                rig.cur.set_deleted()
                rig.cur.repack()
                if rig.cur.nlink > 1 and not stat.S_ISDIR(rig.cur.mode):
                    hlinks.del_path(rig.cur.name)
            rig.next()
        if rig.cur and rig.cur.name == path:    # paths that already existed
            try:
                meta = metadata.from_path(path, statinfo=pst)
            except (OSError, IOError) as e:
                add_error(e)
                rig.next()
                continue
            if not stat.S_ISDIR(rig.cur.mode) and rig.cur.nlink > 1:
                hlinks.del_path(rig.cur.name)
            if not stat.S_ISDIR(pst.st_mode) and pst.st_nlink > 1:
                hlinks.add_path(path, pst.st_dev, pst.st_ino)
            # Clear these so they don't bloat the store -- they're
            # already in the index (since they vary a lot and they're
            # fixed length).  If you've noticed "tmax", you might
            # wonder why it's OK to do this, since that code may
            # adjust (mangle) the index mtime and ctime -- producing
            # fake values which must not end up in a .bupm.  However,
            # it looks like that shouldn't be possible:  (1) When
            # "save" validates the index entry, it always reads the
            # metadata from the filesystem. (2) Metadata is only
            # read/used from the index if hashvalid is true. (3) index
            # always invalidates "faked" entries, because "old != new"
            # in from_stat().
            meta.ctime = meta.mtime = meta.atime = 0
            meta_ofs = msw.store(meta)
            rig.cur.from_stat(pst, meta_ofs, tstart,
                              check_device=opt.check_device)
            if not (rig.cur.flags & index.IX_HASHVALID):
                if hashgen:
                    (rig.cur.gitmode, rig.cur.sha) = hashgen(path)
                    rig.cur.flags |= index.IX_HASHVALID
            if opt.fake_invalid:
                rig.cur.invalidate()
            rig.cur.repack()
            rig.next()
        else:  # new paths
            try:
                meta = metadata.from_path(path, statinfo=pst)
            except (OSError, IOError) as e:
                add_error(e)
                continue
            # See same assignment to 0, above, for rationale.
            meta.atime = meta.mtime = meta.ctime = 0
            meta_ofs = msw.store(meta)
            wi.add(path, pst, meta_ofs, hashgen = hashgen)
            if not stat.S_ISDIR(pst.st_mode) and pst.st_nlink > 1:
                hlinks.add_path(path, pst.st_dev, pst.st_ino)
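
Both progress branches above rate-limit the display: verbose mode prints every path, while the quiet path only refreshes the counter every 128 entries, using a carriage return so the line updates in place. A minimal sketch of that throttled \r-style progress line (progress_every is a hypothetical wrapper, not bup's qprogress):

import sys

def progress_every(items, step=128):
    """Yield items, refreshing an in-place counter every `step` items."""
    total = 0
    for item in items:
        if not (total % step):    # refresh at most once per `step` items
            sys.stderr.write('Indexing: %d\r' % total)
            sys.stderr.flush()
        total += 1
        yield item
    sys.stderr.write('Indexing: %d, done.\n' % total)

for _ in progress_every(range(1000)):
    pass
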
Example #7
--
x,xdev,one-file-system   don't cross filesystem boundaries
exclude= a path to exclude from the backup (can be used more than once)
exclude-from= a file that contains exclude paths (can be used more than once)
q,quiet  don't actually print filenames
profile  run under the python profiler
"""
o = options.Options(optspec)
(opt, flags, extra) = o.parse(sys.argv[1:])

if len(extra) != 1:
    o.fatal("exactly one filename expected")

excluded_paths = parse_excludes(flags, o.fatal)

it = drecurse.recursive_dirlist(extra, opt.xdev, excluded_paths=excluded_paths)
if opt.profile:
    import cProfile
    def do_it():
        for i in it:
            pass
    cProfile.run('do_it()')
else:
    if opt.quiet:
        for i in it:
            pass
    else:
        for (name,st) in it:
            print(name)

if saved_errors:
    log('WARNING: %d errors encountered.\n' % len(saved_errors))
    sys.exit(1)
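
One subtlety with the profiling branch: cProfile.run() executes its command string in the __main__ namespace, which works here because do_it is defined at script top level. From inside a function (as in Example #4), passing the namespaces explicitly with cProfile.runctx is the safer spelling. A small sketch:

import cProfile

def profiled_drain(it):
    def do_it():
        for i in it:
            pass
    # run do_it under the profiler with access to this function's locals
    cProfile.runctx('do_it()', globals(), locals())

profiled_drain(iter(range(100000)))
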
Example #8
from bup.helpers import *

optspec = """
bup drecurse <path>
--
x,xdev,one-file-system   don't cross filesystem boundaries
q,quiet  don't actually print filenames
profile  run under the python profiler
"""
o = options.Options("bup drecurse", optspec)
(opt, flags, extra) = o.parse(sys.argv[1:])

if len(extra) != 1:
    o.fatal("exactly one filename expected")

it = drecurse.recursive_dirlist(extra, opt.xdev)
if opt.profile:
    import cProfile

    def do_it():
        for i in it:
            pass

    cProfile.run("do_it()")
else:
    if opt.quiet:
        for i in it:
            pass
    else:
        for (name, st) in it:
            print(name)
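
The optspec string above is bup's own declarative flag syntax: each line lists short/long aliases and help text, and options.Options turns it into a parser. As a rough standalone analog (argparse, not bup's API) of the same flags:

import argparse

parser = argparse.ArgumentParser(prog='bup drecurse')
parser.add_argument('-x', '--xdev', '--one-file-system', action='store_true',
                    help="don't cross filesystem boundaries")
parser.add_argument('-q', '--quiet', action='store_true',
                    help="don't actually print filenames")
parser.add_argument('--profile', action='store_true',
                    help='run under the python profiler')
parser.add_argument('path')
opt = parser.parse_args(['-x', '/tmp'])   # sample invocation
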
Example #9
--
x,xdev,one-file-system   don't cross filesystem boundaries
exclude= a path to exclude from the backup (can be used more than once)
exclude-from= a file that contains exclude paths (can be used more than once)
q,quiet  don't actually print filenames
profile  run under the python profiler
"""
o = options.Options(optspec)
(opt, flags, extra) = o.parse(sys.argv[1:])

if len(extra) != 1:
    o.fatal("exactly one filename expected")

excluded_paths = drecurse.parse_excludes(flags)

it = drecurse.recursive_dirlist(extra, opt.xdev, excluded_paths)
if opt.profile:
    import cProfile
    def do_it():
        for i in it:
            pass
    cProfile.run('do_it()')
else:
    if opt.quiet:
        for i in it:
            pass
    else:
        for (name,st) in it:
            print(name)

if saved_errors:
    log('WARNING: %d errors encountered.\n' % len(saved_errors))
    sys.exit(1)
Example #10
q,quiet  don't actually print filenames
profile  run under the python profiler
"""
o = options.Options(optspec)
(opt, flags, extra) = o.parse(sys.argv[1:])

if len(extra) != 1:
    o.fatal("exactly one filename expected")

drecurse_top = extra[0]
excluded_paths = parse_excludes(flags, o.fatal)
if not drecurse_top.startswith('/'):
    excluded_paths = [relpath(x) for x in excluded_paths]
exclude_rxs = parse_rx_excludes(flags, o.fatal)
it = drecurse.recursive_dirlist([drecurse_top],
                                opt.xdev,
                                excluded_paths=excluded_paths,
                                exclude_rxs=exclude_rxs)
if opt.profile:
    import cProfile

    def do_it():
        for i in it:
            pass

    cProfile.run('do_it()')
else:
    if opt.quiet:
        for i in it:
            pass
    else:
        for (name, st) in it:
            print(name)
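
Note the relpath step above: when the top path is relative, the walker emits relative names, so absolute exclude paths are rewritten relative to the cwd to keep the comparisons consistent. A sketch of that normalization, substituting os.path.relpath for bup's own relpath helper:

import os

def relativize_excludes(top, excluded_paths):
    """If top is relative, rewrite excludes relative to the cwd too."""
    if os.path.isabs(top):
        return excluded_paths
    return [os.path.relpath(p) for p in excluded_paths]

print(relativize_excludes('src', ['/tmp/skip', '/var/skip']))
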
Example #11
def update_index(top, excluded_paths):
    tmax = time.time() - 1
    ri = index.Reader(indexfile)
    wi = index.Writer(indexfile, tmax)
    rig = IterHelper(ri.iter(name=top))
    tstart = int(time.time())

    hashgen = None
    if opt.fake_valid:

        def hashgen(name):
            return (GIT_MODE_FILE, index.FAKE_SHA)

    total = 0
    bup_dir = os.path.abspath(git.repo())
    for (path,
         pst) in drecurse.recursive_dirlist([top],
                                            xdev=opt.xdev,
                                            bup_dir=bup_dir,
                                            excluded_paths=excluded_paths):
        if opt.verbose >= 2 or (opt.verbose == 1
                                and stat.S_ISDIR(pst.st_mode)):
            sys.stdout.write('%s\n' % path)
            sys.stdout.flush()
            qprogress('Indexing: %d\r' % total)
        elif not (total % 128):
            qprogress('Indexing: %d\r' % total)
        total += 1
        while rig.cur and rig.cur.name > path:  # deleted paths
            if rig.cur.exists():
                rig.cur.set_deleted()
                rig.cur.repack()
            rig.next()
        if rig.cur and rig.cur.name == path:  # paths that already existed
            if pst:
                rig.cur.from_stat(pst, tstart)
            if not (rig.cur.flags & index.IX_HASHVALID):
                if hashgen:
                    (rig.cur.gitmode, rig.cur.sha) = hashgen(path)
                    rig.cur.flags |= index.IX_HASHVALID
            if opt.fake_invalid:
                rig.cur.invalidate()
            rig.cur.repack()
            rig.next()
        else:  # new paths
            wi.add(path, pst, hashgen=hashgen)
    progress('Indexing: %d, done.\n' % total)

    if ri.exists():
        ri.save()
        wi.flush()
        if wi.count:
            wr = wi.new_reader()
            if opt.check:
                log('check: before merging: oldfile\n')
                check_index(ri)
                log('check: before merging: newfile\n')
                check_index(wr)
            mi = index.Writer(indexfile, tmax)

            for e in index.merge(ri, wr):
                # FIXME: shouldn't we remove deleted entries eventually?  When?
                mi.add_ixentry(e)

            ri.close()
            mi.close()
            wr.close()
        wi.abort()
    else:
        wi.close()
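
The IX_HASHVALID handling above is ordinary bitflag arithmetic: test with &, set with |=, and clear when an entry is invalidated. A tiny self-contained sketch of the same flag discipline (the bit value here is arbitrary; bup defines its own constant):

IX_HASHVALID = 0x8000   # hypothetical bit, for illustration only

class Entry:
    def __init__(self):
        self.flags = 0

e = Entry()
assert not (e.flags & IX_HASHVALID)   # hash not yet known to be valid
e.flags |= IX_HASHVALID               # mark valid after (re)hashing
e.flags &= ~IX_HASHVALID              # invalidate, e.g. after a change
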
Example #12
def update_index(top, excluded_paths):
    tmax = time.time() - 1
    ri = index.Reader(indexfile)
    wi = index.Writer(indexfile, tmax)
    rig = IterHelper(ri.iter(name=top))
    tstart = int(time.time())

    hashgen = None
    if opt.fake_valid:
        def hashgen(name):
            return (GIT_MODE_FILE, index.FAKE_SHA)

    total = 0
    bup_dir = os.path.abspath(git.repo())
    for (path,pst) in drecurse.recursive_dirlist([top], xdev=opt.xdev,
                                                 bup_dir=bup_dir,
                                                 excluded_paths=excluded_paths):
        if opt.verbose>=2 or (opt.verbose==1 and stat.S_ISDIR(pst.st_mode)):
            sys.stdout.write('%s\n' % path)
            sys.stdout.flush()
            qprogress('Indexing: %d\r' % total)
        elif not (total % 128):
            qprogress('Indexing: %d\r' % total)
        total += 1
        while rig.cur and rig.cur.name > path:  # deleted paths
            if rig.cur.exists():
                rig.cur.set_deleted()
                rig.cur.repack()
            rig.next()
        if rig.cur and rig.cur.name == path:    # paths that already existed
            if pst:
                rig.cur.from_stat(pst, tstart)
            if not (rig.cur.flags & index.IX_HASHVALID):
                if hashgen:
                    (rig.cur.gitmode, rig.cur.sha) = hashgen(path)
                    rig.cur.flags |= index.IX_HASHVALID
            if opt.fake_invalid:
                rig.cur.invalidate()
            rig.cur.repack()
            rig.next()
        else:  # new paths
            wi.add(path, pst, hashgen = hashgen)
    progress('Indexing: %d, done.\n' % total)

    if ri.exists():
        ri.save()
        wi.flush()
        if wi.count:
            wr = wi.new_reader()
            if opt.check:
                log('check: before merging: oldfile\n')
                check_index(ri)
                log('check: before merging: newfile\n')
                check_index(wr)
            mi = index.Writer(indexfile, tmax)

            for e in index.merge(ri, wr):
                # FIXME: shouldn't we remove deleted entries eventually?  When?
                mi.add_ixentry(e)

            ri.close()
            mi.close()
            wr.close()
        wi.abort()
    else:
        wi.close()
Example #13
def update_index(top, excluded_paths, exclude_rxs, xdev_exceptions):
    # tmax and start must be epoch nanoseconds.
    tmax = (time.time() - 1) * 10**9
    ri = index.Reader(indexfile)
    msw = index.MetaStoreWriter(indexfile + '.meta')
    wi = index.Writer(indexfile, msw, tmax)
    rig = IterHelper(ri.iter(name=top))
    tstart = int(time.time()) * 10**9

    hlinks = hlinkdb.HLinkDB(indexfile + '.hlink')

    fake_hash = None
    if opt.fake_valid:
        def fake_hash(name):
            return (GIT_MODE_FILE, index.FAKE_SHA)

    total = 0
    bup_dir = os.path.abspath(git.repo())
    index_start = time.time()
    for path, pst in recursive_dirlist([top],
                                       xdev=opt.xdev,
                                       bup_dir=bup_dir,
                                       excluded_paths=excluded_paths,
                                       exclude_rxs=exclude_rxs,
                                       xdev_exceptions=xdev_exceptions):
        if opt.verbose>=2 or (opt.verbose==1 and stat.S_ISDIR(pst.st_mode)):
            sys.stdout.write('%s\n' % path)
            sys.stdout.flush()
            elapsed = time.time() - index_start
            paths_per_sec = total / elapsed if elapsed else 0
            qprogress('Indexing: %d (%d paths/s)\r' % (total, paths_per_sec))
        elif not (total % 128):
            elapsed = time.time() - index_start
            paths_per_sec = total / elapsed if elapsed else 0
            qprogress('Indexing: %d (%d paths/s)\r' % (total, paths_per_sec))
        total += 1

        while rig.cur and rig.cur.name > path:  # deleted paths
            if rig.cur.exists():
                rig.cur.set_deleted()
                rig.cur.repack()
                if rig.cur.nlink > 1 and not stat.S_ISDIR(rig.cur.mode):
                    hlinks.del_path(rig.cur.name)
            rig.next()

        if rig.cur and rig.cur.name == path:    # paths that already existed
            need_repack = False
            if rig.cur.stale(pst, tstart, check_device=opt.check_device):
                try:
                    meta = metadata.from_path(path, statinfo=pst)
                except (OSError, IOError) as e:
                    add_error(e)
                    rig.next()
                    continue
                if not stat.S_ISDIR(rig.cur.mode) and rig.cur.nlink > 1:
                    hlinks.del_path(rig.cur.name)
                if not stat.S_ISDIR(pst.st_mode) and pst.st_nlink > 1:
                    hlinks.add_path(path, pst.st_dev, pst.st_ino)
                # Clear these so they don't bloat the store -- they're
                # already in the index (since they vary a lot and they're
                # fixed length).  If you've noticed "tmax", you might
                # wonder why it's OK to do this, since that code may
                # adjust (mangle) the index mtime and ctime -- producing
                # fake values which must not end up in a .bupm.  However,
                # it looks like that shouldn't be possible:  (1) When
                # "save" validates the index entry, it always reads the
                # metadata from the filesystem. (2) Metadata is only
                # read/used from the index if hashvalid is true. (3)
                # "faked" entries will be stale(), and so we'll invalidate
                # them below.
                meta.ctime = meta.mtime = meta.atime = 0
                meta_ofs = msw.store(meta)
                rig.cur.update_from_stat(pst, meta_ofs)
                rig.cur.invalidate()
                need_repack = True
            if not (rig.cur.flags & index.IX_HASHVALID):
                if fake_hash:
                    rig.cur.gitmode, rig.cur.sha = fake_hash(path)
                    rig.cur.flags |= index.IX_HASHVALID
                    need_repack = True
            if opt.fake_invalid:
                rig.cur.invalidate()
                need_repack = True
            if need_repack:
                rig.cur.repack()
            rig.next()
        else:  # new paths
            try:
                meta = metadata.from_path(path, statinfo=pst)
            except (OSError, IOError) as e:
                add_error(e)
                continue
            # See same assignment to 0, above, for rationale.
            meta.atime = meta.mtime = meta.ctime = 0
            meta_ofs = msw.store(meta)
            wi.add(path, pst, meta_ofs, hashgen=fake_hash)
            if not stat.S_ISDIR(pst.st_mode) and pst.st_nlink > 1:
                hlinks.add_path(path, pst.st_dev, pst.st_ino)

    elapsed = time.time() - index_start
    paths_per_sec = total / elapsed if elapsed else 0
    progress('Indexing: %d, done (%d paths/s).\n' % (total, paths_per_sec))

    hlinks.prepare_save()

    if ri.exists():
        ri.save()
        wi.flush()
        if wi.count:
            wr = wi.new_reader()
            if opt.check:
                log('check: before merging: oldfile\n')
                check_index(ri)
                log('check: before merging: newfile\n')
                check_index(wr)
            mi = index.Writer(indexfile, msw, tmax)

            for e in index.merge(ri, wr):
                # FIXME: shouldn't we remove deleted entries eventually?  When?
                mi.add_ixentry(e)

            ri.close()
            mi.close()
            wr.close()
        wi.abort()
    else:
        wi.close()

    msw.close()
    hlinks.commit_save()
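
This revision also batches writes: instead of repacking the entry after every mutation, it accumulates changes under a need_repack flag and repacks once at the end. The general dirty-flag pattern, sketched on a toy record (apply_changes and record.save() are hypothetical):

def apply_changes(record, changes):
    """Set fields, then persist once if anything actually changed."""
    dirty = False
    for field, value in changes.items():
        if getattr(record, field) != value:
            setattr(record, field, value)
            dirty = True
    if dirty:
        record.save()   # hypothetical persistence call, issued once
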
Example #14
exclude-rx-from= skip --exclude-rx patterns in file (may be repeated)
q,quiet  don't actually print filenames
profile  run under the python profiler
"""
o = options.Options(optspec)
(opt, flags, extra) = o.parse(sys.argv[1:])

if len(extra) != 1:
    o.fatal("exactly one filename expected")

drecurse_top = extra[0]
excluded_paths = parse_excludes(flags, o.fatal)
if not drecurse_top.startswith("/"):
    excluded_paths = [relpath(x) for x in excluded_paths]
exclude_rxs = parse_rx_excludes(flags, o.fatal)
it = drecurse.recursive_dirlist([drecurse_top],
                                opt.xdev,
                                excluded_paths=excluded_paths,
                                exclude_rxs=exclude_rxs)
if opt.profile:
    import cProfile

    def do_it():
        for i in it:
            pass

    cProfile.run("do_it()")
else:
    if opt.quiet:
        for i in it:
            pass
    else:
        for (name, st) in it:
            print(name)
Example #15
def update_index(top, excluded_paths, exclude_rxs):
    # tmax and start must be epoch nanoseconds.
    tmax = (time.time() - 1) * 10**9
    ri = index.Reader(indexfile)
    msw = index.MetaStoreWriter(indexfile + '.meta')
    wi = index.Writer(indexfile, msw, tmax)
    rig = IterHelper(ri.iter(name=top))
    tstart = int(time.time()) * 10**9

    hlinks = hlinkdb.HLinkDB(indexfile + '.hlink')

    hashgen = None
    if opt.fake_valid:

        def hashgen(name):
            return (GIT_MODE_FILE, index.FAKE_SHA)

    total = 0
    bup_dir = os.path.abspath(git.repo())
    index_start = time.time()
    for (path,
         pst) in drecurse.recursive_dirlist([top],
                                            xdev=opt.xdev,
                                            bup_dir=bup_dir,
                                            excluded_paths=excluded_paths,
                                            exclude_rxs=exclude_rxs):
        if opt.verbose >= 2 or (opt.verbose == 1
                                and stat.S_ISDIR(pst.st_mode)):
            sys.stdout.write('%s\n' % path)
            sys.stdout.flush()
            elapsed = time.time() - index_start
            paths_per_sec = total / elapsed if elapsed else 0
            qprogress('Indexing: %d (%d paths/s)\r' % (total, paths_per_sec))
        elif not (total % 128):
            elapsed = time.time() - index_start
            paths_per_sec = total / elapsed if elapsed else 0
            qprogress('Indexing: %d (%d paths/s)\r' % (total, paths_per_sec))
        total += 1
        while rig.cur and rig.cur.name > path:  # deleted paths
            if rig.cur.exists():
                rig.cur.set_deleted()
                rig.cur.repack()
                if rig.cur.nlink > 1 and not stat.S_ISDIR(rig.cur.mode):
                    hlinks.del_path(rig.cur.name)
            rig.next()
        if rig.cur and rig.cur.name == path:  # paths that already existed
            try:
                meta = metadata.from_path(path, statinfo=pst)
            except (OSError, IOError) as e:
                add_error(e)
                rig.next()
                continue
            if not stat.S_ISDIR(rig.cur.mode) and rig.cur.nlink > 1:
                hlinks.del_path(rig.cur.name)
            if not stat.S_ISDIR(pst.st_mode) and pst.st_nlink > 1:
                hlinks.add_path(path, pst.st_dev, pst.st_ino)
            # Clear these so they don't bloat the store -- they're
            # already in the index (since they vary a lot and they're
            # fixed length).  If you've noticed "tmax", you might
            # wonder why it's OK to do this, since that code may
            # adjust (mangle) the index mtime and ctime -- producing
            # fake values which must not end up in a .bupm.  However,
            # it looks like that shouldn't be possible:  (1) When
            # "save" validates the index entry, it always reads the
            # metadata from the filesystem. (2) Metadata is only
            # read/used from the index if hashvalid is true. (3) index
            # always invalidates "faked" entries, because "old != new"
            # in from_stat().
            meta.ctime = meta.mtime = meta.atime = 0
            meta_ofs = msw.store(meta)
            rig.cur.from_stat(pst,
                              meta_ofs,
                              tstart,
                              check_device=opt.check_device)
            if not (rig.cur.flags & index.IX_HASHVALID):
                if hashgen:
                    (rig.cur.gitmode, rig.cur.sha) = hashgen(path)
                    rig.cur.flags |= index.IX_HASHVALID
            if opt.fake_invalid:
                rig.cur.invalidate()
            rig.cur.repack()
            rig.next()
        else:  # new paths
            try:
                meta = metadata.from_path(path, statinfo=pst)
            except (OSError, IOError) as e:
                add_error(e)
                continue
            # See same assignment to 0, above, for rationale.
            meta.atime = meta.mtime = meta.ctime = 0
            meta_ofs = msw.store(meta)
            wi.add(path, pst, meta_ofs, hashgen=hashgen)
            if not stat.S_ISDIR(pst.st_mode) and pst.st_nlink > 1:
                hlinks.add_path(path, pst.st_dev, pst.st_ino)
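
The repeated meta.ctime = meta.mtime = meta.atime = 0 lines implement the long comment's rationale: the volatile timestamps already live in the fixed-width index record, so zeroing them before msw.store() lets near-identical metadata blobs deduplicate. A schematic sketch of that split, with hypothetical index_entry and meta_store objects standing in for bup's internals:

def store_entry(st, meta, index_entry, meta_store):
    # Volatile, fixed-width fields stay in the index record...
    index_entry.mtime = st.st_mtime_ns
    index_entry.ctime = st.st_ctime_ns
    # ...and are zeroed in the variable-length metadata blob, so files
    # differing only in timestamps can share one stored blob.
    meta.atime = meta.mtime = meta.ctime = 0
    index_entry.meta_ofs = meta_store.store(meta)
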