def validate_tree(src_id, dest_id):
    rmrf(b'restore-src')
    rmrf(b'restore-dest')
    mkdir(b'restore-src')
    mkdir(b'restore-dest')
    commit_env = merge_dict(environ, {b'GIT_COMMITTER_DATE': b'2014-01-01 01:01'})
    # Create a commit so the archive contents will have matching timestamps.
    src_c = exo((b'git', b'--git-dir', b'get-src',
                 b'commit-tree', b'-m', b'foo', src_id),
                env=commit_env).out.strip()
    dest_c = exo((b'git', b'--git-dir', b'get-dest',
                  b'commit-tree', b'-m', b'foo', dest_id),
                 env=commit_env).out.strip()
    exr = verify_rcz(b'git --git-dir get-src archive %s | tar xvf - -C restore-src'
                     % bquote(src_c),
                     shell=True)
    if exr.rc != 0:
        return False
    exr = verify_rcz(b'git --git-dir get-dest archive %s | tar xvf - -C restore-dest'
                     % bquote(dest_c),
                     shell=True)
    if exr.rc != 0:
        return False
    # git archive doesn't include an entry for ./.
    unlink(b'restore-src/pax_global_header')
    unlink(b'restore-dest/pax_global_header')
    ex((b'touch', b'-r', b'restore-src', b'restore-dest'))
    verify_trees_match(b'restore-src/', b'restore-dest/')
    rmrf(b'restore-src')
    rmrf(b'restore-dest')
def do_midx_dir(path, outfilename):
    already = {}
    sizes = {}
    if opt.force and not opt.auto:
        midxs = []   # don't use existing midx files
    else:
        midxs = glob.glob('%s/*.midx' % path)
        contents = {}
        for mname in midxs:
            m = git.open_idx(mname)
            contents[mname] = [('%s/%s' % (path, i)) for i in m.idxnames]
            sizes[mname] = len(m)

        # sort the biggest+newest midxes first, so that we can eliminate
        # smaller (or older) redundant ones that come later in the list
        midxs.sort(key=lambda ix: (-sizes[ix], -xstat.stat(ix).st_mtime))

        for mname in midxs:
            any = 0
            for iname in contents[mname]:
                if not already.get(iname):
                    already[iname] = 1
                    any = 1
            if not any:
                debug1('%r is redundant\n' % mname)
                unlink(mname)
                already[mname] = 1

    midxs = [k for k in midxs if not already.get(k)]
    idxs = [k for k in glob.glob('%s/*.idx' % path) if not already.get(k)]

    for iname in idxs:
        i = git.open_idx(iname)
        sizes[iname] = len(i)

    all = [(sizes[n], n) for n in (midxs + idxs)]

    # FIXME: what are the optimal values?  Does this make sense?
    DESIRED_HWM = opt.force and 1 or 5
    DESIRED_LWM = opt.force and 1 or 2
    existed = dict((name, 1) for sz, name in all)
    debug1('midx: %d indexes; want no more than %d.\n'
           % (len(all), DESIRED_HWM))
    if len(all) <= DESIRED_HWM:
        debug1('midx: nothing to do.\n')
    while len(all) > DESIRED_HWM:
        all.sort()
        part1 = [name for sz, name in all[:len(all) - DESIRED_LWM + 1]]
        part2 = all[len(all) - DESIRED_LWM + 1:]
        all = list(do_midx_group(path, outfilename, part1)) + part2
        if len(all) > DESIRED_HWM:
            debug1('\nStill too many indexes (%d > %d).  Merging again.\n'
                   % (len(all), DESIRED_HWM))

    if opt['print']:
        for sz, name in all:
            if not existed.get(name):
                print name
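# The high/low-watermark loop above is the heart of do_midx_dir, so here is a
# minimal, self-contained sketch of just that loop (the _watermark_demo name
# and the numbers are illustrative, not part of bup): with the non-force
# defaults DESIRED_HWM=5 and DESIRED_LWM=2, each pass merges all but the
# largest LWM-1 indexes into one new midx, so e.g. 8 indexes collapse to 2 in
# a single pass.
def _watermark_demo(all, hwm=5, lwm=2):
    # all: list of (size, name) tuples, as in do_midx_dir above
    while len(all) > hwm:
        all.sort()
        part1 = all[:len(all) - lwm + 1]   # the smallest indexes get merged
        part2 = all[len(all) - lwm + 1:]   # the largest lwm-1 survive as-is
        merged = (sum(sz for sz, name in part1), 'merged.midx')
        all = [merged] + part2
    return all

# e.g. _watermark_demo([(i, 'pack%d.idx' % i) for i in range(1, 9)])
# returns [(28, 'merged.midx'), (8, 'pack8.idx')]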
def create_path(n, fullname, meta):
    if meta:
        meta.create_path(fullname)
    else:
        # These fallbacks are important -- meta could be null if, for
        # example, save created a "fake" item, i.e. a new strip/graft
        # path element, etc.  You can find cases like that by
        # searching for "Metadata()".
        unlink(fullname)
        if stat.S_ISDIR(n.mode):
            mkdirp(fullname)
        elif stat.S_ISLNK(n.mode):
            os.symlink(n.readlink(), fullname)
def prepare_save(self):
    """ Commit all of the relevant data to disk.  Do as much work
    as possible without actually making the changes visible."""
    if self._pending_save:
        raise Error('save of %r already in progress' % self._filename)
    with self._cleanup:
        if self._node_paths:
            dir, name = os.path.split(self._filename)
            self._pending_save = atomically_replaced_file(self._filename,
                                                          mode='wb',
                                                          buffering=65536)
            with self._cleanup.enter_context(self._pending_save) as f:
                pickle.dump(self._node_paths, f, 2)
        else: # No data
            self._cleanup.callback(lambda: unlink(self._filename))
        self._cleanup = self._cleanup.pop_all()
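# prepare_save stages the pickle in a temp file via atomically_replaced_file()
# and uses an ExitStack-style pop_all() so the pending rename/cleanup survives
# the 'with' block until a later commit or abort.  Below is a minimal
# stdlib-only sketch of the same two-phase split; the demo_* names are
# illustrative, and os.replace stands in for bup's atomically_replaced_file.
import os, pickle, tempfile

def demo_prepare_save(data, filename):
    # Stage everything in a temp file beside the target; nothing is
    # visible to readers of 'filename' yet.
    fd, tmp = tempfile.mkstemp(dir=os.path.dirname(filename) or '.')
    with os.fdopen(fd, 'wb') as f:
        pickle.dump(data, f, 2)
    return tmp

def demo_commit_save(tmp, filename):
    # The atomic rename is the single step that makes the save visible.
    os.replace(tmp, filename)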
def refresh(self, skip_midx=False):
    """Refresh the index list.
    This method verifies if .midx files were superseded (e.g. all of
    its contents are in another, bigger .midx file) and removes the
    superseded files.

    If skip_midx is True, all work on .midx files will be skipped
    and .midx files will be removed from the list.

    The instance variable 'ignore_midx' can force this function to
    always act as if skip_midx was True.
    """
    if self.bloom is not None:
        self.bloom.close()
    self.bloom = None # Always reopen the bloom as it may have been replaced
    self.do_bloom = False
    skip_midx = skip_midx or self.ignore_midx
    d = dict((p.name, p) for p in self.packs
             if not skip_midx or not isinstance(p, midx.PackMidx))
    if os.path.exists(self.dir):
        if not skip_midx:
            midxl = []
            midxes = set(glob.glob(os.path.join(self.dir, b'*.midx')))
            # remove any *.midx files from our list that no longer exist
            for ix in list(d.values()):
                if not isinstance(ix, midx.PackMidx):
                    continue
                if ix.name in midxes:
                    continue
                # remove the midx
                del d[ix.name]
                ix.close()
                self.packs.remove(ix)
            for ix in self.packs:
                if isinstance(ix, midx.PackMidx):
                    for name in ix.idxnames:
                        d[os.path.join(self.dir, name)] = ix
            for full in midxes:
                if not d.get(full):
                    mx = midx.PackMidx(full)
                    (mxd, mxf) = os.path.split(mx.name)
                    broken = False
                    for n in mx.idxnames:
                        if not os.path.exists(os.path.join(mxd, n)):
                            log(('warning: index %s missing\n'
                                 '  used by %s\n')
                                % (path_msg(n), path_msg(mxf)))
                            broken = True
                    if broken:
                        mx.close()
                        del mx
                        unlink(full)
                    else:
                        midxl.append(mx)
            midxl.sort(
                key=lambda ix: (-len(ix), -xstat.stat(ix.name).st_mtime))
            for ix in midxl:
                any_needed = False
                for sub in ix.idxnames:
                    found = d.get(os.path.join(self.dir, sub))
                    if not found or isinstance(found, PackIdx):
                        # doesn't exist, or exists but not in a midx
                        any_needed = True
                        break
                if any_needed:
                    d[ix.name] = ix
                    for name in ix.idxnames:
                        d[os.path.join(self.dir, name)] = ix
                elif not ix.force_keep:
                    debug1('midx: removing redundant: %s\n'
                           % path_msg(os.path.basename(ix.name)))
                    ix.close()
                    unlink(ix.name)
        for full in glob.glob(os.path.join(self.dir, b'*.idx')):
            if not d.get(full):
                try:
                    ix = open_idx(full)
                except GitError as e:
                    add_error(e)
                    continue
                d[full] = ix
        bfull = os.path.join(self.dir, b'bup.bloom')
        if self.bloom is None and os.path.exists(bfull):
            self.bloom = bloom.ShaBloom(bfull)
        self.packs = list(set(d.values()))
        self.packs.sort(reverse=True, key=lambda x: len(x))
        if self.bloom and self.bloom.valid() and len(self.bloom) >= len(self):
            self.do_bloom = True
        else:
            self.bloom = None
    debug1('PackIdxList: using %d index%s.\n'
           % (len(self.packs), len(self.packs) != 1 and 'es' or ''))
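# A hedged sketch of the keep/drop test applied to each midx in the loop
# above (the demo name and argument shapes are illustrative, not bup API):
# a .midx survives refresh only if at least one of its member .idx files is
# either missing from the map or currently served by a plain PackIdx rather
# than an already-accepted (bigger/newer) midx.
def demo_keep_midx(idxnames, claimed_by_midx):
    # idxnames: the .idx names listed in this midx; claimed_by_midx: names
    # already claimed by previously accepted midxes.
    return any(name not in claimed_by_midx for name in idxnames)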
def _do_midx(outdir, outfilename, infilenames, prefixstr,
             auto=False, force=False):
    global _first
    if not outfilename:
        assert(outdir)
        sum = hexlify(Sha1(b'\0'.join(infilenames)).digest())
        outfilename = b'%s/midx-%s.midx' % (outdir, sum)

    inp = []
    total = 0
    allfilenames = []
    midxs = []
    try:
        for name in infilenames:
            ix = git.open_idx(name)
            midxs.append(ix)
            inp.append((
                ix.map,
                len(ix),
                ix.sha_ofs,
                isinstance(ix, midx.PackMidx) and ix.which_ofs or 0,
                len(allfilenames),
            ))
            for n in ix.idxnames:
                allfilenames.append(os.path.basename(n))
            total += len(ix)
        inp.sort(reverse=True, key=lambda x: x[0][x[2] : x[2] + 20])

        if not _first:
            _first = outdir
        dirprefix = (_first != outdir) and git.repo_rel(outdir) + b': ' or b''
        debug1('midx: %s%screating from %d files (%d objects).\n'
               % (dirprefix, prefixstr, len(infilenames), total))
        if (auto and (total < 1024 and len(infilenames) < 3)) \
           or ((auto or force) and len(infilenames) < 2) \
           or (force and not total):
            debug1('midx: nothing to do.\n')
            return

        pages = int(total / SHA_PER_PAGE) or 1
        bits = int(math.ceil(math.log(pages, 2)))
        entries = 2 ** bits
        debug1('midx: table size: %d (%d bits)\n' % (entries * 4, bits))

        unlink(outfilename)
        with atomically_replaced_file(outfilename, 'wb') as f:
            f.write(b'MIDX')
            f.write(struct.pack('!II', midx.MIDX_VERSION, bits))
            assert(f.tell() == 12)

            f.truncate(12 + 4 * entries + 20 * total + 4 * total)
            f.flush()
            fdatasync(f.fileno())

            fmap = mmap_readwrite(f, close=False)
            count = merge_into(fmap, bits, total, inp)
            del fmap # Assume this calls msync() now.
            f.seek(0, os.SEEK_END)
            f.write(b'\0'.join(allfilenames))
    finally:
        for ix in midxs:
            if isinstance(ix, midx.PackMidx):
                ix.close()
        midxs = None
        inp = None

    # This is just for testing (if you enable this, don't clear inp above)
    if 0:
        p = midx.PackMidx(outfilename)
        assert(len(p.idxnames) == len(infilenames))
        log(repr(p.idxnames) + '\n')
        assert(len(p) == total)
        # Walk the new midx and the merged inputs in lockstep: every SHA
        # must match its merged counterpart and resolve in the midx.
        for pe, e in zip(p, git.idxmerge(inp, final_progress=False)):
            assert(pe == e)
            assert(p.exists(pe))

    return total, outfilename
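# Worked example of the table sizing above, assuming bup's SHA_PER_PAGE is
# 4096/20. (one 4 KiB page of 20-byte SHAs; that value is an assumption
# here, and 'total' is a hypothetical object count):
import math
total = 1000000                            # hypothetical object count
pages = int(total / (4096 / 20.)) or 1     # 4882
bits = int(math.ceil(math.log(pages, 2)))  # 13
entries = 2 ** bits                        # 8192 fanout entries
# 12-byte header + 4-byte fanout entries + 20-byte SHAs
# + 4-byte per-object idx-name indexes:
size = 12 + 4 * entries + 20 * total + 4 * total   # 24032780 bytes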
def refresh(self, skip_midx=False):
    """Refresh the index list.
    This method verifies if .midx files were superseded (e.g. all of
    its contents are in another, bigger .midx file) and removes the
    superseded files.

    If skip_midx is True, all work on .midx files will be skipped
    and .midx files will be removed from the list.

    The module-global variable 'ignore_midx' can force this function
    to always act as if skip_midx was True.
    """
    self.bloom = None # Always reopen the bloom as it may have been replaced
    self.do_bloom = False
    skip_midx = skip_midx or ignore_midx
    d = dict((p.name, p) for p in self.packs
             if not skip_midx or not isinstance(p, midx.PackMidx))
    if os.path.exists(self.dir):
        if not skip_midx:
            midxl = []
            for ix in self.packs:
                if isinstance(ix, midx.PackMidx):
                    for name in ix.idxnames:
                        d[os.path.join(self.dir, name)] = ix
            for full in glob.glob(os.path.join(self.dir, '*.midx')):
                if not d.get(full):
                    mx = midx.PackMidx(full)
                    (mxd, mxf) = os.path.split(mx.name)
                    broken = False
                    for n in mx.idxnames:
                        if not os.path.exists(os.path.join(mxd, n)):
                            log(('warning: index %s missing\n'
                                 + '  used by %s\n') % (n, mxf))
                            broken = True
                    if broken:
                        mx.close()
                        del mx
                        unlink(full)
                    else:
                        midxl.append(mx)
            midxl.sort(
                key=lambda ix: (-len(ix), -xstat.stat(ix.name).st_mtime))
            for ix in midxl:
                any_needed = False
                for sub in ix.idxnames:
                    found = d.get(os.path.join(self.dir, sub))
                    if not found or isinstance(found, PackIdx):
                        # doesn't exist, or exists but not in a midx
                        any_needed = True
                        break
                if any_needed:
                    d[ix.name] = ix
                    for name in ix.idxnames:
                        d[os.path.join(self.dir, name)] = ix
                elif not ix.force_keep:
                    debug1('midx: removing redundant: %s\n'
                           % os.path.basename(ix.name))
                    ix.close()
                    unlink(ix.name)
        for full in glob.glob(os.path.join(self.dir, '*.idx')):
            if not d.get(full):
                try:
                    ix = open_idx(full)
                except GitError as e:
                    add_error(e)
                    continue
                d[full] = ix
        bfull = os.path.join(self.dir, 'bup.bloom')
        if self.bloom is None and os.path.exists(bfull):
            self.bloom = bloom.ShaBloom(bfull)
        self.packs = list(set(d.values()))
        self.packs.sort(lambda x, y: -cmp(len(x), len(y)))
        if self.bloom and self.bloom.valid() and len(self.bloom) >= len(self):
            self.do_bloom = True
        else:
            self.bloom = None
    debug1('PackIdxList: using %d index%s.\n'
           % (len(self.packs), len(self.packs) != 1 and 'es' or ''))
def clear_bloom(dir):
    unlink(os.path.join(dir, 'bup.bloom'))
def cleanup_testfs():
    subprocess.call(['umount', 'testfs'])
    helpers.unlink('testfs.img')
def _do_midx(outdir, outfilename, infilenames, prefixstr):
    global _first
    if not outfilename:
        assert(outdir)
        sum = Sha1('\0'.join(infilenames)).hexdigest()
        outfilename = '%s/midx-%s.midx' % (outdir, sum)

    inp = []
    total = 0
    allfilenames = []
    midxs = []
    try:
        for name in infilenames:
            ix = git.open_idx(name)
            midxs.append(ix)
            inp.append((
                ix.map,
                len(ix),
                ix.sha_ofs,
                isinstance(ix, midx.PackMidx) and ix.which_ofs or 0,
                len(allfilenames),
            ))
            for n in ix.idxnames:
                allfilenames.append(os.path.basename(n))
            total += len(ix)
        inp.sort(lambda x, y: cmp(str(y[0][y[2]:y[2]+20]),
                                  str(x[0][x[2]:x[2]+20])))

        if not _first:
            _first = outdir
        dirprefix = (_first != outdir) and git.repo_rel(outdir) + ': ' or ''
        debug1('midx: %s%screating from %d files (%d objects).\n'
               % (dirprefix, prefixstr, len(infilenames), total))
        if (opt.auto and (total < 1024 and len(infilenames) < 3)) \
           or ((opt.auto or opt.force) and len(infilenames) < 2) \
           or (opt.force and not total):
            debug1('midx: nothing to do.\n')
            return

        pages = int(total/SHA_PER_PAGE) or 1
        bits = int(math.ceil(math.log(pages, 2)))
        entries = 2**bits
        debug1('midx: table size: %d (%d bits)\n' % (entries*4, bits))

        unlink(outfilename)
        with atomically_replaced_file(outfilename, 'wb') as f:
            f.write('MIDX')
            f.write(struct.pack('!II', midx.MIDX_VERSION, bits))
            assert(f.tell() == 12)

            f.truncate(12 + 4*entries + 20*total + 4*total)
            f.flush()
            fdatasync(f.fileno())

            fmap = mmap_readwrite(f, close=False)
            count = merge_into(fmap, bits, total, inp)
            del fmap # Assume this calls msync() now.
            f.seek(0, os.SEEK_END)
            f.write('\0'.join(allfilenames))
    finally:
        for ix in midxs:
            if isinstance(ix, midx.PackMidx):
                ix.close()
        midxs = None
        inp = None

    # This is just for testing (if you enable this, don't clear inp above)
    if 0:
        p = midx.PackMidx(outfilename)
        assert(len(p.idxnames) == len(infilenames))
        print p.idxnames
        assert(len(p) == total)
        # Walk the new midx and the merged inputs in lockstep: every SHA
        # must match its merged counterpart and resolve in the midx.
        for pe, e in zip(p, git.idxmerge(inp, final_progress=False)):
            assert(pe == e)
            assert(p.exists(pe))

    return total, outfilename
def cleanup_testfs(): subprocess.call(["umount", "testfs"]) helpers.unlink("testfs.img")