def compare_recursive(src_rp, dest_rp, compare_hardlinks=True,
                      exclude_rbdir=True, ignore_tmp_files=False,
                      compare_ownership=False, compare_eas=False,
                      compare_acls=False):
    """Tell whether src_rp and dest_rp (possibly directories) match

    Only file attributes are compared, never the actual data.  If
    compare_hardlinks is set, reset_hardlink_dicts() is called first, so
    the hardlink dictionaries get overwritten.

    Returns 1 when every collated source/destination pair compares
    equal, and 0 as soon as one pair does not.
    """
    message = "Comparing %s and %s, hardlinks %s, eas %s, acls %s" % (
        src_rp.get_safepath(), dest_rp.get_safepath(),
        compare_hardlinks, compare_eas, compare_acls)
    Log(message, 3)

    if compare_hardlinks:
        reset_hardlink_dicts()

    src_iter, dest_iter = _get_selection_functions(
        src_rp, dest_rp,
        exclude_rbdir=exclude_rbdir,
        ignore_tmp_files=ignore_tmp_files)

    # The same comparison switches apply to every pair, so build them once.
    eq_options = {
        "compare_hardlinks": compare_hardlinks,
        "compare_ownership": compare_ownership,
        "compare_eas": compare_eas,
        "compare_acls": compare_acls,
    }
    for src_rorp, dest_rorp in rorpiter.Collate2Iters(src_iter, dest_iter):
        if _files_rorp_eq(src_rorp, dest_rorp, **eq_options):
            continue
        return 0  # first mismatch settles the answer
    return 1
def ListChangedSince(mirror_rp, inc_rp, restore_to_time):
    """List the changed files under mirror_rp since rest time

    Notice the output is an iterator of RORPs.  We do this because we
    want to give the remote connection the data in buffered
    increments, and this is done automatically for rorp iterators.

    Each yielded RORPath has a one-element index holding the report
    line: the change kind ("new", "deleted" or "changed") left-justified
    to 7 columns, a space, then str() of the affected rorp.  Pairs that
    compare equal produce nothing.

    Must be run on the local connection (asserted).  The rf cache set up
    for the comparison is closed once the generator ends, including when
    the consumer closes it early or an error interrupts the iteration.
    """
    assert mirror_rp.conn is Globals.local_connection, "Run locally only"
    MirrorStruct.set_mirror_and_rest_times(restore_to_time)
    MirrorStruct.initialize_rf_cache(mirror_rp, inc_rp)

    try:
        old_iter = MirrorStruct.get_mirror_rorp_iter(
            MirrorStruct._rest_time, 1)
        cur_iter = MirrorStruct.get_mirror_rorp_iter(
            MirrorStruct._mirror_time, 1)
        collated = rorpiter.Collate2Iters(old_iter, cur_iter)
        for old_rorp, cur_rorp in collated:
            # Pick the rorp to describe explicitly rather than with the
            # "a and b or c" idiom, which silently falls through to the
            # other operand whenever the middle value is falsy.
            if not old_rorp:
                change, described = "new", cur_rorp
            elif not cur_rorp:
                change, described = "deleted", old_rorp
            elif old_rorp == cur_rorp:
                continue
            else:
                change, described = "changed", old_rorp
            yield rpath.RORPath(("%-7s %s" % (change, str(described)), ))
    finally:
        # Runs on normal exhaustion, on generator close() and on errors,
        # so the cache opened above is never left behind.
        MirrorStruct.close_rf_cache()
def compare_recursive(src_rp, dest_rp, compare_hardlinks=True,
                      exclude_rbdir=True, ignore_tmp_files=False,
                      compare_ownership=False, compare_eas=False,
                      compare_acls=False):
    """Check that src_rp and dest_rp (possibly directories) are alike

    The comparison covers file attributes only, not the actual data.
    With compare_hardlinks set, the hardlink dictionaries are overwritten
    because reset_hardlink_dicts() is called before comparing.

    Returns 0 at the first pair that differs, 1 if none does.
    """
    Log("Comparing {srp} and {drp}, hardlinks {chl}, "
        "eas {cea}, acls {cacl}".format(
            srp=src_rp, drp=dest_rp, chl=compare_hardlinks,
            cea=compare_eas, cacl=compare_acls), 3)

    if compare_hardlinks:
        reset_hardlink_dicts()

    src_iter, dest_iter = _get_selection_functions(
        src_rp, dest_rp,
        exclude_rbdir=exclude_rbdir, ignore_tmp_files=ignore_tmp_files)

    # any() stops at the first differing pair, like an early return would.
    found_difference = any(
        not _files_rorp_eq(one_src, one_dest,
                           compare_hardlinks=compare_hardlinks,
                           compare_ownership=compare_ownership,
                           compare_eas=compare_eas,
                           compare_acls=compare_acls)
        for one_src, one_dest in rorpiter.Collate2Iters(src_iter, dest_iter))
    if found_difference:
        return 0
    return 1
def attach_files(cls, src_iter, mirror_rp, inc_rp, compare_time):
    """Attach data to all the files that need checking

    Return an iterator of repo rorps that includes all the files that
    may have changed, and has the fileobj set on all rorps that need it.

    For each collated (source, mirror) pair:
    - if both exist, the source is not a regular file and the two compare
      equal, the pair is logged as a success and nothing is yielded;
    - if both are regular files of the same size, a file object obtained
      from cls.rf_cache is attached to the mirror rorp and its attached
      filetype is set to 'snapshot';
    - the mirror rorp is then yielded, or, when there is none, a RORPath
      built from the index alone to indicate the missing mirror entry.
    """
    repo_iter = cls.init_and_get_iter(mirror_rp, inc_rp, compare_time)
    base_index = cls.mirror_base.index
    for src_rorp, mir_rorp in rorpiter.Collate2Iters(src_iter, repo_iter):
        # A conditional expression instead of "a and b or c": an empty
        # (falsy) source index -- presumably the root entry -- must not
        # fall through to mir_rorp.index, as mir_rorp may be None here.
        index = src_rorp.index if src_rorp else mir_rorp.index
        if src_rorp and mir_rorp:
            if not src_rorp.isreg() and src_rorp == mir_rorp:
                _log_success(src_rorp, mir_rorp)
                continue  # They must be equal, nothing else to check
            if (src_rorp.isreg() and mir_rorp.isreg()
                    and src_rorp.getsize() == mir_rorp.getsize()):
                fp = cls.rf_cache.get_fp(base_index + index, mir_rorp)
                mir_rorp.setfile(fp)
                mir_rorp.set_attached_filetype('snapshot')

        if mir_rorp:
            yield mir_rorp
        else:
            yield rpath.RORPath(index)  # indicate deleted mir_rorp
def compare_meta(cls, repo_iter):
    """Quick metadata-only comparison of rorps, yielding reports

    The selection from cls.get_select() is collated with repo_iter.  Each
    pair for which cls._get_basic_report() gives a report has that report
    yielded; every other pair is handed to cls._log_success().
    """
    selected = cls.get_select()
    collated = rorpiter.Collate2Iters(selected, repo_iter)
    for src_rorp, mir_rorp in collated:
        report = cls._get_basic_report(src_rorp, mir_rorp)
        if not report:
            cls._log_success(src_rorp, mir_rorp)
            continue
        yield report
def _get_diffiter(self, new_iter, old_iter):
    """
    Yield the meta diffs leading from new_iter to old_iter

    An entry present only in new_iter yields a RORPath built from just
    its index; an entry missing from new_iter, or whose data differs,
    yields the old rorp.  Entries with identical data yield nothing.
    """
    for new_rorp, old_rorp in rorpiter.Collate2Iters(new_iter, old_iter):
        if not old_rorp:
            yield rpath.RORPath(new_rorp.index)
            continue
        # exact compare here, can't use == on rorps
        differs = not new_rorp or new_rorp.data != old_rorp.data
        if differs:
            yield old_rorp
def set_rorp_cache(cls, baserp, source_iter, for_increment):
    """
    Set up cls.CCPP, the destination rorp cache

    for_increment should be true when mirroring and incrementing, false
    when only mirroring; it is passed on to cls._get_dest_select().
    """
    destination_rorps = cls._get_dest_select(baserp, for_increment)
    pairs = rorpiter.Collate2Iters(source_iter, destination_rorps)
    cls.CCPP = CacheCollatedPostProcess(
        pairs,
        Globals.pipeline_max_length * 4,
        baserp,
    )
def get_diffs(cls, target_iter):
    """Return diffs for the given rorp iterator of target files

    target_iter carries attribute listings only, no actual data, so any
    diffs generated from it will be snapshots.
    """
    base_index = cls.mirror_base.index
    mir_iter = cls.subtract_indices(base_index, cls.get_mirror_rorp_iter())
    return cls._get_diffs_from_collated(
        rorpiter.Collate2Iters(mir_iter, target_iter))
def _iterate_meta_rfs(mirror_rp, inc_rp):
    """Yield RegressFile objects enriched with metadata information

    Every yielded RegressFile has had set_metadata_rorp() called with the
    metadata rorp collated against it; per the original description this
    gives it a .metadata_rorp holding the mirror file's metadata
    attributes at regress_time.  Metadata entries for which
    longname.update_rf() returns nothing are logged as a warning and
    skipped.
    """
    raw_rfs = _iterate_raw_rfs(mirror_rp, inc_rp)
    pairs = rorpiter.Collate2Iters(raw_rfs, _yield_metadata())
    for candidate_rf, metadata_rorp in pairs:
        regress_file = longname.update_rf(candidate_rf, metadata_rorp,
                                          mirror_rp, RegressFile)
        if regress_file:
            regress_file.set_metadata_rorp(metadata_rorp)
            yield regress_file
        else:
            log.Log("Warning, metadata file has entry for path {pa}, "
                    "but there are no associated files.".format(
                        pa=metadata_rorp), log.WARNING)
def compare_hash(cls, repo_iter):
    """Compare rorps and, additionally, sha1 sums of regular files

    Works like the basic report comparison, except that the nested
    hashes_changed() check is passed on to cls._get_basic_report().
    Pairs with a report yield it; the others go to cls._log_success().
    """

    def hashes_changed(src_rp, mir_rorp):
        """Return 0 if their data hashes same, 1 otherwise"""
        expected_sha1 = map_hardlinks.get_hash(mir_rorp)
        if not expected_sha1:
            # Without a stored digest nothing can be verified; this is
            # reported as "unchanged" after warning.
            log.Log("Metadata file has no digest for mirror file {mf}, "
                    "unable to compare.".format(mf=mir_rorp), log.WARNING)
            return 0
        # The size test comes first so the hash is only computed when
        # the sizes already agree.
        same_size = src_rp.getsize() == mir_rorp.getsize()
        if same_size and hash.compute_sha1(src_rp) == expected_sha1:
            return 0
        return 1

    selected = cls.get_select()
    for src_rp, mir_rorp in rorpiter.Collate2Iters(selected, repo_iter):
        report = cls._get_basic_report(src_rp, mir_rorp, hashes_changed)
        if not report:
            cls._log_success(src_rp, mir_rorp)
            continue
        yield report
def yield_sub_rfs(self):
    """Yield the RestoreFiles below this RestoreFile (which is a dir)

    Nothing is yielded when neither self.mirror_rp nor self.inc_rp is a
    directory.  Otherwise the mirror entries and the increment complexes
    are collated, and whichever side is missing for an index is filled
    in from the present one before building the new object.
    """
    if not (self.mirror_rp.isdir() or self.inc_rp.isdir()):
        return

    mirror_iter = (self._yield_mirrorrps(self.mirror_rp)
                   if self.mirror_rp.isdir() else iter([]))
    inc_pair_iter = (self.yield_inc_complexes(self.inc_rp)
                     if self.inc_rp.isdir() else iter([]))

    for mirror_rp, inc_pair in rorpiter.Collate2Iters(mirror_iter,
                                                      inc_pair_iter):
        if inc_pair:
            inc_rp, inc_list = inc_pair
        else:
            # No increments for this index: derive the inc path from the
            # mirror entry and use an empty increment list.
            inc_rp = self.inc_rp.new_index(mirror_rp.index)
            inc_list = []
        if not mirror_rp:
            mirror_rp = self.mirror_rp.new_index_empty(inc_rp.index)
        yield self.__class__(mirror_rp, inc_rp, inc_list)
def CompareRecursive(src_rp, dest_rp, compare_hardlinks = 1,
                     equality_func = None, exclude_rbdir = 1,
                     ignore_tmp_files = None, compare_ownership = 0,
                     compare_eas = 0, compare_acls = 0):
    """Compare src_rp and dest_rp, which can be directories

    This only compares file attributes, not the actual data.  This
    will overwrite the hardlink dictionaries if compare_hardlinks is
    specified.

    Arguments, as used by the code below:
    - compare_hardlinks: when true, reset_hardlink_dicts() is called
      first, each pair is registered with Hardlink.add_rorp() before and
      removed with Hardlink.del_rorp() after its comparison, and
      Hardlink.rorp_eq() must hold for the pair.
    - equality_func: NOTE(review): the nested function of the same name
      rebinds this name, so a caller-supplied value appears to be
      ignored -- confirm whether that is intended.
    - exclude_rbdir: when true, parse_rbdir_exclude() is called on both
      selections.
    - ignore_tmp_files: when true, a regexp selection function for
      rdiff-backup temporary file names is added to both selections.
    - compare_ownership: passed through to equal_verbose().
    - compare_eas / compare_acls: when true, eas_acls.ea_compare_rps() /
      eas_acls.acl_compare_rps() must hold for the pair.

    Returns 1 if all collated pairs compare equal, 0 at the first pair
    that does not.

    """
    def get_selection_functions():
        """Return generators of files in source, dest"""
        src_rp.setdata()
        dest_rp.setdata()
        src_select = selection.Select(src_rp)
        dest_select = selection.Select(dest_rp)

        if ignore_tmp_files:
            # Ignoring temp files can be useful when we want to check the
            # correctness of a backup which aborted in the middle.  In
            # these cases it is OK to have tmp files lying around.
            src_select.add_selection_func(src_select.regexp_get_sf(
                ".*rdiff-backup.tmp.[^/]+$", 0))
            dest_select.add_selection_func(dest_select.regexp_get_sf(
                ".*rdiff-backup.tmp.[^/]+$", 0))

        if exclude_rbdir:
            # Exclude rdiff-backup-data directory
            src_select.parse_rbdir_exclude()
            dest_select.parse_rbdir_exclude()

        return src_select.set_iter(), dest_select.set_iter()

    def preprocess(src_rorp, dest_rorp):
        """Initially process src and dest_rorp"""
        if compare_hardlinks and src_rorp:
            Hardlink.add_rorp(src_rorp, dest_rorp)

    def postprocess(src_rorp, dest_rorp):
        """After comparison, process src_rorp and dest_rorp"""
        if compare_hardlinks and src_rorp:
            Hardlink.del_rorp(src_rorp)

    # NOTE(review): this definition shadows the equality_func parameter.
    def equality_func(src_rorp, dest_rorp):
        """Combined eq func returns true iff two files compare same"""
        # A pair with one side missing can never be equal.
        if not src_rorp:
            Log("Source rorp missing: " + str(dest_rorp), 3)
            return 0
        if not dest_rorp:
            Log("Dest rorp missing: " + str(src_rorp), 3)
            return 0
        if not src_rorp.equal_verbose(dest_rorp,
                                      compare_ownership = compare_ownership):
            return 0
        if compare_hardlinks and not Hardlink.rorp_eq(src_rorp, dest_rorp):
            Log("Hardlink compare failure", 3)
            Log("%s: %s" % (src_rorp.index,
                            Hardlink.get_inode_key(src_rorp)), 3)
            Log("%s: %s" % (dest_rorp.index,
                            Hardlink.get_inode_key(dest_rorp)), 3)
            return 0
        if compare_eas and not eas_acls.ea_compare_rps(src_rorp, dest_rorp):
            Log("Different EAs in files %s and %s" %
                (src_rorp.get_indexpath(), dest_rorp.get_indexpath()), 3)
            return 0
        if compare_acls and not eas_acls.acl_compare_rps(src_rorp, dest_rorp):
            Log("Different ACLs in files %s and %s" %
                (src_rorp.get_indexpath(), dest_rorp.get_indexpath()), 3)
            return 0
        return 1

    Log("Comparing %s and %s, hardlinks %s, eas %s, acls %s" %
        (src_rp.path, dest_rp.path, compare_hardlinks,
         compare_eas, compare_acls), 3)
    if compare_hardlinks:
        reset_hardlink_dicts()
    src_iter, dest_iter = get_selection_functions()
    for src_rorp, dest_rorp in rorpiter.Collate2Iters(src_iter, dest_iter):
        preprocess(src_rorp, dest_rorp)
        if not equality_func(src_rorp, dest_rorp):
            # postprocess() is not run for the pair that failed.
            return 0
        postprocess(src_rorp, dest_rorp)
    return 1

    # NOTE(review): rbdir_equal reads this function's arguments
    # (compare_eas, compare_acls, compare_hardlinks, compare_ownership),
    # so it is laid out here as a nested function; placed after the
    # unconditional return above it would never be defined or called --
    # confirm the intended nesting against the original file.
    def rbdir_equal(src_rorp, dest_rorp):
        """Like hardlink_equal, but make allowances for data directories"""
        # Two empty indexes are treated as equal without further checks.
        if not src_rorp.index and not dest_rorp.index:
            return 1
        if (src_rorp.index and src_rorp.index[0] == 'rdiff-backup-data' and
                src_rorp.index == dest_rorp.index):
            # Don't compare dirs - they don't carry significant info
            if dest_rorp.isdir() and src_rorp.isdir():
                return 1
            if dest_rorp.isreg() and src_rorp.isreg():
                # Don't compare gzipped files because it is apparently
                # non-deterministic.
                if dest_rorp.index[-1].endswith('gz'):
                    return 1
                # Don't compare .missing increments because they don't matter
                if dest_rorp.index[-1].endswith('.missing'):
                    return 1
        if compare_eas and not eas_acls.ea_compare_rps(src_rorp, dest_rorp):
            # NOTE(review): unlike the other Log calls in this function,
            # this one passes no verbosity argument.
            Log("Different EAs in files %s and %s" %
                (src_rorp.get_indexpath(), dest_rorp.get_indexpath()))
            return None
        if compare_acls and not eas_acls.acl_compare_rps(src_rorp, dest_rorp):
            Log("Different ACLs in files %s and %s" %
                (src_rorp.get_indexpath(), dest_rorp.get_indexpath()), 3)
            return None
        if compare_hardlinks:
            if Hardlink.rorp_eq(src_rorp, dest_rorp):
                return 1
        elif src_rorp.equal_verbose(dest_rorp,
                                    compare_ownership = compare_ownership):
            return 1
        # Reached only when the comparison above failed: log both sides.
        Log("%s: %s" % (src_rorp.index, Hardlink.get_indicies(src_rorp, 1)), 3)
        Log("%s: %s" % (dest_rorp.index,
                        Hardlink.get_indicies(dest_rorp, None)), 3)
        return None