def check_checksums(remotepath, cached=False):
    """Report whether every replica of a file carries the same checksum.

    The checksum of the first replica serves as the reference value.
    Returns ``False`` if that reference contains a ``'?'`` or if any other
    replica reports a different checksum, and ``True`` otherwise.
    """
    all_replicas = t2kdm.replicas(remotepath, cached=cached)
    reference = t2kdm.checksum(all_replicas[0], cached=cached)

    # A '?' inside the reference checksum counts as a failed check.
    if '?' in reference:
        return False

    # The generator is evaluated lazily, so the remaining replicas are only
    # queried until the first mismatch shows up.
    return not any(
        t2kdm.checksum(other, cached=cached) != reference
        for other in all_replicas[1:]
    )
def _test_replica(replica, verbose=False):
    """Download a replica and verify its checksum and gzip integrity.

    The replica is fetched into a temporary directory as ``temp.gz``. It
    passes if the adler32 checksum of the downloaded copy equals the checksum
    reported by ``dm.checksum`` and the gzip test on the copy does not raise
    ``sh.ErrorReturnCode``.

    Returns ``True`` when both checks pass, ``False`` otherwise. With
    ``verbose`` set, progress and the reason for a failure are printed.
    """
    with temp_dir() as workdir:
        local_copy = os.path.join(workdir, 'temp.gz')

        if verbose:
            print_("Downloading and checking replica: "+replica)
        dm.backend._get(replica, local_copy, verbose=verbose)

        # First check: the reported checksum must match the downloaded data.
        reported = dm.checksum(replica)
        computed = sh.adler32(local_copy, _tty_out=False).strip()
        if computed != reported:
            if verbose:
                print_(replica)
                print_("Local checksum %s is different from remote checksum %s."%(computed, reported))
            return False

        # Second check: the archive itself must pass the gzip test.
        try:
            sh.gzip(local_copy, test=True, _tty_out=False)
        except sh.ErrorReturnCode:
            if verbose:
                print_(replica)
                print_("Failed the gzip integrity test.")
            return False

        return True
def replicas(*args, **kwargs):
    """Print the replicas of a file on screen, one per line.

    The keyword arguments ``checksum``, ``state`` and ``name`` are removed
    from ``kwargs``; when true, each line is prefixed with the replica's
    checksum, its state and the name of its storage element (``'?'`` if no
    storage element is found), in that order. All remaining arguments are
    passed on to ``t2kdm.replicas``.

    Always returns 0.
    """
    show_checksum = kwargs.pop('checksum', False)
    show_state = kwargs.pop('state', False)
    show_name = kwargs.pop('name', False)

    for replica in t2kdm.replicas(*args, **kwargs):
        # Each optional column is printed as soon as it is known.
        if show_checksum:
            print_(t2kdm.checksum(replica), end=' ')
        if show_state:
            print_(t2kdm.state(replica), end=' ')
        if show_name:
            element = t2kdm.storage.get_SE(replica)
            print_('?' if element is None else element.name, end=' ')
        print_(replica)

    return 0
def replicas(remotepath, *args, **kwargs):
    """Print the replicas of a file on screen, one per line.

    ``remotepath`` is first passed to ``_check_path``. The keyword arguments
    ``checksum``, ``state``, ``name`` and ``distance`` are removed from
    ``kwargs``:

    * ``checksum`` / ``state``: prefix each line with the replica's checksum
      or state. If the query raises, the error message is printed in its
      place.
    * ``name``: prefix each line with the storage element name, or ``'?'``
      if no storage element is found.
    * ``distance``: if true, take the replicas from ``dm.iter_file_sources``
      (with ``tape=True``) instead of ``dm.replicas``; a string value is
      passed on as ``destination``. The remaining arguments are not used in
      this case.

    Always returns 0.
    """
    _check_path(remotepath)

    show_checksum = kwargs.pop('checksum', False)
    show_state = kwargs.pop('state', False)
    show_name = kwargs.pop('name', False)
    distance = kwargs.pop('distance', False)

    if not distance:
        found = dm.replicas(remotepath, *args, **kwargs)
    else:
        source_options = {'tape': True}
        if isinstance(distance, str):
            # A string is interpreted as the destination to measure from.
            source_options['destination'] = distance
        found = [
            source[0]
            for source in dm.iter_file_sources(remotepath, **source_options)
        ]

    for replica in found:
        if show_checksum:
            try:
                checksum_text = dm.checksum(replica)
            except Exception as err:
                # Show the error instead of aborting the whole listing.
                checksum_text = str(err)
            print_(checksum_text, end=' ')

        if show_state:
            try:
                state_text = dm.state(replica)
            except Exception as err:
                state_text = str(err)
            print_(state_text, end=' ')

        if show_name:
            element = dm.storage.get_SE(replica)
            print_('?' if element is None else element.name, end=' ')

        print_(replica)

    return 0
def fix_checksum_errors(remotepath, verbose=False):
    """Fix replicas with differing checksums.

    This can only be done for files that can be checked for corruption.
    Otherwise there is no way to decide which file is actually the correct one.

    Every replica of a ``*.gz`` file is tested with ``_test_replica``.
    Replicas that fail the test are removed from their storage element and
    the file is then replicated to that storage element again.

    Arguments:
        remotepath: Path of the file whose replicas are checked.
        verbose: If true, print progress information and warnings.

    Returns:
        ``True`` if the checksums already agree or every bad replica was
        removed and re-replicated without error. ``False`` if nothing could
        be done, if the storage element of a bad replica could not be
        determined, or if any removal or replication failed.
    """

    replicas = dm.replicas(remotepath)
    checksums = [dm.checksum(r) for r in replicas]

    if len(set(checksums)) == 1 and '?' not in checksums[0]:
        # Nothing to do here
        return True

    if verbose:
        print_("Found faulty checksums.")

    if not remotepath.endswith('.gz'):
        if verbose:
            print_("WARNING: Can only check file consistency of *.gz files!")
            print_("Doing nothing.")
        return False

    # Sort the replicas into those that pass the download test and those
    # that do not.
    good_replicas = []
    bad_replicas = []
    for replica in replicas:
        if _test_replica(replica, verbose=verbose):
            good_replicas.append(replica)
        else:
            bad_replicas.append(replica)

    if not good_replicas:
        # Without a good copy there is nothing to re-replicate from.
        if verbose:
            print_("WARNING: Not a single good replica present!")
            print_("Doing nothing.")
        return False

    if not bad_replicas:
        if verbose:
            print_("WARNING: Not a single bad replica present!")
            print_("This should not happen, since the checksums are different.")
            print_("Doing nothing.")
        return False

    success = True

    bad_SEs = []
    for replica in bad_replicas:
        SE = storage.get_SE(replica)
        if SE is None:
            if verbose:
                print_("WARNING: Could not find storage element for replica: "+replica)
            # The bad replica stays in place, so the file is not fixed.
            success = False
            continue
        bad_SEs.append(SE)

    # Remove all bad replicas first, then replicate to the same storage
    # elements again.
    for SE in bad_SEs:
        if verbose:
            print_("Removing bad replica from %s."%(SE.name,))
        try:
            dm.remove(remotepath, SE, verbose=verbose)
        except Exception as e:
            # Keep going with the other storage elements, but do not swallow
            # KeyboardInterrupt/SystemExit like a bare `except:` would.
            success = False
            if verbose:
                print_("WARNING: Failed to remove replica from %s: %s"%(SE.name, e))

    for SE in bad_SEs:
        if verbose:
            print_("Re-replicating file on %s."%(SE.name,))
        try:
            dm.replicate(remotepath, SE, verbose=verbose)
        except Exception as e:
            success = False
            if verbose:
                print_("WARNING: Failed to replicate file to %s: %s"%(SE.name, e))

    return success