def calc_corruptions(filespecs, piece_size, corruption_positions):
    """
    Return the expected exception for every corrupt piece

    Every position in `corruption_positions` is translated to a piece index
    (``position // piece_size``). For each distinct piece index, one
    `torf.VerifyContentError` wrapped in `ComparableException` is created.
    Positions are processed in sorted order and a piece is only reported the
    first time it is encountered.

    :param filespecs: passed through to `torf.VerifyContentError`
    :param piece_size: divisor that turns a position into a piece index
    :param corruption_positions: iterable of sortable corruption positions

    :return: `fuzzylist` of the wrapped exceptions
    """
    corrupt_pieces = []
    # A set gives constant-time membership tests; the order of the result is
    # determined by `corrupt_pieces`, not by this bookkeeping container.
    reported = set()
    for corrpos in sorted(corruption_positions):
        corr_pi = corrpos // piece_size
        if corr_pi in reported:
            # Multiple corruptions in the same piece produce a single error
            continue
        exc = ComparableException(
            torf.VerifyContentError(corr_pi, piece_size, filespecs))
        corrupt_pieces.append(exc)
        reported.add(corr_pi)
    return fuzzylist(*corrupt_pieces)
def calc_corruptions(filespecs, piece_size, corruption_positions):
    """
    Build the expected errors for the given corruption positions

    Positions are visited in sorted order. The first position that falls into
    a not yet reported piece (``position // piece_size``) yields one
    `torf.VerifyContentError`, wrapped in `ComparableException`. The file path
    passed to that error is the first item returned by `pos2file` for that
    position. Further positions inside the same piece are ignored.

    :return: `fuzzylist` of the wrapped exceptions
    """
    seen_piece_indexes = set()
    expected = []
    for position in sorted(corruption_positions):
        piece_index = position // piece_size
        if piece_index in seen_piece_indexes:
            # This piece already has its exception
            continue
        seen_piece_indexes.add(piece_index)

        # Only the first item of the returned pair is needed here
        filepath, _ = pos2file(position, filespecs, piece_size)
        error = torf.VerifyContentError(
            filepath, piece_index, piece_size, filespecs)
        expected.append(ComparableException(error))
    return fuzzylist(*expected)
def skip_corruptions(all_corruptions, filespecs, piece_size,
                     corruption_positions, files_missing, files_missized):
    """
    Make every non-first corruption optional

    An exception from `all_corruptions` is mandatory if at least one of its
    files has not been seen in an earlier exception and is not listed in
    `files_missing` or `files_missized`. All other exceptions are added to the
    ``maybe`` list of the returned `fuzzylist`.

    In addition, for every position in `corruption_positions`, an optional
    exception is added for the piece that contains the end of the last file
    affected by that position (unless an equal exception is already present).

    :param all_corruptions: iterable of exceptions that provide a ``files``
        attribute
    :param filespecs: passed to `pos2files`, `file_range` and
        `torf.VerifyContentError`
    :param piece_size: size of one piece
    :param corruption_positions: iterable of corruption positions
    :param files_missing: iterable of files; compared by their `str` form
    :param files_missized: iterable of files; compared by their `str` form

    :return: `fuzzylist` of mandatory exceptions with optional exceptions in
        its ``maybe`` attribute
    """
    debug(f'Skipping corruptions: {all_corruptions}')
    files_seen = set()
    corruptions = fuzzylist()
    files_autoskipped = {
        str(f) for f in itertools.chain(files_missing, files_missized)
    }
    debug(f' missing or missized: {files_autoskipped}')

    for exc in all_corruptions:
        # Corruptions for files we haven't seen yet must be reported
        if any(f not in files_seen and f not in files_autoskipped
               for f in exc.files):
            debug(f' mandatory: {exc}')
            files_seen.update(exc.files)
            corruptions.append(exc)

        # Corruptions for files we already have seen may still be reported
        # because skipping is racy and it's impossible to predict how many
        # pieces are processed before the skip manifests.
        else:
            debug(f' optional: {exc}')
            corruptions.maybe.append(exc)

    # Because we fake skipped files, their last piece is reported as corrupt if
    # it contains bytes from the next file even if there is no corruption in
    # the skipped file's last piece. But this is not guaranteed because it's
    # possible the corrupt file is fully processed before its corruption is
    # noticed.
    for corrpos in corruption_positions:
        # Find all files that are affected by the corruption
        affected_files = pos2files(corrpos, filespecs, piece_size)
        debug(f' affected_files: {affected_files}')
        last_file = affected_files[-1]

        # Find piece_index of the end of the last affected file
        _, file_end = file_range(last_file, filespecs)
        piece_index = file_end // piece_size
        debug(f' {last_file} ends at piece_index {piece_index}')

        # Add optional exception for that piece
        exc = ComparableException(
            torf.VerifyContentError(piece_index, piece_size, filespecs))
        if exc not in itertools.chain(corruptions, corruptions.maybe):
            debug(
                f'Adding possible exception for last affected file {last_file}: {exc}'
            )
            corruptions.maybe.append(exc)

    return corruptions