def check_sha1sum(filepath, sha1sums=None, observatory=None):
    """Raise an error if `filepath` has the same sha1sum as a known CRDS file.

    `sha1sums` is a mapping indexed by sha1sum; the value stored under a
    matching sha1sum is reported as the existing CRDS file.  When `sha1sums`
    is None it is obtained from get_all_sha1sums(observatory).

    Returns None when no match is found.

    Raises DuplicateSha1sumError when the checksum of `filepath` is a key
    of `sha1sums`.
    """
    known = get_all_sha1sums(observatory) if sha1sums is None else sha1sums
    digest = utils.checksum(filepath)
    log.verbose("Checking file", repr(filepath), "with sha1sum", repr(digest),
                "for duplication on CRDS server.")
    if digest not in known:
        return
    raise DuplicateSha1sumError(
        "File", repr(os.path.basename(filepath)),
        "is identical to existing CRDS file", repr(known[digest]))
def verify_file(self, file, info, bytes_so_far, total_bytes, nth_file, total_files):
    """Check one local `file` against its CRDS database `info` dictionary.

    `info` is read for the keys "size", "sha1sum", "state", "rejected" and
    "blacklisted".  The remaining parameters are only forwarded to
    api.file_progress() to build the progress message.

    A size mismatch, or a checksum mismatch when checksums are examined, is
    handed to self.error_and_repair().  Rejected or blacklisted files are
    removed when the matching purge option in self.args is set.
    """
    path = config.locate_file(file, observatory=self.observatory)
    base = os.path.basename(file)
    n_bytes = int(info["size"])

    # Progress output is only prominent (verbosity 10) when the slow sha1sum
    # check was requested; otherwise it is pushed down to verbosity 60.
    progress = api.file_progress(
        "Verifying", base, path, n_bytes, bytes_so_far, total_bytes, nth_file, total_files)
    log.verbose(progress, verbosity=10 if self.args.check_sha1sum else 60)

    if not os.path.exists(path):
        # A missing file is only reported if it is not listed in self.bad_files.
        if base not in self.bad_files:
            log.error("File", repr(base), "doesn't exist at", repr(path))
        return

    # Size and checksum are mutually exclusive branches so that a single
    # file is never repaired twice in one pass.
    local_size = os.stat(path).st_size
    if n_bytes != local_size:
        self.error_and_repair(
            path, "File", repr(base),
            "length mismatch LOCAL size=" + srepr(local_size),
            "CRDS size=" + srepr(info["size"]))
    elif self.args.check_sha1sum or config.is_mapping(base):
        log.verbose("Computing checksum for", repr(base), "of size", repr(local_size), verbosity=60)
        local_sha1sum = utils.checksum(path)
        crds_sha1sum = info["sha1sum"]
        if crds_sha1sum == "none":
            log.warning("CRDS doesn't know the checksum for", repr(base))
        elif crds_sha1sum != local_sha1sum:
            self.error_and_repair(
                path, "File", repr(base),
                "checksum mismatch CRDS=" + repr(crds_sha1sum),
                "LOCAL=" + repr(local_sha1sum))

    if info["state"] not in ("archived", "operational"):
        log.warning("File", repr(base), "has an unusual CRDS file state", repr(info["state"]))

    if info["rejected"] != "false":
        log.verbose_warning("File", repr(base), "has been explicitly rejected.", verbosity=60)
        if self.args.purge_rejected:
            self.remove_files([path], "file")
        return

    if info["blacklisted"] != "false":
        log.verbose_warning(
            "File", repr(base), "has been blacklisted or is dependent on a blacklisted file.",
            verbosity=60)
        if self.args.purge_blacklisted:
            self.remove_files([path], "file")
def verify_file(self, file, info, bytes_so_far, total_bytes, nth_file, total_files):
    """Verify a single local `file` using the CRDS database `info` dictionary.

    The keys "size", "sha1sum", "state", "rejected" and "blacklisted" of
    `info` are consulted.  `bytes_so_far`, `total_bytes`, `nth_file` and
    `total_files` are used solely for the api.file_progress() message.

    Length or checksum mismatches are passed to self.error_and_repair();
    rejected or blacklisted files are removed when the corresponding purge
    option in self.args is enabled.
    """
    path = rmap.locate_file(file, observatory=self.observatory)
    base = os.path.basename(file)
    n_bytes = int(info["size"])

    # Verification messages are shown at verbosity 10 only when sha1sum
    # checking (the slow path) is on; otherwise they need verbosity 60.
    message = api.file_progress(
        "Verifying", base, path, n_bytes, bytes_so_far, total_bytes, nth_file, total_files)
    log.verbose(message, verbosity=10 if self.args.check_sha1sum else 60)

    if not os.path.exists(path):
        log.error("File", repr(base), "doesn't exist at", repr(path))
        return

    # The repair-triggering checks form one if/elif chain so at most one
    # repair is attempted per file.
    size_on_disk = os.stat(path).st_size
    if n_bytes != size_on_disk:
        self.error_and_repair(
            path, "File", repr(base),
            "length mismatch LOCAL size=" + srepr(size_on_disk),
            "CRDS size=" + srepr(info["size"]))
    elif self.args.check_sha1sum or config.is_mapping(base):
        log.verbose("Computing checksum for", repr(base), "of size", repr(size_on_disk), verbosity=60)
        computed = utils.checksum(path)
        recorded = info["sha1sum"]
        if recorded == "none":
            log.warning("CRDS doesn't know the checksum for", repr(base))
        elif recorded != computed:
            self.error_and_repair(
                path, "File", repr(base),
                "checksum mismatch CRDS=" + repr(recorded),
                "LOCAL=" + repr(computed))

    state = info["state"]
    if state not in ("archived", "operational"):
        log.warning("File", repr(base), "has an unusual CRDS file state", repr(state))

    if info["rejected"] != "false":
        log.verbose_warning("File", repr(base), "has been explicitly rejected.", verbosity=60)
        if self.args.purge_rejected:
            self.remove_files([path], "files")
        return

    if info["blacklisted"] != "false":
        log.verbose_warning(
            "File", repr(base), "has been blacklisted or is dependent on a blacklisted file.",
            verbosity=60)
        if self.args.purge_blacklisted:
            self.remove_files([path], "files")
def verify_file(self, filename, localpath):
    """Confirm the downloaded copy of `filename` at `localpath` matches the server.

    The expected size and sha1sum come from self.info_map[filename].  The
    size comparison only raises when config.get_length_flag() is true, and
    the sha1sum comparison only runs when config.get_checksum_flag() is true
    and the server's sha1sum is neither "" nor "none".

    Raises CrdsDownloadError on a size or sha1sum mismatch.
    """
    server_info = self.info_map[filename]
    local_size = os.stat(localpath).st_size
    server_size = int(server_info["size"])
    if server_size != local_size and config.get_length_flag():
        raise CrdsDownloadError(
            "downloaded file size", local_size,
            "does not match server size", server_size)

    if not config.get_checksum_flag():
        log.verbose("Skipping sha1sum with CRDS_DOWNLOAD_CHECKSUMS=False")
        return

    server_sha1sum = server_info["sha1sum"]
    if server_sha1sum in ("", "none"):
        log.verbose("Skipping sha1sum check since server doesn't know it.")
        return

    local_sha1sum = utils.checksum(localpath)
    if server_sha1sum != local_sha1sum:
        raise CrdsDownloadError(
            "downloaded file", srepr(filename),
            "sha1sum", srepr(local_sha1sum),
            "does not match server sha1sum", srepr(server_sha1sum))
def verify_file(self, filename, localpath):
    """Validate the size and sha1sum of downloaded `filename` stored at `localpath`.

    Expected values are looked up in self.info_map[filename].  A size
    difference is an error only if config.get_length_flag() is true.  The
    sha1sum is compared only if config.get_checksum_flag() is true and the
    server reports a sha1sum other than "" or "none"; otherwise the check
    is skipped with a verbose log message.

    Raises CrdsDownloadError when the size or the sha1sum does not match.
    """
    expected = self.info_map[filename]
    actual_size = os.stat(localpath).st_size
    expected_size = int(expected["size"])
    size_differs = expected_size != actual_size
    if size_differs and config.get_length_flag():
        raise CrdsDownloadError(
            "downloaded file size", actual_size,
            "does not match server size", expected_size)

    if config.get_checksum_flag():
        expected_sha1sum = expected["sha1sum"]
        if expected_sha1sum in ("", "none"):
            log.verbose("Skipping sha1sum check since server doesn't know it.")
        else:
            actual_sha1sum = utils.checksum(localpath)
            if expected_sha1sum != actual_sha1sum:
                raise CrdsDownloadError(
                    "downloaded file", srepr(filename),
                    "sha1sum", srepr(actual_sha1sum),
                    "does not match server sha1sum", srepr(expected_sha1sum))
    else:
        log.verbose("Skipping sha1sum with CRDS_DOWNLOAD_CHECKSUMS=False")