def head(paths):
    """Print the metadata returned by ``CloudFiles.head`` for each path.

    Args:
        paths: Sequence of path strings. Trailing ``*`` characters are
            stripped before the path is handed to ``CloudFiles``; whether a
            path addresses many objects (and with which prefix / flatness)
            is decided by ``get_mfp``.

    Output:
        * Exactly one path that produced exactly one result: the bare
          metadata value is printed, or a "File not found" message when
          that value is ``None``.
        * Otherwise, if any paths were given, the whole ``{key: metadata}``
          mapping is pretty-printed.
        * With no paths, nothing is printed.
    """
    results = {}
    for path in paths:
        # Strip the trailing wildcard from the *normalized* path. Previously
        # the regex was applied to the raw `path`, which silently threw away
        # the result of normalize_path().
        npath = re.sub(r'\*+$', '', normalize_path(path))
        many, flat, prefix = get_mfp(path, False)
        if many:
            cf = CloudFiles(npath, green=True)
            results.update(cf.head(cf.list(prefix=prefix, flat=flat)))
        else:
            # Single object: open its parent location and head the basename.
            # The result is keyed by the path exactly as the caller wrote it.
            cf = CloudFiles(os.path.dirname(npath), green=True)
            results[path] = cf.head(os.path.basename(npath))

    if len(paths) == 1 and len(results) == 1:
        val = first(results.values())
        if val is not None:
            print(val)
        else:
            print("cloudfiles: head: File not found: {}".format(paths[0]))
    elif len(paths) > 0:
        # The pretty printer is only needed on this branch.
        pprint.PrettyPrinter(indent=2).pprint(results)
def verify(source, target, only_matching, verbose, md5):
    """
    Validates that the checksums of two files or two directories match.
    These tags are usually either md5 or crc32c generated strings.
    These are not secure hashes so they will only catch accidental
    changes to files, not intentionally malicious changes.
    """
    # Parameters (kept out of the docstring, whose text is preserved as-is):
    #   source, target - paths to compare; both files or both directories.
    #   only_matching  - when true, names present on only one side are
    #                    ignored instead of failing the run.
    #   verbose        - print the extra names / a per-file failure table.
    #   md5            - run both metadata maps through populate_md5()
    #                    before comparing; also lifts the "file" protocol ban.
    source = normalize_path(source)
    target = normalize_path(target)

    if ispathdir(source) != ispathdir(target):
        print(
            "cloudfiles: verify source and target must both be files or directories."
        )
        return

    if not md5:
        if get_protocol(source) == "file" or get_protocol(target) == "file":
            print(
                "cloudfiles: verify source and target must be object storage without --md5 option. The filesystem does not store hash information."
            )
            return

    cfsrc, src_files = _verify_listing(source)
    cftarget, target_files = _verify_listing(target)

    matching_files = src_files & target_files
    # Names that exist on exactly one side.
    mismatched_files = src_files ^ target_files

    if mismatched_files and not only_matching:
        if verbose:
            print("Extra source files:")
            print("\n".join(src_files - matching_files))
            print("Extra target files:")
            print("\n".join(target_files - matching_files))
        print(
            red(f"failed. {len(src_files)} source files, {len(target_files)} target files."
        ))
        return

    src_meta = cfsrc.head(matching_files)
    target_meta = cftarget.head(matching_files)

    if md5:
        src_meta = populate_md5(cfsrc, src_meta)
        target_meta = populate_md5(cftarget, target_meta)

    failed_files = [
        name for name in src_meta
        if _verify_entry_failed(src_meta[name], target_meta[name])
    ]

    if len(failed_files) == 0:
        print(
            green(
                f"success. {len(matching_files)} files matching. {len(mismatched_files)} ignored."
        ))
        return

    if verbose:
        failed_files.sort()
        # (title, field width); each column is padded one extra space so the
        # header lines up with the space-separated row format below.
        columns = (
            ("src bytes", 12),
            ("target bytes", 12),
            ("senc", 4),
            ("tenc", 4),
            ("src etag", 34),
            ("target etag", 34),
            ("src md5", 24),
            ("target md5", 24),
        )
        print("".join(title.ljust(width + 1) for title, width in columns) + "filename")
        for filename in failed_files:
            sm = src_meta[filename]
            tm = target_meta[filename]
            print(
                f'{sm["Content-Length"]:<12} {tm["Content-Length"]:<12} {sm["Content-Encoding"] or "None":<4} {tm["Content-Encoding"] or "None":<4} {sm["ETag"] or "None":<34} {tm["ETag"] or "None":<34} {sm["Content-Md5"] or "None":<24} {tm["Content-Md5"] or "None":<24} {filename}'
            )
        print("--")

    print(
        red(f"failed. {len(failed_files)} failed. {len(matching_files) - len(failed_files)} succeeded. {len(mismatched_files)} ignored."
    ))


def _verify_listing(cloudpath):
    """Return ``(CloudFiles handle, set of names)`` for one side of verify().

    A directory path is opened directly and every name it iterates is
    collected; a file path is opened at its parent and contributes only its
    own basename.
    """
    if ispathdir(cloudpath):
        cf = CloudFiles(cloudpath)
        return cf, set(cf)
    cf = CloudFiles(os.path.dirname(cloudpath))
    return cf, {os.path.basename(cloudpath)}


def _verify_entry_failed(sm, tm):
    """Return True when source metadata ``sm`` and target ``tm`` disagree.

    Checks run in this order and stop at the first failure: differing
    Content-Length; neither the ETag pair nor the Content-Md5 pair agreeing;
    an empty or missing source ETag.
    """
    if sm["Content-Length"] != tm["Content-Length"]:
        return True
    if not _verify_tags_agree(sm, tm):
        return True
    return sm["ETag"] in ("", None)


def _verify_tags_agree(sm, tm):
    """Return True when the ETags or the Content-Md5 values of both sides agree.

    Keys are read lazily, left to right, so md5_equal() is only ever called
    with values that are both truthy.
    """
    if sm["ETag"] and tm["ETag"]:
        if sm["ETag"] == tm["ETag"] or md5_equal(sm["ETag"], tm["ETag"]):
            return True
    if sm["Content-Md5"] and tm["Content-Md5"]:
        return bool(md5_equal(sm["Content-Md5"], tm["Content-Md5"]))
    return False