def test_manifest_netcdf():
    """Compare manifests of the ``*.nc`` files in two different directories.

    A manifest is built and dumped in ``test/testfiles`` and another in
    ``test/testfiles_copy``. They must differ when paths are compared and
    match when ``paths=False``. A third manifest built from a list of
    filepaths must equal the one built file by file in the same directory.
    """
    with cd(os.path.join('test', 'testfiles')):
        mf1 = mf.Manifest('mf1.yaml')
        for filepath in glob.glob('*.nc'):
            mf1.add(filepath, ['nchash', 'md5', 'sha1'])
        mf1.dump()

    with cd(os.path.join('test', 'testfiles_copy')):
        mf2 = mf.Manifest('mf2.yaml')
        for filepath in glob.glob('*.nc'):
            mf2.add(filepath, ['nchash', 'md5', 'sha1'])
        mf2.dump()

    # Unequal because they contain different fullpaths
    assert not mf1.equals(mf2)

    # Equal when paths ignored
    assert mf1.equals(mf2, paths=False)

    # Test with array of filepaths
    with cd(os.path.join('test', 'testfiles_copy')):
        mf1 = mf.Manifest('mf1.yaml')
        mf1.add(glob.glob('*.nc'), ['nchash'])
        # No filepaths given: presumably adds the extra hashes to the
        # entries already in the manifest -- confirm against Manifest.add
        mf1.add(hashfn=['md5', 'sha1'])
        assert mf1.equals(mf2)
def test_open_manifest_and_add():
    """Build a manifest in two passes with a dump and reload in between.

    The ``*.nc`` files are added first and the manifest is written out. The
    object is then deleted, re-created and loaded from disk before the
    ``*.bin`` files are added. After a second dump and reload, the result
    must equal the manifest loaded from ``mf6.yaml``.
    """
    copy_dir = os.path.join('test', 'testfiles_copy')

    with cd(copy_dir):
        # First pass: netCDF files only, then write the manifest out
        manifest = mf.Manifest('mf7.yaml')
        for ncfile in glob.glob('*.nc'):
            manifest.add(ncfile, hashfn=['nchash', 'binhash'])
        manifest.dump()
        del manifest

        # Second pass: read the manifest back in and add the binary files
        manifest = mf.Manifest('mf7.yaml')
        manifest.load()
        for binfile in glob.glob('*.bin'):
            manifest.add(binfile, hashfn=['nchash', 'binhash'])
        manifest.dump()
        del manifest

        # Compare what is now on disk against the reference manifest
        manifest = mf.Manifest('mf7.yaml')
        manifest.load()

        reference = mf.Manifest('mf6.yaml')
        reference.load()

        assert manifest.equals(reference)
def test_manifest_find():
    """Exercise ``Manifest.find`` and ``Manifest.update_matching_hashes``.

    For every file in the loaded ``mf1.yaml``, looking up a stored hash value
    must return that file, and a lookup for ``binhash`` must return ``None``.
    Manifests holding only ``nchash`` are then updated from ``mf1``: the one
    built in the same directory must end up equal to ``mf1``, while the one
    built from the root directory must not, because its filepaths differ.
    """
    with cd(os.path.join('test', 'testfiles')):
        mf1 = mf.Manifest('mf1.yaml')
        mf1.load()

        for filepath in mf1:
            # Test for hashes we know should be in the manifest
            for hashfn in ['nchash', 'md5', 'sha1']:
                hashval = mf1.get(filepath, hashfn)
                found = mf1.find(hashfn, hashval)
                print(hashfn, hashval, filepath, found)
                assert found == filepath

            # Test for one we know shouldn't be there
            hashval = mf1.get(filepath, 'binhash')
            found = mf1.find('binhash', hashval)
            print('binhash', hashval, filepath, found)
            assert found is None

    with cd(os.path.join('test', 'testfiles')):
        mf2 = mf.Manifest('mf2.yaml')

        # Make a manifest only with nchash. The files must go into mf2:
        # adding them to mf1 would leave mf2 empty and the update below
        # would have nothing to match against.
        for filepath in glob.glob('*.nc'):
            mf2.add(filepath, ['nchash'])

        # Update with hashes from mf1
        mf2.update_matching_hashes(mf1)
        assert mf2.equals(mf1)

    # Make same manifest but from the root directory, so have different file
    # paths
    mf3 = mf.Manifest('mf3.yaml')
    for filepath in glob.glob(os.path.join('test', 'testfiles', '*.nc')):
        mf3.add(filepath, ['nchash'])
    mf3.update_matching_hashes(mf1)

    # Manifests should not be equal, their filepaths differ
    assert not mf3.equals(mf1)
def test_malformed_file():
    """Loading a manifest whose header ``format`` is altered raises ValueError.

    A manifest of the ``*.nc`` files is dumped to ``mf9.yaml`` with its
    header format set to ``'bogus'``. Loading that file must raise a
    ``ValueError`` carrying the exact message checked below.
    """
    with cd(os.path.join('test', 'testfiles_copy')):
        broken = mf.Manifest('mf9.yaml')
        for ncfile in glob.glob('*.nc'):
            broken.add(ncfile, ['nchash', 'md5', 'sha1'])

        # Intentionally alter the format string
        broken.header["format"] = 'bogus'
        broken.dump()

        reader = mf.Manifest('mf9.yaml')
        with pytest.raises(ValueError) as excinfo:
            reader.load()

        message = str(excinfo.value)
        print(message)
        assert message == 'Not yamanifest format: bogus'
def test_has_hash():
    """Every entry of the loaded ``mf1.yaml`` has both an md5 and a sha1 hash."""
    manifest = mf.Manifest('mf1.yaml')
    manifest.load()

    for entry in manifest:
        for hashname in ('md5', 'sha1'):
            assert manifest.get(entry, hashname) is not None
def test_yamf():
    """Drive the ``yamf`` command line entry point for ``add`` and ``check``.

    ``yamf add`` is asked to write ``mf8.yaml`` for all ``*.bin`` and ``*.nc``
    files using ``binhash`` and ``nchash``. The resulting manifest must equal
    the one loaded from ``mf6.yaml``, and ``yamf check`` with the same hash
    options must return a truthy value.
    """
    with cd(os.path.join('test', 'testfiles_copy')):
        targets = glob.glob('*.bin') + glob.glob('*.nc')

        # Options shared by the add and check invocations
        manifest_opts = ["-n", "mf8.yaml", "-s", "binhash", "-s", "nchash"]

        yamf.main_parse_args(["add"] + manifest_opts + targets)

        written = mf.Manifest('mf8.yaml')
        written.load()

        reference = mf.Manifest('mf6.yaml')
        reference.load()

        assert written.equals(reference)
        assert yamf.main_parse_args(["check"] + manifest_opts)
def test_manifest_hash_with_binhash():
    """``binhash`` manifests stop checking out once the files are touched.

    ``mf4`` is built from the ``*.bin`` files and must pass ``check()``.
    Each file is then passed to ``touch`` and added to a second manifest,
    ``mf5``. Afterwards ``mf4`` must fail ``check()`` and the two manifests
    must differ.
    """
    with cd(os.path.join('test', 'testfiles_copy')):
        mf4 = mf.Manifest('mf4.yaml')
        for filepath in glob.glob('*.bin'):
            mf4.add(filepath, hashfn='binhash')
        mf4.dump()

        assert mf4.check()

        mf5 = mf.Manifest('mf5.yaml')
        for filepath in glob.glob('*.bin'):
            # NOTE(review): touch is defined elsewhere; presumably it
            # updates the file's timestamp, which binhash is expected to
            # be sensitive to -- confirm
            touch(filepath)
            mf5.add(filepath, hashfn='binhash')

        assert not mf4.check()
        assert not mf5.equals(mf4)
def test_update():
    """``Manifest.update`` with ``newpath`` re-roots the filepaths.

    ``mf1`` lists the files as ``test/<name>`` from the root directory and
    ``mf2`` lists the same files by bare name from inside ``test``.
    Updating an empty manifest from one of them with a suitable ``newpath``
    must give a manifest equal to the other.
    """
    mf1 = mf.Manifest('mf1.yaml')
    files = ['file1', 'file2']
    for filepath in files:
        mf1.add(os.path.join('test', filepath), ['md5', 'sha1'])

    assert len(mf1) == len(files)

    mf1.dump()

    with cd('test'):
        mf2 = mf.Manifest('mf2.yaml')
        for filepath in files:
            mf2.add(filepath, ['md5', 'sha1'])

        # Make a new manifest, populate it with files from mf1, but
        # change path so it should resemble mf2
        mf3 = mf.Manifest('mf3.yaml')
        mf3.update(mf1, newpath=".")

        assert mf3.equals(mf2)

    # As above but in reverse, add a newpath to mf2 so that it
    # should equal mf1
    mf4 = mf.Manifest('mf4.yaml')
    mf4.update(mf2, newpath="test")

    print(mf4.data)
    print(mf1.data)

    assert mf4.equals(mf1)
def test_manifest_read_write():
    """A dumped manifest loads back equal to the in-memory original.

    The load is done twice: as a separate ``load()`` call on a new object,
    and chained directly onto the constructor.
    """
    mf1 = mf.Manifest('mf1.yaml')
    files = ['file1', 'file2']
    for filepath in files:
        mf1.add(os.path.join('test', filepath), ['md5', 'sha1'])

    assert len(mf1) == len(files)

    mf1.dump()

    mf2 = mf.Manifest('mf1.yaml')
    mf2.load()

    assert mf1.equals(mf2)

    # Test chained load
    mf2 = mf.Manifest('mf1.yaml').load()

    assert mf1.equals(mf2)
def test_manifest_netcdf_changed_time():
    """A manifest built after touching the ``*.nc`` files differs from ``mf2.yaml``.

    Each netCDF file is passed to ``touch`` before being added to a new
    manifest, which must not equal the manifest loaded from ``mf2.yaml``.
    Every file in the loaded manifest is then run through ``check_file``
    and the collected hash values are printed for inspection.
    """
    with cd(os.path.join('test', 'testfiles_copy')):
        touched = mf.Manifest('mf3.yaml')
        for ncfile in glob.glob('*.nc'):
            # NOTE(review): touch is defined elsewhere; presumably it
            # updates the file's timestamp -- confirm
            touch(ncfile)
            touched.add(ncfile, ['nchash', 'md5', 'sha1'])
        touched.dump()

        previous = mf.Manifest('mf2.yaml')
        previous.load()

        assert not touched.equals(previous)

        for entry in previous:
            # Fresh dict per file so the printout only shows this file
            collected = {}
            previous.check_file(entry, hashvals=collected)
            print(entry, collected)
def test_manifest_with_mixed_file_types():
    """A manifest mixing ``*.bin`` and ``*.nc`` files checks out.

    Both ``nchash`` and ``binhash`` are requested for every file. The
    ``*.bin`` files must end up with no ``nchash`` entry.
    """
    with cd(os.path.join('test', 'testfiles_copy')):
        mf6 = mf.Manifest('mf6.yaml')
        for filepath in glob.glob('*.bin') + glob.glob('*.nc'):
            mf6.add(filepath, hashfn=['nchash', 'binhash'])
        mf6.dump()

        assert mf6.check()

        # Should have no nchash for the bin files
        for filepath in glob.glob('*.bin'):
            assert mf6.get(filepath, hashfn='nchash') is None
def test_specify_fullpath_as_array():
    """``add`` accepts a list for ``fullpaths`` alongside a list of filepaths.

    The same list is passed as both the filepaths and the ``fullpaths``
    argument, so every entry's recorded fullpath must equal its filepath.
    """
    names = ['file1', 'file2']
    locations = [os.path.join('test', name) for name in names]

    manifest = mf.Manifest('mf1.yaml')

    # Specify a fullpath that is the same as the filepath
    manifest.add(locations, ['md5'], fullpaths=locations)

    assert len(manifest) == len(names)

    manifest.dump()

    assert manifest.check()

    # Now check the fullpath is the same as the filepath
    for entry in manifest:
        assert manifest.fullpath(entry) == entry
def test_shortcircuit_condition():
    """``check`` honours per-hash selection and the ``condition`` argument.

    Starting from the manifest loaded from ``mf8.yaml``, the ``binhash`` of
    the last globbed file is overwritten with ``0``. A full check must then
    fail, as must checks restricted to either hash function, while a check
    with ``condition=any`` and the command line ``check --any`` must pass.
    """
    with cd(os.path.join('test', 'testfiles_copy')):
        manifest = mf.Manifest('mf8.yaml')
        manifest.load()

        assert manifest.check()

        # Alter a hash and make sure check fails
        candidates = glob.glob('*.bin') + glob.glob('*.nc')
        victim = candidates[-1]

        print(victim)
        print(manifest.data[victim])
        manifest.data[victim]["hashes"]["binhash"] = 0
        print(manifest.data[victim])

        print(manifest.check())
        assert not manifest.check()

        # Reload and check it reads in properly again
        manifest.load()
        assert manifest.check()

        manifest.data[victim]["hashes"]["binhash"] = 0

        # nchash should not be true. This behaviour has changed.
        # Decided an entry with no hash defined should be false to
        # trigger actions to create a new hash
        assert not manifest.check(hashfn='nchash')

        # binhash should not be true (set to incorrect value above)
        assert not manifest.check(hashfn='binhash')

        # Set truth condition to any, so happy if any of the hashes
        # are correct
        assert manifest.check(condition=any)

        assert yamf.main_parse_args(["check", "-n", "mf8.yaml", "--any"])
def main(args):
    """
    Main routine. Takes return value from parse.parse_args as input

    Reads ``args.name`` (manifest file), ``args.command`` and
    ``args.hashes``. The ``add`` command also reads ``args.files`` and
    ``args.force``; the ``check`` command also reads ``args.any``.

    ``add``: loads the existing manifest when the file exists and
    ``args.force`` is false, adds ``args.files`` and writes the manifest
    out. Returns ``None``.

    ``check``: loads the manifest and checks it, using ``any`` as the truth
    condition when ``args.any`` is set and ``all`` otherwise. Returns
    ``True`` when the check passes. Otherwise prints the mismatching hashes
    and exits with status 1. Also exits with status 1, after writing the
    reason to stderr, when the manifest cannot be loaded.

    Any other command does nothing and returns ``None``.
    """
    mf1 = mf.Manifest(args.name)

    if args.command == 'add':
        # If manifest exists load existing hash data unless --force
        if os.path.exists(args.name) and not args.force:
            mf1.load()
        mf1.add(args.files, hashfn=args.hashes, force=args.force)
        mf1.dump()
    elif args.command == 'check':
        hashvals = {}
        try:
            mf1.load()
        except Exception as exc:
            # Catch Exception rather than everything, so KeyboardInterrupt
            # and SystemExit propagate, and say why the load failed
            # instead of exiting silently.
            sys.stderr.write(
                "{} :: unable to load manifest: {}\n".format(args.name, exc))
            sys.exit(1)

        condition = any if args.any else all

        if mf1.check(hashfn=args.hashes, hashvals=hashvals,
                     condition=condition):
            print("{} :: hashes are correct".format(args.name))
            return True

        print("{} :: hashes are incorrect".format(args.name))
        print(hashvals)
        for filepath in hashvals:
            stored = mf1.data[filepath]["hashes"]
            for fn in hashvals[filepath]:
                # .get: the manifest may hold no value for this hash
                # function, in which case None is reported
                print(
                    "hashes do not match for {}: fn: {}\n new {} file {}".
                    format(filepath, fn, hashvals[filepath][fn],
                           stored.get(fn)))
        sys.exit(1)
def test_shortcircuit_add():
    """Adding with ``shortcircuit=True`` stores only one hash per file.

    Both ``nchash`` and ``binhash`` are requested for every ``*.bin`` and
    ``*.nc`` file. The manifest must pass ``check()``, the ``*.bin`` files
    must have no ``nchash`` and the ``*.nc`` files must have no ``binhash``.
    """
    with cd(os.path.join('test', 'testfiles_copy')):
        mf6 = mf.Manifest('mf6.yaml')
        for filepath in glob.glob('*.bin') + glob.glob('*.nc'):
            mf6.add(filepath, hashfn=['nchash', 'binhash'], shortcircuit=True)
        mf6.dump()

        assert mf6.check()

        # Should have no nchash for the bin files
        for filepath in glob.glob('*.bin'):
            assert mf6.get(filepath, hashfn='nchash') is None

        # Should have no binhash for the netcdf files
        for filepath in glob.glob('*.nc'):
            assert mf6.get(filepath, hashfn='binhash') is None