def test_create_file(self):
    """Check that _create_file() writes files of the requested size.

    The dataset root must not exist before the first call and must exist
    afterwards.  Files requested with ``under='goob'`` must be returned
    with a relative path that begins with ``goob`` + ``os.sep``.
    """
    maker = mkdata.DatasetMaker(self.dsdir, {'totalfiles': 10, 'totalsize': 0})
    self.assertFalse(os.path.exists(self.dsdir))

    # First file: requested without a subdirectory, while the root is
    # still missing.
    relpath = maker._create_file(82)
    self.assertTrue(os.path.exists(self.dsdir))
    fullpath = os.path.join(self.dsdir, relpath)
    self.assertTrue(os.path.exists(fullpath))
    self.assertEqual(os.stat(fullpath).st_size, 82)

    # A large file and then a small one, both requested under "goob".
    for nbytes in (8200841, 411):
        relpath = maker._create_file(nbytes, under='goob')
        self.assertTrue(relpath.startswith("goob" + os.sep),
                        "file not created under goob/")
        fullpath = os.path.join(self.dsdir, relpath)
        self.assertTrue(os.path.exists(fullpath))
        self.assertEqual(os.stat(fullpath).st_size, nbytes)
def test_fill_dir_1(self):
    """Check _fill_dir() when only file specifications are supplied.

    With a request of 1500 bytes / 14 files and no directory specs, the
    root is expected to hold exactly 14 files: four of 25 bytes, four of
    50 bytes and six of 200 bytes.
    """
    maker = mkdata.DatasetMaker(self.dsdir, {'totalfiles': 10, 'totalsize': 0})
    self.assertFalse(os.path.exists(self.dsdir))

    file_specs = [
        {'totalsize': 100, 'totalfiles': 4},
        {'totalsize': 50, 'reps': 4, 'totalfiles': 1},
        {},  # empty spec: left for _fill_dir to complete
    ]
    maker._fill_dir('', 1500, 14, file_specs)
    self.assertTrue(os.path.exists(self.dsdir))

    entries = os.listdir(self.dsdir)
    self.assertEqual(len(entries), 14,
                     "Wrong number of files: expected 14; got " + str(entries))

    sizes = sorted(
        os.stat(os.path.join(self.dsdir, name)).st_size for name in entries
    )
    expected = [25, 25, 25, 25, 50, 50, 50, 50, 200, 200, 200, 200, 200, 200]
    self.assertEqual(sizes, expected)
    self.assertEqual(sum(sizes), 1500)
def test_convert_new(self):
    """Turn a freshly generated bag into a multibag head bag.

    Generates a small dataset under ``self.tempdir``, bags it with
    bagit, converts it with SingleMultibagMaker.convert() and finally
    runs the head-bag validator on the result.  Sets ``self.bagdir`` and
    ``self.mkr`` as a side effect.
    """
    # generate the sample data
    self.bagdir = os.path.join(self.tempdir, "sampledata")
    self.assertFalse(os.path.isdir(self.bagdir))
    spec = {
        'totalsize': 15,
        'totalfiles': 3,
        'files': [{'totalsize': 10, 'totalfiles': 2}],
        'dirs': [{'totalsize': 5, 'totalfiles': 1}],
    }
    mkdata.DatasetMaker(self.bagdir, spec).fill()
    self.assertTrue(os.path.isdir(self.bagdir))

    # wrap the data in a bag; no multibag directory should exist yet
    bag = bagit.make_bag(self.bagdir)
    self.assertTrue(bag.validate())
    multibag_dir = os.path.join(self.bagdir, 'multibag')
    self.assertFalse(os.path.exists(multibag_dir))

    # do the multibag conversion
    self.mkr = amend.SingleMultibagMaker(self.bagdir)
    self.mkr.convert("1.5", "doi:XXXX/11111")
    self.assertTrue(os.path.exists(multibag_dir))

    # the converted bag must pass head-bag validation
    valid8.validate_headbag(self.bagdir)
def test_fill_dir_2(self):
    """Check _fill_dir() with both file and directory specifications.

    For a request of 2000 bytes / 19 files the root is expected to hold
    11 files plus 2 subdirectories (names ending in ``_d``); one
    subdirectory holds three files (166, 167, 167 bytes) and the other
    five files of 120 bytes each.
    """
    maker = mkdata.DatasetMaker(self.dsdir, {'totalfiles': 10, 'totalsize': 0})
    self.assertFalse(os.path.exists(self.dsdir))

    file_specs = [
        {'totalsize': 100, 'totalfiles': 4},
        {'totalsize': 50, 'reps': 4, 'totalfiles': 1},
        {},
    ]
    dir_specs = [{'totalsize': 500}, {'totalfiles': 5}]
    maker._fill_dir('', 2000, 19, file_specs, dir_specs)
    self.assertTrue(os.path.exists(self.dsdir))

    entries = os.listdir(self.dsdir)
    self.assertEqual(
        len(entries), 13,
        "Wrong number of file/dirs: expected 13; got " + str(entries))

    subdirs = [name for name in entries if name.endswith('_d')]
    self.assertEqual(len(subdirs), 2)
    self.assertTrue(
        all(os.path.isdir(os.path.join(self.dsdir, d)) for d in subdirs),
        "Not all directories are really directories: " + str(subdirs))

    # sizes of the plain files sitting directly in the root
    top_sizes = sorted(
        os.stat(os.path.join(self.dsdir, name)).st_size
        for name in entries if not name.endswith('_d')
    )
    self.assertEqual(top_sizes,
                     [25, 25, 25, 25, 50, 50, 50, 50, 200, 200, 200])
    self.assertEqual(sum(top_sizes), 900)

    def contents(subdir):
        # paths of a subdirectory's entries, relative to the dataset root
        return [os.path.join(subdir, name)
                for name in os.listdir(os.path.join(self.dsdir, subdir))]

    def sorted_sizes(relpaths):
        return sorted(os.stat(os.path.join(self.dsdir, p)).st_size
                      for p in relpaths)

    first, second = contents(subdirs[0]), contents(subdirs[1])
    # listdir order is arbitrary: make sure the directory whose first
    # file name ends in "_120" is examined second
    if first[0].endswith('_120'):
        first, second = second, first

    self.assertEqual(len(first), 3)
    self.assertEqual(sorted_sizes(first), [166, 167, 167])

    self.assertEqual(len(second), 5)
    self.assertEqual(sorted_sizes(second), [120, 120, 120, 120, 120])
def test_ensure_root(self):
    """Check that ensure_root() creates the root and can be called twice."""
    maker = mkdata.DatasetMaker(self.dsdir, {'totalfiles': 10, 'totalsize': 0})
    self.assertFalse(os.path.exists(self.dsdir))

    # the second pass calls ensure_root() on an already existing root
    for _ in range(2):
        maker.ensure_root()
        self.assertTrue(os.path.exists(self.dsdir))
def test_mkfid(self):
    """Check that consecutive _mkfid() results differ from each other.

    Ten identifiers are generated; each one is compared with the one
    generated right after it, and the last is compared with the first.
    """
    maker = mkdata.DatasetMaker(self.dsdir, {'totalfiles': 10, 'totalsize': 0})
    names = [maker._mkfid(100) for _ in range(10)]

    # pair every name with its successor, wrapping around at the end
    successors = names[1:] + names[:1]
    for current, following in zip(names, successors):
        self.assertNotEqual(current, following)
def test_mkfilename(self):
    """Check that every name produced by _mkfilename() can be created.

    One hundred names are requested; a one-character file is written for
    each name as soon as it is returned, and afterwards every one of the
    returned paths must exist under the dataset root.
    """
    maker = mkdata.DatasetMaker(self.dsdir, {'totalfiles': 10, 'totalsize': 0})
    maker.ensure_root()

    def decorate(fn):
        # callback handed to _mkfilename(): wraps its argument in underscores
        return "_{0}_".format(fn)

    created = []
    for _ in range(100):
        relpath = maker._mkfilename(decorate, '')
        with open(os.path.join(maker.root, relpath), 'w') as fd:
            fd.write('\n')
        created.append(relpath)

    for relpath in created:
        self.assertTrue(os.path.exists(os.path.join(self.dsdir, relpath)),
                        "failed to create " + relpath)
def mkbag(dsdir):
    """Create a sample bag with a parallel metadata tree in *dsdir*.

    The function

    1. fills *dsdir* with generated files according to a fixed spec
       (22 files, 1014600 bytes in total, spread over the root and two
       nested directory trees);
    2. converts the directory into a bag with ``bagit.make_bag()``;
    3. creates a ``metadata`` directory mirroring the layout of ``data``:
       for each file below ``data`` a file with the same relative path is
       created via ``mkdata.create_file(path, 16)`` and a
       ``<checksum> <path relative to dsdir>`` line is appended to
       ``tagmanifest-sha256.txt``;
    4. appends a ``Bag-Size`` line to ``bag-info.txt``.

    :param dsdir: path of the directory to turn into a bag; as far as this
                  function shows it is created by ``DatasetMaker.fill()``
    """
    spec = {
        "totalfiles": 22,
        "totalsize": 1014600,
        "files": [{
            "type": "inventory",
            "sizes": {1400: 1, 159800: 1, 35000: 2, 0: 1, 1000: 1}
        }],
        "dirs": [{
            "files": [{"type": "inventory", "sizes": {86000: 3}}],
            "dirs": [{
                "files": [{
                    "type": "inventory",
                    "sizes": {110000: 1, 5800: 4}
                }]
            }]
        }, {
            "files": [{"type": "inventory", "sizes": {86000: 3}}],
            "dirs": [{
                "files": [{
                    "type": "inventory",
                    "sizes": {110000: 1, 5800: 4}
                }]
            }]
        }]
    }
    mkr = mkdata.DatasetMaker(dsdir, spec)
    mkr.fill()

    # the returned Bag object is not needed (and not validated) here
    bagit.make_bag(dsdir)

    # create metadata tree
    datadir = os.path.join(dsdir, "data")
    mdatadir = os.path.join(dsdir, "metadata")
    os.mkdir(mdatadir)
    with open(os.path.join(dsdir, "tagmanifest-sha256.txt"), "a") as manifest:
        for dirpath, _subdirs, filenames in os.walk(datadir):
            for name in filenames:
                # relpath() rather than slicing off len(prefix)+1 chars:
                # slicing yields a wrong path when dsdir is given with a
                # trailing separator
                relative = os.path.relpath(os.path.join(dirpath, name),
                                           datadir)
                mdfile = os.path.join(mdatadir, relative)
                mdparent = os.path.dirname(mdfile)
                if not os.path.exists(mdparent):
                    os.makedirs(mdparent)
                # presumably creates a 16-byte file -- confirm in mkdata
                mkdata.create_file(mdfile, 16)
                manifest.write("{0} {1}\n".format(
                    checksum_of(mdfile), os.path.relpath(mdfile, dsdir)))

    # set the Bag-Size
    sz = du(dsdir)
    line = "Bag-Size: {0} B\n".format(sz)
    # the line about to be appended adds to the size too, so include its
    # length and re-render the line with the adjusted value
    sz += len(line)
    line = "Bag-Size: {0} B\n".format(sz)
    with open(os.path.join(dsdir, "bag-info.txt"), "a") as fd:
        fd.write(line)
def test_fill_with_files_w_iter(self):
    """Check _fill_with_files() when sizes come from a size iterator.

    A UniformSizeIterator configured for 500 bytes over 5 files feeds
    the call, which is asked for 4 files; the call must report
    ``(400, 4)`` and leave four 100-byte files under ``goob``.
    """
    maker = mkdata.DatasetMaker(self.dsdir, {'totalfiles': 10, 'totalsize': 0})
    self.assertFalse(os.path.exists(self.dsdir))

    size_source = mkdata.UniformSizeIterator(totalsize=500,
                                             totalfiles=5).iterate()
    result = maker._fill_with_files("goob", 3000, 4, size_source)
    self.assertEqual(result, (400, 4))

    parent = os.path.join(self.dsdir, 'goob')
    files = os.listdir(parent)
    sizes = [os.stat(os.path.join(parent, name)).st_size for name in files]
    self.assertTrue(all(size == 100 for size in sizes),
                    "Wrong file sizes: " + str(sizes))
    self.assertEqual(len(files), 4)
def test_create_dir(self):
    """Check that _create_dir() makes directories, optionally nested.

    The first call is made while the dataset root does not exist yet and
    must create it.  The second call asks for a directory under
    ``furry/goob`` and the returned path must start with that prefix.
    """
    maker = mkdata.DatasetMaker(self.dsdir, {'totalfiles': 10, 'totalsize': 0})
    self.assertFalse(os.path.exists(self.dsdir))

    # directory requested without a parent argument
    relpath = maker._create_dir()
    self.assertTrue(os.path.exists(self.dsdir))
    created = os.path.join(self.dsdir, relpath)
    self.assertTrue(os.path.exists(created))
    self.assertTrue(os.path.isdir(created))

    # directory requested below a two-level parent
    relpath = maker._create_dir("furry/goob")
    self.assertTrue(os.path.exists(self.dsdir))
    expected_prefix = "furry" + os.sep + "goob" + os.sep
    self.assertTrue(relpath.startswith(expected_prefix),
                    "directory not created under furry/goob/")
    created = os.path.join(self.dsdir, relpath)
    self.assertTrue(os.path.exists(created))
    self.assertTrue(os.path.isdir(created))
def test_fill(self):
    """Check fill() against a small complete dataset specification.

    The spec asks for 15 bytes in 3 files: two files in the root and one
    in a single subdirectory.  All three files are expected to be 5
    bytes long.
    """
    spec = {
        'totalsize': 15,
        'totalfiles': 3,
        'files': [{'totalsize': 10, 'totalfiles': 2}],
        'dirs': [{'totalsize': 5, 'totalfiles': 1}],
    }
    maker = mkdata.DatasetMaker(self.dsdir, spec)
    self.assertFalse(os.path.exists(self.dsdir))

    maker.fill()
    self.assertTrue(os.path.exists(self.dsdir))

    entries = os.listdir(self.dsdir)
    self.assertEqual(
        len(entries), 3,
        "Wrong number of file/dirs: expected 3; got " + str(entries))

    subdirs = [name for name in entries if name.endswith('_d')]
    self.assertEqual(len(subdirs), 1)
    subdir_path = os.path.join(self.dsdir, subdirs[0])
    self.assertTrue(os.path.isdir(subdir_path),
                    "Not a directoru: " + str(subdirs))

    # the two plain files in the root
    top_sizes = sorted(
        os.stat(os.path.join(self.dsdir, name)).st_size
        for name in entries if not name.endswith('_d')
    )
    self.assertEqual(top_sizes, [5, 5])

    # the single file inside the subdirectory
    children = os.listdir(subdir_path)
    self.assertEqual(len(children), 1)
    self.assertEqual(
        os.stat(os.path.join(subdir_path, children[0])).st_size, 5)
def test_fill_with_files(self):
    """Check _fill_with_files() with explicit lists of file sizes.

    Covered cases, in order:

    * no sizes supplied (with and without limits): ``(0, 0)`` is returned
      and nothing is created on disk;
    * sizes ``[75, 80]`` with limits 103 / 4: only the 75-byte file is
      expected, reported as ``(75, 1)``;
    * sizes ``[75, 80, 200]`` with limits 1000 / 3 in a nested parent:
      all three files are expected, reported as ``(355, 3)``.
    """
    maker = mkdata.DatasetMaker(self.dsdir, {'totalfiles': 10, 'totalsize': 0})
    self.assertFalse(os.path.exists(self.dsdir))

    # without any sizes there is nothing to write
    result = maker._fill_with_files("goob")
    self.assertEqual(result, (0, 0))
    self.assertFalse(os.path.exists(self.dsdir))

    result = maker._fill_with_files("goob", 103, 2)
    self.assertEqual(result, (0, 0))
    self.assertFalse(os.path.exists(self.dsdir))

    # only the first of the two sizes is expected to be used
    result = maker._fill_with_files("gurn", 103, 4, [75, 80])
    self.assertEqual(result, (75, 1))
    self.assertTrue(os.path.exists(self.dsdir))
    gurn = os.path.join(self.dsdir, 'gurn')
    self.assertTrue(os.path.isdir(gurn))
    files = os.listdir(gurn)
    self.assertEqual(len(files), 1)
    self.assertEqual(os.stat(os.path.join(gurn, files[0])).st_size, 75)

    # all sizes are expected to be used, inside a two-level parent
    result = maker._fill_with_files("furry" + os.sep + "goob", 1000, 3,
                                    [75, 80, 200])
    self.assertEqual(result, (355, 3))
    self.assertTrue(os.path.exists(self.dsdir))
    nested = os.path.join(self.dsdir, 'furry', 'goob')
    self.assertTrue(os.path.isdir(nested))
    sizes = sorted(
        os.stat(os.path.join(nested, name)).st_size
        for name in os.listdir(nested)
    )
    self.assertEqual(sizes, [75, 80, 200])
def test_fill_dir_3(self):
    """Check _fill_dir() with uniform and inventory file specifications.

    For a request of 2000 bytes / 22 files the root is expected to hold
    15 files (400 bytes in total) plus 2 subdirectories (names ending in
    ``_d``); one subdirectory holds two 250-byte files and the other
    five 220-byte files.
    """
    maker = mkdata.DatasetMaker(self.dsdir, {'totalfiles': 10, 'totalsize': 0})
    self.assertFalse(os.path.exists(self.dsdir))

    file_specs = [
        {'totalsize': 100, 'totalfiles': 4},
        {'type': 'inventory', 'sizes': {15: 2, 10: 3}},
        {'totalsize': 50, 'reps': 4, 'totalfiles': 1},
        {'type': 'inventory', 'sizes': {22: 1, 18: 1}},
    ]
    dir_specs = [{'totalsize': 500}, {'totalfiles': 5}]
    maker._fill_dir('', 2000, 22, file_specs, dir_specs)
    self.assertTrue(os.path.exists(self.dsdir))

    entries = os.listdir(self.dsdir)
    self.assertEqual(
        len(entries), 17,
        "Wrong number of file/dirs: expected 17; got " + str(entries))

    subdirs = [name for name in entries if name.endswith('_d')]
    self.assertEqual(len(subdirs), 2)
    self.assertTrue(
        all(os.path.isdir(os.path.join(self.dsdir, d)) for d in subdirs),
        "Not all directories are really directories: " + str(subdirs))

    # sizes of the plain files sitting directly in the root
    top_sizes = sorted(
        os.stat(os.path.join(self.dsdir, name)).st_size
        for name in entries if not name.endswith('_d')
    )
    self.assertEqual(
        top_sizes,
        [10, 10, 10, 15, 15, 18, 22, 25, 25, 25, 25, 50, 50, 50, 50])
    self.assertEqual(sum(top_sizes), 400)

    def contents(subdir):
        # paths of a subdirectory's entries, relative to the dataset root
        return [os.path.join(subdir, name)
                for name in os.listdir(os.path.join(self.dsdir, subdir))]

    def sorted_sizes(relpaths):
        return sorted(os.stat(os.path.join(self.dsdir, p)).st_size
                      for p in relpaths)

    first, second = contents(subdirs[0]), contents(subdirs[1])
    # listdir order is arbitrary: make sure the directory whose first
    # file name ends in "_220" is examined second
    if first[0].endswith('_220'):
        first, second = second, first

    self.assertEqual(len(first), 2)
    self.assertEqual(sorted_sizes(first), [250, 250])

    self.assertEqual(len(second), 5)
    self.assertEqual(sorted_sizes(second), [220, 220, 220, 220, 220])
def test_distribute(self):
    """Check how _distribute() completes file and directory specs.

    ``_distribute()`` is called with lists of file and directory specs
    and modifies them in place.  After each call every file spec must
    carry an ``iter`` entry; the ``iter`` and ``type`` entries are removed
    before comparing the specs with the expected plain dictionaries.

    The first call (2000 bytes / 19 files) is expected to fill in the
    empty file spec and the missing totals of both directory specs.  The
    second call (2200 bytes / 22 files) on the same lists is expected to
    append one extra file spec for the additional 200 bytes / 3 files and
    to leave the directory specs as they were.

    Uses ``assertEqual`` rather than the ``assertEquals`` alias, which is
    deprecated and no longer available in Python 3.12.
    """
    mkr = mkdata.DatasetMaker(self.dsdir, {'totalfiles': 10, 'totalsize': 0})

    files = [
        {'totalsize': 100, 'totalfiles': 4},
        {'totalsize': 50, 'reps': 4, 'totalfiles': 1},
        {},
    ]
    dirs = [{'totalsize': 500}, {'totalfiles': 5}]

    mkr._distribute(2000, 19, files, dirs)
    for fs in files:
        self.assertIn('iter', fs)
        # drop the entries that cannot be compared with plain literals
        del fs['iter']
        del fs['type']
    self.assertEqual(files, [
        {'totalsize': 100, 'totalfiles': 4},
        {'totalsize': 50, 'reps': 4, 'totalfiles': 1},
        {'totalsize': 600, 'totalfiles': 3},
    ])
    self.assertEqual(dirs, [
        {'totalsize': 500, 'totalfiles': 3},
        {'totalsize': 600, 'totalfiles': 5},
    ])

    # test for adding extra
    mkr._distribute(2200, 22, files, dirs)
    for fs in files:
        self.assertIn('iter', fs)
        del fs['iter']
        del fs['type']
    self.assertEqual(files, [
        {'totalsize': 100, 'totalfiles': 4},
        {'totalsize': 50, 'reps': 4, 'totalfiles': 1},
        {'totalsize': 600, 'totalfiles': 3},
        {'totalsize': 200, 'totalfiles': 3},
    ])
    self.assertEqual(dirs, [
        {'totalsize': 500, 'totalfiles': 3},
        {'totalsize': 600, 'totalfiles': 5},
    ])