def test_check(self):
    """Exercise check() under four different size-limit configurations."""
    # Each scenario pairs a splitter configuration with the assertion the
    # result of check() has to satisfy.  Presumably the fixture bag sits
    # somewhere between the 10000 and 1000000000 limits -- confirm against
    # the test data if these expectations ever change.
    scenarios = (
        (
            {
                "max_bag_size": 1000000000,
                "max_headbag_size": 5000000,
                "target_bag_size": 800000000
            },
            self.assertFalse,
        ),
        (
            {
                "max_bag_size": 10000,
                "max_headbag_size": 5000000
            },
            self.assertTrue,
        ),
        (
            {"max_bag_size": 10000},
            self.assertTrue,
        ),
        (
            # with no limits configured, check() is expected to be falsy
            {},
            self.assertFalse,
        ),
    )

    for config, verify in scenarios:
        self.spltr = multibag.MultibagSplitter(self.bagdir, config)
        verify(self.spltr.check())
def test_verify_complete(self):
    """Check that _verify_complete() reports files missing from the split bags.

    The source bag is split, then files are deleted from the output bags
    one at a time; after each deletion _verify_complete() must raise an
    exception whose ``errors`` attribute has one more entry than before.
    """
    cfg = {
        "max_bag_size": 400000,
        "max_headbag_size": 50000,
        # presumably keeps split() from running the verification itself,
        # so that it can be driven explicitly below -- TODO confirm
        "verify_complete": False
    }
    self.spltr = multibag.MultibagSplitter(self.bagdir, cfg)
    bags = self.spltr.split(self.workdir)

    # An untouched split must verify without raising.
    self.spltr._verify_complete(self.bagdir, bags)

    # Take the path (last whitespace-separated token) from the first
    # manifest entry of the first bag and delete that file.
    with open(os.path.join(bags[0], "manifest-sha256.txt")) as fd:
        datafile = fd.readline().strip().split()[-1]
    os.remove(os.path.join(bags[0], datafile))

    # self.fail() lives in the ``else`` clause so that the AssertionError
    # it raises is not swallowed by the ``except Exception`` handler (which
    # would then die on ``ex.errors`` and hide the real failure message).
    try:
        self.spltr._verify_complete(self.bagdir, bags)
    except Exception as ex:
        self.assertEqual(len(ex.errors), 1)
    else:
        self.fail("Failed to raise Validation exception")

    # Prefixing "meta" to the manifest path yields a second path (assumes
    # the manifest path starts with "data/", giving "metadata/..." --
    # verify against the fixture); remove it from the last bag as well.
    datafile = "meta" + datafile
    os.remove(os.path.join(bags[-1], datafile))

    try:
        self.spltr._verify_complete(self.bagdir, bags)
    except Exception as ex:
        self.assertEqual(len(ex.errors), 2)
    else:
        self.fail("Failed to raise Validation exception")
def test_check_and_split(self):
    """check_and_split() with small limits yields four bags, the last with a multibag entry."""
    config = {
        "max_bag_size": 400000,
        "max_headbag_size": 50000
    }
    self.spltr = multibag.MultibagSplitter(self.bagdir, config)

    outbags = self.spltr.check_and_split(self.workdir)
    self.assertEqual(len(outbags), 4)

    # the final bag in the returned list must contain a "multibag" entry
    last_bag = outbags[-1]
    multibag_path = os.path.join(last_bag, "multibag")
    self.assertTrue(os.path.exists(multibag_path))
def test_check_and_split_too_small(self):
    """check_and_split() with very large limits returns one bag without a multibag entry."""
    config = {
        "max_bag_size": 400000000,
        "max_headbag_size": 50000000,
        "verify_complete": False
    }
    self.spltr = multibag.MultibagSplitter(self.bagdir, config)

    outbags = self.spltr.check_and_split(self.workdir)
    self.assertEqual(len(outbags), 1)

    # the single returned bag must not have gained a "multibag" entry
    only_bag = outbags[0]
    multibag_path = os.path.join(only_bag, "multibag")
    self.assertFalse(os.path.exists(multibag_path))
def test_split_replace(self):
    """split() with "replace" set returns bags named dataset-0 through dataset-3."""
    config = {
        "max_bag_size": 400000,
        "max_headbag_size": 50000,
        "replace": True,
        "verify_complete": False
    }
    self.spltr = multibag.MultibagSplitter(self.bagdir, config)
    outbags = self.spltr.split(self.workdir)

    # compare on the final path component only
    names = list(map(os.path.basename, outbags))
    expected = ["dataset-0", "dataset-1", "dataset-2", "dataset-3"]
    self.assertEqual(names, expected)

    # the last bag must carry a "multibag" directory
    multibag_dir = os.path.join(outbags[-1], "multibag")
    self.assertTrue(os.path.isdir(multibag_dir))
def test_confirm_found(self):
    """_confirm_found() returns a "Failed to find" message for an unknown path."""
    config = {
        "max_bag_size": 400000,
        "max_headbag_size": 50000,
        "verify_complete": False
    }
    self.spltr = multibag.MultibagSplitter(self.bagdir, config)
    outbags = self.spltr.split(self.workdir)

    # open the last bag of the split as the head bag
    head = multibag.multibag.open_headbag(outbags[-1])

    # "data/goober" is expected to be reported as not found
    message = self.spltr._confirm_found("data/goober", outbags, head)
    self.assertIsNotNone(message)
    self.assertIn("Failed to find", message)
def test_ctor(self):
    """Check the size limits the constructor stores for various configurations."""

    def expect_limits(maxsz, maxhbsz, trgsz):
        # compare the three limit attributes of the current splitter
        self.assertEqual(self.spltr.maxsz, maxsz)
        self.assertEqual(self.spltr.maxhbsz, maxhbsz)
        self.assertEqual(self.spltr.trgsz, trgsz)

    # no configuration at all: every limit is expected to be 0
    self.spltr = multibag.MultibagSplitter(self.bagdir)
    expect_limits(0, 0, 0)

    # all three limits given explicitly
    config = {
        "max_bag_size": 1000000000,
        "max_headbag_size": 5000000,
        "target_bag_size": 800000000
    }
    self.spltr = multibag.MultibagSplitter(self.bagdir, config)
    expect_limits(1000000000, 5000000, 800000000)

    # only max_bag_size given: the other two are expected to equal it
    config = {"max_bag_size": 10000000}
    self.spltr = multibag.MultibagSplitter(self.bagdir, config)
    expect_limits(10000000, 10000000, 10000000)