def main():
    parser = _make_parser()
    args = parser.parse_args()
    bags = []

    _configure_logging(args)

    if args.directory:
        directory_path = os.path.abspath(args.directory)
        for path in os.listdir(directory_path):
            path = os.path.join(directory_path, path)
            if os.path.isdir(path):
                bags.append(path)

    if args.bagpath:
        bags.append(os.path.abspath(args.bagpath))

    LOGGER.info("Checking {} folder(s).".format(len(bags)))

    for bagpath in tqdm(bags):
        LOGGER.info("Checking: {}".format(bagpath))
        try:
            bag = ami_bag(bagpath)
        except Exception:
            LOGGER.error("{}: Not a bag".format(bagpath))
        else:
            bag.add_json_from_excel()
            update_bag = Repairable_Bag(bagpath)
            update_bag.add_payload_files_not_in_manifest()
            # Reload so validation runs against the updated payload and manifests.
            bag = ami_bag(bagpath)
            bag.validate_amibag()
def survey_bag(bag_path):
    try:
        bag = ami_bag(bag_path)
        bag_valid = bag.validate_amibag(metadata=True)
        bag_type = bag.type
        bag_subtype = bag.subtype
    except Exception:
        # Fall back to a plain bagit load when the AMI-specific load fails.
        bag = bagit.Bag(bag_path)
        bag_valid = False
        bag_type = None
        bag_subtype = None

    # Materialize the glob so the payload can be walked more than once;
    # glob.iglob returns a one-shot generator that the size loop would exhaust.
    all_files = glob.glob(os.path.join(bag_path, 'data/**/*.*'), recursive=True)

    bag_files = 0
    bag_size = 0
    for filepath in all_files:
        bag_files += 1
        bag_size += os.stat(filepath).st_size

    bag_metadata = [filename for filename in all_files
                    if filename.endswith(('.xlsx', '.json'))]
    if bag_metadata:
        bag_metadata = ','.join(bag_metadata)
    else:
        bag_metadata = 'no metadata'

    return [bag_path, bag_type, bag_subtype, bag_size, bag_files,
            bag_valid, bag_metadata]
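# A minimal usage sketch for survey_bag, assuming hypothetical bag_paths and
# output CSV path arguments; only the standard library csv module is used.
import csv

def write_survey_csv(bag_paths, out_path):
    """Run survey_bag over each path and write one summary row per bag."""
    header = ['path', 'type', 'subtype', 'size', 'files', 'valid', 'metadata']
    with open(out_path, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(header)
        for bag_path in bag_paths:
            writer.writerow(survey_bag(bag_path))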
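# The tests that follow assume a unittest fixture that stages a sample AMI bag
# payload in a scratch directory. A minimal sketch, assuming a hypothetical
# SAMPLE_BAG_DIR containing PreservationMasters/myd_263524_v01_pm.mov and its
# sidecar JSON; the real fixture is whatever setUp the suite defines.
import shutil
import tempfile
import unittest

class TestAmiBag(unittest.TestCase):
    def setUp(self):
        # Copy the pristine sample payload so each test can mutate it freely.
        self.tmpdir = tempfile.mkdtemp()
        shutil.rmtree(self.tmpdir)
        shutil.copytree(SAMPLE_BAG_DIR, self.tmpdir)  # SAMPLE_BAG_DIR: hypothetical

    def tearDown(self):
        shutil.rmtree(self.tmpdir, ignore_errors=True)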
def test_load_bag(self):
    bagit.make_bag(self.tmpdir)
    bag = ami_bag.ami_bag(path=self.tmpdir)
    attrs = ['data_files', 'data_dirs', 'data_exts',
             'media_filepaths', 'type', 'subtype']
    for attr in attrs:
        self.assertTrue(hasattr(bag, attr))
    self.assertEqual(bag.type, 'json')
    self.assertEqual(bag.subtype, 'video')
def test_complex_subobject(self):
    pm = os.path.join(self.tmpdir,
                      'PreservationMasters/myd_263524_v01_pm.mov')
    new_pm = pm.replace('v01', 'v01r01p01')
    os.rename(pm, new_pm)
    bagit.make_bag(self.tmpdir)
    bag = ami_bag.ami_bag(path=self.tmpdir)
    self.assertRaises(ami_bag.ami_BagError, bag.check_simple_filenames)
    self.assertFalse(bag.validate_amibag())
def test_invalid_filename(self):
    pm = os.path.join(self.tmpdir,
                      'PreservationMasters/myd_263524_v01_pm.mov')
    new_pm = pm[:-5]
    os.rename(pm, new_pm)
    bagit.make_bag(self.tmpdir)
    bag = ami_bag.ami_bag(path=self.tmpdir)
    self.assertRaises(ami_bag.ami_BagError, bag.check_filenames)
    self.assertFalse(bag.validate_amibag())
def test_metadata_filename_mismatch(self):
    pm_path = os.path.join(self.tmpdir,
                           'PreservationMasters/myd_263524_v01_pm.mov')
    new_pm_path = pm_path.replace('_263524_', '_263523_')
    shutil.copy(pm_path, new_pm_path)
    bagit.make_bag(self.tmpdir)
    bag = ami_bag.ami_bag(path=self.tmpdir)
    self.assertRaises(ami_bag.ami_BagError,
                      bag.check_filenames_manifest_and_metadata_json)
    self.assertFalse(bag.validate_amibag(metadata=True))
def test_deepdirectories(self):
    new_dir = os.path.join(self.tmpdir, 'PreservationMasters/new_dir')
    os.makedirs(new_dir)
    pm_json = os.path.join(self.tmpdir,
                           'PreservationMasters/myd_263524_v01_pm.json')
    shutil.move(pm_json, pm_json.replace('/myd', '/new_dir/myd'))
    bagit.make_bag(self.tmpdir)
    bag = ami_bag.ami_bag(path=self.tmpdir)
    self.assertRaises(ami_bag.ami_BagError, bag.check_directory_depth)
    self.assertFalse(bag.validate_amibag())
def main():
    parser = _make_parser()
    args = parser.parse_args()
    bags = []

    _configure_logging(args)

    checks = "Performing the following validations: Checking Oxums, Checking bag completeness"
    if not args.slow:
        checks += ", Recalculating hashes"
    checks += ", Determining bag type, Checking directory structure, Checking filenames"
    if args.metadata:
        checks += ", Validating Excel metadata files"
    LOGGER.info(checks)

    if args.directory:
        directory_path = os.path.abspath(args.directory)
        for path in os.listdir(directory_path):
            path = os.path.join(directory_path, path)
            if os.path.isdir(path):
                bags.append(path)

    if args.bagpath:
        bags.append(os.path.abspath(args.bagpath))

    LOGGER.info("Checking {} folder(s).".format(len(bags)))

    invalid_bags = []
    valid_bags = []
    for bagpath in tqdm(bags):
        LOGGER.info("Checking: {}".format(bagpath))
        try:
            bag = ami_bag(path=bagpath)
        except Exception as e:
            LOGGER.error("Following error encountered while loading {}: {}".format(bagpath, e))
            invalid_bags.append(os.path.basename(bagpath))
        else:
            if bag.validate_amibag(fast=args.slow, metadata=args.metadata):
                LOGGER.info("Valid {} {} bag: {}".format(bag.type, bag.subtype, bagpath))
                valid_bags.append(os.path.basename(bagpath))
            else:
                LOGGER.warning("Invalid bag: {}".format(bagpath))
                invalid_bags.append(os.path.basename(bagpath))

    if invalid_bags:
        LOGGER.warning("{} of {} bags are not ready for ingest".format(len(invalid_bags), len(bags)))
        LOGGER.info("The following bags are not ready for media ingest: {}".format(", ".join(invalid_bags)))
    if valid_bags:
        LOGGER.info("{} of {} bags are ready for ingest".format(len(valid_bags), len(bags)))
        LOGGER.info("The following bags are ready for media ingest: {}".format(", ".join(valid_bags)))
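# The option names below are inferred from how main() reads args; a minimal
# sketch of the _make_parser helper, not the project's actual definition.
import argparse

def _make_parser():
    parser = argparse.ArgumentParser(description="Validate AMI bags")
    parser.add_argument('-b', '--bagpath', help='path to a single bag')
    parser.add_argument('-d', '--directory',
                        help='path whose immediate subdirectories are checked as bags')
    # main() passes fast=args.slow, so setting this flag skips hash recalculation.
    parser.add_argument('--slow', action='store_true',
                        help='skip recalculating payload hashes')
    parser.add_argument('--metadata', action='store_true',
                        help='also validate Excel/JSON metadata files')
    return parser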
def test_bad_json_metadata(self):
    json_path = os.path.join(self.tmpdir,
                             'PreservationMasters/myd_263524_v01_pm.json')
    with open(json_path, 'r') as f:
        json_data = json.load(f)
    json_data['technical'].pop('durationHuman', None)
    with open(json_path, 'w') as f:
        json.dump(json_data, f, ensure_ascii=False)
    bagit.make_bag(self.tmpdir)
    bag = ami_bag.ami_bag(path=self.tmpdir)
    self.assertRaises(ami_bag.ami_BagError, bag.check_metadata_json)
    self.assertFalse(bag.validate_amibag(metadata=True))
def main():
    parser = _make_parser()
    args = parser.parse_args()
    bags = []

    _configure_logging(args)

    check_list = []
    if args.filenames:
        check_list.append("filename metadata")
    if args.techmd:
        check_list.append("technical metadata")
    LOGGER.info("Performing these repairs: " + ", ".join(check_list))

    if args.directory:
        directory_path = os.path.abspath(args.directory)
        for path in os.listdir(directory_path):
            path = os.path.join(directory_path, path)
            if os.path.isdir(path):
                bags.append(path)

    if args.bagpath:
        bags.append(os.path.abspath(args.bagpath))

    LOGGER.info("Checking {} folder(s).".format(len(bags)))

    for bagpath in bags:
        LOGGER.info("Checking: {}".format(bagpath))
        try:
            bag = ami_bag(bagpath)
        except Exception:
            LOGGER.error("{}: Not an AMI bag".format(bagpath))
            continue  # cannot repair a bag that did not load

        if args.filenames:
            repair_bag_filenamemd(bag, args.repairer, args.dryrun)
            bag._open()
        if args.techmd:
            repair_bag_techmd(bag, args.repairer, args.dryrun)
            bag._open()
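# The entry points above call _configure_logging(args) before logging; a
# minimal sketch, assuming a hypothetical args.quiet option -- the real
# helper may key off different arguments.
import logging

LOGGER = logging.getLogger(__name__)

def _configure_logging(args):
    level = logging.WARNING if getattr(args, 'quiet', False) else logging.INFO
    logging.basicConfig(
        format='%(asctime)s - %(levelname)s - %(message)s',
        level=level)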
def test_incompleted_bag(self):
    bagit.make_bag(self.tmpdir)
    bag = ami_bag.ami_bag(path=self.tmpdir)
    bagit_txt = os.path.join(self.tmpdir, 'bagit.txt')
    os.remove(bagit_txt)
    self.assertFalse(bag.validate_amibag())
def test_valid_bag(self):
    bagit.make_bag(self.tmpdir)
    bag = ami_bag.ami_bag(path=self.tmpdir)
    self.assertTrue(bag.validate_amibag(metadata=True))
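# Standard unittest entry point for running this test module directly.
if __name__ == '__main__':
    unittest.main()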