def main():
    """Repair and then validate each candidate bag named on the command line.

    Candidate bags are every immediate subdirectory of ``args.directory``
    plus ``args.bagpath``, when those arguments are given. A candidate that
    cannot be loaded as an ``ami_bag`` is logged as an error and skipped.
    The result of the final ``validate_amibag()`` call is not inspected here.
    """
    parser = _make_parser()
    args = parser.parse_args()

    bags = []

    _configure_logging(args)

    if args.directory:
        directory_path = os.path.abspath(args.directory)
        # Only directories are candidates; plain files are ignored.
        candidates = (
            os.path.join(directory_path, name)
            for name in os.listdir(directory_path)
        )
        bags.extend(path for path in candidates if os.path.isdir(path))

    if args.bagpath:
        bags.append(os.path.abspath(args.bagpath))

    LOGGER.info("Checking {} folder(s).".format(len(bags)))

    for bagpath in tqdm(bags):
        LOGGER.info("Checking: {}".format(bagpath))
        try:
            bag = ami_bag(bagpath)
        except Exception:
            # Catch Exception rather than everything, so Ctrl-C and
            # SystemExit still stop the run instead of being reported as
            # "Not a bag".
            LOGGER.error("{}: Not a bag".format(bagpath))
        else:
            bag.add_json_from_excel()
            update_bag = Repairable_Bag(bagpath)
            update_bag.add_payload_files_not_in_manifest()
            # Load the bag again before validating.
            # NOTE(review): presumably the two repair steps above change the
            # bag on disk, which is why a fresh object is needed - confirm.
            bag = ami_bag(bagpath)
            bag.validate_amibag()
Example #2
0
def survey_bag(bag_path):
    """Collect summary facts about the bag rooted at *bag_path*.

    Returns:
        list: ``[bag_path, bag_type, bag_subtype, bag_size, bag_files,
        bag_valid]``. ``bag_files`` counts the paths matching
        ``data/**/*.*`` under the bag and ``bag_size`` is the sum of their
        ``st_size``. ``bag_type`` and ``bag_subtype`` are ``None`` and
        ``bag_valid`` is ``False`` when the path cannot be loaded or
        validated as an ``ami_bag``.
    """
    try:
        bag = ami_bag(bag_path)
        bag_valid = bag.validate_amibag(metadata=True)
        bag_type = bag.type
        bag_subtype = bag.subtype
    except Exception:
        # Catch Exception rather than everything, so an interrupted run is
        # not recorded as an invalid bag.
        # NOTE(review): the plain bagit.Bag is not used afterwards; the call
        # is kept because any error it raises still propagates to the caller.
        bag = bagit.Bag(bag_path)
        bag_valid = False
        bag_type = None
        bag_subtype = None

    # Materialise the listing once. The previous iglob() iterator was drained
    # by the size loop, so the metadata filter below always saw nothing.
    all_files = glob.glob(os.path.join(bag_path, 'data/**/*.*'), recursive=True)

    bag_files = len(all_files)
    bag_size = sum(os.stat(filepath).st_size for filepath in all_files)

    # NOTE(review): bag_metadata is computed but is not part of the returned
    # list; adding it would change the return shape callers see.
    bag_metadata = [filename for filename in all_files if filename.endswith(('.xlsx', '.json'))]
    if bag_metadata:
        bag_metadata = ','.join(bag_metadata)
    else:
        bag_metadata = 'no metadata'

    return [bag_path, bag_type, bag_subtype, bag_size, bag_files, bag_valid]
Example #3
0
def survey_bag(bag_path):
    """Collect summary facts about the bag rooted at *bag_path*.

    Returns:
        list: ``[bag_path, bag_type, bag_subtype, bag_size, bag_files,
        bag_valid]``. ``bag_files`` counts the paths matching
        ``data/**/*.*`` under the bag and ``bag_size`` is the sum of their
        ``st_size``. ``bag_type`` and ``bag_subtype`` are ``None`` and
        ``bag_valid`` is ``False`` when the path cannot be loaded or
        validated as an ``ami_bag``.
    """
    try:
        bag = ami_bag(bag_path)
        bag_valid = bag.validate_amibag(metadata=True)
        bag_type = bag.type
        bag_subtype = bag.subtype
    except Exception:
        # Catch Exception rather than everything, so an interrupted run is
        # not recorded as an invalid bag.
        # NOTE(review): the plain bagit.Bag is not used afterwards; the call
        # is kept because any error it raises still propagates to the caller.
        bag = bagit.Bag(bag_path)
        bag_valid = False
        bag_type = None
        bag_subtype = None

    # Materialise the listing once. The previous iglob() iterator was drained
    # by the size loop, so the metadata filter below always saw nothing.
    all_files = glob.glob(os.path.join(bag_path, 'data/**/*.*'),
                          recursive=True)

    bag_files = len(all_files)
    bag_size = sum(os.stat(filepath).st_size for filepath in all_files)

    # NOTE(review): bag_metadata is computed but is not part of the returned
    # list; adding it would change the return shape callers see.
    bag_metadata = [
        filename for filename in all_files
        if filename.endswith(('.xlsx', '.json'))
    ]
    if bag_metadata:
        bag_metadata = ','.join(bag_metadata)
    else:
        bag_metadata = 'no metadata'

    return [bag_path, bag_type, bag_subtype, bag_size, bag_files, bag_valid]
Example #4
0
 def test_load_bag(self):
   """Check that a freshly made bag loads with the expected attributes.

   The fixture in ``self.tmpdir`` is bagged in place, loaded through
   ``ami_bag.ami_bag`` and expected to report type 'json' and subtype
   'video'.
   """
   bagit.make_bag(self.tmpdir)
   bag = ami_bag.ami_bag(path = self.tmpdir)
   attrs = ['data_files', 'data_dirs', 'data_exts',
     'media_filepaths', 'type', 'subtype']
   for attr in attrs:
     # Name the attribute so a failure says which one is missing.
     self.assertTrue(hasattr(bag, attr),
       'ami_bag is missing attribute {!r}'.format(attr))
   # assertEqual reports the actual value on failure, unlike
   # assertTrue(a == b), which only says "False is not true".
   self.assertEqual(bag.type, 'json')
   self.assertEqual(bag.subtype, 'video')
Example #5
0
 def test_complex_subobject(self):
   """A preservation master named with 'v01r01p01' must fail the simple check.

   The file is renamed so 'v01' becomes 'v01r01p01' before bagging;
   ``check_simple_filenames`` must raise ``ami_BagError`` and
   ``validate_amibag`` must return a falsy result.
   """
   original = os.path.join(self.tmpdir,
     'PreservationMasters/myd_263524_v01_pm.mov')
   os.rename(original, original.replace('v01', 'v01r01p01'))
   bagit.make_bag(self.tmpdir)
   loaded = ami_bag.ami_bag(path = self.tmpdir)
   with self.assertRaises(ami_bag.ami_BagError):
     loaded.check_simple_filenames()
   self.assertFalse(loaded.validate_amibag())
Example #6
0
 def test_invalid_filename(self):
   """A preservation master with a truncated name must fail the filename check.

   The last five characters of the path are dropped before bagging;
   ``check_filenames`` must raise ``ami_BagError`` and ``validate_amibag``
   must return a falsy result.
   """
   original = os.path.join(self.tmpdir,
     'PreservationMasters/myd_263524_v01_pm.mov')
   truncated = original[:-5]
   os.rename(original, truncated)
   bagit.make_bag(self.tmpdir)
   loaded = ami_bag.ami_bag(path = self.tmpdir)
   with self.assertRaises(ami_bag.ami_BagError):
     loaded.check_filenames()
   self.assertFalse(loaded.validate_amibag())
Example #7
0
 def test_metadata_filename_mismatch(self):
   """An extra media file with a different id must fail the manifest/JSON check.

   The preservation master is copied to a name in which '_263524_' is
   replaced by '_263523_' before bagging;
   ``check_filenames_manifest_and_metadata_json`` must raise
   ``ami_BagError`` and metadata validation must return a falsy result.
   """
   source = os.path.join(self.tmpdir,
     'PreservationMasters/myd_263524_v01_pm.mov')
   shutil.copy(source, source.replace('_263524_', '_263523_'))
   bagit.make_bag(self.tmpdir)
   loaded = ami_bag.ami_bag(path = self.tmpdir)
   with self.assertRaises(ami_bag.ami_BagError):
     loaded.check_filenames_manifest_and_metadata_json()
   self.assertFalse(loaded.validate_amibag(metadata = True))
Example #8
0
 def test_deepdirectories(self):
   """A file nested one directory deeper must fail the depth check.

   The preservation master JSON is moved into a new
   'PreservationMasters/new_dir' folder before bagging;
   ``check_directory_depth`` must raise ``ami_BagError`` and
   ``validate_amibag`` must return a falsy result.
   """
   nested_dir = os.path.join(self.tmpdir, 'PreservationMasters/new_dir')
   os.makedirs(nested_dir)
   json_source = os.path.join(self.tmpdir,
     'PreservationMasters/myd_263524_v01_pm.json')
   json_target = json_source.replace('/myd', '/new_dir/myd')
   shutil.move(json_source, json_target)
   bagit.make_bag(self.tmpdir)
   loaded = ami_bag.ami_bag(path = self.tmpdir)
   with self.assertRaises(ami_bag.ami_BagError):
     loaded.check_directory_depth()
   self.assertFalse(loaded.validate_amibag())
Example #9
0
def main():
    """Validate one or more AMI bags and log which are ready for media ingest.

    Candidate bags are every immediate subdirectory of ``args.directory``
    plus ``args.bagpath``, when those arguments are given. Each candidate is
    loaded as an ``ami_bag`` and validated. A bag that cannot be loaded or
    that fails validation is listed as not ready in the closing summary.
    """
    parser = _make_parser()
    args = parser.parse_args()

    bags = []

    _configure_logging(args)

    # Announce the checks that the selected options enable.
    checks = "Performing the following validations: Checking 0xums, Checking bag completeness"
    if not args.slow:
        checks += ", Recalculating hashes"
    checks += ", Determing bag type, Checking directory structure, Checking filenames"
    if args.metadata:
        checks += ", Validating Excel metadata files."
    LOGGER.info(checks)

    if args.directory:
        directory_path = os.path.abspath(args.directory)
        # Only directories are candidates; plain files are ignored.
        candidates = (
            os.path.join(directory_path, name)
            for name in os.listdir(directory_path)
        )
        bags.extend(path for path in candidates if os.path.isdir(path))

    if args.bagpath:
        bags.append(os.path.abspath(args.bagpath))

    LOGGER.info("Checking {} folder(s).".format(len(bags)))

    invalid_bags = []
    valid_bags = []
    for bagpath in tqdm(bags):
        LOGGER.info("Checking: {}".format(bagpath))
        bag_name = os.path.basename(bagpath)
        try:
            bag = ami_bag(path=bagpath)
        except Exception as e:
            LOGGER.error(
                "Following error encountered while loading {}: {}".format(
                    bagpath, e))
            # A bag that cannot be loaded is not ready for ingest either.
            # Without this it would be missing from the closing summary.
            invalid_bags.append(bag_name)
            continue

        # args.slow is forwarded as `fast`, matching the announcement above:
        # hashes are only recalculated when args.slow is unset.
        if bag.validate_amibag(fast=args.slow, metadata=args.metadata):
            LOGGER.info("Valid {} {} bag: {}".format(
                bag.type, bag.subtype, bagpath))
            valid_bags.append(bag_name)
        else:
            LOGGER.error("Invalid bag: {}".format(bagpath))
            invalid_bags.append(bag_name)

    if invalid_bags:
        LOGGER.info(
            "The following bags are not ready for media ingest: {}".format(
                ", ".join(invalid_bags)))
    if valid_bags:
        LOGGER.info("The following bags are ready for media ingest: {}".format(
            ", ".join(valid_bags)))
Example #10
0
def main():
    """Validate one or more AMI bags and log which are ready for media ingest.

    Candidate bags are every immediate subdirectory of ``args.directory``
    plus ``args.bagpath``, when those arguments are given. Each candidate is
    loaded as an ``ami_bag`` and validated. Bags that cannot be loaded or
    that fail validation are counted and listed as not ready; the others are
    counted and listed as ready.
    """
    parser = _make_parser()
    args = parser.parse_args()

    bags = []

    _configure_logging(args)

    # Announce the checks that the selected options enable.
    checks = "Performing the following validations: Checking 0xums, Checking bag completeness"
    if not args.slow:
        checks += ", Recalculating hashes"
    checks += ", Determing bag type, Checking directory structure, Checking filenames"
    if args.metadata:
        checks += ", Validating Excel metadata files."
    LOGGER.info(checks)

    if args.directory:
        directory_path = os.path.abspath(args.directory)
        # Only directories are candidates; plain files are ignored.
        candidates = (
            os.path.join(directory_path, name)
            for name in os.listdir(directory_path)
        )
        bags.extend(path for path in candidates if os.path.isdir(path))

    if args.bagpath:
        bags.append(os.path.abspath(args.bagpath))

    LOGGER.info("Checking {} folder(s).".format(len(bags)))

    invalid_bags = []
    valid_bags = []
    for bagpath in tqdm(bags):
        LOGGER.info("Checking: {}".format(bagpath))
        bag_name = os.path.basename(bagpath)
        try:
            bag = ami_bag(path = bagpath)
        except Exception as e:
            LOGGER.error("Following error encountered while loading {}: {}".format(bagpath, e))
            invalid_bags.append(bag_name)
            continue

        # args.slow is forwarded as `fast`, matching the announcement above:
        # hashes are only recalculated when args.slow is unset.
        if bag.validate_amibag(fast = args.slow, metadata = args.metadata):
            LOGGER.info("Valid {} {} bag: {}".format(bag.type, bag.subtype, bagpath))
            valid_bags.append(bag_name)
        else:
            # Logger.warn is a deprecated alias; Logger.warning is the
            # supported spelling and logs at the same level.
            LOGGER.warning("Invalid bag: {}".format(bagpath))
            invalid_bags.append(bag_name)

    # The counts are logged at WARNING level and the name lists at INFO level.
    if invalid_bags:
        LOGGER.warning("{} of {} bags are not ready for ingest".format(len(invalid_bags), len(bags)))
        LOGGER.info("The following bags are not ready for media ingest: {}".format(", ".join(invalid_bags)))
    if valid_bags:
        LOGGER.warning("{} of {} bags are ready for ingest".format(len(valid_bags), len(bags)))
        LOGGER.info("The following bags are ready for media ingest: {}".format(", ".join(valid_bags)))
Example #11
0
 def test_bad_json_metadata(self):
   """A JSON file missing 'durationHuman' must fail the metadata check.

   The 'durationHuman' key is removed from the 'technical' section of the
   preservation master JSON before bagging; ``check_metadata_json`` must
   raise ``ami_BagError`` and metadata validation must return a falsy
   result.
   """
   json_path = os.path.join(self.tmpdir,
     'PreservationMasters/myd_263524_v01_pm.json')
   # Read and write explicitly as UTF-8. With ensure_ascii=False the dump
   # emits non-ASCII characters as-is, which the locale's default encoding
   # may be unable to represent.
   with open(json_path, 'r', encoding='utf-8') as f:
     json_data = json.load(f)
   json_data['technical'].pop('durationHuman', None)
   with open(json_path, 'w', encoding='utf-8') as f:
     json.dump(json_data, f, ensure_ascii=False)
   bagit.make_bag(self.tmpdir)
   bag = ami_bag.ami_bag(path = self.tmpdir)
   self.assertRaises(ami_bag.ami_BagError,
     bag.check_metadata_json)
   self.assertFalse(bag.validate_amibag(metadata = True))
Example #12
0
def main():
    """Run the requested metadata repairs on each candidate AMI bag.

    Candidate bags are every immediate subdirectory of ``args.directory``
    plus ``args.bagpath``, when those arguments are given. A candidate that
    cannot be loaded as an ``ami_bag`` is logged as an error and skipped.
    ``args.filenames`` and ``args.techmd`` select which repairs run;
    ``args.repairer`` and ``args.dryrun`` are passed through to them.
    """
    parser = _make_parser()
    args = parser.parse_args()

    bags = []

    _configure_logging(args)

    # Announce every repair that the loop below can run.
    check_list = []
    if args.filenames:
        check_list.append("filename metadata")
    if args.techmd:
        check_list.append("technical metadata")
    checks = "Performing these repairs: " + ", ".join(check_list)
    LOGGER.info(checks)

    if args.directory:
        directory_path = os.path.abspath(args.directory)
        # Only directories are candidates; plain files are ignored.
        candidates = (
            os.path.join(directory_path, name)
            for name in os.listdir(directory_path)
        )
        bags.extend(path for path in candidates if os.path.isdir(path))

    if args.bagpath:
        bags.append(os.path.abspath(args.bagpath))

    LOGGER.info("Checking {} folder(s).".format(len(bags)))

    for bagpath in bags:
        LOGGER.info("Checking: {}".format(bagpath))
        try:
            bag = ami_bag(bagpath)
        except Exception:
            LOGGER.error("{}: Not an AMI bag".format(bagpath))
            # Skip this folder. Falling through would use `bag` while it is
            # unbound (NameError) or still holds the previous folder's bag,
            # which would repair the wrong bag.
            continue

        if args.filenames:
            repair_bag_filenamemd(bag, args.repairer, args.dryrun)
            # NOTE(review): presumably _open() re-reads the bag after the
            # repair - confirm against the bag class.
            bag._open()
        if args.techmd:
            repair_bag_techmd(bag, args.repairer, args.dryrun)
            bag._open()
Example #13
0
 def test_incompleted_bag(self):
   """A bag whose bagit.txt is deleted after loading must not validate."""
   bagit.make_bag(self.tmpdir)
   loaded = ami_bag.ami_bag(path = self.tmpdir)
   # Remove the declaration file only after the bag object exists.
   os.remove(os.path.join(self.tmpdir, 'bagit.txt'))
   outcome = loaded.validate_amibag()
   self.assertFalse(outcome)
Example #14
0
 def test_valid_bag(self):
   """The untouched fixture must validate, metadata checks included."""
   bagit.make_bag(self.tmpdir)
   loaded = ami_bag.ami_bag(path = self.tmpdir)
   outcome = loaded.validate_amibag(metadata = True)
   self.assertTrue(outcome)