Esempio n. 1
0
def is_valid(path, completeness_only=False, printfn=print):
    """Tell whether the BagIt package located at ``path`` validates.

    The package is opened with ``Bag`` and validated using one worker
    process per CPU reported by ``multiprocessing.cpu_count()``.

    :param path: location handed to ``Bag``.
    :param completeness_only: forwarded unchanged to ``Bag.validate``.
    :param printfn: ``print``-compatible callable used to report a failure;
        it is called with a ``file=sys.stderr`` keyword argument.
    :returns: ``True`` if no ``BagError`` was raised while opening or
        validating the package, ``False`` otherwise.
    """
    try:
        package = Bag(path)
        worker_count = multiprocessing.cpu_count()
        package.validate(
            processes=worker_count,
            completeness_only=completeness_only,
        )
    except BagError as error:
        # Report the problem to the caller-supplied printer and signal failure
        # through the return value instead of propagating the exception.
        printfn("Error validating BagIt package:", error, file=sys.stderr)
        return False
    else:
        return True
Esempio n. 2
0
def main():
    """Validate a single bag and/or every sub-directory of a directory.

    Arguments are parsed with the parser built by ``_make_parser()``:

    * ``args.directory`` -- if set, every immediate sub-directory of it is
      treated as a candidate bag.
    * ``args.bagpath`` -- if set, this path is added as a candidate bag.
    * ``args.slow`` -- passed to ``Bag.validate`` as ``fast`` and used to
      decide whether "Recalculating hashes" is announced.

    Each candidate is opened with ``Bag`` and validated; the outcome is
    reported through ``LOGGER``. A candidate that cannot be opened is logged
    as "Not a bag". A candidate whose validation returns a falsy value or
    raises is logged as invalid, so a single bad bag does not abort the rest
    of the run. Returns ``None``.
    """
    parser = _make_parser()
    args = parser.parse_args()

    bags = []

    _configure_logging(args)

    checks = "Performing the following validations: Checking 0xums, Checking bag completeness"
    if not args.slow:
        checks += ", Recalculating hashes"
    LOGGER.info(checks)

    if args.directory:
        directory_path = os.path.abspath(args.directory)
        for name in os.listdir(directory_path):
            candidate = os.path.join(directory_path, name)
            # Plain files sitting next to the bags are ignored.
            if os.path.isdir(candidate):
                bags.append(candidate)

    if args.bagpath:
        bags.append(os.path.abspath(args.bagpath))

    LOGGER.info("Checking {} folder(s).".format(len(bags)))

    for bagpath in tqdm(bags):
        LOGGER.info("Checking: {}".format(bagpath))

        # ``Exception`` rather than a bare ``except`` so that Ctrl-C
        # (KeyboardInterrupt) and SystemExit still stop the run instead of
        # being reported as "Not a bag".
        try:
            bag = Bag(bagpath)
        except Exception:
            LOGGER.error("{}: Not a bag".format(bagpath))
            continue

        # NOTE(review): ``fast`` is driven directly by ``args.slow``. That
        # matches the "Recalculating hashes" message above only if the
        # ``--slow`` flag stores False when given -- confirm in _make_parser.
        try:
            valid = bag.validate(fast=args.slow)
        except Exception as err:
            # Presumably ``Bag`` is bagit-python's, whose validate() signals
            # an invalid bag by raising rather than returning False; treat a
            # raise as "invalid" and carry on with the remaining bags.
            LOGGER.error("{}: invalid ({})".format(bagpath, err))
            continue

        if valid:
            LOGGER.info("{}: valid".format(bagpath))
        else:
            LOGGER.error("{}: invalid".format(bagpath))
0
def verify_aip(job):
    """Verify the AIP was bagged correctly by extracting it and running
    verification on its contents. This is also where we verify the checksums
    now that the verifyPREMISChecksums_v0.0 ("Verify checksums generated on
    ingest") micro-service has been removed. It was removed because verifying
    checksums by calculating them in that MS and then having bagit calculate
    them here was redundant.

    job.args[1] = UUID
      UUID of the SIP, which will become the UUID of the AIP
    job.args[2] = current location
      Full absolute path to the AIP's current location on the local filesystem

    Returns 0 when the bag is complete and ``verify_checksums`` succeeds,
    and 1 when extraction fails, the bag fails validation (``BagError``) or
    ``verify_checksums`` raises ``VerifyChecksumsError``.

    When the AIP is not a directory it is extracted into
    ``<TEMP_DIRECTORY>/<sip_uuid>``; that directory is removed before this
    function exits, including when an unexpected exception propagates out of
    validation or checksum verification.
    """

    sip_uuid = job.args[1]  # %sip_uuid%
    aip_path = job.args[2]  # SIPDirectory%%sip_name%-%sip_uuid%.7z

    temp_dir = mcpclient_settings.TEMP_DIRECTORY

    # A directory is used in place; anything else is extracted first.
    is_uncompressed_aip = os.path.isdir(aip_path)

    if is_uncompressed_aip:
        bag_path = aip_path
    else:
        try:
            extract_dir = os.path.join(temp_dir, sip_uuid)
            bag_path = extract_aip(job, aip_path, extract_dir)
        except Exception as err:
            job.print_error(repr(err))
            job.pyprint('Error extracting AIP at "{}"'.format(aip_path),
                        file=sys.stderr)
            return 1

    try:
        return_code = 0
        try:
            # Only validate completeness since we're going to verify checksums
            # later against what we have in the database via
            # `verify_checksums`.
            bag = Bag(bag_path)
            bag.validate(completeness_only=True)
        except BagError as err:
            job.print_error("Error validating BagIt package: {}".format(err))
            return_code = 1

        if return_code == 0:
            try:
                verify_checksums(job, bag, sip_uuid)
            except VerifyChecksumsError:
                return_code = 1
        else:
            job.pyprint("Not verifying checksums because other tests have already"
                        " failed.")

        return return_code
    finally:
        # Cleanup runs in ``finally`` so the extracted copy is not left behind
        # when an exception other than the ones handled above escapes.
        if not is_uncompressed_aip:
            try:
                shutil.rmtree(extract_dir)
            except OSError as err:
                # Best effort: a failed cleanup is reported but does not
                # change the verification result.
                job.pyprint(
                    "Failed to remove temporary directory at {extract_dir} which"
                    " contains the AIP extracted for verification."
                    " Error:\n{err}".format(extract_dir=extract_dir, err=err),
                    file=sys.stderr,
                )