def work(self) -> bool:
        """Validate the package's ``marc.xml`` and publish the findings.

        The file is looked up directly inside ``self.package_path``. A missing
        file is logged and skipped. Every validation error is logged and
        collected, and the collected list is handed to ``self.set_results``.

        Returns:
            ``False`` when a ``PermissionError`` interrupts the validation (the
            results then hold only the permission report), ``True`` otherwise.
        """
        marc_path = os.path.join(self.package_path, "marc.xml")
        summary = hathi_result.SummaryDirector(source=marc_path)
        findings: typing.List[hathi_result.Result] = []

        validation_logger = logging.getLogger(hathi_validate.__name__)
        validation_logger.setLevel(logging.INFO)

        with self.log_config(validation_logger):
            try:
                if os.path.exists(marc_path):
                    self.log(
                        "Validating marc.xml in {}".format(self.package_path)
                    )
                    marc_validator = validator.ValidateMarc(marc_path)
                    marc_errors = validate_process.run_validation(
                        marc_validator
                    )

                    if marc_errors:
                        for issue in marc_errors:
                            self.log(issue.message)
                            findings.append(issue)
                    else:
                        self.log("{} successfully validated".format(marc_path))
                else:
                    self.log(
                        "Skipping \'{}\' due to file not found".format(
                            marc_path
                        )
                    )
            except FileNotFoundError as exc:
                # Recorded on the summary; merged into the findings below.
                summary.add_error(
                    "Unable to Validate Marc. Reason: {}".format(exc)
                )
            except PermissionError as exc:
                # Anything collected so far is discarded: the results are
                # replaced by a package-level permission report.
                permission_report = hathi_result.SummaryDirector(
                    source=self.package_path
                )
                permission_report.add_error(
                    "Permission issues. \"{}\"".format(exc)
                )
                self.set_results(permission_report.construct())
                return False

            findings.extend(summary.construct())
            self.set_results(findings)
        return True
Beispiel #2
0
def main():
    """Validate every package found under the path given on the command line.

    For each directory yielded by ``package.get_dirs(args.path)`` the files
    are registered in a batch manifest and these validators are run: missing
    package files, missing component files, extra subdirectories, checksum
    report, ``marc.xml``, ``meta.yml`` and, when ``args.check_ocr`` is set,
    the OCR files. All validation errors are accumulated in a single list.

    Afterwards the manifest report and the validation report are sent to the
    console reporter; when ``args.report_name`` is set, the validation report
    is additionally sent to a ``report.FileOutputReporter`` for that name.
    """
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.DEBUG)
    parser = get_parser()
    args = parser.parse_args()

    configure_logging.configure_logger(debug_mode=args.debug,
                                       log_file=args.log_debug)
    errors = []
    batch_manifest_builder = manifest.PackageManifestDirector()
    for pkg in package.get_dirs(args.path):
        logger.info("Creating a manifest for {}".format(pkg))
        package_builder = batch_manifest_builder.add_package(pkg)

        # Only the file names are needed; the directory parts are ignored.
        for _root, _dirs, files in os.walk(pkg):
            for file_name in files:
                package_builder.add_file(file_name)

        logger.info("Checking {}".format(pkg))

        # Validate missing files
        logger.debug("Looking for missing package files in {}".format(pkg))
        missing_files_errors = process.run_validation(
            validator.ValidateMissingFiles(path=pkg))
        if not missing_files_errors:
            logger.info("Found no missing package files in {}".format(pkg))
        else:
            for error in missing_files_errors:
                logger.info(error.message)
                errors.append(error)

        # Look for missing components
        extensions = [".txt", ".jp2"]
        if args.check_ocr:
            extensions.append(".xml")
        logger.debug("Looking for missing component files in {}".format(pkg))
        # Raw string: "\d" is not a valid escape in a normal string literal.
        missing_files_errors = process.run_validation(
            validator.ValidateComponents(pkg, r"^\d{8}$", *extensions))
        if not missing_files_errors:
            logger.info("Found no missing component files in {}".format(pkg))
        else:
            for error in missing_files_errors:
                logger.info(error.message)
                errors.append(error)

        # Validate extra subdirectories
        logger.debug("Looking for extra subdirectories in {}".format(pkg))
        extra_subdirectories_errors = process.run_validation(
            validator.ValidateExtraSubdirectories(path=pkg))
        if extra_subdirectories_errors:
            errors.extend(extra_subdirectories_errors)

        # Validate Checksums
        checksum_report = os.path.join(pkg, "checksum.md5")
        checksum_report_errors = process.run_validation(
            validator.ValidateChecksumReport(pkg, checksum_report))
        if not checksum_report_errors:
            logger.info("All checksums in {} successfully validated".format(
                checksum_report))
        else:
            errors.extend(checksum_report_errors)

        # Validate Marc
        marc_file = os.path.join(pkg, "marc.xml")
        marc_errors = process.run_validation(validator.ValidateMarc(marc_file))
        if not marc_errors:
            logger.info("{} successfully validated".format(marc_file))
        else:
            errors.extend(marc_errors)

        # Validate YML
        yml_file = os.path.join(pkg, "meta.yml")
        meta_yml_errors = process.run_validation(
            validator.ValidateMetaYML(yaml_file=yml_file,
                                      path=pkg,
                                      required_page_data=True))
        if not meta_yml_errors:
            logger.info("{} successfully validated".format(yml_file))
        else:
            errors.extend(meta_yml_errors)

        # Validate ocr files
        if args.check_ocr:
            ocr_errors = process.run_validation(
                validator.ValidateOCRFiles(path=pkg))
            if not ocr_errors:
                # The placeholder was missing, so pkg never reached the log.
                logger.info("No validation errors found in {}".format(pkg))
            else:
                errors.extend(ocr_errors)

    batch_manifest = batch_manifest_builder.build_manifest()
    manifest_report = manifest.get_report_as_str(batch_manifest, width=80)
    console_reporter2 = report.Reporter(report.ConsoleReporter())
    validation_report = report.get_report_as_str(errors)
    console_reporter2.report(manifest_report)
    console_reporter2.report(validation_report)
    if args.report_name:
        file_reporter = report.Reporter(
            report.FileOutputReporter(args.report_name))
        file_reporter.report(validation_report)