def work(self) -> bool:
    """Run OCR file validation on the package and store the findings.

    A ``validator.ValidateOCRFiles`` for ``self.package_path`` is passed to
    ``validate_process.run_validation``. Each reported error is logged and
    collected, and the collection is handed to ``self.set_results``.

    Returns:
        True when the validation ran. False when a PermissionError stopped
        it; in that case the results are set to the report built from the
        permission error.

    Raises:
        Exception: any other failure of the validation run is printed and
            re-raised unchanged.
    """
    errors: typing.List[hathi_result.Result] = []
    my_logger = logging.getLogger(hathi_validate.__name__)
    my_logger.setLevel(logging.INFO)

    with self.log_config(my_logger):
        print("Running ocr Validation")
        try:
            ocr_errors = validate_process.run_validation(
                validator.ValidateOCRFiles(path=self.package_path))
        except PermissionError as e:
            report_builder = hathi_result.SummaryDirector(
                source=self.package_path)
            report_builder.add_error("Permission issues. \"{}\"".format(e))
            self.set_results(report_builder.construct())
            return False
        except Exception as e:
            print(e)
            raise

        if not ocr_errors:
            # Announce success only when nothing was reported, and include
            # the package path through an explicit placeholder.
            self.log("No validation errors found in {}".format(
                self.package_path))
        else:
            for error in ocr_errors:
                self.log(error.message)
                errors.append(error)
        self.set_results(errors)
    return True
def work(self) -> bool:
    """Check the package's OCR XML files with ``ValidateUTF8Files``.

    Regular files directly inside ``self.package_path`` are examined when
    their extension is ``.xml`` (compared case-insensitively) and their base
    name is not ``marc``. Everything the validation reports is handed to
    ``self.set_results``.

    Returns:
        Always True.
    """
    def is_ocr_xml(candidate: os.DirEntry) -> bool:
        """Tell whether an entry is an XML file other than the marc file."""
        if not candidate.is_file():
            return False
        stem, suffix = os.path.splitext(candidate.name)
        return suffix.lower() == ".xml" and stem.lower() != "marc"

    validation_logger = logging.getLogger(hathi_validate.__name__)
    validation_logger.setLevel(logging.INFO)

    with self.log_config(validation_logger):
        findings: typing.List[hathi_result.Result] = []
        for entry in os.scandir(self.package_path):
            if not is_ocr_xml(entry):
                continue
            self.log(
                "Looking for invalid characters in {}".format(entry.path))
            bad_characters = validate_process.run_validation(
                validator.ValidateUTF8Files(entry.path))
            if bad_characters:
                findings.extend(bad_characters)
        self.set_results(findings)
    return True
def work(self) -> bool:
    """Validate the package's ``meta.yml`` and store the findings.

    When ``meta.yml`` is absent from ``self.package_path`` the check is
    skipped with a log message. Otherwise a ``validator.ValidateMetaYML`` is
    run through ``validate_process.run_validation`` and each reported error
    is logged and collected. A FileNotFoundError raised along the way is
    turned into an error entry of the summary report, whose contents are
    added to the collected errors before ``self.set_results`` is called.

    Returns:
        Always True.
    """
    yml_file = os.path.join(self.package_path, "meta.yml")
    findings: List[hathi_result.Result] = []

    validation_logger = logging.getLogger(hathi_validate.__name__)
    validation_logger.setLevel(logging.INFO)

    with self.log_config(validation_logger):
        summary = hathi_result.SummaryDirector(source=yml_file)
        try:
            if os.path.exists(yml_file):
                self.log(f"Validating meta.yml in {self.package_path}")
                yml_problems = validate_process.run_validation(
                    validator.ValidateMetaYML(
                        yaml_file=yml_file,
                        path=self.package_path,
                        required_page_data=True,
                    )
                )
                if yml_problems:
                    for problem in yml_problems:
                        self.log(problem.message)
                        findings.append(problem)
                else:
                    self.log(f"{yml_file} successfully validated")
            else:
                self.log(f"Skipping '{yml_file}' due to file not found")
        except FileNotFoundError as file_not_found_error:
            summary.add_error(
                f"Unable to validate YAML. Reason: {file_not_found_error}")

        findings.extend(summary.construct())
        self.set_results(findings)
    return True
def work(self) -> bool:
    """Look for extra subdirectories in the package and store the findings.

    Runs ``validator.ValidateExtraSubdirectories`` for ``self.package_path``
    through ``validate_process.run_validation``; every reported error is
    logged and collected, then passed to ``self.set_results``.

    Returns:
        True when the validation ran. False when a PermissionError stopped
        it; the results are then set to the report built from that error.
    """
    findings: typing.List[hathi_result.Result] = []
    validation_logger = logging.getLogger(hathi_validate.__name__)
    validation_logger.setLevel(logging.INFO)

    with self.log_config(validation_logger):
        try:
            check = validator.ValidateExtraSubdirectories(
                path=self.package_path)
            extra_dirs = validate_process.run_validation(check)
        except PermissionError as permission_error:
            summary = hathi_result.SummaryDirector(source=self.package_path)
            summary.add_error(
                'Permission issues. "{}"'.format(permission_error))
            self.set_results(summary.construct())
            return False

        if extra_dirs:
            for problem in extra_dirs:
                self.log(problem.message)
                findings.append(problem)
        else:
            self.log("No extra subdirectories found in {}".format(
                self.package_path))
        self.set_results(findings)
    return True
def work(self) -> bool:
    """Validate the package's ``checksum.md5`` report and store the findings.

    The file names listed by ``validate_process.extracts_checksums`` are
    counted for a log message, then ``validator.ValidateChecksumReport`` is
    run through ``validate_process.run_validation``. A FileNotFoundError
    becomes an error entry of the summary report, whose contents are added
    to the collected errors before ``self.set_results`` is called.

    Returns:
        True when the check finished. False when a PermissionError stopped
        it; the results are then set to the report built from that error.
    """
    findings: typing.List[hathi_result.Result] = []
    checksum_report = os.path.join(self.package_path, "checksum.md5")

    validation_logger = logging.getLogger(hathi_validate.__name__)
    validation_logger.setLevel(logging.INFO)

    with self.log_config(validation_logger):
        summary = hathi_result.SummaryDirector(source=checksum_report)
        try:
            listed_files = [
                listed_name
                for _, listed_name
                in validate_process.extracts_checksums(checksum_report)
            ]
            announcement = (
                "Validating checksums of the {} files "
                "included in {}".format(len(listed_files), checksum_report)
            )
            self.log(announcement)

            check = validator.ValidateChecksumReport(self.package_path,
                                                     checksum_report)
            checksum_problems = validate_process.run_validation(check)
            if checksum_problems:
                findings.extend(checksum_problems)
            else:
                self.log(
                    "All checksums in {} successfully validated".format(
                        checksum_report))
        except FileNotFoundError as missing_error:
            summary.add_error(
                "Unable to validate checksums. Reason: {}".format(
                    missing_error))
        except PermissionError as permission_error:
            # The permission report is attributed to the package directory
            # rather than to the checksum file.
            summary = hathi_result.SummaryDirector(source=self.package_path)
            summary.add_error(
                'Permission issues. "{}"'.format(permission_error))
            self.set_results(summary.construct())
            return False

        findings.extend(summary.construct())
        self.set_results(findings)
    return True
def work(self) -> bool:
    """Look for missing package files and store the findings.

    Runs ``validator.ValidateMissingFiles`` for ``self.package_path`` through
    ``validate_process.run_validation``; each reported error is logged and
    collected, then passed to ``self.set_results``.

    Returns:
        Always True.
    """
    findings: typing.List[hathi_result.Result] = []
    validation_logger = logging.getLogger(hathi_validate.__name__)
    validation_logger.setLevel(logging.INFO)

    with self.log_config(validation_logger):
        check = validator.ValidateMissingFiles(path=self.package_path)
        missing = validate_process.run_validation(check)
        # A falsy result (nothing reported) is treated as no findings.
        for problem in missing or ():
            self.log(problem.message)
            findings.append(problem)
        self.set_results(findings)
    return True
def work(self) -> bool:
    """Validate the package's ``checksum.md5`` report and store the findings.

    The file names listed by ``validate_process.extracts_checksums`` are
    counted for a log message, then ``validator.ValidateChecksumReport`` is
    run through ``validate_process.run_validation``. A FileNotFoundError
    becomes an error entry of the summary report, whose contents are added
    to the collected errors before ``self.set_results`` is called.

    Returns:
        True when the check finished. False when a PermissionError stopped
        it; the results are then set to the report built from that error.
    """
    findings: List[hathi_result.Result] = []
    checksum_report = os.path.join(self.package_path, "checksum.md5")

    validation_logger = logging.getLogger(hathi_validate.__name__)
    validation_logger.setLevel(logging.INFO)

    with self.log_config(validation_logger):
        summary = hathi_result.SummaryDirector(source=checksum_report)
        try:
            listed_files = [
                listed_name
                for _, listed_name
                in validate_process.extracts_checksums(checksum_report)
            ]
            self.log(
                f"Validating checksums of the {len(listed_files)} files "
                f"included in {checksum_report}")

            check = validator.ValidateChecksumReport(self.package_path,
                                                     checksum_report)
            checksum_problems: List[hathi_result.Result] = \
                validate_process.run_validation(check)
            if checksum_problems:
                findings.extend(checksum_problems)
            else:
                self.log(
                    f"All checksums in {checksum_report} successfully "
                    f"validated")
        except FileNotFoundError as file_missing_error:
            summary.add_error("Unable to validate checksums. "
                              f"Reason: {file_missing_error}")
        except PermissionError as permission_error:
            # The permission report is attributed to the package directory
            # rather than to the checksum file.
            summary = hathi_result.SummaryDirector(source=self.package_path)
            summary.add_error(f'Permission issues. "{permission_error}"')
            self.set_results(summary.construct())
            return False

        findings.extend(summary.construct())
        self.set_results(findings)
    return True
def work(self) -> bool:
    """Validate the package's ``marc.xml`` and store the findings.

    When ``marc.xml`` is absent from ``self.package_path`` the check is
    skipped with a log message. Otherwise ``validator.ValidateMarc`` is run
    through ``validate_process.run_validation`` and each reported error is
    logged and collected. A FileNotFoundError becomes an error entry of the
    summary report, whose contents are added to the collected errors before
    ``self.set_results`` is called.

    Returns:
        True when the check finished. False when a PermissionError stopped
        it; the results are then set to the report built from that error.
    """
    marc_file = os.path.join(self.package_path, "marc.xml")
    marc_summary = hathi_result.SummaryDirector(source=marc_file)
    findings: typing.List[hathi_result.Result] = []

    validation_logger = logging.getLogger(hathi_validate.__name__)
    validation_logger.setLevel(logging.INFO)

    with self.log_config(validation_logger):
        try:
            if os.path.exists(marc_file):
                self.log(
                    "Validating marc.xml in {}".format(self.package_path))
                marc_problems = validate_process.run_validation(
                    validator.ValidateMarc(marc_file))
                if marc_problems:
                    for problem in marc_problems:
                        self.log(problem.message)
                        findings.append(problem)
                else:
                    self.log("{} successfully validated".format(marc_file))
            else:
                self.log(
                    "Skipping '{}' due to file not found".format(marc_file))
        except FileNotFoundError as missing_error:
            marc_summary.add_error(
                "Unable to Validate Marc. Reason: {}".format(missing_error))
        except PermissionError as permission_error:
            # A separate report, attributed to the package directory, is
            # used for permission failures.
            permission_summary = hathi_result.SummaryDirector(
                source=self.package_path)
            permission_summary.add_error(
                'Permission issues. "{}"'.format(permission_error))
            self.set_results(permission_summary.construct())
            return False

        findings.extend(marc_summary.construct())
        self.set_results(findings)
    return True
def work(self) -> bool:
    """Look for missing component files and store the findings.

    Runs ``validator.ValidateComponents`` for ``self.package_path`` with the
    name pattern ``^[0-9]{8}$`` and the extensions ``.txt`` and ``.jp2``,
    plus ``.xml`` when ``self.check_ocr`` is truthy. Each reported error is
    logged and collected, then passed to ``self.set_results``.

    Returns:
        True when the validation ran. False when a FileNotFoundError or
        PermissionError stopped it; the results are then set to the report
        built from that failure.
    """
    findings: typing.List[hathi_result.Result] = []
    wanted_extensions = [".txt", ".jp2"]

    validation_logger = logging.getLogger(hathi_validate.__name__)
    validation_logger.setLevel(logging.INFO)

    with self.log_config(validation_logger):
        if self.check_ocr:
            wanted_extensions.append(".xml")

        try:
            check = validator.ValidateComponents(
                self.package_path, "^[0-9]{8}$", *wanted_extensions)
            missing_components = validate_process.run_validation(check)
        except FileNotFoundError:
            summary = hathi_result.SummaryDirector(source=self.package_path)
            summary.add_error(
                "No files located with expected file naming scheme in path")
            self.set_results(summary.construct())
            return False
        except PermissionError as permission_error:
            summary = hathi_result.SummaryDirector(source=self.package_path)
            summary.add_error(
                'Permission issues. "{}"'.format(permission_error))
            self.set_results(summary.construct())
            return False

        if missing_components:
            for problem in missing_components:
                self.log(problem.message)
                findings.append(problem)
        else:
            self.log("Found no missing component files in {}".format(
                self.package_path))
        self.set_results(findings)
    return True
def main():
    """Validate every package found under the path given on the command line.

    Arguments come from ``get_parser()``. For each directory yielded by
    ``package.get_dirs(args.path)`` the files found by walking it are added
    to a batch manifest, and the package is checked for missing files,
    missing components, extra subdirectories, checksums, ``marc.xml`` and
    ``meta.yml``; OCR files are checked only when ``args.check_ocr`` is set.

    The manifest report and the validation report are sent to the console
    reporter. When ``args.report_name`` is given, the validation report is
    also sent to a file reporter for that name.
    """
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.DEBUG)
    parser = get_parser()
    args = parser.parse_args()

    configure_logging.configure_logger(debug_mode=args.debug,
                                       log_file=args.log_debug)
    errors = []
    batch_manifest_builder = manifest.PackageManifestDirector()

    # The component extensions depend only on the command-line arguments,
    # so they are built once instead of once per package.
    extensions = [".txt", ".jp2"]
    if args.check_ocr:
        extensions.append(".xml")

    for pkg in package.get_dirs(args.path):
        logger.info("Creating a manifest for {}".format(pkg))
        package_builder = batch_manifest_builder.add_package(pkg)

        for _, _, files in os.walk(pkg):
            for file_name in files:
                package_builder.add_file(file_name)

        logger.info("Checking {}".format(pkg))

        # Validate missing files
        logger.debug("Looking for missing package files in {}".format(pkg))
        missing_files_errors = process.run_validation(
            validator.ValidateMissingFiles(path=pkg))
        if not missing_files_errors:
            logger.info("Found no missing package files in {}".format(pkg))
        else:
            for error in missing_files_errors:
                logger.info(error.message)
                errors.append(error)

        # Look for missing components
        logger.debug("Looking for missing component files in {}".format(pkg))
        # Raw string: "\d" is not a valid escape in a normal string literal.
        missing_files_errors = process.run_validation(
            validator.ValidateComponents(pkg, r"^\d{8}$", *extensions))
        if not missing_files_errors:
            logger.info("Found no missing component files in {}".format(pkg))
        else:
            for error in missing_files_errors:
                logger.info(error.message)
                errors.append(error)

        # Validate extra subdirectories
        logger.debug("Looking for extra subdirectories in {}".format(pkg))
        extra_subdirectories_errors = process.run_validation(
            validator.ValidateExtraSubdirectories(path=pkg))
        if extra_subdirectories_errors:
            errors.extend(extra_subdirectories_errors)

        # Validate Checksums
        checksum_report = os.path.join(pkg, "checksum.md5")
        checksum_report_errors = process.run_validation(
            validator.ValidateChecksumReport(pkg, checksum_report))
        if not checksum_report_errors:
            logger.info("All checksums in {} successfully validated".format(
                checksum_report))
        else:
            errors.extend(checksum_report_errors)

        # Validate Marc
        marc_file = os.path.join(pkg, "marc.xml")
        marc_errors = process.run_validation(validator.ValidateMarc(marc_file))
        if not marc_errors:
            logger.info("{} successfully validated".format(marc_file))
        else:
            errors.extend(marc_errors)

        # Validate YML
        yml_file = os.path.join(pkg, "meta.yml")
        meta_yml_errors = process.run_validation(
            validator.ValidateMetaYML(yaml_file=yml_file,
                                      path=pkg,
                                      required_page_data=True))
        if not meta_yml_errors:
            logger.info("{} successfully validated".format(yml_file))
        else:
            errors.extend(meta_yml_errors)

        # Validate ocr files
        if args.check_ocr:
            ocr_errors = process.run_validation(
                validator.ValidateOCRFiles(path=pkg))
            if not ocr_errors:
                # The placeholder was missing, so the package path never
                # appeared in this message.
                logger.info("No validation errors found in {}".format(pkg))
            else:
                errors.extend(ocr_errors)

    batch_manifest = batch_manifest_builder.build_manifest()
    manifest_report = manifest.get_report_as_str(batch_manifest, width=80)
    console_reporter2 = report.Reporter(report.ConsoleReporter())
    validation_report = report.get_report_as_str(errors)
    console_reporter2.report(manifest_report)
    console_reporter2.report(validation_report)
    if args.report_name:
        file_reporter = report.Reporter(
            report.FileOutputReporter(args.report_name))
        file_reporter.report(validation_report)