def validate_sch(schema, xmlfile, phase=None, xmlparser=None):
    """Run a Schematron validation over an XML file.

    :param str schema: Filename of the Schematron schema
    :param str xmlfile: Filename of the XML file to validate
    :param str phase: Phase of the Schematron schema
    :param xmlparser: :class:`etree.XMLParser` object used to parse
        ``xmlfile``; when ``None``, a parser with ``encoding="UTF-8"``
        and ``no_network=True`` is created
    :return: The result of the validation and the :class:`Schematron`
        object that produced it (created with ``store_report=True`` and
        ``store_xslt=True``)
    :rtype: tuple
    """
    parser = xmlparser
    if parser is None:
        # Fall back to our default XML parser.
        parser = etree.XMLParser(encoding="UTF-8", no_network=True)

    doctree = etree.parse(xmlfile, parser=parser)
    log.debug("Schematron validation with file=%r, schema=%r, phase=%r",
              xmlfile, schema, phase)

    validator = Schematron(
        file=schema,
        phase=phase,
        store_report=True,
        store_xslt=True,
    )
    outcome = validator.validate(doctree)
    log.info("=> Validation result was: %s", outcome)
    return outcome, validator
def analyse_xml_schematron_orderx(self, vals, xml_root, errors, prefix=None):
    """Validate an Order-X XML tree against the Schematron of its profile.

    The Schematron file path is looked up under the key matching the
    profile suffix (the part of ``vals['xml_profile']`` after
    ``orderx_``). When validation fails, the SVRL report is handed to
    ``self.schematron_result_analysis``.

    :param vals: dict read for its ``'xml_profile'`` entry and forwarded
        to ``schematron_result_analysis``
    :param xml_root: parsed XML to validate
    :param errors: forwarded to ``schematron_result_analysis``
    :param prefix: not used by this method
    :raises UserError: if the profile does not start with ``orderx_`` or
        the Schematron file cannot be parsed
    """
    # As the SCH of Order-X are ISO SCH and not XSLT2, we can use lxml
    paths = dict.fromkeys(
        (
            'facturx.orderx.schematron.basic.sch_path',
            'facturx.orderx.schematron.comfort.sch_path',
            'facturx.orderx.schematron.extended.sch_path',
        ),
        False,
    )
    self._config_parameter_filepath_update(paths)

    profile = vals['xml_profile']
    orderx_prefix = 'orderx_'
    if not profile.startswith(orderx_prefix):
        raise UserError(
            _("Wrong XML profile %s. Must be an Order-X profile. This should never happen.")
            % profile)

    # Strip the 'orderx_' prefix to get the profile level used in the key.
    level = profile[len(orderx_prefix):]
    sch_path = paths['facturx.orderx.schematron.%s.sch_path' % level]

    try:
        sch_root = etree.parse(sch_path)
    except Exception as e:
        raise UserError(
            _("Cannot parse SCH XML file %s. Error: %s") % (sch_path, e))

    validator = Schematron(sch_root, store_report=True)
    res = validator.validate(xml_root)
    logger.debug('analyse_xml_schematron_orderx res=%s', res)

    svrl_xml_string = validator.validation_report
    logger.debug('orderx svrl_xml_string=%s', svrl_xml_string)
    svrl_root = etree.fromstring(str(svrl_xml_string))

    if res is not False:
        logger.info('Order-X file is valid according to Schematron')
        return

    logger.info('Order-X file is invalid according to Schematron')
    self.schematron_result_analysis(vals, svrl_root, errors)
def fun():
    """Validate every ``Trades/*.xml`` file against ``trade.sch``.

    Prints the validation result of each file. The Schematron schema is
    compiled once and reused for all files instead of being rebuilt on
    every iteration; when no file matches, the schema is never loaded.
    """
    path = "Trades/*.xml"
    files = glob.glob(path)
    if not files:
        # Nothing to validate: do not load the schema at all.
        return

    # Compiling the schema is loop-invariant, so do it a single time.
    xml_validator = Schematron(file="trade.sch")
    for file in files:
        xml_file = etree.parse(file)
        is_valid = xml_validator.validate(xml_file)
        print(is_valid)
def __init__(self, name, rules_path=None):
    """Load the Schematron ruleset identified by ``name``.

    :param name: ruleset name; also used to build the default rules
        file name ``mets_<name>_rules.xml``
    :param rules_path: path of the rules file; when falsy, the file is
        resolved via ``files(SCHEMATRON)``
    """
    self.name = name
    if rules_path:
        self.rules_path = rules_path
    else:
        bundled = files(SCHEMATRON).joinpath(
            'mets_{}_rules.xml'.format(name))
        self.rules_path = str(bundled)
    logging.debug("path: %s", self.rules_path)
    self.ruleset = Schematron(
        file=self.rules_path,
        store_schematron=True,
        store_report=True,
    )
def test(xhtml_file: Path, dtd: DTD, schematron: Schematron) -> bool:
    """
    Test that an XHTML file matches a DTD and passes Schematron tests.
    Error messages are printed to stderr if the file doesn't pass.

    :param xhtml_file: the XHTML file to test
    :param dtd: the DTD
    :param schematron: the Schematron
    :return: True if the file passes
    """
    if settings.verbose:
        print(xhtml_file)
    clear_error_log()
    parser = XHTMLParser(dtd_validation=True, ns_clean=True)
    try:
        tree = parse(source=str(xhtml_file), parser=parser)
        html = tree.getroot()
    except IOError as e:
        # strerror is None when the exception carries only a message
        # (no errno); fall back to the exception text so the user never
        # sees "<file>: None".
        print(f"{xhtml_file}: {e.strerror or e}", file=stderr)
        return False
    except XMLSyntaxError:
        print_error_log(parser.error_log)
        return False
    if not dtd.validate(html):
        print_error_log(dtd.error_log)
        return False
    if not schematron.validate(html):
        print_schematron_error_log(html, schematron)
        return False
    return test_links(xhtml_file, html) and test_images(xhtml_file, html)
class ValidationRules():
    """Wraps a set of Schematron rules loaded from a file."""

    def __init__(self, name, rules_path=None):
        """Load the ruleset ``name``, from ``rules_path`` when given.

        When ``rules_path`` is falsy, ``mets_<name>_rules.xml`` is
        resolved via ``files(SCHEMATRON)``.
        """
        self.name = name
        self.rules_path = rules_path or str(
            files(SCHEMATRON).joinpath('mets_{}_rules.xml'.format(name)))
        logging.debug("path: %s", self.rules_path)
        self.ruleset = Schematron(
            file=self.rules_path,
            store_schematron=True,
            store_report=True,
        )

    def get_assertions(self):
        """Yield each ``assert`` element of the loaded ruleset in turn."""
        assert_tag = SCHEMATRON_NS + 'assert'
        schema_root = lxml.etree.XML(bytes(self.ruleset.schematron))
        for node in schema_root.iter():
            if node.tag == assert_tag:
                yield node

    def validate(self, to_validate):
        """Parse ``to_validate`` and run the loaded ruleset against it."""
        document = lxml.etree.parse(to_validate)
        self.ruleset.validate(document)

    def get_report(self):
        """Build a ``TestReport`` from the last validation report.

        Failed assertions whose ``role`` is ``WARN`` are collected as
        warnings; every other failed assertion is a failure and makes
        the report invalid.
        """
        report_root = lxml.etree.XML(bytes(self.ruleset.validation_report))
        fired_tag = SVRL_NS + 'fired-rule'
        failed_tag = SVRL_NS + 'failed-assert'

        failures, warnings = [], []
        current_rule = None
        for node in report_root.iter():
            if node.tag == fired_tag:
                # Remember the rule that the following assertions belong to.
                current_rule = node
                continue
            if node.tag != failed_tag:
                continue
            bucket = warnings if node.get('role') == 'WARN' else failures
            bucket.append(TestResult.from_element(current_rule, node))

        return TestReport(not failures, failures, warnings)
def open_schematron(schematron_file: Path) -> Schematron:
    """
    Open a Schematron schema. Exit program on failure.

    An XML syntax error or an unreadable file is reported on stderr and
    the program exits with status 1.

    :param schematron_file: path to a Schematron XML file
    :return: A Schematron object
    :raises SystemExit: if the file cannot be read or parsed
    """
    try:
        xml = parse(str(schematron_file))
        return Schematron(xml, store_report=True)
    except XMLSyntaxError as e:
        print(f"{schematron_file}:1: {e}", file=stderr)
    except OSError as e:
        # Missing or unreadable file: report it instead of a traceback.
        print(f"{schematron_file}: {e}", file=stderr)
    # The exit() builtin is injected by the site module for interactive
    # use and may be absent; raising SystemExit works everywhere.
    raise SystemExit(1)
def schematron(self, filename=None, schematron_file=None, skip_valid=False,
               **kwargs):
    """Validate a benchmark file against a Schematron schema.

    :param filename: path of the benchmark XML file to validate; its
        directory is stored in ``self.basedir``
    :param schematron_file: path of the Schematron schema; when ``None``
        the XCCDF 1.2 schema is downloaded from csrc.nist.gov
    :param skip_valid: when true, a failed validation returns the
        (falsy) validation result instead of raising
    :param kwargs: accepted but not used by this method
    :return: ``True`` when validation passes; the validation result when
        it fails and ``skip_valid`` is set
    :raises Exception: when validation fails and ``skip_valid`` is false
    """
    self.basedir = os.path.dirname(filename)

    if schematron_file is None:
        response = urlopen(
            'https://csrc.nist.gov/schema/xccdf/1.2/xccdf_1.2.sch')
        try:
            schema_doc = etree.parse(response)
        finally:
            # Always release the HTTP connection.
            response.close()
    else:
        # Binary mode lets the XML parser handle the encoding itself.
        with open(schematron_file, 'rb') as f:
            schema_doc = etree.parse(f)
    schema = Schematron(schema_doc, store_report=True)

    with open(filename, 'rb') as f:
        benchmark = etree.parse(f)

    validation_result = schema.validate(benchmark)
    if validation_result:
        print('Benchmark {} PASSED schematron validation'.format(filename))
        return True

    print('Benchmark {} FAILED schematron validation'.format(filename))
    errors = schema.validation_report.xpath(
        'svrl:failed-assert/svrl:text',
        namespaces={'svrl': 'http://purl.oclc.org/dsdl/svrl'})
    print('Schematron validation errors:')
    for element in errors:
        print("---")
        # element.text is None for an empty message element.
        print(textwrap.fill(element.text or ''))
        print("---")

    if skip_valid:
        return validation_result
    raise Exception("Schematron validation failed")
def test_schematron_validation(schematron):
    """Check that a Schematron object can be built from the given file.

    The test passes when parsing ``schematron`` and constructing a
    ``Schematron`` from the parsed tree raise no exception.
    """
    schema_tree = ET.parse(schematron)
    Schematron(schema_tree)
def test_mets_creation(testpath, requests_mock, dataset, files):
    """Test SIP validity.

    Run CompressSIP task (and all tasks it requires) and check that:

        #. mets.xml validates against the schema
        #. mets.xml passes schematron verification
        #. digital object fixity (checksums) is correct in mets.xml
        #. digital objects of the SIP are valid
        #. mets.xml root element is valid (CONTRACTID, SPECIFICATION)
        #. all files are found in correct path

    :param testpath: temporary directory
    :param requests_mock: Mocker object
    :param dataset: dataset metadata
    :param files: list of file metadata objects
    :returns: ``None``
    """
    # Mock Metax
    tests.utils.add_metax_dataset(requests_mock, dataset=dataset, files=files)

    # Mock file download sources
    for file_ in files:
        if file_['file_storage']['identifier'] != PAS_STORAGE_ID:
            # Mock Ida
            requests_mock.get(
                'https://ida.test/files/pid:urn:identifier/download',
                text='foo'
            )
            continue

        # Mock upload-rest-api
        conf = siptools_research.config.Configuration(
            tests.conftest.TEST_CONFIG_FILE
        )
        mongoclient = pymongo.MongoClient(host=conf.get('mongodb_host'))
        mongoclient.upload.files.insert_one(
            {
                "_id": file_['identifier'],
                "file_path": os.path.join(testpath, file_['identifier'])
            }
        )
        with open(os.path.join(testpath, file_['identifier']), 'w') as handle:
            handle.write('foo')

    workspace = os.path.join(testpath, 'workspaces', 'workspace')
    sip_task = CompressSIP(
        workspace=workspace,
        dataset_id='dataset_identifier',
        config=tests.conftest.UNIT_TEST_CONFIG_FILE
    )
    luigi.build([sip_task], local_scheduler=True)

    # Extract SIP
    with tarfile.open(os.path.join(workspace, 'workspace.tar')) as tar:
        tar.extractall(os.path.join(testpath, 'extracted_sip'))

    # Read mets.xml
    mets = ET.parse(os.path.join(testpath, 'extracted_sip', 'mets.xml'))

    # Validate mets.xml against schema
    schema = ET.XMLSchema(ET.parse(METS_XSD))
    assert schema.validate(mets)

    # Validate mets.xml against Schematrons
    for schematron in SCHEMATRONS:
        rules = Schematron(ET.parse(schematron))
        rules.assertValid(mets)

    # Check mets root element contract identifier and spec version
    mets_xml_root = mets.getroot()
    contract_ids = mets_xml_root.xpath('@*[local-name() = "CONTRACTID"]')
    assert contract_ids[0] == 'urn:uuid:abcd1234-abcd-1234-5678-abcd1234abcd'
    versions = mets_xml_root.xpath('@*[local-name() = "CATALOG"] | '
                                   '@*[local-name() = "SPECIFICATION"]')
    assert versions[0][:3] == '1.7'

    # Check that all files are included in SIP
    for file_metadata in files:
        sip_file_path = os.path.join(testpath, 'extracted_sip',
                                     'dataset_files',
                                     file_metadata['file_path'])
        with open(sip_file_path) as sip_file:
            assert sip_file.read() == 'foo'