Ejemplo n.º 1
0
def validate_sch(schema, xmlfile, phase=None, xmlparser=None):
    """Run a Schematron validation of an XML file.

    When no parser is supplied, the file is parsed with a UTF-8 parser
    that has network access disabled.

    :param str schema: Filename of the Schematron schema
    :param str xmlfile: Filename of the XML file to validate
    :param str phase: Phase of the Schematron schema
    :param xmlparser: :class:`etree.XMLParser` object, or ``None`` to use
                      the default parser described above
    :return: The result of the validation and the ``Schematron`` validator
             that produced it (built with ``store_report=True`` and
             ``store_xslt=True``)
    :rtype: tuple
    """
    parser = xmlparser
    if parser is None:
        # Fall back to our default XML parser.
        parser = etree.XMLParser(encoding="UTF-8", no_network=True)
    document = etree.parse(xmlfile, parser=parser)

    log.debug("Schematron validation with file=%r, schema=%r, phase=%r",
              xmlfile, schema, phase)
    validator = Schematron(
        file=schema,
        phase=phase,
        store_report=True,
        store_xslt=True,
    )
    outcome = validator.validate(document)
    log.info("=> Validation result was: %s", outcome)
    return outcome, validator
Ejemplo n.º 2
0
 def analyse_xml_schematron_orderx(self,
                                   vals,
                                   xml_root,
                                   errors,
                                   prefix=None):
     """Validate an Order-X XML tree against the Schematron of its profile.

     The Schematron file path is looked up from the configuration
     parameter matching ``vals['xml_profile']``. When validation fails,
     the SVRL report is handed to ``schematron_result_analysis`` together
     with ``vals`` and ``errors``.

     :param vals: dict; ``vals['xml_profile']`` must start with ``orderx_``
     :param xml_root: parsed XML to validate
     :param errors: passed through to ``schematron_result_analysis``
     :param prefix: not used by this method
     :raises UserError: if the profile is not an Order-X profile or the
         Schematron file cannot be parsed
     """
     # The Order-X SCH files are ISO Schematron and not XSLT2, so lxml
     # can process them.
     sch_paths = {
         'facturx.orderx.schematron.basic.sch_path': False,
         'facturx.orderx.schematron.comfort.sch_path': False,
         'facturx.orderx.schematron.extended.sch_path': False,
     }
     self._config_parameter_filepath_update(sch_paths)

     xml_profile = vals['xml_profile']
     if not xml_profile.startswith('orderx_'):
         raise UserError(
             _("Wrong XML profile %s. Must be an Order-X profile. This should never happen."
               ) % xml_profile)

     # Strip the 'orderx_' prefix to get the profile level name.
     level = xml_profile[len('orderx_'):]
     sch_path = sch_paths['facturx.orderx.schematron.%s.sch_path' % level]
     try:
         sch_root = etree.parse(sch_path)
     except Exception as e:
         raise UserError(
             _("Cannot parse SCH XML file %s. Error: %s") % (sch_path, e))

     validator = Schematron(sch_root, store_report=True)
     is_valid = validator.validate(xml_root)
     logger.debug('analyse_xml_schematron_orderx res=%s', is_valid)
     svrl_report = validator.validation_report
     logger.debug('orderx svrl_xml_string=%s', svrl_report)
     svrl_root = etree.fromstring(str(svrl_report))

     if is_valid is not False:
         logger.info('Order-X file is valid according to Schematron')
         return
     logger.info('Order-X file is invalid according to Schematron')
     self.schematron_result_analysis(vals, svrl_root, errors)
Ejemplo n.º 3
0
def fun():
    """Validate every file matching ``Trades/*.xml`` against ``trade.sch``.

    Prints the validation result of each file, one per line.
    """
    path = "Trades/*.xml"
    files = glob.glob(path)
    if not files:
        # Nothing to validate, so the schema is not loaded at all.
        return

    # The schema is the same for every file: compile it once instead of
    # once per loop iteration.
    xml_validator = Schematron(file="trade.sch")
    for file in files:
        xml_file = etree.parse(file)
        is_valid = xml_validator.validate(xml_file)
        print(is_valid)
Ejemplo n.º 4
0
 def __init__(self, name, rules_path=None):
     """Load the Schematron ruleset for ``name``.

     :param name: ruleset name, used to build the default rules file name
         ``mets_<name>_rules.xml``
     :param rules_path: path of the rules file; when falsy, the default
         file is located through ``files(SCHEMATRON)``
     """
     self.name = name
     self.rules_path = rules_path or str(
         files(SCHEMATRON).joinpath('mets_{}_rules.xml'.format(name)))
     logging.debug("path: %s", self.rules_path)
     self.ruleset = Schematron(
         file=self.rules_path,
         store_schematron=True,
         store_report=True,
     )
Ejemplo n.º 5
0
def test(xhtml_file: Path, dtd: DTD, schematron: Schematron) -> bool:
    """
    Check an XHTML file against a DTD, a Schematron, and the link and
    image tests.

    The checks run in order and stop at the first failure. Parse, DTD and
    Schematron failures are reported through the print helpers before
    returning.

    :param xhtml_file: the XHTML file to test
    :param dtd: the DTD
    :param schematron: the Schematron
    :return: True if the file passes
    """
    if settings.verbose:
        print(xhtml_file)

    clear_error_log()

    xhtml_parser = XHTMLParser(dtd_validation=True, ns_clean=True)
    try:
        root = parse(source=str(xhtml_file), parser=xhtml_parser).getroot()
    except IOError as err:
        print(f"{xhtml_file}: {err.strerror}", file=stderr)
        return False
    except XMLSyntaxError:
        print_error_log(xhtml_parser.error_log)
        return False

    dtd_ok = dtd.validate(root)
    if not dtd_ok:
        print_error_log(dtd.error_log)
        return False

    schematron_ok = schematron.validate(root)
    if not schematron_ok:
        print_schematron_error_log(root, schematron)
        return False

    return test_links(xhtml_file, root) and test_images(xhtml_file, root)
Ejemplo n.º 6
0
class ValidationRules:
    """A set of Schematron rules loaded from a file."""

    def __init__(self, name, rules_path=None):
        """Load the ruleset for ``name``.

        When ``rules_path`` is falsy, the file ``mets_<name>_rules.xml``
        located through ``files(SCHEMATRON)`` is used.
        """
        self.name = name
        self.rules_path = rules_path or str(
            files(SCHEMATRON).joinpath('mets_{}_rules.xml'.format(name)))
        logging.debug("path: %s", self.rules_path)
        self.ruleset = Schematron(
            file=self.rules_path,
            store_schematron=True,
            store_report=True,
        )

    def get_assertions(self):
        """Yield each ``assert`` element of the stored schematron in turn."""
        rules_root = lxml.etree.XML(bytes(self.ruleset.schematron))
        assert_tag = SCHEMATRON_NS + 'assert'
        yield from (node for node in rules_root.iter()
                    if node.tag == assert_tag)

    def validate(self, to_validate):
        """Parse ``to_validate`` and run the loaded ruleset against it.

        Nothing is returned; the outcome is read with ``get_report``.
        """
        document = lxml.etree.parse(to_validate)
        self.ruleset.validate(document)

    def get_report(self):
        """Build a ``TestReport`` from the report of the last validation.

        Each failed assertion is paired with the most recently seen
        fired rule. Failed assertions with role ``WARN`` are collected as
        warnings; all others are failures and make the report invalid.
        """
        report_root = lxml.etree.XML(bytes(self.ruleset.validation_report))
        fired_rule_tag = SVRL_NS + 'fired-rule'
        failed_assert_tag = SVRL_NS + 'failed-assert'

        failures, warnings = [], []
        current_rule = None
        for node in report_root.iter():
            if node.tag == fired_rule_tag:
                # Remember the rule so later failed asserts can refer to it.
                current_rule = node
                continue
            if node.tag != failed_assert_tag:
                continue
            target = warnings if node.get('role') == 'WARN' else failures
            target.append(TestResult.from_element(current_rule, node))

        # The report is valid exactly when no non-warning failure was found.
        return TestReport(not failures, failures, warnings)
Ejemplo n.º 7
0
def open_schematron(schematron_file: Path) -> Schematron:
    """
    Load a Schematron schema from an XML file.

    If an XML syntax error is raised while loading, the error is printed
    to stderr and the program exits with status 1.

    :param schematron_file: path to a Schematron XML file
    :return: A Schematron object
    """
    source = str(schematron_file)
    try:
        schema_tree = parse(source)
        schema = Schematron(schema_tree, store_report=True)
        return schema
    except XMLSyntaxError as err:
        print(f"{schematron_file}:1: {err}", file=stderr)
        exit(1)
Ejemplo n.º 8
0
    def schematron(self,
                   filename=None,
                   schematron_file=None,
                   skip_valid=False,
                   **kwargs):
        """Validate a benchmark file against a Schematron schema.

        The directory of ``filename`` is stored in ``self.basedir``. On
        failure, the text of every failed assertion in the report is
        printed.

        :param filename: path of the benchmark XML file to validate
            (NOTE(review): defaults to ``None`` but is passed straight to
            ``os.path.dirname`` and ``open``, so a path appears to be
            required; confirm with callers)
        :param schematron_file: path of the Schematron schema; when
            ``None`` the XCCDF 1.2 schema is downloaded from csrc.nist.gov
        :param skip_valid: when true, a failed validation returns the
            validation result instead of raising
        :param kwargs: accepted but not used here
        :return: ``True`` if validation passed; the falsy validation
            result if it failed and ``skip_valid`` is set
        :raises Exception: if validation failed and ``skip_valid`` is
            not set
        """
        from contextlib import closing

        self.basedir = os.path.dirname(filename)

        if schematron_file is None:
            # Close the HTTP response once parsed instead of leaking it.
            with closing(urlopen(
                    'https://csrc.nist.gov/schema/xccdf/1.2/xccdf_1.2.sch'
            )) as response:
                schema_doc = etree.parse(response)
        else:
            # Binary mode: the XML parser decodes the bytes itself, using
            # the document's own encoding declaration rather than the
            # locale default.
            with open(schematron_file, 'rb') as f:
                schema_doc = etree.parse(f)

        schema = Schematron(schema_doc, store_report=True)

        with open(filename, 'rb') as f:
            benchmark = etree.parse(f)

        validation_result = schema.validate(benchmark)
        if validation_result:
            print('Benchmark {} PASSED schematron validation'.format(filename))
            return True

        print('Benchmark {} FAILED schematron validation'.format(filename))
        errors = schema.validation_report.xpath(
            'svrl:failed-assert/svrl:text',
            namespaces={'svrl': 'http://purl.oclc.org/dsdl/svrl'})
        print('Schematron validation errors:')
        for element in errors:
            print("---")
            print(textwrap.fill(element.text))
        print("---")

        if skip_valid:
            return validation_result

        raise Exception("Schematron validation failed")
def test_schematron_validation(schematron):
    """Check that a ``Schematron`` can be built from the parsed file."""
    schema_tree = ET.parse(schematron)
    Schematron(schema_tree)
def test_mets_creation(testpath, requests_mock, dataset, files):
    """Test SIP validity.

    Run CompressSIP task (and all tasks it requires) and check that:

        #. mets.xml validates against the schema
        #. mets.xml passes schematron verification
        #. digital object fixity (checksums) is correct in mets.xml
        #. digital objects of the SIP are valid
        #. mets.xml root element is valid (CONTRACTID, SPECIFICATION)
        #. all files are found in correct path

    :param testpath: temporary directory
    :param requests_mock: Mocker object
    :param dataset: dataset metadata
    :param files: list of file metadata objects
    :returns: ``None``
    """
    # Mock Metax
    tests.utils.add_metax_dataset(requests_mock,
                                  dataset=dataset,
                                  files=files)

    # Mock file download sources
    for file_ in files:
        if file_['file_storage']['identifier'] == PAS_STORAGE_ID:
            # Mock upload-rest-api
            conf = siptools_research.config.Configuration(
                tests.conftest.TEST_CONFIG_FILE
            )
            mongoclient = pymongo.MongoClient(host=conf.get('mongodb_host'))
            mongoclient.upload.files.insert_one(
                {
                    "_id": file_['identifier'],
                    "file_path": os.path.join(testpath, file_['identifier'])
                }
            )
            # Use a separate name for the handle so that the loop
            # variable ``file_`` is not rebound to a file object.
            with open(os.path.join(testpath,
                                   file_['identifier']), 'w') as upload_file:
                upload_file.write('foo')
        else:
            # Mock Ida
            requests_mock.get(
                'https://ida.test/files/pid:urn:identifier/download',
                text='foo'
            )

    workspace = os.path.join(testpath, 'workspaces', 'workspace')
    luigi.build(
        [CompressSIP(
            workspace=workspace,
            dataset_id='dataset_identifier',
            config=tests.conftest.UNIT_TEST_CONFIG_FILE
        )],
        local_scheduler=True
    )

    # Extract SIP
    with tarfile.open(os.path.join(workspace, 'workspace.tar')) as tar:
        tar.extractall(os.path.join(testpath, 'extracted_sip'))

    # Read mets.xml
    mets = ET.parse(os.path.join(testpath, 'extracted_sip', 'mets.xml'))

    # Validate mets.xml against schema
    schema = ET.XMLSchema(ET.parse(METS_XSD))
    assert schema.validate(mets)

    # Validate mets.xml against Schematrons
    for schematron in SCHEMATRONS:
        Schematron(ET.parse(schematron)).assertValid(mets)

    # Check mets root element contract identifier and spec version
    mets_xml_root = mets.getroot()
    assert mets_xml_root.xpath('@*[local-name() = "CONTRACTID"]')[0] \
        == 'urn:uuid:abcd1234-abcd-1234-5678-abcd1234abcd'
    assert mets_xml_root.xpath('@*[local-name() = "CATALOG"] | '
                               '@*[local-name() = "SPECIFICATION"]')[0][:3] \
        == '1.7'

    # Check that all files are included in SIP
    for file_metadata in files:
        with open(os.path.join(testpath,
                               'extracted_sip',
                               'dataset_files',
                               file_metadata['file_path'])) as sip_file:
            assert sip_file.read() == 'foo'