コード例 #1
0
    def test_valid_extra_schematron(self):
        """Check that a violated extra schematron rule is reported.

        The document holds three ``Percent`` elements while the extra rule
        asserts fewer than three, so validation is expected to fail with
        exactly one error carrying the rule's message.
        """
        extra_rules = io.BytesIO(b'''\
        <schema xmlns="http://purl.oclc.org/dsdl/schematron">
          <pattern id="two_elements">
            <title>Max 2 elements allowed.</title>
            <rule context="Total">
              <assert test="count(//Percent) &lt; 3">Element 'Total': More than 2 elements.</assert>
            </rule>
          </pattern>
        </schema>
        ''')
        document = etree.parse(io.BytesIO(
            b'<Total><Percent>70</Percent><Percent>20</Percent><Percent>10</Percent></Total>'
        ))

        validator = domain.XMLValidator.parse(
            document, no_doctype=True, sps_version='sps-1.1')
        # Replace both schematron slots with the schemas under test.
        validator.schematron = isoschematron.Schematron(
            etree.parse(sample_sch))
        validator.extra_schematron = isoschematron.Schematron(
            etree.parse(extra_rules))

        is_valid, found = validator._validate_sch()
        self.assertFalse(is_valid)
        self.assertEqual(len(found), 1)
        self.assertEqual(found[0].message,
                         "Element 'Total': More than 2 elements.")
コード例 #2
0
def xml_validate(parent, xmlns, xpath=None, rng=None, sch=None):
    """Validate the nodes selected by ``xpath`` against rng and sch rules.

    Nothing is parsed or checked when ``xpath`` is falsy.

    :param parent: object exposing ``xpath(expr, namespaces=...)``
    :param xmlns: namespace mapping handed to the xpath query
    :param xpath: expression selecting the nodes to validate
    :param rng: RelaxNG rules as XML text (optional)
    :param sch: Schematron rules as XML text (optional)
    :raises cfy_exc.NonRecoverableError: when a selected node fails one of
        the supplied rule sets
    """
    if not xpath:
        return

    # Compile only the rule sets that were supplied, RelaxNG first.
    relaxng = etree.RelaxNG(etree.XML(rng)) if rng else None
    schematron = isoschematron.Schematron(etree.XML(sch)) if sch else None

    for candidate in parent.xpath(xpath, namespaces=xmlns):
        if relaxng and not relaxng.validate(candidate):
            raise cfy_exc.NonRecoverableError("Not valid xml by rng")
        if schematron and not schematron.validate(candidate):
            raise cfy_exc.NonRecoverableError(
                "Not valid xml by Schematron")
コード例 #3
0
    def test_schematron_assertValid(self):
        schema = self.parse('''\
<sch:schema xmlns:sch="http://purl.oclc.org/dsdl/schematron">
  <sch:pattern id="number_of_entries">
    <sch:title>mandatory number_of_entries tests</sch:title>
    <sch:rule context="number_of_entries">
      <sch:assert test="text()=count(../entries/entry)">[ERROR] number_of_entries (<sch:value-of select="."/>) must equal the number of entries/entry elements (<sch:value-of select="count(../entries/entry)"/>)</sch:assert>
    </sch:rule>
  </sch:pattern>
</sch:schema>
''')
        tree_valid = self.parse('''\
<message>
  <number_of_entries>0</number_of_entries>
  <entries>
  </entries>
</message>
''')
        tree_invalid = self.parse('''\
<message>
  <number_of_entries>3</number_of_entries>
  <entries>
    <entry>Entry 1</entry>
    <entry>Entry 2</entry>
  </entries>
</message>
''')
        schematron = isoschematron.Schematron(schema)
        self.assertTrue(schematron(tree_valid), schematron.error_log)
        self.assertRaises(etree.DocumentInvalid, schematron.assertValid,
                          tree_invalid)
コード例 #4
0
    def test_extra_schematron_thru_parse(self):
        """Extra schematron passed via ``parse`` must contribute its errors.

        The compiled extra schema is handed over through
        ``extra_sch_schemas`` and its assertion message is expected among
        the style-validation errors.
        """
        extra_rules = io.BytesIO(b'''\
        <schema xmlns="http://purl.oclc.org/dsdl/schematron">
          <pattern id="two_elements">
            <title>Max 2 elements allowed.</title>
            <rule context="Total">
              <assert test="count(//Percent) &lt; 3">Element 'Total': More than 2 elements.</assert>
            </rule>
          </pattern>
        </schema>
        ''')
        document = etree.parse(io.BytesIO(
            b'<Total><Percent>70</Percent><Percent>20</Percent><Percent>10</Percent></Total>'
        ))

        extra_validator = isoschematron.Schematron(etree.parse(extra_rules))
        validator = domain.XMLValidator.parse(
            document,
            no_doctype=True,
            sps_version='sps-1.1',
            extra_sch_schemas=[extra_validator])

        is_valid, found = validator.validate_style()
        self.assertFalse(is_valid)
        messages = [err.message for err in found]
        self.assertTrue("Element 'Total': More than 2 elements." in messages)
コード例 #5
0
    def validate_xml(self, dict_file, parsed_xml_tree, validator_type,
                     validator_name):
        """Validate a parsed XML tree with a configured schema or schematron.

        Args:
            dict_file: Name of the XML file; only used in messages.
            parsed_xml_tree: Tree passed to the compiled validator.
            validator_type: Config section name; ``'schema'`` compiles a
                RelaxNG validator, ``'schematron'`` an ISO Schematron one.
            validator_name: Option in that section whose value, appended to
                ``ROOTDIR``, is the validator file path.

        Raises:
            FprimeXmlException: If the validator is not configured, if the
                validator type is not supported, or if a ``'schema'``
                validation fails. A failed ``'schematron'`` validation is
                only logged as a warning.
        """
        # Check that validator is valid
        if (validator_type not in self.Config
                or validator_name not in self.Config[validator_type]):
            msg = "XML Validator type " + validator_type + " not found in ConfigManager instance"
            raise FprimeXmlException(msg)

        validator_path = ROOTDIR + self.Config.get(validator_type,
                                                   validator_name)

        # The context manager closes the file even when parsing fails.
        with open(validator_path, 'r') as validator_file_handler:
            validator_parsed = etree.parse(validator_file_handler)

        # Create proper xml validator tool
        if validator_type == 'schema':
            validator_compiled = etree.RelaxNG(validator_parsed)
        elif validator_type == 'schematron':
            validator_compiled = isoschematron.Schematron(validator_parsed)
        else:
            # Previously this fell through and failed later with an
            # UnboundLocalError on ``validator_compiled``.
            raise FprimeXmlException(
                "XML Validator type " + validator_type + " is not supported")

        # Validate XML file
        if validator_compiled.validate(parsed_xml_tree):
            return

        if validator_type == 'schema':
            msg = "XML file {} is not valid according to {} {}.".format(
                dict_file, validator_type, validator_path)
            raise FprimeXmlException(msg)

        # Schematron failures are advisory only.
        msg = "WARNING: XML file {} is not valid according to {} {}.".format(
            dict_file, validator_type, validator_path)
        PRINT.info(msg)
コード例 #6
0
 def get_xslt_tree(self, model_instance):
     """Return the root of the validator XSLT, compiling it on first use.

     The compiled Schematron and its XSLT root are stored on the instance,
     so later calls return the stored root without recompiling.
     """
     if self._schematron_xslt is not None:
         return self._schematron_xslt

     tree = self.get_schematron_tree(model_instance)
     self._schematron = isoschematron.Schematron(
         tree, **self.schematron_kwargs)
     self._schematron_xslt = self._schematron.validator_xslt.getroot()
     return self._schematron_xslt
コード例 #7
0
    def test_schematron_error_log(self):
        schema = self.parse('''\
<sch:schema xmlns:sch="http://purl.oclc.org/dsdl/schematron">
  <sch:pattern id="number_of_entries">
    <sch:title>mandatory number_of_entries tests</sch:title>
    <sch:rule context="number_of_entries">
      <sch:assert test="text()=count(../entries/entry)">[ERROR] number_of_entries (<sch:value-of select="."/>) must equal the number of entries/entry elements (<sch:value-of select="count(../entries/entry)"/>)</sch:assert>
    </sch:rule>
  </sch:pattern>
</sch:schema>
''')
        tree_valid = self.parse('''\
<message>
  <number_of_entries>0</number_of_entries>
  <entries>
  </entries>
</message>
''')
        tree_invalid = self.parse('''\
<message>
  <number_of_entries>3</number_of_entries>
  <entries>
    <entry>Entry 1</entry>
    <entry>Entry 2</entry>
  </entries>
</message>
''')
        schematron = isoschematron.Schematron(schema)
        self.assertTrue(schematron(tree_valid), schematron.error_log)
        valid = schematron(tree_invalid)
        self.assertTrue(not valid)
        self.assertEqual(len(schematron.error_log), 1,
                          'expected single error: %s (%s errors)' %
                          (schematron.error_log, len(schematron.error_log)))
コード例 #8
0
        def validate_schematron_pt_udf(pt):
            """Yield a failure ``Row`` for each row of ``pt`` that is not valid.

            Each row's ``document`` is parsed and validated against the
            Schematron file at ``vs_filepath``. Rows that validate produce
            no output. Rows that fail validation, or whose processing
            raises, produce one ``Row`` with a JSON payload listing the
            failures. ``vs_filepath``, ``vs_id`` and ``vs_name`` are taken
            from the enclosing scope, which is not shown here.
            """

            def failure_row(row, payload):
                # Single place where the output Row is assembled.
                return Row(record_id=row._id,
                           record_identifier=row.record_id,
                           job_id=row.job_id,
                           validation_scenario_id=int(vs_id),
                           validation_scenario_name=vs_name,
                           valid=False,
                           results_payload=json.dumps(payload),
                           fail_count=payload['fail_count'])

            # Compile the schematron once for all rows.
            validator = isoschematron.Schematron(etree.parse(vs_filepath),
                                                 store_report=True)

            for row in pt:
                try:
                    record_xml = etree.fromstring(row.document.encode('utf-8'))
                    if validator.validate(record_xml):
                        continue

                    # Collect the text of every failed assertion.
                    report_root = validator.validation_report.getroot()
                    fails = report_root.findall('svrl:failed-assert',
                                                namespaces=report_root.nsmap)
                    payload = {'fail_count': len(fails), 'failed': []}
                    for fail in fails:
                        text_elem = fail.find('svrl:text',
                                              namespaces=fail.nsmap)
                        payload['failed'].append(text_elem.text)

                    yield failure_row(row, payload)

                except Exception as e:
                    # Any error while handling the row counts as one failure.
                    payload = {
                        'fail_count': 1,
                        'failed': [
                            "Schematron validation exception: %s" % (str(e))
                        ],
                    }
                    yield failure_row(row, payload)
コード例 #9
0
    def test_schematron(self):
        tree_valid = self.parse('<AAA><BBB/><CCC/></AAA>')
        tree_invalid = self.parse('<AAA><BBB/><CCC/><DDD/></AAA>')
        schema = self.parse('''\
<schema xmlns="http://purl.oclc.org/dsdl/schematron" >
    <pattern id="OpenModel">
        <title>Open Model</title>
        <rule context="AAA">
            <assert test="BBB"> BBB element is not present</assert>
            <assert test="CCC"> CCC element is not present</assert>
        </rule>
    </pattern>
    <pattern id="ClosedModel">
        <title>Closed model"</title>
        <rule context="AAA">
            <assert test="BBB"> BBB element is not present</assert>
            <assert test="CCC"> CCC element is not present</assert>
            <assert test="count(BBB|CCC) = count (*)">There is an extra element</assert>
        </rule>
    </pattern>
</schema>
''')

        schema = isoschematron.Schematron(schema)
        self.assertTrue(schema.validate(tree_valid))
        self.assertTrue(not schema.validate(tree_invalid))
コード例 #10
0
def get_schematron(sct_path):
    """Build an lxml ``isoschematron.Schematron()`` from a schematron file.

    ``sct_path`` is first passed through ``_get_file_path``. The document
    is parsed with blank text removed, and the validator is created with
    ``store_report=True``.
    """
    resolved_path = _get_file_path(sct_path)
    document = etree.parse(
        resolved_path, parser=etree.XMLParser(remove_blank_text=True))
    return isoschematron.Schematron(document, store_report=True)
コード例 #11
0
    def test_schematron_store_xslt(self):
        schema = self.parse('''\
<sch:schema xmlns:sch="http://purl.oclc.org/dsdl/schematron">
  <sch:pattern id="number_of_entries">
    <sch:title>mandatory number_of_entries tests</sch:title>
    <sch:rule context="number_of_entries">
      <sch:assert test="text()=count(../entries/entry)">[ERROR] number_of_entries (<sch:value-of select="."/>) must equal the number of entries/entry elements (<sch:value-of select="count(../entries/entry)"/>)</sch:assert>
    </sch:rule>
  </sch:pattern>
</sch:schema>
''')
        schematron = isoschematron.Schematron(schema)
        self.assertTrue(schematron.validator_xslt is None)

        schematron = isoschematron.Schematron(schema, store_xslt=True)
        self.assertTrue(isinstance(schematron.validator_xslt, etree._ElementTree),
                     'expected validator xslt to be stored')
コード例 #12
0
ファイル: utils.py プロジェクト: joffilyfe/packtools
def get_schematron_from_buffer(buff, parser=NOIDS_XMLPARSER):
    """Parse ``buff`` and return an ``isoschematron.Schematron`` for it.

    Unless another ``parser`` is supplied, ``NOIDS_XMLPARSER`` is used; per
    the original note it does not collect ids on a hash table, i.e.
    ``collect_ids=False``.
    """
    return isoschematron.Schematron(etree.parse(buff, parser))
コード例 #13
0
 def setUp(self):
     """Compile the SAND-MPD XML Schema and Schematron validators.

     Both files are read from ``../schemas`` relative to the current
     working directory.
     """
     with open("../schemas/SAND-MPD.xsd") as xsd_file:
         self.sand_mpd_schema = etree.XMLSchema(etree.parse(xsd_file))
     with open("../schemas/SAND-MPD.sch") as sch_file:
         self.sand_mpd_schematron = isoschematron.Schematron(
             etree.parse(sch_file))
コード例 #14
0
def main():
    """Validate a sample METS root element with Schematron and print the result.

    The schema checks that the root ``TYPE`` and
    ``csip:CONTENTTYPESPECIFICATION`` attributes exist and that the latter
    contains a known value. The sample document uses ``SIARD3``, which is
    not among the listed values.
    """
    # Schematron rules for the METS root element.
    schema_source = io.StringIO('''\
    <schema xmlns="http://purl.oclc.org/dsdl/schematron" >
    <ns prefix="csip" uri="DILCIS"/>
    <ns prefix="ead" uri="urn:isbn:1-931666-22-9"/>
    <ns prefix="mets" uri="http://www.loc.gov/METS/"/>
    <pattern id="METS_root_element_validation">
    <title>Validate METS root element.</title>
    <rule id="CSIP3-TYPE-existence" context="mets:mets">
    <assert test="@TYPE">General content type attribute does not exist.</assert>
    </rule>
    <rule id="CSIP4-CONTENTTYPESPECIFICATION-existence" context="mets:mets">
    <assert test="@csip:CONTENTTYPESPECIFICATION">Content information type attribute does not exist.</assert>
    </rule>
    </pattern>
    <pattern id="METS_root_element_value_validation">
    <rule id="CSIP4-CONTENTTYPESPECIFICATION-value" context="mets:mets">
    <assert test="(contains(string(@csip:CONTENTTYPESPECIFICATION), 'SMURFERMS') or contains(string(@csip:CONTENTTYPESPECIFICATION), 'SMURFSFSB') or contains(string(@csip:CONTENTTYPESPECIFICATION), 'SIARD1') or contains(string(@csip:CONTENTTYPESPECIFICATION), 'SIARD2') or contains(string(@csip:CONTENTTYPESPECIFICATION), 'SIARDDK') or contains(string(@csip:CONTENTTYPESPECIFICATION), 'GeoVectorGML') or contains(string(@csip:CONTENTTYPESPECIFICATION), 'GeoRasterGeotiff') or contains(string(@csip:CONTENTTYPESPECIFICATION), 'MIXED') or contains(string(@csip:CONTENTTYPESPECIFICATION), 'OTHER'))">Content information type attribute value is not known. Known values are: SMURFERMS, SMURFSFSB, SIARD1, SIARD2, SIARDDK, GeoVectorGML, GeoRasterGeotiff, MIXED, OTHER.</assert>
    </rule>
    </pattern>
    </schema>
    ''')

    # Compile the rules and keep the report of the last validation run.
    validator = isoschematron.Schematron(etree.parse(schema_source),
                                         store_report=True)

    # Document to validate.
    sample_source = io.StringIO('''\
	<mets:mets xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 
	    xmlns:mets="http://www.loc.gov/METS/" 
	    xmlns:xlink="http://www.w3.org/1999/xlink"
	    xmlns:csip="DILCIS"
	    OBJID="uuid-4422c185-5407-4918-83b1-7abfa77de182" 
	    LABEL="Sample CS IP Information Package" 
	    TYPE="Database" 
	    csip:CONTENTTYPESPECIFICATION="SIARD3" 	
	    PROFILE="http://www.eark-project.com/METS/IP.xml" 
	    xsi:schemaLocation="http://www.loc.gov/METS/ http://www.loc.gov/standards/mets/mets.xsd http://www.w3.org/1999/xlink http://www.loc.gov/standards/mets/xlink.xsd">
         </mets:mets>
        ''')
    sample_doc = etree.parse(sample_source)

    # Run the validation, then fetch the stored report.
    is_valid = validator.validate(sample_doc)
    report = validator.validation_report

    print("is valid: " + str(is_valid))
    print(type(report))
    print(report)
コード例 #15
0
    def test_schematron_xmlschema_embedded(self):
        schema = self.parse('''\
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
    xmlns:sch="http://purl.oclc.org/dsdl/schematron">
    <xs:element name="message">
        <xs:complexType>
            <xs:sequence>
                <xs:element name="number_of_entries" type="xs:positiveInteger">
                    <xs:annotation>
                        <xs:appinfo>
                            <sch:pattern id="number_of_entries">
                                <sch:title>mandatory number_of_entries tests</sch:title>
                                <sch:rule context="number_of_entries">
                                    <sch:assert test="text()=count(../entries/entry)">[ERROR] number_of_entries (<sch:value-of select="."/>) must equal the number of entries/entry elements (<sch:value-of select="count(../entries/entry)"/>)</sch:assert>
                                </sch:rule>
                            </sch:pattern>
                        </xs:appinfo>
                    </xs:annotation>
                </xs:element>
                <xs:element name="entries">
                    <xs:complexType>
                        <xs:sequence>
                            <xs:element name="entry" type="xs:string" minOccurs="0" maxOccurs="unbounded"/>
                        </xs:sequence>
                    </xs:complexType>
                </xs:element>
            </xs:sequence>
        </xs:complexType>
    </xs:element>
</xs:schema>
''')
        tree_valid = self.parse('''\
<message>
  <number_of_entries>2</number_of_entries>
  <entries>
    <entry>Entry 1</entry>
    <entry>Entry 2</entry>
  </entries>
</message>
''')
        tree_invalid = self.parse('''\
<message>
  <number_of_entries>1</number_of_entries>
  <entries>
    <entry>Entry 1</entry>
    <entry>Entry 2</entry>
  </entries>
</message>
''')
        xmlschema = etree.XMLSchema(schema)
        schematron = isoschematron.Schematron(schema)
        # fwiw, this must also be XMLSchema-valid
        self.assertTrue(xmlschema(tree_valid), xmlschema.error_log)
        self.assertTrue(schematron(tree_valid))
        # still schema-valid
        self.assertTrue(xmlschema(tree_invalid), xmlschema.error_log)
        self.assertTrue(not schematron(tree_invalid))
コード例 #16
0
    def test_schematron_result_report(self):
        schema = self.parse('''\
<sch:schema xmlns:sch="http://purl.oclc.org/dsdl/schematron">
  <sch:pattern id="number_of_entries">
    <sch:title>mandatory number_of_entries tests</sch:title>
    <sch:rule context="number_of_entries">
      <sch:assert test="text()=count(../entries/entry)">[ERROR] number_of_entries (<sch:value-of select="."/>) must equal the number of entries/entry elements (<sch:value-of select="count(../entries/entry)"/>)</sch:assert>
    </sch:rule>
  </sch:pattern>
</sch:schema>
''')
        tree_valid = self.parse('''\
<message>
  <number_of_entries>0</number_of_entries>
  <entries>
  </entries>
</message>
''')
        tree_invalid = self.parse('''\
<message>
  <number_of_entries>3</number_of_entries>
  <entries>
    <entry>Entry 1</entry>
    <entry>Entry 2</entry>
  </entries>
</message>
''')
        schematron = isoschematron.Schematron(schema, store_report=True)
        self.assertTrue(schematron(tree_valid), schematron.error_log)
        valid = schematron(tree_invalid)
        self.assertTrue(not valid)
        self.assertTrue(
            isinstance(schematron.validation_report, etree._ElementTree),
            'expected a validation report result tree, got: %s' %
            (schematron.validation_report))

        schematron = isoschematron.Schematron(schema, store_report=False)
        self.assertTrue(schematron(tree_valid), schematron.error_log)
        valid = schematron(tree_invalid)
        self.assertTrue(not valid)
        self.assertTrue(
            schematron.validation_report is None,
            'validation reporting switched off, still: %s' %
            (schematron.validation_report))
コード例 #17
0
    def test_invalid_schematron(self):
        """The sample schematron must reject this ``Total`` document.

        Validation is expected to return a falsy result together with a
        non-empty error collection.
        """
        # The payload is a bytes literal, so it needs a binary buffer: the
        # Python 3 text ``StringIO`` raises TypeError for bytes input.
        # Imported locally so the method does not depend on a module-level
        # ``io`` import.
        import io

        fp = etree.parse(
            io.BytesIO(
                b'<Total><Percent>60</Percent><Percent>30</Percent></Total>'))
        xml = stylechecker.XML(fp)
        xml.schematron = isoschematron.Schematron(etree.parse(sample_sch))

        result, errors = xml._validate_sch()
        self.assertFalse(result)
        self.assertTrue(errors)
コード例 #18
0
 def __init__(self):
     """Compile the SAND message XML Schema and Schematron validators.

     The file names come from ``self.sand_message_xsd`` and
     ``self.sand_message_sch`` and are resolved relative to this module's
     directory.
     """
     module_dir = dirname(__file__)

     with open(join(module_dir, self.sand_message_xsd)) as xsd_file:
         self.sand_xml_schema = etree.XMLSchema(etree.parse(xsd_file))

     with open(join(module_dir, self.sand_message_sch)) as sch_file:
         self.sand_schematron = isoschematron.Schematron(
             etree.parse(sch_file))
コード例 #19
0
    def test_schematron_empty_pattern(self):
        schema = self.parse('''\
<schema xmlns="http://purl.oclc.org/dsdl/schematron" >
    <pattern id="OpenModel">
        <title>Open model</title>
    </pattern>
</schema>
''')
        schema = isoschematron.Schematron(schema)
        self.assertTrue(schema)
コード例 #20
0
    def test_schematron_relaxng_embedded(self):
        schema = self.parse('''\
<grammar xmlns="http://relaxng.org/ns/structure/1.0"
  xmlns:sch="http://purl.oclc.org/dsdl/schematron"
  datatypeLibrary="http://www.w3.org/2001/XMLSchema-datatypes">
  <start>
    <ref name="message"/>
  </start>
  <define name="message">
    <element name="message">
      <element name="number_of_entries">
        <!-- RelaxNG can be mixed freely with stuff from other namespaces -->
        <sch:pattern id="number_of_entries">
          <sch:title>mandatory number_of_entries tests</sch:title>
          <sch:rule context="number_of_entries">
            <sch:assert test="text()=count(../entries/entry)">[ERROR] number_of_entries (<sch:value-of select="."/>) must equal the number of entries/entry elements (<sch:value-of select="count(../entries/entry)"/>)</sch:assert>
          </sch:rule>
        </sch:pattern>
        <data type="positiveInteger"/>
      </element>
      <element name="entries">
        <zeroOrMore>
          <element name="entry"><data type="string"/></element>
        </zeroOrMore>
      </element>
    </element>
  </define>
</grammar>
''')
        tree_valid = self.parse('''\
<message>
  <number_of_entries>2</number_of_entries>
  <entries>
    <entry>Entry 1</entry>
    <entry>Entry 2</entry>
  </entries>
</message>
''')
        tree_invalid = self.parse('''\
<message>
  <number_of_entries>1</number_of_entries>
  <entries>
    <entry>Entry 1</entry>
    <entry>Entry 2</entry>
  </entries>
</message>
''')
        relaxng = etree.RelaxNG(schema)
        schematron = isoschematron.Schematron(schema)
        # fwiw, this must also be RelaxNG-valid
        self.assertTrue(relaxng(tree_valid), relaxng.error_log)
        self.assertTrue(schematron(tree_valid))
        # still schema-valid
        self.assertTrue(relaxng(tree_invalid), relaxng.error_log)
        self.assertTrue(not schematron(tree_invalid))
コード例 #21
0
    def test_schematron_fail_on_report(self):
        tree_valid = self.parse('<AAA><BBB/><CCC/></AAA>')
        tree_invalid = self.parse('<AAA><BBB/><CCC/><DDD/></AAA>')
        schema = self.parse('''\
<schema xmlns="http://purl.oclc.org/dsdl/schematron" >
    <pattern id="OpenModel">
        <title>Simple Report</title>
        <rule context="AAA">
            <report test="DDD"> DDD element must not be present</report>
        </rule>
    </pattern>
</schema>
''')
        schema_report = isoschematron.Schematron(
            schema, error_finder=isoschematron.Schematron.ASSERTS_AND_REPORTS)
        schema_no_report = isoschematron.Schematron(schema)
        self.assertTrue(schema_report.validate(tree_valid))
        self.assertTrue(not schema_report.validate(tree_invalid))
        self.assertTrue(schema_no_report.validate(tree_valid))
        self.assertTrue(schema_no_report.validate(tree_invalid))
コード例 #22
0
def main(teipath, rngfile, schematronfile):
    """Validate TEI files against a RelaxNG schema and a Schematron file.

    For every file matched by ``teipath`` the result of both validations
    is printed, followed by the error log when a validation fails. The
    last line printed is the number of files that failed RelaxNG and the
    number that failed Schematron validation.

    Arguments:
    teipath (str): path to the TEI files, e.g. /home/ulrike/Dokumente/Git/textbox/es/novela-espanola/tei/*.xml
    rngfile (str): path to the schema file, e.g. /home/ulrike/Schreibtisch/basisformat.rng
    schematronfile (str): path to the schematron file, e.g. /home/ulrike/Schreibtisch/keywords.sch

    Example:
    from toolbox.check_quality import validate_tei
    validate_tei.main("/home/ulrike/Git/novelashispanoamericanas/master/nh0001.xml", "/home/ulrike/Git/novelashispanoamericanas/cligs_importance.rnc", "/home/ulrike/Git/novelashispanoamericanas/keywords.sch")
    """
    problematic_files_rng = 0
    problematic_files_sch = 0

    # Both validators are identical for every TEI file, so they are
    # compiled once. They are built lazily on the first file so that a
    # glob matching nothing still never touches the schema files.
    rngvalidator = None
    schematron = None

    for teifile in glob.glob(teipath):
        idno = os.path.basename(teifile)

        parser = etree.XMLParser(recover=True)
        parser.resolvers.add(FileResolver())
        teiparsed = etree.parse(teifile, parser)

        if rngvalidator is None:
            rngvalidator = etree.RelaxNG(etree.parse(rngfile))
            schematron = isoschematron.Schematron(
                etree.parse(schematronfile, parser))

        # RelaxNG validation
        validation_rng = rngvalidator.validate(teiparsed)
        log_rng = rngvalidator.error_log

        # Schematron validation
        validation_sch = schematron.validate(teiparsed)
        log_sch = schematron.error_log

        if validation_rng:
            print(idno, "valid with RNG!")
        else:
            print(idno, "sorry, not valid with RNG!")
            print(log_rng)
            problematic_files_rng += 1

        if validation_sch:
            print(idno, "valid with schematron!")
        else:
            problematic_files_sch += 1
            print(idno, "sorry, not valid with schematron!")
            print(log_sch)

    print(problematic_files_rng, problematic_files_sch)
コード例 #23
0
    def test_validation_schematron(self):
        """A document accepted by the sample schematron yields no errors."""
        document = etree.parse(
            io.BytesIO(
                b'<Total><Percent>70</Percent><Percent>30</Percent></Total>'))
        compiled = isoschematron.Schematron(etree.parse(sample_sch))
        style_validator = domain.SchematronValidator(compiled)
        validator = domain.XMLValidator(document,
                                        style_validators=[style_validator])

        is_valid, found = validator.validate_style()
        self.assertTrue(is_valid)
        self.assertEqual(len(found), 0)
コード例 #24
0
def test_simple_validation(file_path, expected):
    """Assert that the file at ``file_path`` passes the schematron rules.

    The rules are read from the module-level ``schematron_path``.
    """
    # NOTE(review): ``expected`` is accepted but never consulted; the test
    # always asserts that validation succeeds. Confirm whether it should
    # be compared against the validation result.
    with open(schematron_path) as rules_file:
        rules_doc = etree.parse(rules_file)

    validator = isoschematron.Schematron(rules_doc)

    with open(file_path) as gml_file:
        gml_doc = etree.parse(gml_file)

    assert validator.validate(gml_doc)
コード例 #25
0
    def test_invalid_schematron(self):
        """A document rejected by the sample schematron must report errors."""
        document = etree.parse(
            io.BytesIO(
                b'<Total><Percent>60</Percent><Percent>30</Percent></Total>'))
        compiled = isoschematron.Schematron(etree.parse(sample_sch))
        style_validator = domain.SchematronValidator(compiled)
        validator = domain.XMLValidator(document,
                                        style_validators=[style_validator])

        is_valid, found = validator.validate_style()
        self.assertFalse(is_valid)
        self.assertTrue(found)
コード例 #26
0
def TestPhase(phase_name, cache):
    """Return the compiled Schematron for ``phase_name``, caching it.

    :param phase_name: the phase name
    :param cache: mapping type; read for an existing entry and updated
        with the newly compiled phase on a miss
    """
    if phase_name in cache:
        return cache[phase_name]

    # Cache miss: compile the phase from SCH and remember it.
    cache[phase_name] = isoschematron.Schematron(SCH, phase=phase_name)
    return cache[phase_name]
コード例 #27
0
 def _init_schematron(self, schematron):
     '''Set ``self.schematron`` from ``schematron``; returns nothing.

     ``None`` clears the attribute. An lxml element or element tree is
     used as is; anything else is handed to ``etree.parse`` first. The
     validator is created with its report, XSLT and schematron stored.
     '''
     if schematron is None:
         self.schematron = None
         return

     if isinstance(schematron, (etree._Element, etree._ElementTree)):
         tree = schematron
     else:
         tree = etree.parse(schematron)

     self.schematron = isoschematron.Schematron(
         tree, store_report=True, store_xslt=True, store_schematron=True)
コード例 #28
0
    def test_invalid_schematron(self):
        """The sample schematron must reject this ``Total`` document."""
        document = etree.parse(
            io.BytesIO(
                b'<Total><Percent>60</Percent><Percent>30</Percent></Total>'))
        validator = domain.XMLValidator.parse(
            document, no_doctype=True, sps_version='sps-1.1')
        # Replace the schematron slot with the sample schema.
        validator.schematron = isoschematron.Schematron(
            etree.parse(sample_sch))

        is_valid, found = validator._validate_sch()
        self.assertFalse(is_valid)
        self.assertTrue(found)
コード例 #29
0
ファイル: validate.py プロジェクト: tulibraries/tulflow
def report_s3_schematron(**kwargs):
    """Run a Schematron report over S3 files and write it back as a CSV.

    Keyword arguments read: ``source_prefix``, ``destination_prefix``,
    ``bucket``, ``schematron_filename``, ``access_id``, ``access_secret``.

    Every child element of every retrieved S3 document is validated and
    gets one CSV row. The CSV is stored under
    ``<destination_prefix>-report.csv`` in the same bucket.

    Returns a dict with the number of records processed under the key
    ``"transformed"``.
    """
    bucket = kwargs.get("bucket")
    source_prefix = kwargs.get("source_prefix")
    dest_prefix = kwargs.get("destination_prefix")
    schematron_file = kwargs.get("schematron_filename")
    access_id = kwargs.get("access_id")
    access_secret = kwargs.get("access_secret")

    # In-memory CSV that collects one row per record.
    csv_buffer = io.StringIO()
    writer = csv.DictWriter(
        csv_buffer, fieldnames=["id", "report", "record", "source_file"])
    writer.writeheader()

    # Fetch the schematron document and build an isoschematron validator
    # that keeps the report of the latest validation.
    schematron_source = process.get_github_content(
        "tulibraries/aggregator_mdx", schematron_file)
    validator = isoschematron.Schematron(
        etree.fromstring(schematron_source), store_report=True)

    # Walk the S3 files, validate each record and append it to the CSV.
    record_count = 0
    s3_keys = process.list_s3_content(bucket, access_id, access_secret,
                                      source_prefix)
    for s3_key in s3_keys:
        logging.info("Validating & Reporting On File: %s", s3_key)
        payload = process.get_s3_content(bucket, s3_key, access_id,
                                         access_secret)
        for record in etree.fromstring(payload).iterchildren():
            record_count += 1
            record_id = record.get("airflow-record-id")
            logging.info("Ran report on record: %s", record_id)
            validator.validate(record)
            row = {
                "id": record_id,
                "report": schematron_failed_validation_text(
                    validator.validation_report),
                "record": identifier_or_full_record(record),
                "source_file":
                f"https://s3.console.aws.amazon.com/s3/object/{bucket}/{s3_key}",
            }
            writer.writerow(row)

    report_filename = dest_prefix + "-report.csv"
    logging.info("Records report: https://%s.s3.amazonaws.com/%s", bucket,
                 report_filename)
    logging.info("Total Transform Count: %s", record_count)
    process.generate_s3_object(csv_buffer.getvalue(), bucket, report_filename,
                               access_id, access_secret)

    return {"transformed": record_count}
コード例 #30
0
def validate_xml(ctx, verbose=False):
    """Validate the XML files of each section in ``ctx.xml_validation``.

    Each section lists glob patterns under ``'files'`` and names a schema
    under ``'xsd'`` (XML Schema) or ``'schematron'``. Sections with no
    matching files or no recognized schema are reported and skipped.

    :param ctx: object exposing an ``xml_validation`` mapping
    :param verbose: also print section headers and valid file names
    :raises SystemExit: with code -1 once a section has been processed
        and any file so far failed validation
    """
    from lxml import etree
    errors = False
    for section in ctx.xml_validation.keys():
        if verbose:
            print('\nXML validation (%s)' % section)
        section_cfg = ctx.xml_validation[section]
        # gather files based on paths in the config
        files = []
        for path in section_cfg['files']:
            files.extend(glob.glob(path))
        if not files:
            print('No files to process for %s' % section)
            continue
        # load configured schema
        schema = None
        if 'xsd' in section_cfg:
            xmlschema_doc = etree.parse(section_cfg['xsd'])
            schema = etree.XMLSchema(xmlschema_doc)
        # TODO: support other schema types here

        # NOTE: should be possible to support rnc
        # if rnc2rng is installed, but current getting an error
        # elif 'rnc' in section_cfg:
        #     with open(section_cfg['rnc']) as rncdoc:
        #        schema = etree.RelaxNG.from_rnc_string(rncdoc.read())

        elif 'schematron' in section_cfg:
            from lxml import isoschematron
            sct_doc = etree.parse(section_cfg['schematron'])
            schema = isoschematron.Schematron(sct_doc)

        if schema is None:
            print('No recognized schema format found for %s' % section)
            continue

        for xml_file in files:
            xmldoc = etree.parse(xml_file)
            if not schema.validate(xmldoc):
                print('Validation failed: %s' % xml_file)
                errors = True
                # if verbose:
                # should errors only be displayed in verbose mode?
                print(schema.error_log)
            elif verbose:
                print('%s is valid' % xml_file)

        # if any file was invalid, exit with an error code to indicate
        # the build failed.  SystemExit is raised directly because the
        # ``exit`` helper is injected by the ``site`` module for
        # interactive use and is not guaranteed to exist in programs.
        if errors:
            raise SystemExit(-1)