def testOne(self):
    """Run data/didl_mods.xml through ``self.norm_mods`` and check the MODS output.

    The record is sent as an 'add' message. The test then checks the number
    of calls recorded by the observer, the positional arguments of the third
    call, that the ``lxmlNode`` of that call contains exactly one
    ``mods:mods`` element, that this element validates against
    mods-3-6.xsd, and that it serialises to the content of
    data/mods_converted.xml.
    """
    self.observer.methods['add'] = lambda *args, **kwargs: (x for x in [])

    # Parse inside a with-block so the fixture handle is closed right away.
    with open("data/didl_mods.xml") as fixture:
        lxmlNode = parse(fixture)
    list(compose(self.norm_mods.all_unknown(
        'add', 'id', 'metadata', 'anotherone',
        lxmlNode=lxmlNode,
        identifier='oai:very:secret:09987')))

    self.assertEqual(3, len(self.observer.calledMethods))
    result = self.observer.calledMethods[2].kwargs.get('lxmlNode')
    self.assertEqual(2, len(self.observer.calledMethods[0].args))
    arguments = self.observer.calledMethods[2].args
    self.assertEqual("id", arguments[0])
    self.assertEqual("metadata", arguments[1])

    # Get MODS from record; there should be exactly one.
    mods = result.xpath("//mods:mods", namespaces=namespacesMap)
    self.assertEqual(1, len(mods))

    # Validate against schema:
    with open(join(self.schemasPath, 'mods-3-6.xsd')) as schemaFile:
        modsSchema = XMLSchema(lxmlParse(schemaFile))
    modsSchema.validate(mods[0])
    if modsSchema.error_log.last_error:
        self.fail(modsSchema.error_log.last_error)

    # Check if expected result:
    with open("data/mods_converted.xml") as expectedFile:
        expectedResult = expectedFile.read()
    self.assertEqualsWithDiff(
        expectedResult,
        tostring(mods[0], pretty_print=True, encoding='utf-8'))
def assertValid(xmlString, schemaPath):
    """Assert that ``xmlString`` validates against the XML Schema file at ``schemaPath``.

    :param xmlString: the XML document to check, as a string.
    :param schemaPath: path of the XSD file to validate against.
    :raises AssertionError: when the schema's error log is non-empty after
        validation; the message is produced by ``formatException``.
    """
    # The with-block closes the schema file even if parsing it fails.
    with open(schemaPath) as schemaFile:
        schema = XMLSchema(parse(schemaFile))
    toValidate = parse(StringIO(xmlString))
    schema.validate(toValidate)
    if schema.error_log:
        raise AssertionError(formatException("assertValid", schema, toValidate))
class Validate(Observable):
    """Observable that schema-checks ElementTree arguments before forwarding.

    Every ``*_unknown`` hook first validates each positional and keyword
    argument whose exact type is ``_ElementTree`` against the schema loaded
    in the constructor, and only then passes the message on to the
    observers.
    """

    def __init__(self, schemaPath):
        """Load the XML Schema stored at ``schemaPath``.

        If the file is not a valid schema, the last entry of the parse
        error log is printed and the ``XMLSchemaParseError`` is re-raised.
        """
        Observable.__init__(self)
        try:
            with open(schemaPath) as schemaFile:
                self._schema = XMLSchema(parse(schemaFile))
        except XMLSchemaParseError as parseError:
            print(parseError.error_log.last_error)
            raise

    def all_unknown(self, message, *args, **kwargs):
        """Validate the arguments, then yield the ``all`` forward of ``message``."""
        self._detectAndValidate(*args, **kwargs)
        yield self.all.unknown(message, *args, **kwargs)

    def do_unknown(self, message, *args, **kwargs):
        """Validate the arguments, then forward ``message`` through ``do``."""
        self._detectAndValidate(*args, **kwargs)
        return self.do.unknown(message, *args, **kwargs)

    def any_unknown(self, message, *args, **kwargs):
        """Validate the arguments, then forward ``message`` through ``any``.

        ``NoneOfTheObserversRespond`` is translated into ``DeclineMessage``.
        """
        self._detectAndValidate(*args, **kwargs)
        try:
            answer = yield self.any.unknown(message, *args, **kwargs)
        except NoneOfTheObserversRespond:
            raise DeclineMessage
        return answer

    def call_unknown(self, message, *args, **kwargs):
        """Validate the arguments, then forward ``message`` through ``call``.

        ``NoneOfTheObserversRespond`` is translated into ``DeclineMessage``.
        """
        self._detectAndValidate(*args, **kwargs)
        try:
            return self.call.unknown(message, *args, **kwargs)
        except NoneOfTheObserversRespond:
            raise DeclineMessage

    def _detectAndValidate(self, *args, **kwargs):
        """Validate every argument (positional first) that is exactly an ``_ElementTree``."""
        for candidate in tuple(args) + tuple(kwargs.values()):
            # Exact type match on purpose: subclasses are not validated.
            if type(candidate) == _ElementTree:
                self.validate(candidate)

    def validate(self, arg):
        """Validate ``arg``; on failure log and raise a ``ValidateException``."""
        self._schema.validate(arg)
        if not self._schema.error_log:
            return
        failure = ValidateException(formatException(self._schema, arg))
        self.do.logException(failure)
        raise failure

    def assertValid(self, xmlOrString):
        """Raise ``AssertionError`` when ``xmlOrString`` does not validate.

        A ``str`` is UTF-8 encoded and parsed with ``XML`` first; any other
        value is handed to the schema unchanged.
        """
        if isinstance(xmlOrString, str):
            toValidate = XML(xmlOrString.encode('utf-8'))
        else:
            toValidate = xmlOrString
        self._schema.validate(toValidate)
        if self._schema.error_log:
            raise AssertionError(formatException(self._schema, toValidate))
def testOne(self):
    """Run data/didl_mods.xml through ``self.norm_didl`` and check the DIDL output.

    The record is sent as an 'add' message. The test then checks the number
    of calls recorded by the observer, the identifier and log message passed
    in the second call, that the ``lxmlNode`` of the fourth call contains
    exactly one ``didl:DIDL`` element, that this element validates against
    didl.xsd, and that it serialises to the content of
    data/didl_converted.xml.
    """
    self.observer.methods['add'] = lambda *args, **kwargs: (x for x in [])

    # Parse inside a with-block so the fixture handle is closed right away.
    with open("data/didl_mods.xml") as fixture:
        lxmlNode = parse(fixture)
    list(compose(self.norm_didl.all_unknown(
        'add', 'id', 'metadata', 'anotherone',
        lxmlNode=lxmlNode,
        identifier='oai:very:secret:09987')))

    self.assertEqual(4, len(self.observer.calledMethods))
    result = self.observer.calledMethods[3].kwargs.get('lxmlNode')
    self.assertEqual(2, len(self.observer.calledMethods[0].args))
    arguments = self.observer.calledMethods[1].args
    self.assertEqual("oai:very:secret:09987", arguments[0])
    # Test logMessage (the spelling must match the message emitted by the code under test):
    self.assertEqual(
        "Found objectFile in depricated dip:ObjectType. This should have been: rdf:type/@rdf:resource",
        arguments[1])

    # Get DIDL from record; there should be exactly one.
    didl = result.xpath("//didl:DIDL", namespaces=namespacesMap)
    self.assertEqual(1, len(didl))

    # Validate against schema:
    with open(join(self.schemasPath, 'didl.xsd')) as schemaFile:
        didlSchema = XMLSchema(lxmlParse(schemaFile))
    didlSchema.validate(didl[0])
    if didlSchema.error_log.last_error:
        self.fail(didlSchema.error_log.last_error)

    # Check if expected result:
    with open("data/didl_converted.xml") as expectedFile:
        expectedResult = expectedFile.read()
    self.assertEqualsWithDiff(
        expectedResult,
        tostring(didl[0], pretty_print=True, encoding='utf-8'))
def validate_schema(xml_loc: str, schema: etree.XMLSchema):
    """
    Check the XML file at ``xml_loc`` against ``schema``.

    Returns True when the document validates. When it does not, the schema's
    error log is written to the logger as warnings and False is returned.

    Raises:
        DoxygenMalformedXML: If the file at ``xml_loc`` cannot be parsed as XML.
    """
    assert xml_loc is not None
    assert schema is not None
    assert os.path.exists(xml_loc)

    # Read the whole file up front; parsing then works on the in-memory copy.
    with open(xml_loc, "rb") as handle:
        buffered = BytesIO(handle.read())

    try:
        document = etree.parse(buffered)
    except etree.XMLSyntaxError as err:
        logger.error(err)
        logger.warning(f"Failed to parse: {xml_loc}")
        logger.warning("Skipping this XML file")
        raise DoxygenMalformedXML(xml_loc) from err

    logger.debug(f"Validating {xml_loc}")
    if not schema or schema.validate(document):  # type: ignore
        return True

    logger.warning(f"Validating schema failed for: {xml_loc}")
    logger.warning(schema.error_log)  # type: ignore
    logger.warning("Skipping this XML file")
    return False
def validate_data(self, data, structural_schema, metamorphic_schema=None, validate_schemas=False):
    """
    Check that a data structure matches a structural (XML) schema.

    :param str data: data to check
    :param str structural_schema: unique_key
    :param str metamorphic_schema: unique_key
    :param bool validate_schemas: when exactly True, every provided schema is
        asserted before use; otherwise schemas are used as is. Skipping the
        check is faster. Use it at your own risk.
    :return: True if data is valid, False otherwise
    :rtype: bool
    """
    has_metamorphic = metamorphic_schema is not None

    if validate_schemas is True:
        self.assert_structural_schema(structural_schema)
        if has_metamorphic:
            self.assert_metamorphic_schema(metamorphic_schema)

    if has_metamorphic:
        # NOTE(review): the result of transform() is discarded here, so the
        # structural check below runs on the untransformed data. Confirm
        # this is intended.
        self.transform(data, metamorphic_schema)

    schema_document = self.get_cached_schema(structural_schema)
    validator = XMLSchema(schema_document)
    return validator.validate(parse(StringIO(data)))
def validate(self):
    """
    Check the data file for well-formedness and, when a schema file is
    configured, for conformance to that schema. On success the parsed
    document is stored in ``self.data``.

    @return: A flag indicating if the data validates against the schema.
    """
    # clear any previous xml errors
    clear_error_log()

    # Stays None when no schema file is configured; then only parsing is checked.
    schema = None
    if self.schema_file is not None:
        try:
            schema_document = parse(self.schema_file)
        except XMLSyntaxError as error:
            # The schema was not parsable XML
            logging.warning('The schema XML file could not be parsed.')
            for entry in error.error_log:
                logging.info(entry)
            return False

        try:
            schema = XMLSchema(schema_document)
        except XMLSchemaParseError as error:
            # The schema document is XML, but it's not a schema
            logging.warning(
                'The schema XML file was parsed, but it does not appear to be a valid XML Schema document.'
            )
            for entry in error.error_log:
                logging.info(entry)
            return False

    try:
        document = parse(self.datafile)
    except XMLSyntaxError as error:
        # The data was not parsable XML
        logging.warning('The data XML file could not be parsed.')
        for entry in error.error_log:
            logging.info(entry)
        return False

    if schema is not None and not schema.validate(document):
        logging.warning(
            'The data does not conform to the provided schema.')
        for entry in schema.error_log:
            logging.info(entry)
        return False

    self.data = document
    return True
def parse(self, filename):
    """Parse ``filename`` and return the resulting tree.

    With lxml available the document is also validated against the schema
    parsed from ``XSDContents``; an invalid document is reported through the
    logger and stderr, and ``None`` is returned. Without lxml the file is
    parsed with ``xml.etree.ElementTree`` and returned unvalidated.

    :raises ImportError: when neither parser can be imported.
    """
    self.parser = None
    # find parser
    try:
        from lxml.etree import parse, XMLSchema
        self.logger.info('using lxml.etree parser')
        # parse XML and validate it
        tree = parse(filename)
        # get XSD
        schemaDoc = parse(XSDContents)
        schema = XMLSchema(schemaDoc)
        if schema.validate(tree):
            self.logger.info('XML validated')
            return tree
        self.logger.error('XML NOT validated: {}'.format(filename))
        # stderr.write works on Python 2 and 3; the old ``print >> stderr``
        # form raises TypeError at runtime on Python 3.
        stderr.write('{}\n'.format(schema.error_log))
        return None
    except ImportError:
        try:
            from xml.etree.ElementTree import parse
            self.logger.info('using xml.etree.ElementTree parser')
            return parse(filename)
        except ImportError:
            self.logger.critical(
                "Failed to import ElementTree from any known place")
            raise
def validate():
    """Check the example documents of every schema in ``SCHEMAS``.

    For each schema name the XSD ``<name>/<name>.xsd`` is loaded. Every
    valid example must pass validation (otherwise ``schema.assertValid``
    is called on the failing document) and every invalid example must fail
    (otherwise an ``Exception`` is raised). The "question" schema takes its
    valid and invalid examples from ``*_new.question.xml`` and
    ``*_current.question.xml`` in its examples directory.
    """
    for name in SCHEMAS:
        schema = XMLSchema(parse("{0}/{0}.xsd".format(name)))
        is_question = name == "question"

        logging.info(
            "Checking that all valid examples pass when validated against the XSD file."
        )
        valid_pattern = (
            "{}/examples/*_new.question.xml" if is_question
            else "{}/examples/*.xml"
        )
        for example in glob.glob(valid_pattern.format(name)):
            document = parse(example)
            if schema.validate(document):
                logging.info(" - {} passes.".format(example))
            else:
                logging.info(" - {} does not pass.".format(example))
                schema.assertValid(document)

        logging.info(
            "Checking that all invalid examples fail when validated against the XSD file."
        )
        invalid_pattern = (
            "{}/examples/*_current.question.xml" if is_question
            else "{}/invalid_examples/*.xml"
        )
        for example in glob.glob(invalid_pattern.format(name)):
            document = parse(example)
            if not schema.validate(document):
                logging.info(" - {} fails.".format(example))
            else:
                logging.info(" - {} does not fail.".format(example))
                raise Exception(
                    "{} should fail validation, but doesn't.".format(example))
def valid_xml(self, xml_string):
    """Tell whether `xml_string` validates against the schema at `self.xml_schema_path`.

    Raises NotImplementedError when no schema path has been set.
    """
    schema_path = self.xml_schema_path
    if schema_path is None:
        raise NotImplementedError(
            "XSD validation failed: path to schema is not set.")

    # The document is parsed before the schema is loaded.
    document = parse(StringIO(xml_string))
    validator = XMLSchema(parse(schema_path))
    return validator.validate(document)
def parse(self, filename):
    """Parse ``filename`` and return the resulting tree.

    With lxml available the document is also validated against the schema
    parsed from ``XSDContents``. Without lxml the file is parsed with
    ``xml.etree.ElementTree`` and returned unvalidated.

    :raises ValueError: when lxml is in use and the document does not
        validate; the schema error log is written to stderr first.
    :raises ImportError: when neither parser can be imported.
    """
    self.parser = None
    # find parser
    try:
        from lxml.etree import parse, XMLSchema
        self.logger.info('using lxml.etree parser')
        # parse XML and validate it
        tree = parse(filename)
        # get XSD
        schemaDoc = parse(XSDContents)
        schema = XMLSchema(schemaDoc)
        if schema.validate(tree):
            self.logger.info('XML validated')
            return tree
        # stderr.write works on Python 2 and 3; the old ``print >> stderr``
        # form raises TypeError at runtime on Python 3.
        stderr.write('{}\n'.format(schema.error_log))
        raise ValueError('XML NOT validated: {}'.format(filename))
    except ImportError:
        try:
            from xml.etree.ElementTree import parse
            self.logger.info('using xml.etree.ElementTree parser')
            return parse(filename)
        except ImportError:
            self.logger.critical("Failed to import ElementTree from any known place")
            raise
#!/usr/bin/env python
"""Validate inst.xml against schema.xsd using lxml."""
from lxml.etree import XMLParser, fromstring, XMLSchema

# Read both documents as bytes inside with-blocks: the handles are closed
# deterministically, and lxml decodes the documents itself (fromstring
# rejects text strings that carry an XML encoding declaration).
with open('schema.xsd', 'rb') as schema_file:
    schema_doc = schema_file.read()
with open('inst.xml', 'rb') as inst_file:
    inst_doc = inst_file.read()

# Entity resolution is disabled for the instance document only.
parser = XMLParser(resolve_entities=False)
elt = fromstring(inst_doc, parser)
schema = XMLSchema(fromstring(schema_doc))
schema.validate(elt)
def assertValid(xmlString, schemaPath):
    """Assert that ``xmlString`` validates against the XML Schema file at ``schemaPath``.

    :param xmlString: the XML document to check, as a string.
    :param schemaPath: path of the XSD file to validate against.
    :raises AssertionError: when the schema's error log is non-empty after
        validation; the message is produced by ``formatException``.
    """
    # The with-block closes the schema file even if parsing it fails.
    with open(schemaPath) as schemaFile:
        schema = XMLSchema(parse(schemaFile))
    toValidate = parse(StringIO(xmlString))
    schema.validate(toValidate)
    if schema.error_log:
        raise AssertionError(formatException(schema, toValidate))
# Walk `path` and validate every knaw_short* / knaw_long* file found in a
# directory that contains a file named exactly "knaw_short" or "knaw_long".
logger.info('### Start validation of xml files in %s' % path)
print('### Start validation of xml files in %s' % path)
for subdir, dirs, files in os.walk(path):
    if "knaw_short" in files or "knaw_long" in files:
        for file in files:
            if file.startswith('knaw_short') or file.startswith(
                    'knaw_long'):
                file_path = os.path.join(subdir, file)
                # The with-block closes the handle even when parsing raises.
                with open(file_path) as xml_file:
                    contents = parse(xml_file)
                if file.startswith('knaw_short'):
                    count_short += 1
                    if not xmlschema_short.validate(contents):
                        count_short_invalid += 1
                        logger.error("ERROR in validating " + file_path)
                        logger.error(" " + xmlschema_short.
                                     error_log.last_error.message)
                        print("ERROR in validating " + file_path)
                else:
                    count_long += 1
                    if not xmlschema_long.validate(contents):
                        count_long_invalid += 1
                        # A missing topicValue child of 'topic' is counted as
                        # invalid but is not logged as an error.
                        if not xmlschema_long.error_log.last_error.message.startswith(
                                "Element '{http://www.knaw.nl/narcis/1.0/long/}topic': Missing child element(s). Expected is ( {http://www.knaw.nl/narcis/1.0/long/}topicValue"
                        ):
                            logger.error("ERROR in validating " + file_path)
count_research = 0 count_research_invalid = 0 if item == 'person': for subdir, dirs, files in os.walk(path): if "normdoc" in files: for file in files: file_path = os.path.join(subdir, file) xml = tostring(parse(file_path)) person_xml = xml[xml.index('<persoon'):xml. index('</persoon>') + 16] person_xml = person_xml.replace('<', '<').replace( '>', '>') person = parse(StringIO(person_xml)) count_person += 1 if not xmlschema_person.validate(person): count_person_invalid += 1 logger.error("ERROR in validating " + file_path) logger.error( " " + xmlschema_person.error_log.last_error.message) print("ERROR in validating " + file_path) logger.info('%d person normdocs validated' % count_person) logger.info('%d invalid person normdocs' % count_person_invalid) elif item == 'organisation': for subdir, dirs, files in os.walk(path): if "normdoc" in files: for file in files: file_path = os.path.join(subdir, file) xml = tostring(parse(file_path)) organisation_xml = xml[xml.index('<organisatie'):xml
def is_valid_xml(xml):
    """Return whether the XML string ``xml`` validates against datasetXSD.xsd.

    The schema is loaded from ``../Schemas/XSD/datasetXSD.xsd`` relative to
    ``dirname`` on every call.
    """
    schema_location = os.path.join(dirname, "../Schemas/XSD/datasetXSD.xsd")
    schema_tree = etree.parse(schema_location)
    validator = XMLSchema(schema_tree)
    return validator.validate(etree.fromstring(xml))
class StyledLayerDescriptor(SLDNode):
    """
    An object representation of an SLD document.

    @prop: NamedLayer

        The named layer that this styling applies to.

        I{Type}: L{NamedLayer}
    """

    # Path of the locally stored copy of the SLD schema. It is shared by all
    # instances so the schema is not downloaded for every new document.
    _cached_schema = None

    def __init__(self, sld_file=None):
        """
        Create a new SLD document.

        The SLD schema is fetched from the internet the first time it is
        needed and read from a local temporary copy afterwards. If an sld
        file is provided it is parsed and validated against that schema; a
        validation failure is only logged as a warning. Without a file an
        empty StyledLayerDescriptor element (version 1.0.0) is created.

        @type  sld_file: string
        @param sld_file: The name of a pre-existing SLD file.
        """
        super(StyledLayerDescriptor, self).__init__(None)

        if StyledLayerDescriptor._cached_schema is None:
            logging.debug('Storing new schema into cache.')

            # Download first, so a failed request does not leave an orphaned
            # temporary file behind.
            schema_url = 'http://schemas.opengis.net/sld/1.0.0/StyledLayerDescriptor.xsd'
            resp = urllib2.urlopen(schema_url)
            try:
                schema_bytes = resp.read()
            finally:
                resp.close()

            # delete=False: the file must outlive this handle, because later
            # instances re-read the schema from it.
            localschema = NamedTemporaryFile(delete=False)
            try:
                localschema.write(schema_bytes)
                localschema.seek(0)
                theschema = parse(localschema)
            finally:
                localschema.close()
            StyledLayerDescriptor._cached_schema = localschema.name
        else:
            logging.debug('Fetching schema from cache.')
            with open(StyledLayerDescriptor._cached_schema, 'r') as localschema:
                theschema = parse(localschema)

        self._schema = XMLSchema(theschema)

        if sld_file is not None:
            self._node = parse(sld_file)
            if not self._schema.validate(self._node):
                logging.warning('SLD File "%s" does not validate against the SLD schema.', sld_file)
        else:
            self._node = Element("{%s}StyledLayerDescriptor" % SLDNode._nsmap['sld'], version="1.0.0", nsmap=SLDNode._nsmap)

        setattr(self.__class__, 'NamedLayer', SLDNode.makeproperty('sld', cls=NamedLayer,
                docstring="The named layer of the SLD."))

    def __del__(self):
        """
        Destroy the StyledLayerDescriptor object, and clear its cache.

        The cached schema file is removed, so the next instance created
        afterwards downloads the schema again.
        """
        if StyledLayerDescriptor._cached_schema is not None:
            logging.debug('Clearing cached schema.')
            os.remove(StyledLayerDescriptor._cached_schema)
            StyledLayerDescriptor._cached_schema = None

    def __deepcopy__(self, memo):
        """
        Perform a deep copy. Instead of copying references to the schema
        object, create a new SLD, and deepcopy the SLD node.
        """
        sld = StyledLayerDescriptor()
        sld._node = copy.deepcopy(self._node)
        return sld

    def _get_root_element(self):
        """
        Return the root element of this document.

        A document parsed from a file is held as a tree (which has
        getroot()), while a newly created document is held as a bare
        element, which is then its own root.
        """
        getroot = getattr(self._node, 'getroot', None)
        return self._node if getroot is None else getroot()

    def normalize(self):
        """
        Normalize this node and all child nodes prior to validation. The SLD
        is modified in place.
        """
        if self.NamedLayer is not None:
            self.NamedLayer.normalize()

    def validate(self):
        """
        Validate the current file against the SLD schema. This first
        normalizes the SLD document, then validates it. Any schema
        validation error messages are logged at the INFO level.

        @rtype: boolean
        @return: A flag indicating if the SLD is valid.
        """
        self.normalize()

        if self._node is None or self._schema is None:
            logging.debug('The node or schema is empty, and cannot be validated.')
            return False

        is_valid = self._schema.validate(self._node)

        for msg in self._schema.error_log:
            logging.info('Line:%d, Column:%d -- %s', msg.line, msg.column, msg.message)

        return is_valid

    @property
    def version(self):
        """
        Get the SLD version.
        """
        return self._get_root_element().get('version')

    @property
    def xmlns(self):
        """
        Get the XML Namespace.
        """
        return self._get_root_element().nsmap[None]

    def create_namedlayer(self, name):
        """
        Create a L{NamedLayer} in this SLD.

        @type  name: string
        @param name: The name of the layer.
        @rtype: L{NamedLayer}
        @return: The named layer, attached to this SLD.
        """
        namedlayer = self.get_or_create_element('sld', 'NamedLayer')
        namedlayer.Name = name
        return namedlayer

    def as_sld(self, pretty_print=False):
        """
        Serialize this SLD model into a string.

        @rtype: string
        @returns: The content of the SLD.
        """
        return tostring(self._node, pretty_print=pretty_print)
count_organisation = 0
count_organisation_invalid = 0
count_research = 0
count_research_invalid = 0

if item == 'person':
    for subdir, dirs, files in os.walk(path):
        if "normdoc" in files:
            for file in files:
                file_path = os.path.join(subdir, file)
                xml = tostring(parse(file_path))
                # The person record is embedded as entity-escaped XML: cut
                # out the escaped <persoon>...</persoon> span (end marker
                # included), then unescape it so it can be parsed on its own.
                person_end = '&lt;/persoon&gt;'
                person_xml = xml[xml.index('&lt;persoon'):
                                 xml.index(person_end) + len(person_end)]
                person_xml = person_xml.replace('&lt;', '<').replace('&gt;', '>')
                person = parse(StringIO(person_xml))
                count_person += 1
                if not xmlschema_person.validate(person):
                    count_person_invalid += 1
                    logger.error("ERROR in validating " + file_path)
                    logger.error(" " + xmlschema_person.error_log.last_error.message)
                    print("ERROR in validating " + file_path)
    logger.info('%d person normdocs validated' % count_person)
    logger.info('%d invalid person normdocs' % count_person_invalid)
elif item == 'organisation':
    for subdir, dirs, files in os.walk(path):
        if "normdoc" in files:
            for file in files:
                file_path = os.path.join(subdir, file)
                xml = tostring(parse(file_path))
                # Same extraction as for persons, for the escaped
                # <organisatie>...</organisatie> span.
                organisation_end = '&lt;/organisatie&gt;'
                organisation_xml = xml[xml.index('&lt;organisatie'):
                                       xml.index(organisation_end) + len(organisation_end)]
                organisation_xml = organisation_xml.replace('&lt;', '<').replace('&gt;', '>')
                organisation = parse(StringIO(organisation_xml))
def validate_xml(xml_string, xsd_string):
    """Validate an XML document against an XML schema.

    Both arguments are handed to ``parse`` as they are; the schema is
    loaded before the document. Returns the result of the schema's
    ``validate`` call.
    """
    schema_tree = parse(xsd_string)
    validator = XMLSchema(schema_tree)
    document = parse(xml_string)
    return validator.validate(document)