Esempio n. 1
0
 def testOne(self):
     """Normalise data/didl_mods.xml and check the MODS node passed on to the observer.

     The test asserts that exactly three observer calls were recorded, that
     the third call carries the positional arguments "id" and "metadata",
     that the resulting node contains exactly one mods:mods element which
     validates against mods-3-6.xsd, and that its serialisation equals
     data/mods_converted.xml.
     """
     self.observer.methods['add'] = lambda *args, **kwargs: (x for x in [])
     # Keep the input file open for the duration of the call and close it
     # deterministically afterwards instead of leaking the handle.
     with open("data/didl_mods.xml") as didlFile:
         list(compose(self.norm_mods.all_unknown(
             'add', 'id', 'metadata', 'anotherone',
             lxmlNode=parse(didlFile),
             identifier='oai:very:secret:09987')))
     self.assertEqual(3, len(self.observer.calledMethods))

     result = self.observer.calledMethods[2].kwargs.get('lxmlNode')

     self.assertEqual(2, len(self.observer.calledMethods[0].args))

     arguments = self.observer.calledMethods[2].args
     self.assertEqual("id", arguments[0])
     self.assertEqual("metadata", arguments[1])

     # Get MODS from record:
     mods = result.xpath("//mods:mods", namespaces=namespacesMap)

     # Should be exactly 1 (assertEqual reports the actual count on failure):
     self.assertEqual(1, len(mods))

     # Validate against schema:
     with open(join(self.schemasPath, 'mods-3-6.xsd')) as schemaFile:
         modsSchema = XMLSchema(lxmlParse(schemaFile))
     modsSchema.validate(mods[0])
     if modsSchema.error_log.last_error:
         self.fail(modsSchema.error_log.last_error)

     # Check if expected result:
     with open("data/mods_converted.xml") as expectedFile:
         expectedResult = expectedFile.read()
     self.assertEqualsWithDiff(expectedResult, tostring(mods[0], pretty_print = True, encoding='utf-8'))
Esempio n. 2
0
def assertValid(xmlString, schemaPath):
    """Raise AssertionError when ``xmlString`` does not validate against a schema.

    :param xmlString: XML document text; it is wrapped in a StringIO and parsed.
    :param schemaPath: path of the XML Schema file to validate against.
    :raises AssertionError: when the schema's error log is non-empty after
        validation; the message is built by ``formatException``.
    """
    # Close the schema file deterministically instead of leaking the handle.
    with open(schemaPath) as schemaFile:
        schema = XMLSchema(parse(schemaFile))
    toValidate = parse(StringIO(xmlString))
    schema.validate(toValidate)
    if schema.error_log:
        raise AssertionError(formatException("assertValid", schema,
                                             toValidate))
Esempio n. 3
0
class Validate(Observable):
    """Observable that schema-validates tree arguments before forwarding a message.

    Each ``*_unknown`` handler first runs ``_detectAndValidate`` on the
    message arguments and then forwards the message through the matching
    dispatcher (``self.all``, ``self.do``, ``self.any`` or ``self.call``).
    """

    def __init__(self, schemaPath):
        """Compile the XML Schema found at ``schemaPath``.

        If the schema cannot be compiled, the last entry of the parse error
        log is printed and the XMLSchemaParseError is re-raised.
        """
        Observable.__init__(self)
        try:
            with open(schemaPath) as fp:
                self._schema = XMLSchema(parse(fp))
        except XMLSchemaParseError as e:
            # Surface the most recent schema compilation error before propagating.
            print(e.error_log.last_error)
            raise


    def all_unknown(self, message, *args, **kwargs):
        """Validate the arguments, then yield the result of ``self.all.unknown``.

        This is a generator function, so validation only runs once the
        generator is advanced for the first time.
        """
        self._detectAndValidate(*args, **kwargs)
        yield self.all.unknown(message, *args, **kwargs)

    def do_unknown(self, message, *args, **kwargs):
        """Validate the arguments, then return the result of ``self.do.unknown``."""
        self._detectAndValidate(*args, **kwargs)
        return self.do.unknown(message, *args, **kwargs)

    def any_unknown(self, message, *args, **kwargs):
        """Validate the arguments, then delegate to ``self.any.unknown``.

        Generator function: the value sent back in at the ``yield`` becomes
        the generator's return value. NoneOfTheObserversRespond is converted
        into DeclineMessage.
        """
        self._detectAndValidate(*args, **kwargs)
        try:
            response = yield self.any.unknown(message, *args, **kwargs)
            return response
        except NoneOfTheObserversRespond:
            raise DeclineMessage

    def call_unknown(self, message, *args, **kwargs):
        """Validate the arguments, then return the result of ``self.call.unknown``.

        NoneOfTheObserversRespond is converted into DeclineMessage.
        """
        self._detectAndValidate(*args, **kwargs)
        try:
            return self.call.unknown(message, *args, **kwargs)
        except NoneOfTheObserversRespond:
            raise DeclineMessage

    def _detectAndValidate(self, *args, **kwargs):
        """Run ``validate`` on every positional or keyword value that is an _ElementTree."""
        allArguments = list(args) + list(kwargs.values())
        for arg in allArguments:
            # Exact type comparison: instances of _ElementTree subclasses are
            # skipped, not validated.
            if type(arg) == _ElementTree:
                self.validate(arg)

    def validate(self, arg):
        """Validate ``arg`` against the schema.

        When the schema's error log is non-empty, a ValidateException is
        built, passed to ``self.do.logException`` and then raised.
        """
        self._schema.validate(arg)
        if self._schema.error_log:
            exception = ValidateException(formatException(self._schema, arg))
            self.do.logException(exception)
            raise exception

    def assertValid(self, xmlOrString):
        """Raise AssertionError when ``xmlOrString`` does not validate.

        A ``str`` argument is encoded as UTF-8 and parsed with ``XML``; any
        other value is handed to the schema unchanged.
        """
        toValidate = XML(xmlOrString.encode('utf-8')) if isinstance(xmlOrString, str) else xmlOrString
        self._schema.validate(toValidate)
        if self._schema.error_log:
            raise AssertionError(formatException(self._schema, toValidate))
Esempio n. 4
0
    def testOne(self):
        """Normalise data/didl_mods.xml and check the DIDL node passed on to the observer.

        The test asserts that exactly four observer calls were recorded, that
        the second call carries the record identifier and the deprecation log
        message, that the resulting node contains exactly one didl:DIDL
        element which validates against didl.xsd, and that its serialisation
        equals data/didl_converted.xml.
        """
        self.observer.methods['add'] = lambda *args, **kwargs: (x for x in [])
        # Keep the input file open for the duration of the call and close it
        # deterministically afterwards instead of leaking the handle.
        with open("data/didl_mods.xml") as didlFile:
            list(
                compose(
                    self.norm_didl.all_unknown(
                        'add',
                        'id',
                        'metadata',
                        'anotherone',
                        lxmlNode=parse(didlFile),
                        identifier='oai:very:secret:09987')))
        self.assertEqual(4, len(self.observer.calledMethods))

        result = self.observer.calledMethods[3].kwargs.get('lxmlNode')

        self.assertEqual(2, len(self.observer.calledMethods[0].args))

        arguments = self.observer.calledMethods[1].args

        self.assertEqual("oai:very:secret:09987", arguments[0])
        # Test logMessage:
        self.assertEqual(
            "Found objectFile in depricated dip:ObjectType. This should have been: rdf:type/@rdf:resource",
            arguments[1])

        # Get DIDL from record:
        didl = result.xpath("//didl:DIDL", namespaces=namespacesMap)

        # Should be exactly 1 (assertEqual reports the actual count on failure):
        self.assertEqual(1, len(didl))

        # Validate against schema:
        with open(join(self.schemasPath, 'didl.xsd')) as schemaFile:
            didlSchema = XMLSchema(lxmlParse(schemaFile))
        didlSchema.validate(didl[0])
        if didlSchema.error_log.last_error:
            self.fail(didlSchema.error_log.last_error)

        # Check if expected result:
        with open("data/didl_converted.xml") as expectedFile:
            expectedResult = expectedFile.read()
        self.assertEqualsWithDiff(
            expectedResult,
            tostring(didl[0], pretty_print=True, encoding='utf-8'))
Esempio n. 5
0
def validate_schema(xml_loc: str, schema: etree.XMLSchema):
    """
    Check the XML file at ``xml_loc`` against ``schema``.

    Returns True when the document validates and False when it does not;
    a failed validation is reported through warning-level log messages.

    Raises:
        DoxygenMalformedXML: If the file at ``xml_loc`` cannot be parsed
            as XML.
    """
    assert xml_loc is not None
    assert schema is not None
    assert os.path.exists(xml_loc)

    # Read the whole file first so parsing works on an in-memory buffer.
    with open(xml_loc, "rb") as handle:
        raw = handle.read()
    buffer = BytesIO(raw)

    try:
        document = etree.parse(buffer)
    except etree.XMLSyntaxError as e:
        logger.error(e)
        logger.warning(f"Failed to parse: {xml_loc}")
        logger.warning("Skipping this XML file")
        raise DoxygenMalformedXML(xml_loc) from e

    logger.debug(f"Validating {xml_loc}")

    # Success path first: nothing to report when the document validates.
    if not schema or schema.validate(document):  # type: ignore
        return True

    logger.warning(f"Validating schema failed for: {xml_loc}")
    logger.warning(schema.error_log)  # type: ignore
    logger.warning("Skipping this XML file")
    return False
Esempio n. 6
0
    def validate_data(self, data, structural_schema, metamorphic_schema=None, validate_schemas=False):
        """
        Validate ``data`` against the cached structural schema.

        :param str data: XML text to check
        :param str structural_schema: unique_key of the structural schema
        :param str metamorphic_schema: unique_key of an optional
          metamorphic schema; when given, ``self.transform`` is called
          with ``data`` and this key
        :param bool validate_schemas: when exactly True, the given schemas
          are themselves asserted before use; otherwise they are used as is
        :return: the result of validating ``data`` against the structural
          schema
        :rtype: bool
        """

        has_metamorphic = metamorphic_schema is not None

        if validate_schemas is True:
            self.assert_structural_schema(structural_schema)
            if has_metamorphic:
                self.assert_metamorphic_schema(metamorphic_schema)

        if has_metamorphic:
            # NOTE(review): the return value of transform() is discarded, so
            # the validation below runs on the untransformed data - confirm.
            self.transform(data, metamorphic_schema)

        schema = XMLSchema(self.get_cached_schema(structural_schema))
        document = parse(StringIO(data))

        return schema.validate(document)
Esempio n. 7
0
    def validate_data(self,
                      data,
                      structural_schema,
                      metamorphic_schema=None,
                      validate_schemas=False):
        """
        Validate ``data`` against the cached structural schema.

        :param str data: XML text to check
        :param str structural_schema: unique_key of the structural schema
        :param str metamorphic_schema: unique_key of an optional
          metamorphic schema; when given, ``self.transform`` is called
          with ``data`` and this key
        :param bool validate_schemas: when exactly True, the given schemas
          are themselves asserted before use; otherwise they are used as is
        :return: the result of validating ``data`` against the structural
          schema
        :rtype: bool
        """

        use_metamorphic = metamorphic_schema is not None

        if validate_schemas is True:
            self.assert_structural_schema(structural_schema)
            if use_metamorphic:
                self.assert_metamorphic_schema(metamorphic_schema)

        if use_metamorphic:
            # NOTE(review): the return value of transform() is discarded, so
            # the validation below runs on the untransformed data - confirm.
            self.transform(data, metamorphic_schema)

        compiled_schema = XMLSchema(self.get_cached_schema(structural_schema))
        parsed_data = parse(StringIO(data))

        return compiled_schema.validate(parsed_data)
Esempio n. 8
0
    def validate(self):
        """
        Parse the data file and, when a schema file is configured, check the
        data against that schema. On success the parsed document is stored
        in ``self.data``.

        @return: True when the data parsed (and validated, if a schema file
            is set); False when the schema or the data could not be parsed,
            the schema is not a valid XML Schema, or the data does not
            conform to it.
        """

        def report(summary, source):
            # One warning with the summary, then every entry of the error log
            # at INFO level.
            logging.warning(summary)
            for entry in source.error_log:
                logging.info(entry)

        # clear any previous xml errors
        clear_error_log()

        if self.schema_file is not None:
            try:
                schema_doc = parse(self.schema_file)
            except XMLSyntaxError as err:
                # The schema was not parsable XML
                report('The schema XML file could not be parsed.', err)
                return False

            try:
                schema = XMLSchema(schema_doc)
            except XMLSchemaParseError as err:
                # The schema document is XML, but it's not a schema
                report(
                    'The schema XML file was parsed, but it does not appear to be a valid XML Schema document.',
                    err)
                return False

        try:
            document = parse(self.datafile)
        except XMLSyntaxError as err:
            # The data was not parsable XML
            report('The data XML file could not be parsed.', err)
            return False

        if self.schema_file is not None and not schema.validate(document):
            report('The data does not conform to the provided schema.',
                   schema)
            return False

        self.data = document
        return True
Esempio n. 9
0
 def parse(self, filename):
     """Parse ``filename`` and return the resulting tree.

     With lxml available, the document is validated against the schema
     parsed from ``XSDContents``; an invalid document is logged, its error
     log is written to stderr, and None is returned. Without lxml the file
     is parsed with xml.etree.ElementTree and returned unvalidated.
     """
     self.parser = None
     try:
         from lxml.etree import parse, XMLSchema
         self.logger.info('using lxml.etree parser')
         # parse the document first, then build the schema to check it with
         tree = parse(filename)
         schema = XMLSchema(parse(XSDContents))
         if schema.validate(tree):
             self.logger.info('XML validated')
             return tree
         self.logger.error('XML NOT validated: {}'.format(filename))
         print >> stderr, schema.error_log
         return None
     except ImportError:
         # lxml is missing: fall back to the standard library parser
         try:
             from xml.etree.ElementTree import parse
             self.logger.info('using xml.etree.ElementTree parser')
             return parse(filename)
         except ImportError:
             self.logger.critical(
                 "Failed to import ElementTree from any known place")
             raise
Esempio n. 10
0
def validate():
    """Check every schema in ``SCHEMAS`` against its example documents.

    For each schema name ``s`` the XSD at ``s/s.xsd`` is compiled, then:

    * every "valid" example must validate; on the first one that does not,
      ``schema.assertValid`` is called on it (presumably lxml's, which
      raises with the validation error - confirm);
    * every "invalid" example must fail validation; the first one that
      validates raises ``Exception``.

    The "question" schema keeps both kinds in ``examples/`` and tells them
    apart by file name suffix; all other schemas use ``examples/`` and
    ``invalid_examples/``.
    """
    for s in SCHEMAS:
        fp1 = "{}/{}.xsd".format(s, s)

        xsdDocument = parse(fp1)
        schema = XMLSchema(xsdDocument)

        logging.info(
            "Checking that all valid examples pass when validated against the XSD file."
        )

        if s == "question":
            fps = glob.glob("{}/examples/*_new.question.xml".format(s))
        else:
            fps = glob.glob("{}/examples/*.xml".format(s))

        for fp in fps:
            xmlDocument = parse(fp)

            # Plain truthiness instead of comparing the result with True.
            if schema.validate(xmlDocument):
                logging.info(" - {} passes.".format(fp))
            else:
                logging.info(" - {} does not pass.".format(fp))

                schema.assertValid(xmlDocument)

        logging.info(
            "Checking that all invalid examples fail when validated against the XSD file."
        )

        if s == "question":
            fps = glob.glob("{}/examples/*_current.question.xml".format(s))
        else:
            fps = glob.glob("{}/invalid_examples/*.xml".format(s))

        for fp in fps:
            xmlDocument = parse(fp)

            if not schema.validate(xmlDocument):
                logging.info(" - {} fails.".format(fp))
            else:
                logging.info(" - {} does not fail.".format(fp))

                raise Exception(
                    "{} should fail validation, but doesn't.".format(fp))
Esempio n. 11
0
 def valid_xml(self, xml_string):
     """Validate `xml_string` against the schema at `self.xml_schema_path`.

     Returns the validation result. Raises NotImplementedError when no
     schema path has been set.
     """
     if self.xml_schema_path is None:
         message = "XSD validation failed: path to schema is not set."
         raise NotImplementedError(message)
     document = parse(StringIO(xml_string))
     schema_tree = parse(self.xml_schema_path)
     schema = XMLSchema(schema_tree)
     return schema.validate(document)
Esempio n. 12
0
 def parse(self, filename):
     """Parse ``filename`` and return the resulting tree.

     With lxml available, the document is validated against the schema
     parsed from ``XSDContents``; an invalid document has its error log
     written to stderr and raises ValueError. Without lxml the file is
     parsed with xml.etree.ElementTree and returned unvalidated.
     """
     self.parser = None
     try:
         from lxml.etree import parse, XMLSchema
         self.logger.info('using lxml.etree parser')
         # parse the document first, then build the schema to check it with
         tree = parse(filename)
         schema = XMLSchema(parse(XSDContents))
         if not schema.validate(tree):
             print >> stderr,  schema.error_log
             raise ValueError('XML NOT validated: {}'.format(filename))
         self.logger.info('XML validated')
         return tree
     except ImportError:
         # lxml is missing: fall back to the standard library parser
         try:
             from xml.etree.ElementTree import parse
             self.logger.info('using xml.etree.ElementTree parser')
             return parse(filename)
         except ImportError:
             self.logger.critical("Failed to import ElementTree from any known place")
             raise
Esempio n. 13
0
#!/usr/bin/env python

from lxml.etree import XMLParser, fromstring, XMLSchema

# Read both documents up front; the context managers close the file handles
# instead of leaving them to the garbage collector.
with open('schema.xsd') as schema_file:
    schema_doc = schema_file.read()
with open('inst.xml') as inst_file:
    inst_doc = inst_file.read()

# The instance document is parsed with entity resolution switched off; the
# schema document is parsed with the default parser.
parser = XMLParser(resolve_entities=False)
elt = fromstring(inst_doc, parser)
schema = XMLSchema(fromstring(schema_doc))
# The validation result is not used by this script.
schema.validate(elt)
Esempio n. 14
0
def assertValid(xmlString, schemaPath):
    """Raise AssertionError when ``xmlString`` does not validate against a schema.

    :param xmlString: XML document text; it is wrapped in a StringIO and parsed.
    :param schemaPath: path of the XML Schema file to validate against.
    :raises AssertionError: when the schema's error log is non-empty after
        validation; the message is built by ``formatException``.
    """
    # Close the schema file deterministically instead of leaking the handle.
    with open(schemaPath) as schemaFile:
        schema = XMLSchema(parse(schemaFile))
    toValidate = parse(StringIO(xmlString))
    schema.validate(toValidate)
    if schema.error_log:
        raise AssertionError(formatException(schema, toValidate))
        logger.info('### Start validation of xml files in %s' % path)
        print('### Start validation of xml files in %s' % path)

        for subdir, dirs, files in os.walk(path):
            if "knaw_short" in files or "knaw_long" in files:
                for file in files:
                    if file.startswith('knaw_short') or file.startswith(
                            'knaw_long'):
                        file_path = os.path.join(subdir, file)
                        xml_file = open(file_path)
                        contents = parse(xml_file)
                        xml_file.close()
                        if file.startswith('knaw_short'):
                            count_short += 1
                            if not xmlschema_short.validate(contents):
                                count_short_invalid += 1
                                logger.error("ERROR in validating " +
                                             file_path)
                                logger.error("    " + xmlschema_short.
                                             error_log.last_error.message)
                                print("ERROR in validating " + file_path)
                        else:
                            count_long += 1
                            if not xmlschema_long.validate(contents):
                                count_long_invalid += 1
                                if not xmlschema_long.error_log.last_error.message.startswith(
                                        "Element '{http://www.knaw.nl/narcis/1.0/long/}topic': Missing child element(s). Expected is ( {http://www.knaw.nl/narcis/1.0/long/}topicValue"
                                ):
                                    logger.error("ERROR in validating " +
                                                 file_path)
Esempio n. 16
0
        count_research = 0
        count_research_invalid = 0

        if item == 'person':
            for subdir, dirs, files in os.walk(path):
                if "normdoc" in files:
                    for file in files:
                        file_path = os.path.join(subdir, file)
                        xml = tostring(parse(file_path))
                        person_xml = xml[xml.index('<persoon'):xml.
                                         index('</persoon>') + 16]
                        person_xml = person_xml.replace('&lt;', '<').replace(
                            '&gt;', '>')
                        person = parse(StringIO(person_xml))
                        count_person += 1
                        if not xmlschema_person.validate(person):
                            count_person_invalid += 1
                            logger.error("ERROR in validating " + file_path)
                            logger.error(
                                "    " +
                                xmlschema_person.error_log.last_error.message)
                            print("ERROR in validating " + file_path)
            logger.info('%d person normdocs validated' % count_person)
            logger.info('%d invalid person normdocs' % count_person_invalid)
        elif item == 'organisation':
            for subdir, dirs, files in os.walk(path):
                if "normdoc" in files:
                    for file in files:
                        file_path = os.path.join(subdir, file)
                        xml = tostring(parse(file_path))
                        organisation_xml = xml[xml.index('&lt;organisatie'):xml
def is_valid_xml(xml):
    """Validate the XML text ``xml`` against the dataset XSD.

    The schema is loaded from ``../Schemas/XSD/datasetXSD.xsd`` relative to
    ``dirname`` on every call; the validation result is returned.
    """
    schema_file = os.path.join(dirname, "../Schemas/XSD/datasetXSD.xsd")
    schema_tree = etree.parse(schema_file)
    schema = XMLSchema(schema_tree)
    document = etree.fromstring(xml)
    return schema.validate(document)
Esempio n. 18
0
class StyledLayerDescriptor(SLDNode):
    """
    An object representation of an SLD document.

    The SLD schema is downloaded once into a temporary file whose path is
    kept on the class; later instances re-parse that file.

    @prop: NamedLayer

        The named layer that this styling applies to.

        I{Type}: L{NamedLayer}
    """

    # Holds the path of the temporary file containing the downloaded schema
    # (not a parsed document); None until the first instance is created.
    _cached_schema = None
    """A cached schema document, to prevent multiple requests from occurring."""

    def __init__(self, sld_file=None):
        """
        Create a new SLD document.

        The SLD schema is always loaded: from the internet when nothing is
        cached yet, otherwise from the cached temporary file. If an sld file
        is provided it is parsed and validated against that schema; a failed
        validation is only logged as a warning. Without an sld file, an
        empty StyledLayerDescriptor element (version 1.0.0) is created.

        @type  sld_file: string
        @param sld_file: The name of a pre-existing SLD file.
        """
        super(StyledLayerDescriptor, self).__init__(None)

        if StyledLayerDescriptor._cached_schema is None:
            logging.debug('Storing new schema into cache.')

            # delete=False keeps the file on disk after close() so that later
            # instances can reopen it by name.
            localschema = NamedTemporaryFile(delete=False)
            schema_url = 'http://schemas.opengis.net/sld/1.0.0/StyledLayerDescriptor.xsd'
            resp = urllib2.urlopen(schema_url)
            localschema.write(resp.read())
            resp.close()
            # Rewind so the schema can be parsed from the same handle.
            localschema.seek(0)

            theschema = parse(localschema)
            localschema.close()

            StyledLayerDescriptor._cached_schema = localschema.name
        else:
            logging.debug('Fetching schema from cache.')

            localschema = open(StyledLayerDescriptor._cached_schema, 'r')
            theschema = parse(localschema)
            localschema.close()

        self._schema = XMLSchema(theschema)

        if not sld_file is None:
            self._node = parse(sld_file)

            if not self._schema.validate(self._node):
                logging.warn('SLD File "%s" does not validate against the SLD schema.', sld_file)
        else:
            self._node = Element("{%s}StyledLayerDescriptor" % SLDNode._nsmap['sld'], version="1.0.0", nsmap=SLDNode._nsmap)

        # The NamedLayer property is installed on the class (not the
        # instance), and is re-installed on every construction.
        setattr(self.__class__, 'NamedLayer', SLDNode.makeproperty('sld', cls=NamedLayer,
            docstring="The named layer of the SLD."))

    def __del__(self):
        """
        Destroy the StyledLayerDescriptor object, and clear its cache.

        The cache is class-wide: destroying any one instance removes the
        cached schema file and resets the cached path to None.
        """
        if not StyledLayerDescriptor._cached_schema is None:
            logging.debug('Clearing cached schema.')

            os.remove(StyledLayerDescriptor._cached_schema)
            StyledLayerDescriptor._cached_schema = None

    def __deepcopy__(self, memo):
        """
        Perform a deep copy. Instead of copying references to the schema
        object, create a new SLD, and deepcopy the SLD node.
        """
        sld = StyledLayerDescriptor()
        sld._node = copy.deepcopy(self._node)
        return sld


    def normalize(self):
        """
        Normalize this node and all child nodes prior to validation. The SLD
        is modified in place.
        """
        if not self.NamedLayer is None:
            self.NamedLayer.normalize()


    def validate(self):
        """
        Validate the current file against the SLD schema. This first normalizes
        the SLD document, then validates it. Any schema validation error messages
        are logged at the INFO level.

        @rtype: boolean
        @return: A flag indicating if the SLD is valid; False as well when
            the node or the schema is None.
        """
        self.normalize()

        if self._node is None or self._schema is None:
            logging.debug('The node or schema is empty, and cannot be validated.')
            return False

        is_valid = self._schema.validate(self._node)

        for msg in self._schema.error_log:
            logging.info('Line:%d, Column:%d -- %s', msg.line, msg.column, msg.message)

        return is_valid


    @property
    def version(self):
        """
        Get the SLD version, read from the root element's 'version' attribute.
        """
        # NOTE(review): when no sld_file was given, _node comes from
        # Element(...) rather than parse(...) - confirm getroot() is
        # available on that object.
        return self._node.getroot().get('version')

    @property
    def xmlns(self):
        """
        Get the XML Namespace, i.e. the root element's default (unprefixed)
        namespace.
        """
        return self._node.getroot().nsmap[None]

    def create_namedlayer(self, name):
        """
        Create a L{NamedLayer} in this SLD.

        @type  name: string
        @param name: The name of the layer.
        @rtype: L{NamedLayer}
        @return: The named layer, attached to this SLD.
        """
        namedlayer = self.get_or_create_element('sld', 'NamedLayer')
        namedlayer.Name = name
        return namedlayer

    def as_sld(self, pretty_print=False):
        """
        Serialize this SLD model into a string.

        @type  pretty_print: boolean
        @param pretty_print: Passed through to the serializer.
        @rtype: string
        @returns: The content of the SLD.
        """
        return tostring(self._node, pretty_print=pretty_print)
        count_organisation = 0
        count_organisation_invalid = 0
        count_research = 0
        count_research_invalid = 0

        if item == 'person':
            for subdir, dirs, files in os.walk(path):
                if "normdoc" in files:
                    for file in files:
                        file_path = os.path.join(subdir, file)
                        xml = tostring(parse(file_path))
                        person_xml = xml[xml.index('&lt;persoon'): xml.index('&lt;/persoon&gt;') + 16]
                        person_xml = person_xml.replace('&lt;', '<').replace('&gt;', '>')
                        person = parse(StringIO(person_xml))
                        count_person += 1
                        if not xmlschema_person.validate(person):
                            count_person_invalid += 1
                            logger.error("ERROR in validating " + file_path)
                            logger.error("    " + xmlschema_person.error_log.last_error.message)
                            print("ERROR in validating " + file_path)
            logger.info('%d person normdocs validated' % count_person)
            logger.info('%d invalid person normdocs' % count_person_invalid)
        elif item == 'organisation':
            for subdir, dirs, files in os.walk(path):
                if "normdoc" in files:
                    for file in files:
                        file_path = os.path.join(subdir, file)
                        xml = tostring(parse(file_path))
                        organisation_xml = xml[xml.index('&lt;organisatie'): xml.index('&lt;/organisatie&gt;') + 20]
                        organisation_xml = organisation_xml.replace('&lt;', '<').replace('&gt;', '>')
                        organisation = parse(StringIO(organisation_xml))
Esempio n. 20
0
def validate_xml(xml_string, xsd_string):
    """Validate an XML document against an XML schema.

    Both arguments are handed to ``parse`` unchanged; the schema is parsed
    and compiled first, then the document. Returns the validation result.
    NOTE(review): despite the parameter names, ``parse`` presumably expects
    a file name or file object rather than XML text - confirm with callers.
    """
    schema_tree = parse(xsd_string)
    schema = XMLSchema(schema_tree)
    document = parse(xml_string)
    return schema.validate(document)