Example #1
0
    def elementMaker(self, prefix=None, **parserOptions):
        """Build an lxml ElementMaker that creates PMML objects in memory.

        The factory is bound to the PMML namespace and to this
        ModelLoader's current tag-to-class relationship.  Every
        PmmlBinding it creates gets its C{modelLoader} attribute set
        to this ModelLoader.

        @type prefix: string or None
        @param prefix: A prefix for the PMML namespace.
        @param **parserOptions: Arguments passed to lxml's U{XMLParser<http://lxml.de/api/lxml.etree.XMLParser-class.html>}.
        @rtype: ElementMaker
        @return: The ElementMaker factory.
        @see: The lxml U{ElementMaker documentation<http://lxml.de/api/lxml.builder.ElementMaker-class.html>}, which explains how to use an ElementMaker factory.
        """
        modelLoader = self

        class LoaderAwareParser(XMLParser):
            # Tags each PmmlBinding produced by this parser with its ModelLoader.
            def makeelement(thisParser, *args, **kwds):
                element = XMLParser.makeelement(thisParser, *args, **kwds)
                if isinstance(element, PmmlBinding):
                    element.modelLoader = modelLoader
                return element

        pmmlParser = LoaderAwareParser(**parserOptions)

        classLookup = ElementNamespaceClassLookup()
        pmmlClasses = classLookup.get_namespace(defs.PMML_NAMESPACE)
        declarations = self.schema.xpath(
            "xs:element", namespaces={"xs": defs.XSD_NAMESPACE})
        for declaration in declarations:
            # Every element named by the schema starts out as a generic PmmlBinding...
            pmmlClasses[declaration.attrib["name"]] = PmmlBinding
        # ...and the registered tag-to-class entries are applied on top.
        pmmlClasses.update(self.tagToClass)
        pmmlParser.set_element_class_lookup(classLookup)

        factoryOptions = {
            "namespace": defs.PMML_NAMESPACE,
            "nsmap": {prefix: defs.PMML_NAMESPACE},
            "makeelement": pmmlParser.makeelement,
        }
        return ElementMaker(**factoryOptions)
Example #2
0
    def elementMaker(self, prefix=None, **parserOptions):
        """Return a factory for constructing PMML objects in memory.

        The factory is an lxml ElementMaker that knows the PMML
        namespace and this ModelLoader's current tag-to-class
        relationship.  PmmlBinding objects made by the factory have
        their C{modelLoader} attribute pointed at this ModelLoader.

        @type prefix: string or None
        @param prefix: A prefix for the PMML namespace.
        @param **parserOptions: Arguments passed to lxml's U{XMLParser<http://lxml.de/api/lxml.etree.XMLParser-class.html>}.
        @rtype: ElementMaker
        @return: The ElementMaker factory.
        @see: The lxml U{ElementMaker documentation<http://lxml.de/api/lxml.builder.ElementMaker-class.html>}, which explains how to use an ElementMaker factory.
        """

        owner = self

        class BindingParser(XMLParser):
            # Overrides makeelement so new PmmlBindings know which loader made them.
            def makeelement(innerSelf, *args, **kwds):
                made = XMLParser.makeelement(innerSelf, *args, **kwds)
                if isinstance(made, PmmlBinding):
                    made.modelLoader = owner
                return made

        bindingParser = BindingParser(**parserOptions)

        registry = ElementNamespaceClassLookup()
        pmmlRegistry = registry.get_namespace(defs.PMML_NAMESPACE)
        xsNamespaces = {"xs": defs.XSD_NAMESPACE}
        for elementDecl in self.schema.xpath("xs:element", namespaces=xsNamespaces):
            # Default class for each schema-declared element name.
            tagName = elementDecl.attrib["name"]
            pmmlRegistry[tagName] = PmmlBinding
        # Registered classes take the place of the default where both exist.
        pmmlRegistry.update(self.tagToClass)
        bindingParser.set_element_class_lookup(registry)

        pmmlNsmap = {prefix: defs.PMML_NAMESPACE}
        return ElementMaker(namespace=defs.PMML_NAMESPACE,
                            nsmap=pmmlNsmap,
                            makeelement=bindingParser.makeelement)
Example #3
0
    def loadXml(self, data, validate=True, postValidate=True, **parserOptions):
        """Read a PMML model from XML and return it as an in-memory object.

        The XML may be given as a string, a fileName, a URI, or a
        file-like object.  Note that the XML file or string may be
        Gzip-compressed.

        @type data: string or file-like object
        @param data: The data to load.
        @type validate: bool
        @param validate: If True, validate the resulting PmmlBinding against this ModelLoader's XSD schema while loading.
        @type postValidate: bool
        @param postValidate: If True, run post-XSD validation checks.  (Note: very few PmmlBinding subclasses have postValidation tests defined as of May 2013.)
        @param **parserOptions: Arguments passed to lxml's U{XMLParser<http://lxml.de/api/lxml.etree.XMLParser-class.html>}.
        @rtype: PmmlBinding
        @return: In-memory PMML object.
        """

        if isinstance(data, basestring):
            # A string starting with the Gzip magic bytes is decompressed, a
            # string containing "<" is taken as XML text, and any other string
            # is handed to lxml unchanged.
            looksGzipped = len(data) >= 2 and data[0:2] == "\x1f\x8b"
            if looksGzipped:
                data = gzip.GzipFile(fileobj=StringIO(data))
            elif "<" in data:
                data = StringIO(data)

        schema = None
        if validate:
            if self.preparedSchema is None:
                # The compiled schema is kept on the loader for later calls.
                self.preparedSchema = XMLSchema(self.schema)
            schema = self.preparedSchema

        # Caller-supplied options win over these defaults.
        effectiveOptions = dict({"schema": schema, "huge_tree": True}, **parserOptions)
        xmlParser = XMLParser(**effectiveOptions)

        classLookup = ElementNamespaceClassLookup()
        pmmlClasses = classLookup.get_namespace(defs.PMML_NAMESPACE)
        declarations = self.schema.xpath(
            "xs:element", namespaces={"xs": defs.XSD_NAMESPACE})
        for declaration in declarations:
            pmmlClasses[declaration.attrib["name"]] = PmmlBinding
        pmmlClasses.update(self.tagToClass)
        xmlParser.set_element_class_lookup(classLookup)

        # ElementNamespaceClassLookup doesn't work with iterparse, so the whole
        # document is parsed at once and walked afterwards.
        root = parse(data, xmlParser).getroot()
        root.modelLoader = self

        if not postValidate:
            return root

        pmmlTagPattern = "{%s}*" % defs.PMML_NAMESPACE
        for _event, node in iterwalk(root, events=("end", ), tag=pmmlTagPattern):
            if isinstance(node, PmmlBinding):
                node.postValidate()

        return root
Example #4
0
    def loadXml(self, data, validate=True, postValidate=True, **parserOptions):
        """Build an in-memory PMML object from an XML source.

        Accepts an XML string, fileName, URI, or file-like object.
        Note that the XML file or string may be Gzip-compressed.

        @type data: string or file-like object
        @param data: The data to load.
        @type validate: bool
        @param validate: If True, validate the resulting PmmlBinding against this ModelLoader's XSD schema while loading.
        @type postValidate: bool
        @param postValidate: If True, run post-XSD validation checks.  (Note: very few PmmlBinding subclasses have postValidation tests defined as of May 2013.)
        @param **parserOptions: Arguments passed to lxml's U{XMLParser<http://lxml.de/api/lxml.etree.XMLParser-class.html>}.
        @rtype: PmmlBinding
        @return: In-memory PMML object.
        """

        source = data
        if isinstance(source, basestring):
            if len(source) >= 2 and source[0:2] == "\x1f\x8b":
                # Gzip magic number: decompress the string on the fly.
                source = gzip.GzipFile(fileobj=StringIO(source))
            elif "<" in source:
                # Contains markup, so it is XML text and not a fileName or URI.
                source = StringIO(source)

        if validate and self.preparedSchema is None:
            # Compile the schema on first use and keep it on the loader.
            self.preparedSchema = XMLSchema(self.schema)
        schema = self.preparedSchema if validate else None

        # Defaults first; anything passed by the caller overrides them.
        mergedOptions = {"schema": schema, "huge_tree": True}
        for optionName in parserOptions:
            mergedOptions[optionName] = parserOptions[optionName]

        loadingParser = XMLParser(**mergedOptions)
        registry = ElementNamespaceClassLookup()
        pmmlRegistry = registry.get_namespace(defs.PMML_NAMESPACE)
        xsNamespaces = {"xs": defs.XSD_NAMESPACE}
        for elementDecl in self.schema.xpath("xs:element", namespaces=xsNamespaces):
            tagName = elementDecl.attrib["name"]
            pmmlRegistry[tagName] = PmmlBinding
        pmmlRegistry.update(self.tagToClass)
        loadingParser.set_element_class_lookup(registry)

        # ElementNamespaceClassLookup doesn't work with iterparse, so we parse
        # everything in one go and then iterwalk the result.
        tree = parse(source, loadingParser)
        pmmlBinding = tree.getroot()
        pmmlBinding.modelLoader = self

        if postValidate:
            walker = iterwalk(pmmlBinding, events=("end",), tag="{%s}*" % defs.PMML_NAMESPACE)
            for _event, visited in walker:
                if isinstance(visited, PmmlBinding):
                    visited.postValidate()

        return pmmlBinding
Example #5
0
# Copyright (C) 2012-2018 by Dr. Dieter Maurer <*****@*****.**>; see 'LICENSE.txt' for details
"""Auxiliary classes to construct signature/encryption templates."""

from lxml.etree import ElementBase, \
     parse as et_parse, fromstring as et_fromstring, XML as et_xml, \
     XMLParser, ElementNamespaceClassLookup, ElementDefaultClassLookup
from dm.xmlsec.binding import DSigNs, dsig, EncNs, enc

# Module-wide parser and related `etree` infrastructure.
# `XMLParser` exposes `set_element_class_lookup` but apparently no matching
# getter, so the fallback lookup is created explicitly here instead of being
# read back from the parser.
class_lookup = ElementNamespaceClassLookup(ElementDefaultClassLookup())
parser = XMLParser()
parser.set_element_class_lookup(class_lookup)

# Element factory bound to the parser configured above.
Element = parser.makeelement


def SubElement(node, *args, **kw):
    """Create an element with this module's `Element` factory and append it to *node*.

    All positional and keyword arguments after *node* are forwarded
    unchanged to `Element`.

    Returns the newly appended child, as `lxml.etree.SubElement` does.
    Earlier versions returned ``None``, which forced callers to look the
    child up again before they could populate it.
    """
    child = Element(*args, **kw)
    node.append(child)
    return child


def parse(file, parser=parser):
    """Parse *file* with `lxml.etree.parse`, using this module's parser by default.

    Returns whatever `lxml.etree.parse` returns for the given source.
    """
    tree = et_parse(file, parser=parser)
    return tree


def fromstring(s, parser=parser):
    """Parse the string *s* with `lxml.etree.fromstring`, using this module's parser by default.

    Returns whatever `lxml.etree.fromstring` returns for the given text.
    """
    root = et_fromstring(s, parser=parser)
    return root


def XML(s, parser=parser):
Example #6
0
from lxml.etree import ElementNamespaceClassLookup
from lxml.objectify import ObjectifyElementClassLookup, ElementMaker, ObjectifiedElement
from lxml import objectify
from fvdl import FortifyObjectifiedDataElement

# Parser configuration for external metadata documents.  Entities are not
# resolved and CDATA sections are not stripped; redundant namespace
# declarations and blank text are cleaned up.
_EXTERNAL_METADATA_PARSER_OPTIONS = {
    "ns_clean": True,
    "remove_blank_text": True,
    "resolve_entities": False,
    "strip_cdata": False,
}
ExternalMetadataParser = objectify.makeparser(**_EXTERNAL_METADATA_PARSER_OPTIONS)

# Namespace-based class lookup that falls back to the objectify lookup.
_external_metadata_fallback_lookup = ObjectifyElementClassLookup()
ExternalMetadataElementNamespaceClassLookup = ElementNamespaceClassLookup(
    _external_metadata_fallback_lookup)


class ExternalMetadataPackElement(FortifyObjectifiedDataElement):

    metadata_name_shortcut_cache = {}

    @property
    def namespace_map(self):
        # lxml is really dumb with xml using default namespaces.  You have to define a dummy namespace prefix to the
        # default namespace even though that prefix doesn't exist in the raw xml. Define a consistent map for all xpath
        return {'z': 'xmlns://www.fortifysoftware.com/schema/externalMetadata'}

    # in goes a metadata name and out comes a list of shortcuts for that name
    def get_shortcuts_for_name(self, name):
        if name not in self.metadata_name_shortcut_cache:
            self.metadata_name_shortcut_cache[name] = self.xpath(
                "./z:ExternalList[z:Name='%s']/z:Shortcut/text()" % name,
                namespaces=self.namespace_map)
Example #7
0
    def loadJson(self, data, validate=True, postValidate=True, **parserOptions):
        """Load a PMML model represented as a JSON string, fileName,
        dict, or file-like object.

        There is no standard XML-to-JSON specification, so we define
        our own.  Our specification is very similar to U{this
        proposal<http://www.xml.com/pub/a/2006/05/31/converting-between-xml-and-json.html>},
        which collects subelements of different tagnames into
        different JSON lists, rather than having one long list and
        needing to specify the tag of each element in that list.  This
        has the following advantages, particularly useful for PMML:
          - Frequent tagnames (like <Segment>) are not repeated,
            wasting space.
          - Subelements with a given tagname can be quickly queried,
            without having to iterate over a list that contains
            non-matching tagnames.
        It has the following disadvantages:
          - The relative order of subelements with different tagnames
            is not preserved.
        We therefore additionally include a JSON attribute named "#"
        to specify the ordering of subelements in the XML
        representation.  Also, the specification referenced above
        represents single-child subelements as JSON objects and
        multiple children as JSON lists, but for consistency and ease
        of parsing, we always use lists.  The last difference is that
        we include "#tail" as well as "#text", so that text outside of
        an element is preserved (rarely relevant for PMML, but
        included for completeness).

        A string that names an existing file is read as a JSON file;
        any other string is parsed as JSON text.  A dict is used as
        already-decoded JSON and is not modified by this method.  The
        structure of the input is checked before the schema and parser
        are prepared, so malformed input fails early.

        @type data: string, dict, or file-like object
        @param data: The data to load.
        @type validate: bool
        @param validate: If True, validate the resulting PmmlBinding against this ModelLoader's XSD schema after loading.
        @type postValidate: bool
        @param postValidate: If True, run post-XSD validation checks.  (Note: very few PmmlBinding subclasses have postValidation tests defined as of May 2013.)
        @param **parserOptions: Arguments passed to lxml's U{XMLParser<http://lxml.de/api/lxml.etree.XMLParser-class.html>}.
        @rtype: PmmlBinding
        @return: In-memory PMML object.
        @raise ValueError: If the JSON text is malformed or does not represent PMML, an error is raised.
        """

        # Decode anything that is not already a JSON-like dictionary.
        if not isinstance(data, dict):
            if hasattr(data, "read"):
                data = json.load(data)
            elif isinstance(data, basestring):
                if os.path.exists(data):
                    # Close the file promptly instead of leaking the handle.
                    with open(data) as jsonFile:
                        data = json.load(jsonFile)
                else:
                    data = json.loads(data)

        if not isinstance(data, dict):
            raise ValueError("JSON object must be a mapping at the top level")

        try:
            declaredNsmap = data["#nsmap"]
        except KeyError:
            raise ValueError(
                "JSON object must have a \"#nsmap\" key at the top level")

        # Work on a copy so a dict supplied by the caller is left untouched.
        try:
            nsmap = dict(declaredNsmap)
        except (TypeError, ValueError):
            raise ValueError(
                "\"#nsmap\" must be a mapping from prefixes to namespace URIs")

        # JSON has no null keys, so the empty string stands for the key None.
        if "" in nsmap:
            nsmap[None] = nsmap.pop("")

        topLevelTags = [key for key in data if key != "#nsmap"]
        if len(topLevelTags) != 1:
            raise ValueError(
                "JSON object must have exactly one PMML object at the top level"
            )

        tag = topLevelTags[0]
        item = data[tag]
        if not isinstance(item, list) or len(item) != 1:
            raise ValueError(
                "Top-level PMML object must be a list with exactly one item")
        item = item[0]

        if validate:
            if self.preparedSchema is None:
                # The compiled schema is kept on the loader for later calls.
                self.preparedSchema = XMLSchema(self.schema)
            schema = self.preparedSchema
        else:
            schema = None

        parser = XMLParser(**parserOptions)
        lookup = ElementNamespaceClassLookup()
        namespace = lookup.get_namespace(defs.PMML_NAMESPACE)
        for xsdElement in self.schema.xpath(
                "xs:element", namespaces={"xs": defs.XSD_NAMESPACE}):
            namespace[xsdElement.attrib["name"]] = PmmlBinding
        namespace.update(self.tagToClass)
        parser.set_element_class_lookup(lookup)

        pmmlBinding = self._loadJsonItem(tag, item, parser, nsmap)

        # The schema is checked against the finished tree, not while building it.
        if validate:
            schema.assertValid(pmmlBinding)

        if postValidate:
            for event, elem in iterwalk(pmmlBinding,
                                        events=("end", ),
                                        tag="{%s}*" % defs.PMML_NAMESPACE):
                if isinstance(elem, PmmlBinding):
                    elem.postValidate()

        return pmmlBinding
Example #8
0
# One configuration shared by the Fortify parsers below.  Entities are not
# resolved and CDATA sections are not stripped; redundant namespace
# declarations and blank text are cleaned up.
_FORTIFY_PARSER_OPTIONS = {
    "ns_clean": True,
    "remove_blank_text": True,
    "resolve_entities": False,
    "strip_cdata": False,
}

AuditParser = objectify.makeparser(**_FORTIFY_PARSER_OPTIONS)

FilterTemplateParser = objectify.makeparser(**_FORTIFY_PARSER_OPTIONS)

FVDLParser = objectify.makeparser(**_FORTIFY_PARSER_OPTIONS)

# Each namespace lookup gets its own objectify fallback instance.
_audit_fallback_lookup = ObjectifyElementClassLookup()
AuditObjectifiedElementNamespaceClassLookup = ElementNamespaceClassLookup(
    _audit_fallback_lookup)

_fvdl_fallback_lookup = ObjectifyElementClassLookup()
FVDLObjectifiedElementNamespaceClassLookup = ElementNamespaceClassLookup(
    _fvdl_fallback_lookup)

_filter_template_fallback_lookup = ObjectifyElementClassLookup()
FilterTemplateObjectifiedElementNamespaceClassLookup = ElementNamespaceClassLookup(
    _filter_template_fallback_lookup)


class FortifyObjectifiedDataElement(ObjectifiedDataElement):
    """Objectified data element with a short, tag-based ``repr``."""

    def __repr__(self):
        """Return ``<Element TAG at 0xID>`` built from the tag and ``id(self)`` in hex."""
        template = "<Element {tag} at 0x{address:x}>"
        return template.format(tag=self.tag, address=id(self))


class FVDLElement(FortifyObjectifiedDataElement):
    def get_vulnerabilities(self):
from lxml.etree import (
    ElementBase,
    parse as et_parse,
    fromstring as et_fromstring,
    XML as et_xml,
    XMLParser,
    ElementNamespaceClassLookup,
    ElementDefaultClassLookup,
)
from dm.xmlsec.binding import DSigNs, dsig, EncNs, enc

# Module-wide parser and related `etree` infrastructure.
# `XMLParser` exposes `set_element_class_lookup` but apparently no matching
# getter, so the fallback lookup is created explicitly here instead of being
# read back from the parser.
class_lookup = ElementNamespaceClassLookup(ElementDefaultClassLookup())
parser = XMLParser()
parser.set_element_class_lookup(class_lookup)

# Element factory bound to the parser configured above.
Element = parser.makeelement


def SubElement(node, *args, **kw):
    """Create an element with this module's `Element` factory and append it to *node*.

    All positional and keyword arguments after *node* are forwarded
    unchanged to `Element`.

    Returns the newly appended child, as `lxml.etree.SubElement` does.
    Earlier versions returned ``None``, which forced callers to look the
    child up again before they could populate it.
    """
    child = Element(*args, **kw)
    node.append(child)
    return child


def parse(file, parser=parser):
    """Parse *file* with `lxml.etree.parse`, using this module's parser by default.

    Returns whatever `lxml.etree.parse` returns for the given source.
    """
    tree = et_parse(file, parser=parser)
    return tree


def fromstring(s, parser=parser):
    """Parse the string *s* with `lxml.etree.fromstring`, using this module's parser by default.

    Returns whatever `lxml.etree.fromstring` returns for the given text.
    """
    root = et_fromstring(s, parser=parser)
    return root
Example #10
0
    def loadJson(self, data, validate=True, postValidate=True, **parserOptions):
        """Load a PMML model represented as a JSON string, fileName,
        dict, or file-like object.

        There is no standard XML-to-JSON specification, so we define
        our own.  Our specification is very similar to U{this
        proposal<http://www.xml.com/pub/a/2006/05/31/converting-between-xml-and-json.html>},
        which collects subelements of different tagnames into
        different JSON lists, rather than having one long list and
        needing to specify the tag of each element in that list.  This
        has the following advantages, particularly useful for PMML:
          - Frequent tagnames (like <Segment>) are not repeated,
            wasting space.
          - Subelements with a given tagname can be quickly queried,
            without having to iterate over a list that contains
            non-matching tagnames.
        It has the following disadvantages:
          - The relative order of subelements with different tagnames
            is not preserved.
        We therefore additionally include a JSON attribute named "#"
        to specify the ordering of subelements in the XML
        representation.  Also, the specification referenced above
        represents single-child subelements as JSON objects and
        multiple children as JSON lists, but for consistency and ease
        of parsing, we always use lists.  The last difference is that
        we include "#tail" as well as "#text", so that text outside of
        an element is preserved (rarely relevant for PMML, but
        included for completeness).

        A string that names an existing file is read as a JSON file;
        any other string is parsed as JSON text.  A dict is used as
        already-decoded JSON and is not modified by this method.  The
        structure of the input is checked before the schema and parser
        are prepared, so malformed input fails early.

        @type data: string, dict, or file-like object
        @param data: The data to load.
        @type validate: bool
        @param validate: If True, validate the resulting PmmlBinding against this ModelLoader's XSD schema after loading.
        @type postValidate: bool
        @param postValidate: If True, run post-XSD validation checks.  (Note: very few PmmlBinding subclasses have postValidation tests defined as of May 2013.)
        @param **parserOptions: Arguments passed to lxml's U{XMLParser<http://lxml.de/api/lxml.etree.XMLParser-class.html>}.
        @rtype: PmmlBinding
        @return: In-memory PMML object.
        @raise ValueError: If the JSON text is malformed or does not represent PMML, an error is raised.
        """

        # Decode anything that is not already a JSON-like dictionary.
        if not isinstance(data, dict):
            if hasattr(data, "read"):
                data = json.load(data)
            elif isinstance(data, basestring):
                if os.path.exists(data):
                    # Close the file promptly instead of leaking the handle.
                    with open(data) as jsonFile:
                        data = json.load(jsonFile)
                else:
                    data = json.loads(data)

        if not isinstance(data, dict):
            raise ValueError("JSON object must be a mapping at the top level")

        try:
            declaredNsmap = data["#nsmap"]
        except KeyError:
            raise ValueError("JSON object must have a \"#nsmap\" key at the top level")

        # Work on a copy so a dict supplied by the caller is left untouched.
        try:
            nsmap = dict(declaredNsmap)
        except (TypeError, ValueError):
            raise ValueError("\"#nsmap\" must be a mapping from prefixes to namespace URIs")

        # JSON has no null keys, so the empty string stands for the key None.
        if "" in nsmap:
            nsmap[None] = nsmap.pop("")

        topLevelTags = [key for key in data if key != "#nsmap"]
        if len(topLevelTags) != 1:
            raise ValueError("JSON object must have exactly one PMML object at the top level")

        tag = topLevelTags[0]
        item = data[tag]
        if not isinstance(item, list) or len(item) != 1:
            raise ValueError("Top-level PMML object must be a list with exactly one item")
        item = item[0]

        if validate:
            if self.preparedSchema is None:
                # The compiled schema is kept on the loader for later calls.
                self.preparedSchema = XMLSchema(self.schema)
            schema = self.preparedSchema
        else:
            schema = None

        parser = XMLParser(**parserOptions)
        lookup = ElementNamespaceClassLookup()
        namespace = lookup.get_namespace(defs.PMML_NAMESPACE)
        for xsdElement in self.schema.xpath("xs:element", namespaces={"xs": defs.XSD_NAMESPACE}):
            namespace[xsdElement.attrib["name"]] = PmmlBinding
        namespace.update(self.tagToClass)
        parser.set_element_class_lookup(lookup)

        pmmlBinding = self._loadJsonItem(tag, item, parser, nsmap)

        # The schema is checked against the finished tree, not while building it.
        if validate:
            schema.assertValid(pmmlBinding)

        if postValidate:
            for event, elem in iterwalk(pmmlBinding, events=("end",), tag="{%s}*" % defs.PMML_NAMESPACE):
                if isinstance(elem, PmmlBinding):
                    elem.postValidate()

        return pmmlBinding
Example #11
0
# One configuration shared by the Fortify parsers below.  Entities are not
# resolved and CDATA sections are not stripped; redundant namespace
# declarations and blank text are cleaned up.
_FORTIFY_PARSER_OPTIONS = {
    "ns_clean": True,
    "remove_blank_text": True,
    "resolve_entities": False,
    "strip_cdata": False,
}

AuditParser = objectify.makeparser(**_FORTIFY_PARSER_OPTIONS)

FilterTemplateParser = objectify.makeparser(**_FORTIFY_PARSER_OPTIONS)

FVDLParser = objectify.makeparser(**_FORTIFY_PARSER_OPTIONS)

# Each namespace lookup gets its own objectify fallback instance.
_audit_fallback_lookup = ObjectifyElementClassLookup()
AuditObjectifiedElementNamespaceClassLookup = ElementNamespaceClassLookup(
    _audit_fallback_lookup)

_fvdl_fallback_lookup = ObjectifyElementClassLookup()
FVDLObjectifiedElementNamespaceClassLookup = ElementNamespaceClassLookup(
    _fvdl_fallback_lookup)

_filter_template_fallback_lookup = ObjectifyElementClassLookup()
FilterTemplateObjectifiedElementNamespaceClassLookup = ElementNamespaceClassLookup(
    _filter_template_fallback_lookup)


class FortifyObjectifiedDataElement(ObjectifiedDataElement):
    """Objectified data element with a short, tag-based ``repr``."""

    def __repr__(self):
        """Return ``<Element TAG at 0xID>`` built from the tag and ``id(self)`` in hex."""
        template = "<Element {tag} at 0x{address:x}>"
        return template.format(tag=self.tag, address=id(self))


class FVDLElement(FortifyObjectifiedDataElement):
    def get_vulnerabilities(self):
Example #12
0
from lxml.etree import ElementNamespaceClassLookup
from lxml.objectify import ObjectifyElementClassLookup, ElementMaker, ObjectifiedElement
from lxml import objectify
from fvdl import FortifyObjectifiedDataElement

# Parser configuration for external metadata documents.  Entities are not
# resolved and CDATA sections are not stripped; redundant namespace
# declarations and blank text are cleaned up.
_EXTERNAL_METADATA_PARSER_OPTIONS = {
    "ns_clean": True,
    "remove_blank_text": True,
    "resolve_entities": False,
    "strip_cdata": False,
}
ExternalMetadataParser = objectify.makeparser(**_EXTERNAL_METADATA_PARSER_OPTIONS)

# Namespace-based class lookup that falls back to the objectify lookup.
_external_metadata_fallback_lookup = ObjectifyElementClassLookup()
ExternalMetadataElementNamespaceClassLookup = ElementNamespaceClassLookup(
    _external_metadata_fallback_lookup)

class ExternalMetadataPackElement(FortifyObjectifiedDataElement):
    """Element of an external metadata document with a cached shortcut lookup."""

    # Shortcut lists already looked up, keyed by metadata name.
    # NOTE(review): this dict lives on the class, so it is shared by every
    # instance; if more than one metadata document is loaded in a process,
    # results cached for one document are returned for the others.  Confirm
    # whether that is intended.
    metadata_name_shortcut_cache = {}

    @property
    def namespace_map(self):
        """Prefix-to-URI map used for every XPath query on this element."""
        # lxml is really dumb with xml using default namespaces.  You have to define a dummy namespace prefix to the
        # default namespace even though that prefix doesn't exist in the raw xml. Define a consistent map for all xpath
        return {'z': 'xmlns://www.fortifysoftware.com/schema/externalMetadata'}

    def get_shortcuts_for_name(self, name):
        """Return the shortcuts listed for the metadata name *name*.

        The result is the XPath result for the text of every ``Shortcut``
        under the ``ExternalList`` children whose ``Name`` equals *name*.
        It is computed once per name and then served from
        ``metadata_name_shortcut_cache``.
        """
        if name not in self.metadata_name_shortcut_cache:
            # The name is passed as an XPath variable, not spliced into the
            # expression, so names containing quotes cannot break or alter
            # the query.
            self.metadata_name_shortcut_cache[name] = self.xpath(
                "./z:ExternalList[z:Name=$metadata_name]/z:Shortcut/text()",
                namespaces=self.namespace_map,
                metadata_name=name)

        return self.metadata_name_shortcut_cache[name]
Example #13
0
# One configuration shared by the Fortify parsers below.  Entities are not
# resolved and CDATA sections are not stripped; redundant namespace
# declarations and blank text are cleaned up.
_FORTIFY_PARSER_OPTIONS = {
    "ns_clean": True,
    "remove_blank_text": True,
    "resolve_entities": False,
    "strip_cdata": False,
}

AuditParser = objectify.makeparser(**_FORTIFY_PARSER_OPTIONS)

FilterTemplateParser = objectify.makeparser(**_FORTIFY_PARSER_OPTIONS)

FVDLParser = objectify.makeparser(**_FORTIFY_PARSER_OPTIONS)

# Each namespace lookup gets its own objectify fallback instance.
_audit_fallback_lookup = ObjectifyElementClassLookup()
AuditObjectifiedElementNamespaceClassLookup = ElementNamespaceClassLookup(
    _audit_fallback_lookup)

_fvdl_fallback_lookup = ObjectifyElementClassLookup()
FVDLObjectifiedElementNamespaceClassLookup = ElementNamespaceClassLookup(
    _fvdl_fallback_lookup)


class FortifyObjectifiedDataElement(ObjectifiedDataElement):
    """Objectified data element with a short, tag-based ``repr``."""

    def __repr__(self):
        """Return ``<Element TAG at 0xID>`` built from the tag and ``id(self)`` in hex."""
        template = "<Element {tag} at 0x{address:x}>"
        return template.format(tag=self.tag, address=id(self))


class FVDLElement(FortifyObjectifiedDataElement):
    """Element type that gives access to its vulnerability entries."""

    def get_vulnerabilities(self):
        """Return ``self.Vulnerabilities.Vulnerability``.

        NOTE(review): presumably resolved by lxml objectify as child-element
        access; confirm against the FVDL document structure.
        """
        container = self.Vulnerabilities
        return container.Vulnerability

Example #14
0
# One configuration shared by the Fortify parsers below.  Entities are not
# resolved and CDATA sections are not stripped; redundant namespace
# declarations and blank text are cleaned up.
_FORTIFY_PARSER_OPTIONS = {
    "ns_clean": True,
    "remove_blank_text": True,
    "resolve_entities": False,
    "strip_cdata": False,
}

AuditParser = objectify.makeparser(**_FORTIFY_PARSER_OPTIONS)

FilterTemplateParser = objectify.makeparser(**_FORTIFY_PARSER_OPTIONS)

FVDLParser = objectify.makeparser(**_FORTIFY_PARSER_OPTIONS)

# Each namespace lookup gets its own objectify fallback instance.
_audit_fallback_lookup = ObjectifyElementClassLookup()
AuditObjectifiedElementNamespaceClassLookup = ElementNamespaceClassLookup(
    _audit_fallback_lookup)

_fvdl_fallback_lookup = ObjectifyElementClassLookup()
FVDLObjectifiedElementNamespaceClassLookup = ElementNamespaceClassLookup(
    _fvdl_fallback_lookup)


class FortifyObjectifiedDataElement(ObjectifiedDataElement):
    """Objectified data element with a short, tag-based ``repr``."""

    def __repr__(self):
        """Return ``<Element TAG at 0xID>`` built from the tag and ``id(self)`` in hex."""
        template = "<Element {tag} at 0x{address:x}>"
        return template.format(tag=self.tag, address=id(self))


class FVDLElement(FortifyObjectifiedDataElement):
    """Element type that gives access to its vulnerability entries."""

    def get_vulnerabilities(self):
        """Return ``self.Vulnerabilities.Vulnerability``.

        NOTE(review): presumably resolved by lxml objectify as child-element
        access; confirm against the FVDL document structure.
        """
        container = self.Vulnerabilities
        return container.Vulnerability