Esempio n. 1
0
    def wsdl(self, url, debug=False, cache=False):
        "Parse Web Service Description v1.1"
        soap_ns = {
            "http://schemas.xmlsoap.org/wsdl/soap/": 'soap11',
            "http://schemas.xmlsoap.org/wsdl/soap12/": 'soap12',
            }
        wsdl_uri="http://schemas.xmlsoap.org/wsdl/"
        xsd_uri="http://www.w3.org/2001/XMLSchema"
        xsi_uri="http://www.w3.org/2001/XMLSchema-instance"
        
        get_local_name = lambda s: str((':' in s) and s.split(':')[1] or s)
        
        REVERSE_TYPE_MAP = dict([(v,k) for k,v in TYPE_MAP.items()])

        def fetch(url):
            "Fetch a document from a URL, save it locally if cache enabled"
            import os, hashlib
            # make md5 hash of the url for caching... 
            filename = "%s.xml" % hashlib.md5(url).hexdigest()
            if isinstance(cache, basestring):
                filename = os.path.join(cache, filename) 
            if cache and os.path.exists(filename):
                if debug: print "Reading file %s" % (filename, )
                f = open(filename, "r")
                xml = f.read()
                f.close()
            else:
                if debug: print "Fetching url %s" % (url, )
                f = urllib.urlopen(url)
                xml = f.read()
                if cache:
                    if debug: print "Writing file %s" % (filename, )
                    f = open(filename, "w")
                    f.write(xml)
                    f.close()
            return xml
            
        # Open uri and read xml:
        xml = fetch(url)
        # Parse WSDL XML:
        wsdl = SimpleXMLElement(xml, namespace=wsdl_uri)
        
        # detect soap prefix and uri (xmlns attributes of <definitions>)
        xsd_ns = None
        soap_uris = {}
        for k, v in wsdl[:]:
            if v in soap_ns and k.startswith("xmlns:"):
                soap_uris[get_local_name(k)] = v
            if v== xsd_uri and k.startswith("xmlns:"):
                xsd_ns = get_local_name(k)

        # Extract useful data:
        self.namespace = wsdl['targetNamespace']
        self.documentation = unicode(wsdl('documentation', error=False) or '')
        
        services = {}
        bindings = {}           # binding_name: binding
        operations = {}         # operation_name: operation
        port_type_bindings = {} # port_type_name: binding
        messages = {}           # message: element
        elements = {}           # element: type def
        
        wsdl_import = wsdl.__call__('import')
        
        if wsdl_import:
            wsdl_import_xml = fetch(wsdl_import['location'])
            wsdl_import = SimpleXMLElement(wsdl_import_xml, namespace=wsdl_uri)
        
        service_tag = wsdl.service if wsdl.__contains__('wsdl:service') else (wsdl_import.service if wsdl_import and wsdl_import.__contains__('wsdl:service') else None)  
        
        for service in service_tag:
            service_name=service['name']
            if not service_name:
                continue # empty service?
            if debug: print "Processing service", service_name
            serv = services.setdefault(service_name, {'ports': {}})
            serv['documentation']=service['documentation'] or ''
            for port in service.port:
                binding_name = get_local_name(port['binding'])
                address = port('address', ns=soap_uris.values(), error=False)
                location = address and address['location'] or None
                soap_uri = address and soap_uris.get(address.get_prefix())
                soap_ver = soap_uri and soap_ns.get(soap_uri)
                bindings[binding_name] = {'service_name': service_name,
                    'location': location,
                    'soap_uri': soap_uri, 'soap_ver': soap_ver,
                    }
                serv['ports'][port['name']] = bindings[binding_name]
        
        binding_tag = wsdl.binding if wsdl.__contains__('wsdl:binding') else (wsdl_import.binding if wsdl_import and wsdl_import.__contains__('wsdl:binding') else None)
             
        for binding in binding_tag:
            binding_name = binding['name']
            if debug: print "Processing binding", service_name
            soap_binding = binding('binding', ns=soap_uris.values(), error=False)
            transport = soap_binding and soap_binding['transport'] or None
            port_type_name = get_local_name(binding['type'])
            bindings[binding_name].update({
                'port_type_name': port_type_name,
                'transport': transport, 'operations': {},
                })
            port_type_bindings[port_type_name] = bindings[binding_name]
            for operation in binding.operation:
                op_name = operation['name']
                op = operation('operation',ns=soap_uris.values(), error=False)
                action = op and op['soapAction']
                d = operations.setdefault(op_name, {})
                bindings[binding_name]['operations'][op_name] = d
                d.update({'name': op_name})
                #if action: #TODO: separe operation_binding from operation
                if action:
                    d["action"] = action
                        
        #TODO: cleanup element/schema/types parsing:
        def process_element(element_name, node):
            "Parse and define simple element types"
            if debug: print "Processing element", element_name
            for tag in node:
                if tag.get_local_name() in ("annotation", "documentation"):
                    continue
                elif tag.get_local_name() in ('element', 'restriction'):
                    if debug: print element_name,"has not children!",tag
                    children = tag # element "alias"?
                    alias = True
                elif tag.children():
                    children = tag.children()
                    alias = False
                else:
                    if debug: print element_name,"has not children!",tag
                    continue #TODO: abstract?
                d = OrderedDict()                    
                for e in children:
                    t = e['type']
                    if not t:
                        t = e['base'] # complexContent (extension)!
                    if not t:
                        t = 'anyType' # no type given!
                    t = t.split(":")
                    if len(t)>1:
                        ns, type_name = t
                    else:
                        ns, type_name = None, t[0]
                    if element_name == type_name:
                        continue # prevent infinite recursion
                    uri = ns and e.get_namespace_uri(ns) or xsd_uri
                    if uri==xsd_uri:
                        # look for the type, None == any
                        fn = REVERSE_TYPE_MAP.get(unicode(type_name), None)
                    else:
                        # complex type, postprocess later
                        fn = elements.setdefault(unicode(type_name), OrderedDict())
                    if e['name'] is not None and not alias:
                        e_name = unicode(e['name'])
                        d[e_name] = fn
                    else:
                        if debug: print "complexConent/simpleType/element", element_name, "=", type_name
                        d[None] = fn
                    if e['maxOccurs']=="unbounded":
                        # it's an array... TODO: compound arrays?
                        d.array = True
                    if e is not None and e.get_local_name() == 'extension' and e.children():
                        # extend base element:
                        process_element(element_name, e.children())
                elements.setdefault(element_name, OrderedDict()).update(d)

        types_tag = wsdl.types if wsdl.__contains__('wsdl:types') and wsdl.types.__contains__('xsd:schema') else (wsdl_import.types if wsdl_import and wsdl_import.__contains__('wsdl:types') and wsdl_import.types.__contains__('xsd:schema') else None)

        # check axis2 namespace at schema types attributes
        self.namespace = dict(types_tag("schema", ns=xsd_uri)[:]).get('targetNamespace', self.namespace) 

        imported_schemas = {}

        def preprocess_schema(schema):
            "Find schema elements and complex types"
            for element in schema.children():
                if element.get_local_name() in ('import', ):
                    schema_namespace = element['namespace']
                    schema_location = element['schemaLocation']
                    if schema_location is None:
                        if debug: print "Schema location not provided for %s!" % (schema_namespace, )
                        continue
                    if schema_location in imported_schemas:
                        if debug: print "Schema %s already imported!" % (schema_location, )
                        continue
                    imported_schemas[schema_location] = schema_namespace
                    if debug: print "Importing schema %s from %s" % (schema_namespace, schema_location)
                    # Open uri and read xml:
                    xml = fetch(schema_location)
                    # Parse imported XML schema (recursively):
                    imported_schema = SimpleXMLElement(xml, namespace=xsd_uri)
                    preprocess_schema(imported_schema)

                if element.get_local_name() in ('element', 'complexType', "simpleType"):
                    element_name = unicode(element['name'])
                    if debug: print "Parsing Element %s: %s" % (element.get_local_name(),element_name)
                    if element.get_local_name() == 'complexType':
                        children = element.children()
                    elif element.get_local_name() == 'simpleType':
                        children = element("restriction", ns=xsd_uri)
                    elif element.get_local_name() == 'element' and element['type']:
                        children = element
                    else:
                        children = element.children()
                        if children:
                            children = children.children()
                        elif element.get_local_name() == 'element':
                            children = element
                    if children:
                        process_element(element_name, children)

        def postprocess_element(elements):
            "Fix unresolved references (elements referenced before its definition, thanks .net)"
            for k,v in elements.items():
                if isinstance(v, OrderedDict):
                    if v.array:
                        elements[k] = [v] # convert arrays to python lists
                    if v!=elements: #TODO: fix recursive elements
                        postprocess_element(v)
                    if None in v and v[None]: # extension base?
                        if isinstance(v[None], dict):
                            for i, kk in enumerate(v[None]):
                                # extend base -keep orginal order-
                                elements[k].insert(kk, v[None][kk], i)
                            del v[None]
                        else:  # "alias", just replace
                            if debug: print "Replacing ", k , " = ", v[None]
                            elements[k] = v[None]
                            #break
                if isinstance(v, list):
                    for n in v: # recurse list
                        postprocess_element(n)

                        
        # process current wsdl schema:
        for schema in types_tag("schema", ns=xsd_uri): 
            preprocess_schema(schema)                

        postprocess_element(elements)

        message_tag = wsdl.message if wsdl.__contains__('wsdl:message') else (wsdl_import.message if wsdl_import and wsdl_import.__contains__('wsdl:message') else None)

        for message in message_tag:
            if debug: print "Processing message", message['name']
            part = message('part', error=False)
            element = {}
            if part:
                element_name = part['element']
                if not element_name:
                    element_name = part['type'] # some uses type instead
                element_name = get_local_name(element_name)
                element = {element_name: elements.get(element_name)}
            messages[message['name']] = element
        
        port_tag = wsdl.portType if wsdl.__contains__('wsdl:portType') else (wsdl_import.portType if wsdl_import and wsdl_import.__contains__('wsdl:portType') else None)
        
        for port_type in port_tag:
            port_type_name = port_type['name']
            if debug: print "Processing port type", port_type_name
            binding = port_type_bindings[port_type_name]

            for operation in port_type.operation:
                op_name = operation['name']
                op = operations[op_name] 
                op['documentation'] = unicode(operation('documentation', error=False) or '')
                if binding['soap_ver']: 
                    #TODO: separe operation_binding from operation (non SOAP?)
                    input = get_local_name(operation.input['message'])
                    output = get_local_name(operation.output['message'])
                    op['input'] = messages[input]
                    op['output'] = messages[output]

        if debug:
            import pprint
            pprint.pprint(services)
        
        return services
Esempio n. 2
0
    def wsdl(self, url, debug=False, cache=False):
        "Parse Web Service Description v1.1"
        soap_ns = {
            "http://schemas.xmlsoap.org/wsdl/soap/": 'soap11',
            "http://schemas.xmlsoap.org/wsdl/soap12/": 'soap12',
        }
        wsdl_uri = "http://schemas.xmlsoap.org/wsdl/"
        xsd_uri = "http://www.w3.org/2001/XMLSchema"
        xsi_uri = "http://www.w3.org/2001/XMLSchema-instance"

        get_local_name = lambda s: str((':' in s) and s.split(':')[1] or s)

        REVERSE_TYPE_MAP = dict([(v, k) for k, v in TYPE_MAP.items()])

        def fetch(url):
            "Fetch a document from a URL, save it locally if cache enabled"
            import os, hashlib
            # make md5 hash of the url for caching...
            filename = "%s.xml" % hashlib.md5(url).hexdigest()
            if isinstance(cache, basestring):
                filename = os.path.join(cache, filename)
            if cache and os.path.exists(filename):
                if debug: print "Reading file %s" % (filename, )
                f = open(filename, "r")
                xml = f.read()
                f.close()
            else:
                if debug: print "Fetching url %s" % (url, )
                f = urllib.urlopen(url)
                xml = f.read()
                if cache:
                    if debug: print "Writing file %s" % (filename, )
                    f = open(filename, "w")
                    f.write(xml)
                    f.close()
            return xml

        # Open uri and read xml:
        xml = fetch(url)
        # Parse WSDL XML:
        wsdl = SimpleXMLElement(xml, namespace=wsdl_uri)

        # detect soap prefix and uri (xmlns attributes of <definitions>)
        xsd_ns = None
        soap_uris = {}
        for k, v in wsdl[:]:
            if v in soap_ns and k.startswith("xmlns:"):
                soap_uris[get_local_name(k)] = v
            if v == xsd_uri and k.startswith("xmlns:"):
                xsd_ns = get_local_name(k)

        # Extract useful data:
        self.namespace = wsdl['targetNamespace']
        self.documentation = unicode(wsdl('documentation', error=False) or '')

        services = {}
        bindings = {}  # binding_name: binding
        operations = {}  # operation_name: operation
        port_type_bindings = {}  # port_type_name: binding
        messages = {}  # message: element
        elements = {}  # element: type def

        wsdl_import = wsdl.__call__('import')

        if wsdl_import:
            wsdl_import_xml = fetch(wsdl_import['location'])
            wsdl_import = SimpleXMLElement(wsdl_import_xml, namespace=wsdl_uri)

        service_tag = wsdl.service if wsdl.__contains__('wsdl:service') else (
            wsdl_import.service if wsdl_import
            and wsdl_import.__contains__('wsdl:service') else None)

        for service in service_tag:
            service_name = service['name']
            if not service_name:
                continue  # empty service?
            if debug: print "Processing service", service_name
            serv = services.setdefault(service_name, {'ports': {}})
            serv['documentation'] = service['documentation'] or ''
            for port in service.port:
                binding_name = get_local_name(port['binding'])
                address = port('address', ns=soap_uris.values(), error=False)
                location = address and address['location'] or None
                soap_uri = address and soap_uris.get(address.get_prefix())
                soap_ver = soap_uri and soap_ns.get(soap_uri)
                bindings[binding_name] = {
                    'service_name': service_name,
                    'location': location,
                    'soap_uri': soap_uri,
                    'soap_ver': soap_ver,
                }
                serv['ports'][port['name']] = bindings[binding_name]

        binding_tag = wsdl.binding if wsdl.__contains__('wsdl:binding') else (
            wsdl_import.binding if wsdl_import
            and wsdl_import.__contains__('wsdl:binding') else None)

        for binding in binding_tag:
            binding_name = binding['name']
            if debug: print "Processing binding", service_name
            soap_binding = binding('binding',
                                   ns=soap_uris.values(),
                                   error=False)
            transport = soap_binding and soap_binding['transport'] or None
            port_type_name = get_local_name(binding['type'])
            bindings[binding_name].update({
                'port_type_name': port_type_name,
                'transport': transport,
                'operations': {},
            })
            port_type_bindings[port_type_name] = bindings[binding_name]
            for operation in binding.operation:
                op_name = operation['name']
                op = operation('operation', ns=soap_uris.values(), error=False)
                action = op and op['soapAction']
                d = operations.setdefault(op_name, {})
                bindings[binding_name]['operations'][op_name] = d
                d.update({'name': op_name})
                #if action: #TODO: separe operation_binding from operation
                if action:
                    d["action"] = action

        #TODO: cleanup element/schema/types parsing:
        def process_element(element_name, node):
            "Parse and define simple element types"
            if debug: print "Processing element", element_name
            for tag in node:
                if tag.get_local_name() in ("annotation", "documentation"):
                    continue
                elif tag.get_local_name() in ('element', 'restriction'):
                    if debug: print element_name, "has not children!", tag
                    children = tag  # element "alias"?
                    alias = True
                elif tag.children():
                    children = tag.children()
                    alias = False
                else:
                    if debug: print element_name, "has not children!", tag
                    continue  #TODO: abstract?
                d = OrderedDict()
                for e in children:
                    t = e['type']
                    if not t:
                        t = e['base']  # complexContent (extension)!
                    if not t:
                        t = 'anyType'  # no type given!
                    t = t.split(":")
                    if len(t) > 1:
                        ns, type_name = t
                    else:
                        ns, type_name = None, t[0]
                    if element_name == type_name:
                        continue  # prevent infinite recursion
                    uri = ns and e.get_namespace_uri(ns) or xsd_uri
                    if uri == xsd_uri:
                        # look for the type, None == any
                        fn = REVERSE_TYPE_MAP.get(unicode(type_name), None)
                    else:
                        # complex type, postprocess later
                        fn = elements.setdefault(unicode(type_name),
                                                 OrderedDict())
                    if e['name'] is not None and not alias:
                        e_name = unicode(e['name'])
                        d[e_name] = fn
                    else:
                        if debug:
                            print "complexConent/simpleType/element", element_name, "=", type_name
                        d[None] = fn
                    if e['maxOccurs'] == "unbounded":
                        # it's an array... TODO: compound arrays?
                        d.array = True
                    if e is not None and e.get_local_name(
                    ) == 'extension' and e.children():
                        # extend base element:
                        process_element(element_name, e.children())
                elements.setdefault(element_name, OrderedDict()).update(d)

        types_tag = wsdl.types if wsdl.__contains__(
            'wsdl:types') and wsdl.types.__contains__('xsd:schema') else (
                wsdl_import.types
                if wsdl_import and wsdl_import.__contains__('wsdl:types')
                and wsdl_import.types.__contains__('xsd:schema') else None)

        # check axis2 namespace at schema types attributes
        self.namespace = dict(types_tag("schema", ns=xsd_uri)[:]).get(
            'targetNamespace', self.namespace)

        imported_schemas = {}

        def preprocess_schema(schema):
            "Find schema elements and complex types"
            for element in schema.children():
                if element.get_local_name() in ('import', ):
                    schema_namespace = element['namespace']
                    schema_location = element['schemaLocation']
                    if schema_location is None:
                        if debug:
                            print "Schema location not provided for %s!" % (
                                schema_namespace, )
                        continue
                    if schema_location in imported_schemas:
                        if debug:
                            print "Schema %s already imported!" % (
                                schema_location, )
                        continue
                    imported_schemas[schema_location] = schema_namespace
                    if debug:
                        print "Importing schema %s from %s" % (
                            schema_namespace, schema_location)
                    # Open uri and read xml:
                    xml = fetch(schema_location)
                    # Parse imported XML schema (recursively):
                    imported_schema = SimpleXMLElement(xml, namespace=xsd_uri)
                    preprocess_schema(imported_schema)

                if element.get_local_name() in ('element', 'complexType',
                                                "simpleType"):
                    element_name = unicode(element['name'])
                    if debug:
                        print "Parsing Element %s: %s" % (
                            element.get_local_name(), element_name)
                    if element.get_local_name() == 'complexType':
                        children = element.children()
                    elif element.get_local_name() == 'simpleType':
                        children = element("restriction", ns=xsd_uri)
                    elif element.get_local_name(
                    ) == 'element' and element['type']:
                        children = element
                    else:
                        children = element.children()
                        if children:
                            children = children.children()
                        elif element.get_local_name() == 'element':
                            children = element
                    if children:
                        process_element(element_name, children)

        def postprocess_element(elements):
            "Fix unresolved references (elements referenced before its definition, thanks .net)"
            for k, v in elements.items():
                if isinstance(v, OrderedDict):
                    if v.array:
                        elements[k] = [v]  # convert arrays to python lists
                    if v != elements:  #TODO: fix recursive elements
                        postprocess_element(v)
                    if None in v and v[None]:  # extension base?
                        if isinstance(v[None], dict):
                            for i, kk in enumerate(v[None]):
                                # extend base -keep orginal order-
                                elements[k].insert(kk, v[None][kk], i)
                            del v[None]
                        else:  # "alias", just replace
                            if debug: print "Replacing ", k, " = ", v[None]
                            elements[k] = v[None]
                            #break
                if isinstance(v, list):
                    for n in v:  # recurse list
                        postprocess_element(n)

        # process current wsdl schema:
        for schema in types_tag("schema", ns=xsd_uri):
            preprocess_schema(schema)

        postprocess_element(elements)

        message_tag = wsdl.message if wsdl.__contains__('wsdl:message') else (
            wsdl_import.message if wsdl_import
            and wsdl_import.__contains__('wsdl:message') else None)

        for message in message_tag:
            if debug: print "Processing message", message['name']
            part = message('part', error=False)
            element = {}
            if part:
                element_name = part['element']
                if not element_name:
                    element_name = part['type']  # some uses type instead
                element_name = get_local_name(element_name)
                element = {element_name: elements.get(element_name)}
            messages[message['name']] = element

        port_tag = wsdl.portType if wsdl.__contains__('wsdl:portType') else (
            wsdl_import.portType if wsdl_import
            and wsdl_import.__contains__('wsdl:portType') else None)

        for port_type in port_tag:
            port_type_name = port_type['name']
            if debug: print "Processing port type", port_type_name
            binding = port_type_bindings[port_type_name]

            for operation in port_type.operation:
                op_name = operation['name']
                op = operations[op_name]
                op['documentation'] = unicode(
                    operation('documentation', error=False) or '')
                if binding['soap_ver']:
                    #TODO: separe operation_binding from operation (non SOAP?)
                    input = get_local_name(operation.input['message'])
                    output = get_local_name(operation.output['message'])
                    op['input'] = messages[input]
                    op['output'] = messages[output]

        if debug:
            import pprint
            pprint.pprint(services)

        return services