def parse_siteinfo(self, xml=None):
    """Read site-level metadata from the XML root into attributes.

    Parameters
    ----------
    - xml: optional replacement source. It is handed to ``etree.parse``
      first; if that raises, the object itself is stored as the root.
      When omitted, the existing ``self._root`` is used unchanged.

    Sets ``site_name``, ``site_codes``, ``elevation``, ``vertical_datum``,
    ``site_types``, ``site_properties``, ``altname``, ``notes`` and
    ``location``. ``time_zone_info`` is assigned only when a
    ``timeZoneInfo`` element is found.
    """
    if xml is not None:
        try:
            self._root = etree.parse(xml)
        except Exception:
            # Best-effort fallback: assume ``xml`` is already a parsed node.
            # ``Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt and SystemExit still propagate.
            self._root = xml

    xml_dict = _xml_to_dict(self._root)
    self.site_name = xml_dict.get('site_name')
    self.site_codes = [
        testXMLValue(code) for code in self._findall('siteCode')
    ]
    self.elevation = xml_dict.get('elevation_m')
    self.vertical_datum = xml_dict.get('vertical_datum')
    self.site_types = [
        testXMLValue(typ) for typ in self._findall('siteType')
    ]
    # Keyed by each property's ``name`` attribute; a repeated name keeps
    # the last value seen.
    self.site_properties = dict(
        (prop.attrib.get('name'), testXMLValue(prop))
        for prop in self._findall('siteProperty')
    )
    self.altname = xml_dict.get('altname')
    self.notes = [testXMLValue(note) for note in self._findall('note')]

    # sub-objects
    tzi = self._find('timeZoneInfo')
    if tzi is not None:
        self.time_zone_info = TimeZoneInfo(tzi, self._ns)
    self.location = Location(self._find('geoLocation'), self._ns)
def parse_location(self, xml=None):
    """Read geographic and local-site coordinates from the XML root.

    Parameters
    ----------
    - xml: optional replacement source. It is handed to ``etree.parse``
      first; if that raises, the object itself is stored as the root.

    Sets ``geo_coords`` (list of ``(longitude, latitude)``) and ``srs``
    from every ``geogLocation`` element, and ``local_sites`` (list of
    ``(X, Y, Z)``), ``notes`` and ``projections`` from every
    ``localSiteXY`` element. The lists are index-aligned per element.
    """
    if xml is not None:
        try:
            self._root = etree.parse(xml)
        except Exception:
            # Best-effort fallback: assume ``xml`` is already a parsed node.
            # ``Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt and SystemExit still propagate.
            self._root = xml

    def _child_text(node, name):
        # Value of the namespaced child ``name`` of ``node``.
        return testXMLValue(node.find(ns(self._ns) + name))

    geogs = self._findall('geogLocation')
    self.geo_coords = []
    self.srs = []
    for geog in geogs:
        self.geo_coords.append(
            (_child_text(geog, 'longitude'), _child_text(geog, 'latitude'))
        )
        self.srs.append(geog.attrib.get('srs'))

    locsite = self._findall('localSiteXY')
    self.local_sites = []
    self.notes = []
    self.projections = []
    for site in locsite:
        z = _child_text(site, 'Z')
        # A missing Z is reported as the string '0'.
        self.local_sites.append((
            _child_text(site, 'X'),
            _child_text(site, 'Y'),
            z if z is not None else '0',
        ))
        self.notes.append([
            testXMLValue(note)
            for note in site.findall(ns(self._ns) + 'note')
        ])
        self.projections.append(site.attrib.get('projectionInformation'))
def __init__(self, elem, parent, parse_remote_metadata=False):
    """Collect the metadata of one feature type from its XML element.

    Parameters
    ----------
    - elem: element whose ``Name``, ``Title``, ``Abstract``, ``Keywords``,
      bounding-box, ``SRS``, ``Operations`` and ``MetadataURL`` children
      are read
    - parent: element whose ``Operations/*`` children supply the base
      list of verbs
    - parse_remote_metadata: when true, every MetadataURL is downloaded
      and parsed (default is False)
    """
    self.id = testXMLValue(elem.find(wfs_ns("Name")))
    self.title = testXMLValue(elem.find(wfs_ns("Title")))
    self.abstract = testXMLValue(elem.find(wfs_ns("Abstract")))
    self.keywords = extract_xml_list(elem.findall(wfs_ns("Keywords")))

    # bboxes
    self.boundingBox = None
    b = elem.find(wfs_ns("BoundingBox"))
    if b is not None:
        self.boundingBox = (
            float(b.attrib["minx"]),
            float(b.attrib["miny"]),
            float(b.attrib["maxx"]),
            float(b.attrib["maxy"]),
            b.attrib["SRS"],
        )
    self.boundingBoxWGS84 = None
    b = elem.find(wfs_ns("LatLongBoundingBox"))
    if b is not None:
        self.boundingBoxWGS84 = (
            float(b.attrib["minx"]),
            float(b.attrib["miny"]),
            float(b.attrib["maxx"]),
            float(b.attrib["maxy"]),
        )

    # crs options
    self.crsOptions = [Crs(srs.text) for srs in elem.findall(wfs_ns("SRS"))]

    # verbs: start from the parent's operations, then add the ones that
    # only this element declares. The original used a bare ``+`` whose
    # result was discarded, so these were never added.
    self.verbOptions = [op.tag for op in parent.findall(wfs_ns("Operations/*"))]
    self.verbOptions += [
        op.tag
        for op in elem.findall(wfs_ns("Operations/*"))
        if op.tag not in self.verbOptions
    ]

    # others not used but needed for iContentMetadata harmonisation
    self.styles = None
    self.timepositions = None

    # MetadataURLs
    self.metadataUrls = []
    for m in elem.findall(wfs_ns("MetadataURL")):
        metadataUrl = {
            "type": testXMLValue(m.attrib["type"], attrib=True),
            "format": testXMLValue(m.find("Format")),
            "url": testXMLValue(m),
        }

        if metadataUrl["url"] is not None and parse_remote_metadata:
            # download URL; any failure records ``metadata`` as None
            try:
                content = urlopen(metadataUrl["url"])
                doc = etree.parse(content)
                if metadataUrl["type"] is not None:
                    if metadataUrl["type"] == "FGDC":
                        metadataUrl["metadata"] = Metadata(doc)
                    if metadataUrl["type"] == "TC211":
                        metadataUrl["metadata"] = MD_Metadata(doc)
            except Exception:
                metadataUrl["metadata"] = None

        self.metadataUrls.append(metadataUrl)
def _invoke(self):
    """POST ``self.request`` to ``self.url`` and vet the response.

    The reply body is stored in ``self.response`` and its parsed tree in
    ``self._exml``. ``self.exceptionreport`` becomes an
    ``ExceptionReport`` when ``find`` locates an ``ows:Exception``
    element, otherwise ``None``.

    Raises
    ------
    - RuntimeError: the root element is not one of the accepted CSW/OWS
      response elements
    """
    # do HTTP request
    self.response = util.http_post(self.url, self.request, self.lang, self.timeout)

    # parse result see if it's XML
    self._exml = etree.parse(StringIO.StringIO(self.response))

    # it's XML.  Attempt to decipher whether the XML response is CSW-ish
    valid_xpaths = [
        util.nspath_eval('ows:ExceptionReport', namespaces),
        util.nspath_eval('csw:Capabilities', namespaces),
        util.nspath_eval('csw:DescribeRecordResponse', namespaces),
        util.nspath_eval('csw:GetDomainResponse', namespaces),
        util.nspath_eval('csw:GetRecordsResponse', namespaces),
        util.nspath_eval('csw:GetRecordByIdResponse', namespaces),
        util.nspath_eval('csw:HarvestResponse', namespaces),
        util.nspath_eval('csw:TransactionResponse', namespaces),
    ]

    if self._exml.getroot().tag not in valid_xpaths:
        # Call form instead of ``raise X, 'msg'`` so the statement also
        # parses on Python 3.
        raise RuntimeError('Document is XML, but not CSW-ish')

    # check if it's an OGC Exception
    val = self._exml.find(util.nspath_eval('ows:Exception', namespaces))
    if val is not None:
        self.exceptionreport = ExceptionReport(self._exml, self.owscommon.namespace)
    else:
        self.exceptionreport = None
def getAttributes(shapefile, WFS_URL):
    """
    Given a valid shapefile (WFS feature type as returned by getShapefiles),
    request a single feature of that type and collect the local names of
    the elements that live in a namespace other than the usual
    WFS/GML/OGC/OWS/XLink/XSI ones.

    There may be a better way to determine which elements are shapefile
    dbf attributes, but this should work pretty well.

    Returns the list of local names, in document order. The geometry
    elements ``the_geom`` and ``Shape`` and the feature type's own
    element are left out.
    """
    standard_namespaces = (
        'http://www.opengis.net/wfs',
        'http://www.w3.org/2001/XMLSchema-instance',
        'http://www.w3.org/1999/xlink',
        'http://www.opengis.net/gml',
        'http://www.opengis.net/ogc',
        'http://www.opengis.net/ows',
    )

    service = WebFeatureService(WFS_URL, version='1.1.0')
    response = service.getfeature(typename=shapefile, maxfeatures=1, propertyname=None)
    tree = etree.parse(response)

    found = []
    for uri in tree.getroot().nsmap.values():
        if uri in standard_namespaces:
            continue
        # Every element of the custom namespace, at any depth.
        for node in tree.iter('{' + uri + '}*'):
            local = etree.QName(node).localname
            if local not in ['the_geom', 'Shape', shapefile.split(':')[1]]:
                found.append(local)
    return found
def parse_criteria(self, xml=None):
    """Read the query criteria from the XML root into attributes.

    Parameters
    ----------
    - xml: optional replacement source. It is handed to ``etree.parse``
      first; if that raises, the object itself is stored as the root.

    Sets ``method_called``, ``location_param``, ``variable_param``,
    ``begin_date_time``, ``end_date_time`` and ``parameters`` (a list of
    ``(name, value)`` attribute pairs). A date that is missing or cannot
    be parsed is stored as ``None``.
    """
    if xml is not None:
        try:
            self._root = etree.parse(xml)
        except Exception:
            # Best-effort fallback: assume ``xml`` is already a parsed node.
            # ``Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt and SystemExit still propagate.
            self._root = xml

    xml_dict = _xml_to_dict(self._root, depth=4)
    self.method_called = self._root.attrib.get('MethodCalled')
    self.location_param = xml_dict.get('location_param')
    self.variable_param = xml_dict.get('variable_param')

    try:
        self.begin_date_time = parser.parse(xml_dict['begin_date_time'])
    except Exception:
        # key absent or value not parseable as a date
        self.begin_date_time = None

    try:
        self.end_date_time = parser.parse(xml_dict['end_date_time'])
    except Exception:
        # key absent or value not parseable as a date
        self.end_date_time = None

    self.parameters = [
        (param.attrib.get('name'), param.attrib.get('value'))
        for param in self._findall('parameter')
    ]
def _invoke(self):
    """Send ``self.request`` and vet the response.

    A string request is fetched directly with ``urlopen`` (GET KVP). Any
    other request is passed through ``cleanup_namespaces``, serialised
    and POSTed to ``self.url``. The reply body is stored in
    ``self.response`` and its parsed tree in ``self._exml``.
    ``self.exceptionreport`` is set to ``None`` on success.

    Raises
    ------
    - RuntimeError: the root element is not one of the accepted CSW/OWS
      response elements
    - ows.ExceptionReport: ``find`` locates an ``ows:Exception`` element
    """
    # do HTTP request
    if isinstance(self.request, basestring):  # GET KVP
        self.response = urlopen(self.request, timeout=self.timeout).read()
    else:
        self.request = cleanup_namespaces(self.request)
        self.request = util.xml2string(etree.tostring(self.request))
        self.response = util.http_post(self.url, self.request, self.lang, self.timeout)

    # parse result see if it's XML
    self._exml = etree.parse(StringIO.StringIO(self.response))

    # it's XML.  Attempt to decipher whether the XML response is CSW-ish
    valid_xpaths = [
        util.nspath_eval("ows:ExceptionReport", namespaces),
        util.nspath_eval("csw:Capabilities", namespaces),
        util.nspath_eval("csw:DescribeRecordResponse", namespaces),
        util.nspath_eval("csw:GetDomainResponse", namespaces),
        util.nspath_eval("csw:GetRecordsResponse", namespaces),
        util.nspath_eval("csw:GetRecordByIdResponse", namespaces),
        util.nspath_eval("csw:HarvestResponse", namespaces),
        util.nspath_eval("csw:TransactionResponse", namespaces),
    ]

    if self._exml.getroot().tag not in valid_xpaths:
        # Call form instead of ``raise X, "msg"`` so the statement also
        # parses on Python 3.
        raise RuntimeError("Document is XML, but not CSW-ish")

    # check if it's an OGC Exception
    val = self._exml.find(util.nspath_eval("ows:Exception", namespaces))
    if val is not None:
        raise ows.ExceptionReport(self._exml, self.owscommon.namespace)
    else:
        self.exceptionreport = None
def _invoke(self):
    """POST ``self.request`` to ``self.url`` and vet the response.

    The reply body is stored in ``self.response`` and its parsed tree in
    ``self._exml``. ``self.exceptionreport`` is set to ``None`` on
    success.

    Raises
    ------
    - RuntimeError: the root element is not one of the accepted CSW/OWS
      response elements
    - ows.ExceptionReport: ``find`` locates an ``ows:Exception`` element
    """
    # do HTTP request
    self.response = http_post(self.url, self.request, self.lang, self.timeout)

    # parse result see if it's XML
    self._exml = etree.parse(StringIO.StringIO(self.response))

    # it's XML.  Attempt to decipher whether the XML response is CSW-ish
    valid_xpaths = [
        nsp_ows('ows:ExceptionReport'),
        nsp('csw:Capabilities'),
        nsp('csw:DescribeRecordResponse'),
        nsp('csw:GetDomainResponse'),
        nsp('csw:GetRecordsResponse'),
        nsp('csw:GetRecordByIdResponse'),
        nsp('csw:HarvestResponse'),
        nsp('csw:TransactionResponse'),
    ]

    if self._exml.getroot().tag not in valid_xpaths:
        # Call form instead of ``raise X, 'msg'`` so the statement also
        # parses on Python 3.
        raise RuntimeError('Document is XML, but not CSW-ish')

    # check if it's an OGC Exception
    val = self._exml.find(nsp_ows('ows:Exception'))
    if val is not None:
        raise ows.ExceptionReport(self._exml, ns.get_versioned_namespace('ows', _ows_version))
    else:
        self.exceptionreport = None
def _invoke(self):
    """POST ``self.request`` to ``self.url`` and vet the response.

    The reply body is stored in ``self.response`` and its parsed tree in
    ``self._exml``. ``self.exceptionreport`` becomes an
    ``ExceptionReport`` when ``find`` locates an OWS ``Exception``
    element, otherwise ``None``.

    Raises
    ------
    - RuntimeError: the root element is not one of the accepted CSW/OWS
      response elements
    """
    # do HTTP request
    self.response = util.http_post(self.url, self.request, self.lang, self.timeout)

    # parse result see if it's XML
    self._exml = etree.parse(StringIO.StringIO(self.response))

    # it's XML.  Attempt to decipher whether the XML response is CSW-ish
    valid_xpaths = [
        util.nspath("ExceptionReport", namespaces["ows"]),
        util.nspath("Capabilities", namespaces["csw"]),
        util.nspath("DescribeRecordResponse", namespaces["csw"]),
        util.nspath("GetDomainResponse", namespaces["csw"]),
        util.nspath("GetRecordsResponse", namespaces["csw"]),
        util.nspath("GetRecordByIdResponse", namespaces["csw"]),
        util.nspath("HarvestResponse", namespaces["csw"]),
        util.nspath("TransactionResponse", namespaces["csw"]),
    ]

    if self._exml.getroot().tag not in valid_xpaths:
        # Call form instead of ``raise X, "msg"`` so the statement also
        # parses on Python 3.
        raise RuntimeError("Document is XML, but not CSW-ish")

    # check if it's an OGC Exception
    val = self._exml.find(util.nspath("Exception", namespaces["ows"]))
    if val is not None:
        self.exceptionreport = ExceptionReport(self._exml, self.owscommon.namespace)
    else:
        self.exceptionreport = None
def gmlas_config(self):
    """Write a copy of the selected GMLAS config with the dialog options applied.

    The file named in ``gmlasConfigLineEdit`` is parsed, the
    ``ExposeMetadataLayers``, ``RemoveUnusedLayers`` and
    ``RemoveUnusedFields`` elements are set from their checkboxes, and
    the value of the ``Accept-Language`` HTTP header entry is set from
    ``acceptLanguageHeaderInput``. The result is written to a new
    temporary file, which is not deleted here.

    :return: path of the temporary configuration file
    :raises InputError: no config file path has been entered
    """
    path = self.gmlasConfigLineEdit.text()
    if path == '':
        raise InputError(self.tr("You must select a GMLAS config file"))

    xmlConfig = etree.parse(path)

    # Set parameters
    root = xmlConfig.getroot()

    for node in root.iter('ExposeMetadataLayers'):
        node.text = str(self.ogrExposeMetadataLayersCheckbox.isChecked()).lower()
    for rules in root.iter('LayerBuildingRules'):
        for node in rules.iter('RemoveUnusedLayers'):
            node.text = str(self.ogrRemoveUnusedLayersCheckbox.isChecked()).lower()
        for node in rules.iter('RemoveUnusedFields'):
            node.text = str(self.ogrRemoveUnusedFieldsCheckbox.isChecked()).lower()

    for header in root.findall("XLinkResolution/URLSpecificResolution/HTTPHeader"):
        name = header.find('Name').text
        if name == 'Accept-Language':
            header.find('Value').text = self.acceptLanguageHeaderInput.text()

    textConfig = BytesIO()
    xmlConfig.write(textConfig, encoding='utf-8', xml_declaration=False)

    # Write config in temp file. The ``with`` block closes the handle even
    # if the write fails; ``delete=False`` keeps the file on disk.
    with tempfile.NamedTemporaryFile(prefix='gmlasconf_',
                                     suffix='.xml',
                                     delete=False) as tf:
        tf.write(textConfig.getvalue())

    log("Temporary configuration file created '{}' for conversion.".format(str(tf.name)))

    return tf.name
def parse_series(self, xml=None):
    """Read one series description from the XML root into attributes.

    Parameters
    ----------
    - xml: optional replacement source. It is handed to ``etree.parse``
      first; if that raises, the object itself is stored as the root.

    Sets the value/category fields, the four date-time fields, the
    method, source and quality-control fields, ``properties`` and the
    ``variable`` sub-object. A date-time whose entry is absent is stored
    as ``None``; the ``*_id`` attributes are ``None`` when their element
    is absent.
    """
    if xml is not None:
        try:
            self._root = etree.parse(xml)
        except Exception:
            # Best-effort fallback: assume ``xml`` is already a parsed node.
            # ``Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt and SystemExit still propagate.
            self._root = xml

    xml_dict = _xml_to_dict(self._root, depth=3)

    def _optional_date(key):
        # Parse the entry if present. The original guarded only the
        # ``*_utc`` fields, so a missing local time crashed the parse.
        raw = xml_dict.get(key)
        return parser.parse(raw) if raw is not None else None

    self.value_count = xml_dict.get('value_count')
    self.value_type = xml_dict.get('value_type')
    self.general_category = xml_dict.get('general_category')
    self.sample_medium = xml_dict.get('sample_medium')
    self.data_type = xml_dict.get('data_type')

    # date-time
    self.begin_date_time = _optional_date('begin_date_time')
    self.begin_date_time_utc = _optional_date('begin_date_time_utc')
    self.end_date_time = _optional_date('end_date_time')
    self.end_date_time_utc = _optional_date('end_date_time_utc')

    # method info
    self.method_description = xml_dict.get('method_description')
    self.method_code = xml_dict.get('method_code')
    self.method_link = xml_dict.get('method_link')
    method = self._find('method')
    if method is not None:
        self.method_id = method.attrib.get('methodID')
    else:
        self.method_id = None

    # source info
    self.organization = xml_dict.get('organization')
    self.source_description = xml_dict.get('source_description')
    self.citation = xml_dict.get('citation')
    source = self._find('source')
    if source is not None:
        self.source_id = source.attrib.get('sourceID')
    else:
        self.source_id = None

    # quality control info
    self.quality_control_level_code = xml_dict.get('quality_control_level_code')
    self.definition = xml_dict.get('definition')
    qa = self._find('qualityControlLevel')
    if qa is not None:
        self.quality_control_level_id = qa.attrib.get('qualityControlLevelID')
    else:
        self.quality_control_level_id = None

    # properties, keyed by each element's ``name`` attribute
    self.properties = dict(
        (prop.attrib.get('name'), testXMLValue(prop))
        for prop in self._findall('seriesProperty')
    )

    # sub-objects
    self.variable = Variable(self._find('variable'), self._ns)
def __init__(self, elem, parent, parse_remote_metadata=False):
    """Collect the metadata of one feature type from its XML element.

    Parameters
    ----------
    - elem: element whose ``Name``, ``Title``, ``Abstract``, ``Keywords``,
      bounding-box, ``SRS``, ``Operations`` and ``MetadataURL`` children
      are read
    - parent: element whose ``Operations/*`` children supply the base
      list of verbs
    - parse_remote_metadata: when true, every MetadataURL is downloaded
      and parsed (default is False)
    """
    self.id = testXMLValue(elem.find(nspath('Name')))
    self.title = testXMLValue(elem.find(nspath('Title')))
    self.abstract = testXMLValue(elem.find(nspath('Abstract')))
    self.keywords = [f.text for f in elem.findall(nspath('Keywords'))]

    # bboxes
    self.boundingBox = None
    b = elem.find(nspath('BoundingBox'))
    if b is not None:
        self.boundingBox = (
            float(b.attrib['minx']),
            float(b.attrib['miny']),
            float(b.attrib['maxx']),
            float(b.attrib['maxy']),
            b.attrib['SRS'],
        )
    self.boundingBoxWGS84 = None
    b = elem.find(nspath('LatLongBoundingBox'))
    if b is not None:
        self.boundingBoxWGS84 = (
            float(b.attrib['minx']),
            float(b.attrib['miny']),
            float(b.attrib['maxx']),
            float(b.attrib['maxy']),
        )

    # crs options
    self.crsOptions = [Crs(srs.text) for srs in elem.findall(nspath('SRS'))]

    # verbs: start from the parent's operations, then add the ones that
    # only this element declares. The original used a bare ``+`` whose
    # result was discarded, so these were never added.
    self.verbOptions = [op.tag for op in parent.findall(nspath('Operations/*'))]
    self.verbOptions += [
        op.tag
        for op in elem.findall(nspath('Operations/*'))
        if op.tag not in self.verbOptions
    ]

    # others not used but needed for iContentMetadata harmonisation
    self.styles = None
    self.timepositions = None
    self.defaulttimeposition = None

    # MetadataURLs
    self.metadataUrls = []
    for m in elem.findall(nspath('MetadataURL')):
        metadataUrl = {
            'type': testXMLValue(m.attrib['type'], attrib=True),
            'format': testXMLValue(m.find('Format')),
            'url': testXMLValue(m)
        }

        if metadataUrl['url'] is not None and parse_remote_metadata:
            # download URL; any failure records ``metadata`` as None
            try:
                content = urlopen(metadataUrl['url'])
                doc = etree.parse(content)
                if metadataUrl['type'] is not None:
                    if metadataUrl['type'] == 'FGDC':
                        metadataUrl['metadata'] = Metadata(doc)
                    if metadataUrl['type'] == 'TC211':
                        metadataUrl['metadata'] = MD_Metadata(doc)
            except Exception:
                metadataUrl['metadata'] = None

        self.metadataUrls.append(metadataUrl)
def __init__(self, elem, parent, parse_remote_metadata=False, timeout=30):
    """Collect the metadata of one feature type from its XML element.

    Parameters
    ----------
    - elem: element whose ``Name``, ``Title``, ``Abstract``, ``Keywords``,
      bounding-box, ``SRS``, ``Operations`` and ``MetadataURL`` children
      are read
    - parent: element whose ``Operations/*`` children supply the base
      list of verbs
    - parse_remote_metadata: when true, every MetadataURL is downloaded
      and parsed (default is False)
    - timeout: passed to ``urlopen`` for each metadata download
      (default is 30)
    """
    self.id = testXMLValue(elem.find(nspath('Name')))
    self.title = testXMLValue(elem.find(nspath('Title')))
    self.abstract = testXMLValue(elem.find(nspath('Abstract')))
    self.keywords = [f.text for f in elem.findall(nspath('Keywords'))]

    # bboxes
    self.boundingBox = None
    b = elem.find(nspath('BoundingBox'))
    if b is not None:
        self.boundingBox = (
            float(b.attrib['minx']),
            float(b.attrib['miny']),
            float(b.attrib['maxx']),
            float(b.attrib['maxy']),
            b.attrib['SRS'],
        )
    self.boundingBoxWGS84 = None
    b = elem.find(nspath('LatLongBoundingBox'))
    if b is not None:
        self.boundingBoxWGS84 = (
            float(b.attrib['minx']),
            float(b.attrib['miny']),
            float(b.attrib['maxx']),
            float(b.attrib['maxy']),
        )

    # crs options
    self.crsOptions = [Crs(srs.text) for srs in elem.findall(nspath('SRS'))]

    # verbs: start from the parent's operations, then add the ones that
    # only this element declares. The original used a bare ``+`` whose
    # result was discarded, so these were never added.
    self.verbOptions = [op.tag for op in parent.findall(nspath('Operations/*'))]
    self.verbOptions += [
        op.tag
        for op in elem.findall(nspath('Operations/*'))
        if op.tag not in self.verbOptions
    ]

    # others not used but needed for iContentMetadata harmonisation
    self.styles = None
    self.timepositions = None
    self.defaulttimeposition = None

    # MetadataURLs
    self.metadataUrls = []
    for m in elem.findall(nspath('MetadataURL')):
        metadataUrl = {
            'type': testXMLValue(m.attrib['type'], attrib=True),
            'format': testXMLValue(m.find('Format')),
            'url': testXMLValue(m)
        }

        if metadataUrl['url'] is not None and parse_remote_metadata:
            # download URL; any failure records ``metadata`` as None
            try:
                content = urlopen(metadataUrl['url'], timeout=timeout)
                doc = etree.parse(content)
                if metadataUrl['type'] is not None:
                    if metadataUrl['type'] == 'FGDC':
                        metadataUrl['metadata'] = Metadata(doc)
                    if metadataUrl['type'] == 'TC211':
                        metadataUrl['metadata'] = MD_Metadata(doc)
            except Exception:
                metadataUrl['metadata'] = None

        self.metadataUrls.append(metadataUrl)
def __init__(self, elem, parse_remote_metadata=False, timeout=30):
    """Collect the metadata of one WFS feature type from its XML element.

    Parameters
    ----------
    - elem: feature-type element to read
    - parse_remote_metadata: when true, every MetadataURL is downloaded
      and parsed (default is False)
    - timeout: passed to ``openURL`` for each metadata download
      (default is 30)
    """
    def first(qname):
        # First child of ``elem`` matching the prefixed name, or None.
        return elem.find(nspath_eval(qname, namespaces))

    def every(qname):
        # All descendants of ``elem`` matching the prefixed path.
        return elem.findall(nspath_eval(qname, namespaces))

    self.id = testXMLValue(first('wfs:Name'))
    self.title = testXMLValue(first('wfs:Title'))
    self.abstract = testXMLValue(first('wfs:Abstract'))
    self.keywords = [kw.text for kw in every('ows:Keywords/ows:Keyword')]

    # bbox
    self.boundingBoxWGS84 = None
    bbox = BoundingBox(first('ows:WGS84BoundingBox'), namespaces['ows'])
    if bbox is not None:
        try:
            self.boundingBoxWGS84 = (
                float(bbox.minx),
                float(bbox.miny),
                float(bbox.maxx),
                float(bbox.maxy),
            )
        except TypeError:
            # a corner value that float() rejects leaves the box unset
            self.boundingBoxWGS84 = None

    # crs options; the default SRS, when present, goes first
    self.crsOptions = [Crs(other.text) for other in every('wfs:OtherSRS')]
    default_srs = testXMLValue(first('wfs:DefaultSRS'))
    if default_srs is not None:
        self.crsOptions.insert(0, Crs(default_srs))

    # verbs
    self.verbOptions = [verb.text for verb in every('wfs:Operations/wfs:Operation')]

    # output formats
    self.outputFormats = [fmt.text for fmt in every('wfs:OutputFormats/wfs:Format')]

    # MetadataURLs
    self.metadataUrls = []
    for node in every('wfs:MetadataURL'):
        entry = {
            'type': testXMLValue(node.attrib['type'], attrib=True),
            'format': testXMLValue(node.find('Format')),
            'url': testXMLValue(node)
        }

        if parse_remote_metadata and entry['url'] is not None:
            # download URL; any failure records ``metadata`` as None
            try:
                doc = etree.parse(openURL(entry['url'], timeout=timeout))
                if entry['type'] == 'FGDC':
                    entry['metadata'] = Metadata(doc)
                elif entry['type'] in ['TC211', '19115', '19139']:
                    entry['metadata'] = MD_Metadata(doc)
            except Exception:
                entry['metadata'] = None

        self.metadataUrls.append(entry)

    # others not used but needed for iContentMetadata harmonisation
    self.styles = None
    self.timepositions = None
    self.defaulttimeposition = None
def xmlvalid(xml, xsd):
    """
    Test whether an XML document is valid

    Parameters
    ----------
    - xml: XML content
    - xsd: pointer to XML Schema (local file path or URL)

    Returns the result of validating the parsed content against the
    schema.
    """
    schema = etree.XMLSchema(etree.parse(xsd))
    document = etree.parse(StringIO(xml))
    return schema.validate(document)
def parse_seriescatalog(self, xml=None):
    """Build ``self.series`` from every ``series`` element under the root.

    Parameters
    ----------
    - xml: optional replacement source. It is handed to ``etree.parse``
      first; if that raises, the object itself is stored as the root.
    """
    if xml is not None:
        try:
            self._root = etree.parse(xml)
        except Exception:
            # Best-effort fallback: assume ``xml`` is already a parsed node.
            # ``Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt and SystemExit still propagate.
            self._root = xml

    self.series = [Series(elm, self._ns) for elm in self._findall('series')]
def harvest(self, source, resourcetype, resourceformat=None, harvestinterval=None, responsehandler=None):
    """
    Construct and process a Harvest request

    Parameters
    ----------
    - source: a URI to harvest
    - resourcetype: namespace identifying the type of resource
    - resourceformat: MIME type of the resource
    - harvestinterval: frequency of harvesting, in ISO8601
    - responsehandler: endpoint that CSW should respond to with response

    On an acknowledgement the ``timestamp`` and ``id`` attributes are
    set; otherwise the transaction summary is parsed and the inserted
    record identifiers are collected in ``self.results['inserted']``.
    """
    csw_ns = namespaces['csw']

    # construct request
    request_root = etree.Element(util.nspath('Harvest', csw_ns))
    request_root.set('version', self.version)
    request_root.set('service', self.service)
    request_root.set(util.nspath('schemaLocation', namespaces['xsi']), schema_location)

    etree.SubElement(request_root, util.nspath('Source', csw_ns)).text = source
    etree.SubElement(request_root, util.nspath('ResourceType', csw_ns)).text = resourcetype

    # optional children, emitted in this order when a value was given
    optional_children = (
        ('ResourceFormat', resourceformat),
        ('HarvestInterval', harvestinterval),
        ('ResponseHandler', responsehandler),
    )
    for tag, value in optional_children:
        if value is not None:
            etree.SubElement(request_root, util.nspath(tag, csw_ns)).text = value

    self.request = util.xml2string(etree.tostring(request_root))
    self.response = util.http_post(self.url, self.request, self.lang)

    # parse result
    self._response = etree.parse(StringIO.StringIO(self.response))

    # check for exceptions
    self._isexception(self._response, self.owscommon.namespace)

    self.results = {}

    if self.exceptionreport is not None:
        return

    ack = self._response.find(util.nspath('Acknowledgement', csw_ns))
    if util.testXMLValue(ack) is not None:
        self.timestamp = util.testXMLValue(ack.attrib.get('timeStamp'), True)
        request_id = ack.find(util.nspath('RequestId', csw_ns))
        self.id = util.testXMLValue(request_id)
    else:
        self._parsetransactionsummary()
        inserted = self.results['inserted'] = []

        identifier_path = util.nspath('BriefRecord', csw_ns) + '/' + util.nspath('identifier', namespaces['dc'])
        for insert_result in self._response.findall(util.nspath('TransactionResponse/InsertResult', csw_ns)):
            for identifier in insert_result.findall(identifier_path):
                inserted.append(util.testXMLValue(identifier))
def getValues(shapefile, attribute, getTuples, limitFeatures, wfs_url):
    """
    Similar to get attributes, given a shapefile and a valid attribute this
    function will make a call to the Web Feature Services returning a list
    of values associated with the shapefile and attribute.

    ``getTuples`` is compared against the strings 'true' and 'only':
    - 'true': returns ``(sorted values, sorted (value, gml:id) tuples)``
    - 'only': returns the sorted ``(value, gml:id)`` tuples
    - anything else: returns the sorted values

    Raises Exception when a ``featureMember`` has been walked and no
    ``gml:id`` has been seen so far.
    """
    wfs = WebFeatureService(wfs_url, version='1.1.0')
    feature = wfs.getfeature(typename=shapefile, maxfeatures=limitFeatures, propertyname=[attribute])
    content = BytesIO(feature.read().encode())
    gml = etree.parse(content)

    # A set de-duplicates in O(1) per element; every return path that
    # uses the values sorts them, so insertion order is irrelevant.
    values = set()
    for el in gml.iter():
        if attribute in el.tag:
            values.add(el.text)

    if getTuples == 'true' or getTuples == 'only':
        gml_id_attr = '{' + GML_NAMESPACE + '}id'
        tuples = []
        # ``att`` is True between seeing a gml:id and consuming the next
        # matching attribute element, so each id is paired at most once.
        att = False

        # If features are encoded as a list of featureMember elements.
        gmlid_found = False
        for featureMember in gml.iter('{' + GML_NAMESPACE + '}featureMember'):
            for el in featureMember.iter():
                current_id = el.get(gml_id_attr)
                if current_id:
                    gmlid = current_id
                    att = True
                    gmlid_found = True
                if attribute in el.tag and att is True:
                    tuples.append((el.text, gmlid))
                    att = False
            # Checked per featureMember so that a document using only the
            # featureMembers encoding still reaches the branch below.
            if not gmlid_found:
                raise Exception('No gml:id found in source feature service. This form of GML is not supported.')

        # If features are encoded as a featureMembers element.
        for featureMember in gml.iter('{' + GML_NAMESPACE + '}featureMembers'):
            for el in featureMember.iter():
                gmlid = el.get(gml_id_attr)
                # Direct iteration replaces the deprecated getchildren().
                for feat in el:
                    if attribute in feat.tag:
                        tuples.append((feat.text, gmlid))

    if getTuples == 'true':
        return sorted(values), sorted(tuples)
    elif getTuples == 'only':
        return sorted(tuples)
    else:
        return sorted(values)
def parse_variable(self, xml=None):
    """Read one variable description from the XML root into attributes.

    Parameters
    ----------
    - xml: optional replacement source. It is handed to ``etree.parse``
      first; if that raises, the object itself is stored as the root.

    Sets the scalar fields, ``notes`` (dict keyed by note title, with
    untitled notes listed under the key ``'none'``), ``properties``,
    ``parent_codes`` / ``related_codes`` (``None`` when there is no
    ``related`` element), ``unit``, ``categories`` and, depending on the
    namespace, either ``time_support`` ('wml1.0') or ``time_scale``.
    """
    if xml is not None:
        try:
            self._root = etree.parse(xml)
        except Exception:
            # Best-effort fallback: assume ``xml`` is already a parsed node.
            # ``Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt and SystemExit still propagate.
            self._root = xml

    xml_dict = _xml_to_dict(self._root)
    self.value_type = xml_dict.get('value_type')
    self.data_type = xml_dict.get('data_type')
    self.general_category = xml_dict.get('general_category')
    self.sample_medium = xml_dict.get('sample_medium')
    self.no_data_value = xml_dict.get('no_data_value')
    self.variable_name = xml_dict.get('variable_name')
    self.variable_code = xml_dict.get('variable_code')
    self.variable_description = xml_dict.get('variable_description')
    self.speciation = xml_dict.get('speciation')

    # notes and properties
    notes = [(note.attrib.get('title'), testXMLValue(note)) for note in self._findall('note')]
    none_notes = [value for title, value in notes if title is None]
    self.notes = dict((title, value) for title, value in notes if title is not None)
    if len(none_notes) > 0:
        self.notes['none'] = none_notes

    self.properties = dict(
        (prop.attrib.get('name'), testXMLValue(prop))
        for prop in self._findall('variableProperty')
    )

    def _code_attributes(code):
        # The three attributes recorded for a parent/related code element.
        return dict(
            (key, code.attrib.get(key))
            for key in ('network', 'vocabulary', 'default')
        )

    # related
    related = self._find('related')
    if related is not None:
        self.parent_codes = [
            _code_attributes(code)
            for code in related.findall(ns(self._ns) + 'parentCode')
        ]
        # The original read from an undefined name ``d`` here, raising
        # NameError as soon as a relatedCode element was present.
        self.related_codes = [
            _code_attributes(code)
            for code in related.findall(ns(self._ns) + 'relatedCode')
        ]
    else:
        self.parent_codes = None
        self.related_codes = None

    # sub-objects
    if self._ns == 'wml1.0':
        unit = self._find('units')
        self.unit = Unit1_0(unit, self._ns) if unit is not None else None

        timesupport = self._find('timeSupport')
        self.time_support = TimeScale(timesupport, self._ns) if timesupport is not None else None
    else:
        unit = self._find('unit')
        self.unit = Unit(unit, self._ns) if unit is not None else None

        timescale = self._find('timeScale')
        self.time_scale = TimeScale(timescale, self._ns) if timescale is not None else None

    categories = self._find('categories')
    if categories is not None:
        self.categories = [
            Category(cat, self._ns)
            for cat in categories.findall(ns(self._ns) + 'category')
        ]
    else:
        self.categories = None
def parse_site(self, xml=None):
    """Build ``site_info`` and ``series_catalogs`` from the XML root.

    Parameters
    ----------
    - xml: optional replacement source. It is handed to ``etree.parse``
      first; if that raises, the object itself is stored as the root.
    """
    if xml is not None:
        try:
            self._root = etree.parse(xml)
        except Exception:
            # Best-effort fallback: assume ``xml`` is already a parsed node.
            # ``Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt and SystemExit still propagate.
            self._root = xml

    self.site_info = SiteInfo(self._find('siteInfo'), self._ns)
    self.series_catalogs = [
        SeriesCatalog(elm, self._ns) for elm in self._findall('seriesCatalog')
    ]
def __init__(self, xml_root, namespace):
    """Store the XML root and the namespace key used for lookups.

    Parameters
    ----------
    - xml_root: handed to ``etree.parse`` first; if that raises, the
      object itself is stored as the root
    - namespace: must be a key of ``namespaces``

    Raises
    ------
    - ValueError: ``namespace`` is not in ``namespaces``
    """
    try:
        self._root = etree.parse(xml_root)
    except Exception:
        # Best-effort fallback: assume ``xml_root`` is already a parsed
        # node. ``Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt and SystemExit still propagate.
        self._root = xml_root

    if namespace not in namespaces:
        raise ValueError('Unsupported namespace passed in to parser!')

    self._ns = namespace
def __init__(self, xml_root, namespace):
    """Store the XML root and the namespace key used for lookups.

    Parameters
    ----------
    - xml_root: handed to ``etree.parse`` first; if that raises, the
      object itself is stored as the root
    - namespace: must be a key of ``namespaces``

    Raises
    ------
    - ValueError: ``namespace`` is not in ``namespaces``
    """
    try:
        self._root = etree.parse(xml_root)
    except Exception:
        # Best-effort fallback: assume ``xml_root`` is already a parsed
        # node. ``Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt and SystemExit still propagate.
        self._root = xml_root

    if namespace not in namespaces:
        raise ValueError('Unsupported namespace passed in to parser!')

    self._ns = namespace
def __init__(self, url, lang='en-US', version='2.0.2'):
    """
    Construct and process a GetCapabilities request

    Parameters
    ----------
    - url: the URL of the CSW
    - lang: the language (default is 'en-US')
    - version: version (default is '2.0.2')

    When the response carries no exception report, ``identification``,
    ``provider``, ``operations`` and ``filters`` are populated from it.
    """
    self.url = url
    self.lang = lang
    self.version = version
    self.service = 'CSW'
    self.exceptionreport = None
    self.owscommon = OwsCommon('1.0.0')

    # construct request
    request_root = etree.Element(util.nspath('GetCapabilities', namespaces['csw']))
    request_root.set('service', self.service)
    request_root.set(util.nspath('schemaLocation', namespaces['xsi']), schema_location)

    accept_versions = etree.SubElement(request_root, util.nspath('AcceptVersions', namespaces['ows']))
    version_node = etree.SubElement(accept_versions, util.nspath('Version', namespaces['ows']))
    version_node.text = self.version

    accept_formats = etree.SubElement(request_root, util.nspath('AcceptFormats', namespaces['ows']))
    format_node = etree.SubElement(accept_formats, util.nspath('OutputFormat', namespaces['ows']))
    format_node.text = outputformat

    self.request = util.xml2string(etree.tostring(request_root))

    # invoke
    self.response = util.http_post(self.url, self.request, self.lang)

    # parse result
    self._capabilities = etree.parse(StringIO.StringIO(self.response))

    # check for exceptions
    self._isexception(self._capabilities, self.owscommon.namespace)

    if self.exceptionreport is not None:
        return

    ows_namespace = self.owscommon.namespace

    # ServiceIdentification
    ident_node = self._capabilities.find(util.nspath('ServiceIdentification', namespaces['ows']))
    self.identification = ServiceIdentification(ident_node, ows_namespace)

    # ServiceProvider
    provider_node = self._capabilities.find(util.nspath('ServiceProvider', namespaces['ows']))
    self.provider = ServiceProvider(provider_node, ows_namespace)

    # ServiceOperations metadata
    self.operations = [
        OperationsMetadata(op, self.owscommon.namespace)
        for op in self._capabilities.findall(util.nspath('OperationsMetadata/Operation', namespaces['ows']))
    ]

    # FilterCapabilities
    filter_node = self._capabilities.find(util.nspath('Filter_Capabilities', namespaces['ogc']))
    self.filters = FilterCapabilities(filter_node)
def parse_seriescatalog(self, xml=None):
    """Build ``self.series`` from every ``series`` element under the root.

    Parameters
    ----------
    - xml: optional replacement source. It is handed to ``etree.parse``
      first; if that raises, the object itself is stored as the root.
    """
    if xml is not None:
        try:
            self._root = etree.parse(xml)
        except Exception:
            # Best-effort fallback: assume ``xml`` is already a parsed node.
            # ``Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt and SystemExit still propagate.
            self._root = xml

    self.series = [
        Series(elm, self._ns) for elm in self._findall('series')
    ]
def __init__(self, elem, parse_remote_metadata=False, timeout=30):
    """Collect the metadata of one WFS feature type from its XML element.

    Parameters
    ----------
    - elem: feature-type element to read
    - parse_remote_metadata: when true, every MetadataURL is downloaded
      and parsed (default is False)
    - timeout: passed to ``openURL`` for each metadata download
      (default is 30)
    """
    self.id = testXMLValue(elem.find(nspath_eval('wfs:Name', namespaces)))
    self.title = testXMLValue(elem.find(nspath_eval('wfs:Title', namespaces)))
    self.abstract = testXMLValue(elem.find(nspath_eval('wfs:Abstract', namespaces)))
    self.keywords = [f.text for f in elem.findall(nspath_eval('ows:Keywords/ows:Keyword', namespaces))]

    # bbox
    self.boundingBoxWGS84 = None
    b = BoundingBox(elem.find(nspath_eval('ows:WGS84BoundingBox', namespaces)), namespaces['ows'])
    if b is not None:
        try:
            self.boundingBoxWGS84 = (
                float(b.minx),
                float(b.miny),
                float(b.maxx),
                float(b.maxy),
            )
        except TypeError:
            # A corner value that float() rejects (e.g. None) leaves the
            # box unset instead of aborting the whole constructor.
            self.boundingBoxWGS84 = None

    # crs options
    self.crsOptions = [Crs(srs.text) for srs in elem.findall(nspath_eval('wfs:OtherSRS', namespaces))]
    dsrs = testXMLValue(elem.find(nspath_eval('wfs:DefaultSRS', namespaces)))
    if dsrs is not None:  # first element is default srs
        self.crsOptions.insert(0, Crs(dsrs))

    # verbs
    self.verbOptions = [op.text for op in elem.findall(nspath_eval('wfs:Operations/wfs:Operation', namespaces))]

    # output formats
    self.outputFormats = [op.text for op in elem.findall(nspath_eval('wfs:OutputFormats/wfs:Format', namespaces))]

    # MetadataURLs
    self.metadataUrls = []
    for m in elem.findall(nspath_eval('wfs:MetadataURL', namespaces)):
        metadataUrl = {
            'type': testXMLValue(m.attrib['type'], attrib=True),
            'format': testXMLValue(m.find('Format')),
            'url': testXMLValue(m)
        }

        if metadataUrl['url'] is not None and parse_remote_metadata:
            # download URL; any failure records ``metadata`` as None
            try:
                content = openURL(metadataUrl['url'], timeout=timeout)
                doc = etree.parse(content)
                if metadataUrl['type'] is not None:
                    if metadataUrl['type'] == 'FGDC':
                        metadataUrl['metadata'] = Metadata(doc)
                    if metadataUrl['type'] in ['TC211', '19115', '19139']:
                        metadataUrl['metadata'] = MD_Metadata(doc)
            except Exception:
                metadataUrl['metadata'] = None

        self.metadataUrls.append(metadataUrl)

    # others not used but needed for iContentMetadata harmonisation
    self.styles = None
    self.timepositions = None
    self.defaulttimeposition = None
def parse_sites_response(self, xml=None):
    """Build ``query_info`` and ``sites`` from the XML root.

    Parameters
    ----------
    - xml: optional replacement source. It is handed to ``etree.parse``
      first; if that raises, the object itself is stored as the root.
    """
    if xml is not None:
        try:
            self._root = etree.parse(xml)
        except Exception:
            # Best-effort fallback: assume ``xml`` is already a parsed node.
            # ``Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt and SystemExit still propagate.
            self._root = xml

    self.query_info = QueryInfo(self._find('queryInfo'), self._ns)
    self.sites = [Site(site, self._ns) for site in self._findall('site')]
def getValues(shapefile, attribute, getTuples, limitFeatures, WFS_URL):
    """
    Similar to get attributes, given a shapefile and a valid attribute this
    function will make a call to the Web Feature Services returning a list
    of values associated with the shapefile and attribute.

    ``getTuples`` is compared against the strings "true" and "only":
    - "true": returns ``(sorted values, sorted (value, gml:id) tuples)``
    - "only": returns the sorted ``(value, gml:id)`` tuples
    - anything else: returns the sorted values

    Raises Exception when a ``featureMember`` has been walked and no
    ``gml:id`` has been seen so far.
    """
    wfs = WebFeatureService(WFS_URL, version="1.1.0")
    feature = wfs.getfeature(typename=shapefile, maxfeatures=limitFeatures, propertyname=[attribute])
    gml = etree.parse(feature)

    # A set de-duplicates in O(1) per element; every return path that
    # uses the values sorts them, so insertion order is irrelevant.
    values = set()
    for el in gml.iter():
        if attribute in el.tag:
            values.add(el.text)

    if getTuples == "true" or getTuples == "only":
        gml_id_attr = "{" + GML_NAMESPACE + "}id"
        tuples = []
        # ``att`` is True between seeing a gml:id and consuming the next
        # matching attribute element. It must start out False: the
        # original read it before any assignment when a matching element
        # preceded the first gml:id.
        att = False

        # If features are encoded as a list of featureMember elements.
        gmlid_found = False
        for featureMember in gml.iter("{" + GML_NAMESPACE + "}featureMember"):
            for el in featureMember.iter():
                current_id = el.get(gml_id_attr)
                if current_id:
                    gmlid = current_id
                    att = True
                    gmlid_found = True
                if attribute in el.tag and att:
                    tuples.append((el.text, gmlid))
                    att = False
            # Checked per featureMember so that a document using only the
            # featureMembers encoding still reaches the branch below.
            if not gmlid_found:
                raise Exception("No gml:id found in source feature service. This form of GML is not supported.")

        # If features are encoded as a featureMembers element.
        for featureMember in gml.iter("{" + GML_NAMESPACE + "}featureMembers"):
            for el in featureMember.iter():
                gmlid = el.get(gml_id_attr)
                # Direct iteration replaces the deprecated getchildren().
                for feat in el:
                    if attribute in feat.tag:
                        tuples.append((feat.text, gmlid))

    if getTuples == "true":
        return sorted(values), sorted(tuples)
    elif getTuples == "only":
        return sorted(tuples)
    else:
        return sorted(values)
def parse_sites_response(self, xml=None):
    """Build ``query_info`` and ``sites`` from the XML root.

    Parameters
    ----------
    - xml: optional replacement source. It is handed to ``etree.parse``
      first; if that raises, the object itself is stored as the root.
    """
    if xml is not None:
        try:
            self._root = etree.parse(xml)
        except Exception:
            # Best-effort fallback: assume ``xml`` is already a parsed node.
            # ``Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt and SystemExit still propagate.
            self._root = xml

    self.query_info = QueryInfo(self._find('queryInfo'), self._ns)
    self.sites = [Site(site, self._ns) for site in self._findall('site')]
def test_gm03():
    """Check GM03 parsing against the two example documents."""
    expected_element_names = [
        'address',
        'citation',
        'contact',
        'data_identification',
        'date',
        'extent',
        'extent_geographic_element',
        'geographic_bounding_box',
        'identification_point_of_contact',
        'keywords',
        'metadata',
        'metadata_point_of_contact',
        'responsible_party',
    ]

    # --- first example: header, element inventory, metadata, TID lookup ---
    gm03 = GM03(etree.parse(resource_file('gm03_example1.xml')))

    header = gm03.header
    assert header.version == '2.3'
    assert header.sender == 'geocat.ch'

    data = gm03.data
    assert not hasattr(data, 'core')
    assert hasattr(data, 'comprehensive')

    comprehensive = data.comprehensive
    assert len(comprehensive.elements) == 13
    assert sorted(comprehensive.elements.keys()) == expected_element_names

    assert isinstance(comprehensive.date, list)
    assert len(comprehensive.date) == 1

    metadata = comprehensive.metadata
    assert metadata.file_identifier == '41ac321f632e55cebf0508a2cea5d9023fd12d9ad46edd679f2c275127c88623fb9c9d29726bef7c'
    assert metadata.date_stamp == '1999-12-31T12:00:00'
    assert metadata.language == 'de'

    # Test TID searching
    assert metadata.tid == 'xN6509077498146737843'
    search_tid = metadata.tid
    assert comprehensive.get_element_by_tid('404') is None
    assert comprehensive.get_element_by_tid(search_tid) is not None

    search_tid2 = comprehensive.extent.data_identification.ref
    assert search_tid2 == 'xN8036063300808707346'
    assert comprehensive.get_element_by_tid(search_tid2) is not None

    # --- second example: geographic bounding box values ---
    gm03 = GM03(etree.parse(resource_file('gm03_example2.xml')))
    bbox = gm03.data.comprehensive.geographic_bounding_box
    assert bbox.extent_type_code == 'false'
    assert bbox.north_bound_latitude == '47.1865387201702'
    assert bbox.south_bound_latitude == '47.1234508676764'
    assert bbox.east_bound_longitude == '9.10597474389878'
    assert bbox.west_bound_longitude == '9.23798212070671'
def parse_series(self, xml=None):
    """Populate the series attributes from the current document root.

    Scalar fields come from ``_xml_to_dict(self._root, depth=3)``; the
    ``methodID`` / ``sourceID`` / ``qualityControlLevelID`` attributes and the
    ``seriesProperty`` entries are read from the elements themselves. The
    ``variable`` child is wrapped in a ``Variable`` object.

    :param xml: optional replacement for the document root; parsed with
        ``etree.parse`` when possible, otherwise stored as given.
    """
    if xml is not None:
        try:
            self._root = etree.parse(xml)
        except Exception:
            # Narrowed from a bare ``except``: do not swallow
            # KeyboardInterrupt / SystemExit.
            self._root = xml

    xml_dict = _xml_to_dict(self._root, depth=3)

    def parse_datetime(key):
        # A missing timestamp becomes None for all four date fields; before,
        # only the *_utc variants were guarded and the others raised.
        raw = xml_dict.get(key)
        return parser.parse(raw) if raw is not None else None

    def attribute_of(tag, attribute):
        # Attribute of the first matching element, or None when the element
        # is absent.
        node = self._find(tag)
        return node.attrib.get(attribute) if node is not None else None

    self.value_count = xml_dict.get('value_count')
    self.value_type = xml_dict.get('value_type')
    self.general_category = xml_dict.get('general_category')
    self.sample_medium = xml_dict.get('sample_medium')
    self.data_type = xml_dict.get('data_type')

    # date-time
    self.begin_date_time = parse_datetime('begin_date_time')
    self.begin_date_time_utc = parse_datetime('begin_date_time_utc')
    self.end_date_time = parse_datetime('end_date_time')
    self.end_date_time_utc = parse_datetime('end_date_time_utc')

    # method info
    self.method_description = xml_dict.get('method_description')
    self.method_code = xml_dict.get('method_code')
    self.method_link = xml_dict.get('method_link')
    self.method_id = attribute_of('method', 'methodID')

    # source info
    self.organization = xml_dict.get('organization')
    self.source_description = xml_dict.get('source_description')
    self.citation = xml_dict.get('citation')
    self.source_id = attribute_of('source', 'sourceID')

    # quality control info
    self.quality_control_level_code = xml_dict.get('quality_control_level_code')
    self.definition = xml_dict.get('definition')
    self.quality_control_level_id = attribute_of('qualityControlLevel', 'qualityControlLevelID')

    # properties, keyed by their ``name`` attribute
    self.properties = {
        prop.attrib.get('name'): testXMLValue(prop)
        for prop in self._findall('seriesProperty')
    }

    # sub-objects
    self.variable = Variable(self._find('variable'), self._ns)
def parse_site(self, xml=None):
    """Populate ``site_info`` and ``series_catalogs`` from the document root.

    :param xml: optional replacement for the document root; parsed with
        ``etree.parse`` when possible, otherwise stored as given.
    """
    if xml is not None:
        try:
            self._root = etree.parse(xml)
        except Exception:
            # Narrowed from a bare ``except`` so KeyboardInterrupt /
            # SystemExit are not silently absorbed.
            self._root = xml

    self.site_info = SiteInfo(self._find('siteInfo'), self._ns)
    self.series_catalogs = [
        SeriesCatalog(elm, self._ns) for elm in self._findall('seriesCatalog')
    ]
def parse_query_info(self, xml=None):
    """Populate ``creation_time``, ``notes`` and ``criteria`` from the root.

    :param xml: optional replacement for the document root; parsed with
        ``etree.parse`` when possible, otherwise stored as given.
    """
    if xml is not None:
        try:
            self._root = etree.parse(xml)
        except Exception:
            # Narrowed from a bare ``except`` so KeyboardInterrupt /
            # SystemExit still propagate.
            self._root = xml

    # create queryinfo object from dict
    xml_dict = _xml_to_dict(self._root)

    creation_time = xml_dict.get('creation_time')
    self.creation_time = parser.parse(creation_time) if creation_time is not None else None
    self.notes = [testXMLValue(note) for note in self._findall('note')]
    self.criteria = Criteria(self._find('criteria'), self._ns)
def parse_query_info(self, xml=None):
    """Fill in the query-info fields from the current document root.

    Sets ``creation_time`` (parsed, or ``None`` when absent), ``notes`` (one
    entry per ``note`` element) and ``criteria`` (a ``Criteria`` object).

    :param xml: optional new root; handed to ``etree.parse`` first and used
        unchanged if parsing fails.
    """
    if xml is not None:
        try:
            self._root = etree.parse(xml)
        except Exception:
            self._root = xml

    # create queryinfo object from dict
    fields = _xml_to_dict(self._root)

    created = fields.get('creation_time')
    if created is None:
        self.creation_time = None
    else:
        self.creation_time = parser.parse(created)

    collected_notes = []
    for note_elem in self._findall('note'):
        collected_notes.append(testXMLValue(note_elem))
    self.notes = collected_notes

    criteria_elem = self._find('criteria')
    self.criteria = Criteria(criteria_elem, self._ns)
def __load_titles(self):
    """Return the ``dc:title`` elements of the ``skos:ConceptScheme``.

    The document at ``self.rdf_path`` is parsed with ``dlxml`` and the
    concept scheme is looked up directly under the root element.
    """
    prefixes = dict(
        rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#",
        foaf="http://xmlns.com/foaf/0.1/",
        dc="http://purl.org/dc/elements/1.1/",
        dcterms="http://purl.org/dc/terms/",
        skos="http://www.w3.org/2004/02/skos/core#",
    )

    document_root = dlxml.parse(self.rdf_path).getroot()
    concept_scheme = document_root.find("skos:ConceptScheme", prefixes)
    return concept_scheme.findall("dc:title", prefixes)
def getdomain(self, dname, dtype='parameter'):
    """

    Construct and process a GetDomain request

    Parameters
    ----------

    - dname: the value of the Parameter or Property to query
    - dtype: whether to query a parameter (parameter) or property (property)

    On success (no exception report) ``self.results`` holds the keys
    ``'type'``, ``dtype`` and ``'values'``.

    """
    csw_ns = namespaces['csw']
    dtypename = 'PropertyName' if dtype == 'property' else 'ParameterName'

    # construct request
    request_root = etree.Element(util.nspath('GetDomain', csw_ns))
    request_root.set('service', self.service)
    request_root.set('version', self.version)
    request_root.set(util.nspath('schemaLocation', namespaces['xsi']), schema_location)
    name_node = etree.SubElement(request_root, util.nspath(dtypename, csw_ns))
    name_node.text = dname
    self.request = util.xml2string(etree.tostring(request_root))

    # invoke
    self.response = util.http_post(self.url, self.request, self.lang)

    # parse result
    self._values = etree.parse(StringIO.StringIO(self.response))

    # check for exceptions
    self._isexception(self._values, self.owscommon.namespace)
    if self.exceptionreport is not None:
        return

    results = self.results = {}

    domain_values = self._values.find(util.nspath('DomainValues', csw_ns))
    results['type'] = util.testXMLValue(domain_values.attrib.get('type'), True)

    queried_name = self._values.find(util.nspath('DomainValues/' + dtypename, csw_ns))
    results[dtype] = util.testXMLValue(queried_name)

    # get the list of values associated with the Domain
    results['values'] = []
    value_path = util.nspath('DomainValues/ListOfValues/Value', csw_ns)
    for value_node in self._values.findall(value_path):
        results['values'].append(util.testXMLValue(value_node))
def __init__(self, elem, parse_remote_metadata=False):
    """Build the metadata of one WFS feature type from its XML element.

    Reads name, title, abstract, keywords, WGS84 bounding box, CRS options,
    supported operations, output formats and metadata URLs from ``elem``.

    :param elem: the feature type element (``wfs:`` / ``ows:`` children).
    :param parse_remote_metadata: when true, every ``wfs:MetadataURL`` that
        yields a URL is downloaded and parsed; the result (or ``None`` on any
        failure) is stored under the ``"metadata"`` key of that entry.
    """
    self.id = testXMLValue(elem.find(nspath_eval("wfs:Name", namespaces)))
    self.title = testXMLValue(elem.find(nspath_eval("wfs:Title", namespaces)))
    self.abstract = testXMLValue(elem.find(nspath_eval("wfs:Abstract", namespaces)))
    self.keywords = [f.text for f in elem.findall(nspath_eval("ows:Keywords/ows:Keyword", namespaces))]

    # bbox
    self.boundingBoxWGS84 = None
    bbox_elem = elem.find(nspath_eval("ows:WGS84BoundingBox", namespaces))
    # Test the element itself: a constructed BoundingBox object is never
    # None, so the old ``if b is not None`` check could not detect a
    # feature type without a WGS84 bounding box.
    if bbox_elem is not None:
        b = BoundingBox(bbox_elem, namespaces["ows"])
        self.boundingBoxWGS84 = (float(b.minx), float(b.miny), float(b.maxx), float(b.maxy))

    # crs options
    self.crsOptions = [Crs(srs.text) for srs in elem.findall(nspath_eval("wfs:OtherSRS", namespaces))]
    dsrs = testXMLValue(elem.find(nspath_eval("wfs:DefaultSRS", namespaces)))
    if dsrs is not None:  # first element is default srs
        self.crsOptions.insert(0, Crs(dsrs))

    # verbs
    self.verbOptions = [op.text for op in elem.findall(nspath_eval("wfs:Operations/wfs:Operation", namespaces))]

    # output formats -- kept in their own attribute; previously this list
    # was assigned to ``verbOptions`` and clobbered the operations above.
    self.outputFormats = [op.text for op in elem.findall(nspath_eval("wfs:OutputFormats/wfs:Format", namespaces))]

    # MetadataURLs
    self.metadataUrls = []
    for m in elem.findall(nspath_eval("wfs:MetadataURL", namespaces)):
        metadataUrl = {
            "type": testXMLValue(m.attrib["type"], attrib=True),
            "format": testXMLValue(m.find("Format")),
            "url": testXMLValue(m),
        }

        if metadataUrl["url"] is not None and parse_remote_metadata:  # download URL
            try:
                content = urlopen(metadataUrl["url"])
                doc = etree.parse(content)
                if metadataUrl["type"] is not None:
                    if metadataUrl["type"] == "FGDC":
                        metadataUrl["metadata"] = Metadata(doc)
                    if metadataUrl["type"] in ["TC211", "19115", "19139"]:
                        metadataUrl["metadata"] = MD_Metadata(doc)
            except Exception:
                # Best effort: an unreachable or unparsable document must not
                # break capabilities parsing.
                metadataUrl["metadata"] = None

        self.metadataUrls.append(metadataUrl)
def parse_timezoneinfo(self, xml=None):
    """Read the default and daylight-saving time zone attributes.

    Sets ``zone_offset`` / ``zone_abbreviation`` when a ``defaultTimeZone``
    element is found and ``daylight_zone_offset`` /
    ``daylight_zone_abbreviation`` when a ``daylightSavingsTimeZone`` element
    is found. Attributes for a missing element are left unset.

    :param xml: optional replacement for the document root; parsed with
        ``etree.parse`` when possible, otherwise stored as given.
    """
    if xml is not None:
        try:
            self._root = etree.parse(xml)
        except Exception:
            # Narrowed from a bare ``except`` so KeyboardInterrupt /
            # SystemExit still propagate.
            self._root = xml

    # The unused ``_xml_to_dict(self._root)`` conversion was dropped: every
    # value below is read straight from element attributes.
    default = self._find('defaultTimeZone')
    if default is not None:
        self.zone_offset = default.attrib.get('zoneOffset')
        self.zone_abbreviation = default.attrib.get('zoneAbbreviation')

    daylight = self._find('daylightSavingsTimeZone')
    if daylight is not None:
        self.daylight_zone_offset = daylight.attrib.get('zoneOffset')
        self.daylight_zone_abbreviation = daylight.attrib.get('zoneAbbreviation')
def parse_timezoneinfo(self, xml=None):
    """Extract time zone offsets and abbreviations from the document root.

    ``defaultTimeZone`` feeds ``zone_offset`` and ``zone_abbreviation``;
    ``daylightSavingsTimeZone`` feeds ``daylight_zone_offset`` and
    ``daylight_zone_abbreviation``. When one of the elements is absent its
    two attributes are not assigned.

    :param xml: optional new root; handed to ``etree.parse`` first and used
        unchanged if parsing fails.
    """
    if xml is not None:
        try:
            self._root = etree.parse(xml)
        except Exception:
            # Was a bare ``except``; ``Exception`` keeps the fallback while
            # letting KeyboardInterrupt / SystemExit through.
            self._root = xml

    # No dictionary view of the root is built any more: it was never read.
    default = self._find('defaultTimeZone')
    if default is not None:
        self.zone_offset = default.attrib.get('zoneOffset')
        self.zone_abbreviation = default.attrib.get('zoneAbbreviation')

    daylight = self._find('daylightSavingsTimeZone')
    if daylight is not None:
        self.daylight_zone_offset = daylight.attrib.get('zoneOffset')
        self.daylight_zone_abbreviation = daylight.attrib.get('zoneAbbreviation')
def getrecordbyid(self, id=[], esn='full', outputschema=namespaces['csw'], format=outputformat):
    """

    Construct and process a GetRecordById request

    Parameters
    ----------

    - id: the list of Ids
    - esn: the ElementSetName 'full', 'brief' or 'summary' (default is 'full')
    - outputschema: the outputSchema (default is 'http://www.opengis.net/cat/csw/2.0.2')
    - format: the outputFormat (default is 'application/xml')

    """
    csw_ns = namespaces['csw']

    # construct request
    request_root = etree.Element(util.nspath('GetRecordById', csw_ns))
    root_attributes = (
        ('outputSchema', outputschema),
        ('outputFormat', format),
        ('version', self.version),
        ('service', self.service),
        (util.nspath('schemaLocation', namespaces['xsi']), schema_location),
    )
    for attr_name, attr_value in root_attributes:
        request_root.set(attr_name, attr_value)

    id_tag = util.nspath('Id', csw_ns)
    for record_id in id:
        id_node = etree.SubElement(request_root, id_tag)
        id_node.text = record_id

    esn_node = etree.SubElement(request_root, util.nspath('ElementSetName', csw_ns))
    esn_node.text = esn

    self.request = util.xml2string(etree.tostring(request_root))

    # invoke
    self.response = util.http_post(self.url, self.request, self.lang)

    # parse result
    self._records = etree.parse(StringIO.StringIO(self.response))

    # check for exceptions
    self._isexception(self._records, self.owscommon.namespace)
    if self.exceptionreport is not None:
        return

    self.records = {}
    self._parserecords(outputschema, esn)
def parse_location(self, xml=None):
    """Collect geographic and local-site coordinates from the document root.

    For every ``geogLocation`` element a ``(longitude, latitude)`` tuple is
    appended to ``geo_coords`` and its ``srs`` attribute to ``srs``. For
    every ``localSiteXY`` element an ``(X, Y, Z)`` tuple is appended to
    ``local_sites`` (``Z`` falls back to the string ``'0'`` when missing),
    its notes to ``notes`` and its ``projectionInformation`` attribute to
    ``projections``.

    :param xml: optional replacement for the document root; parsed with
        ``etree.parse`` when possible, otherwise stored as given.
    """
    if xml is not None:
        try:
            self._root = etree.parse(xml)
        except Exception:
            # Narrowed from a bare ``except`` so KeyboardInterrupt /
            # SystemExit still propagate.
            self._root = xml

    # The unused ``_xml_to_dict(self._root)`` conversion was dropped.
    prefix = ns(self._ns)

    def child_value(node, tag):
        # Value of the namespaced child ``tag`` of ``node`` via testXMLValue.
        return testXMLValue(node.find(prefix + tag))

    geogs = self._findall('geogLocation')
    self.geo_coords = list()
    self.srs = list()
    for g in geogs:
        self.geo_coords.append((child_value(g, 'longitude'), child_value(g, 'latitude')))
        self.srs.append(g.attrib.get('srs'))

    locsite = self._findall('localSiteXY')
    self.local_sites = list()
    self.notes = list()
    self.projections = list()
    for ls in locsite:
        z = child_value(ls, 'Z')
        if z is None:
            z = '0'  # default used when no Z value is given
        self.local_sites.append((child_value(ls, 'X'), child_value(ls, 'Y'), z))
        self.notes.append([testXMLValue(note) for note in ls.findall(prefix + 'note')])
        self.projections.append(ls.attrib.get('projectionInformation'))
def find_data_metadata(resource, credentials, no_ssl_check=False):
    """
    Retrieves and parse a remote metadata, given a gsconfig object (resource or layergroup).

    Only the first metadata link with mime type ``text/xml`` and format
    ``ISO19115:2003`` is fetched; HTTP basic authentication is added when
    ``credentials`` returns a username for the URL.

    :param resource: an object from the gsconfig python library (either a resource or a layergroup)
    :param credentials: an object that store credential for various OGC services
    :param no_ssl_check: boolean indicating if SSL certificate check should be deactivated (False by default)
    :return: a tuple (url, parsed metadata).
    :raises GsMetadataMissingInconsistency: if the resource has no metadata
        links, or none of them matches the expected type and format.
    :raises GsToGnMetadataInvalidInconsistency: if the matching link cannot
        be fetched or parsed; the original error is chained as the cause.
    """

    def layer_name():
        # Evaluated lazily: it is only needed on the error paths.
        return "%s:%s" % (resource.workspace.name, resource.name)

    if resource.metadata_links is None:
        raise GsMetadataMissingInconsistency(layer_name())

    for mime_type, md_format, url in resource.metadata_links:
        if mime_type != "text/xml" or md_format != "ISO19115:2003":
            continue

        # disable certificate verification
        ctx = None
        if no_ssl_check:
            ctx = ssl.create_default_context()
            ctx.check_hostname = False
            ctx.verify_mode = ssl.CERT_NONE

        req = Request(url)
        username, password = credentials.getFromUrl(url)
        if username is not None:
            base64string = base64.b64encode(('%s:%s' % (username, password)).encode())
            authheader = "Basic %s" % base64string.decode()
            req.add_header("Authorization", authheader)

        try:
            with urlopen(req, context=ctx) as fhandle:
                return (url, MD_Metadata(etree.parse(fhandle)))
        except Exception as e:
            # Chain the cause so the underlying network/parse error stays
            # visible in the traceback.
            raise GsToGnMetadataInvalidInconsistency(
                url, str(e), layer_name=layer_name()) from e

    raise GsMetadataMissingInconsistency(layer_name())
def parse_criteria(self, xml=None):
    """Populate the query criteria fields from the current document root.

    ``method_called`` comes from the root's ``MethodCalled`` attribute,
    ``parameters`` is a list of ``(name, value)`` attribute pairs, one per
    ``parameter`` element, and the remaining fields are read from
    ``_xml_to_dict(self._root, depth=4)``. A begin/end date that is missing
    or cannot be parsed is stored as ``None``.

    :param xml: optional replacement for the document root; parsed with
        ``etree.parse`` when possible, otherwise stored as given.
    """
    if xml is not None:
        try:
            self._root = etree.parse(xml)
        except Exception:
            # Narrowed from a bare ``except`` so KeyboardInterrupt /
            # SystemExit still propagate.
            self._root = xml

    xml_dict = _xml_to_dict(self._root, depth=4)

    self.method_called = self._root.attrib.get('MethodCalled')
    self.location_param = xml_dict.get('location_param')
    self.variable_param = xml_dict.get('variable_param')

    # Dates are best effort. ``Exception`` (not a bare ``except``) keeps that
    # behaviour for missing keys and parse errors without trapping
    # interpreter-exit signals.
    try:
        self.begin_date_time = parser.parse(xml_dict['begin_date_time'])
    except Exception:
        self.begin_date_time = None

    try:
        self.end_date_time = parser.parse(xml_dict['end_date_time'])
    except Exception:
        self.end_date_time = None

    self.parameters = [
        (param.attrib.get('name'), param.attrib.get('value'))
        for param in self._findall('parameter')
    ]
def parse_siteinfo(self, xml=None):
    """Populate the site description fields from the current document root.

    Scalar fields come from ``_xml_to_dict(self._root)``; site codes, site
    types, notes and properties are read from the matching elements.
    ``time_zone_info`` is only set when a ``timeZoneInfo`` element exists;
    ``location`` is always built from the ``geoLocation`` lookup.

    :param xml: optional replacement for the document root; parsed with
        ``etree.parse`` when possible, otherwise stored as given.
    """
    if xml is not None:
        try:
            self._root = etree.parse(xml)
        except Exception:
            # Narrowed from a bare ``except`` so KeyboardInterrupt /
            # SystemExit still propagate.
            self._root = xml

    xml_dict = _xml_to_dict(self._root)

    self.site_name = xml_dict.get('site_name')
    self.site_codes = [testXMLValue(code) for code in self._findall('siteCode')]
    self.elevation = xml_dict.get('elevation_m')
    self.vertical_datum = xml_dict.get('vertical_datum')
    self.site_types = [testXMLValue(typ) for typ in self._findall('siteType')]
    # keyed by each property's ``name`` attribute
    self.site_properties = {
        prop.attrib.get('name'): testXMLValue(prop)
        for prop in self._findall('siteProperty')
    }
    self.altname = xml_dict.get('altname')
    self.notes = [testXMLValue(note) for note in self._findall('note')]

    # sub-objects
    tzi = self._find('timeZoneInfo')
    if tzi is not None:
        self.time_zone_info = TimeZoneInfo(tzi, self._ns)

    self.location = Location(self._find('geoLocation'), self._ns)
# ============================================================================= # get a list of entries for a given code list dictionary import sys import urllib2 from owslib.etree import etree from owslib.iso import CodelistCatalogue if len(sys.argv) < 3: print 'Usage: %s <path/to/gmxCodelists.xml> <CodeListDictionary>' % sys.argv[ 0] sys.exit(1) e = etree.parse(sys.argv[1]) c = CodelistCatalogue(e) clds = c.getcodelistdictionaries() def valid_clds(): return ''' Valid code list dictionaries are: %s ''' % '\n'.join(clds) if len(sys.argv) < 2: print '''
def load_thesaurus(self, input_file, name, store):
    """Read an RDF/SKOS thesaurus file and build the matching model objects.

    Creates one ``Thesaurus`` (with a ``ThesaurusLabel`` per titled
    language), and for each ``skos:Concept`` under the root a
    ``ThesaurusKeyword`` plus one ``ThesaurusKeywordLabel`` per
    ``skos:prefLabel``. Progress is printed to stdout.

    :param input_file: path or file object handed to ``dlxml.parse``.
    :param name: identifier assigned to the thesaurus.
    :param store: when false nothing is saved (dry run); objects are only
        built and reported.
    :raises CommandError: if the root has no ``skos:ConceptScheme`` child.
    """
    RDF_URI = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
    XML_URI = 'http://www.w3.org/XML/1998/namespace'

    ABOUT_ATTRIB = f"{{{RDF_URI}}}about"
    LANG_ATTRIB = f"{{{XML_URI}}}lang"

    ns = {
        'rdf': RDF_URI,
        'foaf': 'http://xmlns.com/foaf/0.1/',
        'dc': 'http://purl.org/dc/elements/1.1/',
        'dcterms': 'http://purl.org/dc/terms/',
        'skos': 'http://www.w3.org/2004/02/skos/core#'
    }

    tfile = dlxml.parse(input_file)
    root = tfile.getroot()

    scheme = root.find('skos:ConceptScheme', ns)
    # Compare with None explicitly: an element's truth value reflects whether
    # it has children, so ``not scheme`` wrongly rejected a childless scheme.
    if scheme is None:
        raise CommandError("ConceptScheme not found in file")

    titles = scheme.findall('dc:title', ns)

    default_lang = getattr(settings, 'THESAURUS_DEFAULT_LANG', None)
    available_lang = get_all_lang_available_with_title(titles, LANG_ATTRIB)
    thesaurus_title = determinate_value(available_lang, default_lang)

    # Same truthiness pitfall: a text-only <dc:description> has no children
    # and was therefore always ignored in favour of the title.
    description = scheme.find('dc:description', ns)
    if description is not None and description.text:
        descr = description.text
    else:
        descr = thesaurus_title

    date_issued = scheme.find('dcterms:issued', ns).text
    about = scheme.attrib.get(ABOUT_ATTRIB)

    print(f'Thesaurus "{thesaurus_title}" issued at {date_issued}')

    thesaurus = Thesaurus()
    thesaurus.identifier = name
    thesaurus.title = thesaurus_title
    thesaurus.description = descr
    thesaurus.about = about
    thesaurus.date = date_issued

    if store:
        thesaurus.save()

    for lang in available_lang:
        if lang[0] is not None:
            thesaurus_label = ThesaurusLabel()
            thesaurus_label.lang = lang[0]
            thesaurus_label.label = lang[1]
            thesaurus_label.thesaurus = thesaurus
            # Honour the dry-run flag like every other save in this method.
            if store:
                thesaurus_label.save()

    for concept in root.findall('skos:Concept', ns):
        about = concept.attrib.get(ABOUT_ATTRIB)
        alt_label = concept.find('skos:altLabel', ns)
        if alt_label is not None:
            alt_label = alt_label.text
        else:
            # No altLabel: derive one from the preferred labels.
            concepts = concept.findall('skos:prefLabel', ns)
            available_lang = get_all_lang_available_with_title(concepts, LANG_ATTRIB)
            alt_label = determinate_value(available_lang, default_lang)

        print(f'Concept {alt_label} ({about})')

        tk = ThesaurusKeyword()
        tk.thesaurus = thesaurus
        tk.about = about
        tk.alt_label = alt_label

        if store:
            tk.save()

        for pref_label in concept.findall('skos:prefLabel', ns):
            lang = pref_label.attrib.get(LANG_ATTRIB)
            label = pref_label.text

            print(f'    Label {lang}: {label}')

            tkl = ThesaurusKeywordLabel()
            tkl.keyword = tk
            tkl.lang = lang
            tkl.label = label

            if store:
                tkl.save()
def _invoke(self):
    """Send ``self.request`` to the service and parse the XML response.

    A string request is sent as GET (KVP); anything else is treated as an
    XML element and POSTed. The parsed response is stored in ``self._exml``.
    The endpoint URL is taken from the advertised operation named after the
    calling method when ``self.operations`` exists, otherwise ``self.url``.

    :raises RuntimeError: if the response root is not a known CSW/OWS element.
    :raises ows.ExceptionReport: if the response contains an ``ows:Exception``.
    """
    # do HTTP request

    request_url = self.url

    # Get correct URL based on Operation list.

    # If skip_caps=True, then self.operations has not been set, so use
    # default URL.
    if hasattr(self, 'operations'):
        # Name of the calling method; must stay in this frame so that
        # stack()[1] is the public operation method.
        caller = inspect.stack()[1][3]
        if caller == 'getrecords2':
            caller = 'getrecords'
        try:
            op = self.get_operation_by_name(caller)
            if isinstance(self.request, six.string_types):  # GET KVP
                get_verbs = [x for x in op.methods if x.get('type').lower() == 'get']
                request_url = get_verbs[0].get('url')
            else:
                post_verbs = [x for x in op.methods if x.get('type').lower() == 'post']
                if len(post_verbs) > 1:
                    # Filter by constraints. We must match a PostEncoding
                    # of "XML"; if none does, just use the first one.
                    # (The previous nested ``break`` only left the inner
                    # loop, so the for/else fallback always won.)
                    chosen = post_verbs[0]
                    for pv in post_verbs:
                        accepts_xml = any(
                            const.name.lower() == 'postencoding'
                            and 'xml' in [v.lower() for v in const.values]
                            for const in (pv.get('constraints') or ())
                        )
                        if accepts_xml:
                            chosen = pv
                            break
                    request_url = chosen.get('url')
                elif len(post_verbs) == 1:
                    # Previously stored in an unused ``request_post_url``.
                    request_url = post_verbs[0].get('url')
        except Exception:
            # no such luck, just go with request_url
            pass

    if isinstance(self.request, six.string_types):  # GET KVP
        self.request = '%s%s' % (bind_url(request_url), self.request)
        self.response = openURL(self.request, None, 'Get',
                                username=self.username,
                                password=self.password,
                                timeout=self.timeout).read()
    else:
        self.request = cleanup_namespaces(self.request)
        # Add any namespaces used in the "typeNames" attribute of the
        # csw:Query element to the query's xml namespaces.
        for query in self.request.findall(util.nspath_eval('csw:Query', namespaces)):
            ns = query.get("typeNames", None)
            if ns is not None:
                # Pull out "gmd" from something like "gmd:MD_Metadata" from the list
                # of typenames
                ns_keys = [x.split(':')[0] for x in ns.split(' ')]
                self.request = add_namespaces(self.request, ns_keys)

        self.request = util.element_to_string(self.request, encoding='utf-8')

        self.response = util.http_post(request_url, self.request, self.lang,
                                       self.timeout, self.username, self.password)

    # parse result see if it's XML
    self._exml = etree.parse(BytesIO(self.response))

    # it's XML. Attempt to decipher whether the XML response is CSW-ish
    valid_xpaths = [
        util.nspath_eval('ows:ExceptionReport', namespaces),
        util.nspath_eval('csw:Capabilities', namespaces),
        util.nspath_eval('csw:DescribeRecordResponse', namespaces),
        util.nspath_eval('csw:GetDomainResponse', namespaces),
        util.nspath_eval('csw:GetRecordsResponse', namespaces),
        util.nspath_eval('csw:GetRecordByIdResponse', namespaces),
        util.nspath_eval('csw:HarvestResponse', namespaces),
        util.nspath_eval('csw:TransactionResponse', namespaces)
    ]

    if self._exml.getroot().tag not in valid_xpaths:
        raise RuntimeError('Document is XML, but not CSW-ish')

    # check if it's an OGC Exception
    val = self._exml.find(util.nspath_eval('ows:Exception', namespaces))
    if val is not None:
        raise ows.ExceptionReport(self._exml, self.owscommon.namespace)
    else:
        self.exceptionreport = None
def _invoke(self, mock_requests_post, mock_requests_request):
    """Send ``self.request`` to the service and parse the XML response.

    A ``str`` request is sent as GET (KVP); anything else is treated as an
    XML element and POSTed. The parsed response is stored in ``self._exml``.
    The endpoint URL is taken from the advertised operation named after the
    calling method when ``self.operations`` exists, otherwise ``self.url``.

    :param mock_requests_post: not used in the body (presumably injected by
        a test patch decorator -- confirm).
    :param mock_requests_request: not used in the body (same assumption).
    :raises RuntimeError: if the response root is not a known CSW/OWS element.
    :raises ows.ExceptionReport: if the response contains an ``ows:Exception``.
    """
    try:
        # With token authentication the token is sent as the password.
        if self.auth.token is not None:
            self.auth.username = "******"
            self.auth.password = self.auth.token
    except AttributeError:
        pass

    # do HTTP request
    request_url = self.url

    # Get correct URL based on Operation list.

    # If skip_caps=True, then self.operations has not been set, so use
    # default URL.
    if hasattr(self, "operations"):
        # Name of the calling method; must stay in this frame so that
        # stack()[1] is the public operation method.
        caller = inspect.stack()[1][3]
        if caller == "getrecords2":
            caller = "getrecords"
        # noinspection PyBroadException
        try:
            op = self.get_operation_by_name(caller)
            if isinstance(self.request, str):  # GET KVP
                get_verbs = [x for x in op.methods if x.get("type").lower() == "get"]
                request_url = get_verbs[0].get("url")
            else:
                post_verbs = [x for x in op.methods if x.get("type").lower() == "post"]
                if len(post_verbs) > 1:
                    # Filter by constraints. We must match a PostEncoding
                    # of "XML"; if none does, just use the first one.
                    # (The previous nested ``break`` only left the inner
                    # loop, so the for/else fallback always won.)
                    chosen = post_verbs[0]
                    for pv in post_verbs:
                        accepts_xml = any(
                            const.name.lower() == "postencoding"
                            and "xml" in [v.lower() for v in const.values]
                            for const in (pv.get("constraints") or ())
                        )
                        if accepts_xml:
                            chosen = pv
                            break
                    request_url = chosen.get("url")
                elif len(post_verbs) == 1:
                    request_url = post_verbs[0].get("url")
        except Exception:  # nosec
            # no such luck, just go with request_url
            pass

    if isinstance(self.request, str):  # GET KVP
        self.request = "%s%s" % (bind_url(request_url), self.request)
        self.response = openURL(self.request, None, "Get",
                                timeout=self.timeout, auth=self.auth).read()
    else:
        self.request = cleanup_namespaces(self.request)
        # Add any namespaces used in the "typeNames" attribute of the
        # csw:Query element to the query's xml namespaces.
        # noinspection PyUnresolvedReferences
        for query in self.request.findall(util.nspath_eval("csw:Query", csw_namespaces)):
            ns = query.get("typeNames", None)
            if ns is not None:
                # Pull out "gmd" from something like "gmd:MD_Metadata" from the list
                # of typenames
                ns_keys = [x.split(":")[0] for x in ns.split(" ")]
                self.request = add_namespaces(self.request, ns_keys)
        self.request = add_namespaces(self.request, "ows")

        self.request = util.element_to_string(self.request, encoding="utf-8")

        self.response = http_post(request_url, self.request, self.lang,
                                  self.timeout, auth=self.auth)

    # parse result see if it's XML
    self._exml = etree.parse(BytesIO(self.response))

    # it's XML. Attempt to decipher whether the XML response is CSW-ish
    valid_xpaths = [
        util.nspath_eval("ows:ExceptionReport", csw_namespaces),
        util.nspath_eval("csw:Capabilities", csw_namespaces),
        util.nspath_eval("csw:DescribeRecordResponse", csw_namespaces),
        util.nspath_eval("csw:GetDomainResponse", csw_namespaces),
        util.nspath_eval("csw:GetRecordsResponse", csw_namespaces),
        util.nspath_eval("csw:GetRecordByIdResponse", csw_namespaces),
        util.nspath_eval("csw:HarvestResponse", csw_namespaces),
        util.nspath_eval("csw:TransactionResponse", csw_namespaces),
    ]

    if self._exml.getroot().tag not in valid_xpaths:
        raise RuntimeError("Document is XML, but not CSW-ish")

    # check if it's an OGC Exception
    val = self._exml.find(util.nspath_eval("ows:Exception", csw_namespaces))
    if val is not None:
        raise ows.ExceptionReport(self._exml, self.owscommon.namespace)
    else:
        self.exceptionreport = None
def __init__(self, elem, parent, parse_remote_metadata=False, timeout=30):
    """Build the metadata of one WFS feature type from its XML element.

    :param elem: the feature type element.
    :param parent: element whose ``Operations/*`` children list the
        operations shared by all feature types.
    :param parse_remote_metadata: when true, each metadata URL is downloaded
        and parsed (first as FGDC, then as ISO); the result, or ``None`` on
        failure, is stored under the ``'metadata'`` key of that entry.
    :param timeout: timeout passed to ``urllib2.urlopen`` for those downloads.
    """
    self.id = elem.find(nspath('Name', ns=WFS_NAMESPACE)).text
    self.title = elem.find(nspath('Title', ns=WFS_NAMESPACE)).text
    abstract = elem.find(nspath('Abstract', ns=WFS_NAMESPACE))
    if abstract is not None:
        self.abstract = abstract.text
    else:
        self.abstract = None
    self.keywords = [f.text for f in elem.findall(nspath('Keywords', ns=WFS_NAMESPACE))]

    # bboxes
    self.boundingBoxWGS84 = None
    # there is no such thing as a bounding box here: ``boundingBox`` is a
    # copy of the WGS84BoundingBox and stays None when that is missing
    # (building it unconditionally indexed into None).
    self.boundingBox = None
    b = elem.find(nspath('WGS84BoundingBox', ns=OWS_NAMESPACE))
    if b is not None:
        lc = b.find(nspath("LowerCorner", ns=OWS_NAMESPACE))
        uc = b.find(nspath("UpperCorner", ns=OWS_NAMESPACE))
        ll = [float(s) for s in lc.text.split()]
        ur = [float(s) for s in uc.text.split()]
        self.boundingBoxWGS84 = (ll[0], ll[1], ur[0], ur[1])
        self.boundingBox = (self.boundingBoxWGS84[0],
                            self.boundingBoxWGS84[1],
                            self.boundingBoxWGS84[2],
                            self.boundingBoxWGS84[3],
                            Crs("epsg:4326"))

    # crs options
    self.crsOptions = [Crs(srs.text) for srs in elem.findall(nspath('OtherCRS', ns=WFS_NAMESPACE))]
    defaultCrs = elem.findall(nspath('DefaultCRS', ns=WFS_NAMESPACE))
    if len(defaultCrs) > 0:
        self.crsOptions.insert(0, Crs(defaultCrs[0].text))

    # verbs: operations from the parent, then those of this feature type.
    # ``+=`` is required; the former bare ``+`` expression built the merged
    # list and threw it away.
    self.verbOptions = [op.tag for op in parent.findall(nspath('Operations/*', ns=WFS_NAMESPACE))]
    self.verbOptions += [op.tag for op in elem.findall(nspath('Operations/*', ns=WFS_NAMESPACE))
                         if op.tag not in self.verbOptions]

    # others not used but needed for iContentMetadata harmonisation
    self.styles = None
    self.timepositions = None
    self.defaulttimeposition = None

    # MetadataURLs
    # NOTE(review): this lookup is not namespace-qualified, unlike every
    # other lookup in this constructor -- confirm that is intended.
    self.metadataUrls = []
    for m in elem.findall('MetadataURL'):
        metadataUrl = {
            'type': testXMLValue(m.attrib['type'], attrib=True),
            'format': m.find('Format').text.strip(),
            'url': testXMLValue(m.find('OnlineResource').attrib['{http://www.w3.org/1999/xlink}href'],
                                attrib=True)
        }

        if metadataUrl['url'] is not None and parse_remote_metadata:  # download URL
            try:
                content = urllib2.urlopen(metadataUrl['url'], timeout=timeout)
                doc = etree.parse(content)
                try:  # FGDC
                    metadataUrl['metadata'] = Metadata(doc)
                except Exception:  # ISO
                    metadataUrl['metadata'] = MD_Metadata(doc)
            except Exception:
                # Best effort: an unreachable or unparsable document must not
                # break capabilities parsing.
                metadataUrl['metadata'] = None

        self.metadataUrls.append(metadataUrl)
def __init__(self, elem, parent=None, children=None, index=0, parse_remote_metadata=False, timeout=30):
    """Build the metadata of one WMS ``Layer`` element and its child layers.

    :param elem: the ``Layer`` element.
    :param parent: metadata object of the enclosing layer, if any; CRS
        options, styles and the WGS84 bounding box are inherited from it.
    :param children: stored as ``self._children``.
    :param index: position of this layer, used to build ``self.index``.
    :param parse_remote_metadata: when true, each ``MetadataURL`` is
        downloaded and parsed; the result, or ``None`` on failure, is stored
        under the ``'metadata'`` key of that entry.
    :param timeout: timeout passed to ``openURL`` for those downloads.
    :raises ValueError: if ``elem`` is not a ``Layer`` or a ``Style`` lacks a
        name or title.
    """
    if xmltag_split(elem.tag) != 'Layer':
        raise ValueError('%s should be a Layer' % (elem,))

    self.parent = parent
    if parent:
        self.index = "%s.%d" % (parent.index, index)
    else:
        self.index = str(index)

    self._children = children

    self.id = self.name = testXMLValue(elem.find(nspath('Name', WMS_NAMESPACE)))

    # layer attributes
    self.queryable = int(elem.attrib.get('queryable', 0))
    self.cascaded = int(elem.attrib.get('cascaded', 0))
    self.opaque = int(elem.attrib.get('opaque', 0))
    self.noSubsets = int(elem.attrib.get('noSubsets', 0))
    self.fixedWidth = int(elem.attrib.get('fixedWidth', 0))
    self.fixedHeight = int(elem.attrib.get('fixedHeight', 0))

    # title is mandatory property
    self.title = None
    title = testXMLValue(elem.find(nspath('Title', WMS_NAMESPACE)))
    if title is not None:
        self.title = title.strip()

    self.abstract = testXMLValue(elem.find(nspath('Abstract', WMS_NAMESPACE)))

    # TODO: what is the preferred response to esri's handling of custom projections
    # in the spatial ref definitions? see http://resources.arcgis.com/en/help/main/10.1/index.html#//00sq000000m1000000
    # and an example (20150812) http://maps.ngdc.noaa.gov/arcgis/services/firedetects/MapServer/WMSServer?request=GetCapabilities&service=WMS

    # bboxes
    b = elem.find(nspath('EX_GeographicBoundingBox', WMS_NAMESPACE))
    self.boundingBoxWGS84 = None
    if b is not None:
        minx = b.find(nspath('westBoundLongitude', WMS_NAMESPACE))
        miny = b.find(nspath('southBoundLatitude', WMS_NAMESPACE))
        maxx = b.find(nspath('eastBoundLongitude', WMS_NAMESPACE))
        maxy = b.find(nspath('northBoundLatitude', WMS_NAMESPACE))
        box = tuple(map(float, [
            minx.text if minx is not None else None,
            miny.text if miny is not None else None,
            maxx.text if maxx is not None else None,
            maxy.text if maxy is not None else None
        ]))
        self.boundingBoxWGS84 = tuple(box)
    elif self.parent:
        if hasattr(self.parent, 'boundingBoxWGS84'):
            self.boundingBoxWGS84 = self.parent.boundingBoxWGS84

    # make a bbox list (of tuples)
    crs_list = []
    for bb in elem.findall(nspath('BoundingBox', WMS_NAMESPACE)):
        srs_str = bb.attrib.get('CRS', None)
        srs = Crs(srs_str)

        box = tuple(map(float, [
            bb.attrib['minx'], bb.attrib['miny'], bb.attrib['maxx'], bb.attrib['maxy']
        ]))
        minx, miny, maxx, maxy = box[0], box[1], box[2], box[3]

        # handle the ordering so that it always
        # returns (minx, miny, maxx, maxy)
        if srs and srs.axisorder == 'yx':
            # reverse things
            minx, miny, maxx, maxy = box[1], box[0], box[3], box[2]

        crs_list.append((minx, miny, maxx, maxy, srs_str))
    self.crs_list = crs_list
    # and maintain the original boundingBox attribute (first in list)
    # or the wgs84 bbox (to handle cases of incomplete parentage)
    self.boundingBox = crs_list[0] if crs_list else self.boundingBoxWGS84

    # ScaleHint
    sh = elem.find(nspath('ScaleHint', WMS_NAMESPACE))
    self.scaleHint = None
    if sh is not None:
        if 'min' in sh.attrib and 'max' in sh.attrib:
            self.scaleHint = {'min': sh.attrib['min'], 'max': sh.attrib['max']}

    # ``self.attribution`` is only defined when the layer has an Attribution.
    attribution = elem.find(nspath('Attribution', WMS_NAMESPACE))
    if attribution is not None:
        self.attribution = dict()
        title = attribution.find(nspath('Title', WMS_NAMESPACE))
        url = attribution.find(nspath('OnlineResource', WMS_NAMESPACE))
        logo = attribution.find(nspath('LogoURL', WMS_NAMESPACE))
        if title is not None:
            self.attribution['title'] = title.text
        if url is not None:
            self.attribution['url'] = url.attrib['{http://www.w3.org/1999/xlink}href']
        if logo is not None:
            self.attribution['logo_size'] = (int(logo.attrib['width']), int(logo.attrib['height']))
            self.attribution['logo_url'] = logo.find(
                nspath('OnlineResource', WMS_NAMESPACE)
            ).attrib['{http://www.w3.org/1999/xlink}href']

    # TODO: get this from the bbox attributes instead (deal with parents)
    # SRS options
    self.crsOptions = []

    # Copy any parent SRS options (they are inheritable properties)
    if self.parent:
        self.crsOptions = list(self.parent.crsOptions)

    # Look for SRS option attached to this layer
    if elem.find(nspath('CRS', WMS_NAMESPACE)) is not None:
        # some servers found in the wild use a single SRS
        # tag containing a whitespace separated list of SRIDs
        # instead of several SRS tags. hence the inner loop
        for srslist in [x.text for x in elem.findall(nspath('CRS', WMS_NAMESPACE))]:
            if srslist:
                for srs in srslist.split():
                    self.crsOptions.append(srs)

    # Get rid of duplicate entries
    self.crsOptions = list(set(self.crsOptions))

    # Set self.crsOptions to None if the layer (and parents) had no SRS options
    if len(self.crsOptions) == 0:
        # raise ValueError('%s no SRS available!?' % (elem,))
        # Comment by D Lowe.
        # Do not raise ValueError as it is possible that a layer is purely a parent layer and does not have SRS specified. Instead set crsOptions to None
        # Comment by Jachym:
        # Do not set it to None, but to [], which will make the code
        # work further. Fixed by anthonybaxter
        self.crsOptions = []

    # Styles
    self.styles = {}

    # Copy any parent styles (they are inheritable properties)
    if self.parent:
        self.styles = self.parent.styles.copy()

    # Get the styles for this layer (items with the same name are replaced)
    for s in elem.findall(nspath('Style', WMS_NAMESPACE)):
        name = s.find(nspath('Name', WMS_NAMESPACE))
        title = s.find(nspath('Title', WMS_NAMESPACE))
        if name is None or title is None:
            raise ValueError('%s missing name or title' % (s,))
        style = {'title': title.text}
        # legend url
        legend = s.find(nspath('LegendURL/OnlineResource', WMS_NAMESPACE))
        if legend is not None:
            style['legend'] = legend.attrib['{http://www.w3.org/1999/xlink}href']

        lgd = s.find(nspath('LegendURL', WMS_NAMESPACE))
        if lgd is not None:
            if 'width' in lgd.attrib:
                style['legend_width'] = lgd.attrib.get('width')
            if 'height' in lgd.attrib:
                style['legend_height'] = lgd.attrib.get('height')

            lgd_format = lgd.find(nspath('Format', WMS_NAMESPACE))
            if lgd_format is not None:
                style['legend_format'] = lgd_format.text.strip()
        self.styles[name.text] = style

    # keywords
    self.keywords = [f.text for f in elem.findall(nspath('KeywordList/Keyword', WMS_NAMESPACE))]

    # extents replaced by dimensions of name
    # comment by Soren Scott
    # <Dimension name="elevation" units="meters" default="500" multipleValues="1"
    #    nearestValue="0" current="true" unitSymbol="m">500, 490, 480</Dimension>
    # it can be repeated with the same name so ? this assumes a single one to match 1.1.1

    self.timepositions = None
    self.defaulttimeposition = None
    time_dimension = None

    for dim in elem.findall(nspath('Dimension', WMS_NAMESPACE)):
        dim_name = dim.attrib.get('name')
        if dim_name is not None and dim_name.lower() == 'time':
            time_dimension = dim
    if time_dimension is not None:
        self.timepositions = time_dimension.text.split(',') if time_dimension.text else None
        self.defaulttimeposition = time_dimension.attrib.get('default', None)

    # Elevations - available vertical levels
    self.elevations = None
    elev_dimension = None
    for dim in elem.findall(nspath('Dimension', WMS_NAMESPACE)):
        # Match on the ``name`` attribute, as for time: a Dimension carries
        # name="elevation" (see the sample above), not an attribute that is
        # itself called ``elevation``, so the old test never matched.
        dim_name = dim.attrib.get('name')
        if dim_name is not None and dim_name.lower() == 'elevation':
            elev_dimension = dim
    if elev_dimension is not None:
        self.elevations = [e.strip() for e in elev_dimension.text.split(',')] if elev_dimension.text else None

    # and now capture the dimensions as more generic things (and custom things)
    self.dimensions = {}
    for dim in elem.findall(nspath('Dimension', WMS_NAMESPACE)):
        dim_name = dim.attrib.get('name')
        dim_data = {k: v for k, v in six.iteritems(dim.attrib) if k != 'name'}
        # single values and ranges are not differentiated here
        # ``dim.text`` is None for an empty Dimension element; treat it like
        # blank text instead of failing on ``None.strip()``.
        dim_text = (dim.text or '').strip()
        dim_data['values'] = dim_text.split(',') if dim_text else None
        self.dimensions[dim_name] = dim_data

    # MetadataURLs
    self.metadataUrls = []
    for m in elem.findall(nspath('MetadataURL', WMS_NAMESPACE)):
        metadataUrl = {
            'type': testXMLValue(m.attrib['type'], attrib=True),
            'format': testXMLValue(m.find(nspath('Format', WMS_NAMESPACE))),
            'url': testXMLValue(m.find(nspath('OnlineResource', WMS_NAMESPACE)).attrib['{http://www.w3.org/1999/xlink}href'],
                                attrib=True)
        }

        if metadataUrl['url'] is not None and parse_remote_metadata:  # download URL
            try:
                content = openURL(metadataUrl['url'], timeout=timeout)
                doc = etree.parse(content)
                if metadataUrl['type'] is not None:
                    if metadataUrl['type'] == 'FGDC':
                        metadataUrl['metadata'] = Metadata(doc)
                    if metadataUrl['type'] == 'TC211':
                        metadataUrl['metadata'] = MD_Metadata(doc)
            except Exception:
                # Best effort: a failed download must not break parsing.
                metadataUrl['metadata'] = None

        self.metadataUrls.append(metadataUrl)

    # DataURLs
    self.dataUrls = []
    for m in elem.findall(nspath('DataURL', WMS_NAMESPACE)):
        dataUrl = {
            'format': m.find(nspath('Format', WMS_NAMESPACE)).text.strip(),
            'url': m.find(nspath('OnlineResource', WMS_NAMESPACE)).attrib['{http://www.w3.org/1999/xlink}href']
        }
        self.dataUrls.append(dataUrl)

    # FeatureListURLs
    self.featureListUrls = []
    for m in elem.findall(nspath('FeatureListURL', WMS_NAMESPACE)):
        featureUrl = {
            'format': m.find(nspath('Format', WMS_NAMESPACE)).text.strip(),
            'url': m.find(nspath('OnlineResource', WMS_NAMESPACE)).attrib['{http://www.w3.org/1999/xlink}href']
        }
        self.featureListUrls.append(featureUrl)

    # Child layers are parsed recursively with this layer as their parent.
    self.layers = []
    for child in elem.findall(nspath('Layer', WMS_NAMESPACE)):
        self.layers.append(ContentMetadata(child, self))
def __init__(self, elem, parent=None, children=None, index=0,
             parse_remote_metadata=False, timeout=30):
    """Populate layer metadata from a namespaced WMS ``Layer`` element.

    Parameters:
        elem: the ``Layer`` element to read.
        parent: metadata object of the enclosing layer, or None. When given,
            its CRS options and styles are copied, and its WGS84 bounding
            box is used if this layer declares none.
        children: stored unchanged as ``self._children``.
        index: position used to build the dotted ``self.index`` string.
        parse_remote_metadata: when true, each MetadataURL is downloaded and
            parsed (FGDC or TC211 types only).
        timeout: timeout handed to ``openURL`` for those downloads.

    Raises:
        ValueError: if ``elem`` is not a ``Layer`` element, or if a ``Style``
            child has no ``Name`` or ``Title``.
    """
    if xmltag_split(elem.tag) != 'Layer':
        raise ValueError('%s should be a Layer' % (elem,))

    xlink_href = '{http://www.w3.org/1999/xlink}href'

    self.parent = parent
    if parent:
        self.index = "%s.%d" % (parent.index, index)
    else:
        self.index = str(index)

    self._children = children

    self.id = self.name = testXMLValue(
        elem.find(nspath('Name', WMS_NAMESPACE)))

    # layer attributes
    self.queryable = int(elem.attrib.get('queryable', 0))
    self.cascaded = int(elem.attrib.get('cascaded', 0))
    self.opaque = int(elem.attrib.get('opaque', 0))
    self.noSubsets = int(elem.attrib.get('noSubsets', 0))
    self.fixedWidth = int(elem.attrib.get('fixedWidth', 0))
    self.fixedHeight = int(elem.attrib.get('fixedHeight', 0))

    # title is mandatory property
    self.title = None
    title = testXMLValue(elem.find(nspath('Title', WMS_NAMESPACE)))
    if title is not None:
        self.title = title.strip()

    self.abstract = testXMLValue(
        elem.find(nspath('Abstract', WMS_NAMESPACE)))

    # TODO: what is the preferred response to esri's handling of custom
    # projections in the spatial ref definitions? see
    # http://resources.arcgis.com/en/help/main/10.1/index.html#//00sq000000m1000000
    # and an example (20150812)
    # http://maps.ngdc.noaa.gov/arcgis/services/firedetects/MapServer/WMSServer?request=GetCapabilities&service=WMS

    # bboxes
    b = elem.find(nspath('EX_GeographicBoundingBox', WMS_NAMESPACE))
    self.boundingBoxWGS84 = None
    if b is not None:
        minx = b.find(nspath('westBoundLongitude', WMS_NAMESPACE))
        miny = b.find(nspath('southBoundLatitude', WMS_NAMESPACE))
        maxx = b.find(nspath('eastBoundLongitude', WMS_NAMESPACE))
        maxy = b.find(nspath('northBoundLatitude', WMS_NAMESPACE))
        box = tuple(map(float, [
            minx.text if minx is not None else None,
            miny.text if miny is not None else None,
            maxx.text if maxx is not None else None,
            maxy.text if maxy is not None else None,
        ]))
        self.boundingBoxWGS84 = tuple(box)
    elif self.parent:
        if hasattr(self.parent, 'boundingBoxWGS84'):
            self.boundingBoxWGS84 = self.parent.boundingBoxWGS84

    # make a bbox list (of tuples)
    crs_list = []
    for bb in elem.findall(nspath('BoundingBox', WMS_NAMESPACE)):
        srs_str = bb.attrib.get('CRS', None)
        srs = Crs(srs_str)

        box = tuple(map(float, [
            bb.attrib['minx'],
            bb.attrib['miny'],
            bb.attrib['maxx'],
            bb.attrib['maxy'],
        ]))
        minx, miny, maxx, maxy = box[0], box[1], box[2], box[3]

        # handle the ordering so that it always
        # returns (minx, miny, maxx, maxy)
        if srs and srs.axisorder == 'yx':
            # reverse things
            minx, miny, maxx, maxy = box[1], box[0], box[3], box[2]

        crs_list.append((minx, miny, maxx, maxy, srs_str))
    self.crs_list = crs_list

    # and maintain the original boundingBox attribute (first in list)
    # or the wgs84 bbox (to handle cases of incomplete parentage)
    self.boundingBox = crs_list[0] if crs_list else self.boundingBoxWGS84

    # ScaleHint
    sh = elem.find(nspath('ScaleHint', WMS_NAMESPACE))
    self.scaleHint = None
    if sh is not None:
        if 'min' in sh.attrib and 'max' in sh.attrib:
            self.scaleHint = {'min': sh.attrib['min'],
                              'max': sh.attrib['max']}

    # NOTE: self.attribution is only defined when an Attribution element
    # is present (unchanged from the original behaviour).
    attribution = elem.find(nspath('Attribution', WMS_NAMESPACE))
    if attribution is not None:
        self.attribution = dict()
        title = attribution.find(nspath('Title', WMS_NAMESPACE))
        url = attribution.find(nspath('OnlineResource', WMS_NAMESPACE))
        logo = attribution.find(nspath('LogoURL', WMS_NAMESPACE))
        if title is not None:
            self.attribution['title'] = title.text
        if url is not None:
            self.attribution['url'] = url.attrib[xlink_href]
        if logo is not None:
            self.attribution['logo_size'] = (int(logo.attrib['width']),
                                             int(logo.attrib['height']))
            self.attribution['logo_url'] = logo.find(
                nspath('OnlineResource', WMS_NAMESPACE)).attrib[xlink_href]

    # TODO: get this from the bbox attributes instead (deal with parents)
    # SRS options
    self.crsOptions = []

    # Copy any parent SRS options (they are inheritable properties)
    if self.parent:
        self.crsOptions = list(self.parent.crsOptions)

    # Look for SRS option attached to this layer.
    # Some servers found in the wild use a single tag containing a
    # whitespace separated list of SRIDs instead of several tags,
    # hence the inner loop.
    for crs_elem in elem.findall(nspath('CRS', WMS_NAMESPACE)):
        srslist = crs_elem.text
        if srslist:
            for srs in srslist.split():
                self.crsOptions.append(srs)

    # Get rid of duplicate entries
    self.crsOptions = list(set(self.crsOptions))

    # A layer may be purely a parent layer without any SRS specified, so
    # no error is raised here; crsOptions stays an empty list (not None)
    # so that later code keeps working.
    if len(self.crsOptions) == 0:
        self.crsOptions = []

    # Styles
    self.styles = {}

    # Copy any parent styles (they are inheritable properties)
    if self.parent:
        self.styles = self.parent.styles.copy()

    # Get the styles for this layer (items with the same name are replaced)
    for s in elem.findall(nspath('Style', WMS_NAMESPACE)):
        name = s.find(nspath('Name', WMS_NAMESPACE))
        title = s.find(nspath('Title', WMS_NAMESPACE))
        if name is None or title is None:
            raise ValueError('%s missing name or title' % (s,))
        style = {'title': title.text}

        # legend url
        legend = s.find(nspath('LegendURL/OnlineResource', WMS_NAMESPACE))
        if legend is not None:
            style['legend'] = legend.attrib[xlink_href]

        lgd = s.find(nspath('LegendURL', WMS_NAMESPACE))
        if lgd is not None:
            if 'width' in lgd.attrib:
                style['legend_width'] = lgd.attrib.get('width')
            if 'height' in lgd.attrib:
                style['legend_height'] = lgd.attrib.get('height')

            lgd_format = lgd.find(nspath('Format', WMS_NAMESPACE))
            if lgd_format is not None:
                style['legend_format'] = lgd_format.text.strip()
        self.styles[name.text] = style

    # keywords
    self.keywords = [
        f.text
        for f in elem.findall(nspath('KeywordList/Keyword', WMS_NAMESPACE))
    ]

    # extents replaced by dimensions of name
    # comment by Soren Scott
    # <Dimension name="elevation" units="meters" default="500"
    #     multipleValues="1" nearestValue="0" current="true"
    #     unitSymbol="m">500, 490, 480</Dimension>
    # it can be repeated with the same name so ? this assumes a single one
    # to match 1.1.1
    dimension_elems = elem.findall(nspath('Dimension', WMS_NAMESPACE))

    self.timepositions = None
    self.defaulttimeposition = None
    time_dimension = None
    for dim in dimension_elems:
        # Only the dimension actually named "time" qualifies; previously
        # any named dimension (e.g. elevation) was mistaken for it.
        dim_name = dim.attrib.get('name')
        if dim_name is not None and dim_name.lower() == 'time':
            time_dimension = dim
    if time_dimension is not None:
        self.timepositions = (time_dimension.text.split(',')
                              if time_dimension.text else None)
        self.defaulttimeposition = time_dimension.attrib.get('default', None)

    # Elevations - available vertical levels
    self.elevations = None
    elev_dimension = None
    for dim in dimension_elems:
        # The dimension is identified by name="elevation"; Dimension has
        # no "elevation" attribute, so the old test never matched.
        dim_name = dim.attrib.get('name')
        if dim_name is not None and dim_name.lower() == 'elevation':
            elev_dimension = dim
    if elev_dimension is not None:
        self.elevations = ([e.strip()
                            for e in elev_dimension.text.split(',')]
                           if elev_dimension.text else None)

    # and now capture the dimensions as more generic things
    # (and custom things)
    self.dimensions = {}
    for dim in dimension_elems:
        dim_name = dim.attrib.get('name')
        dim_data = {}
        for k, v in six.iteritems(dim.attrib):
            if k != 'name':
                dim_data[k] = v
        # single values and ranges are not differentiated here;
        # an empty element (text is None) yields None instead of crashing
        dim_text = dim.text.strip() if dim.text else ''
        dim_data['values'] = dim_text.split(',') if dim_text else None
        self.dimensions[dim_name] = dim_data

    # MetadataURLs
    self.metadataUrls = []
    for m in elem.findall(nspath('MetadataURL', WMS_NAMESPACE)):
        metadataUrl = {
            'type': testXMLValue(m.attrib['type'], attrib=True),
            'format': testXMLValue(m.find(nspath('Format', WMS_NAMESPACE))),
            'url': testXMLValue(
                m.find(nspath('OnlineResource',
                              WMS_NAMESPACE)).attrib[xlink_href],
                attrib=True),
        }

        if metadataUrl['url'] is not None and parse_remote_metadata:
            # download URL; best effort, any failure yields metadata=None
            try:
                content = openURL(metadataUrl['url'], timeout=timeout)
                doc = etree.parse(content)
                if metadataUrl['type'] is not None:
                    if metadataUrl['type'] == 'FGDC':
                        metadataUrl['metadata'] = Metadata(doc)
                    if metadataUrl['type'] == 'TC211':
                        metadataUrl['metadata'] = MD_Metadata(doc)
            except Exception:
                metadataUrl['metadata'] = None

        self.metadataUrls.append(metadataUrl)

    # DataURLs
    self.dataUrls = []
    for m in elem.findall(nspath('DataURL', WMS_NAMESPACE)):
        dataUrl = {
            'format': m.find(nspath('Format', WMS_NAMESPACE)).text.strip(),
            'url': m.find(nspath('OnlineResource',
                                 WMS_NAMESPACE)).attrib[xlink_href],
        }
        self.dataUrls.append(dataUrl)

    # FeatureListURLs
    self.featureListUrls = []
    for m in elem.findall(nspath('FeatureListURL', WMS_NAMESPACE)):
        featureUrl = {
            'format': m.find(nspath('Format', WMS_NAMESPACE)).text.strip(),
            'url': m.find(nspath('OnlineResource',
                                 WMS_NAMESPACE)).attrib[xlink_href],
        }
        self.featureListUrls.append(featureUrl)

    # nested layers, each built with this object as parent
    self.layers = []
    for child in elem.findall(nspath('Layer', WMS_NAMESPACE)):
        self.layers.append(ContentMetadata(child, self))
def __init__(self, elem, parent, parse_remote_metadata=False, timeout=30):
    """Populate metadata attributes from ``elem``.

    Parameters:
        elem: element whose Name, Title, Abstract, Keywords, bounding box,
            SRS, Operations and MetadataURL children are read.
        parent: element whose ``Operations`` children supply the base list
            of verbs.
        parse_remote_metadata: when true, each MetadataURL is downloaded and
            parsed (FGDC or TC211 types only).
        timeout: timeout handed to ``openURL`` for those downloads.
    """
    self.id = testXMLValue(elem.find(nspath('Name')))
    self.title = testXMLValue(elem.find(nspath('Title')))
    self.abstract = testXMLValue(elem.find(nspath('Abstract')))
    self.keywords = [f.text for f in elem.findall(nspath('Keywords'))]

    # bboxes
    self.boundingBox = None
    b = elem.find(nspath('LatLongBoundingBox'))
    srs = elem.find(nspath('SRS'))

    if b is not None:
        self.boundingBox = (
            float(b.attrib['minx']),
            float(b.attrib['miny']),
            float(b.attrib['maxx']),
            float(b.attrib['maxy']),
            # a missing SRS element used to raise AttributeError here
            Crs(srs.text) if srs is not None else None,
        )

    # transform wgs84 bbox from given default bbox
    self.boundingBoxWGS84 = None

    if b is not None and srs is not None:
        wgs84 = pyproj.Proj(init="epsg:4326")
        try:
            src_srs = pyproj.Proj(init=srs.text)
            mincorner = pyproj.transform(src_srs, wgs84,
                                         b.attrib['minx'], b.attrib['miny'])
            maxcorner = pyproj.transform(src_srs, wgs84,
                                         b.attrib['maxx'], b.attrib['maxy'])

            self.boundingBoxWGS84 = (mincorner[0], mincorner[1],
                                     maxcorner[0], maxcorner[1])
        except RuntimeError:
            # best effort: leave boundingBoxWGS84 as None when the
            # projection cannot be set up or applied
            pass

    # crs options
    self.crsOptions = [Crs(s.text) for s in elem.findall(nspath('SRS'))]

    # verbs: operations declared on the parent, plus any extra ones
    # declared on this element. The original used ``+`` and threw the
    # result away, so element-level operations were never recorded.
    self.verbOptions = [op.tag
                        for op in parent.findall(nspath('Operations/*'))]
    self.verbOptions += [op.tag
                         for op in elem.findall(nspath('Operations/*'))
                         if op.tag not in self.verbOptions]

    # others not used but needed for iContentMetadata harmonisation
    self.styles = None
    self.timepositions = None
    self.defaulttimeposition = None

    # MetadataURLs
    self.metadataUrls = []
    for m in elem.findall(nspath('MetadataURL')):
        metadataUrl = {
            'type': testXMLValue(m.attrib['type'], attrib=True),
            'format': testXMLValue(m.find('Format')),
            'url': testXMLValue(m),
        }

        if metadataUrl['url'] is not None and parse_remote_metadata:
            # download URL; best effort, any failure yields metadata=None
            try:
                content = openURL(metadataUrl['url'], timeout=timeout)
                doc = etree.parse(content)
                if metadataUrl['type'] is not None:
                    if metadataUrl['type'] == 'FGDC':
                        metadataUrl['metadata'] = Metadata(doc)
                    if metadataUrl['type'] == 'TC211':
                        metadataUrl['metadata'] = MD_Metadata(doc)
            except Exception:
                metadataUrl['metadata'] = None

        self.metadataUrls.append(metadataUrl)
def __init__(self, elem, parent, parse_remote_metadata=False, timeout=30):
    """Populate metadata attributes from ``elem``.

    Parameters:
        elem: element whose Name, Title, Abstract, Keywords,
            WGS84BoundingBox, CRS, Operations and MetadataURL children are
            read. Name and Title must be present.
        parent: element whose ``Operations`` children supply the base list
            of verbs.
        parse_remote_metadata: when true, each MetadataURL is downloaded and
            parsed, trying FGDC first and ISO second.
        timeout: timeout for those downloads.
    """
    self.id = elem.find(nspath('Name', ns=WFS_NAMESPACE)).text
    self.title = elem.find(nspath('Title', ns=WFS_NAMESPACE)).text
    abstract = elem.find(nspath('Abstract', ns=WFS_NAMESPACE))
    self.abstract = abstract.text if abstract is not None else None
    self.keywords = [
        f.text for f in elem.findall(nspath('Keywords', ns=WFS_NAMESPACE))
    ]

    # bboxes
    # Both attributes are always defined, so objects built from an element
    # without a WGS84BoundingBox no longer lack ``boundingBox``.
    self.boundingBoxWGS84 = None
    self.boundingBox = None
    b = elem.find(nspath('WGS84BoundingBox', ns=OWS_NAMESPACE))
    if b is not None:
        lc = b.find(nspath("LowerCorner", ns=OWS_NAMESPACE))
        uc = b.find(nspath("UpperCorner", ns=OWS_NAMESPACE))
        ll = [float(s) for s in lc.text.split()]
        ur = [float(s) for s in uc.text.split()]
        self.boundingBoxWGS84 = (ll[0], ll[1], ur[0], ur[1])

        # there is no such thing as a bounding box here, so
        # make a copy of the WGS84BoundingBox
        self.boundingBox = (self.boundingBoxWGS84[0],
                            self.boundingBoxWGS84[1],
                            self.boundingBoxWGS84[2],
                            self.boundingBoxWGS84[3],
                            Crs("epsg:4326"))

    # crs options, default CRS first
    self.crsOptions = [
        Crs(srs.text)
        for srs in elem.findall(nspath('OtherCRS', ns=WFS_NAMESPACE))
    ]
    defaultCrs = elem.findall(nspath('DefaultCRS', ns=WFS_NAMESPACE))
    if len(defaultCrs) > 0:
        self.crsOptions.insert(0, Crs(defaultCrs[0].text))

    # verbs: operations declared on the parent, plus any extra ones
    # declared on this element. The original used ``+`` and threw the
    # result away, so element-level operations were never recorded.
    self.verbOptions = [
        op.tag
        for op in parent.findall(nspath('Operations/*', ns=WFS_NAMESPACE))
    ]
    self.verbOptions += [
        op.tag
        for op in elem.findall(nspath('Operations/*', ns=WFS_NAMESPACE))
        if op.tag not in self.verbOptions
    ]

    # others not used but needed for iContentMetadata harmonisation
    self.styles = None
    self.timepositions = None
    self.defaulttimeposition = None

    # MetadataURLs
    # NOTE(review): these lookups are not namespace-qualified, unlike the
    # ones above -- confirm this is intended.
    self.metadataUrls = []
    for m in elem.findall('MetadataURL'):
        metadataUrl = {
            'type': testXMLValue(m.attrib['type'], attrib=True),
            'format': m.find('Format').text.strip(),
            'url': testXMLValue(
                m.find('OnlineResource').attrib[
                    '{http://www.w3.org/1999/xlink}href'],
                attrib=True),
        }

        if metadataUrl['url'] is not None and parse_remote_metadata:
            # download URL; best effort, any failure yields metadata=None
            try:
                content = urllib2.urlopen(metadataUrl['url'],
                                          timeout=timeout)
                doc = etree.parse(content)
                try:  # FGDC
                    metadataUrl['metadata'] = Metadata(doc)
                except Exception:  # ISO
                    metadataUrl['metadata'] = MD_Metadata(doc)
            except Exception:
                metadataUrl['metadata'] = None

        self.metadataUrls.append(metadataUrl)
def _invoke(self):
    """Send ``self.request`` and parse the reply into ``self._exml``.

    A string request is issued as an HTTP GET (with a Basic Authorization
    header when both username and password are set). Any other request is
    treated as an XML element: its namespaces are cleaned up, it is
    serialised as UTF-8 and POSTed to ``self.url``.

    Raises:
        RuntimeError: if the response root tag is not one of the accepted
            CSW/OWS elements.
        ows.ExceptionReport: if the response contains an ``ows:Exception``.
    """
    # do HTTP request
    if isinstance(self.request, basestring):  # GET KVP
        req = Request(self.request)
        if self.username is not None and self.password is not None:
            base64string = base64.encodestring(
                '%s:%s' % (self.username, self.password))[:-1]
            req.add_header('Authorization', 'Basic %s' % base64string)
        self.response = urlopen(req, timeout=self.timeout).read()
    else:
        xml_post_url = self.url
        # Get correct POST URL based on Operation list.
        # If skip_caps=True, then self.operations has not been set, so use
        # default URL.
        if hasattr(self, 'operations'):
            # name of the calling method selects the operation
            caller = inspect.stack()[1][3]
            if caller == 'getrecords2':
                caller = 'getrecords'
            try:
                op = self.get_operation_by_name(caller)
                # list comprehension so len() works whether or not
                # filter() returns a list
                post_verbs = [verb for verb in op.methods
                              if verb.get('type').lower() == 'post']
                if len(post_verbs) > 1:
                    # Filter by constraints. We must match a PostEncoding
                    # of "XML".
                    urls_per_verb = [
                        [pv.get('url')
                         for const in pv.get('constraints')
                         if const.name.lower() == "postencoding"
                         and 'xml' in [v.lower() for v in const.values]]
                        for pv in post_verbs
                    ]
                    matching = [urls for urls in urls_per_verb if urls]
                    if matching:
                        xml_post_url = matching[0][0]
                    else:
                        # Well, just use the first one.
                        xml_post_url = post_verbs[0].get('url')
                elif len(post_verbs) == 1:
                    xml_post_url = post_verbs[0].get('url')
            except Exception:
                # no such luck, just go with xml_post_url
                pass

        self.request = cleanup_namespaces(self.request)
        # Add any namespaces used in the "typeNames" attribute of the
        # csw:Query element to the query's xml namespaces.
        for query in self.request.findall(
                util.nspath_eval('csw:Query', namespaces)):
            ns = query.get("typeNames", None)
            if ns is not None:
                # Pull out "gmd" from something like "gmd:MD_Metadata"
                # from the list of typenames
                ns_keys = [x.split(':')[0] for x in ns.split(' ')]
                self.request = add_namespaces(self.request, ns_keys)

        self.request = util.element_to_string(self.request,
                                              encoding='utf-8')

        # Modified by Ross Thompson for use with FGP: the POST goes to
        # self.url rather than the operation-specific URL.
        # NOTE(review): this leaves xml_post_url computed above unused;
        # the previous call was
        # util.http_post(xml_post_url, self.request, self.lang,
        #                self.timeout, self.username, self.password)
        self.response = util.http_post(self.url, self.request, self.lang,
                                       self.timeout, self.username,
                                       self.password)

    # parse result see if it's XML
    self._exml = etree.parse(StringIO.StringIO(self.response))

    # it's XML. Attempt to decipher whether the XML response is CSW-ish
    valid_xpaths = [
        util.nspath_eval('ows:ExceptionReport', namespaces),
        util.nspath_eval('csw:Capabilities', namespaces),
        util.nspath_eval('csw:DescribeRecordResponse', namespaces),
        util.nspath_eval('csw:GetDomainResponse', namespaces),
        util.nspath_eval('csw:GetRecordsResponse', namespaces),
        util.nspath_eval('csw:GetRecordByIdResponse', namespaces),
        util.nspath_eval('csw:HarvestResponse', namespaces),
        util.nspath_eval('csw:TransactionResponse', namespaces),
    ]

    if self._exml.getroot().tag not in valid_xpaths:
        raise RuntimeError('Document is XML, but not CSW-ish')

    # check if it's an OGC Exception
    val = self._exml.find(util.nspath_eval('ows:Exception', namespaces))
    if val is not None:
        raise ows.ExceptionReport(self._exml, self.owscommon.namespace)
    else:
        self.exceptionreport = None
def parse_variable(self, xml=None):
    """Read the variable description under ``self._root`` into attributes.

    Parameters:
        xml: optional replacement source. It is first handed to
            ``etree.parse``; if that fails, ``xml`` itself becomes
            ``self._root``. When None, the existing ``self._root`` is used.
    """
    if xml is not None:
        try:
            self._root = etree.parse(xml)
        except Exception:
            # could not be parsed as a source: use ``xml`` itself as root
            self._root = xml

    xml_dict = _xml_to_dict(self._root)
    self.value_type = xml_dict.get('value_type')
    self.data_type = xml_dict.get('data_type')
    self.general_category = xml_dict.get('general_category')
    self.sample_medium = xml_dict.get('sample_medium')
    self.no_data_value = xml_dict.get('no_data_value')
    self.variable_name = xml_dict.get('variable_name')
    self.variable_code = xml_dict.get('variable_code')
    self.variable_description = xml_dict.get('variable_description')
    self.speciation = xml_dict.get('speciation')

    # notes and properties: titled notes are keyed by title, untitled
    # ones are collected in a list under the 'none' key
    notes = [(note.attrib.get('title'), testXMLValue(note))
             for note in self._findall('note')]
    none_notes = [text for title, text in notes if title is None]
    self.notes = {title: text for title, text in notes
                  if title is not None}
    if none_notes:
        self.notes['none'] = none_notes

    self.properties = {
        prop.attrib.get('name'): testXMLValue(prop)
        for prop in self._findall('variableProperty')
    }

    # related
    related = self._find('related')
    if related is not None:
        self.parent_codes = [
            {
                'network': code.attrib.get('network'),
                'vocabulary': code.attrib.get('vocabulary'),
                'default': code.attrib.get('default'),
            }
            for code in related.findall(ns(self._ns) + 'parentCode')
        ]
        # Read the attributes from each relatedCode element; the original
        # referenced an undefined name ``d`` and raised NameError as soon
        # as one relatedCode was present.
        self.related_codes = [
            {
                'network': code.attrib.get('network'),
                'vocabulary': code.attrib.get('vocabulary'),
                'default': code.attrib.get('default'),
            }
            for code in related.findall(ns(self._ns) + 'relatedCode')
        ]
    else:
        self.parent_codes = None
        self.related_codes = None

    # sub-objects: element names and wrapper classes depend on the namespace
    if self._ns == 'wml1.0':
        unit = self._find('units')
        self.unit = Unit1_0(unit, self._ns) if unit is not None else None

        timesupport = self._find('timeSupport')
        self.time_support = (TimeScale(timesupport, self._ns)
                             if timesupport is not None else None)
    else:
        unit = self._find('unit')
        self.unit = Unit(unit, self._ns) if unit is not None else None

        timescale = self._find('timeScale')
        self.time_scale = (TimeScale(timescale, self._ns)
                           if timescale is not None else None)

    categories = self._find('categories')
    if categories is not None:
        self.categories = [
            Category(cat, self._ns)
            for cat in categories.findall(ns(self._ns) + 'category')
        ]
    else:
        self.categories = None
# Contact email: [email protected] # ============================================================================= # get a list of entries for a given code list dictionary import sys import urllib2 from owslib.etree import etree from owslib.iso import CodelistCatalogue if len(sys.argv) < 3: print 'Usage: %s <path/to/gmxCodelists.xml> <CodeListDictionary>' % sys.argv[0] sys.exit(1) e=etree.parse(sys.argv[1]) c=CodelistCatalogue(e) clds = c.getcodelistdictionaries() def valid_clds(): return ''' Valid code list dictionaries are: %s ''' % '\n'.join(clds) if len(sys.argv) < 2: print ''' Usage: %s <codelistdictionary>
def __init__(self, elem, parent=None, children=None, index=0,
             parse_remote_metadata=False, timeout=30):
    """Populate layer metadata from a non-namespaced WMS ``Layer`` element.

    Parameters:
        elem: the ``Layer`` element to read.
        parent: metadata object of the enclosing layer, or None. When given,
            its CRS options and styles are copied, and its bounding boxes
            are used if this layer declares none.
        children: stored unchanged as ``self._children``.
        index: position used to build the dotted ``self.index`` string.
        parse_remote_metadata: when true, each MetadataURL is downloaded and
            parsed (FGDC or TC211 types only).
        timeout: timeout handed to ``openURL`` for those downloads.

    Raises:
        ValueError: if ``elem`` is not a ``Layer`` element, or if a ``Style``
            child has no ``Name`` or ``Title``.
    """
    if elem.tag != 'Layer':
        raise ValueError('%s should be a Layer' % (elem, ))

    xlink_href = '{http://www.w3.org/1999/xlink}href'

    self.parent = parent
    if parent:
        self.index = "%s.%d" % (parent.index, index)
    else:
        self.index = str(index)

    self._children = children

    self.id = self.name = testXMLValue(elem.find('Name'))

    # layer attributes
    self.queryable = int(elem.attrib.get('queryable', 0))
    self.cascaded = int(elem.attrib.get('cascaded', 0))
    self.opaque = int(elem.attrib.get('opaque', 0))
    self.noSubsets = int(elem.attrib.get('noSubsets', 0))
    self.fixedWidth = int(elem.attrib.get('fixedWidth', 0))
    self.fixedHeight = int(elem.attrib.get('fixedHeight', 0))

    # title is mandatory property
    self.title = None
    title = testXMLValue(elem.find('Title'))
    if title is not None:
        self.title = title.strip()

    self.abstract = testXMLValue(elem.find('Abstract'))

    # bboxes
    b = elem.find('BoundingBox')
    self.boundingBox = None
    if b is not None:
        # sometimes the SRS attribute is (wrongly) not provided
        srs = b.attrib.get('SRS')
        self.boundingBox = (
            float(b.attrib['minx']),
            float(b.attrib['miny']),
            float(b.attrib['maxx']),
            float(b.attrib['maxy']),
            srs,
        )
    elif self.parent:
        if hasattr(self.parent, 'boundingBox'):
            self.boundingBox = self.parent.boundingBox

    # ScaleHint
    sh = elem.find('ScaleHint')
    self.scaleHint = None
    if sh is not None:
        if 'min' in sh.attrib and 'max' in sh.attrib:
            self.scaleHint = {
                'min': sh.attrib['min'],
                'max': sh.attrib['max'],
            }

    # NOTE: self.attribution is only defined when an Attribution element
    # is present (unchanged from the original behaviour).
    attribution = elem.find('Attribution')
    if attribution is not None:
        self.attribution = dict()
        title = attribution.find('Title')
        url = attribution.find('OnlineResource')
        logo = attribution.find('LogoURL')
        if title is not None:
            self.attribution['title'] = title.text
        if url is not None:
            self.attribution['url'] = url.attrib[xlink_href]
        if logo is not None:
            self.attribution['logo_size'] = (int(logo.attrib['width']),
                                             int(logo.attrib['height']))
            self.attribution['logo_url'] = logo.find(
                'OnlineResource').attrib[xlink_href]

    b = elem.find('LatLonBoundingBox')
    if b is not None:
        self.boundingBoxWGS84 = (
            float(b.attrib['minx']),
            float(b.attrib['miny']),
            float(b.attrib['maxx']),
            float(b.attrib['maxy']),
        )
    elif self.parent:
        self.boundingBoxWGS84 = self.parent.boundingBoxWGS84
    else:
        self.boundingBoxWGS84 = None

    # SRS options
    self.crsOptions = []

    # Copy any parent SRS options (they are inheritable properties)
    if self.parent:
        self.crsOptions = list(self.parent.crsOptions)

    # Look for SRS option attached to this layer.
    # Some servers found in the wild use a single SRS tag containing a
    # whitespace separated list of SRIDs instead of several SRS tags,
    # hence the inner loop.
    for srs_elem in elem.findall('SRS'):
        srslist = srs_elem.text
        if srslist:
            for srs in srslist.split():
                self.crsOptions.append(srs)

    # Get rid of duplicate entries
    self.crsOptions = list(set(self.crsOptions))

    # A layer may be purely a parent layer without any SRS specified, so
    # no error is raised here; crsOptions stays an empty list (not None)
    # so that later code keeps working.
    if len(self.crsOptions) == 0:
        self.crsOptions = []

    # Styles
    self.styles = {}

    # Copy any parent styles (they are inheritable properties)
    if self.parent:
        self.styles = self.parent.styles.copy()

    # Get the styles for this layer (items with the same name are replaced)
    for s in elem.findall('Style'):
        name = s.find('Name')
        title = s.find('Title')
        if name is None or title is None:
            raise ValueError('%s missing name or title' % (s, ))
        style = {'title': title.text}
        # legend url
        legend = s.find('LegendURL/OnlineResource')
        if legend is not None:
            style['legend'] = legend.attrib[xlink_href]
        self.styles[name.text] = style

    # keywords
    self.keywords = [f.text for f in elem.findall('KeywordList/Keyword')]

    extents = elem.findall('Extent')

    # timepositions - times for which data is available.
    # The first non-empty Extent named "time" wins. An Extent without a
    # name attribute is skipped instead of raising AttributeError.
    self.timepositions = None
    self.defaulttimeposition = None
    for extent in extents:
        if (extent.attrib.get("name") or '').lower() == 'time':
            if extent.text:
                self.timepositions = extent.text.split(',')
                self.defaulttimeposition = extent.attrib.get("default")
                break

    # Elevations - available vertical levels
    self.elevations = None
    for extent in extents:
        if (extent.attrib.get("name") or '').lower() == 'elevation':
            if extent.text:
                self.elevations = extent.text.split(',')
                break

    # MetadataURLs
    self.metadataUrls = []
    for m in elem.findall('MetadataURL'):
        metadataUrl = {
            'type': testXMLValue(m.attrib['type'], attrib=True),
            'format': testXMLValue(m.find('Format')),
            'url': testXMLValue(
                m.find('OnlineResource').attrib[xlink_href], attrib=True),
        }

        if metadataUrl['url'] is not None and parse_remote_metadata:
            # download URL; best effort, any failure yields metadata=None
            try:
                content = openURL(metadataUrl['url'], timeout=timeout)
                doc = etree.parse(content)
                if metadataUrl['type'] is not None:
                    if metadataUrl['type'] == 'FGDC':
                        metadataUrl['metadata'] = Metadata(doc)
                    if metadataUrl['type'] == 'TC211':
                        metadataUrl['metadata'] = MD_Metadata(doc)
            except Exception:
                metadataUrl['metadata'] = None

        self.metadataUrls.append(metadataUrl)

    # DataURLs
    self.dataUrls = []
    for m in elem.findall('DataURL'):
        dataUrl = {
            'format': m.find('Format').text.strip(),
            'url': m.find('OnlineResource').attrib[xlink_href],
        }
        self.dataUrls.append(dataUrl)

    # nested layers, each built with this object as parent
    self.layers = []
    for child in elem.findall('Layer'):
        self.layers.append(ContentMetadata(child, self))