def parseXML(self, file, naam):
    """Parse an XML source with ``etree.parse`` and record the outcome.

    The parse is bracketed by ``Log.log.startTimer``/``endTimer``; the
    message returned by ``endTimer`` is stored through
    ``Database().log_actie`` under the action name ``'xml_parse'``. Any
    exception raised inside the ``try`` body is logged with
    ``Log.log.error`` and stored under the same action name instead of
    being propagated.

    Args:
        file: Source handed unchanged to ``etree.parse``.
        naam: Name of the input; only used in the log records.

    Returns:
        The parsed document, or ``None`` when ``etree.parse`` raised.
    """
    Log.log.startTimer("parseXML")
    xml = None
    try:
        xml = etree.parse(file)
        bericht = Log.log.endTimer("parseXML")
        Database().log_actie('xml_parse', naam, bericht)
    except Exception as e:
        bericht = Log.log.error("fout %s in XML parsen, bestand=%s" % (str(e), str(naam)))
        # NOTE(review): the trailing True presumably marks the record as an
        # error -- confirm against Database.log_actie.
        Database().log_actie('xml_parse', naam, bericht, True)
    # Hand the tree (or None on failure) back to the caller; without this
    # the parsed document was silently discarded.
    return xml
def parseXML(self, file, naam):
    """Parse an XML source with ``etree.parse`` and record the outcome.

    The call is timed via ``Log.log.startTimer``/``endTimer`` and the
    timer message is written with ``Database().log_actie`` using the
    action name ``'xml_parse'``. Exceptions raised inside the ``try`` body
    are reported through ``Log.log.error`` and written under the same
    action name; they are not re-raised.

    Args:
        file: Source handed unchanged to ``etree.parse``.
        naam: Name of the input; only used in the log records.

    Returns:
        The parsed document, or ``None`` when ``etree.parse`` raised.
    """
    Log.log.startTimer("parseXML")
    xml = None
    try:
        xml = etree.parse(file)
        bericht = Log.log.endTimer("parseXML")
        Database().log_actie('xml_parse', naam, bericht)
    except Exception as e:
        bericht = Log.log.error("fout %s in XML parsen, bestand=%s" % (str(e), str(naam)))
        # NOTE(review): the trailing True presumably marks the record as an
        # error -- confirm against Database.log_actie.
        Database().log_actie('xml_parse', naam, bericht, True)
    # Return the result so the parsed tree is not thrown away.
    return xml
def process_xml(self, file_resource, filenaam):
    """Parse one XML resource and log whether parsing succeeded.

    Args:
        file_resource: Source handed unchanged to ``etree.parse``.
        filenaam: Name of the resource; used in log messages and as the
            subject of the ``self.database.log_actie`` records.

    Returns:
        None. The parsed tree is not used further in this function.
    """
    Log.log.info("process_xml: verwerk %s " % filenaam)
    try:
        # Parse the XML document into an etree object
        Log.log.startTimer("parseXML")
        parsed_xml = etree.parse(file_resource)
        bericht = Log.log.endTimer("parseXML")
        self.database.log_actie("xml_parse", filenaam, bericht)
    except Exception as e:
        # "except X as e" is valid on Python 2.6+ and Python 3; the old
        # "except X, e" spelling is a SyntaxError on Python 3.
        bericht = Log.log.error("fout %s in XML parsen, bestand=%s" % (str(e), filenaam))
        # NOTE(review): the trailing True presumably marks the record as an
        # error -- confirm against the database class.
        self.database.log_actie("xml_parse", filenaam, bericht, True)
        return
def process_xml(self, file_resource, filenaam):
    """Parse one XML resource and log whether parsing succeeded.

    Args:
        file_resource: Source handed unchanged to ``etree.parse``.
        filenaam: Name of the resource; used in log messages and as the
            subject of the ``self.database.log_actie`` records.

    Returns:
        None. The parsed tree is not used further in this function.
    """
    Log.log.info("process_xml: verwerk %s " % filenaam)
    try:
        # Parse the XML document into an etree object
        Log.log.startTimer("parseXML")
        parsed_xml = etree.parse(file_resource)
        bericht = Log.log.endTimer("parseXML")
        self.database.log_actie('xml_parse', filenaam, bericht)
    except Exception as e:
        # Portable exception-binding syntax (Python 2.6+ and Python 3).
        bericht = Log.log.error("fout %s in XML parsen, bestand=%s" % (str(e), filenaam))
        # NOTE(review): the trailing True presumably marks the record as an
        # error -- confirm against the database class.
        self.database.log_actie('xml_parse', filenaam, bericht, True)
        return
def parse_gemeentelijke_indeling(args):
    """Check that ``args.input`` is readable and parse it as XML.

    Args:
        args: Object exposing ``input`` (path of the XML file) and
            ``verbose`` (enables the progress and unreadable-file
            messages).

    Returns:
        None in every visible path: when the file is not readable, when
        parsing fails (an error line is printed), and after a successful
        parse.
    """
    if not os.access(args.input, os.R_OK):
        # The unreadable-file message is only shown in verbose mode.
        if args.verbose:
            print("Error: Cannot read file: " + args.input)
        return

    if args.verbose:
        print("Parsing XML file: " + args.input)

    try:
        # Parse the XML document into an etree object
        parsed_xml = etree.parse(args.input)
    except Exception as e:
        # Portable exception-binding syntax (Python 2.6+ and Python 3).
        print("Error: Failed to parse file: %s (%s)" % (args.input, str(e)))
        return
def parse_gemeentelijke_indeling(args):
    """Check that ``args.input`` is readable and parse it as XML.

    Args:
        args: Object exposing ``input`` (path of the XML file) and
            ``verbose`` (enables the progress and unreadable-file
            messages).

    Returns:
        None in every visible path: when the file is not readable, when
        parsing fails (an error line is printed), and after a successful
        parse.
    """
    if not os.access(args.input, os.R_OK):
        # The unreadable-file message is only shown in verbose mode.
        if args.verbose:
            print("Error: Cannot read file: " + args.input)
        return

    if args.verbose:
        print("Parsing XML file: " + args.input)

    try:
        # Parse the XML document into an etree object
        parsed_xml = etree.parse(args.input)
    except Exception as e:
        # "except X as e" replaces the Python-2-only "except X, e" form.
        print("Error: Failed to parse file: %s (%s)" % (args.input, str(e)))
        return
def parse(fn, out=None):
    """
    Read XML from the file name or object 'fn' and return a 'Unit' instance.

    The root element supplies the unit's language and filename
    attributes, the text of its 'raw-source' child becomes the unit's
    source, each child of 'referenced-types' is converted with
    ``types.aggregate_from_xml`` and stored by id, and each child of
    'function-bodies' is converted with ``functions.Function.from_xml``.

    If 'out' is not None (more precisely: truthy), the XML is rewritten to
    the path ``out + basename(filename) + '.lh'``. 'out' is used as a plain
    string prefix, so it needs a trailing path separator to name a
    directory.

    Raises AssertionError when two referenced types share the same id.
    """
    root = etree.parse(fn).getroot()

    u = unit.Unit()
    u.lang = root.get('language')
    u.filename = root.get('filename')

    if out:
        target = out + os.path.basename(u.filename) + '.lh'
        # Context manager so the file is flushed and closed even if
        # serialisation or the write fails.
        with open(target, 'w') as dump:
            dump.write(etree.tostring(root))

    u.source = root.find('raw-source').text

    # Iterating an element yields its children; this replaces the
    # deprecated getchildren() call.
    for t in root.find('referenced-types'):
        tt = types.aggregate_from_xml(t)
        assert tt.id not in u.types
        u.types[tt.id] = tt

    for f in root.find('function-bodies'):
        u.functions += [functions.Function.from_xml(f)]

    u.finalise()
    return u
def process_xml(self, file_resource, filenaam):
    """Parse one XML resource and hand its root element to the processor.

    Step 1 parses ``file_resource`` with ``etree.parse`` (timed through
    ``Log.log``) and records the result under the action ``'xml_parse'``;
    on failure the error is logged and the method returns early.

    Step 2 dispatches on the file name: ``'gemeentelijke-indeling.xml'``
    goes to ``self.processor.processGemeentelijkeIndeling``, everything
    else to ``self.processor.processDOM``. Failures in step 2 are logged
    under the action ``'xml_processing'`` and are not re-raised.

    Args:
        file_resource: Source handed unchanged to ``etree.parse``.
        filenaam: Name of the resource; selects the processing branch and
            is used in every log record.

    Returns:
        None.
    """
    Log.log.info("process_xml: verwerk %s " % filenaam)

    # --- Step 1: parse the XML document into an etree object ---
    try:
        Log.log.startTimer("parseXML")
        parsed_xml = etree.parse(file_resource)
        bericht = Log.log.endTimer("parseXML")
        self.database.log_actie('xml_parse', filenaam, bericht)
    except Exception as exc:
        bericht = Log.log.error(
            "fout %s in XML parsen, bestand=%s" % (str(exc), filenaam))
        self.database.log_actie('xml_parse', filenaam, bericht, True)
        return

    # --- Step 2: process the parsed tree ---
    # Both branches share one error handler because their failure
    # handling is identical.
    is_gemeentelijke_indeling = filenaam == 'gemeentelijke-indeling.xml'
    try:
        if is_gemeentelijke_indeling:
            self.processor.processGemeentelijkeIndeling(
                parsed_xml.getroot(), filenaam)
            self.database.log_actie('xml_processing', filenaam, 'verwerkt OK')
        else:
            # The processor decides whether this is an extract or a mutation
            self.processor.processDOM(parsed_xml.getroot(), filenaam)
            self.database.log_actie('verwerkt', filenaam, 'verwerkt OK')
    except Exception as exc:
        bericht = Log.log.error(
            "fout %s in XML DOM processing, bestand=%s" % (str(exc), filenaam))
        self.database.log_actie('xml_processing', filenaam, bericht, True)
def __init__(self, elem, parent=None, index=0, parse_remote_metadata=False):
    """Populate the layer description from a ``Layer`` XML element.

    Args:
        elem: Element whose tag must be ``"Layer"``.
        parent: Already-built parent layer, or None. Bounding boxes, SRS
            options and styles are inherited from it.
        index: Position of this layer; combined with the parent's index
            into the dotted ``self.index`` string.
        parse_remote_metadata: When true, every MetadataURL that has a URL
            is downloaded and parsed; failures are swallowed and leave
            ``metadata`` set to None for that entry.

    Raises:
        ValueError: If ``elem`` is not a ``Layer`` element, or a ``Style``
            child has no ``Name`` or no ``Title``.

    Note:
        ``self.attribution`` is only assigned when an ``Attribution`` child
        element is present.
    """
    if elem.tag != "Layer":
        raise ValueError("%s should be a Layer" % (elem,))

    self.parent = parent
    if parent:
        self.index = "%s.%d" % (parent.index, index)
    else:
        self.index = str(index)

    self.id = self.name = testXMLValue(elem.find("Name"))

    # layer attributes
    self.queryable = int(elem.attrib.get("queryable", 0))
    self.cascaded = int(elem.attrib.get("cascaded", 0))
    self.opaque = int(elem.attrib.get("opaque", 0))
    self.noSubsets = int(elem.attrib.get("noSubsets", 0))
    self.fixedWidth = int(elem.attrib.get("fixedWidth", 0))
    self.fixedHeight = int(elem.attrib.get("fixedHeight", 0))

    # title is mandatory property, but tolerate documents that omit it
    self.title = None
    title = testXMLValue(elem.find("Title"))
    if title is not None:
        self.title = title.strip()

    self.abstract = testXMLValue(elem.find("Abstract"))

    # bboxes
    b = elem.find("BoundingBox")
    self.boundingBox = None
    if b is not None:
        # sometimes the SRS attribute is (wrongly) not provided
        srs = b.attrib.get("SRS")
        self.boundingBox = (
            float(b.attrib["minx"]),
            float(b.attrib["miny"]),
            float(b.attrib["maxx"]),
            float(b.attrib["maxy"]),
            srs,
        )
    elif self.parent:
        if hasattr(self.parent, "boundingBox"):
            self.boundingBox = self.parent.boundingBox

    # ScaleHint
    sh = elem.find("ScaleHint")
    self.scaleHint = None
    if sh is not None:
        self.scaleHint = {"min": sh.attrib["min"], "max": sh.attrib["max"]}

    attribution = elem.find("Attribution")
    if attribution is not None:
        self.attribution = dict()
        title = attribution.find("Title")
        url = attribution.find("OnlineResource")
        logo = attribution.find("LogoURL")
        if title is not None:
            self.attribution["title"] = title.text
        if url is not None:
            self.attribution["url"] = url.attrib["{http://www.w3.org/1999/xlink}href"]
        if logo is not None:
            self.attribution["logo_size"] = (int(logo.attrib["width"]), int(logo.attrib["height"]))
            self.attribution["logo_url"] = logo.find("OnlineResource").attrib["{http://www.w3.org/1999/xlink}href"]

    b = elem.find("LatLonBoundingBox")
    if b is not None:
        self.boundingBoxWGS84 = (
            float(b.attrib["minx"]),
            float(b.attrib["miny"]),
            float(b.attrib["maxx"]),
            float(b.attrib["maxy"]),
        )
    elif self.parent:
        self.boundingBoxWGS84 = self.parent.boundingBoxWGS84
    else:
        self.boundingBoxWGS84 = None

    # SRS options
    self.crsOptions = []

    # Copy any parent SRS options (they are inheritable properties)
    if self.parent:
        self.crsOptions = list(self.parent.crsOptions)

    # Look for SRS options attached to this layer.
    # Some servers found in the wild use a single SRS tag containing a
    # whitespace separated list of SRIDs instead of several SRS tags,
    # hence the split().
    for srs_elem in elem.findall("SRS"):
        if srs_elem.text:
            self.crsOptions.extend(srs_elem.text.split())

    # Get rid of duplicate entries (the resulting order is not defined).
    # A layer may be purely a parent layer without any SRS; in that case
    # crsOptions deliberately stays an empty list (neither None nor an
    # error) so that child layers can keep copying it.
    self.crsOptions = list(set(self.crsOptions))

    # Styles
    self.styles = {}

    # Copy any parent styles (they are inheritable properties)
    if self.parent:
        self.styles = self.parent.styles.copy()

    # Get the styles for this layer (items with the same name are replaced)
    for s in elem.findall("Style"):
        name = s.find("Name")
        title = s.find("Title")
        if name is None or title is None:
            raise ValueError("%s missing name or title" % (s,))
        style = {"title": title.text}
        # legend url
        legend = s.find("LegendURL/OnlineResource")
        if legend is not None:
            style["legend"] = legend.attrib["{http://www.w3.org/1999/xlink}href"]
        self.styles[name.text] = style

    # keywords
    self.keywords = [f.text for f in elem.findall("KeywordList/Keyword")]

    # timepositions - times for which data is available.
    # An Extent without a name attribute is skipped instead of raising
    # AttributeError on None.
    self.timepositions = None
    for extent in elem.findall("Extent"):
        if (extent.attrib.get("name") or "").lower() == "time":
            if extent.text:
                self.timepositions = extent.text.split(",")
                break

    # Elevations - available vertical levels
    self.elevations = None
    for extent in elem.findall("Extent"):
        if (extent.attrib.get("name") or "").lower() == "elevation":
            if extent.text:
                self.elevations = extent.text.split(",")
                break

    # MetadataURLs
    self.metadataUrls = []
    for m in elem.findall("MetadataURL"):
        metadataUrl = {
            "type": testXMLValue(m.attrib["type"], attrib=True),
            "format": testXMLValue(m.find("Format")),
            "url": testXMLValue(m.find("OnlineResource").attrib["{http://www.w3.org/1999/xlink}href"], attrib=True),
        }

        if metadataUrl["url"] is not None and parse_remote_metadata:
            # download URL; this is best effort, any failure just leaves
            # the metadata entry set to None
            try:
                content = urllib2.urlopen(metadataUrl["url"])
                doc = etree.parse(content)
                if metadataUrl["type"] is not None:
                    if metadataUrl["type"] == "FGDC":
                        metadataUrl["metadata"] = Metadata(doc)
                    if metadataUrl["type"] == "TC211":
                        metadataUrl["metadata"] = MD_Metadata(doc)
            except Exception:
                metadataUrl["metadata"] = None

        self.metadataUrls.append(metadataUrl)
def __init__(self, elem, parent=None, index=0, parse_remote_metadata=False):
    """Populate the layer description from a ``Layer`` XML element.

    Args:
        elem: Element whose tag must be ``'Layer'``.
        parent: Already-built parent layer, or None. Bounding boxes, SRS
            options and styles are inherited from it.
        index: Position of this layer; combined with the parent's index
            into the dotted ``self.index`` string.
        parse_remote_metadata: When true, every MetadataURL that has a URL
            is downloaded and parsed; failures are swallowed and leave
            ``metadata`` set to None for that entry.

    Raises:
        ValueError: If ``elem`` is not a ``Layer`` element, or a ``Style``
            child has no ``Name`` or no ``Title``.

    Note:
        ``self.attribution`` is only assigned when an ``Attribution`` child
        element is present. ``self.crsOptions`` is None (not an empty
        list) when neither this layer nor its parents declare an SRS.
    """
    if elem.tag != 'Layer':
        raise ValueError('%s should be a Layer' % (elem,))

    self.parent = parent
    if parent:
        self.index = "%s.%d" % (parent.index, index)
    else:
        self.index = str(index)

    self.id = self.name = testXMLValue(elem.find('Name'))

    # title is mandatory property, but a document that omits it must not
    # crash the constructor with AttributeError on None
    self.title = None
    title = testXMLValue(elem.find('Title'))
    if title is not None:
        self.title = title.strip()

    self.abstract = testXMLValue(elem.find('Abstract'))

    # bboxes
    b = elem.find('BoundingBox')
    self.boundingBox = None
    if b is not None:
        # sometimes the SRS attribute is (wrongly) not provided
        srs = b.attrib.get('SRS')
        self.boundingBox = (
            float(b.attrib['minx']),
            float(b.attrib['miny']),
            float(b.attrib['maxx']),
            float(b.attrib['maxy']),
            srs,
        )
    elif self.parent:
        if hasattr(self.parent, 'boundingBox'):
            self.boundingBox = self.parent.boundingBox

    # ScaleHint
    sh = elem.find('ScaleHint')
    self.scaleHint = None
    if sh is not None:
        self.scaleHint = {'min': sh.attrib['min'], 'max': sh.attrib['max']}

    attribution = elem.find('Attribution')
    if attribution is not None:
        self.attribution = dict()
        title = attribution.find('Title')
        url = attribution.find('OnlineResource')
        logo = attribution.find('LogoURL')
        if title is not None:
            self.attribution['title'] = title.text
        if url is not None:
            self.attribution['url'] = url.attrib['{http://www.w3.org/1999/xlink}href']
        if logo is not None:
            self.attribution['logo_size'] = (int(logo.attrib['width']), int(logo.attrib['height']))
            self.attribution['logo_url'] = logo.find('OnlineResource').attrib['{http://www.w3.org/1999/xlink}href']

    b = elem.find('LatLonBoundingBox')
    if b is not None:
        self.boundingBoxWGS84 = (
            float(b.attrib['minx']),
            float(b.attrib['miny']),
            float(b.attrib['maxx']),
            float(b.attrib['maxy']),
        )
    elif self.parent:
        self.boundingBoxWGS84 = self.parent.boundingBoxWGS84
    else:
        self.boundingBoxWGS84 = None

    # SRS options
    self.crsOptions = []

    # Copy any parent SRS options (they are inheritable properties).
    # A parent without SRS options stores None (see the end of this
    # section), and list(None) raises TypeError, so only copy a non-empty
    # value.
    if self.parent and self.parent.crsOptions:
        self.crsOptions = list(self.parent.crsOptions)

    # Look for SRS options attached to this layer.
    # Some servers found in the wild use a single SRS tag containing a
    # whitespace separated list of SRIDs instead of several SRS tags,
    # hence the split().
    for srs_elem in elem.findall('SRS'):
        if srs_elem.text:
            self.crsOptions.extend(srs_elem.text.split())

    # Get rid of duplicate entries (the resulting order is not defined)
    self.crsOptions = list(set(self.crsOptions))

    # Set self.crsOptions to None if the layer (and parents) had no SRS
    # options. Comment by D Lowe: do not raise ValueError, as it is
    # possible that a layer is purely a parent layer and does not have an
    # SRS specified.
    if not self.crsOptions:
        self.crsOptions = None

    # Styles
    self.styles = {}

    # Copy any parent styles (they are inheritable properties)
    if self.parent:
        self.styles = self.parent.styles.copy()

    # Get the styles for this layer (items with the same name are replaced)
    for s in elem.findall('Style'):
        name = s.find('Name')
        title = s.find('Title')
        if name is None or title is None:
            raise ValueError('%s missing name or title' % (s,))
        style = {'title': title.text}
        # legend url
        legend = s.find('LegendURL/OnlineResource')
        if legend is not None:
            style['legend'] = legend.attrib['{http://www.w3.org/1999/xlink}href']
        self.styles[name.text] = style

    # keywords
    self.keywords = extract_xml_list(elem.findall('KeywordList/Keyword'))

    # timepositions - times for which data is available.
    # An Extent without a name attribute is skipped instead of raising
    # AttributeError on None.
    self.timepositions = None
    for extent in elem.findall('Extent'):
        if (extent.attrib.get("name") or "").lower() == 'time':
            if extent.text:
                self.timepositions = extent.text.split(',')
                break

    # MetadataURLs
    self.metadataUrls = []
    for m in elem.findall('MetadataURL'):
        metadataUrl = {
            'type': testXMLValue(m.attrib['type'], attrib=True),
            'format': testXMLValue(m.find('Format')),
            'url': testXMLValue(m.find('OnlineResource').attrib['{http://www.w3.org/1999/xlink}href'], attrib=True)
        }

        if metadataUrl['url'] is not None and parse_remote_metadata:
            # download URL; this is best effort, any failure just leaves
            # the metadata entry set to None
            try:
                content = urllib2.urlopen(metadataUrl['url'])
                doc = etree.parse(content)
                if metadataUrl['type'] is not None:
                    if metadataUrl['type'] == 'FGDC':
                        metadataUrl['metadata'] = Metadata(doc)
                    if metadataUrl['type'] == 'TC211':
                        metadataUrl['metadata'] = MD_Metadata(doc)
            except Exception:
                metadataUrl['metadata'] = None

        self.metadataUrls.append(metadataUrl)
def parse_gemeentelijke_indeling(args):
    """Read the XML file named by ``args.input`` into a nested mapping.

    The result is built from ``hash()`` containers and, for a root element
    whose schema-stripped tag is ``gemeentelijke_indeling``, has the shape::

        result['attributes']                       root attributes (if any)
        result['nsmap']                            root namespace map (if any)
        result['indeling'][jaar]['attributes']['jaar']
        result['indeling'][jaar]['provincie'][code]['attributes'][...]
        result['indeling'][jaar]['provincie'][code]['gemeente'][code]['attributes'][...]

    Args:
        args: Object exposing ``input`` (path of the XML file) and
            ``verbose`` (enables the progress and unreadable-file
            messages).

    Returns:
        The nested mapping (empty when the root element has another tag),
        or None when the file is unreadable or cannot be parsed.
    """
    if not os.access(args.input, os.R_OK):
        if args.verbose:
            print("Error: Cannot read file: " + args.input)
        return

    if args.verbose:
        print("Parsing XML file: " + args.input)

    try:
        # Parse the XML document into an etree object
        parsed_xml = etree.parse(args.input)
    except Exception as err:
        print("Error: Failed to parse file: %s (%s)" % (args.input, str(err)))
        return

    result = hash()
    root = parsed_xml.getroot()

    # Anything but the expected root element yields the empty container.
    if stripschema(root.tag) != 'gemeentelijke_indeling':
        return result

    if len(root.attrib):
        result['attributes'] = root.attrib
    if len(root.nsmap):
        result['nsmap'] = root.nsmap

    for indeling_node in root:
        if stripschema(indeling_node.tag) != 'indeling':
            continue

        jaar = indeling_node.get('jaar')
        indeling = hash()
        indeling['attributes']['jaar'] = jaar

        for provincie_node in indeling_node:
            if stripschema(provincie_node.tag) != 'provincie':
                continue

            provinciecode = provincie_node.get('code')
            provincie = hash()
            provincie['attributes']['code'] = provinciecode
            provincie['attributes']['naam'] = provincie_node.get('naam')

            for gemeente_node in provincie_node:
                if stripschema(gemeente_node.tag) != 'gemeente':
                    continue

                gemeentecode = gemeente_node.get('code')
                gemeente = hash()
                gemeente['attributes']['code'] = gemeentecode
                # Remaining attributes are copied one-to-one by name.
                for key in ('naam', 'begindatum', 'einddatum'):
                    gemeente['attributes'][key] = gemeente_node.get(key)

                provincie['gemeente'][gemeentecode] = gemeente

            indeling['provincie'][provinciecode] = provincie

        result['indeling'][jaar] = indeling

    return result
def __init__(self, elem, parent=None, index=0, parse_remote_metadata=False):
    """Populate the layer description from a ``Layer`` XML element.

    Args:
        elem: Element whose tag must be ``'Layer'``.
        parent: Already-built parent layer, or None. Bounding boxes, SRS
            options and styles are inherited from it.
        index: Position of this layer; combined with the parent's index
            into the dotted ``self.index`` string.
        parse_remote_metadata: When true, every MetadataURL that has a URL
            is downloaded and parsed; failures are swallowed and leave
            ``metadata`` set to None for that entry.

    Raises:
        ValueError: If ``elem`` is not a ``Layer`` element, or a ``Style``
            child has no ``Name`` or no ``Title``.

    Note:
        ``self.attribution`` is only assigned when an ``Attribution`` child
        element is present.
    """
    if elem.tag != 'Layer':
        raise ValueError('%s should be a Layer' % (elem, ))

    self.parent = parent
    if parent:
        self.index = "%s.%d" % (parent.index, index)
    else:
        self.index = str(index)

    self.id = self.name = testXMLValue(elem.find('Name'))

    # layer attributes
    self.queryable = int(elem.attrib.get('queryable', 0))
    self.cascaded = int(elem.attrib.get('cascaded', 0))
    self.opaque = int(elem.attrib.get('opaque', 0))
    self.noSubsets = int(elem.attrib.get('noSubsets', 0))
    self.fixedWidth = int(elem.attrib.get('fixedWidth', 0))
    self.fixedHeight = int(elem.attrib.get('fixedHeight', 0))

    # title is mandatory property, but tolerate documents that omit it
    self.title = None
    title = testXMLValue(elem.find('Title'))
    if title is not None:
        self.title = title.strip()

    self.abstract = testXMLValue(elem.find('Abstract'))

    # bboxes
    b = elem.find('BoundingBox')
    self.boundingBox = None
    if b is not None:
        # sometimes the SRS attribute is (wrongly) not provided
        srs = b.attrib.get('SRS')
        self.boundingBox = (
            float(b.attrib['minx']),
            float(b.attrib['miny']),
            float(b.attrib['maxx']),
            float(b.attrib['maxy']),
            srs,
        )
    elif self.parent:
        if hasattr(self.parent, 'boundingBox'):
            self.boundingBox = self.parent.boundingBox

    # ScaleHint
    sh = elem.find('ScaleHint')
    self.scaleHint = None
    if sh is not None:
        self.scaleHint = {'min': sh.attrib['min'], 'max': sh.attrib['max']}

    attribution = elem.find('Attribution')
    if attribution is not None:
        self.attribution = dict()
        title = attribution.find('Title')
        url = attribution.find('OnlineResource')
        logo = attribution.find('LogoURL')
        if title is not None:
            self.attribution['title'] = title.text
        if url is not None:
            self.attribution['url'] = url.attrib['{http://www.w3.org/1999/xlink}href']
        if logo is not None:
            self.attribution['logo_size'] = (int(logo.attrib['width']), int(logo.attrib['height']))
            self.attribution['logo_url'] = logo.find('OnlineResource').attrib['{http://www.w3.org/1999/xlink}href']

    b = elem.find('LatLonBoundingBox')
    if b is not None:
        self.boundingBoxWGS84 = (
            float(b.attrib['minx']),
            float(b.attrib['miny']),
            float(b.attrib['maxx']),
            float(b.attrib['maxy']),
        )
    elif self.parent:
        self.boundingBoxWGS84 = self.parent.boundingBoxWGS84
    else:
        self.boundingBoxWGS84 = None

    # SRS options
    self.crsOptions = []

    # Copy any parent SRS options (they are inheritable properties)
    if self.parent:
        self.crsOptions = list(self.parent.crsOptions)

    # Look for SRS options attached to this layer.
    # Some servers found in the wild use a single SRS tag containing a
    # whitespace separated list of SRIDs instead of several SRS tags,
    # hence the split().
    for srs_elem in elem.findall('SRS'):
        if srs_elem.text:
            self.crsOptions.extend(srs_elem.text.split())

    # Get rid of duplicate entries (the resulting order is not defined).
    # A layer may be purely a parent layer without any SRS; in that case
    # crsOptions deliberately stays an empty list (neither None nor an
    # error) so that child layers can keep copying it.
    self.crsOptions = list(set(self.crsOptions))

    # Styles
    self.styles = {}

    # Copy any parent styles (they are inheritable properties)
    if self.parent:
        self.styles = self.parent.styles.copy()

    # Get the styles for this layer (items with the same name are replaced)
    for s in elem.findall('Style'):
        name = s.find('Name')
        title = s.find('Title')
        if name is None or title is None:
            raise ValueError('%s missing name or title' % (s, ))
        style = {'title': title.text}
        # legend url
        legend = s.find('LegendURL/OnlineResource')
        if legend is not None:
            style['legend'] = legend.attrib['{http://www.w3.org/1999/xlink}href']
        self.styles[name.text] = style

    # keywords
    self.keywords = [f.text for f in elem.findall('KeywordList/Keyword')]

    # timepositions - times for which data is available.
    # An Extent without a name attribute is skipped instead of raising
    # AttributeError on None.
    self.timepositions = None
    self.defaulttimeposition = None
    for extent in elem.findall('Extent'):
        if (extent.attrib.get("name") or "").lower() == 'time':
            if extent.text:
                self.timepositions = extent.text.split(',')
                self.defaulttimeposition = extent.attrib.get("default")
                break

    # Elevations - available vertical levels
    self.elevations = None
    for extent in elem.findall('Extent'):
        if (extent.attrib.get("name") or "").lower() == 'elevation':
            if extent.text:
                self.elevations = extent.text.split(',')
                break

    # MetadataURLs
    self.metadataUrls = []
    for m in elem.findall('MetadataURL'):
        metadataUrl = {
            'type': testXMLValue(m.attrib['type'], attrib=True),
            'format': testXMLValue(m.find('Format')),
            'url': testXMLValue(m.find('OnlineResource').attrib['{http://www.w3.org/1999/xlink}href'], attrib=True)
        }

        if metadataUrl['url'] is not None and parse_remote_metadata:
            # download URL; this is best effort, any failure just leaves
            # the metadata entry set to None
            try:
                content = urllib2.urlopen(metadataUrl['url'])
                doc = etree.parse(content)
                if metadataUrl['type'] is not None:
                    if metadataUrl['type'] == 'FGDC':
                        metadataUrl['metadata'] = Metadata(doc)
                    if metadataUrl['type'] == 'TC211':
                        metadataUrl['metadata'] = MD_Metadata(doc)
            except Exception:
                metadataUrl['metadata'] = None

        self.metadataUrls.append(metadataUrl)
def parseXML(self,naam):
    """Parse ``naam`` with ``etree.parse`` and time the call.

    The parse is bracketed by ``Log.log.startTimer``/``endTimer`` using
    the timer name ``"parseXML"``. Exceptions from ``etree.parse``
    propagate to the caller; in that case ``endTimer`` is not reached.

    Args:
        naam: Source handed unchanged to ``etree.parse``.

    Returns:
        The object returned by ``etree.parse``.
    """
    timer_name = "parseXML"
    Log.log.startTimer(timer_name)
    document = etree.parse(naam)
    Log.log.endTimer(timer_name)
    return document