예제 #1
0
def _parse_metadata(xml):
    """Parse raw XML into an ``MD_Metadata`` record.

    Returns ``None`` when the payload cannot be parsed. The raw XML is
    stripped from the returned record to keep it lightweight.
    """
    try:
        parsed = etree.fromstring(xml)
    except ValueError:
        return None
    record = MD_Metadata(parsed)
    # drop the raw XML payload; only the parsed fields are needed
    record.xml = None
    return record
예제 #2
0
    def get_record(self, uuid):
        """Fetch the catalogue record for *uuid* as an ``MD_Metadata``.

        Returns ``None`` when the CSW dispatch yields nothing or the
        response carries no gmd:MD_Metadata element.
        """
        response = self._csw_local_dispatch(identifier=uuid)
        if len(response) < 1:
            return None

        md_elem = etree.fromstring(response).find(
            '{http://www.isotc211.org/2005/gmd}MD_Metadata')
        if md_elem is None:
            return None

        record = MD_Metadata(md_elem)

        # flatten the nested keyword groups into one plain list
        record.keywords = []
        identification = getattr(record, 'identification', None)
        if identification is not None and hasattr(identification, 'keywords'):
            for group in identification.keywords:
                record.keywords.extend(group['keywords'])

        record.links = {
            'metadata': self.catalogue.urls_for_uuid(uuid),
            'download': self.catalogue.extract_links(record),
        }
        return record
예제 #3
0
    def get_record(self, uuid):
        """Return the ISO MD_Metadata record for *uuid*, or None if absent.

        Dispatches a local CSW request, extracts the gmd:MD_Metadata
        element from the response, flattens its keyword groups into a flat
        list, and attaches metadata/download links resolved through the
        catalogue backend.
        """
        results = self._csw_local_dispatch(identifier=uuid)
        if len(results) < 1:
            return None

        # locate the ISO metadata element inside the CSW response envelope
        result = etree.fromstring(results).find(
            '{http://www.isotc211.org/2005/gmd}MD_Metadata')

        if result is None:
            return None

        record = MD_Metadata(result)
        # flatten nested keyword groups into a single flat list
        record.keywords = []
        if hasattr(record, 'identification') and hasattr(
                record.identification, 'keywords'):
            for kw in record.identification.keywords:
                record.keywords.extend(kw['keywords'])

        record.links = {}
        record.links['metadata'] = self.catalogue.urls_for_uuid(uuid)
        record.links['download'] = self.catalogue.extract_links(record)
        return record
예제 #4
0
    def __init__(self, elem, parse_remote_metadata=False, timeout=30):
        """Parse a wfs:FeatureType element into feature-type metadata.

        :param elem: the wfs:FeatureType etree element
        :param parse_remote_metadata: when True, each MetadataURL is
            downloaded and parsed into ``metadataUrl['metadata']``
        :param timeout: timeout in seconds for remote metadata fetches
        """
        self.id = testXMLValue(elem.find(nspath_eval('wfs:Name', namespaces)))
        self.title = testXMLValue(
            elem.find(nspath_eval('wfs:Title', namespaces)))
        self.abstract = testXMLValue(
            elem.find(nspath_eval('wfs:Abstract', namespaces)))
        self.keywords = [
            f.text for f in elem.findall(
                nspath_eval('ows:Keywords/ows:Keyword', namespaces))
        ]

        # bbox
        self.boundingBoxWGS84 = None
        b = BoundingBox(
            elem.find(nspath_eval('ows:WGS84BoundingBox', namespaces)),
            namespaces['ows'])
        if b is not None:
            self.boundingBoxWGS84 = (
                float(b.minx),
                float(b.miny),
                float(b.maxx),
                float(b.maxy),
            )
        # crs options
        self.crsOptions = [
            Crs(srs.text)
            for srs in elem.findall(nspath_eval('wfs:OtherSRS', namespaces))
        ]
        dsrs = testXMLValue(
            elem.find(nspath_eval('wfs:DefaultSRS', namespaces)))
        if dsrs is not None:  # first element is default srs
            self.crsOptions.insert(0, Crs(dsrs))

        # verbs
        self.verbOptions = [
            op.text for op in elem.findall(
                nspath_eval('wfs:Operations/wfs:Operation', namespaces))
        ]

        # output formats
        self.outputFormats = [
            op.text for op in elem.findall(
                nspath_eval('wfs:OutputFormats/wfs:Format', namespaces))
        ]

        # MetadataURLs
        self.metadataUrls = []
        for m in elem.findall(nspath_eval('wfs:MetadataURL', namespaces)):
            metadataUrl = {
                'type': testXMLValue(m.attrib['type'], attrib=True),
                'format': testXMLValue(m.find('Format')),
                'url': testXMLValue(m)
            }

            if metadataUrl[
                    'url'] is not None and parse_remote_metadata:  # download URL
                try:
                    content = urlopen(metadataUrl['url'], timeout=timeout)
                    doc = etree.parse(content)
                    if metadataUrl['type'] is not None:
                        if metadataUrl['type'] == 'FGDC':
                            metadataUrl['metadata'] = Metadata(doc)
                        if metadataUrl['type'] in ['TC211', '19115', '19139']:
                            metadataUrl['metadata'] = MD_Metadata(doc)
                # fix: "except Exception, err" is Python-2-only syntax and
                # `err` was unused; remote metadata is best-effort, so a
                # failed fetch must not abort feature-type parsing
                except Exception:
                    metadataUrl['metadata'] = None

            self.metadataUrls.append(metadataUrl)
예제 #5
0
def _parse_iso(context, repos, exml):
    """Parse an ISO 19139 XML document into a pycsw repository record.

    :param context: pycsw context (namespace map and queryable mappings)
    :param repos: repository instance providing the dataset model
    :param exml: etree node holding the gmd:MD_Metadata document
    :returns: populated repository dataset object
    """

    from owslib.iso import MD_Metadata

    recobj = repos.dataset()
    links = []

    md = MD_Metadata(exml)

    # core queryables present on every record
    _set(context, recobj, 'pycsw:Identifier', md.identifier)
    _set(context, recobj, 'pycsw:Typename', 'gmd:MD_Metadata')
    _set(context, recobj, 'pycsw:Schema', context.namespaces['gmd'])
    _set(context, recobj, 'pycsw:MdSource', 'local')
    _set(context, recobj, 'pycsw:InsertDate', util.get_today_and_now())
    _set(context, recobj, 'pycsw:XML', md.xml)
    _set(context, recobj, 'pycsw:AnyText', util.get_anytext(exml))
    _set(context, recobj, 'pycsw:Language', md.language)
    _set(context, recobj, 'pycsw:Type', md.hierarchy)
    _set(context, recobj, 'pycsw:ParentIdentifier', md.parentidentifier)
    _set(context, recobj, 'pycsw:Date', md.datestamp)
    _set(context, recobj, 'pycsw:Source', md.dataseturi)
    if md.referencesystem is not None:
        _set(context, recobj, 'pycsw:CRS',
             'urn:ogc:def:crs:EPSG:6.11:%s' % md.referencesystem.code)

    # fix: initialize up front so the bounding-box handling at the end
    # cannot raise NameError when the document has no identification block
    bbox = None

    if hasattr(md, 'identification'):
        _set(context, recobj, 'pycsw:Title', md.identification.title)
        _set(context, recobj, 'pycsw:AlternateTitle',
             md.identification.alternatetitle)
        _set(context, recobj, 'pycsw:Abstract', md.identification.abstract)
        _set(context, recobj, 'pycsw:Relation',
             md.identification.aggregationinfo)

        if hasattr(md.identification, 'temporalextent_start'):
            _set(context, recobj, 'pycsw:TempExtent_begin',
                 md.identification.temporalextent_start)
        if hasattr(md.identification, 'temporalextent_end'):
            _set(context, recobj, 'pycsw:TempExtent_end',
                 md.identification.temporalextent_end)

        if len(md.identification.topiccategory) > 0:
            _set(context, recobj, 'pycsw:TopicCategory',
                 md.identification.topiccategory[0])

        if len(md.identification.resourcelanguage) > 0:
            _set(context, recobj, 'pycsw:ResourceLanguage',
                 md.identification.resourcelanguage[0])

        if hasattr(md.identification, 'bbox'):
            bbox = md.identification.bbox

        if (hasattr(md.identification, 'keywords')
                and len(md.identification.keywords) > 0):
            if None not in md.identification.keywords[0]['keywords']:
                _set(context, recobj, 'pycsw:Keywords',
                     ','.join(md.identification.keywords[0]['keywords']))
                _set(context, recobj, 'pycsw:KeywordType',
                     md.identification.keywords[0]['type'])

        if hasattr(md.identification, 'creator'):
            _set(context, recobj, 'pycsw:Creator', md.identification.creator)
        if hasattr(md.identification, 'publisher'):
            _set(context, recobj, 'pycsw:Publisher',
                 md.identification.publisher)
        if hasattr(md.identification, 'contributor'):
            _set(context, recobj, 'pycsw:Contributor',
                 md.identification.contributor)

        if (hasattr(md.identification, 'contact')
                and hasattr(md.identification.contact, 'organization')):
            _set(context, recobj, 'pycsw:OrganizationName',
                 md.identification.contact.organization)

        if len(md.identification.securityconstraints) > 0:
            _set(context, recobj, 'pycsw:SecurityConstraints',
                 md.identification.securityconstraints[0])
        if len(md.identification.accessconstraints) > 0:
            _set(context, recobj, 'pycsw:AccessConstraints',
                 md.identification.accessconstraints[0])
        if len(md.identification.otherconstraints) > 0:
            _set(context, recobj, 'pycsw:OtherConstraints',
                 md.identification.otherconstraints[0])

        if hasattr(md.identification, 'date'):
            for datenode in md.identification.date:
                if datenode.type == 'revision':
                    _set(context, recobj, 'pycsw:RevisionDate', datenode.date)
                elif datenode.type == 'creation':
                    _set(context, recobj, 'pycsw:CreationDate', datenode.date)
                elif datenode.type == 'publication':
                    _set(context, recobj, 'pycsw:PublicationDate',
                         datenode.date)

        if hasattr(md.identification, 'extent') and hasattr(
                md.identification.extent, 'description_code'):
            _set(context, recobj, 'pycsw:GeographicDescriptionCode',
                 md.identification.extent.description_code)

        if len(md.identification.denominators) > 0:
            _set(context, recobj, 'pycsw:Denominator',
                 md.identification.denominators[0])
        if len(md.identification.distance) > 0:
            _set(context, recobj, 'pycsw:DistanceValue',
                 md.identification.distance[0])
        if len(md.identification.uom) > 0:
            _set(context, recobj, 'pycsw:DistanceUOM',
                 md.identification.uom[0])

        if len(md.identification.classification) > 0:
            _set(context, recobj, 'pycsw:Classification',
                 md.identification.classification[0])
        if len(md.identification.uselimitation) > 0:
            _set(context, recobj, 'pycsw:ConditionApplyingToAccessAndUse',
                 md.identification.uselimitation[0])

    # fix: the format lives on md.distribution; the original tested
    # md.identification for 'format' but then read md.distribution.format,
    # which could raise AttributeError (distribution may be None)
    if getattr(md, 'distribution', None) is not None and hasattr(
            md.distribution, 'format'):
        _set(context, recobj, 'pycsw:Format', md.distribution.format)

    if md.serviceidentification is not None:
        _set(context, recobj, 'pycsw:ServiceType',
             md.serviceidentification.type)
        _set(context, recobj, 'pycsw:ServiceTypeVersion',
             md.serviceidentification.version)

        _set(context, recobj, 'pycsw:CouplingType',
             md.serviceidentification.couplingtype)

        # TODO: map operateson / operation / operatesonidentifier /
        # operatesoname service queryables when the model supports them

    # fix: dataquality is an attribute of md, not md.identification, and
    # may be None when the document carries no DQ_DataQuality section
    if getattr(md, 'dataquality', None) is not None:
        _set(context, recobj, 'pycsw:Degree', md.dataquality.conformancedegree)
        _set(context, recobj, 'pycsw:Lineage', md.dataquality.lineage)
        _set(context, recobj, 'pycsw:SpecificationTitle',
             md.dataquality.specificationtitle)
        if hasattr(md.dataquality, 'specificationdate'):
            _set(context, recobj, 'pycsw:specificationDate',
                 md.dataquality.specificationdate[0].date)
            _set(context, recobj, 'pycsw:SpecificationDateType',
                 md.dataquality.specificationdate[0].datetype)

    if hasattr(md, 'contact') and len(md.contact) > 0:
        _set(context, recobj, 'pycsw:ResponsiblePartyRole', md.contact[0].role)

    if hasattr(md, 'distribution') and hasattr(md.distribution, 'online'):
        for link in md.distribution.online:
            linkstr = '%s,%s,%s,%s' % \
            (link.name, link.description, link.protocol, link.url)
            links.append(linkstr)

    if len(links) > 0:
        _set(context, recobj, 'pycsw:Links', '^'.join(links))

    if bbox is not None:
        try:
            tmp = '%s,%s,%s,%s' % (bbox.minx, bbox.miny, bbox.maxx, bbox.maxy)
            _set(context, recobj, 'pycsw:BoundingBox',
                 util.bbox2wktpolygon(tmp))
        except Exception:  # coordinates are corrupted, do not include
            _set(context, recobj, 'pycsw:BoundingBox', None)
    else:
        _set(context, recobj, 'pycsw:BoundingBox', None)

    return recobj
예제 #6
0
    def __init__(self,
                 elem,
                 parent=None,
                 children=None,
                 index=0,
                 parse_remote_metadata=False,
                 timeout=30):
        """Parse a WMS Capabilities <Layer> element.

        Inheritable properties (bounding boxes, SRS options, styles) are
        copied from *parent* when this element does not define its own;
        child <Layer> elements are parsed recursively into ``self.layers``.

        :param elem: the <Layer> etree element
        :param parent: parent ContentMetadata, if any
        :param children: pre-built child metadata objects, if any
        :param index: position of this layer within its parent
        :param parse_remote_metadata: when True, download and parse each
            MetadataURL
        :param timeout: timeout in seconds for remote metadata fetches
        :raises ValueError: if *elem* is not a Layer element, or a Style
            lacks a Name or Title
        """
        if elem.tag != 'Layer':
            raise ValueError('%s should be a Layer' % (elem, ))

        self.parent = parent
        # hierarchical index, e.g. "0.2.1" for a nested layer
        if parent:
            self.index = "%s.%d" % (parent.index, index)
        else:
            self.index = str(index)

        self._children = children

        self.id = self.name = testXMLValue(elem.find('Name'))

        # layer attributes
        self.queryable = int(elem.attrib.get('queryable', 0))
        self.cascaded = int(elem.attrib.get('cascaded', 0))
        self.opaque = int(elem.attrib.get('opaque', 0))
        self.noSubsets = int(elem.attrib.get('noSubsets', 0))
        self.fixedWidth = int(elem.attrib.get('fixedWidth', 0))
        self.fixedHeight = int(elem.attrib.get('fixedHeight', 0))

        # title is mandatory property
        self.title = None
        title = testXMLValue(elem.find('Title'))
        if title is not None:
            self.title = title.strip()

        self.abstract = testXMLValue(elem.find('Abstract'))

        # bboxes
        b = elem.find('BoundingBox')
        self.boundingBox = None
        if b is not None:
            try:  # sometimes the SRS attribute is (wrongly) not provided
                srs = b.attrib['SRS']
            except KeyError:
                srs = None
            self.boundingBox = (
                float(b.attrib['minx']),
                float(b.attrib['miny']),
                float(b.attrib['maxx']),
                float(b.attrib['maxy']),
                srs,
            )
        elif self.parent:
            # bounding box is inheritable from the parent layer
            if hasattr(self.parent, 'boundingBox'):
                self.boundingBox = self.parent.boundingBox

        # ScaleHint
        sh = elem.find('ScaleHint')
        self.scaleHint = None
        if sh is not None:
            if 'min' in sh.attrib and 'max' in sh.attrib:
                self.scaleHint = {
                    'min': sh.attrib['min'],
                    'max': sh.attrib['max']
                }

        # optional attribution block: title, URL and logo details
        attribution = elem.find('Attribution')
        if attribution is not None:
            self.attribution = dict()
            title = attribution.find('Title')
            url = attribution.find('OnlineResource')
            logo = attribution.find('LogoURL')
            if title is not None:
                self.attribution['title'] = title.text
            if url is not None:
                self.attribution['url'] = url.attrib[
                    '{http://www.w3.org/1999/xlink}href']
            if logo is not None:
                self.attribution['logo_size'] = (int(logo.attrib['width']),
                                                 int(logo.attrib['height']))
                self.attribution['logo_url'] = logo.find(
                    'OnlineResource'
                ).attrib['{http://www.w3.org/1999/xlink}href']

        # WGS84 bbox; also inheritable from the parent layer
        b = elem.find('LatLonBoundingBox')
        if b is not None:
            self.boundingBoxWGS84 = (
                float(b.attrib['minx']),
                float(b.attrib['miny']),
                float(b.attrib['maxx']),
                float(b.attrib['maxy']),
            )
        elif self.parent:
            self.boundingBoxWGS84 = self.parent.boundingBoxWGS84
        else:
            self.boundingBoxWGS84 = None

        # SRS options
        self.crsOptions = []

        # Copy any parent SRS options (they are inheritable properties)
        if self.parent:
            self.crsOptions = list(self.parent.crsOptions)

        # Look for SRS option attached to this layer
        if elem.find('SRS') is not None:
            ## some servers found in the wild use a single SRS
            ## tag containing a whitespace separated list of SRIDs
            ## instead of several SRS tags. hence the inner loop
            for srslist in [x.text for x in elem.findall('SRS')]:
                if srslist:
                    for srs in srslist.split():
                        self.crsOptions.append(srs)

        #Get rid of duplicate entries
        self.crsOptions = list(set(self.crsOptions))

        #Set self.crsOptions to None if the layer (and parents) had no SRS options
        if len(self.crsOptions) == 0:
            #raise ValueError('%s no SRS available!?' % (elem,))
            #Comment by D Lowe.
            #Do not raise ValueError as it is possible that a layer is purely a parent layer and does not have SRS specified. Instead set crsOptions to None
            # Comment by Jachym:
            # Do not set it to None, but to [], which will make the code
            # work further. Fixed by anthonybaxter
            self.crsOptions = []

        #Styles
        self.styles = {}

        #Copy any parent styles (they are inheritable properties)
        if self.parent:
            self.styles = self.parent.styles.copy()

        #Get the styles for this layer (items with the same name are replaced)
        for s in elem.findall('Style'):
            name = s.find('Name')
            title = s.find('Title')
            if name is None or title is None:
                raise ValueError('%s missing name or title' % (s, ))
            style = {'title': title.text}
            # legend url
            legend = s.find('LegendURL/OnlineResource')
            if legend is not None:
                style['legend'] = legend.attrib[
                    '{http://www.w3.org/1999/xlink}href']
            self.styles[name.text] = style

        # keywords
        self.keywords = [f.text for f in elem.findall('KeywordList/Keyword')]

        # timepositions - times for which data is available.
        self.timepositions = None
        self.defaulttimeposition = None
        # NOTE(review): assumes every Extent element carries a "name"
        # attribute — .get() returning None would raise here; confirm
        for extent in elem.findall('Extent'):
            if extent.attrib.get("name").lower() == 'time':
                if extent.text:
                    self.timepositions = extent.text.split(',')
                    self.defaulttimeposition = extent.attrib.get("default")
                    break

        # Elevations - available vertical levels
        self.elevations = None
        for extent in elem.findall('Extent'):
            if extent.attrib.get("name").lower() == 'elevation':
                if extent.text:
                    self.elevations = extent.text.split(',')
                    break

        # MetadataURLs
        self.metadataUrls = []
        for m in elem.findall('MetadataURL'):
            metadataUrl = {
                'type':
                testXMLValue(m.attrib['type'], attrib=True),
                'format':
                testXMLValue(m.find('Format')),
                'url':
                testXMLValue(m.find('OnlineResource').
                             attrib['{http://www.w3.org/1999/xlink}href'],
                             attrib=True)
            }

            if metadataUrl[
                    'url'] is not None and parse_remote_metadata:  # download URL
                try:
                    content = openURL(metadataUrl['url'], timeout=timeout)
                    doc = etree.parse(content)
                    if metadataUrl['type'] is not None:
                        if metadataUrl['type'] == 'FGDC':
                            metadataUrl['metadata'] = Metadata(doc)
                        if metadataUrl['type'] == 'TC211':
                            metadataUrl['metadata'] = MD_Metadata(doc)
                except Exception:
                    # remote metadata is best-effort; never abort layer parsing
                    metadataUrl['metadata'] = None

            self.metadataUrls.append(metadataUrl)

        # DataURLs
        self.dataUrls = []
        for m in elem.findall('DataURL'):
            dataUrl = {
                'format':
                m.find('Format').text.strip(),
                'url':
                m.find('OnlineResource').
                attrib['{http://www.w3.org/1999/xlink}href']
            }
            self.dataUrls.append(dataUrl)

        # recurse into nested <Layer> children
        self.layers = []
        for child in elem.findall('Layer'):
            self.layers.append(ContentMetadata(child, self))
예제 #7
0
    def from_esa_iso_xml(self, esa_xml: bytes, inspire_xml: bytes,
                         collections: list, ows_url: str, stac_id: str) -> str:
        """Build an ISO 19139-2 document from ESA product + INSPIRE metadata.

        :param esa_xml: raw ESA product metadata XML
        :param inspire_xml: raw INSPIRE ISO metadata XML
        :param collections: known collection/product-type identifiers
        :param ows_url: base URL for the OGC WMS/WCS endpoints
        :param stac_id: optional STAC identifier overriding the product URI
        :returns: serialized ISO 19139-2 XML string
        """

        mcf = deepcopy(self.mcf)

        exml = etree.fromstring(esa_xml)
        ixml = etree.fromstring(inspire_xml)

        product_type = exml.xpath('//PRODUCT_TYPE/text()')[0]

        m = MD_Metadata(ixml)

        product_manifest = exml.xpath('//PRODUCT_URI/text()')[0]
        product_manifest_link = urljoin(self.base_url, product_manifest)

        if stac_id:
            mcf['metadata']['identifier'] = stac_id
        else:
            mcf['metadata']['identifier'] = product_manifest
        mcf['metadata']['hierarchylevel'] = m.hierarchy or 'dataset'
        mcf['metadata']['datestamp'] = exml.xpath(
            '//Product_Info/GENERATION_TIME/text()')[0]

        if product_type in collections:
            mcf['metadata']['parentidentifier'] = product_type

        # footprint is a flat lat/lon position list; pick opposite corners
        gfp = exml.xpath('//Global_Footprint/EXT_POS_LIST/text()')[0].split()

        minx = gfp[1]
        miny = gfp[0]
        maxx = gfp[5]
        maxy = gfp[4]

        mcf['identification']['extents'] = {
            'spatial': [{
                'bbox': [minx, miny, maxx, maxy],
                'crs': 4326
            }],
            'temporal': [{
                'begin':
                exml.xpath('//Product_Info/PRODUCT_START_TIME/text()')[0],
                'end':
                exml.xpath('//Product_Info/PRODUCT_STOP_TIME/text()')[0]
            }]
        }

        mcf['identification']['title'] = product_manifest
        mcf['identification']['abstract'] = product_manifest

        mcf['identification']['dates'] = {
            'creation': mcf['metadata']['datestamp'],
            'publication': mcf['metadata']['datestamp']
        }

        # carry over keyword sets from the INSPIRE record
        for i, kws in enumerate(m.identification.keywords):
            kw_set = f'kw{i}'

            mcf['identification']['keywords'][kw_set] = {
                'keywords': kws['keywords']
            }
            mcf['identification']['keywords'][kw_set][
                'keywords_type'] = kws['type'] or 'theme'

        keyword_xpaths = {
            'eo:productType': '//PRODUCT_TYPE/text()',
            'eo:orbitNumber': '//SENSING_ORBIT_NUMBER/text()',
            'eo:orbitDirection': '//SENSING_ORBIT_DIRECTION/text()',
            'eo:snowCover': '//SNOW_ICE_PERCENTAGE/text()'
        }

        mcf['identification']['keywords']['product'] = {
            'keywords': [],
            'keywords_type': 'theme'
        }

        for key, xpath_expr in keyword_xpaths.items():
            matches = exml.xpath(xpath_expr)
            # fix: the original did `keyword = value[0]`, which took the
            # first *character* of the XPath string ('/') instead of the
            # first matched value
            if len(matches) > 0:
                keyword = matches[0]
                mcf['identification']['keywords']['product'][
                    'keywords'].append(f"{key}:{keyword}")

        mcf['identification']['topiccategory'] = [
            m.identification.topiccategory[0]
        ]
        mcf['identification']['status'] = 'onGoing'
        mcf['identification']['maintenancefrequency'] = 'continual'
        mcf['identification'][
            'accessconstraints'] = m.identification.accessconstraints[0]

        if len(exml.xpath('//Cloud_Coverage_Assessment/text()')) > 0:
            mcf['content_info']['cloud_cover'] = exml.xpath(
                '//Cloud_Coverage_Assessment/text()')[0]
        mcf['content_info']['processing_level'] = exml.xpath(
            '//PROCESSING_LEVEL/text()')[0]

        for d in exml.xpath(
                '//Spectral_Information_List/Spectral_Information'):
            # fix: '//CENTRAL' etc. are document-absolute in lxml and would
            # return the first band's values for every dimension; use
            # relative './/' paths scoped to this Spectral_Information node
            mcf['content_info']['dimensions'].append({
                'name': d.attrib.get('physicalBand'),
                'units': d.xpath('.//CENTRAL')[0].attrib.get('unit'),
                'min': d.xpath('.//MIN/text()')[0],
                'max': d.xpath('.//MAX/text()')[0]
            })

        mcf['distribution'][product_manifest] = {
            'url': self.base_url,
            'type': 'enclosure',
            'name': 'product',
            'description': 'product',
            'function': 'download'
        }

        product_format = exml.xpath('//Granule_List/Granule/@imageFormat')[0]

        if product_format == 'JPEG2000':
            mime_type = 'image/jp2'
            file_extension = 'jp2'
        elif product_format == 'TIFF':
            mime_type = 'image/x.geotiff'
            file_extension = 'tif'
        else:
            logger.warning(f'unknown product format: {product_format}')
            mime_type = 'NA'
            file_extension = 'NA'

        # one download link per granule image file
        for image_file in exml.xpath(
                '//Product_Organisation//IMAGE_FILE/text()'):
            dist = {
                'url': urljoin(product_manifest_link,
                               f'{image_file}.{file_extension}'),
                'type': mime_type,
                'name': 'granule',
                'description': 'granule',
                'function': 'download'
            }
            mcf['distribution'][image_file] = dist

        logger.debug('Adding WMS/WCS links')
        wms_link_params = {
            'service': 'WMS',
            'version': '1.3.0',
            'request': 'GetCapabilities',
            'cql': f'identifier="{product_manifest}"'
        }

        mcf['distribution']['wms_link'] = {
            'url': f'{ows_url}?{urlencode(wms_link_params)}',
            'type': 'OGC:WMS',
            'name': product_manifest,
            'description': f'WMS URL for {product_manifest}',
        }

        wcs_link_params = {
            'service': 'WCS',
            'version': '2.0.1',
            'request': 'DescribeEOCoverageSet',
            'eoid': product_manifest
        }

        mcf['distribution']['wcs_link'] = {
            'url': f'{ows_url}?{urlencode(wcs_link_params)}',
            'type': 'OGC:WCS',
            'name': product_manifest,
            'description': f'WCS URL for {product_manifest}',
        }

        mcf['acquisition'] = {
            'platforms': [{
                'identifier':
                exml.xpath('//SPACECRAFT_NAME/text()')[0],
                'description':
                exml.xpath('//SPACECRAFT_NAME/text()')[0],
                'instruments': [{
                    'identifier':
                    exml.xpath('//DATATAKE_TYPE/text()')[0],
                    'type':
                    product_type
                }]
            }]
        }

        logger.debug(f'MCF: {mcf}')

        iso_os = ISO19139_2OutputSchema()

        return iso_os.write(mcf)
예제 #8
0
파일: wfs100.py 프로젝트: rjaduthie/OWSLib
    def __init__(self, elem, parent, parse_remote_metadata=False, timeout=30):
        """Parse a WFS 1.0.0 FeatureType element into content metadata.

        :param elem: the FeatureType etree element
        :param parent: parent capabilities element (supplies Operations)
        :param parse_remote_metadata: when True, each MetadataURL is
            downloaded and parsed into ``metadataUrl['metadata']``
        :param timeout: timeout in seconds for remote metadata fetches
        """
        self.id = testXMLValue(elem.find(nspath('Name')))
        self.title = testXMLValue(elem.find(nspath('Title')))
        self.abstract = testXMLValue(elem.find(nspath('Abstract')))
        self.keywords = [f.text for f in elem.findall(nspath('Keywords'))]

        # bboxes
        self.boundingBox = None
        b = elem.find(nspath('LatLongBoundingBox'))
        srs = elem.find(nspath('SRS'))

        if b is not None:
            self.boundingBox = (float(b.attrib['minx']), float(
                b.attrib['miny']), float(b.attrib['maxx']),
                                float(b.attrib['maxy']), Crs(srs.text))

        # transform wgs84 bbox from given default bboxt
        self.boundingBoxWGS84 = None

        if b is not None and srs is not None:
            wgs84 = pyproj.Proj(init="epsg:4326")
            try:
                src_srs = pyproj.Proj(init=srs.text)
                mincorner = pyproj.transform(src_srs, wgs84, b.attrib['minx'],
                                             b.attrib['miny'])
                maxcorner = pyproj.transform(src_srs, wgs84, b.attrib['maxx'],
                                             b.attrib['maxy'])

                self.boundingBoxWGS84 = (mincorner[0], mincorner[1],
                                         maxcorner[0], maxcorner[1])
            except RuntimeError:
                # unknown source SRS: leave the WGS84 bbox unset
                pass

        # crs options
        self.crsOptions = [
            Crs(srs.text) for srs in elem.findall(nspath('SRS'))
        ]

        # verbs: capabilities-level operations plus any feature-type-specific
        # ones not already present
        self.verbOptions = [op.tag for op
                            in parent.findall(nspath('Operations/*'))]
        # fix: the original used a bare `self.verbOptions + [...]`, which
        # built the combined list and discarded it; += actually extends
        self.verbOptions += [op.tag for op
                             in elem.findall(nspath('Operations/*'))
                             if op.tag not in self.verbOptions]

        #others not used but needed for iContentMetadata harmonisation
        self.styles = None
        self.timepositions = None
        self.defaulttimeposition = None

        # MetadataURLs
        self.metadataUrls = []
        for m in elem.findall(nspath('MetadataURL')):
            metadataUrl = {
                'type': testXMLValue(m.attrib['type'], attrib=True),
                'format': testXMLValue(m.find('Format')),
                'url': testXMLValue(m)
            }

            if metadataUrl[
                    'url'] is not None and parse_remote_metadata:  # download URL
                try:
                    content = openURL(metadataUrl['url'], timeout=timeout)
                    doc = etree.parse(content)
                    if metadataUrl['type'] is not None:
                        if metadataUrl['type'] == 'FGDC':
                            metadataUrl['metadata'] = Metadata(doc)
                        if metadataUrl['type'] == 'TC211':
                            metadataUrl['metadata'] = MD_Metadata(doc)
                except Exception:
                    # remote metadata is best-effort; never abort parsing
                    metadataUrl['metadata'] = None

            self.metadataUrls.append(metadataUrl)
예제 #9
0
    def import_(self, metadata: str) -> dict:
        """
        Import metadata into MCF

        :param metadata: string of metadata content

        :returns: `dict` of MCF content
        """

        mcf = {
            'mcf': {
                'version': '1.0',
            },
            'metadata': {},
            'identification': {},
            'contact': {},
            'distribution': {}
        }

        LOGGER.debug('Parsing ISO metadata')
        try:
            m = MD_Metadata(etree.fromstring(metadata))
        except ValueError:
            # lxml rejects str input carrying an encoding declaration;
            # re-parse from bytes
            m = MD_Metadata(etree.fromstring(bytes(metadata, 'utf-8')))

        LOGGER.debug('Setting metadata')
        mcf['metadata']['identifier'] = m.identifier

        mcf['metadata']['hierarchylevel'] = m.hierarchy
        mcf['metadata']['datestamp'] = m.datestamp

        LOGGER.debug('Setting identification')
        mcf['identification']['title'] = m.identification.title
        mcf['identification']['abstract'] = m.identification.abstract

        if m.identification.date:
            mcf['identification']['dates'] = {}
            for date_ in m.identification.date:
                mcf['identification']['dates'][date_.type] = date_.date

        if m.identification.keywords2:
            mcf['identification']['keywords'] = {}
            for count, value in enumerate(m.identification.keywords2):
                key = f'keywords-{count}'
                mcf['identification']['keywords'][key] = {
                    'type': value.type,
                    'keywords': value.keywords
                }
        mcf['identification'][
            'topiccategory'] = m.identification.topiccategory  # noqa

        # bbox coordinates arrive as strings; literal_eval turns them into
        # numbers without the blanket coercion of float()
        mcf['identification']['extents'] = {
            'spatial': [{
                'bbox': [
                    ast.literal_eval(m.identification.extent.boundingBox.minx),
                    ast.literal_eval(m.identification.extent.boundingBox.miny),
                    ast.literal_eval(m.identification.extent.boundingBox.maxx),
                    ast.literal_eval(m.identification.extent.boundingBox.maxy)
                ]
            }],
            'temporal': []
        }

        temp_extent = {}
        if m.identification.temporalextent_start:
            temp_extent['begin'] = m.identification.temporalextent_start
        if m.identification.temporalextent_end:
            temp_extent['end'] = m.identification.temporalextent_end

        mcf['identification']['extents']['temporal'].append(temp_extent)

        if m.identification.accessconstraints:
            mcf['identification'][
                'accessconstraints'] = m.identification.accessconstraints[
                    0]  # noqa

        mcf['identification']['status'] = m.identification.status

        LOGGER.debug('Setting contact')
        if m.contact:
            for c in m.contact:
                mcf['contact'].update(get_contact(c))

        LOGGER.debug('Setting distribution')
        # fix: guard the whole distribution section — the original read
        # m.distribution.distributor *before* the `if m.distribution:`
        # check, raising AttributeError when distribution is None
        if m.distribution:
            if m.distribution.distributor:
                for d in m.distribution.distributor:
                    mcf['contact'].update(get_contact(d.contact))

            for count, value in enumerate(m.distribution.online):
                key = f'link-{count}'
                mcf['distribution'][key] = get_link(value)

        return mcf
예제 #10
0
    else:  # xml
        text_bag = []

        if isinstance(bag, (bytes, str)):
            # serialize to lxml
            bag = etree.fromstring(bag)

        for t in bag.xpath('//gco:CharacterString', namespaces=namespaces):
            if t.text is not None:
                text_bag.append(t.text.strip())

    return ' '.join(text_bag)


for xml_file in glob('{}/*.xml'.format(xml_dir)):
    m = MD_Metadata(etree.parse(xml_file))

    _raw_metadata = m.xml.decode('utf-8')
    _anytext = get_anytext(_raw_metadata)

    identifier = m.identifier
    type_ = m.hierarchy
    title = m.identification.title
    description = m.identification.abstract

    contact = m.identification.contact
    issued = m.datestamp

    links = []
    if m.distribution and m.distribution.online:
        for ln in m.distribution.online:
예제 #11
0
파일: metadata.py 프로젝트: dodobas/pycsw
def _parse_iso(context, repos, exml):
    """Parse an ISO 19139 (gmd:MD_Metadata) document into a repository record.

    :param context: pycsw context providing namespaces and queryable mappings
    :param repos: repository object; ``repos.dataset()`` creates the record
    :param exml: etree node containing the ISO XML metadata
    """

    from owslib.iso import MD_Metadata

    recobj = repos.dataset()
    links = []

    md = MD_Metadata(exml)

    # core record properties
    _set(context, recobj, 'pycsw:Identifier', md.identifier)
    _set(context, recobj, 'pycsw:Typename', 'gmd:MD_Metadata')
    _set(context, recobj, 'pycsw:Schema', context.namespaces['gmd'])
    _set(context, recobj, 'pycsw:MdSource', 'local')
    _set(context, recobj, 'pycsw:InsertDate', util.get_today_and_now())
    _set(context, recobj, 'pycsw:XML', md.xml)
    _set(context, recobj, 'pycsw:AnyText', util.get_anytext(exml))
    _set(context, recobj, 'pycsw:Language', md.language)
    _set(context, recobj, 'pycsw:Type', md.hierarchy)
    _set(context, recobj, 'pycsw:ParentIdentifier', md.parentidentifier)
    _set(context, recobj, 'pycsw:Date', md.datestamp)
    _set(context, recobj, 'pycsw:Modified', md.datestamp)
    _set(context, recobj, 'pycsw:Source', md.dataseturi)
    if md.referencesystem is not None:
        _set(context, recobj, 'pycsw:CRS',
             'urn:ogc:def:crs:EPSG:6.11:%s' % md.referencesystem.code)

    if hasattr(md, 'identification'):
        _set(context, recobj, 'pycsw:Title', md.identification.title)
        _set(context, recobj, 'pycsw:AlternateTitle',
             md.identification.alternatetitle)
        _set(context, recobj, 'pycsw:Abstract', md.identification.abstract)
        _set(context, recobj, 'pycsw:Relation',
             md.identification.aggregationinfo)

        if hasattr(md.identification, 'temporalextent_start'):
            _set(context, recobj, 'pycsw:TempExtent_begin',
                 md.identification.temporalextent_start)
        if hasattr(md.identification, 'temporalextent_end'):
            _set(context, recobj, 'pycsw:TempExtent_end',
                 md.identification.temporalextent_end)

        if len(md.identification.topiccategory) > 0:
            _set(context, recobj, 'pycsw:TopicCategory',
                 md.identification.topiccategory[0])

        if len(md.identification.resourcelanguage) > 0:
            _set(context, recobj, 'pycsw:ResourceLanguage',
                 md.identification.resourcelanguage[0])

        if hasattr(md.identification, 'bbox'):
            bbox = md.identification.bbox
        else:
            bbox = None

        if (hasattr(md.identification, 'keywords')
                and len(md.identification.keywords) > 0):
            # flatten all keyword groups into one comma-separated list
            all_keywords = [
                item for sublist in md.identification.keywords
                for item in sublist['keywords'] if item is not None
            ]
            _set(context, recobj, 'pycsw:Keywords', ','.join(all_keywords))
            _set(context, recobj, 'pycsw:KeywordType',
                 md.identification.keywords[0]['type'])

        if hasattr(md.identification, 'creator'):
            _set(context, recobj, 'pycsw:Creator', md.identification.creator)
        if hasattr(md.identification, 'publisher'):
            _set(context, recobj, 'pycsw:Publisher',
                 md.identification.publisher)
        if hasattr(md.identification, 'contributor'):
            _set(context, recobj, 'pycsw:Contributor',
                 md.identification.contributor)

        if (hasattr(md.identification, 'contact')
                and hasattr(md.identification.contact, 'organization')):
            _set(context, recobj, 'pycsw:OrganizationName',
                 md.identification.contact.organization)

        if len(md.identification.securityconstraints) > 0:
            _set(context, recobj, 'pycsw:SecurityConstraints',
                 md.identification.securityconstraints[0])
        if len(md.identification.accessconstraints) > 0:
            _set(context, recobj, 'pycsw:AccessConstraints',
                 md.identification.accessconstraints[0])
        if len(md.identification.otherconstraints) > 0:
            _set(context, recobj, 'pycsw:OtherConstraints',
                 md.identification.otherconstraints[0])

        if hasattr(md.identification, 'date'):
            for datenode in md.identification.date:
                if datenode.type == 'revision':
                    _set(context, recobj, 'pycsw:RevisionDate', datenode.date)
                elif datenode.type == 'creation':
                    _set(context, recobj, 'pycsw:CreationDate', datenode.date)
                elif datenode.type == 'publication':
                    _set(context, recobj, 'pycsw:PublicationDate',
                         datenode.date)

        if hasattr(md.identification, 'extent') and hasattr(
                md.identification.extent, 'description_code'):
            _set(context, recobj, 'pycsw:GeographicDescriptionCode',
                 md.identification.extent.description_code)

        if len(md.identification.denominators) > 0:
            _set(context, recobj, 'pycsw:Denominator',
                 md.identification.denominators[0])
        if len(md.identification.distance) > 0:
            _set(context, recobj, 'pycsw:DistanceValue',
                 md.identification.distance[0])
        if len(md.identification.uom) > 0:
            _set(context, recobj, 'pycsw:DistanceUOM',
                 md.identification.uom[0])

        if len(md.identification.classification) > 0:
            _set(context, recobj, 'pycsw:Classification',
                 md.identification.classification[0])
        if len(md.identification.uselimitation) > 0:
            _set(context, recobj, 'pycsw:ConditionApplyingToAccessAndUse',
                 md.identification.uselimitation[0])

    # BUG FIX: format lives on md.distribution, not md.identification;
    # guard on the distribution object itself so a missing/None
    # distribution no longer raises AttributeError
    if getattr(md, 'distribution', None) is not None \
            and hasattr(md.distribution, 'format'):
        _set(context, recobj, 'pycsw:Format', md.distribution.format)

    if md.serviceidentification is not None:
        _set(context, recobj, 'pycsw:ServiceType',
             md.serviceidentification.type)
        _set(context, recobj, 'pycsw:ServiceTypeVersion',
             md.serviceidentification.version)

        _set(context, recobj, 'pycsw:CouplingType',
             md.serviceidentification.couplingtype)

        #if len(md.serviceidentification.operateson) > 0:
        #    _set(context, recobj, 'pycsw:operateson = VARCHAR(32),
        #_set(context, recobj, 'pycsw:operation VARCHAR(32),
        #_set(context, recobj, 'pycsw:operatesonidentifier VARCHAR(32),
        #_set(context, recobj, 'pycsw:operatesoname VARCHAR(32),

    # BUG FIX: dataquality is an attribute of md, not md.identification
    if getattr(md, 'dataquality', None) is not None:
        _set(context, recobj, 'pycsw:Degree', md.dataquality.conformancedegree)
        _set(context, recobj, 'pycsw:Lineage', md.dataquality.lineage)
        _set(context, recobj, 'pycsw:SpecificationTitle',
             md.dataquality.specificationtitle)
        if hasattr(md.dataquality, 'specificationdate'):
            _set(context, recobj, 'pycsw:specificationDate',
                 md.dataquality.specificationdate[0].date)
            _set(context, recobj, 'pycsw:SpecificationDateType',
                 md.dataquality.specificationdate[0].datetype)

    if hasattr(md, 'contact') and len(md.contact) > 0:
        _set(context, recobj, 'pycsw:ResponsiblePartyRole', md.contact[0].role)

    LOGGER.info('Scanning for links')
    if hasattr(md, 'distribution'):
        dist_links = []
        if hasattr(md.distribution, 'online'):
            LOGGER.debug('Scanning for gmd:transferOptions element(s)')
            dist_links.extend(md.distribution.online)
        if hasattr(md.distribution, 'distributor'):
            LOGGER.debug(
                'Scanning for gmd:distributorTransferOptions element(s)')
            for dist_member in md.distribution.distributor:
                dist_links.extend(dist_member.online)
        for link in dist_links:
            if link.url is not None and link.protocol is None:  # take a best guess
                link.protocol = sniff_link(link.url)
            linkstr = '%s,%s,%s,%s' % \
            (link.name, link.description, link.protocol, link.url)
            links.append(linkstr)

    try:
        LOGGER.debug('Scanning for srv:SV_ServiceIdentification links')
        for sident in md.identificationinfo:
            if hasattr(sident, 'operations'):
                for sops in sident.operations:
                    for scpt in sops['connectpoint']:
                        LOGGER.debug('adding srv link %s', scpt.url)
                        linkstr = '%s,%s,%s,%s' % \
                        (scpt.name, scpt.description, scpt.protocol, scpt.url)
                        links.append(linkstr)
    # BUG FIX: was Python 2 `except Exception, err:` (syntax error on
    # Python 3); err was unused. srv identification may simply not exist.
    except Exception:
        LOGGER.debug('no srv:SV_ServiceIdentification links found')
예제 #12
0
파일: wfs100.py 프로젝트: selimnairb/OWSLib
    def __init__(self, elem, parent, parse_remote_metadata=False, timeout=30):
        """Parse a WFS 1.0.0 FeatureType element into content metadata.

        :param elem: the wfs FeatureType etree element
        :param parent: parent capabilities element, searched for Operations/*
        :param parse_remote_metadata: if True, fetch and parse each MetadataURL
        :param timeout: timeout in seconds for remote metadata requests
        """
        self.id = testXMLValue(elem.find(nspath('Name')))
        self.title = testXMLValue(elem.find(nspath('Title')))
        self.abstract = testXMLValue(elem.find(nspath('Abstract')))
        self.keywords = [f.text for f in elem.findall(nspath('Keywords'))]

        # bboxes
        self.boundingBox = None
        b = elem.find(nspath('BoundingBox'))
        if b is not None:
            self.boundingBox = (float(b.attrib['minx']), float(
                b.attrib['miny']), float(b.attrib['maxx']),
                                float(b.attrib['maxy']), b.attrib['SRS'])
        self.boundingBoxWGS84 = None
        b = elem.find(nspath('LatLongBoundingBox'))
        if b is not None:
            self.boundingBoxWGS84 = (
                float(b.attrib['minx']),
                float(b.attrib['miny']),
                float(b.attrib['maxx']),
                float(b.attrib['maxy']),
            )
        # crs options
        self.crsOptions = [
            Crs(srs.text) for srs in elem.findall(nspath('SRS'))
        ]

        # verbs: those advertised globally plus any advertised on the
        # feature type itself
        self.verbOptions = [op.tag for op
                            in parent.findall(nspath('Operations/*'))]
        # BUG FIX: the original built this list and discarded the result
        # (`self.verbOptions + [...]`); extend the list in place instead
        self.verbOptions += [op.tag for op
                             in elem.findall(nspath('Operations/*'))
                             if op.tag not in self.verbOptions]

        # others not used but needed for iContentMetadata harmonisation
        self.styles = None
        self.timepositions = None
        self.defaulttimeposition = None

        # MetadataURLs
        self.metadataUrls = []
        for m in elem.findall(nspath('MetadataURL')):
            metadataUrl = {
                'type': testXMLValue(m.attrib['type'], attrib=True),
                'format': testXMLValue(m.find('Format')),
                'url': testXMLValue(m)
            }

            if metadataUrl[
                    'url'] is not None and parse_remote_metadata:  # download URL
                try:
                    content = urlopen(metadataUrl['url'], timeout=timeout)
                    doc = etree.parse(content)
                    if metadataUrl['type'] is not None:
                        if metadataUrl['type'] == 'FGDC':
                            metadataUrl['metadata'] = Metadata(doc)
                        if metadataUrl['type'] == 'TC211':
                            metadataUrl['metadata'] = MD_Metadata(doc)
                # remote metadata is best-effort: any fetch/parse failure
                # records None rather than aborting the whole capabilities
                # parse. BUG FIX: was Python 2 `except Exception, err:`.
                except Exception:
                    metadataUrl['metadata'] = None

            self.metadataUrls.append(metadataUrl)
예제 #13
0
def test_md_parsing_dov():
    """Test the parsing of a metadatarecord from DOV

    GetRecordById response available in
    tests/resources/csw_dov_getrecordbyid.xml

    """
    md_resource = get_md_resource('tests/resources/csw_dov_getrecordbyid.xml')
    md = MD_Metadata(md_resource)

    assert type(md) is MD_Metadata

    # record-level properties
    assert md.identifier == '6c39d716-aecc-4fbc-bac8-4f05a49a78d5'
    assert md.dataseturi is None
    assert md.parentidentifier is None

    assert md.language is None
    assert md.languagecode == 'dut'

    assert md.charset == 'utf8'
    assert md.datestamp == '2018-02-21T16:14:24'

    assert md.hierarchy == 'dataset'

    assert_list(md.contact, 1)

    contact = md.contact[0]
    assert contact.organization == 'Vlaamse overheid - Vlaamse ' \
                                   'MilieuMaatschappij - Afdeling ' \
                                   'Operationeel Waterbeheer'
    assert contact.address == 'Koning Albert II-laan 20 bus 16'
    assert contact.city == 'Brussel'
    assert contact.postcode == '1000'
    assert contact.country == u'België'
    assert contact.email == '*****@*****.**'
    assert contact.onlineresource.url == 'https://www.vmm.be'
    assert contact.role == 'pointOfContact'

    assert md.stdname == 'ISO 19115/2003/Cor.1:2006'
    assert md.stdver == 'GDI-Vlaanderen Best Practices - versie 1.0'

    assert md.referencesystem.code == '31370'
    assert md.referencesystem.codeSpace == 'EPSG'

    assert_list(md.identificationinfo, 1)

    # identification section
    iden = md.identificationinfo[0]
    assert iden.title == 'Grondwatermeetnetten'
    assert iden.alternatetitle == 'Grondwatermeetnetten beschikbaar op DOV'

    assert_list(iden.date, 2)
    assert iden.date[0].date == '2002-05-22'
    assert iden.date[0].type == 'creation'
    assert iden.date[1].date == '2002-05-22'
    assert iden.date[1].type == 'publication'

    assert_list(iden.uricode, 1)
    assert iden.uricode[0] == 'A64F073B-9FBE-91DD-36FDE7462BBAFA61'

    assert_list(iden.uricodespace, 1)
    assert iden.uricodespace[0] == 'DOV-be'

    assert_list(iden.uselimitation, 3)
    assert "Zie 'Overige beperkingen'" in iden.uselimitation
    assert "Bij het gebruik van de informatie die DOV aanbiedt, dient steeds " \
           "volgende standaardreferentie gebruikt te worden: Databank " \
           "Ondergrond Vlaanderen - (vermelding van de beheerder en de " \
           "specifieke geraadpleegde gegevens) - Geraadpleegd op dd/mm/jjjj, " \
           "op https://www.dov.vlaanderen.be" in iden.uselimitation
    assert "Volgende aansprakelijkheidsbepalingen gelden: " \
           "https://www.dov.vlaanderen.be/page/disclaimer" in iden.uselimitation

    assert_list(iden.uselimitation_url, 0)

    assert_list(iden.accessconstraints, 1)
    assert iden.accessconstraints[0] == 'otherRestrictions'

    assert_list(iden.classification, 0)

    assert_list(iden.otherconstraints, 1)
    assert iden.otherconstraints[
               0] == "Data beschikbaar voor hergebruik volgens de " \
                     "Modellicentie Gratis Hergebruik. Toelichting " \
                     "beschikbaar op " \
                     "https://www.dov.vlaanderen.be/page/gebruiksvoorwaarden-dov-services"

    assert_list(iden.securityconstraints, 1)
    assert iden.securityconstraints[0] == 'unclassified'

    assert_list(iden.useconstraints, 0)

    assert_list(iden.denominators, 1)
    assert iden.denominators[0] == '10000'

    assert_list(iden.distance, 0)
    assert_list(iden.uom, 0)

    assert_list(iden.resourcelanguage, 0)
    assert_list(iden.resourcelanguagecode, 1)
    assert iden.resourcelanguagecode[0] == 'dut'

    assert_list(iden.creator, 0)
    assert_list(iden.publisher, 0)
    assert_list(iden.contributor, 0)

    assert iden.edition is None

    assert iden.abstract.startswith("In de Databank Ondergrond Vlaanderen "
                                    "zijn verschillende grondwatermeetnetten "
                                    "opgenomen.")

    assert iden.purpose.startswith(
        "Het doel van de meetnetten is inzicht krijgen in de kwaliteit en "
        "kwantiteit van de watervoerende lagen in de ondergrond van "
        "Vlaanderen. Algemeen kan gesteld worden dat de grondwatermeetnetten "
        "een belangrijk beleidsinstrument vormen")

    assert iden.status == 'onGoing'

    assert_list(iden.contact, 2)

    assert iden.contact[
        0].organization == 'Vlaamse overheid - Vlaamse MilieuMaatschappij - Afdeling Operationeel Waterbeheer'
    assert iden.contact[0].address == 'Koning Albert II-laan 20 bus 16'
    assert iden.contact[0].city == 'Brussel'
    assert iden.contact[0].postcode == '1000'
    assert iden.contact[0].country == u'België'
    assert iden.contact[0].email == '*****@*****.**'
    assert iden.contact[0].onlineresource.url == 'https://www.vmm.be'
    assert iden.contact[0].role == 'pointOfContact'

    assert iden.contact[1].organization == 'Databank Ondergrond Vlaanderen (' \
                                           'DOV)'
    assert iden.contact[1].address == 'Technologiepark Gebouw 905'
    assert iden.contact[1].city == 'Zwijnaarde'
    assert iden.contact[1].postcode == '9052'
    assert iden.contact[1].country == u'België'
    assert iden.contact[1].email == '*****@*****.**'
    assert iden.contact[1].onlineresource.url == \
           'https://www.dov.vlaanderen.be'
    assert iden.contact[1].role == 'distributor'

    assert_list(iden.spatialrepresentationtype, 1)
    assert iden.spatialrepresentationtype[0] == 'vector'

    # legacy keywords (list of dicts)
    assert_list(iden.keywords, 5)

    assert type(iden.keywords[0]) is dict
    assert iden.keywords[0]['type'] == ''
    assert iden.keywords[0]['thesaurus'][
        'title'] == "GEMET - INSPIRE thema's, versie 1.0"
    assert iden.keywords[0]['thesaurus']['date'] == '2008-06-01'
    assert iden.keywords[0]['thesaurus']['datetype'] == 'publication'
    assert_list(iden.keywords[0]['keywords'], 1)
    assert iden.keywords[0]['keywords'] == ['Geologie']

    assert type(iden.keywords[1]) is dict
    assert iden.keywords[1]['type'] == ''
    assert iden.keywords[1]['thesaurus'][
        'title'] == "GEMET - Concepten, versie 2.4"
    assert iden.keywords[1]['thesaurus']['date'] == '2010-01-13'
    assert iden.keywords[1]['thesaurus']['datetype'] == 'publication'
    assert_list(iden.keywords[1]['keywords'], 2)
    assert iden.keywords[1]['keywords'] == ['grondwater', 'meetnet(werk)']

    assert type(iden.keywords[2]) is dict
    assert iden.keywords[2]['type'] == ''
    assert iden.keywords[2]['thesaurus']['title'] == "Vlaamse regio's"
    assert iden.keywords[2]['thesaurus']['date'] == '2013-09-25'
    assert iden.keywords[2]['thesaurus']['datetype'] == 'publication'
    assert_list(iden.keywords[2]['keywords'], 1)
    assert iden.keywords[2]['keywords'] == ['Vlaams Gewest']

    assert type(iden.keywords[3]) is dict
    assert iden.keywords[3]['type'] is None
    assert iden.keywords[3]['thesaurus'][
        'title'] == "GDI-Vlaanderen Trefwoorden"
    assert iden.keywords[3]['thesaurus']['date'] == '2014-02-26'
    assert iden.keywords[3]['thesaurus']['datetype'] == 'publication'
    assert_list(iden.keywords[3]['keywords'], 7)
    assert iden.keywords[3]['keywords'] == [
        'Toegevoegd GDI-Vl', 'Herbruikbaar', 'Vlaamse Open data', 'Kosteloos',
        'Lijst M&R INSPIRE', 'Metadata INSPIRE-conform',
        'Metadata GDI-Vl-conform'
    ]

    assert type(iden.keywords[4]) is dict
    assert iden.keywords[4]['type'] is None
    assert iden.keywords[4]['thesaurus']['title'] == "DOV"
    assert iden.keywords[4]['thesaurus']['date'] == '2010-12-01'
    assert iden.keywords[4]['thesaurus']['datetype'] == 'publication'
    assert_list(iden.keywords[4]['keywords'], 7)
    assert iden.keywords[4]['keywords'] == [
        'Ondergrond', 'DOV', 'Vlaanderen', 'monitoring', 'meetnetten',
        'Kaderrichtlijn Water', 'Decreet Integraal waterbeleid'
    ]

    # newer keywords2 API (list of objects)
    assert_list(iden.keywords2, 5)
    assert iden.keywords2[0].type == ''
    assert iden.keywords2[0].thesaurus[
        'title'] == "GEMET - INSPIRE thema's, versie 1.0"
    assert iden.keywords2[0].thesaurus['date'] == '2008-06-01'
    assert iden.keywords2[0].thesaurus['datetype'] == 'publication'
    assert_list(iden.keywords2[0].keywords, 1)
    assert iden.keywords2[0].keywords == ['Geologie']

    assert iden.keywords2[1].type == ''
    assert iden.keywords2[1].thesaurus[
        'title'] == "GEMET - Concepten, versie 2.4"
    assert iden.keywords2[1].thesaurus['date'] == '2010-01-13'
    assert iden.keywords2[1].thesaurus['datetype'] == 'publication'
    assert_list(iden.keywords2[1].keywords, 2)
    assert iden.keywords2[1].keywords == ['grondwater', 'meetnet(werk)']

    assert iden.keywords2[2].type == ''
    assert iden.keywords2[2].thesaurus['title'] == "Vlaamse regio's"
    assert iden.keywords2[2].thesaurus['date'] == '2013-09-25'
    assert iden.keywords2[2].thesaurus['datetype'] == 'publication'
    assert_list(iden.keywords2[2].keywords, 1)
    assert iden.keywords2[2].keywords == ['Vlaams Gewest']

    assert iden.keywords2[3].type is None
    assert iden.keywords2[3].thesaurus['title'] == "GDI-Vlaanderen Trefwoorden"
    assert iden.keywords2[3].thesaurus['date'] == '2014-02-26'
    assert iden.keywords2[3].thesaurus['datetype'] == 'publication'
    assert_list(iden.keywords2[3].keywords, 7)
    assert iden.keywords2[3].keywords == [
        'Toegevoegd GDI-Vl', 'Herbruikbaar', 'Vlaamse Open data', 'Kosteloos',
        'Lijst M&R INSPIRE', 'Metadata INSPIRE-conform',
        'Metadata GDI-Vl-conform'
    ]

    assert iden.keywords2[4].type is None
    assert iden.keywords2[4].thesaurus['title'] == "DOV"
    assert iden.keywords2[4].thesaurus['date'] == '2010-12-01'
    assert iden.keywords2[4].thesaurus['datetype'] == 'publication'
    assert_list(iden.keywords2[4].keywords, 7)
    assert iden.keywords2[4].keywords == [
        'Ondergrond', 'DOV', 'Vlaanderen', 'monitoring', 'meetnetten',
        'Kaderrichtlijn Water', 'Decreet Integraal waterbeleid'
    ]

    assert_list(iden.topiccategory, 1)
    assert iden.topiccategory[0] == 'geoscientificInformation'

    assert iden.supplementalinformation == \
           "https://www.dov.vlaanderen.be/page/grondwatermeetnet"

    # content info section
    assert_list(md.contentinfo, 1)
    ci = md.contentinfo[0]

    assert ci.compliancecode is None
    assert_list(ci.language, 0)
    # BUG FIX: was `== True` (PEP 8 E712); use identity check for booleans
    assert ci.includedwithdataset is True
    assert_list(ci.featuretypenames, 0)

    assert_list(ci.featurecatalogues, 1)
    assert ci.featurecatalogues[0] == 'b142965f-b2aa-429e-86ff-a7cb0e065d48'
예제 #14
0
def test_md_parsing_geobretagne():
    """Test the parsing of a metadatarecord from GéoBretagne

    MD_Metadata record available in
    tests/resources/csw_geobretagne_mdmetadata.xml

    """
    md_resource = get_md_resource(
        'tests/resources/csw_geobretagne_mdmetadata.xml')
    md = MD_Metadata(md_resource)

    assert type(md) is MD_Metadata

    # record-level properties
    assert md.identifier == '955c3e47-411e-4969-b61b-3556d1b9f879'
    assert md.dataseturi is None
    assert md.parentidentifier is None

    assert md.language == 'fre'
    assert md.languagecode is None

    assert md.charset == 'utf8'
    assert md.datestamp == '2018-07-30T14:19:40'

    assert md.hierarchy == 'dataset'

    assert_list(md.contact, 1)

    contact = md.contact[0]
    assert contact.organization == 'DIRECTION GENERALE DES FINANCES ' \
                                   'PUBLIQUES BUREAU GF-3A'
    assert contact.address is None
    assert contact.city is None
    assert contact.postcode is None
    assert contact.country is None
    assert contact.email == '*****@*****.**'
    assert contact.onlineresource is None
    assert contact.role == 'pointOfContact'

    assert md.stdname == 'ISO 19115'
    assert md.stdver == '1.0'

    assert md.referencesystem.code == 'RGF93 / CC48 (EPSG:3948)'
    assert md.referencesystem.codeSpace == 'EPSG'

    assert_list(md.identificationinfo, 1)

    # identification section
    iden = md.identificationinfo[0]
    assert iden.title == 'Cadastre 2018 en Bretagne'
    assert iden.alternatetitle is None

    assert_list(iden.date, 1)
    assert iden.date[0].date == '2018-09-01'
    assert iden.date[0].type == 'revision'

    assert_list(iden.uricode, 1)
    assert iden.uricode[
        0] == 'https://geobretagne.fr/geonetwork/apps/georchestra/?uuid=363e3a8e-d0ce-497d-87a9-2a2d58d82772'
    assert_list(iden.uricodespace, 0)

    assert_list(iden.uselimitation, 2)
    assert u"le plan cadastral décrit les limites apparentes de la " \
           u"propriété." in iden.uselimitation

    assert_list(iden.uselimitation_url, 0)

    assert_list(iden.accessconstraints, 1)
    assert iden.accessconstraints[0] == 'otherRestrictions'

    assert_list(iden.classification, 0)

    assert_list(iden.otherconstraints, 1)
    assert iden.otherconstraints[
               0] == u'Usage libre sous réserve des mentions obligatoires ' \
                     u'sur tout document de diffusion : "Source : DGFIP"'

    assert_list(iden.securityconstraints, 0)

    assert_list(iden.useconstraints, 1)
    assert iden.useconstraints[0] == 'copyright'

    assert_list(iden.denominators, 1)
    assert iden.denominators[0] == '500'

    assert_list(iden.distance, 0)
    assert_list(iden.uom, 0)

    assert_list(iden.resourcelanguage, 1)
    assert iden.resourcelanguage[0] == 'fre'
    assert_list(iden.resourcelanguagecode, 0)

    assert_list(iden.creator, 0)
    assert_list(iden.publisher, 0)
    assert_list(iden.contributor, 0)

    assert iden.edition is None

    assert iden.abstract.startswith(
        u"Le plan du cadastre est un document administratif qui propose "
        u"l’unique plan parcellaire à grande échelle couvrant le territoire "
        u"national.")

    assert iden.purpose.startswith(
        u"Le but premier du plan cadastral est d'identifier, de localiser et "
        u"représenter la propriété foncière, ainsi que de servir à l'assise "
        u"de la fiscalité locale des propriétés non bâties.")

    assert iden.status == 'completed'

    assert_list(iden.contact, 1)

    assert iden.contact[0].organization == 'DGFIP Bretagne'
    assert iden.contact[0].name == 'DIRECTION GENERALE DES FINANCES PUBLIQUES'
    assert iden.contact[0].address is None
    assert iden.contact[0].city is None
    assert iden.contact[0].postcode is None
    assert iden.contact[0].country is None
    assert iden.contact[0].email == '*****@*****.**'
    assert iden.contact[0].onlineresource is None
    assert iden.contact[0].role == 'pointOfContact'

    assert_list(iden.spatialrepresentationtype, 1)
    assert iden.spatialrepresentationtype[0] == 'vector'

    # legacy keywords (list of dicts)
    assert_list(iden.keywords, 7)

    assert type(iden.keywords[0]) is dict
    assert iden.keywords[0]['type'] == 'place'
    assert iden.keywords[0]['thesaurus']['title'] is None
    assert iden.keywords[0]['thesaurus']['date'] is None
    assert iden.keywords[0]['thesaurus']['datetype'] is None
    assert_list(iden.keywords[0]['keywords'], 1)
    assert iden.keywords[0]['keywords'] == ['France']

    assert type(iden.keywords[1]) is dict
    assert iden.keywords[1]['type'] is None
    assert iden.keywords[1]['thesaurus']['title'] is None
    assert iden.keywords[1]['thesaurus']['date'] is None
    assert iden.keywords[1]['thesaurus']['datetype'] is None
    assert_list(iden.keywords[1]['keywords'], 0)

    assert type(iden.keywords[2]) is dict
    assert iden.keywords[2]['type'] == 'theme'
    assert iden.keywords[2]['thesaurus']['title'] is None
    assert iden.keywords[2]['thesaurus']['date'] is None
    assert iden.keywords[2]['thesaurus']['datetype'] is None
    assert_list(iden.keywords[2]['keywords'], 7)
    assert iden.keywords[2]['keywords'] == [
        u'bâtiments', 'adresses', 'parcelles cadastrales', 'hydrographie',
        u'réseaux de transport', u'unités administratives',
        u'référentiels de coordonnées'
    ]

    assert type(iden.keywords[3]) is dict
    assert iden.keywords[3]['type'] == 'theme'
    assert iden.keywords[3]['thesaurus']['title'] is None
    assert iden.keywords[3]['thesaurus']['date'] is None
    assert iden.keywords[3]['thesaurus']['datetype'] is None
    assert_list(iden.keywords[3]['keywords'], 5)
    assert iden.keywords[3]['keywords'] == [
        u'bâtis', 'sections', 'parcelles', 'cadastre', 'cadastrale'
    ]

    assert type(iden.keywords[4]) is dict
    assert iden.keywords[4]['type'] == 'theme'
    assert iden.keywords[4]['thesaurus']['title'] == u"GéoBretagne v 2.0"
    assert iden.keywords[4]['thesaurus']['date'] == '2014-01-13'
    assert iden.keywords[4]['thesaurus']['datetype'] == 'publication'
    assert_list(iden.keywords[4]['keywords'], 1)
    assert iden.keywords[4]['keywords'] == [u'référentiels : cadastre']

    assert type(iden.keywords[5]) is dict
    assert iden.keywords[5]['type'] == 'theme'
    assert iden.keywords[5]['thesaurus']['title'] == "INSPIRE themes"
    assert iden.keywords[5]['thesaurus']['date'] == '2008-06-01'
    assert iden.keywords[5]['thesaurus']['datetype'] == 'publication'
    assert_list(iden.keywords[5]['keywords'], 1)
    assert iden.keywords[5]['keywords'] == ['Parcelles cadastrales']

    assert type(iden.keywords[6]) is dict
    assert iden.keywords[6]['type'] == 'theme'
    assert iden.keywords[6]['thesaurus']['title'] == "GEMET"
    assert iden.keywords[6]['thesaurus']['date'] == '2012-07-20'
    assert iden.keywords[6]['thesaurus']['datetype'] == 'publication'
    assert_list(iden.keywords[6]['keywords'], 2)
    assert iden.keywords[6]['keywords'] == ['cadastre', u'bâtiment']

    # newer keywords2 API (list of objects)
    assert_list(iden.keywords2, 6)

    assert iden.keywords2[0].type == 'place'
    assert iden.keywords2[0].thesaurus is None
    assert_list(iden.keywords2[0].keywords, 1)
    assert iden.keywords2[0].keywords == ['France']

    assert iden.keywords2[1].type == 'theme'
    assert iden.keywords2[1].thesaurus is None
    assert_list(iden.keywords2[1].keywords, 7)
    assert iden.keywords2[1].keywords == [
        u'bâtiments', 'adresses', 'parcelles cadastrales', 'hydrographie',
        u'réseaux de transport', u'unités administratives',
        u'référentiels de coordonnées'
    ]

    assert iden.keywords2[2].type == 'theme'
    assert iden.keywords2[2].thesaurus is None
    assert_list(iden.keywords2[2].keywords, 5)
    assert iden.keywords2[2].keywords == [
        u'bâtis', 'sections', 'parcelles', 'cadastre', 'cadastrale'
    ]

    assert iden.keywords2[3].type == 'theme'
    assert iden.keywords2[3].thesaurus['title'] == u"GéoBretagne v 2.0"
    assert iden.keywords2[3].thesaurus['date'] == '2014-01-13'
    assert iden.keywords2[3].thesaurus['datetype'] == 'publication'
    assert_list(iden.keywords2[3].keywords, 1)
    assert iden.keywords2[3].keywords == [u'référentiels : cadastre']

    assert iden.keywords2[4].type == 'theme'
    assert iden.keywords2[4].thesaurus['title'] == "INSPIRE themes"
    assert iden.keywords2[4].thesaurus['date'] == '2008-06-01'
    assert iden.keywords2[4].thesaurus['datetype'] == 'publication'
    assert_list(iden.keywords2[4].keywords, 1)
    assert iden.keywords2[4].keywords == ['Parcelles cadastrales']

    assert iden.keywords2[5].type == 'theme'
    assert iden.keywords2[5].thesaurus['title'] == "GEMET"
    assert iden.keywords2[5].thesaurus['date'] == '2012-07-20'
    assert iden.keywords2[5].thesaurus['datetype'] == 'publication'
    assert_list(iden.keywords2[5].keywords, 2)
    assert iden.keywords2[5].keywords == ['cadastre', u'bâtiment']

    assert_list(iden.topiccategory, 1)
    assert iden.topiccategory[0] == 'planningCadastre'

    assert iden.supplementalinformation == \
           u"La légende du plan cadastral est consultable sur: " \
           "http://www.cadastre.gouv.fr/scpc/pdf/legendes/FR_fr/Legende%20du" \
           "%20plan%20sur%20internet.pdf"

    # content info section
    assert_list(md.contentinfo, 1)
    ci = md.contentinfo[0]

    assert ci.compliancecode is None
    assert_list(ci.language, 0)
    # BUG FIX: was `== False` (PEP 8 E712); use identity check for booleans
    assert ci.includedwithdataset is False
    assert_list(ci.featuretypenames, 0)
    assert_list(ci.featurecatalogues, 0)
예제 #15
0
    def __init__(self,
                 elem,
                 parent=None,
                 children=None,
                 index=0,
                 parse_remote_metadata=False,
                 timeout=30):
        """Parse a WMS 1.3.0 ``<Layer>`` capabilities element.

        :param elem: etree Element for the ``<Layer>`` node.
        :param parent: parent layer metadata object, if any; inheritable
            properties (CRS options, styles, WGS84 bounding box) are copied
            from it per the WMS layer-inheritance rules.
        :param children: optional pre-built list of child layer objects.
        :param index: position among siblings; combined with the parent's
            index into a dotted path string (e.g. ``"0.2"``).
        :param parse_remote_metadata: when True, each MetadataURL is fetched
            and parsed into an FGDC ``Metadata`` or ISO ``MD_Metadata`` object.
        :param timeout: socket timeout (seconds) for remote metadata fetches.
        :raises ValueError: if *elem* is not a Layer element, or a Style is
            missing its mandatory Name or Title.
        """
        if xmltag_split(elem.tag) != 'Layer':
            raise ValueError('%s should be a Layer' % (elem, ))

        self.parent = parent
        if parent:
            self.index = "%s.%d" % (parent.index, index)
        else:
            self.index = str(index)

        self._children = children

        # Name is optional for group layers; id mirrors it.
        self.id = self.name = testXMLValue(
            elem.find(nspath('Name', WMS_NAMESPACE)))

        # layer attributes (all optional, default 0 per the WMS schema)
        self.queryable = int(elem.attrib.get('queryable', 0))
        self.cascaded = int(elem.attrib.get('cascaded', 0))
        self.opaque = int(elem.attrib.get('opaque', 0))
        self.noSubsets = int(elem.attrib.get('noSubsets', 0))
        self.fixedWidth = int(elem.attrib.get('fixedWidth', 0))
        self.fixedHeight = int(elem.attrib.get('fixedHeight', 0))

        # title is mandatory property
        self.title = None
        title = testXMLValue(elem.find(nspath('Title', WMS_NAMESPACE)))
        if title is not None:
            self.title = title.strip()

        self.abstract = testXMLValue(
            elem.find(nspath('Abstract', WMS_NAMESPACE)))

        # TODO: what is the preferred response to esri's handling of custom projections
        #       in the spatial ref definitions? see http://resources.arcgis.com/en/help/main/10.1/index.html#//00sq000000m1000000
        #       and an example (20150812) http://maps.ngdc.noaa.gov/arcgis/services/firedetects/MapServer/WMSServer?request=GetCapabilities&service=WMS

        # bboxes: EX_GeographicBoundingBox gives the WGS84 extent; fall back
        # to the parent's (inheritable) value when absent.
        b = elem.find(nspath('EX_GeographicBoundingBox', WMS_NAMESPACE))
        self.boundingBoxWGS84 = None
        if b is not None:
            minx = b.find(nspath('westBoundLongitude', WMS_NAMESPACE))
            miny = b.find(nspath('southBoundLatitude', WMS_NAMESPACE))
            maxx = b.find(nspath('eastBoundLongitude', WMS_NAMESPACE))
            maxy = b.find(nspath('northBoundLatitude', WMS_NAMESPACE))
            box = tuple(
                map(float, [
                    minx.text if minx is not None else None,
                    miny.text if miny is not None else None,
                    maxx.text if maxx is not None else None,
                    maxy.text if maxy is not None else None
                ]))

            self.boundingBoxWGS84 = tuple(box)
        elif self.parent:
            if hasattr(self.parent, 'boundingBoxWGS84'):
                self.boundingBoxWGS84 = self.parent.boundingBoxWGS84

        # make a bbox list (of tuples)
        crs_list = []
        for bb in elem.findall(nspath('BoundingBox', WMS_NAMESPACE)):
            srs_str = bb.attrib.get('CRS', None)
            srs = Crs(srs_str)

            box = tuple(
                map(float, [
                    bb.attrib['minx'], bb.attrib['miny'], bb.attrib['maxx'],
                    bb.attrib['maxy']
                ]))
            minx, miny, maxx, maxy = box[0], box[1], box[2], box[3]

            # handle the ordering so that it always
            # returns (minx, miny, maxx, maxy)
            if srs and srs.axisorder == 'yx':
                # reverse things
                minx, miny, maxx, maxy = box[1], box[0], box[3], box[2]

            crs_list.append((
                minx,
                miny,
                maxx,
                maxy,
                srs_str,
            ))
        self.crs_list = crs_list
        # and maintain the original boundingBox attribute (first in list)
        # or the wgs84 bbox (to handle cases of incomplete parentage)
        self.boundingBox = crs_list[0] if crs_list else self.boundingBoxWGS84

        # ScaleHint (WMS 1.1.1 style element; both attributes must be present)
        sh = elem.find(nspath('ScaleHint', WMS_NAMESPACE))
        self.scaleHint = None
        if sh is not None:
            if 'min' in sh.attrib and 'max' in sh.attrib:
                self.scaleHint = {
                    'min': sh.attrib['min'],
                    'max': sh.attrib['max']
                }

        # Attribution: title/url/logo are each optional.
        # NOTE: self.attribution only exists when the element is present.
        attribution = elem.find(nspath('Attribution', WMS_NAMESPACE))
        if attribution is not None:
            self.attribution = dict()
            title = attribution.find(nspath('Title', WMS_NAMESPACE))
            url = attribution.find(nspath('OnlineResource', WMS_NAMESPACE))
            logo = attribution.find(nspath('LogoURL', WMS_NAMESPACE))
            if title is not None:
                self.attribution['title'] = title.text
            if url is not None:
                self.attribution['url'] = url.attrib[
                    '{http://www.w3.org/1999/xlink}href']
            if logo is not None:
                self.attribution['logo_size'] = (int(logo.attrib['width']),
                                                 int(logo.attrib['height']))
                self.attribution['logo_url'] = logo.find(
                    nspath('OnlineResource', WMS_NAMESPACE)
                ).attrib['{http://www.w3.org/1999/xlink}href']

        # TODO: get this from the bbox attributes instead (deal with parents)
        # SRS options
        self.crsOptions = []

        # Copy any parent SRS options (they are inheritable properties)
        if self.parent:
            self.crsOptions = list(self.parent.crsOptions)

        # Look for SRS option attached to this layer
        if elem.find(nspath('CRS', WMS_NAMESPACE)) is not None:
            # some servers found in the wild use a single SRS
            # tag containing a whitespace separated list of SRIDs
            # instead of several SRS tags. hence the inner loop
            for srslist in [
                    x.text for x in elem.findall(nspath('CRS', WMS_NAMESPACE))
            ]:
                if srslist:
                    for srs in srslist.split():
                        self.crsOptions.append(srs)

        # Get rid of duplicate entries
        self.crsOptions = list(set(self.crsOptions))

        # Set self.crsOptions to None if the layer (and parents) had no SRS options
        if len(self.crsOptions) == 0:
            # raise ValueError('%s no SRS available!?' % (elem,))
            # Comment by D Lowe.
            # Do not raise ValueError as it is possible that a layer is purely a parent layer and does not have SRS specified. Instead set crsOptions to None
            # Comment by Jachym:
            # Do not set it to None, but to [], which will make the code
            # work further. Fixed by anthonybaxter
            self.crsOptions = []

        # Styles
        self.styles = {}

        # Copy any parent styles (they are inheritable properties)
        if self.parent:
            self.styles = self.parent.styles.copy()

        # Get the styles for this layer (items with the same name are replaced)
        for s in elem.findall(nspath('Style', WMS_NAMESPACE)):
            name = s.find(nspath('Name', WMS_NAMESPACE))
            title = s.find(nspath('Title', WMS_NAMESPACE))
            if name is None or title is None:
                raise ValueError('%s missing name or title' % (s, ))
            style = {'title': title.text}
            # legend url
            legend = s.find(nspath('LegendURL/OnlineResource', WMS_NAMESPACE))
            if legend is not None:
                style['legend'] = legend.attrib[
                    '{http://www.w3.org/1999/xlink}href']

            lgd = s.find(nspath('LegendURL', WMS_NAMESPACE))
            if lgd is not None:
                if 'width' in list(lgd.attrib.keys()):
                    style['legend_width'] = lgd.attrib.get('width')
                if 'height' in list(lgd.attrib.keys()):
                    style['legend_height'] = lgd.attrib.get('height')

                lgd_format = lgd.find(nspath('Format', WMS_NAMESPACE))
                if lgd_format is not None:
                    style['legend_format'] = lgd_format.text.strip()
            self.styles[name.text] = style

        # keywords
        self.keywords = [
            f.text
            for f in elem.findall(nspath('KeywordList/Keyword', WMS_NAMESPACE))
        ]

        # extents replaced by dimensions of name
        # comment by Soren Scott
        # <Dimension name="elevation" units="meters" default="500" multipleValues="1"
        #    nearestValue="0" current="true" unitSymbol="m">500, 490, 480</Dimension>
        # it can be repeated with the same name so ? this assumes a single one to match 1.1.1

        self.timepositions = None
        self.defaulttimeposition = None
        time_dimension = None
        for dim in elem.findall(nspath('Dimension', WMS_NAMESPACE)):
            dim_name = dim.attrib.get('name')
            if dim_name is not None and dim_name.lower() == 'time':
                time_dimension = dim
        if time_dimension is not None:
            self.timepositions = time_dimension.text.split(
                ',') if time_dimension.text else None
            self.defaulttimeposition = time_dimension.attrib.get(
                'default', None)

        # Elevations - available vertical levels
        self.elevations = None
        elev_dimension = None
        for dim in elem.findall(nspath('Dimension', WMS_NAMESPACE)):
            # WMS 1.3.0 encodes the dimension kind in the 'name' attribute
            # (<Dimension name="elevation" ...>), mirroring the 'time'
            # handling above. The previous check for an 'elevation'
            # attribute could never match a spec-conformant document,
            # leaving self.elevations always None.
            dim_name = dim.attrib.get('name')
            if dim_name is not None and dim_name.lower() == 'elevation':
                elev_dimension = dim
        if elev_dimension is not None:
            self.elevations = [
                e.strip() for e in elev_dimension.text.split(',')
            ] if elev_dimension.text else None

        # and now capture the dimensions as more generic things (and custom things)
        self.dimensions = {}
        for dim in elem.findall(nspath('Dimension', WMS_NAMESPACE)):
            dim_name = dim.attrib.get('name')
            dim_data = {}
            for k, v in six.iteritems(dim.attrib):
                if k != 'name':
                    dim_data[k] = v
            # single values and ranges are not differentiated here.
            # Guard dim.text against None: an empty <Dimension/> element
            # would otherwise raise AttributeError on .strip().
            dim_data['values'] = dim.text.strip().split(
                ',') if dim.text and dim.text.strip() else None
            self.dimensions[dim_name] = dim_data

        # MetadataURLs; optionally dereferenced and parsed (FGDC or ISO 19115)
        self.metadataUrls = []
        for m in elem.findall(nspath('MetadataURL', WMS_NAMESPACE)):
            metadataUrl = {
                'type':
                testXMLValue(m.attrib['type'], attrib=True),
                'format':
                testXMLValue(m.find(nspath('Format', WMS_NAMESPACE))),
                'url':
                testXMLValue(m.find(nspath('OnlineResource', WMS_NAMESPACE)).
                             attrib['{http://www.w3.org/1999/xlink}href'],
                             attrib=True)
            }

            if metadataUrl[
                    'url'] is not None and parse_remote_metadata:  # download URL
                try:
                    content = openURL(metadataUrl['url'], timeout=timeout)
                    doc = etree.parse(content)
                    if metadataUrl['type'] is not None:
                        if metadataUrl['type'] == 'FGDC':
                            metadataUrl['metadata'] = Metadata(doc)
                        if metadataUrl['type'] == 'TC211':
                            metadataUrl['metadata'] = MD_Metadata(doc)
                except Exception:
                    # best-effort: remote metadata failures must not break
                    # capabilities parsing
                    metadataUrl['metadata'] = None

            self.metadataUrls.append(metadataUrl)

        # DataURLs
        self.dataUrls = []
        for m in elem.findall(nspath('DataURL', WMS_NAMESPACE)):
            dataUrl = {
                'format':
                m.find(nspath('Format', WMS_NAMESPACE)).text.strip(),
                'url':
                m.find(nspath('OnlineResource', WMS_NAMESPACE)).
                attrib['{http://www.w3.org/1999/xlink}href']
            }
            self.dataUrls.append(dataUrl)

        # FeatureListURLs
        self.featureListUrls = []
        for m in elem.findall(nspath('FeatureListURL', WMS_NAMESPACE)):
            featureUrl = {
                'format':
                m.find(nspath('Format', WMS_NAMESPACE)).text.strip(),
                'url':
                m.find(nspath('OnlineResource', WMS_NAMESPACE)).
                attrib['{http://www.w3.org/1999/xlink}href']
            }
            self.featureListUrls.append(featureUrl)

        # recurse into nested <Layer> children
        self.layers = []
        for child in elem.findall(nspath('Layer', WMS_NAMESPACE)):
            self.layers.append(ContentMetadata(child, self))
예제 #16
0
    def from_xml(self, e):
        '''Build and return an InspireMetadata object from a (serialized) etree Element e.

        The element is parsed with owslib's MD_Metadata (ISO 19139) and its
        fields are mapped onto the project's InspireMetadata schema objects
        (ResponsibleParty, ThesaurusTerms, FreeKeyword, TemporalExtent,
        GeographicBoundingBox, SpatialResolution, ReferenceSystem,
        Conformity).

        Raises an Exception when list lengths in the ISO record are
        inconsistent (distance/uom, conformance date/datetype) or when the
        reference system code is not recognizable.
        '''
        def to_date(s):
            # ISO 'YYYY-MM-DD' string -> datetime.date; anything else -> None
            return strptime(s, '%Y-%m-%d').date() if isinstance(s,
                                                                str) else None

        def to_responsible_party(alist):
            # Map owslib CI_ResponsibleParty objects to project objects
            result = []
            for it in alist:
                result.append(
                    ResponsibleParty(organization=unicode(it.organization),
                                     email=unicode(it.email),
                                     role=it.role))
            return result

        # Parse object

        md = MD_Metadata(e)

        datestamp = to_date(md.datestamp)
        id_list = md.identification.uricode

        # Distribution online-resource URLs (locators)
        url_list = []
        if md.distribution:
            for it in md.distribution.online:
                url_list.append(it.url)

        topic_list = []
        for topic in md.identification.topiccategory:
            topic_list.append(topic)

        # Split keywords into thesaurus-backed terms and free keywords
        free_keywords = []
        keywords = {}
        for it in md.identification.keywords:
            thes_title = it['thesaurus']['title']
            # Lookup and instantiate a named thesaurus
            thes = None
            if thes_title:
                try:
                    # Titles may be encoded as "<title>, version <x.y>"
                    thes_title, thes_version = thes_title.split(',')
                except:
                    # NOTE(review): bare except — assumes the only failure
                    # mode is "no comma" (ValueError from unpacking)
                    thes_version = None
                else:
                    thes_version = re.sub(r'^[ ]*version[ ]+(\d\.\d)$', r'\1',
                                          thes_version)
                # Note thes_version can be used to enforce a specific thesaurus version
                try:
                    thes = Thesaurus.lookup(title=thes_title,
                                            for_keywords=True)
                except ValueError:
                    thes = None
            # Treat present keywords depending on if they belong to a thesaurus
            if thes:
                # Treat as thesaurus terms; discard unknown terms
                terms = []
                for keyword in it['keywords']:
                    # Try matching by value first, then by token
                    term = thes.vocabulary.by_value.get(keyword)
                    if not term:
                        term = thes.vocabulary.by_token.get(keyword)
                    if term:
                        terms.append(term.value)
                keywords[thes.name] = ThesaurusTerms(thesaurus=thes,
                                                     terms=terms)
            else:
                # Treat as free keywords (not really a thesaurus)
                vocab_date = to_date(it['thesaurus']['date'])
                vocab_datetype = it['thesaurus']['datetype']
                if thes_title:
                    thes_title = unicode(thes_title)
                for keyword in it['keywords']:
                    free_keywords.append(
                        FreeKeyword(value=keyword,
                                    reference_date=vocab_date,
                                    date_type=vocab_datetype,
                                    originating_vocabulary=thes_title))

        # Temporal extent (single interval, when either endpoint is present)
        temporal_extent = []
        if md.identification.temporalextent_start or md.identification.temporalextent_end:
            temporal_extent = [
                TemporalExtent(
                    start=to_date(md.identification.temporalextent_start),
                    end=to_date(md.identification.temporalextent_end))
            ]

        # Geographic bounding box (single box, when present)
        bbox = []
        if md.identification.extent:
            if md.identification.extent.boundingBox:
                bbox = [
                    GeographicBoundingBox(
                        nblat=float(md.identification.extent.boundingBox.maxy),
                        sblat=float(md.identification.extent.boundingBox.miny),
                        eblng=float(md.identification.extent.boundingBox.maxx),
                        wblng=float(md.identification.extent.boundingBox.minx))
                ]

        # Reference dates, keyed by CI_DateTypeCode
        creation_date = None
        publication_date = None
        revision_date = None

        for it in md.identification.date:
            if it.type == 'creation':
                creation_date = to_date(it.date)
            elif it.type == 'publication':
                publication_date = to_date(it.date)
            elif it.type == 'revision':
                revision_date = to_date(it.date)

        # Spatial resolution: distances (with uom) and scale denominators
        spatial_list = []

        if len(md.identification.distance) != len(md.identification.uom):
            raise Exception(
                _('Found unequal list lengths distance,uom (%s, %s)' %
                  (md.identification.distance, md.identification.uom)))
        else:
            for i in range(0, len(md.identification.distance)):
                spatial_list.append(
                    SpatialResolution(distance=int(
                        md.identification.distance[i]),
                                      uom=unicode(md.identification.uom[i])))

            for i in range(0, len(md.identification.denominators)):
                spatial_list.append(
                    SpatialResolution(
                        denominator=int(md.identification.denominators[i])))
        conf_list = []
        invalid_degree = False
        #if md.referencesystem.codeSpace:
        #    code_space = md.referenceSystem.codeSpace
        reference_system = None
        if md.referencesystem:
            code = md.referencesystem.code
            reference_systems = vocabularies.by_name('reference-systems').get(
                'vocabulary')
            if code in reference_systems:
                # Check whether the URI is provided
                reference_system = ReferenceSystem(code=code)
            else:
                # Check whether just the EPSG code suffix is provided
                code_full = 'http://www.opengis.net/def/crs/EPSG/0/{code}'.format(
                    code=code)
                if code_full in reference_systems:
                    reference_system = ReferenceSystem(code=code_full)
                else:
                    raise Exception(_('Reference system not recognizable'))

            if md.referencesystem.codeSpace:
                reference_system.code_space = md.referencesystem.codeSpace
            if md.referencesystem.version:
                reference_system.version = md.referencesystem.version

        if len(md.dataquality.conformancedate) != len(
                md.dataquality.conformancedatetype):
            # Date list is unequal to datetype list, this means wrong XML so exception is thrown
            raise Exception(
                _('Found unequal list lengths: conformance date, conformancedatetype'
                  ))
        if len(md.dataquality.conformancedegree) != len(
                md.dataquality.conformancedate):
            # Degree list is unequal to date/datetype lists, so we are unable to conclude
            # to which conformity item each degree value corresponds, so all are set to
            # not-evaluated (Todo: MD_Metadata bug #63)
            invalid_degree = True

        if md.dataquality.conformancedate:
            #and len(md.dataquality.conformancedate) == len(md.dataquality.degree):
            for i in range(0, len(md.dataquality.conformancedate)):

                date = to_date(md.dataquality.conformancedate[i])

                date_type = md.dataquality.conformancedatetype[i]
                # TODO md.dataquality.conformancedatetype returns empty
                if invalid_degree:
                    degree = 'not-evaluated'
                else:
                    try:
                        if md.dataquality.conformancedegree[i] == 'true':
                            degree = 'conformant'
                        elif md.dataquality.conformancedegree[i] == 'false':
                            degree = 'not-conformant'
                    except:
                        # NOTE(review): bare except; also leaves 'degree'
                        # unbound if degree[i] exists but is neither
                        # 'true' nor 'false' — confirm upstream values
                        degree = "not-evaluated"
                # unicode(None) == 'None' is used as the "no title" marker
                title = unicode(md.dataquality.conformancetitle[i])
                if title != 'None':
                    conf_list.append(
                        Conformity(title=title,
                                   date=date,
                                   date_type=date_type,
                                   degree=degree))

                # TODO: is title required fields? If so the following is unnecessary
                else:
                    conf_list.append(
                        Conformity(date=date,
                                   date_type=date_type,
                                   degree=degree))

        # Use limitations and other constraints
        limit_list = []
        for it in md.identification.uselimitation:
            limit_list.append(unicode(it))
        constr_list = []
        for it in md.identification.otherconstraints:
            constr_list.append(unicode(it))

        # Assemble the InspireMetadata object
        obj = InspireMetadata()

        obj.contact = to_responsible_party(md.contact)
        obj.datestamp = datestamp
        obj.languagecode = md.languagecode
        obj.title = unicode(md.identification.title)
        obj.abstract = unicode(md.identification.abstract)
        obj.identifier = id_list[0]
        obj.locator = url_list
        #obj.resource_language = md.identification.resourcelanguage
        obj.topic_category = topic_list
        obj.keywords = keywords
        obj.free_keywords = free_keywords
        obj.bounding_box = bbox
        obj.temporal_extent = temporal_extent
        obj.creation_date = creation_date
        obj.publication_date = publication_date
        obj.revision_date = revision_date
        obj.lineage = unicode(md.dataquality.lineage)
        obj.spatial_resolution = spatial_list
        obj.reference_system = reference_system
        obj.conformity = conf_list
        obj.access_constraints = limit_list
        obj.limitations = constr_list
        obj.responsible_party = to_responsible_party(md.identification.contact)

        return obj
예제 #17
0
    def from_xml(self, e):
        '''Build and return an InspireMetadata object from a (serialized) etree
        Element e.

        Older/simpler variant of the ISO-19139 -> InspireMetadata mapping:
        keywords are resolved only against named thesauri (no free-keyword
        fallback) and no reference system is extracted.

        Raises an Exception on inconsistent ISO list lengths
        (distance/uom, conformance date/datetype).
        '''

        def to_date(string):
            # ISO 'YYYY-MM-DD' string -> datetime.date; anything else -> None
            if isinstance(string, str):
                return datetime.datetime.strptime(string,'%Y-%m-%d').date()
            else:
                return None

        def to_resp_party(alist):
            # Map owslib CI_ResponsibleParty objects to project objects
            result = []
            for it in alist:
                result.append(ResponsibleParty(
                    organization = unicode(it.organization),
                    email = unicode(it.email),
                    role = it.role))
            return result

        md = MD_Metadata(e)

        datestamp = to_date(md.datestamp)
        id_list = md.identification.uricode

        # Distribution online-resource URLs (locators)
        url_list = []
        if md.distribution:
            for it in md.distribution.online:
                url_list.append(it.url)

        topic_list = []
        for topic in md.identification.topiccategory:
            topic_list.append(topic)
        
        # Keywords: only thesaurus-backed terms are kept; unmatched
        # thesauri (or lookup errors) are silently dropped.
        keywords_dict = {}
        for it in md.identification.keywords:
            thes_title = it['thesaurus']['title']
            if thes_title is not None:
                thes_split = thes_title.split(',')
                # TODO thes_split[1] (=version) can be used in a get_by_title_and_version() 
                # to enforce a specific thesaurus version.
                thes_title = thes_split[0]
                try:
                    thes_name = vocabularies.munge('Keywords-' + thes_title)
                    term_list = []
                    for t in it['keywords']:
                        term_list.append(t)
                    thes = Thesaurus.make(thes_name)
                    if thes:
                        kw = ThesaurusTerms(thesaurus=thes, terms=term_list)
                        keywords_dict.update({thes_name:kw})
                except:
                    # NOTE(review): bare except silently discards the whole
                    # keyword group on any error — confirm this is intended
                    pass
        # Temporal extent (single interval, when either endpoint is present)
        temporal_extent = []
        if md.identification.temporalextent_start or md.identification.temporalextent_end:
            temporal_extent = [TemporalExtent(
                start = to_date(md.identification.temporalextent_start),
                end = to_date(md.identification.temporalextent_end))]

        # Geographic bounding box (single box, when present)
        bbox = []
        if md.identification.extent:
            if md.identification.extent.boundingBox:
                bbox = [GeographicBoundingBox(
                    nblat = float(md.identification.extent.boundingBox.maxy),
                    sblat = float(md.identification.extent.boundingBox.miny),
                    eblng = float(md.identification.extent.boundingBox.maxx),
                    wblng = float(md.identification.extent.boundingBox.minx))]

        # Reference dates, keyed by CI_DateTypeCode
        creation_date = None
        publication_date = None
        revision_date = None

        for it in md.identification.date:
            if it.type == 'creation':
                creation_date = to_date(it.date)
            elif it.type == 'publication':
                publication_date = to_date(it.date)
            elif it.type == 'revision':
                revision_date = to_date(it.date)

        #if not creation_date:
        #    raise Exception('creation date not present','')
        #elif not publication_date:
        #    raise Exception('publication date not present','')
        #elif not revision_date:
        #    raise Exception('revision date not present','')

        # Spatial resolution: distances (with uom) and scale denominators
        spatial_list = []

        if len(md.identification.distance) != len(md.identification.uom):
            raise Exception(
                'Found unequal list lengths distance,uom (%s, %s)' % (
                    md.identification.distance,md.identification.uom))
        else:
                for i in range(0,len(md.identification.distance)):
                    spatial_list.append(SpatialResolution(
                        distance = int(md.identification.distance[i]),
                        uom = unicode(md.identification.uom[i])))

                for i in range(0, len(md.identification.denominators)):
                    spatial_list.append(SpatialResolution(
                        denominator = int(md.identification.denominators[i])))
        conf_list = []
        invalid_degree = False

        if len(md.dataquality.conformancedate) != len(md.dataquality.conformancedatetype):
            # Date list is unequal to datetype list, this means wrong XML so exception is thrown
            raise Exception('Found unequal list lengths: conformance date, conformancedatetype')
        if len(md.dataquality.conformancedegree) != len(md.dataquality.conformancedate):
            # Degree list is unequal to date/datetype lists, so we are unable to conclude
            # to which conformity item each degree value corresponds, so all are set to 
            # not-evaluated (Todo: MD_Metadata bug #63)
            invalid_degree = True

        if md.dataquality.conformancedate:
        #and len(md.dataquality.conformancedate) == len(md.dataquality.degree):
            for i in range(0,len(md.dataquality.conformancedate)):

                date = to_date(md.dataquality.conformancedate[i])

                date_type = md.dataquality.conformancedatetype[i]
                # TODO md.dataquality.conformancedatetype returns empty
                if invalid_degree:
                    degree = 'not-evaluated'
                else:
                    try:
                        if md.dataquality.conformancedegree[i] == 'true':
                            degree = 'conformant'
                        elif md.dataquality.conformancedegree[i] == 'false':
                            degree = 'not-conformant'
                    except:
                        # NOTE(review): bare except; 'degree' stays unbound
                        # when degree[i] is neither 'true' nor 'false'
                        degree = "not-evaluated"
                # unicode(None) == 'None' is used as the "no title" marker
                title = unicode(md.dataquality.conformancetitle[i])
                if title != 'None': 
                    conf_list.append(Conformity(title=title, date=date, date_type=date_type, degree=degree))

                # TODO: is title required fields? If so the following is unnecessary
                else:
                    conf_list.append(Conformity(date=date, date_type=date_type, degree=degree))

        # Use limitations and other constraints
        limit_list = []
        for it in md.identification.uselimitation:
                limit_list.append(unicode(it))
        constr_list = []
        for it in md.identification.otherconstraints:
                constr_list.append(unicode(it))

        # Assemble the InspireMetadata object
        obj = InspireMetadata()

        obj.contact = to_resp_party(md.contact)
        obj.datestamp = datestamp
        obj.languagecode = md.languagecode
        obj.title = unicode(md.identification.title)
        obj.abstract = unicode(md.identification.abstract)
        obj.identifier = id_list[0]
        obj.locator = url_list
        #obj.resource_language = md.identification.resourcelanguage
        obj.topic_category = topic_list
        obj.keywords = keywords_dict
        obj.bounding_box = bbox
        obj.temporal_extent = temporal_extent
        obj.creation_date = creation_date
        obj.publication_date = publication_date
        obj.revision_date = revision_date
        obj.lineage = unicode(md.dataquality.lineage)
        obj.spatial_resolution = spatial_list
        obj.conformity = conf_list
        obj.access_constraints = limit_list
        obj.limitations = constr_list
        obj.responsible_party = to_resp_party(md.identification.contact)

        return obj