def patch_well_known_namespaces(etree_module):
    """Monkey patches the etree module to add some well-known namespaces."""

    import warnings
    import six
    from owslib.namespaces import Namespaces
    ns = Namespaces()

    try:
        register_namespace = etree_module.register_namespace
    except AttributeError:
        try:
            etree_module._namespace_map

            def register_namespace(prefix, uri):
                etree_module._namespace_map[uri] = prefix
        except AttributeError:

            def register_namespace(prefix, uri):
                pass

            warnings.warn(
                "Only 'lxml.etree' >= 2.3 and 'xml.etree.ElementTree' >= 1.3 are fully supported!"
            )

    for k, v in six.iteritems(ns.get_namespaces()):
        register_namespace(k, v)
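A minimal usage sketch (not part of the harvested code): once the module is patched, ElementTree serializes the well-known prefixes instead of generated ones such as ns0.

import xml.etree.ElementTree as ET

patch_well_known_namespaces(ET)

# owslib maps the 'gml' prefix to this URI, so tostring() now emits
# gml:Point rather than ns0:Point.
elem = ET.Element('{http://www.opengis.net/gml}Point')
print(ET.tostring(elem))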
Example #2
def patch_well_known_namespaces(etree_module):
    """Monkey patches the etree module to add some well-known namespaces."""

    from owslib.namespaces import Namespaces
    ns = Namespaces()
    # ElementTree's _namespace_map maps URI -> prefix, so invert the
    # {prefix: URI} dict returned by get_namespaces().
    etree_module._namespace_map.update(
        {uri: prefix for prefix, uri in ns.get_namespaces().items()})
Example #3
    def load_naps(self):

        ns = Namespaces()
        gmd = ns.get_namespace('gmd')
        session = connect_to_database()

        for napid in self.napids:

            print('{0}Full NAP Record for {1}{2}'.format(
                Fore.GREEN, Fore.CYAN, napid))
            self.csw.getrecordbyid(id=[napid], outputschema=gmd)

            ec_rec = find_record_by_uuid(session, napid, query_class=ECRecord)

            if ec_rec is None:
                ec_rec = ECRecord(
                    uuid=self.csw.records[napid].identifier,
                    title=self.csw.records[napid].identification.title,
                    state='active',
                    nap_record=self.csw.records[napid].xml,
                    csw_scanned=datetime.now().isoformat())
            else:
                ec_rec.title = self.csw.records[napid].identification.title
                ec_rec.state = 'active'
                ec_rec.nap_record = self.csw.records[napid].xml
                ec_rec.csw_scanned = datetime.now().isoformat()

            add_record(session, ec_rec)

        session.close_all()
Example #4
def get_namespaces():
    n = Namespaces()
    ns = n.get_namespaces(
        ["fes", "ogc", "om", "gml32", "sa", "sml", "swe20", "swes", "xlink"])
    ns["ows"] = n.get_namespace("ows110")
    ns["sos"] = n.get_namespace("sos20")
    return ns
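A usage sketch (assumed, not from the source project): the {prefix: URI} dict these helpers return is shaped for direct use as the namespaces argument of ElementTree searches.

import xml.etree.ElementTree as ET

ns = get_namespaces()
doc = ET.fromstring(
    '<sos:Capabilities xmlns:sos="%s"><sos:contents/></sos:Capabilities>'
    % ns['sos'])
# The 'sos' prefix in the path resolves through the dict built above.
print(doc.findall('sos:contents', ns))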
Example #5
 def get_namespaces(self):
     """
     """
     n = Namespaces()
     namespaces = n.get_namespaces(["sml", "gml", "xlink", "swe"])
     namespaces["ism"] = "urn:us:gov:ic:ism:v2"
     return namespaces
Example #6
def patch_well_known_namespaces(etree_module):
    """Monkey patches the etree module to add some well-known namespaces."""

    from owslib.namespaces import Namespaces
    ns = Namespaces()
    for k, v in ns.get_namespaces().items():
        etree_module.register_namespace(k, v)
Example #7
 def get_namespaces():
     """
     Returns specified namespaces using owslib Namespaces function.
     """
     n = Namespaces()
     ns = n.get_namespaces(
         ["gco", "gmd", "gml", "gml32", "gmx", "gts", "srv", "xlink", "dc"])
     return ns
Example #8
def patch_well_known_namespaces(etree_module):
    """Monkey patches the etree module to add some well-known namespaces."""

    from owslib.namespaces import Namespaces
    ns = Namespaces()
    for k, v in ns.get_namespaces().items():
        etree_module.register_namespace(k, v)
Example #9
    def __init__(self, catalogue_url: str):

        self._catalogue_url = catalogue_url

        self._catalogue = None
        self._catalogue_service = None

        self._namespaces = Namespaces()
Example #11
def __get_namespaces():
    """Get default namespaces from OWSLib, extended with the 'gfc' namespace
    to be able to parse feature catalogues."""
    n = Namespaces()
    ns = n.get_namespaces()
    ns[None] = n.get_namespace("gmd")
    ns['gfc'] = 'http://www.isotc211.org/2005/gfc'
    return ns
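A short sketch of why the None key is useful: it carries the default namespace URI, so callers can build fully qualified (Clark notation) tag names without hard-coding the URI.

ns = __get_namespaces()
# '{http://www.isotc211.org/2005/gmd}MD_Metadata'
default_tag = '{%s}MD_Metadata' % ns[None]
# The extra 'gfc' entry resolves feature catalogue elements the same way.
gfc_tag = '{%s}FC_FeatureCatalogue' % ns['gfc']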
Example #12
def get_namespaces():
    n = Namespaces()
    ns = n.get_namespaces([
        "fes", "gml32", "ogc", "om20", "sa", "sml", "swe20", "swes", "wml2",
        "xlink", "xsi"
    ])
    ns["gda"] = 'http://www.opengis.net/sosgda/1.0'
    ns["ns"] = "http://www.opengis.net/gml/3.2"
    ns["ows"] = n.get_namespace("ows110")
    ns["sams"] = "http://www.opengis.net/samplingSpatial/2.0"
    ns["sf"] = "http://www.opengis.net/sampling/2.0"
    ns["sos"] = n.get_namespace("sos20")
    return ns
Example #13
class IOOSSOSGCCheck(BaseSOSGCCheck, IOOSBaseCheck):

    # set up namespaces for XPath
    ns = Namespaces().get_namespaces(['sos', 'gml', 'xlink'])
    ns['ows'] = Namespaces().get_namespace('ows110')

    @check_has(BaseCheck.HIGH)
    def check_high(self, ds):
        return []

    @check_has(BaseCheck.MEDIUM)
    def check_recommended(self, ds):
        return [
            ('service_contact_email',
             XPath(
                 "/sos:Capabilities/ows:ServiceProvider/ows:ServiceContact/ows:ContactInfo/ows:Address/ows:ElectronicMailAddress",
                 namespaces=self.ns)),
            ('service_contact_name',
             XPath(
                 "/sos:Capabilities/ows:ServiceProvider/ows:ServiceContact/ows:IndividualName",
                 namespaces=self.ns)),
            ('service_provider_name',
             XPath("/sos:Capabilities/ows:ServiceProvider/ows:ProviderName",
                   namespaces=self.ns)),
            ('service_title',
             XPath("/sos:Capabilities/ows:ServiceProvider/ows:ProviderName",
                   namespaces=self.ns)),
            ('service_type_name',
             XPath(
                 "/sos:Capabilities/ows:ServiceIdentification/ows:ServiceType",
                 namespaces=self.ns)),
            ('service_type_version',
             XPath(
                 "/sos:Capabilities/ows:ServiceIdentification/ows:ServiceTypeVersion",
                 namespaces=self.ns)),
            # ds.identification[0].observed_properties has this as well, but
            # don't want to try to shoehorn a function here
            # ('variable_names', len(ds.identification[0].observed_properties) > 0)
            ('variable_names',
             XPath(
                 "/sos:Capabilities/sos:Contents/sos:ObservationOfferingList/sos:ObservationOffering/sos:observedProperty",
                 namespaces=self.ns)),
            ('data_format_template_version',
             XPath(
                 "/sos:Capabilities/ows:OperationsMetadata/ows:ExtendedCapabilities/gml:metaDataProperty[@xlink:title='ioosTemplateVersion']/gml:version",
                 namespaces=self.ns))
        ]

    @check_has(BaseCheck.LOW)
    def check_suggested(self, ds):
        return ['altitude_units']
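A hedged sketch of the XPath pattern these checks rely on: lxml compiles each expression once, with the owslib prefix map bound via the namespaces argument.

from lxml.etree import XPath, fromstring
from owslib.namespaces import Namespaces

ns = Namespaces().get_namespaces(['sos'])
find_caps = XPath('/sos:Capabilities', namespaces=ns)
doc = fromstring('<sos:Capabilities xmlns:sos="%s"/>' % ns['sos'])
print(find_caps(doc))  # one-element list holding the root element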
Example #14
def patch_well_known_namespaces(etree_module):
    """Monkey patches the etree module to add some well-known namespaces."""

    ns = Namespaces()

    try:
        register_namespace = etree_module.register_namespace
    except AttributeError:
        etree_module._namespace_map

        def register_namespace(prefix, uri):
            etree_module._namespace_map[uri] = prefix

    for k, v in list(ns.get_namespaces().items()):
        register_namespace(k, v)
Example #15
def get_md_resource(file_path):
    """Read the file and parse into an XML tree.

    Parameters
    ----------
    file_path : str
        Path of the file to read.

    Returns
    -------
    etree.ElementTree
        XML tree of the resource on disk.

    """
    namespaces = Namespaces().get_namespaces(keys=('gmd', 'gmi'))

    with io.open(file_path, mode='r', encoding='utf-8') as f:
        data = f.read().encode('utf-8')
        data = etree.fromstring(data)
        mdelem = data.find(
            './/' + util.nspath_eval('gmd:MD_Metadata', namespaces)
        ) or data.find('.//' + util.nspath_eval('gmi:MI_Metadata', namespaces))

        if mdelem is None and data.tag in [
                '{http://www.isotc211.org/2005/gmd}MD_Metadata',
                '{http://www.isotc211.org/2005/gmi}MI_Metadata'
        ]:
            mdelem = data

    return mdelem
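A usage sketch with a hypothetical file path (nothing in the example mandates the name):

mdelem = get_md_resource('records/dataset_metadata.xml')  # hypothetical path
if mdelem is not None:
    print(mdelem.tag)  # e.g. {http://www.isotc211.org/2005/gmd}MD_Metadata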
Example #16
 def schema(self):
     if not self._schema:
         if self._schema_type == SchemaType.ISO19139:
             ns_name = 'gmd'
         else:
             ns_name = 'csw'
         self._schema = Namespaces().get_namespace(ns_name)
     return self._schema
Example #17
def patch_well_known_namespaces(etree_module):
    """Monkey patches the etree module to add some well-known namespaces."""

    import warnings
    from owslib.namespaces import Namespaces
    ns = Namespaces()

    try:
        register_namespace = etree_module.register_namespace
    except AttributeError:
        try:
            etree_module._namespace_map

            def register_namespace(prefix, uri):
                etree_module._namespace_map[uri] = prefix
        except AttributeError:
            def register_namespace(prefix, uri):
                pass
            warnings.warn("Only 'lxml.etree' >= 2.3 and 'xml.etree.ElementTree' >= 1.3 are fully supported!")

    for k, v in ns.get_namespaces().items():
        register_namespace(k, v)
Example #18
def add_namespaces(root, ns_keys):
    if isinstance(ns_keys, six.string_types):
        ns_keys = [ns_keys]

    namespaces = Namespaces()

    ns_keys = [(x, namespaces.get_namespace(x)) for x in ns_keys]

    if etree.__name__ != 'lxml.etree':
        # We can just add more namespaces when not using lxml.
        # We can't re-add an existing namespace. Get a list of current
        # namespaces in use.
        existing_namespaces = set()
        for elem in root.iter():
            if elem.tag[0] == "{":
                uri, tag = elem.tag[1:].split("}")
                existing_namespaces.add(namespaces.get_namespace_from_url(uri))
        for key, link in ns_keys:
            if link is not None and key not in existing_namespaces:
                root.set("xmlns:%s" % key, link)
        return root
    else:
        # lxml does not support setting xmlns attributes
        # Update the elements nsmap with new namespaces
        new_map = root.nsmap
        for key, link in ns_keys:
            if link is not None:
                new_map[key] = link
        # Recreate the root element with updated nsmap
        new_root = etree.Element(root.tag, nsmap=new_map)
        # Carry over attributes
        for a, v in list(root.items()):
            new_root.set(a, v)
        # Carry over children
        for child in root:
            new_root.append(deepcopy(child))
        return new_root
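One practical consequence of the lxml branch above: the element may be rebuilt, so callers should keep the return value rather than assume in-place mutation. A minimal sketch, assuming the same etree import as the example:

root = etree.fromstring('<Capabilities/>')
# Always rebind: under lxml this is a new element carrying the extended nsmap.
root = add_namespaces(root, ['gml', 'xlink'])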
Example #20
def get_namespaces_io100():
    n = Namespaces()
    ns = n.get_namespaces(["ogc","swes","sml","xlink","xsi"])
    ns["ows"] = n.get_namespace("ows110")
    ns["sos"] = n.get_namespace("sos20")
    ns["gml"] = n.get_namespace("gml32")
    ns["om"] = n.get_namespace("om20")
    ns['swe'] = 'http://www.opengis.net/swe/2.0'
    ns["sams"] = "http://www.opengis.net/samplingSpatial/2.0"
    ns["sf"] = "http://www.opengis.net/sampling/2.0"
    return ns
Example #21
def create_metadata_record(**kwargs):
    """
    Create a csw:Record XML document from harvested metadata
    """

    if 'srs' in kwargs:
        srs = kwargs['srs']
    else:
        srs = '4326'

    modified = '%sZ' % datetime.datetime.utcnow().isoformat().split('.')[0]

    nsmap = Namespaces().get_namespaces(['csw', 'dc', 'dct', 'ows'])

    e = etree.Element(nspath_eval('csw:Record', nsmap), nsmap=nsmap)

    etree.SubElement(e, nspath_eval('dc:identifier', nsmap)).text = kwargs['identifier']
    etree.SubElement(e, nspath_eval('dc:title', nsmap)).text = kwargs['title']
    if 'alternative' in kwargs:
        etree.SubElement(e, nspath_eval('dct:alternative', nsmap)).text = kwargs['alternative']
    etree.SubElement(e, nspath_eval('dct:modified', nsmap)).text = modified
    etree.SubElement(e, nspath_eval('dct:abstract', nsmap)).text = kwargs['abstract']
    etree.SubElement(e, nspath_eval('dc:type', nsmap)).text = kwargs['type']
    etree.SubElement(e, nspath_eval('dc:format', nsmap)).text = kwargs['format']
    etree.SubElement(e, nspath_eval('dc:source', nsmap)).text = kwargs['source']

    if 'relation' in kwargs:
        etree.SubElement(e, nspath_eval('dc:relation', nsmap)).text = kwargs['relation']

    if 'keywords' in kwargs:
        for keyword in kwargs['keywords']:
            etree.SubElement(e, nspath_eval('dc:subject', nsmap)).text = keyword

    for link in kwargs['links']:
        etree.SubElement(e, nspath_eval('dct:references', nsmap), scheme=link[0]).text = link[1]

    bbox2 = loads(kwargs['wkt_geometry']).bounds
    bbox = etree.SubElement(e, nspath_eval('ows:BoundingBox', nsmap),
                            crs='http://www.opengis.net/def/crs/EPSG/0/%s' % srs,
                            dimensions='2')

    etree.SubElement(bbox, nspath_eval('ows:LowerCorner', nsmap)).text = '%s %s' % (bbox2[1], bbox2[0])
    etree.SubElement(bbox, nspath_eval('ows:UpperCorner', nsmap)).text = '%s %s' % (bbox2[3], bbox2[2])

    return etree.tostring(e, pretty_print=True)
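A hedged invocation sketch: 'links' appears to be an iterable of (scheme, url) pairs and 'wkt_geometry' is parsed by a WKT loads() (presumably shapely's); all values below are made up.

record_xml = create_metadata_record(
    identifier='urn:example:record:1',
    title='Example dataset',
    abstract='A short abstract.',
    type='dataset',
    format='text/xml',
    source='http://example.org/data.xml',
    links=[('WWW:LINK', 'http://example.org/data.xml')],
    wkt_geometry='POLYGON((-180 -90, -180 90, 180 90, 180 -90, -180 -90))')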
Example #22
class IOOSSOSDSCheck(BaseSOSDSCheck, IOOSBaseCheck):

    # set up namespaces for XPath
    ns = Namespaces().get_namespaces(['sml', 'swe', 'gml', 'xlink'])

    @check_has(BaseCheck.HIGH)
    def check_high(self, ds):
        return [
            ('platform_sponsor', XPath("/sml:SensorML/sml:member/sml:System/sml:classification/sml:ClassifierList/sml:classifier[@name='sponsor']/sml:Term/sml:value", namespaces=self.ns)),
            ('platform_type', XPath("/sml:SensorML/sml:member/sml:System/sml:classification/sml:ClassifierList/sml:classifier[@name='platformType']/sml:Term/sml:value", namespaces=self.ns)),
            ('station_publisher_name', XPath("/sml:SensorML/sml:member/sml:System/sml:contact/sml:ContactList/sml:member[@xlink:role='http://mmisw.org/ont/ioos/definition/publisher']/sml:ResponsibleParty/sml:organizationName", namespaces=self.ns)),
            ('station_publisher_email', XPath("/sml:SensorML/sml:member/sml:System/sml:contact/sml:ContactList/sml:member[@xlink:role='http://mmisw.org/ont/ioos/definition/publisher']/sml:ResponsibleParty/sml:contactInfo/address/sml:electronicMailAddress", namespaces=self.ns)),
            ('station_id', XPath("/sml:SensorML/sml:member/sml:System/sml:identification/sml:IdentifierList/sml:identifier[@name='stationID']/sml:Term/sml:value", namespaces=self.ns)),
            ('station_long_name', XPath("/sml:SensorML/sml:member/sml:System/sml:identification/sml:IdentifierList/sml:identifier[@name='longName']/sml:Term/sml:value", namespaces=self.ns)),
            ('station_short_name', XPath("/sml:SensorML/sml:member/sml:System/sml:identification/sml:IdentifierList/sml:identifier[@name='shortName']/sml:Term/sml:value", namespaces=self.ns)),
            ('station_wmo_id', XPath("/sml:SensorML/sml:member/sml:System/sml:identification/sml:IdentifierList/sml:identifier/sml:Term[@definition=\"http://mmisw.org/ont/ioos/definition/wmoID\"]/sml:value", namespaces=self.ns)),
            ('time_period', XPath("/sml:SensorML/sml:member/sml:System/sml:capabilities[@name='observationTimeRange']/swe:DataRecord/swe:field[@name='observationTimeRange']/swe:TimeRange/swe:value", namespaces=self.ns)),
            ('operator_email', XPath("/sml:SensorML/sml:member/sml:System/sml:contact/sml:ContactList/sml:member[@xlink:role='http://mmisw.org/ont/ioos/definition/operator']/sml:ResponsibleParty/sml:contactInfo/address/sml:electronicMailAddress", namespaces=self.ns)),
            ('operator_name', XPath("/sml:SensorML/sml:member/sml:System/sml:contact/sml:ContactList/sml:member[@xlink:role='http://mmisw.org/ont/ioos/definition/operator']/sml:ResponsibleParty/sml:organizationName", namespaces=self.ns)),
            ('station_description', XPath("/sml:SensorML/sml:member/sml:System/gml:description", namespaces=self.ns)),
            # replaced with lon/lat with point
            ('station_location_point', XPath("/sml:SensorML/sml:member/sml:System/sml:location/gml:Point/gml:pos", namespaces=self.ns))
        ]

    @check_has(BaseCheck.MEDIUM)
    def check_recommended(self, ds):
        return [
            ('sensor_descriptions', XPath("/sml:SensorML/sml:member/sml:System/sml:components/sml:ComponentList/sml:component/sml:System/gml:description", namespaces=self.ns)),
            ('sensor_ids', XPath("/sml:SensorML/sml:member/sml:System/sml:components/sml:ComponentList/sml:component/sml:System/@gml:id", namespaces=self.ns)),
            ('sensor_names', XPath("/sml:SensorML/sml:member/sml:System/sml:components/sml:ComponentList/sml:component/@name", namespaces=self.ns)),

            ('data_format_template_version', XPath("/sml:SensorML/sml:capabilities/swe:SimpleDataRecord/swe:field[@name='ioosTemplateVersion']/swe:Text/swe:value", namespaces=self.ns)),

            ('variable_names', XPath("/sml:SensorML/sml:member/sml:System/sml:components/sml:ComponentList/sml:component/sml:System/sml:outputs/sml:OutputList/sml:output/swe:Quantity/@definition", namespaces=self.ns)),
            ('variable_units', XPath("/sml:SensorML/sml:member/sml:System/sml:components/sml:ComponentList/sml:component/sml:System/sml:outputs/sml:OutputList/sml:output/swe:Quantity/swe:uom/@code", namespaces=self.ns)),
            ('network_id', XPath("/sml:SensorML/sml:member/sml:System/sml:capabilities[@name='networkProcedures']/swe:SimpleDataRecord/gml:metaDataProperty/@xlink:href", namespaces=self.ns)),
            ('operator_sector', XPath("/sml:SensorML/sml:member/sml:System/sml:classification/sml:ClassifierList/sml:classifier[@name='operatorSector']/sml:Term/sml:value", namespaces=self.ns)),
        ]

    @check_has(BaseCheck.LOW)
    def check_suggested(self, ds):
        return [
        ]
Example #23
"""
API for OGC Web Services Common (OWS) constructs and metadata.

OWS Common: http://www.opengeospatial.org/standards/common

Currently supports version 1.1.0 (06-121r3).
"""

import logging

from owslib.etree import etree
from owslib import crs, util
from owslib.namespaces import Namespaces

LOGGER = logging.getLogger(__name__)

n = Namespaces()

OWS_NAMESPACE_1_0_0 = n.get_namespace("ows")
OWS_NAMESPACE_1_1_0 = n.get_namespace("ows110")
OWS_NAMESPACE_2_0_0 = n.get_namespace("ows200")
XSI_NAMESPACE = n.get_namespace("xsi")
XLINK_NAMESPACE = n.get_namespace("xlink")

DEFAULT_OWS_NAMESPACE = OWS_NAMESPACE_1_1_0  # Use this as default for OWSCommon objects


class OwsCommon(object):
    """Initialize OWS Common object"""
    def __init__(self, version):
        self.version = version
        if version == '1.0.0':
Example #24
def get_namespaces():
    n = Namespaces()
    ns = n.get_namespaces(["ogc", "sml", "gml", "sos", "swe", "xlink"])
    ns["ows"] = n.get_namespace("ows110")
    return ns
Example #25
from owslib.crs import Crs
from owslib.feature import WebFeatureService_
from owslib.feature.common import (
    WFSCapabilitiesReader,
    AbstractContentMetadata,
)
from owslib.namespaces import Namespaces

# other imports
from io import BytesIO
from urllib.parse import urlencode

import logging
from owslib.util import log

n = Namespaces()
WFS_NAMESPACE = n.get_namespace("wfs20")
OWS_NAMESPACE = n.get_namespace("ows110")
OGC_NAMESPACE = n.get_namespace("ogc")
GML_NAMESPACE = n.get_namespace("gml")
FES_NAMESPACE = n.get_namespace("fes")


class ServiceException(Exception):
    pass


class WebFeatureService_2_0_0(WebFeatureService_):
    """Abstraction for OGC Web Feature Service (WFS).

    Implements IWebFeatureService.
Example #26
def get_namespaces():
    ns = Namespaces()
    return ns.get_namespaces(["om10", "swe101", "swe20", "gml311", "xlink"])
Example #27
def get_namespaces():
    n = Namespaces()
    return n.get_namespaces()
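The Namespaces object behind all of these examples exposes a small, consistent API; every call below appears somewhere on this page.

from owslib.namespaces import Namespaces

n = Namespaces()
n.get_namespace('gml')                     # single URI lookup
n.get_namespaces(['gml', 'xlink'])         # {prefix: URI} dict for selected keys
n.get_namespaces()                         # the full dict when called without keys
n.get_versioned_namespace('sml', '1.0.1')  # version-specific URI
n.get_namespace_from_url('http://www.opengis.net/gml')  # reverse lookup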
Example #28
import warnings
import six
from owslib.etree import etree
from owslib.util import (openURL, ServiceException, testXMLValue,
                         extract_xml_list, xmltag_split, OrderedDict, nspath,
                         bind_url)
from owslib.fgdc import Metadata
from owslib.iso import MD_Metadata
from owslib.crs import Crs
from owslib.namespaces import Namespaces
from owslib.map.common import WMSCapabilitiesReader

from owslib.util import log

n = Namespaces()
WMS_NAMESPACE = n.get_namespace("wms")
OGC_NAMESPACE = n.get_namespace('ogc')


class WebMapService_1_3_0(object):

    def __getitem__(self, name):
        ''' check contents dictionary to allow dict
        like access to service layers
        '''
        if name in self.__getattribute__('contents'):
            return self.__getattribute__('contents')[name]
        else:
            raise KeyError("No content named %s" % name)
Example #29
def get_namespaces():
    n = Namespaces()
    ns = n.get_namespaces(["ogc"])
    ns[None] = n.get_namespace("ogc")
    return ns
Example #30
def get_namespaces():
    n = Namespaces()
    ns = n.get_namespaces(["dif", "fes", "gml", "ogc", "xs", "xsi"])
    ns[None] = n.get_namespace("ogc")
    return ns
Example #31
File: gm03.py Project: Gustry/QGIS
def get_namespaces():
    n = Namespaces()
    ns = n.get_namespaces(["gm03"])
    ns[None] = n.get_namespace("gm03")
    return ns
Example #32
def get_namespaces():
    ns = Namespaces()
    return ns.get_namespaces(["swe20"])
Example #33
"""
API for OGC Web Services Common (OWS) constructs and metadata.

OWS Common: http://www.opengeospatial.org/standards/common

Currently supports version 1.1.0 (06-121r3).
"""

from __future__ import absolute_import, division, print_function

from owslib.etree import etree
from owslib import crs, util
from owslib.namespaces import Namespaces

n = Namespaces()

OWS_NAMESPACE_1_0_0 = n.get_namespace("ows")
OWS_NAMESPACE_1_1_0 = n.get_namespace("ows110")
OWS_NAMESPACE_2_0 = n.get_namespace("ows200")
XSI_NAMESPACE = n.get_namespace("xsi")
XLINK_NAMESPACE = n.get_namespace("xlink")

DEFAULT_OWS_NAMESPACE = OWS_NAMESPACE_1_1_0  # Use this as default for OWSCommon objects


class OwsCommon(object):
    """Initialize OWS Common object"""

    def __init__(self, version):
        self.version = version
Example #34
def get_namespaces():
    n = Namespaces()
    ns = n.get_namespaces(["atom", "dc", "gml", "gml32", "xlink"])
    ns.update(add_namespaces)
    ns[None] = n.get_namespace("atom")
    return ns
Example #35
def get_namespaces():
    n = Namespaces()
    namespaces = n.get_namespaces(["sml","gml","xlink"])
    namespaces["ism"] = "urn:us:gov:ic:ism:v2"
    return namespaces
Example #36
import cgi
from cStringIO import StringIO
from urllib import urlencode
from urllib2 import urlopen
from owslib.util import openURL, testXMLValue, extract_xml_list, ServiceException, xmltag_split
from owslib.etree import etree
from owslib.fgdc import Metadata
from owslib.iso import MD_Metadata
from owslib.crs import Crs
from owslib.feature import WebFeatureService_
from owslib.namespaces import Namespaces
from owslib.util import log
from _socket import timeout

n = Namespaces()
WFS_NAMESPACE = n.get_namespace("wfs")
OGC_NAMESPACE = n.get_namespace("ogc")


#TODO: use nspath in util.py
def nspath(path, ns=WFS_NAMESPACE):
    """
    Prefix the given path with the given namespace identifier.
    
    Parameters
    ----------
    path : string
        ElementTree API Compatible path expression

    ns : string
Example #38
def get_namespaces():
    n = Namespaces()
    ns = n.get_namespaces(
        ["gco", "gmd", "gml", "gml32", "gmx", "gts", "srv", "xlink"])
    ns[None] = n.get_namespace("gmd")
    return ns
Example #39
"""
Set of functions, which are suitable for DescribeFeatureType parsing and
generating layer schema description compatible with `fiona`
"""

import cgi, sys
from owslib.util import openURL
try:
    from urllib import urlencode
except ImportError:
    from urllib.parse import urlencode
from owslib.etree import etree
from owslib.namespaces import Namespaces
from owslib.util import which_etree, findall

MYNS = Namespaces()
XS_NAMESPACE = MYNS.get_namespace('xs')
GML_NAMESPACES = (MYNS.get_namespace('gml'),
                  MYNS.get_namespace('gml311'),
                  MYNS.get_namespace('gml32'))


def get_schema(url, typename, version='1.0.0', timeout=30, username=None, password=None):
    """Parses DescribeFeatureType response and creates schema compatible
    with :class:`fiona`

    :param str url: url of the service
    :param str version: version of the service
    :param str typename: name of the layer
    :param int timeout: request timeout
    """
Example #40
def reindex_services():
    region_map = {
        'AOOS': '1706F520-2647-4A33-B7BF-592FAFDE4B45',
        'CARICOOS': '117F1684-A5E3-400E-98D8-A270BDBA1603',
        'CENCOOS': '4BA5624D-A61F-4C7E-BAEE-7F8BDDB8D9C4',
        'GCOOS': '003747E7-4818-43CD-937D-44D5B8E2F4E9',
        'GLOS': 'B664427E-6953-4517-A874-78DDBBD3893E',
        'MARACOOS': 'C664F631-6E53-4108-B8DD-EFADF558E408',
        'NANOOS': '254CCFC0-E408-4E13-BD62-87567E7586BB',
        'NERACOOS': 'E41F4FCD-0297-415D-AC53-967B970C3A3E',
        'PacIOOS': '68FF11D8-D66B-45EE-B33A-21919BB26421',
        'SCCOOS': 'B70B3E3C-3851-4BA9-8E9B-C9F195DCEAC7',
        'SECOORA': 'B3EA8869-B726-4E39-898A-299E53ABBC98'
    }
    #'NOS/CO-OPS':   '72E748DF-23B1-4E80-A2C4-81E70783094A',
    #'USACE':        '73019DFF-2E01-4800-91CD-0B3F812256A7',
    #'NAVY':         '3B94DAAE-B7E9-4789-993B-0045AD9149D9',
    #'NDBC':         '828981B0-0039-4360-9788-E788FA6B0875',
    #'USGS/CMGP':    'C6F11F00-C2BD-4AC6-8E2C-013E16F4932E' }

    services = {
        'SOS': 'urn:x-esri:specification:ServiceType:sos:url',
        'WMS': 'urn:x-esri:specification:ServiceType:wms:url',
        'WCS': 'urn:x-esri:specification:ServiceType:wcs:url',
        'DAP': 'urn:x-esri:specification:ServiceType:odp:url'
    }

    endpoint = 'http://www.ngdc.noaa.gov/geoportal/csw'  # NGDC Geoportal

    c = csw.CatalogueServiceWeb(endpoint, timeout=120)

    ns = Namespaces()

    with app.app_context():
        for region, uuid in region_map.iteritems():
            # Setup uuid filter
            uuid_filter = fes.PropertyIsEqualTo(propertyname='sys.siteuuid',
                                                literal="{%s}" % uuid)

            # Make CSW request
            c.getrecords2([uuid_filter], esn='full', maxrecords=999999)

            for name, record in c.records.iteritems():

                # @TODO: unfortunately CSW does not provide us with contact info, so
                # we must request it manually
                contact_email = ""
                metadata_url = None

                iso_ref = [
                    x['url'] for x in record.references if x['scheme'] ==
                    'urn:x-esri:specification:ServiceType:ArcIMS:Metadata:Document'
                ]
                if len(iso_ref):
                    metadata_url = iso_ref[0]

                    # Don't query for contact info right now.  It takes WAY too long.
                    #r = requests.get(iso_ref[0])
                    #r.raise_for_status()
                    #node = ET.fromstring(r.content)
                    #safe = nspath_eval("gmd:CI_ResponsibleParty/gmd:contactInfo/gmd:CI_Contact/gmd:address/gmd:CI_Address/gmd:electronicMailAddress/gco:CharacterString", ns.get_namespaces())
                    #contact_node = node.find(".//" + safe)
                    #if contact_node is not None and contact_node.text != "":
                    #    contact_email = contact_node.text
                    #    if " or " in contact_email:
                    #        contact_email = ",".join(contact_email.split(" or "))

                for ref in record.references:

                    # We are only interested in the 'services'
                    if ref["scheme"] in services.values():
                        url = unicode(ref["url"])
                        s = db.Service.find_one({
                            'data_provider':
                            unicode(region),
                            'url':
                            url
                        })
                        if s is None:
                            s = db.Service()
                            s.url = url
                            s.data_provider = unicode(region)

                        s.service_id = unicode(name)
                        s.name = unicode(record.title)
                        s.service_type = unicode(
                            next((k for k, v in services.items()
                                  if v == ref["scheme"])))
                        s.interval = 3600  # 1 hour
                        s.tld = unicode(urlparse(url).netloc)
                        s.updated = datetime.utcnow()
                        s.contact = unicode(contact_email)
                        s.metadata_url = unicode(metadata_url)
                        s.save()
                        s.schedule_harvest()
Example #41
def get_namespaces():
    n = Namespaces()
    ns = n.get_namespaces(["ogc","sa","sml","gml","sos","swe","xlink"])
    ns["ows"] = n.get_namespace("ows110")
    return ns
Example #42
File: iso.py Project: wsidl/OWSLib
def get_namespaces():
    n = Namespaces()
    ns = n.get_namespaces(["gco","gmd","gml","gml32","gmx","gts","srv","xlink"])
    ns[None] = n.get_namespace("gmd")
    return ns
Example #43
from __future__ import absolute_import, division, print_function

from owslib.namespaces import Namespaces

from pyoos.utils.etree import ElementType, etree

ns = Namespaces()
SML_NS = ns.get_versioned_namespace("sml", "1.0.1")
SWE_NS = [ns.get_versioned_namespace("swe", "1.0.1")]


class IoosDescribeSensor(object):
    def __new__(cls, element):
        if isinstance(element, ElementType):
            root = element
        else:
            root = etree.fromstring(element)

        sml_str = ".//{{{0}}}identifier/{{{0}}}Term[@definition='http://mmisw.org/ont/ioos/definition/%s']".format(
            SML_NS
        )

        if hasattr(root, "getroot"):
            root = root.getroot()

        # Circular dependencies are bad; consider a reorganization.
        # Find the proper type for the DescribeSensor.
        from pyoos.parsers.ioos.one.describe_sensor import (
            NetworkDS,
            StationDS,
            SensorDS,
Example #44
import warnings
import six
from owslib.etree import etree
from owslib.util import (openURL, ServiceException, testXMLValue,
                         extract_xml_list, xmltag_split, OrderedDict, nspath,
                         bind_url)
from owslib.fgdc import Metadata
from owslib.iso import MD_Metadata
from owslib.crs import Crs
from owslib.namespaces import Namespaces
from owslib.map.common import WMSCapabilitiesReader

from owslib.util import log

n = Namespaces()
WMS_NAMESPACE = n.get_namespace("wms")
OGC_NAMESPACE = n.get_namespace('ogc')


class WebMapService_1_3_0(object):
    def __getitem__(self, name):
        ''' check contents dictionary to allow dict
        like access to service layers
        '''
        if name in self.__getattribute__('contents'):
            return self.__getattribute__('contents')[name]
        else:
            raise KeyError("No content named %s" % name)

    def __init__(self,
Example #45
def get_namespaces():
    n = Namespaces()
    ns = n.get_namespaces(["gco","gmd","gml","gml32","gmx","gts","srv","xlink"])
    ns["che"] = 'http://www.geocat.ch/2008/che'
    ns[None] = n.get_namespace("gmd")
    return ns
Example #46
def get_namespaces():
    n = Namespaces()
    return n.get_namespaces(["sml101", "gml", "xlink", "swe101"])
Example #47
from owslib.etree import etree
from owslib.util import nspath, testXMLValue
from owslib.crs import Crs
from owslib.feature import WebFeatureService_
from owslib.namespaces import Namespaces

#other imports
import cgi
from cStringIO import StringIO
from urllib import urlencode
from urllib2 import urlopen

import logging
from owslib.util import log

n = Namespaces()
WFS_NAMESPACE = n.get_namespace("wfs20")
OWS_NAMESPACE = n.get_namespace("ows110")
OGC_NAMESPACE = n.get_namespace("ogc")
GML_NAMESPACE = n.get_namespace("gml")
FES_NAMESPACE = n.get_namespace("fes")


class ServiceException(Exception):
    pass


class WebFeatureService_2_0_0(WebFeatureService_):
    """Abstraction for OGC Web Feature Service (WFS).

    Implements IWebFeatureService.
Example #48
def get_namespaces():
    n = Namespaces()
    ns = n.get_namespaces("dif")
    ns[None] = n.get_namespace("dif")
    return ns
Example #49
def get_namespaces():
    ns = Namespaces()
    return ns.get_namespaces(["swe20", "xlink", "sos20", "om20", "gml32",
                              "xsi", "wml2"])
Example #50
def get_namespaces():
    n = Namespaces()
    return n.get_namespaces(["sml","gml","xlink"])
Example #51
from pyoos.utils.etree import etree
from owslib.namespaces import Namespaces

ns = Namespaces()
SML_NS = ns.get_versioned_namespace('sml', '1.0.1')
SWE_NS = [ns.get_versioned_namespace('swe', '1.0.1')]

class IoosDescribeSensor(object):

    def __new__(cls, element):
        if isinstance(element, str):
            root = etree.fromstring(element)
        else:
            root = element

        sml_str = ".//{{{0}}}identifier/{{{0}}}Term[@definition='http://mmisw.org/ont/ioos/definition/%s']".format(SML_NS)

        if hasattr(root, 'getroot'):
            root = root.getroot()

        # circular dependencies are bad. consider a reorganization
        # find the proper type for the DescribeSensor
        from pyoos.parsers.ioos.one.describe_sensor import (NetworkDS,
                                                            StationDS, SensorDS)
        for ds_type, constructor in [('networkID', NetworkDS), ('stationID', StationDS), ('sensorID', SensorDS)]:
            if root.find(sml_str % ds_type) is not None:
                return super(IoosDescribeSensor, cls).__new__(constructor)

        # NOAA CO-OPS
        sml_str = ".//{{{0}}}identifier/{{{0}}}Term[@definition='urn:ioos:def:identifier:NOAA::networkID']".format(SML_NS)
        if root.find(sml_str) is not None:
Example #52
    def harvest(self, request):
        ## harvest (Harvester object, request = [community, source, verb, mdprefix, mdsubset])
        # Harvest all records with <mdprefix> and <mdsubset> from <source> via the sickle module and store them on disk.
        #
        # Parameters:
        # -----------
        # (list)  request - A list with following items:
        #                    1. community
        #                    2. source (OAI URL)
        #                    3. verb (ListIdentifiers, ListRecords or JSONAPI)
        #                    4. mdprefix (OAI md format as oai_dc, iso etc.)
        #                    5. mdsubset
        #
        # Return Values:
        # --------------
        # 1. (integer)  is -1 if something went wrong
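        #
        # Example request (illustrative values, not from the source):
        #   ['myCommunity', 'https://example.org/oai', 'ListRecords', 'oai_dc', None]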

        # create a request dictionary:
        req = {
            "community": request[0],
            "url": request[1],
            "lverb": request[2],
            "mdprefix": request[3],
            "mdsubset": request[4] if len(request) > 4 else None
        }

        # create dictionary with stats:
        resKeys = ['count', 'tcount', 'ecount', 'time']
        results = dict.fromkeys(resKeys, 0)

        stats = {
            "tottcount": 0,  # total number of provided datasets
            "totcount": 0,  # total number of successful harvested datasets
            "totecount": 0,  # total number of failed datasets
            "totdcount": 0,  # total number of all deleted datasets
            "tcount": 0,  # number of all provided datasets per subset
            "count":
            0,  # number of all successful harvested datasets per subset
            "ecount": 0,  # number of all failed datasets per subset
            "dcount": 0,  # number of all deleted datasets per subset
            "timestart": time.time(),  # start time per subset process
        }

        # the gbif api client
        class GBIF_CLIENT(object):

            # call action api:
            ## GBIF.action('package_list',{})

            def __init__(self, api_url):  ##, api_key):
                self.api_url = api_url
                self.logger = logging.getLogger('root')

            def JSONAPI(self, action, offset, chunklen, key):
                ## JSONAPI (action) - method
                return self.__action_api(action, offset, chunklen, key)

            def __action_api(self, action, offset, chunklen, key):
                # Make the HTTP request for get datasets from GBIF portal
                response = ''
                rvalue = 0
                ## offset = 0
                limit = chunklen  ## None for DataCite-JSON !!!
                api_url = self.api_url
                if key:
                    action_url = "{apiurl}/{action}/{key}".format(
                        apiurl=api_url, action=action, key=str(key))
                elif offset is None:
                    action_url = "{apiurl}/{action}".format(apiurl=api_url,
                                                            action=action)
                else:
                    action_url = "{apiurl}/{action}?offset={offset}&limit={limit}".format(
                        apiurl=api_url,
                        action=action,
                        offset=str(offset),
                        limit=str(limit))

                self.logger.debug('action_url: %s' % action_url)
                try:
                    request = Request(action_url)
                    response = urlopen(request)
                except HTTPError as e:
                    self.logger.error(
                        '%s : The server %s couldn\'t fulfill the action %s.' %
                        (e.code, self.api_url, action))
                    if (e.code == 403):
                        self.logger.critical(
                            'Access forbidden, maybe the API key is not valid?'
                        )
                        exit(e.code)
                    elif (e.code == 409):
                        self.logger.critical(
                            'Maybe you have a parameter error?')
                        return {"success": False}
                    elif (e.code == 500):
                        self.logger.critical('Internal server error')
                        exit(e.code)
                except URLError as e:
                    exit('%s' % e.reason)
                else:
                    out = json.loads(response.read())
                    assert response.code >= 200
                    return out

        requests_log = logging.getLogger("requests")
        requests_log.setLevel(logging.WARNING)

        # if the number of files in a subset dir is greater than <count_break>
        # then create a new one with the name <set> + '_' + <count_set>
        count_break = 5000
        count_set = 1
        start = time.time()

        # set subset:
        mdsubset = req["mdsubset"]
        if (not mdsubset):
            subset = 'SET'
        elif mdsubset.endswith('_'):  # no OAI subsets, but different OAI-URLs for same community
            subset = mdsubset[:-1]
            mdsubset = None
        elif len(mdsubset) > 2 and mdsubset[-1].isdigit() and mdsubset[-2] == '_':
            subset = mdsubset[:-2]
        else:
            subset = mdsubset
            if req["community"] == "b2share" or re.match(
                    r'http(.*?)b2share(.*?)api(.*?)', req["url"]):
                setMapFile = '%s/mapfiles/b2share_mapset.json' % (os.getcwd())
            elif req["community"] == "dara" and req[
                    "url"] == "https://www.da-ra.de/oaip/oai":
                setMapFile = '%s/mapfiles/dara_mapset.json' % (os.getcwd())
            else:
                setMapFile = None
            if setMapFile:
                with open(setMapFile) as sm:
                    setMap = json.load(sm)
                    if mdsubset in setMap:
                        mdsubset = setMap[mdsubset]

        if (self.fromdate):
            subset = subset + '_f' + self.fromdate

        self.logger.debug(' |- Subset:    \t%s' % subset)

        # make subset dir:
        subsetdir = '/'.join([
            self.base_outdir, req['community'] + '-' + req['mdprefix'],
            subset + '_' + str(count_set)
        ])

        noffs = 0  # set to number of record, where harvesting should start
        stats['tcount'] = noffs
        fcount = 0
        oldperc = 0
        ntotrecs = 0
        choffset = 0
        chunklen = 1000
        pageno = 1
        records = list()

        ## JSON-API
        jsonapi_verbs = ['dataset', 'works', 'records']
        if req["lverb"] in jsonapi_verbs:
            GBIF = GBIF_CLIENT(req['url'])  # create GBIF object
            harvestreq = getattr(GBIF, 'JSONAPI', None)
            outtypedir = 'hjson'
            outtypeext = 'json'
            if mdsubset and req["lverb"] == 'works':
                haction = 'works?publisher-id=' + mdsubset
                dresultkey = 'data'
            elif req["lverb"] == 'records':
                haction = req["lverb"]
                if mdsubset:
                    haction += '?q=community:' + mdsubset + '&size=' + str(
                        chunklen) + '&page=' + str(pageno)
                dresultkey = 'hits'
            else:
                haction = req["lverb"]
                dresultkey = 'results'
            try:
                chunk = harvestreq(
                    **{
                        'action': haction,
                        'offset': None,
                        'chunklen': chunklen,
                        'key': None
                    })
                self.logger.debug(" Got first %d records : chunk['data'] %s " %
                                  (chunklen, chunk[dresultkey]))
            except (HTTPError, ConnectionError, Exception) as e:
                self.logger.critical(
                    "%s :\n\thaction %s\n\tharvest request %s\n" %
                    (e, haction, req))
                return -1

            if req["lverb"] == 'dataset':
                while ('endOfRecords' in chunk and not chunk['endOfRecords']):
                    if 'results' in chunk:
                        records.extend(chunk['results'])
                    choffset += chunklen
                    chunk = harvestreq(
                        **{
                            'action': haction,
                            'offset': choffset,
                            'chunklen': chunklen,
                            'key': None
                        })
                    self.logger.debug(
                        " Got next records [%d,%d] from chunk %s " %
                        (choffset, choffset + chunklen, chunk))
            elif req["lverb"] == 'records':
                records.extend(chunk['hits']['hits'])
                while ('hits' in chunk and 'next' in chunk['links']):
                    if 'hits' in chunk:
                        records.extend(chunk['hits']['hits'])
                    pageno += 1
                    chunk = harvestreq(
                        **{
                            'action': haction,
                            'page': pageno,
                            'size': chunklen,
                            'key': None
                        })
                    self.logger.debug(
                        " Got next records [%d,%d] from chunk %s " %
                        (choffset, choffset + chunklen, chunk))
            else:
                if 'data' in chunk:
                    records.extend(chunk['data'])

        # OAI-PMH (verb = ListRecords/Identifier )
        elif req["lverb"].startswith('List'):
            sickle = Sickle(req['url'], max_retries=3, timeout=300)
            outtypedir = 'xml'
            outtypeext = 'xml'
            harvestreq = getattr(sickle, req["lverb"], None)
            try:
                records, rc = tee(
                    harvestreq(
                        **{
                            'metadataPrefix': req['mdprefix'],
                            'set': mdsubset,
                            'ignore_deleted': True,
                            'from': self.fromdate
                        }))
            except (HTTPError, ConnectionError) as err:
                self.logger.critical("%s during connecting to %s\n" %
                                     (err, req['url']))
                return -1
            except (ImportError, etree.XMLSyntaxError, CannotDisseminateFormat,
                    Exception) as err:
                self.logger.critical("%s during harvest request %s\n" %
                                     (err, req))
                return -1

        # CSW2.0
        elif req["lverb"].startswith('csw'):
            outtypedir = 'xml'
            outtypeext = 'xml'
            startposition = 0
            maxrecords = 20
            try:
                src = CatalogueServiceWeb(req['url'])
                NS = Namespaces()
                namespaces = NS.get_namespaces()
                if req['mdprefix'] == 'iso19139' or req['mdprefix'] == 'own':
                    nsp = namespaces['gmd']
                else:
                    nsp = namespaces['csw']

                harvestreq = getattr(src, 'getrecords2')
                chunk = harvestreq(
                    **{
                        'esn': 'full',
                        'startposition': choffset,
                        'maxrecords': maxrecords,
                        'outputschema': nsp
                    })
                chunklist = list(src.records.items())
                while (len(chunklist) > 0):
                    records.extend(chunklist)
                    choffset += maxrecords
                    chunk = harvestreq(
                        **{
                            'esn': 'full',
                            'startposition': choffset,
                            'maxrecords': maxrecords,
                            'outputschema': nsp
                        })
                    chunklist = list(src.records.items())
                    self.logger.debug(
                        " Got next %s records [%d,%d] from chunk " %
                        (nsp, choffset, choffset + chunklen))
            except (HTTPError, ConnectionError) as err:
                self.logger.critical("%s during connecting to %s\n" %
                                     (err, req['url']))
                return -1
            except (ImportError, CannotDisseminateFormat, Exception) as err:
                self.logger.error("%s : During harvest request %s\n" %
                                  (err, req))
                ##return -1

        # SparQL
        elif req["lverb"].startswith('Sparql'):
            outtypedir = 'hjson'
            outtypeext = 'json'
            startposition = 0
            maxrecords = 1000
            try:
                src = SPARQLWrapper(req['url'])
                harvestreq = getattr(src, 'query', 'format')  ##
                statement = '''
prefix cpmeta: <http://meta.icos-cp.eu/ontologies/cpmeta/>
prefix prov: <http://www.w3.org/ns/prov#>
select (str(?submTime) as ?time) ?dobj ?spec ?dataLevel ?fileName ?submitterName where{
  ?dobj cpmeta:hasObjectSpec [rdfs:label ?spec ; cpmeta:hasDataLevel ?dataLevel].
  ?dobj cpmeta:hasName ?fileName .
  ?dobj cpmeta:wasSubmittedBy ?submission .
  ?submission prov:endedAtTime ?submTime .
  ?submission prov:wasAssociatedWith [cpmeta:hasName ?submitterName].
}
order by desc(?submTime)
limit 1000
'''
                src.setQuery(statement)
                src.setReturnFormat(JSON)
                records = harvestreq().convert()['results']['bindings']
            except (HTTPError, ConnectionError) as err:
                self.logger.critical("%s during connecting to %s\n" %
                                     (err, req['url']))
                return -1
            except (ImportError, CannotDisseminateFormat, Exception) as err:
                self.logger.critical("%s during harvest request %s\n" %
                                     (err, req))
                return -1

        else:
            self.logger.critical(' Not supported harvest type %s' %
                                 req["lverb"])
            sys.exit()

        self.logger.debug(" Harvest method used %s" % req["lverb"])
        try:
            if req["lverb"].startswith('List'):
                ntotrecs = len(list(rc))
            else:
                ntotrecs = len(records)
        except Exception as err:
            self.logger.error('%s Iteration does not work?' % (err))

        print("\t|- Retrieved %d records in %d sec - write %s files to disc" %
              (ntotrecs, time.time() - start, outtypeext.upper()))
        if ntotrecs == 0:
            self.logger.warning("\t|- Can not access any records to harvest")
            return -1

        self.logger.debug(' | %-4s | %-25s | %-25s |' %
                          ('#', 'OAI Identifier', 'DS Identifier'))
        start2 = time.time()

        if (not os.path.isdir(subsetdir + '/' + outtypedir)):
            os.makedirs(subsetdir + '/' + outtypedir)

        delete_ids = list()
        # loop over records
        for record in records:
            ## counter and progress bar
            stats['tcount'] += 1
            fcount += 1
            if fcount <= noffs: continue
            if ntotrecs > 0:
                perc = int(fcount * 100 / ntotrecs)
                bartags = int(perc / 5)
                if perc % 10 == 0 and perc != oldperc:
                    oldperc = perc
                    print("\r\t[%-20s] %5d (%3d%%) in %d sec" %
                          ('=' * bartags, fcount, perc, time.time() - start2))
                    sys.stdout.flush()

            # Set oai_id and generate a uniquely identifier for this dataset:
            delete_flag = False
            if req["lverb"] == 'dataset' or req["lverb"] == 'works' or req[
                    "lverb"] == 'records':  ## Harvest via JSON-API
                if 'key' in record:
                    oai_id = record['key']
                elif 'id' in record:
                    oai_id = record['id']

            elif req["lverb"] == 'csw':  ## Harvest via CSW2.0
                if hasattr(record, 'identifier'):
                    oai_id = record.identifier
                elif (record):
                    oai_id = record[0]
                else:
                    self.logger.critical(
                        'Record %s has no attribute identifier' % record)

            elif req["lverb"] == 'ListIdentifiers':  ## OAI-PMH harvesting of XML records
                if (record.deleted):
                    stats['totdcount'] += 1
                    delete_flag = True
                    ##HEW-D continue
                else:
                    oai_id = record.identifier
                    record = sickle.GetRecord(
                        **{
                            'metadataPrefix': req['mdprefix'],
                            'identifier': record.identifier
                        })
            elif req["lverb"] == 'ListRecords':
                if (record.header.deleted):
                    stats['totdcount'] += 1
                    continue
                else:
                    oai_id = record.header.identifier
            elif req["lverb"].startswith('Sparql'):
                oai_id = record['fileName']['value']

            # generate a uniquely identifier and a filename for this dataset:
            uid = str(uuid.uuid5(uuid.NAMESPACE_DNS, oai_id))
            outfile = '%s/%s/%s.%s' % (subsetdir, outtypedir,
                                       os.path.basename(uid), outtypeext)

            if delete_flag:  # record marked as deleted on provider site
                jsonfile = '%s/%s/%s.%s' % (subsetdir, 'json',
                                            os.path.basename(uid), 'json')
                # remove xml and json file:
                os.remove(outfile)
                os.remove(jsonfile)
                delete_ids.append(uid)

            # write record on disc
            try:
                self.logger.debug('    | h | %-4d | %-45s | %-45s |' %
                                  (stats['count'] + 1, oai_id, uid))
                self.logger.debug(
                    'Try to write the harvested JSON record to %s' % outfile)

                if outtypeext == 'xml':  # get and write the XML content:
                    if req["lverb"] == 'csw':
                        metadata = etree.fromstring(record[1].xml)
                    elif hasattr(record, 'raw'):
                        metadata = etree.fromstring(record.raw)
                    elif hasattr(record, 'xml'):
                        metadata = etree.fromstring(record.xml)

                    if (metadata is not None):
                        try:
                            metadata = etree.tostring(
                                metadata, pretty_print=True).decode('utf-8')
                        except (Exception, UnicodeEncodeError) as e:
                            self.logger.critical('%s : Metadata: %s ...' %
                                                 (e, metadata[:20]))
                        ##if PY2 :
                        ##    try:
                        ##        metadata = metadata.encode('utf-8')
                        ##    except (Exception,UnicodeEncodeError) as e :
                        ##        self.logger.debug('%s : Metadata : %s ...' % (e,metadata[20]))

                        try:
                            f = open(outfile, 'w')
                            f.write(metadata)
                            f.close()
                        except (Exception, IOError) as err:
                            self.logger.critical(
                                "%s : Cannot write metadata in xml file %s" %
                                (err, outfile))
                            stats['ecount'] += 1
                            continue
                        else:
                            logging.debug('Harvested XML file written to %s' %
                                          outfile)
                            stats['count'] += 1
                    else:
                        stats['ecount'] += 1
                        self.logger.error('No metadata available for %s' %
                                          record)

                elif outtypeext == 'json':  # get the raw json content:
                    if (record is not None):
                        try:
                            with open(outfile, 'w') as f:
                                json.dump(record, f, sort_keys=True, indent=4)
                        except IOError:
                            logging.error(
                                "[ERROR] Cannot write metadata in out file '%s'\n"
                                % (outfile))
                            stats['ecount'] += 1
                            continue
                        else:
                            stats['count'] += 1
                            logging.debug('Harvested JSON file written to %s' %
                                          outfile)
                    else:
                        stats['ecount'] += 1
                        self.logger.warning(
                            'No metadata available for %s' % oai_id)

            except TypeError as e:
                self.logger.error('TypeError: %s' % e)
                stats['ecount'] += 1
                continue
            except Exception as e:
                self.logger.error('%s : %s' %
                                  (e, traceback.format_exc()))
                stats['ecount'] += 1
                continue

            # Next or last subset?
            if (stats['count'] == count_break) or (fcount == ntotrecs):
                print('       | %d records written to subset directory %s ' %
                      (stats['count'], subsetdir))

                # clean up current subset and write ids to remove to delete file
                for df in os.listdir(subsetdir + '/' + outtypedir):
                    df = os.path.join(subsetdir + '/' + outtypedir, df)
                    self.logger.debug('File to delete : %s' % df)
                    rec_id = os.path.splitext(os.path.basename(df))[0]
                    jf = os.path.join(subsetdir + '/json/', rec_id + '.json')
                    if os.stat(df).st_mtime < start - 86400:  # older than one day
                        os.remove(df)
                        self.logger.warning('File %s is deleted' % df)
                        if os.path.exists(jf):
                            os.remove(jf)
                            self.logger.warning('File %s is deleted' % jf)
                        delete_ids.append(rec_id)
                        self.logger.warning('Appended id %s to delete_ids' % rec_id)
                        stats['dcount'] += 1

                print('       | %d records deleted from subset directory %s ' %
                      (stats['dcount'], subsetdir))

                if fcount != ntotrecs:  # next subset needed
                    subsetdir = self.save_subset(req, stats, subset, count_set)
                    if not os.path.isdir(subsetdir + '/' + outtypedir):
                        os.makedirs(subsetdir + '/' + outtypedir)

                    count_set += 1

                # add the subset stats to the total stats and reset the temporary subset stats:
                for key in ['tcount', 'ecount', 'count', 'dcount']:
                    stats['tot' + key] += stats[key]
                    stats[key] = 0

                # start with a new time:
                stats['timestart'] = time.time()

        # path to the file with all ids to delete:
        delete_file = '/'.join([
            self.base_outdir, 'delete',
            req['community'] + '-' + req['mdprefix'] + '.del'
        ])
        if delete_ids:
            with open(delete_file, 'a') as f:
                for rec_id in delete_ids:
                    f.write(rec_id + '\n')

        # add the final subset stats to the total stats:
        for key in ['tcount', 'ecount', 'count', 'dcount']:
            stats['tot' + key] += stats[key]

        print(
            '   \t|- %-10s |@ %-10s |\n\t| Provided | Harvested | Failed | Deleted |\n\t| %8d | %9d | %6d | %6d |'
            % ('Finished', time.strftime("%H:%M:%S"), stats['tottcount'],
               stats['totcount'], stats['totecount'], stats['totdcount']))
Example #53
def get_namespaces():
    n = Namespaces()
    ns = n.get_namespaces(["fes","ogc","om","gml32","sml","swe20","swes","xlink"])
    ns["ows"] = n.get_namespace("ows110")
    ns["sos"] = n.get_namespace("sos20")
    return ns
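
The namespace dict these helpers return is typically passed to ElementTree search calls, often through owslib's nspath_eval, which expands prefixed paths into Clark notation. A minimal usage sketch of that pattern; capabilities_xml is a hypothetical, already-fetched SOS capabilities document:

from owslib.etree import etree
from owslib.util import nspath_eval
from owslib.namespaces import Namespaces

n = Namespaces()
ns = {"ows": n.get_namespace("ows110")}

doc = etree.fromstring(capabilities_xml)  # capabilities_xml: assumed fetched elsewhere
# expand the prefixed path to Clark notation and search the tree:
title = doc.find(nspath_eval("ows:ServiceIdentification/ows:Title", ns))
print(title.text if title is not None else "no title")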
Example #54
def get_namespaces():
    n = Namespaces()
    return n.get_namespaces(["gml","ogc","ows","wfs"])
Example #55
def get_namespaces():
    n = Namespaces()
    ns = n.get_namespaces(["gm03"])
    ns[None] = n.get_namespace("gm03")
    return ns
Example #56
def get_namespaces():
    n = Namespaces()
    ns = n.get_namespaces(["dif","fes","gml","ogc","xs","xsi"])
    ns[None] = n.get_namespace("ogc")
    return ns
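
Examples #55 and #56 bind the None key as a default namespace. With lxml, an nsmap entry whose prefix is None makes that namespace the unprefixed default on serialization; a small sketch of the pattern, assuming lxml is installed:

from lxml import etree
from owslib.namespaces import Namespaces

n = Namespaces()
nsmap = {None: n.get_namespace("ogc")}

# created in the ogc namespace, but serialized without a prefix:
root = etree.Element("{%s}Filter" % n.get_namespace("ogc"), nsmap=nsmap)
print(etree.tostring(root).decode())  # <Filter xmlns="http://www.opengis.net/ogc"/>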
Example #57
try:  # Python 3
    from urllib.parse import urlencode
except ImportError:  # Python 2
    from urllib import urlencode

import warnings

import six

from owslib.etree import etree
from owslib.util import (openURL, testXMLValue, extract_xml_list,
                         xmltag_split, OrderedDict, ServiceException,
                         bind_url, nspath_eval, Authentication)
from owslib.fgdc import Metadata
from owslib.iso import MD_Metadata
from owslib.map.common import WMSCapabilitiesReader, AbstractContentMetadata
from owslib.namespaces import Namespaces

n = Namespaces()

class CapabilitiesError(Exception):
    pass


class WebMapService_1_1_1(object):
    """Abstraction for OGC Web Map Service (WMS).

    Implements IWebMapService.
    """

    def __getitem__(self, name):
        ''' check contents dictionary to allow dict
        like access to service layers
        '''
        if name in self.__getattribute__('contents'):
            return self.__getattribute__('contents')[name]
        else:
            raise KeyError("No content named %s" % name)
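
With __getitem__ completed as above (the stock owslib implementation), layers can be addressed like dictionary entries. A usage sketch; the endpoint URL is illustrative only and may not be reachable:

from owslib.wms import WebMapService

wms = WebMapService('https://ows.terrestris.de/osm/service', version='1.1.1')
layer = wms['OSM-WMS']  # dict-like access via __getitem__
print(layer.title, layer.boundingBoxWGS84)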
Example #58
def get_namespaces():
    n = Namespaces()
    return n.get_namespaces(["gmd", "gml", "gmi", "ogc","ows","wfs"])
Example #59
# Imports this example needs to run; the Monitor import path and the two
# _CSW_* constant values are assumptions (the original module defines them elsewhere):
from math import ceil

from owslib.csw import CatalogueServiceWeb
from owslib.namespaces import Namespaces

from cate.util.monitor import Monitor

_CSW_MAX_RESULTS = 1000  # records per CSW page (assumed value)
_CSW_TIMEOUT = 60  # request timeout in seconds (assumed value)

class EsaCciCatalogueService:
    def __init__(self, catalogue_url: str):

        self._catalogue_url = catalogue_url

        self._catalogue = None
        self._catalogue_service = None

        self._namespaces = Namespaces()

    def reset(self):
        self._catalogue = None
        self._catalogue_service = None

    def getrecords(self, monitor: Monitor = Monitor.NONE):
        if not self._catalogue_service:
            self._init_service()

        if not self._catalogue:
            self._build_catalogue(monitor.child(1))

        return self._catalogue

    def _build_catalogue(self, monitor: Monitor = Monitor.NONE):

        self._catalogue = {}

        catalogue_metadata = {}

        start_position = 0
        max_records = _CSW_MAX_RESULTS

        matches = -1
        while True:
            # fetch record metadata
            self._catalogue_service.getrecords2(
                esn='full',
                outputschema=self._namespaces.get_namespace('gmd'),
                startposition=start_position,
                maxrecords=max_records)
            if matches == -1:
                # set counters, start progress monitor
                matches = self._catalogue_service.results.get('matches')
                if matches == 0:
                    break
                monitor.start(label="Fetching catalogue data... (%d records)" %
                              matches,
                              total_work=ceil(matches / max_records))

            catalogue_metadata.update(self._catalogue_service.records)
            monitor.progress(work=1)

            # bump counters
            start_position += max_records
            if start_position > matches:
                break

        self._catalogue = {
            record.identification.uricode[0]: {
                'abstract':
                record.identification.abstract,
                'bbox_minx':
                record.identification.bbox.minx
                if record.identification.bbox else None,
                'bbox_miny':
                record.identification.bbox.miny
                if record.identification.bbox else None,
                'bbox_maxx':
                record.identification.bbox.maxx
                if record.identification.bbox else None,
                'bbox_maxy':
                record.identification.bbox.maxy
                if record.identification.bbox else None,
                'creation_date':
                next(
                    iter(e.date for e in record.identification.date
                         if e and e.type == 'creation'), None),
                'publication_date':
                next(
                    iter(e.date for e in record.identification.date
                         if e and e.type == 'publication'), None),
                'title':
                record.identification.title,
                'data_sources':
                record.identification.uricode[1:],
                'licences':
                record.identification.uselimitation,
                'temporal_coverage_start':
                record.identification.temporalextent_start,
                'temporal_coverage_end':
                record.identification.temporalextent_end
            }
            for record in catalogue_metadata.values()
            if record.identification and len(record.identification.uricode) > 0
        }
        monitor.done()

    def _init_service(self):
        if self._catalogue:
            return
        if not self._catalogue_service:
            self._catalogue_service = CatalogueServiceWeb(
                url=self._catalogue_url, timeout=_CSW_TIMEOUT, skip_caps=True)
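
A short usage sketch for EsaCciCatalogueService; the CSW endpoint URL is illustrative and may not be the current ESA CCI address:

service = EsaCciCatalogueService('https://csw.ceda.ac.uk/geonetwork/srv/eng/csw-CEDA-CCI')
catalogue = service.getrecords()  # runs with Monitor.NONE by default
for record_id, meta in list(catalogue.items())[:3]:
    print(record_id, '-', meta['title'])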