예제 #1
0
    def test_link_types(self):
        """simple link type tests"""

        for test in self.test_data['test_data']:
            self.assertEqual(sniff_link(test['link']), test['expected'],
                             'Expected %s and %s to be equal' %
                             (test['link'], test['expected']))
예제 #2
0
    def create_database(self):
        df = []
        for k,v in self.csw.records.items():
            df.append(pd.DataFrame(v.references))

        df = pd.concat(df, ignore_index=True)
        df['geolink'] = [sniff_link(url) for url in df['url']]
        self.df = df
예제 #3
0
    def test_link_types(self):
        """simple link type tests"""

        for test in self.test_data['test_data']:
            self.assertEqual(
                sniff_link(test['link']), test['expected'],
                'Expected %s and %s to be equal' %
                (test['link'], test['expected']))
예제 #4
0
    def create_database(self):
        '''
        Creates a PANDAS dataframe of URLs from which to query data. Checks for geolinking.
        '''
        df = []
        for k, v in self.csw.records.items():
            df.append(pd.DataFrame(v.references))

        df = pd.concat(df, ignore_index=True)
        df['geolink'] = [sniff_link(url) for url in df['url']]
        self.df = df
def csw_query(endpoint,
              bbox=None,
              start=None,
              stop=None,
              kw_names=None,
              crs="urn:ogc:def:crs:OGC:1.3:CRS84"):
    crs = "urn:ogc:def:crs:::EPSG:4326"  #https://github.com/qgis/QGIS/issues/40778
    constraints = []
    csw = None
    while csw is None:
        try:
            csw = CatalogueServiceWeb(endpoint, timeout=60)
            #csw.getrecords2(maxrecords=10)
            #for rec in csw.records:
            #    print(vars(csw.records[rec]))
            #    print(csw.records[rec].title)
        except:
            pass
    if kw_names:
        kw = dict(wildCard="*",
                  escapeChar="\\",
                  singleChar="?",
                  propertyname="apiso:AnyText")
        or_filt = fes.Or([
            fes.PropertyIsLike(literal=("*%s*" % val), **kw)
            for val in kw_names
        ])
        constraints.append(or_filt)

    if all(v is not None for v in [start, stop]):
        begin, end = fes_date_filter(start, stop)
        constraints.append(begin)
        constraints.append(end)
    if bbox:
        bbox_crs = fes.BBox(bbox, crs=crs)
        constraints.append(bbox_crs)
    if len(constraints) >= 2:
        filter_list = [fes.And(constraints)]
    else:
        filter_list = constraints
    get_csw_records(csw, filter_list, pagesize=10, maxrecords=10)

    print("Found {} records.\n".format(len(csw.records.keys())))
    for key, value in list(csw.records.items()):
        print(u"Title: [{}]\nID: {}\n".format(value.title, key))
        msg = "geolink: {geolink}\nscheme: {scheme}\nURL: {url}\n".format
        for ref in value.references:
            print(msg(geolink=sniff_link(ref["url"]), **ref))
        print("#########################################################",
              '\n')
예제 #6
0
def _parse_reference(ref, identifier):
    """
    First try to sniff the scheme from the URL in the `ref` dict with geolinks,
    if that fails get the `scheme` field directly.

    For all possible identifiers see:
    https://github.com/OSGeo/Cat-Interop/blob/master/LinkPropertyLookupTable.csv

    """
    from geolinks import sniff_link

    url = None
    scheme = sniff_link(ref['url'])
    if not scheme:
        scheme = ref['scheme']
    if identifier.endswith(':'):
        cond = identifier in scheme
    else:
        cond = identifier == scheme
    if cond:
        url = ref['url']
    return url
예제 #7
0
def _parse_reference(ref, identifier):
    """
    First try to sniff the scheme from the URL in the `ref` dict with geolinks,
    if that fails get the `scheme` field directly.

    For all possible identifiers see:
    https://github.com/OSGeo/Cat-Interop/blob/master/LinkPropertyLookupTable.csv

    """
    from geolinks import sniff_link

    url = None
    scheme = sniff_link(ref['url'])
    if not scheme:
        scheme = ref['scheme']
    if identifier.endswith(':'):
        cond = identifier in scheme
    else:
        cond = identifier == scheme
    if cond:
        url = ref['url']
    return url
예제 #8
0
print('\n'.join(textwrap.wrap(value.abstract)))

# In[ ]:

print('\n'.join(value.subjects))

# The next step is to inspect the type services/schemes available for downloading the data. The easiest way to accomplish that is with by "sniffing" the URLs with `geolinks`.

# In[ ]:

from geolinks import sniff_link

msg = 'geolink: {geolink}\nscheme: {scheme}\nURL: {url}\n'.format
for ref in value.references:
    print(msg(geolink=sniff_link(ref['url']), **ref))

# There are many direct links to Comma Separated Value (`CSV`) and
# eXtensible Markup Language (`XML`) responses to the various variables available in that station.
#
# In addition to those links, there are three very interesting links for more information: 1.) the QC document, 2.) the station photo, 3.) the station home page.
#
#
# For a detailed description of what those `geolink` results mean check the [lookup](https://github.com/OSGeo/Cat-Interop/blob/master/LinkPropertyLookupTable.csv) table.
#
#
# ![](https://tidesandcurrents.noaa.gov/images/stationphotos/9439040A.jpg)

# The original search was focused on sea water temperature,
# so there is the need to extract only the endpoint for that variable.
#
import textwrap

value = csw.records[key]

print("\n".join(textwrap.wrap(value.abstract)))

print("\n".join(value.subjects))

The next step is to inspect the type services/schemes available for downloading the data. The easiest way to accomplish that is with by "sniffing" the URLs with `geolinks`.

from geolinks import sniff_link

msg = "geolink: {geolink}\nscheme: {scheme}\nURL: {url}\n".format
for ref in value.references:
    print(msg(geolink=sniff_link(ref["url"]), **ref))

There are many direct links to Comma Separated Value (`CSV`) and
eXtensible Markup Language (`XML`) responses to the various variables available in that station. 

In addition to those links, there are three very interesting links for more information: 1.) the QC document, 2.) the station photo, 3.) the station home page.


For a detailed description of what those `geolink` results mean check the [lookup](https://github.com/OSGeo/Cat-Interop/blob/master/LinkPropertyLookupTable.csv) table.

The original search was focused on sea water temperature,
so there is the need to extract only the endpoint for that variable.

PS: see also the [pyoos example](http://ioos.github.io/notebooks_demos/notebooks/2016-10-12-fetching_data/) for fetching data from `CO-OPS`.

start, stop
예제 #10
0

# In[ ]:

print('\n'.join(value.subjects))


# The next step is to inspect the type services/schemes available for downloading the data. The easiest way to accomplish that is with by "sniffing" the URLs with `geolinks`.

# In[ ]:

from geolinks import sniff_link

msg = 'geolink: {geolink}\nscheme: {scheme}\nURL: {url}\n'.format
for ref in value.references:
    print(msg(geolink=sniff_link(ref['url']), **ref))


# There are many direct links to Comma Separated Value (`CSV`) and
# eXtensible Markup Language (`XML`) responses to the various variables available in that station. 
# 
# In addition to those links, there are three very interesting links for more information: 1.) the QC document, 2.) the station photo, 3.) the station home page.
# 
# 
# For a detailed description of what those `geolink` results mean check the [lookup](https://github.com/OSGeo/Cat-Interop/blob/master/LinkPropertyLookupTable.csv) table.
# 
# 
# ![](https://tidesandcurrents.noaa.gov/images/stationphotos/9439040A.jpg)

# The original search was focused on sea water temperature,
# so there is the need to extract only the endpoint for that variable.
예제 #11
0
def _parse_iso(context, repos, exml):

    from owslib.iso import MD_Metadata

    recobj = repos.dataset()
    links = []

    md = MD_Metadata(exml)

    _set(context, recobj, 'pycsw:Identifier', md.identifier)
    _set(context, recobj, 'pycsw:Typename', 'gmd:MD_Metadata')
    _set(context, recobj, 'pycsw:Schema', context.namespaces['gmd'])
    _set(context, recobj, 'pycsw:MdSource', 'local')
    _set(context, recobj, 'pycsw:InsertDate', util.get_today_and_now())
    _set(context, recobj, 'pycsw:XML', md.xml)
    _set(context, recobj, 'pycsw:AnyText', util.get_anytext(exml))
    _set(context, recobj, 'pycsw:Language', md.language)
    _set(context, recobj, 'pycsw:Type', md.hierarchy)
    _set(context, recobj, 'pycsw:ParentIdentifier', md.parentidentifier)
    _set(context, recobj, 'pycsw:Date', md.datestamp)
    _set(context, recobj, 'pycsw:Modified', md.datestamp)
    _set(context, recobj, 'pycsw:Source', md.dataseturi)
    if md.referencesystem is not None:
        _set(context, recobj, 'pycsw:CRS','urn:ogc:def:crs:EPSG:6.11:%s' %
        md.referencesystem.code)

    if hasattr(md, 'identification'):
        _set(context, recobj, 'pycsw:Title', md.identification.title)
        _set(context, recobj, 'pycsw:AlternateTitle', md.identification.alternatetitle)
        _set(context, recobj, 'pycsw:Abstract', md.identification.abstract)
        _set(context, recobj, 'pycsw:Relation', md.identification.aggregationinfo)

        if hasattr(md.identification, 'temporalextent_start'):
            _set(context, recobj, 'pycsw:TempExtent_begin', md.identification.temporalextent_start)
        if hasattr(md.identification, 'temporalextent_end'):
            _set(context, recobj, 'pycsw:TempExtent_end', md.identification.temporalextent_end)

        if len(md.identification.topiccategory) > 0:
            _set(context, recobj, 'pycsw:TopicCategory', md.identification.topiccategory[0])

        if len(md.identification.resourcelanguage) > 0:
            _set(context, recobj, 'pycsw:ResourceLanguage', md.identification.resourcelanguage[0])
 
        if hasattr(md.identification, 'bbox'):
            bbox = md.identification.bbox
        else:
            bbox = None

        if (hasattr(md.identification, 'keywords') and
            len(md.identification.keywords) > 0):
            all_keywords = [item for sublist in md.identification.keywords for item in sublist['keywords'] if item is not None]
            _set(context, recobj, 'pycsw:Keywords', ','.join(all_keywords))
            _set(context, recobj, 'pycsw:KeywordType', md.identification.keywords[0]['type'])

        if (hasattr(md.identification, 'creator') and
            len(md.identification.creator) > 0):
            all_orgs = set([item.organization for item in md.identification.creator if hasattr(item, 'organization') and item.organization is not None])
            _set(context, recobj, 'pycsw:Creator', ';'.join(all_orgs))
        if (hasattr(md.identification, 'publisher') and
            len(md.identification.publisher) > 0):
            all_orgs = set([item.organization for item in md.identification.publisher if hasattr(item, 'organization') and item.organization is not None])
            _set(context, recobj, 'pycsw:Publisher', ';'.join(all_orgs))
        if (hasattr(md.identification, 'contributor') and
            len(md.identification.contributor) > 0):
            all_orgs = set([item.organization for item in md.identification.contributor if hasattr(item, 'organization') and item.organization is not None])
            _set(context, recobj, 'pycsw:Contributor', ';'.join(all_orgs))

        if (hasattr(md.identification, 'contact') and 
            len(md.identification.contact) > 0):
            all_orgs = set([item.organization for item in md.identification.contact if hasattr(item, 'organization') and item.organization is not None])
            _set(context, recobj, 'pycsw:OrganizationName', ';'.join(all_orgs))

        if len(md.identification.securityconstraints) > 0:
            _set(context, recobj, 'pycsw:SecurityConstraints', 
            md.identification.securityconstraints[0])
        if len(md.identification.accessconstraints) > 0:
            _set(context, recobj, 'pycsw:AccessConstraints', 
            md.identification.accessconstraints[0])
        if len(md.identification.otherconstraints) > 0:
            _set(context, recobj, 'pycsw:OtherConstraints', md.identification.otherconstraints[0])

        if hasattr(md.identification, 'date'):
            for datenode in md.identification.date:
                if datenode.type == 'revision':
                    _set(context, recobj, 'pycsw:RevisionDate', datenode.date)
                elif datenode.type == 'creation':
                    _set(context, recobj, 'pycsw:CreationDate', datenode.date)
                elif datenode.type == 'publication':
                    _set(context, recobj, 'pycsw:PublicationDate', datenode.date)

        if hasattr(md.identification, 'extent') and hasattr(md.identification.extent, 'description_code'):
            _set(context, recobj, 'pycsw:GeographicDescriptionCode', md.identification.extent.description_code)

        if len(md.identification.denominators) > 0:
            _set(context, recobj, 'pycsw:Denominator', md.identification.denominators[0])
        if len(md.identification.distance) > 0:
            _set(context, recobj, 'pycsw:DistanceValue', md.identification.distance[0])
        if len(md.identification.uom) > 0:
            _set(context, recobj, 'pycsw:DistanceUOM', md.identification.uom[0])

        if len(md.identification.classification) > 0:
            _set(context, recobj, 'pycsw:Classification', md.identification.classification[0])
        if len(md.identification.uselimitation) > 0:
            _set(context, recobj, 'pycsw:ConditionApplyingToAccessAndUse',
            md.identification.uselimitation[0])

    if hasattr(md.identification, 'format'):
        _set(context, recobj, 'pycsw:Format', md.distribution.format)

    if md.serviceidentification is not None:
        _set(context, recobj, 'pycsw:ServiceType', md.serviceidentification.type)
        _set(context, recobj, 'pycsw:ServiceTypeVersion', md.serviceidentification.version)

        _set(context, recobj, 'pycsw:CouplingType', md.serviceidentification.couplingtype)
   
    service_types = []
    for smd in md.identificationinfo:
        if smd.identtype == 'service' and smd.type is not None:
            service_types.append(smd.type)

    _set(context, recobj, 'pycsw:ServiceType', ','.join(service_types))

        #if len(md.serviceidentification.operateson) > 0: 
        #    _set(context, recobj, 'pycsw:operateson = VARCHAR(32), 
        #_set(context, recobj, 'pycsw:operation VARCHAR(32), 
        #_set(context, recobj, 'pycsw:operatesonidentifier VARCHAR(32), 
        #_set(context, recobj, 'pycsw:operatesoname VARCHAR(32), 


    if hasattr(md.identification, 'dataquality'):     
        _set(context, recobj, 'pycsw:Degree', md.dataquality.conformancedegree)
        _set(context, recobj, 'pycsw:Lineage', md.dataquality.lineage)
        _set(context, recobj, 'pycsw:SpecificationTitle', md.dataquality.specificationtitle)
        if hasattr(md.dataquality, 'specificationdate'):
            _set(context, recobj, 'pycsw:specificationDate',
            md.dataquality.specificationdate[0].date)
            _set(context, recobj, 'pycsw:SpecificationDateType',
            md.dataquality.specificationdate[0].datetype)

    if hasattr(md, 'contact') and len(md.contact) > 0:
        _set(context, recobj, 'pycsw:ResponsiblePartyRole', md.contact[0].role)

    LOGGER.info('Scanning for links')
    if hasattr(md, 'distribution'):
        dist_links = []
        if hasattr(md.distribution, 'online'):
            LOGGER.debug('Scanning for gmd:transferOptions element(s)')
            dist_links.extend(md.distribution.online)
        if hasattr(md.distribution, 'distributor'):
            LOGGER.debug('Scanning for gmd:distributorTransferOptions element(s)')
            for dist_member in md.distribution.distributor:
                dist_links.extend(dist_member.online)
        for link in dist_links:
            if link.url is not None and link.protocol is None:  # take a best guess
                link.protocol = sniff_link(link.url)
            linkstr = '%s,%s,%s,%s' % \
            (link.name, link.description, link.protocol, link.url)
            links.append(linkstr)

    try:
        LOGGER.debug('Scanning for srv:SV_ServiceIdentification links')
        for sident in md.identificationinfo:
            if hasattr(sident, 'operations'):
                for sops in sident.operations:
                    for scpt in sops['connectpoint']:
                        LOGGER.debug('adding srv link %s', scpt.url)
                        linkstr = '%s,%s,%s,%s' % \
                        (scpt.name, scpt.description, scpt.protocol, scpt.url)
                        links.append(linkstr)
    except Exception as err:  # srv: identification does not exist
        LOGGER.debug('no srv:SV_ServiceIdentification links found')

    if len(links) > 0:
        _set(context, recobj, 'pycsw:Links', '^'.join(links))

    if bbox is not None:
        try:
            tmp = '%s,%s,%s,%s' % (bbox.minx, bbox.miny, bbox.maxx, bbox.maxy)
            _set(context, recobj, 'pycsw:BoundingBox', util.bbox2wktpolygon(tmp))
        except:  # coordinates are corrupted, do not include
            _set(context, recobj, 'pycsw:BoundingBox', None)
    else:
        _set(context, recobj, 'pycsw:BoundingBox', None)

    return recobj