Ejemplo n.º 1
0
def make_filter(config):
    """Build the CSW constraint list for a catalog search described by ``config``.

    Reads ``config["cf_names"]``, ``config["date"]["start"]`` /
    ``config["date"]["stop"]`` and ``config["region"]["bbox"]`` /
    ``config["region"]["crs"]``.

    Returns a one-element list holding a single ``fes.And`` that combines, in
    this order: the bounding box, the two date filters, the name filter
    (matched against ``apiso:Subject``) and an exclusion of records whose
    ``apiso:AnyText`` matches ``*cdip*``.
    """
    like_options = dict(wildCard="*", escapeChar="\\", singleChar="?")

    def contains(propertyname, text):
        # Substring match: the text is wrapped in the "*" wildcard on both sides.
        return fes.PropertyIsLike(literal=("*%s*" % text),
                                  propertyname=propertyname,
                                  **like_options)

    names = config["cf_names"]
    if len(names) > 1:
        name_filter = fes.Or([contains("apiso:Subject", name) for name in names])
    else:
        # A lone name is used bare (no fes.Or wrapper); an empty list raises
        # IndexError here.
        name_filter = contains("apiso:Subject", names[0])

    exclude_cdip = fes.Not([contains("apiso:AnyText", "cdip")])

    date_span = config["date"]
    begin, end = fes_date_filter(date_span["start"], date_span["stop"])

    region = config["region"]
    bbox_filter = fes.BBox(region["bbox"], crs=region["crs"])

    return [fes.And([bbox_filter, begin, end, name_filter, exclude_cdip])]
Ejemplo n.º 2
0
    def any_case_match_filters(self, propertyname, word_list):
        '''
        Helper function to return and/or filter from a word list for partial case insensitivity
        Work-around resolves issues where "matchCase=False" is ignored and keywords with whitespace characters are not found

        For every word the unchanged, upper-case, lower-case and title-case variants are generated.
        Whitespace in each variant is replaced with "_" (NOTE(review): relies on "_" being the
        single-character wildcard in effect for PropertyIsLike - confirm against the OWSLib version in use).
        A word with only one distinct variant yields a single PropertyIsLike filter; otherwise the
        variants are combined in an "or" filter.

        @param propertyname: String denoting CSW property to search
        @param word_list: List of strings for (partially) case-insensitive "and" search

        @return: List of FES filters expressing "and" query (one entry per word, in word_list order)
        '''
        filter_list = []
        for word in word_list:
            # The whitespace substitution is applied to every variant, including
            # the case where all variants coincide (e.g. purely numeric keywords).
            # Sorting makes the generated request deterministic across runs.
            variants = sorted(set(
                re.sub(r'\s', '_', variant)
                for variant in (word, word.upper(), word.lower(), word.title())))

            variant_filters = [
                fes.PropertyIsLike(propertyname=propertyname,
                                   literal=variant,
                                   matchCase=False) for variant in variants
            ]

            if len(variant_filters) == 1:  # Use single filter if no "or" required
                filter_list.append(variant_filters[0])
            else:
                filter_list.append(fes.Or(variant_filters))

        return filter_list
Ejemplo n.º 3
0
    def make_fes_filter(self):
        """Build the CSW constraint and store it in ``self.filter_list``.

        Reads ``self.start``, ``self.stop``, ``self.target``, ``self.models``
        and ``self.bbox_crs``. The result is a one-element list holding a
        ``fes.And`` of: bounding box, date filters, target filter, an optional
        ``*forecast*`` filter (only when ``self.models == True``), and
        exclusions for ``*cdip`` and ``*grib*``. All text filters are matched
        against ``apiso:AnyText``.
        """
        begin, end = fes_date_filter(self.start, self.stop)
        kw = dict(wildCard='*', escapeChar='\\', singleChar='?',
                  propertyname='apiso:AnyText')

        def like(text):
            return fes.PropertyIsLike(literal=text, **kw)

        if len(self.target) > 1:
            prop_filt = fes.Or([like('*%s*' % val) for val in self.target])
        else:
            # NOTE(review): a single target is passed without surrounding
            # wildcards, unlike the multi-target branch - confirm intent.
            prop_filt = like(self.target[0])

        # The forecast clause is the only difference between the two modes.
        if self.models == True:  # noqa: E712 - original equality test kept
            model_clauses = [like('*%s*' % 'forecast')]
        else:
            model_clauses = []

        # NOTE(review): '*cdip' has no trailing wildcard while '*grib*' does.
        exclusions = [fes.Not([like('*cdip')]), fes.Not([like('*grib*')])]

        clauses = [self.bbox_crs, begin, end, prop_filt]
        clauses.extend(model_clauses)
        clauses.extend(exclusions)
        self.filter_list = [fes.And(clauses)]
 def req_alpha(self,
               ident,
               schema,
               attribut,
               valeur,
               mods,
               maxi=0,
               ordre=None):
     """Fetch the elements of an alpha request.

     ``ident`` is a ``(niveau, classe)`` pair; the WFS type name requested is
     ``"niveau:classe"``. When ``attribut`` is truthy a PropertyIsLike filter
     on ``attribut``/``valeur`` is serialised into the printed parameters.

     NOTE: the request actually sent carries only the type name - the filter
     is built and printed but not transmitted. ``mods``, ``maxi`` and
     ``ordre`` are not used here.

     Returns whatever ``self.connection.getfeature`` returns.
     """
     niveau, classe = ident
     schema.resolve()
     # Return value is not used; the call is kept in case it has side effects.
     schema.get_liste_attributs()
     # Bare attribute access kept from the original (no call is made).
     self.get_attr_of_classe
     typename = niveau + ":" + classe
     params = {"typename": typename}
     if attribut:
         like_filter = F.PropertyIsLike(propertyname=attribut,
                                        literal=valeur,
                                        wildCard="*")
         params["filter"] = etree.tostring(like_filter.toXML(),
                                           encoding="unicode")
     print("envoi requete", params)
     reponse = self.connection.getfeature(typename=typename)
     print("wfs apres reponse", type(reponse))
     return reponse
def make_filter(config):
    """Build the CSW constraint list for the search described by ``config``.

    Reads ``config["cf_names"]``, ``config["date"]`` (``start``/``stop``) and
    ``config["region"]`` (``bbox``/``crs``). Returns a one-element list with a
    ``fes.And`` of: bounding box, date filters, an OR over the names, and an
    exclusion of records whose ``apiso:AnyText`` matches ``GRIB-2``.
    """
    from owslib import fes
    from ioos_tools.ioos import fes_date_filter

    like_options = {
        "wildCard": "*",
        "escapeChar": "\\",
        "singleChar": "?",
        "propertyname": "apiso:AnyText",
    }

    # One substring filter per name, OR-ed together.
    name_terms = []
    for name in config["cf_names"]:
        name_terms.append(
            fes.PropertyIsLike(literal=("*%s*" % name), **like_options))
    any_name = fes.Or(name_terms)

    # The exclusion literal carries no wildcards.
    exclude_grib = fes.Not([fes.PropertyIsLike(literal="GRIB-2", **like_options)])

    date_span = config["date"]
    begin, end = fes_date_filter(date_span["start"], date_span["stop"])

    region = config["region"]
    bbox_filter = fes.BBox(region["bbox"], crs=region["crs"])

    return [fes.And([bbox_filter, begin, end, any_name, exclude_grib])]
Ejemplo n.º 6
0
def csw_ajax(request, *args, **kwargs):
    """Return one page of CSW search results as JSON.

    Only GET is served. Connection details and keywords come from the
    session (``csw_url``, ``user``, ``password``, ``keywords``); paging comes
    from the ``offset`` and ``perPage`` query parameters. Every failure path
    (non-GET, missing URL, any exception while querying) answers with
    ``HttpResponseServerError``.
    """
    if request.method != 'GET':
        return HttpResponseServerError()

    session = request.session
    csw_url = session['csw_url']
    user = session['user']
    password = session['password']
    keywords = session['keywords']
    keywords_query = [
        fes.PropertyIsLike('csw:AnyText', '%%%s%%' % keywords)
    ]
    if not csw_url:
        return HttpResponseServerError()

    try:
        csw = CatalogueServiceWeb(csw_url, username=user, password=password)
        # When the service is not a CSW, the type string itself is what ends
        # up under 'records' below.
        result = csw.identification.type
        if result == 'CSW':
            offset = int(request.GET['offset'])
            per_page = int(request.GET['perPage'])
            csw.getrecords2(
                typenames='gmd:MD_Metadata',
                esn='full',
                outputschema='http://www.isotc211.org/2005/gmd',
                constraints=keywords_query,
                startposition=offset,
                maxrecords=per_page)
            result = []
            for rec in csw.records.values():
                # Only ISO metadata records are reported.
                if not isinstance(rec, MD_Metadata):
                    continue
                res = {
                    'id': rec.identifier,
                    'title': rec.identification.title,
                    'inasafe_keywords':
                        rec.identification.supplementalinformation,
                }
                # 'inasafe_layer' is only present when supplemental
                # information is non-empty.
                if res['inasafe_keywords']:
                    res['inasafe_layer'] = (
                        '<inasafe_keywords/>' in res['inasafe_keywords'])
                result.append(res)
        matches = csw.results['matches']
        return JsonResponse(
            {
                'records': result,
                'queryRecordCount': matches,
                'totalRecordCount': matches,
            },
            safe=False)
    except Exception as e:
        LOGGER.exception(e)
        return HttpResponseServerError()
def csw_query(endpoint,
              bbox=None,
              start=None,
              stop=None,
              kw_names=None,
              crs="urn:ogc:def:crs:OGC:1.3:CRS84",
              max_attempts=None,
              retry_delay=1.0):
    """Query a CSW endpoint and print the title, id and references of each hit.

    Parameters
    ----------
    endpoint : URL of the CSW service.
    bbox : optional bounding box passed to ``fes.BBox``.
    start, stop : optional temporal bounds; used only when both are given.
    kw_names : optional iterable of keywords, OR-ed as ``*keyword*`` matches
        against ``apiso:AnyText``.
    crs : accepted for compatibility but currently overridden (see below).
    max_attempts : maximum number of connection attempts; ``None`` (default)
        keeps retrying until the connection succeeds.
    retry_delay : seconds to wait between connection attempts.

    Raises
    ------
    Exception
        The last connection error, once ``max_attempts`` is exhausted.
    """
    import time

    # The caller-supplied CRS is deliberately replaced; see the linked issue.
    crs = "urn:ogc:def:crs:::EPSG:4326"  #https://github.com/qgis/QGIS/issues/40778
    constraints = []

    csw = None
    attempt = 0
    while csw is None:
        attempt += 1
        try:
            csw = CatalogueServiceWeb(endpoint, timeout=60)
        except Exception:
            # Catching Exception (not a bare except) lets KeyboardInterrupt and
            # SystemExit abort the retry loop; the pause avoids hammering an
            # unreachable server in a tight loop.
            if max_attempts is not None and attempt >= max_attempts:
                raise
            time.sleep(retry_delay)

    if kw_names:
        kw = dict(wildCard="*",
                  escapeChar="\\",
                  singleChar="?",
                  propertyname="apiso:AnyText")
        or_filt = fes.Or([
            fes.PropertyIsLike(literal=("*%s*" % val), **kw)
            for val in kw_names
        ])
        constraints.append(or_filt)

    if start is not None and stop is not None:
        begin, end = fes_date_filter(start, stop)
        constraints.append(begin)
        constraints.append(end)
    if bbox:
        bbox_crs = fes.BBox(bbox, crs=crs)
        constraints.append(bbox_crs)

    # Two or more constraints are combined with And; zero or one constraint
    # is passed through as a plain list.
    if len(constraints) >= 2:
        filter_list = [fes.And(constraints)]
    else:
        filter_list = constraints
    get_csw_records(csw, filter_list, pagesize=10, maxrecords=10)

    print("Found {} records.\n".format(len(csw.records)))
    msg = "geolink: {geolink}\nscheme: {scheme}\nURL: {url}\n".format
    for key, value in list(csw.records.items()):
        print(u"Title: [{}]\nID: {}\n".format(value.title, key))
        for ref in value.references:
            print(msg(geolink=sniff_link(ref["url"]), **ref))
        print("#########################################################",
              '\n')
Ejemplo n.º 8
0
 def parseFilter(self, s):
     """translates inspirobot filter syntax into fes
     for example:
         'OrganisationName = DREAL Bretagne && Type = dataset || OrganisationName ~ DDTM 29 && Type = dataset'

     '||' separates alternative groups and '&&' separates the terms of one
     group. A term is '<property> = <literal>' (PropertyIsEqualTo) or
     '<property> ~ <literal>' (PropertyIsLike). The operator that appears
     first in the term decides its kind, and the term is split on that first
     occurrence only, so literals may themselves contain '=' or '~'.
     Terms with no operator are ignored.

     Returns a list with one inner list of fes filters per '||' group.
     """
     filters = []
     for or_part in s.split('||'):
         andgroup = []
         for term in or_part.split('&&'):
             eq_pos = term.find('=')
             like_pos = term.find('~')
             if eq_pos == -1 and like_pos == -1:
                 continue
             if like_pos == -1 or (eq_pos != -1 and eq_pos < like_pos):
                 name, _, literal = term.partition('=')
                 andgroup.append(
                     fes.PropertyIsEqualTo(propertyname=name.strip(),
                                           literal=literal.strip()))
             else:
                 name, _, literal = term.partition('~')
                 andgroup.append(
                     fes.PropertyIsLike(propertyname=name.strip(),
                                        literal=literal.strip()))
         filters.append(andgroup)
     return filters
Ejemplo n.º 9
0
    f = fes.BBox(bbox=list(reversed(s.bounds)))
    polygon_filters.append(f)
# If we have more than one polygon filter, OR them together
# NOTE(review): with zero polygon filters neither branch runs, so
# `polygon_filters` stays a list and is passed as-is into the final And.
if len(polygon_filters) > 1:
    polygon_filters = fes.Or(polygon_filters)
elif len(polygon_filters) == 1:
    polygon_filters = polygon_filters[0]

# Name filters
name_filters = []
model_strings = [
    'roms', 'selfe', 'adcirc', 'ncom', 'hycom', 'fvcom', 'wrf', 'wrams'
]
# Each model name contributes two substring filters: one on the record
# title and one on the record subject.
for model in model_strings:
    title_filter = fes.PropertyIsLike(propertyname='apiso:Title',
                                      literal='*%s*' % model,
                                      wildCard='*')
    name_filters.append(title_filter)
    subject_filter = fes.PropertyIsLike(propertyname='apiso:Subject',
                                        literal='*%s*' % model,
                                        wildCard='*')
    name_filters.append(subject_filter)
# Or all of the name filters together
name_filters = fes.Or(name_filters)

# Final filters: a record must satisfy the polygon filter(s) AND at least
# one of the name filters.
filters = fes.And([polygon_filters, name_filters])

# <markdowncell>

# ##### The actual CSW filters look like this
Ejemplo n.º 10
0
# In[2]:

endpoint = 'http://www.nodc.noaa.gov/geoportal/csw'  # NODC/UAF Geoportal: granule level
csw = CatalogueServiceWeb(endpoint, timeout=60)
# print() call form works on both Python 2 and 3 and matches the cells below.
print(csw.version)

# In[3]:

# Notebook display expression: the constraints advertised for GetRecords.
csw.get_operation_by_name('GetRecords').constraints

# In[4]:

# Substring match for "level" anywhere in the record text.
val = 'level'
filter1 = fes.PropertyIsLike(propertyname='apiso:AnyText',
                             literal=('*%s*' % val),
                             escapeChar='\\',
                             wildCard='*',
                             singleChar='?')
filter_list = [filter1]

# In[5]:

csw.getrecords2(constraints=filter_list, maxrecords=100, esn='full')
# Notebook display expression: number of records returned.
len(csw.records.keys())

# In[6]:

# Pick one returned record at random and show its title and references.
choice = np.random.choice(list(csw.records.keys()))
print(csw.records[choice].title)
csw.records[choice].references
Ejemplo n.º 11
0
            propertyname=propertyname, literal=start
        )
    elif constraint == "within":
        propertyname = "apiso:TempExtent_begin"
        begin = fes.PropertyIsGreaterThanOrEqualTo(
            propertyname=propertyname, literal=start
        )
        propertyname = "apiso:TempExtent_end"
        end = fes.PropertyIsLessThanOrEqualTo(propertyname=propertyname, literal=stop)
    else:
        raise NameError("Unrecognized constraint {}".format(constraint))
    return begin, end

# Shared PropertyIsLike options: every text filter below matches apiso:AnyText.
kw = dict(wildCard="*", escapeChar="\\", singleChar="?", propertyname="apiso:AnyText")

# Match records containing any of the names in `cf_names`.
or_filt = fes.Or([fes.PropertyIsLike(literal=("*%s*" % val), **kw) for val in cf_names])

# Exclude GNOME returns.
not_filt = fes.Not([fes.PropertyIsLike(literal="*GNOME*", **kw)])

begin, end = fes_date_filter(start, stop)
bbox_crs = fes.BBox(bbox, crs=crs)
# Single And constraint: bounding box, date range, name match, GNOME exclusion.
filter_list = [fes.And([bbox_crs, begin, end, or_filt, not_filt])]

The same filter can be used to search multiple catalogs. The cell below loops over three catalogs to find which one is the most up to date and returns the near-real-time data.

def get_csw_records(csw, filter_list, pagesize=10, maxrecords=1000):
    """Iterate `maxrecords`/`pagesize` times until the requested value in
    `maxrecords` is reached.
    """
    from owslib.fes import SortBy, SortProperty
Ejemplo n.º 12
0
# <codecell>

endpoint = 'http://www.ngdc.noaa.gov/geoportal/csw' # NGDC Geoportal
csw = CatalogueServiceWeb(endpoint,timeout=60)

# <codecell>

# convert User Input into FES filters
start, stop = fes_date_filter(start_date,end_date)
bbox = fes.BBox(bounding_box)

#use the search name to create search filter
or_filt = fes.Or([fes.PropertyIsLike(propertyname='apiso:AnyText',
                                     literal='*%s*' % val,
                                     escapeChar='\\',
                                     wildCard='*',
                                     singleChar='?') for val in data_dict["temp"]["names"]])

# A single And constraint combines the bounding box, the date range and the
# name match.
filter_list = [fes.And([ bbox, start, stop, or_filt]) ]

csw.getrecords2(constraints=filter_list,maxrecords=1000,esn='full')
# print() call form works on both Python 2 and 3; the output is unchanged.
print(str(len(csw.records)) + " csw records found")

# <markdowncell>

# #### Dap URLs

# <codecell>
Ejemplo n.º 13
0
                    for d in rec.references if d['scheme'] == service_string),
                   None)
        if url is not None:
            urls.append(url)
    return urls


# <codecell>

# Collect one substring filter on apiso:AnyText per model name.
filt = []
search_text = ['roms', 'selfe', 'adcirc', 'ncom', 'hycom', 'fvcom']
for val in search_text:
    filt.append(
        fes.PropertyIsLike(propertyname='apiso:AnyText',
                           literal=('*%s*' % val),
                           escapeChar='\\',
                           wildCard='*',
                           singleChar='?'))

# Append one more filter, for the CF standard name of the variable of
# interest. `val` is rebound here and stays set to this name afterwards.
val = 'sea_water_temperature'
filt.append(
    fes.PropertyIsLike(propertyname='apiso:AnyText',
                       literal=('*%s*' % val),
                       escapeChar='\\',
                       wildCard='*',
                       singleChar='?'))

# <markdowncell>

# ##Find model results at NODC
# <markdowncell>

# <div class="error"><strong>Missing CF Standard Names</strong> - "sea_surface_height" and "sea_surface_elevation" are valid CF Aliases but are not returned by MMI when running the SPARQL query.  We added them here manually. See: https://github.com/ioos/system-test/issues/129</div>

# <markdowncell>

# #### Construct CSW Filters

# <codecell>

from owslib import fes

# One substring filter on apiso:AnyText per entry of `variables_to_query`,
# kept in the same order as that list.
cf_name_filters = []
for cf_name in variables_to_query:
    text_filter   = fes.PropertyIsLike(propertyname='apiso:AnyText', literal="*%s*" % cf_name, wildCard='*')
    cf_name_filters.append(text_filter)

# <markdowncell>

# #### Query each CSW catalog for the cf_name_filters constructed above

# <codecell>

from owslib.csw import CatalogueServiceWeb
from utilities import normalize_service_urn

var_results = []

for x in range(len(cf_name_filters)):
    var_name          = variables_to_query[x]
            propertyname=propertyname, literal=start
        )
    elif constraint == "within":
        propertyname = "apiso:TempExtent_begin"
        begin = fes.PropertyIsGreaterThanOrEqualTo(
            propertyname=propertyname, literal=start
        )
        propertyname = "apiso:TempExtent_end"
        end = fes.PropertyIsLessThanOrEqualTo(propertyname=propertyname, literal=stop)
    else:
        raise NameError("Unrecognized constraint {}".format(constraint))
    return begin, end

# Shared PropertyIsLike options: every text filter below matches apiso:AnyText.
kw = dict(wildCard="*", escapeChar="\\", singleChar="?", propertyname="apiso:AnyText")

# Match records containing any of the names in `cf_names`.
or_filt = fes.Or([fes.PropertyIsLike(literal=("*%s*" % val), **kw) for val in cf_names])

begin, end = fes_date_filter(start, stop)
bbox_crs = fes.BBox(bbox, crs=crs)

# All conditions must hold at once, hence the single And constraint.
filter_list = [
    fes.And(
        [
            bbox_crs,  # bounding box
            begin,
            end,  # start and end date
            or_filt,  # or conditions (CF variable names)
        ]
    )
]
Ejemplo n.º 16
0
    def retrieve(self,
                 variable=None,
                 dataframe=False,
                 count=False,
                 bbox=None,
                 or_filter=False,
                 *searchArgs,
                 **kwargs):
        '''
        Retrieve data from WFS service

        @param variable: WFS type name to fetch; defaults to the first item of the client
        @param dataframe: when true, return a GeoDataFrame instead of a list of features
        @param count: when true, request resulttype "hits"
        @param bbox: optional bounding box passed through to the request
        @param or_filter: combine property filters with "or" instead of "and"
        @param searchArgs: accepted but not used by this method
        @param kwargs: property name -> value pairs, each turned into a PropertyIsLike filter

        If the server rejects the filtered request, the request is repeated without
        the filter and the features are filtered locally via self.filter_features.

        The response is written to "tmp.gml" in the current working directory so that
        OGR can read it; the file is left in place afterwards.

        @return: list of OGR features, each with a shapely geometry in its "geom"
                 attribute, or a GeoDataFrame when dataframe is true
        @raise Exception: when the service is not valid (message is self._status) or
                          when OGR cannot open the downloaded response
        '''
        if not self._valid:
            raise Exception(self._status)

        if not variable:
            variable = self._client.items()[0][0]

        query_params = dict(typename=variable, propertyname=None)

        if count:
            query_params['resulttype'] = 'hits'

        if bbox:
            query_params['bbox'] = bbox

        if len(kwargs) > 0:
            import owslib.fes as fes
            from owslib.etree import etree
            filter_components = [
                fes.PropertyIsLike(propertyname=k, literal='%s' % (str(v)))
                for k, v in kwargs.items()
            ]
            if len(filter_components) == 1:
                query_filter = filter_components[0]
            elif or_filter:
                query_filter = fes.Or(filter_components)
            else:
                query_filter = fes.And(filter_components)

            filterxml = etree.tostring(query_filter.toXML()).decode('utf-8')
            filterxml = '<Filter>%s</Filter>' % filterxml
            query_params['filter'] = filterxml

        filter_result = False
        try:
            results = self._client.getfeature(**query_params)
        except Exception:
            # Fall back to an unfiltered request only when a filter was sent;
            # any other failure is re-raised unchanged.
            if 'filter' not in query_params:
                raise
            query_params.pop('filter')
            filter_result = True
            results = self._client.getfeature(**query_params)

        feature_text = results.read()
        if not isinstance(feature_text, str):
            feature_text = feature_text.decode('utf-8')
        tmp_fn = 'tmp.gml'
        # Close (and therefore flush) the file before OGR opens it.
        with open(tmp_fn, 'w') as tmp_file:
            tmp_file.write(feature_text)

        from osgeo import ogr
        ds = ogr.Open(tmp_fn)
        if ds is None:
            raise Exception('Unable to read WFS response from %s' % tmp_fn)

        layer = ds.GetLayer()

        features = [
            layer.GetNextFeature() for i in range(layer.GetFeatureCount())
        ]
        if filter_result:
            features = self.filter_features(features, or_filter, kwargs)

        from shapely.wkt import loads

        for f in features:
            geom = f.geometry()
            wkt = geom.ExportToWkt()
            try:
                f.geom = loads(wkt)
            except Exception:
                # Retry with MULTISURFACE rewritten as MULTIPOLYGON when the
                # WKT cannot be parsed as exported.
                f.geom = loads(wkt.replace('MULTISURFACE', 'MULTIPOLYGON'))

        if dataframe:
            from geopandas import GeoDataFrame
            feature_dicts = [
                dict([('geometry', f.geom)] + [(p, f[p]) for p in f.keys()])
                for f in features
            ]
            return GeoDataFrame(feature_dicts)

        return features
Ejemplo n.º 17
0
# #### Search the catalogue using the FES filters

# <codecell>

# Convert User Input into FES filters.
start, stop = fes_date_filter(start_date, stop_date)
bbox = fes.BBox(bounding_box)

# Use the search name to create search filter.
# These options are shared by every PropertyIsLike filter below.
kw = dict(propertyname='apiso:AnyText',
          escapeChar='\\',
          wildCard='*',
          singleChar='?')

# Match records containing any of the wind u-component names.
or_filt = fes.Or([
    fes.PropertyIsLike(literal='*%s*' % val, **kw)
    for val in data_dict['winds']['u_names']
])

# Exclude records whose text contains "Averages".
val = 'Averages'
not_filt = fes.Not([fes.PropertyIsLike(literal=('*%s*' % val), **kw)])

# Single And constraint: bounding box, date range, name match, exclusion.
filter_list = [fes.And([bbox, start, stop, or_filt, not_filt])]
csw.getrecords2(constraints=filter_list, maxrecords=1000, esn='full')
print("%s csw records found" % len(csw.records))

# <markdowncell>

# #### DAP endpoints

# <codecell>
Ejemplo n.º 18
0
    else:
        raise NameError('Unrecognized constraint {}'.format(constraint))
    return begin, end


# In[5]:

from owslib import fes

# Shared PropertyIsLike options: every text filter below matches apiso:AnyText.
kw = dict(wildCard='*',
          escapeChar='\\',
          singleChar='?',
          propertyname='apiso:AnyText')

# Match records containing any of the names in `name_list`.
or_filt = fes.Or(
    [fes.PropertyIsLike(literal=('*%s*' % val), **kw) for val in name_list])

# Exclude ROMS Averages and History files.
# NOTE(review): only "*Averages*" is excluded by the filter below.
not_filt = fes.Not([fes.PropertyIsLike(literal='*Averages*', **kw)])

begin, end = fes_date_filter(start, stop)
# Single And constraint: bounding box, date range, name match, exclusion.
filter_list = [fes.And([fes.BBox(bbox), begin, end, or_filt, not_filt])]

# In[6]:

from owslib.csw import CatalogueServiceWeb

endpoint = 'http://www.ngdc.noaa.gov/geoportal/csw'
csw = CatalogueServiceWeb(endpoint, timeout=60)
# Fetch up to 1000 full records matching the constraint built above.
csw.getrecords2(constraints=filter_list, maxrecords=1000, esn='full')
Ejemplo n.º 19
0
    def query_csw(self,
                  keyword_list=None,
                  bounding_box=None,
                  bounding_box_crs=None,
                  anytext_list=None,
                  titleword_list=None,
                  start_datetime=None,
                  stop_datetime=None,
                  max_total_records=None):
        '''
        Function to query CSW using AND combination of provided search parameters and return generator object
            yielding nested dicts containing information about each record including distributions
        @param keyword_list: List of strings or comma-separated string containing keyword search terms
        @param bounding_box: Bounding box to search as a list of ordinates [bbox.minx, bbox.miny, bbox.maxx, bbox.maxy]
        @param bounding_box_crs: Coordinate reference system for bounding box. Defaults to value of CSWUtils.DEFAULT_CRS
        @param anytext_list: List of strings or comma-separated string containing any text search terms
        @param titleword_list: List of strings or comma-separated string containing title search terms
        @param start_datetime: Datetime object defining start of temporal search period
        @param stop_datetime: Datetime object defining end of temporal search period
        @param max_total_records: Maximum total number of records to return. Defaults to value of CSWUtils.DEFAULT_MAXTOTALRECORDS 
        
        @return: generator object yielding nested dicts containing information about each record including distributions
        @raise AssertionError: when no search criteria are supplied
        '''
        bounding_box_crs = bounding_box_crs or CSWUtils.DEFAULT_CRS

        # Convert strings to lists if required
        if isinstance(keyword_list, str):
            keyword_list = self.list_from_comma_separated_string(keyword_list)

        if isinstance(anytext_list, str):
            anytext_list = self.list_from_comma_separated_string(anytext_list)

        if isinstance(titleword_list, str):
            titleword_list = self.list_from_comma_separated_string(
                titleword_list)

        # Build filter list
        fes_filter_list = []

        # Check for unchanged, upper-case, lower-case and capitalised keywords
        if keyword_list:
            fes_filter_list += self.any_case_match_filters(
                'Subject', keyword_list)

        if anytext_list:
            fes_filter_list += [
                fes.PropertyIsLike(propertyname='anyText',
                                   literal=phrase,
                                   matchCase=False) for phrase in anytext_list
            ]

        if start_datetime or stop_datetime:
            fes_filter_list += self.get_date_filter(start_datetime,
                                                    stop_datetime)

        if titleword_list:
            fes_filter_list += [
                fes.PropertyIsLike(propertyname='title',
                                   literal=titleword,
                                   matchCase=False)
                for titleword in titleword_list
            ]

        if bounding_box:
            fes_filter_list += [fes.BBox(bounding_box, crs=bounding_box_crs)]

        # Raised explicitly (same exception type as before) so the check is
        # not stripped when Python runs with -O.
        if not fes_filter_list:
            raise AssertionError('No search criteria defined')

        # Use single filter if no "and" required
        if len(fes_filter_list) == 1:
            fes_filter_list = fes_filter_list[0]

        # Return generator object
        return self.get_csw_records(fes_filter_list,
                                    max_total_records=max_total_records)
Ejemplo n.º 20
0
def getDataSetURI(anyText, CSWURL, BBox):
    """

    Searches a given CSW server and returns metadata content for the datasets found.

    Arguments
    ---------

    - anyText - An iterable of strings; each one becomes a 'csw:AnyText' constraint. A falsy value sends no constraints.
    - CSWURL - A base URL for the CSW server to be searched.
    - BBox - A lat/lon bounding box in [minx,miny,maxx,maxy]. NOTE: accepted but not currently used to limit results.

    Returns
    -------

    A list whose first entry is the header ['title', 'abstract', ['urls']],
    followed by one [title, abstract, urls] entry per record. URLs containing
    "/dodsC/" have their http/https scheme replaced by "dods".

    """

    def _as_dods(uri):
        # Only the leading scheme is rewritten; "http" appearing later in the
        # path is left untouched.
        if "/dodsC/" in uri:
            for scheme in ("https", "http"):
                if uri.startswith(scheme):
                    return "dods" + uri[len(scheme):]
        return uri

    csw = CatalogueServiceWeb(CSWURL, skip_caps=True)
    # FIXME: we should allow for "real" multiple keywords,
    # or change the API of anyText if that does not make sense in pygdp.
    # If the former we need `fes.And`, if the latter we need to not listfy `anyText`.
    if not anyText:
        constraints = []
    else:
        constraints = [
            fes.PropertyIsLike(propertyname='csw:AnyText', literal=literal)
            for literal in anyText
        ]

    csw.getrecords2(constraints=constraints,
                    outputschema='http://www.isotc211.org/2005/gmd',
                    esn='full',
                    maxrecords=100)
    dataset_uris = [['title', 'abstract', ['urls']]]

    for record in csw.records.values():
        title = record.identification.title
        abstract = record.identification.abstract
        urls = []

        # Records without distribution information simply contribute no
        # online-resource URLs.
        try:
            for online in record.distribution.online:
                urls.append(online.url)
        except AttributeError:
            pass

        for info in record.identificationinfo:
            try:
                # Take the first connect point of EVERY operation (the index
                # was previously pinned to the first operation).
                for operation in info.operations:
                    urls.append(operation['connectpoint'][0].url)
            except AttributeError:
                pass

        dataset_uris.append([title, abstract, [_as_dods(uri) for uri in urls]])

    return dataset_uris
Ejemplo n.º 21
0
    def query_csw(self,
                  identifier_list=None,
                  alt_identifier_list=None,
                  keyword_list=None,
                  bounding_box=None,
                  bounding_box_crs=None,
                  anytext_list=None,
                  titleword_list=None,
                  start_datetime=None,
                  stop_datetime=None,
                  record_type_list=None,
                  max_total_records=None,
                  get_layers=None):
        '''
        Function to query CSW using AND combination of provided search parameters and return generator object
            yielding nested dicts containing information about each record including distributions
        @param identifier_list: List of strings or comma-separated string containing metadata identifiers (UUID)
        @param alt_identifier_list: List of strings or comma-separated string containing metadata alternate identifiers (eCat ID)
        @param keyword_list: List of strings or comma-separated string containing keyword search terms
        @param bounding_box: Bounding box to search as a list of ordinates [bbox.minx, bbox.miny, bbox.maxx, bbox.maxy]
        @param bounding_box_crs: Coordinate reference system for bounding box. Defaults to value of self.settings['DEFAULT_CRS']
        @param anytext_list: List of strings or comma-separated string containing any text search terms
        @param titleword_list: List of strings or comma-separated string containing title search terms
        @param start_datetime: Datetime object defining start of temporal search period
        @param stop_datetime: Datetime object defining end of temporal search period
        @param record_type_list: List of strings or comma-separated string containing record type(s) to return
        @param max_total_records: Maximum total number of records to return. Defaults to value of self.settings['DEFAULT_MAXTOTALRECORDS']
        @param get_layers: Boolean flag indicating whether to get WMS/WCS layer names. Defaults to value of self.settings['DEFAULT_GET_LAYERS']
        
        @return: generator object yielding nested dicts containing information about each record including distributions
        @raise AssertionError: when no search criteria are defined
        '''

        def like_any(propertyname, values):
            # One case-insensitive PropertyIsLike for a single value,
            # otherwise an "or" over one filter per value.
            likes = [
                fes.PropertyIsLike(propertyname=propertyname,
                                   literal=value,
                                   matchCase=False) for value in values
            ]
            return likes[0] if len(likes) == 1 else fes.Or(likes)

        bounding_box_crs = bounding_box_crs or self.settings['DEFAULT_CRS']
        get_layers = self.settings[
            'DEFAULT_GET_LAYERS'] if get_layers is None else get_layers

        # Convert strings to lists if required
        if isinstance(identifier_list, str):
            identifier_list = self.list_from_comma_separated_string(
                identifier_list)

        if isinstance(alt_identifier_list, str):
            alt_identifier_list = self.list_from_comma_separated_string(
                alt_identifier_list)

        if isinstance(keyword_list, str):
            keyword_list = self.list_from_comma_separated_string(keyword_list)

        if isinstance(anytext_list, str):
            anytext_list = self.list_from_comma_separated_string(anytext_list)

        if isinstance(titleword_list, str):
            titleword_list = self.list_from_comma_separated_string(
                titleword_list)

        record_type_list = record_type_list or self.settings[
            'DEFAULT_RECORD_TYPES']
        if isinstance(record_type_list, str):
            record_type_list = self.list_from_comma_separated_string(
                record_type_list)

        # Build filter list
        fes_filter_list = []

        # Several identifiers are alternatives ("or"), not extra "and" terms
        if identifier_list:
            fes_filter_list.append(like_any('Identifier', identifier_list))

        if alt_identifier_list:
            fes_filter_list.append(
                like_any('AlternateIdentifier', alt_identifier_list))

        # Check for unchanged, upper-case, lower-case and capitalised keywords
        # with single-character wildcards substituted for whitespace characters
        # GeoNetwork keyword search is always case sensitive
        if keyword_list:
            fes_filter_list += self.any_case_match_filters(
                'Subject', keyword_list)

        if anytext_list:
            fes_filter_list += [
                fes.PropertyIsLike(propertyname='anyText',
                                   literal=phrase,
                                   matchCase=False) for phrase in anytext_list
            ]

        if start_datetime or stop_datetime:
            fes_filter_list += self.get_date_filter(start_datetime,
                                                    stop_datetime)

        if titleword_list:
            fes_filter_list += [
                fes.PropertyIsLike(propertyname='title',
                                   literal=titleword,
                                   matchCase=False)
                for titleword in titleword_list
            ]

        # Check for unchanged, upper-case, lower-case and capitalised keywords
        # with single-character wildcards substituted for whitespace characters
        # GeoNetwork type search is always case sensitive
        if record_type_list:
            fes_filter_list += self.any_case_match_filters(
                'type', record_type_list)

        if bounding_box:
            # N.B: Bounding box ordinate ordering must match CRS. Default CRS84 supports lon-lat ordering, not lat-lon
            # See https://gis.stackexchange.com/questions/124050/how-do-i-specify-the-lon-lat-ordering-in-csw-bounding-box-request
            fes_filter_list += [fes.BBox(bounding_box, crs=bounding_box_crs)]

        # Raised explicitly (same exception type as before) so the check is
        # not stripped when Python runs with -O.
        if not fes_filter_list:
            raise AssertionError('No search criteria defined')

        # Use single filter if no "and" required
        if len(fes_filter_list) == 1:
            fes_filter_list = fes_filter_list[0]

        # Return generator object
        return self.get_csw_records(fes_filter_list,
                                    max_total_records=max_total_records,
                                    get_layers=get_layers)
Ejemplo n.º 22
0
jd_start = dt.datetime.strptime(start_date,'%Y-%m-%d %H:%M')
jd_stop = dt.datetime.strptime(stop_date,'%Y-%m-%d %H:%M')

# Formatted print() call: same output on Python 2 and 3.
print('%s to %s' % (start_date, stop_date))

sos_name = 'water_surface_height_above_reference_datum'

# <codecell>


# convert User Input into FES filters
start,stop = dateRange(start_date,stop_date)
bbox = fes.BBox(box)

# Match records containing any of the model names.
or_filt = fes.Or([fes.PropertyIsLike(propertyname='apiso:AnyText',literal=('*%s*' % val),
                    escapeChar='\\',wildCard='*',singleChar='?') for val in model_name_list])

# Exclude records whose text contains "Averages".
val = 'Averages'
not_filt = fes.Not([fes.PropertyIsLike(propertyname='apiso:AnyText',literal=('*%s*' % val),
                        escapeChar='\\',wildCard='*',singleChar='?')])

# Single And constraint: bounding box, date range, name match, exclusion.
filter_list = [fes.And([ bbox, start, stop, or_filt, not_filt]) ]



# <markdowncell>

# ##Find model results at NODC

# <codecell>
Ejemplo n.º 23
0
# -*- coding: utf-8 -*-
# <nbformat>3.0</nbformat>

# <codecell>

from owslib import fes
from owslib.csw import CatalogueServiceWeb

# Collect the distinct reference schemes advertised by matching records.
schemes = set()
c = CatalogueServiceWeb("https://data.noaa.gov/csw", timeout=20)
# Full-text search for the CF standard name, with '*' as the wildcard.
fil = fes.PropertyIsLike(propertyname='apiso:AnyText',
                         literal="*sea_surface_height_above_sea_level*",
                         wildCard='*')
c.getrecords2(constraints=[fil], maxrecords=1000, esn='full')

# <codecell>

# Each record carries a list of references; keep every 'scheme' value seen.
for record, item in c.records.items():
    for d in item.references:
        schemes.add(d['scheme'])

# <codecell>

# Call form of print: valid on Python 3 and, with a single argument,
# produces the same output under the Python 2 print statement.
for scheme in schemes:
    print(scheme)
Ejemplo n.º 24
0
# Probe the GetDomain operation and show the values it reports for
# apiso:ServiceType; any failure is reported as "not supported".
try:
    csw.get_operation_by_name('GetDomain')
    csw.getdomain('apiso:ServiceType', 'property')
    print(csw.results['values'])
except Exception:
    # Deliberately best-effort, but unlike a bare ``except:`` this lets
    # KeyboardInterrupt and SystemExit propagate.
    print('GetDomain not supported')

# ## Query for all COAWST datasets

# In[5]:

# Full-text filter matching any record that mentions the search term.
val = 'COAWST'
filter1 = fes.PropertyIsLike(propertyname='apiso:AnyText',
                             literal=('*%s*' % val),
                             escapeChar='\\',
                             wildCard='*',
                             singleChar='?')
filter_list = [filter1]

# In[6]:

# Run the query, then list how many records came back and their titles.
# Single-argument print(...) calls behave the same on Python 2 and 3 and
# match the call style already used elsewhere in this script.
csw.getrecords2(constraints=filter_list, maxrecords=100, esn='full')
print(len(csw.records))
for rec in csw.records:
    print(csw.records[rec].title)

# In[7]:

# Pick one record identifier at random and show its title.
choice = np.random.choice(list(csw.records))
print(csw.records[choice].title)
Ejemplo n.º 25
0
# <codecell>

# Turn the user's date range and bounding box into FES filter terms.
start, stop = date_range(start_date, end_date)
# Flatten the two corner pairs of bounding_box into one four-element list.
box = [
    bounding_box[0][0],
    bounding_box[0][1],
    bounding_box[1][0],
    bounding_box[1][1],
]
bbox = fes.BBox(box)

# One full-text "like" filter per wave variable name, OR-ed together.
or_filt = fes.Or([
    fes.PropertyIsLike(
        literal=('*%s*' % val),
        propertyname='apiso:AnyText',
        wildCard='*',
        singleChar='?',
        escapeChar='\\',
    )
    for val in data_dict["waves"]["names"]
])
# Negated full-text filter: drop records whose text contains 'Averages'.
val = 'Averages'
not_filt = fes.Not([
    fes.PropertyIsLike(
        literal=('*%s*' % val),
        propertyname='apiso:AnyText',
        wildCard='*',
        singleChar='?',
        escapeChar='\\',
    )
])
# All terms are combined under a single And and sent as one constraint.
filter_list = [fes.And([bbox, start, stop, or_filt, not_filt])]
# Query the catalogue; the "and" syntax for multiple filters is
# [[filter1, filter2]].
csw.getrecords2(constraints=filter_list, maxrecords=1000, esn='full')
Ejemplo n.º 26
0
# Notebook cell: display the names of the operations the CSW advertises.
[op.name for op in csw.operations]

# <codecell>

# Show the constraints advertised for the GetRecords operation.
# print(...) with one argument behaves the same on Python 2 and 3.
for oper in csw.operations:
    if oper.name == 'GetRecords':
        print(oper.constraints)

# <markdowncell>

# Since the supported ISO queryables contain `apiso:ServiceType`, we can use CSW to find all datasets with services that contain the string "dap" 

# <codecell>

# Filter on the apiso:ServiceType queryable for services containing "dap".
val = 'dap'
service_type = fes.PropertyIsLike(
    propertyname='apiso:ServiceType',
    literal=('*%s*' % val),
    escapeChar='\\',
    wildCard='*',
    singleChar='?',
)
filter_list = [service_type]

# <codecell>

# Run the query; the bare len(...) is the notebook cell's displayed value.
csw.getrecords2(constraints=filter_list, maxrecords=10000, esn='full')
len(csw.records)

# <markdowncell>

# By printing out the references from a random record, we see that for this CSW the DAP URL is identified by 
# `urn:x-esri:specification:ServiceType:odp:url`

# <codecell>

# Pick one record identifier at random.
choice = random.choice(list(csw.records))
# <codecell>

# Query every CSW endpoint with the same bounding-box, time and name filters
# and report how many records each one returns.
for endpoint in bbox_endpoints:
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print(endpoint)

    csw = CatalogueServiceWeb(endpoint, timeout=60)

    # convert User Input into FES filters
    start, stop = fes_date_filter(start_date, stop_date)
    bbox = fes.BBox(bounding_box)

    # use the search name to create search filter
    or_filt = fes.Or([
        fes.PropertyIsLike(propertyname='apiso:AnyText',
                           literal='*%s*' % val,
                           escapeChar='\\',
                           wildCard='*',
                           singleChar='?')
        for val in data_dict['winds']['u_names']
    ])

    filter_list = [fes.And([bbox, start, stop, or_filt])]
    #     filter_list = [fes.And([ bbox, or_filt]) ]
    # connect to CSW, explore its properties
    # try request using multiple filters "and" syntax: [[filter1,filter2]]
    try:
        csw.getrecords2(constraints=filter_list, maxrecords=1000, esn='full')
    except Exception as e:
        # A failing endpoint is reported and the loop moves on to the next.
        print('ERROR - ' + str(e))
    else:
        print(str(len(csw.records)) + " csw records found")
Ejemplo n.º 28
0
# With these 3 elements it is possible to assemble a [OGC Filter Encoding (FE)](http://www.opengeospatial.org/standards/filter) using the `owslib.fes`\* module.
#
# \* OWSLib is a Python package for client programming with Open Geospatial Consortium (OGC) web service (hence OWS) interface standards, and their related content models.

# In[8]:

from owslib import fes
from ioos_tools.ioos import fes_date_filter

# Wildcard conventions for the full-text (apiso:AnyText) name filters.
kw = {
    'wildCard': '*',
    'escapeChar': '\\',
    'singleChar': '?',
    'propertyname': 'apiso:AnyText',
}

# One "like" filter per model name, OR-ed together.
or_filt = fes.Or([
    fes.PropertyIsLike(literal=('*%s*' % val), **kw)
    for val in model_names
])

# Same wildcard conventions, now targeting the apiso:ServiceType queryable.
kw = {
    'wildCard': '*',
    'escapeChar': '\\',
    'singleChar': '?',
    'propertyname': 'apiso:ServiceType',
}

serviceType = fes.PropertyIsLike(literal=('*%s*' % service_type), **kw)

# Remaining filter terms: the time pair and the CRS-qualified bounding box.
begin, end = fes_date_filter(start, stop)
bbox_crs = fes.BBox(bbox, crs=crs)

filter_list = [
    fes.And([
        bbox_crs,  # bounding box
        begin,
Ejemplo n.º 29
0
#endpoint = 'http://data.nodc.noaa.gov/geoportal/csw'
# Connect to the catalogue and show the CSW version it reports.
# print(...) with a single argument behaves the same on Python 2 and 3.
csw = CatalogueServiceWeb(endpoint, timeout=60)
print(csw.version)


# In[ ]:

from owslib import fes
from utilities import fes_date_filter

# Wildcard conventions shared by both full-text filters below.
kw = {
    'wildCard': '*',
    'escapeChar': '\\',
    'singleChar': '?',
    'propertyname': 'apiso:AnyText',
}

# One "like" filter per name, OR-ed together.
or_filt = fes.Or([
    fes.PropertyIsLike(literal=('*%s*' % val), **kw)
    for val in name_list
])

# Negated filter: drop records whose text contains 'Averages'.
not_filt = fes.Not([fes.PropertyIsLike(literal='*Averages*', **kw)])

# Bounding box qualified with the CRS84 URN.
bbox_filter = fes.BBox(bbox, crs='urn:ogc:def:crs:OGC:1.3:CRS84')


# Time pair, then every term combined under a single And.
begin, end = fes_date_filter(start, stop)
filter_list = [fes.And([bbox_filter, begin, end, or_filt, not_filt])]


# In[ ]:

# Search region as four ordinates; this script passes ``bbox`` to fes.BBox.
bbox = [-87.40, 34.25, -63.70, 66.70]    # [lon_min, lat_min, lon_max, lat_max]