Example #1
def test_search_url_bad_request(e):
    """Test if a bad request returns HTTPError."""
    kw = {
        "min_time": "1700-01-01T12:00:00Z",
        "max_time": "1750-01-01T12:00:00Z",
    }
    with pytest.raises(HTTPError):
        _check_url_response(e.get_search_url(**kw))


def test__check_url_response():
    """Test if a bad request returns HTTPError."""
    bad_request = ('http://erddap.sensors.ioos.us/erddap/tabledap/'
                   'gov_usgs_waterdata_340800117235901.htmlTable?'
                   'time,'
                   '&time>=2017-08-29T00:00:00Z'
                   '&time<=2015-09-05T19:00:00Z')
    with pytest.raises(HTTPError):
        _check_url_response(bad_request)
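
The helper exercised by these tests, `_check_url_response`, is not included in the listing. Below is a minimal sketch of what such a helper plausibly looks like, assuming it issues a lightweight request and lets `requests` raise `HTTPError` on a bad status; the real erddapy implementation may differ in its details.

import requests


def _check_url_response(url, **kwargs):
    """Sketch: raise `requests.exceptions.HTTPError` if `url` returns a bad status.

    This is an assumption inferred from how the helper is used in the
    examples; the actual implementation may use different request options.
    """
    r = requests.head(url, **kwargs)
    r.raise_for_status()  # raises HTTPError for 4xx/5xx responses
    return url
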
Example #3
    def __init__(self, server_url):
        if server_url in servers.keys():
            server_url = servers[server_url].url
        _check_url_response(server_url)
        self.server_url = server_url
        self.search_options = {}
        self.download_options = {}
        # Caching the last `dataset_id` request for quicker multiple accesses,
        # will be overridden when requesting a new `dataset_id`.
        self._dataset_id = None
        self._variables = {}
Example #4
    def get_opendap_url(self, dataset_id, protocol='tabledap'):
        """Compose the opendap URL for the `server_url` the endpoint.

        Args:
            dataset_id (str): a dataset unique id.
        Returns:
            download_url (str): the download URL for the `response` chosen.

        """
        base = '{server_url}/{protocol}/{dataset_id}'.format
        opendap_url = base(server_url=self.server_url,
                           protocol=protocol,
                           dataset_id=dataset_id)
        _check_url_response(opendap_url)
        return opendap_url
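
An OPeNDAP URL built this way can be opened directly, for example with xarray for griddap datasets. The snippet below is only a sketch: it rebuilds the same `{server_url}/{protocol}/{dataset_id}` pattern with a public ERDDAP server already mentioned in these examples and a hypothetical dataset id.

import xarray as xr

server_url = "http://coastwatch.pfeg.noaa.gov/erddap"  # public ERDDAP server from the docs above
dataset_id = "some_griddap_dataset_id"                 # hypothetical dataset id
opendap_url = f"{server_url}/griddap/{dataset_id}"

# griddap endpoints speak plain OPeNDAP, so xarray can open them lazily
# without downloading a file first.
ds = xr.open_dataset(opendap_url)
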
Example #5
def info_url(server, dataset_id, response='html'):
    """The info URL for the `server` endpoint.

    Args:
        dataset_id (str): a dataset unique id.
        response (str): default is HTML.

    Returns:
        url (str): the info URL for the `response` chosen.

    """
    url = f'{server}/info/{dataset_id}/index.{response}'
    return _check_url_response(url)


def download_url(server, dataset_id, protocol, variables, response='html', constraints=None):
    """The download URL for the `server` endpoint.

    Args:
        dataset_id (str): a dataset unique id.
        protocol (str): tabledap or griddap.
        variables (list/tuple): a list of the variables to download.
        response (str): default is HTML.
        constraints (dict): download constraints, default None (opendap-like url)
            example: constraints = {'latitude<=': 41.0,
                                    'latitude>=': 38.0,
                                    'longitude<=': -69.0,
                                    'longitude>=': -72.0,
                                    'time<=': '2017-02-10T00:00:00+00:00',
                                    'time>=': '2016-07-10T00:00:00+00:00',
                                    }

    Returns:
        url (str): the download URL for the `response` chosen.

    """
    base = '{server_url}/{protocol}/{dataset_id}'

    if not constraints:
        url = base.format(
            server_url=server,
            protocol=protocol,
            dataset_id=dataset_id
            )
    else:
        base += '.{response}?{variables}{constraints}'

        _constraints = copy.copy(constraints)
        for k, v in _constraints.items():
            if k.startswith('time'):
                _constraints.update({k: parse_dates(v)})

        _constraints = quote_string_constraints(_constraints)
        _constraints = ''.join(['&{}{}'.format(k, v) for k, v in _constraints.items()])
        variables = ','.join(variables)

        url = base.format(
            server_url=server,
            protocol=protocol,
            dataset_id=dataset_id,
            response=response,
            variables=variables,
            constraints=_constraints
        )
    return _check_url_response(url)
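
A usage sketch for the module-level `download_url` above; the server, dataset id, and variable names are placeholders, and the constraint values are taken from the docstring example.

constraints = {
    "time>=": "2016-07-10T00:00:00+00:00",
    "time<=": "2017-02-10T00:00:00+00:00",
    "latitude>=": 38.0,
    "latitude<=": 41.0,
    "longitude>=": -72.0,
    "longitude<=": -69.0,
}

url = download_url(
    server="http://coastwatch.pfeg.noaa.gov/erddap",  # placeholder server
    dataset_id="some_dataset_id",                     # hypothetical dataset id
    protocol="tabledap",
    variables=["time", "latitude", "longitude"],      # placeholder variable names
    response="csv",
    constraints=constraints,
)
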
Example #7
    def get_download_url(self,
                         dataset_id,
                         variables,
                         response='csv',
                         protocol='tabledap',
                         **kwargs):
        """Compose the download URL for the `server_url` endpoint.

        Args:
            dataset_id (str): a dataset unique id.
            variables (list/tuple): a list of the variables to download.
            response (str): default is a Comma Separated Value ('csv').
                See ERDDAP docs for all the options,

                - tabledap: http://coastwatch.pfeg.noaa.gov/erddap/tabledap/documentation.html
                - griddap: http://coastwatch.pfeg.noaa.gov/erddap/griddap/documentation.html
        Returns:
            download_url (str): the download URL for the `response` chosen.

        """
        self.download_options.update(kwargs)
        variables = ','.join(variables)
        base = ('{server_url}/{protocol}/{dataset_id}.{response}'
                '?{variables}'
                '{kwargs}').format

        kwargs = ''.join(
            ['&{}{}'.format(k, v) for k, v in self.download_options.items()])
        download_url = base(server_url=self.server_url,
                            protocol=protocol,
                            dataset_id=dataset_id,
                            response=response,
                            variables=variables,
                            kwargs=kwargs)
        _check_url_response(download_url)
        return download_url
Example #8
    def get_info_url(self, dataset_id, response='csv'):
        """Compose the info URL for the `server_url` endpoint.

        Args:
            dataset_id (str): a dataset unique id.
            response (str): default is a Comma Separated Value ('csv').
                See ERDDAP docs for all the options,

                - tabledap: http://coastwatch.pfeg.noaa.gov/erddap/tabledap/documentation.html
                - griddap: http://coastwatch.pfeg.noaa.gov/erddap/griddap/documentation.html
        Returns:
            info_url (str): the info URL for the `response` chosen.

        """
        response = _clean_response(response)
        base = '{server_url}/info/{dataset_id}/index.{response}'.format
        info_options = {
            'server_url': self.server_url,
            'dataset_id': dataset_id,
            'response': response
        }
        info_url = base(**info_options)
        _check_url_response(info_url)
        return info_url
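
Because a csv info response is plain tabular text, the URL composed by `get_info_url` (or by the module-level `info_url` in Example #5) can be read straight into pandas. This is a sketch with a placeholder server and dataset id.

import pandas as pd

server = "http://coastwatch.pfeg.noaa.gov/erddap"  # placeholder server
dataset_id = "some_dataset_id"                     # hypothetical dataset id
info_csv = f"{server}/info/{dataset_id}/index.csv"

# pandas reads the metadata table directly from the URL.
info = pd.read_csv(info_csv)
print(info.head())
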
Example #9
    def __init__(self, server, protocol=None, response="html"):
        if server in servers.keys():
            server = servers[server].url
        self.server = _check_url_response(server)
        self.protocol = protocol

        # Initialized only via properties.
        self.constraints = None
        self.dataset_id = None
        self.params = None
        self.requests_kwargs = {}
        self.response = response
        self.variables = ""

        # Caching the last `dataset_id` and `variables` list request for quicker multiple accesses,
        # will be overridden when requesting a new `dataset_id`.
        self._dataset_id = None
        self._variables = {}
Example #10
    def get_categorize_url(self, categorize_by, value=None, response=None):
        """The categorize URL for the `server` endpoint.

        Args:
            categorize_by (str): a valid attribute, e.g.: ioos_category or standard_name.
            value (str): an attribute value.
            response (str): default is HTML.

        Returns:
            url (str): the categorized URL for the `response` chosen.

        """
        response = response if response else self.response
        if value:
            url = f"{self.server}/categorize/{categorize_by}/{value}/index.{response}"
        else:
            url = f"{self.server}/categorize/{categorize_by}/index.{response}"
        return _check_url_response(url)
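
A short sketch of the URLs this method composes, using placeholder values: with a value it lists the datasets tagged with that attribute value, without one it lists the available values for the attribute.

server = "http://coastwatch.pfeg.noaa.gov/erddap"  # placeholder server

# all values available for the `standard_name` attribute
values_url = f"{server}/categorize/standard_name/index.csv"

# all datasets tagged with a given value (example value only)
datasets_url = f"{server}/categorize/standard_name/sea_water_temperature/index.csv"
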
Example #11
    def get_info_url(self, dataset_id=None, response=None):
        """The info URL for the `server` endpoint.

        Args:
            dataset_id (str): a dataset unique id.
            response (str): default is HTML.

        Returns:
            url (str): the info URL for the `response` chosen.

        """
        dataset_id = dataset_id if dataset_id else self.dataset_id
        response = response if response else self.response

        if not dataset_id:
            raise ValueError(
                f"You must specify a valid dataset_id, got {dataset_id}")

        url = f"{self.server}/info/{dataset_id}/index.{response}"
        return _check_url_response(url)
Example #12
    def __init__(self,
                 server,
                 dataset_id=None,
                 protocol=None,
                 variables='',
                 response='html',
                 constraints=None,
                 params=None,
                 requests_kwargs=None):
        if server in servers.keys():
            server = servers[server].url
        self.server = _check_url_response(server)
        self.dataset_id = dataset_id
        self.protocol = protocol
        self.variables = variables
        self.response = response
        self.constraints = constraints
        self.params = params
        self.requests_kwargs = requests_kwargs if requests_kwargs else {}

        # Caching the last `dataset_id` request for quicker multiple accesses,
        # will be overridden when requesting a new `dataset_id`.
        self._dataset_id = None
        self._variables = {}
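
A usage sketch for this constructor, assuming it belongs to erddapy's `ERDDAP` class (the import path is an assumption here) and using placeholder values for the dataset-specific fields.

from erddapy import ERDDAP  # assumed import path for the class defining this constructor

e = ERDDAP(
    server="http://coastwatch.pfeg.noaa.gov/erddap",  # or a short key from `servers`
    protocol="tabledap",
    response="csv",
)
e.dataset_id = "some_dataset_id"                      # hypothetical dataset id
e.variables = ["time", "latitude", "longitude"]       # placeholder variable names
e.constraints = {"time>=": "2016-07-10T00:00:00+00:00"}
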
Example #13
    def get_download_url(
        self,
        dataset_id=None,
        protocol=None,
        variables=None,
        response=None,
        constraints=None,
    ):
        """The download URL for the `server` endpoint.

        Args:
            dataset_id (str): a dataset unique id.
            protocol (str): tabledap or griddap.
            variables (list/tuple): a list of the variables to download.
            response (str): default is HTML.
            constraints (dict): download constraints, default None (opendap-like url)
            example: constraints = {'latitude<=': 41.0,
                                    'latitude>=': 38.0,
                                    'longitude<=': -69.0,
                                    'longitude>=': -72.0,
                                    'time<=': '2017-02-10T00:00:00+00:00',
                                    'time>=': '2016-07-10T00:00:00+00:00',}

        Returns:
            url (str): the download URL for the `response` chosen.

        """
        dataset_id = dataset_id if dataset_id else self.dataset_id
        protocol = protocol if protocol else self.protocol
        variables = variables if variables else self.variables
        response = response if response else self.response
        constraints = constraints if constraints else self.constraints

        if not dataset_id:
            raise ValueError(
                f"Please specify a valid `dataset_id`, got {dataset_id}")

        if not protocol:
            raise ValueError(
                f"Please specify a valid `protocol`, got {protocol}")

        # This is an unconstrained OPeNDAP response b/c
        # the integer based constrained version is just not worth supporting ;-p
        if response == "opendap":
            return f"{self.server}/{protocol}/{dataset_id}"
        else:
            url = f"{self.server}/{protocol}/{dataset_id}.{response}?"

        if variables:
            variables = ",".join(variables)
            url += f"{variables}"

        if constraints:
            _constraints = copy.copy(constraints)
            for k, v in _constraints.items():
                if k.startswith("time"):
                    _constraints.update({k: parse_dates(v)})
            _constraints = quote_string_constraints(_constraints)
            _constraints = "".join(
                [f"&{k}{v}" for k, v in _constraints.items()])

            url += f"{_constraints}"
        return _check_url_response(url)
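
The two constraint helpers used above, `parse_dates` and `quote_string_constraints`, are not shown in this listing. The sketches below are inferred from how they are called and from the `seconds since 1970-01-01T00:00:00Z` comment in the search-URL examples; the real erddapy implementations may handle more cases.

from datetime import datetime, timezone

import dateutil.parser


def parse_dates(date_time):
    """Sketch: return `date_time` as seconds since 1970-01-01T00:00:00Z."""
    if not isinstance(date_time, datetime):
        date_time = dateutil.parser.parse(date_time)
    if date_time.tzinfo is None:
        date_time = date_time.replace(tzinfo=timezone.utc)
    return date_time.timestamp()


def quote_string_constraints(constraints):
    """Sketch: double-quote string values so ERDDAP parses them as strings."""
    return {k: f'"{v}"' if isinstance(v, str) else v for k, v in constraints.items()}
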
Example #14
    def get_search_url(
        self,
        response=None,
        search_for=None,
        items_per_page=1000,
        page=1,
        **kwargs,
    ):
        """The search URL for the `server` endpoint provided.

        Args:
            search_for (str): "Google-like" search of the datasets' metadata.

                - Type the words you want to search for, with spaces between the words.
                    ERDDAP will search for the words separately, not as a phrase.
                - To search for a phrase, put double quotes around the phrase
                    (for example, `"wind speed"`).
                - To exclude datasets with a specific word, use `-excludedWord`.
                - To exclude datasets with a specific phrase, use `-"excluded phrase"`
                - Searches are not case-sensitive.
                - To find just grid or just table datasets, include `protocol=griddap`
                    or `protocol=tabledap` in your search.
                - You can search for any part of a word. For example,
                    searching for `spee` will find datasets with `speed` and datasets with
                    `WindSpeed`
                - The last word in a phrase may be a partial word. For example,
                    to find datasets from a specific website (usually the start of the datasetID),
                    include (for example) `"datasetID=erd"` in your search.

            response (str): default is HTML.
            items_per_page (int): how many items per page in the return,
                default is 1000.
            page (int): which page to display, default is the first page (1).
            kwargs (dict): extra search constraints based on metadata and/or coordinate key/value pairs.
                metadata: `cdm_data_type`, `institution`, `ioos_category`,
                `keywords`, `long_name`, `standard_name`, and `variableName`.
                coordinates: `minLon`, `maxLon`, `minLat`, `maxLat`, `minTime`, and `maxTime`.

        Returns:
            url (str): the search URL.

        """
        base = ("{server}/search/advanced.{response}"
                "?page={page}"
                "&itemsPerPage={itemsPerPage}"
                "&protocol={protocol}"
                "&cdm_data_type={cdm_data_type}"
                "&institution={institution}"
                "&ioos_category={ioos_category}"
                "&keywords={keywords}"
                "&long_name={long_name}"
                "&standard_name={standard_name}"
                "&variableName={variableName}"
                "&minLon={minLon}"
                "&maxLon={maxLon}"
                "&minLat={minLat}"
                "&maxLat={maxLat}"
                "&minTime={minTime}"
                "&maxTime={maxTime}")
        if search_for:
            search_for = quote_plus(search_for)
            base += "&searchFor={searchFor}"

        # Convert dates from datetime to `seconds since 1970-01-01T00:00:00Z`.
        min_time = kwargs.pop("min_time", None)
        max_time = kwargs.pop("max_time", None)
        if min_time:
            kwargs.update({"min_time": parse_dates(min_time)})
        if max_time:
            kwargs.update({"max_time": parse_dates(max_time)})

        default = "(ANY)"
        response = response if response else self.response
        url = base.format(
            server=self.server,
            response=response,
            page=page,
            itemsPerPage=items_per_page,
            protocol=kwargs.get("protocol", default),
            cdm_data_type=kwargs.get("cdm_data_type", default),
            institution=kwargs.get("institution", default),
            ioos_category=kwargs.get("ioos_category", default),
            keywords=kwargs.get("keywords", default),
            long_name=kwargs.get("long_name", default),
            standard_name=kwargs.get("standard_name", default),
            variableName=kwargs.get("variableName", default),
            minLon=kwargs.get("min_lon", default),
            maxLon=kwargs.get("max_lon", default),
            minLat=kwargs.get("min_lat", default),
            maxLat=kwargs.get("max_lat", default),
            minTime=kwargs.get("min_time", default),
            maxTime=kwargs.get("max_time", default),
            searchFor=search_for,
        )

        return _check_url_response(url)
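
A usage sketch for `get_search_url`, assuming an erddapy `ERDDAP` instance as in the constructor examples; the search phrase and time window are only examples.

from erddapy import ERDDAP  # assumed import path

e = ERDDAP(server="http://coastwatch.pfeg.noaa.gov/erddap")  # placeholder server
search_url = e.get_search_url(
    response="csv",
    search_for='"wind speed"',                # quoted phrase, searched as a unit
    min_time="2016-07-10T00:00:00+00:00",
    max_time="2017-02-10T00:00:00+00:00",
)
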
Example #15
def test_servers():
    for server in servers.values():
        # Should raise HTTPError if broken, otherwise returns the URL.
        assert _check_url_response(server.url) == server.url
Example #16
    def get_search_url(self,
                       response='csv',
                       search_for=None,
                       items_per_page=1000,
                       page=1,
                       **kwargs):
        """Compose the search URL for the `server_url` endpoint provided.

        Args:
            search_for (str): "Google-like" search of the datasets' metadata.

                - Type the words you want to search for, with spaces between the words.
                  ERDDAP will search for the words separately, not as a phrase.
                - To search for a phrase, put double quotes around the phrase
                  (for example, `"wind speed"`).
                - To exclude datasets with a specific word, use `-excludedWord`.
                - To exclude datasets with a specific phrase, use `-"excluded phrase"`
                - Searches are not case-sensitive.
                - To find just grid or just table datasets, include `protocol=griddap`
                  or `protocol=tabledap` in your search.
                - You can search for any part of a word. For example,
                  searching for `spee` will find datasets with `speed` and datasets with
                  `WindSpeed`
                - The last word in a phrase may be a partial word. For example,
                  to find datasets from a specific website (usually the start of the datasetID),
                  include (for example) `"datasetID=erd"` in your search.

            response (str): default is a Comma Separated Value ('csv').
                See ERDDAP docs for all the options,

                - tabledap: http://coastwatch.pfeg.noaa.gov/erddap/tabledap/documentation.html
                - griddap: http://coastwatch.pfeg.noaa.gov/erddap/griddap/documentation.html

            items_per_page (int): how many items per page in the return,
                default is 1000.
            page (int): which page to display, default is the first page (1).
            kwargs (dict): extra search constraints based on metadata and/or coordinate key/value pairs.
                metadata: `cdm_data_type`, `institution`, `ioos_category`,
                `keywords`, `long_name`, `standard_name`, and `variableName`.
                coordinates: `minLon`, `maxLon`, `minLat`, `maxLat`, `minTime`, and `maxTime`.
        Returns:
            search_url (str): the search URL for the `response` chosen.

        """
        base = ('{server_url}/search/advanced.{response}'
                '?page={page}'
                '&itemsPerPage={itemsPerPage}'
                '&protocol={protocol}'
                '&cdm_data_type={cdm_data_type}'
                '&institution={institution}'
                '&ioos_category={ioos_category}'
                '&keywords={keywords}'
                '&long_name={long_name}'
                '&standard_name={standard_name}'
                '&variableName={variableName}'
                '&minLon={minLon}'
                '&maxLon={maxLon}'
                '&minLat={minLat}'
                '&maxLat={maxLat}'
                '&minTime={minTime}'
                '&maxTime={maxTime}')
        if search_for:
            search_for = quote_plus(search_for)
            base += '&searchFor={searchFor}'

        # Convert dates from datetime to `seconds since 1970-01-01T00:00:00Z`.
        min_time = kwargs.pop('min_time', None)
        max_time = kwargs.pop('max_time', None)
        if min_time:
            kwargs.update({'min_time': parse_dates(min_time)})
        if max_time:
            kwargs.update({'max_time': parse_dates(max_time)})

        default = '(ANY)'
        response = _clean_response(response)
        search_options = {
            'server_url': self.server_url,
            'response': response,
            'page': page,
            'itemsPerPage': items_per_page,
            'protocol': kwargs.get('protocol', default),
            'cdm_data_type': kwargs.get('cdm_data_type', default),
            'institution': kwargs.get('institution', default),
            'ioos_category': kwargs.get('ioos_category', default),
            'keywords': kwargs.get('keywords', default),
            'long_name': kwargs.get('long_name', default),
            'standard_name': kwargs.get('standard_name', default),
            'variableName': kwargs.get('variableName', default),
            'minLon': kwargs.get('min_lon', default),
            'maxLon': kwargs.get('max_lon', default),
            'minLat': kwargs.get('min_lat', default),
            'maxLat': kwargs.get('max_lat', default),
            'minTime': kwargs.get('min_time', default),
            'maxTime': kwargs.get('max_time', default),
            'searchFor': search_for,
        }
        self.search_options.update(search_options)
        search_url = base.format(**search_options)
        _check_url_response(search_url)
        return search_url