Example no. 1
    def test_get_timestamps_returns_correct_timestamps_for_historical(self):

        with SQLiteDatabase(self.historical_db) as db:

            timestamps = db.get_timestamps("vo")

            self.assertEqual(self.historical_timestamps, timestamps)
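The fixtures referenced throughout these tests (self.historical_db, self.historical_timestamps) come from a setUp method that isn't shown. A minimal sketch, with a hypothetical path and values (the real suite ships its own test database):

import unittest

from data.sqlite_database import SQLiteDatabase  # assumed module path


class TestSQLiteDatabase(unittest.TestCase):

    def setUp(self):
        # Hypothetical fixture values for illustration only.
        self.historical_db = "tests/testdata/test-mapper.sqlite3"
        self.historical_timestamps = [2144966400, 2145052800]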
Example no. 2
    def test_get_all_variables_returns_all_variables(self):

        with SQLiteDatabase(self.historical_db) as db:

            variables = db.get_all_variables()

            self.assertEqual(len(variables), 12)
Example no. 3
    def get_nc_file_list(self, datasetconfig: DatasetConfig,
                         **kwargs: dict) -> Union[List, None]:
        try:
            if not datasetconfig.url.endswith(".sqlite3"):
                # This method is only applicable to SQLite-indexed datasets
                return
        except AttributeError:
            # Probably a file-path dataset config, to which this method also doesn't apply
            return

        try:
            variables = kwargs['variable']
        except KeyError:
            variables = datasetconfig.variables[0]
        variables = {variables} if isinstance(variables, str) else set(variables)
        calculated_variables = datasetconfig.calculated_variables
        with SQLiteDatabase(self.url) as db:
            variables_to_load = self.__get_variables_to_load(
                db, variables, calculated_variables)

            timestamp = self.__get_requested_timestamps(
                db, variables_to_load[0], kwargs.get('timestamp', -1),
                kwargs.get('endtime'), kwargs.get('nearest_timestamp', False))

            if not timestamp:
                raise RuntimeError("Error finding timestamp(s) in database.")

            file_list = db.get_netcdf_files(timestamp, variables_to_load)
            if not file_list:
                raise RuntimeError("NetCDF file list is empty.")

            return file_list
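A minimal call sketch for the method above, assuming it lives on a NetCDFData-style wrapper whose self.url points at the SQLite index. The class name, dataset key, and all argument values below are hypothetical:

nc_data = NetCDFData("/data/giops.sqlite3")
file_list = nc_data.get_nc_file_list(
    DatasetConfig("giops_day"),      # placeholder dataset key
    variable="vo",                   # or a list/set of variable keys
    timestamp=2144966400,
    endtime=2145225600,
)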
Example no. 4
def timestamps():
    """
    Returns all timestamps available for a given variable in a dataset. This is
    variable-dependent because datasets can have multiple "quantums": surface 2D
    variables may be hourly, for example, while 3D variables may be daily.

    API Format: /api/v1.0/timestamps/?dataset=''&variable=''

    Required Arguments:
    * dataset : Dataset key - Can be found using /api/v1.0/datasets
    * variable : Variable key - Can be found using /api/v1.0/variables/?dataset='...'...

    Raises:
        APIError: if dataset or variable is not specified in the request

    Returns:
        Response object containing all timestamp pairs (e.g. [raw_timestamp_integer, iso_8601_date_string]) for the given
        dataset and variable.
    """

    args = request.args
    if "dataset" not in args:
        raise APIError("Please specify a dataset via ?dataset=dataset_name")

    dataset = args.get("dataset")
    config = DatasetConfig(dataset)

    if "variable" not in args:
        raise APIError("Please specify a variable via ?variable=variable_name")
    variable = args.get("variable")

    # Handle possible list of URLs for staggered grid velocity field datasets
    url = config.url if not isinstance(config.url, list) else config.url[0]
    if url.endswith(".sqlite3"):
        with SQLiteDatabase(url) as db:
            if variable in config.calculated_variables:
                data_vars = get_data_vars_from_equation(
                    config.calculated_variables[variable]['equation'],
                    [v.key for v in db.get_data_variables()])
                vals = db.get_timestamps(data_vars[0])
            else:
                vals = db.get_timestamps(variable)
    else:
        with open_dataset(config, variable=variable) as ds:
            vals = list(map(int, ds.nc_data.time_variable.values))
    converted_vals = time_index_to_datetime(vals, config.time_dim_units)

    result = []
    for idx, date in enumerate(converted_vals):
        if (config.quantum == 'month'
                or config.variable[variable].quantum == 'month'):
            date = datetime.datetime(date.year, date.month, 15)
        result.append({'id': vals[idx], 'value': date})
    result = sorted(result, key=lambda k: k['id'])

    js = json.dumps(result, cls=DateTimeEncoder)

    resp = Response(js, status=200, mimetype='application/json')
    return resp
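Given the API format in the docstring, the endpoint can be exercised with a plain HTTP request. A sketch assuming a local development server and placeholder dataset/variable keys:

import requests

resp = requests.get(
    "http://localhost:5000/api/v1.0/timestamps/",
    params={"dataset": "giops_day", "variable": "vo"},
)
# Each entry pairs the raw time index with its date, e.g.
# [{"id": 2144966400, "value": "2017-12-27T00:00:00+00:00"}, ...]
print(resp.json())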
Example no. 5
    def test_get_variable_units_returns_correct_units(self):

        expected_units = "m"

        with SQLiteDatabase(self.historical_db) as db:

            units = db.get_variable_units("zos")

            self.assertEqual(expected_units, units)
Example no. 6
    def test_get_latest_timestamp_returns_latest_timestamp(self):

        expected_value = 2145483000

        with SQLiteDatabase(self.historical_db) as db:

            latest = db.get_latest_timestamp("zos")

            self.assertEqual(expected_value, latest)
Example no. 7
    def test_get_all_dimensions_returns_dims(self):

        expected_dims = sorted(["axis_nbounds", "depthv", "time_counter", "x", "y"])

        with SQLiteDatabase(self.historical_db) as db:

            dims = sorted(db.get_all_dimensions())

            self.assertEqual(expected_dims, dims)
Example no. 8
    def test_get_earliest_timestamp_returns_earliest_timestamp(self):
    
        expected_value = 2144881800

        with SQLiteDatabase(self.historical_db) as db:

            earliest = db.get_earliest_timestamp("zos")

            self.assertEqual(expected_value, earliest)
Example no. 9
    def test_get_variable_dims_returns_correct_dims(self):

        expected_dims = sorted(["depthv", "time_counter", "x", "y"])

        with SQLiteDatabase(self.historical_db) as db:

            dims = sorted(db.get_variable_dims("vo"))

            self.assertEqual(expected_dims, dims)
Example no. 10
    def test_get_netcdf_files_returns_correct_files_for_historical(self):
        expected_nc_files = [
            "/home/nabil/test-mapper/ORCA025-CMC-TRIAL_1d_grid_V_2017122700.nc"]

        with SQLiteDatabase(self.historical_db) as db:

            nc_files = sorted(db.get_netcdf_files(
                self.historical_timestamps, ["vo"]))

            self.assertEqual(expected_nc_files, nc_files)
Example no. 11
    def test_get_all_dimensions_returns_dims(self):

        expected_dims = sorted(
            ['axis_nbounds', 'depthv', 'time_counter', 'x', 'y'])

        with SQLiteDatabase(self.historical_db) as db:

            dims = sorted(db.get_all_dimensions())

            self.assertEqual(expected_dims, dims)
Example no. 12
    def test_get_data_variables_returns_variable_list(self):

        with SQLiteDatabase(self.historical_db) as db:

            variables = db.get_data_variables()

            self.assertEqual(len(variables), 2)
            self.assertTrue("vo" in variables)
            self.assertTrue("zos" in variables)
            self.assertEqual(variables["vo"].name, "Sea Water Y Velocity")
            self.assertEqual(variables["vo"].unit, "m/s")
Example no. 13
def timestamps():
    """
    Returns all timestamps available for a given variable in a dataset.
    This is variable-dependent because datasets can have multiple "quantums":
    surface 2D variables may be hourly, for example, while 3D variables may be daily.

    Required Arguments:
    * dataset : Dataset key - Can be found using /api/v1.0/datasets
    * variable : Variable key - Can be found using /api/v1.0/variables/?dataset='...'...

    Returns:
        All timestamp pairs (e.g. [raw_timestamp_integer, iso_8601_date_string])
        for the given dataset and variable.
    """

    try:
        params = TimestampsSchema().load(request.args)
    except ValidationError as e:
        abort(400, str(e))

    dataset = params["dataset"]
    variable = params["variable"]

    config = DatasetConfig(dataset)

    # Handle possible list of URLs for staggered grid velocity field datasets
    url = config.url if not isinstance(config.url, list) else config.url[0]
    if url.endswith(".sqlite3"):
        with SQLiteDatabase(url) as db:
            if variable in config.calculated_variables:
                data_vars = get_data_vars_from_equation(
                    config.calculated_variables[variable]["equation"],
                    [v.key for v in db.get_data_variables()],
                )
                vals = db.get_timestamps(data_vars[0])
            else:
                vals = db.get_timestamps(variable)
    else:
        with open_dataset(config, variable=variable) as ds:
            vals = list(map(int, ds.nc_data.time_variable.values))
    converted_vals = time_index_to_datetime(vals, config.time_dim_units)

    result = []
    for idx, date in enumerate(converted_vals):
        if config.quantum == "month" or config.variable[variable].quantum == "month":
            date = datetime.datetime(date.year, date.month, 15)
        result.append({"id": vals[idx], "value": date})
    result = sorted(result, key=lambda k: k["id"])

    js = json.dumps(result, cls=DateTimeEncoder)

    resp = Response(js, status=200, mimetype="application/json")
    return resp
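This variant delegates argument validation to a TimestampsSchema. A minimal sketch of such a schema, assuming marshmallow (the real schema may add existence checks against known datasets):

from marshmallow import Schema, fields


class TimestampsSchema(Schema):
    dataset = fields.Str(required=True)
    variable = fields.Str(required=True)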
Example no. 14
    def variables(self) -> VariableList:
        """Returns a list of all data variables and their
        attributes in the dataset.

        Returns:
            VariableList -- contains all the data variables (no coordinates)
        """

        # Check if variable list has been created yet.
        # This saves approx 3 lookups per tile, and
        # over a dozen when a new dataset is loaded.
        if self._variable_list is not None:
            return self._variable_list

        # Handle possible list of URLs for staggered grid velocity field datasets
        url = self.url if not isinstance(self.url, list) else self.url[0]
        if url.endswith(".sqlite3"):
            with SQLiteDatabase(url) as db:
                self._variable_list = db.get_data_variables()  # Cache the list for later

        elif url.endswith(".zarr"):
            ds_zarr = xarray.open_zarr(url)
            var_list = []
            for name in ds_zarr.data_vars:
                attrs = ds_zarr.variables[name].attrs
                # Use .get() so a variable missing an attribute doesn't raise a
                # KeyError; `or` preserves the falsy-to-default behaviour for the
                # string attributes, while plain .get() keeps a legitimate 0 for
                # the valid-range bounds.
                units = attrs.get('units') or None
                long_name = attrs.get('long_name') or name
                valid_min = attrs.get('valid_min')
                valid_max = attrs.get('valid_max')

                var_list.append(Variable(name, long_name, units,
                                         list(ds_zarr[name].dims),
                                         valid_min, valid_max))

            self._variable_list = var_list  # Cache the list for later

        else:
            try:
                # Handle possible list of URLs for staggered grid velocity field datasets
                url = self.url if isinstance(self.url, list) else [self.url]
                # This will raise a FutureWarning for xarray>=0.12.2.
                # That warning should be resolvable by changing to:
                # with xarray.open_mfdataset(url, combine="by_coords", decode_times=False) as ds:
                with xarray.open_mfdataset(url, decode_times=False) as ds:
                    self._variable_list = self._get_xarray_data_variables(ds)  # Cache the list for later

            except xarray.core.variable.MissingDimensionsError:
                # xarray won't open FVCOM files due to dimension/coordinate/variable label
                # duplication issue, so fall back to using netCDF4.Dataset()
                with netCDF4.Dataset(self.url) as ds:
                    self._variable_list = self._get_netcdf4_data_variables(ds)  # Cache the list for later

        return self._variable_list
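A hypothetical usage sketch for the accessor above; in the real code it may be exposed as a property rather than called as a method:

config = DatasetConfig("giops_day")  # placeholder dataset key
var_list = config.variables()        # first call builds and caches the list
var_list = config.variables()        # subsequent calls hit the cache
for v in var_list:
    print(v.key, v.unit)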
Example no. 15
    def test_erroneous_args_return_empty_lists(self):

        with SQLiteDatabase(self.historical_db) as db:

            ncfiles = db.get_netcdf_files(self.historical_timestamps, "fake_variable")
            timestamps = db.get_timestamps("fake_variable")
            dims = db.get_variable_dims("fake_variable")
            units = db.get_variable_units("fake_variable")

            self.assertFalse(ncfiles)
            self.assertFalse(timestamps)
            self.assertFalse(dims)
            self.assertFalse(units)
Example no. 16
    def __get_variables_to_load(self, db: SQLiteDatabase, variable: set,
                                calculated_variables: dict) -> List[str]:

        calc_var_keys = set(calculated_variables)
        variables_to_load = variable.difference(calc_var_keys)
        requested_calculated_variables = variable & calc_var_keys
        if requested_calculated_variables:
            for rcv in requested_calculated_variables:
                equation = calculated_variables[rcv]['equation']

                variables_to_load.update(data.utils.get_data_vars_from_equation(
                    equation, [v.key for v in db.get_data_variables()]))

        return list(variables_to_load)
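A worked illustration of the resolution logic above, with hypothetical variable names and equation:

variable = {"magwatervel", "zos"}
calculated_variables = {"magwatervel": {"equation": "magnitude(uo, vo)"}}
# "zos" has no equation, so it loads directly; "magwatervel" expands to the
# data variables referenced in its equation, so variables_to_load ends up
# containing "zos", "uo", and "vo" (in set order).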
Example no. 17
    def get_nc_file_list(self, datasetconfig: DatasetConfig,
                         **kwargs: dict) -> Union[List, None]:
        try:
            if not datasetconfig.url.endswith(".sqlite3"):
                # This method is only applicable to SQLite-indexed datasets
                return
        except AttributeError:
            # Probably a file-path dataset config, to which this method also doesn't apply
            return

        with SQLiteDatabase(self.url) as db:

            try:
                variable = kwargs['variable']
            except KeyError:
                raise RuntimeError(
                    "Opening a dataset via sqlite requires the 'variable' keyword argument."
                )
            if isinstance(variable, str):
                variable = {variable}
            elif not isinstance(variable, set):
                variable = set(variable)

            calculated_variables = datasetconfig.calculated_variables
            variables_to_load = self.__get_variables_to_load(
                db, variable, calculated_variables)

            try:
                timestamp = self.__get_requested_timestamps(
                    db, variables_to_load[0], kwargs['timestamp'],
                    kwargs.get('endtime'),
                    kwargs.get('nearest_timestamp', False))
            except KeyError:
                raise RuntimeError(
                    "Opening a dataset via sqlite requires the 'timestamp' keyword argument."
                )

            if not timestamp:
                raise RuntimeError("Error finding timestamp(s) in database.")

            file_list = db.get_netcdf_files(timestamp, variables_to_load)
            if not file_list:
                raise RuntimeError("NetCDF file list is empty.")

            self._nc_files = file_list
Example no. 18
    def __get_requested_timestamps(self, db: SQLiteDatabase, variable: str,
                                   timestamp, endtime,
                                   nearest_timestamp) -> List[int]:

        # We assume timestamp and/or endtime have been converted
        # to the same time units as the requested dataset. Otherwise
        # this won't work.
        if nearest_timestamp:
            all_timestamps = db.get_timestamps(variable)

            start = data.utils.find_le(all_timestamps, timestamp)
            if not endtime:
                return [start]

            end = data.utils.find_le(all_timestamps, endtime)
            return db.get_timestamp_range(start, end, variable)

        if endtime is None:
            if isinstance(timestamp, list):
                # We've received a list of specific timestamps; check this
                # before comparing against 0, which would raise a TypeError
                return timestamp
            if timestamp > 0:
                # We've received a specific timestamp (e.g. 21100345)
                return [timestamp]
            # Negative values are indices into the timestamp list
            all_timestamps = db.get_timestamps(variable)
            return [all_timestamps[timestamp]]

        if timestamp > 0 and endtime > 0:
            # We've received a request for a time range
            # with specific timestamps given
            return db.get_timestamp_range(timestamp, endtime, variable)

        # Otherwise assume negative values are indices into timestamp list
        all_timestamps = db.get_timestamps(variable)
        len_timestamps = len(all_timestamps)
        if timestamp < 0 and endtime > 0:
            idx = data.utils.roll_time(timestamp, len_timestamps)
            return db.get_timestamp_range(all_timestamps[idx], endtime,
                                          variable)

        if timestamp > 0 and endtime < 0:
            idx = data.utils.roll_time(endtime, len_timestamps)
            return db.get_timestamp_range(timestamp, all_timestamps[idx],
                                          variable)
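The branches above encode the following selection semantics (a summary sketch; all values are hypothetical):

# timestamp=2144966400, endtime=None       -> [2144966400]
# timestamp=-1,         endtime=None       -> [<most recent timestamp>]
# timestamp=2144966400, endtime=2145225600 -> every timestamp in that range
# timestamp=-5,         endtime=2145225600 -> range from the 5th-newest
#                                             timestamp up to endtime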
Example no. 19
    def dimensions(self) -> List[str]:
        """Return a list of the dimensions in the dataset.
        """
        # Handle possible list of URLs for staggered grid velocity field datasets
        url = self.url if not isinstance(self.url, list) else self.url[0]

        if url.endswith(".sqlite3"):
            try:
                with SQLiteDatabase(url) as db:
                    dimension_list = db.get_all_dimensions()
            except sqlite3.OperationalError:
                pass
            return dimension_list

        # Open dataset (can't use xarray here since it doesn't like FVCOM files)
        try:
            with netCDF4.Dataset(url) as ds:
                dimension_list = [dim for dim in ds.dimensions]
        except FileNotFoundError:
            dimension_list = []
        return dimension_list
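A hypothetical usage sketch, assuming dimensions() lives on the same class as variables() in Example no. 14:

config = DatasetConfig("giops_day")   # placeholder dataset key
print(config.dimensions())            # e.g. ['time_counter', 'depthv', 'y', 'x']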
Example no. 20
    def test_get_timestamp_range_returns_range(self):

        with SQLiteDatabase(self.historical_db) as db:
            rng = db.get_timestamp_range(2144966400, 2145225600, "vo")

            self.assertEqual(len(rng), 4)