def test_get_timestamps_returns_correct_timestamps_for_historical(self):
    with SQLiteDatabase(self.historical_db) as db:
        timestamps = db.get_timestamps("vo")

    self.assertEqual(self.historical_timestamps, timestamps)
def test_get_all_variables_returns_all_variables(self):
    with SQLiteDatabase(self.historical_db) as db:
        variables = db.get_all_variables()

    self.assertEqual(len(variables), 12)
def get_nc_file_list(self, datasetconfig: DatasetConfig, **kwargs: dict) -> Union[List, None]:
    try:
        if not datasetconfig.url.endswith(".sqlite3"):
            # This method is only applicable to SQLite-indexed datasets
            return
    except AttributeError:
        # Probably a file-path dataset config, for which this method
        # is also not applicable
        return

    try:
        variables = kwargs['variable']
    except KeyError:
        variables = datasetconfig.variables[0]

    variables = {variables} if isinstance(variables, str) else set(variables)

    calculated_variables = datasetconfig.calculated_variables

    with SQLiteDatabase(self.url) as db:
        variables_to_load = self.__get_variables_to_load(
            db, variables, calculated_variables)

        timestamp = self.__get_requested_timestamps(
            db,
            variables_to_load[0],
            kwargs.get('timestamp', -1),
            kwargs.get('endtime'),
            kwargs.get('nearest_timestamp', False))
        if not timestamp:
            raise RuntimeError("Error finding timestamp(s) in database.")

        file_list = db.get_netcdf_files(timestamp, variables_to_load)
        if not file_list:
            raise RuntimeError("NetCDF file list is empty.")

        return file_list
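# A minimal usage sketch for get_nc_file_list() above. The dataset key and
# raw timestamp are made-up illustrations, not values from a real config;
# the sketch assumes the config points at a .sqlite3 index and that the
# timestamp is already in the dataset's time units.
#
#     config = DatasetConfig("giops_day")      # hypothetical dataset key
#     nc_files = nc_data.get_nc_file_list(
#         config,
#         variable="vo",                       # may also be a list/set of keys
#         timestamp=2144881800,                # raw timestamp in dataset units
#     )
#
# If the 'variable' kwarg is omitted, the first variable in the dataset
# config is used; 'timestamp' defaults to -1, i.e. the latest timestamp.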
def timestamps():
    """
    Returns all timestamps available for a given variable in a dataset.
    This is variable-dependent because datasets can have multiple
    "quantums": for example, surface 2D variables may be hourly, while
    3D variables may be daily.

    API Format: /api/v1.0/timestamps/?dataset=''&variable=''

    Required Arguments:
    * dataset  : Dataset key - Can be found using /api/v1.0/datasets
    * variable : Variable key - Can be found using /api/v1.0/variables/?dataset='...'...

    Raises:
        APIError: if dataset or variable is not specified in the request

    Returns:
        Response object containing all timestamp pairs
        (e.g. [raw_timestamp_integer, iso_8601_date_string]) for the
        given dataset and variable.
    """
    args = request.args
    if "dataset" not in args:
        raise APIError("Please specify a dataset via ?dataset=dataset_name")
    dataset = args.get("dataset")
    config = DatasetConfig(dataset)

    if "variable" not in args:
        raise APIError("Please specify a variable via ?variable=variable_name")
    variable = args.get("variable")

    # Handle possible list of URLs for staggered-grid velocity field datasets
    url = config.url if not isinstance(config.url, list) else config.url[0]

    if url.endswith(".sqlite3"):
        with SQLiteDatabase(url) as db:
            if variable in config.calculated_variables:
                data_vars = get_data_vars_from_equation(
                    config.calculated_variables[variable]['equation'],
                    [v.key for v in db.get_data_variables()])
                vals = db.get_timestamps(data_vars[0])
            else:
                vals = db.get_timestamps(variable)
    else:
        with open_dataset(config, variable=variable) as ds:
            vals = list(map(int, ds.nc_data.time_variable.values))

    converted_vals = time_index_to_datetime(vals, config.time_dim_units)

    result = []
    for idx, date in enumerate(converted_vals):
        if config.quantum == 'month' or config.variable[variable].quantum == 'month':
            date = datetime.datetime(date.year, date.month, 15)
        result.append({'id': vals[idx], 'value': date})
    result = sorted(result, key=lambda k: k['id'])

    js = json.dumps(result, cls=DateTimeEncoder)

    resp = Response(js, status=200, mimetype='application/json')
    return resp
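# Example request and response shape for the endpoint above. The dataset key
# and timestamp values are illustrative only:
#
#     GET /api/v1.0/timestamps/?dataset=giops_day&variable=vo
#
#     [
#         {"id": 2144881800, "value": "2017-12-27T00:00:00+00:00"},
#         {"id": 2144968200, "value": "2017-12-28T00:00:00+00:00"}
#     ]
#
# "id" is the raw timestamp integer stored in the index database; "value" is
# its ISO 8601 rendering produced by DateTimeEncoder.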
def test_get_variable_units_returns_correct_units(self):
    expected_units = "m"
    with SQLiteDatabase(self.historical_db) as db:
        units = db.get_variable_units("zos")

    self.assertEqual(expected_units, units)
def test_get_latest_timestamp_returns_latest_timestamp(self):
    expected_value = 2145483000
    with SQLiteDatabase(self.historical_db) as db:
        latest = db.get_latest_timestamp("zos")

    self.assertEqual(expected_value, latest)
def test_get_all_dimensions_returns_dims(self):
    expected_dims = sorted(["axis_nbounds", "depthv", "time_counter", "x", "y"])
    with SQLiteDatabase(self.historical_db) as db:
        dims = sorted(db.get_all_dimensions())

    self.assertEqual(expected_dims, dims)
def test_get_earliest_timestamp_returns_earliest_timestamp(self):
    expected_value = 2144881800
    with SQLiteDatabase(self.historical_db) as db:
        earliest = db.get_earliest_timestamp("zos")

    self.assertEqual(expected_value, earliest)
def test_get_variable_dims_returns_correct_dims(self):
    expected_dims = sorted(["depthv", "time_counter", "x", "y"])
    with SQLiteDatabase(self.historical_db) as db:
        dims = sorted(db.get_variable_dims("vo"))

    self.assertEqual(expected_dims, dims)
def test_get_netcdf_files_returns_correct_files_for_historical(self):
    expected_nc_files = [
        "/home/nabil/test-mapper/ORCA025-CMC-TRIAL_1d_grid_V_2017122700.nc"]
    with SQLiteDatabase(self.historical_db) as db:
        nc_files = sorted(db.get_netcdf_files(
            self.historical_timestamps, ["vo"]))

    self.assertEqual(expected_nc_files, nc_files)
def test_get_data_variables_returns_variable_list(self):
    with SQLiteDatabase(self.historical_db) as db:
        variables = db.get_data_variables()

    self.assertEqual(len(variables), 2)
    self.assertIn("vo", variables)
    self.assertIn("zos", variables)
    self.assertEqual(variables["vo"].name, "Sea Water Y Velocity")
    self.assertEqual(variables["vo"].unit, "m/s")
def timestamps():
    """
    Returns all timestamps available for a given variable in a dataset.
    This is variable-dependent because datasets can have multiple
    "quantums": for example, surface 2D variables may be hourly, while
    3D variables may be daily.

    Required Arguments:
    * dataset  : Dataset key - Can be found using /api/v1.0/datasets
    * variable : Variable key - Can be found using /api/v1.0/variables/?dataset='...'...

    Returns:
        All timestamp pairs (e.g. [raw_timestamp_integer, iso_8601_date_string])
        for the given dataset and variable.
    """
    try:
        result = TimestampsSchema().load(request.args)
    except ValidationError as e:
        abort(400, str(e))

    dataset = result["dataset"]
    variable = result["variable"]
    config = DatasetConfig(dataset)

    # Handle possible list of URLs for staggered-grid velocity field datasets
    url = config.url if not isinstance(config.url, list) else config.url[0]

    if url.endswith(".sqlite3"):
        with SQLiteDatabase(url) as db:
            if variable in config.calculated_variables:
                data_vars = get_data_vars_from_equation(
                    config.calculated_variables[variable]["equation"],
                    [v.key for v in db.get_data_variables()],
                )
                vals = db.get_timestamps(data_vars[0])
            else:
                vals = db.get_timestamps(variable)
    else:
        with open_dataset(config, variable=variable) as ds:
            vals = list(map(int, ds.nc_data.time_variable.values))

    converted_vals = time_index_to_datetime(vals, config.time_dim_units)

    result = []
    for idx, date in enumerate(converted_vals):
        if config.quantum == "month" or config.variable[variable].quantum == "month":
            date = datetime.datetime(date.year, date.month, 15)
        result.append({"id": vals[idx], "value": date})
    result = sorted(result, key=lambda k: k["id"])

    js = json.dumps(result, cls=DateTimeEncoder)

    resp = Response(js, status=200, mimetype="application/json")
    return resp
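# TimestampsSchema is referenced above but not shown in this listing. A
# minimal sketch of what it could look like with marshmallow, assuming both
# query parameters are required strings (the real schema may differ):

from marshmallow import Schema, fields


class TimestampsSchema(Schema):
    """Validates the query string for /api/v1.0/timestamps/."""

    dataset = fields.Str(required=True)
    variable = fields.Str(required=True)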
def variables(self) -> VariableList:
    """Returns a list of all data variables and their attributes in the dataset.

    Returns:
        VariableList -- contains all the data variables (no coordinates)
    """
    # Check if the variable list has been created yet.
    # This saves approx. 3 lookups per tile, and over a dozen
    # when a new dataset is loaded.
    if self._variable_list is not None:
        return self._variable_list

    # Handle possible list of URLs for staggered-grid velocity field datasets
    url = self.url if not isinstance(self.url, list) else self.url[0]

    if url.endswith(".sqlite3"):
        with SQLiteDatabase(url) as db:
            self._variable_list = db.get_data_variables()  # Cache the list for later
    elif url.endswith(".zarr"):
        ds_zarr = xarray.open_zarr(url)
        var_list = []
        for var in list(ds_zarr.data_vars):
            name = var
            # Use attrs.get() so that a missing attribute falls back to the
            # default instead of raising KeyError
            attrs = ds_zarr.variables[var].attrs
            units = attrs.get('units') or None
            long_name = attrs.get('long_name') or name
            valid_min = attrs.get('valid_min') or None
            valid_max = attrs.get('valid_max') or None
            var_list.append(
                Variable(name, long_name, units, list(ds_zarr[name].dims),
                         valid_min, valid_max))
        self._variable_list = var_list
    else:
        try:
            # Handle possible list of URLs for staggered-grid velocity field datasets
            url = self.url if isinstance(self.url, list) else [self.url]
            # This will raise a FutureWarning for xarray >= 0.12.2, which
            # should be resolvable by changing to:
            # with xarray.open_mfdataset(url, combine="by_coords", decode_times=False) as ds:
            with xarray.open_mfdataset(url, decode_times=False) as ds:
                self._variable_list = self._get_xarray_data_variables(ds)  # Cache the list for later
        except xarray.core.variable.MissingDimensionsError:
            # xarray won't open FVCOM files due to a dimension/coordinate/variable
            # label duplication issue, so fall back to using netCDF4.Dataset()
            with netCDF4.Dataset(self.url) as ds:
                self._variable_list = self._get_netcdf4_data_variables(ds)  # Cache the list for later

    return self._variable_list
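# Usage sketch for the cached property above (the constructor call and path
# are hypothetical): the first access resolves the variable list from the
# index or dataset, and later accesses return the cached object.
#
#     nc_data = NetCDFData("tests/testdata/test-historical.sqlite3")  # hypothetical
#     first = nc_data.variables   # populates self._variable_list
#     second = nc_data.variables  # served from the cache
#     assert first is second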
def test_erroneous_args_return_empty_lists(self):
    with SQLiteDatabase(self.historical_db) as db:
        ncfiles = db.get_netcdf_files(self.historical_timestamps, "fake_variable")
        timestamps = db.get_timestamps("fake_variable")
        dims = db.get_variable_dims("fake_variable")
        units = db.get_variable_units("fake_variable")

    self.assertFalse(ncfiles)
    self.assertFalse(timestamps)
    self.assertFalse(dims)
    self.assertFalse(units)
def __get_variables_to_load(self, db: SQLiteDatabase, variable: set,
                            calculated_variables: dict) -> List[str]:

    calc_var_keys = set(calculated_variables)
    variables_to_load = variable.difference(calc_var_keys)

    requested_calculated_variables = variable & calc_var_keys
    if requested_calculated_variables:
        for rcv in requested_calculated_variables:
            equation = calculated_variables[rcv]['equation']
            variables_to_load.update(
                data.utils.get_data_vars_from_equation(
                    equation, [v.key for v in db.get_data_variables()]))

    return list(variables_to_load)
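# Worked example for __get_variables_to_load (the calculated variable "mag"
# and its equation are made up for illustration): given
#
#     calculated_variables = {
#         "mag": {"equation": "magnitude(uo, vo)"},
#     }
#
# a request for {"mag", "zos"} keeps "zos" as-is, drops "mag" from the load
# set, and re-adds the underlying data variables referenced by its equation,
# yielding (in some order) ["zos", "uo", "vo"].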
def get_nc_file_list(self, datasetconfig: DatasetConfig, **kwargs: dict) -> None:
    try:
        if not datasetconfig.url.endswith(".sqlite3"):
            # This method is only applicable to SQLite-indexed datasets
            return
    except AttributeError:
        # Probably a file-path dataset config, for which this method
        # is also not applicable
        return

    with SQLiteDatabase(self.url) as db:
        try:
            variable = kwargs['variable']
        except KeyError:
            raise RuntimeError(
                "Opening a dataset via sqlite requires the 'variable' keyword argument."
            )
        if isinstance(variable, str):
            variable = {variable}
        elif not isinstance(variable, set):
            variable = set(variable)

        calculated_variables = datasetconfig.calculated_variables

        variables_to_load = self.__get_variables_to_load(
            db, variable, calculated_variables)

        try:
            timestamp = self.__get_requested_timestamps(
                db,
                variables_to_load[0],
                kwargs['timestamp'],
                kwargs.get('endtime'),
                kwargs.get('nearest_timestamp', False))
        except KeyError:
            raise RuntimeError(
                "Opening a dataset via sqlite requires the 'timestamp' keyword argument."
            )
        if not timestamp:
            raise RuntimeError("Error finding timestamp(s) in database.")

        file_list = db.get_netcdf_files(timestamp, variables_to_load)
        if not file_list:
            raise RuntimeError("NetCDF file list is empty.")

        self._nc_files = file_list
def __get_requested_timestamps(self, db: SQLiteDatabase, variable: str,
                               timestamp, endtime, nearest_timestamp) -> List[int]:

    # We assume timestamp and/or endtime have already been converted to the
    # same time units as the requested dataset; otherwise this won't work.
    if nearest_timestamp:
        all_timestamps = db.get_timestamps(variable)

        start = data.utils.find_le(all_timestamps, timestamp)
        if not endtime:
            return [start]

        end = data.utils.find_le(all_timestamps, endtime)
        return db.get_timestamp_range(start, end, variable)

    if timestamp > 0 and endtime is None:
        # We've received a specific timestamp (e.g. 21100345)
        if not isinstance(timestamp, list):
            return [timestamp]
        return timestamp

    if timestamp < 0 and endtime is None:
        all_timestamps = db.get_timestamps(variable)
        return [all_timestamps[timestamp]]

    if timestamp > 0 and endtime > 0:
        # We've received a request for a time range with
        # specific timestamps given
        return db.get_timestamp_range(timestamp, endtime, variable)

    # Otherwise assume negative values are indices into the timestamp list
    all_timestamps = db.get_timestamps(variable)
    len_timestamps = len(all_timestamps)
    if timestamp < 0 and endtime > 0:
        idx = data.utils.roll_time(timestamp, len_timestamps)
        return db.get_timestamp_range(all_timestamps[idx], endtime, variable)

    if timestamp > 0 and endtime < 0:
        idx = data.utils.roll_time(endtime, len_timestamps)
        return db.get_timestamp_range(timestamp, all_timestamps[idx], variable)
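# Illustration of the timestamp/endtime conventions handled above (the raw
# timestamp values are made up):
#
#     __get_requested_timestamps(db, "vo", 2144881800, None, False)
#         -> [2144881800]              # a specific raw timestamp
#     __get_requested_timestamps(db, "vo", -1, None, False)
#         -> [<latest timestamp>]      # negative = index into timestamp list
#     __get_requested_timestamps(db, "vo", 2144881800, 2145483000, False)
#         -> every timestamp in that range
#     __get_requested_timestamps(db, "vo", -5, 2145483000, False)
#         -> range starting 5 timestamps from the end of the list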
def dimensions(self) -> List[str]:
    """Return a list of the dimensions in the dataset."""

    # Handle possible list of URLs for staggered-grid velocity field datasets
    url = self.url if not isinstance(self.url, list) else self.url[0]

    if url.endswith(".sqlite3"):
        try:
            with SQLiteDatabase(url) as db:
                dimension_list = db.get_all_dimensions()
        except sqlite3.OperationalError:
            # Fall back to an empty list so the return below can't raise
            # a NameError when the database can't be read
            dimension_list = []
        return dimension_list

    # Open the dataset directly (can't use xarray here since it
    # doesn't like FVCOM files)
    try:
        with netCDF4.Dataset(url) as ds:
            dimension_list = [dim for dim in ds.dimensions]
    except FileNotFoundError:
        dimension_list = []

    return dimension_list
def test_get_timestamp_range_returns_range(self):
    with SQLiteDatabase(self.historical_db) as db:
        rng = db.get_timestamp_range(2144966400, 2145225600, "vo")

    self.assertEqual(len(rng), 4)