# Imports assumed to live at module level in the original test file: os and
# xarray are standard, Datasource and get_local_bucket follow the HUGS module
# layout used in recombine_sections below.
import os
import xarray

from HUGS.Modules import Datasource
from HUGS.ObjectStore import get_local_bucket


def test_load_dataset():
    filename = "WAO-20magl_EUROPE_201306_small.nc"
    dir_path = os.path.dirname(__file__)
    test_data = "../data/emissions"
    filepath = os.path.join(dir_path, test_data, filename)

    ds = xarray.load_dataset(filepath)

    metadata = {"some": "metadata"}

    # Store the Dataset in a Datasource and write it to the object store
    d = Datasource("dataset_test")
    d.add_data(metadata=metadata, data=ds, data_type="footprint")
    d.save()

    # Retrieve the object store key of the stored data from the Datasource's
    # key record
    keys = d._data_keys["latest"]["keys"]
    key = list(keys.values())[0]

    bucket = get_local_bucket()

    # Load the Dataset back from the object store and check it round-trips
    loaded_ds = Datasource.load_dataset(bucket=bucket, key=key)

    assert loaded_ds.equals(ds)
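
# Note on the assertion above: xarray's Dataset.equals() compares dimensions,
# coordinates and values but ignores attributes, while Dataset.identical()
# also compares attributes. A minimal self-contained sketch with synthetic
# data (not part of the original test suite) illustrating the difference:
def _equals_vs_identical_demo():
    import xarray as xr

    a = xr.Dataset({"x": ("time", [1, 2])}, attrs={"units": "ppb"})
    b = xr.Dataset({"x": ("time", [1, 2])})

    assert a.equals(b)  # attributes are ignored, so these compare equal
    assert not a.identical(b)  # identical() also compares attrs, so this differs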
def recombine_sections(data_keys):
    """ Combines separate Datasets into a single Dataset for processing
        to NetCDF for output

        Args:
            data_keys (list): List of object store keys
        Returns:
            xarray.Dataset: Combined Dataset sorted by time
    """
    from xarray import concat as xr_concat

    from HUGS.ObjectStore import get_bucket
    from HUGS.Modules import Datasource

    bucket = get_bucket()

    # Load each section from the object store and concatenate along time
    data = [Datasource.load_dataset(bucket=bucket, key=key) for key in data_keys]

    combined = xr_concat(data, dim="time")
    combined = combined.sortby("time")

    # TODO - check for duplicate timestamps? One approach, taken from
    # https://stackoverflow.com/questions/51058379/drop-duplicate-times-in-xarray
    # _, index = np.unique(combined["time"], return_index=True)
    # combined = combined.isel(time=index)

    return combined
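
# A minimal, self-contained sketch (synthetic data, not tied to the object
# store) of the concat -> sortby -> drop-duplicates pattern that the TODO in
# recombine_sections refers to, keeping the first value at each timestamp:
def _recombine_demo():
    import numpy as np
    import xarray as xr

    times_a = np.array(["2013-06-01", "2013-06-03"], dtype="datetime64[ns]")
    times_b = np.array(["2013-06-02", "2013-06-03"], dtype="datetime64[ns]")

    ds_a = xr.Dataset({"mf": ("time", [1.0, 3.0])}, coords={"time": times_a})
    ds_b = xr.Dataset({"mf": ("time", [2.0, 3.5])}, coords={"time": times_b})

    combined = xr.concat([ds_a, ds_b], dim="time").sortby("time")

    # np.unique returns the index of the first occurrence of each timestamp,
    # so duplicate times (here 2013-06-03) collapse to their first value
    _, index = np.unique(combined["time"], return_index=True)
    combined = combined.isel(time=index)

    assert combined.time.size == 3
    return combined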