def test_img2ts_daily_no_resampling_missing_day():
    """
    Test conversion over the missing day 2016-01-01 without resampling.

    Images from the ``TestMultiTemporalImageDatasetDaily`` reader are
    converted for 2015-12-05 to 2016-01-10. The expected series holds the
    values 5..31 followed by 2..10, and the timestamp 2016-01-01 is removed
    from the expected dates.
    """
    input_grid = BasicGrid(
        np.array([0.5, 0.5, -0.5, -0.5]),
        np.array([1, -1, 1, -1]),
    )

    outputpath = tempfile.mkdtemp()
    start = datetime(2015, 12, 5)
    end = datetime(2016, 1, 10)

    ds_in = TestMultiTemporalImageDatasetDaily()
    img2ts = Img2Ts(ds_in, outputpath, start, end, imgbuffer=15,
                    input_grid=input_grid)

    # ``np.float`` was only an alias of the builtin ``float`` and has been
    # removed from recent NumPy releases, so the builtin is used directly.
    ts_should = np.concatenate([
        np.arange(5, 32, dtype=float),
        np.arange(2, 11, dtype=float),
    ])

    dates_should = ds_in.tstamps_for_daterange(start, end)
    dates_should.remove(datetime(2016, 1, 1))

    img2ts.calc()

    ts_file = os.path.join(outputpath, '0000.nc')
    with OrthoMultiTs(ts_file) as ds:
        ts = ds.read_ts('var1', 0)
        nptest.assert_allclose(ts['var1'], ts_should)
        assert dates_should == list(ts['time'])
        nptest.assert_allclose(ds.dataset.variables['location_id'][:],
                               np.array([0, 1, 2, 3]))
def reshuffle(input_root, outputpath, startdate, enddate,
              parameters, land_points=True, imgbuffer=50):
    """
    Reshuffle method applied to GLDAS data.

    Parameters
    ----------
    input_root: string
        Input path of the GLDAS images; also used to detect the file type.
    outputpath : string
        Output path. Created if it does not exist.
    startdate : datetime
        Start date. The image of this date provides the time series
        attributes (and the grid if no land grid is used).
    enddate : datetime
        End date.
    parameters: list
        parameters to read and convert
    land_points : bool, optional (default: True)
        If set, a GLDAS025LandGrid is created, passed to the netCDF reader
        and used as input grid. For grib input only a warning is issued.
    imgbuffer: int, optional
        How many images to read at once before writing time series.
    """
    landgrid = GLDAS025LandGrid() if land_points else None

    is_grib = get_filetype(input_root) == 'grib'
    if is_grib:
        input_dataset = GLDAS_Noah_v1_025Ds(input_root, parameters,
                                            array_1D=True)
        if land_points:
            warnings.warn('Land Grid is fit to GLDAS 2.x netCDF data')
    else:
        input_dataset = GLDAS_Noah_v21_025Ds(input_root, parameters,
                                             landgrid, array_1D=True)

    if not os.path.exists(outputpath):
        os.makedirs(outputpath)

    # the first image delivers the time series attributes
    first_image = input_dataset.read(startdate)
    ts_attributes = first_image.metadata

    # fall back to the coordinates of the first image without a land grid
    grid = landgrid if landgrid else BasicGrid(first_image.lon,
                                               first_image.lat)

    converter_settings = {
        'input_dataset': input_dataset,
        'outputpath': outputpath,
        'startdate': startdate,
        'enddate': enddate,
        'input_grid': grid,
        'imgbuffer': imgbuffer,
        'cellsize_lat': 5.0,
        'cellsize_lon': 5.0,
        'global_attr': {'product': 'GLDAS'},
        'zlib': True,
        'unlim_chunksize': 1000,
        'ts_attributes': ts_attributes,
    }
    Img2Ts(**converter_settings).calc()
def EASE25CellGrid():
    """
    Build a cell grid from the grid definition ``EASE2_grid(25000)``.

    Returns
    -------
    grid : object
        Result of ``BasicGrid.to_cell_grid(5., 5.)`` for the flattened
        longitude/latitude mesh of the EASE2 grid definition.
    """
    ease_def = EASE2_grid(25000)
    lon_axis = ease_def.londim
    lat_axis = ease_def.latdim

    lon_mesh, lat_mesh = np.meshgrid(lon_axis, lat_axis)
    # flip lats, so that origin in bottom left
    lat_mesh = np.flipud(lat_mesh)

    basic_grid = BasicGrid(
        lon_mesh.flatten(),
        lat_mesh.flatten(),
        shape=(lon_axis.size, lat_axis.size),
    )
    return basic_grid.to_cell_grid(5., 5.)
def __init__(self, networks):
    """
    Set up the collection from already created Networks.

    Parameters
    ----------
    networks : list[Network]
        Networks to store; each is registered under its ``name`` and its
        ``coords`` (lons, lats) are added to the common grid.
    """
    self.networks = OrderedDict()

    all_lons, all_lats = [], []
    for network in networks:
        self.networks[network.name] = network
        net_lons, net_lats = network.coords
        all_lons.extend(net_lons)
        all_lats.extend(net_lats)

    # without any coordinates there is nothing to build a grid from
    if len(all_lons) > 0 and len(all_lats) > 0:
        self.grid = BasicGrid(all_lons, all_lats)
    else:
        self.grid = None
def reshuffle(input_root, outputpath, startdate, enddate,
              parameters, imgbuffer=50):
    """
    Reshuffle method applied to ESACCI SM v0.42 data.

    Parameters
    ----------
    input_root: string
        Input path of the images, passed to the CCI_SM_v042_025Ds reader.
    outputpath : string
        Output path. Created if it does not exist.
    startdate : datetime
        Start date. The image of this date provides the time series
        attributes and the grid coordinates.
    enddate : datetime
        End date.
    parameters: list
        parameters to read and convert
    imgbuffer: int, optional
        How many images to read at once before writing time series.
    """
    input_dataset = CCI_SM_v042_025Ds(input_root, parameters, array_1D=True)

    if not os.path.exists(outputpath):
        os.makedirs(outputpath)

    # attributes and coordinates are taken from the first image
    first_image = input_dataset.read(startdate)
    ts_attributes = first_image.metadata
    grid = BasicGrid(first_image.lon, first_image.lat)

    converter = Img2Ts(
        input_dataset=input_dataset,
        outputpath=outputpath,
        startdate=startdate,
        enddate=enddate,
        input_grid=grid,
        imgbuffer=imgbuffer,
        cellsize_lat=5.0,
        cellsize_lon=5.0,
        global_attr={'product': 'ESACCI'},
        zlib=True,
        unlim_chunksize=1000,
        ts_attributes=ts_attributes,
    )
    converter.calc()
def reshuffle(input_root, outputpath, startdate, enddate,
              parameters, overpass=None, crid=None, imgbuffer=50):
    """
    Reshuffle method applied to SPL3SMP images (read with SPL3SMP_Ds).

    Parameters
    ----------
    input_root: string
        Input path of the images, passed to the SPL3SMP_Ds reader.
    outputpath : string
        Output path. Created if it does not exist.
    startdate : datetime
        Start date. The image of this date provides the time series
        attributes.
    enddate : datetime
        End date.
    parameters: list
        parameters to read and convert
    overpass : str
        Select 'AM' for the descending overpass or 'PM' for the ascending one.
        If the version data does not contain multiple overpasses, this must be
        None. A set value is also stored in the global attributes.
    crid : int, optional (default: None)
        Search for files with this Composite Release ID for reshuffling only.
        See also https://nsidc.org/data/smap/data_versions#CRID
    imgbuffer: int, optional
        How many images to read at once before writing time series.
    """
    input_dataset = SPL3SMP_Ds(input_root, parameter=parameters,
                               overpass=overpass, crid=crid, flatten=True)

    global_attr = {'product': 'SPL3SMP'}
    if overpass:
        global_attr['overpass'] = overpass

    if not os.path.exists(outputpath):
        os.makedirs(outputpath)

    # time series attributes come from the first image
    ts_attributes = input_dataset.read(startdate).metadata

    # the input grid is the flattened mesh of the EASE2_grid(36000) axes
    ease_def = EASE2_grid(36000)
    lon_mesh, lat_mesh = np.meshgrid(ease_def.londim, ease_def.latdim)
    grid = BasicGrid(lon_mesh.flatten(), lat_mesh.flatten())

    converter = Img2Ts(
        input_dataset=input_dataset,
        outputpath=outputpath,
        startdate=startdate,
        enddate=enddate,
        input_grid=grid,
        imgbuffer=imgbuffer,
        cellsize_lat=5.0,
        cellsize_lon=5.0,
        global_attr=global_attr,
        ts_attributes=ts_attributes,
    )
    converter.calc()
def reshuffle(input_root, outputpath, startdate, enddate,
              parameters, imgbuffer=50):
    """
    Reshuffle method applied to ERA-Interim data.

    Parameters
    ----------
    input_root: string
        input path where era interim data was downloaded
    outputpath : string
        Output path. Created if it does not exist.
    startdate : datetime
        Start date. The image of this date provides the time series
        attributes and the grid coordinates.
    enddate : datetime
        End date.
    parameters: list
        parameters to read and convert
    imgbuffer: int, optional
        How many images to read at once before writing time series.
    """
    # note the reader takes the parameters before the root path
    input_dataset = ERAInterimDs(parameters, input_root, expand_grid=False)

    if not os.path.exists(outputpath):
        os.makedirs(outputpath)

    first_image = input_dataset.read(startdate)
    ts_attributes = first_image.metadata
    grid = BasicGrid(first_image.lon, first_image.lat)

    converter_settings = dict(
        input_dataset=input_dataset,
        outputpath=outputpath,
        startdate=startdate,
        enddate=enddate,
        input_grid=grid,
        imgbuffer=imgbuffer,
        cellsize_lat=5.0,
        cellsize_lon=5.0,
        ts_dtypes=np.dtype('float32'),
        global_attr={'product': 'ERA Interim'},
        ts_attributes=ts_attributes,
    )
    Img2Ts(**converter_settings).calc()
def reshuffle(input_root, outputpath, startdate, enddate,
              parameters, imgbuffer=50):
    """
    Reshuffle method applied to SPL3SMP images (read with SPL3SMP_Ds).

    Parameters
    ----------
    input_root: string
        Input path of the images, passed to the SPL3SMP_Ds reader.
    outputpath : string
        Output path. Created if it does not exist.
    startdate : datetime
        Start date. The image of this date provides the time series
        attributes.
    enddate : datetime
        End date.
    parameters: list
        parameters to read and convert
    imgbuffer: int, optional
        How many images to read at once before writing time series.
    """
    input_dataset = SPL3SMP_Ds(input_root, parameter=parameters,
                               flatten=True)

    if not os.path.exists(outputpath):
        os.makedirs(outputpath)

    # time series attributes come from the first image
    ts_attributes = input_dataset.read(startdate).metadata

    # the input grid is the flattened mesh of the EASE2_grid(36000) axes
    ease_def = EASE2_grid(36000)
    lon_mesh, lat_mesh = np.meshgrid(ease_def.londim, ease_def.latdim)
    grid = BasicGrid(lon_mesh.flatten(), lat_mesh.flatten())

    Img2Ts(
        input_dataset=input_dataset,
        outputpath=outputpath,
        startdate=startdate,
        enddate=enddate,
        input_grid=grid,
        imgbuffer=imgbuffer,
        cellsize_lat=5.0,
        cellsize_lon=5.0,
        global_attr={'product': 'SPL3SMP'},
        ts_attributes=ts_attributes,
    ).calc()
def load_grid(filename, subset_flag='subset_flag', subset_value=1,
              location_var_name='gpi'):
    """
    Load a grid from a netCDF file.

    Parameters
    ----------
    filename : string
        filename
    subset_flag : string, optional (default: 'subset_flag')
        name of the subset to load.
    subset_value : int or list, optional (default: 1)
        Value(s) of the subset variable that points are loaded for.
    location_var_name: string, optional (default: 'gpi')
        variable name under which the grid point locations are stored

    Returns
    -------
    grid : BasicGrid or CellGrid instance
        grid instance initialized with the loaded data. A CellGrid is
        returned when the file contains a 'cell' variable.

    Raises
    ------
    ValueError
        If the grid shape has neither one nor two dimensions.
    """
    with Dataset(filename, 'r') as nc_data:
        variables = nc_data.variables

        # determine if it is a cell grid or a basic grid
        arrcell = None
        if 'cell' in variables:
            arrcell = variables['cell'][:].flatten()

        gpis = variables[location_var_name][:].flatten()

        shape = None
        if hasattr(nc_data, 'shape'):
            try:
                shape = tuple(nc_data.shape)
            except TypeError as e:
                # the attribute is not iterable; accept it only if it
                # describes a single dimension
                try:
                    length = len(nc_data.shape)
                except TypeError:
                    length = nc_data.shape.size
                if length == 1:
                    shape = tuple([nc_data.shape])
                else:
                    raise e

        # some old grid do not have a shape attribute
        # this meant that they had shape of len 1
        if shape is None:
            shape = tuple([len(variables['lon'][:])])

        # check if grid has regular shape
        if len(shape) == 2:
            lons, lats = np.meshgrid(variables['lon'][:],
                                     variables['lat'][:])
            lons = lons.flatten()
            lats = lats.flatten()
        elif len(shape) == 1:
            lons = variables['lon'][:]
            lats = variables['lat'][:]
        else:
            # previously this fell through and failed later with an
            # UnboundLocalError on ``lons``
            raise ValueError(
                "Grid shape must have 1 or 2 dimensions, "
                "got {}: {}".format(len(shape), shape))

        # determine if it has a subset
        subset = None
        if subset_flag in variables:
            flags = variables[subset_flag][:].flatten()
            subset = np.where(np.isin(flags, subset_value))[0]

        if 'crs' in variables:
            geodatumName = variables['crs'].getncattr('ellipsoid_name')
        else:
            # ellipsoid information is missing, use WGS84 by default
            geodatumName = 'WGS84'

        if arrcell is None:
            # BasicGrid
            return BasicGrid(lons, lats, gpis=gpis,
                             geodatum=geodatumName,
                             subset=subset, shape=shape)

        # CellGrid
        return CellGrid(lons, lats, arrcell, gpis=gpis,
                        geodatum=geodatumName,
                        subset=subset, shape=shape)
def reshuffle(in_path, out_path, start_date, end_date, parameters,
              temporal_sampling=6, img_buffer=50):
    """
    Reshuffle method applied to MERRA2 data.

    Parameters
    ----------
    in_path: string
        input path where merra2 data was downloaded
    out_path : string
        Output path. Created if it does not exist.
    start_date : datetime
        Start date. The image of this date provides the time series
        attributes and the grid coordinates.
    end_date : datetime
        End date.
    parameters: list
        parameters to read and convert
    temporal_sampling: int in range [1, 24]
        Get an image every n hours where n=temporal_sampling. For example:
        if 1: return hourly sampled data -> hourly sampling
        if 6: return an image every 6 hours -> 6 hourly sampling
        if 24: return the 00:30 image of each day -> daily sampling
    img_buffer: int, optional
        How many images to read at once before writing the time series.
    """
    # the img_bulk class in img2ts iterates through every nth
    # timestamp as specified by temporal_sampling
    image_stack = MerraImageStack(data_path=in_path,
                                  parameter=parameters,
                                  temporal_sampling=temporal_sampling,
                                  array_1d=True)
    product = 'MERRA2_hourly'

    # create out_path directory if it does not exist yet
    if not os.path.exists(out_path):
        os.makedirs(out_path)

    # time series attributes and grid come from the first image
    first_image = image_stack.read(start_date)
    ts_attributes = first_image.metadata
    grid = BasicGrid(first_image.lon, first_image.lat)

    converter = Img2Ts(
        input_dataset=image_stack,
        outputpath=out_path,
        startdate=start_date,
        enddate=end_date,
        input_grid=grid,
        imgbuffer=img_buffer,
        cellsize_lat=5.0,
        cellsize_lon=6.25,
        global_attr={'product': product},
        zlib=True,
        unlim_chunksize=1000,
        ts_attributes=ts_attributes,
    )
    converter.calc()
class NetworkCollection(IsmnComponent):
    """
    A NetworkCollection holds multiple networks and provides functionality
    to perform access to components from multiple networks.
    A grid is added that contains all stations to perform spatial searches.

    Attributes
    ----------
    networks : OrderedDict
        Collection of network names and Networks
    grid : BasicGrid or None
        Grid that contains one point for each station in all networks.
        None if the passed networks provide no coordinates.
    """

    def __init__(self, networks):
        """
        Create network collection from previously created Networks.

        Parameters
        ----------
        networks : list[Network]
            List of Networks that build the collection from.
        """
        self.networks = OrderedDict([])

        lons = []
        lats = []
        for net in networks:
            self.networks[net.name] = net
            net_lons, net_lats = net.coords
            lons += net_lons
            lats += net_lats

        self.grid = BasicGrid(lons, lats) \
            if (len(lons) > 0 and len(lats) > 0) else None

    def __repr__(self, indent=''):
        return ',\n'.join([
            f"{indent}{net.name}: {list(net.stations.keys())}"
            for net in self.networks.values()
        ])

    def __getitem__(self, item: Union[int, str]):
        # shortcut to access networks directly
        if isinstance(item, int):
            # integers address networks by their insertion position
            item = list(self.networks.keys())[item]
        return self.networks[item]

    def iter_networks(self) -> Network:
        """
        Iterate through all networks in the Collection.
        """
        for nw in self.networks.values():
            yield nw

    def iter_stations(self, **filter_kwargs) -> (Network, Station):
        """
        Iterate through Networks in the Collection and get (all/filtered)
        Stations.
        """
        for nw in self.networks.values():
            for stat in nw.iter_stations(**filter_kwargs):
                yield nw, stat

    def iter_sensors(self, **filter_kwargs) -> (Network, Station, Sensor):
        """
        Iterate through Networks in the Collection and get (all/filtered)
        Stations and Sensors at each Station.
        """
        for nw in self.networks.values():
            for stat, sen in nw.iter_sensors(**filter_kwargs):
                yield nw, stat, sen

    def station4gpi(self, gpi):
        """
        Get the Station for the passed gpi in the grid.

        Parameters
        ----------
        gpi : int or List[int]
            Point index or multiple indices in self.grid.

        Returns
        -------
        station : Station or list[Station]
            Station(s) at gpi(s), ordered like the passed indices. A single
            Station is returned when exactly one station was found.

        Raises
        ------
        ValueError
            If any passed index is not among the active gpis of the grid.
        """
        idxs = np.atleast_1d(gpi)
        in_grid = np.isin(idxs, self.grid.activegpis)

        if not all(in_grid):
            raise ValueError(
                f"Index not found in loaded grid: {idxs[~in_grid]}")

        lons, lats = self.grid.gpi2lonlat(idxs)
        lons = np.atleast_1d(lons)
        lats = np.atleast_1d(lats)

        # One slot per requested index. Coordinates are compared
        # element-wise: using the arrays directly in a boolean ``and``
        # raises for more than one requested index.
        found = [None] * len(idxs)
        n_missing = len(idxs)
        for _, stat in self.iter_stations():
            hits = np.flatnonzero((lons == stat.lon) & (lats == stat.lat))
            for pos in hits:
                # keep the first station found for each index
                if found[pos] is None:
                    found[pos] = stat
                    n_missing -= 1
            if n_missing == 0:
                break  # stop when all indices are found

        stations = [stat for stat in found if stat is not None]

        return stations[0] if len(stations) == 1 else stations

    def get_nearest_station(self, lon, lat, max_dist=np.inf):
        """
        Get nearest station for given longitude/latitude coordinates.

        Parameters
        ----------
        lon : float or List[float]
            Longitude coordinate(s).
        lat : float or List[float]
            Latitude coordinate(s).
        max_dist : float, optional (default: np.Inf)
            Maximum search distance.

        Returns
        -------
        station : Station or List[Station]
            The nearest Station(s) to the passed coordinates.
        dist : float
            Distance between the passed coordinates and the actual location
            of the station, as returned by the grid search (in meter
            according to the original documentation).
        """
        gpi, dist = self.grid.find_nearest_gpi(lon, lat, max_dist=max_dist)
        station = self.station4gpi(gpi)

        return station, dist
def grid(self) -> BasicGrid:
    """
    Build a BasicGrid from the coordinates in ``self.coords``.

    Returns
    -------
    grid : BasicGrid
        Grid created by passing the unpacked ``self.coords`` to BasicGrid.
    """
    coordinates = self.coords
    return BasicGrid(*coordinates)
def reshuffle(
    input_root,
    outputpath,
    startdate,
    enddate,
    variables,
    mask_seapoints=False,
    h_steps=(0, 6, 12, 18),
    imgbuffer=50,
):
    """
    Reshuffle method applied to ERA images for conversion into netcdf time
    series format.

    Parameters
    ----------
    input_root: str
        Input path where ERA image data was downloaded to.
    outputpath : str
        Output path, where the reshuffled netcdf time series are stored.
        Created if it does not exist.
    startdate : datetime
        Start date, from which images are read and time series are generated.
    enddate : datetime
        End date, from which images are read and time series are generated.
    variables: list or str or tuple
        Variables to read from the passed images and convert into time
        series format.
    mask_seapoints: bool, optional (default: False)
        Mask points over sea, replace them with nan.
    h_steps: tuple, optional (default: (0,6,12,18))
        Full hours for which images are available.
    imgbuffer: int, optional (default: 50)
        How many images to read at once before writing time series.
        This number affects how many images are stored in memory and should
        be chosen according to the available amount of memory and the size of
        a single image.

    Raises
    ------
    Exception
        If the detected file type is neither "grib" nor "netcdf".
    """
    filetype = parse_filetype(input_root)

    # both readers are configured identically
    reader_settings = dict(
        root_path=input_root,
        parameter=variables,
        subgrid=None,
        array_1D=True,
        mask_seapoints=mask_seapoints,
        h_steps=h_steps,
    )
    if filetype == "grib":
        input_dataset = ERAIntGrbDs(**reader_settings)
    elif filetype == "netcdf":
        input_dataset = ERAIntNcDs(**reader_settings)
    else:
        raise Exception("Unknown file format")

    if not os.path.exists(outputpath):
        os.makedirs(outputpath)

    global_attr = {"product": "ERA Interim (from {})".format(filetype)}

    # the first image is the one at the first hour step of the start day;
    # it delivers the time series attributes and the grid coordinates
    first_hour = time(h_steps[0], 0)
    first_date_time = datetime.combine(startdate.date(), first_hour)
    first_image = input_dataset.read(first_date_time)
    ts_attributes = first_image.metadata
    grid = BasicGrid(first_image.lon, first_image.lat)

    Img2Ts(
        input_dataset=input_dataset,
        outputpath=outputpath,
        startdate=startdate,
        enddate=enddate,
        input_grid=grid,
        imgbuffer=imgbuffer,
        cellsize_lat=5.0,
        cellsize_lon=5.0,
        ts_dtypes=np.dtype("float32"),
        global_attr=global_attr,
        zlib=True,
        unlim_chunksize=1000,
        ts_attributes=ts_attributes,
    ).calc()
def reshuffle(input_root, outputpath, startdate, enddate,
              parameters=None, ignore_meta=False, imgbuffer=200):
    """
    Reshuffle method applied to ESA CCI SM images.

    Parameters
    ----------
    input_root: string
        Input path of the images; also parsed for file name information.
    outputpath : string
        Output path. Created if it does not exist.
    startdate : datetime
        Start date. The image of this date provides the grid coordinates.
    enddate : datetime
        End date.
    parameters: list, optional (default: None)
        parameters to read and convert
        If none are passed, we read an image in the root path and use vars
        from the image (all except 'lat', 'lon' and 'time').
    ignore_meta : bool, optional (default: False)
        If set, no metadata is read: only the product name is stored as
        global attribute and no time series attributes are passed on.
    imgbuffer: int, optional
        How many images to read at once before writing time series.
    """
    if not os.path.exists(outputpath):
        os.makedirs(outputpath)

    file_args, file_vars = parse_filename(input_root)

    if parameters is None:
        coordinate_vars = ['lat', 'lon', 'time']
        parameters = [var for var in file_vars
                      if var not in coordinate_vars]

    input_dataset = CCI_SM_025Ds(input_root, parameters, array_1D=True)

    first_image = input_dataset.read(startdate)
    grid = BasicGrid(first_image.lon, first_image.lat)

    if ignore_meta:
        global_attr = {'product': 'ESA CCI SM'}
        ts_attributes = None
    else:
        global_attr, ts_attributes = read_metadata(
            sensortype=file_args['sensor_type'],
            version=int(file_args['version']),
            varnames=parameters,
            subversion=file_args['sub_version'])

    converter = Img2Ts(
        input_dataset=input_dataset,
        outputpath=outputpath,
        startdate=startdate,
        enddate=enddate,
        input_grid=grid,
        imgbuffer=imgbuffer,
        cellsize_lat=5.0,
        cellsize_lon=5.0,
        global_attr=global_attr,
        zlib=True,
        unlim_chunksize=1000,
        ts_attributes=ts_attributes,
    )
    converter.calc()
def reshuffle(input_root, outputpath, startdate, enddate,
              parameters, land_points=False, imgbuffer=50):
    """
    Reshuffle method applied to ECMWF reanalysis images (grib or netcdf).

    Parameters
    ----------
    input_root: string
        Input path of the images; also used to detect the file type.
    outputpath : string
        Output path. Created if it does not exist.
    startdate : datetime
        Start date. The image of this date provides the time series
        attributes and the grid coordinates.
    enddate : datetime
        End date.
    parameters: list
        parameters to read and convert
    land_points: bool
        reshuffle land points only (not implemented yet; the value is
        currently not used)
    imgbuffer: int, optional
        How many images to read at once before writing time series.

    Raises
    ------
    Exception
        If the detected file type is neither 'grib' nor 'netcdf'.
    """
    filetype = get_filetype(input_root)

    # reject unsupported formats before creating any reader
    if filetype not in ('grib', 'netcdf'):
        raise Exception('Unknown file format')

    if filetype == 'grib':
        input_dataset = ERAGrbDs(input_root, parameters, expand_grid=False)
    else:
        input_dataset = ERANcDs(input_root, parameters, subgrid=False,
                                array_1D=True)

    if not os.path.exists(outputpath):
        os.makedirs(outputpath)

    global_attr = {'product': 'ECMWF Reanalysis from {}'.format(filetype)}

    # time series attributes and grid come from the first image
    first_image = input_dataset.read(startdate)
    ts_attributes = first_image.metadata
    grid = BasicGrid(first_image.lon, first_image.lat)

    converter = Img2Ts(
        input_dataset=input_dataset,
        outputpath=outputpath,
        startdate=startdate,
        enddate=enddate,
        input_grid=grid,
        imgbuffer=imgbuffer,
        cellsize_lat=5.0,
        cellsize_lon=5.0,
        ts_dtypes=np.dtype('float32'),
        global_attr=global_attr,
        zlib=True,
        unlim_chunksize=1000,
        ts_attributes=ts_attributes,
    )
    converter.calc()
def reshuffle(input_root, outputpath, startdate, enddate,
              parameters, imgbuffer=50):
    """
    Reshuffle method applied to ERA5 images (read with ECMWF_ERA5_025Ds).

    Parameters
    ----------
    input_root: string
        Input path of the images, passed to the ECMWF_ERA5_025Ds reader.
    outputpath : string
        Output path. Created if it does not exist.
    startdate : datetime
        Start date. The image of this date provides the time series
        attributes and the grid coordinates.
    enddate : datetime
        End date.
    parameters: list
        parameters to read and convert
    imgbuffer: int, optional
        How many images to read at once before writing time series.
    """
    input_dataset = ECMWF_ERA5_025Ds(input_root, parameters, array_1D=True)

    if not os.path.exists(outputpath):
        os.makedirs(outputpath)

    global_attr = {'product': 'ERA5'}

    # get time series attributes from first day of data.
    data = input_dataset.read(startdate)
    ts_attributes = data.metadata

    grid = BasicGrid(data.lon, data.lat)

    # NOTE: a leftover debug lookup of the 'skt' variable was removed here;
    # its result was never used and it depended on 'skt' being among the
    # requested parameters.

    reshuffler = Img2Ts(input_dataset=input_dataset, outputpath=outputpath,
                        startdate=startdate, enddate=enddate,
                        input_grid=grid,
                        imgbuffer=imgbuffer, cellsize_lat=5.0,
                        cellsize_lon=5.0,
                        global_attr=global_attr, zlib=True,
                        unlim_chunksize=1000,
                        ts_attributes=ts_attributes)
    reshuffler.calc()