def test_save_basicgrid_irregular_nc(self):
    """Save the irregular basic grid and check the written netCDF content.

    Verifies the stored latitudes, longitudes and subset flags as well as
    the custom and the ``shape`` global attributes.
    """
    attrs = {"test": "test_attribute"}
    grid_nc.save_grid(self.testfilename, self.basic_irregular,
                      global_attrs=attrs)

    with Dataset(self.testfilename) as ds:
        stored = ds.variables
        nptest.assert_array_equal(self.lats, stored["lat"][:])
        nptest.assert_array_equal(self.lons, stored["lon"][:])

        # indices of all points that are flagged as part of the subset
        flagged = np.where(stored["subset_flag"][:] == 1)[0]
        nptest.assert_array_equal(self.subset, flagged)

        assert ds.test == "test_attribute"
        assert ds.shape == 64800
def test_save_cellgrid_nc(self):
    """Save the cell grid and check the written netCDF content.

    Verifies the stored latitudes, longitudes, cells and subset flags as
    well as the custom and the ``gpidirect`` global attributes.
    """
    attrs = {"test": "test_attribute"}
    grid_nc.save_grid(self.testfilename, self.cellgrid, global_attrs=attrs)

    with Dataset(self.testfilename) as ds:
        stored = ds.variables
        nptest.assert_array_equal(self.lats, stored["lat"][:])
        nptest.assert_array_equal(self.lons, stored["lon"][:])
        nptest.assert_array_equal(self.cells, stored["cell"][:])

        # indices of all points that are flagged as part of the subset
        flagged = np.where(stored["subset_flag"][:] == 1)[0]
        nptest.assert_array_equal(self.subset, flagged)

        assert ds.test == "test_attribute"
        assert ds.gpidirect == 0x1B
def test_save_basicgrid_nc(self):
    """Save the regular basic grid and check the written netCDF content.

    The file is expected to hold the unique latitudes in descending order,
    the unique longitudes in ascending order, the flattened subset flags
    and the custom and ``shape`` global attributes.
    """
    attrs = {"test": "test_attribute"}
    grid_nc.save_grid(self.testfilename, self.basic, global_attrs=attrs)

    with Dataset(self.testfilename) as ds:
        stored = ds.variables

        expected_lats = np.unique(self.lats)[::-1]
        nptest.assert_array_equal(expected_lats, stored["lat"][:])

        expected_lons = np.unique(self.lons)
        nptest.assert_array_equal(expected_lons, stored["lon"][:])

        # the flag variable is flattened before the flagged indices are
        # compared with the subset
        flat_flags = stored["subset_flag"][:].flatten()
        flagged = np.where(flat_flags == 1)[0]
        nptest.assert_array_equal(self.subset, flagged)

        assert ds.test == "test_attribute"
        assert ds.shape[0] == 360
        assert ds.shape[1] == 180
def CDItoNetCDF(self, region=None, ip=None, separatefile=True, exclude=None):
    """
    Creates NetCDF that contains CDI for all timestamps.

    For every region / interest period combination the DI files of all
    non-static sources are read grid point by grid point, combined into a
    weighted average using the weights stored in the weights file, and
    written to the variable ``ECDI_<ip>`` of the destination file.

    Parameters
    ----------
    region : str, list of str, optional
        Region(s) of interest; must be one of the regions as set in the
        CDIPoet instance; Defaults to the regions attribute value of the
        CDIPoet instance.
    ip : int, list of int, optional
        Interest period for calculating the DI; must be one of the ip as
        set in the CDIPoet instance; Defaults to the ip attribute value in
        the CDIPoet instance.
    separatefile : bool
        If True, writes the CDI to a separate file in the cdi_path;
        If False, writes the CDI to the NetCDF database file in the
        data_path.
    exclude : string, optional
        Variable which should not be used for calculation of CDI.

    Raises
    ------
    ValueError
        If the weights file holds no weight for one of the parameters.
    """
    # Local import: only needed for the progress output, which has to work
    # without the Python 2 only print statement.
    import sys

    if region is None:
        region = self.regions
    elif isinstance(region, str):
        region = [region]

    if ip is None:
        ip = self.ip
    elif isinstance(ip, int):
        ip = [ip]

    if not os.path.exists(self.cdi_path):
        os.makedirs(self.cdi_path)

    # Parameters that contribute to the CDI. The position in this list is
    # the row of the parameter in the data matrix AND the position of its
    # weight, so data and weights can never get out of step.
    params = [param for param in self.sources.keys()
              if param not in self.staticsources and param != exclude]

    for reg in region:
        grid = grids.ShapeGrid(reg, self.spatial_resolution)
        gps = grid.get_gridpoints().index

        for ipe in ip:
            key = 'ECDI_' + str(ipe)

            print('[INFO] calc ECDI ' + reg + ' IP' + str(ipe))

            if separatefile:
                dest_file = os.path.join(self.cdi_path,
                                         reg + '_' + key + '.nc')
            else:
                dest_file = os.path.join(
                    self.data_path,
                    reg + '_' + str(self.spatial_resolution) + '_' +
                    self.temporal_resolution + '.nc')

            wfile = os.path.join(self.weights_path,
                                 reg + '_weights_' + str(ipe) + '.nc')

            if not os.path.isfile(dest_file):
                save_grid(dest_file, grid)

            with Dataset(dest_file, 'r+', format='NETCDF4') as cdifile:

                if 'time' not in cdifile.dimensions.keys():
                    dt = get_dtindex(self.temporal_resolution,
                                     self.start_date)
                    cdifile.createDimension("time", None)
                    times = cdifile.createVariable('time', 'uint16',
                                                   ('time',))
                    times.units = 'days since ' + str(self.start_date)
                    times.calendar = 'standard'
                    times[:] = date2num(dt.tolist(), units=times.units,
                                        calendar=times.calendar)

                if key not in cdifile.variables.keys():
                    cdi = cdifile.createVariable(key, 'f8',
                                                 ('time', 'lat', 'lon'),
                                                 fill_value=-99)
                else:
                    cdi = cdifile.variables[key]

                # Neither the grid point index array nor the length of the
                # time axis change while the grid points are processed.
                gpi = cdifile.variables['gpi'][:]
                n_steps = cdi.shape[0]

                for k, gp in enumerate(gps):

                    if k % 100 == 0:
                        sys.stdout.write('. ')

                    position = np.where(gpi == gp)
                    lat_pos = position[0][0]
                    lon_pos = position[1][0]

                    # one row per parameter, completely masked to begin
                    # with
                    dat = np.full((len(params), n_steps), self.nan_value,
                                  dtype=float)
                    dat = np.ma.masked_values(dat, self.nan_value)
                    weights = []

                    # extract data from DI files and read the weights
                    for row, param in enumerate(params):

                        difile = os.path.join(
                            self.di_path,
                            reg + '_' + param + '_DI_' + str(ipe) + '.nc')

                        with Dataset(difile, 'r', format='NETCDF4') as nc:
                            for var in nc.variables.keys():
                                if param in var:
                                    series = nc.variables[var][
                                        :, lat_pos, lon_pos]
                                    dat[row, :series.shape[0]] = series

                        weight = None
                        with Dataset(wfile, 'r', format='NETCDF4') as nc:
                            for var in nc.variables.keys():
                                if param in var:
                                    weight = nc.variables[var][lat_pos,
                                                               lon_pos]

                        if weight is None:
                            raise ValueError(
                                'no weight found for parameter ' +
                                str(param) + ' in ' + wfile)

                        weights.append(weight)

                    dat = np.ma.masked_where(dat == self.nan_value, dat)
                    dat = np.nan_to_num(dat)
                    dat = np.ma.masked_where(dat == 0., dat)

                    avg = np.ma.average(dat, axis=0, weights=weights)

                    cdi[:, lat_pos, lon_pos] = avg

            print('Done!')

    print('Done!')
def __writeWeight(self, gp, region, refparam, ip, exclude=None):
    """
    Calculates the weights for one grid point and writes them to the
    weights NetCDF file of the region.

    The DI time series of the grid point are collected from all existing
    DI files of the configured sources, handed to ``cdi.calc_weights`` and
    the resulting weights are stored in ``<region>_weights_<ip>.nc``.

    Parameters
    ----------
    gp : int
        Grid point index as stored in the ``gpi`` variable of the files.
    region : str
        Region identifier used in the file names.
    refparam : str
        Name of the reference parameter without interest period; the
        suffix ``_<ip>`` is appended before it is passed on.
    ip : int
        Interest period used in the file names.
    exclude : string, optional
        Variable which should not be used for calculation of the weights.

    Raises
    ------
    KeyError
        If a DI file contains no variable for its parameter.
    """
    refparam += '_' + str(ip)

    df = pd.DataFrame()

    for param in self.sources.keys():

        difile = os.path.join(self.di_path,
                              region + '_' + param + '_DI_' + str(ip) + '.nc')

        if not os.path.exists(difile):
            continue

        with Dataset(difile, 'r', format='NETCDF4') as nc:

            # the time axis of the first DI file defines the index
            if len(df.index.values) == 0:
                time = nc.variables['time']
                dates = num2date(time[:], units=time.units,
                                 calendar=time.calendar)
                df = pd.DataFrame(index=pd.DatetimeIndex(dates))

            # the last variable whose name contains the parameter is used
            ncvar = None
            for var in nc.variables.keys():
                if param in var:
                    ncvar = var

            if ncvar is None:
                raise KeyError('no variable for parameter ' + str(param) +
                               ' in ' + difile)

            position = np.where(nc.variables['gpi'][:] == gp)
            lat_pos = position[0][0]
            lon_pos = position[1][0]

            ncdata = nc.variables[ncvar]

            # NOTE(review): only the time steps up to, but not including,
            # the last one are copied; the last value stays NaN. This
            # mirrors the previous loop bounds - confirm whether skipping
            # the last time step is intended.
            n_used = ncdata.shape[0] - 1

            # The column is assembled as a plain array and assigned once;
            # element-wise chained assignment on the data frame is not
            # guaranteed to write into the frame.
            column = np.full(len(df.index), np.nan)
            if n_used > 0:
                series = ncdata[:n_used, lat_pos, lon_pos]
                column[:n_used] = np.ma.filled(
                    np.ma.asarray(series).astype(float), np.nan)
            df[ncvar] = column

            if 'scaling_factor' in ncdata.ncattrs():
                raw_factor = ncdata.getncattr('scaling_factor')
                if raw_factor < 0:
                    df[ncvar] = df[ncvar] * float(raw_factor)
                else:
                    df[ncvar] = df[ncvar] / float(raw_factor)

    weights = cdi.calc_weights(df, refparam, lags=self.lags, exclude=exclude)

    dest_file = os.path.join(self.weights_path,
                             region + '_weights_' + str(ip) + '.nc')

    if not os.path.isfile(dest_file):
        grid = grids.ShapeGrid(region, self.spatial_resolution)
        save_grid(dest_file, grid)

    # columns for which a weight is written, in the order of the weights
    if exclude is None:
        keys = df.keys()
    else:
        keys = [par for par in df.keys() if exclude not in par]

    with Dataset(dest_file, 'r+', format='NETCDF4') as nc:
        position = np.where(nc.variables['gpi'][:] == gp)
        lat_pos = position[0][0]
        lon_pos = position[1][0]

        for i, dataset in enumerate(keys):
            if dataset not in nc.variables.keys():
                var = nc.createVariable(dataset, 'd', ('lat', 'lon'),
                                        fill_value=self.nan_value)
            else:
                var = nc.variables[dataset]

            var[lat_pos, lon_pos] = weights[i]
def __writeDI(self, region, src, gridpoints, grid, ip, suffix='',
              scaled=True, modf_all=True, start=None):
    """
    Calculates the DI of one source for the given grid points and writes
    it to the file ``<region>_<src>_DI_<ip>.nc`` in the di_path.

    Parameters
    ----------
    region : str
        Region identifier used in the file name and for reading the time
        series.
    src : str
        Name of the source; the DI is calculated inversely for
        'MODIS_LST'.
    gridpoints : iterable
        Grid point indices to process; each one must be present in the
        ``gpi`` variable of the destination file.
    grid : object
        Grid that is saved to the destination file if the file does not
        exist yet.
    ip : int
        Interest period, used in the file name and passed to
        ``cdi.calc_DI``.
    suffix : str, optional
        Not used by this method.
    scaled : bool, optional
        Passed through to ``cdi.calc_DI``.
    modf_all : bool, optional
        Passed through to ``cdi.calc_DI``.
    start : optional
        If set, only time series values with an index greater than or
        equal to start are used.

    Raises
    ------
    IndexError
        If the first date of a DI time series is not part of the time
        variable of the destination file.
    """
    # Local import: only needed for the progress output, which has to work
    # without the Python 2 only print statement.
    import sys

    if start is not None:
        dt = get_dtindex('dekad', start)
    else:
        dt = get_dtindex('dekad', self.start_date)

    dest_file = os.path.join(self.di_path,
                             region + '_' + src + '_DI' + '_' + str(ip) +
                             '.nc')

    if not os.path.isfile(dest_file):
        save_grid(dest_file, grid)

    inverse = (src == 'MODIS_LST')

    for i, gp in enumerate(gridpoints):

        if i % 100 == 0:
            sys.stdout.write('. ')

        ts = self.read_timeseries(src, gp, region)
        if start is not None:
            ts = ts[ts.index >= start]

        ts_di = cdi.calc_DI(ts.copy(), inverse, [ip], scale_zero=False,
                            scaled=scaled, modf_all=modf_all)

        with Dataset(dest_file, 'r+', format='NETCDF4') as nc:

            if 'time' not in nc.dimensions.keys():
                nc.createDimension("time", None)
                times = nc.createVariable('time', 'uint16', ('time',))
                times.units = 'days since ' + str(self.start_date)
                times.calendar = 'standard'
                times[:] = date2num(dt.tolist(), units=times.units,
                                    calendar=times.calendar)
            else:
                times = nc.variables['time']

            position = np.where(nc.variables['gpi'][:] == gp)
            lat_pos = position[0][0]
            lon_pos = position[1][0]

            # extend times variable in NetCDF
            tsdates = date2num(ts_di.index.tolist(), units=times.units,
                               calendar=times.calendar).astype(int)

            # Compare against the values of the time variable explicitly
            # instead of against the variable object itself.
            matches = np.where(times[:] == tsdates[0])[0]
            if matches.size == 0:
                raise IndexError(
                    'first date of the DI time series of grid point ' +
                    str(gp) + ' not found in time variable of ' + dest_file)
            begin = matches[0]

            times[begin:] = tsdates

            for dataset in ts_di.keys():
                if dataset not in nc.variables.keys():
                    var = nc.createVariable(dataset,
                                            ts_di[dataset].dtype.char,
                                            ('time', 'lat', 'lon'),
                                            fill_value=self.nan_value)
                else:
                    var = nc.variables[dataset]

                var[begin:, lat_pos, lon_pos] = ts_di[dataset].values
def test_save_load_cellgrid(self):
    """The cell grid must compare equal to itself after a save/load
    round trip through the netCDF file."""
    target = self.testfilename
    grid_nc.save_grid(target, self.cellgrid)
    restored = grid_nc.load_grid(target)
    assert self.cellgrid == restored
def test_save_load_basicgrid_irregular(self):
    """The irregular basic grid must compare equal to itself after a
    save/load round trip through the netCDF file."""
    target = self.testfilename
    grid_nc.save_grid(target, self.basic_irregular)
    restored = grid_nc.load_grid(target)
    assert self.basic_irregular == restored
def test_save_load_basicgrid(self):
    """The regular basic grid must compare equal to itself after a
    save/load round trip through the netCDF file."""
    target = self.testfilename
    grid_nc.save_grid(target, self.basic)
    restored = grid_nc.load_grid(target)
    assert self.basic == restored
def CDItoNetCDF(self, region=None, ip=None, separatefile=True, exclude=None):
    """
    Creates NetCDF that contains CDI for all timestamps.

    For every region / interest period combination the DI files of all
    non-static sources are read grid point by grid point, combined into a
    weighted average using the weights stored in the weights file, and
    written to the variable ``ECDI_<ip>`` of the destination file.

    Parameters
    ----------
    region : str, list of str, optional
        Region(s) of interest; must be one of the regions as set in the
        CDIPoet instance; Defaults to the regions attribute value of the
        CDIPoet instance.
    ip : int, list of int, optional
        Interest period for calculating the DI; must be one of the ip as
        set in the CDIPoet instance; Defaults to the ip attribute value in
        the CDIPoet instance.
    separatefile : bool
        If True, writes the CDI to a separate file in the cdi_path;
        If False, writes the CDI to the NetCDF database file in the
        data_path.
    exclude : string, optional
        Variable which should not be used for calculation of CDI.

    Raises
    ------
    ValueError
        If the weights file holds no weight for one of the parameters.
    """
    # Local import: only needed for the progress output, which has to work
    # without the Python 2 only print statement.
    import sys

    if region is None:
        region = self.regions
    elif isinstance(region, str):
        region = [region]

    if ip is None:
        ip = self.ip
    elif isinstance(ip, int):
        ip = [ip]

    if not os.path.exists(self.cdi_path):
        os.makedirs(self.cdi_path)

    # Parameters that contribute to the CDI. The position in this list is
    # the row of the parameter in the data matrix AND the position of its
    # weight, so data and weights can never get out of step.
    params = [param for param in self.sources.keys()
              if param not in self.staticsources and param != exclude]

    for reg in region:
        grid = grids.ShapeGrid(reg, self.spatial_resolution)
        gps = grid.get_gridpoints().index

        for ipe in ip:
            key = 'ECDI_' + str(ipe)

            print('[INFO] calc ECDI ' + reg + ' IP' + str(ipe))

            if separatefile:
                dest_file = os.path.join(self.cdi_path,
                                         reg + '_' + key + '.nc')
            else:
                dest_file = os.path.join(
                    self.data_path,
                    reg + '_' + str(self.spatial_resolution) + '_' +
                    self.temporal_resolution + '.nc')

            wfile = os.path.join(self.weights_path,
                                 reg + '_weights_' + str(ipe) + '.nc')

            if not os.path.isfile(dest_file):
                save_grid(dest_file, grid)

            with Dataset(dest_file, 'r+', format='NETCDF4') as cdifile:

                if 'time' not in cdifile.dimensions.keys():
                    dt = get_dtindex(self.temporal_resolution,
                                     self.start_date)
                    cdifile.createDimension("time", None)
                    times = cdifile.createVariable('time', 'uint16',
                                                   ('time',))
                    times.units = 'days since ' + str(self.start_date)
                    times.calendar = 'standard'
                    times[:] = date2num(dt.tolist(), units=times.units,
                                        calendar=times.calendar)

                if key not in cdifile.variables.keys():
                    cdi = cdifile.createVariable(key, 'f8',
                                                 ('time', 'lat', 'lon'),
                                                 fill_value=-99)
                else:
                    cdi = cdifile.variables[key]

                # Neither the grid point index array nor the length of the
                # time axis change while the grid points are processed.
                gpi = cdifile.variables['gpi'][:]
                n_steps = cdi.shape[0]

                for k, gp in enumerate(gps):

                    if k % 100 == 0:
                        sys.stdout.write('. ')

                    position = np.where(gpi == gp)
                    lat_pos = position[0][0]
                    lon_pos = position[1][0]

                    # one row per parameter, completely masked to begin
                    # with
                    dat = np.full((len(params), n_steps), self.nan_value,
                                  dtype=float)
                    dat = np.ma.masked_values(dat, self.nan_value)
                    weights = []

                    # extract data from DI files and read the weights
                    for row, param in enumerate(params):

                        difile = os.path.join(
                            self.di_path,
                            reg + '_' + param + '_DI_' + str(ipe) + '.nc')

                        with Dataset(difile, 'r', format='NETCDF4') as nc:
                            for var in nc.variables.keys():
                                if param in var:
                                    series = nc.variables[var][
                                        :, lat_pos, lon_pos]
                                    dat[row, :series.shape[0]] = series

                        weight = None
                        with Dataset(wfile, 'r', format='NETCDF4') as nc:
                            for var in nc.variables.keys():
                                if param in var:
                                    weight = nc.variables[var][lat_pos,
                                                               lon_pos]

                        if weight is None:
                            raise ValueError(
                                'no weight found for parameter ' +
                                str(param) + ' in ' + wfile)

                        weights.append(weight)

                    dat = np.ma.masked_where(dat == self.nan_value, dat)
                    dat = np.nan_to_num(dat)
                    dat = np.ma.masked_where(dat == 0., dat)

                    avg = np.ma.average(dat, axis=0, weights=weights)

                    cdi[:, lat_pos, lon_pos] = avg

            print('Done!')

    print('Done!')
def __writeWeight(self, gp, region, refparam, ip, exclude=None):
    """
    Calculates the weights for one grid point and writes them to the
    weights NetCDF file of the region.

    The DI time series of the grid point are collected from all existing
    DI files of the configured sources, handed to ``cdi.calc_weights`` and
    the resulting weights are stored in ``<region>_weights_<ip>.nc``.

    Parameters
    ----------
    gp : int
        Grid point index as stored in the ``gpi`` variable of the files.
    region : str
        Region identifier used in the file names.
    refparam : str
        Name of the reference parameter without interest period; the
        suffix ``_<ip>`` is appended before it is passed on.
    ip : int
        Interest period used in the file names.
    exclude : string, optional
        Variable which should not be used for calculation of the weights.

    Raises
    ------
    KeyError
        If a DI file contains no variable for its parameter.
    """
    refparam += '_' + str(ip)

    df = pd.DataFrame()

    for param in self.sources.keys():

        difile = os.path.join(self.di_path,
                              region + '_' + param + '_DI_' + str(ip) + '.nc')

        if not os.path.exists(difile):
            continue

        with Dataset(difile, 'r', format='NETCDF4') as nc:

            # the time axis of the first DI file defines the index
            if len(df.index.values) == 0:
                time = nc.variables['time']
                dates = num2date(time[:], units=time.units,
                                 calendar=time.calendar)
                df = pd.DataFrame(index=pd.DatetimeIndex(dates))

            # the last variable whose name contains the parameter is used
            ncvar = None
            for var in nc.variables.keys():
                if param in var:
                    ncvar = var

            if ncvar is None:
                raise KeyError('no variable for parameter ' + str(param) +
                               ' in ' + difile)

            position = np.where(nc.variables['gpi'][:] == gp)
            lat_pos = position[0][0]
            lon_pos = position[1][0]

            ncdata = nc.variables[ncvar]

            # NOTE(review): only the time steps up to, but not including,
            # the last one are copied; the last value stays NaN. This
            # mirrors the previous loop bounds - confirm whether skipping
            # the last time step is intended.
            n_used = ncdata.shape[0] - 1

            # The column is assembled as a plain array and assigned once;
            # element-wise chained assignment on the data frame is not
            # guaranteed to write into the frame.
            column = np.full(len(df.index), np.nan)
            if n_used > 0:
                series = ncdata[:n_used, lat_pos, lon_pos]
                column[:n_used] = np.ma.filled(
                    np.ma.asarray(series).astype(float), np.nan)
            df[ncvar] = column

            if 'scaling_factor' in ncdata.ncattrs():
                raw_factor = ncdata.getncattr('scaling_factor')
                if raw_factor < 0:
                    df[ncvar] = df[ncvar] * float(raw_factor)
                else:
                    df[ncvar] = df[ncvar] / float(raw_factor)

    weights = cdi.calc_weights(df, refparam, lags=self.lags, exclude=exclude)

    dest_file = os.path.join(self.weights_path,
                             region + '_weights_' + str(ip) + '.nc')

    if not os.path.isfile(dest_file):
        grid = grids.ShapeGrid(region, self.spatial_resolution)
        save_grid(dest_file, grid)

    # columns for which a weight is written, in the order of the weights
    if exclude is None:
        keys = df.keys()
    else:
        keys = [par for par in df.keys() if exclude not in par]

    with Dataset(dest_file, 'r+', format='NETCDF4') as nc:
        position = np.where(nc.variables['gpi'][:] == gp)
        lat_pos = position[0][0]
        lon_pos = position[1][0]

        for i, dataset in enumerate(keys):
            if dataset not in nc.variables.keys():
                var = nc.createVariable(dataset, 'd', ('lat', 'lon'),
                                        fill_value=self.nan_value)
            else:
                var = nc.variables[dataset]

            var[lat_pos, lon_pos] = weights[i]
def __writeDI(self, region, src, gridpoints, grid, ip, suffix='',
              scaled=True, modf_all=True, start=None):
    """
    Calculates the DI of one source for the given grid points and writes
    it to the file ``<region>_<src>_DI_<ip>.nc`` in the di_path.

    Parameters
    ----------
    region : str
        Region identifier used in the file name and for reading the time
        series.
    src : str
        Name of the source; the DI is calculated inversely for
        'MODIS_LST'.
    gridpoints : iterable
        Grid point indices to process; each one must be present in the
        ``gpi`` variable of the destination file.
    grid : object
        Grid that is saved to the destination file if the file does not
        exist yet.
    ip : int
        Interest period, used in the file name and passed to
        ``cdi.calc_DI``.
    suffix : str, optional
        Not used by this method.
    scaled : bool, optional
        Passed through to ``cdi.calc_DI``.
    modf_all : bool, optional
        Passed through to ``cdi.calc_DI``.
    start : optional
        If set, only time series values with an index greater than or
        equal to start are used.

    Raises
    ------
    IndexError
        If the first date of a DI time series is not part of the time
        variable of the destination file.
    """
    # Local import: only needed for the progress output, which has to work
    # without the Python 2 only print statement.
    import sys

    if start is not None:
        dt = get_dtindex('dekad', start)
    else:
        dt = get_dtindex('dekad', self.start_date)

    dest_file = os.path.join(self.di_path,
                             region + '_' + src + '_DI' + '_' + str(ip) +
                             '.nc')

    if not os.path.isfile(dest_file):
        save_grid(dest_file, grid)

    inverse = (src == 'MODIS_LST')

    for i, gp in enumerate(gridpoints):

        if i % 100 == 0:
            sys.stdout.write('. ')

        ts = self.read_timeseries(src, gp, region)
        if start is not None:
            ts = ts[ts.index >= start]

        ts_di = cdi.calc_DI(ts.copy(), inverse, [ip], scale_zero=False,
                            scaled=scaled, modf_all=modf_all)

        with Dataset(dest_file, 'r+', format='NETCDF4') as nc:

            if 'time' not in nc.dimensions.keys():
                nc.createDimension("time", None)
                times = nc.createVariable('time', 'uint16', ('time',))
                times.units = 'days since ' + str(self.start_date)
                times.calendar = 'standard'
                times[:] = date2num(dt.tolist(), units=times.units,
                                    calendar=times.calendar)
            else:
                times = nc.variables['time']

            position = np.where(nc.variables['gpi'][:] == gp)
            lat_pos = position[0][0]
            lon_pos = position[1][0]

            # extend times variable in NetCDF
            tsdates = date2num(ts_di.index.tolist(), units=times.units,
                               calendar=times.calendar).astype(int)

            # Compare against the values of the time variable explicitly
            # instead of against the variable object itself.
            matches = np.where(times[:] == tsdates[0])[0]
            if matches.size == 0:
                raise IndexError(
                    'first date of the DI time series of grid point ' +
                    str(gp) + ' not found in time variable of ' + dest_file)
            begin = matches[0]

            times[begin:] = tsdates

            for dataset in ts_di.keys():
                if dataset not in nc.variables.keys():
                    var = nc.createVariable(dataset,
                                            ts_di[dataset].dtype.char,
                                            ('time', 'lat', 'lon'),
                                            fill_value=self.nan_value)
                else:
                    var = nc.variables[dataset]

                var[begin:, lat_pos, lon_pos] = ts_di[dataset].values
def save_image(image, timestamp, region, metadata, dest_file, start_date,
               sp_res, nan_value=-99, shapefile=None, temp_res='dekad',
               compression=False):
    """Saves numpy.ndarray images as multidimensional netCDF4 file.

    If the destination file does not exist yet, it is created from the
    grid of the region. If it has no time dimension yet, a time variable
    covering the datetime index starting at start_date is created. Every
    entry of the image is written to the time step of the timestamp; the
    timestamp is appended to the time variable if it is not part of it.

    Parameters
    ----------
    image : dict of numpy.ndarrays
        Input image, one array per variable name.
    timestamp : datetime.datetime
        Timestamp of image.
    region : str
        Identifier of the region in the shapefile, or 'global' for a
        regular global grid. If the default shapefile is used, this would
        be the FIPS country code.
    metadata : dict or None
        NetCDF metadata from source file, one dict of attributes per
        image variable. Attributes that already exist on a variable are
        not overwritten. Ignored if None.
    dest_file : str
        Path to the output file.
    start_date : datetime.datetime
        First date of available data.
    sp_res : int or float
        Spatial resolution of the grid.
    nan_value : int, optional
        Not a number value for dataset, defaults to -99.
    shapefile : str, optional
        Path to shape file, uses "world country admin boundary shapefile"
        by default.
    temp_res : string or int, optional
        Temporal resolution of the output NetCDF4 file, defaults to dekad.
    compression : bool, optional
        If True, ncfile compression is active.
    """
    if region == 'global':
        grid = grids.RegularGrid(sp_res)
    else:
        grid = grids.ShapeGrid(region, sp_res, shapefile)

    if not os.path.isfile(dest_file):
        save_grid(dest_file, grid)

    # extra arguments for createVariable, shared by all variables
    if compression:
        comp_kwargs = {'zlib': True, 'complevel': 4}
    else:
        comp_kwargs = {}

    dim = ('time', 'lat', 'lon')

    with Dataset(dest_file, 'r+', format='NETCDF4') as ncfile:

        if 'time' not in ncfile.dimensions.keys():
            # the datetime index is only needed for a new time axis
            dt = get_dtindex(temp_res, start_date)
            ncfile.createDimension("time", None)
            times = ncfile.createVariable('time', 'uint16', ('time',),
                                          **comp_kwargs)
            times.units = 'days since ' + str(start_date)
            times.calendar = 'standard'
            times[:] = date2num(dt.tolist(), units=times.units,
                                calendar=times.calendar)
        else:
            times = ncfile.variables['time']

        numdate = date2num(timestamp, units=times.units,
                           calendar=times.calendar)

        for key in image.keys():

            if key not in ncfile.variables.keys():
                var = ncfile.createVariable(key, image[key].dtype.char, dim,
                                            fill_value=nan_value,
                                            **comp_kwargs)
            else:
                var = ncfile.variables[key]

            # read the time axis once and either reuse the position of the
            # timestamp or append the timestamp at the end
            time_values = times[:]
            matches = np.where(time_values == numdate)[0]
            if matches.size > 0:
                var_index = matches[0]
            else:
                var_index = time_values.size
                times[var_index] = numdate

            var[var_index] = image[key]

            if metadata is not None:
                for item in metadata[key]:
                    # existing attributes are kept untouched
                    if item not in var.ncattrs():
                        var.setncattr(str(item), metadata[key][item])