def test_save_cellgrid_nc(self):
    grid_nc.save_grid(self.testfile, self.cellgrid,
                      global_attrs={'test': 'test_attribute'})

    with Dataset(self.testfile) as nc_data:
        nptest.assert_array_equal(self.lats, nc_data.variables['lat'][:])
        nptest.assert_array_equal(self.lons, nc_data.variables['lon'][:])
        nptest.assert_array_equal(self.cells, nc_data.variables['cell'][:])
        nptest.assert_array_equal(
            self.subset,
            np.where(nc_data.variables['subset_flag'][:] == 1)[0])
        assert nc_data.test == 'test_attribute'
def test_save_cellgrid_nc(self):
    grid_nc.save_grid(self.testfile, self.cellgrid,
                      global_attrs={'test': 'test_attribute'})

    with Dataset(self.testfile) as nc_data:
        nptest.assert_array_equal(self.lats, nc_data.variables['lat'][:])
        nptest.assert_array_equal(self.lons, nc_data.variables['lon'][:])
        nptest.assert_array_equal(self.cells, nc_data.variables['cell'][:])
        nptest.assert_array_equal(
            self.subset,
            np.where(nc_data.variables['subset_flag'][:] == 1)[0])
        assert nc_data.test == 'test_attribute'
        assert nc_data.gpidirect == 0x1b
def test_save_basicgrid_irregular_nc(self):
    grid_nc.save_grid(self.testfile, self.basic_irregular,
                      global_attrs={'test': 'test_attribute'})

    with Dataset(self.testfile) as nc_data:
        nptest.assert_array_equal(self.basic_irregular.arrlat,
                                  nc_data.variables['lat'][:])
        nptest.assert_array_equal(self.basic_irregular.arrlon,
                                  nc_data.variables['lon'][:])
        nptest.assert_array_equal(
            self.subset,
            np.where(nc_data.variables['subset_flag'][:] == 1)[0])
        assert nc_data.test == 'test_attribute'
        assert nc_data.shape == 64800
def test_store_load_regular_2D_grid():
    """
    Test the storing/loading of a 2D grid when the gpis are in a
    custom ordering.
    """
    londim = np.arange(-180.0, 180.0, 60)
    latdim = np.arange(90.0, -90.0, -30)
    lons, lats = np.meshgrid(londim, latdim)
    gpis = np.arange(lons.flatten().size).reshape(lons.shape)
    grid = grids.BasicGrid(lons.flatten(), lats.flatten(),
                           gpis.flatten(), shape=lons.shape)
    testfile = tempfile.NamedTemporaryFile().name
    grid_nc.save_grid(testfile, grid)
    grid_loaded = grid_nc.load_grid(testfile)
    assert grid == grid_loaded
def test_save_basicgrid_generated(self):
    grid_nc.save_grid(self.testfile, self.basic,
                      global_attrs={'test': 'test_attribute'})

    with Dataset(self.testfile) as nc_data:
        nptest.assert_array_equal(np.unique(self.lats)[::-1],
                                  nc_data.variables['lat'][:])
        nptest.assert_array_equal(np.unique(self.lons),
                                  nc_data.variables['lon'][:])
        nptest.assert_array_equal(
            self.subset,
            np.where(nc_data.variables['subset_flag'][:].flatten() == 1)[0])
        assert nc_data.test == 'test_attribute'
        assert nc_data.shape[1] == 180
        assert nc_data.shape[0] == 360
def test_save_basicgrid_generated(self):
    grid_nc.save_grid(self.testfile, self.basic,
                      global_attrs={'test': 'test_attribute'})

    with Dataset(self.testfile) as nc_data:
        nptest.assert_array_equal(np.unique(self.lats)[::-1],
                                  nc_data.variables['lat'][:])
        nptest.assert_array_equal(np.unique(self.lons),
                                  nc_data.variables['lon'][:])
        # subsets have to identify the same gpis in the original grid and
        # the stored one.
        stored_subset = np.where(
            nc_data.variables['subset_flag'][:].flatten() == 1)[0]
        nptest.assert_array_equal(
            sorted(self.basic.gpis[self.subset]),
            sorted(nc_data.variables['gpi'][:].flatten()[stored_subset]))
        assert nc_data.test == 'test_attribute'
        assert nc_data.shape[1] == 180
        assert nc_data.shape[0] == 360
def __writeDI(self, region, src, gridpoints, grid, ip, suffix='',
              scaled=True, modf_all=True, start=None):

    if start is not None:
        dt = get_dtindex('dekad', start)
    else:
        dt = get_dtindex('dekad', self.start_date)

    dest_file = os.path.join(self.di_path,
                             region + '_' + src + '_DI' + '_' + str(ip) +
                             '.nc')

    if not os.path.isfile(dest_file):
        save_grid(dest_file, grid)

    for i, gp in enumerate(gridpoints):
        if i % 100 == 0:
            print('.', end=' ')

        ts = self.read_timeseries(src, gp, region)
        if start is not None:
            sel = (ts.index >= start)
            ts = ts[sel]

        inverse = False
        if src == 'MODIS_LST':
            inverse = True

        ts_di = cdi.calc_DI(ts.copy(), inverse, [ip], scale_zero=False,
                            scaled=scaled, modf_all=modf_all)

        with Dataset(dest_file, 'r+', format='NETCDF4') as nc:
            if 'time' not in nc.dimensions.keys():
                nc.createDimension("time", None)
                times = nc.createVariable('time', 'uint16', ('time',))
                times.units = 'days since ' + str(self.start_date)
                times.calendar = 'standard'
                times[:] = date2num(dt.tolist(), units=times.units,
                                    calendar=times.calendar)
            else:
                times = nc.variables['time']

            dim = ('time', 'lat', 'lon')

            position = np.where(nc.variables['gpi'][:] == gp)
            lat_pos = position[0][0]
            lon_pos = position[1][0]

            # extend times variable in NetCDF
            tsdates = date2num(ts_di.index.tolist(), units=times.units,
                               calendar=times.calendar).astype(int)
            begin = np.where(times == tsdates[0])[0][0]
            times[begin:] = tsdates

            for dataset in ts_di.keys():
                if dataset not in nc.variables.keys():
                    var = nc.createVariable(dataset,
                                            ts_di[dataset].dtype.char,
                                            dim, fill_value=self.nan_value)
                else:
                    var = nc.variables[dataset]
                var[begin:, lat_pos, lon_pos] = ts_di[dataset].values
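# Illustrative sketch (not part of the original module; the file path and
# dates are made up) of how __writeDI above grows the unlimited NetCDF time
# axis: new dates are written starting at the index of their first
# occurrence. The original uses an open-ended slice (times[begin:] = tsdates);
# here an explicit end index is used for the same effect.
import numpy as np
from datetime import datetime
from netCDF4 import Dataset, date2num

with Dataset('/tmp/di_time_sketch.nc', 'w', format='NETCDF4') as nc:
    nc.createDimension('time', None)                  # unlimited dimension
    times = nc.createVariable('time', 'uint16', ('time',))
    times.units = 'days since 2013-01-01'
    times.calendar = 'standard'
    times[:] = date2num([datetime(2013, 1, 1), datetime(2013, 1, 11)],
                        units=times.units, calendar=times.calendar)

    new_dates = date2num([datetime(2013, 1, 11), datetime(2013, 1, 21)],
                         units=times.units,
                         calendar=times.calendar).astype(int)
    begin = np.where(times[:] == new_dates[0])[0][0]  # overlap starts here
    # writing past the current end grows the unlimited dimension
    times[begin:begin + new_dates.size] = new_dates
    print(times[:])                                   # [ 0 10 20]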
def __writeWeight(self, gp, region, refparam, ip, exclude=None):
    """
    Parameters
    ----------
    exclude : string, optional
        Variable which should not be used for calculation of the weights.
    """

    refparam += '_' + str(ip)

    df = pd.DataFrame()

    for param in self.sources.keys():
        difile = os.path.join(self.di_path,
                              region + '_' + param + '_DI_' + str(ip) +
                              '.nc')
        if not os.path.exists(difile):
            continue

        with Dataset(difile, 'r', format='NETCDF4') as nc:
            if len(df.index.values) == 0:
                time = nc.variables['time']
                dates = num2date(time[:], units=time.units,
                                 calendar=time.calendar)
                df = pd.DataFrame(index=pd.DatetimeIndex(dates))

            ncvar = None
            for var in nc.variables.keys():
                if param in var:
                    ncvar = var
                    continue

            position = np.where(nc.variables['gpi'][:] == gp)
            lat_pos = position[0][0]
            lon_pos = position[1][0]

            df[ncvar] = np.NAN
            for i in range(0, nc.variables[ncvar].shape[0] - 1):
                df[ncvar][i] = nc.variables[ncvar][i, lat_pos, lon_pos]

            if 'scaling_factor' in nc.variables[ncvar].ncattrs():
                vvar = nc.variables[ncvar]
                if vvar.getncattr('scaling_factor') < 0:
                    df[ncvar] = (df[ncvar] *
                                 float(vvar.getncattr('scaling_factor')))
                else:
                    df[ncvar] = (df[ncvar] /
                                 float(vvar.getncattr('scaling_factor')))

    weights = cdi.calc_weights(df, refparam, lags=self.lags,
                               exclude=exclude)

    dest_file = os.path.join(self.weights_path,
                             region + '_weights_' + str(ip) + '.nc')

    if not os.path.isfile(dest_file):
        grid = grids.ShapeGrid(region, self.spatial_resolution)
        save_grid(dest_file, grid)

    with Dataset(dest_file, 'r+', format='NETCDF4') as nc:
        dim = ('lat', 'lon')

        position = np.where(nc.variables['gpi'][:] == gp)
        lat_pos = position[0][0]
        lon_pos = position[1][0]

        keys = []
        if exclude is not None:
            for par in df.keys():
                if exclude in par:
                    continue
                keys.append(par)
        else:
            keys = df.keys()

        for i, dataset in enumerate(keys):
            if dataset not in nc.variables.keys():
                var = nc.createVariable(dataset, 'd', dim,
                                        fill_value=self.nan_value)
            else:
                var = nc.variables[dataset]
            var[lat_pos, lon_pos] = weights[i]
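# Small sketch (illustrative values only, not from the source) of the
# scaling_factor convention applied in __writeWeight above: a negative
# factor multiplies the DI series, a positive factor divides it.
import numpy as np

def apply_scaling(values, scaling_factor):
    # mirrors the branch on the 'scaling_factor' attribute above
    if scaling_factor < 0:
        return values * float(scaling_factor)
    return values / float(scaling_factor)

di = np.array([10.0, 20.0, 30.0])
print(apply_scaling(di, 100))   # [0.1 0.2 0.3]
print(apply_scaling(di, -2))    # [-20. -40. -60.]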
def test_save_load_basicgrid_shape_gpis(self):
    grid_nc.save_grid(self.testfile, self.basic_shape_gpis)
    loaded_grid = grid_nc.load_grid(self.testfile)
    assert self.basic_shape_gpis == loaded_grid
def test_save_load_cellgrid_shape(self):
    grid_nc.save_grid(self.testfile, self.cellgrid_shape)
    loaded_grid = grid_nc.load_grid(self.testfile)
    assert self.cellgrid_shape == loaded_grid
def CDItoNetCDF(self, region=None, ip=None, separatefile=True,
                exclude=None):
    """
    Creates NetCDF that contains CDI for all timestamps.

    Parameters
    ----------
    region : str, list of str, optional
        Region(s) of interest; must be one of the regions as set in the
        CDIPoet instance; defaults to the regions attribute value of the
        CDIPoet instance.
    ip : int, list of int, optional
        Interest period for calculating the DI; must be one of the ip as
        set in the CDIPoet instance; defaults to the ip attribute value of
        the CDIPoet instance.
    separatefile : bool
        If True, writes weights to separate file; if False, writes weights
        to the NetCDF database file.
    exclude : string, optional
        Variable which should not be used for calculation of CDI.
    """

    if region is None:
        region = self.regions
    else:
        if isinstance(region, str):
            region = [region]

    if ip is None:
        ip = self.ip
    else:
        if isinstance(ip, int):
            ip = [ip]

    if not os.path.exists(self.cdi_path):
        os.mkdir(self.cdi_path)

    for reg in region:
        grid = grids.ShapeGrid(reg, self.spatial_resolution)
        gps = grid.get_gridpoints().index

        for ipe in ip:
            key = 'ECDI_' + str(ipe)

            print('[INFO] calc ECDI ' + reg + ' IP' + str(ipe))

            if separatefile:
                dest_file = os.path.join(self.cdi_path,
                                         reg + '_' + key + '.nc')
            else:
                dest_file = os.path.join(self.data_path,
                                         reg + '_' +
                                         str(self.spatial_resolution) +
                                         '_' + self.temporal_resolution +
                                         '.nc')

            wfile = os.path.join(self.weights_path,
                                 reg + '_weights_' + str(ipe) + '.nc')

            if not os.path.isfile(dest_file):
                grid = grids.ShapeGrid(reg, self.spatial_resolution)
                save_grid(dest_file, grid)

            with Dataset(dest_file, 'r+', format='NETCDF4') as cdifile:
                if 'time' not in cdifile.dimensions.keys():
                    dt = get_dtindex(self.temporal_resolution,
                                     self.start_date)
                    cdifile.createDimension("time", None)
                    times = cdifile.createVariable('time', 'uint16',
                                                   ('time',))
                    times.units = 'days since ' + str(self.start_date)
                    times.calendar = 'standard'
                    times[:] = date2num(dt.tolist(), units=times.units,
                                        calendar=times.calendar)
                else:
                    times = cdifile.variables['time']

                if key not in cdifile.variables.keys():
                    dim = ('time', 'lat', 'lon')
                    cdi = cdifile.createVariable(key, 'f8', dim,
                                                 fill_value=-99)
                else:
                    cdi = cdifile.variables[key]

                for k, gp in enumerate(gps):
                    if k % 100 == 0:
                        print('.', end=' ')

                    position = np.where(cdifile.variables['gpi'][:] == gp)
                    lat_pos = position[0][0]
                    lon_pos = position[1][0]

                    weights = {}

                    parnum = (len(self.sources.keys()) -
                              len(self.staticsources))
                    if exclude is not None:
                        parnum = parnum - 1

                    dat = np.zeros((parnum, cdi.shape[0]), dtype=float)
                    # dat = np.zeros((len(self.sources.keys()),
                    #                 cdi.shape[0]), dtype=float)
                    dat[dat == 0] = self.nan_value
                    dat = np.ma.masked_values(dat, self.nan_value)

                    # extract data from DI files and calc weights
                    i = 0
                    for param in self.sources.keys():
                        if param in self.staticsources:
                            continue
                        if param == exclude:
                            continue

                        difile = os.path.join(self.di_path,
                                              reg + '_' + param + '_DI_' +
                                              str(ipe) + '.nc')
                        with Dataset(difile, 'r', format='NETCDF4') as nc:
                            for var in nc.variables.keys():
                                if param in var:
                                    for j in range(
                                            0, nc.variables[var].shape[0]):
                                        dat[i, j] = (nc.variables[var]
                                                     [j, lat_pos, lon_pos])

                        with Dataset(wfile, 'r', format='NETCDF4') as nc:
                            for var in nc.variables.keys():
                                if param in var:
                                    weights[param] = (nc.variables[var]
                                                      [lat_pos, lon_pos])
                        i += 1

                    dat = np.ma.masked_where(dat == self.nan_value, dat)
                    dat = np.nan_to_num(dat)
                    dat = np.ma.masked_where(dat == 0., dat)

                    avg = np.ma.average(dat, axis=0,
                                        weights=list(weights.values()))
                    cdi[:, lat_pos, lon_pos] = avg

            print('Done!')

    print('Done!')
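# Minimal sketch (made-up numbers, not from the original class) of the masked,
# weighted averaging step at the core of CDItoNetCDF above: DI series from
# several sources are combined into one ECDI value per timestamp, with fill
# values masked so the remaining weights are renormalised per timestamp.
import numpy as np

nan_value = -99.0
dat = np.array([[0.2, 0.4, nan_value],     # DI of source 1
                [0.3, nan_value, 0.6],     # DI of source 2
                [0.1, 0.5, 0.9]])          # DI of source 3
dat = np.ma.masked_values(dat, nan_value)
weights = [0.5, 0.3, 0.2]

# np.ma.average ignores masked entries; for the last timestamp only sources
# 2 and 3 contribute, with their weights rescaled to sum to one.
ecdi = np.ma.average(dat, axis=0, weights=weights)
print(ecdi)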
def test_save_load_cellgrid(self):
    grid_nc.save_grid(self.testfile, self.cellgrid)
    loaded_grid = grid_nc.load_grid(self.testfile)
    assert self.cellgrid == loaded_grid
def calc(self):
    """
    go through all images and retrieve a stack of them
    then go through all grid points in cell order and write to netCDF file
    """
    # save grid information in file
    grid2nc.save_grid(os.path.join(self.outputpath, self.gridname),
                      self.target_grid)

    for img_stack_dict, start, end, dates, jd_stack in self.img_bulk():
        # =================================================================
        start_time = datetime.now()

        for cell in self.target_grid.get_cells():
            cell_gpis, cell_lons, cell_lats = \
                self.target_grid.grid_points_for_cell(cell)

            # look where in the subset the data is
            cell_index = np.where(
                cell == self.target_grid.activearrcell)[0]
            if cell_index.size == 0:
                raise Img2TsError('cell not found in grid subset')

            data = {}
            for key in img_stack_dict:
                # rename variable in output dataset
                if self.variable_rename is None:
                    var_new_name = str(key)
                else:
                    var_new_name = self.variable_rename[key]

                output_array = np.swapaxes(
                    img_stack_dict[key][:, cell_index], 0, 1)

                # change dtypes of output time series
                if self.ts_dtypes is not None:
                    if type(self.ts_dtypes) == dict:
                        output_dtype = self.ts_dtypes[key]
                    else:
                        output_dtype = self.ts_dtypes
                    output_array = output_array.astype(output_dtype)

                data[var_new_name] = output_array

            if self.orthogonal:
                with nc.OrthoMultiTs(
                        os.path.join(self.outputpath,
                                     self.filename_templ % cell),
                        n_loc=cell_gpis.size, mode='a', zlib=self.zlib,
                        unlim_chunksize=self.unlim_chunksize,
                        time_units=self.time_units) as dataout:

                    # add global attributes to file
                    if self.global_attr is not None:
                        for attr in self.global_attr:
                            dataout.add_global_attr(
                                attr, self.global_attr[attr])

                    dataout.add_global_attr('geospatial_lat_min',
                                            np.min(cell_lats))
                    dataout.add_global_attr('geospatial_lat_max',
                                            np.max(cell_lats))
                    dataout.add_global_attr('geospatial_lon_min',
                                            np.min(cell_lons))
                    dataout.add_global_attr('geospatial_lon_max',
                                            np.max(cell_lons))

                    dataout.write_ts_all_loc(cell_gpis, data, dates,
                                             lons=cell_lons,
                                             lats=cell_lats,
                                             attributes=self.ts_attributes)

            elif not self.orthogonal:
                with nc.IndexedRaggedTs(
                        os.path.join(self.outputpath,
                                     self.filename_templ % cell),
                        n_loc=cell_gpis.size, mode='a', zlib=self.zlib,
                        unlim_chunksize=self.unlim_chunksize,
                        time_units=self.non_ortho_time_units) as dataout:

                    # add global attributes to file
                    if self.global_attr is not None:
                        for attr in self.global_attr:
                            dataout.add_global_attr(
                                attr, self.global_attr[attr])

                    dataout.add_global_attr('geospatial_lat_min',
                                            np.min(cell_lats))
                    dataout.add_global_attr('geospatial_lat_max',
                                            np.max(cell_lats))
                    dataout.add_global_attr('geospatial_lon_min',
                                            np.min(cell_lons))
                    dataout.add_global_attr('geospatial_lon_max',
                                            np.max(cell_lons))

                    # for this dataset we have to loop through the gpis
                    # since each time series can be different in length
                    for i, (gpi, gpi_lon, gpi_lat) in enumerate(
                            zip(cell_gpis, cell_lons, cell_lats)):
                        gpi_data = {}
                        # convert to modified julian date
                        gpi_jd = jd_stack[:, cell_index[i]] - 2400000.5

                        # remove measurements that were filled with the
                        # fill value during resampling; doing this on the
                        # basis of the time variable should be enough since
                        # without time -> no valid observations
                        if self.resample:
                            if self.r_fill_values is not None:
                                if type(self.r_fill_values) == dict:
                                    time_fill_value = self.r_fill_values[
                                        self.time_var]
                                else:
                                    time_fill_value = self.r_fill_values
                                valid_mask = gpi_jd != time_fill_value
                            else:
                                valid_mask = np.invert(gpi_jd.mask)
                            gpi_jd = gpi_jd[valid_mask]
                        else:
                            # all are valid if no resampling took place
                            valid_mask = slice(None, None, None)

                        for key in data:
                            gpi_data[key] = data[key][i, valid_mask]

                        if gpi_jd.data.size > 0:
                            dataout.write_ts(
                                gpi, gpi_data, gpi_jd,
                                lon=gpi_lon, lat=gpi_lat,
                                attributes=self.ts_attributes,
                                dates_direct=True)

            data = {}
            output_array = None

        logging.log(logging.INFO, datetime.now() - start_time)
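# Illustrative sketch (array values invented, not from the source) of the
# np.swapaxes step in calc above: an image stack indexed as (time, gpi) is
# reordered to (gpi, time) so each row holds the full time series of one
# grid point of the current cell.
import numpy as np

n_times, n_gpis = 4, 3
img_stack = np.arange(n_times * n_gpis).reshape(n_times, n_gpis)
cell_index = np.array([0, 2])                       # grid points in one cell
ts_block = np.swapaxes(img_stack[:, cell_index], 0, 1)
print(ts_block.shape)                               # (2, 4) -> (points, times)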
def save_image(image, timestamp, region, metadata, dest_file, start_date,
               sp_res, nan_value=-99, shapefile=None, temp_res='dekad',
               compression=False):
    """Saves numpy.ndarray images as multidimensional netCDF4 file.

    Creates a datetimeindex over the whole period defined in the settings
    file.

    Parameters
    ----------
    image : dict of numpy.ndarrays
        Input image.
    timestamp : datetime.datetime
        Timestamp of image.
    region : str, optional
        Identifier of the region in the shapefile. If the default shapefile
        is used, this would be the FIPS country code.
    metadata : dict
        NetCDF metadata from source file.
    dest_file : str
        Path to the output file.
    start_date : datetime.datetime
        First date of available data.
    sp_res : int or float
        Spatial resolution of the grid.
    nan_value : int, optional
        Not a number value for dataset, defaults to -99.
    shapefile : str, optional
        Path to shape file, uses "world country admin boundary shapefile"
        by default.
    temp_res : string or int, optional
        Temporal resolution of the output NetCDF4 file, defaults to dekad.
    compression : bool, optional
        If True, ncfile compression is active.
    """

    if region == 'global':
        grid = grids.RegularGrid(sp_res)
    else:
        grid = grids.ShapeGrid(region, sp_res, shapefile)

    if not os.path.isfile(dest_file):
        save_grid(dest_file, grid)

    dt = get_dtindex(temp_res, start_date)

    with Dataset(dest_file, 'r+', format='NETCDF4') as ncfile:
        if 'time' not in ncfile.dimensions.keys():
            ncfile.createDimension("time", None)
            if compression:
                times = ncfile.createVariable('time', 'uint16', ('time',),
                                              zlib=True, complevel=4)
            else:
                times = ncfile.createVariable('time', 'uint16', ('time',))
            times.units = 'days since ' + str(start_date)
            times.calendar = 'standard'
            times[:] = date2num(dt.tolist(), units=times.units,
                                calendar=times.calendar)
        else:
            times = ncfile.variables['time']

        dim = ('time', 'lat', 'lon')

        numdate = date2num(timestamp, units=times.units,
                           calendar=times.calendar)

        for key in image.keys():
            if key not in ncfile.variables.keys():
                if compression:
                    var = ncfile.createVariable(key, image[key].dtype.char,
                                                dim, zlib=True,
                                                complevel=4,
                                                fill_value=nan_value)
                else:
                    var = ncfile.createVariable(key, image[key].dtype.char,
                                                dim, fill_value=nan_value)
            else:
                var = ncfile.variables[key]

            if numdate in times[:]:
                var_index = np.where(times[:] == numdate)[0][0]
            else:
                times[times[:].size] = numdate
                var_index = times[:].size - 1
            var[var_index] = image[key]

            if metadata is not None:
                for item in metadata[key]:
                    if item in var.ncattrs():
                        continue
                    else:
                        var.setncattr(str(item), metadata[key][item])
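# Hedged usage sketch for save_image (not from the original package): the
# variable name 'sm', the output path and the raster shape are assumptions.
# For a global grid at 0.25 deg spatial resolution the image is assumed to
# come on a (lat, lon) raster of roughly (720, 1440) cells.
from datetime import datetime
import numpy as np

img = {'sm': np.full((720, 1440), -99, dtype=np.float32)}
save_image(image=img,
           timestamp=datetime(2014, 1, 11),
           region='global',
           metadata=None,
           dest_file='/tmp/global_0.25_dekad.nc',
           start_date=datetime(2014, 1, 1),
           sp_res=0.25,
           nan_value=-99,
           temp_res='dekad',
           compression=True)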
def test_save_load_basicgrid(self):
    grid_nc.save_grid(self.testfile, self.basic)
    loaded_grid = grid_nc.load_grid(self.testfile)
    assert self.basic == loaded_grid
def test_save_load_basicgrid_irregular(self):
    grid_nc.save_grid(self.testfile, self.basic_irregular)
    loaded_grid = grid_nc.load_grid(self.testfile)
    assert self.basic_irregular == loaded_grid
        for id in country_ids:
            ids.append(id)

        return ids

    def continent_countries(self, *continents):
        if isinstance(continents, str):
            continents = [continents]
        names = np.array([])
        for continent in continents:
            n = self.df.loc[self.df.continent == continent, 'country'].values
            names = np.append(names, n)
        return names


if __name__ == '__main__':
    country = 'Morocco'

    from smos.grid import EASE25CellGrid
    # from smecv_grid.grid import SMECV_Grid_v052
    from pygeogrids.netcdf import save_grid

    grid = EASE25CellGrid()
    adapter = GridShpAdapter(grid)
    print(adapter)
    grid = adapter.create_subgrid([country])
    save_grid(
        r"R:\Projects\SMART-DRI\07_data\sm_country_data\SMOS-IC\ease25grid_{country}.nc".format(country=country),
        grid=grid)