def WeightsToNetCDF(self, refparam, region=None, ip=None, exclude=None): """ Parameters ---------- exclude : string, optional Variable which should not be used for calculation of the weights. """ if region is None: region = self.regions else: if isinstance(region, str): region = [region] if ip is None: ip = self.ip else: if isinstance(ip, int): ip = [ip] if not os.path.exists(self.weights_path): os.mkdir(self.weights_path) for reg in region: grid = grids.ShapeGrid(reg, self.spatial_resolution) gps = grid.get_gridpoints().index for ipe in ip: print '[INFO] calc weights ' + reg + ' IP' + str(ipe), for i, gp in enumerate(gps): if i % 100 == 0: print '.', self.__writeWeight(gp, reg, refparam, ipe, exclude) print ' done!'
def DItoNetCDF(self, region=None, source=None, ip=None): """ Calculates the Drought Index (DI) for a given source over one or more regions and stores them as NetCDF files. Parameters ---------- region : str, list of str, optional Region(s) of of interest; must be one of the regions as set in the CDIPoet instance; Defaults the regions attribute value of the CDIPoet instance. source : str, list of str, optional Source parameter(s) for which to calculate the DI; must be one of the sources set in the CDIPoet instance; Defaults to all sources set in the CDIPoet instance. ip : int, list of int, optional Interest period for calculating the DI; must be one of the ip as set in the CDIPoet instance; Defaults to the ip attribute value in the CDIPoet instance. """ if region is None: region = self.regions else: if isinstance(region, str): region = [region] if source is None: source = self.sources.keys() else: if isinstance(source, str): source = [source] if ip is None: ip = self.ip else: if isinstance(ip, int): ip = [ip] if not os.path.exists(self.di_path): os.mkdir(self.di_path) for reg in region: grid = grids.ShapeGrid(reg, self.spatial_resolution) gps = grid.get_gridpoints().index for ipe in ip: for src in source: if src in self.staticsources: continue if reg not in self.sources[src].valid_regions: continue print('[INFO] calc DI ' + reg + ' IP' + str(ipe) + ' ' + src), self.__writeDI(reg, src, gps, grid, ipe) print ' done!'
def setUp(self):
    """
    Prepares the test fixture: a small masked test image with metadata,
    the grids, and one NetCDF, one HDF5 and one PNG file written to the
    ``data`` directory next to the test module.
    """
    self.sp_res = 60
    self.region = 'UG'
    self.timestamp = datetime.today()
    self.start_date = datetime.today()
    self.temp_res = 'day'
    self.fill_value = -99
    self.variable = 'data'

    # 3x6 checkerboard image; masked cells carry the fill value.
    self.shape = (3, 6)
    self.mask = np.array([[1, 0, 1, 0, 1, 0],
                          [0, 1, 0, 1, 0, 1],
                          [1, 0, 1, 0, 1, 0]])
    self.data = np.ma.array(np.ones(self.shape), mask=self.mask,
                            fill_value=self.fill_value)
    self.data.data[np.where(self.mask == 1)] = self.fill_value
    self.image = {'data': self.data, 'data2': self.data * 2}

    # Per-variable metadata; 'data3' has no matching image on purpose or
    # by accident -- it is simply never written by the code below.
    self.metadata = {
        'data': {'Attribute1': 'Value1'},
        'data2': {'Attribut2': 'Value2'},
        'data3': {'Attribut3': 'Value3'},
    }

    data_dir = os.path.join(curpath(), 'data')
    if not os.path.exists(data_dir):
        os.mkdir(data_dir)

    self.grid = gr.ShapeGrid(self.region, self.sp_res)
    self.globalgrid = gr.RegularGrid(sp_res=self.sp_res)

    # NetCDF test file (always rebuilt from scratch).
    self.ncfile = os.path.join(data_dir, 'test_nc.nc')
    if os.path.exists(self.ncfile):
        os.remove(self.ncfile)
    save_image(self.image, self.timestamp, 'global', self.metadata,
               self.ncfile, self.start_date, self.sp_res,
               temp_res=self.temp_res)

    # HDF5 test file: one dataset per image variable inside 'group'.
    self.h5file = os.path.join(data_dir, 'tests_hdf5.h5')
    if os.path.exists(self.h5file):
        os.remove(self.h5file)
    with h5py.File(self.h5file, 'w') as hdf5_file:
        group = hdf5_file.create_group('group')
        for dataset_name in self.image.keys():
            attributes = self.metadata[dataset_name]
            write_data = self.image[dataset_name]
            dataset = group.create_dataset(dataset_name, write_data.shape,
                                           write_data.dtype, write_data)
            for attr_name in attributes:
                dataset.attrs[attr_name] = attributes[attr_name]

    # PNG test file: every image cell is blown up to a 60x60 pixel tile.
    self.pngfile = os.path.join(data_dir, 'test_png.png')
    if os.path.exists(self.pngfile):
        os.remove(self.pngfile)
    tile = 60
    pngimg = np.kron(np.copy(self.data), np.ones((tile, tile)))
    pngimg[pngimg == self.fill_value] = np.nan
    plt.imsave(self.pngfile, pngimg)
def CDItoNetCDF(self, region=None, ip=None, separatefile=True, exclude=None): """ Creates NetCDF that contains CDI for all timestamps. Parameters ---------- region : str, list of str, optional Region(s) of of interest; must be one of the regions as set in the CDIPoet instance; Defaults the regions attribute value of the CDIPoet instance. ip : int, list of int, optional Interest period for calculating the DI; must be one of the ip as set in the CDIPoet instance; Defaults to the ip attribute value in the CDIPoet instance. separatefile : bool If True, writes weights to separate file; If False, writes weights to NetCDF database file. exclude : string, optional Variable which should not be used for calculation of CDI. """ if region is None: region = self.regions else: if isinstance(region, str): region = [region] if ip is None: ip = self.ip else: if isinstance(ip, int): ip = [ip] if not os.path.exists(self.cdi_path): os.mkdir(self.cdi_path) for reg in region: grid = grids.ShapeGrid(reg, self.spatial_resolution) gps = grid.get_gridpoints().index for ipe in ip: key = 'ECDI_' + str(ipe) print('[INFO] calc ECDI ' + reg + ' IP' + str(ipe)) if separatefile: dest_file = os.path.join(self.cdi_path, reg + '_' + key + '.nc') else: dest_file = os.path.join( self.data_path, reg + '_' + str(self.spatial_resolution) + '_' + self.temporal_resolution + '.nc') wfile = os.path.join(self.weights_path, reg + '_weights_' + str(ipe) + '.nc') if not os.path.isfile(dest_file): grid = grids.ShapeGrid(reg, self.spatial_resolution) save_grid(dest_file, grid) with Dataset(dest_file, 'r+', format='NETCDF4') as cdifile: if 'time' not in cdifile.dimensions.keys(): dt = get_dtindex(self.temporal_resolution, self.start_date) cdifile.createDimension("time", None) times = cdifile.createVariable('time', 'uint16', ('time', )) times.units = 'days since ' + str(self.start_date) times.calendar = 'standard' times[:] = date2num(dt.tolist(), units=times.units, calendar=times.calendar) else: times = 
cdifile.variables['time'] if key not in cdifile.variables.keys(): dim = ('time', 'lat', 'lon') cdi = cdifile.createVariable(key, 'f8', dim, fill_value=-99) else: cdi = cdifile.variables[key] for k, gp in enumerate(gps): if k % 100 == 0: print '.', position = np.where(cdifile.variables['gpi'][:] == gp) lat_pos = position[0][0] lon_pos = position[1][0] weights = {} parnum = (len(self.sources.keys()) - len(self.staticsources)) if exclude is not None: parnum = parnum - 1 dat = np.zeros((parnum, cdi.shape[0]), dtype=np.float) # dat = np.zeros((len(self.sources.keys()), cdi.shape[0]), # dtype=np.float) dat[dat == 0] = self.nan_value dat = np.ma.masked_values(dat, self.nan_value) # extract data from DI files and calc weights i = 0 for param in self.sources.keys(): if param in self.staticsources: continue if param == exclude: continue difile = os.path.join( self.di_path, reg + '_' + param + '_DI_' + str(ipe) + '.nc') with Dataset(difile, 'r', format='NETCDF4') as nc: for var in nc.variables.keys(): if param in var: for j in range( 0, nc.variables[var].shape[0]): dat[i, j] = ( nc.variables[var][j, lat_pos, lon_pos]) with Dataset(wfile, 'r', format='NETCDF4') as nc: for var in nc.variables.keys(): if param in var: weights[param] = ( nc.variables[var][lat_pos, lon_pos]) i += 1 dat = np.ma.masked_where(dat == self.nan_value, dat) dat = np.nan_to_num(dat) dat = np.ma.masked_where(dat == 0., dat) avg = np.ma.average(dat, axis=0, weights=weights.values()) cdi[:, lat_pos, lon_pos] = avg print 'Done!' print 'Done!'
def __writeWeight(self, gp, region, refparam, ip, exclude=None):
    """
    Calculates the weights for one grid point and stores them in the
    weights NetCDF file of the region.

    The DI time series of every source with an existing DI file are
    collected in a DataFrame, passed to ``cdi.calc_weights`` and the
    resulting weights are written at the position of the grid point.

    Parameters
    ----------
    gp : grid point index
        Value looked up in the 'gpi' variable of the NetCDF files.
    region : str
        Region identifier, used to build the file names.
    refparam : str
        Reference parameter; the suffix ``_<ip>`` is appended before it
        is passed to the weight calculation.
    ip : int
        Interest period, part of the DI and weights file names.
    exclude : string, optional
        Variable which should not be used for calculation of the weights.
    """
    refparam += '_' + str(ip)

    df = pd.DataFrame()

    for param in self.sources.keys():

        difile = os.path.join(
            self.di_path, region + '_' + param + '_DI_' + str(ip) + '.nc')

        # Sources without a DI file for this region/ip are ignored.
        if not os.path.exists(difile):
            continue

        with Dataset(difile, 'r', format='NETCDF4') as di_nc:

            # The time axis of the first DI file defines the index.
            if len(df.index.values) == 0:
                time = di_nc.variables['time']
                dates = num2date(time[:], units=time.units,
                                 calendar=time.calendar)
                df = pd.DataFrame(index=pd.DatetimeIndex(dates))

            # Pick the (last) variable whose name contains the parameter.
            ncvar = None
            for name in di_nc.variables.keys():
                if param in name:
                    ncvar = name

            position = np.where(di_nc.variables['gpi'][:] == gp)
            lat_pos = position[0][0]
            lon_pos = position[1][0]

            df[ncvar] = np.nan

            di_var = di_nc.variables[ncvar]

            # NOTE(review): the range stops at shape[0] - 1, so the last
            # time step is never copied -- confirm this is intended.
            for step in range(0, di_var.shape[0] - 1):
                df[ncvar][step] = di_var[step, lat_pos, lon_pos]

            if 'scaling_factor' in di_var.ncattrs():
                factor = di_var.getncattr('scaling_factor')
                # Negative factors are multiplied, all others divided.
                if factor < 0:
                    df[ncvar] = df[ncvar] * float(factor)
                else:
                    df[ncvar] = df[ncvar] / float(factor)

    weights = cdi.calc_weights(df, refparam, lags=self.lags,
                               exclude=exclude)

    dest_file = os.path.join(self.weights_path,
                             region + '_weights_' + str(ip) + '.nc')

    # Create the weights file with the region grid on first use.
    if not os.path.isfile(dest_file):
        grid = grids.ShapeGrid(region, self.spatial_resolution)
        save_grid(dest_file, grid)

    with Dataset(dest_file, 'r+', format='NETCDF4') as weights_nc:
        dim = ('lat', 'lon')

        position = np.where(weights_nc.variables['gpi'][:] == gp)
        lat_pos = position[0][0]
        lon_pos = position[1][0]

        # Columns containing the excluded name get no weight variable.
        if exclude is not None:
            keys = [par for par in df.keys() if exclude not in par]
        else:
            keys = df.keys()

        for idx, dataset in enumerate(keys):
            if dataset in weights_nc.variables.keys():
                var = weights_nc.variables[dataset]
            else:
                var = weights_nc.createVariable(dataset, 'd', dim,
                                                fill_value=self.nan_value)
            var[lat_pos, lon_pos] = weights[idx]
def _resample_spatial(self, region, begin, end, delete_rawdata, shapefile=None): """Helper method that calls spatial resampling routines. Parameters: region : str FIPS country code (https://en.wikipedia.org/wiki/FIPS_country_code) begin : datetime Start date of resampling end : datetime End date of resampling delete_rawdata : bool True if original downloaded files should be deleted after resampling """ dest_file = self._get_tmp_filepath('spatial', region) dirList = os.listdir(self.rawdata_path) dirList.sort() if region == 'global': grid = gr.RegularGrid(sp_res=self.dest_sp_res) else: grid = gr.ShapeGrid(region, self.dest_sp_res, shapefile) for item in dirList: src_file = os.path.join(self.rawdata_path, item) fdate = get_file_date(item, self.filedate) if begin is not None: if fdate < begin: continue if end is not None: if fdate > end: continue if check_compressed(src_file): dirname = os.path.splitext(item)[0] dirpath = os.path.join(self.rawdata_path, dirname) unpack(src_file) src_file = select_file(os.listdir(dirpath)) src_file = os.path.join(dirpath, src_file) if begin is not None: if fdate < begin: if check_compressed(item): shutil.rmtree( os.path.join(self.rawdata_path, os.path.splitext(item)[0])) continue if end is not None: if fdate > end: if check_compressed(item): shutil.rmtree( os.path.join(self.rawdata_path, os.path.splitext(item)[0])) continue print '.', try: image, _, _, _, timestamp, metadata = \ resample_to_shape(src_file, region, self.dest_sp_res, grid, self.name, self.nan_value, self.dest_nan_value, self.variables, shapefile) except ValueError: print "[INFO] no data available for that region." return "[INFO] no data available for that region." 
if timestamp is None: timestamp = get_file_date(item, self.filedate) if self.temp_res == self.dest_temp_res: filename = (region + '_' + str(self.dest_sp_res) + '_' + str(self.dest_temp_res) + '.nc') dfile = os.path.join(self.data_path, filename) nc.save_image(image, timestamp, region, metadata, dfile, self.dest_start_date, self.dest_sp_res, self.dest_nan_value, shapefile, self.dest_temp_res) else: nc.write_tmp_file(image, timestamp, region, metadata, dest_file, self.dest_start_date, self.dest_sp_res, self.dest_nan_value, shapefile) # deletes unpacked files if existing if check_compressed(item): shutil.rmtree( os.path.join(self.rawdata_path, os.path.splitext(item)[0])) print ''
def save_image(image, timestamp, region, metadata, dest_file, start_date,
               sp_res, nan_value=-99, shapefile=None, temp_res='dekad',
               compression=False):
    """Saves numpy.ndarray images as multidimensional netCDF4 file.

    If the destination file does not exist it is created with the grid of
    the region. If it has no time dimension yet, a time axis covering the
    date index returned by ``get_dtindex(temp_res, start_date)`` is
    created. All variables of the image are written to the same time
    step, which is appended to the time axis if the timestamp is not
    found in it.

    Parameters
    ----------
    image : dict of numpy.ndarrays
        Input image, one array per variable name.
    timestamp : datetime.datetime
        Timestamp of image.
    region : str
        Identifier of the region in the shapefile, or 'global' for a
        regular global grid. If the default shapefile is used, this would
        be the FIPS country code.
    metadata : dict or None
        NetCDF metadata from source file, keyed by variable name.
        Attributes already present on a variable are not overwritten.
    dest_file : str
        Path to the output file.
    start_date : datetime.datetime
        First date of available data.
    sp_res : int or float
        Spatial resolution of the grid.
    nan_value : int, optional
        Not a number value for dataset, defaults to -99.
    shapefile : str, optional
        Path to shape file, uses "world country admin boundary shapefile"
        by default.
    temp_res : string or int, optional
        Temporal resolution of the output NetCDF4 file, defaults to dekad.
    compression : bool, optional
        If True, ncfile compression is active.
    """
    # The grid is only needed to initialise a new file.
    if not os.path.isfile(dest_file):
        if region == 'global':
            grid = grids.RegularGrid(sp_res)
        else:
            grid = grids.ShapeGrid(region, sp_res, shapefile)
        save_grid(dest_file, grid)

    # Compression settings shared by all variables created below.
    if compression:
        nc_options = {'zlib': True, 'complevel': 4}
    else:
        nc_options = {}

    with Dataset(dest_file, 'r+', format='NETCDF4') as ncfile:

        if 'time' not in ncfile.dimensions.keys():
            ncfile.createDimension("time", None)

            times = ncfile.createVariable('time', 'uint16', ('time',),
                                          **nc_options)
            times.units = 'days since ' + str(start_date)
            times.calendar = 'standard'

            # The full date index is only needed when the axis is created.
            dt = get_dtindex(temp_res, start_date)
            times[:] = date2num(dt.tolist(), units=times.units,
                                calendar=times.calendar)
        else:
            times = ncfile.variables['time']

        dim = ('time', 'lat', 'lon')

        numdate = date2num(timestamp, units=times.units,
                           calendar=times.calendar)

        # Resolved once (lazily, so an empty image never touches the time
        # axis) and shared by all variables: every variable of one image
        # must end up in the same time step.
        var_index = None

        for key in image.keys():

            if key not in ncfile.variables.keys():
                var = ncfile.createVariable(key, image[key].dtype.char,
                                            dim, fill_value=nan_value,
                                            **nc_options)
            else:
                var = ncfile.variables[key]

            if var_index is None:
                time_values = times[:]
                hits = np.where(time_values == numdate)[0]
                if hits.size > 0:
                    var_index = hits[0]
                else:
                    # Unknown timestamp: append it to the time axis.
                    var_index = time_values.size
                    times[var_index] = numdate

            var[var_index] = image[key]

            if metadata is not None:
                for item in metadata[key]:
                    if item not in var.ncattrs():
                        var.setncattr(str(item), metadata[key][item])