def test_get_dtindex(self):
    interval1 = 'dekad'
    interval2 = 'day'
    interval3 = 'week'
    interval4 = 'month'
    interval5 = 8

    dtindex1 = get_dtindex(interval1, self.begin, self.end)
    dtindex2 = get_dtindex(interval2, self.begin, self.end)
    dtindex3 = get_dtindex(interval3, self.begin, self.end)
    dtindex4 = get_dtindex(interval4, self.begin, self.end)
    dtindex5 = get_dtindex(interval5, self.begin, self.end)

    assert dtindex1.size == 6
    assert dtindex1[0] == datetime(2004, 2, 10)
    assert dtindex1[-1] == datetime(2004, 3, 31)

    assert dtindex2.size == 60
    assert dtindex2[0] == datetime(2004, 2, 1)
    assert dtindex2[-1] == datetime(2004, 3, 31)

    assert dtindex3.size == 9
    assert dtindex3[0] == datetime(2004, 2, 1)
    assert dtindex3[-1] == datetime(2004, 3, 28)

    assert dtindex4.size == 2
    assert dtindex4[0] == datetime(2004, 2, 29)
    assert dtindex4[-1] == datetime(2004, 3, 31)

    assert dtindex5.size == 8
    assert dtindex5[0] == datetime(2004, 2, 1)
    assert dtindex5[-1] == datetime(2004, 3, 28)
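# The asserts above pin down get_dtindex's semantics for each supported
# interval. Below is a minimal standalone sketch of that behaviour,
# assuming a pandas-based implementation; dtindex_sketch and its frequency
# mapping are illustrative, not the actual poets code.
import pandas as pd

def dtindex_sketch(interval, begin, end):
    if interval in ('day', 'daily'):
        return pd.date_range(begin, end, freq='D')
    if interval in ('week', 'weekly'):
        # weekly indices end on Sundays, matching the asserts above
        return pd.date_range(begin, end, freq='W-SUN')
    if interval in ('month', 'monthly'):
        # month-end dates
        return pd.date_range(begin, end, freq='M')
    if interval in ('dekad', 'dekadal'):
        # dekads end on the 10th, 20th and last day of each month
        days = pd.date_range(begin, end, freq='D')
        mask = [(d.day in (10, 20)) or (d.day == d.days_in_month)
                for d in days]
        return days[mask]
    if isinstance(interval, int):
        # fixed n-day steps
        return pd.date_range(begin, end, freq='%dD' % interval)
    raise ValueError('unknown interval: %r' % interval)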
def setUpClass(self):
    self.rootpath = os.path.join(curpath(), 'data', 'src_test')
    self.nan_value = -99
    self.regions = ['AU']
    self.spatial_resolution = 1
    self.temporal_resolution = 'week'
    self.start_date = datetime(2013, 1, 7)
    self.enddate = datetime(2013, 1, 13)
    self.testdate = datetime(2013, 1, 10)

    if os.path.exists(self.rootpath):
        shutil.rmtree(self.rootpath)
    os.mkdir(self.rootpath)
    os.mkdir(os.path.join(self.rootpath, 'DATA'))
    os.mkdir(os.path.join(self.rootpath, 'TMP'))

    self.poet = Poet(self.rootpath, self.regions, self.spatial_resolution,
                     self.temporal_resolution, self.start_date,
                     self.nan_value)

    # set up test png files
    self.pngdir = os.path.join(curpath(), 'data', 'testpngs')
    if os.path.exists(self.pngdir):
        shutil.rmtree(self.pngdir)
    os.mkdir(self.pngdir)

    dtindex = get_dtindex('day', self.start_date, self.enddate)
    for dat in dtindex:
        year = str(dat.year)
        month = "%02d" % dat.month
        day = "%02d" % dat.day
        fname = 'test_' + year + '_' + month + '_' + day + '.png'
        shutil.copy(os.path.join(curpath(), 'data', 'test.png'),
                    os.path.join(self.pngdir, fname))
def check_rawdata(self, begin=None, end=None):
    """
    Checks if files are already downloaded.

    Parameters
    ----------
    begin : datetime
        Check files beginning from this date.
    end : datetime
        Check files until this date.

    Returns
    -------
    boolean
        True if all files are already downloaded, False if files are
        missing.
    """

    drange = dt.get_dtindex(self.temp_res, begin, end)
    rawfiles = os.listdir(self.rawdata_path)

    rf_dates = []
    for rf in rawfiles:
        rf_dates.append(get_file_date(rf, self.filedate))

    for d in drange:
        if d not in rf_dates:
            return False

    return True
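# check_rawdata compares the expected date index against dates parsed out
# of the raw file names. A hedged sketch of what get_file_date presumably
# does with the {placeholder: (start, stop)} slices stored in filedate
# (illustrative only; the real helper likely supports more placeholders):
from datetime import datetime

def file_date_sketch(fname, filedate):
    year = int(fname[slice(*filedate['YYYY'])])
    month = int(fname[slice(*filedate['MM'])]) if 'MM' in filedate else 1
    day = int(fname[slice(*filedate['DD'])]) if 'DD' in filedate else 1
    return datetime(year, month, day)

assert file_date_sketch('test_2013_01_10.png',
                        {'YYYY': (5, 9), 'MM': (10, 12),
                         'DD': (13, 15)}) == datetime(2013, 1, 10)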
def _resample_temporal(self, region, shapefile=None):
    """Helper method that calls temporal resampling routines.

    Parameters
    ----------
    region : str
        Identifier of the region in the shapefile. If the default
        shapefile is used, this would be the FIPS country code.
    shapefile : str, optional
        Path to shape file, uses "world country admin boundary shapefile"
        by default.
    """

    src_file = self._get_tmp_filepath('spatial', region)

    if not os.path.exists(src_file):
        print '[Info] No data available for this period'
        return False

    variables, _, period = nc.get_properties(src_file)
    dtindex = dt.get_dtindex(self.dest_temp_res, period[0], period[1])

    for date in dtindex:
        # skip if data for period is not complete
        # if date > period[1]:
        #     continue
        if self.dest_temp_res == 'dekad':
            if date.day < 21:
                begin = datetime(date.year, date.month,
                                 date.day - 10 + 1)
            else:
                begin = datetime(date.year, date.month, 21)
            end = date
        else:
            begin = period[0]
            end = date

        data = {}
        metadata = {}

        for var in variables:
            img, _, _, meta = nc.read_variable(src_file, var, begin, end)
            metadata[var] = meta
            data[var] = average_layers(img, self.dest_nan_value)

        dest_file = self.src_file[region]

        nc.save_image(data, date, region, metadata, dest_file,
                      self.dest_start_date, self.dest_sp_res,
                      self.dest_nan_value, shapefile, self.dest_temp_res)

    # delete intermediate netCDF file
    print ''
    os.unlink(src_file)
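# A quick standalone check of the dekad window arithmetic above: a dekad
# ending on the 10th or 20th starts nine days earlier, and the
# end-of-month dekad starts on the 21st (dekad_begin is an illustrative
# name, not part of the module).
from datetime import datetime

def dekad_begin(date):
    if date.day < 21:
        return datetime(date.year, date.month, date.day - 10 + 1)
    return datetime(date.year, date.month, 21)

assert dekad_begin(datetime(2004, 2, 10)) == datetime(2004, 2, 1)
assert dekad_begin(datetime(2004, 2, 20)) == datetime(2004, 2, 11)
assert dekad_begin(datetime(2004, 2, 29)) == datetime(2004, 2, 21)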
def setUpClass(self):
    self.rootpath = os.path.join(curpath(), 'data', 'src_test')
    self.dest_nan_value = -99
    self.dest_regions = ['AU']
    self.dest_sp_res = 1
    self.dest_temp_res = 'daily'
    self.dest_start_date = datetime(2013, 1, 7)
    self.enddate = datetime(2013, 1, 13)
    self.testdate = datetime(2013, 1, 10)

    self.name = 'TEST'
    # placeholder keys match self.filedate and the generated file names
    # ({TT} in the original looked like a typo for {DD})
    self.filename = "test_{YYYY}_{MM}_{DD}.png"
    self.filedate = {'YYYY': (5, 9), 'MM': (10, 12), 'DD': (13, 15)}
    self.temp_res = 'daily'
    self.protocol = 'local'
    self.begin_date = datetime(2013, 1, 7)

    if os.path.exists(self.rootpath):
        shutil.rmtree(self.rootpath)
    os.mkdir(self.rootpath)
    os.mkdir(os.path.join(self.rootpath, 'RAWDATA'))
    os.mkdir(os.path.join(self.rootpath, 'DATA'))
    os.mkdir(os.path.join(self.rootpath, 'TMP'))

    # set up test png files
    self.pngdir = os.path.join(curpath(), 'data', 'testpngs')
    self.host = self.pngdir

    if os.path.exists(self.pngdir):
        shutil.rmtree(self.pngdir)
    os.mkdir(self.pngdir)

    self.dtindex = get_dtindex('day', self.dest_start_date, self.enddate)
    for i, dat in enumerate(self.dtindex):
        # leave one date out to create a gap in the test data
        if i == 3:
            continue
        year = str(dat.year)
        month = "%02d" % dat.month
        day = "%02d" % dat.day
        fname = 'test_' + year + '_' + month + '_' + day + '.png'
        shutil.copy(os.path.join(curpath(), 'data', 'test.png'),
                    os.path.join(self.pngdir, fname))

    self.source = BasicSource(self.name, self.filename, self.filedate,
                              self.temp_res, self.rootpath, self.host,
                              self.protocol, begin_date=self.begin_date,
                              dest_nan_value=self.dest_nan_value,
                              dest_regions=self.dest_regions,
                              dest_sp_res=self.dest_sp_res,
                              dest_temp_res=self.dest_temp_res,
                              dest_start_date=self.dest_start_date)
def download_and_resample(self, download_path=None, begin=None, end=None,
                          delete_rawdata=False, shapefile=None):
    """Downloads and resamples data.

    Parameters
    ----------
    download_path : str
        Path where to save the downloaded files.
    begin : datetime.date, optional
        Set either to the first date of the remote repository or to the
        date of the last file in the local repository.
    end : datetime.date, optional
        Set to today if none is given.
    delete_rawdata : bool, optional
        Original files will be deleted from rawdata_path if set True.
    shapefile : str, optional
        Path to shape file, uses "world country admin boundary shapefile"
        by default.
    """

    begin, end = self._check_begin_end(begin, end)

    if begin > end:
        print '[INFO] everything up to date'
        return '[INFO] everything up to date'

    drange = dt.get_dtindex(self.dest_temp_res, begin, end)

    for i, date in enumerate(drange):
        if i == 0:
            start = begin
        else:
            if self.dest_temp_res in ['dekad', 'dekadal', 'week',
                                      'weekly', 'month', 'monthly']:
                start = drange[i - 1] + timedelta(days=1)
            else:
                start = date
        stop = date

        filecheck = self.download(download_path, start, stop)
        if filecheck is True:
            self.resample(start, stop, delete_rawdata, shapefile, False)
        else:
            if filecheck is False:
                print '[WARNING] no data available for this date'
            else:
                print filecheck
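# Standalone sketch of the stepwise windowing above: for period-based
# resolutions each download/resample step runs from the day after the
# previous period's end date up to the current index date (pandas is used
# here as a stand-in for dt.get_dtindex; names are illustrative).
from datetime import datetime, timedelta
import pandas as pd

begin = datetime(2013, 1, 1)
drange = pd.date_range(begin, datetime(2013, 1, 31), freq='W-SUN')

windows = []
for i, date in enumerate(drange):
    start = begin if i == 0 else drange[i - 1] + timedelta(days=1)
    windows.append((start, date.to_pydatetime()))

assert windows[0] == (datetime(2013, 1, 1), datetime(2013, 1, 6))
assert windows[1] == (datetime(2013, 1, 7), datetime(2013, 1, 13))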
def fill_gaps(self, begin=None, end=None):
    """
    Detects gaps in data and tries to fill them by downloading and
    resampling the data within these periods.

    Parameters
    ----------
    begin : datetime
        Begin date of interval to check, defaults to None.
    end : datetime
        End date of interval to check, defaults to None.
    """

    gaps = []

    for region in self.valid_regions:
        if self.regions is not None:
            if region not in self.regions:
                continue
        _, _, period = nc.get_properties(self.src_file[region])

        if begin is None:
            if self.begin_date < self.dest_start_date:
                begin = self.dest_start_date
            else:
                begin = self.begin_date
        if end is None:
            end = period[1]

        drange = dt.get_dtindex(self.dest_temp_res, begin, end)

        for date in drange:
            nonans = []
            for var in self.get_variables():
                img, _, _, _ = self.read_img(date, region, var)
                if np.nanmean(img) is not np.ma.masked:
                    nonans.append(1)
            if len(nonans) == 0:
                if date not in gaps:
                    gaps.append(date)

    if len(gaps) == 0:
        print '[INFO] No gaps found.'
    else:
        print ('[INFO] Found ' + str(len(gaps)) +
               ' gap(s), attempt to fill..')
        for date in gaps:
            if self.dest_temp_res in ['day', 'daily']:
                begin = date
                end = date
            else:
                begin, end = dt.check_period_boundaries(
                    self.dest_temp_res, date)
            self.download_and_resample(begin=begin, end=end)
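# A gap date is expanded back to its full period before re-downloading.
# Hedged sketch of dt.check_period_boundaries for the dekad case only
# (illustrative; the real helper also covers week and month resolutions):
import calendar
from datetime import datetime

def dekad_boundaries(date):
    if date.day <= 10:
        return datetime(date.year, date.month, 1), \
            datetime(date.year, date.month, 10)
    if date.day <= 20:
        return datetime(date.year, date.month, 11), \
            datetime(date.year, date.month, 20)
    last = calendar.monthrange(date.year, date.month)[1]
    return datetime(date.year, date.month, 21), \
        datetime(date.year, date.month, last)

assert dekad_boundaries(datetime(2013, 1, 15)) == (datetime(2013, 1, 11),
                                                   datetime(2013, 1, 20))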
def save_image(image, timestamp, region, metadata, dest_file, start_date,
               sp_res, nan_value=-99, shapefile=None, temp_res='dekad',
               compression=False):
    """Saves numpy.ndarray images as a multidimensional netCDF4 file.

    Creates a datetime index over the whole period defined in the
    settings file.

    Parameters
    ----------
    image : dict of numpy.ndarrays
        Input image.
    timestamp : datetime.datetime
        Timestamp of image.
    region : str, optional
        Identifier of the region in the shapefile. If the default
        shapefile is used, this would be the FIPS country code.
    metadata : dict
        NetCDF metadata from source file.
    dest_file : str
        Path to the output file.
    start_date : datetime.datetime
        First date of available data.
    sp_res : int or float
        Spatial resolution of the grid.
    nan_value : int, optional
        Not a number value for dataset, defaults to -99.
    shapefile : str, optional
        Path to shape file, uses "world country admin boundary shapefile"
        by default.
    temp_res : string or int, optional
        Temporal resolution of the output NetCDF4 file, defaults to dekad.
    compression : bool, optional
        If True, ncfile compression is active.
    """

    if region == 'global':
        grid = grids.RegularGrid(sp_res)
    else:
        grid = grids.ShapeGrid(region, sp_res, shapefile)

    if not os.path.isfile(dest_file):
        save_grid(dest_file, grid)

    dt = get_dtindex(temp_res, start_date)

    with Dataset(dest_file, 'r+', format='NETCDF4') as ncfile:

        if 'time' not in ncfile.dimensions.keys():
            ncfile.createDimension("time", None)

            if compression:
                times = ncfile.createVariable('time', 'uint16', ('time',),
                                              zlib=True, complevel=4)
            else:
                times = ncfile.createVariable('time', 'uint16', ('time',))

            times.units = 'days since ' + str(start_date)
            times.calendar = 'standard'
            times[:] = date2num(dt.tolist(), units=times.units,
                                calendar=times.calendar)
        else:
            times = ncfile.variables['time']

        dim = ('time', 'lat', 'lon')

        numdate = date2num(timestamp, units=times.units,
                           calendar=times.calendar)

        for key in image.keys():

            if key not in ncfile.variables.keys():
                if compression:
                    var = ncfile.createVariable(key,
                                                image[key].dtype.char,
                                                dim, zlib=True,
                                                complevel=4,
                                                fill_value=nan_value)
                else:
                    var = ncfile.createVariable(key,
                                                image[key].dtype.char,
                                                dim,
                                                fill_value=nan_value)
            else:
                var = ncfile.variables[key]

            if numdate in times[:]:
                var_index = np.where(times[:] == numdate)[0][0]
            else:
                times[times[:].size] = numdate
                var_index = times[:].size - 1

            var[var_index] = image[key]

            if metadata is not None:
                for item in metadata[key]:
                    if item in var.ncattrs():
                        continue
                    else:
                        var.setncattr(str(item), metadata[key][item])
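# Illustrative call of save_image (the file name is a placeholder, and the
# array shape is an assumption: a 1 deg global RegularGrid should yield a
# 180 x 360 lat/lon raster; adjust to whatever the grid actually produces):
import numpy as np
from datetime import datetime

image = {'rainfall': np.full((180, 360), -99, dtype='f8')}
save_image(image, timestamp=datetime(2013, 1, 10), region='global',
           metadata=None, dest_file='global_1_dekad.nc',
           start_date=datetime(2013, 1, 1), sp_res=1)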
def resample(self, begin=None, end=None, delete_rawdata=False,
             shapefile=None, stepwise=True):
    """Resamples source data to the given spatial and temporal resolution.

    Writes resampled images into a netCDF data file. Deletes original
    files if the flag delete_rawdata is set True.

    Parameters
    ----------
    begin : datetime
        Start date of resampling.
    end : datetime
        End date of resampling.
    delete_rawdata : bool
        Original files will be deleted from rawdata_path if set 'True'.
    shapefile : str, optional
        Path to shape file, uses "world country admin boundary shapefile"
        by default.
    """

    if len(os.listdir(self.tmp_path)) != 0:
        for fname in os.listdir(self.tmp_path):
            if '.nc' in fname:
                os.remove(os.path.join(self.tmp_path, fname))

    # clean rawdata folder from subdirectories
    for item in os.listdir(self.rawdata_path):
        if os.path.isdir(os.path.join(self.rawdata_path, item)):
            os.rmdir(os.path.join(self.rawdata_path, item))

    begin, end = self._check_begin_end(begin, end)

    if begin > end:
        print '[INFO] everything up to date'
        return '[INFO] everything up to date'

    if stepwise:
        drange = dt.get_dtindex(self.dest_temp_res, begin, end)
        for i, date in enumerate(drange):
            if i == 0:
                start = begin
            else:
                if self.dest_temp_res in ['dekad', 'dekadal', 'week',
                                          'weekly', 'month', 'monthly']:
                    start = drange[i - 1] + timedelta(days=1)
                else:
                    start = date
            stop = date

            print '[INFO] Resampling ' + str(start) + ' to ' + str(stop)

            for region in self.valid_regions:
                print '[INFO] resampling to region ' + region
                print '[INFO] performing spatial resampling ',
                self._resample_spatial(region, start, stop,
                                       delete_rawdata, shapefile)
                if self.temp_res == self.dest_temp_res:
                    print '[INFO] skipping temporal resampling'
                else:
                    print '[INFO] performing temporal resampling ',
                    self._resample_temporal(region, shapefile)
    else:
        print '[INFO] ' + str(begin) + '-' + str(end)
        for region in self.valid_regions:
            print '[INFO] resampling to region ' + region
            print '[INFO] performing spatial resampling ',
            self._resample_spatial(region, begin, end, delete_rawdata,
                                   shapefile)
            if self.temp_res == self.dest_temp_res:
                print '[INFO] skipping temporal resampling'
            else:
                print '[INFO] performing temporal resampling ',
                self._resample_temporal(region, shapefile)

    if delete_rawdata:
        print '[INFO] Cleaning up rawdata'
        dirList = os.listdir(self.rawdata_path)
        dirList.sort()
        for item in dirList:
            src_file = os.path.join(self.rawdata_path, item)
            os.unlink(src_file)
def index(**kwargs):
    """
    Renders main page of the web application.

    Generates image arguments needed for OpenLayers overlay if parameters
    `reg` and `var` are set, renders entry page if not set.
    """

    global enddate
    global dates
    global ndate

    regions = []
    for i, reg in enumerate(p.regions):
        regions.append({'code': reg, 'name': p.region_names[i]})

    if len(kwargs) > 0:
        if 'reg' in kwargs:
            region = kwargs['reg']
        if 'var' in kwargs:
            variable = kwargs['var']

        for src in p.sources.keys():
            if variable in p.sources[src].get_variables():
                source = p.sources[src]

        ndate = source._check_current_date()
        begindate = ndate[region][variable][0]
        enddate = ndate[region][variable][1]

        if begindate is None and enddate is None:
            error = 'No data available for this dataset.'
            return render_template('index.html',
                                   regions=p.regions,
                                   sources=p.sources.keys(),
                                   variables=variables,
                                   error=error)

        d = get_dtindex(p.temporal_resolution, begindate, enddate)
        dates = d.to_pydatetime()

        fdates = []
        for i, d in enumerate(dates.tolist()):
            dat = {'id': i, 'date': d.strftime('%Y-%m-%d')}
            fdates.append(dat)

        lon_min, lon_max, lat_min, lat_max, c_lat, c_lon, _ = \
            bounds(region, p.shapefile)

        if source.valid_range is None:
            vrange = [-999, -999]
        else:
            vrange = source.valid_range

        return render_template('app.html',
                               max=len(dates) - 1,
                               coord=[c_lon, c_lat],
                               ex1=(lon_max, lat_min),
                               ex2=(lon_min, lat_max),
                               region=region,
                               source=source.name,
                               variable=variable,
                               regions=regions,
                               variables=variables,
                               dates=fdates,
                               host=host_gl,
                               port=port_gl,
                               sp_res=p.spatial_resolution,
                               range=vrange)
    else:
        return render_template('index.html',
                               regions=regions,
                               sources=p.sources.keys(),
                               variables=variables,
                               host=host_gl,
                               port=port_gl)
def index(**kwargs):
    """
    Renders main page of the web application.

    Generates image arguments needed for OpenLayers overlay if parameters
    `reg` and `var` are set, renders entry page if not set.
    """

    global enddate
    global dates
    global ndate

    regions = []
    for i, reg in enumerate(p.regions):
        regions.append({'code': reg, 'name': p.region_names[i]})

    if len(kwargs) > 0:
        if 'reg' in kwargs:
            region = kwargs['reg']
        if 'var' in kwargs:
            variable = kwargs['var']

        for src in p.sources.keys():
            if variable in p.sources[src].get_variables():
                source = p.sources[src]

        ndate = source._check_current_date()
        begindate = ndate[region][variable][0]
        enddate = ndate[region][variable][1]

        if begindate is None and enddate is None:
            error = 'No data available for this dataset.'
            return render_template('index.html',
                                   regions=p.regions,
                                   sources=p.sources.keys(),
                                   variables=variables,
                                   error=error)

        d = get_dtindex(p.temporal_resolution, begindate, enddate)
        dates = d.to_pydatetime()

        fdates = []
        for i, d in enumerate(dates.tolist()):
            dat = {'id': i, 'date': d.strftime('%Y-%m-%d')}
            fdates.append(dat)

        lon_min, lon_max, lat_min, lat_max, c_lat, c_lon, _ = \
            image_bounds(region, p.spatial_resolution, p.shapefile)

        if source.valid_range is None:
            vrange = [-999, -999]
        else:
            vrange = source.valid_range

        ex1 = (lon_max, lat_min)
        ex2 = (lon_min, lat_max)

        return render_template('app.html',
                               max=len(dates) - 1,
                               coord=[c_lon, c_lat],
                               ex1=ex1,
                               ex2=ex2,
                               region=region,
                               source=source.name,
                               variable=variable,
                               regions=regions,
                               variables=variables,
                               dates=fdates,
                               host=host_gl,
                               port=port_gl,
                               sp_res=p.spatial_resolution,
                               range=vrange,
                               url=url_gl,
                               subregions=get_subregions(region))
    else:
        return render_template('index.html',
                               regions=regions,
                               sources=p.sources.keys(),
                               variables=variables,
                               host=host_gl,
                               port=port_gl,
                               url=url_gl)
def CDItoNetCDF(self, region=None, ip=None, separatefile=True,
                exclude=None):
    """
    Creates a NetCDF file that contains the CDI for all timestamps.

    Parameters
    ----------
    region : str, list of str, optional
        Region(s) of interest; must be one of the regions as set in the
        CDIPoet instance; defaults to the regions attribute value of the
        CDIPoet instance.
    ip : int, list of int, optional
        Interest period for calculating the DI; must be one of the ip as
        set in the CDIPoet instance; defaults to the ip attribute value
        in the CDIPoet instance.
    separatefile : bool
        If True, writes weights to a separate file; if False, writes
        weights to the NetCDF database file.
    exclude : string, optional
        Variable which should not be used for calculation of the CDI.
    """

    if region is None:
        region = self.regions
    else:
        if isinstance(region, str):
            region = [region]

    if ip is None:
        ip = self.ip
    else:
        if isinstance(ip, int):
            ip = [ip]

    if not os.path.exists(self.cdi_path):
        os.mkdir(self.cdi_path)

    for reg in region:
        grid = grids.ShapeGrid(reg, self.spatial_resolution)
        gps = grid.get_gridpoints().index

        for ipe in ip:
            key = 'ECDI_' + str(ipe)

            print '[INFO] calc ECDI ' + reg + ' IP' + str(ipe)

            if separatefile:
                dest_file = os.path.join(self.cdi_path,
                                         reg + '_' + key + '.nc')
            else:
                dest_file = os.path.join(self.data_path,
                                         reg + '_' +
                                         str(self.spatial_resolution) +
                                         '_' + self.temporal_resolution +
                                         '.nc')

            wfile = os.path.join(self.weights_path,
                                 reg + '_weights_' + str(ipe) + '.nc')

            if not os.path.isfile(dest_file):
                grid = grids.ShapeGrid(reg, self.spatial_resolution)
                save_grid(dest_file, grid)

            with Dataset(dest_file, 'r+', format='NETCDF4') as cdifile:

                if 'time' not in cdifile.dimensions.keys():
                    dt = get_dtindex(self.temporal_resolution,
                                     self.start_date)
                    cdifile.createDimension("time", None)
                    times = cdifile.createVariable('time', 'uint16',
                                                   ('time',))
                    times.units = 'days since ' + str(self.start_date)
                    times.calendar = 'standard'
                    times[:] = date2num(dt.tolist(), units=times.units,
                                        calendar=times.calendar)
                else:
                    times = cdifile.variables['time']

                if key not in cdifile.variables.keys():
                    dim = ('time', 'lat', 'lon')
                    cdi = cdifile.createVariable(key, 'f8', dim,
                                                 fill_value=-99)
                else:
                    cdi = cdifile.variables[key]

                for k, gp in enumerate(gps):
                    if k % 100 == 0:
                        print '.',

                    position = np.where(cdifile.variables['gpi'][:] == gp)
                    lat_pos = position[0][0]
                    lon_pos = position[1][0]

                    weights = {}

                    parnum = (len(self.sources.keys()) -
                              len(self.staticsources))
                    if exclude is not None:
                        parnum = parnum - 1

                    dat = np.zeros((parnum, cdi.shape[0]), dtype=np.float)
                    # dat = np.zeros((len(self.sources.keys()),
                    #                 cdi.shape[0]), dtype=np.float)
                    dat[dat == 0] = self.nan_value
                    dat = np.ma.masked_values(dat, self.nan_value)

                    # extract data from DI files and calc weights
                    i = 0
                    for param in self.sources.keys():
                        if param in self.staticsources:
                            continue
                        if param == exclude:
                            continue

                        difile = os.path.join(self.di_path,
                                              reg + '_' + param + '_DI_' +
                                              str(ipe) + '.nc')

                        with Dataset(difile, 'r', format='NETCDF4') as nc:
                            for var in nc.variables.keys():
                                if param in var:
                                    for j in range(
                                            0, nc.variables[var].shape[0]):
                                        dat[i, j] = (nc.variables[var]
                                                     [j, lat_pos, lon_pos])

                        with Dataset(wfile, 'r', format='NETCDF4') as nc:
                            for var in nc.variables.keys():
                                if param in var:
                                    weights[param] = (nc.variables[var]
                                                      [lat_pos, lon_pos])

                        i += 1

                    dat = np.ma.masked_where(dat == self.nan_value, dat)
                    dat = np.nan_to_num(dat)
                    dat = np.ma.masked_where(dat == 0., dat)

                    avg = np.ma.average(dat, axis=0,
                                        weights=weights.values())

                    cdi[:, lat_pos, lon_pos] = avg

                print 'Done!'

    print 'Done!'
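# The per-gridpoint ECDI above boils down to a weighted average over the
# masked per-parameter DI stack; np.ma.average skips masked entries and
# renormalises the weights over the remaining ones. Standalone check with
# made-up numbers:
import numpy as np

dat = np.ma.masked_values(np.array([[0.2, -99.],
                                    [0.6, 0.4]]), -99.)
avg = np.ma.average(dat, axis=0, weights=[0.25, 0.75])

assert abs(avg[0] - (0.25 * 0.2 + 0.75 * 0.6)) < 1e-9  # both valid
assert abs(avg[1] - 0.4) < 1e-9                        # only one valid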
def __writeDI(self, region, src, gridpoints, grid, ip, suffix='',
              scaled=True, modf_all=True, start=None):

    if start is not None:
        dt = get_dtindex('dekad', start)
    else:
        dt = get_dtindex('dekad', self.start_date)

    dest_file = os.path.join(self.di_path,
                             region + '_' + src + '_DI' + '_' + str(ip) +
                             '.nc')

    if not os.path.isfile(dest_file):
        save_grid(dest_file, grid)

    for i, gp in enumerate(gridpoints):
        if i % 100 == 0:
            print '.',

        ts = self.read_timeseries(src, gp, region)
        if start is not None:
            sel = (ts.index >= start)
            ts = ts[sel]

        inverse = False
        if src == 'MODIS_LST':
            inverse = True

        ts_di = cdi.calc_DI(ts.copy(), inverse, [ip], scale_zero=False,
                            scaled=scaled, modf_all=modf_all)

        with Dataset(dest_file, 'r+', format='NETCDF4') as nc:
            if 'time' not in nc.dimensions.keys():
                nc.createDimension("time", None)
                times = nc.createVariable('time', 'uint16', ('time',))
                times.units = 'days since ' + str(self.start_date)
                times.calendar = 'standard'
                times[:] = date2num(dt.tolist(), units=times.units,
                                    calendar=times.calendar)
            else:
                times = nc.variables['time']

            dim = ('time', 'lat', 'lon')

            position = np.where(nc.variables['gpi'][:] == gp)
            lat_pos = position[0][0]
            lon_pos = position[1][0]

            # extend times variable in NetCDF
            tsdates = date2num(ts_di.index.tolist(), units=times.units,
                               calendar=times.calendar).astype(int)
            begin = np.where(times == tsdates[0])[0][0]
            times[begin:] = tsdates

            for dataset in ts_di.keys():
                if dataset not in nc.variables.keys():
                    var = nc.createVariable(dataset,
                                            ts_di[dataset].dtype.char,
                                            dim,
                                            fill_value=self.nan_value)
                else:
                    var = nc.variables[dataset]

                var[begin:, lat_pos, lon_pos] = ts_di[dataset].values
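# Standalone sketch of the time-axis extension above: locate where the new
# series starts on the existing 'days since' axis and overwrite from there
# onward (plain numpy stands in for the unlimited netCDF4 variable).
import numpy as np

times = np.array([0, 10, 20, 31])      # existing day offsets
tsdates = np.array([20, 31, 41, 51])   # new, partially overlapping dates
begin = np.where(times == tsdates[0])[0][0]
times = np.concatenate([times[:begin], tsdates])

assert times.tolist() == [0, 10, 20, 31, 41, 51]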
def resample(self, begin=None, end=None, delete_rawdata=False,
             shapefile=None, stepwise=True):
    """Resamples source data to the given spatial and temporal resolution.

    Writes resampled images into a netCDF data file. Deletes original
    files if the flag delete_rawdata is set True.

    Parameters
    ----------
    begin : datetime
        Start date of resampling.
    end : datetime
        End date of resampling.
    delete_rawdata : bool
        Original files will be deleted from rawdata_path if set 'True'.
    shapefile : str, optional
        Path to shape file, uses "world country admin boundary shapefile"
        by default.
    """

    if len(os.listdir(self.tmp_path)) != 0:
        for fname in os.listdir(self.tmp_path):
            if '.nc' in fname:
                os.remove(os.path.join(self.tmp_path, fname))

    # clean rawdata folder from subdirectories
    for item in os.listdir(self.rawdata_path):
        if os.path.isdir(os.path.join(self.rawdata_path, item)):
            os.rmdir(os.path.join(self.rawdata_path, item))

    begin, end = self._check_begin_end(begin, end)

    if begin > end:
        print '[INFO] everything up to date'
        return '[INFO] everything up to date'

    if stepwise:
        drange = dt.get_dtindex(self.dest_temp_res, begin, end)
        for i, date in enumerate(drange):
            if i == 0:
                start = begin
            else:
                if self.dest_temp_res in ['dekad', 'dekadal', 'week',
                                          'weekly', 'month', 'monthly']:
                    start = drange[i - 1] + timedelta(days=1)
                else:
                    start = date
            stop = date

            print '[INFO] Resampling ' + str(start) + ' to ' + str(stop)

            for region in self.dest_regions:
                if self.regions is not None:
                    if region not in self.regions:
                        continue
                print '[INFO] resampling to region ' + region
                print '[INFO] performing spatial resampling ',
                self._resample_spatial(region, start, stop,
                                       delete_rawdata, shapefile)
                if self.temp_res == self.dest_temp_res:
                    print '[INFO] skipping temporal resampling'
                else:
                    print '[INFO] performing temporal resampling ',
                    self._resample_temporal(region, shapefile)
    else:
        print '[INFO] ' + str(begin) + '-' + str(end)
        for region in self.dest_regions:
            if self.regions is not None:
                if region not in self.regions:
                    continue
            print '[INFO] resampling to region ' + region
            print '[INFO] performing spatial resampling ',
            self._resample_spatial(region, begin, end, delete_rawdata,
                                   shapefile)
            if self.temp_res == self.dest_temp_res:
                print '[INFO] skipping temporal resampling'
            else:
                print '[INFO] performing temporal resampling ',
                self._resample_temporal(region, shapefile)

    if delete_rawdata:
        print '[INFO] Cleaning up rawdata'
        dirList = os.listdir(self.rawdata_path)
        dirList.sort()
        for item in dirList:
            src_file = os.path.join(self.rawdata_path, item)
            os.unlink(src_file)