def get_ts_from_complete_data(complete_data, mask, keys, dates=None):
    """
    Compute spatially averaged time series for a set of variables.

    For every date shared by all selected variables the matching raster is
    opened, its NaN pixels are set to zero, the pixels where the mask map is
    NaN are set to NaN and the mean of the remaining pixels is stored.

    Parameters
    ----------
    complete_data : dict
        Maps a variable name to a sequence whose first item is an array of
        filehandles and whose second item is the array of matching dates
        (both are indexed with a boolean date comparison).
    mask : str
        Filehandle of the map whose NaN pixels are excluded from the mean.
    keys : list or None
        Variables to process. None selects every key in complete_data.
    dates : optional
        Not used by this function.

    Returns
    -------
    tss : dict
        Maps each key to a tuple (common_dates, values) where values is a
        1d float array with one mean per common date.
    """
    # `is None` instead of `== None`: the latter is evaluated elementwise
    # when keys is an ndarray and cannot be used as a condition.
    if keys is None:
        keys = list(complete_data.keys())

    common_dates = becgis.common_dates([complete_data[key][1] for key in keys])
    becgis.assert_proj_res_ndv([complete_data[key][0] for key in keys])

    # The mask does not change between dates, so evaluate it only once.
    outside_mask = np.isnan(becgis.open_as_array(mask, nan_values=True))

    tss = dict()
    for key in keys:
        fhs = complete_data[key][0]
        fh_dates = complete_data[key][1]
        means = []
        for date in common_dates:
            tif = fhs[fh_dates == date][0]
            data = becgis.open_as_array(tif, nan_values=True)
            data[np.isnan(data)] = 0.0
            data[outside_mask] = np.nan
            means.append(np.nanmean(data))
        # float64 matches the dtype produced by repeatedly appending to an
        # empty float array.
        tss[key] = (common_dates, np.array(means, dtype=np.float64))
    return tss
def calc_basinmean(perc_fh, lu_fh):
    """
    Calculate the mean of a map after masking out the areas outside a basin
    defined by its landusemap.

    The map is first matched to the landusemap in a temporary folder, which
    is removed again before returning, also when an error occurs.

    Parameters
    ----------
    perc_fh : str
        Filehandle pointing to the map for which the mean needs to be
        determined.
    lu_fh : str
        Filehandle pointing to landusemap.

    Returns
    -------
    percentage : float
        The mean of the map within the border of the lu_fh.
    """
    output_folder = tf.mkdtemp()
    try:
        matched_fhs = becgis.match_proj_res_ndv(lu_fh, np.array([perc_fh]),
                                                output_folder)
        EWR = becgis.open_as_array(matched_fhs[0], nan_values=True)
        LULC = becgis.open_as_array(lu_fh, nan_values=True)
        # Pixels without a landuse value do not contribute to the mean.
        EWR[np.isnan(LULC)] = np.nan
        percentage = np.nanmean(EWR)
    finally:
        # Always clean up, otherwise a failing step leaves the temporary
        # folder behind.
        shutil.rmtree(output_folder)
    return percentage
def root_zone_storage_Wpx(output_folder, rz_sm_fhs, rz_depth_fh):
    """
    Multiply a root depth map with a series of maps and store the products.

    Parameters
    ----------
    output_folder : str
        Folder in which the subfolder 'RZstor' is created (when missing) to
        hold the results.
    rz_sm_fhs : iterable of str
        Filehandles of the maps that are multiplied with the root depth map.
        The last ten characters of each handle are reused in the name of the
        output file.
    rz_depth_fh : str
        Filehandle of the root depth map. Its geo-information is used for
        all output files.

    Returns
    -------
    root_storage_fhs : list
        Filehandles of the created maps, in the order of rz_sm_fhs.
    """
    target_dir = os.path.join(output_folder, "RZstor")
    if not os.path.exists(target_dir):
        os.mkdir(target_dir)

    depth = becgis.open_as_array(rz_depth_fh, nan_values=True)
    geo_info = becgis.get_geoinfo(rz_depth_fh)

    def _store_product(sm_fh):
        # Create one output map and return its filehandle.
        moisture = becgis.open_as_array(sm_fh, nan_values=True)
        product = depth * moisture
        target_fh = os.path.join(target_dir,
                                 'RZ_storage_mm_%s' % (sm_fh[-10:]))
        becgis.create_geotiff(target_fh, product, *geo_info)
        return target_fh

    return [_store_product(sm_fh) for sm_fh in rz_sm_fhs]
def calc_ETs(ET, lu_fh, sheet1_lucs):
    """
    Sum the values of an array per landuse category.

    Parameters
    ----------
    ET : ndarray
        Array with the data to be summed.
    lu_fh : str
        Filehandle pointing to the landusemap.
    sheet1_lucs : dict
        Maps a category name to the landuse classes belonging to it.

    Returns
    -------
    et : dict
        Maps every category name to the NaN-ignoring sum of the ET values
        on pixels whose landuse equals one of the classes of the category.
    """
    landuse = becgis.open_as_array(lu_fh, nan_values=True)

    def _category_total(classes):
        # A pixel is selected when it matches any class of the category.
        selection = np.logical_or.reduce(
            [landuse == value for value in classes])
        return np.nansum(ET[selection])

    return {category: _category_total(sheet1_lucs[category])
            for category in sheet1_lucs}
def get_ts_from_complete_data_spec(complete_data, lu_fh, keys, a, dates=None):
    """
    Compute spatially averaged, landuse-weighted time series.

    Works like a plain spatial average per date, but every pixel is first
    multiplied by a weight: 0.0 on pixels whose landuse belongs to the
    'Forests', 'Rainfed Crops', 'Shrubland' or 'Forest Plantations' classes
    returned by gd.get_sheet4_6_classes(), and `a` everywhere else.

    Parameters
    ----------
    complete_data : dict
        Maps a variable name to a sequence whose first item is an array of
        filehandles and whose second item is the array of matching dates.
    lu_fh : str
        Filehandle of the landusemap. Pixels where this map is NaN are
        excluded from the mean.
    keys : list or None
        Variables to process. None selects every key in complete_data.
    a : float
        Weight applied to the pixels outside the classes listed above.
    dates : optional
        Not used by this function.

    Returns
    -------
    tss : dict
        Maps each key to a tuple (common_dates, values) where values is a
        1d float array with one weighted mean per common date.
    """
    # `is None` instead of `== None`: the latter is evaluated elementwise
    # when keys is an ndarray and cannot be used as a condition.
    if keys is None:
        keys = list(complete_data.keys())

    common_dates = becgis.common_dates([complete_data[key][1] for key in keys])
    becgis.assert_proj_res_ndv([complete_data[key][0] for key in keys])

    MASK = becgis.open_as_array(lu_fh, nan_values=True)
    outside_mask = np.isnan(MASK)

    lucs = gd.get_sheet4_6_classes()
    gw_classes = list()
    for subclass in ['Forests', 'Rainfed Crops', 'Shrubland',
                     'Forest Plantations']:
        gw_classes += lucs[subclass]
    mask_gw = np.logical_or.reduce([MASK == value for value in gw_classes])

    # The weights depend only on the landusemap, which must have the same
    # shape as the data (it is used to index the data below), so they are
    # built once instead of once per date.
    alpha = np.ones(np.shape(MASK)) * a
    alpha[mask_gw] = 0.0

    tss = dict()
    for key in keys:
        fhs = complete_data[key][0]
        fh_dates = complete_data[key][1]
        means = []
        for date in common_dates:
            tif = fhs[fh_dates == date][0]
            data = becgis.open_as_array(tif, nan_values=True)
            data[np.isnan(data)] = 0.0
            data[outside_mask] = np.nan
            means.append(np.nanmean(data * alpha))
        tss[key] = (common_dates, np.array(means, dtype=np.float64))
    return tss
def fuel_wood(output_folder, lu_fh, AREA, ndm_fhs, fraction_fhs, ndmdates):
    """
    Calculate fuel production maps from NDM maps.

    For every NDM map, 5% of the NDM on the landuse classes
    1, 8, 9, 10, 11, 12 and 13 is split with the fraction map into an
    incremental part (fraction) and a landscape part (1 - fraction). Both
    parts are multiplied by AREA * 100 / 1e6 and saved as tif-files in the
    subfolder 'Fuel' of output_folder.

    Parameters
    ----------
    output_folder : str
        Folder in which the subfolder 'Fuel' is created when missing.
    lu_fh : str
        Filehandle for the land use map.
    AREA : ndarray or float
        Pixel area; multiplied by 100 before use (presumably km2 to ha,
        to be confirmed against the caller).
    ndm_fhs : ndarray
        Array of filehandles of NDM maps.
    fraction_fhs : ndarray
        Array of filehandles of fraction maps, one per NDM map.
    ndmdates : ndarray
        Array of dates, one per NDM map, used to name the output files.

    Returns
    -------
    fuel_fhs_landscape : list
        Filehandles of the landscape maps.
    fuel_fhs_incremental : list
        Filehandles of the incremental maps.
    """
    out_folder = os.path.join(output_folder, "Fuel")
    if not os.path.exists(out_folder):
        os.mkdir(out_folder)

    area_ha = AREA * 100
    LULC = RC.Open_tiff_array(lu_fh)
    geo_out, _proj, _size_x, _size_y = RC.Open_array_info(lu_fh)

    fuel_classes = [1, 8, 9, 10, 11, 12, 13]
    fuel_mask = np.zeros(LULC.shape)
    for fc in fuel_classes:
        fuel_mask[LULC == fc] = 1

    fuel_fhs_landscape = []
    fuel_fhs_incremental = []
    for d, ndm_fh in enumerate(ndm_fhs):
        yield_fract = RC.Open_tiff_array(fraction_fhs[d])
        date1 = ndmdates[d]
        year = '%d' % date1.year
        month = '%02d' % date1.month

        # os.path.join instead of a hard-coded '\\' so that the files also
        # end up inside out_folder on systems that do not use '\' as path
        # separator.
        out_fh_l = os.path.join(
            out_folder, 'fuel_prod_landscape_%s_%s.tif' % (year, month))
        out_fh_i = os.path.join(
            out_folder, 'fuel_prod_incremental_%s_%s.tif' % (year, month))

        NDM = becgis.open_as_array(ndm_fh, nan_values=True)
        NDM_fuel_incremental = (NDM * .05 * fuel_mask * yield_fract
                                * area_ha / 1e6)
        NDM_fuel_landscape = (NDM * .05 * fuel_mask * (1 - yield_fract)
                              * area_ha / 1e6)

        DC.Save_as_tiff(out_fh_i, NDM_fuel_incremental, geo_out)
        DC.Save_as_tiff(out_fh_l, NDM_fuel_landscape, geo_out)
        fuel_fhs_landscape.append(out_fh_l)
        fuel_fhs_incremental.append(out_fh_i)
    return fuel_fhs_landscape, fuel_fhs_incremental
def lu_type_sum(data_fh, lu_fh, AREA, lu_dict, convert=None):
    """
    Sum the values of a map per group of landuse classes.

    Parameters
    ----------
    data_fh : str
        Filehandle of the map with the data to be summed.
    lu_fh : str
        Filehandle of the landusemap.
    AREA : ndarray or float
        Only used when convert is 'mm_to_km3': the data is then multiplied
        in place by AREA / 1e6 before summing.
    lu_dict : dict
        Maps a group name to the landuse classes belonging to it.
    convert : str, optional
        'mm_to_km3' applies the conversion described above; any other value
        leaves the data untouched.

    Returns
    -------
    out_data : dict
        Maps every group name to the NaN-ignoring sum of the data on pixels
        whose landuse equals one of the classes of the group.
    """
    landuse = RC.Open_tiff_array(lu_fh)
    values = becgis.open_as_array(data_fh, nan_values=True)
    if convert == 'mm_to_km3':
        values *= AREA / 1e6

    def _group_total(classes):
        # Count per pixel how many classes match; a non-zero count selects
        # the pixel.
        hits = np.sum([landuse == value for value in classes], axis=0)
        return np.nansum(values[hits.astype(bool)])

    return {group: _group_total(classes)
            for group, classes in lu_dict.items()}
def recycle(output_folder, et_bg_fhs, recy_ratio, lu_fh, et_type):
    """
    Multiply a series of maps with a recycling ratio and store the results.

    Parameters
    ----------
    output_folder : str
        Folder in which the subfolder 'temp_et_recycle' is created when
        missing.
    et_bg_fhs : iterable of str
        Filehandles of the maps to be multiplied. Characters [-11:-4] of
        each handle are reused in the name of the output file.
    recy_ratio : float or ndarray
        Factor the maps are multiplied with.
    lu_fh : str
        Filehandle of the map whose geo-information is used for the output
        files.
    et_type : str
        Label inserted in the name of the output files.

    Returns
    -------
    recycle_fhs : list
        Filehandles of the created maps, in the order of et_bg_fhs.
    """
    out_folder = os.path.join(output_folder, "temp_et_recycle")
    geo_out, _proj, _size_x, _size_y = RC.Open_array_info(lu_fh)
    if not os.path.exists(out_folder):
        os.mkdir(out_folder)

    recycle_fhs = []
    for et_fh in et_bg_fhs:
        # os.path.join instead of a hard-coded "\\" so that the files also
        # end up inside out_folder on systems that do not use '\' as path
        # separator.
        out_fh = os.path.join(
            out_folder, "recycled_et_" + et_type + et_fh[-11:-4] + ".tif")
        et = becgis.open_as_array(et_fh, nan_values=True)
        DC.Save_as_tiff(out_fh, et * recy_ratio, geo_out)
        recycle_fhs.append(out_fh)
    return recycle_fhs
def get_timeseries_raster(ds1_fhs, ds1_dates, coordinates, output_fh, unit='m3/s'):
    """
    Extract a timeseries at one location from a set of raster files and
    store the result in a csv-file.

    When the coordinates are not on the map a message is printed and no
    file is written.

    Parameters
    ----------
    ds1_fhs : 1dnarray
        Array containing filehandles to georeferenced raster files.
    ds1_dates : 1dnarray
        Array containing datetime.date or datetime.datetime objects
        corresponding to the filehandles in ds1_fhs. Length should be equal
        to ds1_fhs.
    coordinates : tuple
        Tuple with the latitude and longitude, (lat, lon).
    output_fh : str
        Filehandle pointing to a csv-file.
    unit : str, optional
        String indicating the unit of the data, default is 'm3/s'.
    """
    xpixel, ypixel = pixelcoordinates(coordinates[0], coordinates[1],
                                      ds1_fhs[0])
    if np.any([np.isnan(xpixel), np.isnan(ypixel)]):
        # Without a valid pixel there is nothing to extract; falling
        # through would fail while writing the data rows.
        print("Coordinates ({0}) not on the map".format(coordinates))
        return

    # Read all values before touching the output file, so a failing raster
    # does not leave a partial csv-file behind.
    ds1_values = [
        becgis.open_as_array(ds1_fhs[ds1_dates == date][0],
                             nan_values=True)[ypixel, xpixel]
        for date in ds1_dates
    ]

    # csv.writer needs a text-mode file opened with newline='' on Python 3;
    # the context manager closes the file when writing fails.
    with open(output_fh, 'w', newline='') as csv_file:
        writer = csv.writer(csv_file, delimiter=';')
        writer.writerow(['lat:', coordinates[0], 'lon:', coordinates[1],
                         unit])
        writer.writerow(['datetime', 'year', 'month', 'day', 'data'])
        for date, data in zip(ds1_dates, ds1_values):
            dt = datetime.datetime(date.year, date.month, date.day, 0, 0, 0)
            writer.writerow([dt, date.year, date.month, date.day, data])
def correct_var(metadata, complete_data, output_dir, formula, new_var, slope=False, bounds=(0, [1.0, 1., 12.])):
    """
    Scale the maps of one variable with a fitted seasonal correction.

    The variable to correct is taken from the formula and the correction
    parameters are obtained from calc_var_correction. Every map of the
    variable is multiplied by

        a[0] * (cos((x - a[2]) * pi / 6) * 0.5 + 0.5) + a[1] * (1 - a[0])

    where x is the result of calc_delta_months(x0, date), and written to
    <output_dir>/<metadata['name']>/data/<new_var>/supply_sw_<tag>.tif. The
    tag consists of the first six characters of the last '_'-separated part
    of the input file name.

    Parameters
    ----------
    metadata : dict
        Needs the key 'name', used to build the output folder.
    complete_data : dict
        Maps a variable name to a sequence whose first item holds the
        filehandles and whose second item holds the matching dates.
    output_dir : str
        Root folder for the results.
    formula : str
        Formula passed to split_form and calc_var_correction.
    new_var : str
        Name of the subfolder in which the corrected maps are stored.
    slope : bool, optional
        Passed on to calc_var_correction.
    bounds : tuple, optional
        Passed on to calc_var_correction.

    Returns
    -------
    a : sequence
        Correction parameters returned by calc_var_correction.
    meta : sequence
        First two items returned by becgis.sort_files for the output folder.
    """
    var = split_form(formula)[0][-1]
    a, x0 = calc_var_correction(metadata, complete_data, output_dir,
                                formula=formula, slope=slope, plot=True,
                                bounds=bounds)

    # Created before the loop: the folder does not depend on the date and
    # it is also needed below when there are no maps to correct.
    folder = os.path.join(output_dir, metadata['name'], 'data', new_var)
    if not os.path.exists(folder):
        os.makedirs(folder)

    for date, fn in zip(complete_data[var][1], complete_data[var][0]):
        geo_info = becgis.get_geoinfo(fn)
        data = becgis.open_as_array(fn, nan_values=True)
        x = calc_delta_months(x0, date)
        fraction = (a[0] * (np.cos((x - a[2]) * (np.pi / 6)) * 0.5 + 0.5)
                    + (a[1] * (1 - a[0])))
        data *= fraction

        date_tag = os.path.split(fn)[1].split('_')[-1]
        out_fn = os.path.join(folder, 'supply_sw_' + date_tag[0:6] + '.tif')
        becgis.create_geotiff(out_fn, data, *geo_info)

    meta = becgis.sort_files(folder, [-10, -6], month_position=[-6, -4])[0:2]
    return a, meta
def calc_non_utilizable(P, ET, fractions_fh):
    """
    Calculate the non utilizable outflow.

    Parameters
    ----------
    P : ndarray
        Array with the volumes of precipitation per pixel.
    ET : ndarray
        Array with the volumes of evapotranspiration per pixel.
    fractions_fh : str
        Filehandle pointing to a map with the fractions of the (P - ET)
        difference that are non-utilizable.

    Returns
    -------
    non_utilizable_runoff : float
        NaN-ignoring sum over all pixels of (P - ET) times the fraction.
    """
    weights = becgis.open_as_array(fractions_fh, nan_values=True)
    surplus = P - ET
    return np.nansum(surplus * weights)
def diagnosis_wp(metadata, complete_data, output_dir, waterpix):
    """
    Plot a basic basin-scale water balance (P - ET - RO) through time.

    For every date shared by the 'p', 'et', 'tr' and 'etb' datasets the
    'p', 'et' and 'tr' maps are opened, pixels outside the landusemap are
    set to NaN and basin totals (map value * 1e-6 * pixel area) and basin
    means are computed. Figure 1 shows the cumulative totals together with
    the cumulative balance relative to the mean total of 'p'; figure 2
    shows the basin means and the cumulative balance of the means. The
    figures are drawn but not saved.

    Parameters
    ----------
    metadata : dict
        Needs the keys 'lu' (filehandle of the landusemap) and 'name'
        (used in the title of figure 1).
    complete_data : dict
        Maps 'p', 'et', 'tr' and 'etb' to sequences whose first item holds
        the filehandles and whose second item holds the matching dates.
    output_dir : str
        Folder that is created when it does not exist yet.
    waterpix : any
        Not used by this function.
    """
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    LU = becgis.open_as_array(metadata['lu'], nan_values=True)
    outside_basin = np.isnan(LU)

    common_dates = becgis.common_dates([
        complete_data['p'][1], complete_data['et'][1],
        complete_data['tr'][1], complete_data['etb'][1]
    ])
    becgis.assert_proj_res_ndv([
        complete_data['p'][0], complete_data['et'][0], complete_data['tr'][0]
    ])

    area = becgis.map_pixel_area_km(metadata['lu'])
    # Does not depend on the date, so computed once.
    factor = 0.001 * 0.001 * area

    totals = {'p': [], 'et': [], 'tr': []}
    means = {'p': [], 'et': [], 'tr': []}
    for date in common_dates:
        print(date)
        for name in ('p', 'et', 'tr'):
            fhs, fh_dates = complete_data[name][0], complete_data[name][1]
            data = becgis.open_as_array(fhs[fh_dates == date][0],
                                        nan_values=True)
            data[outside_basin] = np.nan
            totals[name].append(np.nansum(data * factor))
            means[name].append(np.nanmean(data))

    p_km3 = np.array(totals['p'], dtype=np.float64)
    et_km3 = np.array(totals['et'], dtype=np.float64)
    ro_km3 = np.array(totals['tr'], dtype=np.float64)
    balance_km3 = p_km3 - et_km3 - ro_km3

    p_mm = np.array(means['p'], dtype=np.float64)
    et_mm = np.array(means['et'], dtype=np.float64)
    ro_mm = np.array(means['tr'], dtype=np.float64)
    balance_mm = p_mm - et_mm - ro_mm

    relative_storage = np.cumsum(balance_km3) / np.mean(p_km3)

    # BASIC BASINSCALE WATERBALANCE (PRE-SHEETS)
    fig = plt.figure(1, figsize=(9, 6))
    plt.clf()
    fig.patch.set_alpha(0.7)
    ax2 = plt.gca()
    ax = ax2.twinx()
    ax2.bar(common_dates, relative_storage, width=25, color='#3ee871')
    # The first argument is passed positionally because its keyword was
    # renamed from `b` to `visible`, and `which` must be lowercase for
    # current matplotlib versions.
    ax2.grid(True, which='major', color='0.65', linestyle='--', zorder=0)
    # Raw strings keep the LaTeX backslashes without relying on invalid
    # escape sequences.
    ax.bar([common_dates[0]], [0], label=r'$\sum dS / \overline{P}$',
           color='#3ee871')
    ax.plot(common_dates, np.cumsum(balance_km3), label=r'$\sum dS$')
    ax.plot(common_dates, np.cumsum(p_km3), label=r'$\sum (P)$')
    ax.plot(common_dates, np.cumsum(et_km3) + np.cumsum(ro_km3),
            label=r'$\sum (ET + RO)$')

    # Shrink both axes to make room for the legend below the plot.
    box = ax.get_position()
    shrunk = [box.x0, box.y0 + box.height * 0.1, box.width, box.height * 0.9]
    ax.set_position(shrunk)
    ax2.set_position(shrunk)
    ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.1), fancybox=True,
              shadow=True, ncol=5)

    plt.suptitle(
        r'$\sum P = {0:.1f}\;{4}, \ \sum ET = {1:.1f}\;{4}, \sum RO = {2:.1f}\;{4}, \sum dS = {3:.1f}\;{4}$'
        .format(np.sum(p_km3), np.sum(et_km3), np.sum(ro_km3),
                np.sum(balance_km3), r"km^{3}"))
    plt.title(
        r'{0}, ${5} = {2:.3f}\;{6}, {7} = {3:.3f}, dt = {4}\;months$'.format(
            metadata['name'], np.sum(balance_km3), np.mean(balance_km3),
            np.mean(relative_storage), len(p_km3), r"\overline{dS}",
            r"km^{3}", r"\overline{\sum dS / \overline{P}}"))
    plt.xlabel('Time')
    ax2.set_ylabel(r'Relative Storage [months of $\overline{P}$]')
    ax.set_ylabel('Stock [$km^{3}$]')

    plt.figure(2)
    plt.clf()
    ax2 = plt.gca()
    ax = ax2.twinx()
    ax2.plot(common_dates, p_mm, common_dates, et_mm, common_dates, ro_mm)
    ax.plot(common_dates, np.cumsum(balance_mm), 'k')
def compare_rasters2rasters_per_lu(ds1_fhs, ds1_dates, ds2_fhs, ds2_dates, lu_fh, output_dir, dataset_names = ["DS1", "DS2"], class_dictionary = None, no_of_classes = 6):
    """
    Compare two raster datasets with each other per landuse category.

    The landusemap is first limited to the pixels where the first maps of
    both datasets have data. For every common date the mean of both
    datasets is determined per selected landuse class and for the whole
    map, after which scatter plots are made with plot_scatter_series.

    Parameters
    ----------
    ds1_fhs : ndarray
        Array with strings pointing to maps of dataset 1.
    ds1_dates : ndarray
        Array with same shape as ds1_fhs, containing datetime.date objects.
    ds2_fhs : ndarray
        Array with strings pointing to maps of dataset 2.
    ds2_dates : ndarray
        Array with same shape as ds2_fhs, containing datetime.date objects.
    lu_fh : str
        Pointer to a landusemap.
    output_dir : str
        Folder to save results.
    dataset_names : list, optional
        List with two strings describing the names of the two datasets.
        The default list is only read, never modified.
    class_dictionary : dict, optional
        Dictionary specifying all the landuse categories. The first item of
        every value is the landuse class, the key is the name used in the
        plots. When given, a plot of the category areas is stored as
        'landuse_percentages.png' in output_dir.
    no_of_classes : int
        The 'no_of_classes' most dominant classes in the lu_fh are
        compared, the rest is ignored.
    """
    LUCS = becgis.open_as_array(lu_fh, nan_values=True)
    DS1 = becgis.open_as_array(ds1_fhs[0], nan_values=True)
    DS2 = becgis.open_as_array(ds2_fhs[0], nan_values=True)
    DS1[np.isnan(DS2)] = np.nan
    LUCS[np.isnan(DS1)] = np.nan

    classes, counts = np.unique(LUCS[~np.isnan(LUCS)], return_counts=True)
    # Select by position instead of by count value: looking classes up via
    # `counts == count` returns the same class twice when two classes cover
    # the same number of pixels. Order is ascending by count.
    dominant = np.argsort(counts, kind='stable')[-no_of_classes:]
    selected_lucs = list(classes[dominant])

    common_dates = becgis.common_dates([ds1_dates, ds2_dates])

    ds1_totals = []
    ds2_totals = []
    DS1_per_class = {clss: [] for clss in selected_lucs}
    DS2_per_class = {clss: [] for clss in selected_lucs}

    for date in common_dates:
        DS1 = becgis.open_as_array(ds1_fhs[ds1_dates == date][0],
                                   nan_values=True)
        DS2 = becgis.open_as_array(ds2_fhs[ds2_dates == date][0],
                                   nan_values=True)
        for clss in selected_lucs:
            in_class = LUCS == clss
            DS1_per_class[clss].append(np.nanmean(DS1[in_class]))
            DS2_per_class[clss].append(np.nanmean(DS2[in_class]))
        ds1_totals.append(np.nanmean(DS1))
        ds2_totals.append(np.nanmean(DS2))
        print("Finished {0}, going to {1}".format(date, common_dates[-1]))

    # The lookup from class value to category name is the same for every
    # class, so it is built once.
    cats = None
    if class_dictionary is not None:
        cats = {v[0]: k for k, v in class_dictionary.items()}

    for clss in selected_lucs:
        title = clss if cats is None else cats[clss]
        plot_scatter_series(np.array(DS1_per_class[clss], dtype=np.float64),
                            np.array(DS2_per_class[clss], dtype=np.float64),
                            dataset_names[0], dataset_names[1], title,
                            output_dir)

    plot_scatter_series(np.array(ds1_totals, dtype=np.float64),
                        np.array(ds2_totals, dtype=np.float64),
                        dataset_names[0], dataset_names[1], "Total Area",
                        output_dir)

    if class_dictionary is not None:
        output_fh = os.path.join(output_dir, 'landuse_percentages.png')
        # The masked landusemap is only needed for this plot, so it is
        # written here and always removed again, also when plotting fails.
        masked_lu_fh = lu_fh.replace('.tif', '_.tif')
        driver, NDV, xsize, ysize, GeoT, Projection = becgis.get_geoinfo(lu_fh)
        try:
            becgis.create_geotiff(masked_lu_fh, LUCS, driver, NDV, xsize,
                                  ysize, GeoT, Projection)
            becgis.plot_category_areas(masked_lu_fh, class_dictionary,
                                       output_fh, area_treshold=0.01)
        finally:
            if os.path.exists(masked_lu_fh):
                os.remove(masked_lu_fh)
def compare_rasters2rasters(ds1_fhs, ds1_dates, ds2_fhs, ds2_dates, output_dir = None, dataset_names = None, data_treshold = 0.75):
    """
    Compare two series of raster maps by computing the root mean squared
    error (RMSE) per pixel over the dates both series have in common.

    Parameters
    ----------
    ds1_fhs : ndarray
        Array pointing to georeferenced raster files of dataset 1.
    ds1_dates : ndarray
        Array corresponding to ds1_fhs specifying the dates.
    ds2_fhs : ndarray
        Array pointing to georeferenced raster files of dataset 2.
    ds2_dates : ndarray
        Array corresponding to ds2_fhs specifying the dates.
    output_dir : str, optional
        Directory in which the subfolder 'spatial_errors' is created to
        store one geotiff per result. Nothing is written when None
        (default).
    dataset_names : list, optional
        List of strings describing the names of the datasets, used in the
        names of the output files. e.g. ['CHIRPS', 'ERA-I']. Default is
        ['DS1', 'DS2'].
    data_treshold : float, optional
        Pixels with no more than data_treshold * total_number_of_samples
        actual values are set to no-data, i.e. pixels with too few data
        points are ignored.

    Returns
    -------
    results : dict
        Dictionary with the key 'rmse' holding a 2dnarray with the value
        per pixel.

    Examples
    --------
    >>> results = compare_rasters2rasters(ds1_fhs, ds1_dates, ds2_fhs, ds2_dates,
                                          output_dir = r"C:/Desktop/",
                                          dataset_names = ["CHIRPS", "TRMM"])
    """
    becgis.assert_proj_res_ndv([ds1_fhs, ds2_fhs])

    if dataset_names is None:
        dataset_names = ['DS1', 'DS2']

    driver, NDV, xsize, ysize, GeoT, Projection = becgis.get_geoinfo(ds1_fhs[0])
    common_dates = becgis.common_dates([ds1_dates, ds2_dates])

    diff_sum = np.zeros((ysize, xsize))
    non_nans = np.zeros((ysize, xsize))
    samples = len(common_dates)

    for progress, date in enumerate(common_dates, start=1):
        DS1 = becgis.open_as_array(ds1_fhs[ds1_dates == date][0],
                                   nan_values=True)
        DS2 = becgis.open_as_array(ds2_fhs[ds2_dates == date][0],
                                   nan_values=True)
        # Only pixels with a value in both datasets contribute.
        valid = ~(np.isnan(DS1) | np.isnan(DS2))
        non_nans[valid] += 1
        diff_sum[valid] += (DS1[valid] - DS2[valid]) ** 2
        print("progress: {0} of {1} finished".format(progress, samples))

    diff_sum[non_nans <= data_treshold * samples] = np.nan

    results = dict()
    # Pixels without any sample divide by zero; they are replaced by NaN,
    # so the accompanying warnings are suppressed.
    with np.errstate(divide='ignore', invalid='ignore'):
        results['rmse'] = np.where(non_nans == 0., np.nan,
                                   np.sqrt(diff_sum / non_nans))

    # Everything that needs output_dir is done inside this check, so the
    # default output_dir=None no longer fails on building the path.
    if output_dir is not None:
        startdate = common_dates[0].strftime('%Y%m%d')
        enddate = common_dates[-1].strftime('%Y%m%d')
        path = os.path.join(output_dir, 'spatial_errors')
        if not os.path.exists(path):
            os.makedirs(path)
        for varname in results:
            fh = os.path.join(path, '{0}_{1}_vs_{2}_{3}_{4}.tif'.format(
                varname, dataset_names[0], dataset_names[1], startdate,
                enddate))
            becgis.create_geotiff(fh, results[varname], driver, NDV, xsize,
                                  ysize, GeoT, Projection)

    return results
def compare_rasters2stations(ds1_fhs, ds1_dates, station_dict, output_dir, station_names = None, quantity_unit = None, dataset_names = None, method = 'cubic', min_records = 1):
    """
    Compare a series of raster maps with station time series by computing
    error indicators (pearson, rmse, nash-sutcliffe, bias) for each station
    with pairwise_validation.

    Parameters
    ----------
    ds1_fhs : 1dnarray
        Array containing filehandles to georeferenced raster files.
    ds1_dates : 1dnarray
        Array containing datetime.date or datetime.datetime objects
        corresponding to the filehandles in ds1_fhs. Length should be equal
        to ds1_fhs.
    station_dict : dictionary
        Dictionary containing coordinates of stations and timeseries. See
        examples below for an example.
    output_dir : str
        Directory to store several results, i.e. (1) a csv file to load in
        a GIS program, (2) interpolated maps showing the various error
        indicators spatially, (3) scatter plots and time series plots for
        all the stations and (4) histograms of the indicators.
    station_names : dictionary, optional
        Dictionary containing names of the respective stations which are
        added to the csv-file and used in the plots, see Examples for more
        information. Without names the station_id column of the csv-file is
        left empty.
    quantity_unit : list, optional
        List of two strings describing the quantity and unit of the data.
        The list itself is not modified.
    dataset_names : list, optional
        List of strings describing the names of the datasets. Default is
        ['Spatial', 'Station'].
    method : str, optional
        Method used for interpolation of the error-indicators, i.e.:
        'linear', 'nearest' or 'cubic' (default).
    min_records : int, optional
        Stations with fewer dates in common with the rasters are skipped.
        Default is 1.

    Returns
    -------
    results : dictionary
        Maps the coordinates of every compared station to a tuple with the
        indicators in the order pearson, rmse, nash-sutcliffe, bias,
        followed by the number of samples.

    Examples
    --------
    >>> station_dict = {(lat1, lon1): [(datetime.date(year, month, day), data_value),
                                       (datetime.date(year, month, day), data_value), etc.],
                        (lat2, lon2): [(datetime.date(year, month, day), data_value),
                                       (datetime.date(year, month, day), data_value), etc.],
                        etc.}
    >>> station_names = {(lat1,lon1): 'stationname1', (lat2,lon2): 'stationname2', etc.}
    >>> results = compare_rasters2stations(ds1_fhs, ds1_dates, station_dict,
                                           output_dir = r"C:/Desktop",
                                           station_names = None,
                                           quantity_unit = ["P", "mm/month"],
                                           dataset_names = ["CHIRPS", "Meteo Stations"],
                                           method = 'cubic')
    """
    results = dict()
    pixel_coordinates = list()

    if dataset_names is None:
        dataset_names = ['Spatial', 'Station']
    if quantity_unit is not None:
        # Build a new list so the list of the caller is left untouched.
        quantity_unit = [quantity_unit[0], r'[' + quantity_unit[1] + r']']
    else:
        quantity_unit = ['data', '']

    becgis.assert_proj_res_ndv([ds1_fhs])
    no_of_stations = len(station_dict)
    ds1_dates = becgis.convert_datetime_date(ds1_dates, out='datetime')

    # Created up front: the folders do not depend on the station and
    # path_int is also needed after the loop.
    path_scatter = os.path.join(output_dir, 'scatter_plots')
    path_ts = os.path.join(output_dir, 'time_series')
    path_int = os.path.join(output_dir, 'interp_errors')
    for path in (path_scatter, path_ts, path_int):
        if not os.path.exists(path):
            os.makedirs(path)

    for i, station in enumerate(station_dict.keys()):
        station_dates, station_values = unzip(station_dict[station])
        common_dates = becgis.common_dates([ds1_dates, station_dates])
        sample_size = common_dates.size

        if sample_size < min_records:
            print("____station {0} of {1} skipped____ (less than {2} matching records)".format(i+1, no_of_stations, min_records))
            continue

        xpixel, ypixel = pixelcoordinates(station[0], station[1], ds1_fhs[0])
        if np.any([np.isnan(xpixel), np.isnan(ypixel)]):
            print("Skipping station ({0}), cause its not on the map".format(station))
            continue

        ds1_values = [
            becgis.open_as_array(ds1_fhs[ds1_dates == date][0],
                                 nan_values=True)[ypixel, xpixel]
            for date in common_dates
        ]
        common_station_values = [station_values[station_dates == date][0]
                                 for date in common_dates]

        results[station] = pairwise_validation(ds1_values,
                                               common_station_values)
        results[station] += (sample_size,)
        # Appended together with the result, so both keep the same order.
        pixel_coordinates.append((xpixel, ypixel))

        xlabel = '{0} {1} {2}'.format(dataset_names[0], quantity_unit[0], quantity_unit[1])
        ylabel = '{0} {1} {2}'.format(dataset_names[1], quantity_unit[0], quantity_unit[1])
        if station_names is not None:
            title = station_names[station]
            fn = os.path.join(path_scatter, '{0}_vs_{1}.png'.format(station_names[station], dataset_names[0]))
            fnts = os.path.join(path_ts, '{0}_vs_{1}.png'.format(station_names[station], dataset_names[0]))
        else:
            title = station
            fn = os.path.join(path_scatter, '{0}_vs_station_{1}.png'.format(dataset_names[0], i))
            fnts = os.path.join(path_ts, '{0}_vs_station_{1}.png'.format(dataset_names[0], i))

        suptitle = 'pearson: {0:.5f}, rmse: {1:.5f}, ns: {2:.5f}, bias: {3:.5f}, n: {4:.0f}'.format(
            results[station][0], results[station][1], results[station][2],
            results[station][3], results[station][4])
        plot_scatter_series(ds1_values, common_station_values, xlabel, ylabel,
                            title, fn, suptitle=suptitle, dates=common_dates)

        xaxis_label = '{0} {1}'.format(quantity_unit[0], quantity_unit[1])
        xlabel = '{0}'.format(dataset_names[0])
        ylabel = '{0}'.format(dataset_names[1])
        plot_time_series(ds1_values, common_station_values, common_dates,
                         xlabel, ylabel, xaxis_label, title, fnts,
                         suptitle=suptitle)
        print("station {0} ({3}) of {1} finished ({2} matching records)".format(i+1, no_of_stations, sample_size, title))

    n = len(results)
    csv_filename = os.path.join(output_dir, '{0}stations_vs_{1}_indicators.csv'.format(n, dataset_names[0]))
    # csv.writer needs a text-mode file opened with newline='' on Python 3.
    with open(csv_filename, 'w', newline='') as csv_file:
        writer = csv.writer(csv_file, delimiter=';')
        writer.writerow(['longitude', 'latitude', 'station_id', 'pearson',
                         'rmse', 'nash_sutcliffe', 'bias', 'no_of_samples'])
        for station, indicators in results.items():
            # station_names is optional; without it there is no id to write.
            station_id = station_names[station] if station_names is not None else ''
            writer.writerow([station[1], station[0], station_id,
                             indicators[0], indicators[1], indicators[2],
                             indicators[3], indicators[4]])

    if not results:
        # Without any compared station there is nothing to summarise or
        # interpolate.
        print("No stations could be compared, skipping histograms and interpolation")
        return results

    rslt = {'Relative Bias': list(), 'RMSE': list(),
            'Pearson Coefficient': list(),
            'Nash-Sutcliffe Coefficient': list(),
            'Number Of Samples': list()}
    for value in results.values():
        rslt['Relative Bias'].append(value[3])
        rslt['RMSE'].append(value[1])
        rslt['Pearson Coefficient'].append(value[0])
        rslt['Nash-Sutcliffe Coefficient'].append(value[2])
        rslt['Number Of Samples'].append(value[4])

    for key, value in rslt.items():
        title = '{0}'.format(key)
        print(title)
        # Compare strings by value; `is` tests object identity.
        if key == 'RMSE':
            xlabel = '{0} [mm/month]'.format(key)
        else:
            xlabel = key
        value = np.array(value)
        value = value[(~np.isnan(value)) & (~np.isinf(value))]
        suptitle = 'mean: {0:.5f}, std: {1:.5f}, n: {2}'.format(np.nanmean(value), np.nanstd(value), n)
        print(value)
        plot_histogram(value, title, xlabel, output_dir, suptitle=suptitle)

    driver, NDV, xsize, ysize, GeoT, Projection = becgis.get_geoinfo(ds1_fhs[0])
    dummy_map = becgis.open_as_array(ds1_fhs[0])
    grid = np.mgrid[0:ysize, 0:xsize]
    var_names = ['pearson', 'rmse', 'ns', 'bias', 'no_of_samples']

    for i, var in enumerate(unzip(list(results.values()))):
        known = ~np.isnan(var)
        xy = np.array(pixel_coordinates)[known]
        z = var[known]
        interpolation_field = interpolate.griddata(
            xy, z, (grid[1], grid[0]), method=method,
            fill_value=np.nanmean(z))
        interpolation_field[dummy_map == NDV] = NDV
        fh = os.path.join(path_int, '{0}_{1}stations_vs_{2}.tif'.format(var_names[i], len(xy), dataset_names[0]))
        becgis.create_geotiff(fh, interpolation_field, driver, NDV, xsize,
                              ysize, GeoT, Projection)

    return results
def calc_sheet1(entries, lu_fh, sheet1_lucs, recycling_ratio, q_outflow, q_out_avg, output_folder, q_in_sw, q_in_gw=0., q_in_desal=0., q_out_sw=0., q_out_gw=0.):
    """
    Calculate the values needed to plot Water Accounting Plus Sheet 1.

    Parameters
    ----------
    entries : dict
        Dictionary with filehandles. The keys 'P', 'ETgreen', 'ETblue',
        'WPL', 'EWR' and 'Fractions' are read.
    lu_fh : str
        Filehandle pointing to the landuse map.
    sheet1_lucs : dict
        Dictionary sorting different landuse classes into the categories
        'Managed', 'Modified', 'Protected' and 'Utilized'.
    recycling_ratio : float
        Value indicating the recycling ratio.
    q_outflow : float
        The outflow of the basin.
    q_out_avg : float
        The longterm average outflow.
    output_folder : str
        Not used by this function.
    q_in_sw : float
        Surfacewater inflow into the basin.
    q_in_gw : float, optional
        Groundwater inflow into the basin. Default is 0.0.
    q_in_desal : float, optional
        Desalinised water inflow into the basin. Default is 0.0.
    q_out_sw : float, optional
        Additional surfacewater outflow from basin. Default is 0.0.
    q_out_gw : float, optional
        Groundwater outflow from the basin. Default is 0.0.

    Returns
    -------
    results : dict
        Dictionary containing necessary variables for Sheet 1.
    """
    results = dict()

    landuse = becgis.open_as_array(lu_fh, nan_values=True)
    precip = becgis.open_as_array(entries['P'], nan_values=True)
    et_green = becgis.open_as_array(entries['ETgreen'], nan_values=True)
    et_blue = becgis.open_as_array(entries['ETblue'], nan_values=True)

    pixel_area = becgis.map_pixel_area_km(lu_fh)

    gray_water_fraction = calc_basinmean(entries['WPL'], lu_fh)
    ewr_percentage = calc_basinmean(entries['EWR'], lu_fh)

    # Exclude everything outside the landuse map.
    for layer in (precip, et_green, et_blue):
        layer[np.isnan(landuse)] = np.nan

    # Scale all three maps with 1e-6 * pixel area in one go.
    scaled = np.array([precip, et_green, et_blue]) * 0.000001 * pixel_area
    precip, et_green, et_blue = scaled

    et_total = np.nansum([et_blue, et_green], axis=0)

    et_advection, p_advection, p_recycled, storage_change = calc_wb(
        precip, et_total, q_outflow, recycling_ratio,
        q_in_sw=q_in_sw, q_in_gw=q_in_gw, q_in_desal=q_in_desal,
        q_out_sw=q_out_sw, q_out_gw=q_out_gw)
    results['et_advection'] = et_advection
    results['p_advection'] = p_advection
    results['p_recycled'] = p_recycled
    results['dS'] = storage_change

    # Mekonnen and Hoekstra (2015), Global Gray Water Footprint and Water
    # Pollution Levels Related to Anthropogenic Nitrogen Loads to Fresh Water
    results['non_recoverable'] = gray_water_fraction * (q_outflow + q_out_sw)
    results['reserved_outflow_demand'] = q_out_avg * ewr_percentage
    results['other'] = 0.0

    landscape_et = calc_ETs(et_green, lu_fh, sheet1_lucs)
    incremental_et = calc_ETs(et_blue, lu_fh, sheet1_lucs)

    results['manmade'] = incremental_et['Managed']
    results['natural'] = (incremental_et['Modified']
                          + incremental_et['Protected']
                          + incremental_et['Utilized'])

    other_fractions = dict(Modified=0.00, Managed=1.00, Protected=0.00,
                           Utilized=0.00)
    non_recoverable_fractions = dict(Modified=0.00, Managed=1.00,
                                     Protected=0.00, Utilized=0.00)

    uf_plu, uf_ulu, uf_mlu, uf_mwu = calc_utilizedflow(
        incremental_et, results['other'], results['non_recoverable'],
        other_fractions, non_recoverable_fractions)
    results['uf_plu'] = uf_plu
    results['uf_ulu'] = uf_ulu
    results['uf_mlu'] = uf_mlu
    results['uf_mwu'] = uf_mwu

    net_inflow = (results['p_recycled'] + results['p_advection'] + q_in_sw
                  + q_in_gw + q_in_desal + results['dS'])
    consumed_water = (np.nansum(list(landscape_et.values()))
                      + np.nansum(list(incremental_et.values()))
                      + results['other'] + results['non_recoverable'])
    non_consumed_water = net_inflow - consumed_water

    # The non-utilizable outflow is never negative and never more than the
    # water that is left after consumption.
    non_utilizable = max(
        0.0, calc_non_utilizable(precip, et_total, entries['Fractions']))
    results['non_utilizable_outflow'] = min(non_consumed_water,
                                            non_utilizable)

    results['reserved_outflow_actual'] = min(
        non_consumed_water - results['non_utilizable_outflow'],
        results['reserved_outflow_demand'])

    results['utilizable_outflow'] = max(
        0.0,
        non_consumed_water - results['non_utilizable_outflow']
        - results['reserved_outflow_actual'])

    results['landscape_et_mwu'] = landscape_et['Managed']
    results['landscape_et_mlu'] = landscape_et['Modified']
    results['landscape_et_ulu'] = landscape_et['Utilized']
    results['landscape_et_plu'] = landscape_et['Protected']

    results['q_outflow'] = q_outflow
    results['q_in_sw'] = q_in_sw
    results['q_in_gw'] = q_in_gw
    results['q_in_desal'] = q_in_desal
    results['q_out_sw'] = q_out_sw
    results['q_out_gw'] = q_out_gw

    return results
def livestock_feed(output_folder, lu_fh, AREA, ndm_fhs, feed_dict, live_feed, cattle_fh, fraction_fhs, ndmdates):
    """
    Calculate natural livestock feed production.

    Every NDM map is multiplied by the feed value of the landuse class of
    the pixel and split with the fraction map into an incremental part
    (fraction) and a landscape part (1 - fraction). Both parts are
    multiplied by AREA * 100 / 1e6 and saved as tif-files in the subfolder
    'Feed' of output_folder.

    Parameters
    ----------
    output_folder : str
        Folder in which the subfolder 'Feed' is created when missing.
    lu_fh : str
        Filehandle for land use map.
    AREA : ndarray or float
        Pixel area; multiplied by 100 before use (presumably km2 to ha,
        to be confirmed against the caller).
    ndm_fhs : ndarray
        Array of filehandles of NDM maps.
    feed_dict : dict
        Dictionary 'pasture class': [list of LULC].
    live_feed : dict
        Dictionary 'pasture class': value the NDM is multiplied with on
        the landuse classes of that pasture class.
    cattle_fh : str
        Filehandle for cattle map. Not used by this function.
    fraction_fhs : ndarray
        Array of filehandles of fraction maps, one per NDM map.
    ndmdates : ndarray
        Array of dates, one per NDM map, used to name the output files.

    Returns
    -------
    feed_fhs_landscape : list
        Filehandles of the landscape maps.
    feed_fhs_incremental : list
        Filehandles of the incremental maps.
    """
    out_folder = os.path.join(output_folder, "Feed")
    if not os.path.exists(out_folder):
        os.mkdir(out_folder)

    area_ha = AREA * 100
    LULC = RC.Open_tiff_array(lu_fh)
    geo_out, _proj, _size_x, _size_y = RC.Open_array_info(lu_fh)

    # Map with the feed value per pixel, zero outside the listed classes.
    f_pct = np.zeros(LULC.shape)
    for lu_type, classes in feed_dict.items():
        mask = np.logical_or.reduce([LULC == value for value in classes])
        f_pct[mask] = live_feed[lu_type]

    feed_fhs_landscape = []
    feed_fhs_incremental = []
    for d, ndm_fh in enumerate(ndm_fhs):
        date1 = ndmdates[d]
        year = '%d' % date1.year
        month = '%02d' % date1.month
        yield_fract = RC.Open_tiff_array(fraction_fhs[d])

        # os.path.join instead of a hard-coded '\\' so that the files also
        # end up inside out_folder on systems that do not use '\' as path
        # separator.
        out_fh_l = os.path.join(
            out_folder, 'feed_prod_landscape_%s_%s.tif' % (year, month))
        out_fh_i = os.path.join(
            out_folder, 'feed_prod_incremental_%s_%s.tif' % (year, month))

        NDM = becgis.open_as_array(ndm_fh, nan_values=True)
        NDM_feed = NDM * f_pct
        NDM_feed_incremental = NDM_feed * yield_fract * area_ha / 1e6
        NDM_feed_landscape = (NDM_feed * (1 - yield_fract)) * area_ha / 1e6

        DC.Save_as_tiff(out_fh_l, NDM_feed_landscape, geo_out)
        DC.Save_as_tiff(out_fh_i, NDM_feed_incremental, geo_out)
        feed_fhs_landscape.append(out_fh_l)
        feed_fhs_incremental.append(out_fh_i)
    return feed_fhs_landscape, feed_fhs_incremental