def get_ts_from_complete_data(complete_data, mask, keys, dates=None):
    """
    Compute spatially averaged time series for each variable in complete_data,
    averaged over the pixels where the mask raster has data.
    """
    if keys is None:
        keys = list(complete_data.keys())

    common_dates = becgis.common_dates([complete_data[key][1] for key in keys])
    becgis.assert_proj_res_ndv([complete_data[key][0] for key in keys])

    MASK = becgis.open_as_array(mask, nan_values=True)

    tss = dict()

    for key in keys:
        var_mm = np.array([])
        for date in common_dates:
            tif = complete_data[key][0][complete_data[key][1] == date][0]
            DATA = becgis.open_as_array(tif, nan_values=True)
            # Treat missing data as zero, then restrict to the mask area.
            DATA[np.isnan(DATA)] = 0.0
            DATA[np.isnan(MASK)] = np.nan
            var_mm = np.append(var_mm, np.nanmean(DATA))
        tss[key] = (common_dates, var_mm)

    return tss
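
# Example usage, as a minimal sketch: the layout of 'complete_data'
# ({key: (filehandles, dates)}) follows the function above; the mask path and
# key names below are hypothetical.
#
# >>> tss = get_ts_from_complete_data(complete_data, r"C:/data/basin_mask.tif",
# ...                                 keys=['p', 'et'])
# >>> dates, p_mm = tss['p']  # spatially averaged P [mm] per common date
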
def get_ts_from_complete_data_spec(complete_data, lu_fh, keys, a, dates=None):
    """
    Same as get_ts_from_complete_data, but each pixel is weighted with a factor
    'a', except pixels belonging to the land-use classes in gw_classes, which
    are weighted with 0.
    """
    if keys is None:
        keys = list(complete_data.keys())

    common_dates = becgis.common_dates([complete_data[key][1] for key in keys])
    becgis.assert_proj_res_ndv([complete_data[key][0] for key in keys])

    MASK = becgis.open_as_array(lu_fh, nan_values=True)

    lucs = gd.get_sheet4_6_classes()
    gw_classes = list()
    for subclass in ['Forests', 'Rainfed Crops', 'Shrubland', 'Forest Plantations']:
        gw_classes += lucs[subclass]
    mask_gw = np.logical_or.reduce([MASK == value for value in gw_classes])

    tss = dict()

    for key in keys:
        var_mm = np.array([])
        for date in common_dates:
            tif = complete_data[key][0][complete_data[key][1] == date][0]
            DATA = becgis.open_as_array(tif, nan_values=True)
            DATA[np.isnan(DATA)] = 0.0
            DATA[np.isnan(MASK)] = np.nan
            # Weight all pixels with 'a', except the gw_classes pixels (weight 0).
            alpha = np.ones(np.shape(DATA)) * a
            alpha[mask_gw] = 0.0
            var_mm = np.append(var_mm, np.nanmean(DATA * alpha))
        tss[key] = (common_dates, var_mm)

    return tss
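
# Example usage (a sketch; the key name and the value of 'a' are illustrative):
# every pixel is weighted with 'a', except pixels in the gw_classes selection,
# which get weight 0.0 before the spatial mean is taken.
#
# >>> tss = get_ts_from_complete_data_spec(complete_data, metadata['lu'],
# ...                                      keys=['supply'], a=0.5)
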
def compare_rasters2rasters(ds1_fhs, ds1_dates, ds2_fhs, ds2_dates,
                            output_dir=None, dataset_names=None,
                            data_treshold=0.75):
    """
    Compare two series of raster maps by computing the root mean square error
    (RMSE) per pixel.

    Parameters
    ----------
    ds1_fhs : list
        List pointing to georeferenced raster files of dataset 1.
    ds1_dates : list
        List corresponding to ds1_fhs specifying the dates.
    ds2_fhs : list
        List pointing to georeferenced raster files of dataset 2.
    ds2_dates : list
        List corresponding to ds2_fhs specifying the dates.
    output_dir : str, optional
        Directory to store a geotiff showing the RMSE per pixel.
    dataset_names : list, optional
        List of strings describing the names of the datasets,
        e.g. ['CHIRPS', 'ERA-I'].
    data_treshold : float, optional
        Pixels with fewer than data_treshold * total_number_of_samples actual
        values are set to no-data, i.e. pixels with too few data points are
        ignored.

    Returns
    -------
    results : dict
        Dictionary with one key ('rmse') holding a 2d ndarray of the RMSE
        per pixel.

    Examples
    --------
    >>> results = compare_rasters2rasters(ds1_fhs, ds1_dates, ds2_fhs, ds2_dates,
    ...                                   output_dir=r"C:/Desktop/",
    ...                                   dataset_names=["CHIRPS", "TRMM"])
    """
    becgis.assert_proj_res_ndv([ds1_fhs, ds2_fhs])

    if dataset_names is None:
        dataset_names = ['DS1', 'DS2']

    driver, NDV, xsize, ysize, GeoT, Projection = becgis.get_geoinfo(ds1_fhs[0])

    common_dates = becgis.common_dates([ds1_dates, ds2_dates])

    diff_sum = np.zeros((ysize, xsize))
    non_nans = np.zeros((ysize, xsize))

    progress = 0
    samples = len(common_dates)

    for date in common_dates:
        DS1 = becgis.open_as_array(ds1_fhs[ds1_dates == date][0], nan_values=True)
        DS2 = becgis.open_as_array(ds2_fhs[ds2_dates == date][0], nan_values=True)

        # Mask pixels that are no-data in either of the two datasets.
        DS1[np.isnan(DS2)] = np.nan
        DS2[np.isnan(DS1)] = np.nan

        non_nans[~np.isnan(DS1)] += np.ones((ysize, xsize))[~np.isnan(DS1)]

        diff = (DS1 - DS2)**2
        diff_sum[~np.isnan(DS1)] += diff[~np.isnan(DS1)]

        progress += 1
        print("progress: {0} of {1} finished".format(progress, samples))

    # Ignore pixels with too few overlapping samples.
    diff_sum[non_nans <= data_treshold * samples] = np.nan

    results = dict()
    results['rmse'] = np.where(non_nans == 0., np.nan, np.sqrt(diff_sum / non_nans))

    if output_dir is not None:
        startdate = common_dates[0].strftime('%Y%m%d')
        enddate = common_dates[-1].strftime('%Y%m%d')

        path = os.path.join(output_dir, 'spatial_errors')
        if not os.path.exists(path):
            os.makedirs(path)

        for varname in list(results.keys()):
            fh = os.path.join(path, '{0}_{1}_vs_{2}_{3}_{4}.tif'.format(
                varname, dataset_names[0], dataset_names[1], startdate, enddate))
            becgis.create_geotiff(fh, results[varname], driver, NDV,
                                  xsize, ysize, GeoT, Projection)

    return results
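
# Per pixel the statistic above reduces to
# RMSE = sqrt(sum_t (DS1_t - DS2_t)**2 / n), with n the number of dates on
# which both datasets have data for that pixel. A toy check with illustrative
# values (not part of the pipeline):
#
# >>> a = np.array([[1.0, 2.0]])
# >>> b = np.array([[1.0, 4.0]])
# >>> np.sqrt(np.nanmean((a - b)**2, axis=0))  # single-date RMSE per pixel
# array([0., 2.])
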
def compare_rasters2stations(ds1_fhs, ds1_dates, station_dict, output_dir,
                             station_names=None, quantity_unit=None,
                             dataset_names=None, method='cubic', min_records=1):
    """
    Compare a series of raster maps with station time series by computing the
    relative bias, RMSE, Pearson correlation coefficient and the Nash-Sutcliffe
    coefficient for each station.

    Parameters
    ----------
    ds1_fhs : 1dnarray
        List containing filehandles to georeferenced raster files.
    ds1_dates : 1dnarray
        List containing datetime.date or datetime.datetime objects
        corresponding to the filehandles in ds1_fhs. Length should be equal
        to ds1_fhs.
    station_dict : dict
        Dictionary containing coordinates of stations and timeseries. See
        Examples below.
    output_dir : str
        Directory to store several results, i.e. (1) a csv file to load into a
        GIS program, (2) interpolated maps showing the various error indicators
        spatially and (3) scatter plots for all the stations.
    station_names : dict, optional
        Dictionary containing names of the respective stations which can be
        added to the csv file, see Examples for more information.
    quantity_unit : list, optional
        List of two strings describing the quantity and unit of the data.
    dataset_names : list, optional
        List of strings describing the names of the datasets.
    method : str, optional
        Method used for interpolation of the error indicators, i.e. 'linear',
        'nearest' or 'cubic' (default).
    min_records : int, optional
        Minimum number of overlapping records required before a station is
        evaluated, default is 1.

    Returns
    -------
    results : dict
        Dictionary containing several error indicators per station.

    Examples
    --------
    >>> station_dict = {(lat1, lon1): [(datetime.date(year, month, day), data_value),
    ...                                (datetime.date(year, month, day), data_value), etc.],
    ...                 (lat2, lon2): [(datetime.date(year, month, day), data_value),
    ...                                (datetime.date(year, month, day), data_value), etc.],
    ...                 etc.}

    >>> station_names = {(lat1, lon1): 'stationname1',
    ...                  (lat2, lon2): 'stationname2', etc.}

    >>> results = compare_rasters2stations(ds1_fhs, ds1_dates, station_dict,
    ...                                    output_dir=r"C:/Desktop",
    ...                                    station_names=None,
    ...                                    quantity_unit=["P", "mm/month"],
    ...                                    dataset_names=["CHIRPS", "Meteo Stations"],
    ...                                    method='cubic')
    """
    results = dict()
    pixel_coordinates = list()

    if dataset_names is None:
        dataset_names = ['Spatial', 'Station']

    if quantity_unit is not None:
        quantity_unit[1] = r'[' + quantity_unit[1] + r']'
    else:
        quantity_unit = ['data', '']

    becgis.assert_proj_res_ndv([ds1_fhs])

    no_of_stations = len(list(station_dict.keys()))

    ds1_dates = becgis.convert_datetime_date(ds1_dates, out='datetime')

    for i, station in enumerate(station_dict.keys()):

        station_dates, station_values = unzip(station_dict[station])
        common_dates = becgis.common_dates([ds1_dates, station_dates])
        sample_size = common_dates.size

        if sample_size >= min_records:

            ds1_values = list()
            xpixel, ypixel = pixelcoordinates(station[0], station[1], ds1_fhs[0])

            if np.any([np.isnan(xpixel), np.isnan(ypixel)]):
                print("Skipping station ({0}), because it is not on the map".format(station))
                continue
            else:
                for date in common_dates:
                    ds1_values.append(becgis.open_as_array(
                        ds1_fhs[ds1_dates == date][0],
                        nan_values=True)[ypixel, xpixel])

                common_station_values = [station_values[station_dates == date][0]
                                         for date in common_dates]

                results[station] = pairwise_validation(ds1_values, common_station_values)
                results[station] += (sample_size,)

                pixel_coordinates.append((xpixel, ypixel))

                #m, b = np.polyfit(ds1_values, common_station_values, 1)

                path_scatter = os.path.join(output_dir, 'scatter_plots')
                if not os.path.exists(path_scatter):
                    os.makedirs(path_scatter)

                path_ts = os.path.join(output_dir, 'time_series')
                if not os.path.exists(path_ts):
                    os.makedirs(path_ts)

                path_int = os.path.join(output_dir, 'interp_errors')
                if not os.path.exists(path_int):
                    os.makedirs(path_int)

                xlabel = '{0} {1} {2}'.format(dataset_names[0], quantity_unit[0], quantity_unit[1])
                ylabel = '{0} {1} {2}'.format(dataset_names[1], quantity_unit[0], quantity_unit[1])

                if station_names is not None:
                    title = station_names[station]
                    fn = os.path.join(path_scatter, '{0}_vs_{1}.png'.format(
                        station_names[station], dataset_names[0]))
                    fnts = os.path.join(path_ts, '{0}_vs_{1}.png'.format(
                        station_names[station], dataset_names[0]))
                else:
                    title = station
                    fn = os.path.join(path_scatter, '{0}_vs_station_{1}.png'.format(
                        dataset_names[0], i))
                    fnts = os.path.join(path_ts, '{0}_vs_station_{1}.png'.format(
                        dataset_names[0], i))

                suptitle = ('pearson: {0:.5f}, rmse: {1:.5f}, ns: {2:.5f}, '
                            'bias: {3:.5f}, n: {4:.0f}').format(
                    results[station][0], results[station][1], results[station][2],
                    results[station][3], results[station][4])

                plot_scatter_series(ds1_values, common_station_values, xlabel,
                                    ylabel, title, fn, suptitle=suptitle,
                                    dates=common_dates)

                xaxis_label = '{0} {1}'.format(quantity_unit[0], quantity_unit[1])
                xlabel = '{0}'.format(dataset_names[0])
                ylabel = '{0}'.format(dataset_names[1])

                plot_time_series(ds1_values, common_station_values, common_dates,
                                 xlabel, ylabel, xaxis_label, title, fnts,
                                 suptitle=suptitle)

                print("station {0} ({3}) of {1} finished ({2} matching records)".format(
                    i + 1, no_of_stations, sample_size, title))

        else:
            print("____station {0} of {1} skipped____ (less than {2} matching records)".format(
                i + 1, no_of_stations, min_records))
            continue

    n = len(results)
    csv_filename = os.path.join(output_dir, '{0}stations_vs_{1}_indicators.csv'.format(
        n, dataset_names[0]))

    # Note: 'w' with newline='' is required by the csv module in Python 3
    # (the original 'wb' mode only works in Python 2).
    with open(csv_filename, 'w', newline='') as csv_file:
        writer = csv.writer(csv_file, delimiter=';')
        writer.writerow(['longitude', 'latitude', 'station_id', 'pearson', 'rmse',
                         'nash_sutcliffe', 'bias', 'no_of_samples'])
        for station in list(results.keys()):
            writer.writerow([station[1], station[0], station_names[station],
                             results[station][0], results[station][1],
                             results[station][2], results[station][3],
                             results[station][4]])

    rslt = {'Relative Bias': list(), 'RMSE': list(), 'Pearson Coefficient': list(),
            'Nash-Sutcliffe Coefficient': list(), 'Number Of Samples': list()}

    for value in list(results.values()):
        rslt['Relative Bias'].append(value[3])
        rslt['RMSE'].append(value[1])
        rslt['Pearson Coefficient'].append(value[0])
        rslt['Nash-Sutcliffe Coefficient'].append(value[2])
        rslt['Number Of Samples'].append(value[4])

    for key, value in list(rslt.items()):
        title = '{0}'.format(key)
        print(title)
        if key == 'RMSE':
            xlabel = '{0} [mm/month]'.format(key)
        else:
            xlabel = key
        value = np.array(value)
        value = value[(~np.isnan(value)) & (~np.isinf(value))]
        suptitle = 'mean: {0:.5f}, std: {1:.5f}, n: {2}'.format(
            np.nanmean(value), np.nanstd(value), n)
        print(value)
        plot_histogram(value[(~np.isnan(value)) & (~np.isinf(value))],
                       title, xlabel, output_dir, suptitle=suptitle)

    driver, NDV, xsize, ysize, GeoT, Projection = becgis.get_geoinfo(ds1_fhs[0])
    dummy_map = becgis.open_as_array(ds1_fhs[0])

    grid = np.mgrid[0:ysize, 0:xsize]

    var_names = ['pearson', 'rmse', 'ns', 'bias', 'no_of_samples']

    # Interpolate each error indicator between the station pixels to create
    # spatially continuous error maps.
    for i, var in enumerate(unzip(list(results.values()))):
        xy = np.array(pixel_coordinates)[~np.isnan(var)]
        z = var[~np.isnan(var)]
        interpolation_field = interpolate.griddata(xy, z, (grid[1], grid[0]),
                                                   method=method,
                                                   fill_value=np.nanmean(z))
        interpolation_field[dummy_map == NDV] = NDV
        fh = os.path.join(path_int, '{0}_{1}stations_vs_{2}.tif'.format(
            var_names[i], len(xy), dataset_names[0]))
        becgis.create_geotiff(fh, interpolation_field, driver, NDV,
                              xsize, ysize, GeoT, Projection)

    return results
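
# Example usage (a sketch; coordinates, dates, values and paths are hypothetical):
#
# >>> station_dict = {(35.10, 44.20): [(datetime.date(2008, 1, 1), 25.0),
# ...                                  (datetime.date(2008, 2, 1), 17.5)]}
# >>> results = compare_rasters2stations(p_fhs, p_dates, station_dict,
# ...                                    output_dir=r"C:/output",
# ...                                    dataset_names=["CHIRPS", "Stations"])
# Each results[(lat, lon)] entry is a tuple (pearson, rmse, ns, bias, n),
# matching the column order written to the csv file.
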
def diagnosis_wp(metadata, complete_data, output_dir, waterpix):

    output_dir = os.path.join(output_dir)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    LU = becgis.open_as_array(metadata['lu'], nan_values=True)

#    S = SortWaterPix(waterpix, 'Supply_M', output_dir)
#    becgis.match_proj_res_ndv(metadata['lu'], becgis.list_files_in_folder(S), os.path.join(output_dir, "s_matched"))
#    complete_data['supply'] = becgis.sort_files(os.path.join(output_dir, "s_matched"), [-10,-6], month_position = [-6,-4])[0:2]

    common_dates = becgis.common_dates([complete_data['p'][1],
                                        complete_data['et'][1],
                                        complete_data['tr'][1],
                                        complete_data['etb'][1]])

    becgis.assert_proj_res_ndv([complete_data['p'][0],
                                complete_data['et'][0],
                                complete_data['tr'][0]])

    balance_km3 = np.array([])
    p_km3 = np.array([])
    et_km3 = np.array([])
    ro_km3 = np.array([])

    balance_mm = np.array([])
    p_mm = np.array([])
    et_mm = np.array([])
    ro_mm = np.array([])

    area = becgis.map_pixel_area_km(metadata['lu'])

    for date in common_dates:

        print(date)

        P = complete_data['p'][0][complete_data['p'][1] == date][0]
        ET = complete_data['et'][0][complete_data['et'][1] == date][0]
        RO = complete_data['tr'][0][complete_data['tr'][1] == date][0]

        # Convert mm over a pixel of 'area' km2 to km3: mm * km2 * 1e-6.
        factor = 0.001 * 0.001 * area

        p = becgis.open_as_array(P, nan_values=True)
        et = becgis.open_as_array(ET, nan_values=True)
        ro = becgis.open_as_array(RO, nan_values=True)

        p[np.isnan(LU)] = et[np.isnan(LU)] = ro[np.isnan(LU)] = np.nan

        balance_km3 = np.append(balance_km3,
                                np.nansum(p * factor) -
                                np.nansum(et * factor) -
                                np.nansum(ro * factor))
        p_km3 = np.append(p_km3, np.nansum(p * factor))
        et_km3 = np.append(et_km3, np.nansum(et * factor))
        ro_km3 = np.append(ro_km3, np.nansum(ro * factor))

        balance_mm = np.append(balance_mm,
                               np.nanmean(p) - np.nanmean(et) - np.nanmean(ro))
        p_mm = np.append(p_mm, np.nanmean(p))
        et_mm = np.append(et_mm, np.nanmean(et))
        ro_mm = np.append(ro_mm, np.nanmean(ro))

    relative_storage = np.cumsum(balance_km3) / np.mean(p_km3)

    ##
    # BASIC BASINSCALE WATERBALANCE (PRE-SHEETS)
    ##
    fig = plt.figure(1, figsize=(9, 6))
    plt.clf()
    fig.patch.set_alpha(0.7)
    ax2 = plt.gca()
    ax = ax2.twinx()
    ax2.bar(common_dates, relative_storage, width=25, color='#3ee871')
    ax2.grid(b=True, which='major', color='0.65', linestyle='--', zorder=0)
    ax.bar([common_dates[0]], [0], label=r'$\sum dS / \overline{P}$', color='#3ee871')
    ax.plot(common_dates, np.cumsum(balance_km3), label=r'$\sum dS$')
    ax.plot(common_dates, np.cumsum(p_km3), label=r'$\sum (P)$')
    ax.plot(common_dates, np.cumsum(et_km3) + np.cumsum(ro_km3), label=r'$\sum (ET + RO)$')
    box = ax.get_position()
    ax.set_position([box.x0, box.y0 + box.height * 0.1,
                     box.width, box.height * 0.9])
    ax2.set_position([box.x0, box.y0 + box.height * 0.1,
                      box.width, box.height * 0.9])
    ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.1),
              fancybox=True, shadow=True, ncol=5)
    plt.suptitle(r'$\sum P = {0:.1f}\;{4}, \sum ET = {1:.1f}\;{4}, '
                 r'\sum RO = {2:.1f}\;{4}, \sum dS = {3:.1f}\;{4}$'.format(
                     np.sum(p_km3), np.sum(et_km3), np.sum(ro_km3),
                     np.sum(balance_km3), r"km^{3}"))
    plt.title(r'{0}, ${5} = {2:.3f}\;{6}, {7} = {3:.3f}, dt = {4}\;months$'.format(
        metadata['name'], np.sum(balance_km3), np.mean(balance_km3),
        np.mean(relative_storage), len(p_km3), r"\overline{dS}", r"km^{3}",
        r"\overline{\sum dS / \overline{P}}"))
    plt.xlabel('Time')
    ax2.set_ylabel(r'Relative Storage [months of $\overline{P}$]')
    ax.set_ylabel(r'Stock [$km^{3}$]')
    #plt.savefig(os.path.join(output_dir, 'balance_{0}'.format(metadata['name'])))

    fig = plt.figure(2)
    plt.clf()
    ax2 = plt.gca()
    ax = ax2.twinx()
    ax2.plot(common_dates, p_mm, common_dates, et_mm, common_dates, ro_mm)
    ax.plot(common_dates, np.cumsum(balance_mm), 'k')
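
# Interpretation note: the figures above visualise the basin-scale balance
# dS = P - ET - RO per month. For a basin in long-term equilibrium the
# cumulative P and cumulative (ET + RO) curves should converge, and the
# relative storage (cumulative dS scaled by mean P) should hover around zero.
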