def root_zone_storage_Wpx(output_folder, rz_sm_fhs, rz_depth_fh):
    """
    Multiply root zone soil moisture maps with a root depth map and save the
    resulting root zone storage maps (mm) as geotiffs in '<output_folder>/RZstor'.
    """
    Data_Path_RZ = "RZstor"
    out_folder = os.path.join(output_folder, Data_Path_RZ)
    if not os.path.exists(out_folder):
        os.mkdir(out_folder)

    root_depth = becgis.open_as_array(rz_depth_fh, nan_values=True)
    geo = becgis.get_geoinfo(rz_depth_fh)

    root_storage_fhs = []
    for rz_sm_fh in rz_sm_fhs:
        root_depth_sm = becgis.open_as_array(rz_sm_fh, nan_values=True)
        root_storage = root_depth * root_depth_sm
        out_fh = os.path.join(out_folder, 'RZ_storage_mm_%s' % (rz_sm_fh[-10:]))
        becgis.create_geotiff(out_fh, root_storage, *geo)
        root_storage_fhs.append(out_fh)
    return root_storage_fhs
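# A minimal usage sketch for root_zone_storage_Wpx. The folder layout and the
# glob pattern below are hypothetical placeholders; only the call signature
# follows the function defined above. It assumes the soil moisture rasters are
# named so that fh[-10:] yields 'YYYYMM.tif'.
def _example_root_zone_storage(output_folder, rz_depth_fh):
    """Hypothetical example: collect monthly root zone soil moisture maps and
    write the corresponding root zone storage rasters."""
    import glob
    # Hypothetical subfolder with monthly root zone soil moisture geotiffs.
    rz_sm_fhs = sorted(glob.glob(os.path.join(output_folder, 'RZ_SM', '*.tif')))
    return root_zone_storage_Wpx(output_folder, rz_sm_fhs, rz_depth_fh)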
def correct_var(metadata, complete_data, output_dir, formula, new_var,
                slope=False, bounds=(0, [1.0, 1., 12.])):
    """
    Scale the rasters of a variable with a fitted seasonal (12-month period)
    cosine correction and save the corrected maps under 'data/<new_var>'.
    """
    var = split_form(formula)[0][-1]
    a, x0 = calc_var_correction(metadata, complete_data, output_dir,
                                formula=formula, slope=slope,
                                plot=True, bounds=bounds)
    for date, fn in zip(complete_data[var][1], complete_data[var][0]):
        geo_info = becgis.get_geoinfo(fn)
        data = becgis.open_as_array(fn, nan_values=True)

        # Months since the reference date x0, used as the phase of the correction.
        x = calc_delta_months(x0, date)
        fraction = a[0] * (np.cos((x - a[2]) * (np.pi / 6)) * 0.5 + 0.5) + (a[1] * (1 - a[0]))
        data *= fraction

        folder = os.path.join(output_dir, metadata['name'], 'data', new_var)
        if not os.path.exists(folder):
            os.makedirs(folder)

        # Last underscore-separated chunk of the filename, e.g. 'YYYYMM.tif'.
        date_str = os.path.split(fn)[1].split('_')[-1]
        filen = 'supply_sw_' + date_str[0:6] + '.tif'
        fn = os.path.join(folder, filen)
        becgis.create_geotiff(fn, data, *geo_info)

    meta = becgis.sort_files(folder, [-10, -6], month_position=[-6, -4])[0:2]
    return a, meta
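# A small, self-contained sketch of the seasonal multiplier used in correct_var:
# a cosine with a 12-month period, amplitude weight a[0], baseline a[1] and
# phase shift a[2] (months). The parameter values in the default argument are
# made up purely to show the shape of the multiplier; they are not fitted values.
def _example_correction_fraction(a=(0.6, 0.8, 4.0)):
    """Return the monthly multiplier applied to the data for x = 0..11 months
    after the reference date, mirroring the formula in correct_var."""
    x = np.arange(12)
    return a[0] * (np.cos((x - a[2]) * (np.pi / 6)) * 0.5 + 0.5) + a[1] * (1 - a[0])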
def compare_rasters2rasters_per_lu(ds1_fhs, ds1_dates, ds2_fhs, ds2_dates, lu_fh, output_dir,
                                   dataset_names=["DS1", "DS2"], class_dictionary=None,
                                   no_of_classes=6):
    """
    Compare two raster datasets with each other per land use category.

    Parameters
    ----------
    ds1_fhs : ndarray
        Array with strings pointing to maps of dataset 1.
    ds1_dates : ndarray
        Array with the same shape as ds1_fhs, containing datetime.date objects.
    ds2_fhs : ndarray
        Array with strings pointing to maps of dataset 2.
    ds2_dates : ndarray
        Array with the same shape as ds2_fhs, containing datetime.date objects.
    lu_fh : str
        Pointer to a land use map.
    output_dir : str
        Folder to save the results in.
    dataset_names : list, optional
        List with two strings describing the names of the two datasets.
    class_dictionary : dict, optional
        Dictionary specifying all the land use categories.
    no_of_classes : int, optional
        The 'no_of_classes' most dominant classes in lu_fh are compared,
        the rest is ignored.
    """
    LUCS = becgis.open_as_array(lu_fh, nan_values=True)
    DS1 = becgis.open_as_array(ds1_fhs[0], nan_values=True)
    DS2 = becgis.open_as_array(ds2_fhs[0], nan_values=True)

    DS1[np.isnan(DS2)] = np.nan
    LUCS[np.isnan(DS1)] = np.nan

    # Select the 'no_of_classes' most frequently occurring land use classes.
    classes, counts = np.unique(LUCS[~np.isnan(LUCS)], return_counts=True)
    counts_sorted = np.sort(counts)[-no_of_classes:]
    selected_lucs = [classes[counts == counter][0] for counter in counts_sorted]

    driver, NDV, xsize, ysize, GeoT, Projection = becgis.get_geoinfo(lu_fh)
    becgis.create_geotiff(lu_fh.replace('.tif', '_.tif'), LUCS, driver, NDV, xsize, ysize, GeoT, Projection)

    common_dates = becgis.common_dates([ds1_dates, ds2_dates])

    ds1_totals = np.array([])
    ds2_totals = np.array([])

    DS1_per_class = dict()
    DS2_per_class = dict()

    for date in common_dates:
        DS1 = becgis.open_as_array(ds1_fhs[ds1_dates == date][0], nan_values=True)
        DS2 = becgis.open_as_array(ds2_fhs[ds2_dates == date][0], nan_values=True)

        for clss in selected_lucs:
            if clss in list(DS1_per_class.keys()):
                DS1_per_class[clss] = np.append(DS1_per_class[clss], np.nanmean(DS1[LUCS == clss]))
            else:
                DS1_per_class[clss] = np.array([np.nanmean(DS1[LUCS == clss])])

            if clss in list(DS2_per_class.keys()):
                DS2_per_class[clss] = np.append(DS2_per_class[clss], np.nanmean(DS2[LUCS == clss]))
            else:
                DS2_per_class[clss] = np.array([np.nanmean(DS2[LUCS == clss])])

        ds1_totals = np.append(ds1_totals, np.nanmean(DS1))
        ds2_totals = np.append(ds2_totals, np.nanmean(DS2))

        print("Finished {0}, going to {1}".format(date, common_dates[-1]))

    for clss in selected_lucs:
        if class_dictionary is None:
            plot_scatter_series(DS1_per_class[clss], DS2_per_class[clss],
                                dataset_names[0], dataset_names[1], clss, output_dir)
        else:
            cats = {v[0]: k for k, v in list(class_dictionary.items())}
            plot_scatter_series(DS1_per_class[clss], DS2_per_class[clss],
                                dataset_names[0], dataset_names[1], cats[clss], output_dir)

    plot_scatter_series(ds1_totals, ds2_totals, dataset_names[0], dataset_names[1],
                        "Total Area", output_dir)

    if class_dictionary is not None:
        output_fh = os.path.join(output_dir, 'landuse_percentages.png')
        driver, NDV, xsize, ysize, GeoT, Projection = becgis.get_geoinfo(lu_fh)
        becgis.create_geotiff(lu_fh.replace('.tif', '_.tif'), LUCS, driver, NDV, xsize, ysize, GeoT, Projection)
        becgis.plot_category_areas(lu_fh.replace('.tif', '_.tif'), class_dictionary, output_fh, area_treshold=0.01)
        os.remove(lu_fh.replace('.tif', '_.tif'))
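# A hypothetical call sketch for compare_rasters2rasters_per_lu. All file lists,
# dates, dataset names and class codes are placeholders. The class_dictionary
# layout is inferred from how the function indexes v[0] (a category name mapped
# to a list whose first entry is the raster class code used in lu_fh) and should
# be checked against the actual land use legend before use.
def _example_compare_per_lu(ds1_fhs, ds1_dates, ds2_fhs, ds2_dates, lu_fh, output_dir):
    class_dictionary = {
        'Forest': [1],     # hypothetical class codes
        'Cropland': [2],
        'Water': [3],
    }
    compare_rasters2rasters_per_lu(ds1_fhs, ds1_dates, ds2_fhs, ds2_dates,
                                   lu_fh, output_dir,
                                   dataset_names=['DS1', 'DS2'],
                                   class_dictionary=class_dictionary,
                                   no_of_classes=3)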
def compare_rasters2rasters(ds1_fhs, ds1_dates, ds2_fhs, ds2_dates, output_dir=None,
                            dataset_names=None, data_treshold=0.75):
    """
    Compare two series of raster maps by computing the root mean square error
    (RMSE) per pixel.

    Parameters
    ----------
    ds1_fhs : list
        List pointing to georeferenced raster files of dataset 1.
    ds1_dates : list
        List corresponding to ds1_fhs specifying the dates.
    ds2_fhs : list
        List pointing to georeferenced raster files of dataset 2.
    ds2_dates : list
        List corresponding to ds2_fhs specifying the dates.
    output_dir : str, optional
        Directory to store the results, i.e. a geotiff showing the RMSE per pixel.
    dataset_names : list, optional
        List of strings describing the names of the datasets,
        e.g. ['CHIRPS', 'ERA-I'].
    data_treshold : float, optional
        Pixels with less than data_treshold * total_number_of_samples actual
        values are set to no-data, i.e. pixels with too few data points are
        ignored.

    Returns
    -------
    results : dict
        Dictionary with the key 'rmse', containing a 2dndarray with the RMSE
        per pixel.

    Examples
    --------
    >>> results = compare_rasters2rasters(ds1_fhs, ds1_dates, ds2_fhs, ds2_dates,
            output_dir = r"C:/Desktop/", dataset_names = ["CHIRPS", "TRMM"])
    """
    becgis.assert_proj_res_ndv([ds1_fhs, ds2_fhs])

    if dataset_names is None:
        dataset_names = ['DS1', 'DS2']

    driver, NDV, xsize, ysize, GeoT, Projection = becgis.get_geoinfo(ds1_fhs[0])

    common_dates = becgis.common_dates([ds1_dates, ds2_dates])

    diff_sum = np.zeros((ysize, xsize))
    non_nans = np.zeros((ysize, xsize))

    progress = 0
    samples = len(common_dates)

    for date in common_dates:
        DS1 = becgis.open_as_array(ds1_fhs[ds1_dates == date][0], nan_values=True)
        DS2 = becgis.open_as_array(ds2_fhs[ds2_dates == date][0], nan_values=True)

        # Mask pixels that are no-data in either dataset.
        DS1[np.isnan(DS2)] = np.nan
        DS2[np.isnan(DS1)] = np.nan

        non_nans[~np.isnan(DS1)] += np.ones((ysize, xsize))[~np.isnan(DS1)]

        diff = (DS1 - DS2)**2
        diff_sum[~np.isnan(DS1)] += diff[~np.isnan(DS1)]

        progress += 1
        print("progress: {0} of {1} finished".format(progress, samples))

    diff_sum[non_nans <= data_treshold * samples] = np.nan

    results = dict()
    results['rmse'] = np.where(non_nans == 0., np.nan, np.sqrt(diff_sum / non_nans))

    if output_dir is not None:
        startdate = common_dates[0].strftime('%Y%m%d')
        enddate = common_dates[-1].strftime('%Y%m%d')

        path = os.path.join(output_dir, 'spatial_errors')
        if not os.path.exists(path):
            os.makedirs(path)

        for varname in list(results.keys()):
            fh = os.path.join(path, '{0}_{1}_vs_{2}_{3}_{4}.tif'.format(
                varname, dataset_names[0], dataset_names[1], startdate, enddate))
            becgis.create_geotiff(fh, results[varname], driver, NDV, xsize, ysize, GeoT, Projection)

    return results
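# A minimal numpy sketch of the per-pixel statistic computed above: the RMSE
# over time, ignoring NaNs and masking pixels with too few valid samples. The
# input stacks are synthetic stand-ins for two series of co-registered maps;
# the function above accumulates the same quantities map by map instead of
# holding the full stacks in memory.
def _example_per_pixel_rmse(stack1, stack2, data_treshold=0.75):
    """stack1, stack2: 3-D arrays (time, rows, cols) with NaN as no-data."""
    diff = (stack1 - stack2) ** 2                       # NaN wherever either stack is NaN
    non_nans = np.sum(~np.isnan(diff), axis=0).astype(float)
    with np.errstate(invalid='ignore', divide='ignore'):
        rmse = np.sqrt(np.nansum(diff, axis=0) / non_nans)
    rmse[non_nans <= data_treshold * diff.shape[0]] = np.nan
    return rmse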
def compare_rasters2stations(ds1_fhs, ds1_dates, station_dict, output_dir, station_names=None,
                             quantity_unit=None, dataset_names=None, method='cubic', min_records=1):
    """
    Compare a series of raster maps with station time series by computing the
    relative bias, RMSE, Pearson-correlation coefficient and the Nash-Sutcliffe
    coefficient for each station.

    Parameters
    ----------
    ds1_fhs : 1dnarray
        List containing filehandles to georeferenced raster files.
    ds1_dates : 1dnarray
        List containing datetime.date or datetime.datetime objects corresponding
        to the filehandles in ds1_fhs. Length should be equal to ds1_fhs.
    station_dict : dictionary
        Dictionary containing coordinates of stations and timeseries.
        See Examples below.
    output_dir : str
        Directory to store several results, i.e. (1) a csv file to load in a GIS
        program, (2) interpolated maps showing the various error indicators
        spatially and (3) scatter plots for all the stations.
    station_names : dictionary, optional
        Dictionary containing names of the respective stations which can be
        added to the csv-file, see Examples for more information.
    quantity_unit : list, optional
        List of two strings describing the quantity and unit of the data.
    dataset_names : list, optional
        List of strings describing the names of the datasets.
    method : str, optional
        Method used for interpolation of the error-indicators, i.e.:
        'linear', 'nearest' or 'cubic' (default).
    min_records : int, optional
        Minimum number of matching records required to evaluate a station,
        default is 1.

    Returns
    -------
    results : dictionary
        Dictionary containing several error indicators per station.

    Examples
    --------
    >>> station_dict = {(lat1, lon1): [(datetime.date(year, month, day), data_value),
                                       (datetime.date(year, month, day), data_value), etc.],
                        (lat2, lon2): [(datetime.date(year, month, day), data_value),
                                       (datetime.date(year, month, day), data_value), etc.],
                        etc.}
    >>> station_names = {(lat1, lon1): 'stationname1', (lat2, lon2): 'stationname2', etc.}
    >>> results = compare_rasters2stations(ds1_fhs, ds1_dates, station_dict,
            output_dir = r"C:/Desktop", station_names = None,
            quantity_unit = ["P", "mm/month"],
            dataset_names = ["CHIRPS", "Meteo Stations"], method = 'cubic')
    """
    results = dict()
    pixel_coordinates = list()

    if dataset_names is None:
        dataset_names = ['Spatial', 'Station']

    if quantity_unit is not None:
        quantity_unit[1] = r'[' + quantity_unit[1] + r']'
    else:
        quantity_unit = ['data', '']

    becgis.assert_proj_res_ndv([ds1_fhs])

    no_of_stations = len(list(station_dict.keys()))

    ds1_dates = becgis.convert_datetime_date(ds1_dates, out='datetime')

    for i, station in enumerate(station_dict.keys()):
        station_dates, station_values = unzip(station_dict[station])
        common_dates = becgis.common_dates([ds1_dates, station_dates])
        sample_size = common_dates.size

        if sample_size >= min_records:
            ds1_values = list()
            xpixel, ypixel = pixelcoordinates(station[0], station[1], ds1_fhs[0])

            if np.any([np.isnan(xpixel), np.isnan(ypixel)]):
                print("Skipping station ({0}) because it is not on the map".format(station))
                continue
            else:
                for date in common_dates:
                    ds1_values.append(becgis.open_as_array(ds1_fhs[ds1_dates == date][0],
                                                           nan_values=True)[ypixel, xpixel])

                common_station_values = [station_values[station_dates == date][0] for date in common_dates]

                results[station] = pairwise_validation(ds1_values, common_station_values)
                results[station] += (sample_size,)

                pixel_coordinates.append((xpixel, ypixel))

                #m, b = np.polyfit(ds1_values, common_station_values, 1)

                path_scatter = os.path.join(output_dir, 'scatter_plots')
                if not os.path.exists(path_scatter):
                    os.makedirs(path_scatter)
                path_ts = os.path.join(output_dir, 'time_series')
                if not os.path.exists(path_ts):
                    os.makedirs(path_ts)
                path_int = os.path.join(output_dir, 'interp_errors')
                if not os.path.exists(path_int):
                    os.makedirs(path_int)

                xlabel = '{0} {1} {2}'.format(dataset_names[0], quantity_unit[0], quantity_unit[1])
                ylabel = '{0} {1} {2}'.format(dataset_names[1], quantity_unit[0], quantity_unit[1])

                if station_names is not None:
                    title = station_names[station]
                    fn = os.path.join(path_scatter, '{0}_vs_{1}.png'.format(station_names[station], dataset_names[0]))
                    fnts = os.path.join(path_ts, '{0}_vs_{1}.png'.format(station_names[station], dataset_names[0]))
                else:
                    title = station
                    fn = os.path.join(path_scatter, '{0}_vs_station_{1}.png'.format(dataset_names[0], i))
                    fnts = os.path.join(path_ts, '{0}_vs_station_{1}.png'.format(dataset_names[0], i))

                suptitle = 'pearson: {0:.5f}, rmse: {1:.5f}, ns: {2:.5f}, bias: {3:.5f}, n: {4:.0f}'.format(
                    results[station][0], results[station][1], results[station][2],
                    results[station][3], results[station][4])

                plot_scatter_series(ds1_values, common_station_values, xlabel, ylabel,
                                    title, fn, suptitle=suptitle, dates=common_dates)

                xaxis_label = '{0} {1}'.format(quantity_unit[0], quantity_unit[1])
                xlabel = '{0}'.format(dataset_names[0])
                ylabel = '{0}'.format(dataset_names[1])

                plot_time_series(ds1_values, common_station_values, common_dates,
                                 xlabel, ylabel, xaxis_label, title, fnts, suptitle=suptitle)

                print("station {0} ({3}) of {1} finished ({2} matching records)".format(
                    i + 1, no_of_stations, sample_size, title))
        else:
            print("____station {0} of {1} skipped____ (fewer than {2} matching records)".format(
                i + 1, no_of_stations, min_records))
            continue

    n = len(results)

    csv_filename = os.path.join(output_dir, '{0}stations_vs_{1}_indicators.csv'.format(n, dataset_names[0]))
    # Open in text mode with newline='' as required by the csv module on Python 3.
    with open(csv_filename, 'w', newline='') as csv_file:
        writer = csv.writer(csv_file, delimiter=';')
        writer.writerow(['longitude', 'latitude', 'station_id', 'pearson', 'rmse',
                         'nash_sutcliffe', 'bias', 'no_of_samples'])
        for station in list(results.keys()):
            # Fall back to the station coordinates when no names were provided.
            station_id = station_names[station] if station_names is not None else station
            writer.writerow([station[1], station[0], station_id,
                             results[station][0], results[station][1], results[station][2],
                             results[station][3], results[station][4]])

    rslt = {'Relative Bias': list(), 'RMSE': list(), 'Pearson Coefficient': list(),
            'Nash-Sutcliffe Coefficient': list(), 'Number Of Samples': list()}

    for value in list(results.values()):
        rslt['Relative Bias'].append(value[3])
        rslt['RMSE'].append(value[1])
        rslt['Pearson Coefficient'].append(value[0])
        rslt['Nash-Sutcliffe Coefficient'].append(value[2])
        rslt['Number Of Samples'].append(value[4])

    for key, value in list(rslt.items()):
        title = '{0}'.format(key)
        print(title)
        if key == 'RMSE':
            xlabel = '{0} [mm/month]'.format(key)
        else:
            xlabel = key
        value = np.array(value)
        value = value[(~np.isnan(value)) & (~np.isinf(value))]
        suptitle = 'mean: {0:.5f}, std: {1:.5f}, n: {2}'.format(np.nanmean(value), np.nanstd(value), n)
        print(value)
        plot_histogram(value[(~np.isnan(value)) & (~np.isinf(value))], title, xlabel,
                       output_dir, suptitle=suptitle)

    # Interpolate the per-station error indicators to maps.
    driver, NDV, xsize, ysize, GeoT, Projection = becgis.get_geoinfo(ds1_fhs[0])
    dummy_map = becgis.open_as_array(ds1_fhs[0])
    grid = np.mgrid[0:ysize, 0:xsize]

    var_names = ['pearson', 'rmse', 'ns', 'bias', 'no_of_samples']

    for i, var in enumerate(unzip(list(results.values()))):
        xy = np.array(pixel_coordinates)[~np.isnan(var)]
        z = var[~np.isnan(var)]
        interpolation_field = interpolate.griddata(xy, z, (grid[1], grid[0]),
                                                   method=method, fill_value=np.nanmean(z))
        interpolation_field[dummy_map == NDV] = NDV
        fh = os.path.join(path_int, '{0}_{1}stations_vs_{2}.tif'.format(var_names[i], len(xy), dataset_names[0]))
        becgis.create_geotiff(fh, interpolation_field, driver, NDV, xsize, ysize, GeoT, Projection)

    return results
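# compare_rasters2stations relies on pairwise_validation (defined elsewhere in
# this module) to produce (pearson, rmse, nash_sutcliffe, bias) per station.
# The sketch below only illustrates one common way such indicators are computed
# for a pair of series; it is not necessarily identical to the module's own
# pairwise_validation.
def _example_indicators(simulated, observed):
    """Return (pearson, rmse, nash_sutcliffe, relative_bias) for two paired series."""
    sim = np.asarray(simulated, dtype=float)
    obs = np.asarray(observed, dtype=float)
    pearson = np.corrcoef(sim, obs)[0, 1]
    rmse = np.sqrt(np.mean((sim - obs) ** 2))
    ns = 1.0 - np.sum((obs - sim) ** 2) / np.sum((obs - np.mean(obs)) ** 2)
    bias = np.sum(sim) / np.sum(obs) - 1.0   # one common definition of relative bias
    return pearson, rmse, ns, bias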