def tp_vs(mask_filepath, variable, mask=None, longname=""):
    """
    Scatter plot of total precipitation against another variable over the
    Upper Indus Basin.

    Inputs
        mask_filepath: path to the basin mask file
        variable: name of the dataframe column plotted on the x axis
        mask, optional: mask applied to freshly updated CDS monthly data;
            if None, data are downloaded for mask_filepath directly
        longname, optional: human-readable x-axis label for `variable`
    """
    if mask is None:
        df = dd.download_data(mask_filepath)
    else:
        # Refresh the monthly CDS data and apply the given mask before use.
        cds_filepath = fd.update_cds_monthly_data()
        da = dd.apply_mask(cds_filepath, mask)
        df = da.to_dataframe().reset_index()

    df = df[["time", "tp", variable]]
    df_var = df.dropna()

    # Plot
    df_var.plot.scatter(x=variable, y="tp", alpha=0.2, c="b")
    plt.title("Upper Indus Basin")
    plt.ylabel("Total precipitation [m/day]")
    plt.xlabel(longname)
    plt.grid(True)
    plt.show()
def areal_model_eval(location, number=None, EDA_average=False, minyear=1979,
                     maxyear=2020):
    """
    Returns data to evaluate an areal model at a given location, area and
    time period.

    Variables: total precipitation as a function of time, 2m dewpoint
    temperature, angle of sub-gridscale orography, orography, slope of
    sub-gridscale orography, total column water vapour, Nino 3.4, Nino 4
    and NAO index for a single point.

    Inputs
        location: basin name (str) or [latitude, longitude] coordinates
        number, optional: specify desired ensemble run, integer
        EDA_average, optional: specify if you want average of low resolution
            ensemble runs, boolean
        minyear, maxyear, optional: time slice bounds

    Outputs
        x_tr: evaluation feature vector, numpy array
        y_tr: evaluation output vector, numpy array
    """
    # BUG FIX: the branches below were independent `if` statements, so the
    # final `else` overwrote `da` whenever EDA_average was False, discarding
    # the ensemble-member selection. They are now mutually exclusive.
    if number is not None:
        da_ensemble = dd.download_data(location, xarray=True, ensemble=True)
        da = da_ensemble.sel(number=number).drop("number")
    elif EDA_average:
        da_ensemble = dd.download_data(location, xarray=True, ensemble=True)
        da = da_ensemble.mean(dim="number")
    else:
        da = dd.download_data(location, xarray=True)

    sliced_da = da.sel(time=slice(minyear, maxyear))

    if isinstance(location, str):
        mask_filepath = find_mask(location)
        masked_da = dd.apply_mask(sliced_da, mask_filepath)
        # BUG FIX: this dataframe was previously overwritten with
        # `da.to_dataframe()`, i.e. the unmasked, unsliced data.
        multiindex_df = masked_da.to_dataframe()
        df = multiindex_df.dropna().reset_index()
    else:
        da_location = sliced_da.interp(
            coords={"lat": location[0], "lon": location[1]},
            method="nearest")
        multiindex_df = da_location.to_dataframe()
        df = multiindex_df.dropna().reset_index()

    df["time"] = df["time"] - 1970  # to years (assumes decimal-year time axis — TODO confirm)
    df["tp"] = log_transform(df["tp"])
    df = df[[
        "time", "lat", "lon", "slor", "anor", "z", "d2m", "tcwv", "N34", "tp"
    ]]  # format order

    xtr = df.drop(columns=["tp"]).values
    ytr = df["tp"].values
    return xtr, ytr
def change_maps(data_filepath, mask_filepath, variable):
    """
    Maps of average annual change from 1979 to 1989, 1999, 2009 and 2019.

    Inputs
        data_filepath: path to the data file
        mask_filepath: path to the basin mask file
        variable: name of the variable to plot (scaled from m/day to mm/day)
    """
    da = dd.apply_mask(data_filepath, mask_filepath)
    da_var = da[variable] * 1000  # to mm/day

    # Baseline: cumulative total for 1979.
    # BUG FIX: the slice end was 'str(+1) + "1980-01-01T12:00:00"', which
    # evaluates to the invalid timestamp "11980-01-01T12:00:00".
    da_1979 = da_var.sel(
        time=slice("1979-01-16T12:00:00", "1980-01-01T12:00:00"))
    basin_1979_sum = cumulative_monthly(da_1979).sum(dim="time")

    # Relative change of each decade's cumulative total vs the 1979 baseline.
    years = ["1989", "1999", "2009", "2019"]
    changes = []
    for year in years:
        end_year = str(int(year) + 1)
        basin = da_var.sel(time=slice(year + "-01-01T12:00:00",
                                      end_year + "-01-01T12:00:00"))
        basin_sum = cumulative_monthly(basin).sum(dim="time")
        changes.append(basin_sum / basin_1979_sum - 1)

    basin_changes = xr.concat(changes, pd.Index(years, name="year"))

    g = basin_changes.plot(
        x="longitude",
        y="latitude",
        col="year",
        col_wrap=2,
        subplot_kws={"projection": ccrs.PlateCarree()},
        cbar_kwargs={
            "label": "Precipitation change",
            "format": tck.PercentFormatter(xmax=1.0),
        },
    )

    for ax in g.axes.flat:
        ax.coastlines()
        ax.gridlines()
        ax.set_extent([71, 83, 30, 38])
        ax.add_feature(cf.BORDERS)

    plt.show()
def annual_map(data_filepath, mask_filepath, variable, year, cumulative=False):
    """
    Annual map of a variable over the Upper Indus Basin.

    Inputs
        data_filepath: path to the data file
        mask_filepath: path to the basin mask file
        variable: name of the variable to plot (scaled from m/day to mm/day)
        year: integer year to plot
        cumulative, optional: if True plot the cumulative annual total,
            otherwise the standard deviation over the year
    """
    da = dd.apply_mask(data_filepath, mask_filepath)
    ds_year = da.sel(time=slice(
        str(year) + "-01-16T12:00:00", str(year + 1) + "-01-01T12:00:00"))
    ds_var = ds_year[variable] * 1000  # to mm/day

    if cumulative is True:
        ds_processed = cumulative_monthly(ds_var)
        ds_final = ds_processed.sum(dim="time")
        # BUG FIX: the colorbar previously said "standard deviation" even
        # for the cumulative-total branch.
        cbar_label = "Total precipitation [mm]"
    else:
        ds_final = ds_var.std(dim="time")  # TODO weighted mean
        cbar_label = "Precipitation standard deviation [mm/day]"

    print(ds_final)

    plt.figure()
    ax = plt.subplot(projection=ccrs.PlateCarree())
    ax.set_extent([71, 83, 30, 38])
    # NOTE(review): "tp_0001" looks like a hard-coded ensemble-member key
    # rather than `variable` — confirm this is intentional.
    g = ds_final["tp_0001"].plot(
        cmap="magma_r",
        vmin=0.001,
        cbar_kwargs={
            "label": cbar_label,
            "extend": "neither",
            "pad": 0.10
        })
    g.cmap.set_under("white")
    ax.coastlines()
    ax.gridlines(draw_labels=True)
    ax.set_xlabel("Longitude")
    ax.set_ylabel("Latitude")
    plt.title("Upper Indus Basin Total Precipitation " + str(year) + "\n \n")
    plt.show()
def areal_model(location, number=None, EDA_average=False, length=3000,
                seed=42):
    """
    Outputs test, validation and training data for total precipitation
    as a function of time, 2m dewpoint temperature, angle of sub-gridscale
    orography, orography, slope of sub-gridscale orography, total column
    water vapour, Nino 3.4 index for given number randomly sampled data
    points for a given basin.

    Inputs
        location: specify area to train model
        number, optional: specify desired ensemble run, integer
        EDA_average, optional: specify if you want average of low resolution
            ensemble runs, boolean
        length, optional: specify number of points to sample, integer
        seed, optional: specify seed, integer

    Outputs
        x_train: training feature vector, numpy array
        y_train: training output vector, numpy array
        x_test: testing feature vector, numpy array
        y_test: testing output vector, numpy array
    """
    # BUG FIX: the branches below were independent `if` statements, so the
    # final `else` overwrote `da` whenever EDA_average was False, discarding
    # the ensemble-member selection. They are now mutually exclusive.
    if number is not None:
        da_ensemble = dd.download_data(location, xarray=True, ensemble=True)
        da = da_ensemble.sel(number=number).drop("number")
    elif EDA_average:
        da_ensemble = dd.download_data(location, xarray=True, ensemble=True)
        da = da_ensemble.mean(dim="number")
    else:
        da = dd.download_data(location, xarray=True)

    # Apply basin mask and sample random locations/times.
    mask_filepath = find_mask(location)
    masked_da = dd.apply_mask(da, mask_filepath)
    multiindex_df = masked_da.to_dataframe()
    df_clean = multiindex_df.dropna().reset_index()
    df = sa.random_location_and_time_sampler(df_clean,
                                             length=length,
                                             seed=seed)

    df["time"] = df["time"] - 1970  # to years (assumes decimal-year time axis — TODO confirm)
    df["tp"] = log_transform(df["tp"])
    df = df[[
        "time", "lat", "lon", "slor", "anor", "z", "d2m", "tcwv", "N34", "tp"
    ]]  # format order

    # Hold out the last 10% of the time range for testing.
    test_df = df[df["time"] > df["time"].max() * 0.9]
    xtest = test_df.drop(columns=["tp"]).values
    ytest = test_df["tp"].values

    # Training and validation data from the first 90% (no shuffling, so the
    # validation split is the chronologically latest 30% of that portion).
    tr_df = df[df["time"] < df["time"].max() * 0.9]
    xtr = tr_df.drop(columns=["tp"]).values
    ytr = tr_df["tp"].values
    xtrain, xval, ytrain, yval = train_test_split(
        xtr, ytr, test_size=0.30, shuffle=False)

    return xtrain, xval, xtest, ytrain, yval, ytest
def select_basin(dataset, location, minyear=1990, maxyear=2005):
    """
    Mask the dataset to the basin at `location` and slice it in time.

    (The previous docstring said "Interpolate dataset at given coordinates",
    but this function applies a basin mask — it does not interpolate.)

    Inputs
        dataset: xarray dataset to mask and slice
        location: basin identifier used to look up the mask file
        minyear, maxyear, optional: time slice bounds; defaults preserve
            the original hard-coded 1990-2005 window

    Outputs
        basin: masked, time-sliced dataset
    """
    mask_filepath = dp.find_mask(location)
    basin = dd.apply_mask(dataset, mask_filepath)
    basin = basin.sel(time=slice(minyear, maxyear))
    return basin