import numpy as np
import pandas as pd
import xarray as xr
import matplotlib.pyplot as plt
import matplotlib.ticker as tck
import cartopy.crs as ccrs
import cartopy.feature as cf
from sklearn.model_selection import train_test_split

# Project-local helpers assumed available in scope: dd (data download and
# masking), dp (data preparation), fd (CDS file updates), sa (sampling)
# and find_mask.


def tp_vs(mask_filepath, variable, mask=None, longname=""):
    """ Scatter plot of total precipitation against another variable
    over the Upper Indus Basin """

    if mask is None:
        df = dd.download_data(mask_filepath)
    else:
        cds_filepath = fd.update_cds_monthly_data()
        da = dd.apply_mask(cds_filepath, mask)
        df = da.to_dataframe().reset_index()

    df = df[["time", "tp", variable]]
    df_var = df.dropna()

    # Plot
    df_var.plot.scatter(x=variable, y="tp", alpha=0.2, c="b")
    plt.title("Upper Indus Basin")
    plt.ylabel("Total precipitation [m/day]")
    plt.xlabel(longname)
    plt.grid(True)
    plt.show()
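# Usage sketch (hedged): the mask path below is an assumption for
# illustration, not the project's actual data layout.
# tp_vs("_Data/Masks/ERA5_Upper_Indus_mask.nc", "d2m",
#       longname="2m dewpoint temperature [K]")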
def areal_model_eval(location,
                     number=None,
                     EDA_average=False,
                     minyear=1979,
                     maxyear=2020):
    """
    Returns data to evaluate an areal model at a given location, area and time period.
    Variables: total precipitation as a function of time, 2m dewpoint temperature, angle of 
    sub-gridscale orography, orography, slope of sub-gridscale orography, total column 
    water vapour, Nino 3.4, Nino 4 and NAO index for a single point.

    Inputs
        number, optional: specify desired ensemble run, integer 
        EDA_average, optional: specify if you want average of low resolution ensemble runs, boolean
        coords [latitude, longitude], optional: specify if you want a specific location, list of floats
        mask, optional: specify area to train model, defaults to Upper Indus Basin

    Outputs
        x_tr: evaluation feature vector, numpy array
        y_tr: evaluation output vector, numpy array
    """
    if number is not None:
        da_ensemble = dd.download_data(location, xarray=True, ensemble=True)
        da = da_ensemble.sel(number=number).drop("number")
    elif EDA_average:
        da_ensemble = dd.download_data(location, xarray=True, ensemble=True)
        da = da_ensemble.mean(dim="number")
    else:
        da = dd.download_data(location, xarray=True)

    sliced_da = da.sel(time=slice(minyear, maxyear))

    if isinstance(location, str):
        mask_filepath = find_mask(location)
        masked_da = dd.apply_mask(sliced_da, mask_filepath)
        multiindex_df = masked_da.to_dataframe()
        df = multiindex_df.dropna().reset_index()

    else:
        da_location = sliced_da.interp(coords={
            "lat": location[0],
            "lon": location[1]
        },
                                       method="nearest")
        multiindex_df = da_location.to_dataframe()
        df = multiindex_df.dropna().reset_index()

    df["time"] = df["time"] - 1970  # to years
    df["tp"] = log_transform(df["tp"])
    df = df[[
        "time", "lat", "lon", "slor", "anor", "z", "d2m", "tcwv", "N34", "tp"
    ]]  #format order

    xtr = df.drop(columns=["tp"]).values
    ytr = df["tp"].values

    return xtr, ytr
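# `log_transform` is referenced above but not defined in this snippet. A
# minimal sketch follows, assuming a log transform with a small offset to
# guard against log(0) on dry gridpoints; the project's actual helper may
# use a different offset or transform.
def log_transform(x, epsilon=0.001):
    """ Log-transform precipitation values (sketch, assumed offset) """
    return np.log(x + epsilon)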
def change_maps(data_filepath, mask_filepath, variable):
    """ Maps of average annual change from 1979 to 1989, 1999, 2009 and 2019 """

    da = dd.apply_mask(data_filepath, mask_filepath)
    da_var = da[variable] * 1000  # to mm/day

    da_1979 = da_var.sel(time=slice("1979-01-16T12:00:00",
                                    "1980-01-01T12:00:00"))
    da_processed = cumulative_monthly(da_1979)
    basin_1979_sum = da_processed.sum(dim="time")

    changes = []
    for year in [1989, 1999, 2009, 2019]:
        basin_year = da_var.sel(time=slice(
            str(year) + "-01-01T12:00:00",
            str(year + 1) + "-01-01T12:00:00"))
        basin_year_sum = cumulative_monthly(basin_year).sum(dim="time")
        changes.append(basin_year_sum / basin_1979_sum - 1)

    basin_changes = xr.concat(
        changes, pd.Index(["1989", "1999", "2009", "2019"], name="year"))

    g = basin_changes.plot(
        x="longitude",
        y="latitude",
        col="year",
        col_wrap=2,
        subplot_kws={"projection": ccrs.PlateCarree()},
        cbar_kwargs={
            "label": "Precipitation change",
            "format": tck.PercentFormatter(xmax=1.0),
        },
    )

    for ax in g.axes.flat:
        ax.coastlines()
        ax.gridlines()
        ax.set_extent([71, 83, 30, 38])
        ax.add_feature(cf.BORDERS)

    plt.show()
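# `cumulative_monthly` is referenced above but not defined in this snippet.
# A minimal sketch follows, assuming monthly-mean daily values (e.g. mm/day)
# that are scaled by the number of days in each month, so a sum over time
# gives a cumulative total; the project's actual helper may differ.
def cumulative_monthly(da):
    """ Scale monthly-mean daily values to monthly totals (sketch) """
    return da * da["time"].dt.days_in_month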
def annual_map(data_filepath, mask_filepath, variable, year, cumulative=False):
    """ Annual map """

    da = dd.apply_mask(data_filepath, mask_filepath)
    ds_year = da.sel(time=slice(
        str(year) + "-01-16T12:00:00",
        str(year + 1) + "-01-01T12:00:00"))
    ds_var = ds_year[variable] * 1000  # to mm/day

    if cumulative is True:
        ds_processed = cumulative_monthly(ds_var)
        ds_final = ds_processed.sum(dim="time")
    else:
        ds_final = ds_var.std(dim="time")  # TODO weighted mean

    print(ds_final)

    plt.figure()
    ax = plt.subplot(projection=ccrs.PlateCarree())
    ax.set_extent([71, 83, 30, 38])
    g = ds_final["tp_0001"].plot(
        cmap="magma_r",
        vmin=0.001,
        cbar_kwargs={
            "label": "Precipitation standard deviation [mm/day]",
            "extend": "neither",
            "pad": 0.10
        })
    g.cmap.set_under("white")
    #ax.add_feature(cf.BORDERS)
    ax.coastlines()
    ax.gridlines(draw_labels=True)
    ax.set_xlabel("Longitude")
    ax.set_ylabel("Latitude")
    plt.title("Upper Indus Basin Total Precipitation " + str(year) + "\n \n")
    plt.show()
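# Usage sketch (hedged): the file paths below are assumptions for
# illustration, not the project's actual data layout.
# annual_map("_Data/ERA5_monthly.nc", "_Data/Masks/ERA5_Upper_Indus_mask.nc",
#            "tp", 2010, cumulative=True)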
def areal_model(location,
                number=None,
                EDA_average=False,
                length=3000,
                seed=42):
    """
    Outputs test, validation and training data for total precipitation as a function of time, 2m dewpoint temperature,
    angle of sub-gridscale orography, orography, slope of sub-gridscale orography, total column water vapour,
    Nino 3.4 index for given number randomly sampled data points for a given basin.

    Inputs
        location: specify area to train model
        number, optional: specify desired ensemble run, integer 
        EDA_average, optional: specify if you want average of low resolution ensemble runs, boolean
        length, optional: specify number of points to sample, integer
        seed, optional: specify seed, integer

    Outputs
        x_train: training feature vector, numpy array
        y_train: training output vector, numpy array
        x_test: testing feature vector, numpy array
        y_test: testing output vector, numpy array
    """

    if number is not None:
        da_ensemble = dd.download_data(location, xarray=True, ensemble=True)
        da = da_ensemble.sel(number=number).drop("number")
    elif EDA_average:
        da_ensemble = dd.download_data(location, xarray=True, ensemble=True)
        da = da_ensemble.mean(dim="number")
    else:
        da = dd.download_data(location, xarray=True)

    # apply mask
    mask_filepath = find_mask(location)
    masked_da = dd.apply_mask(da, mask_filepath)

    multiindex_df = masked_da.to_dataframe()
    df_clean = multiindex_df.dropna().reset_index()
    df = sa.random_location_and_time_sampler(df_clean,
                                             length=length,
                                             seed=seed)

    df["time"] = df["time"] - 1970
    df["tp"] = log_transform(df["tp"])
    df = df[[
        "time", "lat", "lon", "slor", "anor", "z", "d2m", "tcwv", "N34", "tp"
    ]]

    # Remove last 10% of time for testing
    test_df = df[df["time"] > df["time"].max() * 0.9]
    xtest = test_df.drop(columns=["tp"]).values
    ytest = test_df["tp"].values

    # Remaining 90%, split chronologically into training and validation data
    tr_df = df[df["time"] <= df["time"].max() * 0.9]
    xtr = tr_df.drop(columns=["tp"]).values
    ytr = tr_df["tp"].values

    xtrain, xval, ytrain, yval = train_test_split(
        xtr, ytr, test_size=0.30, shuffle=False)
    """
    # Keep first of 70% for training
    train_df = df[ df['time']< df['time'].max()*0.7]
    xtrain = train_df.drop(columns=['tp']).values
    ytrain = train_df['tp'].values

    # Last 30% for evaluation
    eval_df = df[ df['time']> df['time'].max()*0.7]
    x_eval = eval_df.drop(columns=['tp']).values
    y_eval = eval_df['tp'].values

    # Training and validation data
    xval, xtest, yval, ytest = train_test_split(x_eval, y_eval, test_size=0.3333, shuffle=True)
    """
    return xtrain, xval, xtest, ytrain, yval, ytest
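# Usage sketch (hedged): "uib" is an assumed location key and the linear
# baseline is illustrative only, not the project's model.
if __name__ == "__main__":
    from sklearn.linear_model import LinearRegression
    from sklearn.metrics import r2_score

    xtrain, xval, xtest, ytrain, yval, ytest = areal_model("uib", length=3000)
    baseline = LinearRegression().fit(xtrain, ytrain)
    print("Validation R^2:", r2_score(yval, baseline.predict(xval)))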
def select_basin(dataset, location):
    """ Interpolate dataset at given coordinates """
    mask_filepath = dp.find_mask(location)
    basin = dd.apply_mask(dataset, mask_filepath)
    basin = basin.sel(time=slice(1990, 2005))
    return basin
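# Usage sketch (hedged): "indus" is an assumed location key; the project's
# mask names may differ.
# ds = dd.download_data("indus", xarray=True)
# indus_basin = select_basin(ds, "indus")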