def tp_vs(mask_filepath, variable, mask=None, longname=""):
    """
    df = dp.download_data(mask_filepath)
    df_var = df[['time','tp', variable]]
    #df_mean = df_var.groupby('time').mean()
    """

    if mask is None:
        df = dd.download_data(mask_filepath)
    else:
        cds_filepath = fd.update_cds_monthly_data()
        da = dd.apply_mask(cds_filepath, mask)
        df = da.to_dataframe().reset_index()

    df = df[["time", "tp", variable]]
    # gilgit = ds.interp(coords={'longitude':74.4584, 'latitude':35.8884 }, method='nearest')
    df_var = df.dropna()

    # Plot
    df_var.plot.scatter(x=variable, y="tp", alpha=0.2, c="b")
    plt.title("Upper Indus Basin")
    plt.ylabel("Total precipitation [m/day]")
    plt.xlabel(longname)
    plt.grid(True)
    plt.show()
def areal_model_eval(location,
                     number=None,
                     EDA_average=False,
                     minyear=1979,
                     maxyear=2020):
    """
    Returns data to evaluate an areal model at a given location, area and time period.
    Variables: total precipitation as a function of time, 2m dewpoint temperature, angle of 
    sub-gridscale orography, orography, slope of sub-gridscale orography, total column 
    water vapour, Nino 3.4, Nino 4 and NAO index for a single point.

    Inputs
        location: name of a mask area, string, or [latitude, longitude] coordinates, list of floats
        number, optional: specify desired ensemble run, integer
        EDA_average, optional: specify if you want average of low resolution ensemble runs, boolean
        minyear, maxyear, optional: start and end of the evaluation period

    Outputs
        xtr: evaluation feature vector, numpy array
        ytr: evaluation output vector, numpy array
    """
    if number is not None:
        da_ensemble = dd.download_data(location, xarray=True, ensemble=True)
        da = da_ensemble.sel(number=number).drop("number")
    elif EDA_average:
        da_ensemble = dd.download_data(location, xarray=True, ensemble=True)
        da = da_ensemble.mean(dim="number")
    else:
        da = dd.download_data(location, xarray=True)

    sliced_da = da.sel(time=slice(minyear, maxyear))

    if isinstance(location, str):
        mask_filepath = find_mask(location)
        masked_da = dd.apply_mask(sliced_da, mask_filepath)
        multiindex_df = masked_da.to_dataframe()
        df = multiindex_df.dropna().reset_index()

    else:
        da_location = sliced_da.interp(coords={
            "lat": location[0],
            "lon": location[1]
        },
                                       method="nearest")
        multiindex_df = da_location.to_dataframe()
        df = multiindex_df.dropna().reset_index()

    df["time"] = df["time"] - 1970  # to years
    df["tp"] = log_transform(df["tp"])
    df = df[[
        "time", "lat", "lon", "slor", "anor", "z", "d2m", "tcwv", "N34", "tp"
    ]]  #format order

    xtr = df.drop(columns=["tp"]).values
    ytr = df["tp"].values

    return xtr, ytr
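The final block of areal_model_eval reduces the cleaned DataFrame to a fixed column order and splits it into a feature matrix and a target vector. A minimal, self-contained sketch of the same pattern on synthetic data (column names follow the listing; all values here are made up):

import numpy as np
import pandas as pd

# Synthetic stand-in for the cleaned ERA5 dataframe used above (values are made up).
n = 100
rng = np.random.default_rng(0)
df = pd.DataFrame({
    "time": np.linspace(9.0, 50.0, n),     # decimal years since 1970, as in the listing
    "lat": np.full(n, 35.0), "lon": np.full(n, 75.0),
    "slor": np.full(n, 0.1), "anor": np.full(n, 0.2), "z": np.full(n, 4500.0),
    "d2m": rng.normal(270.0, 5.0, n), "tcwv": rng.normal(10.0, 2.0, n),
    "N34": rng.normal(0.0, 1.0, n),
    "tp": rng.random(n) * 5e-3,             # total precipitation [m/day]
})

# Fixed column order, then drop the target to obtain the feature matrix.
df = df[["time", "lat", "lon", "slor", "anor", "z", "d2m", "tcwv", "N34", "tp"]]
xtr = df.drop(columns=["tp"]).values   # evaluation features, shape (100, 9)
ytr = df["tp"].values                  # evaluation targets, shape (100,)
print(xtr.shape, ytr.shape)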
def eof_correlation(eof_filepath, mask_filepath):
    """ Returns plot and DataArray of areas with p<0.05 """

    print("processing precipitation")
    da = dd.download_data(mask_filepath, xarray=True)
    tp_ds = da.mean(dim=["latitude", "longitude"]).tp
    tp = tp_ds.assign_coords(time=(tp_ds.time.astype("datetime64")))
    tp_df = tp.to_dataframe()

    print("processing EOF")
    eof_da = xr.open_dataset(eof_filepath)
    eof_ds = eof_da.EOF
    eof = eof_ds.assign_coords(time=(eof_ds.time.astype("datetime64")))
    eof_df = eof.to_dataframe()
    eof_pv = pd.pivot_table(eof_df,
                            values="EOF",
                            index=["time"],
                            columns=["latitude", "longitude"])
    eof_reset = eof_pv.reset_index()
    eof_reset["time"] -= np.timedelta64(12, "h")

    print("combining")
    df_combined = pd.merge_ordered(tp_df, eof_reset, on="time")
    df_clean = df_combined.dropna()

    corr_s = df_clean.corrwith(df_clean["tp"])
    corr_df = corr_s.to_frame(name="corr")
    corr_df["pvalue"] = pvalue(df_clean)

    filepath = "Data/EOF_corr_pval.csv"
    corr_df.to_csv(filepath)

    return filepath
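eof_correlation pivots the EOF field to one column per grid cell, aligns it with the basin-mean precipitation series, and correlates every column with tp. A small self-contained sketch of that pivot/merge/correlate pattern on synthetic data (the pvalue helper from this listing is not reproduced; the flattened column names are illustrative):

import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
times = pd.date_range("1979-01-01", periods=24, freq="MS")

# Long-format "EOF" field: one value per (time, latitude, longitude) triple.
eof_df = pd.DataFrame({
    "time": np.repeat(times, 4),
    "latitude": np.tile([34.5, 36.0], 48),
    "longitude": np.tile([73.0, 73.0, 75.0, 75.0], 24),
    "EOF": rng.normal(size=96),
})

# Wide format: one column per grid cell, then flatten the column index.
eof_pv = pd.pivot_table(eof_df, values="EOF", index=["time"],
                        columns=["latitude", "longitude"])
eof_pv.columns = [f"EOF_{lat}N_{lon}E" for lat, lon in eof_pv.columns]
eof_reset = eof_pv.reset_index()

# Basin-mean precipitation series to correlate against.
tp_df = pd.DataFrame({"time": times, "tp": rng.random(24)})

combined = pd.merge_ordered(tp_df, eof_reset, on="time").dropna()
corr = combined.drop(columns="time").corrwith(combined["tp"])
print(corr.head())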
Example #4
def StartSystem():
    """
    Start all subsystems
    """
    HoldManager.Start()
    DataDownloader.Start()
    BalanceManager.Start()
    OrderManager.Start()
Example #5
def change_maps(data_filepath, mask_filepath, variable):
    """ Maps of average annual change from 1979 to 1989, 1999, 2009 and 2019 """

    da = dd.apply_mask(data_filepath, mask_filepath)
    da_var = da[variable] * 1000  # to mm/day

    da_1979 = da_var.sel(time=slice("1979-01-16T12:00:00",
                                    "1980-01-01T12:00:00"))
    da_processed = cumulative_monthly(da_1979)
    basin_1979_sum = da_processed.sum(dim="time")

    basin_1989 = da_var.sel(
        time=slice("1989-01-01T12:00:00", "1990-01-01T12:00:00"))
    basin_1989_sum = cumulative_monthly(basin_1989).sum(dim="time")
    basin_1989_change = basin_1989_sum / basin_1979_sum - 1

    basin_1999 = da_var.sel(
        time=slice("1999-01-01T12:00:00", "2000-01-01T12:00:00"))
    basin_1999_sum = cumulative_monthly(basin_1999).sum(dim="time")
    basin_1999_change = basin_1999_sum / basin_1979_sum - 1

    basin_2009 = da_var.sel(
        time=slice("2009-01-01T12:00:00", "2010-01-01T12:00:00"))
    basin_2009_sum = cumulative_monthly(basin_2009).sum(dim="time")
    basin_2009_change = basin_2009_sum / basin_1979_sum - 1

    basin_2019 = da_var.sel(
        time=slice("2019-01-01T12:00:00", "2020-01-01T12:00:00"))
    basin_2019_sum = cumulative_monthly(basin_2019).sum(dim="time")
    basin_2019_change = basin_2019_sum / basin_1979_sum - 1

    basin_changes = xr.concat(
        [
            basin_1989_change, basin_1999_change, basin_2009_change,
            basin_2019_change
        ],
        pd.Index(["1989", "1999", "2009", "2019"], name="year"),
    )

    g = basin_changes.plot(
        x="longitude",
        y="latitude",
        col="year",
        col_wrap=2,
        subplot_kws={"projection": ccrs.PlateCarree()},
        cbar_kwargs={
            "label": "Precipitation change",
            "format": tck.PercentFormatter(xmax=1.0),
        },
    )

    for ax in g.axes.flat:
        ax.coastlines()
        ax.gridlines()
        ax.set_extent([71, 83, 30, 38])
        ax.add_feature(cf.BORDERS)

    plt.show()
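The facet plot above relies on concatenating the four change maps along a new "year" dimension. A minimal sketch of that pattern with synthetic fields (cartopy is left out so the example stays self-contained):

import numpy as np
import pandas as pd
import xarray as xr
import matplotlib.pyplot as plt

rng = np.random.default_rng(0)
lats = np.arange(30.0, 38.0, 1.0)
lons = np.arange(71.0, 83.0, 1.0)

# One synthetic "change" field per decade.
fields = [
    xr.DataArray(rng.normal(scale=0.2, size=(len(lats), len(lons))),
                 coords={"latitude": lats, "longitude": lons},
                 dims=["latitude", "longitude"])
    for _ in range(4)
]

# Concatenate along a new "year" dimension and facet by it.
changes = xr.concat(fields, pd.Index(["1989", "1999", "2009", "2019"], name="year"))
changes.plot(x="longitude", y="latitude", col="year", col_wrap=2)
plt.show()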
Example #6
def single_location_comparison(model_filepath, lat, lon):
    """ Plots model outputs for given coordinates over time """

    era5_ds = dd.collect_ERA5()
    cmip_ds = dd.collect_CMIP5()
    cordex_ds = dd.collect_CORDEX()
    cru_ds = dd.collect_CRU()
    #aphro_ds = dd.collect_APHRO()

    era5_ts = select_coords(era5_ds, lat, lon)
    cmip_ts = select_coords(cmip_ds, lat, lon)
    cordex_ts = select_coords(cordex_ds, lat, lon)
    cru_ts = select_coords(cru_ds, lat, lon)
    #aphro_ts = select_coords(aphro_ds, lat, lon)

    timeseries = [era5_ts, cmip_ts, cordex_ts, cru_ts]  #, aphro_ts]

    xtr, y_gpr_t, y_std_t = model_prep([lat, lon], model_filepath)

    tims.benchmarking_plot(timeseries, xtr, y_gpr_t, y_std_t)
    dataset_stats(timeseries, xtr, y_gpr_t, y_std_t)
    corr.dataset_correlation(timeseries, y_gpr_t)
    pdf.benchmarking_plot(timeseries, y_gpr_t)
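select_coords is defined elsewhere in the repository; a plausible minimal sketch of picking the nearest grid cell from a gridded dataset with xarray (the lat/lon/tp names are assumed from the rest of the listing, and the data here are synthetic):

import numpy as np
import xarray as xr

# Synthetic gridded precipitation dataset standing in for, e.g., the ERA5 collection.
ds = xr.Dataset(
    {"tp": (("time", "lat", "lon"), np.random.rand(12, 5, 5))},
    coords={"time": np.arange(12),
            "lat": np.linspace(34, 38, 5),
            "lon": np.linspace(73, 77, 5)},
)

# Nearest grid cell to the requested coordinates, as a 1D time series.
ts = ds.sel(lat=36.1, lon=75.3, method="nearest")
print(ts.tp.values)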
Example #7
def basin_comparison(model_filepath, location):
    """ Plots model outputs for given coordinates over time """

    era5_ds = dd.collect_ERA5()
    cmip_ds = dd.collect_CMIP5()
    cordex_ds = dd.collect_CORDEX()
    cru_ds = dd.collect_CRU()
    #aphro_ds = dd.collect_APHRO()

    era5_bs = select_basin(era5_ds, location)
    cmip_bs = select_basin(cmip_ds, location)
    cordex_bs = select_basin(cordex_ds, location)
    cru_bs = select_basin(cru_ds, location)
    #aphro_bs = select_basin(aphro_ds, location)

    basins = [era5_bs, cmip_bs, cordex_bs, cru_bs]  #, aphro_ts]

    xtr, y_gpr_t, y_std_t = model_prep(location, model_filepath)

    tims.benchmarking_plot(basins, xtr, y_gpr_t, y_std_t)
    dataset_stats(basins, xtr, y_gpr_t, y_std_t)
    corr.dataset_correlation(basins, y_gpr_t)
    pdf.benchmarking_plot(basins, y_gpr_t)
def merge_csv(data_dir, out_dir, plot=False, ver=False):
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
    os.chdir(out_dir)

    files = sorted(os.listdir(data_dir))
    syear,eyear = int(files[0].split('_')[3]), int(files[-1].split('_')[3])
    smonth,emonth = int(files[0].split('_')[4][0:-4]), int(files[-1].split('_')[4][0:-4])
    name = files[0].split('_')[0:3]

    if os.path.exists(out_dir+'/'+name[0]+'_'+name[1]+'_'+name[2]+'_merged.csv'):
        return name,'no errors'

    # Merge csv files into one pandas dataframe
    def read_append(data_dir,names,name,year,month,ver=False):
        path = data_dir+'/'+name[0]+'_'+name[1]+'_'+name[2]+'_%s_%s.csv' % (year,month)
        frame = pd.read_csv(path, header=14, parse_dates=['Date/Time'], index_col=['Date/Time'])
        names = pd.concat([names, frame])
        if ver:
            print(path)
        return names

    years = range(syear, eyear+1)
    months,smonths,emonths = range(1,12+1), range(smonth,12+1), range(1,emonth+1)
    names = pd.DataFrame()
    for year in years:
        if year==eyear:
            for month in emonths:
                try:
                    names = read_append(data_dir,names,name,year,month)
                except ValueError as e:
                    print(e)
                    return name,e
                except IOError as e:
                    print(e)
                    dd.downloader(data_dir,name[0],name[1],name[2],1,month,year,verbose='on')
                    names = read_append(data_dir,names,name,year,month)
                    return name, e
def spatial_autocorr(variable, mask_filepath):  # TODO
    """ Plots spatial autocorrelation """

    df = dd.download_data(mask_filepath)
    # detrend
    table = pd.pivot_table(df,
                           values="tp",
                           index=["latitude", "longitude"],
                           columns=["time"])
    trans_table = table.T
    detrended = detrend(trans_table, axis=0)  # returns a numpy array
    detrended_table = pd.DataFrame(detrended, index=trans_table.index,
                                   columns=trans_table.columns)
    corr_table = detrended_table.corr()
    print(corr_table)

    corr_khyber = corr_table.loc[(34.5, 73.0)]
    corr_gilgit = corr_table.loc[(36.0, 75.0)]
    corr_ngari = corr_table.loc[(33.5, 79.0)]

    corr_list = [corr_khyber, corr_gilgit, corr_ngari]

    for corr in corr_list:

        df_reset = corr.reset_index().droplevel(1, axis=1)
        df_pv = df_reset.pivot(index="latitude", columns="longitude")
        df_pv = df_pv.droplevel(0, axis=1)
        da = xr.DataArray(data=df_pv, name="Correlation")

        # Plot

        plt.figure()
        ax = plt.subplot(projection=ccrs.PlateCarree())
        ax.set_extent([71, 83, 30, 38])
        g = da.plot(
            x="longitude",
            y="latitude",
            add_colorbar=True,
            ax=ax,
            vmin=-1,
            vmax=1,
            cmap="coolwarm",
            cbar_kwargs={"pad": 0.10},
        )
        ax.gridlines(draw_labels=True)
        ax.set_xlabel("Longitude")
        ax.set_ylabel("Latitude")

    plt.show()
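spatial_autocorr detrends each grid cell's time series and then correlates the cells against each other. A small self-contained sketch of the detrend-then-correlate step on synthetic data (cell coordinates follow the listing):

import numpy as np
import pandas as pd
from scipy.signal import detrend

rng = np.random.default_rng(0)
times = np.arange(120)                      # e.g. 120 monthly steps
cells = [(34.5, 73.0), (36.0, 75.0), (33.5, 79.0)]

# One column per grid cell: a common linear trend plus cell-specific noise.
table = pd.DataFrame(
    {cell: 0.01 * times + rng.normal(size=times.size) for cell in cells},
    index=times,
)

# Remove the linear trend along time, then correlate cells with each other.
detrended = pd.DataFrame(detrend(table, axis=0), index=table.index, columns=table.columns)
corr_table = detrended.corr()
print(corr_table.round(2))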
def averaged_timeseries(mask_filepath,
                        variable="tp",
                        longname="Total precipitation [m/day]"):
    """ Timeseries for the Upper Indus Basin"""

    df = dd.download_data(mask_filepath)

    df_var = df[["time", variable]]

    df_var["time"] = df_var["time"].astype(np.datetime64)
    df_mean = df_var.groupby("time").mean()

    df_mean.plot()
    plt.title("Upper Indus Basin")
    plt.ylabel(longname)
    plt.xlabel("Year")
    plt.grid(True)

    plt.show()
def uib_sample_linreg():
    """ Plots sample timeseries for UIB clusters """

    # Open data
    mask_filepath = "Data/Masks/ERA5_Upper_Indus_mask.nc"
    tp = dd.download_data(mask_filepath, xarray=True)
    tp_da = tp.tp * 1000  # convert from m/day to mm/day

    ## Data
    gilgit = tp_da.interp(coords={"lon": 75, "lat": 36}, method="nearest")
    ngari = tp_da.interp(coords={"lon": 81, "lat": 32}, method="nearest")
    khyber = tp_da.interp(coords={"lon": 73, "lat": 34.5}, method="nearest")
    timeseries = [gilgit, ngari, khyber]

    gilgit_linear_model = lin_reg(gilgit)
    ngari_linear_model = lin_reg(ngari)
    khyber_linear_model = lin_reg(khyber)
    linear_models = [gilgit_linear_model, ngari_linear_model, khyber_linear_model]

    linreg_plot(timeseries, linear_models)
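lin_reg and linreg_plot are helpers from this repository; a minimal stand-in sketch of fitting a linear trend to one of the interpolated time series with scipy (synthetic data, not the repo's implementation):

import numpy as np
from scipy import stats

# Synthetic precipitation series standing in for, e.g., the Gilgit time series.
years = np.linspace(1979, 2019, 480)          # decimal years, roughly monthly resolution
tp_mm_day = 2.0 + 0.01 * (years - 1979) + np.random.default_rng(0).normal(0, 0.5, years.size)

res = stats.linregress(years, tp_mm_day)
print(f"slope = {res.slope:.4f} mm/day per year, p-value = {res.pvalue:.3f}")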
def input_correlation_heatmap():

    df = dd.download_data(mask_filepath, all_var=True)

    # create lags
    df["N34-1"] = df["N34"].shift(periods=393)
    df["NAO-1"] = df["NAO"].shift(periods=393)
    df["N4-1"] = df["N4"].shift(periods=393)

    df = df.drop(columns=["time"])
    df_clean = df.dropna()
    df_sorted = df_clean.sort_index(axis=1)
    corr = df_sorted.corr()

    sns.set(style="white")
    plt.subplots(figsize=(11, 9))
    cmap = sns.diverging_palette(220, 10, as_cmap=True)
    mask = np.triu(np.ones_like(
        corr, dtype=bool))  # generate a mask for the upper triangle
    sns.heatmap(
        corr,
        mask=mask,
        cmap=cmap,
        center=0,
        vmin=-1,
        vmax=1,
        fmt="0.2f",
        square=True,
        linewidths=0.5,
        annot=True,
        annot_kws={"size": 5},
        cbar_kws={"shrink": 0.5},
    )
    plt.title("Correlation plot for Upper Indus Basin")

    plt.show()
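The heatmap above hides the redundant upper triangle of the correlation matrix with np.triu. A compact, self-contained version of the same masking trick on a random correlation matrix (column names are illustrative):

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

rng = np.random.default_rng(0)
df = pd.DataFrame(rng.normal(size=(200, 5)), columns=["tp", "d2m", "tcwv", "N34", "NAO"])
corr = df.corr()

mask = np.triu(np.ones_like(corr, dtype=bool))   # hide the upper triangle
sns.heatmap(corr, mask=mask, cmap=sns.diverging_palette(220, 10, as_cmap=True),
            center=0, vmin=-1, vmax=1, annot=True, fmt="0.2f", square=True)
plt.show()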
Example #13
def annual_map(data_filepath, mask_filepath, variable, year, cumulative=False):
    """ Annual map """

    da = dd.apply_mask(data_filepath, mask_filepath)
    ds_year = da.sel(time=slice(
        str(year) + "-01-16T12:00:00",
        str(year + 1) + "-01-01T12:00:00"))
    ds_var = ds_year[variable] * 1000  # to mm/day

    if cumulative is True:
        ds_processed = cumulative_monthly(ds_var)
        ds_final = ds_processed.sum(dim="time")
    else:
        ds_final = ds_var.std(dim="time")  # TODO weighted mean

    print(ds_final)

    plt.figure()
    ax = plt.subplot(projection=ccrs.PlateCarree())
    ax.set_extent([71, 83, 30, 38])
    g = ds_final["tp_0001"].plot(
        cmap="magma_r",
        vmin=0.001,
        cbar_kwargs={
            "label": "Precipitation standard deviation [mm/day]",
            "extend": "neither",
            "pad": 0.10
        })
    g.cmap.set_under("white")
    #ax.add_feature(cf.BORDERS)
    ax.coastlines()
    ax.gridlines(draw_labels=True)
    ax.set_xlabel("Longitude")
    ax.set_ylabel("Latitude")
    plt.title("Upper Indus Basin Total Precipitation " + str(year) + "\n \n")
    plt.show()
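cumulative_monthly is defined elsewhere in the repository; assuming it converts monthly-mean daily rates into monthly totals, one plausible sketch uses xarray's days_in_month accessor (an assumption for illustration, not the repo's code):

import numpy as np
import pandas as pd
import xarray as xr

# Synthetic monthly-mean precipitation rate [mm/day] on a tiny grid.
times = pd.date_range("2000-01-01", periods=12, freq="MS")
da = xr.DataArray(np.random.rand(12, 2, 2),
                  coords={"time": times, "latitude": [35.0, 36.0], "longitude": [74.0, 75.0]},
                  dims=["time", "latitude", "longitude"])

# One plausible reading of cumulative_monthly: rate [mm/day] x days in month = total [mm].
monthly_totals = da * da.time.dt.days_in_month
annual_total = monthly_totals.sum(dim="time")
print(annual_total.values)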
Example #14
def StopSystem():
    """
    Stop all subsystems
    """
    OrderManager.Stop()
    DataDownloader.Stop()
Example #15
#print DataDownloader.downloader(wd='/home/nbrown/Desktop',stationID='1706',interval='hourly',day='14',month='7',year='2001',verbose='off')


# Test multipleDownloads
l = [['51157', 'hourly', '1', '1', '2015'],
     ['51157', 'hourly', '1', '2', '2015'],
     ['51157', 'hourly', '1', '3', '2015'],
     ['51157', 'hourly', '1', '4', '2015'],
     ['51157', 'hourly', '1', '5', '2015'],
     ['51157', 'hourly', '1', '6', '2015'],
     ['51157', 'hourly', '1', '7', '2015'],
     ['51157', 'hourly', '1', '8', '2015'],
     ['51157', 'hourly', '1', '9', '2015'],
     ['51157', 'hourly', '1', '10', '2015'],
     ['51157', 'hourly', '1', '11', '2015'],
     ['51157', 'hourly', '1', '12', '2015']]

#DataDownloader.multipleDownloads('/home/nbrown/Desktop',l)



# Test findStations
wd='/home/nbrown/Desktop'
a = DataDownloader.findStations(DataDownloader.genStationsDict(wd),name='montreal',interval='hourly',tp=['1950','2014'],verbose='on')
b = DataDownloader.genDownloadList(a,verbose='on')


# Test: Check that downloader gives the right errors on wrong inputs or handles input conversion properly


Example #16
def select_basin(dataset, location):
    """ Interpolate dataset at given coordinates """
    mask_filepath = dp.find_mask(location)
    basin = dd.apply_mask(dataset, mask_filepath)
    basin = basin.sel(time=slice(1990, 2005))
    return basin
def get_coord(stationName,stationInvFileDir):
    stationInv = dd.genStationsDict(stationInvFileDir,downloadNew=False,ver=False)
    lat,lon,elev = stationInv[stationName][5],stationInv[stationName][6],stationInv[stationName][9]
    return lat,lon,elev
Example #18
import numpy as np
import xarray as xr

import DataDownloader as dd
import Maps as maps

## Filepaths
mask_filepath = "Data/ERA5_Upper_Indus_mask.nc"
dem_filepath = "Data/elev.0.25-deg.nc"

## Function inputs

### Digital Elevation Model data
dem = xr.open_dataset(dem_filepath)
dem_da = (dem.data).sum(dim="time")
sliced_dem = dem_da.sel(lat=slice(38, 30), lon=slice(71.25, 82.75))

### Precipitation data
da = dd.download_data(mask_filepath, xarray=True)
tp_da = da.tp

### Decade list
decades = [1980, 1990, 2000, 2010]

### Cluster list
N = np.arange(2, 11, 1)


def seasonal_clusters(tp_da, sliced_dem, N, decades):
    """
    K-means clustering of precipitation data as a function of seasons, decades and number of clusters.
    Returns spatial graphs, overlayed with the local topography contours.

    Inputs:
Example #19
    'DataDownloader',  # runs on multiple cpu
    'DataExtractor',  # runs on multiple cpu
    'RoleUpdater',
    'DataProcessing',  # runs on multiple cpu
    'DataShuffling',  # runs on multiple cpu
    'Learner',  # runs on gpu
    'BestPicks',
]

if __name__ == '__main__':
    if 'PlayersListing' in to_execute:
        import PlayersListing
        PlayersListing.run(m)
    if 'DataDownloader' in to_execute:
        import DataDownloader
        DataDownloader.run(m)
    if 'DataExtractor' in to_execute:
        import DataExtractor
        DataExtractor.run(m, cpu)
    if 'RoleUpdater' in to_execute:
        import RoleUpdater
        RoleUpdater.run(m)
    if 'DataProcessing' in to_execute:
        import DataProcessing
        DataProcessing.run(m, cpu)
    if 'DataShuffling' in to_execute:
        import DataShuffling
        DataShuffling.run(m, shuffling_files, keep_for_testing, cpu)
    if 'Learner' in to_execute:
        import Learner
        Learner.run(m, n, restore)
def get_coord(stationName, stationInvFileDir):
    stationInv = dd.genStationsDict(stationInvFileDir, downloadNew=False, ver=False)
    lat, lon, elev = stationInv[stationName][5], stationInv[stationName][6], stationInv[stationName][9]
    return lat, lon, elev


files = sorted(os.listdir('/home/nbrown/Desktop/plots'))
files[:] = [ x for x in files if '_merged.csv' in x ]
lats,lons,elevs = np.empty(0),np.empty(0),np.empty(0)
count=1.
for f in files:
    f = f.split('_')[0]
    try:
        lat,lon,elev = get_coord(f,'/home/nbrown/Desktop/test')
        lats = np.append(lats,lat)
        lons = np.append(lons,lon)
        elevs = np.append(elevs,elev)
    except KeyError as e:
        print(e)
    dd.update_progress(count/len(files))
    count=count+1.

fig1 = plt.figure(figsize=(6,9.5))

ax = plt.subplot(111,projection=ccrs.Mollweide(central_longitude=-95))
SC = ax.scatter(lons,lats,marker='o',transform=ccrs.PlateCarree())
#cbar = plt.colorbar(CS, cmap='coolwarm', orientation='horizontal')

plt.gca().coastlines(resolution='50m')
plt.grid()
fig1.show()
def cluster_correlation_heatmap():

    masks = ["Khyber_mask.nc", "Gilgit_mask.nc", "Ngari_mask.nc"]
    names = ["Gilgit regime", "Ngari regime", "Khyber regime"]

    for i in range(3):
        cluster_df = dd.download_data(masks[i])

        # create lags
        cluster_df["CGTI-1"] = cluster_df["CGTI"].shift(periods=1)
        cluster_df["CGTI-2"] = cluster_df["CGTI"].shift(periods=2)
        cluster_df["CGTI-3"] = cluster_df["CGTI"].shift(periods=3)
        cluster_df["CGTI-4"] = cluster_df["CGTI"].shift(periods=4)
        cluster_df["CGTI-5"] = cluster_df["CGTI"].shift(periods=5)
        cluster_df["CGTI-6"] = cluster_df["CGTI"].shift(periods=6)
        """'
        df_combined['N34-1'] = df_combined['N34'].shift(periods=1)
        df_combined['N34-2'] = df_combined['N34'].shift(periods=2)
        df_combined['N34-3'] = df_combined['N34'].shift(periods=3)
        df_combined['N34-4'] = df_combined['N34'].shift(periods=4)
        df_combined['N34-5'] = df_combined['N34'].shift(periods=5)
        df_combined['N34-6'] = df_combined['N34'].shift(periods=6)

        df_combined['N4-1'] = df_combined['N4'].shift(periods=1)
        df_combined['N4-2'] = df_combined['N4'].shift(periods=2)
        df_combined['N4-3'] = df_combined['N4'].shift(periods=3)
        df_combined['N4-4'] = df_combined['N4'].shift(periods=4)
        df_combined['N4-5'] = df_combined['N4'].shift(periods=5)
        df_combined['N4-6'] = df_combined['N4'].shift(periods=6)
        """
        df = cluster_df(columns=["expver", "time"])
        df_clean = df.dropna()
        df_sorted = df_clean.sort_index(axis=1)

        # Correlation matrix
        corr = df_sorted.corr()

        # Plot
        sns.set(style="white")

        plt.subplots(figsize=(11, 9))
        cmap = sns.diverging_palette(220, 10, as_cmap=True)

        # Draw the heatmap with the mask and correct aspect ratio
        mask = np.triu(np.ones_like(
            corr, dtype=bool))  # generate a mask for the upper triangle
        sns.heatmap(
            corr,
            mask=mask,
            cmap=cmap,
            center=0,
            vmin=-1,
            vmax=1,
            square=True,
            linewidths=0.5,
            cbar_kws={"shrink": 0.5},
        )

        plt.title(names[i] + "\n")

    plt.show()
                    return name,e
                except IOError as e:
                    print(e)
                    dd.downloader(data_dir,name[0],name[1],name[2],1,month,year,verbose='on')
                    names = read_append(data_dir,names,name,year,month)
                    return name, e
        elif year==syear:
            for month in smonths:
                try:
                    names = read_append(data_dir,names,name,year,month)
                except ValueError as e:
                    print(e)
                    return name,e
                except IOError as e:
                    print(e)
                    dd.downloader(data_dir,name[0],name[1],name[2],1,month,year,verbose='on')
                    names = read_append(data_dir,names,name,year,month)
                    return name, e
        else:
            for month in months:
                try:
                    names = read_append(data_dir,names,name,year,month)
                except ValueError as e:
                    print(e)
                    return name,e
                except IOError as e:
                    print(e)
                    dd.downloader(data_dir,name[0],name[1],name[2],1,month,year,verbose='on')
                    names = read_append(data_dir,names,name,year,month)
                    return name, e
    names.to_csv(out_dir+'/'+name[0]+'_'+name[1]+'_'+name[2]+'_merged.csv')
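merge_csv above stitches per-month station CSVs into one file. A compact alternative sketch of the same merge using glob and pd.concat (the header offset and the 'Date/Time' column layout are taken from the listing; paths and the function name are hypothetical):

import glob
import os
import pandas as pd

def merge_station_csvs(data_dir, out_path):
    """Concatenate all monthly CSVs in data_dir into a single file, sorted by date."""
    paths = sorted(glob.glob(os.path.join(data_dir, "*.csv")))
    frames = [pd.read_csv(p, header=14, parse_dates=["Date/Time"], index_col=["Date/Time"])
              for p in paths]
    merged = pd.concat(frames).sort_index()
    merged.to_csv(out_path)
    return merged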
def areal_model(location,
                number=None,
                EDA_average=False,
                length=3000,
                seed=42):
    """
    Outputs test, validation and training data for total precipitation as a function of time, 2m dewpoint temperature,
    angle of sub-gridscale orography, orography, slope of sub-gridscale orography, total column water vapour,
    Nino 3.4 index for given number randomly sampled data points for a given basin.

    Inputs
        location: specify area to train model
        number, optional: specify desired ensemble run, integer 
        EDA_average, optional: specify if you want average of low resolution ensemble runs, boolean
        length, optional: specify number of points to sample, integer
        seed, optional: specify seed, integer

    Outputs
        x_train: training feature vector, numpy array
        y_train: training output vector, numpy array
        x_test: testing feature vector, numpy array
        y_test: testing output vector, numpy array
    """

    if number is not None:
        da_ensemble = dd.download_data(location, xarray=True, ensemble=True)
        da = da_ensemble.sel(number=number).drop("number")
    elif EDA_average:
        da_ensemble = dd.download_data(location, xarray=True, ensemble=True)
        da = da_ensemble.mean(dim="number")
    else:
        da = dd.download_data(location, xarray=True)

    # apply mask
    mask_filepath = find_mask(location)
    masked_da = dd.apply_mask(da, mask_filepath)

    multiindex_df = masked_da.to_dataframe()
    df_clean = multiindex_df.dropna().reset_index()
    df = sa.random_location_and_time_sampler(df_clean,
                                             length=length,
                                             seed=seed)

    df["time"] = df["time"] - 1970
    df["tp"] = log_transform(df["tp"])
    df = df[[
        "time", "lat", "lon", "slor", "anor", "z", "d2m", "tcwv", "N34", "tp"
    ]]

    # Remove last 10% of time for testing
    test_df = df[df["time"] > df["time"].max() * 0.9]
    xtest = test_df.drop(columns=["tp"]).values
    ytest = test_df["tp"].values

    # Training and validation data
    tr_df = df[df["time"] < df["time"].max() * 0.9]
    xtr = tr_df.drop(columns=["tp"]).values
    ytr = tr_df["tp"].values

    xtrain, xval, ytrain, yval = train_test_split(
        xtr, ytr, test_size=0.30,
        shuffle=False)  # Training and validation data
    """
    # Keep first of 70% for training
    train_df = df[ df['time']< df['time'].max()*0.7]
    xtrain = train_df.drop(columns=['tp']).values
    ytrain = train_df['tp'].values

    # Last 30% for evaluation
    eval_df = df[ df['time']> df['time'].max()*0.7]
    x_eval = eval_df.drop(columns=['tp']).values
    y_eval = eval_df['tp'].values

    # Training and validation data
    xval, xtest, yval, ytest = train_test_split(x_eval, y_eval, test_size=0.3333, shuffle=True)
    """
    return xtrain, xval, xtest, ytrain, yval, ytest
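areal_model holds out the last 10% of the time axis for testing and then splits the remainder into training and validation without shuffling, so the validation data also come later in time than the training data. A small sketch of that chronological split on a synthetic frame:

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

rng = np.random.default_rng(42)
df = pd.DataFrame({"time": np.sort(rng.uniform(9.0, 50.0, 1000)),
                   "tp": rng.random(1000)})

cutoff = df["time"].max() * 0.9

# Last 10% of the period is held out for testing.
test_df = df[df["time"] > cutoff]
xtest, ytest = test_df.drop(columns=["tp"]).values, test_df["tp"].values

# Earlier 90% is split chronologically into training and validation (no shuffling).
tr_df = df[df["time"] < cutoff]
xtr, ytr = tr_df.drop(columns=["tp"]).values, tr_df["tp"].values
xtrain, xval, ytrain, yval = train_test_split(xtr, ytr, test_size=0.30, shuffle=False)
print(xtrain.shape, xval.shape, xtest.shape)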
def point_model(location, number=None, EDA_average=False):
    """
    Outputs test, validation and training data for total precipitation as a function of time, 2m dewpoint temperature,
    angle of sub-gridscale orography, orography, slope of sub-gridscale orography, total column water vapour,
    Nino 3.4, Nino 4 and NAO index for a single point.

    Inputs
        number, optional: specify desired ensemble run, integer
        EDA_average, optional: specify if you want average of low resolution ensemble runs, boolean
        coords [latitude, longitude], optional: specify if you want a specific location, list of floats
        mask_filepath, optional:

    Outputs
        x_train: training feature vector, numpy array
        y_train: training output vector, numpy array
        x_test: testing feature vector, numpy array
        y_test: testing output vector, numpy array
    """
    if number is not None:
        da_ensemble = dd.download_data(location, xarray=True, ensemble=True)
        da = da_ensemble.sel(number=number).drop("number")
    elif EDA_average:
        da_ensemble = dd.download_data(location, xarray=True, ensemble=True)
        da = da_ensemble.mean(dim="number")
    else:
        da = dd.download_data(location, xarray=True)

    if isinstance(location, str):
        multiindex_df = da.to_dataframe()
        df_clean = multiindex_df.dropna().reset_index()
        df_location = sa.random_location_sampler(df_clean)
        df = df_location.drop(columns=["lat", "lon", "slor", "anor", "z"])

    else:
        da_location = da.interp(coords={
            "lat": location[0],
            "lon": location[1]
        },
                                method="nearest")
        multiindex_df = da_location.to_dataframe()
        df_clean = multiindex_df.dropna().reset_index()
        df = df_clean.drop(columns=["lat", "lon", "slor", "anor", "z"])

    df["time"] = df["time"] - 1970  # to years
    df["tp"] = log_transform(df["tp"])
    df = df[["time", "d2m", "tcwv", "N34", "tp"]]  #format order

    # Keep first of 70% for training
    train_df = df[df["time"] < df["time"].max() * 0.7]
    xtrain = train_df.drop(columns=["tp"]).values
    ytrain = train_df["tp"].values

    # Last 30% for evaluation
    eval_df = df[df["time"] > df["time"].max() * 0.7]
    x_eval = eval_df.drop(columns=["tp"]).values
    y_eval = eval_df["tp"].values

    # Training and validation data
    xval, xtest, yval, ytest = train_test_split(x_eval,
                                                y_eval,
                                                test_size=0.3333,
                                                shuffle=False)

    return xtrain, xval, xtest, ytrain, yval, ytest
Example #25

declineProbe = None
riseProbe = None

if __name__ == '__main__':
    InitSystem()
    StartSystem()
    logStr = "All System Started!"
    Log.Print(logStr)
    Log.Info(Const.logFile, logStr)
    while (True):
        if Terminated():
            break

        if DataDownloader.DataValid() and len(DataDownloader.realTimeBids) > 0:
            currBidPrice = DataDownloader.realTimeBids[-1]
            TryToSell(currBidPrice)

            currAskPrice = DataDownloader.realTimeAsks[-1]
            if declineProbe is None and riseProbe is None:
                declineProbe = SetProbe(currAskPrice, 0)
                riseProbe = SetProbe(currAskPrice, 1)
            else:
                if len(DataDownloader.realTimeAsks) < 60:
                    time.sleep(0.5)
                    continue

                meanPrice = np.mean(DataDownloader.realTimeAsks[-10:])
                if declineProbe.Triggered(meanPrice):
                    declineProbe = SetProbe(currAskPrice, 0, declineProbe)