def get_station_objects(db_path="/home/san/Downloads/Hydat.sqlite"):
    """
    Load a fixed set of HYDAT stations, print each of them and return them.

    :param db_path: path to the HYDAT sqlite database file
    :return: whatever cehq_station.load_from_hydat_db returns for the
        hard-coded station ids and the module-level PROVINCE
    """
    station_ids = ["05BB001", "05BH005", "05BH004", "05BM004"]

    loaded = cehq_station.load_from_hydat_db(
        natural=None,
        province=PROVINCE,
        path=db_path,
        selected_ids=station_ids,
    )

    # echo the loaded stations to stdout
    for station in loaded:
        print(station)

    # extra stations could be appended here before returning, if required
    return loaded
def main():
    """
    Print id, longitude, latitude and drainage area of selected HYDAT stations.

    The station ids are listed in a multi-line string, the corresponding
    stations are loaded from the HYDAT database (any province, data checks
    skipped) and written to stdout as tab-separated rows.

    :return: None
    """

    s = """
    06DA002
    06CD002
    06EA002
    05FE004
    05EF001
    05BN012
    05CK004
    06AG006
    05AK001
    05QB003
    05LM006
    05KJ001
    05MD004
    05JU001
    """

    # split() without arguments splits on any whitespace run and never yields
    # empty tokens. Splitting on newlines and stripping afterwards turned the
    # indented closing line of the literal into an empty station id.
    selected_ids = [tok.upper() for tok in s.split()]

    print(selected_ids)

    stations = cehq_station.load_from_hydat_db(
        province=None,
        selected_ids=selected_ids,
        skip_data_checks=True,
        natural=None
    )

    print(20 * "---")
    for station in stations:
        assert isinstance(station, Station)
        print("{}\t{:.4f}\t{:.4f}\t{}".format(station.id, station.longitude, station.latitude, station.drainage_km2))

        # separator line after each station row
        print(20 * "---")
def main():
    """
    Show natural Quebec HYDAT stations on the model's rotated-pole basemap.

    Each station is drawn as a scatter point coloured by the number of
    complete years it has.
    """
    qc_stations = cehq_station.load_from_hydat_db(natural=True, province="QC")

    manager = Crcm5ModelDataManager(
        samples_folder_path="/skynet3_rech1/huziy/from_guillimin/new_outputs/quebec_0.1_crcm5-r_spinup",
        all_files_in_samples_folder=True,
    )
    bmap = manager.get_rotpole_basemap()

    # station coordinates and the colour value of each point
    station_lons = [st.longitude for st in qc_stations]
    station_lats = [st.latitude for st in qc_stations]
    complete_year_counts = [len(st.get_list_of_complete_years()) for st in qc_stations]

    # project geographic coordinates to map coordinates
    xs, ys = bmap(station_lons, station_lats)

    bmap.scatter(xs, ys, c=complete_year_counts)
    bmap.drawcoastlines()
    bmap.colorbar()
    plt.show()
def main():
    """
    Plot the natural Quebec HYDAT stations over the rotated-pole basemap.

    The colour of every scatter point is the number of complete years
    reported by the station.
    """
    samples_dir = "/skynet3_rech1/huziy/from_guillimin/new_outputs/quebec_0.1_crcm5-r_spinup"

    station_list = cehq_station.load_from_hydat_db(natural=True, province="QC")

    data_manager = Crcm5ModelDataManager(samples_folder_path=samples_dir,
                                         all_files_in_samples_folder=True)
    the_map = data_manager.get_rotpole_basemap()

    longitudes = [station.longitude for station in station_list]
    latitudes = [station.latitude for station in station_list]
    year_counts = [len(station.get_list_of_complete_years()) for station in station_list]

    # convert lon/lat to the map projection before plotting
    map_x, map_y = the_map(longitudes, latitudes)
    the_map.scatter(map_x, map_y, c=year_counts)
    the_map.drawcoastlines()
    the_map.colorbar()

    plt.show()
# Example #5
# 0
def main():
    """
    Print id, longitude, latitude and drainage area of selected HYDAT stations.

    The station ids are listed in a multi-line string, the corresponding
    stations are loaded from the HYDAT database (any province, data checks
    skipped) and written to stdout as tab-separated rows.

    :return: None
    """

    s = """
    06DA002
    06CD002
    06EA002
    05FE004
    05EF001
    05BN012
    05CK004
    06AG006
    05AK001
    05QB003
    05LM006
    05KJ001
    05MD004
    05JU001
    """

    # split() without arguments splits on any whitespace run and never yields
    # empty tokens. Splitting on newlines and stripping afterwards turned the
    # indented closing line of the literal into an empty station id.
    selected_ids = [tok.upper() for tok in s.split()]

    print(selected_ids)

    stations = cehq_station.load_from_hydat_db(province=None,
                                               selected_ids=selected_ids,
                                               skip_data_checks=True,
                                               natural=None)

    print(20 * "---")
    for station in stations:
        assert isinstance(station, Station)
        print("{}\t{:.4f}\t{:.4f}\t{}".format(station.id, station.longitude,
                                              station.latitude,
                                              station.drainage_km2))

        # separator line after each station row
        print(20 * "---")
def main():
    """
    Compare observed and simulated daily streamflow climatologies.

    Loads the selected natural HYDAT stations of British Columbia, drops
    those with a drainage area below 1000 km**2, a latitude above 49.4 or no
    complete years overlapping the analysed period, and keeps only the
    station(s) with the smallest longitude. For every retained station the
    observed daily climatology and the climatologies computed from the
    discharge files listed in ``paths`` are drawn in one panel; the figure is
    saved to "offline_validation.png". Finally the station positions are
    plotted on a basemap.

    :return: None
    """
    # stations = cehq_station.read_grdc_stations(st_id_list=["2903430", "2909150", "2912600", "4208025"])

    selected_ids = [
        "08MH001", "08NE074", "08NG065", "08NJ013", "08NK002", "08NK016",
        "08NL004", "08NL007", "08NL024", "08NL038", "08NN002"
    ]
    stations = cehq_station.load_from_hydat_db(natural=True,
                                               province="BC",
                                               selected_ids=selected_ids)

    # station -> model point mapping, built lazily from the first opened file
    stations_to_mp = None

    import matplotlib.pyplot as plt

    # labels = ["CanESM", "MPI"]
    # paths = ["/skynet3_rech1/huziy/offline_stfl/canesm/discharge_1958_01_01_00_00.nc",
    # "/skynet3_rech1/huziy/offline_stfl/mpi/discharge_1958_01_01_00_00.nc"]
    #
    # colors = ["r", "b"]

    # labels = ["ERA", ]
    # colors = ["r", ]
    # paths = ["/skynet3_rech1/huziy/arctic_routing/era40/discharge_1958_01_01_00_00.nc"]

    labels = ["Glacier-only", "All"]
    colors = ["r", "b"]
    paths = [
        "/skynet3_exec2/aganji/glacier_katja/watroute_gemera/discharge_stat_glac_00_99_2000_01_01_00_00.nc",
        "/skynet3_exec2/aganji/glacier_katja/watroute_gemera/discharge_stat_both_00_992000_01_01_00_00.nc"
    ]

    start_year_current = 2000
    end_year_current = 2013

    plot_future = False
    start_year_future = 2071  # ignored when plot_future is False
    end_year_future = 2100

    if not plot_future:
        start_year = start_year_current
        end_year = end_year_current
    else:
        start_year = start_year_future
        end_year = end_year_future

    stations_filtered = []
    for s in stations:
        # Also filter out stations with small accumulation areas
        if s.drainage_km2 < 1000:
            continue

        if s.latitude > 49.4:
            continue

        # Filter stations with data out of the required time frame
        year_list = s.get_list_of_complete_years()
        if len(year_list) == 0:
            # no complete year at all: max()/min() below would raise
            continue

        if max(year_list) < start_year or min(year_list) > end_year:
            continue

        stations_filtered.append(s)

    stations = stations_filtered

    # keep only the station(s) with the smallest longitude
    min_lon = min(s.longitude for s in stations)
    stations = [s for s in stations if s.longitude == min_lon]

    print("Retained {} stations.".format(len(stations)))

    # simulation label -> list of datetime objects of the simulation
    sim_to_time = {}

    monthly_dates = [datetime(2001, m, 15) for m in range(1, 13)]
    # x tick labels: first letter of the month name
    fmt = FuncFormatter(lambda x, pos: num2date(x).strftime("%b")[0])
    locator = MonthLocator()

    fig = plt.figure()

    axes = []
    row_indices = []

    # one panel per station, laid out on an nrows x ncols grid
    ncols = 1
    shiftrow = 0 if len(stations) % ncols == 0 else 1
    nrows = len(stations) // ncols + shiftrow
    gs = gridspec.GridSpec(ncols=ncols, nrows=nrows)

    for i, _ in enumerate(stations):
        row, col = divmod(i, ncols)
        row_indices.append(row)

        ax = fig.add_subplot(gs[row, col])
        assert isinstance(ax, Axes)

        ax.xaxis.set_major_locator(locator)
        ax.yaxis.set_major_locator(MaxNLocator(nbins=4))

        ax.xaxis.set_major_formatter(fmt)
        sfmt = ScalarFormatter(useMathText=True)
        sfmt.set_powerlimits((-3, 4))
        ax.yaxis.set_major_formatter(sfmt)

        axes.append(ax)

    # generate daily stamp dates (non-leap reference year 2001)
    d0 = datetime(2001, 1, 1)
    stamp_dates = [d0 + timedelta(days=i) for i in range(365)]

    # plot a panel for each station
    for s, ax, row in zip(stations, axes, row_indices):

        assert isinstance(s, Station)
        assert isinstance(ax, Axes)
        if s.grdc_monthly_clim_max is not None:
            ax.fill_between(monthly_dates,
                            s.grdc_monthly_clim_min,
                            s.grdc_monthly_clim_max,
                            color="0.6",
                            alpha=0.5)

        avail_years = s.get_list_of_complete_years()
        print("{}: {}".format(s.id, ",".join([str(y) for y in avail_years])))
        years = [y for y in avail_years if start_year <= y <= end_year]
        _, obs_clim_stfl = s.get_daily_climatology_for_complete_years_with_pandas(
            stamp_dates=stamp_dates, years=years)

        if obs_clim_stfl is None:
            continue

        ax.plot(stamp_dates, obs_clim_stfl, "k", lw=3, label="Obs")

        if s.river_name is not None and s.river_name != "":
            ax.set_title(s.river_name)
        else:
            ax.set_title(s.id)

        for path, sim_label, color in zip(paths, labels, colors):
            # the context manager closes the file even if reading fails
            with Dataset(path) as ds:
                if stations_to_mp is None:
                    acc_area_2d = ds.variables["accumulation_area"][:]
                    lons2d = ds.variables["longitude"][:]
                    lats2d = ds.variables["latitude"][:]
                    x_index = ds.variables["x_index"][:]
                    y_index = ds.variables["y_index"][:]
                    stations_to_mp = get_dataless_model_points_for_stations(
                        stations, acc_area_2d, lons2d, lats2d, x_index, y_index)

                # read dates only once for a given simulation
                if sim_label not in sim_to_time:
                    time_str = ds.variables["time"][:].astype(str)
                    sim_to_time[sim_label] = [
                        datetime.strptime("".join(t_s), TIME_FORMAT)
                        for t_s in time_str
                    ]

                mp = stations_to_mp[s]
                data = ds.variables["water_discharge_accumulated"][:, mp.cell_index]

            print(path)
            df = DataFrame(data=data,
                           index=sim_to_time[sim_label],
                           columns=["value"])
            df["year"] = df.index.map(lambda d: d.year)

            # .ix and .select() were removed in pandas 1.0; .loc with boolean
            # masks works with old and new pandas versions alike
            df = df.loc[df.year.isin(years), :]
            not_leap_day = [not (d.month == 2 and d.day == 29) for d in df.index]
            df = df.loc[not_leap_day, :]

            # mean over the years for each day of the reference year
            df = df.groupby(
                lambda d: datetime(stamp_dates[0].year, d.month, d.day)).mean()

            daily_model_data = [df.loc[d, "value"] for d in stamp_dates]

            # print np.mean( monthly_model ), s.river_name, sim_label
            ax.plot(stamp_dates,
                    daily_model_data,
                    color,
                    lw=3,
                    label=sim_label + "(C)")

            if plot_future:
                # NOTE(review): this draws the same series again with a dashed
                # line; confirm whether separate future data was intended
                ax.plot(stamp_dates,
                        daily_model_data,
                        color + "--",
                        lw=3,
                        label=sim_label + "(F2)")

        # only the bottom row keeps its x tick labels
        if row < nrows - 1:
            ax.set_xticklabels([])

    axes[0].legend(fontsize=17, loc=2)
    plt.tight_layout()
    plt.savefig("offline_validation.png", dpi=400)
    plt.close(fig)

    # build a basemap from the projection stored in the RPN file and plot
    # the positions of the retained stations
    r = RPN(
        "/RESCUE/skynet3_rech1/huziy/CNRCWP/C3/Depth_to_bedrock_WestNA_0.25")
    r.get_first_record_for_name("8L")
    proj_params = r.get_proj_parameters_for_the_last_read_rec()
    lons, lats = r.get_longitudes_and_latitudes_for_the_last_read_rec()
    bsmp = RotatedLatLon(**proj_params).get_basemap_object_for_lons_lats(
        lons2d=lons, lats2d=lats)
    plot_utils.apply_plot_params(width_pt=None,
                                 width_cm=19,
                                 height_cm=19,
                                 font_size=12)
    plot_station_positions(manager=None, station_list=stations, bsmp=bsmp)
def main():
    """
    Validate modelled streamflow (STFL) at the HYDAT station 06EA002.

    Reads flow directions and accumulation areas from an RPN file, maps the
    station to a model point and produces three figures in ``img_folder``:
    the monthly series (corrected observations, uncorrected observations
    and model), their 1980-2010 monthly climatologies, and the per-month
    correlation between corrected observations and model over the years.

    :return: None
    """
    model_data_path = Path("/RECH2/huziy/BC-MH/bc_mh_044deg/Diagnostics")
    # model_data_path = Path("/RECH2/huziy/BC-MH/bc_mh_044deg/Samples")

    static_data_file = "/RECH2/huziy/BC-MH/bc_mh_044deg/Samples/bc_mh_044deg_198001/pm1980010100_00000000p"

    def select_by_date(obj, criterion):
        # Replacement for Series.select(), which was removed in pandas 1.0:
        # keep the rows whose index label satisfies criterion.
        mask = np.array([bool(criterion(d)) for d in obj.index], dtype=bool)
        return obj.loc[mask]

    r = RPN(static_data_file)

    fldir = r.get_first_record_for_name("FLDR")
    faa = r.get_first_record_for_name("FAA")
    lons, lats = r.get_longitudes_and_latitudes_for_the_last_read_rec()

    gc = default_domains.bc_mh_044

    cell_manager = CellManager(
        fldir, nx=fldir.shape[0], ny=fldir.shape[1], lons2d=lons, lats2d=lats, accumulation_area_km2=faa
    )

    selected_station_ids = ["06EA002"]

    stations = cehq_station.load_from_hydat_db(province="SK", selected_ids=selected_station_ids, natural=None)

    # (06EA002): CHURCHILL RIVER AT SANDY BAY at (-102.31832885742188,55.52333068847656), accum. area is 212000.0 km**2
    # TODO: plot where is this station, compare modelled and observed hydrographs

    # for s in stations:
    #     assert isinstance(s, cehq_station.Station)
    #     s.latitude += 0.9
    #     s.longitude -= 0.2
    #     print(s)

    station_to_model_point = cell_manager.get_model_points_for_stations(
        stations, drainaige_area_reldiff_limit=0.8, nneighbours=1
    )

    print(station_to_model_point[stations[0]])

    station = stations[0]
    assert isinstance(station, cehq_station.Station)

    # monthly means of the raw observations, stamped at the 15th of each month
    obs_not_corrected = (
        pd.Series(index=station.dates, data=station.values).groupby(by=lambda d: d.replace(day=15)).mean()
    )
    obs_corrected = pd.read_csv(
        "mh/obs_data/Churchill Historic Monthly Apportionable Flow_06EA002.csv.bak.original", skiprows=2
    )

    print(obs_corrected.head())
    print(obs_corrected.year.iloc[0], obs_corrected.year.iloc[-1])

    # Month-end dates from December of the year before the first one to
    # November of the last one. MonthEnd() is used instead of the "M" alias,
    # which is deprecated in recent pandas versions.
    date_index = pd.date_range(
        start=datetime(obs_corrected.year.iloc[0] - 1, 12, 15),
        end=datetime(obs_corrected.year.iloc[-1], 12, 15),
        freq=pd.offsets.MonthEnd(),
    )

    # Shifting a month end by 15 days gives the 15th of the following month.
    # pd.datetools no longer exists, pd.offsets.Day() is the same offset.
    date_index = date_index.shift(15, freq=pd.offsets.Day())

    print(date_index)
    # chain the rows of the table (first and last columns excluded) into one series
    data = np.concatenate([r for r in obs_corrected.values[:, 1:-1]])

    # per-month scaling: 1000 divided by the number of seconds in the month
    factor = date_index.map(lambda d: 1000 / (calendar.monthrange(d.year, d.month)[1] * 24 * 3600))
    print(factor[:10])
    obs_corrected = pd.Series(index=date_index, data=data * factor)

    station_to_modelled_data = get_model_data(
        station_to_model_point,
        output_path=model_data_path,
        grid_config=gc,
        basins_of_interest_shp=default_domains.MH_BASINS_PATH,
        cell_manager=cell_manager,
        vname="STFL",
    )

    modelled_data = station_to_modelled_data[station]

    # monthly time series
    fig = plt.figure()
    ax = obs_corrected.plot(label="obs corrected")

    obs_not_corrected.plot(label="obs not corrected", ax=ax, color="k")

    modelled_data.plot(label="CRCM5", ax=ax, color="r")

    ax.legend(loc="upper left")
    img_file = img_folder.joinpath("{}_validation_monthly.png".format(station.id))
    fig.savefig(str(img_file))
    plt.close(fig)

    # climatology
    start_year = 1980
    end_year = 2010

    def date_selector(d):
        # inside the analysed period and not a leap day
        return (start_year <= d.year <= end_year) and not ((d.month == 2) and (d.day == 29))

    def to_reference_year(d):
        # fold all years onto 2001 so that groupby yields a climatology
        return d.replace(year=2001)

    fig = plt.figure()
    ax = select_by_date(obs_corrected, date_selector).groupby(to_reference_year).mean().plot(label="obs corrected")

    select_by_date(obs_not_corrected, date_selector).groupby(to_reference_year).mean().plot(
        label="obs not corrected", ax=ax, color="k"
    )

    select_by_date(modelled_data, date_selector).groupby(to_reference_year).mean().plot(
        label="CRCM5", ax=ax, color="r"
    )

    ax.xaxis.set_major_locator(MonthLocator(bymonthday=15))
    ax.xaxis.set_major_formatter(DateFormatter("%b"))

    ax.legend(loc="upper left")

    img_file = img_folder.joinpath("{}_validation_clim.png".format(station.id))
    fig.savefig(str(img_file))
    plt.close(fig)

    # Interannual variability
    fig = plt.figure()

    obs_corrected = select_by_date(obs_corrected, lambda d: start_year <= d.year <= end_year)
    modelled_data = select_by_date(modelled_data, lambda d: start_year <= d.year <= end_year)

    # correlation between observed and modelled values, month by month
    corr_list = []
    for m in range(1, 13):
        obs = select_by_date(obs_corrected, lambda d, month=m: d.month == month)
        mod = select_by_date(modelled_data, lambda d, month=m: d.month == month)

        print(obs.head())

        # index by year so that corr() aligns the two series year by year
        obs.index = obs.index.map(lambda d: d.year)
        mod.index = mod.index.map(lambda d: d.year)

        corr_list.append(obs.corr(mod))

    ax = plt.gca()
    ax.plot(range(1, 13), corr_list)
    ax.set_xlabel("Month")
    ax.set_title("Inter-annual variability")

    img_file = img_folder.joinpath("{}_interannual.png".format(station.id))
    fig.tight_layout()
    fig.savefig(str(img_file), bbox_inches="tight")
    plt.close(fig)
# Example #8
# 0
def main():
    """
    Validate routed discharge against Manitoba Hydro station observations.

    Metadata of the selected HYDAT stations is copied to the matching
    Manitoba Hydro stations; for every such station with a known longitude
    the observed monthly climatology and the one computed from the routed
    discharge file are drawn in one panel and saved to
    "mh/offline_validation_mh.png". A second figure,
    "mh/offline_stations_positions.png", shows the model point and the
    upstream cells of each station.

    :return: None
    """
    # stations = cehq_station.read_grdc_stations(st_id_list=["2903430", "2909150", "2912600", "4208025"])

    selected_station_ids = [
        "05LM006", "05BN012", "05AK001", "05QB003", "06EA002"
    ]

    stations = cehq_station.load_from_hydat_db(
        natural=None,
        province=None,
        selected_ids=selected_station_ids,
        skip_data_checks=True)

    stations_mh = cehq_station.get_manitoba_hydro_stations()

    # copy metadata from the corresponding hydat stations
    for s in stations:
        assert isinstance(s, Station)
        for s_mh in stations_mh:
            assert isinstance(s_mh, Station)

            if s == s_mh:
                s_mh.copy_metadata(s)
                break

    stations = [
        s for s in stations_mh
        if s.id in selected_station_ids and s.longitude is not None
    ]

    # station -> model point mapping, built lazily from the first opened file
    stations_to_mp = None

    import matplotlib.pyplot as plt

    # labels = ["CanESM", "MPI"]
    # paths = ["/skynet3_rech1/huziy/offline_stfl/canesm/discharge_1958_01_01_00_00.nc",
    # "/skynet3_rech1/huziy/offline_stfl/mpi/discharge_1958_01_01_00_00.nc"]
    #
    # colors = ["r", "b"]

    # labels = ["ERA", ]
    # colors = ["r", ]
    # paths = ["/skynet3_rech1/huziy/arctic_routing/era40/discharge_1958_01_01_00_00.nc"]

    labels = [
        "Model",
    ]
    colors = [
        "r",
    ]
    paths = [
        "/RESCUE/skynet3_rech1/huziy/water_route_mh_bc_011deg_wc/discharge_1980_01_01_12_00.nc"
    ]

    infocell_path = "/RESCUE/skynet3_rech1/huziy/water_route_mh_bc_011deg_wc/infocell.nc"

    start_year = 1980
    end_year = 2014

    stations_filtered = []
    for s in stations:
        # Also filter out stations with small accumulation areas
        # if s.drainage_km2 is not None and s.drainage_km2 < 100:
        #     continue

        # No station is dropped here; the complete years are only reported
        year_list = s.get_list_of_complete_years()

        print("Complete years for {}: {}".format(s.id, year_list))

        stations_filtered.append(s)

    stations = stations_filtered

    print("Retained {} stations.".format(len(stations)))

    # simulation label -> list of datetime objects of the simulation
    sim_to_time = {}

    monthly_dates = [datetime(2001, m, 15) for m in range(1, 13)]
    # x tick labels: first letter of the month name
    fmt = FuncFormatter(lambda x, pos: num2date(x).strftime("%b")[0])
    locator = MonthLocator(bymonthday=15)

    fig = plt.figure()

    axes = []
    row_indices = []
    col_indices = []

    # one panel per station, laid out on an nrows x ncols grid
    ncols = 1
    shiftrow = 0 if len(stations) % ncols == 0 else 1
    nrows = len(stations) // ncols + shiftrow
    gs = gridspec.GridSpec(ncols=ncols, nrows=nrows)

    for i, _ in enumerate(stations):
        row, col = divmod(i, ncols)

        row_indices.append(row)
        col_indices.append(col)

        ax = fig.add_subplot(gs[row, col])
        assert isinstance(ax, Axes)

        ax.xaxis.set_major_locator(locator)
        ax.yaxis.set_major_locator(MaxNLocator(nbins=4))

        ax.xaxis.set_major_formatter(fmt)
        sfmt = ScalarFormatter(useMathText=True)
        sfmt.set_powerlimits((-3, 4))
        ax.yaxis.set_major_formatter(sfmt)

        axes.append(ax)

    # generate daily stamp dates
    d0 = datetime(2001, 1, 1)
    stamp_dates = [d0 + timedelta(days=i) for i in range(365)]

    # plot a panel for each station
    for s, ax, row, col in zip(stations, axes, row_indices, col_indices):

        assert isinstance(s, Station)
        assert isinstance(ax, Axes)
        if s.grdc_monthly_clim_max is not None:
            ax.fill_between(monthly_dates,
                            s.grdc_monthly_clim_min,
                            s.grdc_monthly_clim_max,
                            color="0.6",
                            alpha=0.5)

        avail_years = s.get_list_of_complete_years()
        print("{}: {}".format(s.id, ",".join([str(y) for y in avail_years])))
        years = [y for y in avail_years if start_year <= y <= end_year]
        obs_clim_stfl = s.get_monthly_climatology(years_list=years)

        if obs_clim_stfl is None:
            continue

        print(obs_clim_stfl.head())

        obs_clim_stfl.plot(color="k", lw=3, label="Obs", ax=ax)

        if s.river_name is not None and s.river_name != "":
            ax.set_title(s.river_name)
        else:
            ax.set_title(s.id)

        for path, sim_label, color in zip(paths, labels, colors):
            # the context manager closes the file even if reading fails
            with Dataset(path) as ds:
                if stations_to_mp is None:
                    acc_area_2d = ds.variables["accumulation_area"][:]
                    lons2d = ds.variables["longitude"][:]
                    lats2d = ds.variables["latitude"][:]
                    x_index = ds.variables["x_index"][:]
                    y_index = ds.variables["y_index"][:]
                    stations_to_mp = get_dataless_model_points_for_stations(
                        stations, acc_area_2d, lons2d, lats2d, x_index, y_index)

                # read dates only once for a given simulation
                if sim_label not in sim_to_time:
                    time_str = ds.variables["time"][:].astype(str)
                    sim_to_time[sim_label] = [
                        datetime.strptime("".join(t_s), TIME_FORMAT)
                        for t_s in time_str
                    ]

                mp = stations_to_mp[s]
                data = ds.variables["water_discharge_accumulated"][:, mp.cell_index]

            print(path)
            df = DataFrame(data=data,
                           index=sim_to_time[sim_label],
                           columns=["value"])
            df["year"] = df.index.map(lambda d: d.year)

            # .ix was removed in pandas 1.0; .loc accepts the same boolean mask
            df = df.loc[df.year.isin(years), :]

            # monthly climatology stamped at the 15th of each month of 2001
            df = df.groupby(lambda d: datetime(2001, d.month, 15)).mean()

            # print np.mean( monthly_model ), s.river_name, sim_label
            df.plot(color=color, lw=3, label=sim_label, ax=ax, y="value")

        # only the bottom row keeps its x tick labels
        if row < nrows - 1:
            ax.set_xticklabels([])

    axes[0].legend(fontsize=17, loc=2)
    plt.tight_layout()
    plt.savefig("mh/offline_validation_mh.png", dpi=400)
    plt.close(fig)

    with Dataset(infocell_path) as ds:

        fldir = ds.variables["flow_direction_value"][:]
        faa = ds.variables["accumulation_area"][:]

        lon, lat = [ds.variables[k][:] for k in ["lon", "lat"]]

        # plot station positions and upstream areas
        cell_manager = CellManager(fldir,
                                   nx=fldir.shape[0],
                                   ny=fldir.shape[1],
                                   lons2d=lon,
                                   lats2d=lat,
                                   accumulation_area_km2=faa)

    fig = plt.figure()
    from crcm5.mh_domains import default_domains
    gc = default_domains.bc_mh_011

    # get the basemap object
    bmp, data_mask = gc.get_basemap_using_shape_with_polygons_of_interest(
        lon, lat, shp_path=default_domains.MH_BASINS_PATH, mask_margin=5)

    xx, yy = bmp(lon, lat)
    ax = plt.gca()
    colors = ["g", "r", "m", "c", "y", "violet"]

    # stations_to_mp is still None when no station had an observed
    # climatology; in that case there is nothing to draw
    for i, (s, mp) in enumerate((stations_to_mp or {}).items()):
        assert isinstance(mp, ModelPoint)
        upstream_mask = cell_manager.get_mask_of_upstream_cells_connected_with_by_indices(
            mp.ix, mp.jy)

        current_points = upstream_mask > 0.5

        bmp.drawcoastlines()
        bmp.drawrivers()

        # upstream cells of the station, cycling through the colours
        bmp.scatter(xx[current_points],
                    yy[current_points],
                    c=colors[i % len(colors)])

        # label alignment is adjusted per station
        va = "top"
        if s.id in ["05AK001", "05LM006"]:
            va = "bottom"

        ha = "left"
        if s.id in ["05QB003"]:
            ha = "right"

        bmp.scatter(xx[mp.ix, mp.jy], yy[mp.ix, mp.jy], c="b")
        ax.annotate(s.id,
                    xy=(xx[mp.ix, mp.jy], yy[mp.ix, mp.jy]),
                    horizontalalignment=ha,
                    verticalalignment=va,
                    bbox=dict(boxstyle='round', fc='gray', alpha=0.5))

    fig.savefig("mh/offline_stations_{}.png".format("positions"))
    plt.close(fig)
def main():
    """
    Map four HYDAT stations to model cells and plot their validation figures.

    Flow directions, accumulation areas and coordinates are read from an RPN
    file and wrapped into a CellManager. The stations are loaded without data
    checks, the drainage area of 05AK001 is multiplied by 2.5, and the
    resulting station to model point mapping is handed to
    plot_validations_for_stations for the years 1980-2010.
    """
    first_year, last_year = 1980, 2010

    # model_data_path = Path("/RECH2/huziy/BC-MH/bc_mh_044deg/Diagnostics")
    model_output_dir = Path("/RECH2/huziy/BC-MH/bc_mh_044deg/Samples")
    geophysics_file = "/RECH2/huziy/BC-MH/bc_mh_044deg/Samples/bc_mh_044deg_198001/pm1980010100_00000000p"
    obs_dir = Path("mh/obs_data/")

    # static fields of the model grid
    rpn_obj = RPN(geophysics_file)
    flow_directions = rpn_obj.get_first_record_for_name("FLDR")
    acc_area = rpn_obj.get_first_record_for_name("FAA")
    lons2d, lats2d = rpn_obj.get_longitudes_and_latitudes_for_the_last_read_rec()

    grid_conf = default_domains.bc_mh_044

    nx, ny = flow_directions.shape[0], flow_directions.shape[1]
    cell_manager = CellManager(flow_directions, nx=nx, ny=ny,
                               lons2d=lons2d, lats2d=lats2d,
                               accumulation_area_km2=acc_area)

    station_ids = ["05LM006", "05BN012", "05AK001", "05QB003"]

    station_list = cehq_station.load_from_hydat_db(
        province=None,
        selected_ids=station_ids,
        natural=None,
        skip_data_checks=True,
    )

    # per-station adjustments
    for st in station_list:
        assert isinstance(st, cehq_station.Station)
        if st.id == "05AK001":
            st.drainage_km2 *= 2.5

        if st.id == "05BN012":
            # placeholder: nothing is adjusted for this station
            pass

    # Natural stations of MB, SK and AB were once appended here:
    # statons_mnb = cehq_station.load_from_hydat_db(province="MB", natural=True, start_date=datetime(start_year, 1, 1), end_date=datetime(end_year,12,31))
    # statons_ssk = cehq_station.load_from_hydat_db(province="SK", natural=True, start_date=datetime(start_year, 1, 1), end_date=datetime(end_year,12,31))
    # statons_alb = cehq_station.load_from_hydat_db(province="AB", natural=True, start_date=datetime(start_year, 1, 1), end_date=datetime(end_year,12,31))
    #
    # for s in statons_mnb + statons_ssk + statons_alb:
    #     if s not in stations:
    #         stations.append(s)

    # (06EA002): CHURCHILL RIVER AT SANDY BAY at (-102.31832885742188,55.52333068847656), accum. area is 212000.0 km**2
    # TODO: plot where is this station, compare modelled and observed hydrographs

    for st in station_list:
        print(st)

    # assert len(stations) == len(selected_station_ids), "Could not find stations for some of the specified ids"

    station_to_mp = cell_manager.get_model_points_for_stations(
        station_list,
        drainaige_area_reldiff_limit=0.9,
        nneighbours=8,
    )

    print("Established the station to model point mapping")

    plot_validations_for_stations(
        station_to_mp,
        cell_manager=cell_manager,
        corrected_obs_data_folder=obs_dir,
        model_data_path=model_output_dir,
        grid_config=grid_conf,
        start_year=first_year,
        end_year=last_year,
    )
def main():
    """
    Compare observed and simulated daily streamflow climatologies.

    Loads the selected natural HYDAT stations of British Columbia, drops
    those with a drainage area below 1000 km**2, a latitude above 49.4 or no
    complete years overlapping the analysed period, and keeps only the
    station(s) with the smallest longitude. For every retained station the
    observed daily climatology and the climatologies computed from the
    discharge files listed in ``paths`` are drawn in one panel; the figure is
    saved to "offline_validation.png". Finally the station positions are
    plotted on a basemap.

    :return: None
    """
    # stations = cehq_station.read_grdc_stations(st_id_list=["2903430", "2909150", "2912600", "4208025"])

    selected_ids = ["08MH001", "08NE074", "08NG065", "08NJ013", "08NK002", "08NK016",
                    "08NL004", "08NL007", "08NL024", "08NL038", "08NN002"]
    stations = cehq_station.load_from_hydat_db(natural=True, province="BC", selected_ids=selected_ids)

    # station -> model point mapping, built lazily from the first opened file
    stations_to_mp = None

    import matplotlib.pyplot as plt

    # labels = ["CanESM", "MPI"]
    # paths = ["/skynet3_rech1/huziy/offline_stfl/canesm/discharge_1958_01_01_00_00.nc",
    # "/skynet3_rech1/huziy/offline_stfl/mpi/discharge_1958_01_01_00_00.nc"]
    #
    # colors = ["r", "b"]

    # labels = ["ERA", ]
    # colors = ["r", ]
    # paths = ["/skynet3_rech1/huziy/arctic_routing/era40/discharge_1958_01_01_00_00.nc"]

    labels = ["Glacier-only", "All"]
    colors = ["r", "b"]
    paths = [
        "/skynet3_exec2/aganji/glacier_katja/watroute_gemera/discharge_stat_glac_00_99_2000_01_01_00_00.nc",
        "/skynet3_exec2/aganji/glacier_katja/watroute_gemera/discharge_stat_both_00_992000_01_01_00_00.nc"]

    start_year_current = 2000
    end_year_current = 2013

    plot_future = False
    start_year_future = 2071  # ignored when plot_future is False
    end_year_future = 2100

    if not plot_future:
        start_year = start_year_current
        end_year = end_year_current
    else:
        start_year = start_year_future
        end_year = end_year_future

    stations_filtered = []
    for s in stations:
        # Also filter out stations with small accumulation areas
        if s.drainage_km2 < 1000:
            continue

        if s.latitude > 49.4:
            continue

        # Filter stations with data out of the required time frame
        year_list = s.get_list_of_complete_years()
        if len(year_list) == 0:
            # no complete year at all: max()/min() below would raise
            continue

        if max(year_list) < start_year or min(year_list) > end_year:
            continue

        stations_filtered.append(s)

    stations = stations_filtered

    # keep only the station(s) with the smallest longitude
    min_lon = min(s.longitude for s in stations)
    stations = [s for s in stations if s.longitude == min_lon]

    print("Retained {} stations.".format(len(stations)))

    # simulation label -> list of datetime objects of the simulation
    sim_to_time = {}

    monthly_dates = [datetime(2001, m, 15) for m in range(1, 13)]
    # x tick labels: first letter of the month name
    fmt = FuncFormatter(lambda x, pos: num2date(x).strftime("%b")[0])
    locator = MonthLocator()

    fig = plt.figure()

    axes = []
    row_indices = []

    # one panel per station, laid out on an nrows x ncols grid
    ncols = 1
    shiftrow = 0 if len(stations) % ncols == 0 else 1
    nrows = len(stations) // ncols + shiftrow
    gs = gridspec.GridSpec(ncols=ncols, nrows=nrows)

    for i, _ in enumerate(stations):
        row, col = divmod(i, ncols)
        row_indices.append(row)

        ax = fig.add_subplot(gs[row, col])
        assert isinstance(ax, Axes)

        ax.xaxis.set_major_locator(locator)
        ax.yaxis.set_major_locator(MaxNLocator(nbins=4))

        ax.xaxis.set_major_formatter(fmt)
        sfmt = ScalarFormatter(useMathText=True)
        sfmt.set_powerlimits((-3, 4))
        ax.yaxis.set_major_formatter(sfmt)

        axes.append(ax)

    # generate daily stamp dates (non-leap reference year 2001)
    d0 = datetime(2001, 1, 1)
    stamp_dates = [d0 + timedelta(days=i) for i in range(365)]

    # plot a panel for each station
    for s, ax, row in zip(stations, axes, row_indices):

        assert isinstance(s, Station)
        assert isinstance(ax, Axes)
        if s.grdc_monthly_clim_max is not None:
            ax.fill_between(monthly_dates, s.grdc_monthly_clim_min, s.grdc_monthly_clim_max, color="0.6", alpha=0.5)

        avail_years = s.get_list_of_complete_years()
        print("{}: {}".format(s.id, ",".join([str(y) for y in avail_years])))
        years = [y for y in avail_years if start_year <= y <= end_year]
        _, obs_clim_stfl = s.get_daily_climatology_for_complete_years_with_pandas(stamp_dates=stamp_dates, years=years)

        if obs_clim_stfl is None:
            continue

        ax.plot(stamp_dates, obs_clim_stfl, "k", lw=3, label="Obs")

        if s.river_name is not None and s.river_name != "":
            ax.set_title(s.river_name)
        else:
            ax.set_title(s.id)

        for path, sim_label, color in zip(paths, labels, colors):
            # the context manager closes the file even if reading fails
            with Dataset(path) as ds:
                if stations_to_mp is None:
                    acc_area_2d = ds.variables["accumulation_area"][:]
                    lons2d, lats2d = ds.variables["longitude"][:], ds.variables["latitude"][:]
                    x_index, y_index = ds.variables["x_index"][:], ds.variables["y_index"][:]
                    stations_to_mp = get_dataless_model_points_for_stations(stations, acc_area_2d,
                                                                           lons2d, lats2d, x_index, y_index)

                # read dates only once for a given simulation
                if sim_label not in sim_to_time:
                    time_str = ds.variables["time"][:].astype(str)
                    times = [datetime.strptime("".join(t_s), TIME_FORMAT) for t_s in time_str]
                    sim_to_time[sim_label] = times

                mp = stations_to_mp[s]
                data = ds.variables["water_discharge_accumulated"][:, mp.cell_index]

            print(path)
            df = DataFrame(data=data, index=sim_to_time[sim_label], columns=["value"])
            df["year"] = df.index.map(lambda d: d.year)

            # .ix and .select() were removed in pandas 1.0; .loc with boolean
            # masks works with old and new pandas versions alike
            df = df.loc[df.year.isin(years), :]
            not_leap_day = [not (d.month == 2 and d.day == 29) for d in df.index]
            df = df.loc[not_leap_day, :]

            # mean over the years for each day of the reference year
            df = df.groupby(lambda d: datetime(stamp_dates[0].year, d.month, d.day)).mean()

            daily_model_data = [df.loc[d, "value"] for d in stamp_dates]

            # print np.mean( monthly_model ), s.river_name, sim_label
            ax.plot(stamp_dates, daily_model_data, color, lw=3, label=sim_label + "(C)")

            if plot_future:
                # NOTE(review): this draws the same series again with a dashed
                # line; confirm whether separate future data was intended
                ax.plot(stamp_dates, daily_model_data, color + "--", lw=3, label=sim_label + "(F2)")

        # only the bottom row keeps its x tick labels
        if row < nrows - 1:
            ax.set_xticklabels([])

    axes[0].legend(fontsize=17, loc=2)
    plt.tight_layout()
    plt.savefig("offline_validation.png", dpi=400)
    plt.close(fig)

    # build a basemap from the projection stored in the RPN file and plot
    # the positions of the retained stations
    r = RPN("/RESCUE/skynet3_rech1/huziy/CNRCWP/C3/Depth_to_bedrock_WestNA_0.25")
    r.get_first_record_for_name("8L")
    proj_params = r.get_proj_parameters_for_the_last_read_rec()
    lons, lats = r.get_longitudes_and_latitudes_for_the_last_read_rec()
    bsmp = RotatedLatLon(**proj_params).get_basemap_object_for_lons_lats(lons2d=lons, lats2d=lats)
    plot_utils.apply_plot_params(width_pt=None, width_cm=19, height_cm=19, font_size=12)
    plot_station_positions(manager=None, station_list=stations, bsmp=bsmp)
def main():
    """
    Compare observed and modelled monthly streamflow climatologies at selected stations.

    Steps performed (all paths, station ids and the period are hard-coded below):

    1. Load the selected stations from the HYDAT database and the Manitoba Hydro
       stations, and copy the HYDAT metadata to the matching Manitoba Hydro stations.
    2. Draw one panel per station with the observed monthly climatology and the
       monthly climatology of "water_discharge_accumulated" read from each file
       in ``paths``; the figure is saved to "mh/offline_validation_mh.png".
    3. Draw the station positions together with their upstream cells on a map and
       save it to "mh/offline_stations_positions.png".

    Nothing is returned; the function only prints diagnostics and writes the images.
    """
    # stations = cehq_station.read_grdc_stations(st_id_list=["2903430", "2909150", "2912600", "4208025"])

    import matplotlib.pyplot as plt

    selected_station_ids = [
        "05LM006",
        "05BN012",
        "05AK001",
        "05QB003",
        "06EA002"
    ]

    stations = cehq_station.load_from_hydat_db(natural=None, province=None, selected_ids=selected_station_ids,
                                               skip_data_checks=True)

    stations_mh = cehq_station.get_manitoba_hydro_stations()

    # copy metadata from the corresponding hydat stations
    for s in stations:
        assert isinstance(s, Station)
        for s_mh in stations_mh:
            assert isinstance(s_mh, Station)

            if s == s_mh:
                s_mh.copy_metadata(s)
                break

    # From here on work with the Manitoba Hydro stations that were requested and have coordinates
    stations = [s for s in stations_mh if s.id in selected_station_ids and s.longitude is not None]

    # labels = ["CanESM", "MPI"]
    # paths = ["/skynet3_rech1/huziy/offline_stfl/canesm/discharge_1958_01_01_00_00.nc",
    # "/skynet3_rech1/huziy/offline_stfl/mpi/discharge_1958_01_01_00_00.nc"]
    #
    # colors = ["r", "b"]

    # labels = ["ERA", ]
    # colors = ["r", ]
    # paths = ["/skynet3_rech1/huziy/arctic_routing/era40/discharge_1958_01_01_00_00.nc"]

    labels = ["Model", ]
    colors = ["r", ]
    paths = [
        "/RESCUE/skynet3_rech1/huziy/water_route_mh_bc_011deg_wc/discharge_1980_01_01_12_00.nc"
    ]

    infocell_path = "/RESCUE/skynet3_rech1/huziy/water_route_mh_bc_011deg_wc/infocell.nc"

    start_year = 1980
    end_year = 2014

    # No station is dropped at this point: the complete years are only reported.
    # (Filtering by drainage area used to be here:
    #  if s.drainage_km2 is not None and s.drainage_km2 < 100: continue)
    for s in stations:
        print("Complete years for {}: {}".format(s.id, s.get_list_of_complete_years()))

    print("Retained {} stations.".format(len(stations)))

    if not stations:
        # Without stations there is no panel to create (the grid would have 0 rows)
        print("No stations to plot, nothing to do.")
        return

    # simulation label -> list of datetime objects, filled lazily when a file is first read
    sim_to_time = {}
    # station -> model point mapping, computed from the first model file that gets opened
    stations_to_mp = None

    monthly_dates = [datetime(2001, m, 15) for m in range(1, 13)]

    def month_initial(x, pos):
        # tick label: first letter of the abbreviated month name
        return num2date(x).strftime("%b")[0]

    fig = plt.figure()

    ncols = 1
    shiftrow = 0 if len(stations) % ncols == 0 else 1
    nrows = len(stations) // ncols + shiftrow
    gs = gridspec.GridSpec(ncols=ncols, nrows=nrows)

    axes = []
    row_indices = []
    for i in range(len(stations)):
        row, col = divmod(i, ncols)

        ax = fig.add_subplot(gs[row, col])
        assert isinstance(ax, Axes)

        # Locators and formatters keep a reference to the axis they are attached to,
        # therefore each panel gets its own instances instead of sharing one.
        ax.xaxis.set_major_locator(MonthLocator(bymonthday=15))
        ax.yaxis.set_major_locator(MaxNLocator(nbins=4))

        ax.xaxis.set_major_formatter(FuncFormatter(month_initial))
        sfmt = ScalarFormatter(useMathText=True)
        sfmt.set_powerlimits((-3, 4))
        ax.yaxis.set_major_formatter(sfmt)

        axes.append(ax)
        row_indices.append(row)

    # plot a panel for each station
    for s, ax, row in zip(stations, axes, row_indices):

        assert isinstance(s, Station)
        assert isinstance(ax, Axes)
        if s.grdc_monthly_clim_max is not None:
            ax.fill_between(monthly_dates, s.grdc_monthly_clim_min, s.grdc_monthly_clim_max, color="0.6", alpha=0.5)

        avail_years = s.get_list_of_complete_years()
        print("{}: {}".format(s.id, ",".join(str(y) for y in avail_years)))
        years = [y for y in avail_years if start_year <= y <= end_year]
        obs_clim_stfl = s.get_monthly_climatology(years_list=years)

        if obs_clim_stfl is None:
            # nothing to compare with: the panel is left as is
            continue

        print(obs_clim_stfl.head())

        obs_clim_stfl.plot(color="k", lw=3, label="Obs", ax=ax)

        # fall back to the station id when the river name is missing or empty
        ax.set_title(s.river_name or s.id)

        for path, sim_label, color in zip(paths, labels, colors):
            print(path)

            # the context manager closes the file even if reading fails
            with Dataset(path) as ds:

                if stations_to_mp is None:
                    acc_area_2d = ds.variables["accumulation_area"][:]
                    lons2d, lats2d = ds.variables["longitude"][:], ds.variables["latitude"][:]
                    x_index, y_index = ds.variables["x_index"][:], ds.variables["y_index"][:]
                    stations_to_mp = get_dataless_model_points_for_stations(stations, acc_area_2d,
                                                                           lons2d, lats2d, x_index, y_index)

                # read dates only once for a given simulation
                if sim_label not in sim_to_time:
                    time_str = ds.variables["time"][:].astype(str)
                    sim_to_time[sim_label] = [datetime.strptime("".join(t_s), TIME_FORMAT) for t_s in time_str]

                mp = stations_to_mp[s]
                data = ds.variables["water_discharge_accumulated"][:, mp.cell_index]

            df = DataFrame(data=data, index=sim_to_time[sim_label], columns=["value"])
            df["year"] = df.index.map(lambda d: d.year)
            # keep only the years for which the observations are complete (.ix is gone from pandas)
            df = df.loc[df.year.isin(years), :]
            # monthly climatology stamped with the 15th of each month of 2001
            df = df.groupby(lambda d: datetime(2001, d.month, 15)).mean()

            # print np.mean( monthly_model ), s.river_name, sim_label
            df.plot(color=color, lw=3, label=sim_label, ax=ax, y="value")

        if row < nrows - 1:
            ax.set_xticklabels([])

    axes[0].legend(fontsize=17, loc=2)
    plt.tight_layout()
    plt.savefig("mh/offline_validation_mh.png", dpi=400)
    plt.close(fig)

    if stations_to_mp is None:
        # No model file was opened (e.g. no station had an observed climatology),
        # so there are no model points to show on the map.
        print("No model points were determined, skipping the station positions plot.")
        return

    with Dataset(infocell_path) as ds:

        fldir = ds.variables["flow_direction_value"][:]
        faa = ds.variables["accumulation_area"][:]

        lon, lat = [ds.variables[k][:] for k in ["lon", "lat"]]

        # plot station positions and upstream areas
        cell_manager = CellManager(fldir, nx=fldir.shape[0], ny=fldir.shape[1],
                                   lons2d=lon, lats2d=lat, accumulation_area_km2=faa)

    fig = plt.figure()
    from crcm5.mh_domains import default_domains
    gc = default_domains.bc_mh_011

    # get the basemap object
    bmp, _ = gc.get_basemap_using_shape_with_polygons_of_interest(
        lon, lat, shp_path=default_domains.MH_BASINS_PATH, mask_margin=5)

    xx, yy = bmp(lon, lat)
    ax = plt.gca()
    map_colors = ["g", "r", "m", "c", "y", "violet"]
    for i, (s, mp) in enumerate(stations_to_mp.items()):
        assert isinstance(mp, ModelPoint)
        upstream_mask = cell_manager.get_mask_of_upstream_cells_connected_with_by_indices(mp.ix, mp.jy)

        current_points = upstream_mask > 0.5

        bmp.drawcoastlines()
        bmp.drawrivers()

        # upstream cells of each station are drawn with their own color (cycled)
        bmp.scatter(xx[current_points], yy[current_points], c=map_colors[i % len(map_colors)])

        # label placement is adjusted for a few stations
        va = "bottom" if s.id in ["05AK001", "05LM006"] else "top"
        ha = "right" if s.id in ["05QB003"] else "left"

        bmp.scatter(xx[mp.ix, mp.jy], yy[mp.ix, mp.jy], c="b")
        ax.annotate(s.id, xy=(xx[mp.ix, mp.jy], yy[mp.ix, mp.jy]), horizontalalignment=ha,
                    verticalalignment=va, bbox=dict(boxstyle='round', fc='gray', alpha=0.5))

    fig.savefig("mh/offline_stations_{}.png".format("positions"))
    plt.close(fig)
# Example #12
# 0
def main():
    """
    Print one line per station loaded from the HYDAT database with
    ``natural=True`` and ``province="SK"``: name, longitude, latitude and
    drainage area, separated by "; ".
    """
    row_template = "{}; {}; {}; {}; "

    sk_stations = cehq_station.load_from_hydat_db(natural=True, province="SK")
    for station in sk_stations:
        assert isinstance(station, Station)
        row = row_template.format(station.name, station.longitude, station.latitude, station.drainage_km2)
        print(row)
def main():
    """
    Compare observed and modelled ("STFL") streamflow at the HYDAT station 06EA002.

    Three series are compared: the station record from the HYDAT database
    ("obs not corrected", averaged by month), the monthly series read from a csv
    file ("obs corrected") and the model output ("CRCM5") for the matching model point.

    Three figures are written to ``img_folder`` (not defined in this function,
    expected to exist at module level):

    * ``<station id>_validation_monthly.png`` -- the three time series;
    * ``<station id>_validation_clim.png`` -- their climatologies for 1980-2010
      (Feb 29 is excluded);
    * ``<station id>_interannual.png`` -- for each calendar month, the correlation
      between the corrected observations and the model output over 1980-2010.
    """

    def select_by_date(series, criterion):
        # Stand-in for the removed pandas ``select`` method: keep the entries
        # whose index label satisfies ``criterion``.
        mask = np.array([bool(criterion(d)) for d in series.index], dtype=bool)
        return series.loc[mask]

    model_data_path = Path("/RECH2/huziy/BC-MH/bc_mh_044deg/Diagnostics")
    # model_data_path = Path("/RECH2/huziy/BC-MH/bc_mh_044deg/Samples")

    static_data_file = "/RECH2/huziy/BC-MH/bc_mh_044deg/Samples/bc_mh_044deg_198001/pm1980010100_00000000p"

    r = RPN(static_data_file)

    fldir = r.get_first_record_for_name("FLDR")
    faa = r.get_first_record_for_name("FAA")
    lons, lats = r.get_longitudes_and_latitudes_for_the_last_read_rec()

    gc = default_domains.bc_mh_044

    cell_manager = CellManager(fldir, nx=fldir.shape[0], ny=fldir.shape[1],
                               lons2d=lons, lats2d=lats, accumulation_area_km2=faa)

    selected_station_ids = ["06EA002", ]

    stations = cehq_station.load_from_hydat_db(province="SK", selected_ids=selected_station_ids, natural=None)

    # (06EA002): CHURCHILL RIVER AT SANDY BAY at (-102.31832885742188,55.52333068847656), accum. area is 212000.0 km**2
    # TODO: plot where is this station, compare modelled and observed hydrographs

    # for s in stations:
    #     assert isinstance(s, cehq_station.Station)
    #     s.latitude += 0.9
    #     s.longitude -= 0.2
    #     print(s)

    # NOTE: "drainaige_area_reldiff_limit" is spelled as in the callee's signature
    station_to_model_point = cell_manager.get_model_points_for_stations(stations, drainaige_area_reldiff_limit=0.8,
                                                                        nneighbours=1)

    station = stations[0]
    print(station_to_model_point[station])
    assert isinstance(station, cehq_station.Station)

    # monthly means of the station record, stamped with the 15th of each month
    obs_not_corrected = pd.Series(index=station.dates, data=station.values).groupby(
        by=lambda d: d.replace(day=15)).mean()
    obs_corrected = pd.read_csv("mh/obs_data/Churchill Historic Monthly Apportionable Flow_06EA002.csv.bak.original", skiprows=2)

    print(obs_corrected.head())
    print(obs_corrected.year.iloc[0], obs_corrected.year.iloc[-1])

    # One time stamp (the 15th) for every month from January of the first year to
    # December of the last year in the csv file. Built directly to avoid the removed
    # ``pd.datetools`` module and the deprecated "M" frequency alias.
    first_year = int(obs_corrected.year.iloc[0])
    last_year = int(obs_corrected.year.iloc[-1])
    date_index = pd.DatetimeIndex([datetime(y, month, 15)
                                   for y in range(first_year, last_year + 1)
                                   for month in range(1, 13)])

    print(date_index)
    # Row by row, all columns except the first and the last one.
    # presumably: first column is the year, last one an annual summary -- verify against the csv file
    data = obs_corrected.values[:, 1:-1].flatten()

    # 1000 / (number of seconds in the month)
    # NOTE(review): looks like a conversion of monthly volumes to mean flow rates -- confirm the units of the csv file
    factor = date_index.map(lambda d: 1000 / (calendar.monthrange(d.year, d.month)[1] * 24 * 3600))
    print(factor[:10])
    # np.asarray: Index.map returns an ndarray or an Index depending on the pandas version
    obs_corrected = pd.Series(index=date_index, data=data * np.asarray(factor))

    station_to_modelled_data = get_model_data(station_to_model_point, output_path=model_data_path,
                                              grid_config=gc, basins_of_interest_shp=default_domains.MH_BASINS_PATH,
                                              cell_manager=cell_manager, vname="STFL")

    modelled_data = station_to_modelled_data[station]

    fig = plt.figure()
    ax = obs_corrected.plot(label="obs corrected")

    obs_not_corrected.plot(label="obs not corrected", ax=ax, color="k")

    modelled_data.plot(label="CRCM5", ax=ax, color="r")

    ax.legend(loc="upper left")
    img_file = img_folder.joinpath("{}_validation_monthly.png".format(station.id))
    fig.savefig(str(img_file))
    plt.close(fig)

    # climatology
    start_year = 1980
    end_year = 2010

    def date_selector(d):
        # dates within the period of interest, excluding Feb 29
        return (start_year <= d.year <= end_year) and not ((d.month == 2) and (d.day == 29))

    def climatology(series):
        # mean over the selected years for each date, stamped with the year 2001
        return select_by_date(series, date_selector).groupby(lambda d: d.replace(year=2001)).mean()

    fig = plt.figure()
    ax = climatology(obs_corrected).plot(label="obs corrected")

    climatology(obs_not_corrected).plot(label="obs not corrected", ax=ax, color="k")

    climatology(modelled_data).plot(label="CRCM5", ax=ax, color="r")

    ax.xaxis.set_major_locator(MonthLocator(bymonthday=15))
    ax.xaxis.set_major_formatter(DateFormatter("%b"))

    ax.legend(loc="upper left")

    img_file = img_folder.joinpath("{}_validation_clim.png".format(station.id))
    fig.savefig(str(img_file))
    plt.close(fig)

    # Interannual variability
    fig = plt.figure()

    obs_corrected = select_by_date(obs_corrected, lambda d: start_year <= d.year <= end_year)
    modelled_data = select_by_date(modelled_data, lambda d: start_year <= d.year <= end_year)

    corr_list = []
    for m in range(1, 13):
        # the month is bound as a default argument to avoid relying on late binding
        obs = select_by_date(obs_corrected, lambda d, month=m: d.month == month)
        mod = select_by_date(modelled_data, lambda d, month=m: d.month == month)

        print(obs.head())

        # index both series by year so that corr() aligns them year by year
        obs.index = obs.index.map(lambda d: d.year)
        mod.index = mod.index.map(lambda d: d.year)

        corr_list.append(obs.corr(mod))

    ax = plt.gca()
    ax.plot(range(1, 13), corr_list)
    ax.set_xlabel("Month")
    ax.set_title("Inter-annual variability")

    img_file = img_folder.joinpath("{}_interannual.png".format(station.id))
    fig.tight_layout()
    fig.savefig(str(img_file), bbox_inches="tight")
    plt.close(fig)