Example #1
def calculate_lake_ids(fldirs, lkfract, lkout):
    current_id = 1
    lkfr_limit = 0.6

    cmanager = CellManager(fldirs)

    iout_list, jout_list = np.where(lkout > 0.5)

    lkids = np.zeros_like(fldirs)

    lkid_to_mask = {}
    lkid_to_npoints_upstream = {}
    for i, j in zip(iout_list, jout_list):
        the_mask = cmanager.get_mask_of_upstream_cells_connected_with_by_indices(
            i, j) > 0.5
        the_mask = the_mask & ((lkfract >= lkfr_limit) | (lkout > 0.5))

        lkid_to_mask[current_id] = the_mask
        lkid_to_npoints_upstream[current_id] = the_mask.sum()
        current_id += 1

    for the_id in sorted(lkid_to_mask,
                         key=lambda xx: lkid_to_npoints_upstream[xx],
                         reverse=True):
        lkids[lkid_to_mask[the_id]] = the_id

    return lkids
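
The sort by decreasing upstream-cell count is what lets nested lakes keep their own IDs: larger masks are painted first, so smaller masks painted later win any overlap. A minimal, self-contained sketch of that ordering (toy arrays, not part of the original code):

import numpy as np

big = np.zeros((4, 4), dtype=bool)
big[:3, :3] = True   # 9 cells upstream of the outer lake
small = np.zeros((4, 4), dtype=bool)
small[1, 1] = True   # a nested lake covering a single cell

lkids = np.zeros((4, 4), dtype=int)
id_to_mask = {1: big, 2: small}
for the_id in sorted(id_to_mask, key=lambda k: id_to_mask[k].sum(), reverse=True):
    lkids[id_to_mask[the_id]] = the_id

assert lkids[1, 1] == 2  # the smaller, nested mask wins the overlap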
def point_comparisons_at_outlets(hdf_folder="/home/huziy/skynet3_rech1/hdf_store"):
    start_year = 1979
    end_year = 1981

    sim_name_to_file_name = {
        # "CRCM5-R": "/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-r_spinup.hdf",
        # "CRCM5-HCD-R": "/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-r_spinup2.hdf",
        "CRCM5-HCD-RL": "/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-rl_spinup.hdf",
        "CRCM5-HCD-RL-INTFL": "/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-rl-intfl_do_not_discard_small.hdf",
        # "SANI=10000, ignore THFC":
        # "/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-rl-intfl_sani-10000_not_care_about_thfc.hdf",

        # "CRCM5-HCD-RL-ERA075": "/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-rl-intfl_spinup_ecoclimap_era075.hdf",
        "SANI=10000": "/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-rl-intfl_sani-10000.hdf"
        # "CRCM5-HCD-RL-ECOCLIMAP": "/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-rl-intfl_spinup_ecoclimap.hdf"
    }

    path0 = os.path.join(hdf_folder, list(sim_name_to_file_name.items())[0][1])
    path1 = os.path.join(hdf_folder, list(sim_name_to_file_name.items())[1][1])
    flow_directions = analysis.get_array_from_file(path=path0, var_name=infovar.HDF_FLOW_DIRECTIONS_NAME)
    lake_fraction = analysis.get_array_from_file(path=path0, var_name=infovar.HDF_LAKE_FRACTION_NAME)
    slope = analysis.get_array_from_file(path=path1, var_name=infovar.HDF_SLOPE_NAME)

    lons2d, lats2d, _ = analysis.get_basemap_from_hdf(file_path=path0)

    cell_manager = CellManager(flow_directions, lons2d=lons2d, lats2d=lats2d)
    mp_list = cell_manager.get_model_points_of_outlets(lower_accumulation_index_limit=10)

    assert len(mp_list) > 0

    # Get the accumulation indices so that the most important outlets can be identified
    acc_ind_list = [np.sum(cell_manager.get_mask_of_upstream_cells_connected_with_by_indices(mp.ix, mp.jy))
                    for mp in mp_list]

    for mp, acc_ind in zip(mp_list, acc_ind_list):
        mp.acc_index = acc_ind

    mp_list.sort(key=lambda x: x.acc_index)

    # do not take global lake cells into consideration, and discard points with negative slopes
    mp_list = [mp for mp in mp_list if lake_fraction[mp.ix, mp.jy] < 0.6 and slope[mp.ix, mp.jy] >= 0]

    mp_list = mp_list[-12:]  # get 12 most important outlets

    print("The following outlets were chosen for analysis")
    pattern = "({0}, {1}): acc_index = {2} cells; fldr = {3}; lake_fraction = {4}"
    for mp in mp_list:
        print(pattern.format(mp.ix, mp.jy, mp.acc_index, cell_manager.flow_directions[mp.ix, mp.jy],
                             lake_fraction[mp.ix, mp.jy]))

    draw_model_comparison(model_points=mp_list, sim_name_to_file_name=sim_name_to_file_name, hdf_folder=hdf_folder,
                          start_year=start_year, end_year=end_year, cell_manager=cell_manager)
def get_mask_of_non_contrib_area(grid_config, dir_file):
    """

    :param grid_config:
    :param dir_file:
    :return: 2d numpy array with 1 for non-contributing cells and 0 otherwize
    """
    assert isinstance(grid_config, GridConfig)

    with Dataset(str(dir_file)) as ds:
        lons, lats, fldr, faa, cell_area = [
            ds.variables[k][:] for k in [
                "lon", "lat", "flow_direction_value", "accumulation_area",
                "cell_area"
            ]
        ]

    the_mask = np.zeros_like(lons)

    the_mask1 = maskoceans(lons, lats, the_mask, resolution="i", inlands=False)

    suspicious_internal_draining = (~the_mask1.mask) & ((fldr <= 0) |
                                                        (fldr >= 256))

    i_list, j_list = np.where(suspicious_internal_draining)

    print("retained {} gridcells".format(suspicious_internal_draining.sum()))

    # Remove the points close to the coasts
    for i, j in zip(i_list, j_list):
        if is_point_ocean_outlet(i, j, the_mask1.mask):
            suspicious_internal_draining[i, j] = False
            the_mask1[i, j] = np.ma.masked

    print("retained {} gridcells".format(suspicious_internal_draining.sum()))

    # Now get the mask upstream of the internal draining outlets
    cell_manager = CellManager(flow_dirs=fldr,
                               lons2d=lons,
                               lats2d=lats,
                               accumulation_area_km2=faa)
    i_list, j_list = np.where(suspicious_internal_draining)
    for i, j in zip(i_list, j_list):
        amask = cell_manager.get_mask_of_upstream_cells_connected_with_by_indices(
            i, j)

        suspicious_internal_draining |= amask > 0

    return suspicious_internal_draining
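
The helper is_point_ocean_outlet is not shown in this example; a plausible sketch consistent with how it is called above (an assumption, not the original implementation):

def is_point_ocean_outlet(i, j, ocean_mask):
    # assumed behaviour: a cell is an ocean outlet if any of its 8 neighbours is ocean
    ni, nj = ocean_mask.shape
    for di in (-1, 0, 1):
        for dj in (-1, 0, 1):
            if di == 0 and dj == 0:
                continue
            i1, j1 = i + di, j + dj
            if 0 <= i1 < ni and 0 <= j1 < nj and ocean_mask[i1, j1]:
                return True
    return False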
Example #4
def get_cell_manager_from_directions_file(path="/home/san/Downloads/directions_WestCaUs_dx0.11deg.nc", margin=20):
    # read the fields inside a context manager so the dataset is closed properly
    with Dataset(path) as ds:
        dirs = ds.variables["flow_direction_value"]
        lons = ds.variables["lon"]
        lats = ds.variables["lat"]
        acc_area = ds.variables["accumulation_area"]

        nc_data = []
        for v in [dirs, lons, lats, acc_area]:
            if margin is not None and margin > 0:
                # discard a halo of cells along each edge of the domain
                nc_data.append(v[margin:-margin, margin:-margin])
            else:
                nc_data.append(v[:])

    return CellManager(nc_data[0], lons2d=nc_data[1], lats2d=nc_data[2], accumulation_area_km2=nc_data[3])
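
A hedged usage sketch (the file path is the function's default and purely illustrative; the crcm5 CellManager package is assumed to be importable):

# trim a 20-cell halo and build the cell manager from the directions file
cm = get_cell_manager_from_directions_file(
    path="/home/san/Downloads/directions_WestCaUs_dx0.11deg.nc", margin=20)
# mask of all cells draining through the cell at indices (100, 100)
upstream = cm.get_mask_of_upstream_cells_connected_with_by_indices(100, 100)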
def get_basin_to_outlet_indices_map(shape_file=BASIN_BOUNDARIES_FILE,
                                    bmp_info=None,
                                    directions=None,
                                    accumulation_areas=None,
                                    lake_fraction_field=None):
    assert isinstance(bmp_info, BasemapInfo)

    driver = ogr.GetDriverByName("ESRI Shapefile")
    print(driver)
    ds = driver.Open(shape_file, 0)

    assert isinstance(ds, ogr.DataSource)
    layer = ds.GetLayer()

    assert isinstance(layer, ogr.Layer)
    print(layer.GetFeatureCount())

    latlong_proj = osr.SpatialReference()
    latlong_proj.ImportFromEPSG(4326)

    utm_proj = layer.GetSpatialRef()

    # create Coordinate Transformation
    coord_transform = osr.CoordinateTransformation(latlong_proj, utm_proj)

    utm_coords = coord_transform.TransformPoints(
        list(zip(bmp_info.lons.flatten(), bmp_info.lats.flatten())))
    utm_coords = np.asarray(utm_coords)
    x_utm = utm_coords[:, 0].reshape(bmp_info.lons.shape)
    y_utm = utm_coords[:, 1].reshape(bmp_info.lons.shape)

    basin_mask = np.zeros_like(bmp_info.lons)
    cell_manager = CellManager(directions,
                               accumulation_area_km2=accumulation_areas,
                               lons2d=bmp_info.lons,
                               lats2d=bmp_info.lats)

    index = 1
    basins = []
    basin_names = []
    basin_name_to_mask = {}
    for feature in layer:
        assert isinstance(feature, ogr.Feature)
        # print feature["FID"]

        geom = feature.GetGeometryRef()
        assert isinstance(geom, ogr.Geometry)

        basins.append(ogr.CreateGeometryFromWkb(geom.ExportToWkb()))
        basin_names.append(feature["abr"])

    # make a real copy: the search loop below modifies this array in place
    accumulation_areas_temp = accumulation_areas.copy()
    lons_out, lats_out = [], []
    basin_names_out = []
    name_to_ij_out = {}

    min_basin_area = min(b.GetArea() * 1.0e-6 for b in basins)

    while len(basins):
        fm = np.max(accumulation_areas_temp)

        i, j = np.where(fm == accumulation_areas_temp)
        i, j = i[0], j[0]
        p = ogr.CreateGeometryFromWkt("POINT ({} {})".format(
            x_utm[i, j], y_utm[i, j]))
        b_selected = None
        name_selected = None
        for name, b in zip(basin_names, basins):

            assert isinstance(b, ogr.Geometry)
            assert isinstance(p, ogr.Geometry)
            if b.Contains(p.Buffer(2000 * 2**0.5)):
                # Check if there is an upstream cell from the same basin
                the_mask = cell_manager.get_mask_of_upstream_cells_connected_with_by_indices(
                    i, j)

                # Save the mask of the basin for future use
                basin_name_to_mask[name] = the_mask

                # if is_part_of_points_in(b, x_utm[the_mask == 1], y_utm[the_mask == 1]):
                # continue

                b_selected = b
                name_selected = name
                # basin_names_out.append(name)

                lons_out.append(bmp_info.lons[i, j])
                lats_out.append(bmp_info.lats[i, j])
                name_to_ij_out[name] = (i, j)

                basin_mask[the_mask == 1] = index
                index += 1

                break

        if b_selected is not None:
            basins.remove(b_selected)
            basin_names.remove(name_selected)
            outlet_index_in_basin = 1
            current_basin_name = name_selected
            while current_basin_name in basin_names_out:
                current_basin_name = name_selected + str(outlet_index_in_basin)
                outlet_index_in_basin += 1

            basin_names_out.append(current_basin_name)
            print(len(basins), basin_names_out)

        accumulation_areas_temp[i, j] = -1

    plot_utils.apply_plot_params(font_size=10,
                                 width_pt=None,
                                 width_cm=20,
                                 height_cm=12)
    gs = GridSpec(1, 2, width_ratios=[1.0, 0.5], wspace=0.01)
    fig = plt.figure()

    ax = fig.add_subplot(gs[0, 0])
    xx, yy = bmp_info.get_proj_xy()
    # im = bmp.pcolormesh(xx, yy, basin_mask.reshape(xx.shape))
    bmp_info.basemap.drawcoastlines(linewidth=0.5, ax=ax)
    bmp_info.basemap.drawrivers(zorder=5, color="0.5", ax=ax)
    bmp_info.basemap.drawparallels(np.arange(-90, 90, 10),
                                   labels=[False, True, False, False])
    # bmp.colorbar(im)

    xs, ys = bmp_info.basemap(lons_out, lats_out)
    bmp_info.basemap.scatter(xs, ys, c="0.75", s=30, zorder=10)

    cmap = cm.get_cmap("rainbow", index - 1)
    bn = BoundaryNorm(list(range(index + 1)), index - 1)

    # Do not color the basins
    # basin_mask = np.ma.masked_where(basin_mask < 0.5, basin_mask)
    # bmp_info.basemap.pcolormesh(xx, yy, basin_mask, norm=bn, cmap=cmap, ax=ax)

    # Per-basin annotation offsets (in points), tuned to avoid label overlaps
    name_to_text_offset = {
        "GEO": (30, 20), "ARN": (-10, 30), "FEU": (5, 50), "CAN": (-75, 50),
        "MEL": (20, 40), "PYR": (60, 60), "BAL": (50, 30), "BEL": (-20, -10),
        "STM": (50, -50), "SAG": (50, -50), "RDO": (30, -30),
        "BOM": (20, -20), "MOI": (30, -20), "ROM": (40, -20),
        "CHU": (40, 40), "NAT": (40, 40), "MAN": (55, -45),
    }

    for name, xa, ya, lona, lata in zip(basin_names_out, xs, ys, lons_out,
                                        lats_out):
        text_offset = name_to_text_offset.get(name, (-20, 20))

        ax.annotate(name,
                    xy=(xa, ya),
                    xytext=text_offset,
                    textcoords='offset points',
                    ha='right',
                    va='bottom',
                    bbox=dict(boxstyle='round,pad=0.5', fc='white'),
                    arrowprops=dict(arrowstyle='->',
                                    connectionstyle='arc3,rad=0'),
                    font_properties=FontProperties(size=8),
                    zorder=20)

    # bmp_info.basemap.readshapefile(".".join(BASIN_BOUNDARIES_FILE.split(".")[:-1]).replace("utm18", "latlon"), "basin",
    #                               linewidth=1.2, ax=ax, zorder=9)

    # Plot zonally averaged lake fraction
    ax = fig.add_subplot(gs[0, 1])
    ydata = range(lake_fraction_field.shape[1])
    ax.plot(lake_fraction_field.mean(axis=0) * 100, ydata, lw=2)

    ax.fill_betweenx(ydata, lake_fraction_field.mean(axis=0) * 100, alpha=0.5)

    ax.set_xlabel("Lake fraction (%)")
    ax.set_ylim(min(ydata), max(ydata))
    ax.xaxis.set_tick_params(direction='out', width=1)
    ax.yaxis.set_tick_params(direction='out', width=1)
    ax.xaxis.set_ticks_position("bottom")
    ax.yaxis.set_ticks_position("none")

    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)

    for tl in ax.yaxis.get_ticklabels():
        tl.set_visible(False)

    fig.savefig("qc_basin_outlets_points.png", bbox_inches="tight")
    # plt.show()
    plt.close(fig)

    return name_to_ij_out, basin_name_to_mask
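
The outlet search above repeatedly takes the grid cell with the largest flow accumulation and then retires it, so each basin polygon is matched at most once. The argmax-and-retire pattern in isolation (a runnable toy; np.unravel_index is a compact equivalent of the np.where(fm == ...) lookup):

import numpy as np

acc = np.array([[5.0, 9.0],
                [7.0, 1.0]])
work = acc.copy()
visit_order = []
while np.max(work) > 0:
    i, j = np.unravel_index(np.argmax(work), work.shape)
    visit_order.append((i, j))
    work[i, j] = -1  # retire the cell so it is not selected again

assert visit_order == [(0, 1), (1, 0), (0, 0), (1, 1)]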
def main():
    start_year = 1980
    end_year = 2010

    start_date = datetime(start_year, 1, 1)
    end_date = datetime(end_year, 12, 31)

    ids_with_lakes_upstream = [
        "104001", "093806", "093801", "081002", "081007", "080718"
    ]

    selected_station_ids = [
        "092715", "074903", "080104", "081007", "061905", "093806", "090613",
        "081002", "093801", "080718", "104001"
    ]

    selected_station_ids = ids_with_lakes_upstream

    # Get the list of stations to do the comparison with
    stations = cehq_station.read_station_data(
        start_date=start_date,
        end_date=end_date,
        selected_ids=selected_station_ids)

    # add hydat stations
    # province = "QC"
    # min_drainage_area_km2 = 10000.0
    # stations_hd = cehq_station.load_from_hydat_db(start_date=start_date, end_date=end_date,
    # province=province, min_drainage_area_km2=min_drainage_area_km2)
    # if not len(stations_hd):
    #     print "No hydat stations satisying the conditions: period {0}-{1}, province {2}".format(
    #         str(start_date), str(end_date), province
    #     )
    # stations.extend(stations_hd)

    # brewer2mpl.get_map args: set name  set type  number of colors
    bmap = brewer2mpl.get_map("Set1", "qualitative", 9)

    path1 = "/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-r.hdf5"
    label1 = "CRCM5-L1"

    path2 = "/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-rl.hdf5"
    label2 = "CRCM5-L2"

    color2, color1 = bmap.mpl_colors[:2]

    fldirs = analysis.get_array_from_file(
        path=path1, var_name=infovar.HDF_FLOW_DIRECTIONS_NAME)
    lons2d, lats2d, basemap = analysis.get_basemap_from_hdf(path1)

    lake_fractions = analysis.get_array_from_file(
        path=path1, var_name=infovar.HDF_LAKE_FRACTION_NAME)
    # cell_areas = analysis.get_array_from_file(path=path1, var_name=infovar.HDF_CELL_AREA_NAME)
    acc_area = analysis.get_array_from_file(
        path=path1, var_name=infovar.HDF_ACCUMULATION_AREA_NAME)

    cell_manager = CellManager(fldirs,
                               lons2d=lons2d,
                               lats2d=lats2d,
                               accumulation_area_km2=acc_area)

    station_to_mp = cell_manager.get_model_points_for_stations(
        station_list=stations,
        lake_fraction=lake_fractions,
        drainaige_area_reldiff_limit=0.3)

    fig, axes = plt.subplots(1, 2, gridspec_kw=dict(top=0.80, wspace=0.4))

    q90_obs_list = []
    q90_mod1_list = []
    q90_mod2_list = []

    q10_obs_list = []
    q10_mod1_list = []
    q10_mod2_list = []

    for the_station, the_mp in station_to_mp.items():
        assert isinstance(the_station, Station)
        compl_years = the_station.get_list_of_complete_years()
        if len(compl_years) < 3:
            continue

        t, stfl1 = analysis.get_daily_climatology_for_a_point(
            path=path1,
            years_of_interest=compl_years,
            i_index=the_mp.ix,
            j_index=the_mp.jy,
            var_name="STFA")

        _, stfl2 = analysis.get_daily_climatology_for_a_point(
            path=path2,
            years_of_interest=compl_years,
            i_index=the_mp.ix,
            j_index=the_mp.jy,
            var_name="STFA")

        _, stfl_obs = the_station.get_daily_climatology_for_complete_years(
            stamp_dates=t, years=compl_years)

        # Q90
        q90_obs = np.percentile(stfl_obs, 90)
        q90_mod1 = np.percentile(stfl1, 90)
        q90_mod2 = np.percentile(stfl2, 90)

        # Q10
        q10_obs = np.percentile(stfl_obs, 10)
        q10_mod1 = np.percentile(stfl1, 10)
        q10_mod2 = np.percentile(stfl2, 10)

        # save quantiles to lists for correlation calculation
        q90_obs_list.append(q90_obs)
        q90_mod1_list.append(q90_mod1)
        q90_mod2_list.append(q90_mod2)

        q10_mod1_list.append(q10_mod1)
        q10_mod2_list.append(q10_mod2)
        q10_obs_list.append(q10_obs)

        # axes[0].annotate(the_station.id, (q90_obs, np.percentile(stfl1, 90)))
        # axes[1].annotate(the_station.id, (q10_obs, np.percentile(stfl1, 10)))

    # Plot scatter plot of Q90
    the_ax = axes[0]

    # the_ax.annotate(the_station.id, (q90_obs, np.percentile(stfl1, 90)))
    the_ax.scatter(q90_obs_list, q90_mod1_list, label=label1, c=color1)
    the_ax.scatter(q90_obs_list, q90_mod2_list, label=label2, c=color2)

    # plot scatter plot of Q10
    the_ax = axes[1]
    # the_ax.annotate(the_station.id, (q10_obs, np.percentile(stfl1, 10)))
    h1 = the_ax.scatter(q10_obs_list, q10_mod1_list, label=label1, c=color1)
    h2 = the_ax.scatter(q10_obs_list, q10_mod2_list, label=label2, c=color2)

    # Add correlation coefficients to the axes
    fp = FontProperties(size=14, weight="bold")
    axes[0].annotate(r"$R^2 = {0:.2f}$".format(
        np.corrcoef(q90_mod1_list, q90_obs_list)[0, 1]**2), (0.1, 0.85),
                     color=color1,
                     xycoords="axes fraction",
                     font_properties=fp)
    axes[0].annotate(r"$R^2 = {0:.2f}$".format(
        np.corrcoef(q90_mod2_list, q90_obs_list)[0, 1]**2), (0.1, 0.70),
                     color=color2,
                     xycoords="axes fraction",
                     font_properties=fp)

    axes[1].annotate(r"$R^2 = {0:.2f}$".format(
        np.corrcoef(q10_mod1_list, q10_obs_list)[0, 1]**2), (0.1, 0.85),
                     color=color1,
                     xycoords="axes fraction",
                     font_properties=fp)
    axes[1].annotate(r"$R^2 = {0:.2f}$".format(
        np.corrcoef(q10_mod2_list, q10_obs_list)[0, 1]**2), (0.1, 0.70),
                     color=color2,
                     xycoords="axes fraction",
                     font_properties=fp)

    sf = ScalarFormatter(useMathText=True)
    sf.set_powerlimits((-2, 3))
    for ind, the_ax in enumerate(axes):
        plot_one_to_one_line(the_ax)
        if ind == 0:
            the_ax.set_xlabel(r"Observed $\left({\rm m^3/s} \right)$")
            the_ax.set_ylabel(r"Modelled $\left({\rm m^3/s} \right)$")

        the_ax.annotate(r"$Q_{90}$" if ind == 0 else r"$Q_{10}$", (0.95, 0.95),
                        xycoords="axes fraction",
                        bbox=dict(facecolor="white"),
                        va="top",
                        ha="right")

        the_ax.xaxis.set_major_formatter(sf)
        the_ax.yaxis.set_major_formatter(sf)

        locator = MaxNLocator(nbins=5)
        the_ax.xaxis.set_major_locator(locator)
        the_ax.yaxis.set_major_locator(locator)
        x1, x2 = the_ax.get_xlim()
        # Since streamflow percentiles can only be positive
        the_ax.set_xlim(0, x2)
        the_ax.set_ylim(0, x2)

    fig.legend([h1, h2], [label1, label2], loc="upper center", ncol=2)
    figpath = os.path.join(images_folder, "percentiles_comparison.png")
    # plt.tight_layout()
    fig.savefig(figpath, dpi=cpp.FIG_SAVE_DPI, bbox_inches="tight")
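
The annotated R² values above are squared Pearson correlations between modelled and observed percentiles; a minimal runnable illustration of the same computation (synthetic numbers):

import numpy as np

q90_obs = np.array([120.0, 450.0, 80.0, 300.0])
q90_mod = np.array([110.0, 480.0, 95.0, 280.0])
r2 = np.corrcoef(q90_mod, q90_obs)[0, 1] ** 2
print("R^2 = {0:.2f}".format(r2))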
Example #8
def main():
    # Define the simulations to be validated
    r_config = RunConfig(
        data_path=
        "/RESCUE/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-r.hdf5",
        start_year=1990,
        end_year=2010,
        label="CRCM5-L1")
    r_config_list = [r_config]

    r_config = RunConfig(
        data_path=
        "/RESCUE/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-r.hdf5",
        start_year=1990,
        end_year=2010,
        label="CRCM5-NL")
    r_config_list.append(r_config)

    bmp_info = analysis.get_basemap_info_from_hdf(file_path=r_config.data_path)
    bmp_info.should_draw_grey_map_background = True
    bmp_info.should_draw_basin_boundaries = False
    bmp_info.map_bg_color = "0.75"

    station_ids = ["104001", "093806", "093801", "081002", "081007", "080718"]

    # get river network information used in the model
    flow_directions = analysis.get_array_from_file(
        r_config.data_path, var_name=infovar.HDF_FLOW_DIRECTIONS_NAME)
    accumulation_area_km2 = analysis.get_array_from_file(
        path=r_config.data_path, var_name=infovar.HDF_ACCUMULATION_AREA_NAME)
    cell_manager = CellManager(flow_dirs=flow_directions,
                               lons2d=bmp_info.lons,
                               lats2d=bmp_info.lats,
                               accumulation_area_km2=accumulation_area_km2)

    # Get the list of stations to indicate on the bias map
    stations = cehq_station.read_station_data(start_date=None,
                                              end_date=None,
                                              selected_ids=station_ids)
    """:type : list[Station]"""

    xx, yy = bmp_info.get_proj_xy()
    station_to_modelpoint = cell_manager.get_model_points_for_stations(
        station_list=stations)
    upstream_edges = cell_manager.get_upstream_polygons_for_points(
        model_point_list=station_to_modelpoint.values(), xx=xx, yy=yy)

    bmp_info.draw_colorbar_for_each_subplot = True

    # Validate temperature, precip and swe
    obs_path_anusplin = "/home/huziy/skynet3_rech1/anusplin_links"
    obs_path_swe = "data/swe_ross_brown/swe.nc"
    model_var_to_obs_path = OrderedDict([("TT", obs_path_anusplin),
                                         ("I5", obs_path_swe)])

    model_var_to_season = OrderedDict([
        ("TT", OrderedDict([("Spring", range(3, 6))])),
        ("I5", OrderedDict([("Winter", [1, 2, 12])]))
    ])

    vname_to_obs_data = {}

    # parameters that won't change in the loop over variable names
    params_const = dict(rconfig=r_config, bmp_info=bmp_info)

    for vname, obs_path in model_var_to_obs_path.items():
        season_to_obs_data = get_seasonal_clim_obs_data(
            vname=vname,
            obs_path=obs_path,
            season_to_months=model_var_to_season[vname],
            **params_const)

        # Mask out swe over lakes, since I5 is calculated only for land
        if vname in [
                "I5",
        ]:
            for season in season_to_obs_data:
                season_to_obs_data[season] = maskoceans(
                    bmp_info.lons,
                    bmp_info.lats,
                    season_to_obs_data[season],
                    inlands=True)

        vname_to_obs_data[vname] = season_to_obs_data

    # Plotting
    plot_all_vars_in_one_fig = True

    fig = None
    gs = None
    if plot_all_vars_in_one_fig:
        plot_utils.apply_plot_params(font_size=12,
                                     width_pt=None,
                                     width_cm=25,
                                     height_cm=20)
        fig = plt.figure()
        ncols = len(model_var_to_obs_path) + 1
        gs = GridSpec(len(r_config_list),
                      ncols,
                      width_ratios=(ncols - 1) * [
                          1.,
                      ] + [
                          0.05,
                      ])
    else:
        plot_utils.apply_plot_params(font_size=12,
                                     width_pt=None,
                                     width_cm=25,
                                     height_cm=25)

    station_x_list = []
    station_y_list = []

    mvarname_to_cs = {}
    for row, r_config in enumerate(r_config_list):
        for col, mname in enumerate(model_var_to_obs_path):

            row_axes = [
                fig.add_subplot(gs[row, col]),
            ]

            mvarname_to_cs[mname] = compare_vars(
                vname_model=mname,
                vname_to_obs=vname_to_obs_data,
                r_config=r_config,
                season_to_months=model_var_to_season[mname],
                bmp_info_agg=bmp_info,
                axes_list=row_axes)

            # adjust titles, labels and overlays for each panel in the row
            for the_ax in row_axes:

                the_ax.set_title(the_ax.get_title() + ", {}".format(
                    infovar.get_long_display_label_for_var(mname)))
                # Need titles only for the first row
                if row > 0:
                    the_ax.set_title("")

                if col == 0:
                    the_ax.set_ylabel(r_config.label)
                else:
                    the_ax.set_ylabel("")

                draw_upstream_area_bounds(the_ax, upstream_edges, color="g")

                if len(station_x_list) == 0:
                    for the_station in stations:
                        xst, yst = bmp_info.basemap(the_station.longitude,
                                                    the_station.latitude)
                        station_x_list.append(xst)
                        station_y_list.append(yst)

                bmp_info.basemap.scatter(station_x_list,
                                         station_y_list,
                                         c="g",
                                         ax=the_ax,
                                         s=20,
                                         zorder=10,
                                         alpha=0.5)

    # Save the figure if necessary
    if plot_all_vars_in_one_fig:

        if not img_folder.is_dir():
            img_folder.mkdir(parents=True)

        fig_path = img_folder.joinpath("{}.png".format(
            "_".join(model_var_to_obs_path)))
        with fig_path.open("wb") as figfile:
            fig.savefig(figfile, format="png", bbox_inches="tight")

        plt.close(fig)
def main():
    model_data_path = Path("/RECH2/huziy/BC-MH/bc_mh_044deg/Diagnostics")
    # model_data_path = Path("/RECH2/huziy/BC-MH/bc_mh_044deg/Samples")

    static_data_file = "/RECH2/huziy/BC-MH/bc_mh_044deg/Samples/bc_mh_044deg_198001/pm1980010100_00000000p"

    r = RPN(static_data_file)

    fldir = r.get_first_record_for_name("FLDR")
    faa = r.get_first_record_for_name("FAA")
    lons, lats = r.get_longitudes_and_latitudes_for_the_last_read_rec()

    gc = default_domains.bc_mh_044

    cell_manager = CellManager(fldir, nx=fldir.shape[0], ny=fldir.shape[1],
                               lons2d=lons, lats2d=lats, accumulation_area_km2=faa)

    selected_station_ids = ["06EA002", ]

    stations = cehq_station.load_from_hydat_db(province="SK", selected_ids=selected_station_ids, natural=None)

    # (06EA002): CHURCHILL RIVER AT SANDY BAY at (-102.31832885742188,55.52333068847656), accum. area is 212000.0 km**2
    # TODO: plot where is this station, compare modelled and observed hydrographs

    # for s in stations:
    #     assert isinstance(s, cehq_station.Station)
    #     s.latitude += 0.9
    #     s.longitude -= 0.2
    #     print(s)

    station_to_model_point = cell_manager.get_model_points_for_stations(stations, drainaige_area_reldiff_limit=0.8,
                                                                        nneighbours=1)

    print(station_to_model_point[stations[0]])

    station = stations[0]
    assert isinstance(station, cehq_station.Station)

    obs_not_corrected = pd.Series(index=station.dates, data=station.values).groupby(
        by=lambda d: d.replace(day=15)).mean()
    obs_corrected = pd.read_csv("mh/obs_data/Churchill Historic Monthly Apportionable Flow_06EA002.csv.bak.original", skiprows=2)

    print(obs_corrected.head())
    print(obs_corrected.year.iloc[0], obs_corrected.year.iloc[-1])

    date_index = pd.date_range(start=datetime(obs_corrected.year.iloc[0] - 1, 12, 15),
                               end=datetime(obs_corrected.year.iloc[-1], 12, 15),
                               freq="M")

    date_index = date_index.shift(15, freq="D")

    print(date_index)
    data = np.concatenate([r for r in obs_corrected.values[:, 1:-1]])

    factor = date_index.map(lambda d: 1000 / (calendar.monthrange(d.year, d.month)[1] * 24 * 3600))
    print(factor[:10])
    obs_corrected = pd.Series(index=date_index, data=data * factor)

    station_to_modelled_data = get_model_data(station_to_model_point, output_path=model_data_path,
                                              grid_config=gc, basins_of_interest_shp=default_domains.MH_BASINS_PATH,
                                              cell_manager=cell_manager, vname="STFL")

    modelled_data = station_to_modelled_data[station]

    fig = plt.figure()
    ax = obs_corrected.plot(label="obs corrected")

    obs_not_corrected.plot(label="obs not corrected", ax=ax, color="k")

    modelled_data.plot(label="CRCM5", ax=ax, color="r")

    ax.legend(loc="upper left")
    img_file = img_folder.joinpath("{}_validation_monthly.png".format(station.id))
    fig.savefig(str(img_file))
    plt.close(fig)

    # climatology
    start_year = 1980
    end_year = 2010

    date_selector = lambda d: (start_year <= d.year <= end_year) and not ((d.month == 2) and (d.day == 29))

    fig = plt.figure()
    ax = obs_corrected.select(date_selector).groupby(lambda d: d.replace(year=2001)).mean().plot(label="obs corrected")

    obs_not_corrected.select(date_selector).groupby(lambda d: d.replace(year=2001)).mean().plot(
        label="obs not corrected", ax=ax, color="k")

    modelled_data.select(date_selector).groupby(lambda d: d.replace(year=2001)).mean().plot(label="CRCM5", ax=ax,
                                                                                            color="r")

    ax.xaxis.set_major_locator(MonthLocator(bymonthday=15))
    ax.xaxis.set_major_formatter(DateFormatter("%b"))

    ax.legend(loc="upper left")

    img_file = img_folder.joinpath("{}_validation_clim.png".format(station.id))
    fig.savefig(str(img_file))
    plt.close(fig)

    # Interannual variability
    fig = plt.figure()

    obs_corrected = obs_corrected.select(lambda d: start_year <= d.year <= end_year)
    modelled_data = modelled_data.select(lambda d: start_year <= d.year <= end_year)

    corr_list = []
    for m in range(1, 13):
        obs = obs_corrected.select(lambda d: d.month == m)
        mod = modelled_data.select(lambda d: d.month == m)

        print(obs.head())

        obs.index = obs.index.map(lambda d: d.year)
        mod.index = mod.index.map(lambda d: d.year)

        corr_list.append(obs.corr(mod))

    ax = plt.gca()
    ax.plot(range(1, 13), corr_list)
    ax.set_xlabel("Month")
    ax.set_title("Inter-annual variability")

    img_file = img_folder.joinpath("{}_interannual.png".format(station.id))
    fig.tight_layout()
    fig.savefig(str(img_file), bbox_inches="tight")
    plt.close(fig)
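
The factor applied to the corrected observations appears to convert monthly volumes (seemingly in thousands of m³) into a mean discharge in m³/s; the input unit is an assumption here. The conversion for a single month:

import calendar

volume_thousands_m3 = 2600.0  # hypothetical monthly volume, in 10**3 m**3
seconds_in_month = calendar.monthrange(2000, 1)[1] * 24 * 3600
discharge_m3_per_s = volume_thousands_m3 * 1000.0 / seconds_in_month
print(discharge_m3_per_s)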
Example #10
def main(start_year=1980, end_year=1989):


    soil_layer_widths = infovar.soil_layer_widths_26_to_60
    soil_tops = np.cumsum(soil_layer_widths).tolist()[:-1]
    soil_tops = [0, ] + soil_tops



    selected_station_ids = [
        "061905", "074903", "090613", "092715", "093801", "093806"
    ]

#    path1 = "/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-rl_spinup.hdf"
#    label1 = "CRCM5-HCD-RL"

    path1 = "/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-rl-intfl_spinup_ITFS.hdf5"
    label1 = "CRCM5-HCD-RL-INTFL"

    path2 = "/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-rl-intfl_ITFS_avoid_truncation1979-1989.hdf5"
    label2 = "CRCM5-HCD-RL-INTFL-improved"

    ############
    images_folder = "images_for_lake-river_paper/comp_soil_profiles"
    if not os.path.isdir(images_folder):
        os.mkdir(images_folder)

    fldirs = analysis.get_array_from_file(path=path1, var_name=infovar.HDF_FLOW_DIRECTIONS_NAME)
    lons2d, lats2d, basemap = analysis.get_basemap_from_hdf(path1)

    lake_fractions = analysis.get_array_from_file(path=path1, var_name=infovar.HDF_LAKE_FRACTION_NAME)
    cell_areas = analysis.get_array_from_file(path=path1, var_name=infovar.HDF_CELL_AREA_NAME_M2)
    acc_areakm2 = analysis.get_array_from_file(path=path1, var_name=infovar.HDF_ACCUMULATION_AREA_NAME)
    depth_to_bedrock = analysis.get_array_from_file(path=path1, var_name=infovar.HDF_DEPTH_TO_BEDROCK_NAME)


    cell_manager = CellManager(fldirs, lons2d=lons2d, lats2d=lats2d, accumulation_area_km2=acc_areakm2)

    # get climatological liquid soil moisture and convert fractions to mm
    t0 = time.perf_counter()
    daily_dates, levels, i1_nointfl = analysis.get_daily_climatology_of_3d_field(
        path_to_hdf_file=path1,
        var_name="I1",
        start_year=start_year,
        end_year=end_year
    )
    print("read I1 - 1")
    print("Spent {0} seconds ".format(time.clock() - t0))

    _, _, i1_intfl = analysis.get_daily_climatology_of_3d_field(
        path_to_hdf_file=path2,
        var_name="I1",
        start_year=start_year,
        end_year=end_year
    )
    print("read I1 - 2")

    # get climatological frozen soil moisture and convert fractions to mm
    _, _, i2_nointfl = analysis.get_daily_climatology_of_3d_field(
        path_to_hdf_file=path1,
        var_name="I2",
        start_year=start_year,
        end_year=end_year
    )
    print("read I2 - 1")

    _, _, i2_intfl = analysis.get_daily_climatology_of_3d_field(
        path_to_hdf_file=path2,
        var_name="I2",
        start_year=start_year,
        end_year=end_year
    )
    print("read I2 - 2")
    #
    sm_intfl = i1_intfl + i2_intfl
    sm_nointfl = i1_nointfl + i2_nointfl


    #Get the list of stations to do the comparison with
    stations = cehq_station.read_station_data(
        start_date=datetime(start_year, 1, 1),
        end_date=datetime(end_year, 12, 31),
        selected_ids=selected_station_ids
    )


    print("sm_noinfl, min, max = {0}, {1}".format(sm_nointfl.min(), sm_nointfl.max()))
    print("sm_infl, min, max = {0}, {1}".format(sm_intfl.min(), sm_intfl.max()))
    diff = (sm_intfl - sm_nointfl)
    #diff *= soil_layer_widths[np.newaxis, :, np.newaxis, np.newaxis] * 1000  # to convert in mm

    #print "number of nans", np.isnan(diff).astype(int).sum()

    print("cell area min,max = {0}, {1}".format(cell_areas.min(), cell_areas.max()))
    print("acc area min,max = {0}, {1}".format(acc_areakm2.min(), acc_areakm2.max()))

    assert np.all(lake_fractions >= 0)
    print("lake fractions (min, max): ", lake_fractions.min(), lake_fractions.max())

    # No need to go very deep
    nlayers = 3
    z, t = np.meshgrid(soil_tops[:nlayers], date2num(daily_dates))
    station_to_mp = cell_manager.get_model_points_for_stations(stations)


    plotted_global = False

    for the_station, mp in station_to_mp.items():
        assert isinstance(mp, ModelPoint)
        assert isinstance(the_station, Station)
        fig = plt.figure()
        umask = cell_manager.get_mask_of_upstream_cells_connected_with_by_indices(mp.ix, mp.jy)

        #exclude lake cells from the profiles
        sel = (umask == 1) & (depth_to_bedrock > 3) & (acc_areakm2 >= 0)

        umaskf = umask.astype(float)
        umaskf *= (1.0 - lake_fractions) * cell_areas
        umaskf[~sel] = 0.0


        profiles = np.tensordot(diff, umaskf) / umaskf.sum()
        print(profiles.shape, profiles.min(), profiles.max(), umaskf.sum(), umaskf.min(), umaskf.max())

        d = np.abs(profiles).max()
        print("d = {0}".format(d))
        clevs = np.round(np.linspace(-d, d, 12), decimals=5)

        diff_cmap = cm.get_cmap("RdBu_r", lut=len(clevs) - 1)
        bn = BoundaryNorm(clevs, len(clevs) - 1)


        plt.title("({})-({})".format(label2, label2))
        img = plt.contourf(t, z, profiles[:, :nlayers], cmap = diff_cmap, levels = clevs, norm = bn)
        plt.colorbar(img, ticks = clevs)
        ax = plt.gca()
        assert isinstance(ax, Axes)

        ax.invert_yaxis()
        ax.xaxis.set_major_formatter(DateFormatter("%b"))
        ax.xaxis.set_major_locator(MonthLocator())


        fig.savefig(os.path.join(images_folder, "{0}_{1}_{2}.jpeg".format(the_station.id, label1, label2)),
                    dpi = cpp.FIG_SAVE_DPI, bbox_inches = "tight")



        print("processed: {0}".format(the_station))
        if not plotted_global:
            plotted_global = True
            fig = plt.figure()
            sel = (depth_to_bedrock >= 0.1) & (acc_areakm2 >= 0)

            umaskf = (1.0 - lake_fractions) * cell_areas
            umaskf[~sel] = 0.0


            profiles = np.tensordot(diff, umaskf) / umaskf.sum()
            print(profiles.shape, profiles.min(), profiles.max(), umaskf.sum(), umaskf.min(), umaskf.max())

            d = np.abs(profiles).max()
            print("d = {0}".format(d))
            clevs = np.round(np.linspace(-d, d, 12), decimals=5)

            diff_cmap = cm.get_cmap("RdBu_r", lut=len(clevs) - 1)
            bn = BoundaryNorm(clevs, len(clevs) - 1)

            img = plt.contourf(t, z, profiles[:, :nlayers], cmap = diff_cmap, levels = clevs, norm = bn)
            plt.colorbar(img, ticks = clevs)
            ax = plt.gca()
            assert isinstance(ax, Axes)

            ax.invert_yaxis()
            ax.xaxis.set_major_formatter(DateFormatter("%b"))
            ax.xaxis.set_major_locator(MonthLocator())


            fig.savefig(os.path.join(images_folder, "global_mean.jpeg"),
                        dpi = cpp.FIG_SAVE_DPI, bbox_inches = "tight")


    pass
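
np.tensordot(diff, umaskf) contracts the trailing (y, x) axes of the (time, level, y, x) difference field with the 2-D weight field, so dividing by umaskf.sum() yields an area-weighted mean profile. A small self-contained check of that identity:

import numpy as np

diff = np.random.rand(3, 2, 4, 5)   # (time, level, y, x)
weights = np.random.rand(4, 5)      # e.g. (1 - lake_fraction) * cell_area
profiles = np.tensordot(diff, weights) / weights.sum()
assert profiles.shape == (3, 2)

explicit = (diff * weights).sum(axis=(-2, -1)) / weights.sum()
assert np.allclose(profiles, explicit)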
Example #12
def main():
    # stations = cehq_station.read_grdc_stations(st_id_list=["2903430", "2909150", "2912600", "4208025"])

    selected_station_ids = [
        "05LM006", "05BN012", "05AK001", "05QB003", "06EA002"
    ]

    stations = cehq_station.load_from_hydat_db(
        natural=None,
        province=None,
        selected_ids=selected_station_ids,
        skip_data_checks=True)

    stations_mh = cehq_station.get_manitoba_hydro_stations()

    # copy metadata from the corresponding hydat stations
    for s in stations:
        assert isinstance(s, Station)
        for s_mh in stations_mh:
            assert isinstance(s_mh, Station)

            if s == s_mh:
                s_mh.copy_metadata(s)
                break

    stations = [
        s for s in stations_mh
        if s.id in selected_station_ids and s.longitude is not None
    ]

    stations_to_mp = None

    import matplotlib.pyplot as plt

    # labels = ["CanESM", "MPI"]
    # paths = ["/skynet3_rech1/huziy/offline_stfl/canesm/discharge_1958_01_01_00_00.nc",
    # "/skynet3_rech1/huziy/offline_stfl/mpi/discharge_1958_01_01_00_00.nc"]
    #
    # colors = ["r", "b"]

    # labels = ["ERA", ]
    # colors = ["r", ]
    # paths = ["/skynet3_rech1/huziy/arctic_routing/era40/discharge_1958_01_01_00_00.nc"]

    labels = [
        "Model",
    ]
    colors = [
        "r",
    ]
    paths = [
        "/RESCUE/skynet3_rech1/huziy/water_route_mh_bc_011deg_wc/discharge_1980_01_01_12_00.nc"
    ]

    infocell_path = "/RESCUE/skynet3_rech1/huziy/water_route_mh_bc_011deg_wc/infocell.nc"

    start_year = 1980
    end_year = 2014

    stations_filtered = []
    for s in stations:
        # Also filter out stations with small accumulation areas
        # if s.drainage_km2 is not None and s.drainage_km2 < 100:
        #     continue

        # Filter stations with data out of the required time frame
        year_list = s.get_list_of_complete_years()

        print("Complete years for {}: {}".format(s.id, year_list))

        stations_filtered.append(s)

    stations = stations_filtered

    print("Retained {} stations.".format(len(stations)))

    sim_to_time = {}

    monthly_dates = [datetime(2001, m, 15) for m in range(1, 13)]
    fmt = FuncFormatter(lambda x, pos: num2date(x).strftime("%b")[0])
    locator = MonthLocator(bymonthday=15)

    fig = plt.figure()

    axes = []
    row_indices = []
    col_indices = []

    ncols = 1
    shiftrow = 0 if len(stations) % ncols == 0 else 1
    nrows = len(stations) // ncols + shiftrow
    shared_ax = None
    gs = gridspec.GridSpec(ncols=ncols, nrows=nrows)

    for i, s in enumerate(stations):
        row = i // ncols
        col = i % ncols

        row_indices.append(row)
        col_indices.append(col)

        if shared_ax is None:
            ax = fig.add_subplot(gs[row, col])
            shared_ax = ax
            assert isinstance(shared_ax, Axes)

        else:
            ax = fig.add_subplot(gs[row, col])

        ax.xaxis.set_major_locator(locator)
        ax.yaxis.set_major_locator(MaxNLocator(nbins=4))

        ax.xaxis.set_major_formatter(fmt)
        sfmt = ScalarFormatter(useMathText=True)
        sfmt.set_powerlimits((-3, 4))
        ax.yaxis.set_major_formatter(sfmt)
        assert isinstance(ax, Axes)

        axes.append(ax)

    # generate daily stamp dates
    d0 = datetime(2001, 1, 1)
    stamp_dates = [d0 + timedelta(days=i) for i in range(365)]

    # plot a panel for each station
    for s, ax, row, col in zip(stations, axes, row_indices, col_indices):

        assert isinstance(s, Station)
        assert isinstance(ax, Axes)
        if s.grdc_monthly_clim_max is not None:
            ax.fill_between(monthly_dates,
                            s.grdc_monthly_clim_min,
                            s.grdc_monthly_clim_max,
                            color="0.6",
                            alpha=0.5)

        avail_years = s.get_list_of_complete_years()
        print("{}: {}".format(s.id, ",".join([str(y) for y in avail_years])))
        years = [y for y in avail_years if start_year <= y <= end_year]
        obs_clim_stfl = s.get_monthly_climatology(years_list=years)

        if obs_clim_stfl is None:
            continue

        print(obs_clim_stfl.head())

        obs_clim_stfl.plot(color="k", lw=3, label="Obs", ax=ax)

        if s.river_name is not None and s.river_name != "":
            ax.set_title(s.river_name)
        else:
            ax.set_title(s.id)

        for path, sim_label, color in zip(paths, labels, colors):
            ds = Dataset(path)

            if stations_to_mp is None:
                acc_area_2d = ds.variables["accumulation_area"][:]
                lons2d, lats2d = ds.variables["longitude"][:], ds.variables[
                    "latitude"][:]
                x_index, y_index = ds.variables["x_index"][:], ds.variables[
                    "y_index"][:]
                stations_to_mp = get_dataless_model_points_for_stations(
                    stations, acc_area_2d, lons2d, lats2d, x_index, y_index)

            # read dates only once for a given simulation
            if sim_label not in sim_to_time:
                time_str = ds.variables["time"][:].astype(str)
                times = [
                    datetime.strptime("".join(t_s), TIME_FORMAT)
                    for t_s in time_str
                ]
                sim_to_time[sim_label] = times

            mp = stations_to_mp[s]
            data = ds.variables["water_discharge_accumulated"][:,
                                                               mp.cell_index]
            print(path)
            df = DataFrame(data=data,
                           index=sim_to_time[sim_label],
                           columns=["value"])
            df["year"] = df.index.map(lambda d: d.year)
            df = df.loc[df.year.isin(years), :]
            df = df.groupby(lambda d: datetime(2001, d.month, 15)).mean()

            # print np.mean( monthly_model ), s.river_name, sim_label
            df.plot(color=color, lw=3, label=sim_label, ax=ax, y="value")

            ds.close()

        if row < nrows - 1:
            ax.set_xticklabels([])

    axes[0].legend(fontsize=17, loc=2)
    plt.tight_layout()
    plt.savefig("mh/offline_validation_mh.png", dpi=400)
    plt.close(fig)

    with Dataset(infocell_path) as ds:

        fldir = ds.variables["flow_direction_value"][:]
        faa = ds.variables["accumulation_area"][:]

        lon, lat = [ds.variables[k][:] for k in ["lon", "lat"]]

        # plot station positions and upstream areas
        cell_manager = CellManager(fldir,
                                   nx=fldir.shape[0],
                                   ny=fldir.shape[1],
                                   lons2d=lon,
                                   lats2d=lat,
                                   accumulation_area_km2=faa)

    fig = plt.figure()
    from crcm5.mh_domains import default_domains
    gc = default_domains.bc_mh_011

    # get the basemap object
    bmp, data_mask = gc.get_basemap_using_shape_with_polygons_of_interest(
        lon, lat, shp_path=default_domains.MH_BASINS_PATH, mask_margin=5)

    xx, yy = bmp(lon, lat)
    ax = plt.gca()
    colors = ["g", "r", "m", "c", "y", "violet"]
    i = 0
    for s, mp in stations_to_mp.items():
        assert isinstance(mp, ModelPoint)
        upstream_mask = cell_manager.get_mask_of_upstream_cells_connected_with_by_indices(
            mp.ix, mp.jy)

        current_points = upstream_mask > 0.5

        bmp.drawcoastlines()
        bmp.drawrivers()

        bmp.scatter(xx[current_points],
                    yy[current_points],
                    c=colors[i % len(colors)])
        i += 1

        va = "top"
        if s.id in ["05AK001", "05LM006"]:
            va = "bottom"

        ha = "left"
        if s.id in ["05QB003"]:
            ha = "right"

        bmp.scatter(xx[mp.ix, mp.jy], yy[mp.ix, mp.jy], c="b")
        ax.annotate(s.id,
                    xy=(xx[mp.ix, mp.jy], yy[mp.ix, mp.jy]),
                    horizontalalignment=ha,
                    verticalalignment=va,
                    bbox=dict(boxstyle='round', fc='gray', alpha=0.5))

    fig.savefig("mh/offline_stations_{}.png".format("positions"))
    plt.close(fig)
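
The monthly climatology in the panel loop is obtained by mapping every date to a stamp date in a single reference year before averaging; the same groupby trick on synthetic data:

import pandas as pd
from datetime import datetime

idx = pd.date_range("2001-01-01", "2002-12-31", freq="D")
ts = pd.Series(range(len(idx)), index=idx, dtype=float)
clim = ts.groupby(lambda d: datetime(2001, d.month, 15)).mean()
assert len(clim) == 12  # one stamp date per calendar month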
Example #13
def plot_streamflow():
    plot_utils.apply_plot_params(width_pt=None, width_cm=19, height_cm=10, font_size=12)
    labels = ["Glacier-only", "All"]
    colors = ["r", "b"]
    paths = [
        "/skynet3_exec2/aganji/glacier_katja/watroute_gemera/discharge_stat_glac_00_99_2000_01_01_00_00.nc",
        "/skynet3_exec2/aganji/glacier_katja/watroute_gemera/discharge_stat_both_00_992000_01_01_00_00.nc"]

    infocell_path = "/skynet3_exec2/aganji/glacier_katja/watroute_gemera/infocell.nc"

    start_year = 2000
    end_year = 2099


    with Dataset(paths[0]) as ds:
        acc_area = ds.variables["accumulation_area"][:]
        lons = ds.variables["longitude"][:]
        lats = ds.variables["latitude"][:]
        x_index = ds.variables["x_index"][:]
        y_index = ds.variables["y_index"][:]

    with Dataset(infocell_path) as ds:
        fldr = ds.variables["flow_direction_value"][:]

    driver = ogr.GetDriverByName('ESRI Shapefile')
    data_source = driver.Open(path_to_basin_shape, 0)

    assert isinstance(data_source, ogr.DataSource)

    geom = None

    print(data_source.GetLayerCount())

    layer = data_source.GetLayer()
    assert isinstance(layer, ogr.Layer)

    print(layer.GetFeatureCount())
    for feature in layer:
        assert isinstance(feature, ogr.Feature)
        geom = feature.geometry()

        assert isinstance(geom, ogr.Geometry)
        # print(str(geom))

        # geom = ogr.CreateGeometryFromWkt(geom.ExportToWkt())

    i, j = get_outlet_indices(geom, acc_area, lons, lats)
    print("Accumulation area at the outlet (according to flow directions): {}".format(acc_area[i, j]))


    cell_manager = CellManager(flow_dirs=fldr, lons2d=lons, lats2d=lats, accumulation_area_km2=acc_area)

    model_mask = cell_manager.get_mask_of_upstream_cells_connected_with_by_indices(i, j)


    cell_index = np.where((x_index == i) & (y_index == j))[0][0]

    print(cell_index)

    if not img_folder.is_dir():
        img_folder.mkdir(parents=True)

    # Do the plotting
    fig = plt.figure()
    gs = gridspec.GridSpec(1, 2, wspace=0.0)

    # Plot the hydrograph
    ax = fig.add_subplot(gs[0, 0])

    for p, c, label in zip(paths, colors, labels):
        with Dataset(p) as ds:
            stfl = ds.variables["water_discharge_accumulated"][:, cell_index]

            time = ds.variables["time"][:].astype(str)
            time = [datetime.strptime("".join(ts), "%Y_%m_%d_%H_%M") for ts in time]
            df = pd.DataFrame(index=time, data=stfl)

            # remove 29th of February
            df = df.select(lambda d: not (d.month == 2 and d.day == 29) and (start_year <= d.year <= end_year))

            df = df.groupby(lambda d: datetime(2001, d.month, d.day)).mean()

            ax.plot(df.index, df.values, c, lw=2, label=label)

    ax.xaxis.set_major_formatter(FuncFormatter(lambda tickval, pos: num2date(tickval).strftime("%b")[0]))
    ax.xaxis.set_major_locator(MonthLocator())
    ax.legend(loc="upper left", bbox_to_anchor=(1.05, 1), borderaxespad=0)
    ax.set_title("{}-{}".format(start_year, end_year))

    # Plot the point position
    ax = fig.add_subplot(gs[0, 1])
    bsm = get_basemap_glaciers_nw_america()
    x, y = bsm(lons[i, j], lats[i, j])
    bsm.scatter(x, y, c="b", ax=ax, zorder=10)
    bsm.drawcoastlines()
    bsm.readshapefile(path_to_basin_shape.replace(".shp", ""), "basin", color="m", linewidth=2, zorder=5)

    # xx, yy = bsm(lons, lats)
    # cmap = cm.get_cmap("gray_r", 10)
    # bsm.pcolormesh(xx, yy, model_mask * 0.5, cmap=cmap, vmin=0, vmax=1)

    bsm.drawrivers(ax=ax, zorder=9, color="b")


    plt.savefig(str(img_folder.joinpath("stfl_at_outlets.pdf")), bbox_inches="tight")
    plt.close(fig)
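
DataFrame.select, used above to drop February 29 and clip the years, was deprecated and later removed from pandas; an equivalent with boolean indexing on a DatetimeIndex (a sketch with synthetic data):

import numpy as np
import pandas as pd
from datetime import datetime

idx = pd.date_range("2000-01-01", "2003-12-31", freq="D")
df = pd.DataFrame({"stfl": np.random.rand(len(idx))}, index=idx)

keep = ~((df.index.month == 2) & (df.index.day == 29))
keep &= (df.index.year >= 2000) & (df.index.year <= 2003)
clim = df[keep].groupby(lambda d: datetime(2001, d.month, d.day)).mean()
assert len(clim) == 365  # February 29 has been dropped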
def main():
    # stations = cehq_station.read_grdc_stations(st_id_list=["2903430", "2909150", "2912600", "4208025"])

    selected_station_ids = [
        "05LM006",
        "05BN012",
        "05AK001",
        "05QB003",
        "06EA002"
    ]

    stations = cehq_station.load_from_hydat_db(natural=None, province=None, selected_ids=selected_station_ids, skip_data_checks=True)

    stations_mh = cehq_station.get_manitoba_hydro_stations()

    # copy metadata from the corresponding hydat stations
    for s in stations:
        assert isinstance(s, Station)
        for s_mh in stations_mh:
            assert isinstance(s_mh, Station)


            if s == s_mh:
                s_mh.copy_metadata(s)
                break



    stations = [s for s in stations_mh if s.id in selected_station_ids and s.longitude is not None]

    stations_to_mp = None

    import matplotlib.pyplot as plt

    # labels = ["CanESM", "MPI"]
    # paths = ["/skynet3_rech1/huziy/offline_stfl/canesm/discharge_1958_01_01_00_00.nc",
    # "/skynet3_rech1/huziy/offline_stfl/mpi/discharge_1958_01_01_00_00.nc"]
    #
    # colors = ["r", "b"]

    # labels = ["ERA", ]
    # colors = ["r", ]
    # paths = ["/skynet3_rech1/huziy/arctic_routing/era40/discharge_1958_01_01_00_00.nc"]


    labels = ["Model", ]
    colors = ["r", ]
    paths = [
        "/RESCUE/skynet3_rech1/huziy/water_route_mh_bc_011deg_wc/discharge_1980_01_01_12_00.nc"
    ]

    infocell_path = "/RESCUE/skynet3_rech1/huziy/water_route_mh_bc_011deg_wc/infocell.nc"

    start_year = 1980
    end_year = 2014




    stations_filtered = []
    for s in stations:
        # Also filter out stations with small accumulation areas
        # if s.drainage_km2 is not None and s.drainage_km2 < 100:
        #     continue

        # Report the complete years for each station (only the commented-out checks above would actually filter)
        year_list = s.get_list_of_complete_years()

        print("Complete years for {}: {}".format(s.id, year_list))

        stations_filtered.append(s)

    stations = stations_filtered


    print("Retained {} stations.".format(len(stations)))

    sim_to_time = {}

    monthly_dates = [datetime(2001, m, 15) for m in range(1, 13)]
    fmt = FuncFormatter(lambda x, pos: num2date(x).strftime("%b")[0])
    locator = MonthLocator(bymonthday=15)

    fig = plt.figure()

    axes = []
    row_indices = []
    col_indices = []

    ncols = 1
    shiftrow = 0 if len(stations) % ncols == 0 else 1
    nrows = len(stations) // ncols + shiftrow
    shared_ax = None
    gs = gridspec.GridSpec(ncols=ncols, nrows=nrows)

    for i, s in enumerate(stations):
        row = i // ncols
        col = i % ncols

        row_indices.append(row)
        col_indices.append(col)

        if shared_ax is None:
            ax = fig.add_subplot(gs[row, col])
            shared_ax = ax
            assert isinstance(shared_ax, Axes)

        else:
            ax = fig.add_subplot(gs[row, col])

        ax.xaxis.set_major_locator(locator)
        ax.yaxis.set_major_locator(MaxNLocator(nbins=4))

        ax.xaxis.set_major_formatter(fmt)
        sfmt = ScalarFormatter(useMathText=True)
        sfmt.set_powerlimits((-3, 4))
        ax.yaxis.set_major_formatter(sfmt)
        assert isinstance(ax, Axes)

        axes.append(ax)

    # generate daily stamp dates
    d0 = datetime(2001, 1, 1)
    stamp_dates = [d0 + timedelta(days=i) for i in range(365)]



    # plot a panel for each station
    for s, ax, row, col in zip(stations, axes, row_indices, col_indices):

        assert isinstance(s, Station)
        assert isinstance(ax, Axes)
        if s.grdc_monthly_clim_max is not None:
            ax.fill_between(monthly_dates, s.grdc_monthly_clim_min, s.grdc_monthly_clim_max, color="0.6", alpha=0.5)

        avail_years = s.get_list_of_complete_years()
        print("{}: {}".format(s.id, ",".join([str(y) for y in avail_years])))
        years = [y for y in avail_years if start_year <= y <= end_year]
        obs_clim_stfl = s.get_monthly_climatology(years_list=years)

        if obs_clim_stfl is None:
            continue

        print(obs_clim_stfl.head())

        obs_clim_stfl.plot(color="k", lw=3, label="Obs", ax=ax)

        if s.river_name is not None and s.river_name != "":
            ax.set_title(s.river_name)
        else:
            ax.set_title(s.id)

        for path, sim_label, color in zip(paths, labels, colors):
            ds = Dataset(path)

            if stations_to_mp is None:
                acc_area_2d = ds.variables["accumulation_area"][:]
                lons2d, lats2d = ds.variables["longitude"][:], ds.variables["latitude"][:]
                x_index, y_index = ds.variables["x_index"][:], ds.variables["y_index"][:]
                stations_to_mp = get_dataless_model_points_for_stations(stations, acc_area_2d,
                                                                       lons2d, lats2d, x_index, y_index)

            # read dates only once for a given simulation
            if sim_label not in sim_to_time:
                time_str = ds.variables["time"][:].astype(str)
                times = [datetime.strptime("".join(t_s), TIME_FORMAT) for t_s in time_str]
                sim_to_time[sim_label] = times

            mp = stations_to_mp[s]
            data = ds.variables["water_discharge_accumulated"][:, mp.cell_index]
            print(path)
            df = DataFrame(data=data, index=sim_to_time[sim_label], columns=["value"])
            df["year"] = df.index.map(lambda d: d.year)
            df = df.loc[df.year.isin(years), :]  # .ix was removed from pandas; .loc does the same here
            df = df.groupby(lambda d: datetime(2001, d.month, 15)).mean()


            # print np.mean( monthly_model ), s.river_name, sim_label
            df.plot(color=color, lw=3, label=sim_label, ax=ax, y="value")


            ds.close()

        if row < nrows - 1:
            ax.set_xticklabels([])

    axes[0].legend(fontsize=17, loc=2)
    plt.tight_layout()
    plt.savefig("mh/offline_validation_mh.png", dpi=400)
    plt.close(fig)






    with Dataset(infocell_path) as ds:

        fldir = ds.variables["flow_direction_value"][:]
        faa = ds.variables["accumulation_area"][:]

        lon, lat = [ds.variables[k][:] for k in ["lon", "lat"]]

        # plot station positions and upstream areas
        cell_manager = CellManager(fldir, nx=fldir.shape[0], ny=fldir.shape[1],
                                   lons2d=lon, lats2d=lat, accumulation_area_km2=faa)



    fig = plt.figure()
    from crcm5.mh_domains import default_domains
    gc = default_domains.bc_mh_011

    # get the basemap object
    bmp, data_mask = gc.get_basemap_using_shape_with_polygons_of_interest(
        lon, lat, shp_path=default_domains.MH_BASINS_PATH, mask_margin=5)

    xx, yy = bmp(lon, lat)
    ax = plt.gca()
    colors = ["g", "r", "m", "c", "y", "violet"]
    i = 0
    for s, mp in stations_to_mp.items():
        assert isinstance(mp, ModelPoint)
        upstream_mask = cell_manager.get_mask_of_upstream_cells_connected_with_by_indices(mp.ix, mp.jy)

        current_points = upstream_mask > 0.5

        bmp.drawcoastlines()
        bmp.drawrivers()

        bmp.scatter(xx[current_points], yy[current_points], c=colors[i % len(colors)])
        i += 1


        va = "top"
        if s.id in ["05AK001", "05LM006"]:
            va = "bottom"

        ha = "left"
        if s.id in ["05QB003"]:
            ha = "right"

        bmp.scatter(xx[mp.ix, mp.jy], yy[mp.ix, mp.jy], c="b")
        ax.annotate(s.id, xy=(xx[mp.ix, mp.jy], yy[mp.ix, mp.jy]), horizontalalignment=ha,
                    verticalalignment=va, bbox=dict(boxstyle='round', fc='gray', alpha=0.5))

    fig.savefig("mh/offline_stations_{}.png".format("positions"))
    plt.close(fig)
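
get_dataless_model_points_for_stations (used in main above) is assumed to start from a nearest-grid-cell search around each station. A hedged sketch of that search with a KDTree in 3-D Cartesian coordinates, which avoids longitude wrap-around problems (an illustrative helper, not the project's actual implementation):

import numpy as np
from scipy.spatial import cKDTree

def find_nearest_cell(lon_st, lat_st, lons2d, lats2d):
    """Return the (i, j) grid indices of the cell closest to a station."""
    lon_r, lat_r = np.radians(lons2d), np.radians(lats2d)
    xyz = np.column_stack([(np.cos(lat_r) * np.cos(lon_r)).ravel(),
                           (np.cos(lat_r) * np.sin(lon_r)).ravel(),
                           np.sin(lat_r).ravel()])
    tree = cKDTree(xyz)

    lo, la = np.radians(lon_st), np.radians(lat_st)
    _, flat_index = tree.query([np.cos(la) * np.cos(lo),
                                np.cos(la) * np.sin(lo),
                                np.sin(la)])
    return np.unravel_index(flat_index, lons2d.shape)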
Example #15
def main():
    direction_file_path = Path("/RECH2/huziy/BC-MH/bc_mh_044deg/Samples/bc_mh_044deg_198001/pm1980010100_00000000p")

    sim_label = "mh_0.44"

    start_year = 1981
    end_year = 2010

    streamflow_internal_name = "streamflow"
    selected_station_ids = constants.selected_station_ids_for_streamflow_validation

    # ======================================================





    day = timedelta(days=1)
    t0 = datetime(2001, 1, 1)
    stamp_dates = [t0 + i * day for i in range(365)]
    print("stamp dates range {} ... {}".format(stamp_dates[0], stamp_dates[-1]))


    lake_fraction = None

    # establish the correspondence between the stations and model grid points
    with RPN(str(direction_file_path)) as r:
        assert isinstance(r, RPN)
        fldir = r.get_first_record_for_name("FLDR")
        flow_acc_area = r.get_first_record_for_name("FAA")
        lons, lats = r.get_longitudes_and_latitudes_for_the_last_read_rec()
        # lake_fraction = r.get_first_record_for_name("LF1")

    cell_manager = CellManager(fldir, lons2d=lons, lats2d=lats, accumulation_area_km2=flow_acc_area)
    stations = stfl_stations.load_stations_from_csv(selected_ids=selected_station_ids)
    station_to_model_point = cell_manager.get_model_points_for_stations(station_list=stations, lake_fraction=lake_fraction,
                                                                        nneighbours=8)


    # Update the end year if required
    max_year_st = -1
    for station in station_to_model_point:
        y = max(station.get_list_of_complete_years())
        if y >= max_year_st:
            max_year_st = y


    if end_year > max_year_st:
        print("Updated end_year to {}, because no obs data after...".format(max_year_st))
        end_year = max_year_st



    # read model data
    mod_data_manager = DataManager(
        store_config={
            "varname_mapping": {streamflow_internal_name: "STFA"},
            "base_folder": str(direction_file_path.parent.parent),
            "data_source_type": data_source_types.SAMPLES_FOLDER_FROM_CRCM_OUTPUT,
            "level_mapping": {streamflow_internal_name: VerticalLevel(-1, level_type=level_kinds.ARBITRARY)},
            "offset_mapping": vname_to_offset_CRCM5,
            "filename_prefix_mapping": {streamflow_internal_name: "pm"}
    })


    station_to_model_data = defaultdict(list)
    for year in range(start_year, end_year + 1):
        start = Pendulum(year, 1, 1)
        p_test = Period(start, start.add(years=1).subtract(microseconds=1))
        stfl_mod = mod_data_manager.read_data_for_period(p_test, streamflow_internal_name)

        # convert to daily means (the old positional resample(freq, dim, how=...) API is gone from xarray)
        stfl_mod = stfl_mod.resample(t="1D", closed="left").mean(keep_attrs=True)

        assert isinstance(stfl_mod, xr.DataArray)

        for station, model_point in station_to_model_point.items():
            assert isinstance(model_point, ModelPoint)
            ts1 = stfl_mod[:, model_point.ix, model_point.jy].to_series()
            station_to_model_data[station].append(pd.Series(index=stfl_mod.t.values, data=ts1))





    # concatenate the timeseries for each point, if required
    if end_year - start_year + 1 > 1:
        for station in station_to_model_data:
            station_to_model_data[station] = pd.concat(station_to_model_data[station])
    else:
        for station in station_to_model_data:
            station_to_model_data[station] = station_to_model_data[station][0]



    # calculate observed climatology
    station_to_climatology = OrderedDict()
    for s in sorted(station_to_model_point, key=lambda st: st.latitude, reverse=True):
        assert isinstance(s, Station)
        print(s.id, len(s.get_list_of_complete_years()))

        # Check if there are continuous years for the selected period
        common_years = set(s.get_list_of_complete_years()).intersection(set(range(start_year, end_year + 1)))
        if len(common_years) > 0:
            _, station_to_climatology[s] = s.get_daily_climatology_for_complete_years_with_pandas(stamp_dates=stamp_dates,
                                                                                                  years=common_years)

            _, station_to_model_data[s] = pandas_utils.get_daily_climatology_from_pandas_series(station_to_model_data[s],
                                                                                                stamp_dates,
                                                                                                years_of_interest=common_years)


        else:
            print("Skipping {}, since it does not have enough data during the period of interest".format(s.id))







    # ---- Do the plotting ----
    ncols = 4

    nrows = len(station_to_climatology) // ncols
    nrows += int(not (len(station_to_climatology) % ncols == 0))

    axes_list = []
    plot_utils.apply_plot_params(width_cm=8 * ncols, height_cm=8 * nrows, font_size=8)
    fig = plt.figure()
    gs = GridSpec(nrows=nrows, ncols=ncols)




    for i, (s, clim) in enumerate(station_to_climatology.items()):
        assert isinstance(s, Station)

        row = i // ncols
        col = i % ncols

        print(row, col, nrows, ncols)

        # normalize by the drainage area
        if s.drainage_km2 is not None:
            station_to_model_data[s] *= s.drainage_km2 / station_to_model_point[s].accumulation_area

        if s.id in constants.stations_to_greyout:
            ax = fig.add_subplot(gs[row, col], facecolor="0.45")
        else:
            ax = fig.add_subplot(gs[row, col])

        assert isinstance(ax, Axes)

        ax.plot(stamp_dates, clim, color="k", lw=2, label="Obs.")
        ax.plot(stamp_dates, station_to_model_data[s], color="r", lw=2, label="Mod.")
        ax.xaxis.set_major_formatter(FuncFormatter(format_month_label))
        ax.xaxis.set_major_locator(MonthLocator(bymonthday=15))
        ax.xaxis.set_minor_locator(MonthLocator(bymonthday=1))
        ax.grid()





        ax.annotate(s.get_pp_name(), xy=(1.02, 1), xycoords="axes fraction",
                    horizontalalignment="left", verticalalignment="top", fontsize=8, rotation=-90)


        last_date = stamp_dates[-1]
        last_date = last_date.replace(day=calendar.monthrange(last_date.year, last_date.month)[1])

        ax.set_xlim(stamp_dates[0].replace(day=1), last_date)


        ymin, ymax = ax.get_ylim()
        ax.set_ylim(0, ymax)


        if s.drainage_km2 is not None:
            ax.set_title(r"{}: ({:.1f}$^\circ$E, {:.1f}$^\circ$N, DA={:.0f} km$^2$)".format(
                s.id, s.longitude, s.latitude, s.drainage_km2))
        else:
            ax.set_title(
                r"{}: ({:.1f}$^\circ$E, {:.1f}$^\circ$N, DA not used)".format(s.id, s.longitude, s.latitude))
        axes_list.append(ax)

    # plot the legend
    axes_list[-1].legend()


    if not img_folder.exists():
        img_folder.mkdir()

    fig.tight_layout()
    img_file = img_folder / "{}_{}-{}_{}.png".format(sim_label, start_year, end_year, "-".join(sorted(s.id for s in station_to_climatology)))

    print("Saving {}".format(img_file))
    fig.savefig(str(img_file), bbox_inches="tight", dpi=300)
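
The daily aggregation done inside the year loop above can be reproduced in isolation; with current xarray the pattern is roughly the following (synthetic data; the time dimension is named t as in the code above):

import numpy as np
import pandas as pd
import xarray as xr

# hourly values on a small 2x2 grid (synthetic data)
times = pd.date_range("1981-01-01", periods=48, freq="H")
da = xr.DataArray(np.random.rand(len(times), 2, 2),
                  dims=("t", "x", "y"), coords={"t": times})

# reduce to daily means along the time dimension
daily = da.resample(t="1D").mean()
assert daily.sizes["t"] == 2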
Example #16
def plot_histograms(
    path="/home/huziy/skynet3_rech1/hdf_store/quebec_0.1_crcm5-hcd-rl-intfl_spinup_ecoclimap.hdf"
):
    fig = plt.figure()
    assert isinstance(fig, Figure)
    gs = gridspec.GridSpec(3, 3)

    lons2d, lats2d, basemap = analysis.get_basemap_from_hdf(file_path=path)

    # slope
    ch_slope = analysis.get_array_from_file(path=path, var_name="slope")
    ch_slope = maskoceans(lons2d, lats2d, ch_slope)
    ch_slope = np.ma.masked_where(ch_slope.mask | (ch_slope < 0), ch_slope)
    ax = fig.add_subplot(gs[0, 0])
    assert isinstance(ax, Axes)
    ch_slope_flat = ch_slope[~ch_slope.mask]
    the_hist, positions = np.histogram(
        ch_slope_flat, bins=25, range=[0, np.percentile(ch_slope_flat, 90)])
    the_hist = the_hist.astype(float)
    the_hist /= the_hist.sum()
    barwidth = (positions[1] - positions[0]) * 0.9
    ax.bar(positions[:-1], the_hist, color="0.75", linewidth=0, width=barwidth)
    ax.set_title(r"$\alpha$")
    ax.grid()
    ax.xaxis.set_major_locator(MaxNLocator(nbins=3))
    ax.yaxis.set_major_locator(MaxNLocator(nbins=5))

    # drainage density
    dd = analysis.get_array_from_file(path=path,
                                      var_name="drainage_density_inv_meters")
    dd *= 1000  # convert to km^-1
    ax = fig.add_subplot(gs[0, 1])
    assert isinstance(ax, Axes)
    dd_flat = dd[~ch_slope.mask]
    the_hist, positions = np.histogram(dd_flat,
                                       bins=25,
                                       range=[0, np.percentile(dd_flat, 90)])
    the_hist = the_hist.astype(float)  # np.float was removed from numpy; use the builtin
    the_hist /= the_hist.sum()
    print(the_hist.max(), the_hist.min())
    barwidth = (positions[1] - positions[0]) * 0.9
    ax.bar(positions[:-1], the_hist, color="0.75", linewidth=0, width=barwidth)
    ax.xaxis.set_major_locator(MaxNLocator(nbins=3))
    ax.yaxis.set_major_locator(MaxNLocator(nbins=5))
    ax.set_title(r"$DD {\rm \left( km^{-1} \right)}$")
    ax.grid()

    # vertical soil hydraulic conductivity
    vshc = analysis.get_array_from_file(
        path=path, var_name=infovar.HDF_VERT_SOIL_HYDR_COND_NAME)
    if vshc is not None:
        # get only on the first layer
        vshc = vshc[0, :, :]
        ax = fig.add_subplot(gs[1, 0])
        assert isinstance(ax, Axes)
        vshc_flat = vshc[~ch_slope.mask]
        the_hist, positions = np.histogram(
            vshc_flat, bins=25, range=[0, np.percentile(vshc_flat, 90)])
        the_hist = the_hist.astype(float)
        the_hist /= the_hist.sum()
        print(the_hist.max(), the_hist.min())
        barwidth = (positions[1] - positions[0]) * 0.9
        ax.bar(positions[:-1],
               the_hist,
               color="0.75",
               linewidth=0,
               width=barwidth)
        ax.xaxis.set_major_locator(MaxNLocator(nbins=3))
        ax.yaxis.set_major_locator(MaxNLocator(nbins=5))

        # set a scalar formatter
        sfmt = ScalarFormatter(useMathText=True)
        sfmt.set_powerlimits([-2, 2])
        ax.xaxis.set_major_formatter(sfmt)
        ax.set_title(r"$ K_{\rm V} {\rm (m/s)}$")
        ax.grid()

        # Kv * slope * DD
        ax = fig.add_subplot(gs[1, 1])
        assert isinstance(ax, Axes)

        interflow_h = 0.2  # Soulis et al 2000
        # 1e-3 converts drainage density back to m^-1 (it was scaled to km^-1 above)
        the_prod = dd_flat * 1e-3 * vshc_flat * ch_slope_flat * 48 * interflow_h

        print("product median: {0}".format(np.median(the_prod)))
        print("product maximum: {0}".format(the_prod.max()))
        print("product 90-quantile: {0}".format(np.percentile(the_prod, 90)))

        the_hist, positions = np.histogram(
            the_prod, bins=25, range=[0, np.percentile(the_prod, 90)])
        the_hist = the_hist.astype(float)
        the_hist /= the_hist.sum()
        print(the_hist.max(), the_hist.min())
        barwidth = (positions[1] - positions[0]) * 0.9
        ax.bar(positions[:-1],
               the_hist,
               color="0.75",
               linewidth=0,
               width=barwidth)
        ax.xaxis.set_major_locator(MaxNLocator(nbins=3))
        ax.yaxis.set_major_locator(MaxNLocator(nbins=5))

        # set a scalar formatter
        sfmt = ScalarFormatter(useMathText=True)
        sfmt.set_powerlimits([-2, 2])
        ax.xaxis.set_major_formatter(sfmt)
        ax.set_title(
            r"$ \beta_{\rm max}\cdot K_{\rm v} \cdot \alpha \cdot DD \cdot H {\rm (m/s)}$ "
        )
        ax.grid()

        # read flow directions
        flow_directions = analysis.get_array_from_file(
            path=path, var_name=infovar.HDF_FLOW_DIRECTIONS_NAME)
        # read cell areas
        # cell_areas = analysis.get_array_from_file(path=path, var_name=infovar.HDF_CELL_AREA_NAME)
        cell_manager = CellManager(flow_directions)
        acc_index = cell_manager.get_accumulation_index()
        acc_index_flat = acc_index[acc_index > 1]
        print(
            "acc_index: min={0}; max={1}; median={2}; 90-quantile={3}".format(
                acc_index_flat.min(), acc_index_flat.max(),
                np.median(acc_index_flat), np.percentile(acc_index_flat, 90)))

        # plot the range of the accumulation index
        ax = fig.add_subplot(gs[0, 2])
        assert isinstance(ax, Axes)
        the_hist, positions = np.histogram(
            acc_index_flat,
            bins=25,
            range=[0, np.percentile(acc_index_flat, 90)])
        the_hist = the_hist.astype(float)
        the_hist /= the_hist.sum()
        print(the_hist.max(), the_hist.min())
        barwidth = (positions[1] - positions[0]) * 0.9
        ax.bar(positions[:-1],
               the_hist,
               color="0.75",
               linewidth=0,
               width=barwidth)
        ax.xaxis.set_major_locator(MaxNLocator(nbins=3))
        ax.yaxis.set_major_locator(MaxNLocator(nbins=5))

        # set a scalar formatter
        sfmt = ScalarFormatter(useMathText=True)
        sfmt.set_powerlimits([-2, 2])
        ax.xaxis.set_major_formatter(sfmt)
        ax.set_title(r"Accum. index")
        ax.grid()

    # lake fraction

    # sand

    # clay

    fig_path = os.path.join(images_folder, "static_fields_histograms.jpeg")
    fig.tight_layout()
    fig.savefig(fig_path, dpi=cpp.FIG_SAVE_DPI, bbox_inches="tight")
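
plot_histograms repeats the same histogram-and-bar block for every field; the pattern factors naturally into a small helper (a refactoring sketch, not code from the module):

import numpy as np

def plot_normalized_hist(ax, values, bins=25, upper_percentile=90, color="0.75"):
    """Bar plot of a frequency-normalized histogram clipped at a percentile."""
    values = np.asarray(values)
    the_hist, edges = np.histogram(values, bins=bins,
                                   range=[0, np.percentile(values, upper_percentile)])
    the_hist = the_hist.astype(float)
    the_hist /= the_hist.sum()
    ax.bar(edges[:-1], the_hist, color=color, linewidth=0, width=0.9 * np.diff(edges))

Each panel above would then reduce to a call like plot_normalized_hist(ax, ch_slope_flat) followed by the locator and formatter settings.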
Example #17
def draw_model_comparison(model_points=None, stations=None, sim_name_to_file_name=None, hdf_folder=None,
                          start_year=None, end_year=None, cell_manager=None,
                          plot_upstream_averages=True):
    """

    :param model_points: list of model point objects
    :param stations: list of stations corresponding to the list of model points
    :param cell_manager: is a CellManager instance which can be provided for better performance if necessary
    len(model_points) == len(stations) if stations is not None.
    if stations is None - then no measured streamflow will be plotted
    """
    assert model_points is None or stations is None or len(stations) == len(model_points)

    path0 = os.path.join(hdf_folder, list(sim_name_to_file_name.items())[0][1])
    flow_directions = analysis.get_array_from_file(path=path0, var_name="flow_direction")
    lake_fraction = analysis.get_array_from_file(path=path0, var_name="lake_fraction")

    accumulation_area_km2 = analysis.get_array_from_file(path=path0, var_name=infovar.HDF_ACCUMULATION_AREA_NAME)
    cell_area_km2 = analysis.get_array_from_file(path=path0, var_name=infovar.HDF_CELL_AREA_NAME_M2)

    # print "plotting from {0}".format(path0)
    # plt.pcolormesh(lake_fraction.transpose())
    # plt.colorbar()
    # plt.show()
    # exit()

    file_scores = open(
        "scores_{0}_{1}-{2}.txt".format("_".join(list(sim_name_to_file_name.keys())), start_year, end_year),
        "w")
    # write the following columns to the scores file
    header_format = "{0:10s}\t{1:10s}\t{2:10s}\t" + "\t".join(["{" + str(i + 3) + ":10s}"
                                                               for i in range(len(sim_name_to_file_name))])
    line_format = "{0:10s}\t{1:10.1f}\t{2:10.1f}\t" + "\t".join(["{" + str(i + 3) + ":10.1f}"
                                                                 for i in range(len(sim_name_to_file_name))])

    header = ("ID", "DAo", "DAm",) + tuple(["NS({0})".format(key) for key in sim_name_to_file_name])
    file_scores.write(header_format.format(*header) + "\n")

    lons2d, lats2d, basemap = analysis.get_basemap_from_hdf(file_path=path0)

    # Create a cell manager if it is not provided
    if cell_manager is None:
        cell_manager = CellManager(flow_directions, accumulation_area_km2=accumulation_area_km2,
                                   lons2d=lons2d, lats2d=lats2d)

    if stations is not None:
        # Get the list of the corresponding model points
        station_to_modelpoint_list = cell_manager.get_lake_model_points_for_stations(station_list=stations,
                                                                                     lake_fraction=lake_fraction,
                                                                                     nneighbours=1)
        station_list = list(station_to_modelpoint_list.keys())
        station_list.sort(key=lambda st1: st1.latitude, reverse=True)
        processed_stations = station_list

    else:
        mp_list = model_points
        station_list = None
        # sort so that the northernmost stations appear uppermost
        mp_list.sort(key=lambda mpt: mpt.latitude, reverse=True)

        # set ids to the model points so they can be distinguished easier
        model_point.set_model_point_ids(mp_list)
        processed_stations = mp_list
        station_to_modelpoint_list = {}


    # brewer2mpl.get_map args: set name  set type  number of colors
    bmap = brewer2mpl.get_map("Set1", "qualitative", 9)
    # Change the default color cycle (axes.color_cycle was replaced by axes.prop_cycle in matplotlib 2.0)
    mpl.rcParams["axes.prop_cycle"] = mpl.cycler(color=bmap.mpl_colors)


    # For the streamflow only plot
    ncols = 3
    nrows = max(len(station_to_modelpoint_list) // ncols, 1)
    if ncols * nrows < len(station_to_modelpoint_list):
        nrows += 1

    figure_panel = plt.figure()
    gs_panel = gridspec.GridSpec(nrows=nrows + 1, ncols=ncols)
    # a flag signifying whether a legend should be added; it is needed so we have only one legend per plot
    legend_added = False

    label_list = list(sim_name_to_file_name.keys())  # Needed to keep the order the same for all subplots
    all_years = [y for y in range(start_year, end_year + 1)]


    # processed_model_points = mp_list

    # plot_point_positions_with_upstream_areas(processed_stations, processed_model_points, basemap, cell_manager)



    if plot_upstream_averages:
        # create obs data managers
        anusplin_tmin = AnuSplinManager(variable="stmn")
        anusplin_tmax = AnuSplinManager(variable="stmx")
        anusplin_pcp = AnuSplinManager(variable="pcp")

        daily_dates, obs_tmin_fields = anusplin_tmin.get_daily_clim_fields_interpolated_to(
            start_year=start_year, end_year=end_year,
            lons_target=lons2d, lats_target=lats2d)

        _, obs_tmax_fields = anusplin_tmax.get_daily_clim_fields_interpolated_to(
            start_year=start_year, end_year=end_year,
            lons_target=lons2d, lats_target=lats2d)

        _, obs_pcp_fields = anusplin_pcp.get_daily_clim_fields_interpolated_to(
            start_year=start_year, end_year=end_year,
            lons_target=lons2d, lats_target=lats2d)

        swe_manager = SweDataManager(var_name="SWE")
        obs_swe_daily_clim = swe_manager.get_daily_climatology(start_year, end_year)
        interpolated_obs_swe_clim = swe_manager.interpolate_daily_climatology_to(obs_swe_daily_clim,
                                                                                 lons2d_target=lons2d,
                                                                                 lats2d_target=lats2d)




    # clear the folder with images (to avoid confusion of different versions)
    _remove_previous_images(processed_stations[0])

    ax_panel = figure_panel.add_subplot(gs_panel[0, :])

    plot_positions_of_station_list(ax_panel, station_list,
                                   [station_to_modelpoint_list[s][0] for s in station_list],
                                   basemap=basemap, cell_manager=cell_manager, fill_upstream_areas=False)

    ax_to_share = None
    for i, the_station in enumerate(station_list):
        # +1 due to the plot with station positions
        ax_panel = figure_panel.add_subplot(gs_panel[1 + i // ncols, i % ncols],
                                            sharex=ax_to_share)
        if ax_to_share is None:
            ax_to_share = ax_panel


        # Check the number of years accessible for the station if the list of stations is given
        if the_station is not None:
            assert isinstance(the_station, Station)
            year_list = the_station.get_list_of_complete_years()
            year_list = list(filter(lambda yi: start_year <= yi <= end_year, year_list))

            if len(year_list) < 1:
                continue

            print("Working on station: {0}".format(the_station.id))
        else:
            year_list = all_years

        fig = plt.figure()

        gs = gridspec.GridSpec(4, 4, wspace=1)


        # plot station position
        ax = fig.add_subplot(gs[3, 0:2])
        upstream_mask = _plot_station_position(ax, the_station, basemap, cell_manager,
                                               station_to_modelpoint_list[the_station][0])


        # plot streamflows
        ax = fig.add_subplot(gs[0:2, 0:2])

        dates = None
        model_daily_temp_clim = {}
        model_daily_precip_clim = {}
        model_daily_clim_surf_runoff = {}
        model_daily_clim_subsurf_runoff = {}
        model_daily_clim_swe = {}
        model_daily_clim_evap = {}

        # get model data for the list of years
        for label in label_list:
            fname = sim_name_to_file_name[label]
            fpath = os.path.join(hdf_folder, fname)

            if plot_upstream_averages:
                # read temperature data and calculate daily climatologic fileds
                dates, model_daily_temp_clim[label] = analysis.get_daily_climatology(
                    path_to_hdf_file=fpath, var_name="TT", level=1, start_year=start_year, end_year=end_year)

                # read modelled precip and calculate daily climatologic fields
                _, model_daily_precip_clim[label] = analysis.get_daily_climatology(
                    path_to_hdf_file=fpath, var_name="PR", level=None, start_year=start_year, end_year=end_year)

                # read modelled surface runoff and calculate daily climatologic fields
                _, model_daily_clim_surf_runoff[label] = analysis.get_daily_climatology(
                    path_to_hdf_file=fpath, var_name="TRAF", level=1, start_year=start_year, end_year=end_year)

                # read modelled subsurface runoff and calculate daily climatologic fields
                _, model_daily_clim_subsurf_runoff[label] = analysis.get_daily_climatology(
                    path_to_hdf_file=fpath, var_name="TDRA", level=1, start_year=start_year, end_year=end_year)

                # read modelled swe and calculate daily climatologic fields
                _, model_daily_clim_swe[label] = analysis.get_daily_climatology(
                    path_to_hdf_file=fpath, var_name="I5", level=None, start_year=start_year, end_year=end_year)

            values_model = None

            # lake level due to evap/precip
            values_model_evp = None

            lf_total = 0
            for the_model_point in station_to_modelpoint_list[the_station]:

                if the_model_point.lake_fraction is None:
                    mult = 1.0
                else:
                    mult = the_model_point.lake_fraction
                lf_total += mult

                # Calculate lake depth variation for this simulation, since I forgot to uncomment it in the model
                if label.lower() != "crcm5-hcd-r":
                    assert isinstance(the_model_point, ModelPoint)
                    _, temp = analysis.get_daily_climatology_for_a_point(path=fpath,
                                                                         var_name="CLDP",
                                                                         years_of_interest=year_list,
                                                                         i_index=the_model_point.ix,
                                                                         j_index=the_model_point.jy)

                    if values_model is None:
                        values_model = mult * np.asarray(temp)
                    else:
                        values_model = mult * np.asarray(temp) + values_model
                else:
                    raise NotImplementedError("Cannot handle lake depth for {0}".format(label))

                if label.lower() in ["crcm5-hcd-rl", "crcm5-l2"]:
                    dates, temp = analysis.get_daily_climatology_for_a_point_cldp_due_to_precip_evap(
                        path=fpath, i_index=the_model_point.ix, j_index=the_model_point.jy,
                        year_list=year_list, point_label=the_station.id)

                    if values_model_evp is None:
                        values_model_evp = mult * np.asarray(temp)
                    else:
                        values_model_evp = mult * np.asarray(temp) + values_model_evp

            values_model /= float(lf_total)
            values_model = values_model - np.mean(values_model)
            print("lake level anomaly ranges for {0}:{1:.8g};{2:.8g}".format(label, values_model.min(),
                                                                             values_model.max()))
            ax.plot(dates, values_model, label=label, lw=2)
            ax_panel.plot(dates, values_model, label=label, lw=2)

            if values_model_evp is not None:
                # normalize cldp
                values_model_evp /= float(lf_total)
                # convert to m/s
                values_model_evp /= 1000.0
                values_model_evp = values_model_evp - np.mean(values_model_evp)
                ax.plot(dates, values_model_evp, label=label + "(P-E)", lw=2)
                ax_panel.plot(dates, values_model_evp, label=label + "(P-E)", lw=2)

        if the_station is not None:
            print(type(dates[0]))
            dates, values_obs = the_station.get_daily_climatology_for_complete_years_with_pandas(stamp_dates=dates,
                                                                                                 years=year_list)


            # To keep the colors consistent for all the variables, the obs Should be plotted last
            ax.plot(dates, values_obs - np.mean(values_obs), label="Obs.", lw=2, color="k")
            ax_panel.plot(dates, values_obs - np.mean(values_obs), label="Obs.", lw=2, color="k")


            # calculate the Nash-Sutcliffe coefficient and skip if it is too small

        ax.set_ylabel(r"Level variation: (${\rm m}$)")
        assert isinstance(ax, Axes)
        assert isinstance(fig, Figure)

        upstream_area_km2 = np.sum(cell_area_km2[upstream_mask == 1])
        # Put some information about the point
        if the_station is not None:
            point_info = "{0}".format(the_station.id)
        else:
            point_info = "{0}".format(the_model_point.point_id)

        ax.annotate(point_info, (0.9, 0.9), xycoords="axes fraction", bbox=dict(facecolor="white"))
        ax_panel.annotate(point_info, (0.96, 0.96), xycoords="axes fraction", bbox=dict(facecolor="white"),
                          va="top", ha="right")

        ax.legend(loc=(0.0, 1.05), borderaxespad=0, ncol=3)
        ax.xaxis.set_major_formatter(FuncFormatter(lambda val, pos: num2date(val).strftime("%b")[0]))
        # ax.xaxis.set_minor_locator(MonthLocator())
        ax.xaxis.set_major_locator(MonthLocator())
        ax.grid()
        streamflow_axes = ax  # save streamflow axes for later use

        if not legend_added:
            ax_panel.legend(loc=(0.0, 1.1), borderaxespad=0.5, ncol=1)
            ax_panel.xaxis.set_minor_formatter(FuncFormatter(lambda val, pos: num2date(val).strftime("%b")[0]))
            ax_panel.xaxis.set_minor_locator(MonthLocator(bymonthday=15))
            ax_panel.xaxis.set_major_locator(MonthLocator())
            ax_panel.xaxis.set_major_formatter(FuncFormatter(lambda val, pos: ""))
            ax_panel.set_ylabel(r"Level variation (${\rm m}$)")
            legend_added = True

        ax_panel.yaxis.set_major_locator(MaxNLocator(nbins=5))
        ax_panel.grid()

        if plot_upstream_averages:
            # plot temperature comparisons (tmod - daily with anusplin tmin and tmax)
            ax = fig.add_subplot(gs[3, 2:], sharex=streamflow_axes)
            success = _validate_temperature_with_anusplin(ax, the_model_point, cell_area_km2=cell_area_km2,
                                                          upstream_mask=upstream_mask,
                                                          daily_dates=daily_dates,
                                                          obs_tmin_clim_fields=obs_tmin_fields,
                                                          obs_tmax_clim_fields=obs_tmax_fields,
                                                          model_data_dict=model_daily_temp_clim,
                                                          simlabel_list=label_list)





            # plot temperature comparisons (tmod - daily with anusplin tmin and tmax)
            ax = fig.add_subplot(gs[2, 2:], sharex=streamflow_axes)
            _validate_precip_with_anusplin(ax, the_model_point, cell_area_km2=cell_area_km2,
                                           upstream_mask=upstream_mask,
                                           daily_dates=daily_dates,
                                           obs_precip_clim_fields=obs_pcp_fields,
                                           model_data_dict=model_daily_precip_clim,
                                           simlabel_list=label_list)


            # plot mean upstream surface runoff
            ax = fig.add_subplot(gs[0, 2:], sharex=streamflow_axes)
            _plot_upstream_surface_runoff(ax, the_model_point, cell_area_km2=cell_area_km2,
                                          upstream_mask=upstream_mask,
                                          daily_dates=daily_dates,
                                          model_data_dict=model_daily_clim_surf_runoff,
                                          simlabel_list=label_list)


            # plot mean upstream subsurface runoff
            ax = fig.add_subplot(gs[1, 2:], sharex=streamflow_axes, sharey=ax)
            _plot_upstream_subsurface_runoff(ax, the_model_point, cell_area_km2=cell_area_km2,
                                             upstream_mask=upstream_mask,
                                             daily_dates=daily_dates,
                                             model_data_dict=model_daily_clim_subsurf_runoff,
                                             simlabel_list=label_list)

            # plot mean upstream swe comparison
            ax = fig.add_subplot(gs[2, 0:2], sharex=streamflow_axes)
            _validate_swe_with_ross_brown(ax, the_model_point, cell_area_km2=cell_area_km2,
                                          upstream_mask=upstream_mask,
                                          daily_dates=daily_dates,
                                          model_data_dict=model_daily_clim_swe,
                                          obs_swe_clim_fields=interpolated_obs_swe_clim,
                                          simlabel_list=label_list)

        if the_station is not None:
            im_name = "comp_point_with_obs_{0}_{1}_{2}.pdf".format(the_station.id,
                                                                   the_station.source,
                                                                   "_".join(label_list))

            im_folder_path = os.path.join(images_folder, the_station.source + "_levels")
        else:
            im_name = "comp_point_with_obs_{0}_{1}.pdf".format(the_model_point.point_id,
                                                               "_".join(label_list))
            im_folder_path = os.path.join(images_folder, "outlets_point_comp_levels")


        # create a folder for a given source of observed streamflow if it does not exist yet
        if not os.path.isdir(im_folder_path):
            os.mkdir(im_folder_path)

        im_path = os.path.join(im_folder_path, im_name)

        if plot_upstream_averages:
            fig.savefig(im_path, dpi=cpp.FIG_SAVE_DPI, bbox_inches="tight")

        plt.close(fig)

    assert isinstance(figure_panel, Figure)
    figure_panel.tight_layout()
    figure_panel.savefig(
        os.path.join(images_folder, "comp_lake-levels_at_point_with_obs_{0}.png".format("_".join(label_list))),
        bbox_inches="tight")
    plt.close(figure_panel)
    file_scores.close()
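
A comment in draw_model_comparison mentions computing a Nash-Sutcliffe coefficient before writing the scores file, but the computation itself is not shown. For reference, the usual definition (an assumed stand-in, since the project's own helper is not visible here):

import numpy as np

def nash_sutcliffe(mod, obs):
    """NS = 1 - sum((mod - obs)^2) / sum((obs - mean(obs))^2); 1 is a perfect fit."""
    mod = np.asarray(mod, dtype=float)
    obs = np.asarray(obs, dtype=float)
    return 1.0 - np.sum((mod - obs) ** 2) / np.sum((obs - obs.mean()) ** 2)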
def plot_basin_outlets(shape_file=BASIN_BOUNDARIES_FILE,
                       bmp_info=None,
                       directions=None,
                       accumulation_areas=None,
                       lake_fraction_field=None):
    assert isinstance(bmp_info, BasemapInfo)

    driver = ogr.GetDriverByName("ESRI Shapefile")
    print(driver)
    ds = driver.Open(shape_file, 0)

    assert isinstance(ds, ogr.DataSource)
    layer = ds.GetLayer()

    assert isinstance(layer, ogr.Layer)
    print(layer.GetFeatureCount())

    latlong_proj = osr.SpatialReference()
    latlong_proj.ImportFromEPSG(4326)

    utm_proj = layer.GetSpatialRef()

    # create Coordinate Transformation
    coord_transform = osr.CoordinateTransformation(latlong_proj, utm_proj)

    utm_coords = coord_transform.TransformPoints(
        list(zip(bmp_info.lons.flatten(), bmp_info.lats.flatten())))
    utm_coords = np.asarray(utm_coords)
    x_utm = utm_coords[:, 0].reshape(bmp_info.lons.shape)
    y_utm = utm_coords[:, 1].reshape(bmp_info.lons.shape)

    basin_mask = np.zeros_like(bmp_info.lons)
    cell_manager = CellManager(directions,
                               accumulation_area_km2=accumulation_areas,
                               lons2d=bmp_info.lons,
                               lats2d=bmp_info.lats)

    index = 1
    basins = []
    basin_names = []
    basin_name_to_mask = {}
    for feature in layer:
        assert isinstance(feature, ogr.Feature)
        # print feature["FID"]

        geom = feature.GetGeometryRef()
        assert isinstance(geom, ogr.Geometry)

        basins.append(ogr.CreateGeometryFromWkb(geom.ExportToWkb()))
        basin_names.append(feature["abr"])

    accumulation_areas_temp = accumulation_areas.copy()
    lons_out, lats_out = [], []
    basin_names_out = []
    name_to_ij_out = OrderedDict()

    min_basin_area = min(b.GetArea() * 1.0e-6 for b in basins)

    while len(basins):
        fm = np.max(accumulation_areas_temp)

        i, j = np.where(fm == accumulation_areas_temp)
        i, j = i[0], j[0]
        p = ogr.CreateGeometryFromWkt("POINT ({} {})".format(
            x_utm[i, j], y_utm[i, j]))
        b_selected = None
        name_selected = None
        for name, b in zip(basin_names, basins):

            assert isinstance(b, ogr.Geometry)
            assert isinstance(p, ogr.Geometry)
            if b.Contains(p.Buffer(2000 * 2**0.5)):
                # Check if there is an upstream cell from the same basin
                the_mask = cell_manager.get_mask_of_upstream_cells_connected_with_by_indices(
                    i, j)

                # Save the mask of the basin for future use
                basin_name_to_mask[name] = the_mask

                # if is_part_of_points_in(b, x_utm[the_mask == 1], y_utm[the_mask == 1]):
                # continue

                b_selected = b
                name_selected = name
                # basin_names_out.append(name)

                lons_out.append(bmp_info.lons[i, j])
                lats_out.append(bmp_info.lats[i, j])
                name_to_ij_out[name] = (i, j)

                basin_mask[the_mask == 1] = index
                index += 1
                break

        if b_selected is not None:
            basins.remove(b_selected)
            basin_names.remove(name_selected)
            outlet_index_in_basin = 1
            current_basin_name = name_selected
            while current_basin_name in basin_names_out:
                current_basin_name = name_selected + str(outlet_index_in_basin)
                outlet_index_in_basin += 1

            basin_names_out.append(current_basin_name)
            print(len(basins), basin_names_out)

        accumulation_areas_temp[i, j] = -1

    plot_utils.apply_plot_params(font_size=12,
                                 width_pt=None,
                                 width_cm=20,
                                 height_cm=20)
    gs = GridSpec(2, 2, width_ratios=[1.0, 0.5], wspace=0.01)
    fig = plt.figure()

    ax = fig.add_subplot(gs[1, 0])
    xx, yy = bmp_info.get_proj_xy()
    bmp_info.basemap.drawcoastlines(linewidth=0.5, ax=ax)
    bmp_info.basemap.drawrivers(zorder=5, color="0.5", ax=ax)

    upstream_edges = cell_manager.get_upstream_polygons_for_points(
        model_point_list=[
            ModelPoint(ix=i, jy=j) for (i, j) in name_to_ij_out.values()
        ],
        xx=xx,
        yy=yy)

    upstream_edges_latlon = cell_manager.get_upstream_polygons_for_points(
        model_point_list=[
            ModelPoint(ix=i, jy=j) for (i, j) in name_to_ij_out.values()
        ],
        xx=bmp_info.lons,
        yy=bmp_info.lats)

    plot_utils.draw_upstream_area_bounds(ax,
                                         upstream_edges=upstream_edges,
                                         color="r",
                                         linewidth=0.6)
    plot_utils.save_to_shape_file(upstream_edges_latlon, in_proj=None)

    xs, ys = bmp_info.basemap(lons_out, lats_out)
    bmp_info.basemap.scatter(xs, ys, c="0.75", s=30, zorder=10)
    bmp_info.basemap.drawparallels(np.arange(-90, 90, 5),
                                   labels=[True, False, False, False],
                                   linewidth=0.5)

    bmp_info.basemap.drawmeridians(np.arange(-180, 180, 5),
                                   labels=[False, False, False, True],
                                   linewidth=0.5)

    cmap = cm.get_cmap("rainbow", index - 1)
    bn = BoundaryNorm(list(range(index + 1)), index - 1)

    # basin_mask = np.ma.masked_where(basin_mask < 0.5, basin_mask)
    # bmp_info.basemap.pcolormesh(xx, yy, basin_mask, norm=bn, cmap=cmap, ax=ax)

    xmin, xmax = ax.get_xlim()
    ymin, ymax = ax.get_ylim()

    print(xmin, xmax, ymin, ymax)
    dx = xmax - xmin
    dy = ymax - ymin
    step_y = 0.1
    step_x = 0.12
    y0_frac = 0.75
    y0_frac_bottom = 0.02
    x0_frac = 0.35
    bname_to_text_coords = {
        "RDO": (xmin + x0_frac * dx, ymin + y0_frac_bottom * dy),
        "STM": (xmin + (x0_frac + step_x) * dx, ymin + y0_frac_bottom * dy),
        "SAG":
        (xmin + (x0_frac + 2 * step_x) * dx, ymin + y0_frac_bottom * dy),
        "BOM":
        (xmin + (x0_frac + 3 * step_x) * dx, ymin + y0_frac_bottom * dy),
        "MAN":
        (xmin + (x0_frac + 4 * step_x) * dx, ymin + y0_frac_bottom * dy),
        "MOI":
        (xmin + (x0_frac + 5 * step_x) * dx, ymin + y0_frac_bottom * dy),
        "ROM": (xmin + (x0_frac + 5 * step_x) * dx,
                ymin + (y0_frac_bottom + step_y) * dy),
        "NAT": (xmin + (x0_frac + 5 * step_x) * dx,
                ymin + (y0_frac_bottom + 2 * step_y) * dy),

        ######
        "CHU": (xmin + (x0_frac + 5 * step_x) * dx, ymin + y0_frac * dy),
        "GEO": (xmin + (x0_frac + 5 * step_x) * dx,
                ymin + (y0_frac + step_y) * dy),
        "BAL": (xmin + (x0_frac + 5 * step_x) * dx,
                ymin + (y0_frac + 2 * step_y) * dy),
        "PYR": (xmin + (x0_frac + 4 * step_x) * dx,
                ymin + (y0_frac + 2 * step_y) * dy),
        "MEL": (xmin + (x0_frac + 3 * step_x) * dx,
                ymin + (y0_frac + 2 * step_y) * dy),
        "FEU": (xmin + (x0_frac + 2 * step_x) * dx,
                ymin + (y0_frac + 2 * step_y) * dy),
        "ARN": (xmin + (x0_frac + 1 * step_x) * dx,
                ymin + (y0_frac + 2 * step_y) * dy),

        ######
        "CAN": (xmin + 0.1 * dx, ymin + 0.80 * dy),
        "GRB": (xmin + 0.1 * dx, ymin + (0.80 - step_y) * dy),
        "LGR": (xmin + 0.1 * dx, ymin + (0.80 - 2 * step_y) * dy),
        "RUP": (xmin + 0.1 * dx, ymin + (0.80 - 3 * step_y) * dy),
        "WAS": (xmin + 0.1 * dx, ymin + (0.80 - 4 * step_y) * dy),
        "BEL": (xmin + 0.1 * dx, ymin + (0.80 - 5 * step_y) * dy),
    }

    # bmp_info.basemap.readshapefile(".".join(BASIN_BOUNDARIES_FILE.split(".")[:-1]).replace("utm18", "latlon"), "basin",
    #                                linewidth=1.2, ax=ax, zorder=9)

    for name, xa, ya, lona, lata in zip(basin_names_out, xs, ys, lons_out,
                                        lats_out):
        ax.annotate(name,
                    xy=(xa, ya),
                    xytext=bname_to_text_coords[name],
                    textcoords='data',
                    ha='right',
                    va='bottom',
                    bbox=dict(boxstyle='round,pad=0.4', fc='white'),
                    arrowprops=dict(arrowstyle='->',
                                    connectionstyle='arc3,rad=0',
                                    linewidth=0.25),
                    font_properties=FontProperties(size=8),
                    zorder=20)

        print(r"{} & {:.0f} \\".format(
            name, accumulation_areas[name_to_ij_out[name]]))

    # Plot zonally averaged lake fraction
    ax = fig.add_subplot(gs[1, 1])
    ydata = range(lake_fraction_field.shape[1])
    ax.plot(lake_fraction_field.mean(axis=0) * 100, ydata, lw=2)

    ax.fill_betweenx(ydata, lake_fraction_field.mean(axis=0) * 100, alpha=0.5)

    ax.set_xlabel("Lake fraction (%)")
    ax.set_ylim(min(ydata), max(ydata))
    ax.xaxis.set_tick_params(direction='out', width=1)
    ax.yaxis.set_tick_params(direction='out', width=1)
    ax.xaxis.set_ticks_position("bottom")
    ax.yaxis.set_ticks_position("none")

    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)

    for tl in ax.yaxis.get_ticklabels():
        tl.set_visible(False)

    # plot elevation, buffer zone, big lakes, grid cells
    ax = fig.add_subplot(gs[0, :])
    geophy_file = "/RESCUE/skynet3_rech1/huziy/from_guillimin/geophys_Quebec_0.1deg_260x260_with_dd_v6"

    r = RPN(geophy_file)
    elev = r.get_first_record_for_name("ME")
    lkfr = r.get_first_record_for_name("LKFR")
    fldr = r.get_first_record_for_name("FLDR")

    params = r.get_proj_parameters_for_the_last_read_rec()
    lons, lats = r.get_longitudes_and_latitudes_for_the_last_read_rec()
    rll = RotatedLatLon(**params)

    bsmp = rll.get_basemap_object_for_lons_lats(lons2d=lons,
                                                lats2d=lats,
                                                resolution="l")
    xx, yy = bsmp(lons, lats)

    dx = (xx[0, 0] - xx[-1, 0]) / xx.shape[0]
    dy = (yy[0, 0] - yy[0, -1]) / yy.shape[1]

    xx_ll_crnrs = xx - dx / 2
    yy_ll_crnrs = yy - dy / 2

    xx_ur_crnrs = xx + dx / 2
    yy_ur_crnrs = yy + dy / 2

    ll_lon, ll_lat = bsmp(xx_ll_crnrs[0, 0], yy_ll_crnrs[0, 0], inverse=True)
    ur_lon, ur_lat = bsmp(xx_ur_crnrs[-1, -1],
                          yy_ur_crnrs[-1, -1],
                          inverse=True)

    crnr_lons = np.array([[ll_lon, ll_lon], [ur_lon, ur_lon]])

    crnr_lats = np.array([[ll_lat, ll_lat], [ur_lat, ur_lat]])

    bsmp = rll.get_basemap_object_for_lons_lats(lons2d=crnr_lons,
                                                lats2d=crnr_lats)

    # plot elevation
    levs = [0, 100, 200, 300, 500, 700, 1000, 1500, 2000, 2800]
    norm = BoundaryNorm(levs, len(levs) - 1)
    the_cmap = my_colormaps.get_cmap_from_ncl_spec_file(
        path="colormap_files/OceanLakeLandSnow.rgb", ncolors=len(levs) - 1)

    lons[lons > 180] -= 360
    me_to_plot = maskoceans(lons, lats, elev, resolution="l")
    im = bsmp.contourf(xx,
                       yy,
                       me_to_plot,
                       cmap=the_cmap,
                       levels=levs,
                       norm=norm,
                       ax=ax)
    bsmp.colorbar(im)

    bsmp.drawcoastlines(linewidth=0.5, ax=ax)

    # show large lake points
    gl_lakes = np.ma.masked_where((lkfr < 0.6) | (fldr <= 0) | (fldr > 128),
                                  lkfr)
    gl_lakes[~gl_lakes.mask] = 1.0
    bsmp.pcolormesh(xx,
                    yy,
                    gl_lakes,
                    cmap=cm.get_cmap("Blues"),
                    ax=ax,
                    vmin=0,
                    vmax=1,
                    zorder=3)

    # show free zone border
    margin = 20
    x1 = xx_ll_crnrs[margin, margin]
    x2 = xx_ur_crnrs[-margin, margin]
    y1 = yy_ll_crnrs[margin, margin]
    y2 = yy_ur_crnrs[margin, -margin]
    pol_corners = ((x1, y1), (x2, y1), (x2, y2), (x1, y2))
    ax.add_patch(Polygon(xy=pol_corners, fc="none", ls="solid", lw=3,
                         zorder=5))

    # show blending zone border (with halo zone)
    margin = 10
    x1 = xx_ll_crnrs[margin, margin]
    x2 = xx_ur_crnrs[-margin, margin]
    y1 = yy_ll_crnrs[margin, margin]
    y2 = yy_ur_crnrs[margin, -margin]
    pol_corners = ((x1, y1), (x2, y1), (x2, y2), (x1, y2))
    ax.add_patch(
        Polygon(xy=pol_corners, fc="none", ls="dashed", lw=3, zorder=5))

    # show the grid
    step = 20
    xx_ll_crnrs_ext = np.zeros([n + 1 for n in xx_ll_crnrs.shape])
    yy_ll_crnrs_ext = np.zeros([n + 1 for n in yy_ll_crnrs.shape])

    xx_ll_crnrs_ext[:-1, :-1] = xx_ll_crnrs
    yy_ll_crnrs_ext[:-1, :-1] = yy_ll_crnrs
    xx_ll_crnrs_ext[:-1, -1] = xx_ll_crnrs[:, -1]
    yy_ll_crnrs_ext[-1, :-1] = yy_ll_crnrs[-1, :]

    xx_ll_crnrs_ext[-1, :] = xx_ur_crnrs[-1, -1]
    yy_ll_crnrs_ext[:, -1] = yy_ur_crnrs[-1, -1]

    bsmp.pcolormesh(xx_ll_crnrs_ext[::step, ::step],
                    yy_ll_crnrs_ext[::step, ::step],
                    np.ma.masked_all_like(xx_ll_crnrs_ext)[::step, ::step],
                    edgecolors="0.6",
                    ax=ax,
                    linewidth=0.05,
                    zorder=4,
                    alpha=0.5)

    ax.set_title("Elevation (m)")

    # plt.show()
    fig.savefig("qc_basin_outlets_points.png", bbox_inches="tight")
    # plt.show()
    plt.close(fig)

    return name_to_ij_out, basin_name_to_mask
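
plot_basin_outlets reprojects the whole lon/lat grid into the shapefile's UTM system before the point-in-polygon tests. The same transform for a single point looks like this (a sketch; EPSG:26918, NAD83 / UTM zone 18N, is only an example target, and the axis-order guard is needed because GDAL 3 changed the default axis order for EPSG:4326):

from osgeo import osr

latlong = osr.SpatialReference()
latlong.ImportFromEPSG(4326)

utm = osr.SpatialReference()
utm.ImportFromEPSG(26918)  # NAD83 / UTM zone 18N (example choice)

# GDAL >= 3 honours the EPSG axis order (lat, lon); keep the historical (lon, lat)
if hasattr(latlong, "SetAxisMappingStrategy"):
    latlong.SetAxisMappingStrategy(osr.OAMS_TRADITIONAL_GIS_ORDER)

transform = osr.CoordinateTransformation(latlong, utm)
x, y, _ = transform.TransformPoint(-72.0, 47.0)  # lon, lat
print(x, y)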
Example #19
def main(directions_file_path: Path):
    """
    compare drainage areas, longitudes and latitudes from the stations and model
    """
    stations = stfl_stations.load_stations_from_csv()
    lake_fraction = None

    with Dataset(str(directions_file_path)) as ds:
        flow_dirs = ds.variables["flow_direction_value"][:]
        flow_acc_area = ds.variables["accumulation_area"][:]
        lons_2d, lats_2d = [ds.variables[k][:] for k in ["lon", "lat"]]

        # lake_fraction = ds.variables["lake_fraction"][:]


    cell_manager = CellManager(flow_dirs, lons2d=lons_2d, lats2d=lats_2d, accumulation_area_km2=flow_acc_area)

    station_to_mod_point = cell_manager.get_model_points_for_stations(station_list=stations, lake_fraction=lake_fraction,
                                                                      nneighbours=8)


    lons_m, lats_m, da_m = [], [], []
    lons_o, lats_o, da_o = [], [], []


    for s, mp in station_to_mod_point.items():
        assert isinstance(s, Station)
        assert isinstance(mp, ModelPoint)

        # obs
        lons_o.append(s.longitude if s.longitude < 180 else s.longitude - 360)
        lats_o.append(s.latitude)
        da_o.append(s.drainage_km2)

        # model
        lons_m.append(mp.longitude if mp.longitude < 180 else mp.longitude - 360)
        lats_m.append(mp.latitude)
        da_m.append(mp.accumulation_area)


        print("m  | s ({})".format(s.id))
        print("{} | {}".format(mp.longitude, s.longitude))
        print("{} | {}".format(mp.latitude, s.latitude))
        print("{} | {}".format(mp.accumulation_area, s.drainage_km2))


    axes_list = []
    plot_utils.apply_plot_params(width_cm=25, height_cm=10, font_size=8)
    fig = plt.figure()
    gs = GridSpec(1, 3)

    ax = fig.add_subplot(gs[0, 0])
    ax.set_title("Longitude")
    ax.scatter(lons_o, lons_m)
    axes_list.append(ax)
    ax.set_ylabel("Model")


    ax = fig.add_subplot(gs[0, 1])
    ax.set_title("Latitude")
    ax.scatter(lats_o, lats_m)
    axes_list.append(ax)
    ax.set_xlabel("Obs")

    ax = fig.add_subplot(gs[0, 2])
    ax.set_title("Drainage area (km$^2$)")
    ax.scatter(da_o, da_m)
    sf = ScalarFormatter(useMathText=True)
    sf.set_powerlimits((-2, 3))

    ax.set_xscale("log")
    ax.set_yscale("log")


    axes_list.append(ax)



    # plot the 1-1 line
    for ax in axes_list:
        assert isinstance(ax, Axes)

        ax.plot(ax.get_xlim(), ax.get_xlim(), "--", color="gray")
        ax.grid()


    img_file = img_folder.joinpath("lon_lat_da_scatter_{}_{}.png".format(directions_file_path.name,
                                                                         "-".join(sorted(s.id for s in station_to_mod_point))))
    fig.savefig(str(img_file), bbox_inches="tight")
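
The next example passes drainaige_area_reldiff_limit=0.9 to get_model_points_for_stations (the keyword is kept verbatim, misspelling included, since that is how the library is called here). The limit presumably bounds the relative disagreement between observed and modelled drainage areas, i.e. something like:

def drainage_area_reldiff(da_model_km2, da_obs_km2):
    """Relative drainage-area difference used to accept or reject a match (assumed definition)."""
    return abs(da_model_km2 - da_obs_km2) / da_obs_km2

# e.g. a 1000 km^2 station matched to an 1800 km^2 cell: 800/1000 = 0.8 < 0.9, accepted
print(drainage_area_reldiff(1800.0, 1000.0))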
def main():
    start_year = 1980
    end_year = 2010


    # model_data_path = Path("/RECH2/huziy/BC-MH/bc_mh_044deg/Diagnostics")
    model_data_path = Path("/RECH2/huziy/BC-MH/bc_mh_044deg/Samples")

    static_data_file = "/RECH2/huziy/BC-MH/bc_mh_044deg/Samples/bc_mh_044deg_198001/pm1980010100_00000000p"

    corrected_obs_data_folder = Path("mh/obs_data/")

    r = RPN(static_data_file)

    fldir = r.get_first_record_for_name("FLDR")
    faa = r.get_first_record_for_name("FAA")
    lons, lats = r.get_longitudes_and_latitudes_for_the_last_read_rec()

    gc = default_domains.bc_mh_044

    cell_manager = CellManager(fldir, nx=fldir.shape[0], ny=fldir.shape[1],
                               lons2d=lons, lats2d=lats, accumulation_area_km2=faa)

    selected_station_ids = [
        "05LM006",
        "05BN012",
        "05AK001",
        "05QB003"
    ]

    stations = cehq_station.load_from_hydat_db(province=None, selected_ids=selected_station_ids, natural=None, skip_data_checks=True)


    for s in stations:
        assert isinstance(s, cehq_station.Station)
        if s.id == "05AK001":
            s.drainage_km2 *= 2.5


    # Manitoba natural stations
    # statons_mnb = cehq_station.load_from_hydat_db(province="MB", natural=True, start_date=datetime(start_year, 1, 1), end_date=datetime(end_year,12,31))
    # statons_ssk = cehq_station.load_from_hydat_db(province="SK", natural=True, start_date=datetime(start_year, 1, 1), end_date=datetime(end_year,12,31))
    # statons_alb = cehq_station.load_from_hydat_db(province="AB", natural=True, start_date=datetime(start_year, 1, 1), end_date=datetime(end_year,12,31))


    # for s in statons_mnb + statons_ssk + statons_alb:
    #     if s not in stations:
    #         stations.append(s)


    # (06EA002): CHURCHILL RIVER AT SANDY BAY at (-102.31832885742188,55.52333068847656), accum. area is 212000.0 km**2
    # TODO: plot where this station is; compare modelled and observed hydrographs

    for s in stations:
        print(s)

    # assert len(stations) == len(selected_station_ids), "Could not find stations for some of the specified ids"

    station_to_model_point = cell_manager.get_model_points_for_stations(stations, drainaige_area_reldiff_limit=0.9,
                                                                        nneighbours=8)


    print("Established the station to model point mapping")


    plot_validations_for_stations(station_to_model_point,
                                  cell_manager=cell_manager,
                                  corrected_obs_data_folder=corrected_obs_data_folder,
                                  model_data_path=model_data_path,
                                  grid_config=gc, start_year=start_year, end_year=end_year)
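# Note: "drainaige_area_reldiff_limit" above is the actual (misspelled) keyword of
# CellManager.get_model_points_for_stations, so it is kept as-is. The criterion it
# presumably encodes is the relative drainage-area mismatch between a station and a
# candidate outlet cell; a minimal sketch of that test (an assumption about the
# library's internals, not its actual code):
def drainage_area_matches(da_obs_km2, da_mod_km2, reldiff_limit=0.9):
    """True if the model drainage area is within reldiff_limit of the observed one."""
    return abs(da_mod_km2 - da_obs_km2) / da_obs_km2 <= reldiff_limit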
def plot_station_positions(directions_file_path: Path, stations: list, ax: Axes, grid_config: GridConfig=default_domains.bc_mh_044,
                           save_upstream_boundaries_to_shp=False):


    with Dataset(str(directions_file_path)) as ds:
        flow_dirs = ds.variables["flow_direction_value"][:]
        flow_acc_area = ds.variables["accumulation_area"][:]
        lons_2d, lats_2d = [ds.variables[k][:] for k in ["lon", "lat"]]



    basemap, reg_of_interest = grid_config.get_basemap_using_shape_with_polygons_of_interest(lons_2d, lats_2d,
                                                                                             shp_path=default_domains.MH_BASINS_PATH,
                                                                                             resolution="i")


    cell_manager = CellManager(flow_dirs, lons2d=lons_2d, lats2d=lats_2d, accumulation_area_km2=flow_acc_area)
    station_to_model_point = cell_manager.get_model_points_for_stations(station_list=stations, nneighbours=8)

    #####
    xx, yy = basemap(lons_2d, lats_2d)
    upstream_edges = cell_manager.get_upstream_polygons_for_points(
        model_point_list=list(station_to_model_point.values()), xx=xx, yy=yy)

    upstream_edges_latlon = cell_manager.get_upstream_polygons_for_points(
        model_point_list=list(station_to_model_point.values()), xx=lons_2d, yy=lats_2d)




    plot_utils.draw_upstream_area_bounds(ax, upstream_edges=upstream_edges, color="r", linewidth=0.6)

    if save_upstream_boundaries_to_shp:
        plot_utils.save_to_shape_file(upstream_edges_latlon, folder_path="mh/engage_report/upstream_stations_areas/mh_{}".format(grid_config.dx), in_proj=None)


    basemap.drawrivers(linewidth=0.2)
    basemap.drawstates(linewidth=0.1)
    basemap.drawcountries(linewidth=0.1)
    basemap.drawcoastlines(linewidth=0.2)


    pos_ids, lons_pos, lats_pos = [], [], []
    pos_labels = []
    legend_lines = []
    for i, (s, mp) in enumerate(sorted(station_to_model_point.items(), key=lambda p: p[0].latitude, reverse=True), start=1):
        pos_ids.append(s.id)
        pos_labels.append(i)
        lons_pos.append(mp.longitude)
        lats_pos.append(mp.latitude)

        legend_lines.append("{}: {}".format(i, s.id))

    xm, ym = basemap(lons_pos, lats_pos)
    ax.scatter(xm, ym, c="g", s=20)
    for txt, x1, y1, pos_label in zip(pos_ids, xm, ym, pos_labels):
        ax.annotate(pos_label, xy=(x1, y1))



    at = AnchoredText("\n".join(legend_lines), prop=dict(size=8), frameon=True, loc=1)

    at.patch.set_boxstyle("round,pad=0.,rounding_size=0.2")
    ax.add_artist(at)
Example #22
0
def plot_histograms(path="/home/huziy/skynet3_rech1/hdf_store/quebec_0.1_crcm5-hcd-rl-intfl_spinup_ecoclimap.hdf"):
    fig = plt.figure()
    assert isinstance(fig, Figure)
    gs = gridspec.GridSpec(3, 3)

    lons2d, lats2d, basemap = analysis.get_basemap_from_hdf(file_path=path)

    # slope
    ch_slope = analysis.get_array_from_file(path=path, var_name="slope")
    ch_slope = maskoceans(lons2d, lats2d, ch_slope)
    ch_slope = np.ma.masked_where(ch_slope.mask | (ch_slope < 0), ch_slope)
    ax = fig.add_subplot(gs[0, 0])
    assert isinstance(ax, Axes)
    ch_slope_flat = ch_slope[~ch_slope.mask]
    the_hist, positions = np.histogram(ch_slope_flat, bins=25, range=[0, np.percentile(ch_slope_flat, 90)])
    the_hist = the_hist.astype(float)
    the_hist /= the_hist.sum()
    barwidth = (positions[1] - positions[0]) * 0.9
    ax.bar(positions[:-1], the_hist, color="0.75", linewidth=0, width=barwidth)
    ax.set_title(r"$\alpha$")
    ax.grid()
    ax.xaxis.set_major_locator(MaxNLocator(nbins=3))
    ax.yaxis.set_major_locator(MaxNLocator(nbins=5))

    # drainage density
    dd = analysis.get_array_from_file(path=path, var_name="drainage_density_inv_meters")
    dd *= 1000  # convert to km^-1
    ax = fig.add_subplot(gs[0, 1])
    assert isinstance(ax, Axes)
    dd_flat = dd[~ch_slope.mask]
    the_hist, positions = np.histogram(dd_flat, bins=25, range=[0, np.percentile(dd_flat, 90)])
    the_hist = the_hist.astype(float)
    the_hist /= the_hist.sum()
    print(the_hist.max(), the_hist.min())
    barwidth = (positions[1] - positions[0]) * 0.9
    ax.bar(positions[:-1], the_hist, color="0.75", linewidth=0, width=barwidth)
    ax.xaxis.set_major_locator(MaxNLocator(nbins=3))
    ax.yaxis.set_major_locator(MaxNLocator(nbins=5))
    ax.set_title(r"$DD {\rm \left( km^{-1} \right)}$")
    ax.grid()


    # vertical soil hydraulic conductivity
    vshc = analysis.get_array_from_file(path=path, var_name=infovar.HDF_VERT_SOIL_HYDR_COND_NAME)
    if vshc is not None:
        # get only on the first layer
        vshc = vshc[0, :, :]
        ax = fig.add_subplot(gs[1, 0])
        assert isinstance(ax, Axes)
        vshc_flat = vshc[~ch_slope.mask]
        the_hist, positions = np.histogram(vshc_flat, bins=25, range=[0, np.percentile(vshc_flat, 90)])
        the_hist = the_hist.astype(float)
        the_hist /= the_hist.sum()
        print(the_hist.max(), the_hist.min())
        barwidth = (positions[1] - positions[0]) * 0.9
        ax.bar(positions[:-1], the_hist, color="0.75", linewidth=0, width=barwidth)
        ax.xaxis.set_major_locator(MaxNLocator(nbins=3))
        ax.yaxis.set_major_locator(MaxNLocator(nbins=5))

        # set a scalar formatter
        sfmt = ScalarFormatter(useMathText=True)
        sfmt.set_powerlimits([-2, 2])
        ax.xaxis.set_major_formatter(sfmt)
        ax.set_title(r"$ K_{\rm V} {\rm (m/s)}$")
        ax.grid()

        # Kv * slope * DD
        ax = fig.add_subplot(gs[1, 1])
        assert isinstance(ax, Axes)

        interflow_h = 0.2  # Soulis et al 2000
        # 1e-3 is to convert drainage density to m^-1
        the_prod = dd_flat * 1e-3 * vshc_flat * ch_slope_flat * 48 * interflow_h

        print("product median: {0}".format(np.median(the_prod)))
        print("product maximum: {0}".format(the_prod.max()))
        print("product 90-quantile: {0}".format(np.percentile(the_prod, 90)))

        the_hist, positions = np.histogram(the_prod, bins=25, range=[0, np.percentile(the_prod, 90)])
        the_hist = the_hist.astype(float)
        the_hist /= the_hist.sum()
        print(the_hist.max(), the_hist.min())
        barwidth = (positions[1] - positions[0]) * 0.9
        ax.bar(positions[:-1], the_hist, color="0.75", linewidth=0, width=barwidth)
        ax.xaxis.set_major_locator(MaxNLocator(nbins=3))
        ax.yaxis.set_major_locator(MaxNLocator(nbins=5))

        # set a scalar formatter
        sfmt = ScalarFormatter(useMathText=True)
        sfmt.set_powerlimits([-2, 2])
        ax.xaxis.set_major_formatter(sfmt)
        ax.set_title(r"$ \beta_{\rm max}\cdot K_{\rm v} \cdot \alpha \cdot DD \cdot H {\rm (m/s)}$ ")
        ax.grid()

        # read flow directions
        flow_directions = analysis.get_array_from_file(path=path, var_name=infovar.HDF_FLOW_DIRECTIONS_NAME)
        # read cell areas
        # cell_areas = analysis.get_array_from_file(path=path, var_name=infovar.HDF_CELL_AREA_NAME)
        cell_manager = CellManager(flow_directions)
        acc_index = cell_manager.get_accumulation_index()
        acc_index_flat = acc_index[acc_index > 1]
        print("acc_index: min={0}; max={1}; median={2}; 90-quantile={3}".format(
            acc_index_flat.min(), acc_index_flat.max(), np.median(acc_index_flat), np.percentile(acc_index_flat, 90)))

        # plot the range of the accumulation index
        ax = fig.add_subplot(gs[0, 2])
        assert isinstance(ax, Axes)
        the_hist, positions = np.histogram(acc_index_flat, bins=25, range=[0, np.percentile(acc_index_flat, 90)])
        the_hist = the_hist.astype(float)
        the_hist /= the_hist.sum()
        print(the_hist.max(), the_hist.min())
        barwidth = (positions[1] - positions[0]) * 0.9
        ax.bar(positions[:-1], the_hist, color="0.75", linewidth=0, width=barwidth)
        ax.xaxis.set_major_locator(MaxNLocator(nbins=3))
        ax.yaxis.set_major_locator(MaxNLocator(nbins=5))

        # set a scalar formatter
        sfmt = ScalarFormatter(useMathText=True)
        sfmt.set_powerlimits([-2, 2])
        ax.xaxis.set_major_formatter(sfmt)
        ax.set_title(r"Accum. index")
        ax.grid()





    # lake fraction


    # sand

    # clay


    fig_path = os.path.join(images_folder, "static_fields_histograms.jpeg")
    fig.tight_layout()
    fig.savefig(fig_path, dpi=cpp.FIG_SAVE_DPI, bbox_inches="tight")
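# The histogram panels above all repeat the same pattern: clip the data at its
# 90th percentile, normalize the bin counts to frequencies, and draw grey bars.
# A minimal sketch of that pattern factored into a helper (the helper name and
# signature are illustrative, not part of the original module):
import numpy as np
import matplotlib.pyplot as plt


def plot_normalized_hist(ax, data, bins=25, upper_percentile=90):
    """Frequency-normalized histogram of data, clipped at the given percentile."""
    data = np.asarray(data)
    the_hist, positions = np.histogram(
        data, bins=bins, range=[0, np.percentile(data, upper_percentile)])
    the_hist = the_hist.astype(float)
    the_hist /= the_hist.sum()
    barwidth = (positions[1] - positions[0]) * 0.9
    ax.bar(positions[:-1], the_hist, color="0.75", linewidth=0, width=barwidth)


if __name__ == "__main__":
    # usage sketch on synthetic data
    fig, ax = plt.subplots()
    plot_normalized_hist(ax, np.random.lognormal(size=1000))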
def draw_model_comparison(model_points=None, stations=None, sim_name_to_file_name=None, hdf_folder=None,
                          start_year=None, end_year=None, cell_manager=None, stfl_name="STFA",
                          drainage_area_reldiff_min=0.1, plot_upstream_area_averaged=True,
                          sim_name_to_color=None):
    """

    :param model_points: list of model point objects
    :param stations: list of stations corresponding to the list of model points
    :param cell_manager: is a CellManager instance which can be provided for better performance if necessary
    len(model_points) == len(stations) if stations is not None.
    if stations is None - then no measured streamflow will be plotted
    """
    assert model_points is None or stations is None or len(stations) == len(model_points)
    label_list = list(sim_name_to_file_name.keys())  # Needed to keep the order the same for all subplots
    path0 = os.path.join(hdf_folder, list(sim_name_to_file_name.items())[0][1])
    flow_directions = analysis.get_array_from_file(path=path0, var_name="flow_direction")
    lake_fraction = analysis.get_array_from_file(path=path0, var_name="lake_fraction")

    # mask lake fraction in the ocean
    lake_fraction = np.ma.masked_where((flow_directions <= 0) | (flow_directions > 128), lake_fraction)

    accumulation_area_km2 = analysis.get_array_from_file(path=path0, var_name=infovar.HDF_ACCUMULATION_AREA_NAME)
    area_m2 = analysis.get_array_from_file(path=path0, var_name=infovar.HDF_CELL_AREA_NAME_M2)

    # Try to read cell areas in m^2; if not available, fall back to km^2
    if area_m2 is not None:
        cell_area_km2 = area_m2 * 1.0e-6
    else:
        cell_area_km2 = analysis.get_array_from_file(path=path0, var_name=infovar.HDF_CELL_AREA_NAME_KM2)

    print("cell area ranges from {} to {}".format(cell_area_km2.min(), cell_area_km2.max()))

    # print "plotting from {0}".format(path0)
    # plt.pcolormesh(lake_fraction.transpose())
    # plt.colorbar()
    # plt.show()
    # exit()

    file_scores = open("scores_{0}_{1}-{2}.txt".format("_".join(label_list), start_year, end_year), "w")
    file_correlations = open("corr_{0}_{1}-{2}.txt".format("_".join(label_list), start_year, end_year), "w")
    file_annual_discharge = open("flow_{0}_{1}-{2}.txt".format("_".join(label_list), start_year, end_year), "w")

    text_files = [file_scores, file_correlations, file_annual_discharge]
    # write the following columns to the scores file
    header_format = "{0:10s}\t{1:10s}\t{2:10s}\t" + "\t".join(["{" + str(i + 3) + ":10s}"
                                                               for i in range(len(sim_name_to_file_name))])
    line_format = "{0:10s}\t{1:10.1f}\t{2:10.1f}\t" + "\t".join(["{" + str(i + 3) + ":10.1f}"
                                                                 for i in range(len(sim_name_to_file_name))])

    header_ns = ("ID", "DAo", "DAm",) + tuple(["NS({0})".format(key) for key in sim_name_to_file_name])
    file_scores.write(header_format.format(*header_ns) + "\n")

    header_qyear = ("ID", "DAo", "DAm",) + tuple(["Qyear({0})".format(key) for key in label_list]) + \
                   ("Qyear(obs)",)
    header_format_qyear = header_format + "\t{" + str(len(label_list) + 3) + ":10s}"
    file_annual_discharge.write(header_format_qyear.format(*header_qyear) + "\n")

    lons2d, lats2d, basemap = analysis.get_basemap_from_hdf(file_path=path0)

    # Create a cell manager if it is not provided
    if cell_manager is None:
        cell_manager = CellManager(flow_directions, accumulation_area_km2=accumulation_area_km2,
                                   lons2d=lons2d, lats2d=lats2d)

    if stations is not None:
        # Get the list of the corresponding model points
        station_to_modelpoint = cell_manager.get_model_points_for_stations(
            station_list=stations,
            lake_fraction=lake_fraction,
            drainaige_area_reldiff_limit=drainage_area_reldiff_min)

        station_list = list(station_to_modelpoint.keys())
        station_list.sort(key=lambda st1: st1.latitude, reverse=True)
        mp_list = [station_to_modelpoint[st] for st in station_list]
    else:
        mp_list = model_points
        station_list = None
        # sort so that the northernmost stations appear uppermost
        mp_list.sort(key=lambda mpt: mpt.latitude, reverse=True)


    # set ids for the model points so they can be distinguished more easily
    model_point.set_model_point_ids(mp_list)


    # ###Uncomment the lines below for the validation plot in paper 2
    # brewer2mpl.get_map args: set name  set type  number of colors
    # bmap = brewer2mpl.get_map("Set1", "qualitative", 9)
    # Change the default colors
    # mpl.rcParams["axes.color_cycle"] = bmap.mpl_colors

    # For the streamflow only plot
    ncols = 3
    nrows = max(len(mp_list) // ncols, 1)
    if ncols * nrows < len(mp_list):
        nrows += 1

    figure_stfl = plt.figure(figsize=(4 * ncols, 3 * nrows))
    gs_stfl = gridspec.GridSpec(nrows=nrows, ncols=ncols)
    # a flag which signifies if a legend should be added to the plot; it is needed so we have only one legend per plot
    legend_added = False

    ax_stfl = None
    all_years = [y for y in range(start_year, end_year + 1)]

    if station_list is not None:
        processed_stations = station_list
    else:
        processed_stations = [None] * len(mp_list)
    processed_model_points = mp_list
    plot_point_positions_with_upstream_areas(processed_stations, processed_model_points, basemap,
                                             cell_manager, lake_fraction_field=lake_fraction)

    if plot_upstream_area_averaged:
        # create obs data managers
        anusplin_tmin = AnuSplinManager(variable="stmn")
        anusplin_tmax = AnuSplinManager(variable="stmx")
        anusplin_pcp = AnuSplinManager(variable="pcp")

        daily_dates, obs_tmin_fields = anusplin_tmin.get_daily_clim_fields_interpolated_to(
            start_year=start_year, end_year=end_year,
            lons_target=lons2d, lats_target=lats2d)

        _, obs_tmax_fields = anusplin_tmax.get_daily_clim_fields_interpolated_to(
            start_year=start_year, end_year=end_year,
            lons_target=lons2d, lats_target=lats2d)

        _, obs_pcp_fields = anusplin_pcp.get_daily_clim_fields_interpolated_to(
            start_year=start_year, end_year=end_year,
            lons_target=lons2d, lats_target=lats2d)

        swe_path = "/skynet3_rech1/huziy/swe_ross_brown/swe.nc4"
        if not os.path.isfile(os.path.realpath(swe_path)):
            raise IOError("SWE-obs file {} does not exist".format(swe_path))

        swe_manager = SweDataManager(path=swe_path, var_name="SWE")
        obs_swe_daily_clim = swe_manager.get_daily_climatology(start_year, end_year)
        interpolated_obs_swe_clim = swe_manager.interpolate_daily_climatology_to(obs_swe_daily_clim,
                                                                                 lons2d_target=lons2d,
                                                                                 lats2d_target=lats2d)
    values_obs = None

    for i, the_model_point in enumerate(mp_list):

        ax_stfl = figure_stfl.add_subplot(gs_stfl[i // ncols, i % ncols], sharex=ax_stfl)

        assert isinstance(the_model_point, ModelPoint)

        # Check the number of years accessible for the station if the list of stations is given
        the_station = None if station_list is None else station_list[i]
        if the_station is not None:
            assert isinstance(the_station, Station)
            year_list = the_station.get_list_of_complete_years()
            year_list = list(filter(lambda yi: start_year <= yi <= end_year, year_list))

            if len(year_list) < 1:
                continue
        else:
            year_list = all_years

        fig = plt.figure(figsize=(12, 15))

        gs = gridspec.GridSpec(4, 4, wspace=1)


        # plot station position
        ax = fig.add_subplot(gs[3, 0:2])
        upstream_mask = _plot_station_position(ax, the_station, basemap, cell_manager, the_model_point)



        # plot streamflows
        ax = fig.add_subplot(gs[0:2, 0:2])

        dates = None
        model_daily_temp_clim = {}
        model_daily_precip_clim = {}
        model_daily_clim_surf_runoff = {}
        model_daily_clim_subsurf_runoff = {}
        model_daily_clim_swe = {}

        # get model data for the list of years
        simlabel_to_vals = {}
        for label in label_list:
            fname = sim_name_to_file_name[label]

            if hdf_folder is None:
                fpath = fname
            else:
                fpath = os.path.join(hdf_folder, fname)

            if plot_upstream_area_averaged:
                # read temperature data and calculate daily climatological fields
                _, model_daily_temp_clim[label] = analysis.get_daily_climatology(
                    path_to_hdf_file=fpath, var_name="TT", level=0, start_year=start_year, end_year=end_year)

                # read modelled precip and calculate daily climatologic fields
                _, model_daily_precip_clim[label] = analysis.get_daily_climatology(
                    path_to_hdf_file=fpath, var_name="PR", level=0, start_year=start_year, end_year=end_year)

                # read modelled surface runoff and calculate daily climatologic fields
                _, model_daily_clim_surf_runoff[label] = analysis.get_daily_climatology(
                    path_to_hdf_file=fpath, var_name="TRAF", level=0, start_year=start_year, end_year=end_year)

                # read modelled subsurface runoff and calculate daily climatologic fields
                _, model_daily_clim_subsurf_runoff[label] = analysis.get_daily_climatology(
                    path_to_hdf_file=fpath, var_name="TDRA", level=0, start_year=start_year, end_year=end_year)

                # read modelled swe and calculate daily climatologic fields
                _, model_daily_clim_swe[label] = analysis.get_daily_climatology(
                    path_to_hdf_file=fpath, var_name="I5", level=0, start_year=start_year, end_year=end_year)

            dates, values_model = analysis.get_daily_climatology_for_a_point(path=fpath,
                                                                             var_name=stfl_name,
                                                                             years_of_interest=year_list,
                                                                             i_index=the_model_point.ix,
                                                                             j_index=the_model_point.jy)

            ax.plot(dates, values_model, label=label, lw=2)

            if sim_name_to_color is None:
                ax_stfl.plot(dates, values_model, label=label, lw=2)
            else:
                ax_stfl.plot(dates, values_model, sim_name_to_color[label], label=label, lw=2)

                print(20 * "!!!")
                print("{} -> {}".format(label, sim_name_to_color[label]))
                print(20 * "!!!")

            simlabel_to_vals[label] = values_model

        if the_station is not None:
            assert isinstance(the_station, Station)
            dates, values_obs = the_station.get_daily_climatology_for_complete_years_with_pandas(stamp_dates=dates,
                                                                                                 years=year_list)

            # To keep the colors consistent across variables, the obs should be plotted last
            ax.plot(dates, values_obs, label="Obs.", lw=2)
            # no ticklabels for streamflow plot
            plt.setp(ax.get_xticklabels(), visible=False)

            if sim_name_to_color is None:
                ax_stfl.plot(dates, values_obs, label="Obs.", lw=2)
            else:
                ax_stfl.plot(dates, values_obs, label="Obs.", lw=2, color=sim_name_to_color["Obs."])

            # Report spring-peak errors from the streamflow validation
            for label, values_model in simlabel_to_vals.items():
                calclulate_spring_peak_err(dates, values_obs, values_model,
                                           st_id="{}: {}".format(label, the_station.id),
                                           da_mod=the_model_point.accumulation_area,
                                           da_obs=the_station.drainage_km2)





        ax.set_ylabel(r"Streamflow: ${\rm m^3/s}$")
        assert isinstance(ax, Axes)
        assert isinstance(fig, Figure)

        upstream_area_km2 = np.sum(cell_area_km2[upstream_mask == 1])
        # Put some information about the point
        if the_station is not None:
            lf_upstream = lake_fraction[upstream_mask == 1]
            point_info = "{0}".format(the_station.id)
            write_annual_flows_to_txt(label_list, simlabel_to_vals, values_obs, file_annual_discharge,
                                      station_id=the_station.id,
                                      da_obs=the_station.drainage_km2, da_mod=the_model_point.accumulation_area)

        else:
            point_info = "{0}".format(the_model_point.point_id)

        ax.annotate(point_info, (0.8, 0.8), xycoords="axes fraction",
                    bbox=dict(facecolor="white", alpha=0.5),
                    va="top", ha="right")

        ax.legend(loc=(0.0, 1.05), borderaxespad=0, ncol=3)
        ax.xaxis.set_minor_formatter(FuncFormatter(lambda x, pos: num2date(x).strftime("%b")[0]))
        ax.xaxis.set_minor_locator(MonthLocator(bymonthday=15))
        ax.xaxis.set_major_locator(MonthLocator())

        ax.grid()

        streamflow_axes = ax  # save streamflow axes for later use

        if not legend_added:
            ax_stfl.legend(loc="lower left", bbox_to_anchor=(0, 1.15), borderaxespad=0, ncol=3)
            ax_stfl.xaxis.set_minor_formatter(FuncFormatter(lambda x, pos: num2date(x).strftime("%b")[0]))
            ax_stfl.xaxis.set_minor_locator(MonthLocator(bymonthday=15))
            ax_stfl.xaxis.set_major_locator(MonthLocator())

            ax_stfl.set_ylabel(r"Streamflow ${\rm m^3/s}$")
            legend_added = True

        plt.setp(ax_stfl.get_xmajorticklabels(), visible=False)
        ax_stfl.yaxis.set_major_locator(MaxNLocator(nbins=5))
        sfmt = ScalarFormatter(useMathText=True)
        sfmt.set_powerlimits((-2, 2))
        ax_stfl.yaxis.set_major_formatter(sfmt)
        ax_stfl.grid()

        # annotate streamflow-only panel plot
        ax_stfl.annotate(point_info, (0.05, 0.95), xycoords="axes fraction",
                         bbox=dict(facecolor="white"),
                         va="top", ha="left")


        if plot_upstream_area_averaged:
            # plot temperature comparisons (tmod - daily with anusplin tmin and tmax)
            ax = fig.add_subplot(gs[3, 2:], sharex=streamflow_axes)
            _validate_temperature_with_anusplin(ax, the_model_point, cell_area_km2=cell_area_km2,
                                                upstream_mask=upstream_mask,
                                                daily_dates=daily_dates,
                                                obs_tmin_clim_fields=obs_tmin_fields,
                                                obs_tmax_clim_fields=obs_tmax_fields,
                                                model_data_dict=model_daily_temp_clim,
                                                simlabel_list=label_list)

            # plot temperature comparisons (tmod - daily with anusplin tmin and tmax)
            ax = fig.add_subplot(gs[2, 2:], sharex=streamflow_axes)
            _validate_precip_with_anusplin(ax, the_model_point, cell_area_km2=cell_area_km2,
                                           upstream_mask=upstream_mask,
                                           daily_dates=daily_dates,
                                           obs_precip_clim_fields=obs_pcp_fields,
                                           model_data_dict=model_daily_precip_clim,
                                           simlabel_list=label_list)


            # plot mean upstream surface runoff
            ax = fig.add_subplot(gs[0, 2:], sharex=streamflow_axes)
            _plot_upstream_surface_runoff(ax, the_model_point, cell_area_km2=cell_area_km2,
                                          upstream_mask=upstream_mask,
                                          daily_dates=daily_dates,
                                          model_data_dict=model_daily_clim_surf_runoff,
                                          simlabel_list=label_list)


            # plot mean upstream subsurface runoff
            ax = fig.add_subplot(gs[1, 2:], sharex=streamflow_axes, sharey=ax)
            _plot_upstream_subsurface_runoff(ax, the_model_point, cell_area_km2=cell_area_km2,
                                             upstream_mask=upstream_mask,
                                             daily_dates=daily_dates,
                                             model_data_dict=model_daily_clim_subsurf_runoff,
                                             simlabel_list=label_list)

            # plot mean upstream swe comparison
            ax = fig.add_subplot(gs[2, 0:2], sharex=streamflow_axes)
            print("Validating SWE for ", the_station.id, "--" * 20)
            _validate_swe_with_ross_brown(ax, the_model_point, cell_area_km2=cell_area_km2,
                                          upstream_mask=upstream_mask,
                                          daily_dates=daily_dates,
                                          model_data_dict=model_daily_clim_swe,
                                          obs_swe_clim_fields=interpolated_obs_swe_clim,
                                          simlabel_list=label_list)

        if the_station is not None:
            im_name = "comp_point_with_obs_{0}_{1}_{2}.png".format(the_station.id,
                                                                   the_station.source,
                                                                   "_".join(label_list))
            im_folder_path = os.path.join(images_folder, the_station.source)
        else:
            im_name = "comp_point_with_obs_{0}_{1}.png".format(the_model_point.point_id,
                                                               "_".join(label_list))
            im_folder_path = os.path.join(images_folder, "outlets_point_comp")


        # create a folder for a given source of observed streamflow if it does not exist yet
        if not os.path.isdir(im_folder_path):
            os.mkdir(im_folder_path)

        im_path = os.path.join(im_folder_path, im_name)

        if plot_upstream_area_averaged:
            fig.savefig(im_path, dpi=cpp.FIG_SAVE_DPI, bbox_inches="tight", transparent=True)

        plt.close(fig)


        # return  # temporary plot only one point

    assert isinstance(figure_stfl, Figure)
    figure_stfl.tight_layout()
    figure_stfl.savefig(os.path.join(images_folder,
                                     "comp_point_with_obs_{0}.png".format("_".join(label_list))),
                        bbox_inches="tight", transparent=True, dpi=cpp.FIG_SAVE_DPI)
    plt.close(figure_stfl)

    # close information text files
    for f in text_files:
        f.close()
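# draw_model_comparison() writes "NS(...)" columns to the scores file; the
# Nash-Sutcliffe efficiency itself is computed elsewhere in the module. For
# reference, a minimal sketch of the standard formulation (the function name
# is illustrative):
import numpy as np


def nash_sutcliffe(obs, mod):
    """NS = 1 - sum((mod - obs)^2) / sum((obs - mean(obs))^2).
    1 is a perfect fit; NS <= 0 means the model is no better than the obs mean."""
    obs = np.asarray(obs, dtype=float)
    mod = np.asarray(mod, dtype=float)
    return 1.0 - np.sum((mod - obs) ** 2) / np.sum((obs - obs.mean()) ** 2)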
def main():
    start_year = 1980
    end_year = 2010

    start_date = datetime(start_year, 1, 1)
    end_date = datetime(end_year, 12, 31)

    ids_with_lakes_upstream = [
        "104001", "093806", "093801", "081002", "081007", "080718"
    ]

    selected_station_ids = ["092715", "074903", "080104", "081007", "061905",
                            "093806", "090613", "081002", "093801", "080718", "104001"]

    selected_station_ids = ids_with_lakes_upstream

    # Get the list of stations to do the comparison with
    stations = cehq_station.read_station_data(
        start_date=start_date,
        end_date=end_date,
        selected_ids=selected_station_ids
    )


    # add hydat stations
    # province = "QC"
    # min_drainage_area_km2 = 10000.0
    # stations_hd = cehq_station.load_from_hydat_db(start_date=start_date, end_date=end_date,
    # province=province, min_drainage_area_km2=min_drainage_area_km2)
    # if not len(stations_hd):
    #     print "No hydat stations satisying the conditions: period {0}-{1}, province {2}".format(
    #         str(start_date), str(end_date), province
    #     )
    # stations.extend(stations_hd)

    # brewer2mpl.get_map args: set name  set type  number of colors
    bmap = brewer2mpl.get_map("Set1", "qualitative", 9)

    path1 = "/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-r.hdf5"
    label1 = "CRCM5-L1"

    path2 = "/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-rl.hdf5"
    label2 = "CRCM5-L2"

    color2, color1 = bmap.mpl_colors[:2]

    fldirs = analysis.get_array_from_file(path=path1, var_name=infovar.HDF_FLOW_DIRECTIONS_NAME)
    lons2d, lats2d, basemap = analysis.get_basemap_from_hdf(path1)

    lake_fractions = analysis.get_array_from_file(path=path1, var_name=infovar.HDF_LAKE_FRACTION_NAME)
    # cell_areas = analysis.get_array_from_file(path=path1, var_name=infovar.HDF_CELL_AREA_NAME)
    acc_area = analysis.get_array_from_file(path=path1, var_name=infovar.HDF_ACCUMULATION_AREA_NAME)

    cell_manager = CellManager(fldirs, lons2d=lons2d, lats2d=lats2d, accumulation_area_km2=acc_area)

    station_to_mp = cell_manager.get_model_points_for_stations(station_list=stations,
                                                               lake_fraction=lake_fractions,
                                                               drainaige_area_reldiff_limit=0.3)

    fig, axes = plt.subplots(1, 2, gridspec_kw=dict(top=0.80, wspace=0.4))

    q90_obs_list = []
    q90_mod1_list = []
    q90_mod2_list = []

    q10_obs_list = []
    q10_mod1_list = []
    q10_mod2_list = []

    for the_station, the_mp in station_to_mp.items():
        assert isinstance(the_station, Station)
        compl_years = the_station.get_list_of_complete_years()
        if len(compl_years) < 3:
            continue

        t, stfl1 = analysis.get_daily_climatology_for_a_point(path=path1, years_of_interest=compl_years,
                                                              i_index=the_mp.ix, j_index=the_mp.jy, var_name="STFA")

        _, stfl2 = analysis.get_daily_climatology_for_a_point(path=path2, years_of_interest=compl_years,
                                                              i_index=the_mp.ix, j_index=the_mp.jy, var_name="STFA")

        _, stfl_obs = the_station.get_daily_climatology_for_complete_years(stamp_dates=t, years=compl_years)

        # Q90
        q90_obs = np.percentile(stfl_obs, 90)
        q90_mod1 = np.percentile(stfl1, 90)
        q90_mod2 = np.percentile(stfl2, 90)

        # Q10
        q10_obs = np.percentile(stfl_obs, 10)
        q10_mod1 = np.percentile(stfl1, 10)
        q10_mod2 = np.percentile(stfl2, 10)

        # save quantiles to lists for correlation calculation
        q90_obs_list.append(q90_obs)
        q90_mod1_list.append(q90_mod1)
        q90_mod2_list.append(q90_mod2)

        q10_mod1_list.append(q10_mod1)
        q10_mod2_list.append(q10_mod2)
        q10_obs_list.append(q10_obs)


        # axes[0].annotate(the_station.id, (q90_obs, np.percentile(stfl1, 90)))
        # axes[1].annotate(the_station.id, (q10_obs, np.percentile(stfl1, 10)))




    # Plot scatter plot of Q90
    the_ax = axes[0]

    # the_ax.annotate(the_station.id, (q90_obs, np.percentile(stfl1, 90)))
    the_ax.scatter(q90_obs_list, q90_mod1_list, label=label1, c=color1)
    the_ax.scatter(q90_obs_list, q90_mod2_list, label=label2, c=color2)



    # plot scatter plot of Q10
    the_ax = axes[1]
    # the_ax.annotate(the_station.id, (q10_obs, np.percentile(stfl1, 10)))
    h1 = the_ax.scatter(q10_obs_list, q10_mod1_list, label=label1, c=color1)
    h2 = the_ax.scatter(q10_obs_list, q10_mod2_list, label=label2, c=color2)



    # Add correlation coefficients to the axes
    fp = FontProperties(size=14, weight="bold")
    axes[0].annotate(r"$R^2 = {0:.2f}$".format(np.corrcoef(q90_mod1_list, q90_obs_list)[0, 1] ** 2),
                     (0.1, 0.85), color=color1, xycoords="axes fraction", font_properties=fp)
    axes[0].annotate(r"$R^2 = {0:.2f}$".format(np.corrcoef(q90_mod2_list, q90_obs_list)[0, 1] ** 2),
                     (0.1, 0.70), color=color2, xycoords="axes fraction", font_properties=fp)

    axes[1].annotate(r"$R^2 = {0:.2f}$".format(np.corrcoef(q10_mod1_list, q10_obs_list)[0, 1] ** 2),
                     (0.1, 0.85), color=color1, xycoords="axes fraction", font_properties=fp)
    axes[1].annotate(r"$R^2 = {0:.2f}$".format(np.corrcoef(q10_mod2_list, q10_obs_list)[0, 1] ** 2),
                     (0.1, 0.70), color=color2, xycoords="axes fraction", font_properties=fp)


    sf = ScalarFormatter(useMathText=True)
    sf.set_powerlimits((-2, 3))
    for ind, the_ax in enumerate(axes):
        plot_one_to_one_line(the_ax)
        if ind == 0:
            the_ax.set_xlabel(r"Observed $\left({\rm m^3/s} \right)$")
            the_ax.set_ylabel(r"Modelled $\left({\rm m^3/s} \right)$")

        the_ax.annotate(r"$Q_{90}$" if ind == 0 else r"$Q_{10}$",
                        (0.95, 0.95), xycoords="axes fraction",
                        bbox=dict(facecolor="white"),
                        va="top", ha="right")

        the_ax.xaxis.set_major_formatter(sf)
        the_ax.yaxis.set_major_formatter(sf)

        locator = MaxNLocator(nbins=5)
        the_ax.xaxis.set_major_locator(locator)
        the_ax.yaxis.set_major_locator(locator)
        x1, x2 = the_ax.get_xlim()
        # Since streamflow percentiles can only be positive
        the_ax.set_xlim(0, x2)
        the_ax.set_ylim(0, x2)

    fig.legend([h1, h2], [label1, label2], loc="upper center", ncol=2)
    figpath = os.path.join(images_folder, "percentiles_comparison.png")
    # plt.tight_layout()
    fig.savefig(figpath, dpi=cpp.FIG_SAVE_DPI, bbox_inches="tight")
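# plot_one_to_one_line() is called above but not defined in this listing; a
# minimal sketch consistent with how the other examples draw the 1:1 line
# (an assumption about the helper, not its actual source):
from matplotlib.axes import Axes


def plot_one_to_one_line(ax: Axes):
    """Draw a dashed grey 1:1 line spanning the current x-limits."""
    lims = ax.get_xlim()
    ax.plot(lims, lims, "--", color="gray")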
def get_basin_to_outlet_indices_map(shape_file=BASIN_BOUNDARIES_FILE, lons=None, lats=None,
                                    directions=None, accumulation_areas=None):
    driver = ogr.GetDriverByName("ESRI Shapefile")
    print(driver)
    ds = driver.Open(shape_file, 0)

    assert isinstance(ds, ogr.DataSource)
    layer = ds.GetLayer()

    assert isinstance(layer, ogr.Layer)
    print(layer.GetFeatureCount())

    latlong_proj = osr.SpatialReference()
    latlong_proj.ImportFromEPSG(4326)

    utm_proj = layer.GetSpatialRef()

    # create Coordinate Transformation
    coord_transform = osr.CoordinateTransformation(latlong_proj, utm_proj)

    utm_coords = coord_transform.TransformPoints(list(zip(lons.flatten(), lats.flatten())))
    utm_coords = np.asarray(utm_coords)
    x_utm = utm_coords[:, 0].reshape(lons.shape)
    y_utm = utm_coords[:, 1].reshape(lons.shape)

    basin_mask = np.zeros_like(lons)
    cell_manager = CellManager(directions, accumulation_area_km2=accumulation_areas, lons2d=lons, lats2d=lats)

    index = 1
    basins = []
    basin_names = []
    basin_name_to_mask = {}
    for feature in layer:
        assert isinstance(feature, ogr.Feature)
        # print feature["FID"]

        geom = feature.GetGeometryRef()
        assert isinstance(geom, ogr.Geometry)
        basins.append(ogr.CreateGeometryFromWkb(geom.ExportToWkb()))
        basin_names.append(feature["abr"])

    accumulation_areas_temp = accumulation_areas.copy()  # a real copy: the loop below writes -1 into it
    lons_out, lats_out = [], []
    basin_names_out = []
    name_to_ij_out = {}

    min_basin_area = min(b.GetArea() * 1.0e-6 for b in basins)

    while len(basins):
        fm = np.max(accumulation_areas_temp)

        i, j = np.where(fm == accumulation_areas_temp)
        i, j = i[0], j[0]
        p = ogr.CreateGeometryFromWkt("POINT ({} {})".format(x_utm[i, j], y_utm[i, j]))
        b_selected = None
        name_selected = None
        for name, b in zip(basin_names, basins):
            assert isinstance(b, ogr.Geometry)
            assert isinstance(p, ogr.Geometry)
            if b.Contains(p.Buffer(2000 * 2 ** 0.5)):
                # Check if there is an upstream cell from the same basin
                the_mask = cell_manager.get_mask_of_upstream_cells_connected_with_by_indices(i, j)

                # Save the mask of the basin for future use
                basin_name_to_mask[name] = the_mask

                # if is_part_of_points_in(b, x_utm[the_mask == 1], y_utm[the_mask == 1]):
                # continue


                b_selected = b
                name_selected = name
                # basin_names_out.append(name)

                lons_out.append(lons[i, j])
                lats_out.append(lats[i, j])
                name_to_ij_out[name] = (i, j)

                basin_mask[the_mask == 1] = index
                index += 1

                break

        if b_selected is not None:
            basins.remove(b_selected)
            basin_names.remove(name_selected)
            outlet_index_in_basin = 1
            current_basin_name = name_selected
            while current_basin_name in basin_names_out:
                current_basin_name = name_selected + str(outlet_index_in_basin)
                outlet_index_in_basin += 1

            basin_names_out.append(current_basin_name)
            print(len(basins), basin_names_out)

        accumulation_areas_temp[i, j] = -1

    return name_to_ij_out, basin_name_to_mask
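# The loop above selects outlets greedily: take the cell with the largest
# remaining accumulation area, test whether it falls inside one of the basin
# polygons (with a ~2.8 km buffer), and mark its upstream mask. A toy
# illustration of the greedy selection on a plain array (no GIS involved),
# which also shows why accumulation_areas_temp must be a real copy:
import numpy as np

acc = np.array([[5., 40., 3.],
                [8., 12., 60.]])
order = []
tmp = acc.copy()
for _ in range(3):
    i, j = np.unravel_index(np.argmax(tmp), tmp.shape)
    order.append((i, j, tmp[i, j]))
    tmp[i, j] = -1  # exclude the chosen cell, as the loop above does

print(order)  # [(1, 2, 60.0), (0, 1, 40.0), (1, 1, 12.0)]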
def main():
    direction_file_path = Path(
        "/RECH2/huziy/BC-MH/bc_mh_044deg/Samples/bc_mh_044deg_198001/pm1980010100_00000000p"
    )

    sim_label = "mh_0.44"

    start_year = 1981
    end_year = 2010

    streamflow_internal_name = "streamflow"
    selected_station_ids = constants.selected_station_ids_for_streamflow_validation

    # ======================================================

    day = timedelta(days=1)
    t0 = datetime(2001, 1, 1)
    stamp_dates = [t0 + i * day for i in range(365)]
    print("stamp dates range {} ... {}".format(stamp_dates[0],
                                               stamp_dates[-1]))

    lake_fraction = None

    # establish the correspondence between the stations and model grid points
    with RPN(str(direction_file_path)) as r:
        assert isinstance(r, RPN)
        fldir = r.get_first_record_for_name("FLDR")
        flow_acc_area = r.get_first_record_for_name("FAA")
        lons, lats = r.get_longitudes_and_latitudes_for_the_last_read_rec()
        # lake_fraction = r.get_first_record_for_name("LF1")

    cell_manager = CellManager(fldir,
                               lons2d=lons,
                               lats2d=lats,
                               accumulation_area_km2=flow_acc_area)
    stations = stfl_stations.load_stations_from_csv(
        selected_ids=selected_station_ids)
    station_to_model_point = cell_manager.get_model_points_for_stations(
        station_list=stations, lake_fraction=lake_fraction, nneighbours=8)

    # Update the end year if required
    max_year_st = -1
    for station in station_to_model_point:
        y = max(station.get_list_of_complete_years())
        if y >= max_year_st:
            max_year_st = y

    if end_year > max_year_st:
        print("Updated end_year to {}, because no obs data after...".format(
            max_year_st))
        end_year = max_year_st

    # read model data
    mod_data_manager = DataManager(
        store_config={
            "varname_mapping": {
                streamflow_internal_name: "STFA"
            },
            "base_folder": str(direction_file_path.parent.parent),
            "data_source_type":
            data_source_types.SAMPLES_FOLDER_FROM_CRCM_OUTPUT,
            "level_mapping": {
                streamflow_internal_name:
                VerticalLevel(-1, level_type=level_kinds.ARBITRARY)
            },
            "offset_mapping": vname_to_offset_CRCM5,
            "filename_prefix_mapping": {
                streamflow_internal_name: "pm"
            }
        })

    station_to_model_data = defaultdict(list)
    for year in range(start_year, end_year + 1):
        start = Pendulum(year, 1, 1)
        p_test = Period(start, start.add(years=1).subtract(microseconds=1))
        stfl_mod = mod_data_manager.read_data_for_period(
            p_test, streamflow_internal_name)

        # convert to daily
        stfl_mod = stfl_mod.resample("D",
                                     "t",
                                     how="mean",
                                     closed="left",
                                     keep_attrs=True)

        assert isinstance(stfl_mod, xr.DataArray)

        for station, model_point in station_to_model_point.items():
            assert isinstance(model_point, ModelPoint)
            ts1 = stfl_mod[:, model_point.ix, model_point.jy].to_series()
            station_to_model_data[station].append(
                pd.Series(index=stfl_mod.t.values, data=ts1))

    # concatenate the timeseries for each point, if required
    if end_year - start_year + 1 > 1:
        for station in station_to_model_data:
            station_to_model_data[station] = pd.concat(
                station_to_model_data[station])
    else:
        for station in station_to_model_data:
            station_to_model_data[station] = station_to_model_data[station][0]

    # calculate observed climatology
    station_to_climatology = OrderedDict()
    for s in sorted(station_to_model_point,
                    key=lambda st: st.latitude,
                    reverse=True):
        assert isinstance(s, Station)
        print(s.id, len(s.get_list_of_complete_years()))

        # Check if there are continuous years for the selected period
        common_years = set(s.get_list_of_complete_years()).intersection(
            set(range(start_year, end_year + 1)))
        if len(common_years) > 0:
            _, station_to_climatology[
                s] = s.get_daily_climatology_for_complete_years_with_pandas(
                    stamp_dates=stamp_dates, years=common_years)

            _, station_to_model_data[
                s] = pandas_utils.get_daily_climatology_from_pandas_series(
                    station_to_model_data[s],
                    stamp_dates,
                    years_of_interest=common_years)

        else:
            print(
                "Skipping {}, since it does not have enough data during the period of interest"
                .format(s.id))

    # ---- Do the plotting ----
    ncols = 4

    nrows = len(station_to_climatology) // ncols
    nrows += int(not (len(station_to_climatology) % ncols == 0))

    axes_list = []
    plot_utils.apply_plot_params(width_cm=8 * ncols,
                                 height_cm=8 * nrows,
                                 font_size=8)
    fig = plt.figure()
    gs = GridSpec(nrows=nrows, ncols=ncols)

    for i, (s, clim) in enumerate(station_to_climatology.items()):
        assert isinstance(s, Station)

        row = i // ncols
        col = i % ncols

        print(row, col, nrows, ncols)

        # normalize by the drainage area
        if s.drainage_km2 is not None:
            station_to_model_data[
                s] *= s.drainage_km2 / station_to_model_point[
                    s].accumulation_area

        if s.id in constants.stations_to_greyout:
            ax = fig.add_subplot(gs[row, col], facecolor="0.45")
        else:
            ax = fig.add_subplot(gs[row, col])

        assert isinstance(ax, Axes)

        ax.plot(stamp_dates, clim, color="k", lw=2, label="Obs.")
        ax.plot(stamp_dates,
                station_to_model_data[s],
                color="r",
                lw=2,
                label="Mod.")
        ax.xaxis.set_major_formatter(FuncFormatter(format_month_label))
        ax.xaxis.set_major_locator(MonthLocator(bymonthday=15))
        ax.xaxis.set_minor_locator(MonthLocator(bymonthday=1))
        ax.grid()

        ax.annotate(s.get_pp_name(),
                    xy=(1.02, 1),
                    xycoords="axes fraction",
                    horizontalalignment="left",
                    verticalalignment="top",
                    fontsize=8,
                    rotation=-90)

        last_date = stamp_dates[-1]
        last_date = last_date.replace(
            day=calendar.monthrange(last_date.year, last_date.month)[1])

        ax.set_xlim(stamp_dates[0].replace(day=1), last_date)

        ymin, ymax = ax.get_ylim()
        ax.set_ylim(0, ymax)

        if s.drainage_km2 is not None:
            ax.set_title(
                "{}: ({:.1f}$^\circ$E, {:.1f}$^\circ$N, DA={:.0f} km$^2$)".
                format(s.id, s.longitude, s.latitude, s.drainage_km2))
        else:
            ax.set_title(
                "{}: ({:.1f}$^\circ$E, {:.1f}$^\circ$N, DA not used)".format(
                    s.id, s.longitude, s.latitude))
        axes_list.append(ax)

    # plot the legend
    axes_list[-1].legend()

    if not img_folder.exists():
        img_folder.mkdir()

    fig.tight_layout()
    img_file = img_folder / "{}_{}-{}_{}.png".format(
        sim_label, start_year, end_year, "-".join(
            sorted(s.id for s in station_to_climatology)))

    print("Saving {}".format(img_file))
    fig.savefig(str(img_file), bbox_inches="tight", dpi=300)
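# format_month_label() is referenced above but not defined in this listing; a
# minimal sketch consistent with its use as a FuncFormatter callback and with
# the single-letter month labels used in the other examples (an assumption,
# not the original helper):
from matplotlib.dates import num2date


def format_month_label(x, pos=None):
    """Return the first letter of the month name for a Matplotlib date value."""
    return num2date(x).strftime("%b")[0]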
def main():
    season_to_months = DEFAULT_SEASON_TO_MONTHS

    r_config = RunConfig(
        data_path="/RESCUE/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-r.hdf5",
        start_year=1990, end_year=2010, label="CRCM5-L"
    )

    bmp_info = analysis.get_basemap_info_from_hdf(file_path=r_config.data_path)
    bmp_info.should_draw_grey_map_background = True
    bmp_info.should_draw_basin_boundaries = False
    bmp_info.map_bg_color = "0.75"

    station_ids = [
        "104001", "093806", "093801", "081002", "081007", "080718"
    ]

    # get river network information used in the model
    flow_directions = analysis.get_array_from_file(r_config.data_path, var_name=infovar.HDF_FLOW_DIRECTIONS_NAME)
    accumulation_area_km2 = analysis.get_array_from_file(path=r_config.data_path,
                                                         var_name=infovar.HDF_ACCUMULATION_AREA_NAME)
    cell_manager = CellManager(flow_dirs=flow_directions,
                               lons2d=bmp_info.lons,
                               lats2d=bmp_info.lats, accumulation_area_km2=accumulation_area_km2)

    # Get the list of stations to indicate on the bias map
    stations = cehq_station.read_station_data(
        start_date=None, end_date=None, selected_ids=station_ids
    )
    """:type : list[Station]"""

    xx, yy = bmp_info.get_proj_xy()
    station_to_modelpoint = cell_manager.get_model_points_for_stations(station_list=stations)
    upstream_edges = cell_manager.get_upstream_polygons_for_points(
        model_point_list=list(station_to_modelpoint.values()), xx=xx, yy=yy)


    # Validate temperature, precip and swe
    obs_path_anusplin = "/home/huziy/skynet3_rech1/anusplin_links"
    obs_path_swe = "data/swe_ross_brown/swe.nc"
    model_var_to_obs_path = OrderedDict([
        ("TT", obs_path_anusplin),
    #    ("PR", obs_path_anusplin),
        ("I5", obs_path_swe)
    ])



    vname_to_obs_data = {}

    # parameters that won't change in the loop over variable names
    params_const = dict(rconfig=r_config, bmp_info=bmp_info, season_to_months=season_to_months)

    for vname, obs_path in model_var_to_obs_path.items():
        season_to_obs_data = get_seasonal_clim_obs_data(vname=vname, obs_path=obs_path, **params_const)

        # Mask SWE over lakes, since I5 is calculated only for land
        if vname in ["I5", ]:
            for season in season_to_obs_data:
                season_to_obs_data[season] = maskoceans(bmp_info.lons, bmp_info.lats,
                                                        season_to_obs_data[season],
                                                        inlands=True)

        vname_to_obs_data[vname] = season_to_obs_data


    # Plotting
    plot_all_vars_in_one_fig = True

    fig = None
    gs = None
    row_axes = []
    ncols = None
    if plot_all_vars_in_one_fig:
        plot_utils.apply_plot_params(font_size=12, width_pt=None, width_cm=25, height_cm=20)
        fig = plt.figure()
        ncols = len(season_to_months) + 1
        gs = GridSpec(len(model_var_to_obs_path), ncols, width_ratios=(ncols - 1) * [1., ] + [0.05, ])
    else:
        plot_utils.apply_plot_params(font_size=12, width_pt=None, width_cm=25, height_cm=25)

    row = 0
    station_x_list = []
    station_y_list = []
    for mname in model_var_to_obs_path:

        if plot_all_vars_in_one_fig:
            row_axes = [fig.add_subplot(gs[row, col]) for col in range(ncols)]

        compare_vars(vname_model=mname, vname_to_obs=vname_to_obs_data,
                     r_config=r_config,
                     season_to_months=season_to_months,
                     bmp_info_agg=bmp_info,
                     axes_list=row_axes)

        # -1 in order to exclude colorbars
        for the_ax in row_axes[:-1]:

            # Need titles only for the first row
            if row > 0:
                the_ax.set_title("")

            draw_upstream_area_bounds(the_ax, upstream_edges)

            if len(station_x_list) == 0:
                for the_station in stations:
                    xst, yst = bmp_info.basemap(the_station.longitude, the_station.latitude)
                    station_x_list.append(xst)
                    station_y_list.append(yst)

            bmp_info.basemap.scatter(station_x_list, station_y_list, c="g", ax=the_ax, s=5, zorder=10, alpha=0.5)

        # Hide fall SWE
        if mname in ["I5"]:
            row_axes[-2].set_visible(False)

        row += 1


    # Save the figure if necessary
    if plot_all_vars_in_one_fig:
        fig_path = img_folder.joinpath("{}.png".format("_".join(model_var_to_obs_path)))
        with fig_path.open("wb") as figfile:
            fig.savefig(figfile, format="png", bbox_inches="tight")

        plt.close(fig)
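
main() above relies on a GridSpec whose last column is very narrow (width ratio 0.05), so that each row of maps gets a dedicated colorbar slot. A minimal sketch of that layout trick with synthetic data:

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec

nrows, nseasons = 2, 4
ncols = nseasons + 1  # one extra, narrow column reserved for colorbars
fig = plt.figure()
gs = GridSpec(nrows, ncols, width_ratios=(ncols - 1) * [1.0, ] + [0.05, ])

for row in range(nrows):
    row_axes = [fig.add_subplot(gs[row, col]) for col in range(ncols)]
    im = None
    for ax in row_axes[:-1]:  # the last axis of the row is the colorbar slot
        im = ax.pcolormesh(np.random.rand(10, 10))
    fig.colorbar(im, cax=row_axes[-1])

fig.savefig("gridspec_colorbar_column.png", bbox_inches="tight")

The SWE masking uses basemap's maskoceans, whose inlands=True flag also masks lakes. A sketch on a synthetic grid (the coordinates are arbitrary, for illustration only):

import numpy as np
from mpl_toolkits.basemap import maskoceans

lons, lats = np.meshgrid(np.linspace(-80, -60, 50), np.linspace(45, 55, 50))
swe = np.random.rand(*lons.shape) * 100.0

# inlands=True masks lakes as well as oceans, which is what a
# land-only variable such as I5 requires
swe_land_only = maskoceans(lons, lats, swe, inlands=True)
print(swe_land_only.count(), "unmasked land cells")
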
def main(hdf_folder="/home/huziy/skynet3_rech1/hdf_store", start_year=1980, end_year=2010):
    prepare()

    all_markers = ["*", "s", "p", "+", "x", "d", "h"]

    excluded = ["white", "w", "aliceblue", "azure"]
    excluded.extend([ci for ci in colors.cnames if "yellow" in ci])

    all_colors = ["k", "b", "r", "g", "m"] + sorted([ci for ci in colors.cnames if ci not in excluded])

    # Station ids to get from the CEHQ database
    ids_with_lakes_upstream = [
        "104001", "093806", "093801", "081002", "081007", "080718"
    ]

    selected_ids = ids_with_lakes_upstream

    filedir = Path(hdf_folder)
    sim_name_to_file_path = OrderedDict([
        # ("CRCM5-LI", filedir.joinpath("quebec_0.1_crcm5-hcd-r.hdf5").as_posix()),

        ("ERAI-CRCM5-L", filedir.joinpath("quebec_0.1_crcm5-hcd-rl.hdf5").as_posix()),

        # ("CanESM2-CRCM5-NL", filedir.joinpath("cc-canesm2-driven/quebec_0.1_crcm5-r-cc-canesm2-1980-2010.hdf5").as_posix()),

        ("CanESM2-CRCM5-L",
         filedir.joinpath("cc-canesm2-driven/quebec_0.1_crcm5-hcd-rl-cc-canesm2-1980-2010.hdf5").as_posix()),

        # ("CanESM2-CRCM5-LI", filedir.joinpath("cc-canesm2-driven/quebec_0.1_crcm5-hcd-rl-intfl-cc-canesm2-1980-2010.hdf5").as_posix()),


    ])

    obs_label = "Obs."
    labels = [obs_label, ] + list(sim_name_to_file_path.keys())

    label_to_marker = dict(zip(labels, all_markers))
    label_to_color = dict(zip(labels, all_colors))

    # Get the list of stations to do the comparison with
    start_date = datetime(start_year, 1, 1)
    end_date = datetime(end_year, 12, 31)
    stations = cehq_station.read_station_data(
        start_date=start_date, end_date=end_date, selected_ids=selected_ids
    )

    # Get geophysical fields from one of the model simulations
    path0 = list(sim_name_to_file_path.values())[0]
    lons2d, lats2d, basemap = analysis.get_basemap_from_hdf(file_path=path0)
    flow_directions = analysis.get_array_from_file(path=path0, var_name=infovar.HDF_FLOW_DIRECTIONS_NAME)
    lake_fraction = analysis.get_array_from_file(path=path0, var_name=infovar.HDF_LAKE_FRACTION_NAME)

    accumulation_area_km2 = analysis.get_array_from_file(path=path0, var_name=infovar.HDF_ACCUMULATION_AREA_NAME)
    area_m2 = analysis.get_array_from_file(path=path0, var_name=infovar.HDF_CELL_AREA_NAME_M2)

    # Try to read cell areas in m2; if that is not available, fall back to km2
    if area_m2 is not None:
        cell_area_km2 = area_m2 * 1.0e-6
    else:
        cell_area_km2 = analysis.get_array_from_file(path=path0, var_name=infovar.HDF_CELL_AREA_NAME_KM2)

    # Create a cell manager for the flow network
    cell_manager = CellManager(flow_directions, accumulation_area_km2=accumulation_area_km2,
                               lons2d=lons2d, lats2d=lats2d)

    # Get the list of the corresponding model points
    station_to_modelpoint = cell_manager.get_model_points_for_stations(
        station_list=stations,
        lake_fraction=lake_fraction,
        drainaige_area_reldiff_limit=0.1)

    # plot_utils.apply_plot_params(font_size=10, width_cm=20, height_cm=18)
    fig = plt.figure()

    ncols = max([len(rp_list) for et, rp_list in ExtremeProperties.extreme_type_to_return_periods.items()])
    nrows = len(ExtremeProperties.extreme_types)
    gs = GridSpec(nrows, ncols)

    ext_type_to_rp_to_ax = OrderedDict()
    ax_with_legend = None

    label_to_ax_to_xdata = {}
    label_to_ax_to_ydata = {}
    for row, ext_type in enumerate(ExtremeProperties.extreme_types):
        ext_type_to_rp_to_ax[ext_type] = OrderedDict()
        for col, rperiod in enumerate(ExtremeProperties.extreme_type_to_return_periods[ext_type]):
            ax = fig.add_subplot(gs[row, col])
            ext_type_to_rp_to_ax[ext_type][rperiod] = ax

            if col == 0:
                ax.set_ylabel(ext_type)

            if row == nrows - 1 and col == ncols - 1:
                ax_with_legend = ax

            # Set axes labels
            if row == nrows - 1:
                ax.set_xlabel("Observations")

            if col == 0:
                ax.set_ylabel("Model")

            for label in sim_name_to_file_path:

                if label not in label_to_ax_to_xdata:
                    label_to_ax_to_xdata[label] = {ax: []}
                    label_to_ax_to_ydata[label] = {ax: []}
                else:
                    label_to_ax_to_xdata[label][ax] = []
                    label_to_ax_to_ydata[label][ax] = []

            ax.set_xscale("log")
            ax.set_yscale("log")

    print("Initial list of stations:")

    sim_label_to_handle = {}
    for s in stations:
        print("{0}".format(s))
        assert isinstance(s, Station)

        print(len([y for y in s.get_list_of_complete_years() if start_year <= y <= end_year]))
        df_ext_obs = extreme_commons.get_annual_extrema(ts_times=s.dates, ts_vals=s.values,
                                                        start_year=start_year, end_year=end_year)
        mp = station_to_modelpoint[s]

        assert isinstance(mp, ModelPoint)

        years_of_interest = df_ext_obs.index

        label_to_extrema_model = {}

        # label -> ext_type -> [return period -> ret level, return period -> std]
        label_to_return_levels = OrderedDict(
            [(obs_label, OrderedDict())]
        )
        for sim_label, sim_path in sim_name_to_file_path.items():
            label_to_return_levels[sim_label] = OrderedDict()
            label_to_extrema_model[sim_label] = OrderedDict()

        # Calculate the return levels and standard deviations
        for ext_type in ExtremeProperties.extreme_types:

            return_periods = ExtremeProperties.extreme_type_to_return_periods[ext_type]

            # fit GEV distribution and apply non-parametric bootstrap to get std
            label_to_return_levels[obs_label][ext_type] = gevfit.do_gevfit_for_a_point(df_ext_obs[ext_type].values,
                                                                                       extreme_type=ext_type,
                                                                                       return_periods=return_periods)
            return_levels_obs, rl_stds_obs = label_to_return_levels[obs_label][ext_type]


            # get annual extrema for the model output at the points close to the stations
            for sim_label, sim_path in sim_name_to_file_path.items():

                ext_field = analysis.get_annual_extrema(
                    rconfig=RunConfig(data_path=sim_path, start_year=start_year, end_year=end_year),
                    varname="STFL", months_of_interest=ExtremeProperties.extreme_type_to_month_of_interest[ext_type],
                    n_avg_days=ExtremeProperties.extreme_type_to_n_agv_days[ext_type],
                    high_flow=ext_type == ExtremeProperties.high)

                # Select only those years when obs are available
                ts_data = [v for y, v in zip(range(start_year, end_year + 1), ext_field[:, mp.ix, mp.jy]) if
                           y in years_of_interest]
                ts_data = np.array(ts_data)
                return_levels, rl_stds = gevfit.do_gevfit_for_a_point(ts_data, extreme_type=ext_type,
                                                                      return_periods=return_periods)
                label_to_return_levels[sim_label][ext_type] = (return_levels, rl_stds)

                # Do the plotting
                for rp in return_periods:
                    ax = ext_type_to_rp_to_ax[ext_type][rp]
                    ax.set_title("T = {rp}-year".format(rp=rp))

                    # h = ax.errorbar(return_levels_obs[rp], return_levels[rp],
                    # marker=label_to_marker[sim_label], color=label_to_color[sim_label], label=sim_label,
                    #                 xerr=rl_stds_obs[rp] * 1.96, yerr=rl_stds[rp] * 1.96)

                    h = ax.scatter(return_levels_obs[rp], return_levels[rp],
                                   marker=label_to_marker[sim_label], color=label_to_color[sim_label], label=sim_label)

                    # save the data for later calculation of the correlation coefficients
                    label_to_ax_to_xdata[sim_label][ax].append(return_levels_obs[rp])
                    label_to_ax_to_ydata[sim_label][ax].append(return_levels[rp])

                    sim_label_to_handle[sim_label] = h

    # Calculate the biases
    for sim_label in sim_name_to_file_path:
        for ext_type in ExtremeProperties.extreme_types:
            ret_periods = ExtremeProperties.extreme_type_to_return_periods[ext_type]
            for rp in ret_periods:

                ax = ext_type_to_rp_to_ax[ext_type][rp]
                mod = np.asarray(label_to_ax_to_ydata[sim_label][ax])
                obs = np.asarray(label_to_ax_to_xdata[sim_label][ax])

                bias = np.mean((mod - obs)/obs)
                corr, pv = stats.pearsonr(mod, obs)
                print("({sim_label}) Mean bias for {rp}-year {ext_type}-flow return level is: {bias}; corr={corr:.2f}; corr_pval={corr_pval:2g}".format(
                    sim_label=sim_label, rp=rp, bias=bias, corr=corr, corr_pval=pv,
                    ext_type=ext_type
                ))

    sfmt = ScalarFormatter(useMathText=True)
    sfmt.set_powerlimits((-2, 2))
    for et, rp_to_ax in ext_type_to_rp_to_ax.items():
        for rp, ax in rp_to_ax.items():
            xmin, xmax = ax.get_xlim()
            ymin, ymax = ax.get_ylim()
            x1 = min(xmin, ymin)
            x2 = min(xmax, ymax)
            ax.plot([x1, x2], [x1, x2], "k--")
            # ax.xaxis.set_major_locator(MaxNLocator(nbins=5))
            # ax.yaxis.set_major_locator(MaxNLocator(nbins=5))
            # ax.xaxis.set_major_formatter(sfmt)
            # ax.yaxis.set_major_formatter(sfmt)

    sim_labels = list(sim_name_to_file_path.keys())
    ax_with_legend.legend([sim_label_to_handle[sl] for sl in sim_labels], sim_labels,
                          bbox_to_anchor=(1, -0.25), borderaxespad=0.0, loc="upper right",
                          ncol=2, scatterpoints=1, numpoints=1)

    # Save the plot
    img_file = "{}.eps".format("_".join(sorted(label_to_marker.keys())))
    img_file = img_folder.joinpath(img_file)

    fig.tight_layout()
    with img_file.open("wb") as f:
        fig.savefig(f, bbox_inches="tight")
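
gevfit.do_gevfit_for_a_point and its non-parametric bootstrap standard deviations are project-specific, but the core of a return-level calculation can be sketched with scipy alone: fit a GEV to the annual extrema and invert its survival function, since a T-year event is exceeded with probability 1/T in any given year. A minimal sketch of that general technique, not the project's implementation (for low flows one would fit the negated annual minima):

import numpy as np
from scipy.stats import genextreme


def return_levels_sketch(annual_maxima, return_periods=(10, 30, 50)):
    # fit the three GEV parameters by maximum likelihood
    shape, loc, scale = genextreme.fit(annual_maxima)
    # return level for period T: the value exceeded with probability 1/T per year
    return {t: genextreme.isf(1.0 / t, shape, loc=loc, scale=scale)
            for t in return_periods}


# synthetic example: 30 years of annual maximum flows
print(return_levels_sketch(np.random.gumbel(loc=100.0, scale=20.0, size=30)))
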
Example #31
def main(start_year=1980, end_year=1989):

    soil_layer_widths = infovar.soil_layer_widths_26_to_60
    soil_tops = np.cumsum(soil_layer_widths).tolist()[:-1]
    soil_tops = [
        0,
    ] + soil_tops

    selected_station_ids = [
        "061905", "074903", "090613", "092715", "093801", "093806"
    ]

    #    path1 = "/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-rl_spinup.hdf"
    #    label1 = "CRCM5-HCD-RL"

    path1 = "/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-rl-intfl_spinup_ITFS.hdf5"
    label1 = "CRCM5-HCD-RL-INTFL"

    path2 = "/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-rl-intfl_ITFS_avoid_truncation1979-1989.hdf5"
    label2 = "CRCM5-HCD-RL-INTFL-improved"

    ############
    images_folder = "images_for_lake-river_paper/comp_soil_profiles"
    if not os.path.isdir(images_folder):
        os.makedirs(images_folder)

    fldirs = analysis.get_array_from_file(
        path=path1, var_name=infovar.HDF_FLOW_DIRECTIONS_NAME)
    lons2d, lats2d, basemap = analysis.get_basemap_from_hdf(path1)

    lake_fractions = analysis.get_array_from_file(
        path=path1, var_name=infovar.HDF_LAKE_FRACTION_NAME)
    cell_areas = analysis.get_array_from_file(
        path=path1, var_name=infovar.HDF_CELL_AREA_NAME_M2)
    acc_areakm2 = analysis.get_array_from_file(
        path=path1, var_name=infovar.HDF_ACCUMULATION_AREA_NAME)
    depth_to_bedrock = analysis.get_array_from_file(
        path=path1, var_name=infovar.HDF_DEPTH_TO_BEDROCK_NAME)

    cell_manager = CellManager(fldirs,
                               lons2d=lons2d,
                               lats2d=lats2d,
                               accumulation_area_km2=acc_areakm2)

    # get climatological liquid soil moisture and convert fractions to mm
    t0 = time.perf_counter()
    daily_dates, levels, i1_nointfl = analysis.get_daily_climatology_of_3d_field(
        path_to_hdf_file=path1,
        var_name="I1",
        start_year=start_year,
        end_year=end_year)
    print("read I1 - 1")
    print("Spent {0} seconds ".format(time.clock() - t0))

    _, _, i1_intfl = analysis.get_daily_climatology_of_3d_field(
        path_to_hdf_file=path2,
        var_name="I1",
        start_year=start_year,
        end_year=end_year)
    print("read I1 - 2")

    # get climatological frozen soil moisture and convert fractions to mm
    _, _, i2_nointfl = analysis.get_daily_climatology_of_3d_field(
        path_to_hdf_file=path1,
        var_name="I2",
        start_year=start_year,
        end_year=end_year)
    print("read I2 - 1")

    _, _, i2_intfl = analysis.get_daily_climatology_of_3d_field(
        path_to_hdf_file=path2,
        var_name="I2",
        start_year=start_year,
        end_year=end_year)
    print("read I2 - 2")
    sm_intfl = i1_intfl + i2_intfl
    sm_nointfl = i1_nointfl + i2_nointfl

    # Get the list of stations to do the comparison with
    stations = cehq_station.read_station_data(
        start_date=datetime(start_year, 1, 1),
        end_date=datetime(end_year, 12, 31),
        selected_ids=selected_station_ids)

    print("sm_noinfl, min, max = {0}, {1}".format(sm_nointfl.min(),
                                                  sm_nointfl.max()))
    print("sm_infl, min, max = {0}, {1}".format(sm_intfl.min(),
                                                sm_intfl.max()))
    diff = (sm_intfl - sm_nointfl)
    #diff *= soil_layer_widths[np.newaxis, :, np.newaxis, np.newaxis] * 1000  # to convert in mm

    #print "number of nans", np.isnan(diff).astype(int).sum()

    print("cell area min,max = {0}, {1}".format(cell_areas.min(),
                                                cell_areas.max()))
    print("acc area min,max = {0}, {1}".format(acc_areakm2.min(),
                                               acc_areakm2.max()))

    assert np.all(lake_fractions >= 0)
    print("lake fractions (min, max): ", lake_fractions.min(),
          lake_fractions.max())

    # No need to go very deep
    nlayers = 3
    z, t = np.meshgrid(soil_tops[:nlayers], date2num(daily_dates))
    station_to_mp = cell_manager.get_model_points_for_stations(stations)

    plotted_global = False

    for the_station, mp in station_to_mp.items():
        assert isinstance(mp, ModelPoint)
        assert isinstance(the_station, Station)
        fig = plt.figure()
        umask = cell_manager.get_mask_of_upstream_cells_connected_with_by_indices(
            mp.ix, mp.jy)

        # exclude lake cells from the profiles
        sel = (umask == 1) & (depth_to_bedrock > 3) & (acc_areakm2 >= 0)

        umaskf = umask.astype(float)
        umaskf *= (1.0 - lake_fractions) * cell_areas
        umaskf[~sel] = 0.0

        profiles = np.tensordot(diff, umaskf) / umaskf.sum()
        print(profiles.shape, profiles.min(), profiles.max(), umaskf.sum(),
              umaskf.min(), umaskf.max())

        d = np.abs(profiles).max()
        print("d = {0}".format(d))
        clevs = np.round(np.linspace(-d, d, 12), decimals=5)

        diff_cmap = cm.get_cmap("RdBu_r", lut=len(clevs) - 1)
        bn = BoundaryNorm(clevs, len(clevs) - 1)

        plt.title("({})-({})".format(label2, label2))
        img = plt.contourf(t,
                           z,
                           profiles[:, :nlayers],
                           cmap=diff_cmap,
                           levels=clevs,
                           norm=bn)
        plt.colorbar(img, ticks=clevs)
        ax = plt.gca()
        assert isinstance(ax, Axes)

        ax.invert_yaxis()
        ax.xaxis.set_major_formatter(DateFormatter("%b"))
        ax.xaxis.set_major_locator(MonthLocator())

        fig.savefig(os.path.join(
            images_folder, "{0}_{1}_{2}.jpeg".format(the_station.id, label1,
                                                     label2)),
                    dpi=cpp.FIG_SAVE_DPI,
                    bbox_inches="tight")

        print("processed: {0}".format(the_station))
        if not plotted_global:
            plotted_global = True
            fig = plt.figure()
            sel = (depth_to_bedrock >= 0.1) & (acc_areakm2 >= 0)

            umaskf = (1.0 - lake_fractions) * cell_areas
            umaskf[~sel] = 0.0

            profiles = np.tensordot(diff, umaskf) / umaskf.sum()
            print(profiles.shape, profiles.min(), profiles.max(), umaskf.sum(),
                  umaskf.min(), umaskf.max())

            d = np.abs(profiles).max()
            print("d = {0}".format(d))
            clevs = np.round(np.linspace(-d, d, 12), decimals=5)

            diff_cmap = cm.get_cmap("RdBu_r", lut=len(clevs) - 1)
            bn = BoundaryNorm(clevs, len(clevs) - 1)

            img = plt.contourf(t,
                               z,
                               profiles[:, :nlayers],
                               cmap=diff_cmap,
                               levels=clevs,
                               norm=bn)
            plt.colorbar(img, ticks=clevs)
            ax = plt.gca()
            assert isinstance(ax, Axes)

            ax.invert_yaxis()
            ax.xaxis.set_major_formatter(DateFormatter("%b"))
            ax.xaxis.set_major_locator(MonthLocator())

            fig.savefig(os.path.join(images_folder, "global_mean.jpeg"),
                        dpi=cpp.FIG_SAVE_DPI,
                        bbox_inches="tight")

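
The key step above is np.tensordot with its default axes=2, which contracts the two horizontal dimensions of the (time, level, y, x) difference field against a (y, x) weight mask, leaving (time, level) mean profiles. A self-contained sketch with synthetic shapes; the weighting mirrors the (1 - lake_fractions) * cell_areas mask used above:

import numpy as np

nt, nz, ny, nx = 365, 3, 8, 10
field = np.random.randn(nt, nz, ny, nx)   # e.g. a soil moisture difference
weights = np.random.rand(ny, nx)          # e.g. (1 - lake_fraction) * cell_area
weights[weights < 0.2] = 0.0              # excluded cells get zero weight

# default axes=2: sum over the last two axes of `field`
# against the two axes of `weights`
profiles = np.tensordot(field, weights) / weights.sum()
assert profiles.shape == (nt, nz)
print(profiles.min(), profiles.max())
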