Example 1
def calculate_lake_ids(fldirs, lkfract, lkout):
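    """Assign an integer id to each lake outlet's upstream lake area.

    fldirs: 2D flow directions; lkfract: 2D lake fraction; lkout: 2D mask of
    lake outlets (> 0.5 where an outlet is present). For each outlet the
    upstream area is restricted to cells that are either outlets themselves or
    have a lake fraction of at least 0.6; ids are written in order of
    decreasing upstream-area size, so overlapping cells keep the id of the
    smaller area. Returns a 2D array of ids (0 where there is no lake).
    """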
    current_id = 1
    lkfr_limit = 0.6

    cmanager = CellManager(fldirs)

    iout_list, jout_list = np.where(lkout > 0.5)

    lkids = np.zeros_like(fldirs)

    lkid_to_mask = {}
    lkid_to_npoints_upstream = {}
    for i, j in zip(iout_list, jout_list):
        the_mask = cmanager.get_mask_of_upstream_cells_connected_with_by_indices(
            i, j) > 0.5
        the_mask = the_mask & ((lkfract >= lkfr_limit) | (lkout > 0.5))

        lkid_to_mask[current_id] = the_mask
        lkid_to_npoints_upstream[current_id] = the_mask.sum()
        current_id += 1

    for the_id in sorted(lkid_to_mask,
                         key=lambda xx: lkid_to_npoints_upstream[xx],
                         reverse=True):
        lkids[lkid_to_mask[the_id]] = the_id

    return lkids
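
A minimal usage sketch (the np.load calls and file names are illustrative assumptions; in these examples the fields actually come from HDF/NetCDF files, e.g. via analysis.get_array_from_file):

import numpy as np

fldirs = np.load("flow_directions.npy")   # hypothetical input files
lkfract = np.load("lake_fraction.npy")
lkout = np.load("lake_outlets.npy")

lkids = calculate_lake_ids(fldirs, lkfract, lkout)
print("number of lakes identified:", int(lkids.max()))
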
def point_comparisons_at_outlets(hdf_folder="/home/huziy/skynet3_rech1/hdf_store"):
    start_year = 1979
    end_year = 1981

    sim_name_to_file_name = {
        # "CRCM5-R": "/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-r_spinup.hdf",
        # "CRCM5-HCD-R": "/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-r_spinup2.hdf",
        "CRCM5-HCD-RL": "/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-rl_spinup.hdf",
        "CRCM5-HCD-RL-INTFL": "/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-rl-intfl_do_not_discard_small.hdf",
        # "SANI=10000, ignore THFC":
        # "/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-rl-intfl_sani-10000_not_care_about_thfc.hdf",

        # "CRCM5-HCD-RL-ERA075": "/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-rl-intfl_spinup_ecoclimap_era075.hdf",
        "SANI=10000": "/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-rl-intfl_sani-10000.hdf"
        # "CRCM5-HCD-RL-ECOCLIMAP": "/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-rl-intfl_spinup_ecoclimap.hdf"
    }

    path0 = os.path.join(hdf_folder, list(sim_name_to_file_name.items())[0][1])
    path1 = os.path.join(hdf_folder, list(sim_name_to_file_name.items())[1][1])
    flow_directions = analysis.get_array_from_file(path=path0, var_name=infovar.HDF_FLOW_DIRECTIONS_NAME)
    lake_fraction = analysis.get_array_from_file(path=path0, var_name=infovar.HDF_LAKE_FRACTION_NAME)
    slope = analysis.get_array_from_file(path=path1, var_name=infovar.HDF_SLOPE_NAME)

    lons2d, lats2d, _ = analysis.get_basemap_from_hdf(file_path=path0)

    cell_manager = CellManager(flow_directions, lons2d=lons2d, lats2d=lats2d)
    mp_list = cell_manager.get_model_points_of_outlets(lower_accumulation_index_limit=10)

    assert len(mp_list) > 0

    # Get the accumulation indices so that the most important outlets can be identified
    acc_ind_list = [np.sum(cell_manager.get_mask_of_upstream_cells_connected_with_by_indices(mp.ix, mp.jy))
                    for mp in mp_list]

    for mp, acc_ind in zip(mp_list, acc_ind_list):
        mp.acc_index = acc_ind

    mp_list.sort(key=lambda x: x.acc_index)

    # do not take global lake cells into consideration, and discard points with negative slopes
    mp_list = [mp for mp in mp_list if lake_fraction[mp.ix, mp.jy] < 0.6 and slope[mp.ix, mp.jy] >= 0]

    mp_list = mp_list[-12:]  # get 12 most important outlets

    print("The following outlets were chosen for analysis")
    pattern = "({0}, {1}): acc_index = {2} cells; fldr = {3}; lake_fraction = {4}"
    for mp in mp_list:
        print(pattern.format(mp.ix, mp.jy, mp.acc_index, cell_manager.flow_directions[mp.ix, mp.jy],
                             lake_fraction[mp.ix, mp.jy]))

    draw_model_comparison(model_points=mp_list, sim_name_to_file_name=sim_name_to_file_name, hdf_folder=hdf_folder,
                          start_year=start_year, end_year=end_year, cell_manager=cell_manager)
def get_mask_of_non_contrib_area(grid_config, dir_file):
    """

    :param grid_config:
    :param dir_file:
    :return: 2d numpy array with 1 for non-contributing cells and 0 otherwize
    """
    assert isinstance(grid_config, GridConfig)

    with Dataset(str(dir_file)) as ds:
        lons, lats, fldr, faa, cell_area = [
            ds.variables[k][:] for k in [
                "lon", "lat", "flow_direction_value", "accumulation_area",
                "cell_area"
            ]
        ]

    the_mask = np.zeros_like(lons)

    the_mask1 = maskoceans(lons, lats, the_mask, resolution="i", inlands=False)

    suspicious_internal_draining = (~the_mask1.mask) & ((fldr <= 0) |
                                                        (fldr >= 256))

    i_list, j_list = np.where(suspicious_internal_draining)

    print("retained {} gridcells".format(suspicious_internal_draining.sum()))

    # Remove the points close to the coasts
    for i, j in zip(i_list, j_list):
        if is_point_ocean_outlet(i, j, the_mask1.mask):
            suspicious_internal_draining[i, j] = False
            the_mask1[i, j] = np.ma.masked

    print("retained {} gridcells".format(suspicious_internal_draining.sum()))

    # Now get the mask upstream of the internal draining outlets
    cell_manager = CellManager(flow_dirs=fldr,
                               lons2d=lons,
                               lats2d=lats,
                               accumulation_area_km2=faa)
    i_list, j_list = np.where(suspicious_internal_draining)
    for i, j in zip(i_list, j_list):
        amask = cell_manager.get_mask_of_upstream_cells_connected_with_by_indices(
            i, j)

        suspicious_internal_draining |= amask > 0

    return suspicious_internal_draining
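
The helper is_point_ocean_outlet used above is not shown in this example; a plausible sketch, assuming a cell counts as an ocean outlet when any cell in its immediate neighbourhood is masked as ocean:

def is_point_ocean_outlet(i, j, ocean_mask, radius=1):
    # hypothetical helper: True if any neighbour within `radius` cells is ocean
    nx, ny = ocean_mask.shape
    for di in range(-radius, radius + 1):
        for dj in range(-radius, radius + 1):
            i1, j1 = i + di, j + dj
            if 0 <= i1 < nx and 0 <= j1 < ny and ocean_mask[i1, j1]:
                return True
    return False
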
Example 4
def calculate_lake_ids(fldirs, lkfract, lkout):
    current_id = 1
    lkfr_limit = 0.6

    cmanager = CellManager(fldirs)

    iout_list, jout_list = np.where(lkout > 0.5)

    lkids = np.zeros_like(fldirs)

    lkid_to_mask = {}
    lkid_to_npoints_upstream = {}
    for i, j in zip(iout_list, jout_list):
        the_mask = cmanager.get_mask_of_upstream_cells_connected_with_by_indices(i, j) > 0.5
        the_mask = the_mask & ((lkfract >= lkfr_limit) | (lkout > 0.5))

        lkid_to_mask[current_id] = the_mask
        lkid_to_npoints_upstream[current_id] = the_mask.sum()
        current_id += 1

    for the_id in sorted(lkid_to_mask, key=lambda xx: lkid_to_npoints_upstream[xx], reverse=True):
        lkids[lkid_to_mask[the_id]] = the_id

    return lkids
def get_basin_to_outlet_indices_map(shape_file=BASIN_BOUNDARIES_FILE,
                                    lons=None,
                                    lats=None,
                                    directions=None,
                                    accumulation_areas=None):
    driver = ogr.GetDriverByName("ESRI Shapefile")
    print(driver)
    ds = driver.Open(shape_file, 0)

    assert isinstance(ds, ogr.DataSource)
    layer = ds.GetLayer()

    assert isinstance(layer, ogr.Layer)
    print(layer.GetFeatureCount())

    latlong_proj = osr.SpatialReference()
    latlong_proj.ImportFromEPSG(4326)

    utm_proj = layer.GetSpatialRef()

    # create Coordinate Transformation
    coord_transform = osr.CoordinateTransformation(latlong_proj, utm_proj)

    utm_coords = coord_transform.TransformPoints(
        list(zip(lons.flatten(), lats.flatten())))
    utm_coords = np.asarray(utm_coords)
    x_utm = utm_coords[:, 0].reshape(lons.shape)
    y_utm = utm_coords[:, 1].reshape(lons.shape)

    basin_mask = np.zeros_like(lons)
    cell_manager = CellManager(directions,
                               accumulation_area_km2=accumulation_areas,
                               lons2d=lons,
                               lats2d=lats)

    index = 1
    basins = []
    basin_names = []
    basin_name_to_mask = {}
    for feature in layer:
        assert isinstance(feature, ogr.Feature)
        # print feature["FID"]

        geom = feature.GetGeometryRef()
        assert isinstance(geom, ogr.Geometry)
        basins.append(ogr.CreateGeometryFromWkb(geom.ExportToWkb()))
        basin_names.append(feature["abr"])

    accumulation_areas_temp = accumulation_areas.copy()  # copy: the slice [:, :] is only a view and would modify the input
    lons_out, lats_out = [], []
    basin_names_out = []
    name_to_ij_out = {}

    min_basin_area = min(b.GetArea() * 1.0e-6 for b in basins)

    while len(basins):
        fm = np.max(accumulation_areas_temp)

        i, j = np.where(fm == accumulation_areas_temp)
        i, j = i[0], j[0]
        p = ogr.CreateGeometryFromWkt("POINT ({} {})".format(
            x_utm[i, j], y_utm[i, j]))
        b_selected = None
        name_selected = None
        for name, b in zip(basin_names, basins):
            assert isinstance(b, ogr.Geometry)
            assert isinstance(p, ogr.Geometry)
            if b.Contains(p.Buffer(2000 * 2**0.5)):
                # Check if there is an upstream cell from the same basin
                the_mask = cell_manager.get_mask_of_upstream_cells_connected_with_by_indices(
                    i, j)

                # Save the mask of the basin for future use
                basin_name_to_mask[name] = the_mask

                # if is_part_of_points_in(b, x_utm[the_mask == 1], y_utm[the_mask == 1]):
                # continue

                b_selected = b
                name_selected = name
                # basin_names_out.append(name)

                lons_out.append(lons[i, j])
                lats_out.append(lats[i, j])
                name_to_ij_out[name] = (i, j)

                basin_mask[the_mask == 1] = index
                index += 1

                break

        if b_selected is not None:
            basins.remove(b_selected)
            basin_names.remove(name_selected)
            outlet_index_in_basin = 1
            current_basin_name = name_selected
            while current_basin_name in basin_names_out:
                current_basin_name = name_selected + str(outlet_index_in_basin)
                outlet_index_in_basin += 1

            basin_names_out.append(current_basin_name)
            print(len(basins), basin_names_out)

        accumulation_areas_temp[i, j] = -1

    return name_to_ij_out, basin_name_to_mask
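
A sketch of how the returned mapping might be consumed (lons, lats, fldirs and faa are placeholders for 2D fields the caller is assumed to have loaded):

name_to_ij, name_to_mask = get_basin_to_outlet_indices_map(
    lons=lons, lats=lats, directions=fldirs, accumulation_areas=faa)

for name, (i, j) in name_to_ij.items():
    n_upstream = int(name_to_mask[name].sum())
    print("{}: outlet at ({}, {}), {} upstream cells".format(name, i, j, n_upstream))
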
Example 6
def main():
    # stations = cehq_station.read_grdc_stations(st_id_list=["2903430", "2909150", "2912600", "4208025"])

    selected_station_ids = [
        "05LM006", "05BN012", "05AK001", "05QB003", "06EA002"
    ]

    stations = cehq_station.load_from_hydat_db(
        natural=None,
        province=None,
        selected_ids=selected_station_ids,
        skip_data_checks=True)

    stations_mh = cehq_station.get_manitoba_hydro_stations()

    # copy metadata from the corresponding hydat stations
    for s in stations:
        assert isinstance(s, Station)
        for s_mh in stations_mh:
            assert isinstance(s_mh, Station)

            if s == s_mh:
                s_mh.copy_metadata(s)
                break

    stations = [
        s for s in stations_mh
        if s.id in selected_station_ids and s.longitude is not None
    ]

    stations_to_mp = None

    import matplotlib.pyplot as plt

    # labels = ["CanESM", "MPI"]
    # paths = ["/skynet3_rech1/huziy/offline_stfl/canesm/discharge_1958_01_01_00_00.nc",
    # "/skynet3_rech1/huziy/offline_stfl/mpi/discharge_1958_01_01_00_00.nc"]
    #
    # colors = ["r", "b"]

    # labels = ["ERA", ]
    # colors = ["r", ]
    # paths = ["/skynet3_rech1/huziy/arctic_routing/era40/discharge_1958_01_01_00_00.nc"]

    labels = [
        "Model",
    ]
    colors = [
        "r",
    ]
    paths = [
        "/RESCUE/skynet3_rech1/huziy/water_route_mh_bc_011deg_wc/discharge_1980_01_01_12_00.nc"
    ]

    infocell_path = "/RESCUE/skynet3_rech1/huziy/water_route_mh_bc_011deg_wc/infocell.nc"

    start_year = 1980
    end_year = 2014

    stations_filtered = []
    for s in stations:
        # Also filter out stations with small accumulation areas
        # if s.drainage_km2 is not None and s.drainage_km2 < 100:
        #     continue

        # Filter stations with data out of the required time frame
        year_list = s.get_list_of_complete_years()

        print("Complete years for {}: {}".format(s.id, year_list))

        stations_filtered.append(s)

    stations = stations_filtered

    print("Retained {} stations.".format(len(stations)))

    sim_to_time = {}

    monthly_dates = [datetime(2001, m, 15) for m in range(1, 13)]
    fmt = FuncFormatter(lambda x, pos: num2date(x).strftime("%b")[0])
    locator = MonthLocator(bymonthday=15)

    fig = plt.figure()

    axes = []
    row_indices = []
    col_indices = []

    ncols = 1
    shiftrow = 0 if len(stations) % ncols == 0 else 1
    nrows = len(stations) // ncols + shiftrow
    shared_ax = None
    gs = gridspec.GridSpec(ncols=ncols, nrows=nrows)

    for i, s in enumerate(stations):
        row = i // ncols
        col = i % ncols

        row_indices.append(row)
        col_indices.append(col)

        if shared_ax is None:
            ax = fig.add_subplot(gs[row, col])
            shared_ax = ax
            assert isinstance(shared_ax, Axes)

        else:
            ax = fig.add_subplot(gs[row, col])

        ax.xaxis.set_major_locator(locator)
        ax.yaxis.set_major_locator(MaxNLocator(nbins=4))

        ax.xaxis.set_major_formatter(fmt)
        sfmt = ScalarFormatter(useMathText=True)
        sfmt.set_powerlimits((-3, 4))
        ax.yaxis.set_major_formatter(sfmt)
        assert isinstance(ax, Axes)

        axes.append(ax)

    # generate daily stamp dates
    d0 = datetime(2001, 1, 1)
    stamp_dates = [d0 + timedelta(days=i) for i in range(365)]

    # plot a panel for each station
    for s, ax, row, col in zip(stations, axes, row_indices, col_indices):

        assert isinstance(s, Station)
        assert isinstance(ax, Axes)
        if s.grdc_monthly_clim_max is not None:
            ax.fill_between(monthly_dates,
                            s.grdc_monthly_clim_min,
                            s.grdc_monthly_clim_max,
                            color="0.6",
                            alpha=0.5)

        avail_years = s.get_list_of_complete_years()
        print("{}: {}".format(s.id, ",".join([str(y) for y in avail_years])))
        years = [y for y in avail_years if start_year <= y <= end_year]
        obs_clim_stfl = s.get_monthly_climatology(years_list=years)

        if obs_clim_stfl is None:
            continue

        print(obs_clim_stfl.head())

        obs_clim_stfl.plot(color="k", lw=3, label="Obs", ax=ax)

        if s.river_name is not None and s.river_name != "":
            ax.set_title(s.river_name)
        else:
            ax.set_title(s.id)

        for path, sim_label, color in zip(paths, labels, colors):
            ds = Dataset(path)

            if stations_to_mp is None:
                acc_area_2d = ds.variables["accumulation_area"][:]
                lons2d, lats2d = ds.variables["longitude"][:], ds.variables[
                    "latitude"][:]
                x_index, y_index = ds.variables["x_index"][:], ds.variables[
                    "y_index"][:]
                stations_to_mp = get_dataless_model_points_for_stations(
                    stations, acc_area_2d, lons2d, lats2d, x_index, y_index)

            # read dates only once for a given simulation
            if sim_label not in sim_to_time:
                time_str = ds.variables["time"][:].astype(str)
                times = [
                    datetime.strptime("".join(t_s), TIME_FORMAT)
                    for t_s in time_str
                ]
                sim_to_time[sim_label] = times

            mp = stations_to_mp[s]
            data = ds.variables["water_discharge_accumulated"][:,
                                                               mp.cell_index]
            print(path)
            df = DataFrame(data=data,
                           index=sim_to_time[sim_label],
                           columns=["value"])
            df["year"] = df.index.map(lambda d: d.year)
            df = df.loc[df.year.isin(years), :]
            df = df.groupby(lambda d: datetime(2001, d.month, 15)).mean()

            # print np.mean( monthly_model ), s.river_name, sim_label
            df.plot(color=color, lw=3, label=sim_label, ax=ax, y="value")

            ds.close()

        if row < nrows - 1:
            ax.set_xticklabels([])

    axes[0].legend(fontsize=17, loc=2)
    plt.tight_layout()
    plt.savefig("mh/offline_validation_mh.png", dpi=400)
    plt.close(fig)

    with Dataset(infocell_path) as ds:

        fldir = ds.variables["flow_direction_value"][:]
        faa = ds.variables["accumulation_area"][:]

        lon, lat = [ds.variables[k][:] for k in ["lon", "lat"]]

        # plot station positions and upstream areas
        cell_manager = CellManager(fldir,
                                   nx=fldir.shape[0],
                                   ny=fldir.shape[1],
                                   lons2d=lon,
                                   lats2d=lat,
                                   accumulation_area_km2=faa)

    fig = plt.figure()
    from crcm5.mh_domains import default_domains
    gc = default_domains.bc_mh_011

    # get the basemap object
    bmp, data_mask = gc.get_basemap_using_shape_with_polygons_of_interest(
        lon, lat, shp_path=default_domains.MH_BASINS_PATH, mask_margin=5)

    xx, yy = bmp(lon, lat)
    ax = plt.gca()
    colors = ["g", "r", "m", "c", "y", "violet"]
    i = 0
    for s, mp in stations_to_mp.items():
        assert isinstance(mp, ModelPoint)
        upstream_mask = cell_manager.get_mask_of_upstream_cells_connected_with_by_indices(
            mp.ix, mp.jy)

        current_points = upstream_mask > 0.5

        bmp.drawcoastlines()
        bmp.drawrivers()

        bmp.scatter(xx[current_points],
                    yy[current_points],
                    c=colors[i % len(colors)])
        i += 1

        va = "top"
        if s.id in ["05AK001", "05LM006"]:
            va = "bottom"

        ha = "left"
        if s.id in ["05QB003"]:
            ha = "right"

        bmp.scatter(xx[mp.ix, mp.jy], yy[mp.ix, mp.jy], c="b")
        ax.annotate(s.id,
                    xy=(xx[mp.ix, mp.jy], yy[mp.ix, mp.jy]),
                    horizontalalignment=ha,
                    verticalalignment=va,
                    bbox=dict(boxstyle='round', fc='gray', alpha=0.5))

    fig.savefig("mh/offline_stations_{}.png".format("positions"))
    plt.close(fig)
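
The monthly climatology of the simulated discharge above is computed by grouping the daily series on "stamp dates" (the 15th of each month of an arbitrary year); a self-contained sketch of that pattern with synthetic data:

import numpy as np
import pandas as pd
from datetime import datetime

idx = pd.date_range("1980-01-01", "1982-12-31", freq="D")
df = pd.DataFrame({"value": np.random.rand(len(idx))}, index=idx)

# one row per month, indexed by the stamp dates datetime(2001, m, 15)
clim = df.groupby(lambda d: datetime(2001, d.month, 15)).mean()
print(clim)
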
def plot_basin_outlets(shape_file=BASIN_BOUNDARIES_FILE,
                       bmp_info=None,
                       directions=None,
                       accumulation_areas=None,
                       lake_fraction_field=None):
    assert isinstance(bmp_info, BasemapInfo)

    driver = ogr.GetDriverByName("ESRI Shapefile")
    print(driver)
    ds = driver.Open(shape_file, 0)

    assert isinstance(ds, ogr.DataSource)
    layer = ds.GetLayer()

    assert isinstance(layer, ogr.Layer)
    print(layer.GetFeatureCount())

    latlong_proj = osr.SpatialReference()
    latlong_proj.ImportFromEPSG(4326)

    utm_proj = layer.GetSpatialRef()

    # create Coordinate Transformation
    coord_transform = osr.CoordinateTransformation(latlong_proj, utm_proj)

    utm_coords = coord_transform.TransformPoints(
        list(zip(bmp_info.lons.flatten(), bmp_info.lats.flatten())))
    utm_coords = np.asarray(utm_coords)
    x_utm = utm_coords[:, 0].reshape(bmp_info.lons.shape)
    y_utm = utm_coords[:, 1].reshape(bmp_info.lons.shape)

    basin_mask = np.zeros_like(bmp_info.lons)
    cell_manager = CellManager(directions,
                               accumulation_area_km2=accumulation_areas,
                               lons2d=bmp_info.lons,
                               lats2d=bmp_info.lats)

    index = 1
    basins = []
    basin_names = []
    basin_name_to_mask = {}
    for feature in layer:
        assert isinstance(feature, ogr.Feature)
        # print feature["FID"]

        geom = feature.GetGeometryRef()
        assert isinstance(geom, ogr.Geometry)

        basins.append(ogr.CreateGeometryFromWkb(geom.ExportToWkb()))
        basin_names.append(feature["abr"])

    accumulation_areas_temp = accumulation_areas.copy()
    lons_out, lats_out = [], []
    basin_names_out = []
    name_to_ij_out = OrderedDict()

    min_basin_area = min(b.GetArea() * 1.0e-6 for b in basins)

    while len(basins):
        fm = np.max(accumulation_areas_temp)

        i, j = np.where(fm == accumulation_areas_temp)
        i, j = i[0], j[0]
        p = ogr.CreateGeometryFromWkt("POINT ({} {})".format(
            x_utm[i, j], y_utm[i, j]))
        b_selected = None
        name_selected = None
        for name, b in zip(basin_names, basins):

            assert isinstance(b, ogr.Geometry)
            assert isinstance(p, ogr.Geometry)
            if b.Contains(p.Buffer(2000 * 2**0.5)):
                # Check if there is an upstream cell from the same basin
                the_mask = cell_manager.get_mask_of_upstream_cells_connected_with_by_indices(
                    i, j)

                # Save the mask of the basin for future use
                basin_name_to_mask[name] = the_mask

                # if is_part_of_points_in(b, x_utm[the_mask == 1], y_utm[the_mask == 1]):
                # continue

                b_selected = b
                name_selected = name
                # basin_names_out.append(name)

                lons_out.append(bmp_info.lons[i, j])
                lats_out.append(bmp_info.lats[i, j])
                name_to_ij_out[name] = (i, j)

                basin_mask[the_mask == 1] = index
                index += 1
                break

        if b_selected is not None:
            basins.remove(b_selected)
            basin_names.remove(name_selected)
            outlet_index_in_basin = 1
            current_basin_name = name_selected
            while current_basin_name in basin_names_out:
                current_basin_name = name_selected + str(outlet_index_in_basin)
                outlet_index_in_basin += 1

            basin_names_out.append(current_basin_name)
            print(len(basins), basin_names_out)

        accumulation_areas_temp[i, j] = -1

    plot_utils.apply_plot_params(font_size=12,
                                 width_pt=None,
                                 width_cm=20,
                                 height_cm=20)
    gs = GridSpec(2, 2, width_ratios=[1.0, 0.5], wspace=0.01)
    fig = plt.figure()

    ax = fig.add_subplot(gs[1, 0])
    xx, yy = bmp_info.get_proj_xy()
    bmp_info.basemap.drawcoastlines(linewidth=0.5, ax=ax)
    bmp_info.basemap.drawrivers(zorder=5, color="0.5", ax=ax)

    upstream_edges = cell_manager.get_upstream_polygons_for_points(
        model_point_list=[
            ModelPoint(ix=i, jy=j) for (i, j) in name_to_ij_out.values()
        ],
        xx=xx,
        yy=yy)

    upstream_edges_latlon = cell_manager.get_upstream_polygons_for_points(
        model_point_list=[
            ModelPoint(ix=i, jy=j) for (i, j) in name_to_ij_out.values()
        ],
        xx=bmp_info.lons,
        yy=bmp_info.lats)

    plot_utils.draw_upstream_area_bounds(ax,
                                         upstream_edges=upstream_edges,
                                         color="r",
                                         linewidth=0.6)
    plot_utils.save_to_shape_file(upstream_edges_latlon, in_proj=None)

    xs, ys = bmp_info.basemap(lons_out, lats_out)
    bmp_info.basemap.scatter(xs, ys, c="0.75", s=30, zorder=10)
    bmp_info.basemap.drawparallels(np.arange(-90, 90, 5),
                                   labels=[True, False, False, False],
                                   linewidth=0.5)

    bmp_info.basemap.drawmeridians(np.arange(-180, 180, 5),
                                   labels=[False, False, False, True],
                                   linewidth=0.5)

    cmap = cm.get_cmap("rainbow", index - 1)
    bn = BoundaryNorm(list(range(index + 1)), index - 1)

    # basin_mask = np.ma.masked_where(basin_mask < 0.5, basin_mask)
    # bmp_info.basemap.pcolormesh(xx, yy, basin_mask, norm=bn, cmap=cmap, ax=ax)

    xmin, xmax = ax.get_xlim()
    ymin, ymax = ax.get_ylim()

    print(xmin, xmax, ymin, ymax)
    dx = xmax - xmin
    dy = ymax - ymin
    step_y = 0.1
    step_x = 0.12
    y0_frac = 0.75
    y0_frac_bottom = 0.02
    x0_frac = 0.35
    bname_to_text_coords = {
        "RDO": (xmin + x0_frac * dx, ymin + y0_frac_bottom * dy),
        "STM": (xmin + (x0_frac + step_x) * dx, ymin + y0_frac_bottom * dy),
        "SAG":
        (xmin + (x0_frac + 2 * step_x) * dx, ymin + y0_frac_bottom * dy),
        "BOM":
        (xmin + (x0_frac + 3 * step_x) * dx, ymin + y0_frac_bottom * dy),
        "MAN":
        (xmin + (x0_frac + 4 * step_x) * dx, ymin + y0_frac_bottom * dy),
        "MOI":
        (xmin + (x0_frac + 5 * step_x) * dx, ymin + y0_frac_bottom * dy),
        "ROM": (xmin + (x0_frac + 5 * step_x) * dx,
                ymin + (y0_frac_bottom + step_y) * dy),
        "NAT": (xmin + (x0_frac + 5 * step_x) * dx,
                ymin + (y0_frac_bottom + 2 * step_y) * dy),

        ######
        "CHU": (xmin + (x0_frac + 5 * step_x) * dx, ymin + y0_frac * dy),
        "GEO": (xmin + (x0_frac + 5 * step_x) * dx,
                ymin + (y0_frac + step_y) * dy),
        "BAL": (xmin + (x0_frac + 5 * step_x) * dx,
                ymin + (y0_frac + 2 * step_y) * dy),
        "PYR": (xmin + (x0_frac + 4 * step_x) * dx,
                ymin + (y0_frac + 2 * step_y) * dy),
        "MEL": (xmin + (x0_frac + 3 * step_x) * dx,
                ymin + (y0_frac + 2 * step_y) * dy),
        "FEU": (xmin + (x0_frac + 2 * step_x) * dx,
                ymin + (y0_frac + 2 * step_y) * dy),
        "ARN": (xmin + (x0_frac + 1 * step_x) * dx,
                ymin + (y0_frac + 2 * step_y) * dy),

        ######
        "CAN": (xmin + 0.1 * dx, ymin + 0.80 * dy),
        "GRB": (xmin + 0.1 * dx, ymin + (0.80 - step_y) * dy),
        "LGR": (xmin + 0.1 * dx, ymin + (0.80 - 2 * step_y) * dy),
        "RUP": (xmin + 0.1 * dx, ymin + (0.80 - 3 * step_y) * dy),
        "WAS": (xmin + 0.1 * dx, ymin + (0.80 - 4 * step_y) * dy),
        "BEL": (xmin + 0.1 * dx, ymin + (0.80 - 5 * step_y) * dy),
    }

    # bmp_info.basemap.readshapefile(".".join(BASIN_BOUNDARIES_FILE.split(".")[:-1]).replace("utm18", "latlon"), "basin",
    #                                linewidth=1.2, ax=ax, zorder=9)

    for name, xa, ya, lona, lata in zip(basin_names_out, xs, ys, lons_out,
                                        lats_out):
        ax.annotate(name,
                    xy=(xa, ya),
                    xytext=bname_to_text_coords[name],
                    textcoords='data',
                    ha='right',
                    va='bottom',
                    bbox=dict(boxstyle='round,pad=0.4', fc='white'),
                    arrowprops=dict(arrowstyle='->',
                                    connectionstyle='arc3,rad=0',
                                    linewidth=0.25),
                    font_properties=FontProperties(size=8),
                    zorder=20)

        print(r"{} & {:.0f} \\".format(
            name, accumulation_areas[name_to_ij_out[name]]))

    # Plot zonally averaged lake fraction
    ax = fig.add_subplot(gs[1, 1])
    ydata = range(lake_fraction_field.shape[1])
    ax.plot(lake_fraction_field.mean(axis=0) * 100, ydata, lw=2)

    ax.fill_betweenx(ydata, lake_fraction_field.mean(axis=0) * 100, alpha=0.5)

    ax.set_xlabel("Lake fraction (%)")
    ax.set_ylim(min(ydata), max(ydata))
    ax.xaxis.set_tick_params(direction='out', width=1)
    ax.yaxis.set_tick_params(direction='out', width=1)
    ax.xaxis.set_ticks_position("bottom")
    ax.yaxis.set_ticks_position("none")

    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)

    for tl in ax.yaxis.get_ticklabels():
        tl.set_visible(False)

    # plot elevation, buffer zone, big lakes, grid cells
    ax = fig.add_subplot(gs[0, :])
    geophy_file = "/RESCUE/skynet3_rech1/huziy/from_guillimin/geophys_Quebec_0.1deg_260x260_with_dd_v6"

    r = RPN(geophy_file)
    elev = r.get_first_record_for_name("ME")
    lkfr = r.get_first_record_for_name("LKFR")
    fldr = r.get_first_record_for_name("FLDR")

    params = r.get_proj_parameters_for_the_last_read_rec()
    lons, lats = r.get_longitudes_and_latitudes_for_the_last_read_rec()
    rll = RotatedLatLon(**params)

    bsmp = rll.get_basemap_object_for_lons_lats(lons2d=lons,
                                                lats2d=lats,
                                                resolution="l")
    xx, yy = bsmp(lons, lats)

    dx = (xx[0, 0] - xx[-1, 0]) / xx.shape[0]
    dy = (yy[0, 0] - yy[0, -1]) / yy.shape[1]

    xx_ll_crnrs = xx - dx / 2
    yy_ll_crnrs = yy - dy / 2

    xx_ur_crnrs = xx + dx / 2
    yy_ur_crnrs = yy + dy / 2

    ll_lon, ll_lat = bsmp(xx_ll_crnrs[0, 0], yy_ll_crnrs[0, 0], inverse=True)
    ur_lon, ur_lat = bsmp(xx_ur_crnrs[-1, -1],
                          yy_ur_crnrs[-1, -1],
                          inverse=True)

    crnr_lons = np.array([[ll_lon, ll_lon], [ur_lon, ur_lon]])

    crnr_lats = np.array([[ll_lat, ll_lat], [ur_lat, ur_lat]])

    bsmp = rll.get_basemap_object_for_lons_lats(lons2d=crnr_lons,
                                                lats2d=crnr_lats)

    # plot elevation
    levs = [0, 100, 200, 300, 500, 700, 1000, 1500, 2000, 2800]
    norm = BoundaryNorm(levs, len(levs) - 1)
    the_cmap = my_colormaps.get_cmap_from_ncl_spec_file(
        path="colormap_files/OceanLakeLandSnow.rgb", ncolors=len(levs) - 1)

    lons[lons > 180] -= 360
    me_to_plot = maskoceans(lons, lats, elev, resolution="l")
    im = bsmp.contourf(xx,
                       yy,
                       me_to_plot,
                       cmap=the_cmap,
                       levels=levs,
                       norm=norm,
                       ax=ax)
    bsmp.colorbar(im)

    bsmp.drawcoastlines(linewidth=0.5, ax=ax)

    # show large lake points
    gl_lakes = np.ma.masked_where((lkfr < 0.6) | (fldr <= 0) | (fldr > 128),
                                  lkfr)
    gl_lakes[~gl_lakes.mask] = 1.0
    bsmp.pcolormesh(xx,
                    yy,
                    gl_lakes,
                    cmap=cm.get_cmap("Blues"),
                    ax=ax,
                    vmin=0,
                    vmax=1,
                    zorder=3)

    # show free zone border
    margin = 20
    x1 = xx_ll_crnrs[margin, margin]
    x2 = xx_ur_crnrs[-margin, margin]
    y1 = yy_ll_crnrs[margin, margin]
    y2 = yy_ur_crnrs[margin, -margin]
    pol_corners = ((x1, y1), (x2, y1), (x2, y2), (x1, y2))
    ax.add_patch(Polygon(xy=pol_corners, fc="none", ls="solid", lw=3,
                         zorder=5))

    # show blending zone border (with halo zone)
    margin = 10
    x1 = xx_ll_crnrs[margin, margin]
    x2 = xx_ur_crnrs[-margin, margin]
    y1 = yy_ll_crnrs[margin, margin]
    y2 = yy_ur_crnrs[margin, -margin]
    pol_corners = ((x1, y1), (x2, y1), (x2, y2), (x1, y2))
    ax.add_patch(
        Polygon(xy=pol_corners, fc="none", ls="dashed", lw=3, zorder=5))

    # show the grid
    step = 20
    xx_ll_crnrs_ext = np.zeros([n + 1 for n in xx_ll_crnrs.shape])
    yy_ll_crnrs_ext = np.zeros([n + 1 for n in yy_ll_crnrs.shape])

    xx_ll_crnrs_ext[:-1, :-1] = xx_ll_crnrs
    yy_ll_crnrs_ext[:-1, :-1] = yy_ll_crnrs
    xx_ll_crnrs_ext[:-1, -1] = xx_ll_crnrs[:, -1]
    yy_ll_crnrs_ext[-1, :-1] = yy_ll_crnrs[-1, :]

    xx_ll_crnrs_ext[-1, :] = xx_ur_crnrs[-1, -1]
    yy_ll_crnrs_ext[:, -1] = yy_ur_crnrs[-1, -1]

    bsmp.pcolormesh(xx_ll_crnrs_ext[::step, ::step],
                    yy_ll_crnrs_ext[::step, ::step],
                    np.ma.masked_all_like(xx_ll_crnrs_ext)[::step, ::step],
                    edgecolors="0.6",
                    ax=ax,
                    linewidth=0.05,
                    zorder=4,
                    alpha=0.5)

    ax.set_title("Elevation (m)")

    # plt.show()
    fig.savefig("qc_basin_outlets_points.png", bbox_inches="tight")
    # plt.show()
    plt.close(fig)

    return name_to_ij_out, basin_name_to_mask
def get_basin_to_outlet_indices_map(shape_file=BASIN_BOUNDARIES_FILE,
                                    bmp_info=None,
                                    directions=None,
                                    accumulation_areas=None,
                                    lake_fraction_field=None):
    assert isinstance(bmp_info, BasemapInfo)

    driver = ogr.GetDriverByName("ESRI Shapefile")
    print(driver)
    ds = driver.Open(shape_file, 0)

    assert isinstance(ds, ogr.DataSource)
    layer = ds.GetLayer()

    assert isinstance(layer, ogr.Layer)
    print(layer.GetFeatureCount())

    latlong_proj = osr.SpatialReference()
    latlong_proj.ImportFromEPSG(4326)

    utm_proj = layer.GetSpatialRef()

    # create Coordinate Transformation
    coord_transform = osr.CoordinateTransformation(latlong_proj, utm_proj)

    utm_coords = coord_transform.TransformPoints(
        list(zip(bmp_info.lons.flatten(), bmp_info.lats.flatten())))
    utm_coords = np.asarray(utm_coords)
    x_utm = utm_coords[:, 0].reshape(bmp_info.lons.shape)
    y_utm = utm_coords[:, 1].reshape(bmp_info.lons.shape)

    basin_mask = np.zeros_like(bmp_info.lons)
    cell_manager = CellManager(directions,
                               accumulation_area_km2=accumulation_areas,
                               lons2d=bmp_info.lons,
                               lats2d=bmp_info.lats)

    index = 1
    basins = []
    basin_names = []
    basin_name_to_mask = {}
    for feature in layer:
        assert isinstance(feature, ogr.Feature)
        # print feature["FID"]

        geom = feature.GetGeometryRef()
        assert isinstance(geom, ogr.Geometry)

        basins.append(ogr.CreateGeometryFromWkb(geom.ExportToWkb()))
        basin_names.append(feature["abr"])

    accumulation_areas_temp = accumulation_areas.copy()  # copy: the slice [:, :] is only a view and would modify the input
    lons_out, lats_out = [], []
    basin_names_out = []
    name_to_ij_out = {}

    min_basin_area = min(b.GetArea() * 1.0e-6 for b in basins)

    while len(basins):
        fm = np.max(accumulation_areas_temp)

        i, j = np.where(fm == accumulation_areas_temp)
        i, j = i[0], j[0]
        p = ogr.CreateGeometryFromWkt("POINT ({} {})".format(
            x_utm[i, j], y_utm[i, j]))
        b_selected = None
        name_selected = None
        for name, b in zip(basin_names, basins):

            assert isinstance(b, ogr.Geometry)
            assert isinstance(p, ogr.Geometry)
            if b.Contains(p.Buffer(2000 * 2**0.5)):
                # Check if there is an upstream cell from the same basin
                the_mask = cell_manager.get_mask_of_upstream_cells_connected_with_by_indices(
                    i, j)

                # Save the mask of the basin for future use
                basin_name_to_mask[name] = the_mask

                # if is_part_of_points_in(b, x_utm[the_mask == 1], y_utm[the_mask == 1]):
                # continue

                b_selected = b
                name_selected = name
                # basin_names_out.append(name)

                lons_out.append(bmp_info.lons[i, j])
                lats_out.append(bmp_info.lats[i, j])
                name_to_ij_out[name] = (i, j)

                basin_mask[the_mask == 1] = index
                index += 1

                break

        if b_selected is not None:
            basins.remove(b_selected)
            basin_names.remove(name_selected)
            outlet_index_in_basin = 1
            current_basin_name = name_selected
            while current_basin_name in basin_names_out:
                current_basin_name = name_selected + str(outlet_index_in_basin)
                outlet_index_in_basin += 1

            basin_names_out.append(current_basin_name)
            print(len(basins), basin_names_out)

        accumulation_areas_temp[i, j] = -1

    plot_utils.apply_plot_params(font_size=10,
                                 width_pt=None,
                                 width_cm=20,
                                 height_cm=12)
    gs = GridSpec(1, 2, width_ratios=[1.0, 0.5], wspace=0.01)
    fig = plt.figure()

    ax = fig.add_subplot(gs[0, 0])
    xx, yy = bmp_info.get_proj_xy()
    # im = bmp.pcolormesh(xx, yy, basin_mask.reshape(xx.shape))
    bmp_info.basemap.drawcoastlines(linewidth=0.5, ax=ax)
    bmp_info.basemap.drawrivers(zorder=5, color="0.5", ax=ax)
    bmp_info.basemap.drawparallels(np.arange(-90, 90, 10),
                                   labels=[False, True, False, False])
    # bmp.colorbar(im)

    xs, ys = bmp_info.basemap(lons_out, lats_out)
    bmp_info.basemap.scatter(xs, ys, c="0.75", s=30, zorder=10)

    cmap = cm.get_cmap("rainbow", index - 1)
    bn = BoundaryNorm(list(range(index + 1)), index - 1)

    # Do not color the basins
    # basin_mask = np.ma.masked_where(basin_mask < 0.5, basin_mask)
    # bmp_info.basemap.pcolormesh(xx, yy, basin_mask, norm=bn, cmap=cmap, ax=ax)

    for name, xa, ya, lona, lata in zip(basin_names_out, xs, ys, lons_out,
                                        lats_out):

        text_offset = (-20, 20) if name not in [
            "GEO",
        ] else (30, 20)

        if name in ["ARN"]:
            text_offset = (-10, 30)

        if name in ["FEU"]:
            text_offset = (5, 50)

        if name in ["CAN"]:
            text_offset = (-75, 50)

        if name in ["MEL"]:
            text_offset = (20, 40)

        if name in ["PYR"]:
            text_offset = (60, 60)

        if name in [
                "BAL",
        ]:
            text_offset = (50, 30)

        if name in ["BEL"]:
            text_offset = (-20, -10)

        if name in [
                "RDO",
                "STM",
                "SAG",
        ]:
            text_offset = (50, -50)

        if name in [
                "BOM",
        ]:
            text_offset = (20, -20)

        if name in [
                "MOI",
        ]:
            text_offset = (30, -20)

        if name in [
                "ROM",
        ]:
            text_offset = (40, -20)

        if name in [
                "RDO",
        ]:
            text_offset = (30, -30)

        if name in ["CHU", "NAT"]:
            text_offset = (40, 40)

        if name in [
                "MAN",
        ]:
            text_offset = (55, -45)

        ax.annotate(name,
                    xy=(xa, ya),
                    xytext=text_offset,
                    textcoords='offset points',
                    ha='right',
                    va='bottom',
                    bbox=dict(boxstyle='round,pad=0.5', fc='white'),
                    arrowprops=dict(arrowstyle='->',
                                    connectionstyle='arc3,rad=0'),
                    font_properties=FontProperties(size=8),
                    zorder=20)

    # bmp_info.basemap.readshapefile(".".join(BASIN_BOUNDARIES_FILE.split(".")[:-1]).replace("utm18", "latlon"), "basin",
    #                               linewidth=1.2, ax=ax, zorder=9)

    # Plot zonally averaged lake fraction
    ax = fig.add_subplot(gs[0, 1])
    ydata = range(lake_fraction_field.shape[1])
    ax.plot(lake_fraction_field.mean(axis=0) * 100, ydata, lw=2)

    ax.fill_betweenx(ydata, lake_fraction_field.mean(axis=0) * 100, alpha=0.5)

    ax.set_xlabel("Lake fraction (%)")
    ax.set_ylim(min(ydata), max(ydata))
    ax.xaxis.set_tick_params(direction='out', width=1)
    ax.yaxis.set_tick_params(direction='out', width=1)
    ax.xaxis.set_ticks_position("bottom")
    ax.yaxis.set_ticks_position("none")

    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)

    for tl in ax.yaxis.get_ticklabels():
        tl.set_visible(False)

    fig.savefig("qc_basin_outlets_points.png", bbox_inches="tight")
    # plt.show()
    plt.close(fig)

    return name_to_ij_out, basin_name_to_mask
def main(start_year = 1980, end_year = 1989):


    soil_layer_widths = infovar.soil_layer_widths_26_to_60
    soil_tops = np.cumsum(soil_layer_widths).tolist()[:-1]
    soil_tops = [0, ] + soil_tops



    selected_station_ids = [
        "061905", "074903", "090613", "092715", "093801", "093806"
    ]

#    path1 = "/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-rl_spinup.hdf"
#    label1 = "CRCM5-HCD-RL"

    path1 = "/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-rl-intfl_spinup_ITFS.hdf5"
    label1 = "CRCM5-HCD-RL-INTFL"

    path2 = "/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-rl-intfl_ITFS_avoid_truncation1979-1989.hdf5"
    label2 = "CRCM5-HCD-RL-INTFL-improved"

    ############
    images_folder = "images_for_lake-river_paper/comp_soil_profiles"
    if not os.path.isdir(images_folder):
        os.mkdir(images_folder)

    fldirs = analysis.get_array_from_file(path=path1, var_name=infovar.HDF_FLOW_DIRECTIONS_NAME)
    lons2d, lats2d, basemap = analysis.get_basemap_from_hdf(path1)

    lake_fractions = analysis.get_array_from_file(path=path1, var_name=infovar.HDF_LAKE_FRACTION_NAME)
    cell_areas = analysis.get_array_from_file(path=path1, var_name=infovar.HDF_CELL_AREA_NAME_M2)
    acc_areakm2 = analysis.get_array_from_file(path=path1, var_name=infovar.HDF_ACCUMULATION_AREA_NAME)
    depth_to_bedrock = analysis.get_array_from_file(path=path1, var_name=infovar.HDF_DEPTH_TO_BEDROCK_NAME)


    cell_manager = CellManager(fldirs, lons2d=lons2d, lats2d=lats2d, accumulation_area_km2=acc_areakm2)

    #get climatologic liquid soil moisture and convert fractions to mm
    t0 = time.perf_counter()
    daily_dates, levels, i1_nointfl = analysis.get_daily_climatology_of_3d_field(
        path_to_hdf_file=path1,
        var_name="I1",
        start_year=start_year,
        end_year=end_year
    )
    print("read I1 - 1")
    print("Spent {0} seconds ".format(time.clock() - t0))

    _, _, i1_intfl = analysis.get_daily_climatology_of_3d_field(
        path_to_hdf_file=path2,
        var_name="I1",
        start_year=start_year,
        end_year=end_year
    )
    print("read I1 - 2")

    #get climatologic frozen soil moisture and convert fractions to mm
    _, _, i2_nointfl = analysis.get_daily_climatology_of_3d_field(
        path_to_hdf_file=path1,
        var_name="I2",
        start_year=start_year,
        end_year=end_year
    )
    print("read I2 - 1")

    _, _, i2_intfl = analysis.get_daily_climatology_of_3d_field(
        path_to_hdf_file=path2,
        var_name="I2",
        start_year=start_year,
        end_year=end_year
    )
    print("read I2 - 2")
    #
    sm_intfl = i1_intfl + i2_intfl
    sm_nointfl = i1_nointfl + i2_nointfl


    #Get the list of stations to do the comparison with
    stations = cehq_station.read_station_data(
        start_date=datetime(start_year, 1, 1),
        end_date=datetime(end_year, 12, 31),
        selected_ids=selected_station_ids
    )


    print("sm_noinfl, min, max = {0}, {1}".format(sm_nointfl.min(), sm_nointfl.max()))
    print("sm_infl, min, max = {0}, {1}".format(sm_intfl.min(), sm_intfl.max()))
    diff = (sm_intfl - sm_nointfl)
    #diff *= soil_layer_widths[np.newaxis, :, np.newaxis, np.newaxis] * 1000  # to convert in mm

    #print "number of nans", np.isnan(diff).astype(int).sum()

    print("cell area min,max = {0}, {1}".format(cell_areas.min(), cell_areas.max()))
    print("acc area min,max = {0}, {1}".format(acc_areakm2.min(), acc_areakm2.max()))

    assert np.all(lake_fractions >= 0)
    print("lake fractions (min, max): ", lake_fractions.min(), lake_fractions.max())

    # No need to go very deep
    nlayers = 3
    z, t = np.meshgrid(soil_tops[:nlayers], date2num(daily_dates))
    station_to_mp = cell_manager.get_model_points_for_stations(stations)


    plotted_global = False

    for the_station, mp in station_to_mp.items():
        assert isinstance(mp, ModelPoint)
        assert isinstance(the_station, Station)
        fig = plt.figure()
        umask = cell_manager.get_mask_of_upstream_cells_connected_with_by_indices(mp.ix, mp.jy)

        #exclude lake cells from the profiles
        sel = (umask == 1) & (depth_to_bedrock > 3) & (acc_areakm2 >= 0)

        umaskf = umask.astype(float)
        umaskf *= (1.0 - lake_fractions) * cell_areas
        umaskf[~sel] = 0.0


        profiles = np.tensordot(diff, umaskf) / umaskf.sum()
        print(profiles.shape, profiles.min(), profiles.max(), umaskf.sum(), umaskf.min(), umaskf.max())

        d = np.abs(profiles).max()
        print("d = {0}".format(d))
        clevs = np.round(np.linspace(-d, d, 12), decimals=5)

        diff_cmap = cm.get_cmap("RdBu_r", lut=len(clevs) - 1)
        bn = BoundaryNorm(clevs, len(clevs) - 1)


        plt.title("({})-({})".format(label2, label2))
        img = plt.contourf(t, z, profiles[:, :nlayers], cmap = diff_cmap, levels = clevs, norm = bn)
        plt.colorbar(img, ticks = clevs)
        ax = plt.gca()
        assert isinstance(ax, Axes)

        ax.invert_yaxis()
        ax.xaxis.set_major_formatter(DateFormatter("%b"))
        ax.xaxis.set_major_locator(MonthLocator())


        fig.savefig(os.path.join(images_folder, "{0}_{1}_{2}.jpeg".format(the_station.id, label1, label2)),
                    dpi = cpp.FIG_SAVE_DPI, bbox_inches = "tight")



        print("processed: {0}".format(the_station))
        if not plotted_global:
            plotted_global = True
            fig = plt.figure()
            sel = (depth_to_bedrock >= 0.1) & (acc_areakm2 >= 0)

            umaskf = (1.0 - lake_fractions) * cell_areas
            umaskf[~sel] = 0.0


            profiles = np.tensordot(diff, umaskf) / umaskf.sum()
            print(profiles.shape, profiles.min(), profiles.max(), umaskf.sum(), umaskf.min(), umaskf.max())

            d = np.abs(profiles).max()
            print("d = {0}".format(d))
            clevs = np.round(np.linspace(-d, d, 12), decimals=5)

            diff_cmap = cm.get_cmap("RdBu_r", lut=len(clevs) - 1)
            bn = BoundaryNorm(clevs, len(clevs) - 1)

            img = plt.contourf(t, z, profiles[:, :nlayers], cmap = diff_cmap, levels = clevs, norm = bn)
            plt.colorbar(img, ticks = clevs)
            ax = plt.gca()
            assert isinstance(ax, Axes)

            ax.invert_yaxis()
            ax.xaxis.set_major_formatter(DateFormatter("%b"))
            ax.xaxis.set_major_locator(MonthLocator())


            fig.savefig(os.path.join(images_folder, "global_mean.jpeg"),
                        dpi = cpp.FIG_SAVE_DPI, bbox_inches = "tight")


    pass
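
The area-weighted upstream averaging above contracts the two trailing spatial dimensions of the (time, level, x, y) difference field against a 2D weight field via np.tensordot; a minimal sketch of the same pattern with synthetic shapes (for illustration only):

import numpy as np

diff = np.random.randn(365, 5, 10, 12)   # synthetic (time, level, x, y) field
weights = np.random.rand(10, 12)         # e.g. (1 - lake_fractions) * cell_areas

# with the default axes=2, np.tensordot sums over the last two axes of `diff`
# and both axes of `weights`, leaving one profile per (time, level) pair
profiles = np.tensordot(diff, weights) / weights.sum()
print(profiles.shape)   # (365, 5)
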
def main():
    # stations = cehq_station.read_grdc_stations(st_id_list=["2903430", "2909150", "2912600", "4208025"])

    selected_station_ids = [
        "05LM006",
        "05BN012",
        "05AK001",
        "05QB003",
        "06EA002"
    ]

    stations = cehq_station.load_from_hydat_db(natural=None, province=None, selected_ids=selected_station_ids, skip_data_checks=True)

    stations_mh = cehq_station.get_manitoba_hydro_stations()

    # copy metadata from the corresponding hydat stations
    for s in stations:
        assert isinstance(s, Station)
        for s_mh in stations_mh:
            assert isinstance(s_mh, Station)


            if s == s_mh:
                s_mh.copy_metadata(s)
                break



    stations = [s for s in stations_mh if s.id in selected_station_ids and s.longitude is not None]

    stations_to_mp = None

    import matplotlib.pyplot as plt

    # labels = ["CanESM", "MPI"]
    # paths = ["/skynet3_rech1/huziy/offline_stfl/canesm/discharge_1958_01_01_00_00.nc",
    # "/skynet3_rech1/huziy/offline_stfl/mpi/discharge_1958_01_01_00_00.nc"]
    #
    # colors = ["r", "b"]

    # labels = ["ERA", ]
    # colors = ["r", ]
    # paths = ["/skynet3_rech1/huziy/arctic_routing/era40/discharge_1958_01_01_00_00.nc"]


    labels = ["Model", ]
    colors = ["r", ]
    paths = [
        "/RESCUE/skynet3_rech1/huziy/water_route_mh_bc_011deg_wc/discharge_1980_01_01_12_00.nc"
    ]

    infocell_path = "/RESCUE/skynet3_rech1/huziy/water_route_mh_bc_011deg_wc/infocell.nc"

    start_year = 1980
    end_year = 2014




    stations_filtered = []
    for s in stations:
        # Also filter out stations with small accumulation areas
        # if s.drainage_km2 is not None and s.drainage_km2 < 100:
        #     continue

        # Filter stations with data out of the required time frame
        year_list = s.get_list_of_complete_years()

        print("Complete years for {}: {}".format(s.id, year_list))

        stations_filtered.append(s)

    stations = stations_filtered


    print("Retained {} stations.".format(len(stations)))

    sim_to_time = {}

    monthly_dates = [datetime(2001, m, 15) for m in range(1, 13)]
    fmt = FuncFormatter(lambda x, pos: num2date(x).strftime("%b")[0])
    locator = MonthLocator(bymonthday=15)

    fig = plt.figure()

    axes = []
    row_indices = []
    col_indices = []

    ncols = 1
    shiftrow = 0 if len(stations) % ncols == 0 else 1
    nrows = len(stations) // ncols + shiftrow
    shared_ax = None
    gs = gridspec.GridSpec(ncols=ncols, nrows=nrows)

    for i, s in enumerate(stations):
        row = i // ncols
        col = i % ncols

        row_indices.append(row)
        col_indices.append(col)

        if shared_ax is None:
            ax = fig.add_subplot(gs[row, col])
            shared_ax = ax
            assert isinstance(shared_ax, Axes)

        else:
            ax = fig.add_subplot(gs[row, col])

        ax.xaxis.set_major_locator(locator)
        ax.yaxis.set_major_locator(MaxNLocator(nbins=4))

        ax.xaxis.set_major_formatter(fmt)
        sfmt = ScalarFormatter(useMathText=True)
        sfmt.set_powerlimits((-3, 4))
        ax.yaxis.set_major_formatter(sfmt)
        assert isinstance(ax, Axes)

        axes.append(ax)

    # generate daily stamp dates
    d0 = datetime(2001, 1, 1)
    stamp_dates = [d0 + timedelta(days=i) for i in range(365)]



    # plot a panel for each station
    for s, ax, row, col in zip(stations, axes, row_indices, col_indices):

        assert isinstance(s, Station)
        assert isinstance(ax, Axes)
        if s.grdc_monthly_clim_max is not None:
            ax.fill_between(monthly_dates, s.grdc_monthly_clim_min, s.grdc_monthly_clim_max, color="0.6", alpha=0.5)

        avail_years = s.get_list_of_complete_years()
        print("{}: {}".format(s.id, ",".join([str(y) for y in avail_years])))
        years = [y for y in avail_years if start_year <= y <= end_year]
        obs_clim_stfl = s.get_monthly_climatology(years_list=years)

        if obs_clim_stfl is None:
            continue

        print(obs_clim_stfl.head())

        obs_clim_stfl.plot(color="k", lw=3, label="Obs", ax=ax)

        if s.river_name is not None and s.river_name != "":
            ax.set_title(s.river_name)
        else:
            ax.set_title(s.id)

        for path, sim_label, color in zip(paths, labels, colors):
            ds = Dataset(path)

            if stations_to_mp is None:
                acc_area_2d = ds.variables["accumulation_area"][:]
                lons2d, lats2d = ds.variables["longitude"][:], ds.variables["latitude"][:]
                x_index, y_index = ds.variables["x_index"][:], ds.variables["y_index"][:]
                stations_to_mp = get_dataless_model_points_for_stations(stations, acc_area_2d,
                                                                       lons2d, lats2d, x_index, y_index)

            # read dates only once for a given simulation
            if sim_label not in sim_to_time:
                time_str = ds.variables["time"][:].astype(str)
                times = [datetime.strptime("".join(t_s), TIME_FORMAT) for t_s in time_str]
                sim_to_time[sim_label] = times

            mp = stations_to_mp[s]
            data = ds.variables["water_discharge_accumulated"][:, mp.cell_index]
            print(path)
            df = DataFrame(data=data, index=sim_to_time[sim_label], columns=["value"])
            df["year"] = df.index.map(lambda d: d.year)
            df = df.loc[df.year.isin(years), :]
            df = df.groupby(lambda d: datetime(2001, d.month, 15)).mean()


            # print np.mean( monthly_model ), s.river_name, sim_label
            df.plot(color=color, lw=3, label=sim_label, ax=ax, y="value")


            ds.close()

        if row < nrows - 1:
            ax.set_xticklabels([])

    axes[0].legend(fontsize=17, loc=2)
    plt.tight_layout()
    plt.savefig("mh/offline_validation_mh.png", dpi=400)
    plt.close(fig)






    with Dataset(infocell_path) as ds:

        fldir = ds.variables["flow_direction_value"][:]
        faa = ds.variables["accumulation_area"][:]

        lon, lat = [ds.variables[k][:] for k in ["lon", "lat"]]

        # plot station positions and upstream areas
        cell_manager = CellManager(fldir, nx=fldir.shape[0], ny=fldir.shape[1],
                                   lons2d=lon, lats2d=lat, accumulation_area_km2=faa)



    fig = plt.figure()
    from crcm5.mh_domains import default_domains
    gc = default_domains.bc_mh_011

    # get the basemap object
    bmp, data_mask = gc.get_basemap_using_shape_with_polygons_of_interest(
        lon, lat, shp_path=default_domains.MH_BASINS_PATH, mask_margin=5)

    xx, yy = bmp(lon, lat)
    ax = plt.gca()
    colors = ["g", "r", "m", "c", "y", "violet"]
    i = 0
    for s, mp in stations_to_mp.items():
        assert isinstance(mp, ModelPoint)
        upstream_mask = cell_manager.get_mask_of_upstream_cells_connected_with_by_indices(mp.ix, mp.jy)

        current_points = upstream_mask > 0.5

        bmp.drawcoastlines()
        bmp.drawrivers()

        bmp.scatter(xx[current_points], yy[current_points], c=colors[i % len(colors)])
        i += 1


        va = "top"
        if s.id in ["05AK001", "05LM006"]:
            va = "bottom"

        ha = "left"
        if s.id in ["05QB003"]:
            ha = "right"

        bmp.scatter(xx[mp.ix, mp.jy], yy[mp.ix, mp.jy], c="b")
        ax.annotate(s.id, xy=(xx[mp.ix, mp.jy], yy[mp.ix, mp.jy]), horizontalalignment=ha,
                    verticalalignment=va, bbox=dict(boxstyle='round', fc='gray', alpha=0.5))

    fig.savefig("mh/offline_stations_{}.png".format("positions"))
    plt.close(fig)
Example 11
def plot_streamflow():
    plot_utils.apply_plot_params(width_pt=None, width_cm=19, height_cm=10, font_size=12)
    labels = ["Glacier-only", "All"]
    colors = ["r", "b"]
    paths = [
        "/skynet3_exec2/aganji/glacier_katja/watroute_gemera/discharge_stat_glac_00_99_2000_01_01_00_00.nc",
        "/skynet3_exec2/aganji/glacier_katja/watroute_gemera/discharge_stat_both_00_992000_01_01_00_00.nc"]

    infocell_path = "/skynet3_exec2/aganji/glacier_katja/watroute_gemera/infocell.nc"

    start_year = 2000
    end_year = 2099


    with Dataset(paths[0]) as ds:
        acc_area = ds.variables["accumulation_area"][:]
        lons = ds.variables["longitude"][:]
        lats = ds.variables["latitude"][:]
        x_index = ds.variables["x_index"][:]
        y_index = ds.variables["y_index"][:]

    with Dataset(infocell_path) as ds:
        fldr = ds.variables["flow_direction_value"][:]

    driver = ogr.GetDriverByName('ESRI Shapefile')
    data_source = driver.Open(path_to_basin_shape, 0)

    assert isinstance(data_source, ogr.DataSource)

    geom = None

    print(data_source.GetLayerCount())

    layer = data_source.GetLayer()
    assert isinstance(layer, ogr.Layer)

    print(layer.GetFeatureCount())
    for feature in layer:
        assert isinstance(feature, ogr.Feature)
        geom = feature.geometry()

        assert isinstance(geom, ogr.Geometry)
        # print(str(geom))

        # geom = ogr.CreateGeometryFromWkt(geom.ExportToWkt())

    i, j = get_outlet_indices(geom, acc_area, lons, lats)
    print("Accumulation area at the outlet (according to flow directions): {}".format(acc_area[i, j]))


    cell_manager = CellManager(flow_dirs=fldr, lons2d=lons, lats2d=lats, accumulation_area_km2=acc_area)

    model_mask = cell_manager.get_mask_of_upstream_cells_connected_with_by_indices(i, j)


    cell_index = np.where((x_index == i) & (y_index == j))[0][0]

    print(cell_index)

    if not img_folder.is_dir():
        img_folder.mkdir(parents=True)

    # Do the plotting
    fig = plt.figure()
    gs = gridspec.GridSpec(1, 2, wspace=0.0)

    # Plot the hydrograph
    ax = fig.add_subplot(gs[0, 0])

    for p, c, label in zip(paths, colors, labels):
        with Dataset(p) as ds:
            stfl = ds.variables["water_discharge_accumulated"][:, cell_index]

            time = ds.variables["time"][:].astype(str)
            time = [datetime.strptime("".join(ts), "%Y_%m_%d_%H_%M") for ts in time]
            df = pd.DataFrame(index=time, data=stfl)

            # remove 29th of February
            df = df[[not (d.month == 2 and d.day == 29) and (start_year <= d.year <= end_year) for d in df.index]]

            df = df.groupby(lambda d: datetime(2001, d.month, d.day)).mean()

            ax.plot(df.index, df.values, c, lw=2, label=label)

    ax.xaxis.set_major_formatter(FuncFormatter(lambda tickval, pos: num2date(tickval).strftime("%b")[0]))
    ax.xaxis.set_major_locator(MonthLocator())
    ax.legend(loc="upper left", bbox_to_anchor=(1.05, 1), borderaxespad=0)
    ax.set_title("{}-{}".format(start_year, end_year))

    # Plot the point position
    ax = fig.add_subplot(gs[0, 1])
    bsm = get_basemap_glaciers_nw_america()
    x, y = bsm(lons[i, j], lats[i, j])
    bsm.scatter(x, y, c="b", ax=ax, zorder=10)
    bsm.drawcoastlines()
    bsm.readshapefile(path_to_basin_shape.replace(".shp", ""), "basin", color="m", linewidth=2, zorder=5)

    # xx, yy = bsm(lons, lats)
    # cmap = cm.get_cmap("gray_r", 10)
    # bsm.pcolormesh(xx, yy, model_mask * 0.5, cmap=cmap, vmin=0, vmax=1)

    bsm.drawrivers(ax=ax, zorder=9, color="b")


    plt.savefig(str(img_folder.joinpath("stfl_at_outlets.pdf")), bbox_inches="tight")
    plt.close(fig)
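
The helper get_outlet_indices used above is not shown in this listing. A minimal sketch of what it is assumed to do, namely return the indices of the grid cell with the largest flow-accumulation area whose centre falls inside the basin polygon, could look like the following (the function name, the assumption that the shapefile geometry is in lon/lat, and the brute-force point-in-polygon loop are illustrative assumptions, not the original implementation):

import numpy as np
from osgeo import ogr


def get_outlet_indices_sketch(basin_geom, acc_area, lons, lats):
    # Return (i, j) of the cell with the largest accumulation area whose
    # centre lies inside the basin polygon (assumes the geometry and the
    # lons/lats arrays share the same lon/lat coordinate system).
    inside = np.zeros(lons.shape, dtype=bool)
    for i in range(lons.shape[0]):
        for j in range(lons.shape[1]):
            p = ogr.CreateGeometryFromWkt("POINT ({} {})".format(lons[i, j], lats[i, j]))
            inside[i, j] = basin_geom.Contains(p)

    # mask out cells outside the basin, then pick the maximum accumulation area
    masked = np.where(inside, acc_area, -np.inf)
    return np.unravel_index(np.argmax(masked), masked.shape)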
Example n. 12
def main(start_year=1980, end_year=1989):

    soil_layer_widths = infovar.soil_layer_widths_26_to_60
    soil_tops = np.cumsum(soil_layer_widths).tolist()[:-1]
    soil_tops = [
        0,
    ] + soil_tops

    selected_station_ids = [
        "061905", "074903", "090613", "092715", "093801", "093806"
    ]

    #    path1 = "/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-rl_spinup.hdf"
    #    label1 = "CRCM5-HCD-RL"

    path1 = "/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-rl-intfl_spinup_ITFS.hdf5"
    label1 = "CRCM5-HCD-RL-INTFL"

    path2 = "/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-rl-intfl_ITFS_avoid_truncation1979-1989.hdf5"
    label2 = "CRCM5-HCD-RL-INTFL-improved"

    ############
    images_folder = "images_for_lake-river_paper/comp_soil_profiles"
    if not os.path.isdir(images_folder):
        os.mkdir(images_folder)

    fldirs = analysis.get_array_from_file(
        path=path1, var_name=infovar.HDF_FLOW_DIRECTIONS_NAME)
    lons2d, lats2d, basemap = analysis.get_basemap_from_hdf(path1)

    lake_fractions = analysis.get_array_from_file(
        path=path1, var_name=infovar.HDF_LAKE_FRACTION_NAME)
    cell_areas = analysis.get_array_from_file(
        path=path1, var_name=infovar.HDF_CELL_AREA_NAME_M2)
    acc_areakm2 = analysis.get_array_from_file(
        path=path1, var_name=infovar.HDF_ACCUMULATION_AREA_NAME)
    depth_to_bedrock = analysis.get_array_from_file(
        path=path1, var_name=infovar.HDF_DEPTH_TO_BEDROCK_NAME)

    cell_manager = CellManager(fldirs,
                               lons2d=lons2d,
                               lats2d=lats2d,
                               accumulation_area_km2=acc_areakm2)

    # get climatological liquid soil moisture and convert fractions to mm
    t0 = time.perf_counter()  # time.clock() was removed in Python 3.8
    daily_dates, levels, i1_nointfl = analysis.get_daily_climatology_of_3d_field(
        path_to_hdf_file=path1,
        var_name="I1",
        start_year=start_year,
        end_year=end_year)
    print("read I1 - 1")
    print("Spent {0} seconds ".format(time.clock() - t0))

    _, _, i1_intfl = analysis.get_daily_climatology_of_3d_field(
        path_to_hdf_file=path2,
        var_name="I1",
        start_year=start_year,
        end_year=end_year)
    print("read I1 - 2")

    # get climatological frozen soil moisture and convert fractions to mm
    _, _, i2_nointfl = analysis.get_daily_climatology_of_3d_field(
        path_to_hdf_file=path1,
        var_name="I2",
        start_year=start_year,
        end_year=end_year)
    print("read I2 - 1")

    _, _, i2_intfl = analysis.get_daily_climatology_of_3d_field(
        path_to_hdf_file=path2,
        var_name="I2",
        start_year=start_year,
        end_year=end_year)
    print("read I2 - 2")
    #
    sm_intfl = i1_intfl + i2_intfl
    sm_nointfl = i1_nointfl + i2_nointfl

    #Get the list of stations to do the comparison with
    stations = cehq_station.read_station_data(
        start_date=datetime(start_year, 1, 1),
        end_date=datetime(end_year, 12, 31),
        selected_ids=selected_station_ids)

    print("sm_noinfl, min, max = {0}, {1}".format(sm_nointfl.min(),
                                                  sm_nointfl.max()))
    print("sm_infl, min, max = {0}, {1}".format(sm_intfl.min(),
                                                sm_intfl.max()))
    diff = (sm_intfl - sm_nointfl)
    #diff *= soil_layer_widths[np.newaxis, :, np.newaxis, np.newaxis] * 1000  # to convert in mm

    #print "number of nans", np.isnan(diff).astype(int).sum()

    print("cell area min,max = {0}, {1}".format(cell_areas.min(),
                                                cell_areas.max()))
    print("acc area min,max = {0}, {1}".format(acc_areakm2.min(),
                                               acc_areakm2.max()))

    assert np.all(lake_fractions >= 0)
    print("lake fractions (min, max): ", lake_fractions.min(),
          lake_fractions.max())

    # No need to go very deep
    nlayers = 3
    z, t = np.meshgrid(soil_tops[:nlayers], date2num(daily_dates))
    station_to_mp = cell_manager.get_model_points_for_stations(stations)

    plotted_global = False

    for the_station, mp in station_to_mp.items():
        assert isinstance(mp, ModelPoint)
        assert isinstance(the_station, Station)
        fig = plt.figure()
        umask = cell_manager.get_mask_of_upstream_cells_connected_with_by_indices(
            mp.ix, mp.jy)

        #exclude lake cells from the profiles
        sel = (umask == 1) & (depth_to_bedrock > 3) & (acc_areakm2 >= 0)

        umaskf = umask.astype(float)
        umaskf *= (1.0 - lake_fractions) * cell_areas
        umaskf[~sel] = 0.0

        profiles = np.tensordot(diff, umaskf) / umaskf.sum()
        print(profiles.shape, profiles.min(), profiles.max(), umaskf.sum(),
              umaskf.min(), umaskf.max())

        d = np.abs(profiles).max()
        print("d = {0}".format(d))
        clevs = np.round(np.linspace(-d, d, 12), decimals=5)

        diff_cmap = cm.get_cmap("RdBu_r", lut=len(clevs) - 1)
        bn = BoundaryNorm(clevs, len(clevs) - 1)

        plt.title("({})-({})".format(label2, label2))
        img = plt.contourf(t,
                           z,
                           profiles[:, :nlayers],
                           cmap=diff_cmap,
                           levels=clevs,
                           norm=bn)
        plt.colorbar(img, ticks=clevs)
        ax = plt.gca()
        assert isinstance(ax, Axes)

        ax.invert_yaxis()
        ax.xaxis.set_major_formatter(DateFormatter("%b"))
        ax.xaxis.set_major_locator(MonthLocator())

        fig.savefig(os.path.join(
            images_folder, "{0}_{1}_{2}.jpeg".format(the_station.id, label1,
                                                     label2)),
                    dpi=cpp.FIG_SAVE_DPI,
                    bbox_inches="tight")

        print("processed: {0}".format(the_station))
        if not plotted_global:
            plotted_global = True
            fig = plt.figure()
            sel = (depth_to_bedrock >= 0.1) & (acc_areakm2 >= 0)

            umaskf = (1.0 - lake_fractions) * cell_areas
            umaskf[~sel] = 0.0

            profiles = np.tensordot(diff, umaskf) / umaskf.sum()
            print(profiles.shape, profiles.min(), profiles.max(), umaskf.sum(),
                  umaskf.min(), umaskf.max())

            d = np.abs(profiles).max()
            print("d = {0}".format(d))
            clevs = np.round(np.linspace(-d, d, 12), decimals=5)

            diff_cmap = cm.get_cmap("RdBu_r", lut=len(clevs) - 1)
            bn = BoundaryNorm(clevs, len(clevs) - 1)

            img = plt.contourf(t,
                               z,
                               profiles[:, :nlayers],
                               cmap=diff_cmap,
                               levels=clevs,
                               norm=bn)
            plt.colorbar(img, ticks=clevs)
            ax = plt.gca()
            assert isinstance(ax, Axes)

            ax.invert_yaxis()
            ax.xaxis.set_major_formatter(DateFormatter("%b"))
            ax.xaxis.set_major_locator(MonthLocator())

            fig.savefig(os.path.join(images_folder, "global_mean.jpeg"),
                        dpi=cpp.FIG_SAVE_DPI,
                        bbox_inches="tight")

    pass
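
The upstream averaging in the loop above relies on np.tensordot with its default axes=2, which contracts the two trailing (spatial) dimensions of the difference field with the 2-D weight field and so is consistent with a (time, level, x, y) field and an (x, y) weight array, yielding a (time, level) profile. A small self-contained check of that behaviour (the shapes below are illustrative, not the real grid):

import numpy as np

ntime, nlev, nx, ny = 4, 3, 5, 6                    # illustrative sizes only
diff = np.random.rand(ntime, nlev, nx, ny)          # stands in for sm_intfl - sm_nointfl
weights = np.random.rand(nx, ny)                    # e.g. (1 - lake_fraction) * cell_area, zeroed outside the basin

# default axes=2 sums the product over the trailing (nx, ny) axes of `diff`
# and both axes of `weights`, producing a (time, level) profile
profiles = np.tensordot(diff, weights) / weights.sum()
assert profiles.shape == (ntime, nlev)

# equivalent explicit form
expected = (diff * weights[np.newaxis, np.newaxis, :, :]).sum(axis=(-2, -1)) / weights.sum()
assert np.allclose(profiles, expected)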
def get_basin_to_outlet_indices_map(shape_file=BASIN_BOUNDARIES_FILE, lons=None, lats=None,
                                    directions=None, accumulation_areas=None):
    driver = ogr.GetDriverByName("ESRI Shapefile")
    print(driver)
    ds = driver.Open(shape_file, 0)

    assert isinstance(ds, ogr.DataSource)
    layer = ds.GetLayer()

    assert isinstance(layer, ogr.Layer)
    print(layer.GetFeatureCount())

    latlong_proj = osr.SpatialReference()
    latlong_proj.ImportFromEPSG(4326)

    utm_proj = layer.GetSpatialRef()

    # create Coordinate Transformation
    coord_transform = osr.CoordinateTransformation(latlong_proj, utm_proj)

    utm_coords = coord_transform.TransformPoints(list(zip(lons.flatten(), lats.flatten())))
    utm_coords = np.asarray(utm_coords)
    x_utm = utm_coords[:, 0].reshape(lons.shape)
    y_utm = utm_coords[:, 1].reshape(lons.shape)

    basin_mask = np.zeros_like(lons)
    cell_manager = CellManager(directions, accumulation_area_km2=accumulation_areas, lons2d=lons, lats2d=lats)

    index = 1
    basins = []
    basin_names = []
    basin_name_to_mask = {}
    for feature in layer:
        assert isinstance(feature, ogr.Feature)
        # print feature["FID"]

        geom = feature.GetGeometryRef()
        assert isinstance(geom, ogr.Geometry)
        basins.append(ogr.CreateGeometryFromWkb(geom.ExportToWkb()))
        basin_names.append(feature["abr"])

    # work on a copy: slicing with [:, :] returns a view, and the array is modified below
    accumulation_areas_temp = accumulation_areas.copy()
    lons_out, lats_out = [], []
    basin_names_out = []
    name_to_ij_out = {}

    min_basin_area = min(b.GetArea() * 1.0e-6 for b in basins)

    while len(basins):
        fm = np.max(accumulation_areas_temp)

        i, j = np.where(fm == accumulation_areas_temp)
        i, j = i[0], j[0]
        p = ogr.CreateGeometryFromWkt("POINT ({} {})".format(x_utm[i, j], y_utm[i, j]))
        b_selected = None
        name_selected = None
        for name, b in zip(basin_names, basins):
            assert isinstance(b, ogr.Geometry)
            assert isinstance(p, ogr.Geometry)
            if b.Contains(p.Buffer(2000 * 2 ** 0.5)):
                # Check if there is an upstream cell from the same basin
                the_mask = cell_manager.get_mask_of_upstream_cells_connected_with_by_indices(i, j)

                # Save the mask of the basin for future use
                basin_name_to_mask[name] = the_mask

                # if is_part_of_points_in(b, x_utm[the_mask == 1], y_utm[the_mask == 1]):
                # continue


                b_selected = b
                name_selected = name
                # basin_names_out.append(name)

                lons_out.append(lons[i, j])
                lats_out.append(lats[i, j])
                name_to_ij_out[name] = (i, j)

                basin_mask[the_mask == 1] = index
                index += 1

                break

        if b_selected is not None:
            basins.remove(b_selected)
            basin_names.remove(name_selected)
            outlet_index_in_basin = 1
            current_basin_name = name_selected
            while current_basin_name in basin_names_out:
                current_basin_name = name_selected + str(outlet_index_in_basin)
                outlet_index_in_basin += 1

            basin_names_out.append(current_basin_name)
            print(len(basins), basin_names_out)

        accumulation_areas_temp[i, j] = -1

    return name_to_ij_out, basin_name_to_mask
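
A hedged usage sketch for the function above: name_to_ij_out maps each basin label to the (i, j) grid indices of its chosen outlet, and basin_name_to_mask holds the corresponding upstream masks, so a caller could, for example, report the upstream cell count and extract an outlet discharge series. The arrays lons, lats, fldirs, acc_area_km2 and stfl below are assumed inputs, not defined in this listing:

# hypothetical driver code; all input arrays are assumptions
name_to_ij, name_to_mask = get_basin_to_outlet_indices_map(
    shape_file=BASIN_BOUNDARIES_FILE, lons=lons, lats=lats,
    directions=fldirs, accumulation_areas=acc_area_km2)

for name, (i, j) in name_to_ij.items():
    n_upstream = int(name_to_mask[name].sum())
    outlet_series = stfl[:, i, j]  # discharge time series at the basin outlet
    print("{}: outlet at ({}, {}), {} upstream cells, mean discharge = {:.1f}".format(
        name, i, j, n_upstream, outlet_series.mean()))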