Example no. 1
0
def main():
    """Fetch a static Google map (satellite view centred on Quebec) and
    mark the first five CEHQ level stations on it, saving the image to
    ``gmap.png`` in the current directory.
    """
    # Base request parameters.  NOTE(review): the "sensor" parameter is no
    # longer required by the Static Maps API but is kept so the generated
    # URL stays byte-identical to the original.
    params = {
        "center": "46.5,-72",
        "zoom": "5",
        "size": "800x800",
        "maptype": "satellite",
        "sensor": "false"
    }
    script = "http://maps.googleapis.com/maps/api/staticmap?"
    print(urllib.parse.urlencode(params))
    url = script + urllib.parse.urlencode(params)

    # Append a red marker (labelled with the station id) for each of the
    # first five level stations.
    stations = cehq_station.read_station_data(folder="data/cehq_levels")[:5]
    for s in stations:
        assert isinstance(s, cehq_station.Station)
        url += "&" + urllib.parse.urlencode({
            "markers":
            "color:red|label:%s|%f,%f" % (s.id, s.latitude, s.longitude)
        })

    print(url)
    urllib.request.urlretrieve(url, filename="gmap.png")
Example no. 2
0
def regenerate_station_to_gridcell_mapping(start_year, end_year,
                                           model_manager):
    """
    should be called when grid or search algorithm change

    For each CEHQ station with data in [start_year, end_year], select the
    model grid cell whose accumulation (drainage) area best matches the
    station's drainage area, searching among the 8 model cells nearest to
    the station position.

    :param start_year: first year of the period of interest (int)
    :param end_year: last year of the period of interest (int)
    :param model_manager: Crcm5ModelDataManager providing the kd-tree of
        cell positions and the 2D accumulation-area field
    :return: dict mapping Station -> ModelPoint
    """

    assert isinstance(model_manager, Crcm5ModelDataManager)

    ktree = model_manager.kdtree
    model_acc_area = model_manager.accumulation_area_km2
    # flattened view, indexable by the kd-tree's flat indices
    model_acc_area_1d = model_acc_area.flatten()

    #    selected_ids = ["104001", "103715",
    #                    "093806", "093801",
    #                    "092715",
    #                    "081006", "040830"]

    # selected_ids=None => read all available stations for the period
    selected_ids = None
    start_date = datetime(start_year, 1, 1)
    end_date = datetime(end_year, 12, 31)

    stations = cehq_station.read_station_data(selected_ids=selected_ids,
                                              start_date=start_date,
                                              end_date=end_date)

    station_to_grid_point = {}
    for s in stations:
        assert isinstance(s, Station)
        # Cartesian coordinates avoid longitude wrap-around in the kd-tree query
        x, y, z = lat_lon.lon_lat_to_cartesian(s.longitude, s.latitude)
        # distances to / flat indices of the 8 nearest model cells
        dists, inds = ktree.query((x, y, z), k=8)

        # smallest drainage-area mismatch among the 8 neighbours
        deltaDaMin = np.min(np.abs(model_acc_area_1d[inds] - s.drainage_km2))

        # position of the best match within `inds` (used only to report the
        # distance at the end of the loop)
        imin = np.where(
            np.abs(model_acc_area_1d[inds] - s.drainage_km2) == deltaDaMin)[0]

        deltaDa2D = np.abs(model_acc_area - s.drainage_km2)

        # NOTE(review): this searches the WHOLE 2D field for a cell whose
        # mismatch equals deltaDaMin; a cell outside the 8 neighbours could
        # be selected if it happens to match exactly -- confirm intended.
        ij = np.where(deltaDa2D == deltaDaMin)

        mp = ModelPoint()
        mp.accumulation_area = model_acc_area[ij[0][0], ij[1][0]]
        mp.ix = ij[0][0]
        mp.jy = ij[1][0]
        mp.longitude = model_manager.lons2D[mp.ix, mp.jy]
        mp.latitude = model_manager.lats2D[mp.ix, mp.jy]

        #flow in mask
        mp.flow_in_mask = model_manager.get_mask_for_cells_upstream(
            mp.ix, mp.jy)

        station_to_grid_point[s] = mp

        print("da_diff (sel) = ", deltaDaMin)
        print("dist (sel) = ", dists[imin])

    return station_to_grid_point
def main_for_cc_paper(start_date=None, end_date=None):
    """Draw the model-vs-observation streamflow comparison used in the
    climate-change paper, for the stations that have lakes upstream.

    :param start_date: start of the comparison period (datetime);
        ``start_date.year`` is used, so it must not be None
    :param end_date: end of the comparison period (datetime); must not be None
    """
    # Station ids to get from the CEHQ database
    ids_with_lakes_upstream = [
        "104001", "093806", "093801", "081002", "081007", "080718"
    ]

    selected_ids = ids_with_lakes_upstream

    # Simulation labels paired (in order) with their hdf5 files
    sim_labels = ["ERAI-CRCM5-L", "CanESM2-CRCM5-L"]

    sim_file_names = [
        "/RESCUE/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-rl.hdf5",
        "/RESCUE/skynet3_rech1/huziy/hdf_store/cc-canesm2-driven/quebec_0.1_crcm5-hcd-rl-cc-canesm2-1980-2010.hdf5"
    ]

    # Observations are always black; simulations take colors in order.
    sim_name_to_color = OrderedDict([("Obs.", "k")])
    for label, color in zip(sim_labels, ["dodgerblue", "r", "g"]):
        sim_name_to_color[label] = color

    sim_name_to_file_name = OrderedDict(zip(sim_labels, sim_file_names))

    # Get the list of stations to do the comparison with
    stations = cehq_station.read_station_data(
        start_date=start_date, end_date=end_date, selected_ids=selected_ids
    )

    print("Initial list of stations:")
    for the_station in stations:
        print("{0}".format(the_station))

    plot_utils.apply_plot_params(font_size=16, width_pt=None, width_cm=25, height_cm=12)
    draw_model_comparison(model_points=None, sim_name_to_file_name=sim_name_to_file_name,
                          hdf_folder=None,
                          start_year=start_date.year, end_year=end_date.year, stations=stations,
                          stfl_name="STFL",
                          drainage_area_reldiff_min=0.1,
                          plot_upstream_area_averaged=False,
                          sim_name_to_color=sim_name_to_color)
Example no. 4
0
def main():
    """Read CEHQ stations for 1985-1990 and print, for each one, how many
    observation dates it has and the period they span."""
    period_start = datetime(1985, 1, 1)
    period_end = datetime(1990, 12, 31)

    stations = cehq_station.read_station_data(start_date=period_start, end_date=period_end)
    print("Read {0} stations".format(len(stations)))

    for station in stations:
        if len(station.dates):
            print(station.id, "=>", len(station.dates), "from: ", station.dates[0], " to ", station.dates[-1])
        else:
            # no data at all for this station in the requested period
            print(station.id, "=>", len(station.dates), "from: -- to -- ")
def main():
    """Stub: read the stations listed in ``selected_ids`` from the CEHQ
    database.  With the empty id list nothing useful is read yet.
    """
    # TODO: fill in the station ids (and the model file path) to plot
    selected_ids = []

    # Get the list of stations to plot
    stations = cehq_station.read_station_data(
        start_date=None, end_date=None, selected_ids=selected_ids
    )
Example no. 6
0
def main():
    """Print the list of complete years available at each Montreal-2017
    flood streamflow station and plot the station positions over the basin
    shapefiles."""

    data_dir = "data/cehq_data/MontrealFlood2017_station_data/streamflow/daily"

    # Only the Ottawa river basin outline is currently enabled.
    basin_shapes = [
        "/RESCUE/skynet3_rech1/huziy/Netbeans Projects/Python/RPN/data/shp/mtl_flood_2017_basins/02JKL_SDA_Ottawa.shp",
    ]

    selected_ids = [""]
    excluded_ids = ["120201"]

    start_date = datetime(1980, 1, 1)
    end_date = datetime(2017, 6, 1)

    stations = cehq_station.read_station_data(start_date=start_date, end_date=end_date, folder=data_dir,
                                              min_number_of_complete_years=0, only_natural=None)

    # Report what was read.
    for the_station in stations:
        assert isinstance(the_station, Station)
        print(the_station)
        print(the_station.get_list_of_complete_years())

    # Drop the explicitly excluded stations.
    stations = [the_station for the_station in stations if the_station.id not in excluded_ids]

    plot_utils.apply_plot_params(font_size=10)
    img_file = commons.img_folder / "stfl_station_positions.png"

    plot_station_positions(stations, img_file=str(img_file), shp_paths=basin_shapes,
                           min_lon=-81.5, max_lon=-70, min_lat=43.5, max_lat=49)
Example no. 7
0
def main():
    """Download a static Google satellite map with the first five CEHQ
    level stations marked in red, storing the result as gmap.png."""
    base_params = {"center": "46.5,-72", "zoom": "5", "size": "800x800", "maptype": "satellite", "sensor": "false"}
    base_url = "http://maps.googleapis.com/maps/api/staticmap?"
    encoded = urllib.parse.urlencode(base_params)
    print(encoded)
    url = base_url + encoded

    # One red marker per station, labelled with its id.
    for station in cehq_station.read_station_data(folder="data/cehq_levels")[:5]:
        assert isinstance(station, cehq_station.Station)
        marker = "color:red|label:%s|%f,%f" % (station.id, station.latitude, station.longitude)
        url = url + "&" + urllib.parse.urlencode({"markers": marker})

    print(url)
    urllib.request.urlretrieve(url, filename="gmap.png")
Example no. 8
0
def main():
    """Show the complete-year coverage of the 2017 Montreal flood
    streamflow records and draw a map of the station positions."""

    data_dir = "data/cehq_data/MontrealFlood2017_station_data/streamflow/daily"

    basin_shapes = [
        "/RESCUE/skynet3_rech1/huziy/Netbeans Projects/Python/RPN/data/shp/mtl_flood_2017_basins/02JKL_SDA_Ottawa.shp",
    ]

    selected_ids = [""]

    excluded_ids = ["120201"]

    period_start = datetime(1980, 1, 1)
    period_end = datetime(2017, 6, 1)

    stations = cehq_station.read_station_data(start_date=period_start,
                                              end_date=period_end,
                                              folder=data_dir,
                                              min_number_of_complete_years=0,
                                              only_natural=None)

    # Print each station together with its complete years.
    for st in stations:
        assert isinstance(st, Station)
        print(st)
        print(st.get_list_of_complete_years())

    # Remove the stations explicitly black-listed above.
    stations = [st for st in stations if st.id not in excluded_ids]

    plot_utils.apply_plot_params(font_size=10)
    img_path = commons.img_folder / "stfl_station_positions.png"

    plot_station_positions(stations,
                           img_file=str(img_path),
                           shp_paths=basin_shapes,
                           min_lon=-81.5,
                           max_lon=-70,
                           min_lat=43.5,
                           max_lat=49)
Example no. 9
0
def main(hdf_folder="/home/huziy/skynet3_rech1/hdf_store", start_date=None, end_date=None):
    """Compare the CRCM5-L2 simulated streamflow with CEHQ observations.

    :param hdf_folder: folder containing the simulation hdf5 files
    :param start_date: start of the comparison period (datetime); must not
        be None since ``start_date.year`` is used below
    :param end_date: end of the comparison period (datetime); must not be None
    """
    # Station ids to get from the CEHQ database
    # selected_ids = ["092715", "080101", "074903", "050304", "080104", "081007", "061905",
    #                 "041903", "040830", "093806", "090613", "081002", "093801", "080718"]

    # NOTE(review): selected_ids is assigned but never passed to
    # read_station_data below -- all stations in the folder are read.
    selected_ids = ["092715", "074903", "080104", "081007", "061905",
                    "093806", "090613", "081002", "093801", "080718", "104001"]

    # selected_ids = [
    #     "074903", "061905", "090613", "092715", "093801", "093806", "081002"
    # ]

    # selected_ids = ["081002", "104001"]



    # selected_ids = ["090613", ]

    # Simulation label(s) paired (in order) with their hdf5 file names.
    sim_labels = [
        "CRCM5-L2",
    ]

    sim_file_names = [
        "quebec_0.1_crcm5-hcd-rl.hdf5",
    ]

    sim_name_to_file_name = OrderedDict()
    for k, v in zip(sim_labels, sim_file_names):
        sim_name_to_file_name[k] = v

        #sim_name_to_file_name = {
        #"CRCM5-R": "quebec_0.1_crcm5-r.hdf5",
        #"CRCM5-HCD-R": "quebec_0.1_crcm5-hcd-r.hdf5",
        #"CRCM5-HCD-RL": "/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-rl_spinup.hdf",
        #"CRCM5-HCD-RL-INTFL": "/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-rl-intfl_do_not_discard_small.hdf",
        #"SANI=10000, ignore THFC":
        #    "/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-rl-intfl_sani-10000_not_care_about_thfc.hdf",

        #"CRCM5-HCD-RL-ERA075": "/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-rl-intfl_spinup_ecoclimap_era075.hdf",
        #"SANI=10000": "/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-rl-intfl_sani-10000.hdf"
        #"CRCM5-HCD-RL-ECOCLIMAP": "/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-rl-intfl_spinup_ecoclimap.hdf"
    #}


    # Stations excluded from the comparison (e.g. regulated flow).
    skip_list = [
        "061303",  # regulated
        "061305", "061304", "051012", "050437", "050914", "030247", "030268"

    ]


    # Get the list of stations to do the comparison with
    stations = cehq_station.read_station_data(
        folder="/home/huziy/skynet3_rech1/CEHQ",
        start_date=start_date, end_date=end_date,
        min_number_of_complete_years=1
    )



    # stations.extend(
    #    cehq_station.load_from_hydat_db(start_date=start_date, end_date=end_date, datavariable="level")
    # )

    # Commented hydat station for performance during testing
    # province = "QC"
    # stations_hd = cehq_station.load_from_hydat_db(start_date=start_date, end_date=end_date, province=province)
    # if not len(stations_hd):
    #     print "No hydat stations satisying the conditions: period {0}-{1}, province {2}".format(
    #         str(start_date), str(end_date), province
    #     )
    # stations.extend(stations_hd)

    # province = "ON"
    # stations_hd = cehq_station.load_from_hydat_db(start_date=start_date, end_date=end_date, province=province)
    # stations.extend(stations_hd)




    # Do not process skip stations
    stations = [the_s for the_s in stations if the_s.id not in skip_list]

    # debug:
    for s in stations:
        assert isinstance(s, Station)
        print(s.get_list_of_complete_years())
        print(s.drainage_km2)

    # fail early if nothing survived the filtering
    assert len(stations)

    draw_model_comparison(model_points=None, sim_name_to_file_name=sim_name_to_file_name,
                          hdf_folder=hdf_folder,
                          start_year=start_date.year,
                          end_year=end_date.year, stations=stations, plot_upstream_averages=False)
def main():
    """Scatter-plot modelled vs. observed Q90 (high-flow) and Q10
    (low-flow) percentiles of daily streamflow climatologies for two
    simulations (CRCM5-L1 and CRCM5-L2, 1980-2010) at stations with lakes
    upstream, annotate R^2 values, and save the figure to
    <images_folder>/percentiles_comparison.png.
    """
    start_year = 1980
    end_year = 2010

    start_date = datetime(start_year, 1, 1)
    end_date = datetime(end_year, 12, 31)

    ids_with_lakes_upstream = [
        "104001", "093806", "093801", "081002", "081007", "080718"
    ]

    # NOTE(review): the list below is immediately overwritten and has no effect.
    selected_station_ids = ["092715", "074903", "080104", "081007", "061905",
                            "093806", "090613", "081002", "093801", "080718", "104001"]

    selected_station_ids = ids_with_lakes_upstream

    # Get the list of stations to do the comparison with
    stations = cehq_station.read_station_data(
        start_date=start_date,
        end_date=end_date,
        selected_ids=selected_station_ids
    )


    # add hydat stations
    # province = "QC"
    # min_drainage_area_km2 = 10000.0
    # stations_hd = cehq_station.load_from_hydat_db(start_date=start_date, end_date=end_date,
    # province=province, min_drainage_area_km2=min_drainage_area_km2)
    # if not len(stations_hd):
    #     print "No hydat stations satisying the conditions: period {0}-{1}, province {2}".format(
    #         str(start_date), str(end_date), province
    #     )
    # stations.extend(stations_hd)

    # brewer2mpl.get_map args: set name  set type  number of colors
    bmap = brewer2mpl.get_map("Set1", "qualitative", 9)

    path1 = "/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-r.hdf5"
    label1 = "CRCM5-L1"

    path2 = "/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-rl.hdf5"
    label2 = "CRCM5-L2"

    # NOTE(review): the first two palette colors are unpacked in reversed
    # order -- confirm this is intentional.
    color2, color1 = bmap.mpl_colors[:2]

    # Static geophysical fields are taken from the first simulation file.
    fldirs = analysis.get_array_from_file(path=path1, var_name=infovar.HDF_FLOW_DIRECTIONS_NAME)
    lons2d, lats2d, basemap = analysis.get_basemap_from_hdf(path1)

    lake_fractions = analysis.get_array_from_file(path=path1, var_name=infovar.HDF_LAKE_FRACTION_NAME)
    # cell_areas = analysis.get_array_from_file(path=path1, var_name=infovar.HDF_CELL_AREA_NAME)
    acc_area = analysis.get_array_from_file(path=path1, var_name=infovar.HDF_ACCUMULATION_AREA_NAME)

    cell_manager = CellManager(fldirs, lons2d=lons2d, lats2d=lats2d, accumulation_area_km2=acc_area)

    # Map each station to a model grid point (<=30% drainage-area mismatch).
    station_to_mp = cell_manager.get_model_points_for_stations(station_list=stations,
                                                               lake_fraction=lake_fractions,
                                                               drainaige_area_reldiff_limit=0.3)

    # Left panel: Q90 scatter; right panel: Q10 scatter.
    fig, axes = plt.subplots(1, 2, gridspec_kw=dict(top=0.80, wspace=0.4))

    # Accumulators for the scatter plots and the R^2 computations below.
    q90_obs_list = []
    q90_mod1_list = []
    q90_mod2_list = []

    q10_obs_list = []
    q10_mod1_list = []
    q10_mod2_list = []

    for the_station, the_mp in station_to_mp.items():
        assert isinstance(the_station, Station)
        compl_years = the_station.get_list_of_complete_years()
        # skip stations with too short a record to form a climatology
        if len(compl_years) < 3:
            continue

        # Daily streamflow climatologies at the matched grid cell for both
        # simulations, and the observed one on the same stamp dates.
        t, stfl1 = analysis.get_daily_climatology_for_a_point(path=path1, years_of_interest=compl_years,
                                                              i_index=the_mp.ix, j_index=the_mp.jy, var_name="STFA")

        _, stfl2 = analysis.get_daily_climatology_for_a_point(path=path2, years_of_interest=compl_years,
                                                              i_index=the_mp.ix, j_index=the_mp.jy, var_name="STFA")

        _, stfl_obs = the_station.get_daily_climatology_for_complete_years(stamp_dates=t, years=compl_years)

        # Q90
        q90_obs = np.percentile(stfl_obs, 90)
        q90_mod1 = np.percentile(stfl1, 90)
        q90_mod2 = np.percentile(stfl2, 90)

        # Q10
        q10_obs = np.percentile(stfl_obs, 10)
        q10_mod1 = np.percentile(stfl1, 10)
        q10_mod2 = np.percentile(stfl2, 10)

        # save quantiles to lists for correlation calculation
        q90_obs_list.append(q90_obs)
        q90_mod1_list.append(q90_mod1)
        q90_mod2_list.append(q90_mod2)

        q10_mod1_list.append(q10_mod1)
        q10_mod2_list.append(q10_mod2)
        q10_obs_list.append(q10_obs)


        # axes[0].annotate(the_station.id, (q90_obs, np.percentile(stfl1, 90)))
        # axes[1].annotate(the_station.id, (q10_obs, np.percentile(stfl1, 10)))




    # Plot scatter plot of Q90
    the_ax = axes[0]

    # the_ax.annotate(the_station.id, (q90_obs, np.percentile(stfl1, 90)))
    the_ax.scatter(q90_obs_list, q90_mod1_list, label=label1, c=color1)
    the_ax.scatter(q90_obs_list, q90_mod2_list, label=label2, c=color2)



    # plot scatter plot of Q10
    the_ax = axes[1]
    # the_ax.annotate(the_station.id, (q10_obs, np.percentile(stfl1, 10)))
    # handles kept for the shared figure legend below
    h1 = the_ax.scatter(q10_obs_list, q10_mod1_list, label=label1, c=color1)
    h2 = the_ax.scatter(q10_obs_list, q10_mod2_list, label=label2, c=color2)



    # Add correlation coefficients to the axes
    fp = FontProperties(size=14, weight="bold")
    axes[0].annotate(r"$R^2 = {0:.2f}$".format(np.corrcoef(q90_mod1_list, q90_obs_list)[0, 1] ** 2),
                     (0.1, 0.85), color=color1, xycoords="axes fraction", font_properties=fp)
    axes[0].annotate(r"$R^2 = {0:.2f}$".format(np.corrcoef(q90_mod2_list, q90_obs_list)[0, 1] ** 2),
                     (0.1, 0.70), color=color2, xycoords="axes fraction", font_properties=fp)

    axes[1].annotate(r"$R^2 = {0:.2f}$".format(np.corrcoef(q10_mod1_list, q10_obs_list)[0, 1] ** 2),
                     (0.1, 0.85), color=color1, xycoords="axes fraction", font_properties=fp)
    axes[1].annotate(r"$R^2 = {0:.2f}$".format(np.corrcoef(q10_mod2_list, q10_obs_list)[0, 1] ** 2),
                     (0.1, 0.70), color=color2, xycoords="axes fraction", font_properties=fp)


    # Shared axis cosmetics: 1:1 line, scientific tick labels, limits from 0.
    sf = ScalarFormatter(useMathText=True)
    sf.set_powerlimits((-2, 3))
    for ind, the_ax in enumerate(axes):
        plot_one_to_one_line(the_ax)
        # only the left panel gets axis labels
        if ind == 0:
            the_ax.set_xlabel(r"Observed $\left({\rm m^3/s} \right)$")
            the_ax.set_ylabel(r"Modelled $\left({\rm m^3/s} \right)$")

        the_ax.annotate(r"$Q_{90}$" if ind == 0 else r"$Q_{10}$",
                        (0.95, 0.95), xycoords="axes fraction",
                        bbox=dict(facecolor="white"),
                        va="top", ha="right")

        the_ax.xaxis.set_major_formatter(sf)
        the_ax.yaxis.set_major_formatter(sf)

        locator = MaxNLocator(nbins=5)
        the_ax.xaxis.set_major_locator(locator)
        the_ax.yaxis.set_major_locator(locator)
        x1, x2 = the_ax.get_xlim()
        # Since streamflow percentiles can only be positive
        the_ax.set_xlim(0, x2)
        the_ax.set_ylim(0, x2)

    fig.legend([h1, h2], [label1, label2], loc="upper center", ncol=2)
    figpath = os.path.join(images_folder, "percentiles_comparison.png")
    # plt.tight_layout()
    fig.savefig(figpath, dpi=cpp.FIG_SAVE_DPI, bbox_inches="tight")
Example no. 11
0
def plot_at_indices(ix,jy):
    """Contour-plot the monthly climatology (1979-1988) of the difference
    in total (liquid + solid) soil moisture profiles between the
    CRCM5-HCD-RL-INTFL and CRCM5-HCD-RL simulations at grid cell (ix, jy),
    saving the figure to ``soil_profile_at_ix=<ix>;jy=<jy>.pdf``.

    :param ix: x-index of the model grid cell
    :param jy: y-index of the model grid cell
    """
    var_name_liquid = "I1"  # liquid soil moisture variable
    var_name_solid = "I2"   # solid (frozen) soil moisture variable
    #period of interest
    start_year = 1979
    end_year = 1988



    #simulation names corresponding to the paths
    sim_names = ["crcm5-hcd-rl", "crcm5-hcd-rl-intfl"]

    sim_labels = [x.upper() for x in sim_names]


    # soil layer thicknesses (presumably metres -- TODO confirm);
    # cumulative sum gives the depth used as the vertical plot axis
    layer_widths = [0.1, 0.2, 0.3, 0.4, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 1.0, 3.0, 5.0,
                    5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0]

    layer_depths = np.cumsum(layer_widths)


    paths = [
        "/home/huziy/skynet3_rech1/from_guillimin/new_outputs/quebec_0.1_crcm5-hcd-rl_spinup",
        "/home/huziy/skynet3_rech1/from_guillimin/new_outputs/quebec_0.1_crcm5-hcd-rl-intfl_spinup2/Samples_all_in_one"
    ]


    # only the first manager builds a cell manager; it is shared below
    managers = [
        Crcm5ModelDataManager(samples_folder_path=path, file_name_prefix="pm",
            all_files_in_samples_folder=True, need_cell_manager= (i == 0)) for i, path in enumerate(paths)
    ]

    #share the cell manager
    a_data_manager = managers[0]
    assert isinstance(a_data_manager, Crcm5ModelDataManager)
    cell_manager = a_data_manager.cell_manager
    assert isinstance(cell_manager, CellManager)
    for m in managers[1:]:
        assert isinstance(m, Crcm5ModelDataManager)
        m.cell_manager = cell_manager

    #share the lake fraction field
    lake_fraction = a_data_manager.lake_fraction



    selected_ids = ["092715", "080101", "074903", "050304", "080104", "081007", "061905",
                      "041903", "040830", "093806", "090613", "081002", "093801", "080718"]
    start_date = datetime(start_year, 1, 1)
    end_date = datetime(end_year, 12, 31)

    stations = cehq_station.read_station_data(selected_ids = selected_ids,
            start_date=start_date, end_date=end_date
    )

    #stations with corresponding model points
    # NOTE(review): station_to_mp (and lake_fraction above) are never used
    # in the plotting below -- the plotted cell comes directly from (ix, jy).
    station_to_mp = a_data_manager.get_dataless_model_points_for_stations(stations)

    #figure out levels in soil



    sim_label_to_profiles = {}
    fig = plt.figure()
    # NOTE(review): fmt is configured but never attached to an axis -- confirm
    fmt = ScalarFormatter(useMathText=True)
    fmt.set_powerlimits([-2, 2])

    for m, label in zip(managers, sim_labels):
        assert isinstance(m, Crcm5ModelDataManager)

        # monthly climatology of liquid soil moisture, cached between runs
        monthly_means_liquid = _get_cached_monthly_mean_fields(label, start_year, end_year, var_name_liquid)
        if monthly_means_liquid is None:
            monthly_means_liquid = m.get_monthly_climatology_of_3d_field(var_name=var_name_liquid, start_year=start_year
                , end_year=end_year)
            _cache_monthly_mean_fields(monthly_means_liquid, label, start_year, end_year, var_name_liquid)

        # same for the solid (frozen) soil moisture
        monthly_means_solid = _get_cached_monthly_mean_fields(label, start_year, end_year, var_name_solid)
        if monthly_means_solid is None:
            monthly_means_solid = m.get_monthly_climatology_of_3d_field(var_name=var_name_solid, start_year=start_year,
                end_year=end_year)
            _cache_monthly_mean_fields(monthly_means_solid, label, start_year, end_year, var_name_solid)

        # total (liquid + solid) profile at the cell, one row per month
        profiles = [monthly_means_liquid[i][ix,jy, :] + monthly_means_solid[i][ix, jy, :] for i
                    in range(12)]

        sim_label_to_profiles[label] = np.array(profiles)

    x = list(range(12))
    y = layer_depths

    y2d, x2d = np.meshgrid(y, x)
    # difference field: second simulation (INTFL) minus the first (control)
    plt.contourf(x2d, y2d, sim_label_to_profiles[sim_labels[1]] - sim_label_to_profiles[sim_labels[0]])
    # depth increases downward on the plot
    plt.gca().invert_yaxis()
    plt.colorbar()

    #fig.tight_layout()

    fig.savefig("soil_profile_at_ix={0};jy={1}.pdf".format(ix, jy))
def main(hdf_folder="/home/huziy/skynet3_rech1/hdf_store", start_year=1980, end_year=2010):
    """Scatter-plot modelled vs. observed streamflow extreme return levels
    (high and low flows, one panel per return period) for the ERAI- and
    CanESM2-driven CRCM5-L simulations, print mean biases/correlations per
    panel, and save the figure as an eps file in ``img_folder``.

    :param hdf_folder: folder with the simulation hdf5 files
    :param start_year: first year of the analysis period
    :param end_year: last year of the analysis period
    """
    prepare()

    # one marker / color per plotted label (obs + simulations)
    all_markers = ["*", "s", "p", "+", "x", "d", "h"]

    # drop colors that would be invisible on a white background
    excluded = ["white", "w", "aliceblue", "azure"]
    excluded.extend([ci for ci in colors.cnames if "yellow" in ci])

    all_colors = ["k", "b", "r", "g", "m"] + sorted([ci for ci in colors.cnames if ci not in excluded])

    # Station ids to get from the CEHQ database
    ids_with_lakes_upstream = [
        "104001", "093806", "093801", "081002", "081007", "080718"
    ]

    selected_ids = ids_with_lakes_upstream

    filedir = Path(hdf_folder)
    sim_name_to_file_path = OrderedDict([
        # ("CRCM5-LI", filedir.joinpath("quebec_0.1_crcm5-hcd-r.hdf5").as_posix()),

        ("ERAI-CRCM5-L", filedir.joinpath("quebec_0.1_crcm5-hcd-rl.hdf5").as_posix()),

        # ("CanESM2-CRCM5-NL", filedir.joinpath("cc-canesm2-driven/quebec_0.1_crcm5-r-cc-canesm2-1980-2010.hdf5").as_posix()),

        ("CanESM2-CRCM5-L",
         filedir.joinpath("cc-canesm2-driven/quebec_0.1_crcm5-hcd-rl-cc-canesm2-1980-2010.hdf5").as_posix()),

        # ("CanESM2-CRCM5-LI", filedir.joinpath("cc-canesm2-driven/quebec_0.1_crcm5-hcd-rl-intfl-cc-canesm2-1980-2010.hdf5").as_posix()),


    ])

    obs_label = "Obs."
    labels = [obs_label, ] + list(sim_name_to_file_path.keys())

    label_to_marker = dict(zip(labels, all_markers))
    label_to_color = dict(zip(labels, all_colors))

    # Get the list of stations to do the comparison with
    start_date = datetime(start_year, 1, 1)
    end_date = datetime(end_year, 12, 31)
    stations = cehq_station.read_station_data(
        start_date=start_date, end_date=end_date, selected_ids=selected_ids
    )

    # Get geophysical fields from one of the model simulations
    path0 = list(sim_name_to_file_path.values())[0]
    lons2d, lats2d, basemap = analysis.get_basemap_from_hdf(file_path=path0)
    flow_directions = analysis.get_array_from_file(path=path0, var_name=infovar.HDF_FLOW_DIRECTIONS_NAME)
    lake_fraction = analysis.get_array_from_file(path=path0, var_name=infovar.HDF_LAKE_FRACTION_NAME)

    accumulation_area_km2 = analysis.get_array_from_file(path=path0, var_name=infovar.HDF_ACCUMULATION_AREA_NAME)
    area_m2 = analysis.get_array_from_file(path=path0, var_name=infovar.HDF_CELL_AREA_NAME_M2)

    # Try to read cell areas in meters if it is not Ok then try in km2
    if area_m2 is not None:
        cell_area_km2 = area_m2 * 1.0e-6
    else:
        cell_area_km2 = analysis.get_array_from_file(path=path0, var_name=infovar.HDF_CELL_AREA_NAME_KM2)

    # Create a cell manager if it is not provided
    cell_manager = CellManager(flow_directions, accumulation_area_km2=accumulation_area_km2,
                               lons2d=lons2d, lats2d=lats2d)

    # Get the list of the corresponding model points
    station_to_modelpoint = cell_manager.get_model_points_for_stations(
        station_list=stations,
        lake_fraction=lake_fraction,
        drainaige_area_reldiff_limit=0.1)

    # plot_utils.apply_plot_params(font_size=10, width_cm=20, height_cm=18)
    fig = plt.figure()

    # grid of panels: one row per extreme type, one column per return period
    ncols = max([len(rp_list) for et, rp_list in ExtremeProperties.extreme_type_to_return_periods.items()])
    nrows = len(ExtremeProperties.extreme_types)
    gs = GridSpec(nrows, ncols)

    ext_type_to_rp_to_ax = OrderedDict()
    ax_with_legend = None

    # per-label, per-axis scatter data kept for bias/correlation statistics
    label_to_ax_to_xdata = {}
    label_to_ax_to_ydata = {}
    for row, ext_type in enumerate(ExtremeProperties.extreme_types):
        ext_type_to_rp_to_ax[ext_type] = OrderedDict()
        for col, rperiod in enumerate(ExtremeProperties.extreme_type_to_return_periods[ext_type]):
            ax = fig.add_subplot(gs[row, col])
            ext_type_to_rp_to_ax[ext_type][rperiod] = ax

            if col == 0:
                ax.set_ylabel(ext_type)

            # the legend is attached to the bottom-right panel
            if row == nrows - 1 and col == ncols - 1:
                ax_with_legend = ax

            # Set axes labels
            if row == nrows - 1:
                ax.set_xlabel("Observations")

            if col == 0:
                ax.set_ylabel("Model")

            # initialise the empty data lists for this axis
            for label in sim_name_to_file_path:

                if label not in label_to_ax_to_xdata:
                    label_to_ax_to_xdata[label] = {ax: []}
                    label_to_ax_to_ydata[label] = {ax: []}
                else:
                    label_to_ax_to_xdata[label][ax] = []
                    label_to_ax_to_ydata[label][ax] = []

            ax.set_xscale("log")
            ax.set_yscale("log")

    print("Initial list of stations:")

    sim_label_to_handle = {}
    for s in stations:
        print("{0}".format(s))
        assert isinstance(s, Station)

        print(len([y for y in s.get_list_of_complete_years() if start_year <= y <= end_year]))
        # observed annual extremes for the station record
        df_ext_obs = extreme_commons.get_annual_extrema(ts_times=s.dates, ts_vals=s.values,
                                                        start_year=start_year, end_year=end_year)
        mp = station_to_modelpoint[s]

        assert isinstance(mp, ModelPoint)

        years_of_interest = df_ext_obs.index

        label_to_extrema_model = {}



        # label -> ext_type -> [return period -> ret level, return period -> std]
        label_to_return_levels = OrderedDict(
            [(obs_label, OrderedDict())]
        )
        for sim_label, sim_path in sim_name_to_file_path.items():
            label_to_return_levels[sim_label] = OrderedDict()
            label_to_extrema_model[sim_label] = OrderedDict()



        # Calculate the return levels and standard deviations
        for ext_type in ExtremeProperties.extreme_types:

            return_periods = ExtremeProperties.extreme_type_to_return_periods[ext_type]

            # fit GEV distribution and apply non-parametric bootstrap to get std
            label_to_return_levels[obs_label][ext_type] = gevfit.do_gevfit_for_a_point(df_ext_obs[ext_type].values,
                                                                                       extreme_type=ext_type,
                                                                                       return_periods=return_periods)
            return_levels_obs, rl_stds_obs = label_to_return_levels[obs_label][ext_type]


            # get annual extremas for the model output at the points colose to the stations
            for sim_label, sim_path in sim_name_to_file_path.items():
                # NOTE(review): this reset discards any earlier ext_type
                # entries for sim_label; the dict is not read afterwards,
                # so it appears vestigial -- confirm before relying on it.
                label_to_return_levels[sim_label] = OrderedDict()

                ext_field = analysis.get_annual_extrema(
                    rconfig=RunConfig(data_path=sim_path, start_year=start_year, end_year=end_year),
                    varname="STFL", months_of_interest=ExtremeProperties.extreme_type_to_month_of_interest[ext_type],
                    n_avg_days=ExtremeProperties.extreme_type_to_n_agv_days[ext_type],
                    high_flow=ext_type == ExtremeProperties.high)

                # Select only those years when obs are available
                ts_data = [v for y, v in zip(range(start_year, end_year + 1), ext_field[:, mp.ix, mp.jy]) if
                           y in years_of_interest]
                ts_data = np.array(ts_data)
                return_levels, rl_stds = gevfit.do_gevfit_for_a_point(ts_data, extreme_type=ext_type,
                                                                      return_periods=return_periods)





                # Do the plotting
                for rp in return_periods:
                    ax = ext_type_to_rp_to_ax[ext_type][rp]
                    ax.set_title("T = {rp}-year".format(rp=rp))

                    # h = ax.errorbar(return_levels_obs[rp], return_levels[rp],
                    # marker=label_to_marker[sim_label], color=label_to_color[sim_label], label=sim_label,
                    #                 xerr=rl_stds_obs[rp] * 1.96, yerr=rl_stds[rp] * 1.96)

                    h = ax.scatter(return_levels_obs[rp], return_levels[rp],
                                   marker=label_to_marker[sim_label], color=label_to_color[sim_label], label=sim_label)



                    # save the data for maybe further calculation of the correlation coefficients
                    label_to_ax_to_xdata[sim_label][ax].append(return_levels_obs[rp])
                    label_to_ax_to_ydata[sim_label][ax].append(return_levels[rp])

                    sim_label_to_handle[sim_label] = h



    # Calculate the biases
    for sim_label in sim_name_to_file_path:
        for ext_type in ExtremeProperties.extreme_types:
            ret_periods = ExtremeProperties.extreme_type_to_return_periods[ext_type]
            for rp in ret_periods:

                ax = ext_type_to_rp_to_ax[ext_type][rp]
                mod = np.asarray(label_to_ax_to_ydata[sim_label][ax])
                obs = np.asarray(label_to_ax_to_xdata[sim_label][ax])

                # mean relative bias and Pearson correlation per panel
                bias = np.mean((mod - obs)/obs)
                corr, pv = stats.pearsonr(mod, obs)
                print("({sim_label}) Mean bias for {rp}-year {ext_type}-flow return level is: {bias}; corr={corr:.2f}; corr_pval={corr_pval:2g}".format(
                    sim_label=sim_label, rp=rp, bias=bias, corr=corr, corr_pval=pv,
                    ext_type=ext_type
                ))




    sfmt = ScalarFormatter(useMathText=True)
    sfmt.set_powerlimits((-2, 2))
    for et, rp_to_ax in ext_type_to_rp_to_ax.items():
        for rp, ax in rp_to_ax.items():
            # 1:1 diagonal over the range common to both axes
            xmin, xmax = ax.get_xlim()
            ymin, ymax = ax.get_ylim()
            x1 = min(xmin, ymin)
            x2 = min(xmax, ymax)
            ax.plot([x1, x2], [x1, x2], "k--")
            # ax.xaxis.set_major_locator(MaxNLocator(nbins=5))
            # ax.yaxis.set_major_locator(MaxNLocator(nbins=5))
            # ax.xaxis.set_major_formatter(sfmt)
            # ax.yaxis.set_major_formatter(sfmt)

    sim_labels = list(sim_name_to_file_path.keys())
    ax_with_legend.legend([sim_label_to_handle[sl] for sl in sim_labels], sim_labels,
                          bbox_to_anchor=(1, -0.25), borderaxespad=0.0, loc="upper right",
                          ncol=2, scatterpoints=1, numpoints=1)

    # Save the plot
    img_file = "{}.eps".format("_".join(sorted(label_to_marker.keys())))
    img_file = img_folder.joinpath(img_file)

    fig.tight_layout()
    with img_file.open("wb") as f:
        fig.savefig(f, bbox_inches="tight")
Example no. 13
0
def compare_hydrographs_at_stations(manager_list,
                                    start_date=None,
                                    end_date=None,
                                    img_path="hydrographs.png",
                                    colors=None,
                                    fig=None):
    """
    Plot climatological (daily-normal) hydrographs for a set of CEHQ gauging
    stations, comparing observations with the simulated streamflow ("STFL")
    of each model run in ``manager_list``.

    :param manager_list: list of Crcm5ModelDataManager instances, one per run
    :param start_date: datetime, start of the period of interest (or None)
    :param end_date: datetime, end of the period of interest (or None)
    :param img_path: path for saving the resulting figure (None -> no save)
    :param colors: list of line colors, one per manager (None -> mpl defaults)
    :param fig: existing matplotlib Figure to draw into (None -> new figure)
    """
    selected_ids = None
    stations = cehq_station.read_station_data(selected_ids=selected_ids,
                                              start_date=start_date,
                                              end_date=end_date)

    if colors is None:
        colors = len(manager_list) * [None]

    # Stations known to be badly represented on the model grid.
    # 090613 is skipped for the 0.5 deg resolution since the drainage network
    # is not fully represented by the model.
    skip_stations = ["080718", "095003", "094206", "090613", "092715"]

    lines_model = []
    station_to_list_of_model_ts = {}
    run_id_list = [m.run_id for m in manager_list]

    # Keep only stations that (i) are not blacklisted, (ii) drain an area of
    # at least ~one 0.5 deg grid cell, and (iii) pass a rough continuity test
    # over the selected period.
    filtered_stations = []
    for s in stations:
        assert isinstance(s, Station)

        if s.id in skip_stations:
            continue

        # skip stations with accumulation areas smaller than one grid cell
        if s.drainage_km2 <= 4 * np.radians(
                0.5) ** 2 * lat_lon.EARTH_RADIUS_METERS ** 2 * 1.0e-6:
            continue

        if not s.passes_rough_continuity_test(start_date, end_date):
            continue

        filtered_stations.append(s)

    stations = filtered_stations

    print(len(filtered_stations))

    plot_utils.apply_plot_params(width_pt=None,
                                 height_cm=40.0,
                                 width_cm=30.0,
                                 font_size=10)
    run_id_to_dataframe = {}
    run_id_to_cell_props = {}
    for manager in manager_list:
        assert isinstance(manager, Crcm5ModelDataManager)
        df, station_to_cellprops = manager.get_streamflow_dataframe_for_stations(
            stations,
            start_date=start_date,
            end_date=end_date,
            var_name="STFL",
            nneighbours=9)
        assert isinstance(df, pandas.DataFrame)
        df = df.dropna(axis=1)
        run_id_to_cell_props[manager.run_id] = station_to_cellprops

        # Build a daily climatology: map every date onto the dummy year 2001,
        # folding Feb 29 onto Mar 1 so leap years don't create an extra day.
        df = df.groupby(lambda i: datetime(2001, i.month + 1, 1)
                        if i.month == 2 and i.day == 29 else datetime(
                            2001, i.month, i.day)).mean()

        print(df)

        # again filter the stations with data time interval overlapping with
        # the model time interval
        stations = list(filter(lambda s: s.id in df.columns, stations))
        run_id_to_dataframe[manager.run_id] = df

    if fig is None:
        fig = plt.figure()

    # two columns of subplots; nrows must be an integer for GridSpec
    # (bugfix: the original used float division, which breaks in Python 3)
    ncols = 2
    nrows = len(stations) // ncols
    if nrows * ncols < len(stations):
        nrows += 1
    gs = GridSpec(nrows, ncols, hspace=0.4, wspace=0.4)
    line_model, line_obs = None, None
    stations.sort(key=lambda x: x.latitude, reverse=True)

    plot_station_positions(manager_list[0], stations)

    i = -1
    ns_list = []
    station_list = []
    flow_acc_area_list = []

    # seconds per day, used to integrate the daily-normal flows
    one_day_sec = 24 * 60 * 60.0
    for s in stations:
        i += 1
        ax = fig.add_subplot(gs[i // ncols, i % ncols])

        assert isinstance(s, Station)

        year_dates, sta_clims = s.get_daily_normals()

        # plot the observation uncertainty band (+/- ~25.6 %)
        ax.fill_between(year_dates,
                        sta_clims * 1.256,
                        sta_clims * 0.744,
                        alpha=0.25,
                        color="b")
        line_obs = ax.plot(year_dates,
                           sta_clims,
                           color="b",
                           label="Observation",
                           lw=3,
                           alpha=0.5)

        # integral flow, since those values are daily normals
        ax.annotate(
            "{0:.3g}".format(sum(sta_clims) * one_day_sec), (0.1, 0.95),
            xycoords="axes fraction",
            color="b",
            alpha=0.5)

        for run_id, color, color_index in zip(run_id_list, colors,
                                              list(range(len(colors)))):
            df = run_id_to_dataframe[run_id]
            the_line = ax.plot(year_dates,
                               df[s.id],
                               color=color,
                               label=run_id,
                               lw=1)
            # integral flow, since those values are daily normals
            ax.annotate("{0:.3g}".format(sum(df[s.id]) * one_day_sec),
                        (0.1, 0.9 - color_index * 0.05),
                        xycoords="axes fraction",
                        color=color)
            if not i:  # save the legend handles only for the first subplot
                lines_model.append(the_line)

        metadata = list(run_id_to_cell_props.items())[0][1][s.id]
        da_mod = metadata["acc_area_km2"]
        dist = metadata["distance_to_obs_km"]
        ax.set_title("{0}: $DA = {1:.1f}$ {2}".format(s.id, s.drainage_km2,
                                                      "${\\rm km ^ 2}$"))
        ax.xaxis.set_major_formatter(DateFormatter("%m"))
        assert isinstance(ax, Axes)
        ax.xaxis.axis_date()

    lines = lines_model + [
        line_obs,
    ]
    labels = run_id_list + [
        "Observation",
    ]
    fig.legend(lines, labels, ncol=5)
    if img_path is not None:
        fig.savefig(img_path)
def main():
    """
    Validate seasonal climatologies of the CRCM5-L run (air temperature "TT"
    and snow water equivalent "I5") against gridded observations, overlaying
    the selected gauging stations and the boundaries of their upstream
    drainage areas on the bias maps.
    """
    season_to_months = DEFAULT_SEASON_TO_MONTHS

    r_config = RunConfig(
        data_path="/RESCUE/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-r.hdf5",
        start_year=1990, end_year=2010, label="CRCM5-L"
    )

    bmp_info = analysis.get_basemap_info_from_hdf(file_path=r_config.data_path)
    bmp_info.should_draw_grey_map_background = True
    bmp_info.should_draw_basin_boundaries = False
    bmp_info.map_bg_color = "0.75"

    station_ids = [
        "104001", "093806", "093801", "081002", "081007", "080718"
    ]

    # get river network information used in the model
    flow_directions = analysis.get_array_from_file(r_config.data_path, var_name=infovar.HDF_FLOW_DIRECTIONS_NAME)
    accumulation_area_km2 = analysis.get_array_from_file(path=r_config.data_path,
                                                         var_name=infovar.HDF_ACCUMULATION_AREA_NAME)
    cell_manager = CellManager(flow_dirs=flow_directions,
                               lons2d=bmp_info.lons,
                               lats2d=bmp_info.lats, accumulation_area_km2=accumulation_area_km2)

    # Get the list of stations to indicate on the bias map
    stations = cehq_station.read_station_data(
        start_date=None, end_date=None, selected_ids=station_ids
    )
    """:type : list[Station]"""

    # outline of the drainage area upstream of each station's model point
    xx, yy = bmp_info.get_proj_xy()
    station_to_modelpoint = cell_manager.get_model_points_for_stations(station_list=stations)
    upstream_edges = cell_manager.get_upstream_polygons_for_points(
        model_point_list=station_to_modelpoint.values(), xx=xx, yy=yy)


    # Validate temperature, precip and swe
    obs_path_anusplin = "/home/huziy/skynet3_rech1/anusplin_links"
    obs_path_swe = "data/swe_ross_brown/swe.nc"
    model_var_to_obs_path = OrderedDict([
        ("TT", obs_path_anusplin),
    #    ("PR", obs_path_anusplin),
        ("I5", obs_path_swe)
    ])



    vname_to_obs_data = {}

    # parameters that won't change in the loop over variable names
    params_const = dict(rconfig=r_config, bmp_info=bmp_info, season_to_months=season_to_months)

    for vname, obs_path in model_var_to_obs_path.items():
        season_to_obs_data = get_seasonal_clim_obs_data(vname=vname, obs_path=obs_path, **params_const)

        # Comment swe over lakes, since I5 calculated only for land
        if vname in ["I5", ]:
            for season in season_to_obs_data:
                season_to_obs_data[season] = maskoceans(bmp_info.lons, bmp_info.lats,
                                                        season_to_obs_data[season],
                                                        inlands=True)

        vname_to_obs_data[vname] = season_to_obs_data


    # Plotting
    plot_all_vars_in_one_fig = True

    fig = None
    gs = None
    row_axes = []
    ncols = None
    if plot_all_vars_in_one_fig:
        plot_utils.apply_plot_params(font_size=12, width_pt=None, width_cm=25, height_cm=20)
        fig = plt.figure()
        # one column per season plus a narrow one for the colorbar
        ncols = len(season_to_months) + 1
        gs = GridSpec(len(model_var_to_obs_path), ncols, width_ratios=(ncols - 1) * [1., ] + [0.05, ])
    else:
        plot_utils.apply_plot_params(font_size=12, width_pt=None, width_cm=25, height_cm=25)

    row = 0
    station_x_list = []
    station_y_list = []
    for mname in model_var_to_obs_path:

        if plot_all_vars_in_one_fig:
            row_axes = [fig.add_subplot(gs[row, col]) for col in range(ncols)]

        compare_vars(vname_model=mname, vname_to_obs=vname_to_obs_data,
                     r_config=r_config,
                     season_to_months=season_to_months,
                     bmp_info_agg=bmp_info,
                     axes_list=row_axes)

        # -1 in order to exclude colorbars
        for the_ax in row_axes[:-1]:

            # Need titles only for the first row
            if row > 0:
                the_ax.set_title("")

            draw_upstream_area_bounds(the_ax, upstream_edges)

            # project the station coordinates once and reuse them for all axes
            if len(station_x_list) == 0:
                for the_station in stations:
                    xst, yst = bmp_info.basemap(the_station.longitude, the_station.latitude)
                    station_x_list.append(xst)
                    station_y_list.append(yst)

            bmp_info.basemap.scatter(station_x_list, station_y_list, c="g", ax=the_ax, s=5, zorder=10, alpha=0.5)



        # Hide fall swe
        if mname in ["I5"]:
            row_axes[-2].set_visible(False)

        row += 1


    # Save the figure if necessary
    if plot_all_vars_in_one_fig:
        fig_path = img_folder.joinpath("{}.png".format("_".join(model_var_to_obs_path)))
        with fig_path.open("wb") as figfile:
            fig.savefig(figfile, format="png", bbox_inches="tight")

        plt.close(fig)
def plot_at_indices(ix, jy):
    """
    Plot the monthly climatology (1979-1988) of total soil moisture
    (liquid "I1" + solid "I2") profiles at the grid point (ix, jy), shown as
    the difference between the interflow simulation and the base simulation.

    :param ix: x-index of the model grid point of interest
    :param jy: y-index of the model grid point of interest
    """
    var_name_liquid = "I1"
    var_name_solid = "I2"
    #period of interest
    start_year = 1979
    end_year = 1988

    #simulation names corresponding to the paths
    sim_names = ["crcm5-hcd-rl", "crcm5-hcd-rl-intfl"]

    sim_labels = [x.upper() for x in sim_names]

    # soil layer thicknesses (m); cumulative sum gives the layer depths
    layer_widths = [
        0.1, 0.2, 0.3, 0.4, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5,
        1.0, 3.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0
    ]

    layer_depths = np.cumsum(layer_widths)

    paths = [
        "/home/huziy/skynet3_rech1/from_guillimin/new_outputs/quebec_0.1_crcm5-hcd-rl_spinup",
        "/home/huziy/skynet3_rech1/from_guillimin/new_outputs/quebec_0.1_crcm5-hcd-rl-intfl_spinup2/Samples_all_in_one"
    ]

    # only the first manager builds a cell manager; the others reuse it below
    managers = [
        Crcm5ModelDataManager(samples_folder_path=path,
                              file_name_prefix="pm",
                              all_files_in_samples_folder=True,
                              need_cell_manager=(i == 0))
        for i, path in enumerate(paths)
    ]

    #share the cell manager
    a_data_manager = managers[0]
    assert isinstance(a_data_manager, Crcm5ModelDataManager)
    cell_manager = a_data_manager.cell_manager
    assert isinstance(cell_manager, CellManager)
    for m in managers[1:]:
        assert isinstance(m, Crcm5ModelDataManager)
        m.cell_manager = cell_manager

    #share the lake fraction field
    lake_fraction = a_data_manager.lake_fraction

    selected_ids = [
        "092715", "080101", "074903", "050304", "080104", "081007", "061905",
        "041903", "040830", "093806", "090613", "081002", "093801", "080718"
    ]
    start_date = datetime(start_year, 1, 1)
    end_date = datetime(end_year, 12, 31)

    stations = cehq_station.read_station_data(selected_ids=selected_ids,
                                              start_date=start_date,
                                              end_date=end_date)

    #stations with corresponding model points
    station_to_mp = a_data_manager.get_dataless_model_points_for_stations(
        stations)

    #figure out levels in soil

    sim_label_to_profiles = {}
    fig = plt.figure()
    fmt = ScalarFormatter(useMathText=True)
    fmt.set_powerlimits([-2, 2])

    for m, label in zip(managers, sim_labels):
        assert isinstance(m, Crcm5ModelDataManager)

        # use cached monthly climatologies when available; compute and cache
        # them otherwise (the 3d-field climatology is expensive)
        monthly_means_liquid = _get_cached_monthly_mean_fields(
            label, start_year, end_year, var_name_liquid)
        if monthly_means_liquid is None:
            monthly_means_liquid = m.get_monthly_climatology_of_3d_field(
                var_name=var_name_liquid,
                start_year=start_year,
                end_year=end_year)
            _cache_monthly_mean_fields(monthly_means_liquid, label, start_year,
                                       end_year, var_name_liquid)

        monthly_means_solid = _get_cached_monthly_mean_fields(
            label, start_year, end_year, var_name_solid)
        if monthly_means_solid is None:
            monthly_means_solid = m.get_monthly_climatology_of_3d_field(
                var_name=var_name_solid,
                start_year=start_year,
                end_year=end_year)
            _cache_monthly_mean_fields(monthly_means_solid, label, start_year,
                                       end_year, var_name_solid)

        # total (liquid + solid) profile at the point, one row per month
        profiles = [
            monthly_means_liquid[i][ix, jy, :] +
            monthly_means_solid[i][ix, jy, :] for i in range(12)
        ]

        sim_label_to_profiles[label] = np.array(profiles)

    x = list(range(12))
    y = layer_depths

    # difference: interfow run minus base run
    y2d, x2d = np.meshgrid(y, x)
    plt.contourf(
        x2d, y2d, sim_label_to_profiles[sim_labels[1]] -
        sim_label_to_profiles[sim_labels[0]])
    plt.gca().invert_yaxis()
    plt.colorbar()

    #fig.tight_layout()

    fig.savefig("soil_profile_at_ix={0};jy={1}.pdf".format(ix, jy))
Esempio n. 16
0
def validate_precip(model_file="",
                    simlabel="",
                    obs_manager=None,
                    season_to_months=None,
                    start_year=None,
                    end_year=None,
                    season_to_plot_indices=None,
                    station_ids_list=None):
    """
    Plot seasonal precipitation biases (model minus observations, mm/day) on
    maps, optionally marking a list of gauging stations.

    :param model_file: path to the hdf file with the model output
    :param simlabel: simulation label, used in the title and output file name
    :param obs_manager: should implement the method
        getMeanFieldForMonthsInterpolatedTo(self, months = None, lonsTarget = None, latsTarget = None)
        anusplin data is in mm/day
        model data is in m/s
    :param season_to_months: {season name: list of month numbers}
    :param start_year: first year of the averaging period
    :param end_year: last year of the averaging period
    :param season_to_plot_indices: {season name: (row, col)} subplot layout
    :param station_ids_list: ids of stations to mark on the maps (or None)
    """

    model_var_name = "PR"

    if obs_manager is None:
        print(
            "Skipping validation of {}, since the obs manager is None.".format(
                model_var_name))
        return

    if station_ids_list is not None:
        # Get the list of stations to indicate on the bias map
        stations = cehq_station.read_station_data(
            start_date=None, end_date=None, selected_ids=station_ids_list)
    else:
        stations = []

    model_level = 0
    reasonable_error_mm_per_day = 1

    assert isinstance(obs_manager, AnuSplinManager)
    fig = plt.figure()
    assert isinstance(fig, Figure)

    fig.suptitle("({0}) - ({1})".format(simlabel, "Obs."))

    lon, lat, basemap = analysis.get_basemap_from_hdf(file_path=model_file)

    # do calculations and only after that do the plotting
    season_to_field = {}

    # calculate global min and max for plotting
    vmin = None
    vmax = None

    for season, months in season_to_months.items():
        model_field = analysis.get_seasonal_climatology(
            start_year=start_year,
            end_year=end_year,
            months=months,
            level=model_level,
            var_name=model_var_name,
            hdf_path=model_file)

        # convert m/s to mm/day for comparison with anusplin data
        model_field *= 1000.0 * 60 * 60 * 24

        obs_field = obs_manager.getMeanFieldForMonthsInterpolatedTo(
            months=months,
            lonstarget=lon,
            latstarget=lat,
            start_year=start_year,
            end_year=end_year)

        # calculate the difference between the modelled and observed fields
        the_diff = model_field - obs_field
        current_min = np.min(the_diff)
        current_max = np.max(the_diff)

        if vmin is not None:
            vmin = current_min if current_min < vmin else vmin
            vmax = current_max if current_max > vmax else vmax
        else:
            vmin = current_min
            vmax = current_max

        season_to_field[season] = the_diff

    ncolors = 12
    # 2x2 season maps plus a narrow column for the shared colorbar
    gs = gridspec.GridSpec(2, 3, width_ratios=[1, 1, 0.05])

    cmap = cm.get_cmap("RdBu_r", ncolors)
    x, y = basemap(lon, lat)
    im = None

    # symmetric color limits around zero for the bias maps
    d = min(abs(vmin), abs(vmax))
    vmin = -d
    vmax = d
    bn, bounds, _, _ = infovar.get_boundary_norm(vmin,
                                                 vmax,
                                                 ncolors,
                                                 exclude_zero=False)

    print("bounds: ", bounds)

    cs = None
    for season, field in season_to_field.items():
        row, col = season_to_plot_indices[season]
        ax = fig.add_subplot(gs[row, col])
        ax.set_title(season)
        basemap.drawmapboundary(fill_color="gray", ax=ax)
        im = basemap.pcolormesh(x,
                                y,
                                season_to_field[season],
                                vmin=vmin,
                                vmax=vmax,
                                cmap=cmap,
                                norm=bn)
        basemap.drawcoastlines(ax=ax, linewidth=cpp.COASTLINE_WIDTH)

        for the_station in stations:
            assert isinstance(the_station, Station)
            xst, yst = basemap(the_station.longitude, the_station.latitude)
            basemap.scatter(xst, yst, c="g", ax=ax)

    cax = fig.add_subplot(gs[:, 2])
    cax.set_title("mm/day\n")
    plt.colorbar(im, cax=cax, extend="both")
    seasons_str = "_".join(
        sorted([str(s) for s in list(season_to_field.keys())]))
    atm_val_folder = os.path.join(images_folder, "validate_atm")
    if not os.path.isdir(atm_val_folder):
        os.mkdir(atm_val_folder)

    out_filename = "{3}/validate_2d_{0}_{1}_{2}.png".format(
        model_var_name, simlabel, seasons_str, atm_val_folder)
    # bugfix: out_filename is already rooted at atm_val_folder (which itself
    # is under images_folder), so joining with images_folder again would
    # duplicate the prefix for relative paths
    fig.savefig(out_filename, bbox_inches="tight")
Esempio n. 17
0
def plot_for_different_months(start_date = None, end_date = None):
    """
    For each lake (a pair of level and streamflow stations) compare observed
    streamflow with the streamflow reconstructed from observed lake levels
    (Bowling storage-discharge relation), month by month.  For every month a
    linear fit q_calc = k * q_obs + b is derived, along with an ice factor;
    the per-lake scatter plots, coefficient curves and ice factors are saved
    as png files.

    :param start_date: start of the analysis period (None -> first common date)
    :param end_date: end of the analysis period (None -> last common date)
    """
    lake_names = [
        #"Matagami",
        "Mistassini", "Nemiscau"]
    lake_areas_km2 = [
        #370.7,
        2162.7, 148.3]

    # lake level station ids (parallel to lake_names)
    level_st_ids = [
        #"080716",
        "081003",  "081001"
    ]
    level_stations = cehq.read_station_data(folder="data/cehq_levels",
                        selected_ids=level_st_ids)

    # streamflow station ids (parallel to lake_names)
    stfl_st_ids = [
        #"080707",
        "081007", "081002"
    ]
    stfl_stations = cehq.read_station_data(folder="data/cehq_measure_data",
            selected_ids=stfl_st_ids)


    for lev_station, stfl_station, lake_name, lake_area_km2, c in zip(level_stations,
                                                                    stfl_stations, lake_names, lake_areas_km2,
                                                                    ["r","g","b"]
                                                                ):
        assert isinstance(lev_station, Station)
        assert isinstance(stfl_station, Station)

        all_q_obs = []
        all_q_calc = []
        all_k = []
        all_b = []


        # dates for which both level and streamflow observations exist
        intersection_dates = list( sorted( filter( lambda d: d in stfl_station.dates, lev_station.dates) ) )

        q_vals = [stfl_station.get_value_for_date(d) for d in intersection_dates]
        h_vals = [lev_station.get_value_for_date(d) for d in intersection_dates]
        #change the way streamflow calculated here
        q_calc = get_streamflows_from_active_stores_bowling(get_active_storages(h_vals,lake_area_km2), lake_area_km2)


        q_min = min( min(q_vals), min(q_calc) )
        q_max = max( max(q_vals), max(q_calc) )

        plot_utils.apply_plot_params(width_pt=None, width_cm=30, height_cm=20, font_size=10)
        fig = plt.figure()
        gs = gridspec.GridSpec(3,4,wspace=0.5, hspace=0.5)

        # NOTE(review): when the defaults are used, start_date/end_date are
        # set from the FIRST lake's record and then reused for the remaining
        # lakes in this loop -- confirm this is intended
        if start_date is None:
            start_date = intersection_dates[0]

        if end_date is None:
            end_date = intersection_dates[-1]
        print("lake name: {0}".format(lake_name))
        print("data are from " + str( intersection_dates[0] ) + " to " + str( intersection_dates[-1] ))


        ##As a base month we are using August (assuming that we don't have ice in August)
        base_month = 8
        the_dates = list( filter( lambda d: (start_date <= d <= end_date) and
                                                        d.month == base_month, intersection_dates) )
        q_base = np.mean([stfl_station.get_value_for_date(d) for d in the_dates])
        h_vals = [lev_station.get_value_for_date(d) for d in the_dates]
        store_base = np.mean(get_active_storages(h_vals,lake_area_km2))

        ice_factors = []
        for month in range(1, 13):
            # 3x4 grid of subplots, one per calendar month
            ax = fig.add_subplot(gs[(month - 1)//4, (month - 1) % 4])

            the_dates = list( filter( lambda d: (start_date <= d <= end_date) and
                                                           d.month == month, intersection_dates) )

            q_vals = [stfl_station.get_value_for_date(d) for d in the_dates]
            h_vals = [lev_station.get_value_for_date(d) for d in the_dates]
            s_vals = get_active_storages(h_vals,lake_area_km2)
            print(len(h_vals))
            q_calc = get_streamflows_from_active_stores_bowling(s_vals, lake_area_km2)


            ice_factors.append(get_ice_factor(np.mean(q_vals), np.mean(s_vals), q_base, store_base))


            all_q_obs.extend(q_vals)
            all_q_calc.extend(q_calc)

            ax.scatter(q_vals, q_calc, linewidths = 0)

            print("len(q_vals) = {0}".format(len(q_vals)))

            ax.set_xlabel("$Q_{\\rm obs}$")
            ax.set_ylabel("$Q_{\\rm mod}$")

            # linear fit q_calc ~ k * q_obs + b for this month
            the_poly = np.polyfit(q_vals, q_calc, 1)
            k, b = the_poly

            all_k.append(k)
            all_b.append(b)

            # corrected values (inverse of the fit), drawn on top in red
            ax.scatter(q_vals, [(x - b) / k for x in q_calc], c ="r", linewidth = 0, zorder = 6)




            #ax.annotate("k={0:.2f}; \nb={1:d}".format(k, int(b)), xy = (0.6, 0.05),
            #    xycoords = "axes fraction", zorder = 5
            #)
            ax.plot([q_min, q_max], [q_min, q_max], "k-",lw = 3, zorder = 5)
            d = datetime(2000, month, 1)
            ax.set_title(d.strftime("%b") + "( k={0:.7f}; b={1:.2f})".format(k, b))
            ax.xaxis.set_major_locator(MultipleLocator(base = np.round((q_max - q_min )/ 10) * 10 / 2  ))
            ax.yaxis.set_major_locator(MultipleLocator(base = np.round((q_max - q_min ) / 10) * 10 / 2))

        fig.suptitle(lake_name)
        fig.savefig("{0}.png".format(lake_name))
        assert isinstance(fig, Figure)


        plot_utils.apply_plot_params(width_pt=None, width_cm=30, height_cm=20, font_size=25)
        #plot k and b as a function of month
        fig = plt.figure()
        ax = fig.add_subplot(1,1,1)
        assert isinstance(ax, Axes)
        ax.set_title("Correction coefficients applied \n q'=(1/k)*q-b/k")
        ax.plot(list(range(1,13)), 1.0 / np.array(all_k), lw = 3, label = "1/k")
        ax.legend()
        fig.savefig("{0}_coefs_1_k.png".format(lake_name))

        fig = plt.figure()
        ax = fig.add_subplot(1,1,1)
        assert isinstance(ax, Axes)
        ax.set_title("Correction coefficients applied \n q'=(1/k)*q-b/k")
        ax.plot(list(range(1,13)), -np.array(all_b) / np.array(all_k), lw = 3, label = "-b/k")
        ax.legend()
        fig.savefig("{0}_coefs_b_k.png".format(lake_name))


        plt.figure()
        plt.plot(list(range(1,13)), ice_factors, lw = 3)
        plt.title("Ice factor for {0}".format(lake_name))
        plt.savefig("{0}_ice_factors.png".format(lake_name))


        ###plot q-q for all season in the same plot
        plot_utils.apply_plot_params(width_pt=None, width_cm=30, height_cm=20, font_size=20)
        fig1 = plt.figure()
        ax = fig1.add_subplot(1,1,1)
        ax.set_title(lake_name)
        ax.scatter(all_q_obs, all_q_calc, c="b", linewidths=0)
        # single linear fit over all months pooled together
        the_poly = np.polyfit(all_q_obs, all_q_calc, 1)
        k,b = the_poly

        print(k, b)
        print("len(all_q) = {0}".format(len(all_q_calc)))

        ax.scatter(all_q_obs, [(x - b) / k for x in all_q_calc], c ="r",
            linewidth = 0, zorder = 6)
        ax.annotate("k={0:.2f}; \nb={1:.2f}".format(k, b), xy = (0.6, 0.05),
                        xycoords = "axes fraction", zorder = 7
                    )

        print(np.polyfit(all_q_obs, [(x - b) / k for x in all_q_calc],1))

        print("min(all_q_calc) = {0}, max(all_q_calc) = {1}".format(min(all_q_calc), max(all_q_calc)))


#        ax.plot([q_min, q_max], [k * q_min + b, k * q_max + b], "g" )

        ax.plot([q_min, q_max], [q_min, q_max], "k-",lw = 1, zorder = 5)

#        for the_k, the_b in zip(all_k, all_b):
#            ax.plot([q_min, q_max], [the_k * q_min + the_b, the_k * q_max + the_b] )


        ax.grid(b = True)
        ax.set_xlabel("$Q_{\\rm obs}$")
        ax.set_ylabel("$Q_{\\rm mod}$")

        ax.xaxis.set_major_locator(MultipleLocator(base = np.round((q_max - q_min )/ 10) * 10 / 2  ))
        ax.yaxis.set_major_locator(MultipleLocator(base = np.round((q_max - q_min ) / 10) * 10 / 2))

        print("all_{0}.png".format(lake_name))
        #plt.show()
        fig1.subplots_adjust(left = 0.2)
        fig1.savefig("all_{0}.png".format(lake_name))
def main():
    """
    Compare monthly climatologies (1979-1988) of total soil moisture
    (liquid "I1" + solid "I2") profiles, spatially averaged over the region
    upstream of each selected station, between the interflow and the base
    CRCM5 simulations.  One pdf with the relative difference (percent) is
    saved per station.
    """
    var_name_liquid = "I1"
    var_name_solid = "I2"
    #period of interest
    start_year = 1979
    end_year = 1988

    #spatial averaging will be done over upstream points to the stations
    selected_ids = [
        "092715", "080101", "074903", "050304", "080104", "081007", "061905",
        "041903", "040830", "093806", "090613", "081002", "093801", "080718"
    ]

    # NOTE(review): the full id list above is immediately overridden here,
    # restricting the analysis to a single station
    selected_ids = ["074903"]

    #simulation names corresponding to the paths
    sim_names = ["crcm5-hcd-rl", "crcm5-hcd-rl-intfl"]

    sim_labels = [x.upper() for x in sim_names]

    colors = ["blue", "violet"]

    # soil layer thicknesses (m); cumulative sum gives the layer depths
    layer_widths = [
        0.1, 0.2, 0.3, 0.4, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5,
        1.0, 3.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0
    ]

    layer_depths = np.cumsum(layer_widths)

    paths = [
        "/home/huziy/skynet3_rech1/from_guillimin/new_outputs/quebec_0.1_crcm5-hcd-rl_spinup",
        "/home/huziy/skynet3_rech1/from_guillimin/new_outputs/quebec_0.1_crcm5-hcd-rl-intfl_spinup2/Samples_all_in_one"
    ]

    seasons = [[12, 1, 2], [3, 4, 5], [6, 7, 8], [9, 10, 11]]

    season_names = ["DJF", "MAM", "JJA", "SON"]

    # only the first manager builds a cell manager; the others reuse it below
    managers = [
        Crcm5ModelDataManager(samples_folder_path=path,
                              file_name_prefix="pm",
                              all_files_in_samples_folder=True,
                              need_cell_manager=(i == 0))
        for i, path in enumerate(paths)
    ]

    #share the cell manager
    a_data_manager = managers[0]
    assert isinstance(a_data_manager, Crcm5ModelDataManager)
    cell_manager = a_data_manager.cell_manager
    assert isinstance(cell_manager, CellManager)
    for m in managers[1:]:
        assert isinstance(m, Crcm5ModelDataManager)
        m.cell_manager = cell_manager

    #share the lake fraction field
    lake_fraction = a_data_manager.lake_fraction

    #selected_ids = ["092715", "080101", "074903", "050304", "080104", "081007", "061905",
    #                  "041903", "040830", "093806", "090613", "081002", "093801", "080718"]
    start_date = datetime(start_year, 1, 1)
    end_date = datetime(end_year, 12, 31)

    stations = cehq_station.read_station_data(selected_ids=selected_ids,
                                              start_date=start_date,
                                              end_date=end_date)

    #stations with corresponding model points
    station_to_mp = a_data_manager.get_dataless_model_points_for_stations(
        stations)

    #figure out levels in soil

    sim_label_to_profiles = {}
    for s, mp in station_to_mp.items():
        assert isinstance(mp, ModelPoint)
        # average over upstream cells, excluding mostly-lake cells
        mask = (mp.flow_in_mask == 1) & (lake_fraction < 0.6)
        fig = plt.figure()
        fmt = ScalarFormatter(useMathText=True)
        fmt.set_powerlimits([-2, 2])

        print(mp.ix, mp.jy, s.id)

        for m, label, color in zip(managers, sim_labels, colors):
            assert isinstance(m, Crcm5ModelDataManager)

            # use cached monthly climatologies when available; compute and
            # cache them otherwise (the 3d-field climatology is expensive)
            monthly_means_liquid = _get_cached_monthly_mean_fields(
                label, start_year, end_year, var_name_liquid)
            if monthly_means_liquid is None:
                monthly_means_liquid = m.get_monthly_climatology_of_3d_field(
                    var_name=var_name_liquid,
                    start_year=start_year,
                    end_year=end_year)
                _cache_monthly_mean_fields(monthly_means_liquid, label,
                                           start_year, end_year,
                                           var_name_liquid)

            monthly_means_solid = _get_cached_monthly_mean_fields(
                label, start_year, end_year, var_name_solid)
            if monthly_means_solid is None:
                monthly_means_solid = m.get_monthly_climatology_of_3d_field(
                    var_name=var_name_solid,
                    start_year=start_year,
                    end_year=end_year)
                _cache_monthly_mean_fields(monthly_means_solid, label,
                                           start_year, end_year,
                                           var_name_solid)

            # upstream-mean total (liquid + solid) profile, one row per month
            profiles = [
                monthly_means_liquid[i][mask, :].mean(axis=0) +
                monthly_means_solid[i][mask, :].mean(axis=0) for i in range(12)
            ]

            sim_label_to_profiles[label] = np.array(profiles)

        x = [date2num(datetime(2001, month, 1)) for month in range(1, 13)]
        y = layer_depths

        # relative difference (%) of the interflow run w.r.t. the base run
        y2d, x2d = np.meshgrid(y, x)
        delta = (sim_label_to_profiles[sim_labels[1]] -
                 sim_label_to_profiles[sim_labels[0]]
                 ) / sim_label_to_profiles[sim_labels[0]] * 100

        #delta = np.ma.masked_where(delta < 0.1, delta)

        cmap = my_colormaps.get_cmap_from_ncl_spec_file(
            path="colormap_files/BlueRed.rgb", ncolors=10)
        the_min = -6.0
        the_max = 6.0
        step = (the_max - the_min) / float(cmap.N)

        # only the top 8 soil layers are plotted
        plt.pcolormesh(x2d[:, :8],
                       y2d[:, :8],
                       delta[:, :8],
                       cmap=cmap,
                       vmin=the_min,
                       vmax=the_max)  #, levels = np.arange(-6,7,1))
        plt.gca().invert_yaxis()
        plt.colorbar(ticks=np.arange(the_min, the_max + step, step))
        plt.gca().set_ylabel("Depth (m)")

        plt.gca().xaxis.set_major_formatter(DateFormatter("%b"))

        #fig.tight_layout()
        fig.savefig("soil_profile_upstream_of_{0}.pdf".format(s.id))

    pass
Esempio n. 19
0
def main():
    """Plot modelled daily-mean water storage (SWSR) for a set of CEHQ stations.

    One panel per station (two columns), each showing the modelled storage
    sampled at the observation dates plus a horizontal line at the grid cell's
    bankfull storage.  The figure is saved to ``storage.png``.
    """
    data_path = "/home/huziy/skynet3_exec1/from_guillimin/quebec_test_198501_198612_0.1deg"

    manager = Crcm5ModelDataManager(
        samples_folder_path=data_path, file_name_prefix="pm", all_files_in_samples_folder=True
    )
    assert isinstance(manager, Crcm5ModelDataManager)

    selected_ids = ["104001", "103715", "093806", "093801", "092715", "081006", "061502", "040830", "080718"]

    start_date = datetime(1985, 1, 1)
    end_date = datetime(1985, 12, 31)

    stations = cehq_station.read_station_data(selected_ids=selected_ids, start_date=start_date, end_date=end_date)

    plot_utils.apply_plot_params(width_pt=None, height_cm=30.0, width_cm=16, font_size=10)
    fig = plt.figure()
    # Two columns of panels; enough rows to hold all stations
    gs = GridSpec(len(stations) // 2 + len(stations) % 2, 2, hspace=0.4, wspace=0.4)

    # Northernmost stations are plotted first
    stations.sort(key=lambda x: x.latitude, reverse=True)

    for i, s in enumerate(stations):

        model_ts = manager.get_streamflow_timeseries_for_station(
            s, start_date=start_date, end_date=end_date, var_name="SWSR"
        )
        ax = fig.add_subplot(gs[i // 2, i % 2])

        assert isinstance(model_ts, TimeSeries)
        assert isinstance(s, Station)

        # Daily means of the modelled series, sampled at the observation dates
        model_ts = model_ts.get_ts_of_daily_means()
        print(model_ts.time[0], model_ts.time[-1])
        print(model_ts.data[0:10])

        mod_vals = model_ts.get_data_for_dates(s.dates)
        print(mod_vals[:20])
        print("+" * 20)
        assert len(mod_vals) == len(s.dates)

        ax.plot(s.dates, mod_vals, label="Model (CRCM5)", lw=1, color="b")

        # Horizontal reference line at the bankfull storage of the model cell
        bf_store = model_ts.metadata["bankfull_store_m3"]
        ax.plot([s.dates[0], s.dates[-1]], [bf_store, bf_store], color="k")

        # Title: relative drainage-area difference (%) and distance to the station
        ax.set_title(
            "%s: da_diff=%.2f %%, dist = %.1f"
            % (
                s.id,
                (-s.drainage_km2 + model_ts.metadata["acc_area_km2"]) / s.drainage_km2 * 100.0,
                model_ts.metadata["distance_to_obs"],
            )
        )

        ax.xaxis.set_major_formatter(DateFormatter("%Y"))
        ax.xaxis.set_major_locator(YearLocator())

    fig.savefig("storage.png")
def main():
    """Scatter-compare observed vs. modelled streamflow percentiles.

    For stations with lakes upstream, computes the daily streamflow
    climatology percentiles Q90 (high flow) and Q10 (low flow) from
    observations and from two simulations (CRCM5-L1 and CRCM5-L2),
    plots modelled vs. observed values with R^2 annotations, and saves
    the figure to ``images_folder/percentiles_comparison.png``.
    """
    start_year = 1980
    end_year = 2010

    start_date = datetime(start_year, 1, 1)
    end_date = datetime(end_year, 12, 31)

    ids_with_lakes_upstream = [
        "104001", "093806", "093801", "081002", "081007", "080718"
    ]

    selected_station_ids = [
        "092715", "074903", "080104", "081007", "061905", "093806", "090613",
        "081002", "093801", "080718", "104001"
    ]

    # Only the stations with lakes upstream are actually used below
    selected_station_ids = ids_with_lakes_upstream

    # Get the list of stations to do the comparison with
    stations = cehq_station.read_station_data(
        start_date=start_date,
        end_date=end_date,
        selected_ids=selected_station_ids)

    # add hydat stations
    # province = "QC"
    # min_drainage_area_km2 = 10000.0
    # stations_hd = cehq_station.load_from_hydat_db(start_date=start_date, end_date=end_date,
    # province=province, min_drainage_area_km2=min_drainage_area_km2)
    # if not len(stations_hd):
    #     print "No hydat stations satisying the conditions: period {0}-{1}, province {2}".format(
    #         str(start_date), str(end_date), province
    #     )
    # stations.extend(stations_hd)

    # brewer2mpl.get_map args: set name  set type  number of colors
    bmap = brewer2mpl.get_map("Set1", "qualitative", 9)

    path1 = "/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-r.hdf5"
    label1 = "CRCM5-L1"

    path2 = "/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-rl.hdf5"
    label2 = "CRCM5-L2"

    # note: the first two brewer colors are assigned in reversed order
    color2, color1 = bmap.mpl_colors[:2]

    # Static model-grid fields read from the first simulation's HDF store
    fldirs = analysis.get_array_from_file(
        path=path1, var_name=infovar.HDF_FLOW_DIRECTIONS_NAME)
    lons2d, lats2d, basemap = analysis.get_basemap_from_hdf(path1)

    lake_fractions = analysis.get_array_from_file(
        path=path1, var_name=infovar.HDF_LAKE_FRACTION_NAME)
    # cell_areas = analysis.get_array_from_file(path=path1, var_name=infovar.HDF_CELL_AREA_NAME)
    acc_area = analysis.get_array_from_file(
        path=path1, var_name=infovar.HDF_ACCUMULATION_AREA_NAME)

    cell_manager = CellManager(fldirs,
                               lons2d=lons2d,
                               lats2d=lats2d,
                               accumulation_area_km2=acc_area)

    # Match each station with a model grid point (drainage areas must agree
    # to within 30 %)
    station_to_mp = cell_manager.get_model_points_for_stations(
        station_list=stations,
        lake_fraction=lake_fractions,
        drainaige_area_reldiff_limit=0.3)

    # Left panel: Q90 scatter; right panel: Q10 scatter
    fig, axes = plt.subplots(1, 2, gridspec_kw=dict(top=0.80, wspace=0.4))

    q90_obs_list = []
    q90_mod1_list = []
    q90_mod2_list = []

    q10_obs_list = []
    q10_mod1_list = []
    q10_mod2_list = []

    for the_station, the_mp in station_to_mp.items():
        assert isinstance(the_station, Station)
        # Require at least 3 complete years of observations
        compl_years = the_station.get_list_of_complete_years()
        if len(compl_years) < 3:
            continue

        # Daily streamflow climatologies at the matched grid point for both runs
        t, stfl1 = analysis.get_daily_climatology_for_a_point(
            path=path1,
            years_of_interest=compl_years,
            i_index=the_mp.ix,
            j_index=the_mp.jy,
            var_name="STFA")

        _, stfl2 = analysis.get_daily_climatology_for_a_point(
            path=path2,
            years_of_interest=compl_years,
            i_index=the_mp.ix,
            j_index=the_mp.jy,
            var_name="STFA")

        # Observed climatology on the same stamp dates
        _, stfl_obs = the_station.get_daily_climatology_for_complete_years(
            stamp_dates=t, years=compl_years)

        # Q90
        q90_obs = np.percentile(stfl_obs, 90)
        q90_mod1 = np.percentile(stfl1, 90)
        q90_mod2 = np.percentile(stfl2, 90)

        # Q10
        q10_obs = np.percentile(stfl_obs, 10)
        q10_mod1 = np.percentile(stfl1, 10)
        q10_mod2 = np.percentile(stfl2, 10)

        # save quantiles to lists for correlation calculation
        q90_obs_list.append(q90_obs)
        q90_mod1_list.append(q90_mod1)
        q90_mod2_list.append(q90_mod2)

        q10_mod1_list.append(q10_mod1)
        q10_mod2_list.append(q10_mod2)
        q10_obs_list.append(q10_obs)

        # axes[0].annotate(the_station.id, (q90_obs, np.percentile(stfl1, 90)))
        # axes[1].annotate(the_station.id, (q10_obs, np.percentile(stfl1, 10)))

    # Plot scatter plot of Q90
    the_ax = axes[0]

    # the_ax.annotate(the_station.id, (q90_obs, np.percentile(stfl1, 90)))
    the_ax.scatter(q90_obs_list, q90_mod1_list, label=label1, c=color1)
    the_ax.scatter(q90_obs_list, q90_mod2_list, label=label2, c=color2)

    # plot scatter plot of Q10
    the_ax = axes[1]
    # the_ax.annotate(the_station.id, (q10_obs, np.percentile(stfl1, 10)))
    # handles h1/h2 are reused for the shared figure legend below
    h1 = the_ax.scatter(q10_obs_list, q10_mod1_list, label=label1, c=color1)
    h2 = the_ax.scatter(q10_obs_list, q10_mod2_list, label=label2, c=color2)

    # Add correlation coefficients to the axes
    fp = FontProperties(size=14, weight="bold")
    axes[0].annotate(r"$R^2 = {0:.2f}$".format(
        np.corrcoef(q90_mod1_list, q90_obs_list)[0, 1]**2), (0.1, 0.85),
                     color=color1,
                     xycoords="axes fraction",
                     font_properties=fp)
    axes[0].annotate(r"$R^2 = {0:.2f}$".format(
        np.corrcoef(q90_mod2_list, q90_obs_list)[0, 1]**2), (0.1, 0.70),
                     color=color2,
                     xycoords="axes fraction",
                     font_properties=fp)

    axes[1].annotate(r"$R^2 = {0:.2f}$".format(
        np.corrcoef(q10_mod1_list, q10_obs_list)[0, 1]**2), (0.1, 0.85),
                     color=color1,
                     xycoords="axes fraction",
                     font_properties=fp)
    axes[1].annotate(r"$R^2 = {0:.2f}$".format(
        np.corrcoef(q10_mod2_list, q10_obs_list)[0, 1]**2), (0.1, 0.70),
                     color=color2,
                     xycoords="axes fraction",
                     font_properties=fp)

    # Scientific notation for large streamflow values on both axes
    sf = ScalarFormatter(useMathText=True)
    sf.set_powerlimits((-2, 3))
    for ind, the_ax in enumerate(axes):
        plot_one_to_one_line(the_ax)
        if ind == 0:
            the_ax.set_xlabel(r"Observed $\left({\rm m^3/s} \right)$")
            the_ax.set_ylabel(r"Modelled $\left({\rm m^3/s} \right)$")

        the_ax.annotate(r"$Q_{90}$" if ind == 0 else r"$Q_{10}$", (0.95, 0.95),
                        xycoords="axes fraction",
                        bbox=dict(facecolor="white"),
                        va="top",
                        ha="right")

        the_ax.xaxis.set_major_formatter(sf)
        the_ax.yaxis.set_major_formatter(sf)

        locator = MaxNLocator(nbins=5)
        the_ax.xaxis.set_major_locator(locator)
        the_ax.yaxis.set_major_locator(locator)
        # x1 (the lower limit) is intentionally discarded: both axes are forced
        # to start at 0 and share the upper x-limit
        x1, x2 = the_ax.get_xlim()
        # Since streamflow percentiles can only be positive
        the_ax.set_xlim(0, x2)
        the_ax.set_ylim(0, x2)

    fig.legend([h1, h2], [label1, label2], loc="upper center", ncol=2)
    figpath = os.path.join(images_folder, "percentiles_comparison.png")
    # plt.tight_layout()
    fig.savefig(figpath, dpi=cpp.FIG_SAVE_DPI, bbox_inches="tight")
Esempio n. 21
0
def main(start_year=1980, end_year=1989):
    """Compare climatological soil-moisture profiles between two simulations.

    Computes the daily climatology of total soil moisture (liquid I1 + frozen
    I2) for two CRCM5-HCD-RL-INTFL runs, then, for each selected station,
    plots a time-depth contour of the difference (``label2`` - ``label1``)
    averaged over the non-lake cells upstream of the station.  A single
    domain-wide mean plot is produced as well.  All figures are saved under
    ``images_for_lake-river_paper/comp_soil_profiles``.

    :param start_year: first year of the climatology (inclusive)
    :param end_year: last year of the climatology (inclusive)
    """

    soil_layer_widths = infovar.soil_layer_widths_26_to_60
    # Depths of the layer tops: 0 plus cumulative widths of all but the last layer
    soil_tops = np.cumsum(soil_layer_widths).tolist()[:-1]
    soil_tops = [
        0,
    ] + soil_tops

    selected_station_ids = [
        "061905", "074903", "090613", "092715", "093801", "093806"
    ]

    #    path1 = "/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-rl_spinup.hdf"
    #    label1 = "CRCM5-HCD-RL"

    path1 = "/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-rl-intfl_spinup_ITFS.hdf5"
    label1 = "CRCM5-HCD-RL-INTFL"

    path2 = "/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-rl-intfl_ITFS_avoid_truncation1979-1989.hdf5"
    label2 = "CRCM5-HCD-RL-INTFL-improved"

    ############
    images_folder = "images_for_lake-river_paper/comp_soil_profiles"
    if not os.path.isdir(images_folder):
        os.mkdir(images_folder)

    # Static model-grid fields (flow directions, lake fractions, areas, ...)
    fldirs = analysis.get_array_from_file(
        path=path1, var_name=infovar.HDF_FLOW_DIRECTIONS_NAME)
    lons2d, lats2d, basemap = analysis.get_basemap_from_hdf(path1)

    lake_fractions = analysis.get_array_from_file(
        path=path1, var_name=infovar.HDF_LAKE_FRACTION_NAME)
    cell_areas = analysis.get_array_from_file(
        path=path1, var_name=infovar.HDF_CELL_AREA_NAME_M2)
    acc_areakm2 = analysis.get_array_from_file(
        path=path1, var_name=infovar.HDF_ACCUMULATION_AREA_NAME)
    depth_to_bedrock = analysis.get_array_from_file(
        path=path1, var_name=infovar.HDF_DEPTH_TO_BEDROCK_NAME)

    cell_manager = CellManager(fldirs,
                               lons2d=lons2d,
                               lats2d=lats2d,
                               accumulation_area_km2=acc_areakm2)

    # get climatologic liquid soil moisture and convert fractions to mm
    # NOTE: time.clock() was removed in Python 3.8 -> use perf_counter()
    t0 = time.perf_counter()
    daily_dates, levels, i1_nointfl = analysis.get_daily_climatology_of_3d_field(
        path_to_hdf_file=path1,
        var_name="I1",
        start_year=start_year,
        end_year=end_year)
    print("read I1 - 1")
    print("Spent {0} seconds ".format(time.perf_counter() - t0))

    _, _, i1_intfl = analysis.get_daily_climatology_of_3d_field(
        path_to_hdf_file=path2,
        var_name="I1",
        start_year=start_year,
        end_year=end_year)
    print("read I1 - 2")

    # get climatologic frozen soil moisture and convert fractions to mm
    _, _, i2_nointfl = analysis.get_daily_climatology_of_3d_field(
        path_to_hdf_file=path1,
        var_name="I2",
        start_year=start_year,
        end_year=end_year)
    print("read I2 - 1")

    _, _, i2_intfl = analysis.get_daily_climatology_of_3d_field(
        path_to_hdf_file=path2,
        var_name="I2",
        start_year=start_year,
        end_year=end_year)
    print("read I2 - 2")

    # Total soil moisture = liquid + frozen
    sm_intfl = i1_intfl + i2_intfl
    sm_nointfl = i1_nointfl + i2_nointfl

    # Get the list of stations to do the comparison with
    stations = cehq_station.read_station_data(
        start_date=datetime(start_year, 1, 1),
        end_date=datetime(end_year, 12, 31),
        selected_ids=selected_station_ids)

    print("sm_noinfl, min, max = {0}, {1}".format(sm_nointfl.min(),
                                                  sm_nointfl.max()))
    print("sm_infl, min, max = {0}, {1}".format(sm_intfl.min(),
                                                sm_intfl.max()))
    diff = (sm_intfl - sm_nointfl)
    #diff *= soil_layer_widths[np.newaxis, :, np.newaxis, np.newaxis] * 1000  # to convert in mm

    print("cell area min,max = {0}, {1}".format(cell_areas.min(),
                                                cell_areas.max()))
    print("acc area min,max = {0}, {1}".format(acc_areakm2.min(),
                                               acc_areakm2.max()))

    assert np.all(lake_fractions >= 0)
    print("lake fractions (min, max): ", lake_fractions.min(),
          lake_fractions.max())

    # No need to go very deep: only the top 3 soil layers are plotted
    nlayers = 3
    z, t = np.meshgrid(soil_tops[:nlayers], date2num(daily_dates))
    station_to_mp = cell_manager.get_model_points_for_stations(stations)

    plotted_global = False

    for the_station, mp in station_to_mp.items():
        assert isinstance(mp, ModelPoint)
        assert isinstance(the_station, Station)
        fig = plt.figure()
        umask = cell_manager.get_mask_of_upstream_cells_connected_with_by_indices(
            mp.ix, mp.jy)

        # exclude lake cells and shallow-bedrock cells from the profiles
        sel = (umask == 1) & (depth_to_bedrock > 3) & (acc_areakm2 >= 0)

        # Weight by the non-lake fraction of the cell area
        umaskf = umask.astype(float)
        umaskf *= (1.0 - lake_fractions) * cell_areas
        umaskf[~sel] = 0.0

        # Area-weighted mean profile over the upstream cells
        profiles = np.tensordot(diff, umaskf) / umaskf.sum()
        print(profiles.shape, profiles.min(), profiles.max(), umaskf.sum(),
              umaskf.min(), umaskf.max())

        # Symmetric contour levels around 0
        d = np.abs(profiles).max()
        print("d = {0}".format(d))
        clevs = np.round(np.linspace(-d, d, 12), decimals=5)

        diff_cmap = cm.get_cmap("RdBu_r", lut=len(clevs) - 1)
        bn = BoundaryNorm(clevs, len(clevs) - 1)

        # BUGFIX: the title previously formatted label2 twice; the plotted
        # field is (label2) - (label1)
        plt.title("({})-({})".format(label2, label1))
        img = plt.contourf(t,
                           z,
                           profiles[:, :nlayers],
                           cmap=diff_cmap,
                           levels=clevs,
                           norm=bn)
        plt.colorbar(img, ticks=clevs)
        ax = plt.gca()
        assert isinstance(ax, Axes)

        ax.invert_yaxis()
        ax.xaxis.set_major_formatter(DateFormatter("%b"))
        ax.xaxis.set_major_locator(MonthLocator())

        fig.savefig(os.path.join(
            images_folder, "{0}_{1}_{2}.jpeg".format(the_station.id, label1,
                                                     label2)),
                    dpi=cpp.FIG_SAVE_DPI,
                    bbox_inches="tight")

        print("processed: {0}".format(the_station))
        # The domain-wide mean plot is produced only once
        if not plotted_global:
            plotted_global = True
            fig = plt.figure()
            sel = (depth_to_bedrock >= 0.1) & (acc_areakm2 >= 0)

            umaskf = (1.0 - lake_fractions) * cell_areas
            umaskf[~sel] = 0.0

            profiles = np.tensordot(diff, umaskf) / umaskf.sum()
            print(profiles.shape, profiles.min(), profiles.max(), umaskf.sum(),
                  umaskf.min(), umaskf.max())

            d = np.abs(profiles).max()
            print("d = {0}".format(d))
            clevs = np.round(np.linspace(-d, d, 12), decimals=5)

            diff_cmap = cm.get_cmap("RdBu_r", lut=len(clevs) - 1)
            bn = BoundaryNorm(clevs, len(clevs) - 1)

            img = plt.contourf(t,
                               z,
                               profiles[:, :nlayers],
                               cmap=diff_cmap,
                               levels=clevs,
                               norm=bn)
            plt.colorbar(img, ticks=clevs)
            ax = plt.gca()
            assert isinstance(ax, Axes)

            ax.invert_yaxis()
            ax.xaxis.set_major_formatter(DateFormatter("%b"))
            ax.xaxis.set_major_locator(MonthLocator())

            fig.savefig(os.path.join(images_folder, "global_mean.jpeg"),
                        dpi=cpp.FIG_SAVE_DPI,
                        bbox_inches="tight")
Esempio n. 22
0
def main():
    """Plot modelled daily-mean water storage (SWSR) for a set of CEHQ stations.

    One panel per station (two columns), each showing the modelled storage
    sampled at the observation dates together with a horizontal line at the
    bankfull storage of the matched grid cell.  Saves the figure as
    ``storage.png``.
    """
    data_path = "/home/huziy/skynet3_exec1/from_guillimin/quebec_test_198501_198612_0.1deg"

    manager = Crcm5ModelDataManager(samples_folder_path=data_path,
                                    file_name_prefix="pm",
                                    all_files_in_samples_folder=True)

    assert isinstance(manager, Crcm5ModelDataManager)
    selected_ids = [
        "104001", "103715", "093806", "093801", "092715", "081006", "061502",
        "040830", "080718"
    ]

    start_date = datetime(1985, 1, 1)
    end_date = datetime(1985, 12, 31)

    stations = cehq_station.read_station_data(selected_ids=selected_ids,
                                              start_date=start_date,
                                              end_date=end_date)

    plot_utils.apply_plot_params(width_pt=None,
                                 height_cm=30.0,
                                 width_cm=16,
                                 font_size=10)
    fig = plt.figure()
    # Two columns of panels; enough rows to hold all stations
    gs = GridSpec(len(stations) // 2 + len(stations) % 2,
                  2,
                  hspace=0.4,
                  wspace=0.4)
    # Northernmost stations are plotted first
    stations.sort(key=lambda x: x.latitude, reverse=True)

    for i, s in enumerate(stations):

        model_ts = manager.get_streamflow_timeseries_for_station(
            s, start_date=start_date, end_date=end_date, var_name="SWSR")
        ax = fig.add_subplot(gs[i // 2, i % 2])

        assert isinstance(model_ts, TimeSeries)
        assert isinstance(s, Station)

        # Daily means of the modelled series, sampled at the observation dates
        model_ts = model_ts.get_ts_of_daily_means()
        print(model_ts.time[0], model_ts.time[-1])
        print(model_ts.data[0:10])

        mod_vals = model_ts.get_data_for_dates(s.dates)
        print(mod_vals[:20])
        print("+" * 20)
        assert len(mod_vals) == len(s.dates)

        ax.plot(s.dates,
                mod_vals,
                label="Model (CRCM5)",
                lw=1,
                color="b")

        # Horizontal reference line at the bankfull storage of the model cell
        bf_store = model_ts.metadata["bankfull_store_m3"]
        ax.plot([s.dates[0], s.dates[-1]], [bf_store, bf_store], color="k")

        # Title: relative drainage-area difference (%) and distance to station
        ax.set_title(
            "%s: da_diff=%.2f %%, dist = %.1f" %
            (s.id, (-s.drainage_km2 + model_ts.metadata["acc_area_km2"]) /
             s.drainage_km2 * 100.0, model_ts.metadata["distance_to_obs"]))

        ax.xaxis.set_major_formatter(DateFormatter("%Y"))
        ax.xaxis.set_major_locator(YearLocator())

    fig.savefig("storage.png")
Esempio n. 23
0
def validate_swe(model_file,
                 obs_manager,
                 season_to_months,
                 simlabel,
                 season_to_plot_indices,
                 start_year,
                 end_year,
                 lake_fraction=None,
                 station_ids_list=None):
    """Plot seasonal biases (model - obs) of snow water equivalent (SWE).

    For each season a map of ``model - obs`` is drawn (model variable "I5",
    observations interpolated to the model grid); cells with a lake fraction
    above 0.9 and ocean cells are masked.  Optionally marks a list of station
    locations.  The figure is saved under ``images_folder/validate_atm``.

    :param model_file: path to the model HDF store
    :param obs_manager: SweDataManager providing the observed SWE fields
    :param season_to_months: mapping {season name: list of month numbers}
    :param simlabel: simulation label used in the figure title and file name
    :param season_to_plot_indices: mapping {season name: (row, col)} in the grid
    :param start_year: first year of the climatology (inclusive)
    :param end_year: last year of the climatology (inclusive)
    :param lake_fraction: optional 2d lake-fraction field used for masking
    :param station_ids_list: optional station ids to mark on the maps
    """
    model_var_name = "I5"
    model_level = None
    assert isinstance(obs_manager, SweDataManager)

    if station_ids_list is not None:
        # Get the list of stations to indicate on the bias map
        stations = cehq_station.read_station_data(
            start_date=None, end_date=None, selected_ids=station_ids_list)
    else:
        stations = []

    if lake_fraction is not None:
        print("lake fraction ranges: {0}, {1}".format(lake_fraction.min(),
                                                      lake_fraction.max()))

    fig = plt.figure()
    obs_manager.name = "Obs."
    fig.suptitle("({0}) - ({1})".format(simlabel, obs_manager.name))

    # 1. read model results
    # 2. plot the differences (model - obs)

    lon, lat, basemap = analysis.get_basemap_from_hdf(file_path=model_file)

    # do calculations and only after that do the plotting
    season_to_field = {}
    for season, months in season_to_months.items():
        model_field = analysis.get_seasonal_climatology(
            start_year=start_year,
            end_year=end_year,
            months=months,
            level=model_level,
            var_name=model_var_name,
            hdf_path=model_file)

        obs_field = obs_manager.getMeanFieldForMonthsInterpolatedTo(
            months=months,
            lons_target=lon,
            lats_target=lat,
            start_year=start_year,
            end_year=end_year)

        # the difference between the modelled and observed fields
        season_to_field[season] = model_field - obs_field

    # 2 x 2 map panels plus a narrow colorbar column
    gs = gridspec.GridSpec(2, 3, width_ratios=[1, 1, 0.05])

    x, y = basemap(lon, lat)
    im = None

    # Limit the magnitude of the plotted difference to 100 mm
    d = 100
    vmin = -d
    vmax = d

    # Symmetric, non-uniform color boundaries around 0
    bounds = [-100, -80, -50, -20, -10, -5]
    bounds += [
        0,
    ] + [-b for b in reversed(bounds)]
    bn = BoundaryNorm(bounds, ncolors=len(bounds) - 1)
    cmap = cm.get_cmap("RdBu_r", len(bounds) - 1)

    print("bounds: ", bounds)

    for season, field in season_to_field.items():

        # SWE is not meaningful in summer
        if season.lower() == "summer":
            print("Warning: skipping summer season for SWE")
            continue

        row, col = season_to_plot_indices[season]
        ax = fig.add_subplot(gs[row, col])
        ax.set_title(season)

        basemap.drawmapboundary(fill_color="gray")
        if lake_fraction is not None:
            # Mask cells dominated by lakes
            to_plot = np.ma.masked_where((lake_fraction > 0.9), field)
        else:
            to_plot = field

        to_plot = maskoceans(lon, lat, to_plot)
        im = basemap.pcolormesh(x,
                                y,
                                to_plot,
                                vmin=vmin,
                                vmax=vmax,
                                cmap=cmap,
                                norm=bn)
        basemap.drawcoastlines(ax=ax, linewidth=cpp.COASTLINE_WIDTH)

        # Mark the requested station locations
        for the_station in stations:
            assert isinstance(the_station, Station)
            xst, yst = basemap(the_station.longitude, the_station.latitude)
            basemap.scatter(xst, yst, c="g")

    # Shared colorbar in the last grid column
    cax = fig.add_subplot(gs[:, 2])

    units_str = r"${\rm mm}$"
    cax.set_title("{0}\n".format(units_str))
    plt.colorbar(im, cax=cax, ticks=bounds, extend="both")

    seasons_str = "_".join(
        sorted([str(s) for s in list(season_to_months.keys())]))
    atm_val_folder = os.path.join(images_folder, "validate_atm")
    if not os.path.isdir(atm_val_folder):
        os.mkdir(atm_val_folder)

    # BUGFIX: out_filename already contains atm_val_folder (which lives under
    # images_folder); the previous os.path.join(images_folder, out_filename)
    # produced a doubled path prefix.
    out_filename = os.path.join(
        atm_val_folder,
        "validate_2d_{0}_{1}_{2}.png".format(model_var_name, simlabel,
                                             seasons_str))
    fig.savefig(out_filename, bbox_inches="tight")
Esempio n. 24
0
def validate_as_is():
    """Compare raw (as-is) simulated streamflow with station observations.

    For each CEHQ station, plots the observed series together with the
    streamflow from each simulation's matched model point, annotating annual
    means and upstream lake fractions.  All per-station figures are appended
    to ``comp_with_obs_as_is.pdf``.
    """
    # years are inclusive
    start_year = 1979
    end_year = 1988

    sim_name_list = ["crcm5-r", "crcm5-hcd-r", "crcm5-hcd-rl"]
    rpn_folder_path_form = "/home/huziy/skynet3_rech1/from_guillimin/new_outputs/quebec_0.1_{0}_spinup"
    nc_db_folder = "/home/huziy/skynet3_rech1/crcm_data_ncdb"

    # select all stations with data in the period (no id filter)
    selected_ids = None
    start_date = datetime(start_year, 1, 1)
    end_date = datetime(end_year, 12, 31)

    stations = cehq_station.read_station_data(selected_ids=selected_ids,
                                              start_date=start_date,
                                              end_date=end_date)

    varname = "STFL"
    sim_name_to_manager = {}
    sim_name_to_station_to_model_point = {}
    dmManager = None
    for sim_name in sim_name_list:
        rpn_folder = rpn_folder_path_form.format(sim_name)

        dmManager = Crcm5ModelDataManager(samples_folder_path=rpn_folder,
                                          file_name_prefix="dm",
                                          all_files_in_samples_folder=True,
                                          need_cell_manager=True)

        sim_name_to_manager[sim_name] = dmManager

        nc_path = os.path.join(nc_db_folder, sim_name,
                               "{0}_all.nc".format(varname))
        st_to_mp = dmManager.get_model_points_for_stations(stations,
                                                           nc_path=nc_path,
                                                           varname=varname)

        sim_name_to_station_to_model_point[sim_name] = st_to_mp

    # All simulations share the same grid, so the lake fractions of the last
    # manager apply to all of them
    common_lake_fractions = dmManager.lake_fraction

    from matplotlib.backends.backend_pdf import PdfPages
    pp = PdfPages('comp_with_obs_as_is.pdf')
    for s in stations:

        # check the availability of the data
        assert isinstance(s, Station)

        plt.figure()
        # Build the observed series once and reuse it for the plot
        obs_data = [s.date_to_value[d] for d in s.dates]
        obs_ann_mean = np.mean(obs_data)
        plt.plot(s.dates, obs_data,
                 label="Obs \n ann.mean = {0:.1f}".format(obs_ann_mean))

        mp = None
        for sim_name in sim_name_list:
            manager = sim_name_to_manager[sim_name]
            if s not in sim_name_to_station_to_model_point[sim_name]:
                continue

            mp = sim_name_to_station_to_model_point[sim_name][s]
            plt.plot(mp.time,
                     mp.data[:, 0],
                     label="{0}: {1:.2f} \n ann.mean = {2:.1f}".format(
                         sim_name,
                         manager.lake_fraction[mp.flow_in_mask == 1].mean(),
                         mp.data[:, 0].mean()))

        # Skip stations not matched by any simulation
        if mp is None:
            continue

        # Draw the legend once, after all curves for this station are plotted
        plt.legend()
        plt.title("{0}: point lake fraction={1:.4f}".format(
            s.id, common_lake_fractions[mp.ix, mp.jy]))

        pp.savefig()

    pp.close()
def main(start_year=1980, end_year=1989):
    """Compare climatological soil-moisture profiles between two simulations.

    Computes the daily climatology of total soil moisture (liquid I1 + frozen
    I2) for two CRCM5-HCD-RL-INTFL runs and, for each selected station, plots
    a time-depth contour of the difference (``label2`` - ``label1``) averaged
    over the non-lake cells upstream of the station, plus one domain-wide
    mean plot.  Figures are saved under
    ``images_for_lake-river_paper/comp_soil_profiles``.

    :param start_year: first year of the climatology (inclusive)
    :param end_year: last year of the climatology (inclusive)
    """

    soil_layer_widths = infovar.soil_layer_widths_26_to_60
    # Depths of the layer tops: 0 plus cumulative widths of all but the last layer
    soil_tops = np.cumsum(soil_layer_widths).tolist()[:-1]
    soil_tops = [0, ] + soil_tops

    selected_station_ids = [
        "061905", "074903", "090613", "092715", "093801", "093806"
    ]

#    path1 = "/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-rl_spinup.hdf"
#    label1 = "CRCM5-HCD-RL"

    path1 = "/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-rl-intfl_spinup_ITFS.hdf5"
    label1 = "CRCM5-HCD-RL-INTFL"

    path2 = "/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-rl-intfl_ITFS_avoid_truncation1979-1989.hdf5"
    label2 = "CRCM5-HCD-RL-INTFL-improved"

    ############
    images_folder = "images_for_lake-river_paper/comp_soil_profiles"
    if not os.path.isdir(images_folder):
        os.mkdir(images_folder)

    # Static model-grid fields (flow directions, lake fractions, areas, ...)
    fldirs = analysis.get_array_from_file(path=path1, var_name=infovar.HDF_FLOW_DIRECTIONS_NAME)
    lons2d, lats2d, basemap = analysis.get_basemap_from_hdf(path1)

    lake_fractions = analysis.get_array_from_file(path=path1, var_name=infovar.HDF_LAKE_FRACTION_NAME)
    cell_areas = analysis.get_array_from_file(path=path1, var_name=infovar.HDF_CELL_AREA_NAME_M2)
    acc_areakm2 = analysis.get_array_from_file(path=path1, var_name=infovar.HDF_ACCUMULATION_AREA_NAME)
    depth_to_bedrock = analysis.get_array_from_file(path=path1, var_name=infovar.HDF_DEPTH_TO_BEDROCK_NAME)

    cell_manager = CellManager(fldirs, lons2d=lons2d, lats2d=lats2d, accumulation_area_km2=acc_areakm2)

    # get climatologic liquid soil moisture and convert fractions to mm
    # NOTE: time.clock() was removed in Python 3.8 -> use perf_counter()
    t0 = time.perf_counter()
    daily_dates, levels, i1_nointfl = analysis.get_daily_climatology_of_3d_field(
        path_to_hdf_file=path1,
        var_name="I1",
        start_year=start_year,
        end_year=end_year
    )
    print("read I1 - 1")
    print("Spent {0} seconds ".format(time.perf_counter() - t0))

    _, _, i1_intfl = analysis.get_daily_climatology_of_3d_field(
        path_to_hdf_file=path2,
        var_name="I1",
        start_year=start_year,
        end_year=end_year
    )
    print("read I1 - 2")

    # get climatologic frozen soil moisture and convert fractions to mm
    _, _, i2_nointfl = analysis.get_daily_climatology_of_3d_field(
        path_to_hdf_file=path1,
        var_name="I2",
        start_year=start_year,
        end_year=end_year
    )
    print("read I2 - 1")

    _, _, i2_intfl = analysis.get_daily_climatology_of_3d_field(
        path_to_hdf_file=path2,
        var_name="I2",
        start_year=start_year,
        end_year=end_year
    )
    print("read I2 - 2")

    # Total soil moisture = liquid + frozen
    sm_intfl = i1_intfl + i2_intfl
    sm_nointfl = i1_nointfl + i2_nointfl

    # Get the list of stations to do the comparison with
    stations = cehq_station.read_station_data(
        start_date=datetime(start_year, 1, 1),
        end_date=datetime(end_year, 12, 31),
        selected_ids=selected_station_ids
    )

    print("sm_noinfl, min, max = {0}, {1}".format(sm_nointfl.min(), sm_nointfl.max()))
    print("sm_infl, min, max = {0}, {1}".format(sm_intfl.min(), sm_intfl.max()))
    diff = (sm_intfl - sm_nointfl)
    #diff *= soil_layer_widths[np.newaxis, :, np.newaxis, np.newaxis] * 1000  # to convert in mm

    print("cell area min,max = {0}, {1}".format(cell_areas.min(), cell_areas.max()))
    print("acc area min,max = {0}, {1}".format(acc_areakm2.min(), acc_areakm2.max()))

    assert np.all(lake_fractions >= 0)
    print("lake fractions (min, max): ", lake_fractions.min(), lake_fractions.max())

    # No need to go very deep: only the top 3 soil layers are plotted
    nlayers = 3
    z, t = np.meshgrid(soil_tops[:nlayers], date2num(daily_dates))
    station_to_mp = cell_manager.get_model_points_for_stations(stations)

    plotted_global = False

    for the_station, mp in station_to_mp.items():
        assert isinstance(mp, ModelPoint)
        assert isinstance(the_station, Station)
        fig = plt.figure()
        umask = cell_manager.get_mask_of_upstream_cells_connected_with_by_indices(mp.ix, mp.jy)

        # exclude lake cells and shallow-bedrock cells from the profiles
        sel = (umask == 1) & (depth_to_bedrock > 3) & (acc_areakm2 >= 0)

        # Weight by the non-lake fraction of the cell area
        umaskf = umask.astype(float)
        umaskf *= (1.0 - lake_fractions) * cell_areas
        umaskf[~sel] = 0.0

        # Area-weighted mean profile over the upstream cells
        profiles = np.tensordot(diff, umaskf) / umaskf.sum()
        print(profiles.shape, profiles.min(), profiles.max(), umaskf.sum(), umaskf.min(), umaskf.max())

        # Symmetric contour levels around 0
        d = np.abs(profiles).max()
        print("d = {0}".format(d))
        clevs = np.round(np.linspace(-d, d, 12), decimals=5)

        diff_cmap = cm.get_cmap("RdBu_r", lut=len(clevs) - 1)
        bn = BoundaryNorm(clevs, len(clevs) - 1)

        # BUGFIX: the title previously formatted label2 twice; the plotted
        # field is (label2) - (label1)
        plt.title("({})-({})".format(label2, label1))
        img = plt.contourf(t, z, profiles[:, :nlayers], cmap=diff_cmap, levels=clevs, norm=bn)
        plt.colorbar(img, ticks=clevs)
        ax = plt.gca()
        assert isinstance(ax, Axes)

        ax.invert_yaxis()
        ax.xaxis.set_major_formatter(DateFormatter("%b"))
        ax.xaxis.set_major_locator(MonthLocator())

        fig.savefig(os.path.join(images_folder, "{0}_{1}_{2}.jpeg".format(the_station.id, label1, label2)),
                    dpi=cpp.FIG_SAVE_DPI, bbox_inches="tight")

        print("processed: {0}".format(the_station))
        # The domain-wide mean plot is produced only once
        if not plotted_global:
            plotted_global = True
            fig = plt.figure()
            sel = (depth_to_bedrock >= 0.1) & (acc_areakm2 >= 0)

            umaskf = (1.0 - lake_fractions) * cell_areas
            umaskf[~sel] = 0.0

            profiles = np.tensordot(diff, umaskf) / umaskf.sum()
            print(profiles.shape, profiles.min(), profiles.max(), umaskf.sum(), umaskf.min(), umaskf.max())

            d = np.abs(profiles).max()
            print("d = {0}".format(d))
            clevs = np.round(np.linspace(-d, d, 12), decimals=5)

            diff_cmap = cm.get_cmap("RdBu_r", lut=len(clevs) - 1)
            bn = BoundaryNorm(clevs, len(clevs) - 1)

            img = plt.contourf(t, z, profiles[:, :nlayers], cmap=diff_cmap, levels=clevs, norm=bn)
            plt.colorbar(img, ticks=clevs)
            ax = plt.gca()
            assert isinstance(ax, Axes)

            ax.invert_yaxis()
            ax.xaxis.set_major_formatter(DateFormatter("%b"))
            ax.xaxis.set_major_locator(MonthLocator())

            fig.savefig(os.path.join(images_folder, "global_mean.jpeg"),
                        dpi=cpp.FIG_SAVE_DPI, bbox_inches="tight")
Esempio n. 26
0
def validate_daily_climatology():
    """
    Validate simulated daily streamflow climatologies against observations.

    For each CEHQ/HYDAT station with at least 6 complete years of data, plot
    hydrographs, upstream SWE, temperature, precipitation and runoff components
    for every simulation in ``sim_name_list``, plus flow directions and basin
    boundaries, saving the panels into one PDF per station
    (``nc_diagnose_<station id>.pdf``).
    """
    #years are inclusive
    start_year = 1979
    end_year = 1988

    #sim_name_list = ["crcm5-r",  "crcm5-hcd-r", "crcm5-hcd-rl"]
    sim_name_list = ["crcm5-hcd-rl", "crcm5-hcd-rl-intfl"]

    rpn_folder_paths = [
        "/home/huziy/skynet3_rech1/from_guillimin/new_outputs/quebec_0.1_{0}_spinup".format(sim_name_list[0]),
        "/home/huziy/skynet3_rech1/from_guillimin/new_outputs/quebec_0.1_{0}_spinup2/Samples_all_in_one_folder".format(
            sim_name_list[1])
    ]

    nc_db_folder = "/home/huziy/skynet3_rech1/crcm_data_ncdb"

    # select stations: None disables id filtering (all available stations).
    # NOTE(review): the previously hard-coded id lists were dead code -- every
    # assignment was overwritten by ``selected_ids = None`` before first use --
    # so they have been removed.
    selected_ids = None

    start_date = datetime(start_year, 1, 1)
    end_date = datetime(end_year, 12, 31)

    stations = cehq_station.read_station_data(selected_ids=selected_ids,
                                              start_date=start_date, end_date=end_date
    )

    stations_hydat = cehq_station.read_hydat_station_data(folder_path="/home/huziy/skynet3_rech1/HYDAT",
                                                          start_date=start_date, end_date=end_date)

    stations.extend(stations_hydat)

    varname = "STFL"
    sim_name_to_manager = {}
    sim_name_to_station_to_model_point = {}

    # day stamps of one (arbitrary non-leap) year used as the common x-axis
    # for the daily climatologies
    day_stamps = Station.get_stamp_days(2001)
    sweManager = SweDataManager(var_name="SWE")
    cruTempManager = CRUDataManager(lazy=True)
    cruPreManager = CRUDataManager(var_name="pre", lazy=True,
                                   path="data/cru_data/CRUTS3.1/cru_ts_3_10.1901.2009.pre.dat.nc")

    #common lake fractions when comparing simulations on the same grid
    all_model_points = []

    cell_manager = None

    for sim_name, rpn_folder in zip(sim_name_list, rpn_folder_paths):

        dmManager = Crcm5ModelDataManager(samples_folder_path=rpn_folder, file_name_prefix="dm",
                                          all_files_in_samples_folder=True, need_cell_manager=cell_manager is None)

        # here using the fact that all the simulations are on the same grid:
        # the cell manager is built once and shared between all managers
        if cell_manager is None:
            cell_manager = dmManager.cell_manager
        else:
            dmManager.cell_manager = cell_manager

        #determine comon lake fractions, so it is not taken from the trivial case lf = 0, but note
        #this has only sense when all the simulations were performed on the same grid
        sim_name_to_manager[sim_name] = dmManager

        nc_sim_folder = os.path.join(nc_db_folder, sim_name)
        nc_path = os.path.join(nc_sim_folder, "{0}_all.nc4".format(varname))

        #In general there are several model points corresponding to a given station
        st_to_mp = dmManager.get_model_points_for_stations(stations, sim_name=sim_name,
                                                           nc_path=nc_path,
                                                           nc_sim_folder=nc_sim_folder,
                                                           set_data_to_model_points=True)

        print("got model points for stations")

        sim_name_to_station_to_model_point[sim_name] = st_to_mp

        #save model points to a list of all points
        for s, mps in st_to_mp.items():
            assert isinstance(s, Station)
            for mp in mps:
                assert isinstance(mp, ModelPoint)
                # calculate upstream observed means only once per station:
                # they are derived from observations and do not depend on
                # the simulation being processed
                if s.mean_swe_upstream_daily_clim is None:
                    s.mean_swe_upstream_daily_clim = sweManager.get_mean_upstream_timeseries_daily(mp, dmManager,
                                                                                                   stamp_dates=day_stamps)
                    #These are taken from CRU dataset, only monthly data are available
                    s.mean_temp_upstream_monthly_clim = cruTempManager.get_mean_upstream_timeseries_monthly(mp,
                                                                                                            dmManager)
                    s.mean_prec_upstream_monthly_clim = cruPreManager.get_mean_upstream_timeseries_monthly(mp,
                                                                                                           dmManager)

                    print("Calculated observed upstream mean values...")
            all_model_points.extend(mps)

    print("imported input data successfully, plotting ...")

    #for tests
    #test(sim_name_to_station_to_model_point)

    #select only stations which have corresponding model points
    stations = list(sim_name_to_station_to_model_point[sim_name_list[0]].keys())

    from matplotlib.backends.backend_pdf import PdfPages

    for s in stations:
        years = s.get_list_of_complete_years()
        if len(years) < 6: continue #skip stations with less than 6 continuous years of data

        pp = PdfPages("nc_diagnose_{0}.pdf".format(s.id))

        #plot hydrographs
        fig = plt.figure()
        gs = gridspec.GridSpec(3, 3, left=0.05, hspace=0.3, wspace=0.2)
        ax_stfl = fig.add_subplot(gs[0, 0])
        labels, handles = plot_hydrographs(ax_stfl, s, sim_name_to_station_to_model_point,
                                           day_stamps=day_stamps, sim_names=sim_name_list
        )
        plt.setp(ax_stfl.get_xticklabels(), visible=False) #do not show ticklabels for upper rows

        fig.legend(handles, labels, "lower right")

        #plot swe 1d compare with obs
        ax_swe = fig.add_subplot(gs[1, 0], sharex=ax_stfl)
        plot_swe_1d_compare_with_obs(ax_swe, s, sim_name_to_station_to_model_point,
                                     day_stamps=day_stamps, sim_names=sim_name_list)

        #plot mean temp 1d compare with obs   -- here plot biases directly...??
        ax_temp = fig.add_subplot(gs[0, 2])
        plot_temp_1d_compare_with_obs(ax_temp, s, sim_name_to_station_to_model_point, sim_names=sim_name_list)
        plt.setp(ax_temp.get_xticklabels(), visible=False) #do not show ticklabels for upper rows

        #plot mean precip 1d compare with obs   -- here plot biases directly...??
        ax = fig.add_subplot(gs[1, 2], sharex=ax_temp)
        plot_precip_1d_compare_with_obs(ax, s, sim_name_to_station_to_model_point, sim_names=sim_name_list)

        #plot mean Surface and subsurface runoff
        ax = fig.add_subplot(gs[0, 1], sharex=ax_stfl)
        plot_surf_runoff(ax, s, sim_name_to_station_to_model_point, sim_names=sim_name_list)
        plt.setp(ax.get_xticklabels(), visible=False) #do not show ticklabels for upper rows

        ax = fig.add_subplot(gs[1, 1], sharex=ax_stfl)
        plot_subsurf_runoff(ax, s, sim_name_to_station_to_model_point, sim_names=sim_name_list)
        plt.setp(ax.get_xticklabels(), visible=False) #do not show ticklabels for upper rows

        ax = fig.add_subplot(gs[2, 1], sharex=ax_stfl)
        plot_total_runoff(ax, s, sim_name_to_station_to_model_point, sim_names=sim_name_list)

        # first PDF page: hydrographs and 1d comparisons
        pp.savefig()

        #plot flow direction and basin boundaries
        fig = plt.figure()
        gs = gridspec.GridSpec(1, 2, right=0.99, bottom=0.001)
        ax = fig.add_subplot(gs[0, 1])
        plot_flow_directions_and_basin_boundaries(ax, s, sim_name_to_station_to_model_point,
                                                  sim_name_to_manager=sim_name_to_manager)
        # second PDF page: flow directions and basin boundaries
        pp.savefig()

        #plot 2d correlation between wind speed and measured streamflow at the station

        pp.close()
Esempio n. 27
0
def diagnose(station_ids=None, model_data_path=None):
    """
    Build one multi-panel diagnostic figure per station for the year 1986:
    hydrographs, relative streamflow errors, flow directions over the upstream
    mask, runoff, precipitation/temperature errors and SWE time series, saved
    as ``diagnose_<station id>_<dx>deg.pdf``.

    :param station_ids: ids of the CEHQ stations to process (None -> all)
    :param model_data_path: folder with the model output samples ("pm" files)
    """

    manager = Crcm5ModelDataManager(samples_folder_path=model_data_path,
                                    file_name_prefix="pm",
                                    all_files_in_samples_folder=True,
                                    need_cell_manager=True)

    nx, ny = manager.lons2D.shape

    # rotated lat/lon projection of the model grid
    # (assumes these pole parameters match the simulation setup -- TODO confirm)
    rot_lat_lon = RotatedLatLon(lon1=-68, lat1=52, lon2=16.65, lat2=0.0)

    # projection coordinates of three corner cells, used to derive the grid
    # spacing along x (x10 - x00) and along y (y01 - y00)
    x00, y00 = rot_lat_lon.toProjectionXY(manager.lons2D[0, 0],
                                          manager.lats2D[0, 0])
    x10, y10 = rot_lat_lon.toProjectionXY(manager.lons2D[1, 0],
                                          manager.lats2D[1, 0])
    x01, y01 = rot_lat_lon.toProjectionXY(manager.lons2D[0, 1],
                                          manager.lats2D[0, 1])

    dx = x10 - x00
    dy = y01 - y00

    print("dx, dy = {0}, {1}".format(dx, dy))
    areas = rot_lat_lon.get_areas_of_gridcells(
        dx, dy, nx, ny, y00, 1)  #1 -since the index is starting from 1
    print(areas[0, 0])

    # analysis period (one year)
    start_date = datetime(1986, 1, 1)
    end_date = datetime(1986, 12, 31)

    stations = cehq_station.read_station_data(selected_ids=station_ids,
                                              start_date=start_date,
                                              end_date=end_date)

    # plot stations from north to south
    stations.sort(key=lambda x: x.latitude, reverse=True)

    for i, s in enumerate(stations):

        fig = plt.figure()
        #3 columns
        gs = GridSpec(5,
                      3,
                      hspace=0.2,
                      wspace=0.2,
                      right=0.98,
                      left=0.1,
                      top=0.98)

        model_ts = manager.get_streamflow_timeseries_for_station(
            s, start_date=start_date, end_date=end_date, nneighbours=9)

        print(model_ts.time[0], model_ts.time[-1])

        # grid indices of the model point matched to the station; the mask
        # selects all cells draining into that point
        i_model0, j_model0 = model_ts.metadata["ix"], model_ts.metadata["jy"]
        mask = manager.get_mask_for_cells_upstream(i_model0, j_model0)

        #hydrographs
        ax = fig.add_subplot(gs[0, 0])
        plot_streamflows(ax, s, model_ts)

        #relative error
        ax = fig.add_subplot(gs[1, 0])
        plot_streamflow_re(ax, s, model_ts)

        #directions
        plot_directions_and_positions(fig.add_subplot(gs[:2, 1]),
                                      s,
                                      model_ts,
                                      manager,
                                      rot_lat_lon,
                                      mask=mask)

        #runoff
        ax = fig.add_subplot(gs[2, 0])
        plot_runoff(ax, manager, areas, model_ts, mask=mask)

        #runoff from gldas
        ax = fig.add_subplot(gs[2, 1])
        #plot_gldas_runoff(ax, manager, areas, model_ts, mask = mask)

        #temperature
        ax_temp = fig.add_subplot(gs[3, 0])
        ax_prec = fig.add_subplot(gs[4, 0])

        plot_total_precip_and_temp_re_1d(ax_prec,
                                         ax_temp,
                                         manager,
                                         rot_lat_lon,
                                         areas,
                                         model_ts,
                                         mask=mask)

        #swe timeseries
        ax = fig.add_subplot(gs[3, 1])
        plot_swe_timeseries(ax, manager, areas, model_ts, mask=mask)

        #print np.where(mask == 1)
        print("(i, j) = ({0}, {1})".format(model_ts.metadata["ix"],
                                           model_ts.metadata["jy"]))

        fig.savefig("diagnose_{0}_{1:.2f}deg.pdf".format(s.id, dx))
Esempio n. 28
0
def main():
    """
    Plot autocorrelation functions of modelled (CRCM5) and observed daily
    streamflow, one panel per selected station, and save the figure as
    ``acorr_without_lakes_0.1deg_1year.png``.

    Bug fix: the non-negative-lag half of the full correlation arrays was
    extracted with a float index (``len(a) / 2``), which raises ``TypeError``
    under Python 3; integer division is used now.
    """
    data_path = "/home/huziy/skynet3_exec1/from_guillimin/quebec_test_198501_198612_0.1deg"

    manager = Crcm5ModelDataManager(samples_folder_path=data_path,
                                    file_name_prefix="pm",
                                    all_files_in_samples_folder=True)
    selected_ids = [
        "104001", "103715", "093806", "093801", "092715", "081006", "061502",
        "040830", "080718"
    ]

    # analysis period (one year)
    start_date = datetime(1986, 1, 1)
    end_date = datetime(1986, 12, 31)

    stations = cehq_station.read_station_data(selected_ids=selected_ids,
                                              start_date=start_date,
                                              end_date=end_date)
    plot_utils.apply_plot_params(width_pt=None,
                                 height_cm=30.0,
                                 width_cm=16,
                                 font_size=10)
    fig = plt.figure()
    #two columns
    gs = GridSpec(len(stations) // 2 + len(stations) % 2,
                  2,
                  hspace=0.4,
                  wspace=0.4)
    line_model, line_obs = None, None
    # plot stations from north to south
    stations.sort(key=lambda x: x.latitude, reverse=True)

    for i, s in enumerate(stations):
        model_ts = manager.get_streamflow_timeseries_for_station(
            s, start_date=start_date, end_date=end_date)
        ax = fig.add_subplot(gs[i // 2, i % 2])

        assert isinstance(model_ts, TimeSeries)
        assert isinstance(s, Station)

        model_ts = model_ts.get_ts_of_daily_means()
        print(model_ts.time[0], model_ts.time[-1])
        print(model_ts.data[0:10])

        mod_vals = model_ts.get_data_for_dates(s.dates)
        print(mod_vals[:20])
        print("+" * 20)
        assert len(mod_vals) == len(s.dates)

        # number of overlapping samples contributing to each lag of the raw
        # correlation sum, used to normalize the lags
        npoints = np.array(list(range(len(mod_vals), 0, -1)))

        # keep only the non-negative lags (second half of the symmetric
        # "full" correlation) and scale by the peak value
        model_acorr = np.correlate(mod_vals, mod_vals, mode="full")
        model_acorr = model_acorr[len(model_acorr) // 2:] / max(model_acorr)
        model_acorr /= npoints

        obs_acorr = np.correlate(s.values, s.values, mode="full")
        obs_acorr = obs_acorr[len(obs_acorr) // 2:] / max(obs_acorr)
        obs_acorr /= npoints

        print(len(model_acorr), len(s.dates))

        line_model = ax.plot(s.dates,
                             model_acorr,
                             label="Model (CRCM5)",
                             lw=1,
                             color="b")
        line_obs = ax.plot(s.dates,
                           obs_acorr,
                           label="Observation",
                           lw=3,
                           color="r",
                           alpha=0.5)

        # title: relative drainage area difference (%) and distance from the
        # model point to the gauge
        ax.set_title(
            "%s: da_diff=%.2f %%, dist = %.1f" %
            (s.id, (-s.drainage_km2 + model_ts.metadata["acc_area_km2"]) /
             s.drainage_km2 * 100.0, model_ts.metadata["distance_to_obs"]))

        ax.xaxis.set_major_formatter(DateFormatter("%b"))
        ax.xaxis.set_major_locator(MonthLocator(bymonth=list(range(1, 13, 2))))

    lines = (line_model, line_obs)
    labels = ("Model (CRCM5)", "Observation")
    fig.legend(lines, labels)
    fig.savefig("acorr_without_lakes_0.1deg_1year.png")
Esempio n. 29
0
def main():
    """
    Compare seasonal climatologies of two CRCM5 simulations (with and without
    lakes) against observations: spring air temperature (TT) and winter snow
    water equivalent (I5), drawing the bias maps with the selected gauging
    stations and their upstream areas overlaid, and save one combined png.
    """
    # Define the simulations to be validated
    r_config = RunConfig(
        data_path=
        "/RESCUE/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-r.hdf5",
        start_year=1990,
        end_year=2010,
        label="CRCM5-L1")
    r_config_list = [r_config]

    r_config = RunConfig(
        data_path=
        "/RESCUE/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-r.hdf5",
        start_year=1990,
        end_year=2010,
        label="CRCM5-NL")
    r_config_list.append(r_config)

    # map background/decoration options shared by all panels
    bmp_info = analysis.get_basemap_info_from_hdf(file_path=r_config.data_path)
    bmp_info.should_draw_grey_map_background = True
    bmp_info.should_draw_basin_boundaries = False
    bmp_info.map_bg_color = "0.75"

    station_ids = ["104001", "093806", "093801", "081002", "081007", "080718"]

    # get river network information used in the model
    flow_directions = analysis.get_array_from_file(
        r_config.data_path, var_name=infovar.HDF_FLOW_DIRECTIONS_NAME)
    accumulation_area_km2 = analysis.get_array_from_file(
        path=r_config.data_path, var_name=infovar.HDF_ACCUMULATION_AREA_NAME)
    cell_manager = CellManager(flow_dirs=flow_directions,
                               lons2d=bmp_info.lons,
                               lats2d=bmp_info.lats,
                               accumulation_area_km2=accumulation_area_km2)

    # Get the list of stations to indicate on the bias map
    stations = cehq_station.read_station_data(start_date=None,
                                              end_date=None,
                                              selected_ids=station_ids)
    """:type : list[Station]"""

    # outlines of the area draining into each station's model point
    xx, yy = bmp_info.get_proj_xy()
    station_to_modelpoint = cell_manager.get_model_points_for_stations(
        station_list=stations)
    upstream_edges = cell_manager.get_upstream_polygons_for_points(
        model_point_list=station_to_modelpoint.values(), xx=xx, yy=yy)

    bmp_info.draw_colorbar_for_each_subplot = True

    # Validate temperature, precip and swe
    obs_path_anusplin = "/home/huziy/skynet3_rech1/anusplin_links"
    obs_path_swe = "data/swe_ross_brown/swe.nc"
    model_var_to_obs_path = OrderedDict([("TT", obs_path_anusplin),
                                         ("I5", obs_path_swe)])

    # seasons (as month lists) to validate for each model variable
    model_var_to_season = OrderedDict([
        ("TT", OrderedDict([("Spring", range(3, 6))])),
        ("I5", OrderedDict([("Winter", [1, 2, 12])]))
    ])

    vname_to_obs_data = {}

    # parameters that won't change in the loop over variable names
    params_const = dict(rconfig=r_config, bmp_info=bmp_info)

    for vname, obs_path in model_var_to_obs_path.items():
        season_to_obs_data = get_seasonal_clim_obs_data(
            vname=vname,
            obs_path=obs_path,
            season_to_months=model_var_to_season[vname],
            **params_const)

        # Comment swe over lakes, since I5 calculated only for land
        if vname in [
                "I5",
        ]:
            for season in season_to_obs_data:
                season_to_obs_data[season] = maskoceans(
                    bmp_info.lons,
                    bmp_info.lats,
                    season_to_obs_data[season],
                    inlands=True)

        vname_to_obs_data[vname] = season_to_obs_data

    # Plotting
    plot_all_vars_in_one_fig = True

    fig = None
    gs = None
    if plot_all_vars_in_one_fig:
        plot_utils.apply_plot_params(font_size=12,
                                     width_pt=None,
                                     width_cm=25,
                                     height_cm=20)
        fig = plt.figure()
        # one column per variable plus a narrow one for the colorbar
        ncols = len(model_var_to_obs_path) + 1
        gs = GridSpec(len(r_config_list),
                      ncols,
                      width_ratios=(ncols - 1) * [
                          1.,
                      ] + [
                          0.05,
                      ])
    else:
        plot_utils.apply_plot_params(font_size=12,
                                     width_pt=None,
                                     width_cm=25,
                                     height_cm=25)

    # projected station coordinates, computed lazily in the first panel
    # and reused for all the others
    station_x_list = []
    station_y_list = []

    mvarname_to_cs = {}
    for row, r_config in enumerate(r_config_list):
        for col, mname in enumerate(model_var_to_obs_path):

            row_axes = [
                fig.add_subplot(gs[row, col]),
            ]

            mvarname_to_cs[mname] = compare_vars(
                vname_model=mname,
                vname_to_obs=vname_to_obs_data,
                r_config=r_config,
                season_to_months=model_var_to_season[mname],
                bmp_info_agg=bmp_info,
                axes_list=row_axes)

            # -1 in order to exclude colorbars
            for the_ax in row_axes:

                the_ax.set_title(the_ax.get_title() + ", {}".format(
                    infovar.get_long_display_label_for_var(mname)))
                # Need titles only for the first row
                if row > 0:
                    the_ax.set_title("")

                if col == 0:
                    the_ax.set_ylabel(r_config.label)
                else:
                    the_ax.set_ylabel("")

                draw_upstream_area_bounds(the_ax, upstream_edges, color="g")

                if len(station_x_list) == 0:
                    for the_station in stations:
                        xst, yst = bmp_info.basemap(the_station.longitude,
                                                    the_station.latitude)
                        station_x_list.append(xst)
                        station_y_list.append(yst)

                bmp_info.basemap.scatter(station_x_list,
                                         station_y_list,
                                         c="g",
                                         ax=the_ax,
                                         s=20,
                                         zorder=10,
                                         alpha=0.5)

    # Save the figure if necessary
    if plot_all_vars_in_one_fig:

        if not img_folder.is_dir():
            img_folder.mkdir(parents=True)

        fig_path = img_folder.joinpath("{}.png".format(
            "_".join(model_var_to_obs_path)))
        with fig_path.open("wb") as figfile:
            fig.savefig(figfile, format="png", bbox_inches="tight")

        plt.close(fig)
Esempio n. 30
0
def main():
    """
    Plot autocorrelation functions of modelled (CRCM5) and observed daily
    streamflow, one panel per selected station, and save the figure as
    ``acorr_without_lakes_0.1deg_1year.png``.

    Bug fix: the non-negative-lag half of the full correlation arrays was
    extracted with a float index (``len(a) / 2``), which raises ``TypeError``
    under Python 3; integer division is used now.
    """
    data_path = "/home/huziy/skynet3_exec1/from_guillimin/quebec_test_198501_198612_0.1deg"

    manager = Crcm5ModelDataManager(samples_folder_path=data_path,
            file_name_prefix="pm", all_files_in_samples_folder=True
    )
    selected_ids = ["104001", "103715", "093806", "093801", "092715",
                    "081006", "061502", "040830", "080718"]

    # analysis period (one year)
    start_date = datetime(1986, 1, 1)
    end_date = datetime(1986, 12, 31)

    stations = cehq_station.read_station_data(selected_ids = selected_ids,
            start_date=start_date, end_date=end_date
    )
    plot_utils.apply_plot_params(width_pt=None, height_cm =30.0, width_cm=16, font_size=10)
    fig = plt.figure()
    #two columns
    gs = GridSpec( len(stations) // 2 + len(stations) % 2, 2, hspace=0.4, wspace=0.4 )
    line_model, line_obs = None, None
    # plot stations from north to south
    stations.sort(key=lambda x: x.latitude, reverse=True)

    for i, s in enumerate(stations):
        model_ts = manager.get_streamflow_timeseries_for_station(s, start_date = start_date, end_date = end_date)
        ax = fig.add_subplot( gs[i // 2, i % 2] )

        assert isinstance(model_ts, TimeSeries)
        assert isinstance(s, Station)

        model_ts = model_ts.get_ts_of_daily_means()
        print(model_ts.time[0], model_ts.time[-1])
        print(model_ts.data[0:10])

        mod_vals = model_ts.get_data_for_dates(s.dates)
        print(mod_vals[:20])
        print("+" * 20)
        assert len(mod_vals) == len(s.dates)

        # number of overlapping samples contributing to each lag of the raw
        # correlation sum, used to normalize the lags
        npoints = np.array(list(range(len(mod_vals), 0, -1)))

        # keep only the non-negative lags (second half of the symmetric
        # "full" correlation) and scale by the peak value
        model_acorr = np.correlate(mod_vals, mod_vals, mode="full")
        model_acorr = model_acorr[len(model_acorr) // 2:] / max(model_acorr)
        model_acorr /= npoints

        obs_acorr = np.correlate(s.values, s.values, mode = "full")
        obs_acorr = obs_acorr[len(obs_acorr) // 2:] / max(obs_acorr)
        obs_acorr /= npoints

        print(len(model_acorr), len(s.dates))

        line_model = ax.plot(s.dates, model_acorr, label = "Model (CRCM5)", lw = 1, color = "b")
        line_obs = ax.plot(s.dates, obs_acorr, label = "Observation", lw = 3, color = "r", alpha = 0.5)

        # title: relative drainage area difference (%) and distance from the
        # model point to the gauge
        ax.set_title("%s: da_diff=%.2f %%, dist = %.1f" % (s.id, (-s.drainage_km2+
                        model_ts.metadata["acc_area_km2"]) / s.drainage_km2 * 100.0,
                        model_ts.metadata["distance_to_obs"]))

        ax.xaxis.set_major_formatter(DateFormatter("%b"))
        ax.xaxis.set_major_locator(MonthLocator(bymonth=list(range(1,13, 2))))

    lines = (line_model, line_obs)
    labels = ("Model (CRCM5)", "Observation" )
    fig.legend(lines, labels)
    fig.savefig("acorr_without_lakes_0.1deg_1year.png")
Esempio n. 31
0
def validate_precip(model_file="", simlabel="", obs_manager=None, season_to_months=None,
                    start_year=None, end_year=None, season_to_plot_indices=None, station_ids_list=None):
    """
    Plot seasonal biases (model minus observations) of total precipitation.

    :param model_file: path to the hdf file with the model output
    :param simlabel: simulation label used in the figure title and file name
    :param obs_manager: should implement the method
        getMeanFieldForMonthsInterpolatedTo(self, months = None, lonsTarget = None, latsTarget = None)
        anusplin data is in mm/day
        model data is in m/s
    :param season_to_months: maps a season name to the list of month numbers
    :param start_year: first year of the averaging period (inclusive)
    :param end_year: last year of the averaging period (inclusive)
    :param season_to_plot_indices: maps a season name to its (row, col) subplot slot
    :param station_ids_list: ids of stations to mark on the maps (None -> none)
    """

    model_var_name = "PR"

    # nothing to validate against -- skip quietly
    if obs_manager is None:
        print("Skipping validation of {}, since the obs manager is None.".format(model_var_name))
        return

    if station_ids_list is not None:
        # Get the list of stations to indicate on the bias map
        stations = cehq_station.read_station_data(
            start_date=None, end_date=None, selected_ids=station_ids_list
        )
    else:
        stations = []

    model_level = 0

    assert isinstance(obs_manager, AnuSplinManager)
    fig = plt.figure()
    assert isinstance(fig, Figure)

    fig.suptitle("({0}) - ({1})".format(simlabel, "Obs."))

    lon, lat, basemap = analysis.get_basemap_from_hdf(file_path=model_file)

    # do calculations and only after that do the plotting
    season_to_field = {}

    # calculate global min and max for plotting
    vmin = None
    vmax = None

    for season, months in season_to_months.items():
        model_field = analysis.get_seasonal_climatology(start_year=start_year, end_year=end_year,
                                                        months=months,
                                                        level=model_level,
                                                        var_name=model_var_name, hdf_path=model_file)

        # convert m/s to mm/day for comparison with anusplin data
        model_field *= 1000.0 * 60 * 60 * 24

        obs_field = obs_manager.getMeanFieldForMonthsInterpolatedTo(months=months, lonstarget=lon, latstarget=lat,
                                                                    start_year=start_year, end_year=end_year)

        # calculate the difference between the modelled and observed fields
        the_diff = model_field - obs_field
        current_min = np.min(the_diff)
        current_max = np.max(the_diff)

        if vmin is not None:
            vmin = current_min if current_min < vmin else vmin
            vmax = current_max if current_max > vmax else vmax
        else:
            vmin = current_min
            vmax = current_max

        season_to_field[season] = the_diff

    ncolors = 12
    # 2 x 2 grid of seasonal maps plus a narrow column for the colorbar
    gs = gridspec.GridSpec(2, 3, width_ratios=[1, 1, 0.05])

    cmap = cm.get_cmap("RdBu_r", ncolors)
    x, y = basemap(lon, lat)
    im = None

    # symmetric color limits centered on zero
    d = min(abs(vmin), abs(vmax))
    vmin = -d
    vmax = d
    bn, bounds, _, _ = infovar.get_boundary_norm(vmin, vmax, ncolors, exclude_zero=False)

    print("bounds: ", bounds)

    for season, field in season_to_field.items():
        row, col = season_to_plot_indices[season]
        ax = fig.add_subplot(gs[row, col])
        ax.set_title(season)
        basemap.drawmapboundary(fill_color="gray", ax=ax)
        im = basemap.pcolormesh(x, y, season_to_field[season], vmin=vmin, vmax=vmax, cmap=cmap, norm=bn)
        basemap.drawcoastlines(ax=ax, linewidth=cpp.COASTLINE_WIDTH)

        # mark the selected stations on each seasonal map
        for the_station in stations:
            assert isinstance(the_station, Station)
            xst, yst = basemap(the_station.longitude, the_station.latitude)
            basemap.scatter(xst, yst, c="g", ax=ax)

    cax = fig.add_subplot(gs[:, 2])
    cax.set_title("mm/day\n")
    plt.colorbar(im, cax=cax, extend="both")
    seasons_str = "_".join(sorted([str(s) for s in list(season_to_field.keys())]))
    atm_val_folder = os.path.join(images_folder, "validate_atm")
    if not os.path.isdir(atm_val_folder):
        os.mkdir(atm_val_folder)

    # Bug fix: the old code embedded atm_val_folder in the file name and then
    # joined it with images_folder again, duplicating the images_folder prefix
    # in the output path. Save directly under atm_val_folder instead.
    out_path = os.path.join(
        atm_val_folder,
        "validate_2d_{0}_{1}_{2}.png".format(model_var_name, simlabel, seasons_str))
    fig.savefig(out_path, bbox_inches="tight")
Esempio n. 32
0
def main():
    """
    Plot, for each selected gauging station, the seasonal cycle of the relative
    difference (in %) of total soil moisture (liquid + solid) between two CRCM5
    simulations, averaged horizontally over the model grid cells upstream of
    the station.  One figure per station is saved as
    soil_profile_upstream_of_<station id>.pdf.
    """
    # Model output variable names used below as liquid / solid (frozen) soil
    # moisture.  NOTE(review): the I1/I2 meaning is inferred from how they are
    # combined below -- confirm against the model output conventions.
    var_name_liquid = "I1"
    var_name_solid = "I2"
    # period of interest
    start_year = 1979
    end_year = 1988

    #spatial averaging will be done over upstream points to the stations
    selected_ids = ["092715", "080101", "074903", "050304", "080104", "081007", "061905",
                         "041903", "040830", "093806", "090613", "081002", "093801", "080718"]

    # NOTE(review): the full list above is immediately overridden -- only
    # station 074903 is actually processed.
    selected_ids = ["074903"]


    #simulation names corresponding to the paths
    sim_names = ["crcm5-hcd-rl", "crcm5-hcd-rl-intfl"]

    sim_labels = [x.upper() for x in sim_names]

    colors = ["blue", "violet"]

    # Soil layer thicknesses in meters (26 layers); the cumulative sum gives
    # the depth of the bottom of each layer, used as the vertical coordinate.
    layer_widths = [0.1, 0.2, 0.3, 0.4, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 1.0, 3.0, 5.0,
                    5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0]

    layer_depths = np.cumsum(layer_widths)


    paths = [
        "/home/huziy/skynet3_rech1/from_guillimin/new_outputs/quebec_0.1_crcm5-hcd-rl_spinup",
        "/home/huziy/skynet3_rech1/from_guillimin/new_outputs/quebec_0.1_crcm5-hcd-rl-intfl_spinup2/Samples_all_in_one"
    ]

    seasons = [
        [12, 1, 2], [3, 4, 5], [6, 7, 8], [9, 10, 11]
    ]

    season_names = [
        "DJF", "MAM", "JJA", "SON"
    ]

    # Only the first manager builds the cell manager (need_cell_manager=True);
    # it is shared with the others right below.
    managers = [
        Crcm5ModelDataManager(samples_folder_path=path, file_name_prefix="pm",
            all_files_in_samples_folder=True, need_cell_manager= (i == 0)) for i, path in enumerate(paths)
    ]

    #share the cell manager
    a_data_manager = managers[0]
    assert isinstance(a_data_manager, Crcm5ModelDataManager)
    cell_manager = a_data_manager.cell_manager
    assert isinstance(cell_manager, CellManager)
    for m in managers[1:]:
        assert isinstance(m, Crcm5ModelDataManager)
        m.cell_manager = cell_manager

    #share the lake fraction field
    lake_fraction = a_data_manager.lake_fraction



    #selected_ids = ["092715", "080101", "074903", "050304", "080104", "081007", "061905",
    #                  "041903", "040830", "093806", "090613", "081002", "093801", "080718"]
    start_date = datetime(start_year, 1, 1)
    end_date = datetime(end_year, 12, 31)

    stations = cehq_station.read_station_data(selected_ids = selected_ids,
            start_date=start_date, end_date=end_date
    )

    #stations with corresponding model points
    station_to_mp = a_data_manager.get_dataless_model_points_for_stations(stations)

    #figure out levels in soil



    sim_label_to_profiles = {}
    for s, mp in station_to_mp.items():
        assert isinstance(mp, ModelPoint)
        # Upstream cells of the model point, excluding cells dominated by lakes
        mask = (mp.flow_in_mask == 1) & (lake_fraction < 0.6)
        fig = plt.figure()
        fmt = ScalarFormatter(useMathText=True)
        fmt.set_powerlimits([-2, 2])

        print(mp.ix, mp.jy, s.id)

        for m, label, color in zip(managers, sim_labels, colors):
            assert isinstance(m, Crcm5ModelDataManager)



            # Monthly climatologies are cached on disk; recompute only on miss
            monthly_means_liquid = _get_cached_monthly_mean_fields(label, start_year, end_year, var_name_liquid)
            if monthly_means_liquid is None:
                monthly_means_liquid = m.get_monthly_climatology_of_3d_field(var_name=var_name_liquid, start_year=start_year, end_year=end_year)
                _cache_monthly_mean_fields(monthly_means_liquid, label, start_year, end_year, var_name_liquid)

            monthly_means_solid = _get_cached_monthly_mean_fields(label, start_year, end_year, var_name_solid)
            if monthly_means_solid is None:
                monthly_means_solid = m.get_monthly_climatology_of_3d_field(var_name=var_name_solid, start_year=start_year, end_year=end_year)
                _cache_monthly_mean_fields(monthly_means_solid, label, start_year, end_year, var_name_solid)


            # Total (liquid + solid) profile for each of the 12 months,
            # averaged horizontally over the masked upstream cells
            profiles = [ monthly_means_liquid[i][mask,:].mean(axis = 0) + monthly_means_solid[i][mask,:].mean(axis = 0) for i in range(12) ]

            sim_label_to_profiles[label] = np.array( profiles )


        # x-axis: the first of each month in a dummy year (only month labels shown)
        x = [ date2num( datetime(2001,month,1) ) for month in range(1,13)]
        y = layer_depths

        y2d, x2d = np.meshgrid(y, x)
        # Relative difference (%) of the second simulation w.r.t. the first
        delta = (sim_label_to_profiles[sim_labels[1]] - sim_label_to_profiles[sim_labels[0]]) / sim_label_to_profiles[sim_labels[0]] * 100

        #delta = np.ma.masked_where(delta < 0.1, delta)

        cmap = my_colormaps.get_cmap_from_ncl_spec_file(path="colormap_files/BlueRed.rgb", ncolors=10)
        the_min = -6.0
        the_max = 6.0
        step = (the_max - the_min) / float(cmap.N)

        # Only the 8 topmost soil layers are plotted
        plt.pcolormesh(x2d[:,:8], y2d[:,:8], delta[:,:8], cmap = cmap, vmin = the_min, vmax = the_max) #, levels = np.arange(-6,7,1))
        plt.gca().invert_yaxis()
        plt.colorbar(ticks = np.arange(the_min, the_max + step, step))
        plt.gca().set_ylabel("Depth (m)")

        plt.gca().xaxis.set_major_formatter(DateFormatter("%b"))


        #fig.tight_layout()
        fig.savefig("soil_profile_upstream_of_{0}.pdf".format(s.id))




    pass
def main(hdf_folder="/home/huziy/skynet3_rech1/hdf_store", start_date=None, end_date=None,
         min_station_accumulation_area_km2=1000.0):
    """
    Compare modelled streamflow (variable "STFA") with CEHQ station
    observations for a fixed set of stations that have lakes upstream.

    :param hdf_folder: folder containing the model output hdf5 files
    :param start_date: start of the comparison period (datetime, required)
    :param end_date: end of the comparison period (datetime, required)
    :param min_station_accumulation_area_km2: minimum drainage area used to
        filter hydat stations (only relevant to the currently disabled hydat
        section; kept for interface compatibility)
    :raises ValueError: if start_date or end_date is None
    """
    # BUGFIX: fail fast with a clear message -- both dates are dereferenced
    # below (start_date.year / end_date.year), which previously raised an
    # obscure AttributeError when the None defaults were used.
    if start_date is None or end_date is None:
        raise ValueError("start_date and end_date must both be provided")

    # Station ids to get from the CEHQ database
    ids_with_lakes_upstream = [
        "104001", "093806", "093801", "081002", "081007", "080718"
    ]

    selected_ids = ids_with_lakes_upstream

    # Simulation labels and the hdf5 files they correspond to (kept aligned
    # by index; extend both lists together to compare more runs).
    sim_labels = [
        "CRCM5-L2",
    ]

    sim_file_names = [
        "quebec_0.1_crcm5-hcd-rl.hdf5",
    ]

    # Plot colors per curve label ("Obs." is the observations curve)
    simname_to_color = {
        "CRCM5-NL": "b",
        "CRCM5-L2": "r",
        "Obs.": "g"
    }

    # Preserve the label -> file correspondence in declaration order
    sim_name_to_file_name = OrderedDict(zip(sim_labels, sim_file_names))

    # Get the list of stations to do the comparison with
    stations = cehq_station.read_station_data(
        start_date=start_date, end_date=end_date, selected_ids=selected_ids
    )

    print("Initial list of stations:")
    for s in stations:
        print("{0}".format(s))
        assert isinstance(s, Station)
        cy = s.get_list_of_complete_years()
        print("{} of complete years: {}".format(len(cy), cy))

    draw_model_comparison(model_points=None, sim_name_to_file_name=sim_name_to_file_name,
                          hdf_folder=hdf_folder,
                          start_year=start_date.year, end_year=end_date.year, stations=stations,
                          stfl_name="STFA",
                          drainage_area_reldiff_min=0.1,
                          plot_upstream_area_averaged=False, sim_name_to_color=simname_to_color)
Esempio n. 34
0
def plot_station_positions(basemap):
    """
    Scatter the positions of a fixed set of CEHQ stations on the given basemap:
    every selected station in red, with a highlighted subset drawn on top in
    cyan.

    :param basemap: a Basemap-like object used both as the (lon, lat) -> (x, y)
        projection callable and as the plotting object
    :raises AssertionError: if some of the hard-coded ids are not found in the
        station data folder
    """
    sel_ids = [
        "080707",
        "080717",
        "093801",
        "093806",
        "093808",
        "102701",
        "102704",
        "072301",
        "072302",
        "074902",
        "074903",
        "103702",
        "103703",
        "103715",
        "041902",
        "041903",
        "042103",
        "042607",
        "043012",
        "040212",
        "040814",
        "040830",
        "073801",
        "073802",
        "081002",
        "081006",
        "081007",
        "081008",
        "061502",
        "061801",
        "061901",
        "061905",
        "061906",
        "062102",
        "050119",
        "050135",
        "080704",
        "080718"
    ]

    stations = cehq_station.read_station_data(folder="data/cehq_measure_data_all", read_only_headers=True)
    # BUGFIX: itertools.ifilter was removed in Python 3 -- use the builtin filter
    stations = list(filter(lambda s: s.id in sel_ids, stations))
    assert len(stations) == len(sel_ids), "{0} != {1}".format(len(stations), len(sel_ids))

    # All selected stations in red
    xsta = [s.longitude for s in stations]
    ysta = [s.latitude for s in stations]
    xsta, ysta = basemap(xsta, ysta)
    basemap.scatter(xsta, ysta, c="r", s=80, zorder=10)

    # Highlight a subset of stations on top of the red markers
    yellow_ids = ["103702", "093808", "061502", "041903", "093806", "042607",
                  "081002", "073801", "080718", "081006", "093801", "103715", "050119", "040830"]

    xsta = [s.longitude for s in stations if s.id in yellow_ids]
    ysta = [s.latitude for s in stations if s.id in yellow_ids]
    xsta, ysta = basemap(xsta, ysta)
    #basemap.scatter(xsta, ysta, c = "#DCBD34", s = 80 , zorder = 10)
    basemap.scatter(xsta, ysta, c="#40dae6", s=80, zorder=10)
Esempio n. 35
0
def main():
    """
    Storage-discharge analysis for three lakes (Matagami, Mistassini,
    Nemiscau): compare observed lake outflows with outflows derived from
    observed lake levels via the Bowling storage-discharge relation, printing
    Q-H correlations and plotting both the storage-discharge scatter and the
    observed-vs-computed discharge scatter.
    """

    # NOTE(review): the lake metadata lists and the two station-id lists below
    # are assumed to be aligned index-by-index, and read_station_data is
    # assumed to return stations in the order of selected_ids -- verify.
    lake_names = ["Matagami", "Mistassini", "Nemiscau"]
    lake_areas_km2 = [370.7,  2162.7, 148.3]

    # Water-level gauging stations (one per lake)
    level_st_ids = [
        "080716", "081003",  "081001"
    ]
    level_stations = cehq.read_station_data(folder="data/cehq_levels",
                        selected_ids=level_st_ids)

    # Streamflow gauging stations (one per lake outlet)
    stfl_st_ids = [
        "080707", "081007", "081002"
    ]
    stfl_stations = cehq.read_station_data(folder="data/cehq_measure_data",
            selected_ids=stfl_st_ids)

    q_obs = []
    q_calc = []

    plt.figure()
    for lev_station, stfl_station, lake_name, lake_area_km2, c in zip(level_stations,
                                                                    stfl_stations, lake_names, lake_areas_km2,
                                                                    ["r","g","b"]
                                                                ):
        assert isinstance(lev_station, Station)
        assert isinstance(stfl_station, Station)

        # Dates where both the level and the streamflow series have data
        count_intersection = sum( [int(d in stfl_station.dates) for d in lev_station.dates] )
        intersection_dates = list( filter( lambda d: d in stfl_station.dates, lev_station.dates) )

        q_vals = [stfl_station.get_value_for_date(d) for d in intersection_dates]
        h_vals = [lev_station.get_value_for_date(d) for d in intersection_dates]


        q_obs.append(q_vals)
        # Theoretical outflow computed from the active storage (Bowling relation)
        q_calc.append(get_streamflows_from_active_stores_bowling(get_active_storages(h_vals,lake_area_km2), lake_area_km2))

        #Calculate correlation between Q and H
        print(10 * "-" + lake_name)
        print("r = {0}".format(np.corrcoef([q_vals, h_vals])[0,1]))
        print((lev_station.latitude, lev_station.longitude))

        # Distance between the level and streamflow stations, in km
        print("dist_m = {0} km ".format( 1.0e-3 * lat_lon.get_distance_in_meters(lev_station.longitude, lev_station.latitude,
                                                                                stfl_station.longitude, stfl_station.latitude)))



        print("{0} and {1} have {2} measurements at the same time ({3}).".format(lev_station.id, stfl_station.id,
            count_intersection, lake_name ))

        #plot storage-discharge relation
        plt.title(lake_name)
        plt.scatter(get_active_storages(h_vals,lake_area_km2), q_vals, c = c , label = lake_name, linewidths=0)


        #plt.plot(intersection_dates, q_vals, c, label = lake_name )
        #plt.plot(intersection_dates, get_active_storages(h_vals,lake_area_km2), c+"-", label = lake_name )


        #plt.xlabel("S-active (obs)")
        #plt.ylabel("Q (obs)")
    plt.legend()


    #Compare observed and theoretical lake outflows
    plt.figure()
    title = ""

    for qo, qc, name, c  in zip(q_obs, q_calc,lake_names, ["r","g","b"]):
        qc_a = np.array(qc)
        qo_a = np.array(qo)
        # Nash-Sutcliffe model efficiency of the theoretical outflow
        title = "ME = {0:.2f}".format(1 - sum((qc_a - qo_a) ** 2) / sum( (qo_a - qo_a.mean()) ** 2))
        plt.scatter(qo, qc, c= c, linewidths=0,label = name + ", " + title)

    #plt.title(title)
    plt.xlabel("$Q_{\\rm obs}$")
    plt.ylabel("$Q_{\\rm mod}$")

    # 1:1 reference line
    xmin, xmax = plt.xlim()
    print(plt.xlim())
    plt.plot([xmin, xmax], [xmin, xmax], "k-",lw = 3, zorder = 5)


    plt.legend()
    plt.show()


    #TODO: implement
    pass
def main():
    """
    For each natural CEHQ station with at least 10 years of continuous data in
    1970-1999, print the drainage areas, distances and (i, j) grid indices of
    the 4 closest model grid cells, annotate the stations on a
    polar-stereographic map and save the figure to a pdf.
    """
    start_year = 1970
    end_year = 1999

    stations = cehq_station.read_station_data(folder="data/cehq_measure_data_all")
    # BUGFIX: itertools.ifilter was removed in Python 3 -- use builtin filter
    stations = list(filter(lambda s: s.is_natural, stations))
    # Restrict every station's series to the period of interest (in place)
    for s in stations:
        s.delete_data_after_year(end_year)
        s.delete_data_before_year(start_year)

    stations = list(filter(lambda s: s.get_num_of_years_with_continuous_data() >= 10, stations))
    s = stations[0]

    assert isinstance(s, Station)

    x, y = polar_stereographic.lons, polar_stereographic.lats
    basemap = polar_stereographic.basemap
    x, y = basemap(x, y)

    sx = [s.longitude for s in stations]
    sy = [s.latitude for s in stations]

    sx, sy = basemap(sx, sy)

    #read model data
    model_file_path = "data/streamflows/hydrosheds_euler9/aex_discharge_1970_01_01_00_00.nc"
    acc_area = data_select.get_field_from_file(path=model_file_path,
        field_name="drainage")
    i_indices, j_indices = data_select.get_indices_from_file(path=model_file_path)
    lons_1d = data_select.get_field_from_file(path=model_file_path, field_name="longitude")
    lats_1d = data_select.get_field_from_file(path=model_file_path, field_name="latitude")

    x1d, y1d, z1d = lat_lon.lon_lat_to_cartesian(lons_1d, lats_1d)
    # BUGFIX: zip() is a lazy iterator in Python 3; KDTree needs a sequence
    kdtree = KDTree(list(zip(x1d, y1d, z1d)))

    # BUGFIX: Python 2 print statement converted to the print function
    print("Id: 4 DA (km2) <-> 4 dist (km) <-> 4 (i,j)")
    #basemap.scatter(sx, sy, c = "r", zorder = 5)
    for s, isx, isy in zip(stations, sx, sy):
        assert isinstance(s, Station)
        plt.annotate(s.id, xy=(isx, isy),
                 bbox = dict(facecolor = 'white'), weight = "bold", font_properties = FontProperties(size=0.5))

        #get model drainage areas for the four closest gridcells to the station
        x0, y0, z0 = lat_lon.lon_lat_to_cartesian(s.longitude, s.latitude)
        dists, indices = kdtree.query([x0, y0, z0], k = 4)
        dists /= 1000  # meters -> kilometers
        print("{0}: {1:.1f}; {2:.1f}; {3:.1f}; {4:.1f} <-> {5:.1f}; {6:.1f}; {7:.1f}; {8:.1f} <-> {9};{10};{11};{12}".format(
            "{0} (S_DA = {1:.1f})".format(s.id, s.drainage_km2),
            float(acc_area[indices[0]]),
            float(acc_area[indices[1]]),
            float(acc_area[indices[2]]),
            float(acc_area[indices[3]]),
            float( dists[0] ),
            float(dists[1]),
            float(dists[2]),
            float(dists[3]),
            "({0}, {1})".format(i_indices[indices[0]] + 1, j_indices[indices[0]] + 1),
            "({0}, {1})".format(i_indices[indices[1]] + 1, j_indices[indices[1]] + 1),
            "({0}, {1})".format(i_indices[indices[2]] + 1, j_indices[indices[2]] + 1),
            "({0}, {1})".format(i_indices[indices[3]] + 1, j_indices[indices[3]] + 1)
        ))

    basemap.drawcoastlines(linewidth=0.5)

    # Zoom the map to the station bounding box, with some margin
    xmin, xmax = min(sx), max(sx)
    ymin, ymax = min(sy), max(sy)

    marginx = (xmax - xmin) * 0.1
    marginy = (ymax - ymin) * 0.1
    xmin -= marginx * 1.5
    xmax += marginx * 2
    ymin -= marginy
    ymax += marginy * 2

    plt.xlim(xmin, xmax)
    plt.ylim(ymin, ymax)
    plt.tight_layout()
    basin_boundaries.plot_basin_boundaries_from_shape(basemap=basemap, plotter=plt, linewidth=1)
    plt.savefig("10yr_cont_stations_natural_fs0.5.pdf")
    #plt.show()
Esempio n. 37
0
def compare_hydrographs_at_stations(manager_list,
                                    start_date=None,
                                    end_date=None, img_path="hydrographs.png", colors=None,
                                    fig=None):
    """
    Plot climatological hydrographs (daily normals) of observed vs modelled
    streamflow for the CEHQ stations that pass the filtering criteria, one
    panel per station, and save the figure.

    :param manager_list: list of Crcm5ModelDataManager instances, one per run
    :param start_date: start of the period (passed to the station reader)
    :param end_date: end of the period
    :param img_path: output image path; pass None to skip saving
    :param colors: one matplotlib color per manager (defaults to None each)
    :param fig: optionally reuse an existing figure instead of creating one
    """
    selected_ids = None
    stations = cehq_station.read_station_data(selected_ids=selected_ids,
                                              start_date=start_date, end_date=end_date
    )

    if colors is None:
        colors = len(manager_list) * [None]
    # 090613 is skipped for the 0.5 deg resolution since the drainage network
    # is not fully represented by the model
    skip_stations = ["080718", "095003", "094206", "090613", "092715"]

    lines_model = []
    station_to_list_of_model_ts = {}
    run_id_list = [m.run_id for m in manager_list]

    filtered_stations = []
    for s in stations:
        assert isinstance(s, Station)

        if s.id in skip_stations:
            continue

        # skip stations whose drainage area is smaller than ~4 model grid cells
        if s.drainage_km2 <= 4 * np.radians(0.5) ** 2 * lat_lon.EARTH_RADIUS_METERS ** 2 * 1.0e-6:
            continue

        if not s.passes_rough_continuity_test(start_date, end_date):
            continue

        filtered_stations.append(s)

    stations = filtered_stations

    print(len(filtered_stations))

    #save all run ids
    plot_utils.apply_plot_params(width_pt=None, height_cm=40.0, width_cm=30.0, font_size=10)
    run_id_to_dataframe = {}
    run_id_to_cell_props = {}
    for manager in manager_list:
        assert isinstance(manager, Crcm5ModelDataManager)
        df, station_to_cellprops = manager.get_streamflow_dataframe_for_stations(stations, start_date=start_date,
                                                                                 end_date=end_date, var_name="STFL",
                                                                                 nneighbours=9)
        assert isinstance(df, pandas.DataFrame)
        df = df.dropna(axis=1)
        run_id_to_cell_props[manager.run_id] = station_to_cellprops

        # Collapse the series to a daily climatology keyed by a dummy year
        # (2001), mapping Feb 29 to Mar 1 since 2001 is not a leap year
        df = df.groupby(lambda i: datetime(2001, i.month + 1, 1)
        if i.month == 2 and i.day == 29 else datetime(2001, i.month, i.day)).mean()

        print(df)

        #again filter the stations with data time interval overlapping with model time interval
        stations = list(filter(lambda s: s.id in df.columns, stations))
        run_id_to_dataframe[manager.run_id] = df

    if fig is None:
        fig = plt.figure()
    #two columns of panels
    ncols = 2
    # BUGFIX: use integer division -- len(stations) / ncols is a float in
    # Python 3 and GridSpec requires integer row/column counts
    nrows = len(stations) // ncols
    if nrows * ncols < len(stations):
        nrows += 1
    gs = GridSpec(nrows, ncols, hspace=0.4, wspace=0.4)
    line_model, line_obs = None, None
    stations.sort(key=lambda x: x.latitude, reverse=True)

    plot_station_positions(manager_list[0], stations)

    i = -1
    ns_list = []
    station_list = []
    flow_acc_area_list = []

    one_day_sec = 24 * 60 * 60.0
    for s in stations:
        i += 1
        ax = fig.add_subplot(gs[i // ncols, i % ncols])

        assert isinstance(s, Station)

        year_dates, sta_clims = s.get_daily_normals()

        # Shaded uncertainty band around the observed climatology
        ax.fill_between(year_dates, sta_clims * 1.256, sta_clims * 0.744, alpha=0.25, color="b")
        line_obs = ax.plot(year_dates, sta_clims, color="b", label="Observation", lw=3, alpha=0.5)

        # integral flow, since those values are daily normals
        ax.annotate("{0:.3g}".format(sum(sta_clims) * one_day_sec),
                    (0.1, 0.95), xycoords="axes fraction", color="b", alpha=0.5
        )

        for run_id, color, color_index in zip(run_id_list, colors, list(range(len(colors)))):
            df = run_id_to_dataframe[run_id]
            the_line = ax.plot(year_dates, df[s.id], color=color, label=run_id, lw=1)
            # integral flow of the modelled series
            ax.annotate("{0:.3g}".format(sum(df[s.id]) * one_day_sec),
                        (0.1, 0.9 - color_index * 0.05), xycoords="axes fraction", color=color
            )
            if not i:  #save the legend handles only for the first panel
                lines_model.append(the_line)

        metadata = list(run_id_to_cell_props.items())[0][1][s.id]
        da_mod = metadata["acc_area_km2"]
        dist = metadata["distance_to_obs_km"]
        ax.set_title("{0}: $DA = {1:.1f}$ {2}".format(s.id, s.drainage_km2, "${\\rm km ^ 2}$"))
        ax.xaxis.set_major_formatter(DateFormatter("%m"))
        assert isinstance(ax, Axes)
        ax.xaxis.axis_date()

    lines = lines_model + [line_obs, ]
    labels = run_id_list + ["Observation", ]
    fig.legend(lines, labels, ncol=5)
    if img_path is not None:
        fig.savefig(img_path)
Esempio n. 38
0
def create_kml_file_for_level_stations(
        data_path="data/cehq_levels",
        kml_file_name="mon.kml",
        title="Water levels in meters",
        icon_color="ffffccee",
        icon_link="http://dl.dropbox.com/u/4629759/blue-L.png",
        data_url_format="",
        plot_daily_normals=False,
        plot_monthly_normals=False):
    """
    Write a KML file with one placemark per CEHQ water-level station.

    :param data_path: folder with the CEHQ level data files
    :param kml_file_name: path of the KML file to create
    :param title: text shown in every placemark description
    :param icon_color: KML color (aabbggrr) of the placemark icon
    :param icon_link: URL of the placemark icon image
    :param data_url_format: unused here; kept for interface compatibility
    :param plot_daily_normals: if True, build a daily-normals chart per station
        and skip stations whose series is not continuous enough
    :param plot_monthly_normals: same as above for monthly normals
    """
    stations = cehq_station.read_station_data(folder=data_path)

    # chart dimensions (pixels) for the optional normals charts
    width = 250
    height = 100
    kmlBody = ("")

    for s in stations:
        assert isinstance(s, cehq_station.Station)
        print(s.id)

        ##Monthly normals
        if plot_monthly_normals:
            values_monthly = s.get_monthly_normals()
            if values_monthly is None:
                print(
                    "Skipping {0} since the data series is not continuous enough"
                    .format(s.id))
                continue  # skip stations with incomplete data
            low = min(values_monthly)
            up = max(values_monthly)
            # normalize to 0..100 as required by the chart API
            xy_monthly = Line((values_monthly - low) / (up - low) * 100.0)
            xy_monthly.axes.type("xyx")
            xy_monthly.size(width, height)

            xy_monthly.axes.range(0, 1, 12)
            xy_monthly.axes.range(1, low, up)
            xy_monthly.axes.label(2, None, "Month")

        #Daily normals
        if plot_daily_normals:
            times, values_daily = s.get_daily_normals()
            if values_daily is None:
                print(
                    "Skipping {0} since the data series is not continuous enough"
                    .format(s.id))
                continue
            low = min(values_daily)
            up = max(values_daily)
            xy_daily = Line((values_daily - low) / (up - low) * 100.0)
            xy_daily.axes.type("xyx")
            xy_daily.size(width, height)

            xy_daily.axes.range(0, 1, 365)
            xy_daily.axes.range(1, low, up)
            xy_daily.axes.label(2, None, "Day")

        kml = ("""
            <Placemark>\n
            <name>%s</name>\n
            <Style>
                 <IconStyle>
                     <color>%s</color>
                     <Icon>
                        <href>%s</href>
                     </Icon>
                </IconStyle>


            </Style>

            <description>\n
            <![CDATA[\n
            <p> <b> %s </b>  </p>
            <p> Flow acc. area is %.1f km<sup>2<sup> </p>
            ]]>\n
            </description>\n

            <Point>\n
               <coordinates>%f, %f</coordinates>\n
            </Point>\n
            </Placemark>\n""") % (s.id, icon_color, icon_link, title,
                                  s.drainage_km2, s.longitude, s.latitude)

        kmlBody += kml

    # KML file pieces: header + body + footer
    kmlHeader = ('<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n'
                 '<kml xmlns=\"http://earth.google.com/kml/2.2\">\n'
                 '<Document>\n')

    kmlFooter = ('</Document>\n' '</kml>\n')

    kmlFull = kmlHeader + kmlBody + kmlFooter

    # BUGFIX: kmlFull is a str; writing it to a file opened in binary mode
    # ('wb') raises TypeError in Python 3.  Open in text mode and use a
    # context manager so the file is flushed and closed deterministically.
    with open(kml_file_name, 'w') as kml_file:
        kml_file.write(kmlFull)
def put_selected_stations(ax, the_basemap, i_list, j_list):
    """
    Plot the stations listed in the module-level selected_station_ids as cyan
    triangles on the given map axes, caching the full station list to a pickle
    file on first use.

    :param ax: matplotlib axes to draw on
    :type the_basemap: Basemap
    :param i_list: i-indices of the grid cells used to derive label offsets
    :param j_list: j-indices of the grid cells used to derive label offsets
    """
    stations_dump = 'stations_dump.bin'
    global stations

    # Lazily load the station list, caching it with pickle on first use
    if stations is not None:
        pass
    elif os.path.isfile(stations_dump):
        # BUGFIX: Python 2 print statement; pickle files must be opened in
        # binary mode, and the handles should be closed deterministically
        print('unpickling')
        with open(stations_dump, 'rb') as dump_file:
            stations = pickle.load(dump_file)
    else:
        stations = read_station_data()
        with open(stations_dump, 'wb') as dump_file:
            pickle.dump(stations, dump_file)

    # get selected stations (BUGFIX: itertools.ifilter -> builtin filter, py3)
    sel_stations = list(filter(lambda x: x.id in selected_station_ids, stations))

    xs, ys = polar_stereographic.xs, polar_stereographic.ys

    # 1% of the extent of the supplied grid-cell region, used as label offsets
    dx = 0.01 * (xs[i_list, j_list].max() - xs[i_list, j_list].min())
    dy = 0.01 * (ys[i_list, j_list].max() - ys[i_list, j_list].min())

    the_xs = []
    the_ys = []
    for station in sel_stations:
        x, y = the_basemap(station.longitude, station.latitude)
        the_xs.append(x)
        the_ys.append(y)

        # Per-station label offset tweaks; used by the per-station annotations
        # that are currently disabled, kept for when they are re-enabled.
        xtext = 1.005 * x
        ytext = y
        if station.id in ['061906']:
            xtext = 1.00 * x
            ytext = 0.97 * y

        if station.id in ['103603', '081002']:
            ytext = 0.98 * y

        if station.id in ['081007']:
            xtext = 0.97 * x

        if station.id in ["090602"]:
            ytext -= 7 * dy
            xtext -= 5 * dx

        if station.id in ["090613"]:
            ytext += 4 * dy
            xtext -= 6 * dx

#        ax.annotate(station.id, xy = (x, y), xytext = (xtext, ytext), #textcoords = "axes fraction",
#                             bbox = dict(facecolor = 'white'), weight = "bold",
                             #arrowprops=dict(facecolor='black', width = 1, headwidth = 1.5)
#                             )

    the_basemap.scatter(the_xs, the_ys, c='c', s=60, marker='^', linewidth=0.5, alpha=1,
        zorder=5, ax=ax)
Esempio n. 40
0
def create_kml_file_for_level_stations(data_path = "data/cehq_levels",
                                       kml_file_name = "mon.kml",
                                       title = "Water levels in meters",
                                       icon_color = "ffffccee",
                                       icon_link = "http://dl.dropbox.com/u/4629759/blue-L.png",
                                       data_url_format = "", plot_daily_normals = False, plot_monthly_normals = False
                                       ):
    """
    Write a KML file with one placemark per CEHQ water-level station.

    :param data_path: folder with the CEHQ level data files
    :param kml_file_name: path of the KML file to create
    :param title: text shown in every placemark description
    :param icon_color: KML color (aabbggrr) of the placemark icon
    :param icon_link: URL of the placemark icon image
    :param data_url_format: unused here; kept for interface compatibility
    :param plot_daily_normals: if True, build a daily-normals chart per station
        and skip stations whose series is not continuous enough
    :param plot_monthly_normals: same as above for monthly normals
    """
    stations = cehq_station.read_station_data(folder=data_path)

    # chart dimensions (pixels) for the optional normals charts
    width = 250
    height = 100
    kmlBody = ("")

    for s in stations:
        assert isinstance(s, cehq_station.Station)
        print(s.id)


        ##Monthly normals
        if plot_monthly_normals:
            values_monthly = s.get_monthly_normals()
            if values_monthly is None:
                print("Skipping {0} since the data series is not continuous enough".format(s.id))
                continue # skip stations with incomplete data
            low = min(values_monthly)
            up = max(values_monthly)
            # normalize to 0..100 as required by the chart API
            xy_monthly = Line((values_monthly - low) / (up - low) * 100.0)
            xy_monthly.axes.type("xyx")
            xy_monthly.size(width, height)

            xy_monthly.axes.range(0, 1,12)
            xy_monthly.axes.range(1, low, up)
            xy_monthly.axes.label(2, None, "Month")

        #Daily normals
        if plot_daily_normals:
            times, values_daily = s.get_daily_normals()
            if values_daily is None:
                print("Skipping {0} since the data series is not continuous enough".format(s.id))
                continue
            low = min(values_daily)
            up = max(values_daily)
            xy_daily = Line((values_daily - low) / (up - low) * 100.0)
            xy_daily.axes.type("xyx")
            xy_daily.size(width, height)

            xy_daily.axes.range(0, 1,365)
            xy_daily.axes.range(1, low, up)
            xy_daily.axes.label(2, None, "Day")



        kml = (

        """
            <Placemark>\n
            <name>%s</name>\n
            <Style>
                 <IconStyle>
                     <color>%s</color>
                     <Icon>
                        <href>%s</href>
                     </Icon>
                </IconStyle>


            </Style>

            <description>\n
            <![CDATA[\n
            <p> <b> %s </b>  </p>
            <p> Flow acc. area is %.1f km<sup>2<sup> </p>
            ]]>\n
            </description>\n

            <Point>\n
               <coordinates>%f, %f</coordinates>\n
            </Point>\n
            </Placemark>\n"""
        ) % ( s.id, icon_color, icon_link, title, s.drainage_km2,
               s.longitude, s.latitude)

        kmlBody += kml

    # KML file pieces: header + body + footer
    kmlHeader = ('<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n'
                 '<kml xmlns=\"http://earth.google.com/kml/2.2\">\n'
                 '<Document>\n')

    kmlFooter = ('</Document>\n'
                 '</kml>\n')

    kmlFull = kmlHeader + kmlBody + kmlFooter

    # BUGFIX: kmlFull is a str; writing it to a file opened in binary mode
    # ('wb') raises TypeError in Python 3.  Open in text mode and use a
    # context manager so the file is flushed and closed deterministically.
    with open(kml_file_name, 'w') as kml_file:
        kml_file.write(kmlFull)
Esempio n. 41
0
def validate_swe(model_file, obs_manager, season_to_months, simlabel, season_to_plot_indices, start_year, end_year,
                 lake_fraction=None, station_ids_list=None):
    """
    Plot seasonal maps of the SWE bias (model minus observations) for one simulation.

    Reads the seasonal climatology of the model SWE (variable "I5") from the HDF file,
    interpolates the observed SWE climatology onto the model grid, and plots
    (model - obs) for each season (summer is skipped).  The figure is saved to
    <images_folder>/validate_atm/.

    :param model_file: path to the model output HDF file
    :param obs_manager: SweDataManager providing the observed SWE data
    :param season_to_months: dict {season name: list of month numbers}
    :param simlabel: simulation label, used in the figure title and output file name
    :param season_to_plot_indices: dict {season name: (row, col)} subplot position
    :param start_year: first year of the climatology period (inclusive)
    :param end_year: last year of the climatology period (inclusive)
    :param lake_fraction: optional 2d field; grid cells with lake_fraction > 0.9 are masked
    :param station_ids_list: optional list of station ids to mark with green dots on each map
    """
    model_var_name = "I5"
    model_level = None
    # Threshold (mm) for the small-error hatching diagnostic (currently disabled below).
    reasonable_error_mm = 100.0
    assert isinstance(obs_manager, SweDataManager)

    if station_ids_list is not None:
        # Get the list of stations to indicate on the bias map
        stations = cehq_station.read_station_data(
            start_date=None, end_date=None, selected_ids=station_ids_list
        )
    else:
        stations = []

    if lake_fraction is not None:
        print("lake fraction ranges: {0}, {1}".format(lake_fraction.min(), lake_fraction.max()))

    fig = plt.figure()
    obs_manager.name = "Obs."
    fig.suptitle("({0}) - ({1})".format(simlabel, obs_manager.name))

    # 1. read model results
    # 2. plot the differences (model - obs)
    lon, lat, basemap = analysis.get_basemap_from_hdf(file_path=model_file)

    # Do all the calculations first and only after that do the plotting
    season_to_field = {}
    season_to_obs_field = {}
    for season, months in season_to_months.items():
        model_field = analysis.get_seasonal_climatology(start_year=start_year, end_year=end_year,
                                                        months=months,
                                                        level=model_level,
                                                        var_name=model_var_name, hdf_path=model_file)

        obs_field = obs_manager.getMeanFieldForMonthsInterpolatedTo(months=months, lons_target=lon, lats_target=lat,
                                                                    start_year=start_year, end_year=end_year)

        season_to_obs_field[season] = obs_field
        # difference between the modelled and observed fields
        season_to_field[season] = model_field - obs_field

    ncolors = 11
    gs = gridspec.GridSpec(2, 3, width_ratios=[1, 1, 0.05])

    x, y = basemap(lon, lat)
    im = None

    # Limit the magnitude of the plotted difference to 100 mm.
    # (The data-driven vmin/vmax previously computed per season was dead code:
    # it was unconditionally overwritten here, so it has been removed.)
    d = 100
    vmin = -d
    vmax = d

    # Non-uniform color boundaries, symmetric about zero
    bounds = [-100, -80, -50, -20, -10, -5]
    bounds += [0, ] + [-b for b in reversed(bounds)]
    bn = BoundaryNorm(bounds, ncolors=len(bounds) - 1)
    cmap = cm.get_cmap("RdBu_r", len(bounds) - 1)

    print("bounds: ", bounds)

    cs = None
    for season, field in season_to_field.items():

        if season.lower() == "summer":
            print("Warning: skipping summer season for SWE")
            continue

        row, col = season_to_plot_indices[season]
        ax = fig.add_subplot(gs[row, col])
        ax.set_title(season)

        basemap.drawmapboundary(fill_color="gray")
        if lake_fraction is not None:
            # Mask out grid cells dominated by lakes
            to_plot = np.ma.masked_where((lake_fraction > 0.9), season_to_field[season])
        else:
            to_plot = season_to_field[season]

        to_plot = maskoceans(lon, lat, to_plot)
        im = basemap.pcolormesh(x, y, to_plot, vmin=vmin, vmax=vmax, cmap=cmap, norm=bn)
        basemap.drawcoastlines(ax=ax, linewidth=cpp.COASTLINE_WIDTH)

        # Mark the selected hydrometric stations, if any were requested
        for the_station in stations:
            assert isinstance(the_station, Station)
            xst, yst = basemap(the_station.longitude, the_station.latitude)
            basemap.scatter(xst, yst, c="g")

    # Shared colorbar in the narrow last grid column
    cax = fig.add_subplot(gs[:, 2])

    units_str = r"${\rm mm}$"
    var_str = r"SWE"
    cax.set_title("{0}\n".format(units_str))
    plt.colorbar(im, cax=cax, ticks=bounds, extend="both")

    seasons_str = "_".join(sorted([str(s) for s in list(season_to_months.keys())]))
    atm_val_folder = os.path.join(images_folder, "validate_atm")
    if not os.path.isdir(atm_val_folder):
        os.mkdir(atm_val_folder)

    out_filename = "{3}/validate_2d_{0}_{1}_{2}.png".format(model_var_name, simlabel, seasons_str, atm_val_folder)
    # out_filename is already rooted at atm_val_folder (itself under images_folder),
    # so it must not be joined with images_folder again: the old
    # os.path.join(images_folder, out_filename) duplicated the prefix whenever
    # images_folder was a relative path, pointing at a directory that was never created.
    fig.savefig(out_filename, bbox_inches="tight")