def read_station_data(folder = 'data/cehq_measure_data'):
    """
    Build one Station for every entry of ``folder`` whose name contains '.txt'.

    Entries are visited in the order returned by ``os.listdir`` and each
    matching one is parsed with ``Station.parse_from_cehq``.

    :param folder: directory to scan
    :return: list of the parsed Station objects (empty if nothing matched)
    """
    parsed = []
    for entry in os.listdir(folder):
        if '.txt' in entry:
            station_path = os.path.join(folder, entry)
            station = Station()
            station.parse_from_cehq(station_path)
            parsed.append(station)
    return parsed
def plot_subsurf_runoff(ax, station, sim_name_to_station_to_model_point, sim_names=None, day_stamps=None):
    """
    Plot the "TDRA" daily climatology of the model points matched to ``station``.

    For every simulation that has model points for the station, the daily
    climatology of each point is scaled by the point's accumulation area and
    drawn on ``ax``.

    :param ax: matplotlib Axes to draw on
    :param station: station used as the key into the per-simulation mappings
    :param sim_name_to_station_to_model_point: {simulation name: {station: [mp1, ..., mpN]}}
    :param sim_names: simulation names to plot, in plotting order; when None,
        every simulation present in ``sim_name_to_station_to_model_point`` is used
    :param day_stamps: dates used to stamp the climatology; defaults to
        ``Station.get_stamp_days(2001)``
    :return: tuple ``(labels, handles)`` of the plotted model lines
    """
    assert isinstance(ax, Axes)

    #initialize day stamps if it is not passed
    if day_stamps is None:
        day_stamps = Station.get_stamp_days(2001)

    #the default used to crash on iteration; None now means "all simulations"
    if sim_names is None:
        sim_names = list(sim_name_to_station_to_model_point)

    #invisible line, so the colors correspond to the same simulation on all panels
    ax.plot(day_stamps, [0] * len(day_stamps), "k", lw=0)

    handles = []
    labels = []
    ax.set_title("Subsurface runoff (${\\rm m^3/s}$)")

    coef = 1.0e-3  #to convert mm to meters
    for sim_name in sim_names:
        station_to_mps = sim_name_to_station_to_model_point[sim_name]
        if station not in station_to_mps:
            continue

        for mp in station_to_mps[station]:
            #presumably accumulation_area is in km^2, hence the 1.0e6 factor -- TODO confirm
            the_area = mp.accumulation_area * 1.0e6
            dates, values = mp.get_daily_climatology_for_complete_years(stamp_dates=day_stamps, varname="TDRA")

            label = "{0}".format(sim_name)
            h = ax.plot(dates, values * the_area * coef, label=label, lw=3)

            handles.append(h[0])
            labels.append(label)

    #ax.xaxis.set_major_formatter(DateFormatter("%d\n%b"))
    #ax.xaxis.set_major_locator(MonthLocator(bymonth=range(1,13,3), bymonthday=15 ))
    return labels, handles
def get_station_objects(start_year=1980, end_year=2010, sel_names=None):
    """
    Build Station objects holding observed ice depth series.

    Station placemarks are read from ``STATION_COORDS_FILE`` and the observations
    from ``get_obs_data()``. A placemark is kept when:

    * ``sel_names`` is None or one of its entries is contained
      (case-insensitively) in the placemark name,
    * it lies within 100W..0 and 40N..90N,
    * at least 100 observation rows fall within ``start_year``..``end_year``
      (both inclusive) for station names starting with the placemark name.

    :param start_year: first year of observations to keep (inclusive)
    :param end_year: last year of observations to keep (inclusive)
    :param sel_names: optional iterable of name fragments used to select stations
    :return: list of Station objects carrying a {date: ice depth} mapping
    """
    # read ice depth values
    df = get_obs_data()

    lon_min, lon_max = -100, 0
    lat_min, lat_max = 40, 90

    nvals_min = 100

    # Close the coordinates file as soon as it is parsed (the handle used to leak).
    # NOTE(review): assumes parser.parse() reads the whole file eagerly -- confirm.
    with STATION_COORDS_FILE.open() as coords_file:
        p = parser.parse(coords_file)

    root = p.getroot()
    station_elts = root.Document.Placemark

    # loop invariant: lower-cased station names of the observations
    station_names_lower = df.station_name.str.lower()

    stations = []
    for el in station_elts:
        lon, lat, _ = [float(c.strip()) for c in el.Point.coordinates.text.split(",")]

        # Check if the station was requested by name
        if sel_names is not None:
            el_name_lower = el.name.text.lower()
            if not any(sel_name.lower() in el_name_lower for sel_name in sel_names):
                continue

        # select points based on the lat/lon limits
        if not ((lon_min <= lon <= lon_max) and (lat_min <= lat <= lat_max)):
            continue

        print("{}: {}".format(el.name, el.Point.coordinates))

        df_s = df.loc[station_names_lower.str.startswith(el.name.text.lower())]
        df_s = df_s.loc[(df_s.year >= start_year) & (df_s.year <= end_year)]

        # too few observations to be useful
        if len(df_s) < nvals_min:
            continue

        print(len(df_s))
        d_to_v = dict(zip(df_s["Date"][:], df_s["ice_depth"][:]))

        # df_s.plot(x="Date", y="ice_depth")
        # plt.title(el.name.text)
        # plt.show()

        # print(df_s.station_name)

        stations.append(Station(st_id=df_s.station_name.iloc[0], lon=lon, lat=lat, date_to_value=d_to_v))

    return stations
def plot_hydrographs(ax, station, sim_name_to_station_to_model_point, day_stamps=None, sim_names=None):
    """
    Plot climatological hydrographs: the observed one at ``station`` and the
    simulated ("STFL") ones at the model points matched to the station.

    :param ax: matplotlib Axes to draw on
    :param station: Station providing the observed daily climatology
    :param sim_name_to_station_to_model_point: {simulation name: {station: [mp1, ..., mpN]}}
    :param day_stamps: dates used to stamp the climatologies; defaults to
        ``Station.get_stamp_days(2001)``
    :param sim_names: simulation names to plot, in plotting order; when None,
        every simulation present in ``sim_name_to_station_to_model_point`` is used
    :return: tuple ``(labels, handles)`` of the plotted lines, or None when the
        station has fewer than 6 complete years or no model point was plotted
    """
    assert isinstance(station, Station)
    assert isinstance(ax, Axes)

    years = station.get_list_of_complete_years()

    #initialize day stamps if it is not passed
    if day_stamps is None:
        day_stamps = Station.get_stamp_days(2001)

    if len(years) < 6:
        return

    #the default used to crash on iteration; None now means "all simulations"
    if sim_names is None:
        sim_names = list(sim_name_to_station_to_model_point)

    handles = []
    labels = []
    dates, obs_data = station.get_daily_climatology_for_complete_years_with_pandas(stamp_dates=day_stamps, years=years)
    obs_ann_mean = np.mean(obs_data)
    label = "Obs: ann.mean = {0:.1f}".format(obs_ann_mean)
    h = ax.plot(dates, obs_data, "k", lw=2, label=label)

    handles.append(h[0])
    labels.append(label)

    mp = None  #stays None when no simulation has a model point for the station
    for sim_name in sim_names:
        station_to_mps = sim_name_to_station_to_model_point[sim_name]

        #skip simulations without model points for this station
        #(the test used to be inverted: it skipped the matches and raised KeyError otherwise)
        if station not in station_to_mps:
            continue

        for mp in station_to_mps[station]:
            assert isinstance(mp, ModelPoint)
            dates, values = mp.get_daily_climatology_for_complete_years(stamp_dates=day_stamps, varname="STFL")

            label = "{0}: {1:.2f} \n ann.mean = {2:.1f}".format(sim_name, mp.mean_upstream_lake_fraction,
                                                                  np.mean(values))
            h = ax.plot(dates, values, label=label, lw=3)

            handles.append(h[0])
            labels.append(label)

    ax.xaxis.set_major_formatter(DateFormatter("%d\n%b"))
    ax.xaxis.set_major_locator(MonthLocator(bymonth=list(range(1, 13, 3)), bymonthday=15))

    if mp is None:
        return

    #the title uses the last plotted model point
    ax.set_title("{0}: point lake fr.={1:.2f}".format(station.id, mp.lake_fraction))
    return labels, handles
def dotest(sim_name_to_station_to_model_point):
    """
    Debugging helper: plot a model-point daily climatology, show it and stop.

    Stops by raising a bare ``Exception`` right after the first plot is shown.

    NOTE(review): the nesting of ``plt.show()`` and ``raise`` was reconstructed
    from a whitespace-collapsed source -- confirm against the original file.

    :param sim_name_to_station_to_model_point: {simulation name: {station: model point}};
        each value is asserted to be a single ModelPoint here
    """
    #the climatology is stamped with the days of the year 2001
    day_stamps = Station.get_stamp_days(2001)
    for sim_name, station_to_mp in sim_name_to_station_to_model_point.items():
        #same mapping as station_to_mp, looked up again by name
        st_to_mp = sim_name_to_station_to_model_point[sim_name]
        for station, mp in st_to_mp.items():
            assert isinstance(mp, ModelPoint)
            #use only the years for which the station record is complete
            years = station.get_list_of_complete_years()
            d,v = mp.get_daily_climatology_for_complete_years_with_pandas(stamp_dates=day_stamps, years=years)
            plt.plot(d,v)
            plt.show()
            #deliberately abort after the first plot
            raise Exception()
def plot_swe_1d_compare_with_obs(ax, station, sim_name_to_station_to_model_point, day_stamps=None, sim_names=None):
    """
    Plot climatological SWE averaged over the points upstream of the model point
    corresponding to the station, and compare it with the analysis by Ross Brown.

    The observed curve is read from ``station.mean_swe_upstream_daily_clim``; the
    simulated curves are the "I5" daily climatologies of the model points.

    :param ax: matplotlib Axes to draw on
    :param station: Station whose upstream SWE climatology is already set
    :param sim_name_to_station_to_model_point: a nested dictionary
        {simulation name: {station: [mp1, mp2, ..., mpN]}}
    :param day_stamps: dates used to stamp the model climatologies; defaults to
        ``Station.get_stamp_days(2001)``
    :param sim_names: simulation names to plot, in plotting order; when None,
        every simulation present in ``sim_name_to_station_to_model_point`` is used
    :return: tuple ``(labels, handles)`` of the plotted lines, or an empty dict
        when the station has fewer than 6 complete years
    """
    assert isinstance(station, Station)
    assert isinstance(ax, Axes)

    years = station.get_list_of_complete_years()

    #initialize day stamps if it is not passed
    if day_stamps is None:
        day_stamps = Station.get_stamp_days(2001)

    if len(years) < 6:
        return {}

    #the default used to crash on iteration; None now means "all simulations"
    if sim_names is None:
        sim_names = list(sim_name_to_station_to_model_point)

    #suppose here that values and times are ordered accordingly in pandas.Timeseries
    obs_data = station.mean_swe_upstream_daily_clim.values
    time = station.mean_swe_upstream_daily_clim.index.to_pydatetime()

    obs_ann_mean = np.mean(obs_data)

    handles = []
    labels = []

    label = "Obs: ann.mean = {0:.1f}".format(obs_ann_mean)
    h = ax.plot(time, obs_data, "k", label=label, lw=2)

    handles.append(h[0])
    labels.append(label)
    ax.set_title("SWE (mm)")

    for sim_name in sim_names:
        station_to_mps = sim_name_to_station_to_model_point[sim_name]
        if station not in station_to_mps:
            continue

        for mp in station_to_mps[station]:
            assert isinstance(mp, ModelPoint)
            dates, values = mp.get_daily_climatology_for_complete_years(stamp_dates=day_stamps, varname="I5")

            label = "{0}: {1:.2f} \n ann.mean = {2:.1f}".format(sim_name, mp.mean_upstream_lake_fraction,
                                                                  np.mean(values))
            h = ax.plot(dates, values, label=label, lw=3)

            handles.append(h[0])
            labels.append(label)

    #ax.xaxis.set_major_formatter(DateFormatter("%d\n%b"))
    #ax.xaxis.set_major_locator(MonthLocator(bymonth=range(1,13,3), bymonthday=15 ))
    return labels, handles
def validate_daily_climatology():
    """
    Write one diagnostic PDF per station ("nc_diagnose_<station id>.pdf").

    Reads CEHQ and HYDAT station data for 1979-1988, finds the model points of
    each simulation that correspond to the stations, attaches observed upstream
    SWE / temperature / precipitation climatologies to the stations and then,
    for every station with at least 6 complete years, saves two pages:
    a grid of 1D comparison panels and a flow directions / basin boundaries map.

    All input locations are hard-coded paths. Takes no arguments, returns None.

    NOTE(review): block nesting was reconstructed from a whitespace-collapsed
    source -- confirm in particular what belongs under the
    ``if s.mean_swe_upstream_daily_clim is None`` branch.
    """
    #years are inclusive
    start_year = 1979
    end_year = 1988

    #sim_name_list = ["crcm5-r", "crcm5-hcd-r", "crcm5-hcd-rl"]
    sim_name_list = ["crcm5-hcd-rl", "crcm5-hcd-rl-intfl"]

    #one samples folder per simulation, in the same order as sim_name_list
    rpn_folder_paths = [
        "/home/huziy/skynet3_rech1/from_guillimin/new_outputs/quebec_0.1_{0}_spinup".format(sim_name_list[0]),
        "/home/huziy/skynet3_rech1/from_guillimin/new_outputs/quebec_0.1_{0}_spinup2/Samples_all_in_one_folder".format(
            sim_name_list[1])
    ]

    nc_db_folder = "/home/huziy/skynet3_rech1/crcm_data_ncdb"

    #select stations
    #(only the last assignment before the read below is effective, i.e. None)
    selected_ids = None
    selected_ids = ["092715", "080101", "074903", "050304", "080104", "081007", "061905",
                    "041903", "040830", "093806", "090613", "081002", "093801", "080718"]

    selected_ids = ["074903", ]

    start_date = datetime(start_year, 1, 1)
    end_date = datetime(end_year, 12, 31)

    selected_ids = None
    stations = cehq_station.read_station_data(selected_ids=selected_ids,
                                              start_date=start_date, end_date=end_date
                                              )

    stations_hydat = cehq_station.read_hydat_station_data(folder_path="/home/huziy/skynet3_rech1/HYDAT",
                                                          start_date=start_date, end_date=end_date)

    stations.extend(stations_hydat)

    varname = "STFL"
    sim_name_to_manager = {}
    sim_name_to_station_to_model_point = {}

    day_stamps = Station.get_stamp_days(2001)
    sweManager = SweDataManager(var_name="SWE")
    cruTempManager = CRUDataManager(lazy=True)
    cruPreManager = CRUDataManager(var_name="pre", lazy=True,
                                   path="data/cru_data/CRUTS3.1/cru_ts_3_10.1901.2009.pre.dat.nc")

    #common lake fractions when comparing simulations on the same grid
    all_model_points = []

    cell_manager = None

    for sim_name, rpn_folder in zip(sim_name_list, rpn_folder_paths):
        #a cell manager is requested only for the first simulation
        dmManager = Crcm5ModelDataManager(samples_folder_path=rpn_folder, file_name_prefix="dm",
                                          all_files_in_samples_folder=True, need_cell_manager=cell_manager is None)

        #here using the fact that all the simulations are on the same grid
        if cell_manager is None:
            cell_manager = dmManager.cell_manager
        else:
            dmManager.cell_manager = cell_manager

        #determine common lake fractions, so it is not taken from the trivial case lf = 0, but note
        #this has only sense when all the simulations were performed on the same grid
        sim_name_to_manager[sim_name] = dmManager

        nc_sim_folder = os.path.join(nc_db_folder, sim_name)
        nc_path = os.path.join(nc_sim_folder, "{0}_all.nc4".format(varname))

        #In general there are several model points corresponding to a given station
        st_to_mp = dmManager.get_model_points_for_stations(stations, sim_name=sim_name,
                                                           nc_path=nc_path,
                                                           nc_sim_folder=nc_sim_folder,
                                                           set_data_to_model_points=True)

        print("got model points for stations")

        sim_name_to_station_to_model_point[sim_name] = st_to_mp

        #save model points to a list of all points
        for s, mps in st_to_mp.items():
            assert isinstance(s, Station)
            for mp in mps:
                assert isinstance(mp, ModelPoint)
                #calculate upstream swe if needed
                if s.mean_swe_upstream_daily_clim is None:
                    s.mean_swe_upstream_daily_clim = sweManager.get_mean_upstream_timeseries_daily(mp, dmManager,
                                                                                                   stamp_dates=day_stamps)
                    #These are taken from CRU dataset, only monthly data are available
                    s.mean_temp_upstream_monthly_clim = cruTempManager.get_mean_upstream_timeseries_monthly(mp,
                                                                                                            dmManager)
                    s.mean_prec_upstream_monthly_clim = cruPreManager.get_mean_upstream_timeseries_monthly(mp,
                                                                                                           dmManager)

                    print("Calculated observed upstream mean values...")

            all_model_points.extend(mps)

    print("imported input data successfully, plotting ...")

    #for tests
    #test(sim_name_to_station_to_model_point)

    #select only stations which have corresponding model points
    #(taken from the first simulation of sim_name_list)
    stations = list(sim_name_to_station_to_model_point[sim_name_list[0]].keys())

    from matplotlib.backends.backend_pdf import PdfPages

    for s in stations:
        years = s.get_list_of_complete_years()
        if len(years) < 6: continue  #skip stations with less than 6 continuous years of data

        #one PDF document per station
        pp = PdfPages("nc_diagnose_{0}.pdf".format(s.id))

        #plot hydrographs
        fig = plt.figure()
        gs = gridspec.GridSpec(3, 3, left=0.05, hspace=0.3, wspace=0.2)
        ax_stfl = fig.add_subplot(gs[0, 0])
        #NOTE(review): the call result is unpacked into two names, so a None return would raise here
        labels, handles = plot_hydrographs(ax_stfl, s, sim_name_to_station_to_model_point,
                                           day_stamps=day_stamps, sim_names=sim_name_list
                                           )
        plt.setp(ax_stfl.get_xticklabels(), visible=False)  #do not show ticklabels for upper rows

        #a single figure-level legend, built from the hydrograph lines
        fig.legend(handles, labels, "lower right")

        #plot swe 1d compare with obs
        ax_swe = fig.add_subplot(gs[1, 0], sharex=ax_stfl)
        plot_swe_1d_compare_with_obs(ax_swe, s, sim_name_to_station_to_model_point,
                                     day_stamps=day_stamps, sim_names=sim_name_list)

        #plot mean temp 1d compare with obs -- here plot biases directly...??
        ax_temp = fig.add_subplot(gs[0, 2])
        plot_temp_1d_compare_with_obs(ax_temp, s, sim_name_to_station_to_model_point, sim_names=sim_name_list)
        plt.setp(ax_temp.get_xticklabels(), visible=False)  #do not show ticklabels for upper rows

        #plot mean precip 1d compare with obs -- here plot biases directly...??
        ax = fig.add_subplot(gs[1, 2], sharex=ax_temp)
        plot_precip_1d_compare_with_obs(ax, s, sim_name_to_station_to_model_point, sim_names=sim_name_list)

        #plot mean Surface and subsurface runoff
        ax = fig.add_subplot(gs[0, 1], sharex=ax_stfl)
        plot_surf_runoff(ax, s, sim_name_to_station_to_model_point, sim_names=sim_name_list)
        plt.setp(ax.get_xticklabels(), visible=False)  #do not show ticklabels for upper rows

        ax = fig.add_subplot(gs[1, 1], sharex=ax_stfl)
        plot_subsurf_runoff(ax, s, sim_name_to_station_to_model_point, sim_names=sim_name_list)
        plt.setp(ax.get_xticklabels(), visible=False)  #do not show ticklabels for upper rows

        ax = fig.add_subplot(gs[2, 1], sharex=ax_stfl)
        plot_total_runoff(ax, s, sim_name_to_station_to_model_point, sim_names=sim_name_list)

        #first page: the 1D comparison panels
        pp.savefig()

        #plot flow direction and basin boundaries
        fig = plt.figure()
        gs = gridspec.GridSpec(1, 2, right=0.99, bottom=0.001)
        ax = fig.add_subplot(gs[0, 1])
        plot_flow_directions_and_basin_boundaries(ax, s, sim_name_to_station_to_model_point,
                                                  sim_name_to_manager=sim_name_to_manager)
        #second page: the map
        pp.savefig()

        #plot 2d correlation between wind speed and measured streamflow at the station (not implemented here)

        pp.close()
def load_stations_from_csv(
        index_file="mh/obs_data/streamflow_data_organized/station_index.txt",
        selected_ids=None):
    """
    Create Station objects from a whitespace-separated index file and per-station csv files.

    The first line of the index file is skipped. Every other non-blank line is
    split on whitespace into: station id, longitude, latitude, drainage area,
    data units and the station name (remaining tokens, cut at the first comma).
    The data of a station is read from "<station id>.csv" located next to the
    index file, and its first column is converted to datetime objects when it
    holds date strings or plain year values.

    :param index_file: path to the station index file
    :param selected_ids: optional collection of station ids to load; None loads all
    :return: list of Station objects with their time series attached
    :raises Exception: if the first column of a data file cannot be interpreted as dates
    """
    res = []

    data_dir = Path(index_file).parent

    with open(index_file) as f:
        # skip the first line
        f.readline()
        for line in f:
            if line.strip() == "":
                continue

            # raw string: "\s" in a plain literal is an invalid escape sequence
            toks = re.split(r"\s+", line)

            st_id = toks[0].strip()

            if (selected_ids is not None) and (st_id not in selected_ids):
                continue

            lon, lat, = [float(tok.strip()) for tok in toks[1:3]]

            # the drainage area is optional: keep None when it is missing or not a number
            st_da = None
            try:
                st_da = float(toks[3].strip())
            except (ValueError, IndexError):
                pass

            st_name = " ".join(toks[5:]).split(",")[0]

            s = Station(st_id=st_id, lon=lon, lat=lat, name=st_name)
            s.source_data_units = toks[4].strip()
            s.drainage_km2 = st_da

            print(s)

            ts = read_data_file_for_station(
                s, data_file=data_dir.joinpath("{}.csv".format(s.id)))
            ts.dropna(inplace=True)

            first_value = ts.iloc[0, 0]

            # if it is date do nothing
            if hasattr(first_value, "year"):
                pass
            # convert to dates if it is a string
            elif isinstance(first_value, str):
                date_format = None

                # try different known date formats; the last one that matches is used
                for the_date_format in known_date_formats:
                    try:
                        datetime.strptime(first_value, the_date_format)
                    except (ValueError, TypeError):
                        continue
                    date_format = the_date_format

                if date_format is None:
                    raise Exception(
                        "Do not understand this date format: {}".format(
                            first_value))

                ts[0] = [
                    datetime.strptime(t, date_format) for t in ts.iloc[:, 0]
                ]
            elif float(first_value).is_integer():
                # in case we have only year values: stamp them in the middle of the year
                ts[0] = [datetime(int(y), 6, 15) for y in ts.iloc[:, 0]]
            else:
                print(first_value)
                raise Exception("Could not convert {} to a date".format(
                    first_value))

            print(ts.head())

            # start - plot for debug
            # fig = plt.figure()
            # ax = plt.gca()
            # ax.set_title(s.id)
            # ts.plot(ax=ax, x=0, y=1)
            # fig.autofmt_xdate()
            #
            # img_file = img_folder.joinpath("{}.png".format(s.id))
            # fig.savefig(str(img_file))
            # end - plot for debug

            set_data_from_pandas_timeseries(ts, s, date_col=0)
            res.append(s)

    return res
def validate_daily_climatology():
    """
    Compare observed and simulated daily streamflow climatologies and save the
    plots to "comp_with_obs_daily_clim.pdf".

    HYDAT station data for 1979-1988 are read, the model points corresponding
    to the stations are found for every simulation, and one page per station
    (with at least 6 complete years and at least one model point) is written.
    The last page is a map with the positions of the plotted stations.

    All input locations are hard-coded paths. Takes no arguments, returns None.

    NOTE(review): block nesting was reconstructed from a whitespace-collapsed
    source -- confirm against the original file.
    """
    #years are inclusive
    start_year = 1979
    end_year = 1988

    sim_name_list = ["crcm5-r", "crcm5-hcd-r", "crcm5-hcd-rl"]
    rpn_folder_path_form = "/home/huziy/skynet3_rech1/from_guillimin/new_outputs/quebec_0.1_{0}_spinup"
    nc_db_folder = "/home/huziy/skynet3_rech1/crcm_data_ncdb"

    #select stations (only used by the commented out cehq read below)
    selected_ids = None

    start_date = datetime(start_year, 1, 1)
    end_date = datetime(end_year, 12, 31)

    print("start reading cehq obs data")
    # stations = cehq_station.read_station_data(selected_ids = selected_ids,
    #         start_date=start_date, end_date=end_date
    # )
    stations = []

    print("start reading hydat obs data")
    stations.extend(cehq_station.read_hydat_station_data(folder_path="/home/huziy/skynet3_rech1/HYDAT",
                                                         start_date=start_date, end_date=end_date))

    print("finished reading station data")

    varname = "STFL"
    sim_name_to_manager = {}
    sim_name_to_station_to_model_point = {}
    dmManager = None
    for sim_name in sim_name_list:
        print(sim_name)
        rpn_folder = rpn_folder_path_form.format(sim_name)

        dmManager = Crcm5ModelDataManager(samples_folder_path=rpn_folder, file_name_prefix="dm",
                                          all_files_in_samples_folder=True, need_cell_manager=True)

        sim_name_to_manager[sim_name] = dmManager

        nc_sim_folder = os.path.join(nc_db_folder, sim_name)
        nc_path = os.path.join(nc_sim_folder, "{0}_all.nc".format(varname))

        print("get model points for the stations")
        st_to_mp = dmManager.get_model_points_for_stations(stations, nc_path=nc_path, npoints=1,
                                                           nc_sim_folder=nc_sim_folder)

        sim_name_to_station_to_model_point[sim_name] = st_to_mp

    #lake fractions of the last simulation are used for the titles
    common_lake_fractions = dmManager.lake_fraction

    #for tests
    #test(sim_name_to_station_to_model_point)
    print("finished reading data in memory")

    from matplotlib.backends.backend_pdf import PdfPages

    pp = PdfPages("comp_with_obs_daily_clim.pdf")
    try:
        stations_to_plot = []  #only stations that are compared with model are needed on a map
        day_stamps = Station.get_stamp_days(2001)
        for s in stations:
            assert isinstance(s, Station)

            years = s.get_list_of_complete_years()
            print(s)
            if len(years) < 6:
                continue

            #the figure is created only for stations that pass the check above,
            #so that no empty figure is left open for the skipped ones
            plt.figure()

            dates, obs_data = s.get_daily_climatology_for_complete_years_with_pandas(stamp_dates=day_stamps,
                                                                                     years=years)
            obs_ann_mean = np.mean(obs_data)

            plt.plot(dates, obs_data, label="Obs: ann.mean = {0:.1f}".format(obs_ann_mean))

            mp = None  #stays None when no simulation has a model point for the station
            for sim_name in sim_name_list:
                manager = sim_name_to_manager[sim_name]
                if s not in sim_name_to_station_to_model_point[sim_name]:
                    continue

                for mp in sim_name_to_station_to_model_point[sim_name][s]:
                    assert isinstance(mp, ModelPoint)
                    dates, values = mp.get_daily_climatology_for_complete_years(stamp_dates=day_stamps,
                                                                                varname="STFL")
                    plt.plot(dates, values,
                             label="{0}: {1:.2f} \n ann.mean = {2:.1f}, dist = {3:.1f} km".format(
                                 sim_name,
                                 manager.lake_fraction[mp.flow_in_mask == 1].mean(),
                                 np.mean(values),
                                 mp.distance_to_station / 1000.0))

            ax = plt.gca()
            assert isinstance(ax, Axes)

            ax.xaxis.set_major_formatter(DateFormatter("%d/%b"))
            ax.xaxis.set_major_locator(MonthLocator(bymonth=list(range(1, 13, 3)), bymonthday=15))

            plt.legend(prop=FontProperties(size=8))

            if mp is None:
                #nothing to compare with: drop the figure without saving it
                plt.close()
                continue

            stations_to_plot.append(s)
            plt.title("{0}: point lake fraction={1:.4f}".format(s.id, common_lake_fractions[mp.ix, mp.jy]))

            pp.savefig()
            #the page is already written, release the figure
            plt.close()

        #plot station positions
        plt.figure()
        bm = dmManager.get_rotpole_basemap_using_lons_lats(lons2d=dmManager.lons2D,
                                                           lats2d=dmManager.lats2D,
                                                           resolution="i")
        bm.drawcoastlines(linewidth=0.1)
        bm.drawrivers(linewidth=0.1)

        lons_list = [s.longitude for s in stations_to_plot]
        lats_list = [s.latitude for s in stations_to_plot]

        x_list, y_list = bm(lons_list, lats_list)
        bm.scatter(x_list, y_list, linewidths=0, s=0.5, zorder=3)

        ax = plt.gca()
        #annotate the plotted stations: the coordinates were computed from stations_to_plot,
        #so the ids must come from the same list (and not from all the stations)
        for s, the_x, the_y in zip(stations_to_plot, x_list, y_list):
            ax.annotate(s.id, xy=(the_x, the_y), xytext=(3, 3), textcoords='offset points',
                        font_properties=FontProperties(size=4),
                        bbox=dict(facecolor='w', alpha=1),
                        ha="left", va="bottom", zorder=2)

        pp.savefig()
        plt.close()
    finally:
        #close the pdf document even if plotting failed
        pp.close()