def get_dates_of_annual_maxima(self, position_index):
    """
    Look up the dates of the yearly maxima for a single position.

    The column ``position_index`` of ``self._data`` is passed together with
    ``self.times`` to ``data_select.get_period_maxima_dates``, using the
    whole year (months 1 through 12) as the search window and an event
    duration of one day.

    :param position_index: index along the second axis of ``self._data``
        selecting the series to analyse
    :return: the result of ``data_select.get_period_maxima_dates``, unchanged
    """
    series_at_position = self._data[:, position_index]
    time_axis = self.times
    one_day = timedelta(days=1)

    return data_select.get_period_maxima_dates(
        series_at_position,
        time_axis,
        start_month=1,
        end_month=12,
        event_duration=one_day,
    )
def main():
    """
    Compare observed and modelled streamflow extremes and save the plots.

    Steps, in order:

    1. Load the station / model-point pairs for ``path`` through
       ``pe_calc.get_station_and_corresponding_model_data`` and pass them
       to ``delete_not_continuous``.
    2. High-flow pass (months 3-7, 1-day events, return periods 10 and 30):
       for every station listed in ``selected_station_ids`` (defined outside
       this function) that has a non-empty observed and modelled series,
       select the period maxima, fit the stationary GEV parameters, compute
       the return levels and save a per-station scatter plot.
    3. Low-flow pass (months 1-5, 15-day events, return periods 2 and 5):
       the same for the period minima, plus a per-station plot of the
       observed low flows in time.
    4. Save the summary figures.

    Files written to the current directory:
    ``high_values_scatter_<id>.png``, ``high_values_scatter.png``,
    ``box_high.png``, ``box_high_all_sep.png``, ``lows_in_time_<id>.png``,
    ``low_values_scatter_<id>.png``, ``rl_scatter.png``,
    ``low_values_scatter.png``, ``box_low.png``, ``box_low_all_sep.png``.

    The body only uses constructs that are valid on both Python 2 and
    Python 3 (``in`` instead of ``dict.has_key``, ``items()`` instead of
    ``iteritems()``, single-argument ``print(...)``).
    """
    path = 'data/streamflows/hydrosheds_euler9/aex_discharge_1970_01_01_00_00.nc'
    #path = "data/streamflows/hydrosheds_rk4_changed_partiotioning/aex_discharge_1970_01_01_00_00.nc"
    #path = 'data/streamflows/na/discharge_1990_01_01_00_00_na.nc'
    data = pe_calc.get_station_and_corresponding_model_data(path = path)
    delete_not_continuous(data)

    plot_utils.apply_plot_params(width_pt = None, height_cm=6, font_size = 9)

    high_return_periods = [10, 30]
    high_start_month = 3
    high_end_month = 7
    high_event_duration = timedelta(days = 1)

    low_return_periods = [2, 5]
    low_start_month = 1
    low_end_month = 5
    low_event_duration = timedelta(days = 15)

    #---------------------------high
    # return period -> list of return levels, one entry per retained station
    high_station_return_levels = {}
    high_model_return_levels = {}

    station_high_dates = []
    model_high_dates = []

    # values pooled over all stations, only for years present in both series
    station_minima = []
    station_maxima = []

    model_minima = []
    model_maxima = []

    # high_data, labels and point_ids are filled in lockstep:
    # two entries per station (observed first, then modelled)
    high_data = []
    labels = []
    point_ids = []
    for station, model_point in data.items():
        if station.id not in selected_station_ids:
            continue

        # @type model_point ModelPoint
        if not model_point.get_timeseries_length():
            continue

        # @type station Station
        if not station.get_timeseries_length():
            continue

        # fetch the model series once, it is needed for both the values and the dates
        model_values = model_point.get_values_sorted_by_date()
        model_dates = model_point.get_sorted_dates()

        high_values_station = data_select.get_period_maxima(
            station.values, station.dates,
            start_date = None, end_date = None,
            start_month = high_start_month, end_month = high_end_month,
            event_duration = high_event_duration)

        vals = np.array(list(high_values_station.values()))
        high_data.append(list(high_values_station.values()))
        labels.append("Observed")
        pars_station = gevfit.optimize_stationary_for_period(vals, high_flow = True)

        high_values_model = data_select.get_period_maxima(
            model_values, model_dates,
            start_date = None, end_date = None,
            start_month = high_start_month, end_month = high_end_month,
            event_duration = high_event_duration)

        high_data.append(list(high_values_model.values()))
        labels.append("Modelled")
        vals = np.array(list(high_values_model.values()))

        # one id for the observed entry and one for the modelled entry
        point_ids.append(station.id)
        point_ids.append(station.id)
        pars_model = gevfit.optimize_stationary_for_period(vals, high_flow = True)

        for ret_period in high_return_periods:
            high_station_return_levels.setdefault(ret_period, []).append(
                gevfit.get_high_ret_level_stationary(pars_station, ret_period))
            high_model_return_levels.setdefault(ret_period, []).append(
                gevfit.get_high_ret_level_stationary(pars_model, ret_period))

        #gather dates of the high flow events
        the_station_high_dates = data_select.get_period_maxima_dates(
            station.values, station.dates,
            start_date = None, end_date = None,
            start_month = high_start_month, end_month = high_end_month,
            event_duration = high_event_duration)
        station_high_dates.extend(the_station_high_dates.values())

        the_model_high_dates = data_select.get_period_maxima_dates(
            model_values, model_dates,
            start_date = None, end_date = None,
            start_month = high_start_month, end_month = high_end_month,
            event_duration = high_event_duration)
        model_high_dates.extend(the_model_high_dates.values())

        ##save corresponding maxima for the station and model
        current_station_maxima = []
        current_model_maxima = []
        for year, value in high_values_station.items():
            # keep only the years that are present in both series
            if year in high_values_model:
                station_maxima.append(value)
                model_maxima.append(high_values_model[year])

                current_station_maxima.append(value)
                current_model_maxima.append(high_values_model[year])

        #plot scatter for high flow for each station separately
        plot_scatter(
            current_station_maxima, current_model_maxima,
            "observed (${\\rm m^3/s}$)", "modelled (${\\rm m^3/s}$)",
            "high flow values ({0})".format(station.id),
            different_shapes_and_colors = False)
        plt.savefig('high_values_scatter_{0}.png'.format( station.id ), bbox_inches = 'tight')
        print("%s: n(high_values) = %d;" % (station.id, len( current_station_maxima )))

    ##high flow values pooled over all stations
    plot_scatter(
        station_maxima, model_maxima,
        "observed (${\\rm m^3/s}$)", "modelled (${\\rm m^3/s}$)",
        "high flow values",
        different_shapes_and_colors = False)
    plt.savefig('high_values_scatter.png', bbox_inches = 'tight')

    plot_boxplot([station_maxima, model_maxima], title="High flow amplitude",
                 labels = ["Observed", "Modelled"], file_name="box_high.png")
    plot_boxplot(high_data, labels=labels, file_name="box_high_all_sep.png",
                 point_ids=point_ids)

    #---------------------------low
    low_station_return_levels = {}
    low_model_return_levels = {}

    station_low_dates = []
    model_low_dates = []
    # filled in the same order as high_data (observed, then modelled, per station)
    low_data = []
    for station, model_point in data.items():
        if station.id not in selected_station_ids:
            continue

        # @type model_point ModelPoint
        if not model_point.get_timeseries_length():
            continue

        # @type station Station
        if not station.get_timeseries_length():
            continue

        # fetch the model series once, it is needed for both the values and the dates
        model_values = model_point.get_values_sorted_by_date()
        model_dates = model_point.get_sorted_dates()

        low_values_station = data_select.get_period_minima(
            station.values, station.dates,
            start_date = None, end_date = None,
            start_month = low_start_month, end_month = low_end_month,
            event_duration = low_event_duration)

        low_data.append(list(low_values_station.values()))
        vals = np.array(list(low_values_station.values()))
        pars_station = gevfit.optimize_stationary_for_period(vals, high_flow = False)

        low_values_model = data_select.get_period_minima(
            model_values, model_dates,
            start_date = None, end_date = None,
            start_month = low_start_month, end_month = low_end_month,
            event_duration = low_event_duration)

        vals = np.array(list(low_values_model.values()))
        low_data.append(list(low_values_model.values()))
        pars_model = gevfit.optimize_stationary_for_period(vals, high_flow = False)

        #gather dates of the low flow events
        the_station_low_dates = data_select.get_period_minima_dates(
            station.values, station.dates,
            start_date = None, end_date = None,
            start_month = low_start_month, end_month = low_end_month,
            event_duration = low_event_duration)
        station_low_dates.extend(the_station_low_dates.values())

        the_model_low_dates = data_select.get_period_minima_dates(
            model_values, model_dates,
            start_date = None, end_date = None,
            start_month = low_start_month, end_month = low_end_month,
            event_duration = low_event_duration)
        model_low_dates.extend(the_model_low_dates.values())

        for ret_period in low_return_periods:
            low_station_return_levels.setdefault(ret_period, []).append(
                gevfit.get_low_ret_level_stationary(pars_station, ret_period))
            low_model_return_levels.setdefault(ret_period, []).append(
                gevfit.get_low_ret_level_stationary(pars_model, ret_period))

        ##save corresponding minima for the station and model
        current_station_minima = []
        current_model_minima = []
        for year, value in low_values_station.items():
            # keep only the years that are present in both series
            if year in low_values_model:
                current_station_minima.append(value)
                current_model_minima.append(low_values_model[year])

                station_minima.append(value)
                model_minima.append(low_values_model[year])

        ##plot temporal dependency of the observed low flows
        ts = sorted(low_values_station.keys())
        values_at_ts = [low_values_station[the_t] for the_t in ts]
        plt.figure()
        plt.plot(ts, values_at_ts)
        plt.ylabel('low flow')
        plt.xlabel('time')
        plt.title(station.id)
        plt.savefig('lows_in_time_%s.png' % station.id)

        #plot scatter for low flow for each station separately
        plot_scatter(
            current_station_minima, current_model_minima,
            "observed (${\\rm m^3/s}$)", "modelled (${\\rm m^3/s}$)",
            "low flow values ({0})".format(station.id),
            different_shapes_and_colors = False)
        plt.savefig('low_values_scatter_{0}.png'.format( station.id ), bbox_inches = 'tight')
        print("%s: n(low_values) = %d;" % (station.id, len( current_station_minima )))

    #draw return levels: high flow on the left panel, low flow on the right one
    fig = plt.figure()
    gs = gridspec.GridSpec(1,2)
    ax = fig.add_subplot(gs[0,0])
    plot_scatter(high_station_return_levels, high_model_return_levels,
                 xlabel="Observed return levels", ylabel="Modelled return levels",
                 new_figure=False, title="(a) High flow")
    ax.xaxis.set_major_formatter(FuncFormatter(hide_even_pos_ax_labels))
    fig.add_subplot(gs[0,1])
    plot_scatter(low_station_return_levels, low_model_return_levels,
                 xlabel="Observed return levels", ylabel="Modelled return levels",
                 title="(b) Low flow", new_figure=False)
    plt.tight_layout()
    plt.savefig("rl_scatter.png")

    ##low flow values pooled over all stations
    plot_scatter(
        station_minima, model_minima,
        "observed", "modelled",
        "low flow values",
        different_shapes_and_colors = False)
    plt.savefig('low_values_scatter.png', bbox_inches = 'tight')

    plot_boxplot([station_minima, model_minima], title="Low flow amplitude",
                 labels = ["Observed", "Modelled"], file_name="box_low.png")
    # labels and point_ids come from the high flow pass; they line up with
    # low_data because both passes apply the same filters to the same data
    # and append the observed entry before the modelled one
    plot_boxplot(low_data, labels=labels, file_name="box_low_all_sep.png",
                 point_ids= point_ids)

    #plot dates of the high and low flow occurrences
    #TODO: before uncommenting figure out why the x and y arrays are of different sizes
    # plot_dates_scatter(model_high_dates, station_high_dates, model_low_dates, station_low_dates)
    # plt.savefig('occurences_scatter.png')
    # plt.show()