Example #1
0
    def get_dates_of_annual_maxima(self, position_index):
        """
        Return the dates of the period maxima for a single position.

        The series is column ``position_index`` of ``self._data``; it is
        handed to ``data_select.get_period_maxima_dates`` together with
        ``self.times``, using a January-to-December period and an event
        duration of one day.

        :param position_index: index along the second axis of ``self._data``
        :return: the result of ``data_select.get_period_maxima_dates``
        """
        series_at_position = self._data[:, position_index]
        time_axis = self.times
        one_day = timedelta(days=1)
        return data_select.get_period_maxima_dates(
            series_at_position,
            time_axis,
            start_month=1,
            end_month=12,
            event_duration=one_day,
        )
def main():
    """
    Compare observed and modelled high/low flows and save diagnostic plots.

    Station/model pairs are loaded from a hard-coded netCDF path and passed
    through ``delete_not_continuous``. For every station whose id is in
    ``selected_station_ids`` and for which both the station and the model
    point report a non-zero time series length, the function:

    * extracts period maxima (months 3-7, 1-day events) and period minima
      (months 1-5, 15-day events) for the station and for the model;
    * fits stationary parameters with ``gevfit.optimize_stationary_for_period``
      and collects return levels (10 and 30 for high flow, 2 and 5 for low
      flow);
    * pairs station and model extremes that share the same key and draws a
      per-station scatter plot.

    Pooled scatter plots, box plots and a return-level scatter are drawn at
    the end. All images are saved under relative file names
    (``high_values_scatter*.png``, ``low_values_scatter*.png``,
    ``lows_in_time_<id>.png``, ``rl_scatter.png``); the ``box_*.png`` names
    are passed to ``plot_boxplot`` as ``file_name``.

    The function takes no arguments and returns nothing.
    """
    path = 'data/streamflows/hydrosheds_euler9/aex_discharge_1970_01_01_00_00.nc'

    #path = "data/streamflows/hydrosheds_rk4_changed_partiotioning/aex_discharge_1970_01_01_00_00.nc"
    #path = 'data/streamflows/na/discharge_1990_01_01_00_00_na.nc'
    data = pe_calc.get_station_and_corresponding_model_data(path=path)
    delete_not_continuous(data)

    plot_utils.apply_plot_params(width_pt=None, height_cm=6, font_size=9)

    high_return_periods = [10, 30]
    high_start_month = 3
    high_end_month = 7
    high_event_duration = timedelta(days=1)

    low_return_periods = [2, 5]
    low_start_month = 1
    low_end_month = 5
    low_event_duration = timedelta(days=15)

    #---------------------------high
    high_station_return_levels = {}
    high_model_return_levels = {}

    station_high_dates = []
    model_high_dates = []

    station_minima = []
    station_maxima = []

    model_minima = []
    model_maxima = []

    high_data = []
    labels = []

    point_ids = []

    # items() / "in" / list(d.values()) behave the same as the former
    # iteritems() / has_key() / d.values() on Python 2 and also run on Python 3
    for station, model_point in data.items():
        if station.id not in selected_station_ids:
            continue

        # @type model_point ModelPoint
        if not model_point.get_timeseries_length():
            continue

        # @type station Station
        if not station.get_timeseries_length():
            continue

        high_values_station = data_select.get_period_maxima(
            station.values, station.dates,
            start_date=None, end_date=None,
            start_month=high_start_month,
            end_month=high_end_month,
            event_duration=high_event_duration)
        station_high_list = list(high_values_station.values())
        high_data.append(station_high_list)
        labels.append("Observed")

        pars_station = gevfit.optimize_stationary_for_period(
            np.array(station_high_list), high_flow=True)

        high_values_model = data_select.get_period_maxima(
            model_point.get_values_sorted_by_date(),
            model_point.get_sorted_dates(),
            start_date=None, end_date=None,
            start_month=high_start_month,
            end_month=high_end_month,
            event_duration=high_event_duration)
        model_high_list = list(high_values_model.values())
        high_data.append(model_high_list)
        labels.append("Modelled")

        # one id per entry appended to high_data (observed, then modelled)
        point_ids.append(station.id)
        point_ids.append(station.id)

        pars_model = gevfit.optimize_stationary_for_period(
            np.array(model_high_list), high_flow=True)

        for ret_period in high_return_periods:
            high_station_return_levels.setdefault(ret_period, []).append(
                gevfit.get_high_ret_level_stationary(pars_station, ret_period))
            high_model_return_levels.setdefault(ret_period, []).append(
                gevfit.get_high_ret_level_stationary(pars_model, ret_period))

        #gather dates of the high flow events
        #(only consumed by the disabled plot_dates_scatter call at the end)
        the_station_high_dates = data_select.get_period_maxima_dates(
            station.values, station.dates,
            start_date=None, end_date=None,
            start_month=high_start_month,
            end_month=high_end_month,
            event_duration=high_event_duration)
        station_high_dates.extend(the_station_high_dates.values())

        the_model_high_dates = data_select.get_period_maxima_dates(
            model_point.get_values_sorted_by_date(),
            # @type model_point ModelPoint
            model_point.get_sorted_dates(),
            start_date=None, end_date=None,
            start_month=high_start_month,
            end_month=high_end_month,
            event_duration=high_event_duration)
        model_high_dates.extend(the_model_high_dates.values())

        ##save corresponding maxima for the station and model
        ##(only keys present in both results are paired)
        current_station_maxima = []
        current_model_maxima = []
        for year, value in high_values_station.items():
            if year in high_values_model:
                station_maxima.append(value)
                model_maxima.append(high_values_model[year])

                current_station_maxima.append(value)
                current_model_maxima.append(high_values_model[year])

        #plot scatter for high flow for each station separately
        plot_scatter(current_station_maxima, current_model_maxima,
                     "observed (${\\rm m^3/s}$)",
                     "modelled (${\\rm m^3/s}$)",
                     "high flow values ({0})".format(station.id),
                     different_shapes_and_colors=False)
        plt.savefig('high_values_scatter_{0}.png'.format(station.id),
                    bbox_inches='tight')

        print("%s: n(high_values) = %d;" % (station.id, len(current_station_maxima)))

    ##high flow values
    plot_scatter(station_maxima, model_maxima, "observed (${\\rm m^3/s}$)",
                 "modelled (${\\rm m^3/s}$)",
                 "high flow values", different_shapes_and_colors=False)

    plt.savefig('high_values_scatter.png', bbox_inches='tight')

    plot_boxplot([station_maxima, model_maxima],
                 title="High flow amplitude",
                 labels=["Observed", "Modelled"], file_name="box_high.png")

    plot_boxplot(high_data, labels=labels, file_name="box_high_all_sep.png",
                 point_ids=point_ids)

    #---------------------------low
    low_station_return_levels = {}
    low_model_return_levels = {}

    station_low_dates = []
    model_low_dates = []

    low_data = []
    for station, model_point in data.items():
        if station.id not in selected_station_ids:
            continue

        #print "retained timeseries lenght of the station %s is %d " % (station.id, station.get_timeseries_length())

        # @type model_point ModelPoint
        if not model_point.get_timeseries_length():
            continue

        # @type station Station
        if not station.get_timeseries_length():
            continue

        low_values_station = data_select.get_period_minima(
            station.values, station.dates,
            start_date=None, end_date=None,
            start_month=low_start_month,
            end_month=low_end_month,
            event_duration=low_event_duration)
        station_low_list = list(low_values_station.values())
        low_data.append(station_low_list)
        pars_station = gevfit.optimize_stationary_for_period(
            np.array(station_low_list), high_flow=False)

        low_values_model = data_select.get_period_minima(
            model_point.get_values_sorted_by_date(),
            model_point.get_sorted_dates(),
            start_date=None, end_date=None,
            start_month=low_start_month,
            end_month=low_end_month,
            event_duration=low_event_duration)
        model_low_list = list(low_values_model.values())
        low_data.append(model_low_list)
        pars_model = gevfit.optimize_stationary_for_period(
            np.array(model_low_list), high_flow=False)

        #gather dates of the low flow events
        #(only consumed by the disabled plot_dates_scatter call at the end)
        the_station_low_dates = data_select.get_period_minima_dates(
            station.values, station.dates,
            start_date=None, end_date=None,
            start_month=low_start_month,
            end_month=low_end_month,
            event_duration=low_event_duration)
        station_low_dates.extend(the_station_low_dates.values())

        the_model_low_dates = data_select.get_period_minima_dates(
            model_point.get_values_sorted_by_date(),
            model_point.get_sorted_dates(),
            start_date=None, end_date=None,
            start_month=low_start_month,
            end_month=low_end_month,
            event_duration=low_event_duration)
        model_low_dates.extend(the_model_low_dates.values())

        for ret_period in low_return_periods:
            low_station_return_levels.setdefault(ret_period, []).append(
                gevfit.get_low_ret_level_stationary(pars_station, ret_period))
            low_model_return_levels.setdefault(ret_period, []).append(
                gevfit.get_low_ret_level_stationary(pars_model, ret_period))

        ##save corresponding minima for the station and model
        ##(only keys present in both results are paired)
        current_station_minima = []
        current_model_minima = []
        for year, value in low_values_station.items():
            if year in low_values_model:
                current_station_minima.append(value)
                current_model_minima.append(low_values_model[year])

                station_minima.append(value)
                model_minima.append(low_values_model[year])

        ##plot temporal dependency
        ts = sorted(low_values_station)
        values_at_ts = [low_values_station[the_t] for the_t in ts]
        lows_figure = plt.figure()
        plt.plot(ts, values_at_ts)
        plt.ylabel('low flow')
        plt.xlabel('time')
        plt.title(station.id)
        plt.savefig('lows_in_time_%s.png' % station.id)
        # the figure is already on disk; close it so that one open figure
        # per station does not pile up in pyplot
        plt.close(lows_figure)

        #plot scatter for low flow for each station separately
        plot_scatter(current_station_minima, current_model_minima,
                     "observed (${\\rm m^3/s}$)",
                     "modelled (${\\rm m^3/s}$)",
                     "low flow values ({0})".format(station.id),
                     different_shapes_and_colors=False)
        plt.savefig('low_values_scatter_{0}.png'.format(station.id),
                    bbox_inches='tight')

        print("%s: n(low_values) = %d;" % (station.id, len(current_station_minima)))

    #draw return levels
    fig = plt.figure()
    gs = gridspec.GridSpec(1, 2)
    ax = fig.add_subplot(gs[0, 0])
    plot_scatter(high_station_return_levels,
                 high_model_return_levels,
                 xlabel="Observed return levels",
                 ylabel="Modelled return levels", new_figure=False,
                 title="(a) High flow")
    ax.xaxis.set_major_formatter(FuncFormatter(hide_even_pos_ax_labels))

    fig.add_subplot(gs[0, 1])
    plot_scatter(low_station_return_levels,
                 low_model_return_levels,
                 xlabel="Observed return levels",
                 ylabel="Modelled  return levels",
                 title="(b) Low flow",
                 new_figure=False)

    plt.tight_layout()
    plt.savefig("rl_scatter.png")

    plot_scatter(station_minima, model_minima, "observed", "modelled",
                 "low flow values", different_shapes_and_colors=False)
    plt.savefig('low_values_scatter.png', bbox_inches='tight')

    plot_boxplot([station_minima, model_minima],
                 title="Low flow amplitude",
                 labels=["Observed", "Modelled"], file_name="box_low.png")

    # labels and point_ids were filled by the high-flow loop; the low-flow
    # loop applies the same station filters to the same dict, so they have
    # one entry per element of low_data as well
    plot_boxplot(low_data, labels=labels, file_name="box_low_all_sep.png",
                 point_ids=point_ids)

    #plot dates of the high and low flow occurences
    #before uncommenting figure out why the x and y arrays are of different sizes
#    plot_dates_scatter(model_high_dates, station_high_dates, model_low_dates, station_low_dates)
#    plt.savefig('occurences_scatter.png')

  #  plt.show()