def test_plot_kde_scipy():
    """Plot the empirical distribution of Hm0 together with its scipy KDE.

    Reads the full SIMAR record, builds the empirical histogram (PDF here,
    because ``cumulative`` is False) and overlays the kernel estimate
    returned by ``empirical_distributions.kde_scipy``.
    """
    column = 'Hm0'
    cumulative = False
    step = 0.1

    simar = tests.read_full_simar()
    series = simar[column]
    # Bin count derived from the data maximum and twice the step
    n_bins = np.max(series) / (step * 2.0)

    if not cumulative:
        empirical = empirical_distributions.epdf_histogram(series,
                                                           bins=n_bins)
    else:
        empirical = empirical_distributions.ecdf_histogram(series)

    kernel = empirical_distributions.kde_scipy(series)

    plot_options = dict(title='',
                        var_name=column,
                        var_unit='m',
                        fig_filename='',
                        circular=False,
                        label_empirical='Empirical data',
                        label_kernel='Empirical kernel fit')
    empirical_distributions.plot_kde(empirical, kernel, cumulative,
                                     **plot_options)
def test_plot_empirical_cdf():
    """Plot the empirical CDF of Hm0 computed from the sample SIMAR data."""
    hm0 = tests.read_sample_simar()['Hm0']
    ecdf = empirical_distributions.ecdf_histogram(hm0)
    empirical_distributions.plot_empirical(ecdf)
def test_plot_kde_sm():
    """Plot the empirical CDF of Hm0 with the ``kde_sm`` kernel estimate.

    With ``cumulative`` set to True the empirical CDF and a cumulative
    kernel on a 100-point grid are plotted; the non-cumulative branch only
    builds the empirical PDF and passes no kernel.
    """
    column = 'Hm0'
    cumulative = True

    series = tests.read_full_simar()[column]

    if not cumulative:
        empirical = empirical_distributions.epdf_histogram(series)
        kernel = None
    else:
        empirical = empirical_distributions.ecdf_histogram(series)
        kernel = empirical_distributions.kde_sm(series,
                                                cumulative=cumulative,
                                                gridsize=100)

    plot_options = dict(title='',
                        var_name=column,
                        var_unit='m',
                        fig_filename='',
                        circular=False,
                        label_empirical='Empirical data',
                        label_kernel='Empirical kernel fit')
    empirical_distributions.plot_kde(empirical, kernel, cumulative,
                                     **plot_options)
def test_plot_empirical_fit():
    """Plot the empirical CDF of Hm0 against a fitted normal distribution."""
    column = 'Hm0'
    distribution = st.norm
    simar = tests.read_full_simar()

    ecdf = empirical_distributions.ecdf_histogram(simar[column])
    # Only the third element returned by the fit is needed here
    _first, _second, fit_params = fitting.fit(simar, column, distribution)

    empirical_distributions.plot_empirical_fit(ecdf,
                                               distribution,
                                               fit_params,
                                               cumulative=True)
Beispiel #5
0
def _ecdf_sim_read_modf(variable, hourly=True):
    """Read ``guadalete_estuary_<variable>.modf`` from the inputadapter output.

    When ``hourly`` is True the series is resampled to hourly frequency and
    interpolated before being returned.
    """
    path_name = os.path.join(tests.current_path, '..', '..', 'inputadapter',
                             'tests', 'output', 'modf',
                             'guadalete_estuary_{}.modf'.format(variable))
    modf = MetOceanDF.read_file(path_name)
    if hourly:
        modf = modf.resample('H').interpolate()
    return modf


def _ecdf_sim_format_axis(ax, xlabel, xticks, show_ylabel):
    """Apply the shared CDF-panel formatting (labels, ticks, grid, limits)."""
    ax.set_ylabel('CDF' if show_ylabel else '', fontsize=16)
    ax.set_xlabel(xlabel, fontsize=16)
    ax.set_xticks(xticks)
    ax.set_yticks([0, 0.25, 0.5, 0.75, 1])
    if not show_ylabel:
        # Inner panels share the y scale of the left-most panel of the row
        ax.set_yticklabels([])
    ax.grid(True)
    # The x range always spans from the first to the last tick
    ax.set_xlim([xticks[0], xticks[-1]])
    ax.set_ylim([0, 1.05])


def _ecdf_sim_plot(ax, series, **line_kwargs):
    """Draw the empirical CDF of ``series`` on ``ax``."""
    ecdf = empirical_distributions.ecdf_histogram(series)
    ax.plot(ecdf.index, ecdf, **line_kwargs)


def test_pdf_cdf_simulaciones():
    """Compare empirical CDFs of simulated series against the historic record.

    Builds a 3x3 figure (the centre-bottom panel is removed) with one panel
    per variable: hs, tp, dh, vv, dv, slp, astronomical tide (Eta) and river
    discharge (Q). Simulated series are drawn as dotted orange lines and the
    historic data as a solid blue line on top. The figure is saved as PDF
    and PNG under ``output/analisis/graficas``.
    """
    #%% Input data
    # Number of simulations (files 1 .. no_sim - 1 are read)
    no_sim = 100
    # Prepare the figure
    plt.rcParams.update({'font.size': 12})
    fig3, axes3 = plt.subplots(3, 3, figsize=(12, 10))
    plt.delaxes(axes3[2, 1])

    #%% Read data
    # Wave, wind and sea level pressure, resampled hourly
    modf_wave = _ecdf_sim_read_modf('wave')
    modf_wind = _ecdf_sim_read_modf('wind')
    modf_slp = _ecdf_sim_read_modf('sea_level_pressure')

    # Astronomical tide: hindcast and forecast, resampled hourly
    # NOTE(review): the hindcast frame is read but never plotted; the
    # "historic" tide curve below uses the forecast frame - confirm intent.
    at_hindcast_df = pd.DataFrame(_ecdf_sim_read_modf('astronomical_tide'))
    at_forecast_df = pd.DataFrame(
        _ecdf_sim_read_modf('astronomical_tide_forecast'))

    # Group into dataframe
    wave_wind = pd.concat([modf_wave, modf_wind, modf_slp], axis=1)
    wave_wind.columns = ['hs', 'tp', 'dh', 'vv', 'dv', 'slp']

    # Delete rows with no common values
    wave_wind.dropna(how='any', inplace=True)

    # River discharge (kept at its native time step)
    river_discharge = pd.DataFrame(
        _ecdf_sim_read_modf('river_discharge', hourly=False))
    # Delete rows with no common values
    river_discharge.dropna(how='any', inplace=True)

    #%% Preprocessing
    t_step = missing_values.find_timestep(wave_wind)  # Find tstep
    # The gap reports are not used further below
    data_gaps = missing_values.find_missing_values(wave_wind, t_step)
    wave_wind = missing_values.fill_missing_values(wave_wind,
                                                   t_step,
                                                   technique='interpolation',
                                                   method='nearest',
                                                   limit=16 * 24,
                                                   limit_direction='both')
    data_gaps_after = missing_values.find_missing_values(wave_wind, t_step)

    # Add noise for VAR
    noise = np.random.rand(wave_wind.shape[0], wave_wind.shape[1]) * 1e-2
    wave_wind = wave_wind + noise

    # Save_to_pickle
    wave_wind.to_pickle('wave_wind_offshore.p')

    #%% Panel layout
    # (row, column, wave_wind / simulation column, x label, x ticks)
    # Raw strings keep the LaTeX backslashes intact; the wave-direction
    # label now uses \theta like the wind-direction one.
    sim_panels = [
        (0, 0, 'hs', '$H_{m0} (m)$', [0, 5, 10]),
        (0, 1, 'tp', '$T_{p} (s)$', [0, 12, 24]),
        (0, 2, 'dh', r'$w_{\theta} (^\circ)$', [0, 180, 360]),
        (1, 0, 'vv', '$u_{10} (m/s)$', [0, 15, 30]),
        (1, 1, 'dv', r'$u_{\theta} (^\circ)$', [0, 180, 360]),
        (1, 2, 'slp', '$slp (mbar)$', [980, 1015, 1050]),
    ]
    # (row, column, series, x label, x ticks) - not taken from simulations
    fixed_panels = [
        (2, 0, at_forecast_df['Eta'], '$A_{AT} (m)$', [-2, 0, 2]),
        (2, 2, river_discharge['Q'], '$Q (m^{3}/s)$', [0, 250, 500]),
    ]

    # Formatting does not depend on the data, so it is applied once up front
    for row, col, _source, xlabel, xticks in sim_panels + fixed_panels:
        _ecdf_sim_format_axis(axes3[row, col], xlabel, xticks,
                              show_ylabel=(col == 0))

    sim_style = dict(color='tab:orange', linestyle=':', lw=3)
    historic_style = dict(color='tab:blue', lw=2)

    #%% Simulated series
    for i in tqdm(range(1, no_sim)):
        sim_id = str(i).zfill(4)
        file_name_simar_sim = os.path.join(
            'output', 'simulacion', 'series_temporales',
            'wave_wind_slp_offshore_500',
            'wave_wind_slp_guadalete_offshore_sim_' + sim_id + '.txt')

        file_name_rd_sim = os.path.join(
            'output', 'simulacion', 'series_temporales',
            'descarga_fluvial_500',
            'descarga_fluvial_guadalete_sim_' + sim_id + '.txt')

        df_simar_sim = pd.read_table(file_name_simar_sim, index_col=0)
        # NOTE(review): the simulated river discharge is read but never
        # plotted; the Q panel shows the historic record in both colours.
        # Presumably this frame was meant to feed the orange curve - confirm.
        df_rd_sim = pd.read_table(file_name_rd_sim, index_col=0)

        for row, col, column, _xlabel, _xticks in sim_panels:
            _ecdf_sim_plot(axes3[row, col], df_simar_sim[column],
                           **sim_style)

    # These two series are the same for every simulation, so the dotted
    # curve is drawn once instead of once per iteration.
    for row, col, series, _xlabel, _xticks in fixed_panels:
        _ecdf_sim_plot(axes3[row, col], series, **sim_style)

    #%% Historic series (drawn last so they stay on top)
    for row, col, column, _xlabel, _xticks in sim_panels:
        _ecdf_sim_plot(axes3[row, col], wave_wind[column], **historic_style)
    for row, col, series, _xlabel, _xticks in fixed_panels:
        _ecdf_sim_plot(axes3[row, col], series, **historic_style)

    plt.tight_layout()

    figure_base = os.path.join('output', 'analisis', 'graficas',
                               'ecdf_historico_simulacion')
    fig3.savefig(figure_base + '.pdf')
    fig3.savefig(figure_base + '.png')
def output_plot_empirical_cdf(modf, info, output_path):
    """Create the empirical-CDF figure for one descriptor of a report section.

    Args:
        modf: Data source handed to ``extract_data``.
        info: Mapping with the section settings. The location, driver,
            descriptor, block and section names and ``title_descriptor``
            are read directly; the remaining entries are fetched through
            ``get_key`` with defaults.
        output_path: Directory the figure file name is joined to.

    Returns:
        A tuple ``(elements, title)`` where ``elements`` is a DataFrame with
        columns ``path``, ``kind`` and ``caption`` and ``title`` is the
        section title.
    """
    elements = []

    # Default section title
    default_title = _('Plot empirical CDF')

    # Required values, read in a fixed order
    required_keys = ('location_metocean', 'name_driver', 'name_descriptor',
                     'name_block', 'name_section')
    location, driver, descriptor, block, section = (
        info[key] for key in required_keys)

    # Optional values with their defaults
    title = get_key(info, 'title_section', default_title)
    var_name = get_key(info, 'var_name_descriptor', descriptor)
    var_unit = get_key(info, 'unit_descriptor', '')
    circular = get_key(info, 'circular_descriptor', False)
    label_empirical = get_key(info, 'label_empirical_section', '')
    label_kernel = get_key(info, 'label_kernel_section', '')
    bins = get_key(info, 'bins_section', 100)

    # Input tex section
    input_tex(elements, info, output_path, section)

    # Empirical CDF and its cumulative kernel estimate
    cumulative = True
    data = extract_data(modf, descriptor_name=descriptor)
    data_empirical = empirical_distributions.ecdf_histogram(data)
    data_kernel = empirical_distributions.kde_sm(data,
                                                 cumulative=cumulative,
                                                 gridsize=bins)

    # Figure caption and file name
    kind = 'figure'
    caption_prefix = _('Empirical CDF')
    default_caption = caption_prefix + ': {}'.format(info['title_descriptor'])
    caption = get_key(info, 'empirical_pdf_figure_caption_section',
                      default_caption)

    path = get_output_name(location=location,
                           driver=driver,
                           name=descriptor,
                           block=block,
                           title=section,
                           kind=kind)
    fig_filename = os.path.join(output_path, path)

    empirical_distributions.plot_kde(data_empirical,
                                     data_kernel,
                                     cumulative,
                                     title='',
                                     var_name=var_name,
                                     var_unit=var_unit,
                                     fig_filename=fig_filename,
                                     circular=circular,
                                     label_empirical=label_empirical,
                                     label_kernel=label_kernel)

    elements.append([path, kind, caption])

    report = pd.DataFrame(elements, columns=['path', 'kind', 'caption'])
    return report, title
Beispiel #7
0
def extremal_distribution_fit_bootstrapping(sample, n_sim_boot, data, var_name,
                                            threshold, param_orig, fit_type,
                                            x_min, x_max, alpha):
    """Bootstrap an extremal fit to obtain confidence bands.

    ``n_sim_boot`` new samples are generated from ``sample`` with
    ``simulation_sample``; each one gets an empirical CDF, a return period
    curve and an ``extremal_distribution_fit``. A fit is kept only when the
    maximum of its return-period curve exceeds 1000; a rejected fit is
    replaced by the most recent accepted one (or, if none exists yet, by the
    first fit that is accepted later).

    Args:
        sample: Sample of extremes to resample.
        n_sim_boot: Number of bootstrap samples.
        data: Original data; ``data[var_name].index.year`` is used to count
            the years of record when ``fit_type`` is ``'gpd'``.
        var_name: Column of ``data`` being analysed.
        threshold: Threshold forwarded to ``extremal_distribution_fit``.
        param_orig: Not used by this function; kept for interface
            compatibility.
        fit_type: ``'gpd'`` or ``'gev'``.
        x_min: Lower bound forwarded to ``extremal_distribution_fit``.
        x_max: Upper bound forwarded to ``extremal_distribution_fit``.
        alpha: The bands are the ``alpha / 2`` and ``1 - alpha / 2``
            percentiles of the bootstrapped curves.

    Returns:
        A dictionary with the bootstrapped empirical CDFs, fitted curves,
        return-period curves and the confidence bands, or ``None`` when
        ``fit_type`` is neither ``'gpd'`` nor ``'gev'``.

    Raises:
        ValueError: If no bootstrap fit is accepted, so no band can be
            computed.
    """
    # tqdm only provides a progress bar: fall back to plain iteration when
    # the package is not installed.
    try:
        from tqdm import tqdm
    except ImportError:
        def tqdm(iterable):
            return iterable

    # TODO: accept the scipy distribution object instead of a string; that
    # would allow fitting more distributions.
    # This calculation is only done for gpd and gev
    if fit_type not in ('gpd', 'gev'):
        return None

    # Number of peaks per year (annual maxima have one peak per year)
    if fit_type == 'gpd':
        n_peaks_year = len(sample) / len(data[var_name].index.year.unique())
    else:
        n_peaks_year = 1

    # Generation of new samples of peaks over threshold
    peaks_boot = simulation_sample(sample, n_sim_boot)

    # Initialization
    ecdf_boot = []
    ecdf_boot_rp = []
    x_boot = []
    y_boot = []
    y_boot_rp = []
    # Fits rejected before any fit has been accepted; they are filled in
    # with the first accepted fit instead of indexing an empty list.
    pending = 0

    # Fit an extremal distribution for each new sample
    for sim in tqdm(range(n_sim_boot)):
        peaks_sample_boot = pd.Series(peaks_boot[sim])

        # Fit empirical distribution to peaks over threshold
        ecdf_boot.append(ecdf_histogram(peaks_sample_boot))

        # Calculate the return period curve
        ecdf_boot_rp.append(return_period_curve(n_peaks_year, ecdf_boot[sim]))

        # Fit peaks sample to theoretical distribution to obtain the
        # parameters
        (param, x_boot_sim, y_boot_sim,
         y_boot_sim_rp) = extremal_distribution_fit(data=data,
                                                    var_name=var_name,
                                                    sample=peaks_sample_boot,
                                                    threshold=threshold,
                                                    fit_type=fit_type,
                                                    x_min=x_min,
                                                    x_max=x_max,
                                                    n_points=1000,
                                                    cumulative=True)

        if np.max(y_boot_sim_rp) > 1000:
            accepted = (x_boot_sim, y_boot_sim, y_boot_sim_rp)
        elif x_boot:
            # Rejected fit: repeat the previous accepted result
            accepted = (x_boot[-1], y_boot[-1], y_boot_rp[-1])
        else:
            # Rejected fit and nothing accepted yet: decide later
            pending += 1
            continue

        repeats = pending + 1
        pending = 0
        x_boot.extend([accepted[0]] * repeats)
        y_boot.extend([accepted[1]] * repeats)
        y_boot_rp.extend([accepted[2]] * repeats)

    if not y_boot:
        raise ValueError(
            'No bootstrap fit was accepted; confidence bands cannot be '
            'computed')

    # Stack peaks boot sample: one column per bootstrap fit
    y_boot_stacked = np.stack(y_boot).T

    # Extract upper and lower confidence bounds
    upper_band = np.percentile(y_boot_stacked, (alpha / 2) * 100, axis=1)
    lower_band = np.percentile(y_boot_stacked, (1 - alpha / 2) * 100, axis=1)

    upper_band_rp = 1 / (n_peaks_year * (1 - upper_band))
    lower_band_rp = 1 / (n_peaks_year * (1 - lower_band))

    # Output dictionary
    boot_extreme = {
        'ecdf_boot': ecdf_boot,
        'ecdf_boot_rp': ecdf_boot_rp,
        'x_boot': x_boot,
        'y_boot': y_boot,
        'y_boot_rp': y_boot_rp,
        'upper_band': upper_band,
        'lower_band': lower_band,
        'upper_band_rp': upper_band_rp,
        'lower_band_rp': lower_band_rp
    }

    return boot_extreme
Beispiel #8
0
def test_gev_fit_to_annual_maxima_confidence_bands():
    """Fit a GEV to the Hm0 annual maxima and plot it with bootstrap bands.

    Reads a SIMAR file, extracts the annual maxima, fits a GEV, estimates
    confidence bands by bootstrapping and draws both the CDF and the
    return-period plots.
    """
    # Inputs
    var_name = 'Hm0'
    data_file = 'SIMAR_1052046'
    threshold_percentile = 95
    n_sim_boot = 100
    alpha = 0.05  # Confidence level

    # Read SIMAR
    simar_dir = os.path.join('..', '..', '..', '..', '..', 'data', 'simar')
    data_simar, _ = read.simar(data_file, path=simar_dir)
    threshold = np.percentile(data_simar.loc[:, var_name],
                              threshold_percentile)

    # Annual maxima sample and its empirical distribution
    annual_maxima = extremal.annual_maxima_calculation(data_simar[var_name])
    ecdf_am = empirical_distributions.ecdf_histogram(annual_maxima)
    ecdf_am_rp = extremal.return_period_curve(1, ecdf_am)

    # Arguments shared by the fit and by its bootstrapped counterpart
    fit_inputs = dict(data=data_simar,
                      var_name=var_name,
                      sample=annual_maxima,
                      threshold=threshold,
                      fit_type='gev',
                      x_min=0.90 * min(annual_maxima),
                      x_max=1.5 * max(annual_maxima))

    # Fit Annual Maxima to GEV
    param_orig, x_gev, y_gev, y_gev_rp = extremal.extremal_distribution_fit(
        n_points=1000, cumulative=True, **fit_inputs)

    # Confidence bands to assess the uncertainty (bootstrapping)
    boot_extreme = extremal.extremal_distribution_fit_bootstrapping(
        n_sim_boot=n_sim_boot,
        param_orig=param_orig,
        alpha=alpha,
        **fit_inputs)

    # Representation
    plot_options = dict(title='',
                        var_name=var_name,
                        var_unit='m',
                        fig_filename='',
                        circular=False,
                        extremal_label='GEV Fit',
                        empirical_label='GEV ECDF')

    extremal.plot_extremal_cdf(x_gev, y_gev, ecdf_am, n_sim_boot,
                               boot_extreme, alpha, **plot_options)

    extremal.plot_extremal_return_period(x_gev, y_gev_rp, ecdf_am_rp,
                                         n_sim_boot, boot_extreme, alpha,
                                         **plot_options)
Beispiel #9
0
def test_gpd_fit_to_pot_confidence_bands_israel_era5():
    """Fit a GPD to the swh peaks over threshold and plot bootstrap bands.

    Reads the ``israel_north`` wave MODF file, extracts the storm peaks over
    the 95th percentile, fits a GPD, estimates confidence bands by
    bootstrapping and draws the CDF and return-period plots.
    """
    # Inputs
    var_name = 'swh'
    threshold_percentile = 95
    min_interarrival = pd.Timedelta('3 days')
    min_cycle_length = pd.Timedelta('3 hours')
    interpolation = True
    interpolation_method = 'linear'
    interpolation_freq = '1min'
    truncate = False
    extra_info = False
    n_sim_boot = 10
    alpha = 0.05  # Confidence level

    # Read MODF
    location = 'israel_north'
    drivers = ['wave']
    modf_dir = os.path.join(tests.current_path, '..', '..', 'inputadapter',
                            'tests', 'output', 'modf')
    data = [
        MetOceanDF.read_file(
            os.path.join(modf_dir, '{}_{}.modf'.format(location, driver)))
        for driver in drivers
    ]
    data_simar = pd.DataFrame(data[0])

    threshold = np.percentile(data_simar.loc[:, var_name],
                              threshold_percentile)

    # Storm cycles calculation
    cycles, _ = extremal.extreme_events(data_simar, var_name, threshold,
                                        min_interarrival, min_cycle_length,
                                        interpolation, interpolation_method,
                                        interpolation_freq, truncate,
                                        extra_info)
    # Peaks over threshold
    peaks_over_thres = extremal.events_max(cycles)

    # POT empirical distribution
    ecdf_pot = empirical_distributions.ecdf_histogram(peaks_over_thres)
    n_years = len(data_simar[var_name].index.year.unique())
    n_peaks_year = len(peaks_over_thres) / n_years
    ecdf_pot_rp = extremal.return_period_curve(n_peaks_year, ecdf_pot)

    # Arguments shared by the fit and by its bootstrapped counterpart
    fit_inputs = dict(data=data_simar,
                      var_name=var_name,
                      sample=peaks_over_thres,
                      threshold=threshold,
                      fit_type='gpd',
                      x_min=0.90 * min(peaks_over_thres),
                      x_max=1.5 * max(peaks_over_thres))

    # Fit POT to Scipy-GPD
    param_orig, x_gpd, y_gpd, y_gpd_rp = extremal.extremal_distribution_fit(
        n_points=1000, cumulative=True, **fit_inputs)

    # Confidence bands to assess the uncertainty (bootstrapping)
    boot_extreme = extremal.extremal_distribution_fit_bootstrapping(
        n_sim_boot=n_sim_boot,
        param_orig=param_orig,
        alpha=alpha,
        **fit_inputs)

    # Representation
    plot_options = dict(title='',
                        var_name=var_name,
                        var_unit='m',
                        fig_filename='',
                        circular=False,
                        extremal_label='GPD Fit',
                        empirical_label='POT ECDF')

    extremal.plot_extremal_cdf(x_gpd, y_gpd, ecdf_pot, n_sim_boot,
                               boot_extreme, alpha, **plot_options)

    extremal.plot_extremal_return_period(x_gpd, y_gpd_rp, ecdf_pot_rp,
                                         n_sim_boot, boot_extreme, alpha,
                                         **plot_options)
Beispiel #10
0
def test_gpd_fit_to_pot_confidence_bands():
    """Fit a GPD to the Hm0 peaks over threshold and plot bootstrap bands.

    Reads a SIMAR file, extracts the storm peaks over the 95th percentile,
    fits a GPD, estimates confidence bands by bootstrapping and draws the
    CDF and return-period plots.
    """
    # Inputs
    var_name = 'Hm0'
    data_file = 'SIMAR_1052046'
    threshold_percentile = 95
    min_interarrival = pd.Timedelta('3 days')
    min_cycle_length = pd.Timedelta('3 hours')
    interpolation = True
    interpolation_method = 'linear'
    interpolation_freq = '1min'
    truncate = False
    extra_info = False
    n_sim_boot = 100
    alpha = 0.05  # Confidence level

    # Read SIMAR
    simar_dir = os.path.join('..', '..', '..', '..', '..', 'data', 'simar')
    data_simar, _ = read.simar(data_file, path=simar_dir)
    threshold = np.percentile(data_simar.loc[:, var_name],
                              threshold_percentile)

    # Storm cycles calculation
    cycles, _ = extremal.extreme_events(data_simar, var_name, threshold,
                                        min_interarrival, min_cycle_length,
                                        interpolation, interpolation_method,
                                        interpolation_freq, truncate,
                                        extra_info)
    # Peaks over threshold
    peaks_over_thres = extremal.events_max(cycles)

    # POT empirical distribution
    ecdf_pot = empirical_distributions.ecdf_histogram(peaks_over_thres)
    n_years = len(data_simar[var_name].index.year.unique())
    n_peaks_year = len(peaks_over_thres) / n_years
    ecdf_pot_rp = extremal.return_period_curve(n_peaks_year, ecdf_pot)

    # Arguments shared by the fit and by its bootstrapped counterpart
    fit_inputs = dict(data=data_simar,
                      var_name=var_name,
                      sample=peaks_over_thres,
                      threshold=threshold,
                      fit_type='gpd',
                      x_min=0.90 * min(peaks_over_thres),
                      x_max=1.5 * max(peaks_over_thres))

    # Fit POT to Scipy-GPD
    param_orig, x_gpd, y_gpd, y_gpd_rp = extremal.extremal_distribution_fit(
        n_points=1000, cumulative=True, **fit_inputs)

    # Confidence bands to assess the uncertainty (bootstrapping)
    boot_extreme = extremal.extremal_distribution_fit_bootstrapping(
        n_sim_boot=n_sim_boot,
        param_orig=param_orig,
        alpha=alpha,
        **fit_inputs)

    # Representation
    plot_options = dict(title='',
                        var_name=var_name,
                        var_unit='m',
                        fig_filename='',
                        circular=False,
                        extremal_label='GPD Fit',
                        empirical_label='POT ECDF')

    extremal.plot_extremal_cdf(x_gpd, y_gpd, ecdf_pot, n_sim_boot,
                               boot_extreme, alpha, **plot_options)

    extremal.plot_extremal_return_period(x_gpd, y_gpd_rp, ecdf_pot_rp,
                                         n_sim_boot, boot_extreme, alpha,
                                         **plot_options)
Beispiel #11
0
def test_poisson_pareto_fit_to_pot_and_gev_fit_to_annual_maxima():
    """Fit a GEV to the annual maxima and a Poisson-Pareto to the POT peaks.

    Reads the ``SIMAR_1052046`` record, extracts the Hm0 events above the
    95th percentile of Hm0, builds the empirical distributions of the
    peaks-over-threshold and annual-maxima samples, fits each sample with
    ``extremal.extremal_distribution_fit`` and draws two figures: the CDFs
    and the return-period curves, both compared against the annual-maxima
    empirical distribution.

    Nothing is asserted; the test only exercises the whole pipeline.
    """
    # --- Configuration ---------------------------------------------------
    simar_code = 'SIMAR_1052046'
    percentile = 95
    min_interarrival = pd.Timedelta('3 days')
    min_cycle_length = pd.Timedelta('3 hours')
    interpolate = True
    interp_method = 'linear'
    interp_freq = '1min'
    truncate_events = False
    with_extra_info = False

    # --- Load the SIMAR record and derive the event threshold ------------
    simar_dir = os.path.join('..', '..', '..', '..', '..', 'data', 'simar')
    data_simar, code = read.simar(simar_code, path=simar_dir)
    threshold = np.percentile(data_simar.loc[:, 'Hm0'], percentile)

    # --- Samples: peaks over threshold and annual maxima -----------------
    storm_cycles, calm_periods = extremal.extreme_events(
        data_simar, 'Hm0', threshold, min_interarrival, min_cycle_length,
        interpolate, interp_method, interp_freq, truncate_events,
        with_extra_info)
    pot_sample = extremal.events_max(storm_cycles)
    am_sample = extremal.annual_maxima_calculation(data_simar['Hm0'])

    # --- Empirical distributions -----------------------------------------
    # POT: the mean number of peaks per year scales the return period.
    pot_ecdf = empirical_distributions.ecdf_histogram(pot_sample)
    n_years = len(data_simar['Hm0'].index.year.unique())
    peaks_per_year = len(pot_sample) / n_years
    pot_ecdf_rp = extremal.return_period_curve(peaks_per_year, pot_ecdf)

    # Annual maxima: one value per year by construction.
    am_ecdf = empirical_distributions.ecdf_histogram(am_sample)
    am_ecdf_rp = extremal.return_period_curve(1, am_ecdf)

    # --- Theoretical fits ------------------------------------------------
    # Arguments common to both fits.
    shared_fit_args = dict(data=data_simar,
                           var_name='Hm0',
                           n_points=1000,
                           cumulative=True)

    # Annual maxima -> GEV
    gev_fit = extremal.extremal_distribution_fit(
        sample=am_sample,
        threshold=None,
        fit_type='gev',
        x_min=0.90 * min(am_sample),
        x_max=1.5 * max(am_sample),
        **shared_fit_args)
    gev_params, x_gev, y_gev, y_gev_rp = gev_fit

    # Peaks over threshold -> Poisson-Pareto
    pp_fit = extremal.extremal_distribution_fit(
        sample=pot_sample,
        threshold=threshold,
        fit_type='poisson',
        x_min=0.90 * min(pot_sample),
        x_max=1.5 * max(pot_sample),
        **shared_fit_args)
    pp_params, x_pp, y_pp, y_pp_rp = pp_fit

    # --- Figure 1: cumulative distribution functions ---------------------
    plt.figure()
    cdf_axes = plt.axes()
    cdf_axes.plot(am_ecdf.index, am_ecdf, '.k', label='Annual maxima ECDF')
    cdf_axes.plot(x_gev, y_gev, 'k', label='GEV fit')
    cdf_axes.plot(x_pp, y_pp, label='Poisson-Pareto fit')
    cdf_axes.set_xlabel('Hm0 (m)')
    cdf_axes.set_ylabel('CDF')
    cdf_axes.legend()
    cdf_axes.grid()
    plt.show()

    # --- Figure 2: return-period curves (log-scaled period axis) ---------
    plt.figure()
    rp_axes = plt.axes()
    rp_axes.semilogx(am_ecdf_rp,
                     am_ecdf_rp.index,
                     '.k',
                     label='Annual maxima ECDF')
    rp_axes.semilogx(y_gev_rp, x_gev, 'k', label='GEV fit')
    rp_axes.semilogx(y_pp_rp, x_pp, label='Poisson-Pareto fit')
    rp_axes.set_xlim(0, 500)
    rp_axes.set_xlabel('Return Period (years)')
    rp_axes.set_ylabel('Hm0 (m)')
    rp_axes.legend()
    rp_axes.grid()
    plt.show()
def test_river_discharge_simulation():
    # Modules activation and deactivation
    # analysis = False
    # cdf_pdf_representation = False
    # temporal_dependency = False
    # climatic_events_fitting = True
    # threshold_checking_for_simulation = False
    # simulation_cycles = True
    analysis = True
    cdf_pdf_representation = False
    temporal_dependency = False
    climatic_events_fitting = True
    threshold_checking_for_simulation = False
    simulation_cycles = True

    #%% Input data
    # Initial year, number of years, number of valid  data in a year
    anocomienzo, duracion, umbralano = (2018, 10, 0.8)
    # Type of fit (0-GUI, 1-stationary, 2-nonstationary)
    ant = [2]
    # Fourier order for nonstationary analysis
    no_ord_cycles = [2]
    no_ord_calms = [2]
    # Number of simulations
    no_sim = 1
    # Type of fit functions
    fun_cycles = [st.exponweib]
    fun_calms = [st.norm]
    # Number of normals
    no_norm_cycles = [False]
    no_norm_calms = [False]
    f_mix_cycles = [False]
    mod_cycles = [[0, 0, 0, 0]]

    # Cycles River discharge
    threshold_cycles = 25
    # minimum_interarrival_time = pd.Timedelta('250 days')
    # minimum_cycle_length = pd.Timedelta('5 days')
    minimum_interarrival_time = pd.Timedelta('7 days')
    minimum_cycle_length = pd.Timedelta('2 days')

    # Cycles SPEI
    threshold_spei = 0
    minimum_interarrival_time_spei = pd.Timedelta('150 days')
    minimum_cycle_length_spei = pd.Timedelta('150 days')

    interpolation = True
    interpolation_method = 'linear'
    interpolation_freq = '1min'
    truncate = True
    extra_info = True

    #%% Read data
    # Import river discharge data when all dams were active
    data_path = os.path.join(tests.current_path, '..', '..', 'inputadapter',
                             'tests', 'output', 'modf')
    modf_file_name = 'guadalete_estuary_river_discharge.modf'
    path_name = os.path.join(data_path, modf_file_name)
    modf_rd = MetOceanDF.read_file(path_name)

    # Group into dataframe
    river_discharge = pd.DataFrame(modf_rd)

    # Drop rows that contain any missing value
    river_discharge.dropna(how='any', inplace=True)

    # Import the complete historical river discharge data
    # (all historical river discharge)
    data_path = os.path.join(tests.current_path, '..', '..', '..', '..',
                             'data', 'solar_flux_nao_index_spei')
    modf_file_name = 'caudales.txt'
    path_name = os.path.join(data_path, modf_file_name)
    modf_all = pd.read_table(path_name, header=None, delim_whitespace=True)
    date_col = dates.extract_date(modf_all.iloc[:, 0:4])
    modf_all.index = date_col
    modf_all.drop(modf_all.columns[0:4], axis=1, inplace=True)
    modf_all.columns = ['Q']

    #%% Preprocessing
    t_step = missing_values.find_timestep(river_discharge)  # Find tstep
    data_gaps = missing_values.find_missing_values(river_discharge, t_step)
    river_discharge = missing_values.fill_missing_values(
        river_discharge,
        t_step,
        technique='interpolation',
        method='nearest',
        limit=16 * 24,
        limit_direction='both')
    data_gaps_after = missing_values.find_missing_values(
        river_discharge, t_step)

    # Add noise for VAR
    noise = np.random.rand(river_discharge.shape[0],
                           river_discharge.shape[1]) * 1e-2
    river_discharge = river_discharge + noise

    # Save_to_pickle
    river_discharge.to_pickle('river_discharge.p')

    # Group into list of dataframes
    df = list()
    df.append(pd.DataFrame(river_discharge['Q']))

    #%% Cycles and calms calculation
    cycles, calm_periods, info = extremal.extreme_events(
        river_discharge, 'Q', threshold_cycles, minimum_interarrival_time,
        minimum_cycle_length, interpolation, interpolation_method,
        interpolation_freq, truncate, extra_info)
    # Calculate duration of the cycles
    dur_cycles = extremal.events_duration(cycles)
    dur_cycles_description = dur_cycles.describe()

    sample_cycles = pd.DataFrame(info['data_cycles'].iloc[:, 0])
    noise = np.random.rand(sample_cycles.shape[0],
                           sample_cycles.shape[1]) * 1e-2
    sample_cycles = sample_cycles + noise

    sample_calms = pd.DataFrame(info['data_calm_periods'])
    noise = np.random.rand(sample_calms.shape[0], sample_calms.shape[1]) * 1e-2
    sample_calms = sample_calms + noise

    #%% CLIMATIC INDICES
    # Sunspots
    data_path = os.path.join(tests.current_path, '..', '..', '..', '..',
                             'data', 'solar_flux_nao_index_spei')
    modf_file_name = 'sunspot.csv'
    path_name = os.path.join(data_path, modf_file_name)
    sunspot = pd.read_csv(path_name,
                          header=None,
                          delim_whitespace=True,
                          parse_dates=[[0, 1]],
                          index_col=0)
    sunspot = sunspot.drop([2, 4, 5], axis=1)

    # SPEI
    data_path = os.path.join(tests.current_path, '..', '..', '..', '..',
                             'data', 'solar_flux_nao_index_spei')
    modf_file_name = 'spei_cadiz.csv'
    path_name = os.path.join(data_path, modf_file_name)
    spei = pd.read_csv(path_name, sep=',')
    spei.index = sunspot.index[2412:3233]

    # Calculate cycles over SPEI
    spei = pd.DataFrame(spei.loc[:, 'SPEI_12'] * 100).dropna()
    cycles_spei, calm_periods_spei, info_spei = extremal.extreme_events(
        spei, 'SPEI_12', threshold_spei, minimum_interarrival_time_spei,
        minimum_cycle_length_spei, interpolation, interpolation_method,
        interpolation_freq, truncate, extra_info)
    peaks_over_thres_spei = extremal.events_max(cycles_spei)

    # Plot peaks
    peaks_over_thres = extremal.events_max(cycles)

    # Represent cycles
    fig1 = plt.figure(figsize=(20, 20))
    ax = plt.axes()
    ax.plot(river_discharge)
    ax.axhline(threshold_cycles, color='lightgray')
    ax.plot(spei.loc[:, 'SPEI_12'] * 100, color='0.75', linewidth=2)
    # Plot cycles
    # for cycle in cycles_all:
    #     ax.plot(cycle, 'sandybrown', marker='.', markersize=5)
    #     # ax.plot(cycle.index[0], cycle[0], 'gray', marker='.', markersize=10)
    #     # ax.plot(cycle.index[-1], cycle[-1], 'black', marker='.', markersize=10)
    for cycle in cycles:
        ax.plot(cycle, 'g', marker='.', markersize=5)
        # ax.plot(cycle.index[0], cycle[0], 'gray', marker='.', markersize=10)
        # ax.plot(cycle.index[-1], cycle[-1], 'black', marker='.', markersize=10)
    for cycle in cycles_spei:
        ax.plot(cycle, 'k', marker='.', markersize=5, linewidth=2)
        ax.plot(cycle.index[0], cycle[0], 'gray', marker='.', markersize=15)
        ax.plot(cycle.index[-1], cycle[-1], 'black', marker='.', markersize=15)
    ax.plot(peaks_over_thres, '.r', markersize=15)
    ax.plot(peaks_over_thres_spei, '.c', markersize=15)
    ax.grid()
    ax.set_xlim([datetime.date(1970, 01, 01), datetime.date(2018, 04, 11)])
    ax.set_ylim([-5, 500])
    fig1.savefig(
        os.path.join('output', 'analisis', 'graficas',
                     'ciclos_river_discharge_spei.png'))

    #%% # ANALISIS CLIMATICO (0: PARA SALTARLO, 1: PARA HACERLO; LO MISMO PARA TODOS ESTOS IF)
    if analysis:
        if cdf_pdf_representation:
            for i in range(len(df)):
                # DIBUJO LAS CDF Y PDF DE LOS REGISTROS
                plot_analisis.cdf_pdf_registro(df[i], df[i].columns[0])
                plt.pause(0.5)

        #%%  THEORETICAL FIT CYCLES
        data_cycles = sample_cycles['Q']

        # Empirical cdf
        ecdf = empirical_distributions.ecdf_histogram(data_cycles)
        # Fit the variable to an extremal distribution
        (param, x, cdf_expwbl, pdf_expwbl) = theoretical_fit.fit_distribution(
            data_cycles,
            fit_type=fun_cycles[0].name,
            x_min=min(data_cycles),
            x_max=2 * max(data_cycles),
            n_points=1000)
        par0_cycles = list()
        par0_cycles.append(np.asarray(param))
        # GUARDO LOS PARAMETROS
        np.save(
            os.path.join('output', 'analisis',
                         'parameter_river_discharge_cycles.npy'), par0_cycles)

        # Check the goodness of the fit
        fig1 = plt.figure(figsize=(20, 20))
        ax = plt.axes()
        ax.plot(ecdf.index, ecdf, '.')
        ax.plot(x, cdf_expwbl)
        ax.set_xlabel('Q (m3/s)')
        ax.set_ylabel('CDF')
        ax.legend([
            'ECDF',
            'Exponweib Fit',
        ])
        ax.grid()
        ax.set_xlim([0, 500])
        fig1.savefig(
            os.path.join('output', 'analisis', 'graficas',
                         'cdf_fit_ciclos_river_discharge.png'))

        # PP - Plot values
        (yppplot_emp,
         yppplot_teo) = theoretical_fit.pp_plot(x, cdf_expwbl, ecdf)
        # QQ - Plot values
        (yqqplot_emp,
         yqqplot_teo) = theoretical_fit.qq_plot(x, cdf_expwbl, ecdf)
        # Plot Goodness of fit
        theoretical_fit.plot_goodness_of_fit(cdf_expwbl, ecdf, river_discharge,
                                             'Q', x, yppplot_emp, yqqplot_emp,
                                             yppplot_teo, yqqplot_teo)

        # Non-stationary fit for cycles
        par_cycles, mod_cycles, f_mix_cycles, data_graph_cycles = list(), list(
        ), list(), list()
        df = list()
        df.append(data_cycles)
        for i in range(len(df)):
            # SE HAN SELECCIONADO LOS ULTIMOS 7 ANOS PARA QUE EL ANALISIS SEA MAS RAPIDO
            analisis_ = analisis.analisis(df[i],
                                          fun_cycles[i],
                                          ant[i],
                                          ordg=no_ord_cycles[i],
                                          nnorm=no_norm_cycles[i],
                                          par0=par0_cycles[i])

            par_cycles.append(analisis_[0])
            mod_cycles.append(analisis_[1])
            f_mix_cycles.append(analisis_[2])

            aux = list(analisis_[3])
            aux[5] = i
            aux = tuple(aux)
            data_graph_cycles.append(aux)

            # DIBUJO LOS RESULTADOS (HAY UNA GRAN GAMA DE FUNCIONES DE DIBUJO; VER MANUAL)
            plot_analisis.cuantiles_ne(*data_graph_cycles[i])
            plt.pause(0.5)

        fig2 = plt.figure(figsize=(20, 20))
        plt.plot(x, pdf_expwbl)
        _ = plt.hist(data_cycles,
                     bins=np.linspace(0, 500, 100),
                     normed=True,
                     alpha=0.5)
        plt.xlim([0, 400])
        fig2.savefig(
            os.path.join('output', 'analisis', 'graficas',
                         'pdf_fit_ciclos_river_discharge.png'))

        # %%  THEORETICAL FIT CALMS
        param0_calms = list()
        data_calms = sample_calms['Q']
        (param, x, cdf, pdf) = theoretical_fit.fit_distribution(
            data_calms,
            fit_type=fun_calms[0].name,
            x_min=np.min(data_calms),
            x_max=1.1 * np.max(data_calms),
            n_points=1000)
        param0_calms.append(np.asarray(param))
        # Empirical cdf
        ecdf = empirical_distributions.ecdf_histogram(data_calms)
        epdf = empirical_distributions.epdf_histogram(data_calms, bins=0)
        # PP - Plot values
        (yppplot_emp, yppplot_teo) = theoretical_fit.pp_plot(x, cdf, ecdf)
        # QQ - Plot values
        (yqqplot_emp, yqqplot_teo) = theoretical_fit.qq_plot(x, cdf, ecdf)
        # Plot Goodness of fit
        theoretical_fit.plot_goodness_of_fit(cdf, ecdf, sample_calms, 'Q', x,
                                             yppplot_emp, yqqplot_emp,
                                             yppplot_teo, yqqplot_teo)

        # Non-stationary fit for calms
        par_calms, mod_calms, f_mix_calms, data_graph_calms = list(), list(
        ), list(), list()
        df = list()
        df.append(data_calms)
        for i in range(len(df)):
            # SE HAN SELECCIONADO LOS ULTIMOS 7 ANOS PARA QUE EL ANALISIS SEA MAS RAPIDO
            analisis_ = analisis.analisis(df[i],
                                          fun_calms[i],
                                          ant[i],
                                          ordg=no_ord_calms[i],
                                          nnorm=no_norm_calms[i],
                                          par0=param0_calms[i])

            par_calms.append(analisis_[0])
            mod_calms.append(analisis_[1])
            f_mix_calms.append(analisis_[2])
            data_graph_calms.append(analisis_[3])

            # DIBUJO LOS RESULTADOS (HAY UNA GRAN GAMA DE FUNCIONES DE DIBUJO; VER MANUAL)
            plot_analisis.cuantiles_ne(*data_graph_calms[i])
            plt.pause(0.5)

        # Guardo parametros
        np.save(
            os.path.join('output', 'analisis',
                         'parameter_river_discharge_calms.npy'), par_calms)
        np.save(
            os.path.join('output', 'analisis',
                         'mod_river_discharge_calms.npy'), mod_calms)
        np.save(
            os.path.join('output', 'analisis',
                         'f_mix_river_discharge_calms.npy'), f_mix_calms)

    #%% TEMPORAL DEPENDENCY
    if temporal_dependency:
        # SE UTILIZAN LOS PARAMETROS DE SALIDA DEL ANÁLISIS PREVIO
        # Lectura de datos
        par_cycles = np.load(
            os.path.join('output', 'analisis',
                         'parameter_river_discharge_cycles.npy'))
        par_calms = np.load(
            os.path.join('output', 'analisis',
                         'parameter_river_discharge_calms.npy'))
        mod_calms = np.load(
            os.path.join('output', 'analisis',
                         'mod_river_discharge_calms.npy'))
        f_mix_calms = np.load(
            os.path.join('output', 'analisis',
                         'f_mix_river_discharge_calms.npy'))

        (df_dt_cycles,
         cdf_) = analisis.dependencia_temporal(sample_cycles, par_cycles,
                                               mod_cycles, no_norm_cycles,
                                               f_mix_cycles, fun_cycles)

        # SE GUARDAN LOS PARAMETROS DEL MODELO VAR
        df_dt_cycles.to_pickle(
            os.path.join('output', 'dependencia_temporal',
                         'df_dt_river_discharge_cycles.p'))

        (df_dt_calms,
         cdf_) = analisis.dependencia_temporal(sample_calms, par_calms,
                                               mod_calms, no_norm_calms,
                                               f_mix_calms, fun_calms)

        # SE GUARDAN LOS PARAMETROS DEL MODELO VAR
        df_dt_calms.to_pickle(
            os.path.join('output', 'dependencia_temporal',
                         'df_dt_river_discharge_calms.p'))

    if climatic_events_fitting:
        #%% FIT NUMBER OF EVENTS DURING WET CYCLES
        events_wet_cycle = pd.Series([5, 2, 1, 3, 2, 2, 0, 6, 1])
        ecdf_events_wet_cycle = empirical_distributions.ecdf_histogram(
            events_wet_cycle)

        mu = np.mean(events_wet_cycle)
        simulated_number_events = pd.Series(
            poisson.rvs(mu, loc=0, size=100, random_state=None))
        ecdf_simulated_events_wet_cycle = empirical_distributions.ecdf_histogram(
            simulated_number_events)
        x_poisson = np.linspace(0, 10, 100)
        cdf_poisson = poisson.cdf(x_poisson, mu, loc=0)

        plt.figure()
        ax = plt.axes()
        ax.plot(ecdf_events_wet_cycle.index, ecdf_events_wet_cycle, '.')
        ax.plot(ecdf_simulated_events_wet_cycle.index,
                ecdf_simulated_events_wet_cycle, '.')
        ax.plot(x_poisson, cdf_poisson)
        ax.legend(['ECDF', 'ECDF Sim', 'Poisson Fit'])
        ax.grid()

        #%% FIT TIME BETWEEN WET CYCLES
        t_wet_cycles = peaks_over_thres_spei.index.to_series().diff().dropna(
        ).astype('m8[s]').astype(np.float32)
        ecdf_t_wet_cycle = empirical_distributions.ecdf_histogram(t_wet_cycles)

        norm_param = norm.fit(t_wet_cycles, loc=0)
        simulated_t_wet_cycles = pd.Series(
            norm.rvs(*norm_param, size=100, random_state=None))
        ecdf_simulated_t_wet_cycles = empirical_distributions.ecdf_histogram(
            simulated_t_wet_cycles)
        x_norm = np.linspace(0, 2 * max(t_wet_cycles), 100)
        cdf_norm = norm.cdf(x_norm, *norm_param)

        plt.figure()
        ax = plt.axes()
        ax.plot(ecdf_t_wet_cycle.index, ecdf_t_wet_cycle, '.')
        ax.plot(ecdf_simulated_t_wet_cycles.index, ecdf_simulated_t_wet_cycles,
                '.')
        ax.plot(x_norm, cdf_norm)
        ax.legend(['ECDF', 'ECDF Sim', 'Exponential Fit'])
        ax.grid()

        simulated_t_wet_cycles_days = simulated_t_wet_cycles.astype('m8[s]')
        # Elimino valores negativos
        simulated_t_wet_cycles_days = simulated_t_wet_cycles_days[
            simulated_t_wet_cycles_days.values > datetime.timedelta(days=1)]

        #%% FIT TIME BETWEEN EVENTS DURING WET CYCLES
        t_between_events = peaks_over_thres.index.to_series().diff().dropna()
        t_between_events = t_between_events[
            t_between_events < datetime.timedelta(days=400)]
        t_between_events = t_between_events.astype('m8[s]').astype(np.float32)
        ecdf_t_between_events = empirical_distributions.ecdf_histogram(
            t_between_events)

        lambda_par = expon.fit(t_between_events, loc=0)
        simulated_t_between_events = pd.Series(
            expon.rvs(scale=lambda_par[1], size=100, random_state=None))
        ecdf_simulated_t_between_events = empirical_distributions.ecdf_histogram(
            simulated_t_between_events)
        x_expon = np.linspace(0, 2 * max(t_between_events), 100)
        cdf_expon = expon.cdf(x_expon, scale=lambda_par[1], loc=0)

        plt.figure()
        ax = plt.axes()
        ax.plot(ecdf_t_between_events.index, ecdf_t_between_events, '.')
        ax.plot(ecdf_simulated_t_between_events.index,
                ecdf_simulated_t_between_events, '.')
        ax.plot(x_expon, cdf_expon)
        ax.legend(['ECDF', 'ECDF Sim', 'Exponential Fit'])
        ax.grid()

        simulated_t_between_events_days = simulated_t_between_events.astype(
            'm8[s]')

        #%% FIT TIME BETWEEN ALL EVENTS
        # Fit time between events (without considering wet cycles) 2 method
        t_between_events_2method = peaks_over_thres.index.to_series().diff(
        ).dropna()
        t_between_events_2method = t_between_events_2method.astype(
            'm8[s]').astype(np.float32)
        ecdf_t_between_events_2method = empirical_distributions.ecdf_histogram(
            t_between_events_2method)

        lambda_par = expon.fit(t_between_events_2method, loc=0)
        simulated_t_between_events_2method = pd.Series(
            expon.rvs(scale=lambda_par[1], size=100, random_state=None))
        ecdf_simulated_t_between_events_2method = empirical_distributions.ecdf_histogram(
            simulated_t_between_events_2method)
        x_expon = np.linspace(0, 2 * np.max(t_between_events_2method), 100)
        cdf_expon = expon.cdf(x_expon, scale=lambda_par[1], loc=0)

        plt.figure()
        ax = plt.axes()
        ax.plot(ecdf_t_between_events_2method.index,
                ecdf_t_between_events_2method, '.')
        ax.plot(ecdf_simulated_t_between_events_2method.index,
                ecdf_simulated_t_between_events_2method, '.')
        ax.plot(x_expon, cdf_expon)
        ax.legend(['ECDF', 'ECDF Sim', 'Exponential Fit'])
        ax.grid()

        simulated_t_between_events_2method_days = simulated_t_between_events.astype(
            'm8[s]')
        # nul_values = simulated_t_between_events_2method_days.values > datetime.timedelta(days=2000)

    #%% SIMULACION CLIMÁTICA CHEQUEO UMBRAL OPTIMO PARA AJUSTAR DURACIONES
    if threshold_checking_for_simulation:
        # CARGO PARÁMETROS
        par_cycles = np.load(
            os.path.join('output', 'analisis',
                         'parameter_river_discharge_cycles.npy'))
        df_dt_cycles = pd.read_pickle(
            os.path.join('output', 'dependencia_temporal',
                         'df_dt_river_discharge_cycles.p'))
        vars_ = ['Q']

        # Cargo el SPEI Index para ajustar tiempo entre ciclos humedos, numero de eventos por ciclo humedo
        # tiempo entre eventos dentro de ciclo humedo

        # Figura de las cdf y pdf empiricas
        fig1, axes1 = plt.subplots(1, 2, figsize=(20, 7))

        cont = 0
        iter = 0
        while cont < no_sim:
            df_sim = simulacion.simulacion(anocomienzo,
                                           duracion,
                                           par_cycles,
                                           mod_cycles,
                                           no_norm_cycles,
                                           f_mix_cycles,
                                           fun_cycles,
                                           vars_,
                                           sample_cycles,
                                           df_dt_cycles, [0, 0, 0, 0, 0],
                                           semilla=int(
                                               np.random.rand(1) * 1e6))

            iter += 1

            # Primero filtro si hay valores mayores que el umbral,en cuyo caso descarto la serie
            if np.max(df_sim).values <= np.max(sample_cycles['Q']) * 1.25:
                # Representacion de la serie
                plt.figure()
                ax = plt.axes()
                ax.plot(df_sim)
                ax.plot(sample_cycles, '.')
                ax.plot(df_sim * 0 + max(sample_cycles['Q']), 'r')
                ax.grid()

                # Cdf Pdf
                data = df_sim['Q']
                ecdf = empirical_distributions.ecdf_histogram(data)
                epdf = empirical_distributions.epdf_histogram(data, bins=0)
                axes1[0].plot(epdf.index, epdf, '--', color='0.75')
                axes1[1].plot(ecdf.index, ecdf, '--', color='0.75')

                # Extract cycles from data for different thresholds to fix the duration
                fig2, axes2 = plt.subplots(1, 2, figsize=(20, 7))
                if cont == 0:
                    dur_cycles = dur_cycles.astype('m8[s]').astype(
                        np.float32)  # Convierto a segundos y flotante
                ecdf_dur = empirical_distributions.ecdf_histogram(dur_cycles)
                epdf_dur = empirical_distributions.epdf_histogram(dur_cycles,
                                                                  bins=0)
                axes2[0].plot(epdf_dur.index, epdf_dur, 'r', lw=2)
                axes2[1].plot(ecdf_dur.index, ecdf_dur, 'r', lw=2)

                threshold = np.arange(20, 110, 10)
                color_sequence = [
                    '#1f77b4', '#aec7e8', '#ff7f0e', '#ffbb78', '#2ca02c',
                    '#98df8a', '#d62728', '#ff9896', '#9467bd', '#c5b0d5',
                    '#8c564b', '#c49c94', '#e377c2', '#f7b6d2', '#7f7f7f',
                    '#c7c7c7', '#bcbd22', '#dbdb8d', '#17becf', '#9edae5'
                ]
                for j, th in enumerate(threshold):
                    minimum_interarrival_time = pd.Timedelta('1 hour')
                    minimum_cycle_length = pd.Timedelta('2 days')
                    cycles, calm_periods, info = extremal.extreme_events(
                        df_sim, 'Q', th, minimum_interarrival_time,
                        minimum_cycle_length, interpolation,
                        interpolation_method, interpolation_freq, truncate,
                        extra_info)

                    # Calculate duration of the cycles
                    dur_cycles_sim = extremal.events_duration(cycles)
                    dur_cycles_sim_description = dur_cycles_sim.describe()

                    # Represent cycles
                    fig3 = plt.figure(figsize=(20, 20))
                    ax = plt.axes()
                    ax.plot(df_sim)
                    ax.axhline(th, color='lightgray')
                    ax.grid()
                    ax.legend([
                        'Threshold: ' + str(th) + ' (m3/s)' + '/ Dur_min ' +
                        str(dur_cycles_description['min']) + ' - ' +
                        str(dur_cycles_sim_description['min']) +
                        '/ Dur_mean ' + str(dur_cycles_description['mean']) +
                        ' - ' + str(dur_cycles_sim_description['mean']) +
                        '/ Dur_max ' + str(dur_cycles_description['max']) +
                        ' - ' + str(dur_cycles_sim_description['max'])
                    ])

                    for cycle in cycles:
                        ax.plot(cycle, 'g', marker='.', markersize=5)
                        ax.plot(cycle.index[0],
                                cycle[0],
                                'gray',
                                marker='.',
                                markersize=10)
                        ax.plot(cycle.index[-1],
                                cycle[-1],
                                'black',
                                marker='.',
                                markersize=10)
                    ax.set_xlim([
                        datetime.date(2018, 04, 01),
                        datetime.date(2030, 01, 01)
                    ])
                    ax.set_ylim([0, 600])

                    fig_name = 'ciclos_sim_' + str(cont) + '_threshold_' + str(
                        th) + '.png'
                    fig3.savefig(
                        os.path.join('output', 'simulacion', 'graficas',
                                     'descarga_fluvial', 'umbral_optimo',
                                     fig_name))

                    # Calculate the cdf and pdf of the cycle duration
                    dur_cycles_sim = dur_cycles_sim.astype('m8[s]').astype(
                        np.float32)
                    ecdf_dur_sim = empirical_distributions.ecdf_histogram(
                        dur_cycles_sim)
                    epdf_dur_sim = empirical_distributions.epdf_histogram(
                        dur_cycles_sim, bins=0)
                    axes2[0].plot(epdf_dur_sim.index,
                                  epdf_dur_sim,
                                  '--',
                                  color=color_sequence[j],
                                  label=['Threshold: ' + str(threshold[j])])
                    axes2[1].plot(ecdf_dur_sim.index,
                                  ecdf_dur_sim,
                                  '--',
                                  color=color_sequence[j],
                                  label=['Threshold: ' + str(threshold[j])])
                    axes2[0].legend()
                    axes2[1].set_xlim([0, 5000000])
                    axes2[0].set_xlim([0, 5000000])

                fig_name = 'ciclos_dur_sim_' + str(cont) + '.png'
                fig2.savefig(
                    os.path.join('output', 'simulacion', 'graficas',
                                 'descarga_fluvial', 'umbral_optimo',
                                 fig_name))

                cont += 1

            data = sample_cycles['Q']
            ecdf = empirical_distributions.ecdf_histogram(data)
            epdf = empirical_distributions.epdf_histogram(data, bins=0)
            axes1[0].plot(epdf.index, epdf, 'r', lw=2)
            axes1[1].plot(ecdf.index, ecdf, 'r', lw=2)

        fig_name = 'pdf_cdf_descarga_fluvial.png'
        fig1.savefig(
            os.path.join('output', 'simulacion', 'graficas',
                         'descarga_fluvial', 'umbral_optimo', fig_name))

    #%% SIMULACION CLIMATICA
    threshold = 50
    minimum_interarrival_time = pd.Timedelta('1 hour')
    minimum_cycle_length = pd.Timedelta('2 days')
    if simulation_cycles:
        # CARGO PARÁMETROS
        par_cycles = np.load(
            os.path.join('output', 'analisis',
                         'parameter_river_discharge_cycles.npy'))
        par_calms = np.load(
            os.path.join('output', 'analisis',
                         'parameter_river_discharge_calms.npy'))
        mod_calms = np.load(
            os.path.join('output', 'analisis',
                         'mod_river_discharge_calms.npy'))
        f_mix_calms = np.load(
            os.path.join('output', 'analisis',
                         'f_mix_river_discharge_calms.npy'))

        df_dt_cycles = pd.read_pickle(
            os.path.join('output', 'dependencia_temporal',
                         'df_dt_river_discharge_cycles.p'))
        df_dt_calms = pd.read_pickle(
            os.path.join('output', 'dependencia_temporal',
                         'df_dt_river_discharge_calms.p'))
        vars_ = ['Q']

        # Figura de las cdf y pdf empiricas
        fig2, axes1 = plt.subplots(1, 2, figsize=(20, 7))

        cont = 0
        iter = 0
        while cont < no_sim:
            df_sim = simulacion.simulacion(anocomienzo,
                                           duracion,
                                           par_cycles,
                                           mod_cycles,
                                           no_norm_cycles,
                                           f_mix_cycles,
                                           fun_cycles,
                                           vars_,
                                           sample_cycles,
                                           df_dt_cycles, [0, 0, 0, 0, 0],
                                           semilla=int(
                                               np.random.rand(1) * 1e6))

            iter += 1

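            # First check the simulated maximum: the series is kept only if it
            # does not exceed 1.25 times the maximum of sample_cycles['Q'];
            # otherwise it is discarded and a new one is simulated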
            if np.max(df_sim).values <= np.max(sample_cycles['Q']) * 1.25:
                df_sim = df_sim.resample('1H').interpolate()

                # Extract cycles from data for different thresholds to fix the duration
                if cont == 0:
                    dur_cycles = dur_cycles.astype('m8[s]').astype(
                        np.float32)  # Convierto a segundos y flotante
                # Calculate cycles
                cycles, calm_periods, info = extremal.extreme_events(
                    df_sim, 'Q', threshold, minimum_interarrival_time,
                    minimum_cycle_length, interpolation, interpolation_method,
                    interpolation_freq, truncate, extra_info)

                # # Represent cycles
                # fig3 = plt.figure(figsize=(20, 20))
                # ax = plt.axes()
                # ax.plot(df_sim)
                # ax.axhline(threshold, color='lightgray')
                # ax.grid()
                #
                # for cycle in cycles:
                #     ax.plot(cycle, 'g', marker='.', markersize=5)
                #     ax.plot(cycle.index[0], cycle[0], 'gray', marker='.', markersize=10)
                #     ax.plot(cycle.index[-1], cycle[-1], 'black', marker='.', markersize=10)
                # ax.set_xlim([datetime.date(2018, 01, 01), datetime.date(2021, 01, 01)])
                # ax.set_ylim([0, 600])
                # fig3.savefig(os.path.join('output', 'simulacion', 'graficas', 'descarga_fluvial',
                #                           'ciclos_cadiz_simulado_' + str(cont).zfill(4) + '.png'))

                # Start to construct the time series
                indices = pd.date_range(start='2018', end='2100', freq='1H')
                df_simulate = pd.DataFrame(np.zeros((len(indices), 1)) + 25,
                                           dtype=float,
                                           index=indices,
                                           columns=['Q'])

                # The start is in wet cycles
                cont_wet_cicles = 0
                cont_df_events = 1
                t_ini = datetime.datetime(2018, 01, 01)
                t_end = datetime.datetime(2018, 01, 01)
                while t_end < datetime.datetime(2090, 01, 01):
                    if cont_wet_cicles != 0:
                        t_ini = t_end + simulated_t_wet_cycles_days[
                            cont_wet_cicles]
                        year = t_ini.year
                    else:
                        year = 2018

                    # Select the number of events during wet cycle
                    n_events = simulated_number_events[cont_wet_cicles] - 1
                    cont_wet_cicles += 1

                    if n_events != 0:

                        # for j in range(0, n_events):
                        cont_df_events_in_wet_cycles = 0
                        while cont_df_events_in_wet_cycles <= n_events:
                            if cont_df_events_in_wet_cycles != 0:
                                # Time between events: move on to the following year
                                year = year + 1

                            # Select the event
                            cycle = cycles[cont_df_events]

                            if np.max(cycle) >= 150:
                                # Simulate date
                                month1 = [
                                    random.randint(1, 3),
                                    random.randint(10, 12)
                                ]
                                rand_pos = random.randint(0, 1)
                                month = month1[rand_pos]
                                day = random.randint(1, 28)
                                hour = random.randint(0, 23)
                            else:
                                # Simulate date
                                month = random.randint(1, 12)
                                day = random.randint(1, 28)
                                hour = random.randint(0, 23)
                            t_ini = datetime.datetime(year, month, day, hour)
                            pos_ini = np.where(
                                df_simulate.index == t_ini)[0][0]
                            pos_end = pos_ini + cycle.shape[0]

                            # Insert cycle
                            df_simulate.iloc[pos_ini:pos_end, 0] = cycle.values
                            t_end = df_simulate.index[pos_end]
                            year = df_simulate.index[pos_end].to_datetime(
                            ).year
                            cont_df_events += 1
                            cont_df_events_in_wet_cycles += 1

                    else:
                        t_end = t_ini

                # Simulation of calm periods
                df_sim_calms = simulacion.simulacion(
                    anocomienzo,
                    85,
                    par_calms,
                    mod_calms,
                    no_norm_calms,
                    f_mix_calms,
                    fun_calms,
                    vars_,
                    sample_calms,
                    df_dt_calms, [0, 0, 0, 0, 0],
                    semilla=int(np.random.rand(1) * 1e6))

                # Replace negative values with a single random integer in [1, 5)
                df_sim_calms[df_sim_calms < 0] = np.random.randint(1, 5)

                # Combine both dataframes with cycles and calms
                pos_cycles = df_simulate >= 50
                df_river_discharge = df_sim_calms
                df_river_discharge[pos_cycles] = df_simulate

                # Hourly interpolation
                df_river_discharge = df_river_discharge.resample(
                    'H').interpolate()

                # Representation of results
                fig1 = plt.figure(figsize=(20, 10))
                ax = plt.axes()
                ax.plot(river_discharge)
                ax.plot(df_river_discharge)
                ax.legend('Hindcast', 'Forecast')
                ax.grid()
                ax.set_ylim([-5, 500])
                fig1.savefig(
                    os.path.join(
                        'output', 'simulacion', 'graficas', 'descarga_fluvial',
                        'descarga_fluvial_cadiz_simulado_' +
                        str(cont).zfill(4) + '.png'))

                # Cdf Pdf
                data = df_river_discharge['Q']
                ecdf = empirical_distributions.ecdf_histogram(data)
                epdf = empirical_distributions.epdf_histogram(data, bins=0)
                axes1[0].plot(epdf.index, epdf, '--', color='0.75')
                axes1[1].plot(ecdf.index, ecdf, '--', color='0.75')

                # Save the simulated series to file
                df_river_discharge.to_csv(os.path.join(
                    'output', 'simulacion', 'series_temporales',
                    'descarga_fluvial_500', 'descarga_fluvial_guadalete_sim_' +
                    str(cont).zfill(4) + '.txt'),
                                          sep=n(b'\t'))
                cont += 1

        data = river_discharge['Q']
        ecdf = empirical_distributions.ecdf_histogram(data)
        epdf = empirical_distributions.epdf_histogram(data, bins=0)
        axes1[0].plot(epdf.index, epdf, 'r', lw=2)
        axes1[1].plot(ecdf.index, ecdf, 'r', lw=2)
        fig_name = 'pdf_cdf_descarga_fluvial.png'
        fig2.savefig(
            os.path.join('output', 'simulacion', 'graficas',
                         'descarga_fluvial', fig_name))