def test_save_to_msgpack():
    """Preprocess the full SIMAR series and store it as msgpack when gap-free.

    The series is read, its time step is estimated, null values are erased
    and the remaining gaps are filled by nearest-value interpolation.  The
    result is written with ``save_to_msgpack`` only if no missing values are
    detected afterwards; otherwise nothing is saved.
    """
    # NOTE(review): the output folder is a machine-specific absolute path.
    output_dir = 'D:\\REPOSITORIO GIT\\protocol_project\\data\\intermediate_files'
    output_name = 'full_simar_preprocessed.msg'
    simar_file = os.path.join('simar', 'SIMAR_1052046')

    # Load the raw SIMAR series
    raw_simar, _ = read.simar(simar_file, tests.full_data_path)

    # Clean and gap-fill the series
    step = missing_values.find_timestep(raw_simar, n_random_values=10)
    without_nulls = missing_values.erase_null_values(raw_simar, method='all')
    filled_simar = missing_values.fill_missing_values(
        without_nulls,
        step,
        technique='interpolation',
        method='nearest',
        limit=720,
        limit_direction='both')

    # Only a series without remaining gaps is persisted
    remaining_gaps = missing_values.find_missing_values(filled_simar, step)
    if not remaining_gaps.empty:
        return

    save_to_msgpack(filled_simar, output_name, output_dir)
def test_fill_missing_values():
    """Check selected values of the short-gap SIMAR sample after gap filling.

    Gaps are filled by nearest-value interpolation (limit of 24 steps in both
    directions) and four entries are compared against reference values using
    a relative tolerance of 0.01.
    """
    # Load the sample series that contains short gaps
    simar_dir = os.path.join(tests.sample_data_path, 'simar')
    # noinspection PyTypeChecker
    simar_series, _ = read.simar('SIMAR_1052046_short_gap', simar_dir)

    # Estimate the sampling interval and fill the gaps
    step = missing_values.find_timestep(simar_series)
    filled = missing_values.fill_missing_values(
        simar_series,
        step,
        technique='interpolation',
        method='nearest',
        limit=24,
        limit_direction='both')

    # (timestamp, column, reference value)
    reference_values = (
        ('1958-01-04 08', 'Hm0', 2.1),
        ('1958-01-04 12', 'Tp', 10.5),
        ('1958-01-04 14', 'Tp', 10.6),
        ('1960-12-31 19', 'Hm0', 0.6),
    )
    tolerance = 0.01
    for timestamp, column, reference in reference_values:
        assert filled.loc[timestamp, column] == pytest.approx(
            reference, tolerance)
def test_missing_values_report():
    """Build the gaps report of the short-gap SIMAR sample, save and plot it.

    The report is written to ``gaps_report.csv`` in the report test output
    folder and the gaps of the ``Hm0`` column are plotted.
    """
    # Load the sample series that contains short gaps
    simar_dir = os.path.join(tests.sample_data_path, 'simar')
    # noinspection PyTypeChecker
    simar_series, _ = read.simar('SIMAR_1052046_short_gap', simar_dir)

    # Locate the gaps using the estimated sampling interval
    step = missing_values.find_timestep(simar_series)
    gaps = missing_values.find_missing_values(simar_series, step)

    # Summarise the gaps and write the table to disk
    report = missing_values.missing_values_report(simar_series, gaps)
    report_name = 'gaps_report.csv'
    report_dir = os.path.join('.', '..', '..', 'report', 'tests', 'output',
                              'tables')
    missing_values.missing_values_report_to_file(report, report_name,
                                                 report_dir)

    # Plot the gaps over the Hm0 series
    plot_options = dict(
        data=simar_series,
        data_column='Hm0',
        data_gaps=gaps,
        title='',
        var_name='Hm0',
        var_unit='m',
        fig_filename='',
        circular=False,
        label='Hm0')
    missing_values.plot_missing_values(**plot_options)
def test_find_timestep():
    """Check that the time step found for the short SIMAR sample is one hour."""
    # Read simar file
    data_path = os.path.join(tests.sample_data_path, 'simar')
    # noinspection PyTypeChecker
    data_simar, _ = read.simar('SIMAR_1052046_short', data_path)

    t_step = missing_values.find_timestep(data_simar)

    # pandas exposes the class as ``Timedelta``; ``pd.timedelta`` does not
    # exist and raised AttributeError before the comparison could run.
    assert t_step == pd.Timedelta(hours=1)
def test_find_missing_values():
    """Locate the gaps of the short-gap SIMAR sample and show them on a plot.

    The ``Hm0`` series is drawn and the positions listed in the ``pos_ini``
    and ``pos_fin`` columns of the gaps table are marked with black dots.
    """
    # Load the sample series that contains short gaps
    simar_dir = os.path.join(tests.sample_data_path, 'simar')
    # noinspection PyTypeChecker
    simar_series, _ = read.simar('SIMAR_1052046_short_gap', simar_dir)

    # Locate the gaps using the estimated sampling interval
    step = missing_values.find_timestep(simar_series)
    gaps = missing_values.find_missing_values(simar_series, step)

    # Draw the series and mark both ends of every gap
    fig = plt.figure()
    ax = fig.gca()
    ax.plot(simar_series.loc[:, 'Hm0'])
    for edge_column in ('pos_ini', 'pos_fin'):
        edge_positions = gaps.loc[:, edge_column]
        ax.plot(simar_series.loc[edge_positions, 'Hm0'], 'k.', markersize=10)

    fig.show()
def test_figuras_cuantiles_ajuste_no_estacionario():
    """Draw the non-stationary quantile figure for the offshore data set.

    The hourly wave, wind and sea level pressure series are read, merged,
    gap-filled, perturbed with a small random noise and pickled to
    ``wave_wind_offshore.p``.  The graph data is then loaded from
    ``output/analisis/data_graph_offshore.p`` and handed, together with the
    fit functions, to ``plot_analisis.cuantiles_ne_paper``.
    """
    #%% Input data
    # Only ``fun`` is consumed further down; the other settings are not
    # referenced again inside this test.
    # Initial year, number of years, number of valid data in a year
    anocomienzo, duracion, umbralano = 2018, 85, 0.5
    # Type of fit (0-GUI, 1-stationary, 2-nonstationary)
    ant = [2] * 6
    # Fourier order for nonstationary analysis
    no_ord = [4] * 6
    # Number of simulations
    no_sim = 350
    # Type of fit functions
    fun = [st.lognorm, lognorm2, normtrun, st.weibull_min, normtrun, st.norm]
    # Number of normals
    no_norm = [False, False, 2, False, 2, False]

    #%% Read data
    # Wave and wind come from simar, sea level pressure from era; each series
    # is resampled to hourly values and interpolated right after reading.
    modf_dir = os.path.join(tests.current_path, '..', '..', 'inputadapter',
                            'tests', 'output', 'modf')
    modf_names = (
        'guadalete_estuary_wave.modf',
        'guadalete_estuary_wind.modf',
        'guadalete_estuary_sea_level_pressure.modf',
    )
    hourly_series = []
    for modf_name in modf_names:
        modf = MetOceanDF.read_file(os.path.join(modf_dir, modf_name))
        hourly_series.append(modf.resample('H').interpolate())

    # Group into a single dataframe
    wave_wind = pd.concat(hourly_series, axis=1)
    wave_wind.columns = ['hs', 'tp', 'dh', 'vv', 'dv', 'slp']
    # Drop the rows where any of the variables is missing
    wave_wind.dropna(how='any', inplace=True)

    #%% Preprocessing
    t_step = missing_values.find_timestep(wave_wind)
    gaps_before = missing_values.find_missing_values(wave_wind, t_step)
    wave_wind = missing_values.fill_missing_values(
        wave_wind,
        t_step,
        technique='interpolation',
        method='nearest',
        limit=16 * 24,
        limit_direction='both')
    gaps_after = missing_values.find_missing_values(wave_wind, t_step)

    # Add noise for VAR
    wave_wind = wave_wind + np.random.rand(*wave_wind.shape) * 1e-2

    # Save to pickle
    wave_wind.to_pickle('wave_wind_offshore.p')

    # One single-column dataframe per variable
    variable_frames = [
        pd.DataFrame(wave_wind[name])
        for name in ('hs', 'tp', 'dh', 'vv', 'dv', 'slp')
    ]
    full_frame = pd.DataFrame(wave_wind)

    # Load the stored graph data (binary pickle)
    graph_file = os.path.join('output', 'analisis', 'data_graph_offshore.p')
    with open(graph_file, 'rb') as handle:
        data_graph = pickle.load(handle)

    # Draw the results (a wide range of plotting functions exists; see manual)
    plot_analisis.cuantiles_ne_paper(fun, *data_graph)
def _read_modf_test_output(file_name, hourly=True):
    """Read a ``.modf`` file from the inputadapter test output folder.

    When ``hourly`` is true the series is resampled with rule ``'H'`` and
    interpolated before being returned.
    """
    path_name = os.path.join(tests.current_path, '..', '..', 'inputadapter',
                             'tests', 'output', 'modf', file_name)
    modf = MetOceanDF.read_file(path_name)
    if hourly:
        modf = modf.resample('H').interpolate()
    return modf


def _plot_ecdf_curve(ax, data, **line_style):
    """Draw the empirical CDF of ``data`` on ``ax`` with the given line style."""
    ecdf = empirical_distributions.ecdf_histogram(data)
    ax.plot(ecdf.index, ecdf, **line_style)


def _format_cdf_axes(ax, ylabel, xlabel, xticks, xlim, hide_ytick_labels):
    """Apply the labels, ticks, grid and limits shared by every CDF panel."""
    ax.set_ylabel(ylabel, fontsize=16)
    ax.set_xlabel(xlabel, fontsize=16)
    ax.set_xticks(xticks)
    ax.set_yticks([0, 0.25, 0.5, 0.75, 1])
    if hide_ytick_labels:
        ax.set_yticklabels([])
    ax.grid(True)
    ax.set_xlim(xlim)
    ax.set_ylim([0, 1.05])


def test_pdf_cdf_simulaciones():
    """Compare the empirical CDFs of the simulated and the historic series.

    A 3x3 grid of panels is created (the central panel of the last row is
    removed).  The CDF of every simulated wave/wind/pressure series
    (simulations 1 to ``no_sim - 1``) is drawn as a dotted orange line and
    the CDF of the historic data as a solid blue line on top.  The figure is
    saved as PDF and PNG under ``output/analisis/graficas``.
    """
    #%% Input data
    # Number of simulations
    no_sim = 100

    # Prepare the figure
    plt.rcParams.update({'font.size': 12})
    fig3, axes3 = plt.subplots(3, 3, figsize=(12, 10))
    plt.delaxes(axes3[2, 1])

    #%% Read data
    # Wave and wind data (from simar offshore) and sea level pressure (from era)
    modf_wave = _read_modf_test_output('guadalete_estuary_wave.modf')
    modf_wind = _read_modf_test_output('guadalete_estuary_wind.modf')
    modf_slp = _read_modf_test_output(
        'guadalete_estuary_sea_level_pressure.modf')

    # Astronomical tide, hindcast and forecast
    # NOTE(review): the hindcast is read but never plotted; both the orange
    # and the blue tide curves below use the forecast -- confirm intent.
    at_hindcast_df = pd.DataFrame(
        _read_modf_test_output('guadalete_estuary_astronomical_tide.modf'))
    at_forecast_df = pd.DataFrame(
        _read_modf_test_output(
            'guadalete_estuary_astronomical_tide_forecast.modf'))

    # Group into dataframe
    wave_wind = pd.concat([modf_wave, modf_wind, modf_slp], axis=1)
    wave_wind.columns = ['hs', 'tp', 'dh', 'vv', 'dv', 'slp']
    # Drop the rows where any of the variables is missing
    wave_wind.dropna(how='any', inplace=True)

    # River discharge (read as is, without hourly resampling)
    river_discharge = pd.DataFrame(
        _read_modf_test_output('guadalete_estuary_river_discharge.modf',
                               hourly=False))
    river_discharge.dropna(how='any', inplace=True)

    #%% Preprocessing
    t_step = missing_values.find_timestep(wave_wind)
    # Gap detection is run before and after filling; results are not inspected
    missing_values.find_missing_values(wave_wind, t_step)
    wave_wind = missing_values.fill_missing_values(
        wave_wind,
        t_step,
        technique='interpolation',
        method='nearest',
        limit=16 * 24,
        limit_direction='both')
    missing_values.find_missing_values(wave_wind, t_step)

    # Add noise for VAR
    noise = np.random.rand(wave_wind.shape[0], wave_wind.shape[1]) * 1e-2
    wave_wind = wave_wind + noise

    # Save to pickle
    wave_wind.to_pickle('wave_wind_offshore.p')

    #%% Simulated series (dotted orange)
    simulated_style = dict(color='tab:orange', linestyle=':', lw=3)
    # (panel position, column of the simulated wave/wind/pressure table)
    simulated_panels = (
        ((0, 0), 'hs'),
        ((0, 1), 'tp'),
        ((0, 2), 'dh'),
        ((1, 0), 'vv'),
        ((1, 1), 'dv'),
        ((1, 2), 'slp'),
    )
    for i in tqdm(range(1, no_sim)):
        sim_tag = str(i).zfill(4)
        file_name_simar_sim = os.path.join(
            'output', 'simulacion', 'series_temporales',
            'wave_wind_slp_offshore_500',
            'wave_wind_slp_guadalete_offshore_sim_' + sim_tag + '.txt')
        file_name_rd_sim = os.path.join(
            'output', 'simulacion', 'series_temporales',
            'descarga_fluvial_500',
            'descarga_fluvial_guadalete_sim_' + sim_tag + '.txt')

        df_simar_sim = pd.read_table(file_name_simar_sim, index_col=0)
        # NOTE(review): the simulated river discharge is read but not used;
        # the orange Q curve below is drawn from the historic series, so it
        # coincides with the blue one -- confirm whether ``df_rd_sim`` was
        # meant to be plotted instead.
        df_rd_sim = pd.read_table(file_name_rd_sim, index_col=0)

        for position, column in simulated_panels:
            _plot_ecdf_curve(axes3[position], df_simar_sim[column],
                             **simulated_style)

    # These two curves do not depend on the simulation index, so they are
    # drawn once instead of once per simulation.
    _plot_ecdf_curve(axes3[2, 0], at_forecast_df['Eta'], **simulated_style)
    _plot_ecdf_curve(axes3[2, 2], river_discharge['Q'], **simulated_style)

    #%% Axes formatting (independent of the data, applied once)
    # (position, y label, x label, x ticks, x limits, hide y tick labels)
    # The degree symbol is written as ``\\circ``: same label text as before,
    # without relying on the invalid ``\c`` escape sequence.
    # NOTE(review): ``w_{theta}`` has no backslash before ``theta`` (unlike
    # ``u_{\\theta}``); left unchanged -- confirm the intended label.
    panel_formats = (
        ((0, 0), 'CDF', '$H_{m0} (m)$', [0, 5, 10], [0, 10], False),
        ((0, 1), '', '$T_{p} (s)$', [0, 12, 24], [0, 24], True),
        ((0, 2), '', '$w_{theta} (^\\circ)$', [0, 180, 360], [0, 360], True),
        ((1, 0), 'CDF', '$u_{10} (m/s)$', [0, 15, 30], [0, 30], False),
        ((1, 1), '', '$u_{\\theta} (^\\circ)$', [0, 180, 360], [0, 360],
         True),
        ((1, 2), '', '$slp (mbar)$', [980, 1015, 1050], [980, 1050], True),
        ((2, 0), 'CDF', '$A_{AT} (m)$', [-2, 0, 2], [-2, 2.], False),
        ((2, 2), '', '$Q (m^{3}/s)$', [0, 250, 500], [0, 500], True),
    )
    for position, ylabel, xlabel, xticks, xlim, hide_labels in panel_formats:
        _format_cdf_axes(axes3[position], ylabel, xlabel, xticks, xlim,
                         hide_labels)
    # Tick labels of the panel that was removed from the figure
    axes3[2, 1].set_xticklabels([])
    axes3[2, 1].set_yticklabels([])

    #%% Historic series (solid blue, drawn last so they stay on top)
    historic_style = dict(color='tab:blue', lw=2)
    # (position, column, bin step, fixed number of bins)
    historic_panels = (
        ((0, 0), 'hs', 0.1, None),
        ((0, 1), 'tp', 1, None),
        ((0, 2), 'dh', 12, None),
        ((1, 0), 'vv', 0.1, None),
        ((1, 1), 'dv', None, 8),
        ((1, 2), 'slp', 1, None),
    )
    for position, column, paso, fixed_bins in historic_panels:
        data = wave_wind[column]
        if paso is None:
            bins = fixed_bins
        else:
            bins = np.max(data) / (paso * 2.0)
        _plot_ecdf_curve(axes3[position], data, **historic_style)
        # The empirical PDF is still evaluated, but its result is not plotted
        empirical_distributions.epdf_histogram(data, bins=bins)

    _plot_ecdf_curve(axes3[2, 0], at_forecast_df['Eta'], **historic_style)
    _plot_ecdf_curve(axes3[2, 2], river_discharge['Q'], **historic_style)

    #%% Save the figure
    plt.tight_layout()
    for extension in ('.pdf', '.png'):
        fig3.savefig(
            os.path.join('output', 'analisis', 'graficas',
                         'ecdf_historico_simulacion' + extension))
def test_river_discharge_simulation(): # Modules activation and deactivation # analysis = False # cdf_pdf_representation = False # temporal_dependency = False # climatic_events_fitting = True # threshold_checking_for_simulation = False # simulation_cycles = True analysis = True cdf_pdf_representation = False temporal_dependency = False climatic_events_fitting = True threshold_checking_for_simulation = False simulation_cycles = True #%% Input data # Initial year, number of years, number of valid data in a year anocomienzo, duracion, umbralano = (2018, 10, 0.8) # Type of fit (0-GUI, 1-stationary, 2-nonstationary) ant = [2] # Fourier order for nonstationary analysis no_ord_cycles = [2] no_ord_calms = [2] # Number of simulations no_sim = 1 # Type of fit functions fun_cycles = [st.exponweib] fun_calms = [st.norm] # Number of normals no_norm_cycles = [False] no_norm_calms = [False] f_mix_cycles = [False] mod_cycles = [[0, 0, 0, 0]] # Cycles River discharge threshold_cycles = 25 # minimum_interarrival_time = pd.Timedelta('250 days') # minimum_cycle_length = pd.Timedelta('5 days') minimum_interarrival_time = pd.Timedelta('7 days') minimum_cycle_length = pd.Timedelta('2 days') # Cycles SPEI threshold_spei = 0 minimum_interarrival_time_spei = pd.Timedelta('150 days') minimum_cycle_length_spei = pd.Timedelta('150 days') interpolation = True interpolation_method = 'linear' interpolation_freq = '1min' truncate = True extra_info = True #%% Read data # Import river discharge data when all dams were active data_path = os.path.join(tests.current_path, '..', '..', 'inputadapter', 'tests', 'output', 'modf') modf_file_name = 'guadalete_estuary_river_discharge.modf' path_name = os.path.join(data_path, modf_file_name) modf_rd = MetOceanDF.read_file(path_name) # Group into dataframe river_discharge = pd.DataFrame(modf_rd) # Delete rows where with no common values river_discharge.dropna(how='any', inplace=True) # Import complete rive discharge historic data # All historic river discharge 
data_path = os.path.join(tests.current_path, '..', '..', '..', '..', 'data', 'solar_flux_nao_index_spei') modf_file_name = 'caudales.txt' path_name = os.path.join(data_path, modf_file_name) modf_all = pd.read_table(path_name, header=None, delim_whitespace=True) date_col = dates.extract_date(modf_all.iloc[:, 0:4]) modf_all.index = date_col modf_all.drop(modf_all.columns[0:4], axis=1, inplace=True) modf_all.columns = ['Q'] #%% Preprocessing t_step = missing_values.find_timestep(river_discharge) # Find tstep data_gaps = missing_values.find_missing_values(river_discharge, t_step) river_discharge = missing_values.fill_missing_values( river_discharge, t_step, technique='interpolation', method='nearest', limit=16 * 24, limit_direction='both') data_gaps_after = missing_values.find_missing_values( river_discharge, t_step) # Add noise for VAR noise = np.random.rand(river_discharge.shape[0], river_discharge.shape[1]) * 1e-2 river_discharge = river_discharge + noise # Save_to_pickle river_discharge.to_pickle('river_discharge.p') # Group into list of dataframes df = list() df.append(pd.DataFrame(river_discharge['Q'])) #%% Cycles and calms calculation cycles, calm_periods, info = extremal.extreme_events( river_discharge, 'Q', threshold_cycles, minimum_interarrival_time, minimum_cycle_length, interpolation, interpolation_method, interpolation_freq, truncate, extra_info) # Calculate duration of the cycles dur_cycles = extremal.events_duration(cycles) dur_cycles_description = dur_cycles.describe() sample_cycles = pd.DataFrame(info['data_cycles'].iloc[:, 0]) noise = np.random.rand(sample_cycles.shape[0], sample_cycles.shape[1]) * 1e-2 sample_cycles = sample_cycles + noise sample_calms = pd.DataFrame(info['data_calm_periods']) noise = np.random.rand(sample_calms.shape[0], sample_calms.shape[1]) * 1e-2 sample_calms = sample_calms + noise #%% CLIMATIC INDICES # Sunspots data_path = os.path.join(tests.current_path, '..', '..', '..', '..', 'data', 'solar_flux_nao_index_spei') 
modf_file_name = 'sunspot.csv' path_name = os.path.join(data_path, modf_file_name) sunspot = pd.read_csv(path_name, header=None, delim_whitespace=True, parse_dates=[[0, 1]], index_col=0) sunspot = sunspot.drop([2, 4, 5], axis=1) # SPEI data_path = os.path.join(tests.current_path, '..', '..', '..', '..', 'data', 'solar_flux_nao_index_spei') modf_file_name = 'spei_cadiz.csv' path_name = os.path.join(data_path, modf_file_name) spei = pd.read_csv(path_name, sep=',') spei.index = sunspot.index[2412:3233] # Calculate cycles over SPEI spei = pd.DataFrame(spei.loc[:, 'SPEI_12'] * 100).dropna() cycles_spei, calm_periods_spei, info_spei = extremal.extreme_events( spei, 'SPEI_12', threshold_spei, minimum_interarrival_time_spei, minimum_cycle_length_spei, interpolation, interpolation_method, interpolation_freq, truncate, extra_info) peaks_over_thres_spei = extremal.events_max(cycles_spei) # Plot peaks peaks_over_thres = extremal.events_max(cycles) # Represent cycles fig1 = plt.figure(figsize=(20, 20)) ax = plt.axes() ax.plot(river_discharge) ax.axhline(threshold_cycles, color='lightgray') ax.plot(spei.loc[:, 'SPEI_12'] * 100, color='0.75', linewidth=2) # Plot cycles # for cycle in cycles_all: # ax.plot(cycle, 'sandybrown', marker='.', markersize=5) # # ax.plot(cycle.index[0], cycle[0], 'gray', marker='.', markersize=10) # # ax.plot(cycle.index[-1], cycle[-1], 'black', marker='.', markersize=10) for cycle in cycles: ax.plot(cycle, 'g', marker='.', markersize=5) # ax.plot(cycle.index[0], cycle[0], 'gray', marker='.', markersize=10) # ax.plot(cycle.index[-1], cycle[-1], 'black', marker='.', markersize=10) for cycle in cycles_spei: ax.plot(cycle, 'k', marker='.', markersize=5, linewidth=2) ax.plot(cycle.index[0], cycle[0], 'gray', marker='.', markersize=15) ax.plot(cycle.index[-1], cycle[-1], 'black', marker='.', markersize=15) ax.plot(peaks_over_thres, '.r', markersize=15) ax.plot(peaks_over_thres_spei, '.c', markersize=15) ax.grid() ax.set_xlim([datetime.date(1970, 01, 01), 
datetime.date(2018, 04, 11)]) ax.set_ylim([-5, 500]) fig1.savefig( os.path.join('output', 'analisis', 'graficas', 'ciclos_river_discharge_spei.png')) #%% # ANALISIS CLIMATICO (0: PARA SALTARLO, 1: PARA HACERLO; LO MISMO PARA TODOS ESTOS IF) if analysis: if cdf_pdf_representation: for i in range(len(df)): # DIBUJO LAS CDF Y PDF DE LOS REGISTROS plot_analisis.cdf_pdf_registro(df[i], df[i].columns[0]) plt.pause(0.5) #%% THEORETICAL FIT CYCLES data_cycles = sample_cycles['Q'] # Empirical cdf ecdf = empirical_distributions.ecdf_histogram(data_cycles) # Fit the variable to an extremal distribution (param, x, cdf_expwbl, pdf_expwbl) = theoretical_fit.fit_distribution( data_cycles, fit_type=fun_cycles[0].name, x_min=min(data_cycles), x_max=2 * max(data_cycles), n_points=1000) par0_cycles = list() par0_cycles.append(np.asarray(param)) # GUARDO LOS PARAMETROS np.save( os.path.join('output', 'analisis', 'parameter_river_discharge_cycles.npy'), par0_cycles) # Check the goodness of the fit fig1 = plt.figure(figsize=(20, 20)) ax = plt.axes() ax.plot(ecdf.index, ecdf, '.') ax.plot(x, cdf_expwbl) ax.set_xlabel('Q (m3/s)') ax.set_ylabel('CDF') ax.legend([ 'ECDF', 'Exponweib Fit', ]) ax.grid() ax.set_xlim([0, 500]) fig1.savefig( os.path.join('output', 'analisis', 'graficas', 'cdf_fit_ciclos_river_discharge.png')) # PP - Plot values (yppplot_emp, yppplot_teo) = theoretical_fit.pp_plot(x, cdf_expwbl, ecdf) # QQ - Plot values (yqqplot_emp, yqqplot_teo) = theoretical_fit.qq_plot(x, cdf_expwbl, ecdf) # Plot Goodness of fit theoretical_fit.plot_goodness_of_fit(cdf_expwbl, ecdf, river_discharge, 'Q', x, yppplot_emp, yqqplot_emp, yppplot_teo, yqqplot_teo) # Non-stationary fit for calms par_cycles, mod_cycles, f_mix_cycles, data_graph_cycles = list(), list( ), list(), list() df = list() df.append(data_cycles) for i in range(len(df)): # SE HAN SELECCIONADO LOS ULTIMOS 7 ANOS PARA QUE EL ANALISIS SEA MAS RAPIDO analisis_ = analisis.analisis(df[i], fun_cycles[i], ant[i], ordg=no_ord_cycles[i], 
nnorm=no_norm_cycles[i], par0=par0_cycles[i]) par_cycles.append(analisis_[0]) mod_cycles.append(analisis_[1]) f_mix_cycles.append(analisis_[2]) aux = list(analisis_[3]) aux[5] = i aux = tuple(aux) data_graph_cycles.append(aux) # DIBUJO LOS RESULTADOS (HAY UNA GRAN GAMA DE FUNCIONES DE DIBUJO; VER MANUAL) plot_analisis.cuantiles_ne(*data_graph_cycles[i]) plt.pause(0.5) fig2 = plt.figure(figsize=(20, 20)) plt.plot(x, pdf_expwbl) _ = plt.hist(data_cycles, bins=np.linspace(0, 500, 100), normed=True, alpha=0.5) plt.xlim([0, 400]) fig2.savefig( os.path.join('output', 'analisis', 'graficas', 'pdf_fit_ciclos_river_discharge.png')) # %% THEORETICAL FIT CALMS param0_calms = list() data_calms = sample_calms['Q'] (param, x, cdf, pdf) = theoretical_fit.fit_distribution( data_calms, fit_type=fun_calms[0].name, x_min=np.min(data_calms), x_max=1.1 * np.max(data_calms), n_points=1000) param0_calms.append(np.asarray(param)) # Empirical cdf ecdf = empirical_distributions.ecdf_histogram(data_calms) epdf = empirical_distributions.epdf_histogram(data_calms, bins=0) # PP - Plot values (yppplot_emp, yppplot_teo) = theoretical_fit.pp_plot(x, cdf, ecdf) # QQ - Plot values (yqqplot_emp, yqqplot_teo) = theoretical_fit.qq_plot(x, cdf, ecdf) # Plot Goodness of fit theoretical_fit.plot_goodness_of_fit(cdf, ecdf, sample_calms, 'Q', x, yppplot_emp, yqqplot_emp, yppplot_teo, yqqplot_teo) # Non-stationary fit for calms par_calms, mod_calms, f_mix_calms, data_graph_calms = list(), list( ), list(), list() df = list() df.append(data_calms) for i in range(len(df)): # SE HAN SELECCIONADO LOS ULTIMOS 7 ANOS PARA QUE EL ANALISIS SEA MAS RAPIDO analisis_ = analisis.analisis(df[i], fun_calms[i], ant[i], ordg=no_ord_calms[i], nnorm=no_norm_calms[i], par0=param0_calms[i]) par_calms.append(analisis_[0]) mod_calms.append(analisis_[1]) f_mix_calms.append(analisis_[2]) data_graph_calms.append(analisis_[3]) # DIBUJO LOS RESULTADOS (HAY UNA GRAN GAMA DE FUNCIONES DE DIBUJO; VER MANUAL) 
plot_analisis.cuantiles_ne(*data_graph_calms[i]) plt.pause(0.5) # Guardo parametros np.save( os.path.join('output', 'analisis', 'parameter_river_discharge_calms.npy'), par_calms) np.save( os.path.join('output', 'analisis', 'mod_river_discharge_calms.npy'), mod_calms) np.save( os.path.join('output', 'analisis', 'f_mix_river_discharge_calms.npy'), f_mix_calms) #%% TEMPORAL DEPENDENCY if temporal_dependency: # SE UTILIZAN LOS PARAMETROS DE SALIDA DEL ANÁLISIS PREVIO # Lectura de datos par_cycles = np.load( os.path.join('output', 'analisis', 'parameter_river_discharge_cycles.npy')) par_calms = np.load( os.path.join('output', 'analisis', 'parameter_river_discharge_calms.npy')) mod_calms = np.load( os.path.join('output', 'analisis', 'mod_river_discharge_calms.npy')) f_mix_calms = np.load( os.path.join('output', 'analisis', 'f_mix_river_discharge_calms.npy')) (df_dt_cycles, cdf_) = analisis.dependencia_temporal(sample_cycles, par_cycles, mod_cycles, no_norm_cycles, f_mix_cycles, fun_cycles) # SE GUARDAN LOS PARAMETROS DEL MODELO VAR df_dt_cycles.to_pickle( os.path.join('output', 'dependencia_temporal', 'df_dt_river_discharge_cycles.p')) (df_dt_calms, cdf_) = analisis.dependencia_temporal(sample_calms, par_calms, mod_calms, no_norm_calms, f_mix_calms, fun_calms) # SE GUARDAN LOS PARAMETROS DEL MODELO VAR df_dt_calms.to_pickle( os.path.join('output', 'dependencia_temporal', 'df_dt_river_discharge_calms.p')) if climatic_events_fitting: #%% FIT NUMBER OF EVENTS DURING WET CYCLES events_wet_cycle = pd.Series([5, 2, 1, 3, 2, 2, 0, 6, 1]) ecdf_events_wet_cycle = empirical_distributions.ecdf_histogram( events_wet_cycle) mu = np.mean(events_wet_cycle) simulated_number_events = pd.Series( poisson.rvs(mu, loc=0, size=100, random_state=None)) ecdf_simulated_events_wet_cycle = empirical_distributions.ecdf_histogram( simulated_number_events) x_poisson = np.linspace(0, 10, 100) cdf_poisson = poisson.cdf(x_poisson, mu, loc=0) plt.figure() ax = plt.axes() 
ax.plot(ecdf_events_wet_cycle.index, ecdf_events_wet_cycle, '.') ax.plot(ecdf_simulated_events_wet_cycle.index, ecdf_simulated_events_wet_cycle, '.') ax.plot(x_poisson, cdf_poisson) ax.legend(['ECDF', 'ECDF Sim', 'Poisson Fit']) ax.grid() #%% FIT TIME BETWEEN WET CYCLES t_wet_cycles = peaks_over_thres_spei.index.to_series().diff().dropna( ).astype('m8[s]').astype(np.float32) ecdf_t_wet_cycle = empirical_distributions.ecdf_histogram(t_wet_cycles) norm_param = norm.fit(t_wet_cycles, loc=0) simulated_t_wet_cycles = pd.Series( norm.rvs(*norm_param, size=100, random_state=None)) ecdf_simulated_t_wet_cycles = empirical_distributions.ecdf_histogram( simulated_t_wet_cycles) x_norm = np.linspace(0, 2 * max(t_wet_cycles), 100) cdf_norm = norm.cdf(x_norm, *norm_param) plt.figure() ax = plt.axes() ax.plot(ecdf_t_wet_cycle.index, ecdf_t_wet_cycle, '.') ax.plot(ecdf_simulated_t_wet_cycles.index, ecdf_simulated_t_wet_cycles, '.') ax.plot(x_norm, cdf_norm) ax.legend(['ECDF', 'ECDF Sim', 'Exponential Fit']) ax.grid() simulated_t_wet_cycles_days = simulated_t_wet_cycles.astype('m8[s]') # Elimino valores negativos simulated_t_wet_cycles_days = simulated_t_wet_cycles_days[ simulated_t_wet_cycles_days.values > datetime.timedelta(days=1)] #%% FIT TIME BETWEEN EVENTS DURING WET CYCLES t_between_events = peaks_over_thres.index.to_series().diff().dropna() t_between_events = t_between_events[ t_between_events < datetime.timedelta(days=400)] t_between_events = t_between_events.astype('m8[s]').astype(np.float32) ecdf_t_between_events = empirical_distributions.ecdf_histogram( t_between_events) lambda_par = expon.fit(t_between_events, loc=0) simulated_t_between_events = pd.Series( expon.rvs(scale=lambda_par[1], size=100, random_state=None)) ecdf_simulated_t_between_events = empirical_distributions.ecdf_histogram( simulated_t_between_events) x_expon = np.linspace(0, 2 * max(t_between_events), 100) cdf_expon = expon.cdf(x_expon, scale=lambda_par[1], loc=0) plt.figure() ax = plt.axes() 
# Compare the empirical CDF of the observed times between events with the
# CDF of the simulated sample and with the fitted exponential CDF. The axes
# `ax` and the plotted series are created earlier in the script.
ax.plot(ecdf_t_between_events.index, ecdf_t_between_events, '.')
ax.plot(ecdf_simulated_t_between_events.index,
        ecdf_simulated_t_between_events, '.')
ax.plot(x_expon, cdf_expon)
ax.legend(['ECDF', 'ECDF Sim', 'Exponential Fit'])
ax.grid()

# Cast the simulated waiting times to timedelta64 with second resolution
# (the variable name says "days", but the unit requested here is seconds).
simulated_t_between_events_days = simulated_t_between_events.astype('m8[s]')

#%% FIT TIME BETWEEN ALL EVENTS
# Fit time between events (without considering wet cycles), second method:
# waiting times are the differences between consecutive peak timestamps,
# expressed in seconds as float32.
t_between_events_2method = peaks_over_thres.index.to_series().diff().dropna()
t_between_events_2method = t_between_events_2method.astype('m8[s]').astype(
    np.float32)
ecdf_t_between_events_2method = empirical_distributions.ecdf_histogram(
    t_between_events_2method)

# NOTE(review): in scipy `loc=0` is only a starting guess for the fit; use
# `floc=0` if the location is meant to be held at zero. Only the fitted
# scale (lambda_par[1]) is used below, together with loc=0 - confirm intent.
lambda_par = expon.fit(t_between_events_2method, loc=0)
simulated_t_between_events_2method = pd.Series(
    expon.rvs(scale=lambda_par[1], size=100, random_state=None))
ecdf_simulated_t_between_events_2method = (
    empirical_distributions.ecdf_histogram(
        simulated_t_between_events_2method))

x_expon = np.linspace(0, 2 * np.max(t_between_events_2method), 100)
cdf_expon = expon.cdf(x_expon, scale=lambda_par[1], loc=0)

plt.figure()
ax = plt.axes()
ax.plot(ecdf_t_between_events_2method.index, ecdf_t_between_events_2method,
        '.')
ax.plot(ecdf_simulated_t_between_events_2method.index,
        ecdf_simulated_t_between_events_2method, '.')
ax.plot(x_expon, cdf_expon)
ax.legend(['ECDF', 'ECDF Sim', 'Exponential Fit'])
ax.grid()

# Fixed: this previously converted `simulated_t_between_events` (the sample
# of the first method) instead of the sample simulated in this cell.
simulated_t_between_events_2method_days = (
    simulated_t_between_events_2method.astype('m8[s]'))
# nul_values = simulated_t_between_events_2method_days.values > datetime.timedelta(days=2000)

#%% CLIMATE SIMULATION: CHECK THE OPTIMAL THRESHOLD TO FIT THE DURATIONS
if threshold_checking_for_simulation:
    # Load the fitted parameters
    par_cycles = np.load(
        os.path.join('output', 'analisis',
                     'parameter_river_discharge_cycles.npy'))
    df_dt_cycles = pd.read_pickle(
        os.path.join('output', 'dependencia_temporal',
                     'df_dt_river_discharge_cycles.p'))
    vars_ = ['Q']

    # Load the SPEI index to fit the time between wet cycles, the number of
    # events per wet cycle and the time between events within a wet cycle

    # Figure with the empirical cdf and pdf of every accepted simulation
    fig1, axes1 = plt.subplots(1, 2, figsize=(20, 7))

    cont = 0  # number of accepted simulations
    # Number of simulations drawn (accepted or not). NOTE: the name shadows
    # the builtin `iter`; it is kept in case other parts of the script read it.
    iter = 0
    while cont < no_sim:
        df_sim = simulacion.simulacion(anocomienzo,
                                       duracion,
                                       par_cycles,
                                       mod_cycles,
                                       no_norm_cycles,
                                       f_mix_cycles,
                                       fun_cycles,
                                       vars_,
                                       sample_cycles,
                                       df_dt_cycles, [0, 0, 0, 0, 0],
                                       semilla=int(np.random.rand() * 1e6))
        iter += 1

        # First filter: the series is discarded when its maximum exceeds
        # 1.25 times the maximum of the observed sample.
        if np.max(df_sim).values <= np.max(sample_cycles['Q']) * 1.25:
            # Plot of the simulated series against the sample
            plt.figure()
            ax = plt.axes()
            ax.plot(df_sim)
            ax.plot(sample_cycles, '.')
            ax.plot(df_sim * 0 + max(sample_cycles['Q']), 'r')
            ax.grid()

            # Cdf Pdf
            data = df_sim['Q']
            ecdf = empirical_distributions.ecdf_histogram(data)
            epdf = empirical_distributions.epdf_histogram(data, bins=0)
            axes1[0].plot(epdf.index, epdf, '--', color='0.75')
            axes1[1].plot(ecdf.index, ecdf, '--', color='0.75')

            # Extract cycles from data for different thresholds to fix the
            # duration
            fig2, axes2 = plt.subplots(1, 2, figsize=(20, 7))
            if cont == 0:
                # Convert to seconds and float (done only once)
                dur_cycles = dur_cycles.astype('m8[s]').astype(np.float32)

            # Reference curves: durations of the observed cycles
            ecdf_dur = empirical_distributions.ecdf_histogram(dur_cycles)
            epdf_dur = empirical_distributions.epdf_histogram(dur_cycles,
                                                              bins=0)
            axes2[0].plot(epdf_dur.index, epdf_dur, 'r', lw=2)
            axes2[1].plot(ecdf_dur.index, ecdf_dur, 'r', lw=2)

            threshold = np.arange(20, 110, 10)
            color_sequence = [
                '#1f77b4', '#aec7e8', '#ff7f0e', '#ffbb78', '#2ca02c',
                '#98df8a', '#d62728', '#ff9896', '#9467bd', '#c5b0d5',
                '#8c564b', '#c49c94', '#e377c2', '#f7b6d2', '#7f7f7f',
                '#c7c7c7', '#bcbd22', '#dbdb8d', '#17becf', '#9edae5'
            ]

            for j, th in enumerate(threshold):
                minimum_interarrival_time = pd.Timedelta('1 hour')
                minimum_cycle_length = pd.Timedelta('2 days')

                cycles, calm_periods, info = extremal.extreme_events(
                    df_sim, 'Q', th, minimum_interarrival_time,
                    minimum_cycle_length, interpolation,
                    interpolation_method, interpolation_freq, truncate,
                    extra_info)

                # Calculate duration of the cycles
                dur_cycles_sim = extremal.events_duration(cycles)
                dur_cycles_sim_description = dur_cycles_sim.describe()

                # Represent cycles
                fig3 = plt.figure(figsize=(20, 20))
                ax = plt.axes()
                ax.plot(df_sim)
                ax.axhline(th, color='lightgray')
                ax.grid()
                ax.legend([
                    'Threshold: ' + str(th) + ' (m3/s)' + '/ Dur_min ' +
                    str(dur_cycles_description['min']) + ' - ' +
                    str(dur_cycles_sim_description['min']) + '/ Dur_mean ' +
                    str(dur_cycles_description['mean']) + ' - ' +
                    str(dur_cycles_sim_description['mean']) + '/ Dur_max ' +
                    str(dur_cycles_description['max']) + ' - ' +
                    str(dur_cycles_sim_description['max'])
                ])

                for cycle in cycles:
                    ax.plot(cycle, 'g', marker='.', markersize=5)
                    # Explicit positional access: integer keys on a
                    # datetime-indexed Series are deprecated in pandas.
                    ax.plot(cycle.index[0],
                            cycle.iloc[0],
                            'gray',
                            marker='.',
                            markersize=10)
                    ax.plot(cycle.index[-1],
                            cycle.iloc[-1],
                            'black',
                            marker='.',
                            markersize=10)

                # Leading-zero literals (04, 01) are a SyntaxError in
                # Python 3; the values are unchanged.
                ax.set_xlim(
                    [datetime.date(2018, 4, 1),
                     datetime.date(2030, 1, 1)])
                ax.set_ylim([0, 600])
                fig_name = 'ciclos_sim_' + str(cont) + '_threshold_' + str(
                    th) + '.png'
                fig3.savefig(
                    os.path.join('output', 'simulacion', 'graficas',
                                 'descarga_fluvial', 'umbral_optimo',
                                 fig_name))
                # pyplot keeps every figure alive until it is closed; one is
                # created per threshold and per simulation.
                plt.close(fig3)

                # Calculate the cdf and pdf of the cycle duration
                dur_cycles_sim = dur_cycles_sim.astype('m8[s]').astype(
                    np.float32)
                ecdf_dur_sim = empirical_distributions.ecdf_histogram(
                    dur_cycles_sim)
                epdf_dur_sim = empirical_distributions.epdf_histogram(
                    dur_cycles_sim, bins=0)
                # The label is a plain string (a list would be shown in the
                # legend as its repr, brackets and quotes included).
                axes2[0].plot(epdf_dur_sim.index,
                              epdf_dur_sim,
                              '--',
                              color=color_sequence[j],
                              label='Threshold: ' + str(th))
                axes2[1].plot(ecdf_dur_sim.index,
                              ecdf_dur_sim,
                              '--',
                              color=color_sequence[j],
                              label='Threshold: ' + str(th))

            axes2[0].legend()
            axes2[1].set_xlim([0, 5000000])
            axes2[0].set_xlim([0, 5000000])
            fig_name = 'ciclos_dur_sim_' + str(cont) + '.png'
            fig2.savefig(
                os.path.join('output', 'simulacion', 'graficas',
                             'descarga_fluvial', 'umbral_optimo', fig_name))
            plt.close(fig2)

            cont += 1

    # Overlay the empirical pdf/cdf of the observed sample and save
    data = sample_cycles['Q']
    ecdf = empirical_distributions.ecdf_histogram(data)
    epdf = empirical_distributions.epdf_histogram(data, bins=0)
    axes1[0].plot(epdf.index, epdf, 'r', lw=2)
    axes1[1].plot(ecdf.index, ecdf, 'r', lw=2)
    fig_name = 'pdf_cdf_descarga_fluvial.png'
    fig1.savefig(
        os.path.join('output', 'simulacion', 'graficas', 'descarga_fluvial',
                     'umbral_optimo', fig_name))

#%% CLIMATE SIMULATION
threshold = 50
minimum_interarrival_time = pd.Timedelta('1 hour')
minimum_cycle_length = pd.Timedelta('2 days')

if simulation_cycles:
    # Load the fitted parameters
    par_cycles = np.load(
        os.path.join('output', 'analisis',
                     'parameter_river_discharge_cycles.npy'))
    par_calms = np.load(
        os.path.join('output', 'analisis',
                     'parameter_river_discharge_calms.npy'))
    mod_calms = np.load(
        os.path.join('output', 'analisis', 'mod_river_discharge_calms.npy'))
    f_mix_calms = np.load(
        os.path.join('output', 'analisis',
                     'f_mix_river_discharge_calms.npy'))

    df_dt_cycles = pd.read_pickle(
        os.path.join('output', 'dependencia_temporal',
                     'df_dt_river_discharge_cycles.p'))
    df_dt_calms = pd.read_pickle(
        os.path.join('output', 'dependencia_temporal',
                     'df_dt_river_discharge_calms.p'))
    vars_ = ['Q']

    # Figure with the empirical cdf and pdf of every accepted simulation
    fig2, axes1 = plt.subplots(1, 2, figsize=(20, 7))

    cont = 0  # number of accepted simulations
    # Number of simulations drawn (accepted or not). NOTE: the name shadows
    # the builtin `iter`; it is kept in case other parts of the script read it.
    iter = 0
    while cont < no_sim:
        df_sim = simulacion.simulacion(anocomienzo,
                                       duracion,
                                       par_cycles,
                                       mod_cycles,
                                       no_norm_cycles,
                                       f_mix_cycles,
                                       fun_cycles,
                                       vars_,
                                       sample_cycles,
                                       df_dt_cycles, [0, 0, 0, 0, 0],
                                       semilla=int(np.random.rand() * 1e6))
        iter += 1

        # First filter: the series is discarded when its maximum exceeds
        # 1.25 times the maximum of the observed sample.
        if np.max(df_sim).values <= np.max(sample_cycles['Q']) * 1.25:
            df_sim = df_sim.resample('1H').interpolate()

            # Extract cycles from data for different thresholds to fix the
            # duration
            if cont == 0:
                # Convert to seconds and float (done only once)
                dur_cycles = dur_cycles.astype('m8[s]').astype(np.float32)

            # Calculate cycles
            cycles, calm_periods, info = extremal.extreme_events(
                df_sim, 'Q', threshold, minimum_interarrival_time,
                minimum_cycle_length, interpolation, interpolation_method,
                interpolation_freq, truncate, extra_info)

            # # Represent cycles
            # fig3 = plt.figure(figsize=(20, 20))
            # ax = plt.axes()
            # ax.plot(df_sim)
            # ax.axhline(threshold, color='lightgray')
            # ax.grid()
            #
            # for cycle in cycles:
            #     ax.plot(cycle, 'g', marker='.', markersize=5)
            #     ax.plot(cycle.index[0], cycle.iloc[0], 'gray', marker='.', markersize=10)
            #     ax.plot(cycle.index[-1], cycle.iloc[-1], 'black', marker='.', markersize=10)
            # ax.set_xlim([datetime.date(2018, 1, 1), datetime.date(2021, 1, 1)])
            # ax.set_ylim([0, 600])
            # fig3.savefig(os.path.join('output', 'simulacion', 'graficas', 'descarga_fluvial',
            #                           'ciclos_cadiz_simulado_' + str(cont).zfill(4) + '.png'))

            # Start to construct the time series: hourly index filled with
            # a constant value of 25
            indices = pd.date_range(start='2018', end='2100', freq='1H')
            df_simulate = pd.DataFrame(np.zeros((len(indices), 1)) + 25,
                                       dtype=float,
                                       index=indices,
                                       columns=['Q'])

            # The start is in wet cycles
            cont_wet_cicles = 0
            cont_df_events = 1
            # Leading-zero literals (01) are a SyntaxError in Python 3; the
            # values are unchanged.
            t_ini = datetime.datetime(2018, 1, 1)
            t_end = datetime.datetime(2018, 1, 1)
            while t_end < datetime.datetime(2090, 1, 1):
                if cont_wet_cicles != 0:
                    t_ini = t_end + simulated_t_wet_cycles_days[
                        cont_wet_cicles]
                    year = t_ini.year
                else:
                    year = 2018

                # Select the number of events during wet cycle
                n_events = simulated_number_events[cont_wet_cicles] - 1
                cont_wet_cicles += 1

                if n_events != 0:
                    # for j in range(0, n_events):
                    cont_df_events_in_wet_cycles = 0
                    while cont_df_events_in_wet_cycles <= n_events:
                        if cont_df_events_in_wet_cycles != 0:
                            # Time between events
                            year = year + 1

                        # Select the event
                        cycle = cycles[cont_df_events]

                        if np.max(cycle) >= 150:
                            # Simulate date: events peaking at 150 or more
                            # are placed in January-March or
                            # October-December
                            month1 = [
                                random.randint(1, 3),
                                random.randint(10, 12)
                            ]
                            rand_pos = random.randint(0, 1)
                            month = month1[rand_pos]
                            day = random.randint(1, 28)
                            hour = random.randint(0, 23)
                        else:
                            # Simulate date: any month of the year
                            month = random.randint(1, 12)
                            day = random.randint(1, 28)
                            hour = random.randint(0, 23)

                        t_ini = datetime.datetime(year, month, day, hour)
                        pos_ini = np.where(
                            df_simulate.index == t_ini)[0][0]
                        pos_end = pos_ini + cycle.shape[0]

                        # Insert cycle
                        df_simulate.iloc[pos_ini:pos_end, 0] = cycle.values
                        t_end = df_simulate.index[pos_end]
                        # `Timestamp.to_datetime()` is gone from pandas; the
                        # year is read from the timestamp directly.
                        year = t_end.year

                        cont_df_events += 1
                        cont_df_events_in_wet_cycles += 1
                else:
                    t_end = t_ini

            # Simulation of calm periods
            df_sim_calms = simulacion.simulacion(
                anocomienzo,
                85,
                par_calms,
                mod_calms,
                no_norm_calms,
                f_mix_calms,
                fun_calms,
                vars_,
                sample_calms,
                df_dt_calms, [0, 0, 0, 0, 0],
                semilla=int(np.random.rand() * 1e6))

            # Remove negative values (all replaced by one random integer
            # drawn from [1, 5))
            df_sim_calms[df_sim_calms < 0] = np.random.randint(1, 5)

            # Combine both dataframes with cycles and calms.
            # NOTE(review): 50 equals `threshold` above - confirm whether the
            # two are meant to stay in sync.
            pos_cycles = df_simulate >= 50
            df_river_discharge = df_sim_calms
            df_river_discharge[pos_cycles] = df_simulate

            # Hourly interpolation
            df_river_discharge = df_river_discharge.resample(
                'H').interpolate()

            # Representation of results
            fig1 = plt.figure(figsize=(20, 10))
            ax = plt.axes()
            ax.plot(river_discharge)
            ax.plot(df_river_discharge)
            # Labels go in a single list; two positional strings are read
            # as (handles, labels).
            ax.legend(['Hindcast', 'Forecast'])
            ax.grid()
            ax.set_ylim([-5, 500])
            fig1.savefig(
                os.path.join(
                    'output', 'simulacion', 'graficas', 'descarga_fluvial',
                    'descarga_fluvial_cadiz_simulado_' +
                    str(cont).zfill(4) + '.png'))
            # pyplot keeps every figure alive until it is closed; one is
            # created per accepted simulation.
            plt.close(fig1)

            # Cdf Pdf
            data = df_river_discharge['Q']
            ecdf = empirical_distributions.ecdf_histogram(data)
            epdf = empirical_distributions.epdf_histogram(data, bins=0)
            axes1[0].plot(epdf.index, epdf, '--', color='0.75')
            axes1[1].plot(ecdf.index, ecdf, '--', color='0.75')

            # Save the simulated series to file
            df_river_discharge.to_csv(os.path.join(
                'output', 'simulacion', 'series_temporales',
                'descarga_fluvial_500', 'descarga_fluvial_guadalete_sim_' +
                str(cont).zfill(4) + '.txt'),
                                      sep=n(b'\t'))

            cont += 1

    # Overlay the empirical pdf/cdf of the hindcast series and save
    data = river_discharge['Q']
    ecdf = empirical_distributions.ecdf_histogram(data)
    epdf = empirical_distributions.epdf_histogram(data, bins=0)
    axes1[0].plot(epdf.index, epdf, 'r', lw=2)
    axes1[1].plot(ecdf.index, ecdf, 'r', lw=2)
    fig_name = 'pdf_cdf_descarga_fluvial.png'
    fig2.savefig(
        os.path.join('output', 'simulacion', 'graficas', 'descarga_fluvial',
                     fig_name))
def output_missing_values(modf, info, output_path):
    """Produce the missing-values figure and table for one descriptor.

    Args:
        modf: Data container handed to ``extract_data``.
        info: Mapping with the section settings. The keys
            ``location_metocean``, ``name_driver``, ``name_descriptor``,
            ``name_block``, ``name_section`` and ``title_descriptor`` are
            read directly; titles, captions, variable name, unit and the
            circular flag are looked up through ``get_key`` with a fallback.
        output_path: Directory joined with the generated names to obtain the
            figure and table file paths.

    Returns:
        A tuple ``(elements, title)`` where ``elements`` is a DataFrame with
        the columns ``path``, ``kind`` and ``caption`` and ``title`` is the
        section title.
    """
    elements = []

    # Fallback section title
    fallback_title = _('Missing values')

    # Mandatory settings, read in a fixed order
    location, driver, descriptor, block, section = (
        info[key] for key in ('location_metocean', 'name_driver',
                              'name_descriptor', 'name_block',
                              'name_section'))

    # Optional settings with their fallbacks
    title = get_key(info, 'title_section', fallback_title)
    var_name = get_key(info, 'var_name_descriptor', descriptor)
    var_unit = get_key(info, 'unit_descriptor', '')
    circular = get_key(info, 'circular_descriptor', False)

    # The tex input of the section is handled first; the helper receives the
    # list of elements
    input_tex(elements, info, output_path, section)

    # Gaps of the descriptor series
    series = extract_data(modf, descriptor_name=descriptor)
    step = missing_values.find_timestep(series)
    gaps = missing_values.find_missing_values(series, step)

    def name_and_caption(kind, caption_key, translated_label):
        """Return the output name and the caption of one element."""
        fallback_caption = translated_label + ': {}'.format(
            info['title_descriptor'])
        caption = get_key(info, caption_key, fallback_caption)
        name = get_output_name(location=location,
                               driver=driver,
                               name=descriptor,
                               block=block,
                               title=section,
                               kind=kind)
        return name, caption

    # Figure: the element row keeps the generated name as it is
    figure_name, figure_caption = name_and_caption(
        'figure', 'missing_values_figure_caption_section',
        _('Missing values plot'))
    missing_values.plot_missing_values(data=series,
                                       data_gaps=gaps,
                                       title='',
                                       var_name=var_name,
                                       var_unit=var_unit,
                                       fig_filename=os.path.join(
                                           output_path, figure_name),
                                       circular=circular,
                                       label=var_name)
    elements.append([figure_name, 'figure', figure_caption])

    # Table: the element row keeps the name joined with the output directory
    table_name, table_caption = name_and_caption(
        'table', 'missing_values_table_caption_section',
        _('Missing values table'))
    report = missing_values.missing_values_report(series, gaps)
    table_file = os.path.join(output_path, table_name)
    save_table(report, table_file)
    elements.append([table_file, 'table', table_caption])

    summary = pd.DataFrame(elements, columns=['path', 'kind', 'caption'])
    return summary, title