Esempi in Python per merge_data, esempi in Python per hydrostats.data.merge_data

Esempio n. 1

0

Mostra file

File: plots.py Progetto: BYU-Hydroinformatics/geoglows

def corrected_day_average(corrected: pd.DataFrame, simulated: pd.DataFrame, observed: pd.DataFrame,
                          merged_sim_obs: pd.DataFrame = False, merged_cor_obs: pd.DataFrame = False,
                          titles: dict = None, outformat: str = 'plotly') -> go.Figure or str:
    """
    Calculates and plots the daily average streamflow of the observed, simulated and corrected data.

    The averages are computed from hydrostats.data.merge_data(simulated, observed) and
    hydrostats.data.merge_data(corrected, observed). If you have already computed either merge because you are
    doing a full comparison of bias correction, pass it in: a provided merge is used as-is and only a missing
    one is computed from the raw dataframes.

    Args:
        corrected: the response from the geoglows.bias.correct_historical_simulation function. Only read when
            merged_cor_obs is not provided
        simulated: the csv response from historic_simulation. Only read when merged_sim_obs is not provided
        observed: the dataframe of observed data. Must have a datetime index and a single column of flow values.
            Only read when at least one of the merged dataframes is not provided
        merged_sim_obs: (optional) if you have already computed it, hydrostats.data.merge_data(simulated, observed)
        merged_cor_obs: (optional) if you have already computed it, hydrostats.data.merge_data(corrected, observed)
        titles: (dict) Extra info to show on the title of the plot. For example:
            {'Reach ID': 1234567, 'Drainage Area': '1000km^2'}
        outformat: either 'plotly' or 'plotly_html' (default plotly)

    Returns:
        A plotly figure when outformat is 'plotly' (especially for use with python notebooks and the .show()
        method), or the result of offline_plot requested as a 'div' when outformat is 'plotly_html'

    Raises:
        ValueError: if outformat is neither 'plotly' nor 'plotly_html'
    """
    # False (the signature default) and None both mean "not provided". The previous guard only honoured the
    # pre-merged frames when all three raw inputs were literally False, so they were normally recomputed.
    if merged_sim_obs is False or merged_sim_obs is None:
        merged_sim_obs = hd.merge_data(sim_df=simulated, obs_df=observed)
    if merged_cor_obs is False or merged_cor_obs is None:
        merged_cor_obs = hd.merge_data(sim_df=corrected, obs_df=observed)

    daily_avg = hd.daily_average(merged_sim_obs)
    daily_avg2 = hd.daily_average(merged_cor_obs)

    # in each averaged frame, column 0 is plotted as the simulated series and column 1 as the observed series
    scatters = [
        go.Scatter(x=daily_avg.index, y=daily_avg.iloc[:, 1].values, name='Observed Data'),
        go.Scatter(x=daily_avg.index, y=daily_avg.iloc[:, 0].values, name='Simulated Data'),
        go.Scatter(x=daily_avg2.index, y=daily_avg2.iloc[:, 0].values, name='Corrected Simulated Data'),
    ]

    layout = go.Layout(
        title=_build_title('Daily Average Streamflow Comparison', titles),
        xaxis=dict(title='Days'), yaxis=dict(title='Discharge (m<sup>3</sup>/s)', autorange=True),
        showlegend=True)

    # build the figure once; both output formats wrap the same object
    figure = go.Figure(data=scatters, layout=layout)

    if outformat == 'plotly':
        return figure
    if outformat == 'plotly_html':
        return offline_plot(
            figure,
            config={'autosizable': True, 'responsive': True},
            output_type='div',
            include_plotlyjs=False
        )
    raise ValueError('Invalid outformat chosen. Choose plotly or plotly_html')

Esempio n. 2

0

Mostra file

File: controllers.py Progetto: BYU-Hydroinformatics/historical_validation_tool_somalia

def get_monthlyAverages(request):
    """
    Render the monthly average streamflow chart for the currently selected station.

    Merges the module-level simulated and corrected dataframes with the observed dataframe, averages each
    merge by month and plots the observed, simulated and corrected simulated series on a single figure.

    Returns the rendered gizmo template on success, or a JsonResponse carrying an error message if any
    step raises.
    """
    global nomRiver
    global nomEstacion
    global simulated_df
    global observed_df
    global corrected_df
    _query_params = request.GET  # read but not used below

    try:
        # Merge Data
        sim_vs_obs = hd.merge_data(sim_df=simulated_df, obs_df=observed_df)
        cor_vs_obs = hd.merge_data(sim_df=corrected_df, obs_df=observed_df)

        # Plotting Data
        sim_monthly = hd.monthly_average(sim_vs_obs)
        cor_monthly = hd.monthly_average(cor_vs_obs)

        # column 1 is plotted as observed, column 0 as (corrected) simulated
        traces = [
            go.Scatter(x=sim_monthly.index, y=sim_monthly.iloc[:, 1].values, name='Observed'),
            go.Scatter(x=sim_monthly.index, y=sim_monthly.iloc[:, 0].values, name='Simulated'),
            go.Scatter(x=cor_monthly.index, y=cor_monthly.iloc[:, 0].values, name='Corrected Simulated'),
        ]

        figure_layout = go.Layout(
            title='Monthly Average Streamflow for River {0} at {1}'.format(nomRiver, nomEstacion),
            xaxis=dict(title='Months'),
            yaxis=dict(title='Discharge (m<sup>3</sup>/s)', autorange=True),
            showlegend=True)

        chart = PlotlyView(go.Figure(data=traces, layout=figure_layout))

        return render(request, 'historical_validation_tool_somalia/gizmo_ajax.html', {'gizmo_object': chart})

    except Exception as e:
        print(str(e))
        return JsonResponse({'error': 'No data found for the selected station.'})

Esempio n. 3

0

Mostra file

File: dev_monthly_test.py Progetto: rileyhales/hydroinformatics

def statistics_tables(corrected: pd.DataFrame, simulated: pd.DataFrame, observed: pd.DataFrame) -> pd.DataFrame:
    """
    Build a side-by-side table of error metrics for the original and the corrected simulation.

    Each of simulated and corrected is merged with observed, a metrics table is made for each merge, the
    'Full Time Series' row of each is relabelled as Original/Corrected, and the two transposed tables are
    joined on their index.
    """
    metric_names = ['ME', 'RMSE', 'NRMSE (Mean)', 'MAPE', 'NSE', 'KGE (2009)', 'KGE (2012)']

    # merge both simulations with the observations first, then compute the metrics for each merge
    merged_frames = [hd.merge_data(sim_df=frame, obs_df=observed) for frame in (simulated, corrected)]
    original_stats, corrected_stats = [
        hs.make_table(merged_dataframe=merged, metrics=metric_names) for merged in merged_frames
    ]

    corrected_stats = corrected_stats.rename(index={'Full Time Series': 'Corrected Full Time Series'})
    original_stats = original_stats.rename(index={'Full Time Series': 'Original Full Time Series'})

    return pd.merge(original_stats.transpose(), corrected_stats.transpose(), right_index=True, left_index=True)

Esempio n. 4

0

Mostra file

File: bias.py Progetto: BYU-Hydroinformatics/geoglows

def statistics_tables(corrected: pd.DataFrame,
                      simulated: pd.DataFrame,
                      observed: pd.DataFrame,
                      merged_sim_obs: pd.DataFrame = False,
                      merged_cor_obs: pd.DataFrame = False,
                      metrics: list = None) -> str:
    """
    Makes an html table of various statistical metrics for corrected vs observed data alongside the same metrics for
    the simulated vs observed data as a way to see the improvement made by the bias correction.

    The metrics are computed from hydrostats.data.merge_data(simulated, observed) and
    hydrostats.data.merge_data(corrected, observed). If you have already computed either merge because you are
    doing a full comparison of bias correction, pass it in: a provided merge is used as-is and only a missing
    one is computed from the raw dataframes.

    Args:
        corrected: A dataframe with a datetime index and a single column of streamflow values. Only read when
            merged_cor_obs is not provided
        simulated: A dataframe with a datetime index and a single column of streamflow values. Only read when
            merged_sim_obs is not provided
        observed: A dataframe with a datetime index and a single column of streamflow values. Only read when at
            least one of the merged dataframes is not provided
        merged_sim_obs: (optional) if you have already computed it, hydrostats.data.merge_data(simulated, observed)
        merged_cor_obs: (optional) if you have already computed it, hydrostats.data.merge_data(corrected, observed)
        metrics: A list of abbreviated statistic names. See the documentation for HydroErr. Defaults to
            ME, RMSE, NRMSE (Mean), MAPE, NSE, KGE (2009) and KGE (2012)

    Returns:
        The html produced by DataFrame.to_html() for the joined table, with one column for the original and one
        for the corrected full time series
    """
    # False (the signature default) and None both mean "not provided". The previous guard only honoured the
    # pre-merged frames when all three raw inputs were literally False, so they were normally recomputed.
    if merged_sim_obs is False or merged_sim_obs is None:
        merged_sim_obs = hd.merge_data(sim_df=simulated, obs_df=observed)
    if merged_cor_obs is False or merged_cor_obs is None:
        merged_cor_obs = hd.merge_data(sim_df=corrected, obs_df=observed)

    if metrics is None:
        metrics = [
            'ME', 'RMSE', 'NRMSE (Mean)', 'MAPE', 'NSE', 'KGE (2009)',
            'KGE (2012)'
        ]

    table1 = hs.make_table(merged_dataframe=merged_sim_obs, metrics=metrics)
    table2 = hs.make_table(merged_dataframe=merged_cor_obs, metrics=metrics)

    # relabel the shared 'Full Time Series' row so the two columns stay distinguishable after the join
    table2 = table2.rename(
        index={'Full Time Series': 'Corrected Full Time Series'})
    table1 = table1.rename(
        index={'Full Time Series': 'Original Full Time Series'})
    table1 = table1.transpose()
    table2 = table2.transpose()

    table_final = pd.merge(table1, table2, right_index=True, left_index=True)

    return table_final.to_html()

Esempio n. 5

0

Mostra file

    def setUp(self):
        """Merge the two sample CSV datasets, referenced by URL, into self.merged_df."""
        pd.options.display.max_columns = 100

        # Defining the URLs of the datasets. The literals must not end in whitespace: a trailing space
        # becomes part of the URL, and a space is not a valid URL character.
        sfpt_url = r'https://github.com/waderoberts123/Hydrostats/raw/master/Sample_data/sfpt_data/magdalena' \
                   r'-calamar_interim_data.csv'
        glofas_url = r'https://github.com/waderoberts123/Hydrostats/raw/master/Sample_data/GLOFAS_Data/magdalena' \
                     r'-calamar_ECMWF_data.csv'
        # Merging the data
        self.merged_df = hd.merge_data(sfpt_url, glofas_url, column_names=('SFPT', 'GLOFAS'))

Esempio n. 6

0

Mostra file

File: controllers.py Progetto: BYU-Hydroinformatics/historical_validation_tool_somalia

def volume_table_ajax(request):
    """
    Calculates the volumes of the simulated, observed and corrected streamflow.

    Reads the module-level simulated_df, observed_df and corrected_df, merges the simulated and the corrected
    series with the observed one, integrates each series with Simpson's rule (unit spacing) and scales the
    result by 0.0864.

    Args:
        request: the incoming request object; it is not read by this function

    Returns:
        JsonResponse with "sim_volume", "obs_volume" and "corr_volume" rounded to 3 decimals, or a JsonResponse
        with an "error" message if any step raises
    """
    global simulated_df
    global observed_df
    global corrected_df

    # presumably 86400 s/day / 1e6, i.e. daily m^3/s flows integrate to millions of m^3 -- confirm the
    # dataframes hold daily values
    volume_factor = 0.0864

    try:
        # scipy renamed integrate.simps to integrate.simpson and later removed the old name. Resolve whichever
        # one this scipy provides; otherwise the AttributeError would be reported below as "No data found".
        simpson = getattr(integrate, 'simpson', None) or integrate.simps

        # Merge Data
        merged_df = hd.merge_data(sim_df=simulated_df, obs_df=observed_df)
        merged_df2 = hd.merge_data(sim_df=corrected_df, obs_df=observed_df)

        # column 0 holds the (corrected) simulated values, column 1 the observed values
        sim_array = merged_df.iloc[:, 0].values
        obs_array = merged_df.iloc[:, 1].values
        corr_array = merged_df2.iloc[:, 0].values

        resp = {
            "sim_volume": round(simpson(sim_array) * volume_factor, 3),
            "obs_volume": round(simpson(obs_array) * volume_factor, 3),
            "corr_volume": round(simpson(corr_array) * volume_factor, 3),
        }

        return JsonResponse(resp)

    except Exception as e:
        # deliberate best-effort: any failure is reported to the client as missing data
        print(str(e))
        return JsonResponse({'error': 'No data found for the selected station.'})

Esempio n. 7

0

Mostra file

def get_scatterPlotLogScale(request):
    """
    Render the simulated-vs-observed scatter plot on logarithmic axes.

    Merges the module-level simulated and corrected dataframes with the observed dataframe and plots the
    original pairs, the corrected pairs and a 45 degree reference line spanning the value range of the
    original pairs.

    Returns the rendered gizmo template on success, or a JsonResponse carrying an error message if any
    step raises.
    """
    global codEstacion
    global nomEstacion
    global simulated_df
    global observed_df
    global corrected_df
    _query_params = request.GET  # read but not used below

    try:
        # Merge Data
        original_pairs = hd.merge_data(sim_df=simulated_df, obs_df=observed_df)
        corrected_pairs = hd.merge_data(sim_df=corrected_df, obs_df=observed_df)

        # Plotting Data
        sim_values = original_pairs.iloc[:, 0].values
        obs_values = original_pairs.iloc[:, 1].values

        original_points = go.Scatter(
            x=sim_values,
            y=obs_values,
            mode='markers',
            name='original',
            marker=dict(color='#ef553b'))

        corrected_points = go.Scatter(
            x=corrected_pairs.iloc[:, 0].values,
            y=corrected_pairs.iloc[:, 1].values,
            mode='markers',
            name='corrected',
            marker=dict(color='#00cc96'))

        # the reference line only spans the range of the original (uncorrected) pairs
        lower_bound = min(min(obs_values), min(sim_values))
        upper_bound = max(max(obs_values), max(sim_values))
        diagonal = [lower_bound, upper_bound]

        identity_line = go.Scatter(
            x=diagonal,
            y=diagonal,
            mode='lines',
            name='45deg line',
            line=dict(color='black'))

        figure_layout = go.Layout(
            title="Scatter Plot for {0} - {1} (Log Scale)".format(codEstacion, nomEstacion),
            xaxis=dict(title='Simulated', type='log'),
            yaxis=dict(title='Observed', type='log', autorange=True),
            showlegend=True)

        chart = PlotlyView(
            go.Figure(data=[original_points, corrected_points, identity_line], layout=figure_layout))

        return render(
            request,
            'historical_validation_tool_dominican_republic/gizmo_ajax.html',
            {'gizmo_object': chart})

    except Exception as e:
        print(str(e))
        return JsonResponse({'error': 'No data found for the selected station.'})

Esempio n. 8

0

Mostra file

        #plt.figure(2)
        #plt.figure(figsize=(15, 9))
        #plt.plot(eraI_df.index, eraI_df.iloc[:, 0].values, 'k', color='blue', label='ERA-Interim Streamflow')
        #plt.title('ERA-Interim Hydrograph for COMID: ' + str(comid))
        #plt.xlabel('Date')
        #plt.ylabel('Streamflow (m$^3$/s)')
        #plt.legend()
        #plt.grid()
        #plt.xlim(eraI_df.index[0], eraI_df.index[len(eraI_df.index) - 1])
        #t = pd.date_range(eraI_df.index[0], eraI_df.index[len(eraI_df.index) - 1], periods=10).to_pydatetime()
        #plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
        #plt.tight_layout()
        #plt.savefig(plot_sim_hyd_dir + '/ERA-Interim Hydrograph for ' + str(comid) + '.png')

        # Merging the Data
        merged_df = hd.merge_data(sim_df=eraI_df, obs_df=era5_df)

        #'''Tables and Plots'''
        # Appending the table to the final table
        table = hs.make_table(merged_df,
                              metrics=[
                                  'ME', 'MAE', 'MAPE', 'RMSE', 'NRMSE (Mean)',
                                  'NSE', 'KGE (2009)', 'KGE (2012)',
                                  'R (Pearson)', 'R (Spearman)', 'r2'
                              ],
                              location=point,
                              remove_neg=False,
                              remove_zero=False)
        all_station_table = all_station_table.append(table)

        # Making plots for all the stations

Esempio n. 9

0

Mostra file

File: plots.py Progetto: BYU-Hydroinformatics/geoglows

def corrected_volume_compare(corrected: pd.DataFrame, simulated: pd.DataFrame, observed: pd.DataFrame,
                             merged_sim_obs: pd.DataFrame = False, merged_cor_obs: pd.DataFrame = False,
                             titles: dict = None, outformat: str = 'plotly') -> go.Figure or str:
    """
    Calculates and plots the cumulative volume output on each of the 3 datasets provided.

    The volumes are computed from hydrostats.data.merge_data(simulated, observed) and
    hydrostats.data.merge_data(corrected, observed). If you have already computed either merge because you are
    doing a full comparison of bias correction, pass it in: a provided merge is used as-is and only a missing
    one is computed from the raw dataframes.

    Args:
        corrected: the response from the geoglows.bias.correct_historical_simulation function. Only read when
            merged_cor_obs is not provided
        simulated: the csv response from historic_simulation. Only read when merged_sim_obs is not provided
        observed: the dataframe of observed data. Must have a datetime index and a single column of flow values.
            Only read when at least one of the merged dataframes is not provided
        merged_sim_obs: (optional) if you have already computed it, hydrostats.data.merge_data(simulated, observed)
        merged_cor_obs: (optional) if you have already computed it, hydrostats.data.merge_data(corrected, observed)
        titles: (dict) Extra info to show on the title of the plot. For example:
            {'Reach ID': 1234567, 'Drainage Area': '1000km^2'}
        outformat: either 'plotly' or 'plotly_html' (default plotly)

    Returns:
        A plotly figure when outformat is 'plotly' (especially for use with python notebooks and the .show()
        method), or the result of offline_plot requested as a 'div' when outformat is 'plotly_html'

    Raises:
        ValueError: if outformat is neither 'plotly' nor 'plotly_html'
    """
    from itertools import accumulate  # function-scope import so the block does not depend on file-level imports

    # False (the signature default) and None both mean "not provided". The previous guard only honoured the
    # pre-merged frames when all three raw inputs were literally False, so they were normally recomputed.
    if merged_sim_obs is False or merged_sim_obs is None:
        merged_sim_obs = hd.merge_data(sim_df=simulated, obs_df=observed)
    if merged_cor_obs is False or merged_cor_obs is None:
        merged_cor_obs = hd.merge_data(sim_df=corrected, obs_df=observed)

    # 0.0864 = 86400 s/day / 1e6: with daily flows in m^3/s each step contributes a volume in millions of m^3
    # (assumes daily m^3/s input -- confirm against the data source)
    volume_per_step = 0.0864

    # running totals; column 0 is the (corrected) simulated series and column 1 the observed series
    sim_volume_cum = list(accumulate(merged_sim_obs.iloc[:, 0].values * volume_per_step))
    obs_volume_cum = list(accumulate(merged_sim_obs.iloc[:, 1].values * volume_per_step))
    corr_volume_cum = list(accumulate(merged_cor_obs.iloc[:, 0].values * volume_per_step))

    observed_volume = go.Scatter(x=merged_sim_obs.index, y=obs_volume_cum, name='Observed', )
    simulated_volume = go.Scatter(x=merged_sim_obs.index, y=sim_volume_cum, name='Simulated', )
    corrected_volume = go.Scatter(x=merged_cor_obs.index, y=corr_volume_cum, name='Corrected Simulated', )

    # the axis is labelled Mm^3 because the 0.0864 factor already divides by one million
    layout = go.Layout(
        title=_build_title('Cumulative Volume Comparison', titles),
        xaxis=dict(title='Datetime', ), yaxis=dict(title='Volume (Mm<sup>3</sup>)', autorange=True),
        showlegend=True)

    # build the figure once; both output formats wrap the same object
    figure = go.Figure(data=[observed_volume, simulated_volume, corrected_volume], layout=layout)

    if outformat == 'plotly':
        return figure
    if outformat == 'plotly_html':
        return offline_plot(
            figure,
            config={'autosizable': True, 'responsive': True},
            output_type='div',
            include_plotlyjs=False
        )
    raise ValueError('Invalid outformat chosen. Choose plotly or plotly_html')

Esempio n. 10

0

Mostra file

    'ID: Low vs. Med Res', 'ID: Low vs. High Res', 'ID: Med vs. High Res',
    'MO: Low vs. Med Res', 'MO: Low vs. High Res', 'MO: Med vs. High Res',
    'NY: Low vs. Med Res', 'NY: Low vs. High Res', 'NY: Med vs. High Res',
    'OR: Low vs. Med Res', 'OR: Low vs. High Res', 'OR: Med vs. High Res',
    'COL: Low vs. Med Res', 'COL: Low vs. High Res', 'COL: Med vs. High Res'
]

# seasonal = [['01-01', '03-31'], ['04-01', '06-30'], ['07-01', '09-30'], ['10-01', '12-31']]

# Create a statistics table for each comparison and stack them into the overall table.
# DataFrame.append was removed in pandas 2.0 (and re-copied the whole table on every call), so the
# per-comparison tables are collected in a list and concatenated once.
tables = []

# Can make seasonal_periods=seasonal (see above)

for s, o, n in zip(sim_list, obs_list, name_list):
    merged_df = hd.merge_data(sim_df=s, obs_df=o)
    temp_table = ha.make_table(merged_df,
                               metrics=my_metrics,
                               seasonal_periods=None,
                               location=n)
    tables.append(temp_table)

# pd.concat raises on an empty list; fall back to the empty frame the script used to start from
table = pd.concat(tables) if tables else pd.DataFrame()

table.to_csv(
    '/home/chrisedwards/Documents/rapid_output/stat_comparison/Statistical_Summary.csv'
)
# print(table)

# # # CSV including: Full Time Series
# table_full_yr = table[table.index.str.contains('Full')]
# table_full_yr.to_csv('/home/chrisedwards/Documents/rapid_output/statistical_comparison/Full_Year_Stats.csv')
# # # print(table_full_yr)

Esempio n. 11

0

Mostra file

File: Evaluate_Monthly_Seasonality_Blue_Nile_Corrected.py Progetto: jorgessanchez7/Global_Forecast_Validation

# Paths of the monthly corrected CSV files, one entry per (comid, name) pair and in the same order.
ERA5_Files = []
ERAI_Files = []

# Every file is named '<comid>_<name>.csv' inside the dataset's Monthly_Corrected folder.
for comid, name in zip(COMIDs, Names):
	ERA5_Files.append(
		'/Users/student/Dropbox/PhD/2020 Winter/Dissertation_v9/Africa/Blue_Nile/Data/Historical/simulated_data/ERA_5/Monthly_Corrected/'
		+ str(comid) + '_' + str(name) + '.csv')
	ERAI_Files.append(
		'/Users/student/Dropbox/PhD/2020 Winter/Dissertation_v9/Africa/Blue_Nile/Data/Historical/simulated_data/ERA_Interim/Monthly_Corrected/'
		+ str(comid) + '_' + str(name) + '.csv')

# Per station: load both simulations, derive monthly averages and standard errors, then load the observed
# multiannual monthly means. zip stops at the shortest of the five inputs.
for comid, name, rio, ERA5_File, ERAI_File in zip(COMIDs, Names, Rivers, ERA5_Files, ERAI_Files):
	print(comid, name, rio)

	#Merging the Data
	# The ERA-Interim file is passed first and the ERA5 file second; below, the 'Simulated' column is
	# treated as ERA-Interim and the 'Observed' column as ERA5.
	merged_df = hd.merge_data(ERAI_File, ERA5_File)

	monthly_avg = hd.monthly_average(merged_df)
	monthly_std_error = hd.monthly_std_error(merged_data=merged_df)

	# Double brackets keep each selection a one-column DataFrame rather than a Series.
	ERA5_monthly_avg = monthly_avg[['Observed']]
	ERA_Interim_monthly_avg = monthly_avg[['Simulated']]

	ERA5_monthly_std_error = monthly_std_error[['Observed']]
	ERA_Interim_monthly_std_error = monthly_std_error[['Simulated']]

	# Observed statistics come from a separate per-station CSV; 'Month' is read as a string and used as the index.
	observed_monthly = pd.read_csv('/Users/student/Dropbox/PhD/2020 Winter/Dissertation_v9/Africa/Blue_Nile/Data/Historical/observed_data/Multiannual_Mean_Streamflow/{0}_{1}.csv'.format(comid, name), dtype={'Month': str})
	observed_monthly.set_index('Month', inplace=True)

	observed_monthly_avg = observed_monthly[['Mean Streamflow (m3/s)']]
	observed_monthly_std_error = observed_monthly[['Standard Error']]

Esempio n. 12

0

Mostra file

 def setUp(self):
     """Merge the two sample CSV datasets, referenced by URL, into self.merged_df."""
     sample_data_root = r'https://github.com/waderoberts123/Hydrostats/raw/master/Sample_data/'
     sfpt_csv = sample_data_root + r'sfpt_data/magdalena-calamar_interim_data.csv'
     glofas_csv = sample_data_root + r'GLOFAS_Data/magdalena-calamar_ECMWF_data.csv'
     merged_columns = ('Streamflow Prediction Tool', 'GLOFAS')
     self.merged_df = hd.merge_data(sfpt_csv, glofas_csv, column_names=merged_columns)

Esempio n. 13

0

Mostra file

File: Seas-Avg-Plot_Obs_vs_ERA-Interim.py Progetto: cycle13/Multiple_Resolution_Analysis

    'Low Res', 'Med Res', 'High Res', 'Low Res', 'Med Res', 'High Res',
    'Low Res', 'Med Res', 'High Res', 'Low Res', 'Med Res', 'High Res',
    'Low Res', 'Med Res', 'High Res'
]
s2 = 'Observed'

# This list specifies which metrics to use:
metrics = []

# This list controls the axis labels:
labels = ['Datetime', 'Streamflow (cms)']

# End of Dynamic Input. Do NOT Change the following -------------------------------------------

for s, o, t, c1, s1 in zip(list_sim, list_obs, range(15), color1, series1):
    temp_merged = hd.merge_data(sim_df=x_df, obs_df=list_obs_df[o])
    temp_davg = hd.daily_average(temp_merged)
    obs_loop_df = temp_davg.drop(index='02/29', columns='Simulated')

    sim_temp_df = list_riv_mouth[s]
    sim_loop_in = sim_temp_df.index = pd.date_range(
        '2001-01-01', '2001-12-31').strftime("%m/%d")
    # dates=pd.date_range('2001-01-01', '2001-12-31').strftime("%m/%d")
    sim_loop_df = pd.DataFrame(
        sim_loop_in,
        index=pd.date_range('2001-01-01', '2001-12-31').strftime("%m/%d"))

    group = [sim_loop_df, obs_loop_df]
    merged_df = pd.concat(group, axis=1)
    filename = list_titles[t]
    hv.plot(merged_data_df=merged_df,

Esempio n. 14

0

Mostra file

# Metrics passed to every plot; left empty here.
metrics = []

# This list controls the axis labels:
labels=['Datetime', 'Streamflow (cms)']


# End of Dynamic Input. Do NOT Change the following -------------------------------------------

# Create a list of 18 stream modified Time Series
# Each series in list_riv_mouth is cut down to the begin_date..end_date window.
list_riv_part = []
for riv in list_riv_mouth:
    riv_part = riv.loc[begin_date:end_date]
    list_riv_part.append(riv_part)

# One plot per comparison. zip stops at its shortest input, so range(18) caps the loop at 18 plots.
# s and o are used as indexes into list_riv_part and list_obs_df; t indexes list_titles.
for s, o, c1, t, s1 in zip(list_sim, list_obs, color1, range(18), series1):
    merged_df = hd.merge_data(sim_df=list_riv_part[s], obs_df=list_obs_df[o])
    # The plot title doubles as the PNG file name below.
    filename = year + ': ' + list_titles[t]
    hv.plot(merged_data_df=merged_df,
            title=filename,
            linestyles=[c1, 'k-'],
            legend=(s1, series2),
            labels=labels,
            metrics = metrics,
            grid=True)
    plt.savefig('/home/chrisedwards/Documents/rapid_output/graphs/{}.png'.format(filename))

Esempio n. 15

0

Mostra file

    plt.title('Simulated Hydrograph for ' + str(id) + ' - ' + name +
              '\n River: ' + rio + '. COMID - ' + str(comid))
    plt.xlabel('Date')
    plt.ylabel('Streamflow (m$^3$/s)')
    plt.legend()
    plt.grid()
    plt.xlim(dates_sim[0], dates_sim[len(dates_sim) - 1])
    t = pd.date_range(dates_sim[0], dates_sim[len(dates_sim) - 1],
                      periods=10).to_pydatetime()
    plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
    plt.tight_layout()
    plt.savefig(plot_sim_hyd_dir + '/Simulated Hydrograph for ' + str(id) +
                ' - ' + name + '. COMID - ' + str(comid) + '.png')

    #Merging the Data
    merged_df = hd.merge_data(simFile, obsFile)
    '''Tables and Plots'''
    # Appending the table to the final table
    table = hs.make_table(merged_df,
                          metrics=[
                              'ME', 'MAE', 'MAPE', 'RMSE', 'NRMSE (Mean)',
                              'NSE', 'KGE (2009)', 'KGE (2012)', 'R (Pearson)',
                              'R (Spearman)', 'r2'
                          ],
                          location=id,
                          remove_neg=False,
                          remove_zero=False)
    all_station_table = all_station_table.append(table)

    #Making plots for all the stations

Esempio n. 16

0

Mostra file

File: Seas-Avg-Plot_Obs_vs_ERA-Interim-Qout.py Progetto: cycle13/Multiple_Resolution_Analysis

    'AZ: Sim (str-60) vs Obs (gauge-09492400)',
    'MO: Sim (str-50) vs Obs (gauge-07013000)',
    'MO: Sim (str-51) vs Obs (gauge-07014000)',
    'NY: Sim (str-47) vs Obs (gauge-01413408)',
    'NY: Sim (str-46) vs Obs (gauge-01413398)',
    'OR: Sim (str-58) vs Obs (gauge-14306400)',
    'OR: Sim (str-47) vs Obs (gauge-14306100)'
]

# This list specifies which metrics to use:
metrics = []

# This list controls the axis labels:
labels = ['Datetime', 'Streamflow (cms)']

# One daily-average plot per comparison. The three ranges are identical, so s, o and t always hold the
# same index (0 through 11) into list_sim_df, list_obs_df and list_titles.
for s, o, t in zip(range(12), range(12), range(12)):
    merged_df = hd.merge_data(sim_df=list_sim_df[s], obs_df=list_obs_df[o])
    da_df = hd.daily_average(merged_df)
    # The plot title doubles as the PNG file name below.
    filename = list_titles[t] + ' Daily Average'
    hv.plot(merged_data_df=da_df,
            title=filename,
            linestyles=['b-', 'k-'],
            legend=('Sim', 'Obs'),
            labels=labels,
            metrics=metrics,
            x_season=True,
            grid=True)
    plt.tight_layout()
    plt.savefig(
        '/home/chrisedwards/Documents/rapid_output/graphs/{}.png'.format(
            filename))

Esempio n. 17

0

Mostra file

File: Plot_TS_ERA-5_Resolution-Comparison.py Progetto: cycle13/Multiple_Resolution_Analysis

# This list specifies which metrics to use:
metrics = []

# This list controls the axis labels:
labels = ['Datetime', 'Streamflow (cms)']

# End of Dynamic Input. Do NOT Change the following -------------------------------------------

# Create a list of 18 stream modified Time Series
# Each series in list_riv_mouth_era5 is cut down to the begin_date..end_date window.
list_riv_part_era5 = []
for riv in list_riv_mouth_era5:
    riv_part_era5 = riv.loc[begin_date:end_date]
    list_riv_part_era5.append(riv_part_era5)

# One scatter plot per comparison. zip stops at its shortest input, so range(18) caps the loop at 18 plots.
# Both sides of each comparison come from list_riv_part_era5; s and o are indexes into that same list.
for s, o, t, s1, s2 in zip(list_sim, list_obs, range(18), series1, series2):
    merged_df = hd.merge_data(sim_df=list_riv_part_era5[s],
                              obs_df=list_riv_part_era5[o])
    # The plot title doubles as the PNG file name below.
    filename = year + ' (ERA-5): ' + list_titles[t]
    hv.scatter(merged_data_df=merged_df,
               title=filename,
               labels=(s1, s2),
               metrics=metrics,
               marker_style=".",
               grid=True,
               log_scale=False,
               line45=True)
    plt.tight_layout()
    plt.savefig(
        '/home/chrisedwards/Documents/rapid_output/graphs/{}.png'.format(
            filename))

Esempio n. 18

0

Mostra file

def get_scatterPlot(request):
    """
    Render the simulated-vs-observed scatter plot with linear regression lines.

    Merges the module-level simulated and corrected dataframes with the observed dataframe and plots the
    original pairs, the corrected pairs, a 45 degree reference line and one fitted regression line for each
    set of pairs. The reference and regression lines span the value range of the original pairs.

    Returns the rendered gizmo template on success, or a JsonResponse carrying an error message if any
    step raises.
    """
    global codEstacion
    global nomEstacion
    global simulated_df
    global observed_df
    global corrected_df
    _query_params = request.GET  # read but not used below

    try:
        # Merge Data
        original_pairs = hd.merge_data(sim_df=simulated_df, obs_df=observed_df)
        corrected_pairs = hd.merge_data(sim_df=corrected_df, obs_df=observed_df)

        # Plotting Data
        sim_values = original_pairs.iloc[:, 0].values
        obs_values = original_pairs.iloc[:, 1].values
        cor_values = corrected_pairs.iloc[:, 0].values
        cor_obs_values = corrected_pairs.iloc[:, 1].values

        original_points = go.Scatter(
            x=sim_values,
            y=obs_values,
            mode='markers',
            name='original',
            marker=dict(color='#ef553b'))

        corrected_points = go.Scatter(
            x=cor_values,
            y=cor_obs_values,
            mode='markers',
            name='corrected',
            marker=dict(color='#00cc96'))

        # every line drawn below only spans the range of the original (uncorrected) pairs
        lower_bound = min(min(obs_values), min(sim_values))
        upper_bound = max(max(obs_values), max(sim_values))
        line_x = [lower_bound, upper_bound]

        identity_line = go.Scatter(
            x=line_x,
            y=[lower_bound, upper_bound],
            mode='lines',
            name='45deg line',
            line=dict(color='black'))

        # only slope and intercept (the first two values of the regression result) are used
        slope, intercept = sp.linregress(sim_values, obs_values)[:2]
        slope2, intercept2 = sp.linregress(cor_values, cor_obs_values)[:2]

        original_fit = go.Scatter(
            x=line_x,
            y=[slope * lower_bound + intercept, slope * upper_bound + intercept],
            mode='lines',
            name='{0}x + {1} (Original)'.format(str(round(slope, 2)), str(round(intercept, 2))),
            line=dict(color='red'))

        corrected_fit = go.Scatter(
            x=line_x,
            y=[slope2 * lower_bound + intercept2, slope2 * upper_bound + intercept2],
            mode='lines',
            name='{0}x + {1} (Corrected)'.format(str(round(slope2, 2)), str(round(intercept2, 2))),
            line=dict(color='green'))

        # NOTE(review): watershed, subbasin and comid are not defined in this function; presumably they are
        # module-level names. If they are not, the NameError is caught below and the error response is returned.
        figure_layout = go.Layout(
            title='Scatter Plot for  {0}-{1} <br> COMID: {2}'.format(watershed, subbasin, comid),
            xaxis=dict(title='Simulated'),
            yaxis=dict(title='Observed', autorange=True),
            showlegend=True)

        traces = [original_points, corrected_points, identity_line, original_fit, corrected_fit]
        chart = PlotlyView(go.Figure(data=traces, layout=figure_layout))

        return render(
            request,
            'historical_validation_tool_west_africa/gizmo_ajax.html',
            {'gizmo_object': chart})

    except Exception as e:
        print(str(e))
        return JsonResponse({'error': 'No data found for the selected station.'})

Esempio n. 19

0

Mostra file

def get_volumeAnalysis(request):
    """
    Build the cumulative volume chart for observed, simulated and corrected flows.

    The module-level ``simulated_df``, ``observed_df`` and ``corrected_df``
    dataframes are merged with ``hd.merge_data``; every flow series is scaled
    by 0.0864 and accumulated, and the three running totals are drawn on a
    single plotly figure.

    Args:
        request: the request object; it is only forwarded to ``render``.

    Returns:
        The rendered ``gizmo_ajax.html`` template holding the chart, or a
        ``JsonResponse`` with an ``error`` key when any step raises.
    """
    # Module-level state; this function only reads it.
    global codEstacion
    global nomEstacion
    global simulated_df
    global observed_df
    global corrected_df

    try:
        # Merge Data
        merged_df = hd.merge_data(sim_df=simulated_df, obs_df=observed_df)
        merged_df2 = hd.merge_data(sim_df=corrected_df, obs_df=observed_df)

        # Plotting Data
        # Column 0 of a merged frame is the simulated series, column 1 the
        # observed one.
        sim_array = merged_df.iloc[:, 0].values
        obs_array = merged_df.iloc[:, 1].values
        corr_array = merged_df2.iloc[:, 0].values

        # NOTE(review): 0.0864 presumably converts a daily mean flow in m3/s
        # into Mm3 per day (86400 s / 1e6), matching the y-axis unit - confirm.
        # cumsum() accumulates left to right, exactly like a running-sum loop.
        sim_volume_cum = (sim_array * 0.0864).cumsum()
        obs_volume_cum = (obs_array * 0.0864).cumsum()
        corr_volume_cum = (corr_array * 0.0864).cumsum()

        observed_volume = go.Scatter(
            x=merged_df.index,
            y=obs_volume_cum,
            name='Observed',
        )

        simulated_volume = go.Scatter(
            x=merged_df.index,
            y=sim_volume_cum,
            name='Simulated',
        )

        # The corrected series has its own merged frame, hence its own index.
        corrected_volume = go.Scatter(
            x=merged_df2.index,
            y=corr_volume_cum,
            name='Corrected Simulated',
        )

        layout = go.Layout(
            title='Observed & Simulated Volume at<br> {0} - {1}'.format(
                codEstacion, nomEstacion),
            xaxis=dict(title='Dates', ),
            yaxis=dict(title='Volume (Mm<sup>3</sup>)', autorange=True),
            showlegend=True)

        chart_obj = PlotlyView(
            go.Figure(
                data=[observed_volume, simulated_volume, corrected_volume],
                layout=layout))

        context = {
            'gizmo_object': chart_obj,
        }

        return render(
            request,
            'historical_validation_tool_dominican_republic/gizmo_ajax.html',
            context)

    except Exception as e:
        # Any failure is reported to the client as "no data"; the real cause
        # is only printed on the server side.
        print(str(e))
        return JsonResponse(
            {'error': 'No data found for the selected station.'})

Esempio n. 20

0

Mostra file

File: era_5_interim_analysis.py Progetto: jorgessanchez7/Global_Forecast_Validation

	# Parse each index as datetimes and tag each value column with the
	# reanalysis product it comes from.
	era5_prec.index = pd.to_datetime(era5_prec.index)
	era5_prec.rename({'Precipitation (mm)': 'ERA-5 Precipitation (mm)'}, axis=1, inplace=True)

	erai_prec = pd.read_csv('/volumes/files/ECMWF_Precipitation/ERA_Interim/Daily_GeoTIFF_Clipped/{0}.csv'.format(region), index_col=0)
	erai_prec.index = pd.to_datetime(erai_prec.index)
	erai_prec.rename({'Precipitation (mm)': 'ERA-I Precipitation (mm)'}, axis=1, inplace=True)

	era5_run = pd.read_csv('/volumes/files/ECMWF_Runoff/ERA_5/Daily_GeoTIFF_Clipped/{0}.csv'.format(region), index_col=0)
	era5_run.index = pd.to_datetime(era5_run.index)
	era5_run.rename({'Runoff (mm)': 'ERA-5 Runoff (mm)'}, axis=1, inplace=True)

	erai_run = pd.read_csv('/volumes/files/ECMWF_Runoff/ERA_Interim/Daily_GeoTIFF_Clipped/{0}.csv'.format(region), index_col=0)
	erai_run.index = pd.to_datetime(erai_run.index)
	erai_run.rename({'Runoff (mm)': 'ERA-I Runoff (mm)'}, axis=1, inplace=True)

	# merge_data places sim_df in the first column and obs_df in the second,
	# so column 0 holds ERA-Interim and column 1 holds ERA-5.
	merged_prec = hd.merge_data(sim_df=erai_prec, obs_df=era5_prec)

	# NOTE(review): figure 1 is activated and then a second, sized figure is
	# opened, so the plot lands on the second one. Kept unchanged because code
	# outside this excerpt may depend on the figure numbering - confirm.
	plt.figure(1)
	plt.figure(figsize=(17.7983738762, 11))
	# Each label now matches the column it draws (they were swapped before).
	# The 'k' format string was dropped because color= already sets the colour.
	plt.plot(merged_prec.index, merged_prec.iloc[:, 1].values, color='red', label='ERA-5')
	plt.plot(merged_prec.index, merged_prec.iloc[:, 0].values, color='blue', label='ERA-Interim')
	plt.title('ERA-5 and ERA-Interim Precipitation at ' + region)
	plt.xlabel('Date')
	plt.ylabel('Precipitation (mm)')
	plt.legend()
	plt.grid()
	# Clamp the x axis to the merged period.
	plt.xlim(merged_prec.index[0], merged_prec.index[-1])
	#t = pd.date_range(merged_prec.index[0], merged_prec.index[len(merged_prec.index) - 1], periods=10).to_pydatetime()
	#plt.xticks(t)
	#plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
	plt.tight_layout()

Esempio n. 21

0

Mostra file

def _optional_float(params, key, default):
    """Return ``params[key]`` as a float, or ``default`` when the key is absent."""
    raw = params.get(key, None)
    return default if raw is None else float(raw)


def _optional_x_bar(params, key):
    """Return ``params[key]`` as a float; an absent key or a value of 1 gives None."""
    raw = params.get(key, None)
    if raw is None:
        return None
    value = float(raw)
    return value if value != 1 else None


def make_table_ajax(request):
    """
    Build the HTML metrics table comparing original and corrected simulations.

    The module-level ``simulated_df`` and ``corrected_df`` dataframes are each
    merged with ``observed_df``; ``hs.make_table`` is run on both merged frames
    with the metrics and optional parameters taken from the query string, and
    the two transposed tables are joined side by side on their index.

    Query parameters read from ``request.GET``:
        metrics[]: list of metric abbreviations to compute.
        mase_m, dmod_j, nse_mod_j, h6_k_MHE, h6_k_AHE, h6_k_RMSHE:
            optional floats, each defaulting to 1 when absent.
        lm_x_bar, d1_p_x_bar: optional floats; an absent value or a value of 1
            is passed on as None.

    Args:
        request: the request object carrying the query string.

    Returns:
        An ``HttpResponse`` with the combined table as HTML, or a
        ``JsonResponse`` with an ``error`` key when any step raises (for
        example a parameter that is not a valid number).
    """
    get_data = request.GET
    # Module-level state; this function only reads it.
    global simulated_df
    global observed_df
    global corrected_df

    try:
        # Indexing the metrics to get the abbreviations
        selected_metric_abbr = get_data.getlist("metrics[]", None)

        # Optional tuning parameters, keyed by the keyword hs.make_table expects.
        metric_kwargs = {
            'mase_m': _optional_float(get_data, 'mase_m', 1),
            'dmod_j': _optional_float(get_data, 'dmod_j', 1),
            'nse_mod_j': _optional_float(get_data, 'nse_mod_j', 1),
            'h6_mhe_k': _optional_float(get_data, 'h6_k_MHE', 1),
            'h6_ahe_k': _optional_float(get_data, 'h6_k_AHE', 1),
            'h6_rmshe_k': _optional_float(get_data, 'h6_k_RMSHE', 1),
            'd1_p_obs_bar_p': _optional_x_bar(get_data, 'd1_p_x_bar'),
            'lm_x_obs_bar_p': _optional_x_bar(get_data, 'lm_x_bar'),
        }

        # Merge Data
        merged_df = hd.merge_data(sim_df=simulated_df, obs_df=observed_df)
        merged_df2 = hd.merge_data(sim_df=corrected_df, obs_df=observed_df)

        # Creating the tables based on user input: one for the original
        # simulation and one for the bias-corrected simulation.
        table = hs.make_table(
            merged_dataframe=merged_df,
            metrics=selected_metric_abbr,
            **metric_kwargs
        ).round(decimals=2)
        table2 = hs.make_table(
            merged_dataframe=merged_df2,
            metrics=selected_metric_abbr,
            **metric_kwargs
        ).round(decimals=2)

        # Give the two rows distinct labels so they stay distinguishable as
        # columns once the tables are transposed and joined.
        table = table.rename(
            index={'Full Time Series': 'Original Full Time Series'})
        table2 = table2.rename(
            index={'Full Time Series': 'Corrected Full Time Series'})

        table_final = pd.merge(table.transpose(),
                               table2.transpose(),
                               right_index=True,
                               left_index=True)

        table_final_html = table_final.to_html(
            classes="table table-hover table-striped",
            table_id="corrected_1").replace('border="1"', 'border="0"')

        return HttpResponse(table_final_html)

    except Exception:
        # Any failure is reported to the client as "no data"; the traceback is
        # printed on the server side.
        traceback.print_exc()
        return JsonResponse(
            {'error': 'No data found for the selected station.'})

Esempio n. 22

0

Mostra file

File: Seas-Avg_Obs_vs_ERA-Interim_no_loop.py Progetto: cycle13/Multiple_Resolution_Analysis

    index_col=0)
# Keep only the columns that remain after dropping the cfs flow and the
# estimation flag.
ny_obs_cms = ny_obs_full.drop(columns=["Flow-cfs", "Estimation"])

# Gauge record read from a local CSV; the first CSV column becomes the index.
or_obs_full = pd.read_csv(
    '/home/chrisedwards/Documents/gauge_data/14306500_1-1-1980_12-31-2014.csv',
    index_col=0)
or_obs_cms = or_obs_full.drop(columns=["Flow-cfs", "Estimation"])

list_obs_df = [az_obs_cms, id_obs_cms, mo_obs_cms, ny_obs_cms, or_obs_cms]

# Placeholder "simulated" frame built from the same gauge record; it only
# exists so merge_data has two inputs (see the 'Delete' column below).
x_df = or_obs_full.drop(columns=['Flow-cms', 'Estimation'])

# ------------------------------------------------------------------------------------------------------

# The placeholder column is named 'Delete' and is discarded right after the
# daily averages are computed, leaving only the observed daily average.
merged_df = hd.merge_data(sim_df=x_df,
                          obs_df=or_obs_cms,
                          column_names=['Delete', 'Observed'])

temp_da = hd.daily_average(merged_df)
avg_obs = temp_da.drop(columns='Delete')

# Re-label or_lowres with month/day strings for a non-leap year (365 labels).
# NOTE(review): presumably this matches the index format produced by
# hd.daily_average so the two frames align in the concat - confirm.
or_lowres.index = pd.date_range("2001-01-01", "2001-12-31").strftime("%m/%d")

group = [or_lowres, avg_obs]
rapid_vs_obs = pd.concat(group, axis=1)
# or_lowres has no Feb 29 label, so that row is removed from the combined frame.
rapid_vs_obs.drop(index='02/29', inplace=True)

# print(rapid_vs_obs)

labels = ['Datetime', 'Streamflow (cms)']
hv.plot(merged_data_df=rapid_vs_obs,

Esempio n. 23

0

Mostra file

    # Defining the simulated return periods thresholds
    # The row for this comid is selected once and reused for every return
    # period instead of rebuilding the same boolean mask six times.
    sim_rp_row = simulated_rp.loc[
        simulated_rp.index == float('{0}.0'.format(comid))]

    sim_2_threshold = sim_rp_row['return_period_2'].values[0]
    sim_5_threshold = sim_rp_row['return_period_5'].values[0]
    sim_10_threshold = sim_rp_row['return_period_10'].values[0]
    sim_25_threshold = sim_rp_row['return_period_25'].values[0]
    sim_50_threshold = sim_rp_row['return_period_50'].values[0]
    sim_100_threshold = sim_rp_row['return_period_100'].values[0]

    # Restrict both series to the dates they share; the merged frame holds the
    # simulated values in column 0 and the observed values in column 1.
    merged_df = hd.merge_data(sim_df=simulated_df, obs_df=historical_df)

    historical_df = merged_df.iloc[:, 1].to_frame()
    simulated_df = merged_df.iloc[:, 0].to_frame()

    # Observed records below the 2-year threshold (df1) and at or above each
    # observed return-period threshold (df2 .. df7).
    observed_flow = historical_df['Observed']
    df1 = historical_df.loc[observed_flow < obs_2_threshold]
    df2 = historical_df.loc[observed_flow >= obs_2_threshold]
    df3 = historical_df.loc[observed_flow >= obs_5_threshold]
    df4 = historical_df.loc[observed_flow >= obs_10_threshold]
    df5 = historical_df.loc[observed_flow >= obs_25_threshold]
    df6 = historical_df.loc[observed_flow >= obs_50_threshold]
    df7 = historical_df.loc[observed_flow >= obs_100_threshold]

    event_return_period = 0

    if len(df2.index) > 0:

Esempio n. 24

0

Mostra file

list_avg_condensed.sort()

# Now there is a dictionary called 'seas_avg_dict' that has the seasonal averages stored in a pandas DataFrame.
# Each array has the datetime and flowrate.
# Each data frame is named in the format '{state}-{streamID}' (eg: 'az-7' or 'col-9').
# There are a total of 180 streams, or 180 keys in the dictionary: seas_avg_dict['az-7']
# list_streams_condensed = list of all the stream names, or names of the data frames.

# ***************************************************************************************************************
# ***************************************************************************************************************

# NOTE(review): the variables are named az_9 / az_avg_9 but the keys read here
# are 'mo-7' and 'mo-avg-7' - confirm which stream is intended.
az_9 = streamflow_dict['mo-7']
az_avg_9 = seas_avg_dict['mo-avg-7']

# The second stream ('az-21') is only there so merge_data has two inputs; its
# column ('21-calc') is dropped again right after the daily averages are computed.
merged_df = hd.merge_data(sim_df=az_9,
                          obs_df=streamflow_dict['az-21'],
                          column_names=['9-calc', '21-calc'])
dailyavg2 = hd.daily_average(merged_df)
avg_calc = dailyavg2.drop(columns='21-calc')

# Re-label the stored averages with month/day strings for a non-leap year (365 labels).
# NOTE(review): presumably this matches the index format produced by
# hd.daily_average so the two frames align in the concat - confirm.
az_avg_9.index = pd.date_range("2001-01-01", "2001-12-31").strftime("%m/%d")

group = [avg_calc, az_avg_9]
calc_vs_ncdf = pd.concat(group, axis=1)
# az_avg_9 has no Feb 29 label, so that row is removed from the combined frame.
calc_vs_ncdf.drop(index='02/29', inplace=True)

labels = ['Datetime', 'Streamflow (cms)']
hv.plot(merged_data_df=calc_vs_ncdf,
        title="MO Daily Avg: Hydrostats vs Rapid ",
        linestyles=['r-', 'k-'],
        legend=('Hydrostats', 'Rapid NetCDF'),

Esempio n. 25

0

Mostra file

File: validation_Annual_Corrected_Volume_Blue_Nile.py Progetto: jorgessanchez7/Global_Forecast_Validation

                '. COMID - ' + str(comid) + '.png')

    # Wrap the observed and simulated volumes in datetime-indexed frames so
    # they can be merged on their dates.
    obsData = pd.DataFrame({
        'datetime': dates_obs,
        'observed volume (BCM)': obs_df.iloc[:, 0].values
    })
    obsData.set_index(['datetime'], inplace=True)
    simData = pd.DataFrame({
        'datetime': dates_sim,
        'simulated volume (BCM)': sim_df.iloc[:, 0].values
    })
    simData.set_index(['datetime'], inplace=True)

    # Merging the Data
    merged_df = hd.merge_data(sim_df=simData,
                              obs_df=obsData,
                              column_names=('Simulated', 'Observed'))

    # Tables and Plots
    # Appending the table to the final table
    table = hs.make_table(merged_df,
                          metrics=[
                              'ME', 'MAE', 'MAPE', 'RMSE', 'NRMSE (Mean)',
                              'NSE', 'KGE (2009)', 'KGE (2012)', 'R (Pearson)',
                              'R (Spearman)', 'r2'
                          ],
                          location=name,
                          remove_neg=False,
                          remove_zero=False)
    # DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent
    # call and works on older pandas versions as well.
    all_station_table = pd.concat([all_station_table, table])

    #Making plots for all the stations

Esempio n. 26

0

Mostra file

File: plots.py Progetto: BYU-Hydroinformatics/geoglows

def corrected_scatterplots(corrected: pd.DataFrame, simulated: pd.DataFrame, observed: pd.DataFrame,
                           merged_sim_obs: pd.DataFrame = False, merged_cor_obs: pd.DataFrame = False,
                           titles: dict = None, outformat: str = 'plotly') -> go.Figure or str:
    """
    Creates a scatter plot of simulated vs observed and corrected vs observed discharge, with a 45 degree line and
    a linear regression line for each of the two data sets. This function uses hydrostats.data.merge_data on the
    3 inputs. If you have already computed the merged dataframes because you are doing a full comparison of bias
    correction, you can provide them to save time; a merged dataframe that is provided is used as is and only the
    missing ones are computed.

    Args:
        corrected: the response from the geoglows.bias.correct_historical_simulation function
        simulated: the csv response from historic_simulation
        observed: the dataframe of observed data. Must have a datetime index and a single column of flow values
        merged_sim_obs: (optional) if you have already computed it, hydrostats.data.merge_data(simulated, observed)
        merged_cor_obs: (optional) if you have already computed it, hydrostats.data.merge_data(corrected, observed)
        titles: (dict) Extra info to show on the title of the plot. For example:
            {'Reach ID': 1234567, 'Drainage Area': '1000km^2'}
        outformat: either 'plotly' or 'plotly_html' (default plotly)

    Returns:
         plotly.GraphObject: plotly object, especially for use with python notebooks and the .show() method. When
         outformat is 'plotly_html', the html div of the figure as a string instead.

    Raises:
        ValueError: if outformat is neither 'plotly' nor 'plotly_html'
    """
    # fail fast, before any merging or regression work is done
    if outformat not in ('plotly', 'plotly_html'):
        raise ValueError('Invalid outformat chosen. Choose plotly or plotly_html')

    # use the merged dataframes that were provided and only merge what is missing
    if merged_sim_obs is False or merged_sim_obs is None:
        merged_sim_obs = hd.merge_data(sim_df=simulated, obs_df=observed)
    if merged_cor_obs is False or merged_cor_obs is None:
        merged_cor_obs = hd.merge_data(sim_df=corrected, obs_df=observed)

    # column 0 of a merged dataframe is the simulated/corrected series, column 1 is the observed series
    sim_values = merged_sim_obs.iloc[:, 0].values
    sim_obs_values = merged_sim_obs.iloc[:, 1].values
    cor_values = merged_cor_obs.iloc[:, 0].values
    cor_obs_values = merged_cor_obs.iloc[:, 1].values

    # get the min/max values for plotting the 45 degree line
    min_value = min(min(sim_obs_values), min(sim_values))
    max_value = max(max(sim_obs_values), max(sim_values))

    # do a linear regression on both of the merged dataframes (only slope and intercept are needed)
    slope, intercept, *_ = scipy.stats.linregress(sim_values, sim_obs_values)
    slope2, intercept2, *_ = scipy.stats.linregress(cor_values, cor_obs_values)

    line_x = [min_value, max_value]
    scatter_sets = [
        go.Scatter(
            x=sim_values,
            y=sim_obs_values,
            mode='markers',
            name='Original Data',
            marker=dict(color='#ef553b')
        ),
        go.Scatter(
            x=cor_values,
            y=cor_obs_values,
            mode='markers',
            name='Corrected',
            marker=dict(color='#00cc96')
        ),
        go.Scatter(
            x=line_x,
            y=[min_value, max_value],
            mode='lines',
            name='45 degree line',
            line=dict(color='black')
        ),
        go.Scatter(
            x=line_x,
            y=[slope * min_value + intercept, slope * max_value + intercept],
            mode='lines',
            name=f'Y = {round(slope, 2)}x + {round(intercept, 2)} (Original)',
            line=dict(color='red')
        ),
        go.Scatter(
            x=line_x,
            y=[slope2 * min_value + intercept2, slope2 * max_value + intercept2],
            mode='lines',
            name=f'Y = {round(slope2, 2)}x + {round(intercept2, 2)} (Corrected)',
            line=dict(color='green')
        )
    ]

    # the linear button resets both axes, otherwise x would stay logarithmic after using the log button
    updatemenus = [
        dict(active=0,
             buttons=[dict(label='Linear Scale',
                           method='update',
                           args=[{'visible': [True, True]},
                                 {'title': 'Linear scale',
                                  'xaxis': {'type': 'linear'},
                                  'yaxis': {'type': 'linear'}}]),
                      dict(label='Log Scale',
                           method='update',
                           args=[{'visible': [True, True]},
                                 {'title': 'Log scale',
                                  'xaxis': {'type': 'log'},
                                  'yaxis': {'type': 'log'}}]),
                      ]
             )
    ]

    layout = go.Layout(title=_build_title('Bias Correction Scatter Plot', titles),
                       xaxis=dict(title='Simulated', ),
                       yaxis=dict(title='Observed', autorange=True),
                       showlegend=True, updatemenus=updatemenus)
    figure = go.Figure(data=scatter_sets, layout=layout)
    if outformat == 'plotly':
        return figure
    # outformat == 'plotly_html'
    return offline_plot(
        figure,
        config={'autosizable': True, 'responsive': True},
        output_type='div',
        include_plotlyjs=False
    )