def metrics(obs, pred, f, q, m):
    """Summarise the skill of model ``m`` on subset ``q`` as a one-row table.

    Parameters (as described by the original author):
        obs:  log(observed) values.
        pred: predictions, compared directly against ``obs``.
        f:    FIB identifier, forwarded to ``wqm.fib_thresh`` and
              ``HF_models.compute_AUROC``.
        q:    subset label (first level of the returned index).
        m:    model label (second level of the returned index).

    Returns:
        A ``pd.DataFrame`` with a single row, indexed by ``(q, m)``, holding
        the columns Rsq, D-W, RMSE, MAPE, AUROC, sens, spec, N and exc.
    """
    # Fit statistics, each rounded to three decimals.
    goodness_of_fit = round(r2_score(obs, pred), 3)
    durbin_watson_stat = round(durbin_watson(obs - pred), 3)

    error = pred - obs
    root_mean_sq = round(np.sqrt((error**2).mean()), 3)
    # The mean absolute relative error is rounded as a fraction first and
    # only then scaled to a percentage.
    pct_error = 100 * round(abs(error / obs).mean(), 3)

    # Exceedance classification against the log10 of the FIB threshold.
    log_thresh = np.log10(wqm.fib_thresh(f))
    evaluation = wqm.pred_eval(obs, pred, thresh=log_thresh)
    area_under_roc = round(HF_models.compute_AUROC(obs, pred, f), 3)

    # Column name -> value, in the order the table is laid out.
    row = {
        'Rsq': goodness_of_fit,
        'D-W': durbin_watson_stat,
        'RMSE': root_mean_sq,
        'MAPE': pct_error,
        'AUROC': area_under_roc,
        'sens': evaluation['Sensitivity'],
        'spec': evaluation['Specificity'],
        'N': evaluation['Samples'],
        'exc': evaluation['Exceedances'],
    }
    return pd.DataFrame(data=[list(row.values())],
                        columns=list(row),
                        index=[[q], [m]])
Exemple #2
0
def compute_AUROC(y, y_pred, f):
    """Return the area under a ROC curve traced by rescaling ``y_pred``.

    The predictions are multiplied by each factor from 0.7 (inclusive) to
    2.25 (exclusive) in steps of 0.005 and evaluated with ``wqm.pred_eval``
    against the log10 of the threshold for ``f``.  Column 0 of the stacked
    results is used as the true-positive rate and ``1 - column 1`` as the
    false-positive rate.
    NOTE(review): presumably ``pred_eval(..., tune=True)`` returns
    (sensitivity, specificity) -- confirm against ``wqm``.
    """
    evaluations = []
    for scale in np.arange(0.7, 2.25, 0.005):
        evaluations.append(
            wqm.pred_eval(y,
                          y_pred * scale,
                          thresh=np.log10(wqm.fib_thresh(f)),
                          tune=True))
    curve = np.array(evaluations)

    true_pos_rate = curve[:, 0]
    false_pos_rate = 1 - curve[:, 1]
    return auc(false_pos_rate, true_pos_rate)
Exemple #3
0
def compute_AUROC(y, y_pred, f):
    """Calculate AUROC from observed values and rescaled predictions.

    ``y_pred`` is multiplied by every factor in ``np.arange(0.7, 2.25,
    0.005)``; each rescaled prediction is scored by ``wqm.pred_eval`` against
    the log10 threshold for ``f``.  The first column of the collected scores
    serves as the true-positive rate and one minus the second column as the
    false-positive rate, which are then integrated with ``auc``.
    NOTE(review): presumably ``pred_eval(..., tune=True)`` returns
    (sensitivity, specificity) -- confirm against ``wqm``.
    """
    scores = []
    for factor in np.arange(0.7, 2.25, 0.005):
        scaled_pred = y_pred * factor
        scores.append(
            wqm.pred_eval(y,
                          scaled_pred,
                          thresh=np.log10(wqm.fib_thresh(f)),
                          tune=True))
    scores = np.array(scores)

    hit_rate = scores[:, 0]
    false_alarm_rate = 1 - scores[:, 1]
    return auc(false_alarm_rate, hit_rate)
Exemple #4
0
# Fit the 'blr' model on the training predictors.
blr, blr_perf = wqm.fit(y_train, X_train_vs, model_type='blr')

#%% TUNE
tune_mlr = wqm.tune(y_train, X_train_vs, model=mlr, cm_perf=cm_train)
tune_blr = wqm.tune(y_train, X_train_vs, model=blr, cm_perf=cm_train)

#%% TRAIN/TEST PERFORMANCE
print('\n\n- - - | Metrics | - - -')
# If tuning produced NaN, fall back to neutral settings: a multiplier of 1
# leaves the MLR predictions unchanged, and 0.5 is used as the BLR
# probability cut-off.
if np.isnan(tune_mlr):
    tune_mlr = 1
if np.isnan(tune_blr):
    tune_blr = 0.5

# NOTE: DataFrame.append was removed in pandas 2.0; pd.concat gives the same
# row-wise result on every pandas version.

# Tuned MLR: predictions are scaled by tune_mlr and compared against the
# log10 threshold.
mlr_t_perf = wqm.pred_eval(y_train,
                           mlr.predict(X_train_vs) * tune_mlr,
                           thresh=np.log10(wqm.fib_thresh(f)))
train_perf_df = pd.concat(
    [train_perf_df, pd.DataFrame(mlr_t_perf, index=['MLR-T'])])

mlr_t_perf_test = wqm.pred_eval(y_test,
                                mlr.predict(X_test_vs) * tune_mlr,
                                thresh=np.log10(wqm.fib_thresh(f)))
test_perf_df = pd.concat(
    [test_perf_df, pd.DataFrame(mlr_t_perf_test, index=['MLR-T'])])

# Tuned BLR: both observations and predictions are passed as booleans
# (observed above threshold, predicted probability above tune_blr).
blr_t_perf = wqm.pred_eval(y_train > np.log10(wqm.fib_thresh(f)),
                           blr.predict_proba(X_train_vs)[:, 1] > tune_blr)
train_perf_df = pd.concat(
    [train_perf_df, pd.DataFrame(blr_t_perf, index=['BLR-T'])])

blr_t_perf_test = wqm.pred_eval(y_test > np.log10(wqm.fib_thresh(f)),
                                blr.predict_proba(X_test_vs)[:, 1] > tune_blr)
test_perf_df = test_perf_df.append(
    kurt = df_fib.kurtosis()
    kurt.name = 'kurtosis'
    df_stats = df_stats.append(kurt.T)

    # At or Below Level of Quantification
    bloq = (df_fib == 10).sum()
    bloq.name = 'abloq'
    df_stats = df_stats.append(bloq.T)
    df_stats.loc['abloq_%'] = round(
        100 * df_stats.loc['abloq'] / df_stats.loc['N'], 1)

    # Exceedances
    exc = pd.Series()
    for f in ['TC', 'FC', 'ENT']:
        exc[f] = (df_fib[f] > wqm.fib_thresh(f)).sum()
    exc.name = 'exc'
    df_stats = df_stats.append(exc.T)
    df_stats.loc['exc_%'] = round(
        100 * df_stats.loc['exc'] / df_stats.loc['N'], 1)

    # Shannon Entropy
    shan = pd.Series()
    for f in ['TC', 'FC', 'ENT']:
        vals, counts = np.unique(df_fib[f], return_counts=True)
        shan[f] = round(stats.entropy(counts / len(df_fib[f])), 3)
    shan.name = 'Shannon'
    df_stats = df_stats.append(shan.T)

    # Append to basic stats df
    df_stats = df_stats.T
Exemple #6
0
### INPUTS ###
# case: test-case label; used as a row key into test_cases.csv and as the
#       name of the per-case sub-folder.
# f:    FIB identifier; passed to wqm.fib_thresh and embedded in file names.
case = 'LP3'
f = 'ENT'
model_types = ['RF']

# Single-letter color code per model name (presumably matplotlib colors --
# confirm where it is consumed).
model_color = {'MLR': 'b', 'GLS': 'g', 'RF': 'k', 'ANN': 'r'}

# NOTE(review): hard-coded, machine-specific absolute path.
folder = '/Users/rtsearcy/Box/water_quality_modeling/thfs/EDA/summer2020/prediction'
case_folder = folder + '/test_cases/' + case + '/'

# Table of test cases, indexed by the 'test_case' column; 'train_event' holds
# a comma-separated string that is split into a list here.
test_cases = pd.read_csv(os.path.join(folder, 'test_cases.csv'),
                         index_col=['test_case'])
train_events = test_cases.loc[case]['train_event'].split(',')

thresh = wqm.fib_thresh(f)

### Load data
# Training subset: sheet 'Train' of the per-case workbook, indexed by the
# parsed 'dt' column.
df_train = pd.read_excel(os.path.join(
    case_folder, 'train_test_subsets_' + f + '_' + case + '.xlsx'),
                         sheet_name='Train',
                         index_col='dt',
                         parse_dates=['dt'])
print('Train: ' + str(len(df_train)))

# 'RM Test' sheet of the same workbook, loaded the same way.
df_rm = pd.read_excel(os.path.join(
    case_folder, 'train_test_subsets_' + f + '_' + case + '.xlsx'),
                      sheet_name='RM Test',
                      index_col='dt',
                      parse_dates=['dt'])
# Keep only rows whose year matches the year of the first row.
# NOTE(review): df_rm.index[0] raises IndexError if the sheet is empty.
df_rm = df_rm[df_rm.index.year == df_rm.index[0].year]  # Remove Year 2 RM Data
Exemple #7
0
# Fit
# Ordinary least squares of the 'log' + f column on X_train with a constant
# column added; hasconst=True states explicitly that a constant is present.
mlr = sm.OLS(train['log' + f], sm.add_constant(X_train), hasconst=True).fit()
print(mlr.summary2())

# Tune (TBD)

# Eval (Train)
# mlr.predict() is called without arguments throughout this section
# (presumably returning the in-sample fitted values -- confirm against
# statsmodels).
print('\nMetrics (Training)')
rmse = np.sqrt(((mlr.predict() - train['log' + f])**2).mean())
print('RMSE - ' + str(round(rmse, 3)))
# Mean absolute relative error, printed as a fraction (not scaled by 100).
mape = abs((mlr.predict() - train['log' + f]) / train['log' + f]).mean()
print('MAPE - ' + str(round(mape, 3)))
# Exceedance evaluation against the log10 of the threshold for f.
mlr_train_perf = wqm.pred_eval(train['log' + f],
                               mlr.predict(),
                               thresh=np.log10(wqm.fib_thresh(f)))
print('AUROC - ' +
      str(round(compute_AUROC(train['log' + f], mlr.predict(), f), 3)))
print(mlr_train_perf)

# Eval (HF Test)
if len(hf_test) > 0:
    print('\nMetrics (HF Testing)')
    hf_test_pred = mlr.predict(sm.add_constant(hf_test_IV, has_constant='add'))
    rmse = np.sqrt(((hf_test_pred - hf_test['log' + f])**2).mean())
    print('RMSE - ' + str(round(rmse, 3)))
    mape = abs((hf_test_pred - hf_test['log' + f]) / hf_test['log' + f]).mean()
    print('MAPE - ' + str(round(mape, 3)))
    print('AUROC - ' +
          str(round(compute_AUROC(hf_test['log' + f], hf_test_pred, f), 3)))
    mlr_hft_perf = wqm.pred_eval(hf_test['log' + f],