def prediction_wrapper(df_data, lags, target_ts=None, keys: list=None, match_lag: bool=False,
                       n_boot: int=1):
    """Fit a RidgeCV forecast on ``df_data`` and return predictions + skill scores.

    Parameters
    ----------
    df_data : pd.DataFrame
        (split, time) MultiIndexed frame; the last two columns are assumed
        to be the 'TrainIsTrue' and 'RV_mask' split columns.
    lags : array-like of int
        Lags to fit; min/max are forwarded as tau_min/tau_max.
    target_ts : pd.DataFrame, optional
        Target timeseries. If None, the first column of split 0, masked by
        the last column (RV_mask) of split 0, is used.
    keys : list, optional
        Predictor column names. If None, every column except the two split
        columns is used.
    match_lag : bool, optional
        Forwarded to ``rg.fit_df_data_ridge`` as match_lag_region_to_lag_fc.
    n_boot : int, optional
        Number of bootstrap samples for the skill scores.

    Returns
    -------
    tuple
        (prediction, df_test, df_test_m, df_boot, models_lags, weights,
         df_test_s_m, df_train_m)

    Notes
    -----
    Relies on module-level names ``rg``, ``fc_utils``, ``functions_pp``,
    ``core_pp``, ``np``, ``blocksize`` and ``no_info_fc`` — TODO confirm
    these are defined where this function is used.
    """
    # alphas = np.append(np.logspace(.1, 1.5, num=25), [250])
    alphas = np.logspace(.1, 1.5, num=25)
    kwrgs_model = {'scoring': 'neg_mean_absolute_error',
                   'alphas': alphas,  # large a, strong regul.
                   'normalize': False}

    if target_ts is None:
        # default target: first column of split 0, masked by RV_mask
        fc_mask = df_data.iloc[:, -1].loc[0]  # .shift(lag, fill_value=False)
        target_ts = df_data.iloc[:, [0]].loc[0][fc_mask]
    # standardize the target
    target_ts = (target_ts - target_ts.mean()) / target_ts.std()

    if keys is None:
        # fix: keys=None previously crashed below when building the row
        # label (iterating None); default to every predictor column,
        # i.e. everything but the two split columns.
        keys = list(df_data.columns[:-2])

    out = rg.fit_df_data_ridge(df_data=df_data,
                               target=target_ts,
                               keys=keys,
                               tau_min=min(lags), tau_max=max(lags),
                               kwrgs_model=kwrgs_model,
                               match_lag_region_to_lag_fc=match_lag,
                               transformer=fc_utils.standardize_on_train)

    prediction, weights, models_lags = out
    # get skill scores; benchmark is the climatological mean of the target
    clim_mean_temp = float(target_ts.mean())
    RMSE_SS = fc_utils.ErrorSkillScore(constant_bench=clim_mean_temp).RMSE
    MAE_SS = fc_utils.ErrorSkillScore(constant_bench=clim_mean_temp).MAE
    score_func_list = [RMSE_SS, fc_utils.corrcoef, MAE_SS]

    # NOTE(review): blocksize is taken from module scope — confirm.
    df_train_m, df_test_s_m, df_test_m, df_boot = fc_utils.get_scores(prediction,
                                                             df_data.iloc[:, -2:],
                                                             score_func_list,
                                                             n_boot=n_boot,
                                                             blocksize=blocksize,
                                                             rng_seed=1)
    # build a readable row label: autoregressive keys ('AR...') joined with
    # the remaining (teleconnection) keys
    index = np.unique(core_pp.flatten([k.split('_') for k in keys]))
    AR = [l for l in index if '..' not in l]
    AR = [l for l in AR if 'PDO' not in l]
    index = [k for k in index if k not in AR]
    df_test_m.index = ['AR' + ''.join(AR) + '_' + '_'.join(index)]

    n_splits = df_data.index.levels[0].size  # test for high alpha
    # if many splits select the maximum alpha, the model is maximally
    # regularized, i.e. the timeseries carries no information.
    for col in df_test_m.columns.levels[0]:
        cvfitalpha = [models_lags[f'lag_{col}'][f'split_{s}'].alpha_ for s in range(n_splits)]
        print('lag {} mean alpha {:.0f}'.format(col, np.mean(cvfitalpha)))
        maxalpha_c = list(cvfitalpha).count(alphas[-1])
        if maxalpha_c > n_splits/3:
            print(f'\nlag {col} alpha {int(np.mean(cvfitalpha))}')
            print(f'{maxalpha_c} splits are max alpha\n')
            # maximum regularization selected. No information in timeseries
            # df_test_m.loc[:,pd.IndexSlice[col, 'corrcoef']][:] = 0
            # df_boot.loc[:,pd.IndexSlice[col, 'corrcoef']][:] = 0
            no_info_fc.append(col)  # NOTE(review): module-level list — confirm
    # out-of-sample (test) prediction timeseries
    df_test = functions_pp.get_df_test(prediction.merge(df_data.iloc[:, -2:],
                                                    left_index=True,
                                                    right_index=True)).iloc[:, :-2]
    return prediction, df_test, df_test_m, df_boot, models_lags, weights, df_test_s_m, df_train_m
Example #2
0
def get_df_forcing_cond_fc(rg_list,
                           target_ts,
                           fcmodel,
                           kwrgs_model,
                           mean_vars=None):
    """Attach a weighted-mean Pacific SST 'forcing' timeseries to each rg.

    For every rg in ``rg_list`` the dominant Pacific SST precursor region is
    selected, its (fold-wise) timeseries are combined with the normalized
    regression weights, and the resulting out-of-sample timeseries is stored
    on ``rg.df_forcing``.

    Parameters
    ----------
    rg_list : list
        RGCPD objects; each gets a ``df_forcing`` attribute assigned.
    target_ts : pd.DataFrame
        Target timeseries, forwarded to ``get_df_mean_SST``.
    fcmodel, kwrgs_model
        Forecast model and its keyword arguments, forwarded as well.
    mean_vars : list, optional
        Variables to average; defaults to ['sst', 'smi'].
        (fix: was a mutable default argument)

    Notes
    -----
    Relies on module-level names ``find_precursors``, ``functions_pp``,
    ``get_df_mean_SST``, ``pd`` and ``np``.
    """
    if mean_vars is None:
        mean_vars = ['sst', 'smi']
    for j, rg in enumerate(rg_list):

        PacAtl = []
        # find west-sub-tropical Atlantic region: label closest to 29N, 290E
        df_labels = find_precursors.labels_to_df(rg.list_for_MI[0].prec_labels)
        dlat = df_labels['latitude'] - 29
        dlon = df_labels['longitude'] - 290
        zz = pd.concat([dlat.abs(), dlon.abs()], axis=1)
        Atlan = zz.query('latitude < 10 & longitude < 10')
        if Atlan.size > 0:
            PacAtl.append(int(Atlan.index[0]))
        PacAtl.append(int(df_labels['n_gridcells'].idxmax()))  # Pacific SST
        # NOTE(review): the next line discards the Atlantic label found above
        # and keeps only the Pacific region — confirm this is intentional.
        PacAtl = [int(df_labels['n_gridcells'].idxmax())]  # only Pacific

        weights_norm = rg.prediction_tuple[1]  # .mean(axis=0, level=1)
        # weights_norm = weights_norm.sort_values(ascending=False, by=0)

        # precursor keys belonging to the selected region(s), SST only
        keys = [
            k for k in weights_norm.index.levels[1]
            if int(k.split('..')[1]) in PacAtl
        ]
        keys = [k for k in keys if 'sst' in k]  # only SST
        labels = ['..'.join(k.split('..')[1:]) for k in keys] + [
            '0..smi_sp'
        ]  # add smi just because it almost always in there

        df_mean, keys_dict = get_df_mean_SST(rg,
                                             mean_vars=mean_vars,
                                             n_strongest='all',
                                             weights=True,
                                             fcmodel=fcmodel,
                                             kwrgs_model=kwrgs_model,
                                             target_ts=target_ts,
                                             labels=labels)

        # apply weighted mean based on coefficients of precursor regions
        weights_norm = weights_norm.loc[pd.IndexSlice[:, keys], :]
        # weights_norm = weights_norm.div(weights_norm.max(axis=0))
        weights_norm = weights_norm.div(weights_norm.max(axis=0, level=0),
                                        level=0)
        weights_norm = weights_norm.reset_index().pivot(index='level_0',
                                                        columns='level_1')[0]
        weights_norm.index.name = 'fold'
        df_mean.index.name = ('fold', 'time')
        PacAtl_ts = weights_norm.multiply(df_mean[keys], axis=1, level=0)
        # collapse folds to the out-of-sample (test) timeseries
        PacAtl_ts = functions_pp.get_df_test(PacAtl_ts.mean(axis=1),
                                             df_splits=rg.df_splits)

        rg.df_forcing = PacAtl_ts
Example #3
0
def prediction_wrapper(q):
    """Fit a logistic-regression forecast of the target exceeding quantile q.

    The target is binarized to 1 where it exceeds its q-quantile, then a
    LogisticRegressionCV model is fit on all SST precursor timeseries for
    lags 0..4 and verified with AUC / Brier skill scores.

    Parameters
    ----------
    q : float
        Quantile in (0, 1) defining the binary event target > quantile(q).

    Returns
    -------
    tuple
        (df_train_m, df_test_m, df_boot, df_test, models_lags, SS)

    Notes
    -----
    Relies on module-level names ``rg``, ``fc_utils``, ``functions_pp``,
    ``metrics``, ``ScikitModel`` and ``LogisticRegressionCV``.
    """
    fcmodel = ScikitModel(scikitmodel=LogisticRegressionCV).fit
    kwrgs_model = {
        'class_weight': {
            0: 1,
            1: 1
        },
        'scoring': 'neg_brier_score',
        'penalty': 'l2',
        'solver': 'lbfgs'
    }

    lag = 4
    # use all SST precursor timeseries as predictors
    # (fix: removed a dead assignment that was immediately overwritten)
    keys = [k for k in rg.df_data.columns[2:-2] if 'sst' in k]
    target_ts = rg.TV_ts  # - rg.TV_ts.mean()) / rg.TV_ts.std()
    # target_ts = rg.df_data_ext.loc[0][['mx2t']][rg.df_data.loc[0]['RV_mask']]
    target_ts = target_ts.to_dataframe('target')[['target']]
    target_ts.index.name = None
    # binarize: 1 where the target exceeds its q-quantile
    target_ts = (target_ts > target_ts.quantile(q=q)).astype(int)
    out = rg.fit_df_data_ridge(target=target_ts,
                               fcmodel=fcmodel,
                               keys=keys,
                               tau_min=0,
                               tau_max=lag,
                               kwrgs_model=kwrgs_model)

    prediction, weights, models_lags = out

    # out-of-sample (test) prediction timeseries
    df_test = functions_pp.get_df_test(
        prediction.merge(rg.df_data.iloc[:, -2:].copy(),
                         left_index=True,
                         right_index=True)).iloc[:, :-2]

    # get skill scores; benchmark is the climatological event frequency
    clim_mean_temp = float(target_ts.mean())
    SS = fc_utils.ErrorSkillScore(constant_bench=clim_mean_temp)
    BSS = SS.BSS
    score_func_list = [
        metrics.roc_auc_score, BSS,
        fc_utils.ErrorSkillScore().AUC_SS
    ]

    df_train_m, df_test_s_m, df_test_m, df_boot = fc_utils.get_scores(
        prediction,
        rg.df_data.iloc[:, -2:],
        score_func_list,
        score_per_test=False,
        n_boot=0,
        blocksize=2,
        rng_seed=1)
    return df_train_m, df_test_m, df_boot, df_test, models_lags, SS
Example #4
0
# Sanity-check plots: compare the original csv mid-west spatial mean (red)
# against the RGCPD full timeseries (blue).
ax = df_orig_midwest.plot(ax=ax, c='red', title='Red is orig csv mid-west spatial data mean')
rg_always.df_fullts.plot(ax=ax, c='blue')

# Same comparison for the all-region spatial mean.
f, ax = plt.subplots()
ax = df_orig_all.plot(ax=ax, c='red', title='Red is orig csv all spatial data mean')
rg_always.df_fullts.plot(ax=ax, c='blue')

# Compare against USDA observed yield (Beguería et al. 2020).
f, ax = plt.subplots()
ax = df_USDA_midwest[['obs_yield']].plot(ax=ax, c='red', title='Red is USDA obs Beguería et al. 2020')
rg_always.df_fullts.plot(ax=ax, c='blue')
df_orig_midwest.plot(ax=ax)

#%%
# Load hindcast predictions from disk (hard-coded local path) and collapse
# the splits to the out-of-sample (test) timeseries.
filepath_RGCPD_hindcast = '/Users/semvijverberg/surfdrive/output_paper3/USDA_Soy_csv_midwest_bimonthly_random_10_s1_1950_2019/predictions_s1_continuous.h5'
df_preds = functions_pp.load_hdf5(filepath_RGCPD_hindcast)['df_predictions']
df_preds = functions_pp.get_df_test(df_preds) ; df_preds.index.name='time'
xr_obs = df_preds[['raw_target']].to_xarray().to_array().squeeze()


# Re-add the removed linear trend (trend = obs - detrended obs) to the first
# prediction column so it is comparable to the raw target.
trend = xr_obs - core_pp.detrend_lin_longterm(xr_obs)
recon = df_preds.iloc[:,[0]] + trend.values[None,:].T #.values[1:][None,:].T + float(rg_always.df_fullts.mean())
ax = recon.plot()
df_preds[['raw_target']].plot(ax=ax)
#%%
# Same reconstruction for prediction column 0, plotted against the USDA
# August yield forecast and the raw target.
pred = df_preds[[0]] + trend.values[None,:].T
ax = pred.plot()
df_USDA_midwest[['frcst_aug_yield']].plot(ax=ax)
df_preds[['raw_target']].plot(ax=ax)

#%%
f, ax = plt.subplots()
Example #5
0
                  list_import_ts=[('PDO', z_filepath)],
                  start_end_TVdate=('05-01', '08-01'),
                  start_end_date=None,
                  start_end_year=(1979+int(round(lowpass+0.49)), 2020),
                  tfreq=2,
                  path_outmain=path_out_main,
                  append_pathsub='_' + exper)
    rgPDO.pp_TV(name_ds, anomaly=True, kwrgs_core_pp_time={'dailytomonths':True})

    rgPDO.pp_precursors()

    rgPDO.traintest('random_10')
    rgPDO.get_ts_prec()
    # Predicting PDO at lag 1 vs start_end_data of RW
    PDO1, df_lagmask1 = get_lagged_ts(rgPDO.df_data.copy() , 0, keys_ext)
    target = functions_pp.get_df_test(PDO1,
                                      df_splits=rgPDO.df_data[['TrainIsTrue']].loc[PDO1.index])
    PDO2, df_lagmask2 = get_lagged_ts(rgPDO.df_data.copy() , 2, keys_ext)
    # PDO3, df_lagmask3 = get_lagged_ts(rgPDO.df_data.copy() , 3, keys_ext)
    # PDO4, df_lagmask4 = get_lagged_ts(rgPDO.df_data.copy() , 4, keys_ext)
    # PDO5, df_lagmask5 = get_lagged_ts(rgPDO.df_data.copy() , 5, keys_ext)
    df_prec = PDO2 # AR1 model to predict PDO at lag 1 vs RW
    # df_prec = df_prec.merge(PDO3, left_index=True, right_index=True)
    # df_prec = df_prec.merge(PDO4, left_index=True, right_index=True)
    # df_prec = df_prec.merge(PDO5, left_index=True, right_index=True)


    out = rgPDO.fit_df_data_ridge(target=target,
                                  df_data = df_prec,
                                  tau_min=0, tau_max=0,
                                  kwrgs_model={'alphas':np.array([.01,.1,1,5,10])})
    predict = out[0].rename({0:'AR1'}, axis=1)
# predictions temp using PDO
# Build lagged (1- and 2-step) low-pass PDO predictors, subset to the
# 1980-2020 period and merge the train/test split columns back in.
df_precPDOs = merge_lagged_wrapper(rg.df_data.copy(), [1, 2], ['PDO0.5rm'])
dates = core_pp.get_subdates(rg.dates_TV, start_end_year=(1980, 2020))
df_precPDOs = df_precPDOs.loc[pd.IndexSlice[:, dates], :]
df_precPDOs = df_precPDOs.merge(rg.df_splits.loc[pd.IndexSlice[:, dates], :],
                                left_index=True,
                                right_index=True)
# Forecast the temperature target from the lagged PDO predictors.
outPDOtemp = prediction_wrapper(df_precPDOs,
                                lags=np.array([0]),
                                target_ts=rg.TV.RV_ts.loc[dates],
                                keys=None,
                                match_lag=False,
                                n_boot=n_boot)

# predictions PDO using PDO
# AR-style benchmark: predict the PDO itself from its own 1-step lag.
target_PDO = functions_pp.get_df_test(rg.df_data.copy()[['PDO', 'TrainIsTrue'
                                                         ]])[['PDO']]
df_precPDOs = merge_lagged_wrapper(rg.df_data.copy(), [1], ['PDO'])
dates = core_pp.get_subdates(rg.dates_TV, start_end_year=(1980, 2020))
df_precPDOs = df_precPDOs.loc[pd.IndexSlice[:, dates], :]
df_precPDOs = df_precPDOs.merge(rg.df_splits.loc[pd.IndexSlice[:, dates], :],
                                left_index=True,
                                right_index=True)
outPDO = prediction_wrapper(df_precPDOs,
                            lags=np.array([0]),
                            target_ts=target_PDO.loc[dates],
                            keys=None,
                            match_lag=False,
                            n_boot=n_boot)

# Conditional forecast
# Lagged PDO timeseries used as the conditioning 'forcing' series.
df_forcings = merge_lagged_wrapper(rg.df_data, [1], df_PDOs.columns)
        n_splits = rg.df_data.index.levels[0].size
        cvfitalpha = [
            models_lags[f'lag_{lag}'][f'split_{s}'].alpha_
            for s in range(n_splits)
        ]
        print('mean alpha {:.2f}'.format(np.mean(cvfitalpha)))
        maxalpha_c = list(cvfitalpha).count(alphas[-1])
        if maxalpha_c > n_splits / 3:
            print(f'\n{month} alpha {int(np.mean(cvfitalpha))}')
            print(f'{maxalpha_c} splits are max alpha\n')
            # maximum regularization selected. No information in timeseries
            # df_test_m['Prediction']['corrcoef'][:] = 0
            # df_boot['Prediction']['corrcoef'][:] = 0
            no_info_fc.append(month)
        df_test = functions_pp.get_df_test(
            prediction.merge(rg.df_data.iloc[:, -2:],
                             left_index=True,
                             right_index=True)).iloc[:, :2]

    else:
        print('no precursor timeseries found, scores all 0')
        df_boot = pd.DataFrame(data=np.zeros((n_boot, len(score_func_list))),
                               columns=['RMSE', 'corrcoef', 'MAE'])
        df_test_m = pd.DataFrame(np.zeros((1, len(score_func_list))),
                                 columns=['RMSE', 'corrcoef', 'MAE'])

    list_test_b.append(df_boot)
    list_test.append(df_test_m)
    append_dict(month, df_test_m)
    # df_ana.loop_df(df=rg.df_data[keys], colwrap=1, sharex=False,
    #                       function=df_ana.plot_timeseries,
    #                       kwrgs={'timesteps':rg.fullts.size,
Example #8
0
def get_scores(prediction,
               df_splits: pd.DataFrame = None,
               score_func_list: list = None,
               score_per_test=True,
               n_boot: int = 1,
               blocksize: int = 1,
               rng_seed=1):
    '''Verify a prediction on train data, per test split and the full test set.

    Parameters
    ----------
    prediction : pd.DataFrame
        First column is the observed target; remaining columns are
        predictions (one per lag). May carry a (split, time) MultiIndex.
    df_splits : pd.DataFrame, optional
        Boolean 'TrainIsTrue' and 'RV_mask' columns aligned with
        ``prediction``. If None, every timestep is treated as test data.
    score_func_list : list, optional
        Score functions ``f(y_true, y_pred)``; defaults to
        [metrics.mean_squared_error, corrcoef].
    score_per_test : bool, optional
        If True, also compute scores on each individual test split.
    n_boot : int, optional
        Number of block-bootstrap samples for the test scores.
    blocksize : int, optional
        Block length used for the bootstrap resampling.
    rng_seed : int, optional
        Seed for the bootstrap random number generator.

    Returns
    -------
    pd.DataFrames format:
    index [opt. splits]
    Multi-index columns [lag, metric name]
    df_trains, df_test_s, df_tests, df_boots.

    '''
    #%%
    if df_splits is None:
        # assuming all is test data
        TrainIsTrue = np.zeros((prediction.index.size, 1))
        RV_mask = np.ones((prediction.index.size, 1))
        df_splits = pd.DataFrame(np.concatenate([TrainIsTrue, RV_mask],
                                                axis=1),
                                 index=prediction.index,
                                 dtype=bool,
                                 columns=['TrainIsTrue', 'RV_mask'])

    # add empty multi-index to maintain same data format
    if not hasattr(df_splits.index, 'levels'):
        df_splits = pd.concat([df_splits], keys=[0])

    if not hasattr(prediction.index, 'levels'):
        prediction = pd.concat([prediction], keys=[0])

    pred = prediction.merge(df_splits, left_index=True, right_index=True)

    # score on train and per test split
    if score_func_list is None:
        score_func_list = [metrics.mean_squared_error, corrcoef]
    splits = pred.index.levels[0]
    columns = prediction.columns[1:]
    df_trains = np.zeros((columns.size), dtype=object)
    df_tests_s = np.zeros((columns.size), dtype=object)
    for c, col in enumerate(columns):
        df_train = pd.DataFrame(np.zeros((splits.size, len(score_func_list))),
                                columns=[f.__name__ for f in score_func_list])
        df_test_s = pd.DataFrame(np.zeros((splits.size, len(score_func_list))),
                                 columns=[f.__name__ for f in score_func_list])
        for s in splits:
            sp = pred.loc[s]
            trainRV = np.logical_and(sp['TrainIsTrue'], sp['RV_mask'])
            testRV = np.logical_and(~sp['TrainIsTrue'], sp['RV_mask'])
            for f in score_func_list:
                name = f.__name__
                if trainRV.any():  # training data exists
                    train_score = f(sp[trainRV].iloc[:, 0],
                                    sp[trainRV].loc[:, col])
                else:
                    train_score = np.nan
                if score_per_test and testRV.any():
                    test_score = f(sp[testRV].iloc[:, 0],
                                   sp[testRV].loc[:, col])
                else:
                    test_score = np.nan

                df_train.loc[s, name] = train_score
                df_test_s.loc[s, name] = test_score
        df_trains[c] = df_train
        df_tests_s[c] = df_test_s
    df_trains = pd.concat(df_trains, keys=columns, axis=1)
    df_tests_s = pd.concat(df_tests_s, keys=columns, axis=1)

    # score on complete test
    df_tests = np.zeros((columns.size), dtype=object)
    pred_test = functions_pp.get_df_test(pred).iloc[:, :-2]
    if pred_test.size != 0:  # ensure test data is available
        for c, col in enumerate(columns):
            df_test = pd.DataFrame(
                np.zeros((1, len(score_func_list))),
                columns=[f.__name__ for f in score_func_list])

            for f in score_func_list:
                name = f.__name__
                y_true = pred_test.iloc[:, 0]
                y_pred = pred_test.loc[:, col]
                df_test[name] = f(y_true, y_pred)
            df_tests[c] = df_test
        df_tests = pd.concat(df_tests, keys=columns, axis=1)

    # Bootstrapping with replacement
    df_boots = np.zeros((columns.size), dtype=object)
    if pred_test.size != 0:  # ensure test data is available
        # the block partition is identical for every column: build it once.
        # (fix: the original sized the blocks via the leaked loop variable
        # y_true from the section above; len(pred_test) is the same length
        # and self-contained.)
        old_index = range(0, len(pred_test), 1)
        n_bl = blocksize
        chunks = [
            old_index[n_bl * i:n_bl * (i + 1)]
            for i in range(int(len(old_index) / n_bl))
        ]
        for c, col in enumerate(columns):
            score_list = _bootstrap(pred_test.iloc[:, [0, c + 1]],
                                    n_boot,
                                    chunks,
                                    score_func_list,
                                    rng_seed=rng_seed)
            df_boot = pd.DataFrame(
                score_list, columns=[f.__name__ for f in score_func_list])
            df_boots[c] = df_boot
        df_boots = pd.concat(df_boots, keys=columns, axis=1)

    out = (df_trains, df_tests_s, df_tests, df_boots)

    #%%
    return out
#%%
# Map forecast month -> two-letter bimonthly period name used in filenames.
fc_months_periodnames = {
    'August': 'JJ',
    'July': 'MJ',
    'June': 'AM',
    'May': 'MA',
    'April': 'FM',
    'March': 'JF',
    'December': 'SO',
    'February': 'DJ'
}
# Load the stored output frame for the selected forecast month.
filepath_df_output = os.path.join(
    path_input_main, f'df_output_{fc_months_periodnames[fc_month]}.h5')

df_output = functions_pp.load_hdf5(filepath_df_output)
df_data = df_output['df_data']
# last two columns are assumed to be the TrainIsTrue / RV_mask split columns
df_splits = df_data.iloc[:, -2:]

# Load previously computed (conditional) verification scores.
out = utils_paper3.load_scores(['Target'],
                               model_name,
                               model_name,
                               2000,
                               filepath_df_datas,
                               condition='strong 50%')
df_scores, df_boots, df_preds = out

# Select the scores/bootstraps/predictions for this forecast month and
# collapse the prediction to the out-of-sample (test) timeseries.
df_test_m = [d[fc_month] for d in df_scores]
df_boots_list = [d[fc_month] for d in df_boots]
df_test = df_preds[0][['Target', fc_month]]
df_test = functions_pp.get_df_test(df_test, df_splits=df_splits)
Example #10
0
    def parallel(cluster, month, agg_level, n_lags, kwrgs_MI, fold_method,
                 row_arrays, column_array, subfolder):
        """Run the full pipeline (define -> process -> forecast) for one
        cluster/month combination and collect its skill scores.

        Returns (df_ss_result, test_df_pred, rg) and, as a side effect,
        writes the per-cluster skill-score csv to Results/skillscores/.

        NOTE(review): relies on outer-scope names get_list_of_name_path,
        define, process, forecast, row_idx_2_arr, row_idx_3_arr,
        all_targetperiods_dict and main_dir — confirm where those are
        defined.
        """
        #%%
        print(f'Starting cluster {cluster}, prediciting {month}')
        #get list_of_name_path
        list_of_name_path = get_list_of_name_path(agg_level, cluster)
        #run define
        rg, list_for_MI, lags, crossyr = define(list_of_name_path, month,
                                                n_lags, kwrgs_MI, subfolder)
        #run check (possible, not necessary)
        #check(rg, list_of_name_path, cluster)
        #run processing
        rg = process(rg, lags, fold_method, crossyr)
        #run forecast
        test_scores, train_scores, prediction = forecast(rg, crossyr)

        #store skill score results in df_ss_result dataframe
        df_ss_result = pd.DataFrame(np.zeros(
            (len(row_arrays[0]), len(column_array)), dtype=float),
                                    index=row_arrays,
                                    columns=column_array)
        # first 3 rows receive the test scores, the next 3 the train scores
        for count, i in enumerate(
                row_idx_2_arr[:6]
        ):  #always loop over test test test train train train per cluster
            if count < 3:
                df_ss_result.loc[
                    (cluster, i, row_idx_3_arr[count]),
                    all_targetperiods_dict[month]] = test_scores[count]
            else:
                df_ss_result.loc[(cluster, i, row_idx_3_arr[count]),
                                 all_targetperiods_dict[month]] = train_scores[
                                     count - 3]

        #get test df actual and predictions
        test_df_pred = functions_pp.get_df_test(prediction,
                                                df_splits=pd.DataFrame(
                                                    rg.df_data.iloc[:, -2:]))

        #update dates
        # shift the index forward by (month number - 1) months so the dates
        # reflect the target period rather than the predictor period
        delta = int(month[0][:2]) - 1
        date_list = test_df_pred.index.get_level_values(0).shift(delta,
                                                                 freq='MS')
        test_df_pred.set_index([date_list], inplace=True)

        #change column header of prediction to RV#ts_pred
        new_columns = test_df_pred.columns.values
        new_columns[1] = new_columns[0] + '_pred'
        test_df_pred.columns = new_columns

        #save intermediate cluster csv
        results_path = os.path.join(
            main_dir, 'Results', 'skillscores',
            f'{agg_level}_{fold_method}')  #path of results
        os.makedirs(results_path,
                    exist_ok=True)  # make folder if it doesn't exist
        df_ss_result.to_csv(
            os.path.join(
                results_path,
                str(cluster) + '_' + str(all_targetperiods_dict[month]) +
                '_ss_scores_' + agg_level +
                '.csv'))  #intermediate save skillscores per cluster to csv
        #%%
        return df_ss_result, test_df_pred, rg
Example #11
0
def cond_forecast_table(rg_list, score_func_list, n_boot=0):
    """Tabulate forecast skill conditional on the strength of the forcing.

    For each verification metric and each rg (forecast month), the skill is
    recomputed on the subset of test timesteps where rg.df_forcing is
    anomalous ('strong': outside the q / 1-q quantiles) and where it is mild
    ('weak': inside the 0.5-q .. 0.5+q band), for q in {.15, .25}.

    Parameters
    ----------
    rg_list : list
        RGCPD objects carrying df_forcing, prediction_tuple,
        verification_tuple and df_splits.
    score_func_list : list
        Score functions forwarded to fc_utils.get_scores.
    n_boot : int, optional
        If > 0, bootstrap score distributions are stored instead of
        single point scores.

    Returns
    -------
    pd.DataFrame
        Rows: MultiIndex (metric, fc_month); columns: 'strong X%' /
        'weak X%' (crossed with bootstrap sample index when n_boot > 0).
    """
    df_test_m = rg_list[0].verification_tuple[2]
    quantiles = [.15, .25]
    # NOTE: this local name shadows any module-level `metrics` import
    metrics = df_test_m.columns.levels[1]
    # result array: (metric, month, strong/weak per quantile[, boot sample])
    if n_boot > 0:
        cond_df = np.zeros(
            (metrics.size, len(rg_list), len(quantiles) * 2, n_boot))
    else:
        cond_df = np.zeros((metrics.size, len(rg_list), len(quantiles) * 2))
    for i, met in enumerate(metrics):
        for j, rg in enumerate(rg_list):

            PacAtl_ts = rg.df_forcing

            prediction = rg.prediction_tuple[0]
            df_test = functions_pp.get_df_test(prediction,
                                               df_splits=rg.df_splits)

            # df_test_m = rg.verification_tuple[2]
            # cond_df[i, j, 0] = df_test_m[df_test_m.columns[0][0]].loc[0][met]
            # l = 0, 2 are the 'strong' slots; l+1 = 1, 3 the 'weak' slots
            for k, l in enumerate(range(0, 4, 2)):
                q = quantiles[k]
                low = PacAtl_ts < PacAtl_ts.quantile(q)
                high = PacAtl_ts > PacAtl_ts.quantile(1 - q)
                mask_anomalous = np.logical_or(low, high)
                # anomalous Boundary forcing
                condfc = df_test[mask_anomalous.values]
                # condfc = condfc.rename({'causal':periodnames[i]}, axis=1)
                cond_verif_tuple = fc_utils.get_scores(
                    condfc,
                    score_func_list=score_func_list,
                    n_boot=n_boot,
                    score_per_test=False,
                    blocksize=1,
                    rng_seed=1)
                # NOTE: df_test_m is rebound here, shadowing the outer frame
                df_train_m, df_test_s_m, df_test_m, df_boot = cond_verif_tuple
                # side effect: only the 'strong' verification is stored on rg
                rg.cond_verif_tuple = cond_verif_tuple
                if n_boot == 0:
                    cond_df[i, j,
                            l] = df_test_m[df_test_m.columns[0][0]].loc[0][met]
                else:
                    cond_df[i, j, l, :] = df_boot[df_boot.columns[0][0]][met]
                # mild boundary forcing
                higher_low = PacAtl_ts > PacAtl_ts.quantile(.5 - q)
                lower_high = PacAtl_ts < PacAtl_ts.quantile(.5 + q)
                mask_anomalous = np.logical_and(higher_low,
                                                lower_high)  # changed 11-5-21

                condfc = df_test[mask_anomalous.values]
                # condfc = condfc.rename({'causal':periodnames[i]}, axis=1)
                cond_verif_tuple = fc_utils.get_scores(
                    condfc,
                    score_func_list=score_func_list,
                    n_boot=n_boot,
                    score_per_test=False,
                    blocksize=1,
                    rng_seed=1)
                df_train_m, df_test_s_m, df_test_m, df_boot = cond_verif_tuple
                if n_boot == 0:
                    cond_df[i, j, l +
                            1] = df_test_m[df_test_m.columns[0][0]].loc[0][met]
                else:
                    cond_df[i, j,
                            l + 1, :] = df_boot[df_boot.columns[0][0]][met]

    # column labels, e.g. 'strong 30%' / 'weak 30%' for q = .15
    columns = [[f'strong {int(q*200)}%', f'weak {int(q*200)}%']
               for q in quantiles]
    columns = functions_pp.flatten(columns)
    if n_boot > 0:
        columns = pd.MultiIndex.from_product([columns, list(range(n_boot))])

    df_cond_fc = pd.DataFrame(cond_df.reshape(
        (len(metrics) * len(rg_list), -1)),
                              index=pd.MultiIndex.from_product([
                                  list(metrics),
                                  [rg.fc_month for rg in rg_list]
                              ]),
                              columns=columns)

    return df_cond_fc
Example #12
0
    prediction,
    rg.df_data.iloc[:, -2:],
    score_func_list,
    n_boot=n_boot,
    blocksize=1,
    rng_seed=seed)

# Inspect the CV-selected regularization strengths across the train splits.
m = models_lags[f'lag_{lag_}'][f'split_{0}']
cvfitalpha = [
    models_lags[f'lag_{lag_}'][f'split_{s}'].alpha_ for s in range(n_spl)
]
# Warn when a selected alpha hit the edge of the search grid.
if kwrgs_model['alphas'].max() in cvfitalpha: print('Max a reached')
if kwrgs_model['alphas'].min() in cvfitalpha: print('Min a reached')
# assert kwrgs_model['alphas'].min() not in cvfitalpha, 'decrease min a'

# Out-of-sample prediction timeseries; prediction column renamed 'causal'.
df_test = functions_pp.get_df_test(predict.rename({lag_: 'causal'}, axis=1),
                                   df_splits=rg.df_splits)
print(df_test_m)

#%%
from matplotlib import gridspec
from matplotlib.offsetbox import TextArea, VPacker, AnnotationBbox

fontsize = 16

# Single-panel figure for the timeseries plot below.
fig = plt.figure(figsize=(12, 5))
gs = gridspec.GridSpec(1, 1, height_ratios=None)
facecolor = 'white'
ax0 = plt.subplot(gs[0], facecolor=facecolor)
# df_test.plot(ax=ax0)
ax0.plot_date(df_test.index,
              df_test[target_dataset],