Example #1
0
def _expected_emc_interval(modelB_values, modelA_values, total_days,
                           multiplier):
    """Return the hand-computed reference interval for the 'EMC' cases.

    The value is ``multiplier * std / sqrt(total_days - 1)``, where ``std``
    is the population standard deviation of the paired differences
    ``modelB_values - modelA_values``.

    Args:
        modelB_values: NumPy array of values for the second model.
        modelA_values: NumPy array of values for the first model, same
            length as ``modelB_values``.
        total_days: Number of samples used in the denominator.
        multiplier: Scale factor applied to the standard deviation
            (presumably the critical value calculate_ci picks for this
            sample size -- confirm against plot_util).

    Returns:
        The expected confidence interval as a float.
    """
    differences = modelB_values - modelA_values
    expected_std = np.sqrt(((differences - differences.mean())**2).mean())
    return multiplier * expected_std / np.sqrt(total_days - 1)


def test_calculate_ci():
    """Independently test plot_util.calculate_ci for each CI method.

    Tests 1 and 2 check the 'EMC' method on plain arrays of 15 and 30
    values against a hand-computed interval.  Test 3 checks the
    'EMC_MONTE_CARLO' method on partial-sum DataFrames against a stored
    reference value.
    """
    # Seed the global NumPy RNG so the random draws are reproducible.
    # The np.random.rand calls below must stay in this order: the stored
    # reference value of Test 3 depends on the draws consumed before it.
    np.random.seed(0)

    # Floats are compared with a tight relative tolerance instead of
    # exact equality so last-bit rounding differences do not fail the test.
    rel_tol = 1e-9

    # Tests 1 and 2: (modelB_values, modelA_values, total_days, multiplier)
    emc_cases = [
        (
            np.array([
                0.4983181, 0.63076339, 0.73753565, 0.97960614, 0.74599612,
                0.18829818, 0.29490815, 0.5063043, 0.15074971, 0.89009979,
                0.81246532, 0.45399668, 0.98247594, 0.38211414, 0.26690678,
            ]),
            np.array([
                0.37520287, 0.89286092, 0.66785908, 0.55742834, 0.60978346,
                0.5760979, 0.55055558, 0.00388764, 0.55821689, 0.56042747,
                0.30637593, 0.83325185, 0.84098604, 0.04021844, 0.57214717,
            ]),
            15,
            2.228,
        ),
        (
            np.array([
                0.4983181, 0.63076339, 0.73753565, 0.97960614, 0.74599612,
                0.18829818, 0.29490815, 0.5063043, 0.15074971, 0.89009979,
                0.81246532, 0.45399668, 0.98247594, 0.38211414, 0.26690678,
                0.64162609, 0.01370935, 0.79477382, 0.31573415, 0.35282921,
                0.57511574, 0.27815519, 0.49562973, 0.4859588, 0.16461642,
                0.75849444, 0.44332183, 0.94935173, 0.62597888, 0.12819335,
            ]),
            np.array([
                0.37520287, 0.89286092, 0.66785908, 0.55742834, 0.60978346,
                0.5760979, 0.55055558, 0.00388764, 0.55821689, 0.56042747,
                0.30637593, 0.83325185, 0.84098604, 0.04021844, 0.57214717,
                0.75091023, 0.47321941, 0.12862311, 0.8644722, 0.92040807,
                0.61376225, 0.24347848, 0.69990467, 0.69711331, 0.91866337,
                0.63945963, 0.59999792, 0.2920741, 0.64972479, 0.25025121,
            ]),
            30,
            2.042,
        ),
    ]
    ci_method = 'EMC'
    stat = 'bias'
    average_method = 'MEAN'
    for modelB_values, modelA_values, total_days, multiplier in emc_cases:
        randx = np.random.rand(10000, total_days)
        expected_intvl = _expected_emc_interval(modelB_values, modelA_values,
                                                total_days, multiplier)
        test_intvl = plot_util.calculate_ci(logger, ci_method, modelB_values,
                                            modelA_values, total_days, stat,
                                            average_method, randx)
        assert np.isclose(test_intvl, expected_intvl, rtol=rel_tol, atol=0.0)

    # Test 3
    date_base = datetime.datetime(2019, 1, 1)
    expected_stat_file_dates = [
        (date_base + datetime.timedelta(days=i)).strftime('%Y%m%d_%H%M%S')
        for i in range(5)
    ]
    partial_sum_columns = [
        'TOTAL', 'FBAR', 'OBAR', 'FOBAR', 'FFBAR', 'OOBAR', 'MAE'
    ]
    model_data_indexA = pd.MultiIndex.from_product(
        [['MODEL_TESTA'], expected_stat_file_dates],
        names=['model_plot_name', 'dates'])
    model_data_arrayA = np.array([
        [3600, 5525.75062, 5525.66493, 30615218.26089, 30615764.49722,
         30614724.90979, 5.06746],
        [3600, 5519.11108, 5519.1014, 30549413.45946, 30549220.68868,
         30549654.24048, 5.12344],
        [3600, 5516.80228, 5516.79513, 30522742.16484, 30522884.89927,
         30522660.30975, 5.61752],
        [3600, 5516.93924, 5517.80544, 30525709.03932, 30520984.50965,
         30530479.99675, 4.94325],
        [3600, 5514.52274, 5514.68224, 30495695.82208, 30494633.24046,
         30496805.48259, 5.20369],
    ])
    model_dataA = pd.DataFrame(model_data_arrayA,
                               index=model_data_indexA,
                               columns=partial_sum_columns)
    model_data_indexB = pd.MultiIndex.from_product(
        [['MODEL_TESTB'], expected_stat_file_dates],
        names=['model_plot_name', 'dates'])
    model_data_arrayB = np.array([
        [3600, 5527.43726, 5527.79714, 30635385.37277, 30633128.08035,
         30637667.9488, 3.74623],
        [3600, 5520.22487, 5520.5867, 30562940.31742, 30560471.32084,
         30565442.31244, 4.17792],
        [3600, 5518.16049, 5518.53379, 30538694.69234, 30536683.66886,
         30540732.11308, 3.86693],
        [3600, 5519.20033, 5519.38443, 30545925.19732, 30544766.74602,
         30547108.75357, 3.7534],
        [3600, 5515.78776, 5516.17552, 30509811.84136, 30507573.43899,
         30512077.12263, 4.02554],
    ])
    model_dataB = pd.DataFrame(model_data_arrayB,
                               index=model_data_indexB,
                               columns=partial_sum_columns)
    ci_method = 'EMC_MONTE_CARLO'
    modelB_values = model_dataB
    modelA_values = model_dataA
    total_days = 5
    stat = 'bias'
    average_method = 'AGGREGATION'
    randx = np.random.rand(10000, total_days)
    # Stored reference value; valid only for seed 0 and the draw order above.
    expected_intvl = 0.3893656076904014
    test_intvl = plot_util.calculate_ci(logger, ci_method, modelB_values,
                                        modelA_values, total_days, stat,
                                        average_method, randx)
    assert np.isclose(test_intvl, expected_intvl, rtol=rel_tol, atol=0.0)
Example #2
0
 # Confidence-interval handling for the current model/lead: skipped
 # entirely when the configured method is "NONE".
 if ci_method == "NONE":
     logger.debug("Not calculating confidence intervals")
 else:
     if stat == "fbar_obar":
         # For fbar_obar the interval is taken between this model's values
         # and the observation values (see the calculate_ci call below).
         # The output file name encodes the model plot name, stat, date
         # range, valid/init time info, forecast and obs variable info,
         # interpolation, region and CI method.
         CI_filename = os.path.join(
             plotting_out_dir_data, model_plot_name + "_" + stat + "_" +
             plot_time + start_date_YYYYmmdd + "to" +
             end_date_YYYYmmdd + "_valid" + valid_time_info[0] + "to" +
             valid_time_info[-1] + "Z_init" + init_time_info[0] + "to" +
             init_time_info[-1] + "Z" + "_fcst" + fcst_var_name +
             fcst_var_level + fcst_var_extra + fcst_var_thresh +
             "_obs" + obs_var_name + obs_var_level + obs_var_extra +
             obs_var_thresh + "_interp" + interp + "_region" + region +
             "_CI_" + ci_method + ".txt")
         # NOTE(review): only five arguments are passed here, while other
         # call sites also pass stat, average_method and randx -- confirm
         # this matches the plot_util.calculate_ci signature in use.
         stat_CI = plot_util.calculate_ci(logger, ci_method,
                                          model_stat_values_array,
                                          obs_stat_values_array,
                                          total_days)
         logger.debug(
             "Writing " + ci_method +
             " confidence intervals for difference between model " +
             str(model_num) + " " + model_name + " with name on plot " +
             model_plot_name + " and the observations lead " + lead +
             " to file: " + CI_filename)
         # Opened in append mode, so each call adds one line to the file:
         # the lead right-padded with '0' to width 6, a space, then the CI.
         with open(CI_filename, 'a') as CI_file:
             CI_file.write(
                 lead.ljust(6, '0') + ' ' + str(stat_CI) + '\n')
     else:
         # For any other stat, model 1's values and names are stashed in
         # model1_* variables (presumably as the reference that later
         # models are compared against -- confirm in the code that follows).
         if model_num == 1:
             model1_stat_values_array = model_stat_values_array
             model1_plot_name = model_plot_name
             model1_name = model_name
Example #3
0
         or stat == 'baser_frate'):
     logger.debug("Writing " + ci_method +
                  " confidence intervals " +
                  "for difference between model " +
                  str(model_num) + " " + model_name +
                  " with name " + "on plot " + model_plot_name +
                  " and the " + "observations at lead " +
                  fcst_lead + " to " + "file: " + CI_file)
     if ci_method == 'EMC_MONTE_CARLO':
         logger.warning("Monte Carlo resampling not " +
                        "done for fbar_obar, orate_frate, " +
                        "or baser_frate.")
         stat_CI = '--'
     else:
         stat_CI = plot_util.calculate_ci(
             logger, ci_method, model_stat_values_array[0, :],
             model_stat_values_array[1, :], total_dates, stat,
             average_method, randx[model_idx, :, :])
     with open(CI_file, 'a') as file2write:
         file2write.write(fcst_lead + ' ' + str(stat_CI) + '\n')
 else:
     if model_num == 1:
         model1_stat_values_array = (
             model_stat_values_array[0, :])
         model1_plot_name = model_plot_name
         model1_name = model_name
     else:
         logger.debug("Writing " + ci_method + " confidence " +
                      "intervals for difference between " +
                      "model " + str(model_num) + " " +
                      model_name + " with name on plot " +
                      model_plot_name + " and model 1 " +