def test_calculate_ci():
    """Independently test plot_util.calculate_ci for each method exercised.

    Three cases are checked:

    1. ``EMC`` with 15 days of data: the expected interval is
       ``2.228 * std / sqrt(total_days - 1)``.
    2. ``EMC`` with 30 days of data: the expected interval is
       ``2.042 * std / sqrt(total_days - 1)``.
    3. ``EMC_MONTE_CARLO`` with ``AGGREGATION`` averaging on partial-sum
       DataFrames: compared against a hard-coded reference value.

    In cases 1 and 2 ``std`` is the population standard deviation of the
    (model B - model A) differences, recomputed here by hand.

    Floating-point results are compared with ``np.isclose`` rather than
    ``==`` so that harmless last-bit rounding differences (NumPy version,
    platform, summation order) do not fail the test.
    """
    # Seed the global NumPy RNG so the random draws below are reproducible.
    # np.random.seed returns None, so there is nothing worth binding.
    # NOTE: the order and shapes of the np.random.rand calls must stay as
    # they are; changing them changes the values handed to Test 3.
    np.random.seed(0)

    # Test 1: EMC method, 15 days
    ci_method = 'EMC'
    modelB_values = np.array([
        0.4983181, 0.63076339, 0.73753565, 0.97960614, 0.74599612,
        0.18829818, 0.29490815, 0.5063043, 0.15074971, 0.89009979,
        0.81246532, 0.45399668, 0.98247594, 0.38211414, 0.26690678
    ])
    modelA_values = np.array([
        0.37520287, 0.89286092, 0.66785908, 0.55742834, 0.60978346,
        0.5760979, 0.55055558, 0.00388764, 0.55821689, 0.56042747,
        0.30637593, 0.83325185, 0.84098604, 0.04021844, 0.57214717
    ])
    total_days = 15
    stat = 'bias'
    average_method = 'MEAN'
    randx = np.random.rand(10000, total_days)
    # Population standard deviation of the model differences.
    model_diff = modelB_values - modelA_values
    expected_std = np.sqrt(((model_diff - model_diff.mean())**2).mean())
    expected_intvl = 2.228 * expected_std / np.sqrt(total_days - 1)
    test_intvl = plot_util.calculate_ci(logger, ci_method, modelB_values,
                                        modelA_values, total_days, stat,
                                        average_method, randx)
    assert np.isclose(test_intvl, expected_intvl)

    # Test 2: EMC method, 30 days (different multiplier than Test 1)
    ci_method = 'EMC'
    modelB_values = np.array([
        0.4983181, 0.63076339, 0.73753565, 0.97960614, 0.74599612,
        0.18829818, 0.29490815, 0.5063043, 0.15074971, 0.89009979,
        0.81246532, 0.45399668, 0.98247594, 0.38211414, 0.26690678,
        0.64162609, 0.01370935, 0.79477382, 0.31573415, 0.35282921,
        0.57511574, 0.27815519, 0.49562973, 0.4859588, 0.16461642,
        0.75849444, 0.44332183, 0.94935173, 0.62597888, 0.12819335
    ])
    modelA_values = np.array([
        0.37520287, 0.89286092, 0.66785908, 0.55742834, 0.60978346,
        0.5760979, 0.55055558, 0.00388764, 0.55821689, 0.56042747,
        0.30637593, 0.83325185, 0.84098604, 0.04021844, 0.57214717,
        0.75091023, 0.47321941, 0.12862311, 0.8644722, 0.92040807,
        0.61376225, 0.24347848, 0.69990467, 0.69711331, 0.91866337,
        0.63945963, 0.59999792, 0.2920741, 0.64972479, 0.25025121
    ])
    total_days = 30
    stat = 'bias'
    average_method = 'MEAN'
    randx = np.random.rand(10000, total_days)
    model_diff = modelB_values - modelA_values
    expected_std = np.sqrt(((model_diff - model_diff.mean())**2).mean())
    expected_intvl = 2.042 * expected_std / np.sqrt(total_days - 1)
    test_intvl = plot_util.calculate_ci(logger, ci_method, modelB_values,
                                        modelA_values, total_days, stat,
                                        average_method, randx)
    assert np.isclose(test_intvl, expected_intvl)

    # Test 3: EMC_MONTE_CARLO method with AGGREGATION averaging on
    # DataFrames of partial sums, indexed by (model_plot_name, dates)
    date_base = datetime.datetime(2019, 1, 1)
    date_array = np.array(
        [date_base + datetime.timedelta(days=i) for i in range(5)])
    expected_stat_file_dates = [
        date.strftime('%Y%m%d_%H%M%S') for date in date_array
    ]
    partial_sum_columns = [
        'TOTAL', 'FBAR', 'OBAR', 'FOBAR', 'FFBAR', 'OOBAR', 'MAE'
    ]
    model_data_indexA = pd.MultiIndex.from_product(
        [['MODEL_TESTA'], expected_stat_file_dates],
        names=['model_plot_name', 'dates'])
    model_data_arrayA = np.array([
        [3600, 5525.75062, 5525.66493, 30615218.26089, 30615764.49722,
         30614724.90979, 5.06746],
        [3600, 5519.11108, 5519.1014, 30549413.45946, 30549220.68868,
         30549654.24048, 5.12344],
        [3600, 5516.80228, 5516.79513, 30522742.16484, 30522884.89927,
         30522660.30975, 5.61752],
        [3600, 5516.93924, 5517.80544, 30525709.03932, 30520984.50965,
         30530479.99675, 4.94325],
        [3600, 5514.52274, 5514.68224, 30495695.82208, 30494633.24046,
         30496805.48259, 5.20369]
    ])
    model_dataA = pd.DataFrame(model_data_arrayA,
                               index=model_data_indexA,
                               columns=partial_sum_columns)
    model_data_arrayB = np.array([
        [3600, 5527.43726, 5527.79714, 30635385.37277, 30633128.08035,
         30637667.9488, 3.74623],
        [3600, 5520.22487, 5520.5867, 30562940.31742, 30560471.32084,
         30565442.31244, 4.17792],
        [3600, 5518.16049, 5518.53379, 30538694.69234, 30536683.66886,
         30540732.11308, 3.86693],
        [3600, 5519.20033, 5519.38443, 30545925.19732, 30544766.74602,
         30547108.75357, 3.7534],
        [3600, 5515.78776, 5516.17552, 30509811.84136, 30507573.43899,
         30512077.12263, 4.02554]
    ])
    model_data_indexB = pd.MultiIndex.from_product(
        [['MODEL_TESTB'], expected_stat_file_dates],
        names=['model_plot_name', 'dates'])
    model_dataB = pd.DataFrame(model_data_arrayB,
                               index=model_data_indexB,
                               columns=partial_sum_columns)
    ci_method = 'EMC_MONTE_CARLO'
    modelB_values = model_dataB
    modelA_values = model_dataA
    total_days = 5
    stat = 'bias'
    average_method = 'AGGREGATION'
    randx = np.random.rand(10000, total_days)
    # Hard-coded reference value; presumably tied to the seeded random
    # draws above, since randx is passed into calculate_ci.
    expected_intvl = 0.3893656076904014
    test_intvl = plot_util.calculate_ci(logger, ci_method, modelB_values,
                                        modelA_values, total_days, stat,
                                        average_method, randx)
    assert np.isclose(test_intvl, expected_intvl)
# Write confidence intervals for the current model and lead, unless the
# configured method is "NONE".
if ci_method == "NONE":
    logger.debug("Not calculating confidence intervals")
else:
    if stat == "fbar_obar":
        # For fbar_obar the interval is computed between the model values
        # and the observation values. The output file name is built from
        # the plot name, stat, date/time ranges, forecast and observation
        # variable info, interpolation, region and the CI method.
        CI_filename = os.path.join(
            plotting_out_dir_data,
            model_plot_name + "_" + stat + "_"
            + plot_time + start_date_YYYYmmdd + "to" + end_date_YYYYmmdd
            + "_valid" + valid_time_info[0] + "to" + valid_time_info[-1]
            + "Z_init" + init_time_info[0] + "to" + init_time_info[-1]
            + "Z" + "_fcst" + fcst_var_name + fcst_var_level
            + fcst_var_extra + fcst_var_thresh
            + "_obs" + obs_var_name + obs_var_level
            + obs_var_extra + obs_var_thresh
            + "_interp" + interp + "_region" + region
            + "_CI_" + ci_method + ".txt")
        # NOTE(review): this call passes five arguments; confirm that it
        # matches the signature of the plot_util.calculate_ci in use.
        stat_CI = plot_util.calculate_ci(logger, ci_method,
                                         model_stat_values_array,
                                         obs_stat_values_array, total_days)
        logger.debug(
            "Writing " + ci_method
            + " confidence intervals for difference between model "
            + str(model_num) + " " + model_name
            + " with name on plot " + model_plot_name
            + " and the observations lead " + lead
            + " to file: " + CI_filename)
        # The file is opened in append mode, so each pass adds one line:
        # the lead right-padded with '0' to six characters, a space, and
        # the interval value.
        with open(CI_filename, 'a') as CI_file:
            CI_file.write(
                lead.ljust(6, '0') + ' ' + str(stat_CI) + '\n')
    else:
        if model_num == 1:
            # For every other stat, model 1's values and names are saved.
            # NOTE(review): presumably they serve as the baseline that
            # later models are compared against; the remainder of this
            # branch is outside the visible chunk, so confirm there.
            model1_stat_values_array = model_stat_values_array
            model1_plot_name = model_plot_name
            model1_name = model_name
or stat == 'baser_frate'): logger.debug("Writing " + ci_method + " confidence intervals " + "for difference between model " + str(model_num) + " " + model_name + " with name " + "on plot " + model_plot_name + " and the " + "observations at lead " + fcst_lead + " to " + "file: " + CI_file) if ci_method == 'EMC_MONTE_CARLO': logger.warning("Monte Carlo resampling not " + "done for fbar_obar, orate_frate, " + "or baser_frate.") stat_CI = '--' else: stat_CI = plot_util.calculate_ci( logger, ci_method, model_stat_values_array[0, :], model_stat_values_array[1, :], total_dates, stat, average_method, randx[model_idx, :, :]) with open(CI_file, 'a') as file2write: file2write.write(fcst_lead + ' ' + str(stat_CI) + '\n') else: if model_num == 1: model1_stat_values_array = ( model_stat_values_array[0, :]) model1_plot_name = model_plot_name model1_name = model_name else: logger.debug("Writing " + ci_method + " confidence " + "intervals for difference between " + "model " + str(model_num) + " " + model_name + " with name on plot " + model_plot_name + " and model 1 " +