def main():
    time_series = joblib.load("result/TimeSeriesHyperSlice.gz")
    test_set = dataset.CardiffTest()
    test_rain = test_set.rain
    forecaster = time_series.forecaster
    forecaster.load_memmap("r")

    seed = random.SeedSequence(254267254235771235840594891069714545013)
    rng = random.RandomState(random.MT19937(seed))

    rain_array = [0, 5, 10, 15, 20, 25, 30]
    decimal_place_array = []
    n_bootstrap = 32

    #for each precipitation threshold, bootstrap the forecast to estimate the
    #spread of the auc, then convert that spread into a number of reportable
    #decimal places
    for rain in rain_array:
        auc_array = []
        for i in range(n_bootstrap):
            bootstrap = forecaster.bootstrap(rng)
            roc = bootstrap.get_roc_curve(rain, test_rain)
            auc_array.append(roc.area_under_curve)
        auc_std = np.std(auc_array, ddof=1)
        decimal_place_array.append(-round(math.log10(auc_std)))

    data_frame = pd.DataFrame(
        decimal_place_array, rain_array, ["no. dec. places"])
    print("rain (mm)")
    print(data_frame)
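#The step above that may not be self-explanatory is turning the bootstrap
#spread of the AUC into a number of reportable decimal places. Below is a
#minimal standalone sketch of that heuristic; the function name and the
#example value are illustrative only, not part of the script above.
import math

def sketch_decimal_places(auc_std):
    #-round(log10(std)) is the decimal place at which the estimate stops
    #being reliable, e.g. a spread of roughly 0.01 supports 2 decimal places
    return -round(math.log10(auc_std))

#sketch_decimal_places(0.0123) returns 2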
def main():
    directory = "figure_roc_compare"
    if not path.isdir(directory):
        os.mkdir(directory)

    era5 = compound_poisson.era5.TimeSeries()
    era5.fit(dataset.Era5Cardiff())

    observed_data = dataset.CardiffTest()
    time_series = joblib.load(
        path.join("result", "TimeSeriesHyperSlice.gz"))
    time_series.forecaster.load_memmap("r")

    for rain in RAIN_ARRAY:
        #single (false positive rate, true positive rate) point for the
        #deterministic era5 forecast at this threshold
        positive_array = era5.forecaster.forecast > rain
        alt_array = observed_data.rain > rain
        null_array = np.logical_not(alt_array)
        true_positive_rate = (
            np.sum(np.logical_and(positive_array, alt_array))
            / np.sum(alt_array))
        false_positive_rate = (
            np.sum(np.logical_and(positive_array, null_array))
            / np.sum(null_array))

        #roc curve for the compound-Poisson forecast with the era5 point
        #overlaid for comparison
        roc = time_series.forecaster.get_roc_curve(rain, observed_data.rain)
        plt.figure()
        roc.plot()
        plt.scatter(false_positive_rate, true_positive_rate, label="ERA5")
        plt.legend()
        plt.savefig(path.join(directory, str(rain) + ".pdf"))
        plt.close()

    time_series.forecaster.del_memmap()
def main():
    directory = "figure"
    if not path.isdir(directory):
        os.mkdir(directory)

    era5 = dataset.Era5Cardiff()
    time_series = compound_poisson.era5.TimeSeries()
    time_series.fit(era5)

    observed_data = dataset.CardiffTest()
    #print here refers to a printing module, not the built-in function
    printer = print.TimeSeries(
        time_series.forecaster, observed_data.rain, directory, "test")
    printer.print()
def main():
    fitter = fit.time_series.FitterHyperSlice()
    training = dataset.Cardiff10Training()
    test = dataset.CardiffTest()
    default_burn_in = 30000
    wrapper.time_series_forecast(fitter, training, test, default_burn_in)
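#How these scripts are run is not shown above; the assumed pattern is that
#each one is a standalone module ending with the usual entry-point guard,
#for example:
if __name__ == "__main__":
    main()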
def main():
    monochrome = (cycler.cycler('color', ['k'])
                  * cycler.cycler('linestyle', LINESTYLE))
    plt.rcParams.update({'font.size': 14})

    #where to save the figures
    directory = "figure"
    if not path.isdir(directory):
        os.mkdir(directory)

    era5 = compound_poisson.era5.TimeSeries()
    era5.fit(dataset.Era5Cardiff())

    observed_data = dataset.CardiffTest()
    observed_rain = observed_data.rain

    result_dir = path.join("..", "cardiff_5_20")
    time_series = joblib.load(
        path.join(result_dir, "result", "TimeSeriesHyperSlice.gz"))
    time_series_name = "CP-MCMC (5)"
    era5_name = "IFS"
    observed_name = "observed"

    #the memmap path was saved relative to the script which produced it, so
    #point it at the sibling directory before loading
    old_dir = time_series.forecaster.memmap_path
    time_series.forecaster.memmap_path = path.join(result_dir, old_dir)
    time_series.forecaster.load_memmap("r")

    cp_comparer = time_series.forecaster.compare_dist_with_observed(
        observed_rain)
    era5_comparer = era5.forecaster.compare_dist_with_observed(
        observed_rain)

    #survival plot
    plt.figure()
    ax = plt.gca()
    ax.set_prop_cycle(monochrome)
    cp_comparer.plot_survival_forecast(time_series_name)
    era5_comparer.plot_survival_forecast(era5_name)
    era5_comparer.plot_survival_observed(observed_name)
    cp_comparer.adjust_survival_plot()
    plt.legend()
    plt.savefig(path.join(directory, "survival.pdf"), bbox_inches="tight")
    plt.close()

    #pp plot
    plt.figure()
    ax = plt.gca()
    ax.set_prop_cycle(monochrome)
    cp_comparer.plot_pp(time_series_name)
    era5_comparer.plot_pp(era5_name)
    cp_comparer.adjust_pp_plot()
    plt.legend()
    plt.savefig(path.join(directory, "pp.pdf"), bbox_inches="tight")
    plt.close()

    #qq plot
    plt.figure()
    ax = plt.gca()
    ax.set_prop_cycle(monochrome)
    cp_comparer.plot_qq(time_series_name)
    cp_comparer.adjust_qq_plot()
    era5_comparer.plot_qq(era5_name)
    plt.legend()
    plt.savefig(path.join(directory, "qq.pdf"), bbox_inches="tight")
    plt.close()

    time_series.forecaster.del_memmap()
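#A note on the property cycles used above and in the next script: with the
#cycler package, `*` takes the outer product of the constituent cycles, so a
#single colour times several linestyles gives black lines that differ only in
#linestyle (greyscale-friendly), whereas `+` pairs cycles element-wise and
#requires equal lengths. LINESTYLE below is a stand-in list, not the
#module-level constant used by the scripts.
import cycler

LINESTYLE = ["-", "--", "-.", ":"]
monochrome = (cycler.cycler('color', ['k'])
              * cycler.cycler('linestyle', LINESTYLE))
#four entries: ('k', '-'), ('k', '--'), ('k', '-.'), ('k', ':')
print(len(monochrome))  #prints 4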
def main():
    monochrome = (cycler.cycler('color', ['k'])
                  * cycler.cycler('linestyle', LINESTYLE))
    monochrome2 = (cycler.cycler('color', LINECOLOUR2)
                   + cycler.cycler('linestyle', LINESTYLE2)
                   + cycler.cycler('marker', LINEMARKER2))
    plt.rcParams.update({'font.size': 14})

    #where to save the figures
    directory = "figure"
    if not path.isdir(directory):
        os.mkdir(directory)

    seed = random.SeedSequence(301608752619507842997952162996242447135)
    rng = random.RandomState(random.MT19937(seed))

    era5 = compound_poisson.era5.TimeSeries()
    era5.fit(dataset.Era5Cardiff())

    observed_data = dataset.CardiffTest()
    observed_rain = observed_data.rain
    time_array = observed_data.time_array

    training_size_array = [1, 5, 10, 20]
    script_dir_array = [
        "cardiff_1_20",
        "cardiff_5_20",
        "cardiff_10_20",
        "cardiff_20_20",
    ]
    for i, dir_i in enumerate(script_dir_array):
        script_dir_array[i] = path.join("..", dir_i)

    time_series_name_array = []
    #time series for each training set
    time_series_array = []

    #will need to update the location of each time series memmap_path because
    #they would be using relative paths
    for i, dir_i in enumerate(script_dir_array):
        time_series = joblib.load(
            path.join(dir_i, "result", "TimeSeriesHyperSlice.gz"))
        old_dir = time_series.forecaster.memmap_path
        time_series.forecaster.memmap_path = path.join(dir_i, old_dir)
        time_series.forecaster.load_memmap("r")
        time_series_array.append(time_series)
        time_series_name_array.append(
            "CP-MCMC (" + str(training_size_array[i]) + ")")

    #plot auc for varying precipitation
    #array of arrays:
    #for each training set, then for each value in rain_array
    auc_array = []
    bootstrap_error_array = []
    n_bootstrap = 32
    rain_array = [0, 5, 10, 15]
    for i_training_size, size_i in enumerate(training_size_array):
        auc_array.append([])
        bootstrap_error_array.append([])
        forecaster_i = time_series_array[i_training_size].forecaster
        for rain_i in rain_array:
            roc_i = forecaster_i.get_roc_curve(rain_i, observed_rain)
            auc_array[i_training_size].append(roc_i.area_under_curve)
            #bootstrap error: root mean square deviation of the bootstrap auc
            #about the point estimate (a standalone sketch follows after this
            #script)
            bootstrap_i_array = []
            for j_bootstrap in range(n_bootstrap):
                bootstrap = forecaster_i.bootstrap(rng)
                roc_ij = bootstrap.get_roc_curve(rain_i, observed_rain)
                bootstrap_i_array.append(
                    math.pow(
                        roc_ij.area_under_curve - roc_i.area_under_curve, 2))
            bootstrap_error_array[i_training_size].append(
                math.sqrt(np.mean(bootstrap_i_array)))

    #figure format
    plt.figure()
    ax = plt.gca()
    ax.set_prop_cycle(monochrome)
    for i_training_size, size_i in enumerate(training_size_array):
        plt.plot(rain_array,
                 auc_array[i_training_size],
                 label=time_series_name_array[i_training_size])
    plt.ylim([0.5, 1])
    plt.xlabel("precipitation (mm)")
    plt.ylabel("Area under ROC curve")
    plt.legend()
    plt.savefig(path.join(directory, "auc.pdf"), bbox_inches="tight")
    plt.close()

    #table format
    rain_label_array = []
    for rain in rain_array:
        rain_label_array.append(str(rain) + " mm")

    #table format with uncertainty values
    auc_table = []
    for auc_i, error_i in zip(auc_array, bootstrap_error_array):
        auc_table.append([])
        for auc_ij, error_ij in zip(auc_i, error_i):
            auc_table[-1].append(
                r"${:0.4f}\pm {:0.4f}$".format(auc_ij, error_ij))
    data_frame = pd.DataFrame(
        np.asarray(auc_table).T, rain_label_array, time_series_name_array)
    data_frame.to_latex(path.join(directory, "auc.txt"), escape=False)

    #add era5 (for loss evaluation)
    #roc unavailable for era5
    time_series_array.append(era5)
    time_series_name_array.append("IFS")

    #yearly plot of the bias losses
    time_segmentator = time_segmentation.YearSegmentator(time_array)
    loss_segmentator_array = []
    for time_series_i in time_series_array:
        loss_segmentator_i = loss_segmentation.TimeSeries(
            time_series_i.forecaster, observed_rain)
        loss_segmentator_i.evaluate_loss(time_segmentator)
        loss_segmentator_array.append(loss_segmentator_i)

    pandas.plotting.register_matplotlib_converters()

    for i_loss, Loss in enumerate(loss_segmentation.LOSS_CLASSES):
        #array of arrays, one for each time_series in time_series_array
        #for each array, contains array of loss for each time point
        bias_loss_plot_array = []
        bias_median_loss_plot_array = []
        for loss_segmentator_i in loss_segmentator_array:
            bias_loss_plot, bias_median_loss_plot = (
                loss_segmentator_i.get_bias_plot(i_loss))
            bias_loss_plot_array.append(bias_loss_plot)
            bias_median_loss_plot_array.append(bias_median_loss_plot)

        #bias of the mean forecast
        plt.figure()
        ax = plt.gca()
        ax.set_prop_cycle(monochrome2)
        for time_series_label, bias_plot_array in zip(
                time_series_name_array, bias_loss_plot_array):
            plt.plot(loss_segmentator_i.time_array,
                     bias_plot_array,
                     label=time_series_label)
        plt.legend(bbox_to_anchor=(0, 1, 1, 0), loc="lower left",
                   mode="expand", ncol=3)
        plt.ylabel(Loss.get_axis_bias_label())
        plt.xlabel("year")
        plt.xticks(rotation=45)
        plt.savefig(
            path.join(directory, Loss.get_short_bias_name() + "_mean.pdf"),
            bbox_inches="tight")
        plt.close()

        #bias of the median forecast
        plt.figure()
        ax = plt.gca()
        ax.set_prop_cycle(monochrome2)
        for time_series_label, bias_plot_array in zip(
                time_series_name_array, bias_median_loss_plot_array):
            plt.plot(loss_segmentator_i.time_array,
                     bias_plot_array,
                     label=time_series_label)
        plt.legend(bbox_to_anchor=(0, 1, 1, 0), loc="lower left",
                   mode="expand", ncol=3)
        plt.ylabel(Loss.get_axis_bias_label())
        plt.xlabel("year")
        plt.xticks(rotation=45)
        plt.savefig(
            path.join(directory, Loss.get_short_bias_name() + "_median.pdf"),
            bbox_inches="tight")
        plt.close()

    #plot table of test set bias loss
    time_segmentator_array = {
        "all_years": time_segmentation.AllInclusive(time_array),
        "spring": time_segmentation.SpringSegmentator(time_array),
        "summer": time_segmentation.SummerSegmentator(time_array),
        "autumn": time_segmentation.AutumnSegmentator(time_array),
        "winter": time_segmentation.WinterSegmentator(time_array),
    }
    time_segmentator_names = list(time_segmentator_array.keys())

    #array of loss_segmentator objects, for each time series
    #dim 0: for each time series
    #dim 1: for each time segmentator
    loss_array = []
    #evaluate the loss for each time series and each time segmentator
    for i, time_series_i in enumerate(time_series_array):
        loss_array.append([])
        for time_segmentator_k in time_segmentator_array.values():
            forecaster_i = time_series_i.forecaster
            loss_i = loss_segmentation.TimeSeries(forecaster_i, observed_rain)
            loss_i.evaluate_loss(time_segmentator_k)
            loss_array[i].append(loss_i)

    for i_loss, Loss in enumerate(loss_segmentation.LOSS_CLASSES):
        #the bootstrap variance from the 5 year training set is used to guide
        #the number of decimal places to use
        n_decimal = 3
        float_format = ("{:." + str(n_decimal) + "f}").format

        #table of losses
        #columns: for each time segmentator
        #rows: for each time series
        loss_mean_array = []
        loss_median_array = []
        #tabulate the bias of the mean and of the median forecast
        for i_time_series, time_series_i in enumerate(time_series_array):
            loss_mean_array.append([])
            loss_median_array.append([])
            for loss_segmentator_i in loss_array[i_time_series]:
                loss = loss_segmentator_i.loss_all_array[i_loss]
                loss_mean_array[i_time_series].append(loss.get_bias_loss())
                loss_median_array[i_time_series].append(
                    loss.get_bias_median_loss())
        for prefix, loss_table in zip(["mean", "median"],
                                      [loss_mean_array, loss_median_array]):
            data_frame = pd.DataFrame(
                loss_table, time_series_name_array, time_segmentator_names)
            path_to_table = path.join(
                directory,
                prefix + "_" + Loss.get_short_bias_name() + ".txt")
            data_frame.to_latex(path_to_table, float_format=float_format)

    for i, time_series_i in enumerate(time_series_array):
        residual_plot = residual_analysis.ResidualLnqqPlotter()
        #add residuals data
        residual_plot.add_data(time_series_i.forecaster, observed_rain)
        #plot residual data
        residual_plot.plot_heatmap([[0, 3.8], [0, 3.8]], 1.8, 5.3, 'Greys')
        plt.savefig(
            path.join(directory,
                      time_series_name_array[i] + "_residual_qq_hist.pdf"),
            bbox_inches="tight")
        plt.close()

    for time_series_i in time_series_array:
        time_series_i.forecaster.del_memmap()
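#Standalone sketch of the bootstrap error used for the AUC table above: the
#root mean square deviation of the bootstrap AUC replicates about the point
#estimate, rather than the sample standard deviation of the replicates used
#in the first script. The numbers below are hypothetical.
import numpy as np

auc_point_estimate = 0.87
auc_bootstrap = np.asarray([0.86, 0.88, 0.85, 0.89])
bootstrap_error = np.sqrt(
    np.mean(np.square(auc_bootstrap - auc_point_estimate)))
print(bootstrap_error)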