def _init(self, *args, **kwargs):
    """Initialise the wrapped class with a seed specific to this rank.

    All positional and keyword arguments are forwarded to the parent
    ``__init__``; only the ``"seed"`` keyword may be replaced.

    Raises:
        RuntimeError: if the removed keyword ``inc`` is supplied.
    """
    if 'inc' in kwargs:
        raise RuntimeError(
            "'inc' is no longer a valid keyword; see <https://www.firedrakeproject.org/firedrake.html#module-firedrake.randomfunctiongen>"
        )

    comm = self._comm
    rank = comm.Get_rank()
    size = comm.Get_size()

    # Work on a copy so the caller's keyword dictionary is left untouched.
    init_kwargs = dict(kwargs)
    seed = init_kwargs.get("seed")

    if seed is None:
        # Only rank 0 generates a (128bit) seed; it is then broadcast so
        # that every rank starts from the same value.
        entropy = randomgen.SeedSequence().entropy if rank == 0 else None
        seed = comm.bcast(entropy, root=0)

    # A user-supplied SeedSequence is assumed to be parallel-safe already
    # and is passed through unchanged; anything else is turned into one
    # stream per rank.
    if not isinstance(seed, randomgen.SeedSequence):
        streams = randomgen.SeedSequence(seed).spawn(size)
        init_kwargs["seed"] = streams[rank]

    super(_Wrapper, self).__init__(*args, **init_kwargs)
def main():
    """Print the decimal place of the bootstrap AUC spread per rain threshold.

    For each rain threshold the forecast is bootstrapped, the sample
    standard deviation of the areas under the ROC curve is taken, and
    ``-round(log10(std))`` is tabulated and printed.
    """
    fitted_series = joblib.load("result/TimeSeriesHyperSlice.gz")
    observed_rain = dataset.CardiffTest().rain
    forecaster = fitted_series.forecaster
    forecaster.load_memmap("r")

    seed = random.SeedSequence(254267254235771235840594891069714545013)
    rng = random.RandomState(random.MT19937(seed))

    thresholds = [0, 5, 10, 15, 20, 25, 30]
    n_bootstrap = 32

    decimal_places = []
    for threshold in thresholds:
        bootstrap_aucs = [
            forecaster.bootstrap(rng)
            .get_roc_curve(threshold, observed_rain)
            .area_under_curve
            for _ in range(n_bootstrap)
        ]
        spread = np.std(bootstrap_aucs, ddof=1)
        decimal_places.append(-round(math.log10(spread)))

    table = pd.DataFrame(decimal_places, thresholds, ["no. dec. places"])
    print("rain (mm)")
    print(table)
def main():
    """Simulate a compound-Poisson series with an MA term and plot it.

    Two figures are shown: the simulated series and its sample
    autocorrelation.
    """
    n_days = 365  # length of the time series
    n_model_field = 1
    # no real model fields: a single field filled with zeros stands in
    design_matrix = np.zeros((n_days, n_model_field))
    arma_order = [0, 1]  # number of AR terms, number of MA terms
    ma_coefficient = np.asarray([0.3])

    seed = random.SeedSequence(103616317136878112071633291725501775781)
    rng = random.RandomState(random.MT19937(seed))

    # build the three parameter objects, then give both ARMA-capable ones
    # the same MA coefficient
    poisson_rate = parameter.PoissonRate(n_model_field, arma_order)
    gamma_mean = parameter.GammaMean(n_model_field, arma_order)
    gamma_dispersion = parameter.GammaDispersion(n_model_field)
    for cp_parameter in (poisson_rate, gamma_mean):
        cp_parameter["MA"] = ma_coefficient

    series = compound_poisson.TimeSeries(
        design_matrix,
        cp_parameter_array=[poisson_rate, gamma_mean, gamma_dispersion])
    # TimeSeries normalises the model fields using their mean and std by
    # default; the std of an all-zero field is 0, so override both
    series.x_shift = 0
    series.x_scale = 1
    series.rng = rng
    series.simulate()

    figure_title = "Compound-Poisson with MA(1)"

    # the simulated series
    plt.figure()
    plt.plot(series[:])
    plt.title(figure_title)
    plt.xlabel("time (days)")
    plt.ylabel("precipitation (mm)")
    plt.show()
    plt.close()

    # sample autocorrelation; a peak at lag 1 indicates MA(1) behaviour
    acf = stattools.acf(series[:])
    lags = range(len(acf))
    plt.figure()
    plt.bar(lags, acf)
    plt.title(figure_title)
    plt.xlabel("lag (days)")
    plt.ylabel("autocorrelation")
    plt.show()
def main():
    """Simulate a compound-Poisson series driven by a sine wave and plot it.

    Two figures are shown: the simulated series and its sample
    autocorrelation.
    """
    n_days = 2 * 365  # length of the time series
    n_model_field = 1
    # one model field: a sine wave with a period of 365 time steps
    design_matrix = np.zeros((n_days, n_model_field))
    design_matrix[:, 0] = range(n_days)
    design_matrix = np.sin(2 * math.pi * design_matrix / 365)
    arma_order = [0, 0]  # no ARMA terms
    regression_coefficient = np.asarray([0.8])

    seed = random.SeedSequence(199412950541405529670631357604770615867)
    rng = random.RandomState(random.MT19937(seed))

    # build the three parameter objects, then give the Poisson rate and
    # the gamma mean the same regression coefficient
    poisson_rate = parameter.PoissonRate(n_model_field, arma_order)
    gamma_mean = parameter.GammaMean(n_model_field, arma_order)
    gamma_dispersion = parameter.GammaDispersion(n_model_field)
    for cp_parameter in (poisson_rate, gamma_mean):
        cp_parameter["reg"] = regression_coefficient

    series = compound_poisson.TimeSeries(
        design_matrix,
        cp_parameter_array=[poisson_rate, gamma_mean, gamma_dispersion])
    series.rng = rng
    series.simulate()

    figure_title = "Seasonal Compound-Poisson"

    # the simulated series, which should follow the sine behaviour
    plt.figure()
    plt.plot(series[:])
    plt.title(figure_title)
    plt.xlabel("time (days)")
    plt.ylabel("precipitation (mm)")
    plt.show()
    plt.close()

    # sample autocorrelation
    acf = stattools.acf(series[:])
    lags = range(len(acf))
    plt.figure()
    plt.bar(lags, acf)
    plt.title(figure_title)
    plt.xlabel("lag (days)")
    plt.ylabel("autocorrelation")
    plt.show()
def test_seed_new():
    """Check the generator type and stream returned for various seeds.

    Each case is ``(seed_a, seed_b, expected_class, streams_match)``: both
    seeds must produce an instance of ``expected_class``, and the first
    uniform draw of the two results must be equal exactly when
    ``streams_match`` is true.
    """
    # Can't use pytest parametrize because tests will fail where the new
    # Generator object and related function are not defined
    cases = [
        (None, None, npr.Generator, False),
        (npr.RandomState(0), npr.RandomState(0), npr.RandomState, True),
        (npr.RandomState(0), npr.RandomState(1), npr.RandomState, False),
        (npr.default_rng(1), npr.default_rng(1), npr.Generator, True),
        (npr.default_rng(1), npr.default_rng(2), npr.Generator, False),
        (npr.SeedSequence(10), npr.SeedSequence(10), npr.Generator, True),
        (npr.SeedSequence(10), npr.SeedSequence(20), npr.Generator, False),
        (100, 100, npr.Generator, True),
        (100, 200, npr.Generator, False),
    ]
    for seed_a, seed_b, expected_class, streams_match in cases:
        rng_a = algo._handle_random_seed(seed_a)
        rng_b = algo._handle_random_seed(seed_b)
        assert isinstance(rng_a, expected_class)
        assert isinstance(rng_b, expected_class)
        draws_equal = rng_a.uniform() == rng_b.uniform()
        assert draws_equal == streams_match
def main():
    """Run the downscale ``PriorSimulator`` with a fixed seed.

    Figures go to ``figure/hyper`` next to this script; the directory
    (and its parent) is created if it does not exist yet.
    """
    seed = random.SeedSequence(332301838246917065154383428780003278502)
    path_here = pathlib.Path(__file__).parent.absolute()
    figure_dir = path.join(path_here, "figure", "hyper")
    # makedirs with exist_ok=True creates both levels in one call and
    # avoids the race between checking for the directory and creating it
    os.makedirs(figure_dir, exist_ok=True)
    prior_simulate = prior_simulator.downscale.PriorSimulator(figure_dir, seed)
    prior_simulate()
def main():
    """Run the downscale ``PriorGpSimulator`` with a fixed seed.

    Figures go to ``figure/gp`` next to this script; the directory (and
    its parent) is created if it does not exist yet.
    """
    seed = random.SeedSequence(224505493302849505223964154111538808129)
    path_here = pathlib.Path(__file__).parent.absolute()
    figure_dir = path.join(path_here, "figure", "gp")
    # makedirs with exist_ok=True creates both levels in one call and
    # avoids the race between checking for the directory and creating it
    os.makedirs(figure_dir, exist_ok=True)
    prior_simulate = prior_simulator.downscale.PriorGpSimulator(
        figure_dir, seed)
    prior_simulate()
def main():
    """Call ``wrapper.time_series_fit`` with a ``FitterSlice``.

    The training set is ``LondonSimulatedTraining`` and the seed is fixed.
    """
    model_fitter = fit.time_series.FitterSlice()
    training_data = dataset.LondonSimulatedTraining()
    seed_sequence = random.SeedSequence(
        170300509484813619611218577657545000221)
    wrapper.time_series_fit(model_fitter, training_data, seed_sequence)
def main():
    """Call ``wrapper.downscale_fit`` with a ``FitterDownscaleDeepGp``.

    The training set is ``IsleOfManTraining``, the seed is fixed and
    ``multiprocess.Pool`` is passed as the pool class.
    """
    model_fitter = fit.downscale.FitterDownscaleDeepGp()
    training_data = dataset.IsleOfManTraining()
    seed_sequence = random.SeedSequence(
        335181766240425557327571375931666354614)
    wrapper.downscale_fit(
        model_fitter, training_data, seed_sequence, multiprocess.Pool)
def main():
    """Call ``wrapper.downscale_fit`` with a ``FitterMultiSeries``.

    The training set is ``Wales5Training``, the seed is fixed and
    ``multiprocess.Pool`` is passed as the pool class.
    """
    model_fitter = fit.downscale.FitterMultiSeries()
    training_data = dataset.Wales5Training()
    seed_sequence = random.SeedSequence(
        336116686577838597869553922167649360230)
    wrapper.downscale_fit(
        model_fitter, training_data, seed_sequence, multiprocess.Pool)
def main():
    """Call ``wrapper.time_series_fit`` with a ``FitterHyperSlice``.

    The training set is ``CardiffTraining`` and the seed is fixed.
    """
    model_fitter = fit.time_series.FitterHyperSlice()
    training_data = dataset.CardiffTraining()
    seed_sequence = random.SeedSequence(
        80188344912064343414862752267182073625)
    wrapper.time_series_fit(model_fitter, training_data, seed_sequence)
def __init__(self, data, n_arma=(0, 0)):
    """Build one time series per grid point from a training set.

    Args:
        data: DataDualGrid object containing the training set; it may
            have no model fields (eg ERA5)
        n_arma: number of AR and MA terms, passed to every time series
            for both the poisson rate and the gamma mean
    """
    self.n_arma = n_arma
    self.time_series_array = []
    self.time_array = data.time_array
    self.model_field_units = None
    self.n_model_field = None
    self.mask = data.mask
    self.parameter_mask_vector = []
    self.n_parameter = None
    self.n_total_parameter = None
    self.topography = data.topography
    self.shape = self.mask.shape
    self.area = self.shape[0] * self.shape[1]
    self.area_unmask = np.sum(np.logical_not(self.mask))
    self.seed_seq = None
    self.rng = None
    self.n_sample = 10000
    self.burn_in = 0
    self.pool = None
    self.memmap_dir = ""
    self.forecaster = None
    self.mcmc = None
    self.model_field_shift = []
    self.model_field_scale = []

    has_model_field = data.model_field is not None
    n_lat, n_long = self.shape[0], self.shape[1]
    TimeSeries = self.get_time_series_class()

    # one time series for every point in space, stored row by row
    for lat_i in range(n_lat):
        row = []
        self.time_series_array.append(row)
        for long_i in range(n_long):
            if not has_model_field:
                # without model fields there is no data to hand over
                time_series = TimeSeries()
            else:
                x_i, rain_i = data.get_data(lat_i, long_i)
                is_mask = self.mask[lat_i, long_i]
                if is_mask:
                    # masked points have no rain to provide
                    time_series = TimeSeries(x_i,
                                             poisson_rate_n_arma=n_arma,
                                             gamma_mean_n_arma=n_arma)
                else:
                    # unmasked points have rain, hand it over as well
                    time_series = TimeSeries(x_i, rain_i.data, n_arma,
                                             n_arma)
                # one mask entry for each parameter of this location
                self.parameter_mask_vector.extend(
                    [is_mask] * time_series.n_parameter)
                self.n_parameter = time_series.n_parameter
            # tell the time series where and when it lives
            time_series.id = [lat_i, long_i]
            time_series.time_array = self.time_array
            row.append(time_series)

    if has_model_field:
        # remaining member variables need the model fields
        self.model_field_units = data.model_field_units
        self.n_model_field = len(data.model_field)
        self.set_seed_seq(random.SeedSequence())
        self.set_time_series_rng()
        self.parameter_mask_vector = np.asarray(self.parameter_mask_vector)
        self.n_total_parameter = self.area_unmask * self.n_parameter

        # normalisation constants: mean and standard deviation of each
        # model field over all space and time, shared by every location
        fields = list(data.model_field.values())
        self.model_field_shift = np.asarray(
            [np.mean(field) for field in fields])
        self.model_field_scale = np.asarray(
            [np.std(field, ddof=1) for field in fields])
        for row in self.time_series_array:
            for time_series in row:
                time_series.x_shift = self.model_field_shift
                time_series.x_scale = self.model_field_scale
"""
The random module in NumPy provides several alternatives to the default PRNG,
which uses a 128-bit permutation congruential generator. While this is a good
general-purpose random number generator, it might not be sufficient for some
particular needs.

This module illustrates how to use other PRNGs.
"""

from numpy import random

# Create a seed sequence without supplying any entropy and print it
# (presumably so the entropy in use can be recorded for reproducibility).
seed_seq = random.SeedSequence()
print(seed_seq)
# Build an MT19937 bit generator from the seed sequence instead of relying
# on the default one, then wrap it in a Generator to draw random numbers.
bit_gen = random.MT19937(seed_seq)
rng = random.Generator(bit_gen)
def main():
    """Call ``wrapper.time_series_fit`` with a ``FitterMcmc``.

    The training set is ``LondonSimulatedTraining`` and the seed is fixed.
    """
    model_fitter = fit.time_series.FitterMcmc()
    training_data = dataset.LondonSimulatedTraining()
    seed_sequence = random.SeedSequence(
        199862391501461976584157354151760167878)
    wrapper.time_series_fit(model_fitter, training_data, seed_sequence)
def main():
    """Call ``wrapper.downscale_fit`` with a ``FitterMultiSeries``.

    The training set is ``IsleOfManTraining``, the seed is fixed and
    ``multiprocess.Pool`` is passed as the pool class.
    """
    model_fitter = fit.downscale.FitterMultiSeries()
    training_data = dataset.IsleOfManTraining()
    seed_sequence = random.SeedSequence(
        275033816910622348579815457010957489899)
    wrapper.downscale_fit(
        model_fitter, training_data, seed_sequence, multiprocess.Pool)
def main():
    """Call ``wrapper.downscale_fit`` with a ``FitterDownscale``.

    The training set is ``IsleOfManTraining``, the seed is fixed and
    ``multiprocess.Pool`` is passed as the pool class.
    """
    model_fitter = fit.downscale.FitterDownscale()
    training_data = dataset.IsleOfManTraining()
    seed_sequence = random.SeedSequence(
        41597761383904719560264433323691455830)
    wrapper.downscale_fit(
        model_fitter, training_data, seed_sequence, multiprocess.Pool)
def main():
    """Call ``wrapper.time_series_fit`` with a ``FitterHyperSlice``.

    The training set is ``Cardiff1Training`` and the seed is fixed.
    """
    model_fitter = fit.time_series.FitterHyperSlice()
    training_data = dataset.Cardiff1Training()
    seed_sequence = random.SeedSequence(
        277310809467192855312273294721104678816)
    wrapper.time_series_fit(model_fitter, training_data, seed_sequence)
def main():
    """Call ``wrapper.time_series_fit`` with a ``FitterHyperSlice``.

    The training set is ``LondonTraining`` and the seed is fixed.
    """
    model_fitter = fit.time_series.FitterHyperSlice()
    training_data = dataset.LondonTraining()
    seed_sequence = random.SeedSequence(
        126906591942422578422472743313642430795)
    wrapper.time_series_fit(model_fitter, training_data, seed_sequence)
def main():
    """Call ``wrapper.downscale_fit`` with a ``FitterDownscale``.

    The training set is ``Wales5Training``, the seed is fixed and
    ``multiprocess.Pool`` is passed as the pool class.
    """
    model_fitter = fit.downscale.FitterDownscale()
    training_data = dataset.Wales5Training()
    seed_sequence = random.SeedSequence(
        135973542338678598285681473918294488781)
    wrapper.downscale_fit(
        model_fitter, training_data, seed_sequence, multiprocess.Pool)
def main():
    """Call ``wrapper.time_series_fit`` with a ``FitterHyperSlice``.

    The training set is ``Cardiff10Training`` and the seed is fixed.
    """
    model_fitter = fit.time_series.FitterHyperSlice()
    training_data = dataset.Cardiff10Training()
    seed_sequence = random.SeedSequence(
        177782466634943011322205683796258167716)
    wrapper.time_series_fit(model_fitter, training_data, seed_sequence)
def __init__(self,
             x,
             rainfall=None,
             poisson_rate_n_arma=None,
             gamma_mean_n_arma=None,
             cp_parameter_array=None):
    """
    Provide the following combination parameters (or signature) only:
        -x, rainfall, poisson_rate_n_arma, gamma_mean_n_arma
            -to be used for fitting the model onto a provided data
        -x, rainfall, cp_parameter_array
            -to be used for fitting the model onto a provided data with a
                provided initial value
        -x, cp_parameter_array
            -to be used for simulating rainfall
        -x, poisson_rate_n_arma, gamma_mean_n_arma
            -to be used for simulating rainfall using the default
                parameters

    Args:
        x: design matrix of the model fields, shape (n, n_model_field).
            A pandas DataFrame (or a subclass of it) is converted to a
            numpy array and its column names are used as the model field
            names.
        rainfall: array of rainfall data. If none, all rain is zero
        poisson_rate_n_arma: 2 element array, number of AR and MA terms
            for the poisson rate. Ignored if cp_parameter_array is
            provided.
        gamma_mean_n_arma: 2 element array, number of AR and MA terms for
            the gamma mean. Ignored if cp_parameter_array is provided.
        cp_parameter_array: array containing in order PoissonRate object,
            GammaMean object, GammaDispersion object
    """
    # isinstance rather than an exact type comparison, so that subclasses
    # of DataFrame are converted and named like a DataFrame as well
    is_data_frame = isinstance(x, pandas.DataFrame)

    self.x = np.asarray(x) if is_data_frame else x
    self.x_shift = np.mean(self.x, 0)
    self.x_scale = np.std(self.x, 0, ddof=1)
    n = self.x.shape[0]
    self.model_field_name = []
    self.time_array = range(n)
    self.n_model_field = self.x.shape[1]
    self.poisson_rate = None
    self.gamma_mean = None
    self.gamma_dispersion = None
    self.n_parameter = None
    # array containing poisson_rate, gamma_mean and gamma_dispersion
    self.cp_parameter_array = None
    self.z_array = np.zeros(n)  # z_array can be float in E step
    self.z_var_array = np.zeros(n)
    self.y_array = None
    self.fitted_time_series = None
    self.rng = None
    self.id = None
    self.forecaster = None
    self.self_forecaster = None
    self.forecaster_memmap_dir = ""
    self.forecaster_rng = None
    self.self_forecaster_rng = None

    self.set_rng(random.SeedSequence())

    # name the model fields, or extract from pandas data frame
    if is_data_frame:
        self.model_field_name = x.columns
    else:
        self.model_field_name = [
            "model_field_" + str(i) for i in range(self.n_model_field)
        ]

    # no rainfall provided means all rain is zero
    self.y_array = np.zeros(n) if rainfall is None else rainfall

    if cp_parameter_array is not None:
        self.set_new_parameter(cp_parameter_array)
    elif rainfall is None:
        # cannot estimate parameter if no rain, so construct the three
        # parameter objects directly
        poisson_rate = parameter.PoissonRate(self.n_model_field,
                                             poisson_rate_n_arma)
        gamma_mean = parameter.GammaMean(self.n_model_field,
                                         gamma_mean_n_arma)
        gamma_dispersion = parameter.GammaDispersion(self.n_model_field)
        self.set_new_parameter([
            poisson_rate,
            gamma_mean,
            gamma_dispersion,
        ])
    else:
        # rainfall is available, let initalise_parameters set the
        # parameters up
        self.initalise_parameters(poisson_rate_n_arma, gamma_mean_n_arma)
def main():
    """Call ``wrapper.time_series_fit`` with a ``FitterHyperSlice``.

    The training set is ``Cardiff5Training`` and the seed is fixed.
    """
    model_fitter = fit.time_series.FitterHyperSlice()
    training_data = dataset.Cardiff5Training()
    seed_sequence = random.SeedSequence(
        230692462564320493984147630542548799902)
    wrapper.time_series_fit(model_fitter, training_data, seed_sequence)
def main():
    """Compare the Cardiff fits for different training set sizes.

    Loads the fitted time series for each training set size, then writes
    to the ``figure`` directory:
        -a plot and a LaTeX table of the area under the ROC curve for
            varying precipitation, with bootstrap errors
        -yearly plots of the mean and median bias losses
        -LaTeX tables of the bias losses for all years and each season
        -a residual plot for each time series
    The ERA5 time series (labelled "IFS") is included in everything
    except the ROC analysis.
    """
    monochrome = (cycler.cycler('color', ['k'])
                  * cycler.cycler('linestyle', LINESTYLE))
    monochrome2 = (cycler.cycler('color', LINECOLOUR2)
                   + cycler.cycler('linestyle', LINESTYLE2)
                   + cycler.cycler('marker', LINEMARKER2))
    plt.rcParams.update({'font.size': 14})

    # where to save the figures; exist_ok avoids the race between
    # checking for the directory and creating it
    directory = "figure"
    os.makedirs(directory, exist_ok=True)

    seed = random.SeedSequence(301608752619507842997952162996242447135)
    rng = random.RandomState(random.MT19937(seed))

    era5 = compound_poisson.era5.TimeSeries()
    era5.fit(dataset.Era5Cardiff())

    observed_data = dataset.CardiffTest()
    observed_rain = observed_data.rain
    time_array = observed_data.time_array

    training_size_array = [1, 5, 10, 20]
    script_dir_array = [
        path.join("..", dir_i) for dir_i in (
            "cardiff_1_20",
            "cardiff_5_20",
            "cardiff_10_20",
            "cardiff_20_20",
        )
    ]

    time_series_name_array = []
    # time series for each training set
    time_series_array = []
    # will need to update the location of each time series memmap_path
    # because they would be using relative paths
    for size_i, dir_i in zip(training_size_array, script_dir_array):
        time_series = joblib.load(
            path.join(dir_i, "result", "TimeSeriesHyperSlice.gz"))
        old_dir = time_series.forecaster.memmap_path
        time_series.forecaster.memmap_path = path.join(dir_i, old_dir)
        time_series.forecaster.load_memmap("r")
        time_series_array.append(time_series)
        time_series_name_array.append("CP-MCMC (" + str(size_i) + ")")

    # auc for varying precipitation
    # array of array:
    # for each training set, then for each value in rain_array
    auc_array = []
    bootstrap_error_array = []
    n_bootstrap = 32
    rain_array = [0, 5, 10, 15]
    for time_series_i in time_series_array:
        forecaster_i = time_series_i.forecaster
        auc_i = []
        error_i = []
        for rain_i in rain_array:
            roc_i = forecaster_i.get_roc_curve(rain_i, observed_rain)
            auc_i.append(roc_i.area_under_curve)
            # root mean squared difference between the bootstrap auc and
            # the auc of the full forecast
            squared_error_array = []
            for _ in range(n_bootstrap):
                bootstrap = forecaster_i.bootstrap(rng)
                roc_ij = bootstrap.get_roc_curve(rain_i, observed_rain)
                squared_error_array.append(
                    math.pow(
                        roc_ij.area_under_curve - roc_i.area_under_curve,
                        2))
            error_i.append(math.sqrt(np.mean(squared_error_array)))
        auc_array.append(auc_i)
        bootstrap_error_array.append(error_i)

    # figure format
    plt.figure()
    ax = plt.gca()
    ax.set_prop_cycle(monochrome)
    for label_i, auc_i in zip(time_series_name_array, auc_array):
        plt.plot(rain_array, auc_i, label=label_i)
    plt.ylim([0.5, 1])
    plt.xlabel("precipitation (mm)")
    plt.ylabel("Area under ROC curve")
    plt.legend()
    plt.savefig(path.join(directory, "auc.pdf"), bbox_inches="tight")
    plt.close()

    # table format with uncertainty values
    rain_label_array = [str(rain) + " mm" for rain in rain_array]
    # the backslash is escaped explicitly: "\p" is not a valid escape
    # sequence and triggers a warning on recent Python versions
    auc_table = [
        [
            "${:0.4f}\\pm {:0.4f}$".format(auc_ij, error_ij)
            for auc_ij, error_ij in zip(auc_i, error_i)
        ]
        for auc_i, error_i in zip(auc_array, bootstrap_error_array)
    ]
    data_frame = pd.DataFrame(
        np.asarray(auc_table).T, rain_label_array, time_series_name_array)
    data_frame.to_latex(path.join(directory, "auc.txt"), escape=False)

    # add era5 (for loss evaluation)
    # roc unavailable for era5
    time_series_array.append(era5)
    time_series_name_array.append("IFS")

    # yearly plot of the bias losses
    time_segmentator = time_segmentation.YearSegmentator(time_array)
    loss_segmentator_array = []
    for time_series_i in time_series_array:
        loss_segmentator_i = loss_segmentation.TimeSeries(
            time_series_i.forecaster, observed_rain)
        loss_segmentator_i.evaluate_loss(time_segmentator)
        loss_segmentator_array.append(loss_segmentator_i)

    pandas.plotting.register_matplotlib_converters()
    for i_loss, Loss in enumerate(loss_segmentation.LOSS_CLASSES):
        # array of arrays, one for each time_series in time_series_array
        # for each array, contains array of loss for each time point
        bias_loss_plot_array = []
        bias_median_loss_plot_array = []
        for loss_segmentator_i in loss_segmentator_array:
            bias_loss_plot, bias_median_loss_plot = (
                loss_segmentator_i.get_bias_plot(i_loss))
            bias_loss_plot_array.append(bias_loss_plot)
            bias_median_loss_plot_array.append(bias_median_loss_plot)

        # every curve is drawn against the time axis of the last loss
        # segmentator; named explicitly rather than relying on the loop
        # variable left over from the loop above
        time_axis_segmentator = loss_segmentator_array[-1]

        # one figure for the mean bias, then one for the median bias
        for file_suffix, plot_array in (
                ("_mean.pdf", bias_loss_plot_array),
                ("_median.pdf", bias_median_loss_plot_array)):
            plt.figure()
            ax = plt.gca()
            ax.set_prop_cycle(monochrome2)
            for time_series_label, bias_plot_array in zip(
                    time_series_name_array, plot_array):
                plt.plot(time_axis_segmentator.time_array,
                         bias_plot_array,
                         label=time_series_label)
            plt.legend(bbox_to_anchor=(0, 1, 1, 0),
                       loc="lower left",
                       mode="expand",
                       ncol=3)
            plt.ylabel(Loss.get_axis_bias_label())
            plt.xlabel("year")
            plt.xticks(rotation=45)
            plt.savefig(path.join(directory,
                                  Loss.get_short_bias_name() + file_suffix),
                        bbox_inches="tight")
            plt.close()

    # table of test set bias loss
    time_segmentator_array = {
        "all_years": time_segmentation.AllInclusive(time_array),
        "spring": time_segmentation.SpringSegmentator(time_array),
        "summer": time_segmentation.SummerSegmentator(time_array),
        "autumn": time_segmentation.AutumnSegmentator(time_array),
        "winter": time_segmentation.WinterSegmentator(time_array),
    }
    time_segmentator_names = list(time_segmentator_array.keys())

    # array of loss_segmentator objects
    # dim 0: for each time series
    # dim 1: for each time segmentator
    loss_array = []
    for time_series_i in time_series_array:
        loss_array_i = []
        for time_segmentator_k in time_segmentator_array.values():
            forecaster_i = time_series_i.forecaster
            loss_i = loss_segmentation.TimeSeries(forecaster_i,
                                                  observed_rain)
            loss_i.evaluate_loss(time_segmentator_k)
            loss_array_i.append(loss_i)
        loss_array.append(loss_array_i)

    # using training set size 5 years to get bootstrap variance, this is
    # used to guide the number of decimal places to use
    n_decimal = 3
    float_format = ("{:." + str(n_decimal) + "f}").format

    for i_loss, Loss in enumerate(loss_segmentation.LOSS_CLASSES):
        # table of losses
        # columns: for each time segmentator
        # rows: for each time series
        loss_mean_array = []
        loss_median_array = []
        for loss_array_i in loss_array:
            loss_mean_i = []
            loss_median_i = []
            for loss_segmentator_i in loss_array_i:
                loss = loss_segmentator_i.loss_all_array[i_loss]
                loss_mean_i.append(loss.get_bias_loss())
                loss_median_i.append(loss.get_bias_median_loss())
            loss_mean_array.append(loss_mean_i)
            loss_median_array.append(loss_median_i)

        for prefix, loss_table in zip(["mean", "median"],
                                      [loss_mean_array, loss_median_array]):
            data_frame = pd.DataFrame(loss_table, time_series_name_array,
                                      time_segmentator_names)
            path_to_table = path.join(
                directory,
                prefix + "_" + Loss.get_short_bias_name() + ".txt")
            data_frame.to_latex(path_to_table, float_format=float_format)

    # residual plot for each time series
    for name_i, time_series_i in zip(time_series_name_array,
                                     time_series_array):
        residual_plot = residual_analysis.ResidualLnqqPlotter()
        # add residuals data
        residual_plot.add_data(time_series_i.forecaster, observed_rain)
        # plot residual data
        residual_plot.plot_heatmap([[0, 3.8], [0, 3.8]], 1.8, 5.3, 'Greys')
        plt.savefig(path.join(directory, name_i + "_residual_qq_hist.pdf"),
                    bbox_inches="tight")
        plt.close()

    for time_series_i in time_series_array:
        time_series_i.forecaster.del_memmap()