def main():
    """Print how many decimal places of the AUC are supported by bootstrapping.

    Loads the fitted time series from ``result/TimeSeriesHyperSlice.gz`` and,
    for each precipitation threshold, draws ``n_bootstrap`` bootstrap
    forecasts, evaluates the area under the ROC curve of each against the
    Cardiff test set rain, and reports ``-round(log10(std))`` of those areas.
    """
    time_series = joblib.load("result/TimeSeriesHyperSlice.gz")
    test_rain = dataset.CardiffTest().rain
    forecaster = time_series.forecaster
    forecaster.load_memmap("r")

    # fixed seed so that the bootstrap samples are reproducible
    seed = random.SeedSequence(254267254235771235840594891069714545013)
    rng = random.RandomState(random.MT19937(seed))

    rain_thresholds = [0, 5, 10, 15, 20, 25, 30]
    n_bootstrap = 32
    decimal_places = []
    for threshold in rain_thresholds:
        # one AUC per bootstrap sample; rng is consumed in the same order as
        # a plain nested loop would
        auc_samples = [
            forecaster.bootstrap(rng)
            .get_roc_curve(threshold, test_rain)
            .area_under_curve
            for _ in range(n_bootstrap)
        ]
        spread = np.std(auc_samples, ddof=1)
        # order of magnitude of the spread -> number of decimal places
        decimal_places.append(-round(math.log10(spread)))

    table = pd.DataFrame(decimal_places, rain_thresholds, ["no. dec. places"])
    print("rain (mm)")
    print(table)
def __init__(self, figure_directory, seed):
    """Set up the figure directory, the rng and the downscale object.

    Args:
        figure_directory: stored as-is in ``self.figure_directory``
        seed: object providing ``spawn()``; one spawned child seeds
            ``self.rng`` and the object itself is handed to
            ``self.downscale.set_rng`` (presumably a
            ``numpy.random.SeedSequence`` - confirm against callers)
    """
    self.figure_directory = figure_directory
    self.n_simulate = 10
    self.downscale = None
    self.angle_resolution = dataset.ANGLE_RESOLUTION

    # take exactly one child from the seed for this object's own rng
    (own_seed,) = seed.spawn(1)
    self.rng = random.RandomState(random.MT19937(own_seed))

    # instantiate_downscale() is relied upon to replace the None placeholder
    # before set_rng is called on it
    self.instantiate_downscale()
    self.downscale.set_rng(seed)
def main():
    """Simulate a compound-Poisson series with an MA(1) term and plot it.

    Shows two figures: the simulated series, then its sample autocorrelation.
    """
    time_length = 365  #length of the time series
    #no model fields are wanted, so use a single model field filled with zeros
    n_model_field = 1
    x_array = np.zeros((time_length, n_model_field))
    n_arma = [0, 1]  #number of ar and ma terms: 0 and 1
    ma_parameter = np.asarray([0.3])  #value of the ma parameter

    #seed the rng
    seed = random.SeedSequence(103616317136878112071633291725501775781)
    rng = random.RandomState(random.MT19937(seed))

    #parameters of the model
    poisson_rate = parameter.PoissonRate(n_model_field, n_arma)
    gamma_mean = parameter.GammaMean(n_model_field, n_arma)
    gamma_dispersion = parameter.GammaDispersion(n_model_field)
    #the same ma parameter is used for the poisson rate and the gamma mean
    for cp_parameter in (poisson_rate, gamma_mean):
        cp_parameter["MA"] = ma_parameter

    #instantiate the time series
    time_series = compound_poisson.TimeSeries(
        x_array,
        cp_parameter_array=[poisson_rate, gamma_mean, gamma_dispersion])
    #by default TimeSeries normalises the model fields using mean and std;
    #the std of all zeros is 0, so set the shift and scale by hand
    time_series.x_shift = 0
    time_series.x_scale = 1
    time_series.rng = rng
    time_series.simulate()

    figure_title = "Compound-Poisson with MA(1)"

    #plot the time series
    plt.figure()
    plt.plot(time_series[:])
    plt.title(figure_title)
    plt.xlabel("time (days)")
    plt.ylabel("precipitation (mm)")
    plt.show()
    plt.close()

    #plot the sample autocorrelation
    #a peak at lag 1 indicates MA(1) behaviour
    acf = stattools.acf(time_series[:])
    plt.figure()
    plt.bar(range(len(acf)), acf)
    plt.title(figure_title)
    plt.xlabel("lag (days)")
    plt.ylabel("autocorrelation")
    plt.show()
def test_random_state():
    """Check ``com.random_state`` against every kind of seed it accepts."""
    import numpy.random as npr

    # Integer seed
    seeded = com.random_state(5)
    assert seeded.uniform() == npr.RandomState(5).uniform()

    # Existing RandomState object
    existing = npr.RandomState(10)
    assert com.random_state(existing).uniform() == npr.RandomState(10).uniform()

    # No argument gives back the numpy.random module itself
    assert com.random_state() is np.random

    # Array-like seed
    # GH32503
    state_arr_like = npr.randint(0, 2 ** 31, size=624, dtype="uint32")
    assert (
        com.random_state(state_arr_like).uniform()
        == npr.RandomState(state_arr_like).uniform()
    )

    # BitGenerators
    # GH32503
    if not np_version_under1p17:
        for bit_generator, seed in ((npr.MT19937, 3), (npr.PCG64, 11)):
            assert (
                com.random_state(bit_generator(seed)).uniform()
                == npr.RandomState(bit_generator(seed)).uniform()
            )

    # Strings and floats are rejected
    msg = (
        "random_state must be an integer, array-like, a BitGenerator, "
        "a numpy RandomState, or None"
    )
    for invalid in ("test", 5.5):
        with pytest.raises(ValueError, match=msg):
            com.random_state(invalid)
def set_rng(self, seed_sequence):
    """Seed the three random number generators held by this object.

    Three children are spawned from seed_sequence; in spawn order they seed
    ``self.rng``, ``self.forecaster_rng`` and ``self.self_forecaster_rng``.

    Args:
        seed_sequence: object providing ``spawn(n)`` whose children are
            accepted by ``random.MT19937``
    """
    spawned = [
        random.RandomState(random.MT19937(child))
        for child in seed_sequence.spawn(3)
    ]
    self.rng, self.forecaster_rng, self.self_forecaster_rng = spawned
def main():
    """Simulate a seasonal compound-Poisson series and plot it.

    The single model field is a sine wave, which drives the model through
    the regression parameter. Shows two figures: the simulated series, then
    its sample autocorrelation.
    """
    time_length = 2 * 365  #length of the time series
    #one model field containing a sine wave
    n_model_field = 1
    day_array = np.arange(time_length, dtype=float).reshape(
        time_length, n_model_field)
    x_array = np.sin(2 * math.pi * day_array / 365)
    n_arma = [0, 0]  #no arma terms
    reg_parameter = np.asarray([0.8])  #value of the regression parameter

    #seed the rng
    seed = random.SeedSequence(199412950541405529670631357604770615867)
    rng = random.RandomState(random.MT19937(seed))

    #parameters of the model
    poisson_rate = parameter.PoissonRate(n_model_field, n_arma)
    gamma_mean = parameter.GammaMean(n_model_field, n_arma)
    gamma_dispersion = parameter.GammaDispersion(n_model_field)
    #the same regression parameter is used for the poisson rate and the
    #gamma mean
    for cp_parameter in (poisson_rate, gamma_mean):
        cp_parameter["reg"] = reg_parameter

    #instantiate the time series
    time_series = compound_poisson.TimeSeries(
        x_array,
        cp_parameter_array=[poisson_rate, gamma_mean, gamma_dispersion])
    time_series.rng = rng
    time_series.simulate()

    figure_title = "Seasonal Compound-Poisson"

    #plot the time series, note the sine behaviour
    plt.figure()
    plt.plot(time_series[:])
    plt.title(figure_title)
    plt.xlabel("time (days)")
    plt.ylabel("precipitation (mm)")
    plt.show()
    plt.close()

    #plot the sample autocorrelation
    acf = stattools.acf(time_series[:])
    plt.figure()
    plt.bar(range(len(acf)), acf)
    plt.title(figure_title)
    plt.xlabel("lag (days)")
    plt.ylabel("autocorrelation")
    plt.show()
def gen_data(seed=97006855):
    """Generate a reproducible row-sparse least-squares test problem.

    Draws, in this order from one MT19937-backed generator: the matrix ``A``
    (m x n), a random choice of ``k = round(0.1 * n)`` rows, the values of
    those rows in the ground truth ``u`` (n x l, all other rows zero) and the
    starting point ``x0`` (n x l). The right-hand side is ``b = A u``.

    Args:
        seed: seed handed to ``random.MT19937``

    Returns:
        tuple ``(n, m, l, mu, A, b, u, x0, errfun, errfun_exact, sparsity)``
        where
        errfun(x1, x2): Frobenius distance between x1 and x2, relative to
            ``1 + ||x1||``
        errfun_exact(x): the same distance measured between x and ``u``
        sparsity(x): fraction of the n * l entries whose magnitude exceeds
            1e-6 times the largest magnitude in x
    """
    n, m, l = 512, 256, 2
    mu = 1e-2

    generator = random.Generator(random.MT19937(seed=seed))
    A = generator.standard_normal(size=(m, n))

    # ground truth: only k randomly chosen rows are non-zero
    k = round(n * 0.1)
    support = generator.permutation(n)[:k]
    u = np.zeros(shape=(n, l))
    u[support, :] = generator.standard_normal(size=(k, l))

    b = np.matmul(A, u)
    x0 = generator.standard_normal(size=(n, l))

    def errfun(x1, x2):
        return norm(x1 - x2, 'fro') / (1 + norm(x1, 'fro'))

    def errfun_exact(x):
        return norm(x - u, 'fro') / (1 + norm(x, 'fro'))

    def sparsity(x):
        return np.sum(np.abs(x) > 1e-6 * np.max(np.abs(x))) / (n * l)

    return n, m, l, mu, A, b, u, x0, errfun, errfun_exact, sparsity
def spawn_rng(self, n=1):
    """Return substream random number generators

    Spawn n children from self.seed_seq and wrap each one in its own
        MT19937-backed random number generator

    Args:
        n: number of generators to spawn

    Return:
        a single numpy.random.RandomState object if exactly one was spawned,
            otherwise a list of them
    """
    substream_array = [
        random.RandomState(random.MT19937(child))
        for child in self.seed_seq.spawn(n)
    ]
    return substream_array[0] if len(substream_array) == 1 else substream_array
def streamplot(function, resolution=None, min_length=None, max_time=None,
               start_width=0.5, end_width=1.5, tolerance=3e-3, loc_tolerance=1e-10,
               seed=None, complex_component="real", **kwargs):
    r"""Create a streamline plot of a vector field

    Similar to matplotlib :func:`streamplot <matplotlib.pyplot.streamplot>`

    :arg function: the Firedrake :class:`~.Function` to plot
    :arg resolution: minimum spacing between streamlines (defaults to domain size / 20)
    :arg min_length: minimum length of a streamline (defaults to 4x resolution)
    :arg max_time: maximum time to integrate a streamline (defaults to
        50 * min_length / the root-mean-square speed of the field)
    :arg start_width: line width at beginning of streamline
    :arg end_width: line width at end of streamline, to convey direction
    :arg tolerance: dimensionless tolerance for adaptive ODE integration
    :arg loc_tolerance: point location tolerance for :meth:`~firedrake.functions.Function.at`
    :arg seed: seed passed to the MT19937 bit generator that shuffles the
        order in which start points are tried
    :kwarg complex_component: If plotting complex data, which
        component? (``'real'`` or ``'imag'``). Default is ``'real'``.
    :kwarg axes: axes to draw on; when absent or ``None`` a new figure with
        a single subplot is created
    :kwarg vmin: lower limit of the default color normalization (defaults
        to the smallest speed)
    :kwarg vmax: upper limit of the default color normalization (defaults
        to the largest speed)
    :kwarg norm: color normalization (defaults to a linear
        :class:`~matplotlib.colors.Normalize` between vmin and vmax)
    :kwarg cmap: colormap, looked up through :func:`matplotlib.pyplot.get_cmap`
    :kwarg kwargs: same as for matplotlib :class:`~matplotlib.collections.LineCollection`
    :returns: the :class:`~matplotlib.collections.LineCollection` that was
        added to the axes
    :raises ValueError: if the function is not a 2D vector field
    """
    if function.ufl_shape != (2, ):
        raise ValueError("Streamplot only defined for 2D vector fields!")

    axes = kwargs.pop("axes", None)
    if axes is None:
        figure = plt.figure()
        axes = figure.add_subplot(111)

    mesh = function.ufl_domain()
    if resolution is None:
        # Largest extent of the coordinate bounding box, divided by 20
        coords = toreal(mesh.coordinates.dat.data_ro, "real")
        resolution = (coords.max(axis=0) - coords.min(axis=0)).max() / 20

    if min_length is None:
        min_length = 4 * resolution

    if max_time is None:
        # Root-mean-square speed: sqrt(integral of |function|^2 / area)
        area = assemble(Constant(1) * dx(mesh))
        average_speed = np.sqrt(
            assemble(inner(function, function) * dx) / area)
        max_time = 50 * min_length / average_speed

    streamplotter = Streamplotter(function, resolution, min_length, max_time,
                                  tolerance, loc_tolerance,
                                  complex_component=complex_component)

    # TODO: better way of seeding start points
    # Start points are laid out on a regular lattice spanned by grid points
    # of the streamplotter
    shape = streamplotter._grid.shape
    xmin = streamplotter._grid_point((0, 0))
    xmax = streamplotter._grid_point((shape[0] - 2, shape[1] - 2))
    X, Y = np.meshgrid(np.linspace(xmin[0], xmax[0], shape[0] - 2),
                       np.linspace(xmin[1], xmax[1], shape[1] - 2))
    start_points = np.vstack((X.ravel(), Y.ravel())).T

    # Randomly shuffle the start points
    generator = randomgen.Generator(randomgen.MT19937(seed))
    for x in generator.permutation(np.array(start_points)):
        streamplotter.add_streamline(x)

    # Colors are determined by the speed, thicknesses by arc length
    speeds = []
    widths = []
    for streamline in streamplotter.streamlines:
        velocity = toreal(
            np.array(function.at(streamline, tolerance=loc_tolerance)),
            complex_component)
        speed = np.sqrt(np.sum(velocity**2, axis=1))
        # One value per segment, so the speed at the last point is dropped
        speeds.extend(speed[:-1])
        delta = np.sqrt(np.sum(np.diff(streamline, axis=0)**2, axis=1))
        arc_length = np.cumsum(delta)
        length = arc_length[-1]
        # Fraction of the streamline covered at the end of each segment; the
        # width is interpolated linearly between start_width and end_width
        s = arc_length / length
        linewidth = (1 - s) * start_width + s * end_width
        widths.extend(linewidth)

    # Pair up consecutive points of every streamline into line segments
    points = []
    for streamline in streamplotter.streamlines:
        pts = streamline.reshape(-1, 1, 2)
        points.extend(np.hstack((pts[:-1], pts[1:])))

    speeds = np.array(speeds)
    widths = np.array(widths)
    points = np.asarray(points)

    vmin = kwargs.pop("vmin", speeds.min())
    vmax = kwargs.pop("vmax", speeds.max())
    norm = kwargs.pop("norm", matplotlib.colors.Normalize(vmin=vmin, vmax=vmax))
    cmap = plt.get_cmap(kwargs.pop("cmap", None))

    # NOTE(review): whatever is left in kwargs at this point is not passed on
    # to LineCollection, although the docstring advertises it - confirm
    # whether that is intended
    collection = LineCollection(points, cmap=cmap, norm=norm, linewidth=widths)
    collection.set_array(speeds)
    axes.add_collection(collection)

    _autoscale_view(axes, function.ufl_domain().coordinates.dat.data_ro)
    return collection
"""
The random module in NumPy provides several alternatives to the default PRNG,
which uses a 128-bit permutation congruential generator. While this is a good
general-purpose random number generator, it might not be sufficient for some
particular needs.

This module illustrates how to use other PRNGs.
"""
from numpy import random

# No entropy is passed in, so the seed is not fixed by this script; printing
# the seed sequence shows what was used for this run.
seed_seq = random.SeedSequence()
print(seed_seq)

# Select the Mersenne Twister bit generator explicitly and wrap it in a
# Generator, which provides the sampling methods.
bit_gen = random.MT19937(seed_seq)
rng = random.Generator(bit_gen)
J = 1
nn = 6
nn_table = np.loadtxt("NN_tables/NN_3D_L" + str(L) + ".txt")

# Metropolis parameters
temperatures = np.linspace(0.01, 3, 10)
steps = int(5e4)
measure = int(4e4)
n_measure = steps - measure
print(f"temperatures = {temperatures}")
# One accumulator per temperature
M = np.zeros_like(temperatures)
E = np.zeros_like(temperatures)

# Initialization of configuration and RNG
rng = npr.default_rng(npr.MT19937())
rng1 = npr.default_rng(npr.MT19937(seed=1))
rng2 = npr.default_rng(npr.MT19937(seed=2))
spin_lattice = {}
for i in range(N_atm):
    # Rejection sampling: redraw the pair until it lies strictly inside the
    # unit disc
    while True:
        x1 = rng1.uniform(-1, 1)
        x2 = rng2.uniform(-1, 1)
        if x1**2 + x2**2 < 1:
            break
    # Map the accepted pair to a 3-component spin; the square root is shared
    # by the first two components
    root = np.sqrt(1 - x1**2 - x2**2)
    spin_lattice[i] = (2 * x1 * root,
                       2 * x2 * root,
                       1 - 2 * (x1**2 + x2**2))
def main():
    """Compare forecasts fitted on different training set sizes.

    Loads the fitted time series from the ``cardiff_*_20`` script
    directories and writes into the ``figure`` directory:

    - ``auc.pdf`` and ``auc.txt``: area under the ROC curve for a range of
      precipitation thresholds, the table including a bootstrap root mean
      squared error
    - ``<loss>_mean.pdf`` and ``<loss>_median.pdf``: yearly bias losses for
      every loss class, now also including the ERA5 (IFS) time series
    - ``mean_<loss>.txt`` and ``median_<loss>.txt``: bias losses over all
      years and for each season
    - ``<name>_residual_qq_hist.pdf``: residual plot for each time series
    """

    monochrome = (cycler.cycler('color', ['k'])
                  * cycler.cycler('linestyle', LINESTYLE))
    monochrome2 = (cycler.cycler('color', LINECOLOUR2)
                   + cycler.cycler('linestyle', LINESTYLE2)
                   + cycler.cycler('marker', LINEMARKER2))
    plt.rcParams.update({'font.size': 14})

    #where to save the figures
    directory = "figure"
    if not path.isdir(directory):
        os.mkdir(directory)

    #fixed seed so that the bootstrap samples are reproducible
    seed = random.SeedSequence(301608752619507842997952162996242447135)
    rng = random.RandomState(random.MT19937(seed))

    era5 = compound_poisson.era5.TimeSeries()
    era5.fit(dataset.Era5Cardiff())

    observed_data = dataset.CardiffTest()
    observed_rain = observed_data.rain
    time_array = observed_data.time_array

    training_size_array = [1, 5, 10, 20]
    script_dir_array = [
        "cardiff_1_20",
        "cardiff_5_20",
        "cardiff_10_20",
        "cardiff_20_20",
    ]
    for i, dir_i in enumerate(script_dir_array):
        script_dir_array[i] = path.join("..", dir_i)

    time_series_name_array = []
    #time series for each training set
    time_series_array = []
    #will need to update the location of each time series memmap_path because
    #they would be using relative paths
    for i, dir_i in enumerate(script_dir_array):
        time_series = joblib.load(
            path.join(dir_i, "result", "TimeSeriesHyperSlice.gz"))
        old_dir = time_series.forecaster.memmap_path
        time_series.forecaster.memmap_path = path.join(dir_i, old_dir)
        time_series.forecaster.load_memmap("r")
        time_series_array.append(time_series)
        time_series_name_array.append(
            "CP-MCMC (" + str(training_size_array[i]) + ")")

    #plot auc for varying precipitation

    #array of array:
        #for each training set, then for each value in rain_array
    auc_array = []
    bootstrap_error_array = []
    n_bootstrap = 32
    rain_array = [0, 5, 10, 15]
    for i_training_size, size_i in enumerate(training_size_array):
        auc_array.append([])
        bootstrap_error_array.append([])
        forecaster_i = time_series_array[i_training_size].forecaster
        for rain_i in rain_array:
            roc_i = forecaster_i.get_roc_curve(rain_i, observed_rain)
            auc_array[i_training_size].append(roc_i.area_under_curve)

            #squared deviation of each bootstrap auc from the forecast auc
            bootstrap_i_array = []
            for j_bootstrap in range(n_bootstrap):
                bootstrap = forecaster_i.bootstrap(rng)
                roc_ij = bootstrap.get_roc_curve(rain_i, observed_rain)
                bootstrap_i_array.append(
                    math.pow(
                        roc_ij.area_under_curve - roc_i.area_under_curve, 2))
            #root mean squared error
            bootstrap_error_array[i_training_size].append(
                math.sqrt(np.mean(bootstrap_i_array)))

    #figure format
    plt.figure()
    ax = plt.gca()
    ax.set_prop_cycle(monochrome)
    for i_training_size, size_i in enumerate(training_size_array):
        plt.plot(rain_array,
                 auc_array[i_training_size],
                 label=time_series_name_array[i_training_size])
    plt.ylim([0.5, 1])
    plt.xlabel("precipitation (mm)")
    plt.ylabel("Area under ROC curve")
    plt.legend()
    plt.savefig(path.join(directory, "auc.pdf"), bbox_inches="tight")
    plt.close()

    #table format
    rain_label_array = []
    for rain in rain_array:
        rain_label_array.append(str(rain) + " mm")
    #table format with uncertainty values
    auc_table = []
    for auc_i, error_i in zip(auc_array, bootstrap_error_array):
        auc_table.append([])
        for auc_ij, error_ij in zip(auc_i, error_i):
            #raw string: "\p" is not a valid escape sequence, the backslash
            #is meant literally for latex
            auc_table[-1].append(r"${:0.4f}\pm {:0.4f}$".format(
                auc_ij, error_ij))
    data_frame = pd.DataFrame(
        np.asarray(auc_table).T, rain_label_array, time_series_name_array)
    data_frame.to_latex(path.join(directory, "auc.txt"), escape=False)

    #add era5 (for loss evaluation)
    #roc unavailable for era5
    time_series_array.append(era5)
    time_series_name_array.append("IFS")

    #yearly plot of the bias losses
    time_segmentator = time_segmentation.YearSegmentator(time_array)
    loss_segmentator_array = []
    for time_series_i in time_series_array:
        loss_segmentator_i = loss_segmentation.TimeSeries(
            time_series_i.forecaster, observed_rain)
        loss_segmentator_i.evaluate_loss(time_segmentator)
        loss_segmentator_array.append(loss_segmentator_i)

    pandas.plotting.register_matplotlib_converters()
    for i_loss, Loss in enumerate(loss_segmentation.LOSS_CLASSES):

        #array of arrays, one for each time_series in time_series_array
        #for each array, contains array of loss for each time point
        bias_loss_plot_array = []
        bias_median_loss_plot_array = []
        for loss_segmentator_i in loss_segmentator_array:
            bias_loss_plot, bias_median_loss_plot = (
                loss_segmentator_i.get_bias_plot(i_loss))
            bias_loss_plot_array.append(bias_loss_plot)
            bias_median_loss_plot_array.append(bias_median_loss_plot)

        #every curve is drawn against the time axis of the last loss
        #segmentator; named explicitly rather than relying on the variable
        #left over from the loop above
        plot_time_array = loss_segmentator_array[-1].time_array

        #one figure for the mean bias, then one for the median bias
        for suffix, plot_array in (
                ("_mean.pdf", bias_loss_plot_array),
                ("_median.pdf", bias_median_loss_plot_array)):
            plt.figure()
            ax = plt.gca()
            ax.set_prop_cycle(monochrome2)
            for time_series_label, bias_plot_array in zip(
                    time_series_name_array, plot_array):
                plt.plot(plot_time_array,
                         bias_plot_array,
                         label=time_series_label)
            plt.legend(bbox_to_anchor=(0, 1, 1, 0),
                       loc="lower left",
                       mode="expand",
                       ncol=3)
            plt.ylabel(Loss.get_axis_bias_label())
            plt.xlabel("year")
            plt.xticks(rotation=45)
            plt.savefig(
                path.join(directory, Loss.get_short_bias_name() + suffix),
                bbox_inches="tight")
            plt.close()

    #plot table of test set bias loss

    time_segmentator_array = {
        "all_years": time_segmentation.AllInclusive(time_array),
        "spring": time_segmentation.SpringSegmentator(time_array),
        "summer": time_segmentation.SummerSegmentator(time_array),
        "autumn": time_segmentation.AutumnSegmentator(time_array),
        "winter": time_segmentation.WinterSegmentator(time_array),
    }
    time_segmentator_names = list(time_segmentator_array.keys())

    #array of loss_segmentator objects, for each time series
    #dim 0: for each time series
    #dim 1: for each time segmentator
    loss_array = []

    #evaluate the losses (for mean, the median bias)
    for i, time_series_i in enumerate(time_series_array):
        loss_array.append([])
        for time_segmentator_k in time_segmentator_array.values():
            forecaster_i = time_series_i.forecaster
            loss_i = loss_segmentation.TimeSeries(forecaster_i, observed_rain)
            loss_i.evaluate_loss(time_segmentator_k)
            loss_array[i].append(loss_i)

    for i_loss, Loss in enumerate(loss_segmentation.LOSS_CLASSES):

        #using training set size 5 years to get bootstrap variance, this is
        #used to guide the number of decimal places to use
        n_decimal = 3
        float_format = ("{:." + str(n_decimal) + "f}").format

        #table of losses
        #columns: for each time segmentator
        #rows: for each time series
        loss_mean_array = []
        loss_median_array = []

        #fill the table (for mean, the median bias)
        for i_time_series, time_series_i in enumerate(time_series_array):
            loss_mean_array.append([])
            loss_median_array.append([])
            for loss_segmentator_i in loss_array[i_time_series]:
                loss = loss_segmentator_i.loss_all_array[i_loss]
                loss_mean_array[i_time_series].append(loss.get_bias_loss())
                loss_median_array[i_time_series].append(
                    loss.get_bias_median_loss())

        for prefix, loss_table in zip(["mean", "median"],
                                      [loss_mean_array, loss_median_array]):
            data_frame = pd.DataFrame(loss_table,
                                      time_series_name_array,
                                      time_segmentator_names)
            path_to_table = path.join(
                directory,
                prefix + "_" + Loss.get_short_bias_name() + ".txt")
            data_frame.to_latex(path_to_table, float_format=float_format)

    for i, time_series_i in enumerate(time_series_array):
        residual_plot = residual_analysis.ResidualLnqqPlotter()

        #add residuals data
        residual_plot.add_data(time_series_i.forecaster, observed_rain)

        #plot residual data
        residual_plot.plot_heatmap([[0, 3.8], [0, 3.8]], 1.8, 5.3, 'Greys')
        plt.savefig(
            path.join(
                directory,
                time_series_name_array[i] + "_residual_qq_hist.pdf"),
            bbox_inches="tight")
        plt.close()

    for time_series_i in time_series_array:
        time_series_i.forecaster.del_memmap()