Example #1
def main():
    time_series = joblib.load("result/TimeSeriesHyperSlice.gz")
    test_set = dataset.CardiffTest()
    test_rain = test_set.rain

    forecaster = time_series.forecaster
    forecaster.load_memmap("r")

    seed = random.SeedSequence(254267254235771235840594891069714545013)
    rng = random.RandomState(random.MT19937(seed))

    rain_array = [0, 5, 10, 15, 20, 25, 30]
    decimal_place_array = []
    n_bootstrap = 32

    for rain in rain_array:
        auc_array = []
        for i in range(n_bootstrap):
            bootstrap = forecaster.bootstrap(rng)
            roc = bootstrap.get_roc_curve(rain, test_rain)
            auc_array.append(roc.area_under_curve)
        auc_std = np.std(auc_array, ddof=1)
        decimal_place_array.append(-round(math.log10(auc_std)))

    data_frame = pd.DataFrame(decimal_place_array, rain_array,
                              ["no. dec. places"])
    print("rain (mm)")
    print(data_frame)
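#A hedged sketch of the decimal-place heuristic above: the number of decimal
#places to report follows from the order of magnitude of the bootstrap
#standard deviation (the 0.0042 below is illustrative, not from the data)
import math
auc_std = 0.0042
print(-round(math.log10(auc_std)))  #prints 2: report the AUC to 2 decimal places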
Example #2
def __init__(self, figure_directory, seed):
    self.figure_directory = figure_directory
    self.n_simulate = 10
    self.downscale = None
    self.angle_resolution = dataset.ANGLE_RESOLUTION
    self.rng = random.RandomState(random.MT19937(seed.spawn(1)[0]))
    self.instantiate_downscale()
    self.downscale.set_rng(seed)
def main():

    time_length = 365 #length of the time series
    #there are no real model fields, so use a single model field filled with
        #zeros
    n_model_field = 1
    x_array = np.zeros((time_length, n_model_field))
    n_arma = [0, 1]  #number of AR terms is 0, number of MA terms is 1
    #value of the ma parameter
    ma_parameter = np.asarray([0.3])

    #set seed of the rng
    seed = random.SeedSequence(103616317136878112071633291725501775781)
    rng = random.RandomState(random.MT19937(seed))

    #define the parameters for this model
    poisson_rate = parameter.PoissonRate(n_model_field, n_arma)
    gamma_mean = parameter.GammaMean(n_model_field, n_arma)
    gamma_dispersion = parameter.GammaDispersion(n_model_field)

    #set the ma parameter
    poisson_rate["MA"] = ma_parameter
    gamma_mean["MA"] = ma_parameter

    #instantiate the time series
    parameter_array = [
        poisson_rate,
        gamma_mean,
        gamma_dispersion,
    ]
    time_series = compound_poisson.TimeSeries(
        x_array, cp_parameter_array=parameter_array)
    #by default, TimeSeries normalises the model fields using their mean and
        #std; the std of an all-zero field is 0, so set x_shift and x_scale
        #to appropriate values explicitly
    time_series.x_shift = 0
    time_series.x_scale = 1
    time_series.rng = rng #set rng
    time_series.simulate() #and simulate

    #plot the time series
    plt.figure()
    plt.plot(time_series[:])
    plt.title("Compound-Poisson with MA(1)")
    plt.xlabel("time (days)")
    plt.ylabel("precipitation (mm)")
    plt.show()
    plt.close()

    #plot the sample autocorrelation
    #a peak at lag 1 indicates MA(1) behaviour
    acf = stattools.acf(time_series[:])
    plt.figure()
    plt.bar(range(len(acf)), acf)
    plt.title("Compound-Poisson with MA(1)")
    plt.xlabel("lag (days)")
    plt.ylabel("autocorrelation")
    plt.show()
Example #4
def test_random_state():
    import numpy.random as npr

    # Check with seed
    state = com.random_state(5)
    assert state.uniform() == npr.RandomState(5).uniform()

    # Check with random state object
    state2 = npr.RandomState(10)
    assert com.random_state(state2).uniform() == npr.RandomState(10).uniform()

    # check with no arg random state
    assert com.random_state() is np.random

    # check array-like
    # GH32503
    state_arr_like = npr.randint(0, 2 ** 31, size=624, dtype="uint32")
    assert (
        com.random_state(state_arr_like).uniform()
        == npr.RandomState(state_arr_like).uniform()
    )

    # Check BitGenerators
    # GH32503
    if not np_version_under1p17:
        assert (
            com.random_state(npr.MT19937(3)).uniform()
            == npr.RandomState(npr.MT19937(3)).uniform()
        )
        assert (
            com.random_state(npr.PCG64(11)).uniform()
            == npr.RandomState(npr.PCG64(11)).uniform()
        )

    # Error for floats or strings
    msg = (
        "random_state must be an integer, array-like, a BitGenerator, "
        "a numpy RandomState, or None"
    )
    with pytest.raises(ValueError, match=msg):
        com.random_state("test")

    with pytest.raises(ValueError, match=msg):
        com.random_state(5.5)
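# Hedged usage note: com.random_state backs the random_state argument of the
# pandas sampling API, so the same seed types work there (GH32503); a minimal
# sketch, assuming a pandas version with BitGenerator support:
import numpy as np
import pandas as pd

s = pd.Series(range(100))
print(s.sample(n=3, random_state=np.random.MT19937(3)))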
Example #5
def set_rng(self, seed_sequence):
    """Set the random number generators

    Spawn three independent rngs from the seed sequence: one for this object,
    one for the forecaster and one for the self forecaster.
    """
    rng_array = []
    for s in seed_sequence.spawn(3):
        rng_s = random.RandomState(random.MT19937(s))
        rng_array.append(rng_s)
    self.rng = rng_array[0]
    self.forecaster_rng = rng_array[1]
    self.self_forecaster_rng = rng_array[2]
def main():

    time_length = 2 * 365  #length of the time series
    #one model field with sine wave
    n_model_field = 1
    x_array = np.zeros((time_length, n_model_field))
    x_array[:, 0] = range(time_length)
    x_array = np.sin(2 * math.pi * x_array / 365)

    n_arma = [0, 0]  #no arma
    #value of the regression parameter
    reg_parameter = np.asarray([0.8])

    #set seed of the rng
    seed = random.SeedSequence(199412950541405529670631357604770615867)
    rng = random.RandomState(random.MT19937(seed))

    #define the parameters for this model
    poisson_rate = parameter.PoissonRate(n_model_field, n_arma)
    gamma_mean = parameter.GammaMean(n_model_field, n_arma)
    gamma_dispersion = parameter.GammaDispersion(n_model_field)

    #set the regression parameter
    poisson_rate["reg"] = reg_parameter
    gamma_mean["reg"] = reg_parameter

    #instantiate the time series
    parameter_array = [
        poisson_rate,
        gamma_mean,
        gamma_dispersion,
    ]
    time_series = compound_poisson.TimeSeries(
        x_array, cp_parameter_array=parameter_array)
    time_series.rng = rng  #set rng
    time_series.simulate()  #and simulate

    #plot the time series
    #note the sine behaviour
    plt.figure()
    plt.plot(time_series[:])
    plt.title("Seasonal Compound-Poisson")
    plt.xlabel("time (days)")
    plt.ylabel("precipitation (mm)")
    plt.show()
    plt.close()

    #plot the sample autocorrelation
    acf = stattools.acf(time_series[:])
    plt.figure()
    plt.bar(range(len(acf)), acf)
    plt.title("Seasonal Compound-Poisson")
    plt.xlabel("lag (days)")
    plt.ylabel("autocorrelation")
    plt.show()
Example #7
def gen_data(seed=97006855):
    #problem sizes: n unknowns in l channels, m measurements
    n, m, l = 512, 256, 2
    mu = 1e-2  #regularisation weight, passed through to the caller
    generator = random.Generator(random.MT19937(seed=seed))
    A = generator.standard_normal(size=(m, n))  #measurement matrix
    k = round(n * 0.1)  #10% of the rows are nonzero
    p = generator.permutation(n)[:k]  #positions of the nonzero rows
    u = np.zeros(shape=(n, l))
    u[p, :] = generator.standard_normal(size=(k, l))  #ground truth
    b = np.matmul(A, u)  #noiseless measurements
    x0 = generator.standard_normal(size=(n, l))  #random starting point
    #relative change between iterates, relative error to the ground truth and
    #the fraction of effectively nonzero entries
    errfun = lambda x1, x2: norm(x1 - x2, 'fro') / (1 + norm(x1, 'fro'))
    errfun_exact = lambda x: norm(x - u, 'fro') / (1 + norm(x, 'fro'))
    sparsity = lambda x: np.sum(np.abs(x) > 1e-6 * np.max(np.abs(x))) / (n * l)
    return n, m, l, mu, A, b, u, x0, errfun, errfun_exact, sparsity
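#Hedged usage sketch: check the helper closures on the ground truth itself,
#with the imports gen_data relies on made explicit
import numpy as np
from numpy import random
from numpy.linalg import norm

n, m, l, mu, A, b, u, x0, errfun, errfun_exact, sparsity = gen_data()
print(errfun_exact(u))  #0.0: the ground truth matches itself exactly
print(sparsity(u))      #about 0.1, since 10% of the rows of u are nonzero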
Example #8
    def spawn_rng(self, n=1):
        """Return array of substream rng

        Return array of independent random number generators by spawning from
            the seed sequence

        Return:
            array of numpy.random.RandomState objects if n > 1, or just a
                single object if n == 1
        """
        seed_spawn = self.seed_seq.spawn(n)
        rng_array = []
        for seed in seed_spawn:
            rng_i = random.RandomState(random.MT19937(seed))
            rng_array.append(rng_i)
        if len(rng_array) == 1:
            rng_array = rng_array[0]
        return rng_array
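#Hedged usage sketch: spawn_rng above is a method whose host object keeps a
#numpy.random.SeedSequence in self.seed_seq; a minimal hypothetical host for
#illustration, with the method body repeated verbatim
from numpy import random

class Host:
    def __init__(self, seed):
        self.seed_seq = random.SeedSequence(seed)

    def spawn_rng(self, n=1):
        seed_spawn = self.seed_seq.spawn(n)
        rng_array = []
        for seed in seed_spawn:
            rng_array.append(random.RandomState(random.MT19937(seed)))
        if len(rng_array) == 1:
            rng_array = rng_array[0]
        return rng_array

host = Host(0)
rng = host.spawn_rng()        #a single RandomState
rng_list = host.spawn_rng(3)  #three independent RandomState objects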
Example #9
def streamplot(function,
               resolution=None,
               min_length=None,
               max_time=None,
               start_width=0.5,
               end_width=1.5,
               tolerance=3e-3,
               loc_tolerance=1e-10,
               seed=None,
               complex_component="real",
               **kwargs):
    r"""Create a streamline plot of a vector field

    Similar to matplotlib :func:`streamplot <matplotlib.pyplot.streamplot>`

    :arg function: the Firedrake :class:`~.Function` to plot
    :arg resolution: minimum spacing between streamlines (defaults to domain size / 20)
    :arg min_length: minimum length of a streamline (defaults to 4x resolution)
    :arg max_time: maximum time to integrate a streamline
    :arg start_width: line width at beginning of streamline
    :arg end_width: line width at end of streamline, to convey direction
    :arg tolerance: dimensionless tolerance for adaptive ODE integration
    :arg loc_tolerance: point location tolerance for :meth:`~firedrake.functions.Function.at`
    :arg seed: seed for the random number generator used to shuffle the order
        in which streamlines are drawn
    :kwarg complex_component: If plotting complex data, which
        component? (``'real'`` or ``'imag'``). Default is ``'real'``.
    :kwarg kwargs: same as for matplotlib :class:`~matplotlib.collections.LineCollection`
    """
    if function.ufl_shape != (2, ):
        raise ValueError("Streamplot only defined for 2D vector fields!")

    axes = kwargs.pop("axes", None)
    if axes is None:
        figure = plt.figure()
        axes = figure.add_subplot(111)

    mesh = function.ufl_domain()
    if resolution is None:
        coords = toreal(mesh.coordinates.dat.data_ro, "real")
        resolution = (coords.max(axis=0) - coords.min(axis=0)).max() / 20

    if min_length is None:
        min_length = 4 * resolution

    if max_time is None:
        area = assemble(Constant(1) * dx(mesh))
        average_speed = np.sqrt(
            assemble(inner(function, function) * dx) / area)
        max_time = 50 * min_length / average_speed

    streamplotter = Streamplotter(function,
                                  resolution,
                                  min_length,
                                  max_time,
                                  tolerance,
                                  loc_tolerance,
                                  complex_component=complex_component)

    # TODO: better way of seeding start points
    shape = streamplotter._grid.shape
    xmin = streamplotter._grid_point((0, 0))
    xmax = streamplotter._grid_point((shape[0] - 2, shape[1] - 2))
    X, Y = np.meshgrid(np.linspace(xmin[0], xmax[0], shape[0] - 2),
                       np.linspace(xmin[1], xmax[1], shape[1] - 2))
    start_points = np.vstack((X.ravel(), Y.ravel())).T

    # Randomly shuffle the start points
    generator = randomgen.Generator(randomgen.MT19937(seed))
    for x in generator.permutation(np.array(start_points)):
        streamplotter.add_streamline(x)

    # Colors are determined by the speed, thicknesses by arc length
    speeds = []
    widths = []
    for streamline in streamplotter.streamlines:
        velocity = toreal(
            np.array(function.at(streamline, tolerance=loc_tolerance)),
            complex_component)
        speed = np.sqrt(np.sum(velocity**2, axis=1))
        speeds.extend(speed[:-1])

        delta = np.sqrt(np.sum(np.diff(streamline, axis=0)**2, axis=1))
        arc_length = np.cumsum(delta)
        length = arc_length[-1]
        s = arc_length / length
        linewidth = (1 - s) * start_width + s * end_width
        widths.extend(linewidth)

    points = []
    for streamline in streamplotter.streamlines:
        pts = streamline.reshape(-1, 1, 2)
        points.extend(np.hstack((pts[:-1], pts[1:])))

    speeds = np.array(speeds)
    widths = np.array(widths)

    points = np.asarray(points)
    vmin = kwargs.pop("vmin", speeds.min())
    vmax = kwargs.pop("vmax", speeds.max())
    norm = kwargs.pop("norm", matplotlib.colors.Normalize(vmin=vmin,
                                                          vmax=vmax))
    cmap = plt.get_cmap(kwargs.pop("cmap", None))

    collection = LineCollection(points, cmap=cmap, norm=norm, linewidth=widths)
    collection.set_array(speeds)
    axes.add_collection(collection)

    _autoscale_view(axes, function.ufl_domain().coordinates.dat.data_ro)
    return collection
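#Hedged usage sketch for streamplot; the mesh, function space and velocity
#field below are illustrative assumptions, not from the original module
#(requires Firedrake)
from firedrake import (UnitSquareMesh, VectorFunctionSpace, Function,
                       SpatialCoordinate, as_vector)

mesh = UnitSquareMesh(32, 32)
V = VectorFunctionSpace(mesh, "CG", 1)
x, y = SpatialCoordinate(mesh)
velocity = Function(V)
velocity.interpolate(as_vector([-y, x]))  #a simple rotational field
lines = streamplot(velocity, seed=0)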
"""
The random module in NumPy provides several alternatives to the default PRNG,
which uses a 128-bit permutation congruential generator. While this is a good
general-purpose random number generator, it might not be sufficient some
particular needs. This module illustrates how to use other PSNG.
"""
from numpy import random

seed_seq = random.SeedSequence()
print(seed_seq)

bit_gen = random.MT19937(seed_seq)
rng = random.Generator(bit_gen)
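#A hedged continuation: Generator accepts any bit generator, so sampling and
#swapping in another PRNG look the same; spawn a child seed so the two
#streams stay independent
print(rng.random(3))  #three uniform draws from the MT19937-backed Generator
rng_pcg = random.Generator(random.PCG64(seed_seq.spawn(1)[0]))
print(rng_pcg.random(3))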
Example #11
J = 1   #spin-spin coupling constant
nn = 6  #number of nearest neighbours on the 3D cubic lattice
#L (lattice side), N_atm (number of sites) and npr (numpy.random) are defined
#earlier in the original script
nn_table = np.loadtxt("NN_tables/NN_3D_L" + str(L) + ".txt")

# Metropolis parameters
temperatures = np.linspace(0.01, 3, 10)
steps = int(5e4)    #total number of Metropolis steps
measure = int(4e4)  #step at which measurements begin
n_measure = steps - measure  #number of measurement steps
print(f"temperatures = {temperatures}")

M = np.zeros(len(temperatures))
E = np.zeros(len(temperatures))

# Initialization of configuration and RNG
rng = npr.default_rng(npr.MT19937())
rng1 = npr.default_rng(npr.MT19937(seed=1))
rng2 = npr.default_rng(npr.MT19937(seed=2))

#draw a random initial unit spin for each site using Marsaglia's (1972)
#method: sample (x1, x2) uniformly in the unit disc, then map onto the sphere
spin_lattice = dict()
for i in range(N_atm):
    x1 = rng1.uniform(-1, 1)
    x2 = rng2.uniform(-1, 1)
    while x1**2 + x2**2 >= 1:  #reject points outside the unit disc
        x1 = rng1.uniform(-1, 1)
        x2 = rng2.uniform(-1, 1)

    spin_lattice[i] = (2 * x1 * np.sqrt(1 - x1**2 - x2**2),
                       2 * x2 * np.sqrt(1 - x1**2 - x2**2),
                       1 - 2 * (x1**2 + x2**2))
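#Hedged sanity check: Marsaglia's construction always lands on the unit
#sphere, since 4*s*(1 - s) + (1 - 2*s)**2 == 1 for s = x1**2 + x2**2
v = np.array(spin_lattice[0])
print(np.dot(v, v))  #1.0 up to floating point error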
Example #12
def main():

    monochrome = (cycler.cycler('color', ['k']) *
                  cycler.cycler('linestyle', LINESTYLE))
    monochrome2 = (cycler.cycler('color', LINECOLOUR2) +
                   cycler.cycler('linestyle', LINESTYLE2) +
                   cycler.cycler('marker', LINEMARKER2))

    plt.rcParams.update({'font.size': 14})

    #where to save the figures
    directory = "figure"
    if not path.isdir(directory):
        os.mkdir(directory)

    seed = random.SeedSequence(301608752619507842997952162996242447135)
    rng = random.RandomState(random.MT19937(seed))

    era5 = compound_poisson.era5.TimeSeries()
    era5.fit(dataset.Era5Cardiff())

    observed_data = dataset.CardiffTest()
    observed_rain = observed_data.rain
    time_array = observed_data.time_array

    training_size_array = [1, 5, 10, 20]
    script_dir_array = [
        "cardiff_1_20",
        "cardiff_5_20",
        "cardiff_10_20",
        "cardiff_20_20",
    ]
    for i, dir_i in enumerate(script_dir_array):
        script_dir_array[i] = path.join("..", dir_i)

    time_series_name_array = []  #time series for each training set
    time_series_array = []
    #update the memmap_path of each forecaster: the stored paths are relative
    #to the script directory they were saved from
    for i, dir_i in enumerate(script_dir_array):
        time_series = joblib.load(
            path.join(dir_i, "result", "TimeSeriesHyperSlice.gz"))
        old_dir = time_series.forecaster.memmap_path
        time_series.forecaster.memmap_path = path.join(dir_i, old_dir)
        time_series.forecaster.load_memmap("r")
        time_series_array.append(time_series)
        time_series_name_array.append("CP-MCMC (" +
                                      str(training_size_array[i]) + ")")

    #plot auc for varying precipitation

    #array of arrays: one row per training set, one entry per value in
    #rain_array
    auc_array = []
    bootstrap_error_array = []
    n_bootstrap = 32
    rain_array = [0, 5, 10, 15]
    for i_training_size, size_i in enumerate(training_size_array):
        auc_array.append([])
        bootstrap_error_array.append([])
        forecaster_i = time_series_array[i_training_size].forecaster
        for rain_i in rain_array:
            roc_i = forecaster_i.get_roc_curve(rain_i, observed_rain)
            auc_array[i_training_size].append(roc_i.area_under_curve)

            bootstrap_i_array = []
            for j_bootstrap in range(n_bootstrap):
                bootstrap = forecaster_i.bootstrap(rng)
                roc_ij = bootstrap.get_roc_curve(rain_i, observed_rain)
                bootstrap_i_array.append(
                    math.pow(roc_ij.area_under_curve - roc_i.area_under_curve,
                             2))
            bootstrap_error_array[i_training_size].append(
                math.sqrt(np.mean(bootstrap_i_array)))

    #figure format
    plt.figure()
    ax = plt.gca()
    ax.set_prop_cycle(monochrome)
    for i_training_size, size_i in enumerate(training_size_array):
        plt.plot(rain_array,
                 auc_array[i_training_size],
                 label=time_series_name_array[i_training_size])
    plt.ylim([0.5, 1])
    plt.xlabel("precipitation (mm)")
    plt.ylabel("Area under ROC curve")
    plt.legend()
    plt.savefig(path.join(directory, "auc.pdf"), bbox_inches="tight")
    plt.close()

    #table format
    rain_label_array = []
    for rain in rain_array:
        rain_label_array.append(str(rain) + " mm")
    #table format with uncertainty values
    auc_table = []
    for auc_i, error_i in zip(auc_array, bootstrap_error_array):
        auc_table.append([])
        for auc_ij, error_ij in zip(auc_i, error_i):
            auc_table[-1].append(r"${:0.4f}\pm {:0.4f}$".format(
                auc_ij, error_ij))

    data_frame = pd.DataFrame(
        np.asarray(auc_table).T, rain_label_array, time_series_name_array)
    data_frame.to_latex(path.join(directory, "auc.txt"), escape=False)

    #add era5 (for loss evaluation)
    #roc unavailable for era5
    time_series_array.append(era5)
    time_series_name_array.append("IFS")

    #yearly plot of the bias losses
    time_segmentator = time_segmentation.YearSegmentator(time_array)
    loss_segmentator_array = []
    for time_series_i in time_series_array:
        loss_segmentator_i = loss_segmentation.TimeSeries(
            time_series_i.forecaster, observed_rain)
        loss_segmentator_i.evaluate_loss(time_segmentator)
        loss_segmentator_array.append(loss_segmentator_i)

    pd.plotting.register_matplotlib_converters()
    for i_loss, Loss in enumerate(loss_segmentation.LOSS_CLASSES):

        #array of arrays, one for each time_series in time_series_array
        #for each array, contains array of loss for each time point
        bias_loss_plot_array = []
        bias_median_loss_plot_array = []

        for loss_segmentator_i in loss_segmentator_array:
            bias_loss_plot, bias_median_loss_plot = (
                loss_segmentator_i.get_bias_plot(i_loss))
            bias_loss_plot_array.append(bias_loss_plot)
            bias_median_loss_plot_array.append(bias_median_loss_plot)

        plt.figure()
        ax = plt.gca()
        ax.set_prop_cycle(monochrome2)

        for time_series_label, bias_plot_array in zip(time_series_name_array,
                                                      bias_loss_plot_array):
            plt.plot(loss_segmentator_i.time_array,
                     bias_plot_array,
                     label=time_series_label)
        plt.legend(bbox_to_anchor=(0, 1, 1, 0),
                   loc="lower left",
                   mode="expand",
                   ncol=3)
        plt.ylabel(Loss.get_axis_bias_label())
        plt.xlabel("year")
        plt.xticks(rotation=45)
        plt.savefig(path.join(directory,
                              Loss.get_short_bias_name() + "_mean.pdf"),
                    bbox_inches="tight")
        plt.close()

        plt.figure()
        ax = plt.gca()
        ax.set_prop_cycle(monochrome2)
        for time_series_label, bias_plot_array in zip(
                time_series_name_array, bias_median_loss_plot_array):
            plt.plot(loss_segmentator_i.time_array,
                     bias_plot_array,
                     label=time_series_label)
        plt.legend(bbox_to_anchor=(0, 1, 1, 0),
                   loc="lower left",
                   mode="expand",
                   ncol=3)
        plt.ylabel(Loss.get_axis_bias_label())
        plt.xlabel("year")
        plt.xticks(rotation=45)
        plt.savefig(path.join(directory,
                              Loss.get_short_bias_name() + "_median.pdf"),
                    bbox_inches="tight")
        plt.close()

    #plot table of test set bias loss
    time_segmentator_array = {
        "all_years": time_segmentation.AllInclusive(time_array),
        "spring": time_segmentation.SpringSegmentator(time_array),
        "summer": time_segmentation.SummerSegmentator(time_array),
        "autumn": time_segmentation.AutumnSegmentator(time_array),
        "winter": time_segmentation.WinterSegmentator(time_array),
    }
    time_segmentator_names = list(time_segmentator_array.keys())

    #array of loss_segmentator objects, for each time series
    #dim 0: for each time series
    #dim 1: for each time segmentator
    loss_array = []

    #evaluate the losses for each time series and each time segmentator
    for i, time_series_i in enumerate(time_series_array):
        loss_array.append([])
        for time_segmentator_k in time_segmentator_array.values():
            forecaster_i = time_series_i.forecaster
            loss_i = loss_segmentation.TimeSeries(forecaster_i, observed_rain)
            loss_i.evaluate_loss(time_segmentator_k)
            loss_array[i].append(loss_i)

    for i_loss, Loss in enumerate(loss_segmentation.LOSS_CLASSES):

        #the bootstrap variance, obtained from the 5-year training set, guides
        #the number of decimal places to report
        n_decimal = 3
        float_format = ("{:." + str(n_decimal) + "f}").format

        #table of losses
        #columns: for each time segmentator
        #rows: for each time series
        loss_mean_array = []
        loss_median_array = []

        #extract the mean and median bias losses for each time series
        for i_time_series, time_series_i in enumerate(time_series_array):
            loss_mean_array.append([])
            loss_median_array.append([])
            for loss_segmentator_i in loss_array[i_time_series]:
                loss = loss_segmentator_i.loss_all_array[i_loss]
                loss_mean_array[i_time_series].append(loss.get_bias_loss())
                loss_median_array[i_time_series].append(
                    loss.get_bias_median_loss())

        for prefix, loss_table in zip(["mean", "median"],
                                      [loss_mean_array, loss_median_array]):
            data_frame = pd.DataFrame(loss_table, time_series_name_array,
                                      time_segmentator_names)
            path_to_table = path.join(
                directory, prefix + "_" + Loss.get_short_bias_name() + ".txt")
            data_frame.to_latex(path_to_table, float_format=float_format)

    for i, time_series_i in enumerate(time_series_array):
        residual_plot = residual_analysis.ResidualLnqqPlotter()

        #add residuals data
        residual_plot.add_data(time_series_i.forecaster, observed_rain)

        #plot residual data
        residual_plot.plot_heatmap([[0, 3.8], [0, 3.8]], 1.8, 5.3, 'Greys')
        plt.savefig(path.join(
            directory, time_series_name_array[i] + "_residual_qq_hist.pdf"),
                    bbox_inches="tight")
        plt.close()

    for time_series_i in time_series_array:
        time_series_i.forecaster.del_memmap()