Example 1
    def generate_posterior(self,
                           num_dimensions=20,
                           known_active_dimensions=3,
                           num_data=100,
                           labels=None,
                           num_samples=1000,
                           num_warmup=500,
                           num_chains=1,
                           device='gpu'):

        X = self.X
        Y = self.Y
        hypers = self.hypers
        sigma = hypers['sigma']
        # args needed: num_chains, num_dimensions, active_dimensions

        if labels is None:
            labs = [str(_) for _ in range(X.shape[1])]
        else:
            labs = labels

        # TODO: set up the NumPyro device (this could instead be done in the main script)
        numpyro.set_platform(device)
        numpyro.set_host_device_count(num_chains)

        # do inference
        rng_key = random.PRNGKey(self.seed)
        samples = self.run_inference(self.model,
                                     rng_key,
                                     X,
                                     Y,
                                     hypers,
                                     num_warmup=num_warmup,
                                     num_chains=num_chains,
                                     num_samples=num_samples)

        # compute the mean and standard deviation of each coefficient theta_i
        means, stds = vmap(
            lambda dim: self.analyze_dimension(samples, X, Y, dim, hypers))(
                np.arange(X.shape[1]))
        num_dims = len(means)
        active_dimensions = []

        for dim, (mean, std) in enumerate(zip(means, stds)):
            # we mark the dimension as inactive if the interval [mean - sigma * std, mean + sigma * std] contains zero
            lower, upper = mean - sigma * std, mean + sigma * std
            inactive = "inactive" if lower < 0.0 and upper > 0.0 else "active"
            if inactive == "active":
                active_dimensions.append(dim)
            print("[dimension %02d/%02d]  %s:\t%.2e +- %.2e" %
                  (dim + 1, X.shape[1], inactive, mean, std))

        print("Identified a total of %d active dimensions." %
              (len(active_dimensions)))

        # Compute the mean and standard deviation of coefficients theta_ij for i, j active dimensions.
        # Note that the resulting numbers are only meaningful for i != j.
        if len(active_dimensions) > 0:

            dim_pairs = np.array(
                list(itertools.product(active_dimensions, active_dimensions)))
            means, stds = vmap(
                lambda dim_pair: self.analyze_pair_of_dimensions(
                    samples, X, Y, dim_pair[0], dim_pair[1], hypers))(
                        dim_pairs)
            # print(dim_pairs)
            dim_pair_arr = []
            dim_pair_index = num_dims - 1
            dim_pair_name = []
            pair_labs = []
            for dim_pair, mean, std in zip(dim_pairs, means, stds):
                dim1, dim2 = dim_pair
                if dim1 >= dim2:
                    continue
                dim_pair_index += 1
                lower, upper = mean - sigma * std, mean + sigma * std
                if not (lower < 0.0 and upper > 0.0):
                    dim_pair_arr.append(dim_pair_index)
                    dim_pair_name.append('%d and %d' % (dim1 + 1, dim2 + 1))
                    format_str = "Identified pairwise interaction between dimensions %d and %d: %.2e +- %.2e"
                    print(format_str % (dim1 + 1, dim2 + 1, mean, std))
                    pair_labs.append(str(labs[dim1]) + ' + ' + str(labs[dim2]))

            # Draw a single sample of coefficients theta from the posterior, where we return all singleton
            # coefficients theta_i and pairwise coefficients theta_ij for i, j active dimensions. We use the
            # final MCMC sample obtained from the HMC sampler.

            # Get posterior samples via the sample_theta_posterior() method
            thetas = self.sample_theta_posterior(
                X, Y, active_dimensions, samples['msq'][-1],
                samples['lambda'][-1], samples['eta1'][-1],
                samples['xisq'][-1], hypers['c'], samples['var_obs'][-1],
                num_samples, dim_pair_arr)
            print("Active dimensions: " + str(active_dimensions))

            # Visualize the posterior with corner

            labels = ['dim ' + str(i) for i in active_dimensions]
            active_dimensions = active_dimensions + dim_pair_arr
            for name in dim_pair_name:
                labels.append('dim ' + name)
            #fig = corner.corner(thetas, labels = labels);
            return active_dimensions, thetas, labels, pair_labs
        else:
            # keep the return arity consistent with the branch above
            return active_dimensions, [], [], []
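
A minimal sketch of how the returned draws might be visualized with corner, as the commented-out corner.corner call above hints. The instance name sparse_reg is hypothetical, and the shape of thetas is an assumption:

import numpy as onp
import corner

# assumed: sparse_reg is an instance of the class that defines generate_posterior()
active_dims, thetas, labels, pair_labs = sparse_reg.generate_posterior(device='cpu')

# thetas is expected to be a (num_samples, len(labels)) array of posterior draws
fig = corner.corner(onp.asarray(thetas), labels=labels, show_titles=True)
fig.savefig('posterior_corner.png')
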
Example 2
if __name__ == "__main__":
    assert numpyro.__version__.startswith("0.8.0")
    parser = argparse.ArgumentParser(
        description="Bayesian Models of Annotation")
    parser.add_argument("-n",
                        "--num-samples",
                        nargs="?",
                        default=1000,
                        type=int)
    parser.add_argument("--num-warmup", nargs="?", default=1000, type=int)
    parser.add_argument("--num-chains", nargs="?", default=1, type=int)
    parser.add_argument(
        "--model",
        nargs="?",
        default="ds",
        help='one of "mn" (multinomial), "ds" (dawid_skene), "mace",'
        ' "hds" (hierarchical_dawid_skene),'
        ' "id" (item_difficulty), "lre" (logistic_random_effects)',
    )
    parser.add_argument("--device",
                        default="cpu",
                        type=str,
                        help='use "cpu" or "gpu".')
    args = parser.parse_args()

    numpyro.set_platform(args.device)
    numpyro.set_host_device_count(args.num_chains)

    main(args)
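
For context, a hedged sketch of what the main(args) called above might look like. Only the MCMC plumbing follows the standard NumPyro API; get_data() and the model functions (names inferred from the --model help string) are assumptions about the rest of the script:

from jax import random
from numpyro.infer import MCMC, NUTS


def main(args):
    # assumed: these model functions and get_data() are defined elsewhere in the script
    models = {"mn": multinomial, "ds": dawid_skene, "mace": mace,
              "hds": hierarchical_dawid_skene, "id": item_difficulty,
              "lre": logistic_random_effects}
    data = get_data()
    mcmc = MCMC(NUTS(models[args.model]),
                num_warmup=args.num_warmup,
                num_samples=args.num_samples,
                num_chains=args.num_chains)
    mcmc.run(random.PRNGKey(0), *data)
    mcmc.print_summary()
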
Example 3
    randwalk = numpyro.sample('mu', dist.Normal(loc=0, scale=sd_randwalk))
    value = X + randwalk
    sd_value = numpyro.sample('sd', dist.Uniform(low=0.0, high=100.0))
    pred_y = numpyro.sample('pred_y',
                            dist.Normal(loc=value, scale=sd_value),
                            obs=y)


if __name__ == "__main__":
    # Initial settings
    num_samples = 1000
    num_warmup = 100
    num_chains = 1
    device = 'cpu'
    rng_key = random.PRNGKey(0)
    numpyro.set_platform(device)
    numpyro.set_host_device_count(num_chains)

    # Generate a random-walk series
    random_walk = jnp.cumsum(0.5 * np.random.randn(100000))

    # Alternative: assume prices are pure noise (no random walk)
    # random_walk = 0.5*np.random.randn(100000)

    random_walks = {
        1: random_walk,
        2: random_walk[::2],
        4: random_walk[::4],
        8: random_walk[::8],
        16: random_walk[::16],
        32: random_walk[::32],
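
A hedged sketch of fitting the model above with NUTS for one of the downsampled series. The function's def line is truncated in this excerpt, so its name (model) and keyword arguments below, as well as the sd_randwalk value, are assumptions:

from numpyro.infer import MCMC, NUTS

# assumed signature: model(X, y=None, sd_randwalk=...)
kernel = NUTS(model)
mcmc = MCMC(kernel, num_warmup=num_warmup, num_samples=num_samples,
            num_chains=num_chains)
series = random_walks[1]
mcmc.run(rng_key, X=series[:-1], y=series[1:], sd_randwalk=0.5)
samples = mcmc.get_samples()
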
Example 4
def main(params):
    # X, Y, expected_thetas, expected_pairwise = get_data(
    #     N=args.num_data, P=args.num_dimensions, S=args.active_dimensions)
    fxdata5 = pd.read_csv('fxdata5.csv', header=0)

    df = fxdata5.copy()
    df = df.iloc[-64:,:]
    drop_cols = list(df.columns[df.columns.str.contains('Dummy')])
    drop_cols = drop_cols + list(df.columns[df.columns.str.contains('LogReturn')])
    drop_cols = drop_cols + list(df.columns[df.columns.str.contains('Spot')])
    # drop_cols = drop_cols + list(df.columns[df.columns.str.contains('HAR')])
    df.drop(drop_cols, axis=1, inplace=True)

    target_col = 'EURGBP_log_RealVol'
    df['Target_' + target_col] = df[target_col].shift(1).copy()
    cols = df.columns.tolist()
    cols.insert(0, cols.pop(cols.index('Target_' + target_col)))
    df = df[cols].set_index('Date').dropna()
    df[df.columns] = StandardScaler().fit_transform(df)  # scale

    Y = np.array(df.iloc[:,0])
    X = np.array(df.iloc[:,1:])

    parser = argparse.ArgumentParser(description="Gaussian Process example")
    parser.add_argument("-n", "--num-samples", nargs="?", default=8000, type=int)
    parser.add_argument("--num-warmup", nargs='?', default=500, type=int)
    parser.add_argument("--num-chains", nargs='?', default=1, type=int)
    parser.add_argument("--num-data", nargs='?', default=len(X), type=int)
    parser.add_argument("--num-dimensions", nargs='?', default=len(X.T), type=int)
    parser.add_argument("--active-dimensions", nargs='?', default=3, type=int)
    parser.add_argument("--device", default='cpu', type=str, help='use "cpu" or "gpu".')
    args = parser.parse_args(args=[])

    numpyro.set_platform(args.device)
    numpyro.set_host_device_count(args.num_chains)


    # setup hyperparameters
    hypers = {'expected_sparsity': max(1.0, args.num_dimensions / 2)}

    # add in hyperopt params
    hypers.update(params)

    # do inference
    rng_key = random.PRNGKey(0)
    samples = run_inference(model, args, rng_key, X, Y, hypers)

    # compute the mean and standard deviation of each coefficient theta_i
    means, stds = vmap(lambda dim: analyze_dimension(samples, X, Y, dim, hypers))(np.arange(args.num_dimensions))
    num_dims = len(means)
   # print("Coefficients theta_1 to theta_%d used to generate the data:" % args.active_dimensions, expected_thetas)
   # print("The single quadratic coefficient theta_{1,2} used to generate the data:", expected_pairwise)
    active_dimensions = []

    for dim, (mean, std) in enumerate(zip(means, stds)):
        # we mark the dimension as inactive if the interval [mean - 1.5 * std, mean + 1.5 * std] contains zero
        lower, upper = mean - 1.5 * std, mean + 1.5 * std
        inactive = "inactive" if lower < 0.0 and upper > 0.0 else "active"
        if inactive == "active":
            active_dimensions.append(dim)
        print("[dimension %02d/%02d]  %s:\t%.2e +- %.2e" % (dim + 1, args.num_dimensions, inactive, mean, std))

    print("Identified a total of %d active dimensions; expected %d." % (len(active_dimensions),
                                                                        args.active_dimensions))



    # Compute the mean and standard deviation of coefficients theta_ij for i, j active dimensions.
    # Note that the resulting numbers are only meaningful for i != j.
    if len(active_dimensions) > 0:
        dim_pairs = np.array(list(itertools.product(active_dimensions, active_dimensions)))
        means, stds = vmap(lambda dim_pair: analyze_pair_of_dimensions(samples, X, Y,
                                                                       dim_pair[0], dim_pair[1], hypers))(dim_pairs)
        # print(dim_pairs)
        dim_pair_arr = []
        dim_pair_index = num_dims - 1
        dim_pair_name = []
        for dim_pair, mean, std in zip(dim_pairs, means, stds):
            dim1, dim2 = dim_pair
            if dim1 >= dim2:
                continue
            dim_pair_index += 1  
            lower, upper = mean - 1.5 * std, mean + 1.5 * std
            if not (lower < 0.0 and upper > 0.0):
                dim_pair_arr.append(dim_pair_index)
                dim_pair_name.append('%d and %d'%(dim1 + 1, dim2 + 1))
                format_str = "Identified pairwise interaction between dimensions %d and %d: %.2e +- %.2e"
                print(format_str % (dim1 + 1, dim2 + 1, mean, std))

        active_dimensions = active_dimensions + dim_pair_arr

    

    return np.exp(-len(active_dimensions))
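
Since main(params) returns np.exp(-len(active_dimensions)) as a scalar loss, it can serve directly as a hyperopt objective. A minimal usage sketch; the search space below (alpha1, alpha2) is an assumption, not taken from the original script:

from hyperopt import fmin, tpe, hp, Trials

# hypothetical hyperparameter ranges passed through hypers.update(params)
space = {
    'alpha1': hp.uniform('alpha1', 1.0, 5.0),
    'alpha2': hp.uniform('alpha2', 1.0, 5.0),
}

trials = Trials()
best = fmin(fn=main, space=space, algo=tpe.suggest, max_evals=25, trials=trials)
print(best)
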
Example 5
from time import time

import jax.numpy as jnp

# https://pytorch.org/tutorials/recipes/recipes/benchmark.html
from jax import random

from numpyro import set_platform
from numpyro.distributions import Sine, SineSkewed

if __name__ == "__main__":
    set_platform("gpu")

    key = random.PRNGKey(0)
    loc = jnp.array([0.0])
    conc = jnp.array([1.0])
    corr = jnp.array([0.6])
    sine = Sine(loc, loc, conc, conc, corr)
    skewness = jnp.array([0.3, -0.2])

    ss = SineSkewed(sine, skewness)
    first_timings = []
    after_timings = []
    for samples in [1, 5, 25, 125, 625, 3125, 15625, 78125, 390625, 1953125]:
        times = []
        print(samples)
        for _ in range(11):
            key, sample_key = random.split(key)
            start = time()
            data = ss.sample(sample_key, (samples, ))
            times.append(time() - start)
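            # Note: JAX dispatches computations asynchronously, so for meaningful
            # wall-clock timings the sampled array should be materialized before the
            # timer stops, e.g. (a sketch):
            #     data = ss.sample(sample_key, (samples,)).block_until_ready()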
Example 6
    np.savez(path, **params)  # use NumPy's savez; jax.numpy does not provide savez


def load_params(path: Union[str, pathlib.Path]) -> Dict[str, jnp.ndarray]:
    """Load a .npz archive written by np.savez into a dict of JAX arrays."""
    with np.load(path) as data:
        res = {k: jnp.array(v) for k, v in data.items()}

    return res


if __name__ == "__main__":

    num_chains = 4
    numpyro.set_platform("cpu")
    numpyro.set_host_device_count(num_chains)

    y = np.array([28.0, 8.0, -3.0, 7.0, -1.0, 1.0, 18.0, 12.0])
    sigma = np.array([15.0, 10.0, 16.0, 11.0, 9.0, 11.0, 10.0, 18.0])
    rng_key = random.PRNGKey(0)
    rng_key, rng_key_posterior, rng_key_prior = random.split(rng_key, 3)

    mcmc = inference(model, sigma, y, rng_key, num_chains=num_chains)
    posterior_samples = mcmc.get_samples()
    posterior_predictive = predict(model,
                                   sigma,
                                   rng_key_posterior,
                                   posterior_samples=posterior_samples)
    prior = predict(model, sigma, rng_key_prior, num_samples=500)
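
The predict helper used above is not shown in this excerpt. A minimal sketch of how it could be written with numpyro.infer.Predictive, consistent with both call sites; the original implementation may differ:

from numpyro.infer import Predictive


def predict(model, sigma, rng_key, posterior_samples=None, num_samples=None):
    # Draws from the posterior predictive when posterior_samples is given,
    # and from the prior predictive when only num_samples is given.
    predictive = Predictive(model, posterior_samples=posterior_samples,
                            num_samples=num_samples)
    return predictive(rng_key, sigma)
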
Example 7
# Copyright 2019- d3p Developers and their Assignees

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at

#     http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import jax
import numpyro
import logging

try:
    jax.lib.xla_bridge.get_backend('gpu')  # raises RuntimeError if no GPU is available
    numpyro.set_platform('gpu')
except RuntimeError:
    logging.info("GPU not available. Falling back to CPU.")
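
A sketch of an equivalent check that avoids the private jax.lib.xla_bridge module; jax.devices() also raises RuntimeError when the requested backend is unavailable:

try:
    jax.devices('gpu')  # raises RuntimeError if no GPU backend is present
    numpyro.set_platform('gpu')
except RuntimeError:
    logging.info("GPU not available. Falling back to CPU.")
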