Esempio n. 1
0
def main(args: argparse.Namespace) -> None:
    """Fit every baseball model on the training split and print the results.

    Args:
        args: Parsed command-line options. Reads ``device``, ``num_chains``,
            ``num_warmup``, ``num_samples`` and ``algo_name``.
    """
    numpyro.set_platform(args.device)
    numpyro.set_host_device_count(args.num_chains)

    _, fetch_train = load_dataset(BASEBALL, split="train", shuffle=False)
    train, player_names = fetch_train()
    _, fetch_test = load_dataset(BASEBALL, split="test", shuffle=False)
    test, _ = fetch_test()

    # Column 0 is used as at-bats and column 1 as hits for both splits.
    at_bats, hits = train[:, 0], train[:, 1]
    at_bats_test, hits_test = test[:, 0], test[:, 1]

    inference_options = dict(
        num_warmup=args.num_warmup,
        num_samples=args.num_samples,
        num_chains=args.num_chains,
        algo_name=args.algo_name,
    )
    # (at_bats, hits, is_train) triples reported for every model.
    reports = (
        (at_bats, hits, True),
        (at_bats_test, hits_test, False),
    )
    candidates = (
        fully_pooled,
        not_pooled,
        partially_pooled,
        partially_pooled_with_logit,
    )

    # The 1-based position of the model doubles as its PRNG seed.
    for seed, model in enumerate(candidates, start=1):
        rng_key, rng_key_predict = random.split(random.PRNGKey(seed))
        posterior = run_inference(
            model, at_bats, hits, rng_key, **inference_options
        )
        predictions = predict(model, at_bats, posterior, rng_key_predict)["obs"]
        # NOTE(review): the same predictions (drawn with the training
        # at-bats) are reported against the test split as well -- confirm
        # that print_results expects this.
        for split_at_bats, split_hits, is_train in reports:
            print_results(
                model.__name__,
                predictions,
                split_at_bats,
                split_hits,
                player_names,
                is_train=is_train,
            )
Esempio n. 2
0
def MIPS_24(prior, num_samples=500, num_warmup=500, num_chains=4,
            chain_method='parallel'):
    """Run NUTS on ``mips_model`` for a single prior and return the sampler.

    Args:
        prior: Prior object; it is handed to the model wrapped in a
            one-element list.
        num_samples: Number of samples passed to ``MCMC``.
        num_warmup: Number of warmup steps passed to ``MCMC``.
        num_chains: Number of chains passed to ``MCMC``.
        chain_method: Chain method passed to ``MCMC``.

    Returns:
        The ``MCMC`` object after ``run`` has completed.
    """
    # NOTE(review): the host device count is fixed at 4 regardless of
    # num_chains -- confirm this is intended.
    numpyro.set_host_device_count(4)
    sampler = MCMC(
        NUTS(mips_model),
        num_samples=num_samples,
        num_warmup=num_warmup,
        num_chains=num_chains,
        chain_method=chain_method,
    )
    # Fixed seed: every call starts from PRNGKey(0).
    sampler.run(random.PRNGKey(0), [prior])
    return sampler
Esempio n. 3
0
def main(args: argparse.Namespace) -> None:
    """Simulate data, fit the semi-supervised HMM and report the posterior.

    Args:
        args: Parsed command-line options. Reads ``device``, ``num_chains``,
            ``num_categories``, ``num_words``, ``num_supervised``,
            ``num_unsupervised``, ``num_warmup`` and ``num_samples``.
    """
    numpyro.set_platform(args.device)
    numpyro.set_host_device_count(args.num_chains)

    # Data simulation uses seed 1; inference below uses seed 2.
    simulated = simulate_data(
        random.PRNGKey(1),
        num_categories=args.num_categories,
        num_words=args.num_words,
        num_supervised=args.num_supervised,
        num_unsupservised=args.num_unsupervised,
    )
    (
        transition_prob,
        emission_prob,
        supervised_categories,
        supervised_words,
        unsupervised_words,
    ) = simulated

    inference_key = random.PRNGKey(2)
    sampler_options = dict(
        num_warmup=args.num_warmup,
        num_samples=args.num_samples,
        num_chains=args.num_chains,
    )
    posterior = inference(
        semi_supervised_hmm,
        args.num_categories,
        args.num_words,
        supervised_categories,
        supervised_words,
        unsupervised_words,
        inference_key,
        **sampler_options,
    )

    # Both reports receive the simulated ground truth alongside the posterior.
    print_results(posterior, transition_prob, emission_prob)
    plot_results(posterior, transition_prob)
    def __init__(self, random_state=0, num_bins=3, tot_init_items=1000, bin_values=[1.0,1.0,1.0], num_users=50):
        """Set up the bins, the BNN/MCMC settings and the initial state.

        Args:
            random_state: Seed passed to ``random.PRNGKey``.
            num_bins: Number of bins (roughly the number of arms).
            tot_init_items: Total item count that is split across the bins
                according to the randomly drawn allotments.
            bin_values: Per-bin values, stored on the instance as given.
            num_users: Stored on the instance as ``num_users``.

        Side effects:
            Sets the numpyro platform and host device count, then calls
            ``self.restock()`` and ``self._generate_user_preferences()``.
        """
        # NOTE(review): the default for bin_values is a mutable list that is
        # stored without copying, so instances built with the default share
        # one list object -- confirm nothing mutates it in place.
        self.num_bins = num_bins # The number of bins is roughly equivalent to the number of arms.
        self.bin_values = bin_values # The bin values correspond to the potential reward if the user is provided the item and they purchase it.

        self.rng_key = random.PRNGKey(random_state)

        # Create initial allotments of bins where the lowest value items will be plentiful while the high value items may be scarce
        # Draws num_bins integers with bounds (25, 75) and orders them largest first.
        # NOTE(review): sorted(...) returns a Python list, so the in-place
        # division below relies on the divisor's reflected division accepting
        # a list -- confirm this works with the installed JAX version.
        bin_allotments = sorted(random.randint(self.rng_key,(self.num_bins,),25,75))[::-1]
        bin_allotments /= sum(bin_allotments) # Normalize to get the percentage of items that are assigned to each bin initially
        self.init_bins = onp.round(tot_init_items*bin_allotments) # Place most items in first, low value bin and decrease from there.
        self.num_users = num_users

        # Set initialization for BNN parameters
        self.bnn_dx = num_bins+1 # Input dimensions
        self.bnn_dh = 5 # Size of hidden layer
        self.bnn_dy = 1 # Size of output dimensions
        self.bnn_warm_up = 500 # Number of warmup runs for MCMC
        self.bnn_num_samples = 2500 # Number of BNN Samples
        self.bnn_num_chains = 1 # Number of MCMC chains
        self.bnn_device = 'cpu'

        # Configure numpyro before the calls below, which run last in the constructor.
        numpyro.set_platform(self.bnn_device)
        numpyro.set_host_device_count(self.bnn_num_chains)

        self.restock() # Fill bins
        self._generate_user_preferences()
Esempio n. 5
0
def main() -> None:
    """Fit the Bayesian regression on 80% of the data and save all results.

    Draws prior predictive samples, runs NUTS on the training slice, draws
    posterior predictive samples for the full ``x_missing`` input and hands
    everything to ``_save_results``.
    """
    _, y, x_missing = _load_dataset()
    n_train = int(len(y) * 0.8)
    x_train, y_train = x_missing[:n_train], y[:n_train]

    num_chains = 1
    numpyro.set_platform("cpu")
    numpyro.set_host_device_count(num_chains)

    # One key each for MCMC, the posterior predictive and the prior predictive.
    rng_key, rng_key_posterior, rng_key_prior = random.split(
        random.PRNGKey(0), 3
    )

    prior_predictive = infer.Predictive(bayesian_regression, num_samples=500)
    prior = prior_predictive(rng_key_prior, x_train)

    mcmc = infer.MCMC(
        infer.NUTS(bayesian_regression),
        num_warmup=1000,
        num_samples=1000,
        num_chains=num_chains,
    )
    mcmc.run(rng_key, x_train, y_train)
    posterior_samples = mcmc.get_samples()

    # Predictions are made for every row, not only the training slice.
    posterior_predictor = infer.Predictive(
        bayesian_regression, posterior_samples=posterior_samples
    )
    posterior_predictive = posterior_predictor(rng_key_posterior, x_missing)

    _save_results(y, mcmc, prior, posterior_samples, posterior_predictive)
Esempio n. 6
0
def select_device(use_gpu, num_chains):
    """Select the numpyro platform and host device count.

    Args:
        use_gpu: When truthy, request the GPU platform with a single host
            device; on ``RuntimeError`` a warning is issued and the CPU
            platform is selected instead.
        num_chains: Number of chains to run; on CPU the host device count is
            capped at the number of available CPUs.
    """
    if use_gpu:
        # NOTE(review): if set_platform only records the preference without
        # initialising the backend, a missing GPU would surface at the sanity
        # check below rather than inside this try -- confirm.
        try:
            numpyro.set_platform('gpu')
            numpyro.set_host_device_count(1)
        except RuntimeError as e:
            warnings.warn(f'No GPU found: {e}')
            numpyro.set_platform('cpu')
    else:
        numpyro.set_platform('cpu')
        # os.cpu_count() returns None when the count cannot be determined;
        # fall back to a single device instead of failing inside min().
        available_cpus = os.cpu_count() or 1
        numpyro.set_host_device_count(min(num_chains, available_cpus))
    # Sanity check
    jax.lib.xla_bridge.get_backend().platform
Esempio n. 7
0
def all_bands(priors,
              num_samples=500,
              num_warmup=500,
              num_chains=4,
              chain_method='parallel'):
    """Run NUTS on ``spire_model`` for the given priors and return the sampler.

    Args:
        priors: Prior objects, passed straight through to the model.
        num_samples: Number of samples passed to ``MCMC``.
        num_warmup: Number of warmup steps passed to ``MCMC``.
        num_chains: Number of chains passed to ``MCMC``.
        chain_method: Chain method passed to ``MCMC``.

    Returns:
        The ``MCMC`` object after ``run`` has completed.
    """
    # NOTE(review): the host device count is fixed at 4 regardless of
    # num_chains -- confirm this is intended.
    numpyro.set_host_device_count(4)
    sampler_options = dict(
        num_samples=num_samples,
        num_warmup=num_warmup,
        num_chains=num_chains,
        chain_method=chain_method,
    )
    sampler = MCMC(NUTS(spire_model), **sampler_options)
    # Fixed seed: every call starts from PRNGKey(0).
    sampler.run(random.PRNGKey(0), priors)
    return sampler
Esempio n. 8
0
def main(args: argparse.Namespace) -> None:
    """Fit the PCA regression on 80% of the data and save all results.

    Args:
        args: Parsed command-line options. Reads ``num_chains``,
            ``num_warmup`` and ``num_samples``.
    """
    _, y, x_missing = _load_dataset()
    batch, x_dim = x_missing.shape
    n_train = int(len(y) * 0.8)
    x_train, y_train = x_missing[:n_train], y[:n_train]

    numpyro.set_platform("cpu")
    numpyro.set_host_device_count(args.num_chains)

    # Four sub-keys are drawn; the first one is not used afterwards.
    keys = random.split(random.PRNGKey(1), 4)
    _unused_key, rng_key_prior, rng_key_posterior, rng_key_pca_pred = keys

    # Both predictive passes are driven by the data shape only.
    shape_kwargs = dict(batch=batch, x_dim=x_dim)

    prior_predictive = infer.Predictive(pca_regression, num_samples=500)
    prior = prior_predictive(rng_key_prior, **shape_kwargs)

    mcmc = infer.MCMC(
        infer.NUTS(pca_regression),
        num_warmup=args.num_warmup,
        num_samples=args.num_samples,
        num_chains=args.num_chains,
    )
    mcmc.run(rng_key_posterior, x_train, y_train)
    posterior_samples = mcmc.get_samples()

    # Drop the "z" site from a copy before building the posterior predictive;
    # the full sample set is still what gets saved.
    posterior_without_z = posterior_samples.copy()
    posterior_without_z.pop("z")
    posterior_predictor = infer.Predictive(
        pca_regression, posterior_samples=posterior_without_z
    )
    posterior_predictive = posterior_predictor(rng_key_pca_pred, **shape_kwargs)

    reported_sites = ["phi", "eta", "theta", "sigma"]
    _save_results(
        y,
        mcmc,
        prior,
        posterior_samples,
        posterior_predictive,
        var_names=reported_sites,
    )
Esempio n. 9
0
    ax.plot(X_test[:, 1], mean_prediction, 'blue', ls='solid', lw=2.0)
    ax.set(xlabel="X", ylabel="Y", title="Mean predictions with 90% CI")

    plt.savefig('bnn_plot.pdf')
    plt.close()


if __name__ == "__main__":
    # This script only runs against a numpyro version starting with 0.2.0.
    assert numpyro.__version__.startswith('0.2.0')

    parser = argparse.ArgumentParser(description="Bayesian neural network example")
    parser.add_argument("-n", "--num-samples", nargs="?", default=2000, type=int)
    parser.add_argument("--num-warmup", nargs="?", default=1000, type=int)
    parser.add_argument("--num-chains", nargs="?", default=1, type=int)
    parser.add_argument("--num-data", nargs="?", default=100, type=int)
    parser.add_argument("--num-hidden", nargs="?", default=5, type=int)
    parser.add_argument(
        "--device", default="cpu", type=str, help='use "cpu" or "gpu".'
    )
    args = parser.parse_args()

    # Configure the platform and one host device per chain before running.
    numpyro.set_platform(args.device)
    numpyro.set_host_device_count(args.num_chains)

    main(args)
Esempio n. 10
0
about things in a gaussian model.
"""
import json

import jax.numpy as np
import numpy as onp
import numpyro
import pandas as pd
import pytest
from numpyro import distributions as dist

import shabadoo.exceptions as exceptions
from shabadoo import Normal

# Request two host devices at import time. This is best effort: any failure
# is reported on stdout and the module continues to load.
try:
    numpyro.set_host_device_count(2)
except Exception:
    print("Failed to set device count")


def test_single_coef_is_about_right():
    """Test that a single coef model which has a known value gets there.
    
    This is a good round trip test that fitting works.
    """
    # going to make a coef be the mean of y
    df = pd.DataFrame(dict(y=[1, 2, 2, 2, 2, 3] * 100))

    class Model(Normal):
        dv = "y"
        features = dict(mu=dict(transformer=1, prior=dist.Normal(0, 5)))
Esempio n. 11
0
_author__ = 'pdh21'

import numpyro
import numpyro.distributions as dist
from numpyro.infer import MCMC, NUTS
import jax.numpy as jnp
from jax import random
import numpy as np
import jax
import os
from xidplus.numpyro_fit.misc import sp_matmul

# Expose one host device per CPU. os.cpu_count() returns None when the count
# cannot be determined, so fall back to a single device in that case.
numpyro.set_host_device_count(os.cpu_count() or 1)


def pacs_model(priors):
    pointing_matrices = [([p.amat_row, p.amat_col], p.amat_data)
                         for p in priors]
    flux_lower = np.asarray([p.prior_flux_lower for p in priors]).T
    flux_upper = np.asarray([p.prior_flux_upper for p in priors]).T

    bkg_mu = np.asarray([p.bkg[0] for p in priors]).T
    bkg_sig = np.asarray([p.bkg[1] for p in priors]).T

    with numpyro.plate('bands', len(priors)):
        sigma_conf = numpyro.sample('sigma_conf', dist.HalfCauchy(1.0, 0.5))
        bkg = numpyro.sample('bkg', dist.Normal(bkg_mu, bkg_sig))

        with numpyro.plate('nsrc', priors[0].nsrc):
            src_f = numpyro.sample('src_f',
                                   dist.Uniform(flux_lower, flux_upper))

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="numpyro model")
    parser.add_argument("-n", "--num-samples", nargs="?", default=10000, type=int)
    parser.add_argument("--num-warmup", nargs="?", default=5000, type=int)
    parser.add_argument("--num-chains", nargs="?", default=2, type=int)
    parser.add_argument("--device", default="cpu", type=str, help='"cpu" or "gpu"?')
    parser.add_argument("--num-cores", nargs="?", default=2, type=int)
    parser.add_argument(
        "--rng_seed", default=1, type=int, help="random number generator seed"
    )
    args = parser.parse_args()

    # The host device count follows --num-cores, not --num-chains.
    numpyro.set_platform(args.device)
    numpyro.set_host_device_count(args.num_cores)

    main(args)
Esempio n. 13
0
    def generate_posterior(self,
                           num_dimensions=20,
                           known_active_dimensions=3,
                           num_data=100,
                           labels=None,
                           num_samples=1000,
                           num_warmup=500,
                           num_chains=1,
                           device='gpu'):
        """Run inference, flag active dimensions/pairs and sample their coefficients.

        A dimension (or pair of dimensions) is treated as active when the
        interval ``[mean - sigma * std, mean + sigma * std]`` does not
        straddle zero, where ``sigma`` is ``self.hypers['sigma']``.

        Args:
            num_dimensions: Currently unused; kept for interface compatibility.
            known_active_dimensions: Currently unused; kept for interface
                compatibility.
            num_data: Currently unused; kept for interface compatibility.
            labels: Optional per-column labels used to name pairwise
                interactions. Defaults to the column indices as strings.
            num_samples: Forwarded to ``run_inference`` and to
                ``sample_theta_posterior``.
            num_warmup: Forwarded to ``run_inference``.
            num_chains: Forwarded to ``run_inference`` and used as the host
                device count.
            device: Platform name passed to ``numpyro.set_platform``.

        Returns:
            A 4-tuple ``(active_dimensions, thetas, labels, pair_labs)``:
            the indices of active dimensions followed by the indices of
            active pairs, the coefficient samples, display labels
            (``'dim ...'``) and the pair labels built from ``labels``.
            When no dimension is active the last three items are empty
            lists, so the tuple always has four items.
        """
        X = self.X
        Y = self.Y
        hypers = self.hypers
        sigma = hypers['sigma']

        labs = [str(col) for col in range(X.shape[1])] if labels is None else labels

        # TODO:set up Numpyro device (should maybe do this in main script ?)
        numpyro.set_platform(device)
        numpyro.set_host_device_count(num_chains)

        # do inference
        rng_key = random.PRNGKey(self.seed)
        samples = self.run_inference(self.model,
                                     rng_key,
                                     X,
                                     Y,
                                     hypers,
                                     num_warmup=num_warmup,
                                     num_chains=num_chains,
                                     num_samples=num_samples)

        # compute the mean and square root variance of each coefficient theta_i
        means, stds = vmap(
            lambda dim: self.analyze_dimension(samples, X, Y, dim, hypers))(
                np.arange(X.shape[1]))
        num_dims = len(means)
        active_dimensions = []

        for dim, (mean, std) in enumerate(zip(means, stds)):
            # we mark the dimension as inactive if the interval
            # [mean - sigma * std, mean + sigma * std] contains zero
            lower, upper = mean - sigma * std, mean + sigma * std
            status = "inactive" if lower < 0.0 and upper > 0.0 else "active"
            if status == "active":
                active_dimensions.append(dim)
            print("[dimension %02d/%02d]  %s:\t%.2e +- %.2e" %
                  (dim + 1, X.shape[1], status, mean, std))

        print("Identified a total of %d active dimensions." %
              (len(active_dimensions)))

        if not active_dimensions:
            # Same arity as the main return path so callers can always
            # unpack four values.
            return active_dimensions, [], [], []

        # Compute the mean and square root variance of coefficients theta_ij for i,j active dimensions.
        # Note that the resulting numbers are only meaningful for i != j.
        dim_pairs = np.array(
            list(itertools.product(active_dimensions, active_dimensions)))
        means, stds = vmap(
            lambda dim_pair: self.analyze_pair_of_dimensions(
                samples, X, Y, dim_pair[0], dim_pair[1], hypers))(
                    dim_pairs)

        dim_pair_arr = []
        # Pair indices continue counting after the single-dimension indices.
        dim_pair_index = num_dims - 1
        dim_pair_name = []
        pair_labs = []
        for dim_pair, mean, std in zip(dim_pairs, means, stds):
            dim1, dim2 = dim_pair
            # Only unordered pairs with dim1 < dim2 are considered.
            if dim1 >= dim2:
                continue
            dim_pair_index += 1
            lower, upper = mean - sigma * std, mean + sigma * std
            if not (lower < 0.0 and upper > 0.0):
                dim_pair_arr.append(dim_pair_index)
                dim_pair_name.append('%d and %d' % (dim1 + 1, dim2 + 1))
                format_str = "Identified pairwise interaction between dimensions %d and %d: %.2e +- %.2e"
                print(format_str % (dim1 + 1, dim2 + 1, mean, std))
                pair_labs.append(str(labs[dim1]) + ' + ' + str(labs[dim2]))

        # Draw coefficients theta from the posterior, where we return all singleton
        # coefficients theta_i and pairwise coefficients theta_ij for i, j active dimensions. We use the
        # final MCMC sample obtained from the HMC sampler.
        thetas = self.sample_theta_posterior(
            X, Y, active_dimensions, samples['msq'][-1],
            samples['lambda'][-1], samples['eta1'][-1],
            samples['xisq'][-1], hypers['c'], samples['var_obs'][-1],
            num_samples, dim_pair_arr)
        print("Active dimensions: " + str(active_dimensions))

        # Display labels: single dimensions first, then the active pairs.
        labels = ['dim ' + str(i) for i in active_dimensions]
        labels.extend('dim ' + pair_name for pair_name in dim_pair_name)
        active_dimensions = active_dimensions + dim_pair_arr
        return active_dimensions, thetas, labels, pair_labs
Esempio n. 14
0
import jax.numpy as jnp
import jax.numpy as np  # it is the same anyways
from jax import random, vmap

import numpyro
import numpyro.distributions as dist
from numpyro.infer import SVI, Trace_ELBO, MCMC, NUTS

numpyro.set_host_device_count(4)  # MCMC chain


def mcmc_sample(data, num_samples=2000, num_chains=4, num_warmup=1000):
    """Run NUTS on ``model`` and return the posterior samples.

    Args:
        data: Mapping of keyword arguments for the model,
            e.g. ``dict(W=W, L=L)``.
        num_samples: Number of samples passed to ``MCMC``.
        num_chains: Number of chains passed to ``MCMC``.
        num_warmup: Number of warmup steps passed to ``MCMC``.

    Returns:
        The result of ``mcmc.get_samples()``.
    """
    # Honour the caller's settings; the defaults match the values that were
    # previously hard-coded here.
    mcmc = MCMC(NUTS(model),
                num_warmup=num_warmup,
                num_samples=num_samples,
                num_chains=num_chains)
    # Fixed seed: every call starts from PRNGKey(0).
    mcmc.run(random.PRNGKey(0), **data)
    return mcmc.get_samples()


class Sampler():
    """Bundle a NUTS-based ``MCMC`` sampler with its data and settings."""

    def __init__(self, model, data=None):
        """Store ``data`` and build the sampler for ``model``.

        The sampler always uses 1000 warmup steps, 2000 samples and 4 chains.
        """
        self.data = data
        self.num_warmup, self.num_samples, self.num_chains = 1000, 2000, 4
        sampler_options = dict(
            num_warmup=self.num_warmup,
            num_samples=self.num_samples,
            num_chains=self.num_chains,
        )
        self.mcmc = MCMC(NUTS(model), **sampler_options)
Esempio n. 15
0
def main(params):
    """Run the inference on the FX data set and score the selected dimensions.

    Loads ``fxdata5.csv``, keeps the last 64 rows, drops the 'Dummy',
    'LogReturn' and 'Spot' columns, builds a shifted target column,
    standardises everything and runs ``run_inference``. Dimensions and pairs
    of dimensions are then flagged as active when the interval
    ``mean +/- 1.5 * std`` does not straddle zero.

    Args:
        params: Mapping of extra hyperparameters merged into ``hypers`` with
            ``dict.update``.

    Returns:
        ``np.exp(-len(active_dimensions))``, where ``active_dimensions``
        holds the active single dimensions plus, when at least one dimension
        is active, the indices of the detected pairwise interactions.
    """
    #X, Y, expected_thetas, expected_pairwise = get_data(N=args.num_data, P=args.num_dimensions,
                                                       # S=args.active_dimensions)
    fxdata5 = pd.read_csv('fxdata5.csv', header=0)

    # Work on a copy restricted to the last 64 rows.
    df = fxdata5.copy()
    df = df.iloc[-64:,:]
    drop_cols = list(df.columns[df.columns.str.contains('Dummy')])
    drop_cols = drop_cols + list(df.columns[df.columns.str.contains('LogReturn')])
    drop_cols = drop_cols + list(df.columns[df.columns.str.contains('Spot')])
    # drop_cols = drop_cols + list(df.columns[df.columns.str.contains('HAR')])
    df.drop(drop_cols, axis=1, inplace=True)

    target_col = 'EURGBP_log_RealVol'
    # NOTE(review): shift(1) puts the previous row's value into the target
    # column -- confirm that a lag rather than a lead (shift(-1)) is intended.
    df['Target_' + target_col] = df[target_col].shift(1).copy()
    # Move the target column to the front so it can be split off by position.
    cols = df.columns.tolist()
    cols.insert(0, cols.pop(cols.index('Target_' + target_col)))
    df = df[cols].set_index('Date').dropna()
    df[df.columns] = StandardScaler().fit_transform(df) # scale




    # First column is the target, the remaining columns are the features.
    Y = np.array(df.iloc[:,0])
    X = np.array(df.iloc[:,1:])

    parser = argparse.ArgumentParser(description="Gaussian Process example")
    parser.add_argument("-n", "--num-samples", nargs="?", default=8000, type=int)
    parser.add_argument("--num-warmup", nargs='?', default=500, type=int)
    parser.add_argument("--num-chains", nargs='?', default=1, type=int)
    parser.add_argument("--num-data", nargs='?', default=len(X), type=int)
    parser.add_argument("--num-dimensions", nargs='?', default=len(X.T), type=int)
    parser.add_argument("--active-dimensions", nargs='?', default=3, type=int)
    parser.add_argument("--device", default='cpu', type=str, help='use "cpu" or "gpu".')
    # An empty argument list is parsed, so the defaults above always apply
    # and the real command line is ignored.
    args = parser.parse_args(args=[])

    numpyro.set_platform(args.device)
    numpyro.set_host_device_count(args.num_chains)


    # setup hyperparameters
    hypers = {'expected_sparsity': max(1.0, args.num_dimensions / 2),
              }

    # add in hyperopt params
    hypers.update(params)

    # do inference
    rng_key = random.PRNGKey(0)
    samples = run_inference(model, args, rng_key, X, Y, hypers)

    # compute the mean and square root variance of each coefficient theta_i
    means, stds = vmap(lambda dim: analyze_dimension(samples, X, Y, dim, hypers))(np.arange(args.num_dimensions))
    num_dims = len(means)
    # print("Coefficients theta_1 to theta_%d used to generate the data:" % args.active_dimensions, expected_thetas)
    # print("The single quadratic coefficient theta_{1,2} used to generate the data:", expected_pairwise)
    active_dimensions = []

    for dim, (mean, std) in enumerate(zip(means, stds)):
        # we mark the dimension as inactive if the interval [mean - 1.5 * std, mean + 1.5 * std] contains zero
        lower, upper = mean - 1.5 * std, mean + 1.5 * std
        inactive = "inactive" if lower < 0.0 and upper > 0.0 else "active"
        if inactive == "active":
            active_dimensions.append(dim)
        print("[dimension %02d/%02d]  %s:\t%.2e +- %.2e" % (dim + 1, args.num_dimensions, inactive, mean, std))

    print("Identified a total of %d active dimensions; expected %d." % (len(active_dimensions),
                                                                        args.active_dimensions))



    # Compute the mean and square root variance of coefficients theta_ij for i,j active dimensions.
    # Note that the resulting numbers are only meaningful for i != j.
    if len(active_dimensions) > 0:
        dim_pairs = np.array(list(itertools.product(active_dimensions, active_dimensions)))
        means, stds = vmap(lambda dim_pair: analyze_pair_of_dimensions(samples, X, Y,
                                                                       dim_pair[0], dim_pair[1], hypers))(dim_pairs)
        # print(dim_pairs)
        dim_pair_arr = []
        # Pair indices continue counting after the single-dimension indices.
        dim_pair_index = num_dims -1
        # NOTE(review): dim_pair_name is filled below but never read afterwards.
        dim_pair_name = []
        for dim_pair, mean, std in zip(dim_pairs, means, stds):
            dim1, dim2 = dim_pair
            # Only unordered pairs with dim1 < dim2 are considered.
            if dim1 >= dim2:
                continue
            dim_pair_index += 1
            lower, upper = mean - 1.5 * std, mean + 1.5 * std
            if not (lower < 0.0 and upper > 0.0):
                dim_pair_arr.append(dim_pair_index)
                dim_pair_name.append('%d and %d'%(dim1 + 1, dim2 + 1))
                format_str = "Identified pairwise interaction between dimensions %d and %d: %.2e +- %.2e"
                print(format_str % (dim1 + 1, dim2 + 1, mean, std))

        active_dimensions = active_dimensions + dim_pair_arr



    # The score shrinks as more dimensions and pairs are flagged as active.
    return np.exp(-len(active_dimensions))