def generate_posterior(self, num_dimensions=20, known_active_dimensions=3,
                       num_data=100, labels=None, num_samples=1000,
                       num_warmup=500, num_chains=1, device='gpu'):
    X = self.X
    Y = self.Y
    hypers = self.hypers
    sigma = hypers['sigma']

    # Fall back to positional dimension names when no labels are supplied.
    if labels is None:
        labs = [str(_) for _ in range(X.shape[1])]
    else:
        labs = labels

    # TODO: set up the NumPyro device (should maybe be done in the main script?)
    numpyro.set_platform(device)
    numpyro.set_host_device_count(num_chains)

    # Do inference.
    rng_key = random.PRNGKey(self.seed)
    samples = self.run_inference(self.model, rng_key, X, Y, hypers,
                                 num_warmup=num_warmup, num_chains=num_chains,
                                 num_samples=num_samples)

    # Compute the mean and square-root variance of each coefficient theta_i.
    means, stds = vmap(
        lambda dim: self.analyze_dimension(samples, X, Y, dim, hypers))(
            np.arange(X.shape[1]))
    num_dims = len(means)

    active_dimensions = []
    for dim, (mean, std) in enumerate(zip(means, stds)):
        # A dimension is marked inactive if the interval
        # [mean - sigma * std, mean + sigma * std] contains zero.
        lower, upper = mean - sigma * std, mean + sigma * std
        inactive = "inactive" if lower < 0.0 and upper > 0.0 else "active"
        if inactive == "active":
            active_dimensions.append(dim)
        print("[dimension %02d/%02d] %s:\t%.2e +- %.2e"
              % (dim + 1, X.shape[1], inactive, mean, std))

    print("Identified a total of %d active dimensions." % len(active_dimensions))

    # Compute the mean and square-root variance of coefficients theta_ij for
    # i, j active dimensions. The resulting numbers are only meaningful for i != j.
    if len(active_dimensions) > 0:
        dim_pairs = np.array(
            list(itertools.product(active_dimensions, active_dimensions)))
        means, stds = vmap(
            lambda dim_pair: self.analyze_pair_of_dimensions(
                samples, X, Y, dim_pair[0], dim_pair[1], hypers))(dim_pairs)

        dim_pair_arr = []
        dim_pair_index = num_dims - 1
        dim_pair_name = []
        pair_labs = []
        for dim_pair, mean, std in zip(dim_pairs, means, stds):
            dim1, dim2 = dim_pair
            if dim1 >= dim2:
                continue
            dim_pair_index += 1
            lower, upper = mean - sigma * std, mean + sigma * std
            if not (lower < 0.0 and upper > 0.0):
                dim_pair_arr.append(dim_pair_index)
                dim_pair_name.append('%d and %d' % (dim1 + 1, dim2 + 1))
                format_str = ("Identified pairwise interaction between "
                              "dimensions %d and %d: %.2e +- %.2e")
                print(format_str % (dim1 + 1, dim2 + 1, mean, std))
                pair_labs.append(str(labs[dim1]) + ' + ' + str(labs[dim2]))

        # Draw posterior samples of the coefficients theta, returning all singleton
        # coefficients theta_i and pairwise coefficients theta_ij for i, j active
        # dimensions. The final MCMC sample from the HMC sampler supplies the
        # kernel hyperparameters.
        thetas = self.sample_theta_posterior(
            X, Y, active_dimensions, samples['msq'][-1], samples['lambda'][-1],
            samples['eta1'][-1], samples['xisq'][-1], hypers['c'],
            samples['var_obs'][-1], num_samples, dim_pair_arr)

        print("Active dimensions: " + str(active_dimensions))

        # Build labels for visualizing the posterior (e.g. with corner).
        theta_labels = ['dim ' + str(i) for i in active_dimensions]
        active_dimensions = active_dimensions + dim_pair_arr
        for name in dim_pair_name:
            theta_labels.append('dim ' + name)
        # fig = corner.corner(thetas, labels=theta_labels)
        return active_dimensions, thetas, theta_labels, pair_labs
    else:
        return active_dimensions, [], [], []
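# Hedged usage sketch for generate_posterior() above. The instance name
# `sparse_gp` and the use of the optional `corner` package are assumptions made
# for illustration; the call signature matches the method as written.
#
# active_dims, thetas, theta_labels, pair_labs = sparse_gp.generate_posterior(
#     num_samples=1000, num_warmup=500, num_chains=1, device='cpu')
# fig = corner.corner(np.asarray(thetas), labels=theta_labels)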
if __name__ == "__main__":
    assert numpyro.__version__.startswith("0.8.0")
    parser = argparse.ArgumentParser(description="Bayesian Models of Annotation")
    parser.add_argument("-n", "--num-samples", nargs="?", default=1000, type=int)
    parser.add_argument("--num-warmup", nargs="?", default=1000, type=int)
    parser.add_argument("--num-chains", nargs="?", default=1, type=int)
    parser.add_argument(
        "--model",
        nargs="?",
        default="ds",
        help='one of "mn" (multinomial), "ds" (dawid_skene), "mace",'
        ' "hds" (hierarchical_dawid_skene),'
        ' "id" (item_difficulty), "lre" (logistic_random_effects)',
    )
    parser.add_argument("--device", default="cpu", type=str, help='use "cpu" or "gpu".')
    args = parser.parse_args()

    numpyro.set_platform(args.device)
    numpyro.set_host_device_count(args.num_chains)

    main(args)
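# Example invocation of the block above (hypothetical script name; flag values
# are shown only for illustration):
#
#   python annotation.py --model ds --num-samples 1000 --num-chains 4 --device cpu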
    randwalk = numpyro.sample('mu', dist.Normal(loc=0, scale=sd_randwalk))
    value = X + randwalk
    sd_value = numpyro.sample('sd', dist.Uniform(low=0.0, high=100.0))
    pred_y = numpyro.sample('pred_y', dist.Normal(loc=value, scale=sd_value), obs=y)


if __name__ == "__main__":
    # Initial settings
    num_samples = 1000
    num_warmup = 100
    num_chains = 1
    device = 'cpu'

    rng_key = random.PRNGKey(0)
    numpyro.set_platform(device)
    numpyro.set_host_device_count(num_chains)

    # Generate a random-walk series.
    random_walk = jnp.cumsum(0.5 * np.random.randn(100000))
    # If the prices themselves were random (white noise) instead:
    # random_walk = 0.5 * np.random.randn(100000)

    # Downsampled copies of the walk, keyed by the decimation factor.
    random_walks = {
        1: random_walk,
        2: random_walk[::2],
        4: random_walk[::2][::2],
        8: random_walk[::2][::2][::2],
        16: random_walk[::2][::2][::2][::2],
        32: random_walk[::2][::2][::2][::2][::2],
def main(params):
    # X, Y, expected_thetas, expected_pairwise = get_data(N=args.num_data, P=args.num_dimensions,
    #                                                     S=args.active_dimensions)
    fxdata5 = pd.read_csv('fxdata5.csv', header=0)
    df = fxdata5.copy()
    df = df.iloc[-64:, :]  # keep the last 64 rows

    # Drop dummy, log-return, and spot columns.
    drop_cols = list(df.columns[df.columns.str.contains('Dummy')])
    drop_cols = drop_cols + list(df.columns[df.columns.str.contains('LogReturn')])
    drop_cols = drop_cols + list(df.columns[df.columns.str.contains('Spot')])
    # drop_cols = drop_cols + list(df.columns[df.columns.str.contains('HAR')])
    df.drop(drop_cols, axis=1, inplace=True)

    # Create the shifted target column and move it to the front.
    target_col = 'EURGBP_log_RealVol'
    df['Target_' + target_col] = df[target_col].shift(1).copy()
    cols = df.columns.tolist()
    cols.insert(0, cols.pop(cols.index('Target_' + target_col)))
    df = df[cols].set_index('Date').dropna()
    df[df.columns] = StandardScaler().fit_transform(df)  # scale

    Y = np.array(df.iloc[:, 0])
    X = np.array(df.iloc[:, 1:])

    parser = argparse.ArgumentParser(description="Gaussian Process example")
    parser.add_argument("-n", "--num-samples", nargs="?", default=8000, type=int)
    parser.add_argument("--num-warmup", nargs='?', default=500, type=int)
    parser.add_argument("--num-chains", nargs='?', default=1, type=int)
    parser.add_argument("--num-data", nargs='?', default=len(X), type=int)
    parser.add_argument("--num-dimensions", nargs='?', default=len(X.T), type=int)
    parser.add_argument("--active-dimensions", nargs='?', default=3, type=int)
    parser.add_argument("--device", default='cpu', type=str, help='use "cpu" or "gpu".')
    args = parser.parse_args(args=[])  # parse defaults only (e.g. when run from a notebook)

    numpyro.set_platform(args.device)
    numpyro.set_host_device_count(args.num_chains)

    # Set up hyperparameters and merge in the hyperopt trial parameters.
    hypers = {'expected_sparsity': max(1.0, args.num_dimensions / 2)}
    hypers.update(params)

    # Do inference.
    rng_key = random.PRNGKey(0)
    samples = run_inference(model, args, rng_key, X, Y, hypers)

    # Compute the mean and square-root variance of each coefficient theta_i.
    means, stds = vmap(lambda dim: analyze_dimension(samples, X, Y, dim, hypers))(
        np.arange(args.num_dimensions))
    num_dims = len(means)
    # print("Coefficients theta_1 to theta_%d used to generate the data:" % args.active_dimensions, expected_thetas)
    # print("The single quadratic coefficient theta_{1,2} used to generate the data:", expected_pairwise)

    active_dimensions = []
    for dim, (mean, std) in enumerate(zip(means, stds)):
        # A dimension is marked inactive if the interval
        # [mean - 1.5 * std, mean + 1.5 * std] contains zero.
        lower, upper = mean - 1.5 * std, mean + 1.5 * std
        inactive = "inactive" if lower < 0.0 and upper > 0.0 else "active"
        if inactive == "active":
            active_dimensions.append(dim)
        print("[dimension %02d/%02d] %s:\t%.2e +- %.2e"
              % (dim + 1, args.num_dimensions, inactive, mean, std))

    print("Identified a total of %d active dimensions; expected %d."
          % (len(active_dimensions), args.active_dimensions))

    # Compute the mean and square-root variance of coefficients theta_ij for
    # i, j active dimensions. The resulting numbers are only meaningful for i != j.
    if len(active_dimensions) > 0:
        dim_pairs = np.array(list(itertools.product(active_dimensions, active_dimensions)))
        means, stds = vmap(lambda dim_pair: analyze_pair_of_dimensions(
            samples, X, Y, dim_pair[0], dim_pair[1], hypers))(dim_pairs)

        dim_pair_arr = []
        dim_pair_index = num_dims - 1
        dim_pair_name = []
        for dim_pair, mean, std in zip(dim_pairs, means, stds):
            dim1, dim2 = dim_pair
            if dim1 >= dim2:
                continue
            dim_pair_index += 1
            lower, upper = mean - 1.5 * std, mean + 1.5 * std
            if not (lower < 0.0 and upper > 0.0):
                dim_pair_arr.append(dim_pair_index)
                dim_pair_name.append('%d and %d' % (dim1 + 1, dim2 + 1))
                format_str = ("Identified pairwise interaction between "
                              "dimensions %d and %d: %.2e +- %.2e")
                print(format_str % (dim1 + 1, dim2 + 1, mean, std))

        active_dimensions = active_dimensions + dim_pair_arr

    # Objective value for the hyperparameter search: exp(-number of active dimensions).
    return np.exp(-len(active_dimensions))
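# A hedged sketch of how main() above might be driven by hyperopt. The
# search-space names and ranges here are illustrative assumptions, not taken
# from the original code.
#
# from hyperopt import fmin, tpe, hp
#
# space = {'sigma': hp.uniform('sigma', 0.5, 3.0)}  # assumed hyperparameter name
# best = fmin(fn=main, space=space, algo=tpe.suggest, max_evals=25)
# print(best)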
from time import time

import jax.numpy as jnp

# https://pytorch.org/tutorials/recipes/recipes/benchmark.html
from jax import random
from numpyro import set_platform
from numpyro.distributions import Sine, SineSkewed

if __name__ == "__main__":
    set_platform("gpu")
    key = random.PRNGKey(0)

    # Construct the base sine distribution and its sine-skewed version.
    loc = jnp.array([0.0])
    conc = jnp.array([1.0])
    corr = jnp.array([0.6])
    sine = Sine(loc, loc, conc, conc, corr)
    skewness = jnp.array([0.3, -0.2])
    ss = SineSkewed(sine, skewness)

    first_timings = []
    after_timings = []
    for samples in [1, 5, 25, 125, 625, 3125, 15625, 78125, 390625, 1953125]:
        # Time 11 repeated draws at each sample count.
        times = []
        print(samples)
        for _ in range(11):
            key, sample_key = random.split(key)
            start = time()
            data = ss.sample(sample_key, (samples,))
            times.append(time() - start)
    jnp.savez(path, **params)


def load_params(path: Union[str, pathlib.Path]) -> Dict[str, jnp.ndarray]:
    with jnp.load(path) as data:
        res = {k: jnp.array(v) for k, v in data.items()}
    return res


if __name__ == "__main__":
    num_chains = 4
    numpyro.set_platform("cpu")
    numpyro.set_host_device_count(num_chains)

    # Observed effects and standard errors (the classic eight schools data).
    y = np.array([28.0, 8.0, -3.0, 7.0, -1.0, 1.0, 18.0, 12.0])
    sigma = np.array([15.0, 10.0, 16.0, 11.0, 9.0, 11.0, 10.0, 18.0])

    rng_key = random.PRNGKey(0)
    rng_key, rng_key_posterior, rng_key_prior = random.split(rng_key, 3)

    mcmc = inference(model, sigma, y, rng_key, num_chains=num_chains)
    posterior_samples = mcmc.get_samples()
    posterior_predictive = predict(model, sigma, rng_key_posterior,
                                   posterior_samples=posterior_samples)
    prior = predict(model, sigma, rng_key_prior, num_samples=500)
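# Hedged round-trip sketch for the parameter helpers above. `save_params` is an
# assumed name for the function whose body (jnp.savez(path, **params)) opens
# this snippet, and the file name is arbitrary.
#
# params = {"w": jnp.ones(3), "b": jnp.zeros(1)}
# save_params("params.npz", params)      # assumed helper name
# restored = load_params("params.npz")
# assert set(restored) == set(params)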
# Copyright 2019- d3p Developers and their Assignees
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging

import jax
import numpyro

try:
    jax.lib.xla_bridge.get_backend('gpu')  # this will fail if no GPU is available
    numpyro.set_platform('gpu')
except RuntimeError:
    logging.info("GPU not available. Falling back to CPU.")