Example #1
 def test_specify_args(self):
     y = (0.70,  -0.16,  0.77, -1.37, -1.99,  1.35, 0.08,
          0.02,  -1.48, -0.08,  0.34,  0.03, -0.42, 0.87,
          -1.36,  1.43,  0.80, -0.48, -1.61, -1.27)
     code = """
         data {
             real y[20];
         }
         parameters {
             real mu;
             real<lower=0> sigma;
         }
         model {
             y ~ normal(mu, sigma);
         }"""
     stepsize0 = 0.15
     sf = stan(model_code=code, data=dict(y=y), iter=200,
               control=dict(adapt_engaged=False, stepsize=stepsize0))
     self.assertEqual(sf.get_sampler_params()[0]['stepsize__'][0], stepsize0)
     sf2 = stan(fit=sf, iter=20, algorithm='HMC', data=dict(y=y),
                control=dict(adapt_engaged=False, stepsize=stepsize0))
     self.assertEqual(sf2.get_sampler_params()[0]['stepsize__'][0], stepsize0)
     sf3 = stan(fit=sf, iter=1, data=dict(y=y), init=0, chains=1)
     i_u = sf3.unconstrain_pars(sf3.get_inits()[0])
     self.assertEqual(list(i_u), [0, 0])
Example #2
 def test_user_init(self):
     model_code = self.model_code
     data = self.data
     fit1 = pystan.stan(model_code=model_code, iter=10, chains=1, seed=2,
                        data=data, init=[dict(mu=4)], warmup=0)
     self.assertEqual(fit1.get_inits()[0]['mu'], 4)
     fit2 = pystan.stan(model_code=model_code, iter=10, chains=1, seed=2,
                        data=data, init=[dict(mu=400)], warmup=0)
     self.assertEqual(fit2.get_inits()[0]['mu'], 400)
     self.assertFalse(all(fit1.extract()['mu'] == fit2.extract()['mu']))
Example #3
 def test_user_init(self):
     model_code = self.model_code
     data = self.data
     beta = np.ones((data['K'], data['D']))
     fit1 = pystan.stan(model_code=model_code, iter=10, chains=1, seed=2,
                        data=data, init=[dict(beta=beta)], warmup=0)
     np.testing.assert_equal(fit1.get_inits()[0]['beta'], beta)
     beta = 5 * np.ones((data['K'], data['D']))
     fit2 = pystan.stan(model_code=model_code, iter=10, chains=1, seed=2,
                        data=data, init=[dict(beta=beta)], warmup=0)
     np.testing.assert_equal(fit2.get_inits()[0]['beta'], beta)
Example #4
    def _initialize(self, xs):
        print("The following message exists as Stan instantiates the model.")
        if hasattr(self, 'file'):
            self.model = pystan.stan(file=self.file,
                                     data=xs, iter=1, chains=1)
        else:
            self.model = pystan.stan(model_code=self.model_code,
                                     data=xs, iter=1, chains=1)

        self.num_vars = sum([sum(dim) if sum(dim) != 0 else 1
                             for dim in self.model.par_dims])
        self.flag_init = True
Example #5
    @classmethod
    def setUpClass(cls):

        np.random.seed(1)

        n = 10000
        p = 3

        cls.beta_true = beta_true = (1, 3, 5)
        X = np.random.normal(size=(n, p))
        X = (X - np.mean(X, axis=0)) / np.std(X, ddof=1, axis=0, keepdims=True)
        y = np.dot(X, beta_true) + np.random.normal(size=n)

        model_code = """
        data {
            int<lower=0> N;
            int<lower=0> p;
            matrix[N,p] x;
            vector[N] y;
        }
        parameters {
            vector[p] beta;
            real<lower=0> sigma;
        }
        model {
            y ~ normal(x * beta, sigma);
        }
        """

        data = {'N': n, 'p': p, 'x': X, 'y': y}

        cls.fit = pystan.stan(model_code=model_code, data=data, iter=500)
Example #6
def fit():
    data = make_data()
    model = pystan.stan(model_code=stan_code, data=data, 
                        iter=1000, chains=5)
    #model.plot()
    #plt.show()
    return model
Example #7
 def test_stan_file(self):
     schools_code = self.schools_code
     schools_dat = self.schools_dat
     with tempfile.NamedTemporaryFile(delete=False) as f:
         f.write(schools_code.encode('utf-8'))
     fit = pystan.stan(file=f.name, data=schools_dat, iter=1000, chains=4)
     validate_data(fit)
Example #8
def run_stan(stan_data, target):

    project_dir = os.path.join(os.environ['HOME'], 'Projects', 'Kaggle', 'otto')
    data_dir = os.path.join(project_dir, 'data')

    iguesses = get_initial_values(stan_data['counts'])

    stan_file = os.path.join(project_dir, 'stan', 'single_component.stan')
    fit = pystan.stan(file=stan_file, model_name='single_component_' + target, data=stan_data,
                      chains=4, iter=400, warmup=200, init=iguesses)

    # dump the MCMC samples to an HDF file
    samples = fit.extract()

    cnames = ['concentration', 'negbin_nfailures']
    nbins = stan_data['counts'].shape[1]
    cnames += ['bin_prob_' + str(i + 1) for i in range(nbins)]

    raw_samples = np.column_stack((samples['concentration'], samples['negbin_nfailures'], samples['bin_probs']))
    samples = pd.DataFrame(data=raw_samples, columns=cnames)
    samples.to_hdf(os.path.join(data_dir, 'single_component_' + target + '_samples.h5'), 'df')

    # dump the stan results to a text file
    with open(os.path.join(data_dir, 'single_component_' + target + '_stan_summary.txt'), 'w') as f:
        print(fit, file=f)

    # make plots of the stan results
    plot_dir = os.path.join(project_dir, 'plots')
    fit.plot()
    plt.savefig(os.path.join(plot_dir, 'single_component_' + target + '_trace.png'))

    return
Example #9
def fit(model_code, *args, **kwargs):
    """
    Fit a Stan model. Caches the compiled model.

    *args and **kwargs are passed to the pystan.stan function.

    Arguments you most likely want to pass: data, init, iter, chains.        

    Unlike pystan.stan, if the n_jobs kwarg is not specified, it defaults to
    -1.

    Parameters
    -------------------
    model_code : string
        Stan model


    Returns
    -------------------
    pystan StanFit4Model instance : the fit model
    """
    kwargs = dict(kwargs)
    kwargs['model_code'] = model_code
    if 'n_jobs' not in kwargs:
        kwargs['n_jobs'] = -1
    if model_code in FIT_CACHE:
        print("Reusing model.")
        kwargs['fit'] = FIT_CACHE[model_code]
    else:
        print("NOT reusing model.")
    start = time.time()
    FIT_CACHE[model_code] = pystan.stan(*args, **kwargs)
    print("Ran in %0.3f sec." % (time.time() - start))
    return FIT_CACHE[model_code]
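A minimal usage sketch (the model string is hypothetical; the second call prints "Reusing model." and skips recompilation because the cached fit is passed through pystan's fit kwarg):

code = "parameters {real mu;} model {mu ~ normal(0, 1);}"
fit1 = fit(code, iter=500, chains=2)   # compiles the model
fit2 = fit(code, iter=500, chains=2)   # reuses the cached, compiled model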
Example #10
    def test_stan_args_basic(self):
        y = np.array([0.70,  -0.16,  0.77, -1.37, -1.99,  1.35, 0.08,
                      0.02,  -1.48, -0.08,  0.34,  0.03, -0.42, 0.87,
                      -1.36,  1.43,  0.80, -0.48, -1.61, -1.27])

        code = '''
        data {
            real y[20];
        }
        parameters {
            real mu;
            real<lower=0> sigma;
        }
        model {
            y ~ normal(mu, sigma);
        }'''

        sf = pystan.stan(model_code=code, iter=10, thin=3, data=dict(y=y))
        args = sf.stan_args[0]
        self.assertEqual(args['iter'], 10)
        self.assertEqual(args['thin'], 3)
        self.assertEqual(args['init'], b'random')

        sampling = args['ctrl']['sampling']
        self.assertEqual(sampling['adapt_engaged'], True)
        self.assertEqual(sampling['adapt_window'], 25)
        self.assertEqual(sampling['adapt_init_buffer'], 75)
        self.assertEqual(sampling['adapt_gamma'], 0.05)
        self.assertEqual(sampling['adapt_delta'], 0.8)
        self.assertEqual(sampling['adapt_kappa'], 0.75)
        self.assertEqual(sampling['adapt_t0'], 10)
Example #11
def main():
  # data
  J = 8
  data_y = np.array([28, 8, -3, 7, -1, 1, 18, 12])
  data_sigma = np.array([15, 10, 16, 11, 9, 11, 10, 18])

  standata = dict(J=J, y=data_y, sigma=data_sigma)
  fit = pystan.stan('eight_schools.stan', data=standata, iter=100000)
  print(fit)
Example #12
def gamma_tutorial():
    '''
    Basic stan gamma GLM with log link function
    Taken from http://seananderson.ca/2014/04/08/gamma-glms.html
    :return:
    '''
    # define stan data
    N = 100
    x = np.random.uniform(-1, 1, N)
    a = 0.5
    b = 1.2
    y_true = np.exp(a + b * x)
    shape = 10.0

    # Passing a vector for scale draws one sample per element, so len(y_true) == 100 gives len(y) == 100.
    # The scale is parameterized via the shape here, which is why the Stan model uses the shape/rate form.
    y = np.random.gamma(shape, scale=y_true/shape)

    # Now we put data into a dictionary so we can give to stan
    this_data = {
        'N': N,
        'x': x,
        'y': y
    }

    # define stan model
    stan_code = """
    data {
      int<lower=0> N;
      vector[N] x;
      vector[N] y;
    }
    parameters {
      real a;
      real b;
      real<lower=0.001,upper=100> shape;
    }
    model {
      a ~ normal(0,100);
      b ~ normal(0,100);
      for(i in 1:N)
        y[i] ~ gamma(shape, (shape / exp(a + b * x[i])));
    }
    """

    # fit model
    fit = pystan.stan(model_code = stan_code, data=this_data, iter=1000, chains=4, thin=3)

    '''
    Note that the following two statements are equivalent:
    y ~ normal(alpha + beta * x, sigma)
    for (n in 1:N)
        y[n] ~ normal(alpha + beta * x[n], sigma)
    ...meaning that Stan automatically vectorizes sampling statements
    '''

    """
Example #13
def main():
    # Load the data for the toy model
    ids, x, y, xerr, yerr, pxy = np.loadtxt("../data/hogg-toy-model.data", unpack=True)

    with open("models/mixture-model-with-uncertainties.stan", "r") as fp:
        model_code = fp.read()

    # Fit the model
    fit = pystan.stan(model_code=model_code, iter=10000, chains=8,
        data={
            "x_measured": x, "x_uncertainty": xerr,
            "y_measured": y, "y_uncertainty": yerr,
            "N": len(x)
        })

    print(fit)
    fit.traceplot()

    samples = fit.extract(permuted=True)
    parameters = pd.DataFrame({"m": samples["m"], "b": samples["b"]})

    # Predictive model
    pred_x = np.arange(0, 300)
    model = lambda theta: pd.Series({"fitted": theta[0] + theta[1] * pred_x})

    median_parameters = parameters.median()

    yhat = model(median_parameters)

    # get the predicted values for each chain
    chain_predictions = parameters.apply(model, axis=1)

    fig = plt.figure()
    ax = fig.add_subplot(111)

    num_chains = 50
    indices = np.random.choice(300, num_chains)

    for i, index in enumerate(indices):
        ax.plot(pred_x, chain_predictions.iloc[index, 0], color="lightgrey")

    #  data
    ax.errorbar(x, y, xerr=xerr, yerr=yerr, fmt='none', ecolor="k", zorder=10)
    ax.plot(x, y, 'ko', zorder=10)

    # fitted values
    ax.plot(pred_x, yhat["fitted"], "k", lw=2)

    # supplementals
    ax.set_xlim(0, 300)
    ax.set_ylim(0, 750)

    ax.set_xlabel('X')
    ax.set_ylabel('Y')

    plt.show()
Example #14
 def test_user_init_unspecified(self):
     model_code = """
     data {
       real x;
     }
     parameters {
       real mu;
       real<lower=0> sigma;
     }
     model {
       x ~ normal(mu, sigma);
     }
     """
     data = self.data
     # NOTE: we are only specifying 'mu' and not 'sigma'
     assertRaisesRegex = self.assertRaisesRegexp if PY2 else self.assertRaisesRegex
     with assertRaisesRegex(RuntimeError, "sigma missing"):
         pystan.stan(model_code=model_code, iter=10, chains=1, seed=2,
                     data=data, init=[dict(mu=4)], warmup=0)
Example #15
 def test_stan_parallel(self):
     schools_code = self.schools_code
     schools_dat = self.schools_dat
     try:
         fit = pystan.stan(model_code=schools_code, data=schools_dat,
                           iter=1000, chains=4, n_jobs=-1)
         validate_data(fit)
     except OSError:
         # hosted testing environments may not allow querying of # of CPUs
         pass
Example #16
def gamma_glm(data):
    '''
    Gamma GLM for use in bayesian class project
    input: data - dictionary of data for stan model
    :return:
    '''

    # if you want to make this better/run more expts, dynamically generate some of this model.
    # presently, assume flat priors for all parameters
    stan_data = data[0]

    stan_model = """
    data {
        int<lower=0> N;              // number of samples
        int<lower=0> N_test;
        int<lower=0> K;              // length of each X vector
        matrix[N,K] x;
        vector[N] y;

        matrix[N_test,K] x_test;    // samples for testing
    }

    parameters {
        real<lower=.001> beta_0;
        vector<lower=.001>[K] beta;
        real<lower=.001> phi;
        real<lower=.001> alpha;
    }

    model {
        alpha ~ normal(20,10);
        for (k in 1:K)
            beta[k] ~ normal(0,1000);
        beta_0 ~ normal(10000,1000);
        phi ~ normal(0,2);
        for (k in 1:K)
            y ~ gamma(alpha, inv(x[k] * beta + beta_0) + phi);       // inverse-link
    }

    generated quantities {                              // predictions!
        vector[N_test] y_test;
        for (n in 1:N_test)
            y_test[n] <- gamma_rng(alpha, inv(x_test[n] * beta + beta_0) + phi);
    }
    """

    # fit model
    fit = pystan.stan(model_code=stan_model, data=stan_data, iter=2000, chains=4, thin=1)

    print "Model:"

    print fit

    eval_acc(fit, data)
Example #17
    def test_user_initfun(self):
        model_code = self.model_code
        data = self.data

        def make_inits(chain_id):
            return dict(mu=chain_id)

        fit1 = pystan.stan(model_code=model_code, iter=10, chains=4, seed=2,
                           data=data, init=make_inits, warmup=0)
        for i, inits in enumerate(fit1.get_inits()):
            self.assertEqual(inits['mu'], i)
Example #18
def get_basic_model_fit(prior_params, num_iters, num_chains, init_f, seed, the_type, data):
    """
    convention is to return by permuted and unpermuted traces
    """
    import problem_props.constants as constants
    import pystan
    stan_file = constants.basic_model_stan_file
    stan_data = data_to_basic_model_pystan_input(data)
    other_inputs = {
        'n_types': max(stan_data['types']),
        'n_props': len(data),
        'n_events': len(stan_data['types']),
        'the_type': the_type + 1,
    }
    # merge the three data dicts (Python 3 compatible; dict.items() no longer concatenates)
    all_stan_data = dict(list(prior_params.items()) + list(stan_data.items()) + list(other_inputs.items()))
    if init_f is None:
        fit = pystan.stan(file=stan_file, data=all_stan_data, seed=seed, iter=num_iters,
                          chains=num_chains, verbose=True)
    else:
        init_d = [init_f(data, i) for i in range(num_chains)]
        fit = pystan.stan(file=stan_file, data=all_stan_data, seed=seed, iter=num_iters,
                          chains=num_chains, verbose=True, init=init_d)
    return fit.extract(permuted=True), fit.extract(permuted=False)
Example #19
    def test_user_initfun_chainid(self):
        model_code = self.model_code
        data = self.data

        def make_inits(chain_id):
            return dict(mu=chain_id)

        chain_id = [9, 10, 11, 12]
        fit1 = pystan.stan(model_code=model_code, iter=10, chains=4, seed=2,
                           data=data, init=make_inits, warmup=0, chain_id=chain_id)
        for i, inits in zip(chain_id, fit1.get_inits()):
            self.assertEqual(inits['mu'], i)
Example #20
def test_array_param():
    """
    Make sure shapes are getting unraveled correctly. Mixing up row-major and
    column-major data is a potential issue.
    """
    model_code = """
    data {
      int<lower=2> K;
    }
    parameters {
      real beta[K,1,2];
    }
    model {
      for (k in 1:K)
        beta[k,1,1] ~ normal(0,1);
      for (k in 1:K)
        beta[k,1,2] ~ normal(100,1);
    }"""
    fit = stan(model_code=model_code, data=dict(K=4))

    # extract, permuted
    beta = fit.extract()['beta']
    assert beta.shape == (4000, 4, 1, 2)
    beta_mean = np.mean(beta, axis=0)
    assert beta_mean.shape == (4, 1, 2)
    assert np.all(beta_mean[:, 0, 0] < 4)
    assert np.all(beta_mean[:, 0, 1] > 100 - 4)

    # extract, permuted=False
    extracted = fit.extract(permuted=False)
    assert extracted.shape == (1000, 4, 9)
    # in theory 0:4 should be
    # 'beta[0,0,0]'
    # 'beta[1,0,0]'
    # 'beta[2,0,0]'
    # 'beta[3,0,0]'
    #
    # and 4:8 should be
    # 'beta[0,0,1]'
    # 'beta[1,0,1]'
    # 'beta[2,0,1]'
    # 'beta[3,0,1]'
    assert np.all(np.mean(extracted[:, :, 0:4], axis=(0, 1)) < 4)
    assert np.all(np.mean(extracted[:, :, 4:8], axis=(0, 1)) > 100 - 4)
    assert np.all(extracted[:, :, 8] < 0)  # lp__

    # optimizing
    sm = fit.stanmodel
    op = sm.optimizing(data=dict(K=4))
    beta = op['beta']
    assert beta.shape == (4, 1, 2)
    assert np.all(beta[:, 0, 0] < 4)
    assert np.all(beta[:, 0, 1] > 100 - 4)
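The column-major unraveling this test checks can be illustrated with plain numpy (a sketch independent of Stan; order='F' reproduces Stan's Fortran-style layout):

flat = np.arange(8)                        # stand-in for one draw of beta[4,1,2]
beta = flat.reshape((4, 1, 2), order='F')  # first index varies fastest: beta[k,0,0] == k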
Example #21
    def test_user_initfun(self):
        model_code = self.model_code
        data = self.data

        beta = np.ones((data['K'], data['D']))

        def make_inits(chain_id):
            return dict(beta=beta * chain_id)

        fit1 = pystan.stan(model_code=model_code, iter=10, chains=4, seed=2,
                           data=data, init=make_inits, warmup=0)
        for i, inits in enumerate(fit1.get_inits()):
            np.testing.assert_equal(beta * i, inits['beta'])
Example #22
def get_gaussian_subspace_model_traces(num_iters, num_chains, seed, init_d, n_clusters, data, hypers):
    """
    hypers should be in dictionary form, read to pass to pystan
    parallel version would map this function partialed with only seed undetermined to a list of seeds, then reduce the results
    returns list of dictionary of param_name:traces
    """
    
    import crime_pattern.constants as constants

    stan_file = '%s/%s' % (constants.stan_folder, 'gaussian_subspaces.stan')
    N, n_dim = len(data), len(next(iter(data)))
    import pystan
    pystan_data = hypers.copy()
    pystan_data['d'] = data
    pystan_data['N'] = N
    pystan_data['n_dim'] = n_dim
    pystan_data['n_clusters'] = n_clusters

    if init_d is None:
        fit = pystan.stan(file=stan_file, data=pystan_data, seed=seed, iter=num_iters,
                          chains=num_chains, verbose=True)
    else:
        fit = pystan.stan(file=stan_file, data=pystan_data, seed=seed, iter=num_iters,
                          chains=num_chains, verbose=True, init=[init_d for i in range(num_chains)])
    return fit.extract()
Example #23
def basic_linear(data):
    '''
    Use as baseline for stan implementation
    :return:
    '''

    # Model for matrix linear regression with D = {X, Y}, X is an n*k matrix, y is n*1 values
    # explicit priors are specified in the model section, otherwise a uniform prior is assumed
    # use generated quantities to predict N new values:
    stan_data = data[0]

    stan_model = """
    data {
        int<lower=0> N;
        int<lower=0> N_test;
        int<lower=0> K;
        matrix[N,K] x;
        vector[N] y;

        matrix[N_test,K] x_test;
    }

    parameters {
        real alpha;
        vector[K] beta;
        real <lower=0> sigma;
    }

    model {
        y ~ normal(alpha + x * beta, sigma);
    }
    generated quantities {
        vector[N_test] y_test;
        for (n in 1:N_test)
            y_test[n] <- normal_rng(x_test[n] * beta + alpha, sigma);   // NOTE: generated quantities do NOT vectorize
    }

    """

    fit = pystan.stan(model_code=stan_model, data=stan_data, iter=1000, chains=4, thin=1)

    print('Model:')
    print(fit)

    # fit.traceplot()
    # py.show()
    eval_acc(fit, data)
Example #24
 def test_user_init_unspecified(self):
     model_code = """
     data {
       real x;
     }
     parameters {
       real mu;
       real<lower=0> sigma;
     }
     model {
       x ~ normal(mu, sigma);
     }
     """
     data = self.data
     # NOTE: we are only specifying 'mu' and not 'sigma' (partial inits)
     fit = pystan.stan(model_code=model_code, iter=10, chains=1, seed=2, data=data, init=[dict(mu=4)], warmup=0)
     self.assertIsNotNone(fit)
Example #25
def test_linear_regression():
    np.random.seed(1)

    n = 10000
    p = 3

    beta_true = (1, 3, 5)
    X = np.random.normal(size=(n, p))
    X = (X - np.mean(X, axis=0)) / np.std(X, ddof=1, axis=0, keepdims=True)
    y = np.dot(X, beta_true) + np.random.normal(size=n)

    # OLS
    beta = np.linalg.lstsq(X, y)[0]
    print(beta)
    print(np.std(y, ddof=1))

    mode_code = """
    data {
        int<lower=0> N;
        int<lower=0> p;
        matrix[N,p] x;
        vector[N] y;
    }
    parameters {
        vector[p] beta;
        real<lower=0> sigma;
    }
    model {
        y ~ normal(x * beta, sigma);
    }
    """

    data = {'N': n, 'p': p, 'x': X, 'y': y}

    fit = stan(model_code=model_code, data=data, iter=2000)

    print(fit)
    np.mean(fit.extract()['beta'], axis=0)
    np.mean(fit.extract()['sigma'])

    sigma = fit.extract()['sigma']
    beta = fit.extract()['beta']

    # the posterior for sigma should concentrate near its true value of 1
    assert np.count_nonzero(np.abs(sigma - 1) < 0.05)
    assert all(np.abs(np.mean(beta, 0) - np.array(beta_true)) < 0.05)
Example #26
    def test_control(self):
        assertRaisesRegex = self.assertRaisesRegexp if PY2 else self.assertRaisesRegex
        model_code = 'parameters {real y;} model {y ~ normal(0,1);}'

        with assertRaisesRegex(ValueError, '`control` must be a dictionary'):
            control_invalid = 3
            pystan.stan(model_code=model_code, control=control_invalid)
        with assertRaisesRegex(ValueError, '`control` contains unknown'):
            control_invalid = dict(foo=3)
            pystan.stan(model_code=model_code, control=control_invalid)
        with assertRaisesRegex(ValueError, '`metric` must be one of'):
            pystan.stan(model_code=model_code, control={'metric': 'lorem-ipsum'})
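For contrast, a control dictionary that passes these checks uses only known keys; adapt_delta and max_treedepth are standard NUTS settings:

control_valid = dict(adapt_delta=0.95, max_treedepth=12)
pystan.stan(model_code=model_code, control=control_valid, iter=100, chains=1)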
Example #27
    def test_grad_log(self):
        y = np.array([0.70,  -0.16,  0.77, -1.37, -1.99,  1.35, 0.08,
                      0.02,  -1.48, -0.08,  0.34,  0.03, -0.42, 0.87,
                      -1.36,  1.43,  0.80, -0.48, -1.61, -1.27])

        code = '''
        data {
            real y[20];
        }
        parameters {
            real mu;
            real<lower=0> sigma;
        }
        model {
            y ~ normal(mu, sigma);
        }'''

        def log_prob_fun(mu, log_sigma, adjust=True):
            sigma = np.exp(log_sigma)
            lp = -1 * np.sum((y - mu)**2) / (2 * (sigma**2)) - len(y) * np.log(sigma)
            if adjust:
                lp = lp + np.log(sigma)
            return lp

        def log_prob_grad_fun(mu, log_sigma, adjust=True):
            sigma = np.exp(log_sigma)
            g_lsigma = np.sum((y - mu)**2) * sigma**(-2) - len(y)
            if adjust:
                g_lsigma = g_lsigma + 1
            g_mu = np.sum(y - mu) * sigma**(-2)
            return (g_mu, g_lsigma)

        sf = stan(model_code=code, data=dict(y=y), iter=200)
        mu = 0.1
        sigma = 2
        self.assertEqual(sf.log_prob(sf.unconstrain_pars(dict(mu=mu, sigma=sigma))),
                         log_prob_fun(mu, np.log(sigma)))
        self.assertEqual(sf.log_prob(sf.unconstrain_pars(dict(mu=mu, sigma=sigma)), False),
                         log_prob_fun(mu, np.log(sigma), adjust=False))
        g1 = sf.grad_log_prob(sf.unconstrain_pars(dict(mu=mu, sigma=sigma)), False)
        g1_np = log_prob_grad_fun(mu, np.log(sigma), adjust=False)
        np.testing.assert_allclose(g1, g1_np)
Example #28
def test_matrix_param_order():
    model_code = """
    data {
    int<lower=2> K;
    }
    parameters {
    matrix[K,2] beta;
    }
    model {
    for (k in 1:K)
      beta[k,1] ~ normal(0,1);
    for (k in 1:K)
      beta[k,2] ~ normal(100,1);
    }"""
    fit = stan(model_code=model_code, data=dict(K=3))
    beta = fit.extract()['beta']
    assert beta.shape == (4000, 3, 2)
    beta_mean = np.mean(beta, axis=0)
    beta_colmeans = np.mean(beta_mean, axis=0)
    assert beta_colmeans[0] < 4
    assert beta_colmeans[1] > 100 - 4
Example #29
def test_matrix_param():
    model_code = """
    data {
    int<lower=2> K;
    int<lower=1> D;
    }
    parameters {
    matrix[K,D] beta;
    }
    model {
    for (k in 1:K)
        for (d in 1:D)
          beta[k,d] ~ normal(0,1);
    }"""
    fit = stan(model_code=model_code, data=dict(K=3,D=4))
    beta = fit.extract()['beta']
    assert beta.shape == (4000, 3, 4)
    assert np.mean(beta) < 4
    extracted = fit.extract(permuted=False)
    assert extracted.shape == (1000, 4, 13)
    assert np.mean(extracted[:, :, 0:12]) < 4  # all 12 beta columns
    assert np.all(extracted[:, :, 12] < 0)  # lp__
Example #30
def bayes_logit(data):
    stan_data = data[0]

    stan_model = """
    data {
        int<lower=0> N;              // number of samples
        int<lower=0> N_test;
        int<lower=0> K;              // length of each X vector
        matrix[N,K] x;
        int<lower=0, upper=1> y[N];

        matrix[N_test,K] x_test;    // samples for testing
    }
    parameters {
        real alpha;
        vector[K] beta;
    }
    model {
        alpha ~ normal(0,1);
        for (k in 1:K)
            beta[k] ~ normal(1,1);
        for (n in 1:N)
            y[n] ~ bernoulli(Phi(alpha + x[n] * beta));
    }
    generated quantities{
        int<lower=0, upper=1> y_test[N_test];
        for (n in 1:N_test)
            y_test[n] <- bernoulli_rng(Phi(alpha + x_test[n] * beta));
    }

    """

    fit = pystan.stan(model_code=stan_model, data=stan_data, iter=2000, chains=4, thin=1)

    print(fit)

    pdb.set_trace()

    logit_acc(fit, data)
Example #31
    args = parser.parse_args()

    with bz2.BZ2File('testdata.pkl.bz2', 'r') as inp:
        data = pickle.load(inp)

    desired_nout = 1000
    samples = args.niter // 2

    thin = samples // desired_nout
    if thin == 0:
        thin = 1

    fit = pystan.stan(file='nfw_hier.stan',
                      data=data,
                      iter=args.niter,
                      thin=thin,
                      chains=args.nchain,
                      n_jobs=args.nchain)

    print(fit)

    with bz2.BZ2File('testchains.pkl.bz2.temp', 'w') as out:
        pickle.dump(fit.extract(permuted=True), out)
    with bz2.BZ2File('testmodel.pkl.bz2.temp', 'w') as out:
        pickle.dump(fit.get_stanmodel(), out)
    with bz2.BZ2File('testfit.pkl.bz2.temp', 'w') as out:
        pickle.dump(fit, out)

    # Almost atomic rename/commit updates
    os.rename('testchains.pkl.bz2.temp', 'testchains.pkl.bz2')
    os.rename('testmodel.pkl.bz2.temp', 'testmodel.pkl.bz2')
    os.rename('testfit.pkl.bz2.temp', 'testfit.pkl.bz2')
Example #32
#Finally, we model the log-radon measurements as a normal sample with a mean that is a function of the floor measurement.

pooled_model = """
model {
  y ~ normal(beta[1] + beta[2] * x, sigma);
}
"""

#We then pass the code, data, and parameters to the stan function. The sampling requires specifying how many iterations we want, and how many parallel chains to sample. Here, we will sample 2 chains of length 1000.

pooled_data_dict = {'N': len(log_radon), 'x': floor_measure, 'y': log_radon}
#notice we did not compile the model first --- either is fine
pooled_fit = pystan.stan(model_code=pooled_data + pooled_parameters +
                         pooled_model,
                         data=pooled_data_dict,
                         iter=1000,
                         chains=2)

#The sample can be extracted for plotting and summarization.

pooled_sample = pooled_fit.extract(permuted=True)

b0, m0 = pooled_sample['beta'].T.mean(1)

plt.scatter(srrs_mn.floor, np.log(srrs_mn.activity + 0.1))
xvals = np.linspace(-0.2, 1.2)
plt.plot(xvals, m0 * xvals + b0, 'r--')

#At the other end of the extreme, we can fit separate (independent) means for each county. The only things that are shared in this model are the coefficient for the basement measurement effect, and the standard deviation of the error.
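A sketch of the model block such an unpooled fit might use (hypothetical: a is a vector of per-county intercepts, and the data and parameters blocks would have to declare county, a, beta, and sigma accordingly):

unpooled_model = """
model {
  y ~ normal(a[county] + beta * x, sigma);
}
"""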
Example #33
# In[2]:

get_ipython().run_line_magic('matplotlib', 'inline')
get_ipython().run_line_magic('config', "InlineBackend.figure_format = 'svg'")

import matplotlib
import matplotlib.pyplot as plt
import scipy
import numpy as np
import pandas as pd

import pystan

print('matplotlib version :', matplotlib.__version__)
print('scipy  version :', scipy.__version__)
print('numpy  version :', np.__version__)
print('pystan version :', pystan.__version__)

# In[ ]:

schools_dat = {
    'J': 8,
    'y': [28, 8, -3, 7, -1, 1, 18, 12],
    'sigma': [15, 10, 16, 11, 9, 11, 10, 18]
}

fit = pystan.stan(file='8schools.stan', data=schools_dat, iter=100, chains=4)
print(fit)

# In[ ]:
Example #34
    beta_0 ~ normal(0,5);
  	a ~ normal(0, sigma_a);
  	y ~ poisson_log(lambda); 	//y and y_hat should have same type 
}
"""

data = np.genfromtxt("sumtable.txt",
                     delimiter=',',
                     skip_header=1,
                     dtype=None,
                     names=("chr", "gene", "sumenvarp", "sumenvarpfc"))
gene = range(1, len(data) + 1)
x = data["sumenvarp"]
y = data["sumenvarpfc"]
N = len(data)

M1_table = {'J': N, 'x': x, 'y': y, 'gene': gene}

fit1 = pystan.stan(model_code=pyfitfull, data=M1_table, iter=400, chains=4)

print(fit1)

## Plot
fit1.plot(pars=["beta", "beta_0",
                "sigma_a"])  # plot the density of the parameters

result1 = fit1.extract(permuted=True)
a = result1["a"]
beta = result1["beta"]
sigma_a = result1["sigma_a"]
Example #35
    mu = X * beta;
    tau = pow(sigma, 2);
    tauBeta = pow(sdBeta, 2);
}
model{

    sdBeta ~ gamma(0.01, 0.01);

    for (i in 1:K){
        if (Ind[i] > 0) beta[i] ~ normal(0, tauBeta);
    }
 
    sigma ~  gamma(0.01, 0.01);

    Y ~ normal(mu, tau);
}
'''

fit = pystan.stan(model_code=stan_model, data=mydata, iter=5000, chains=3, thin=1,
                  warmup=2500, n_jobs=3)

# Output
nlines = 21                                 # number of lines in screen output

output = str(fit).split('\n')

for item in output[:nlines]:
    print(item)  


Example #36
    real<lower=0> tau;
    real eta[J];
}
transformed parameters {
    real theta[J];
    for (j in 1:J)
    theta[j] <- mu + tau * eta[j];
}
model {
    eta ~ normal(0, 1);
    y ~ normal(theta, sigma);
}
"""
schools_dat = {
    'J': 8,
    'y': [28, 8, -3, 7, -1, 1, 18, 12],
    'sigma': [15, 10, 16, 11, 9, 11, 10, 18]
}

fit = pystan.stan(model_code=schools_code,
                  data=schools_dat,
                  iter=1000,
                  chains=4)

# fit object has number of methods
la = fit.extract(permuted=True)
mu = la['mu']

print(mu)
fit.plot()  # requires matplotlib
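A few other commonly used StanFit methods (a sketch; in PyStan 2, summary() returns a dict of arrays and get_sampler_params() returns one diagnostics dict per chain):

summ = fit.summary()
print(summ['summary_colnames'])     # mean, se_mean, sd, quantiles, n_eff, Rhat
sampler_params = fit.get_sampler_params()
print(sampler_params[0].keys())     # per-chain diagnostics such as stepsize__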
Example #37
def test8schools():

    model_name = "_8chools"
    sfile = os.path.join(
        os.path.dirname(__file__),
        "../stan/src/models/misc/eight_schools/eight_schools.stan")
    m = StanModel(file=sfile, model_name=model_name, verbose=True)
    m.dso

    yam = StanModel(file=sfile,
                    model_name=model_name,
                    save_dso=False,
                    verbose=True)
    yam.dso

    dat = dict(J=8,
               y=(28, 8, -3, 7, -1, 1, 18, 12),
               sigma=(15, 10, 16, 11, 9, 11, 10, 18))

    iter = 5020

    # HMC
    ss1 = m.sampling(data=dat,
                     iter=iter,
                     chains=4,
                     algorithm='HMC',
                     refresh=100)
    ss1son = stan(fit=ss1, data=dat, init_r=0.0001)
    ss1son = stan(fit=ss1, data=dat, init_r=0)
    ainfo1 = ss1.get_adaptation_info()
    lp1 = ss1.get_logposterior()
    yalp1 = ss1.get_logposterior(inc_warmup=False)
    sp1 = ss1.get_sampler_params()
    yasp1 = ss1.get_sampler_params(inc_warmup=False)
    gm1 = ss1.get_posterior_mean()
    print(gm1)

    # NUTS 1
    ss2 = m.sampling(data=dat,
                     iter=iter,
                     chains=4,
                     refresh=100,
                     control=dict(metric="unit_e"))
    ainfo2 = ss2.get_adaptation_info()
    lp2 = ss2.get_logposterior()
    yalp2 = ss2.get_logposterior(inc_warmup=False)
    sp2 = ss2.get_sampler_params()
    yasp2 = ss2.get_sampler_params(inc_warmup=False)
    gm2 = ss2.get_posterior_mean()
    print(gm2)

    # NUTS 2
    ss3 = m.sampling(data=dat, iter=iter, chains=4, refresh=100)
    ainfo3 = ss3.get_adaptation_info()
    lp3 = ss3.get_logposterior()
    yalp3 = ss3.get_logposterior(inc_warmup=False)
    sp3 = ss3.get_sampler_params()
    yasp3 = ss3.get_sampler_params(inc_warmup=False)

    gm3 = ss3.get_posterior_mean()
    print(gm3)

    # Non-diag
    ss4 = m.sampling(data=dat,
                     iter=iter,
                     chains=4,
                     control=dict(metric='dense_e'),
                     refresh=100)
    ainfo4 = ss4.get_adaptation_info()
    lp4 = ss4.get_logposterior()
    yalp4 = ss4.get_logposterior(inc_warmup=False)
    sp4 = ss4.get_sampler_params()
    yasp4 = ss4.get_sampler_params(inc_warmup=False)

    gm4 = ss4.get_posterior_mean()
    print(gm4)

    print(ss1)
    print(ss2)
    print(ss3)

    ss1.plot()
    ss1.traceplot()

    ss9 = m.sampling(data=dat, iter=iter, chains=4, refresh=10)

    iter = 52012

    ss = stan(sfile, data=dat, iter=iter, chains=4, sample_file='8schools.csv')

    print(ss)

    ss_inits = ss.inits
    ss_same = stan(sfile,
                   data=dat,
                   iter=iter,
                   chains=4,
                   seed=ss.stan_args[0]['seed'],
                   init=ss_inits,
                   sample_file='ya8schools.csv')

    b = np.allclose(ss.extract(permuted=False),
                    ss_same.extract(permuted=False))
    # b is not True because ss is initialized randomly while ss_same is not.

    s = ss_same.summary(pars="mu", probs=(.3, .8))
    # no direct Python equivalent of R's print(ss.same, pars='theta', probs=c(.4, .8))
    print(ss_same)
Example #38
model{
    vector[nobs] pi;
    real a[nobs];
    real b[nobs];
    
    for (i in 1:nobs){
       pi[i] = inv_logit(X[i] * beta);
       a[i]  = theta * pi[i];
       b[i]  = theta * (1 - pi[i]);
    }

    // priors and likelihood
    for (i in 1:K) beta[i] ~ normal(0, 100);
    theta ~ gamma(0.01, 0.01);

    Y ~ beta(a, b);
}
"""

# Run mcmc
fit = pystan.stan(model_code=stan_code,
                  data=data,
                  iter=7500,
                  chains=3,
                  warmup=5000,
                  thin=1,
                  n_jobs=3)

# Output
print(fit)
Example #39
  b2 ~ normal(0,1);
  sigma_y ~ normal(0, 100);
  y ~ normal(y_hat, sigma_y);
}
"""

model_one_data = {'N': len(religiosity),
                  'J1': len(np.unique(countries)),
                  'J2': len(np.unique(year)),
                  'country': countries + 1,
                  'year': year + 1,
                  'x1': inequality,
                  'x2': rgdpl,
                  'y': religiosity}

model_one_fit = pystan.stan(model_code=model_one, data=model_one_data, iter=1000, chains=2)

a_sample = pd.DataFrame(model_one_fit.extract()['a'])



#model 2:
model_two = """
data {
  int<lower=0> J1;
  int<lower=0> J2;
  int<lower=0> N;
  int<lower=1,upper=J1> country[N];
  int<lower=1,upper=J2> year[N];
  vector[N] x1; //inequality
  vector[N] x2; //rgdpl
Example #40
 def stan(self, *args, **kwargs):
     # Run PyStan's "stan" method, replacing PyStan's StanModel with our
     # own cached StanModel.
     with patch('pystan.api.StanModel', self.stan_model):
         return pystan.stan(*args, **kwargs)
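The cached StanModel being patched in is typically built with the pickle pattern from the PyStan documentation (a sketch; the cache file name is arbitrary):

import pickle
import pystan

def cached_stan_model(model_code, cache_file='cached_model.pkl'):
    # reuse a previously compiled model if the cache file exists,
    # otherwise compile it once and pickle it for next time
    try:
        with open(cache_file, 'rb') as f:
            sm = pickle.load(f)
    except (IOError, OSError):
        sm = pystan.StanModel(model_code=model_code)
        with open(cache_file, 'wb') as f:
            pickle.dump(sm, f)
    return sm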
Example #41
def hierarchical_MC(diff,
                    rope,
                    rho,
                    upperAlpha=2,
                    lowerAlpha=1,
                    lowerBeta=0.01,
                    upperBeta=0.1,
                    std_upper_bound=1000,
                    names=('C1', 'C2')):
    # upperAlpha, lowerAlpha, upperBeta, and lowerBeta are the upper and lower bounds for alpha and
    # beta, the parameters of the Gamma distribution used as a prior for the degrees of freedom.
    # std_upper_bound is a constant that multiplies the sample standard deviation to set the upper
    # limit of the prior on the standard deviation. Posterior inferences are insensitive to this
    # value as long as it is large enough, e.g. 100 or 1000.

    import scipy.stats as stats
    import pystan
    # data rescaling, to have a homogeneous scale among all data sets:
    # scale all data by the mean of the per-data-set standard deviations
    stdX = np.mean(np.std(diff, 1))
    x = diff / stdX
    rope = rope / stdX

    # to avoid numerical problems with zero variance, add a tiny Gaussian jitter to constant rows
    for i in range(0, len(x)):
        if np.std(x[i, :]) == 0:
            x[i, :] = x[i, :] + np.random.normal(
                0, min(1 / 1000000000, np.abs(np.mean(x[i, :]) / 100000000)))

    #This is the Hierarchical model written in Stan
    hierarchical_code = """
    /*Hierarchical Bayesian model for the analysis of competing cross-validated classifiers on multiple data sets.
    */

      data {

        real deltaLow;
        real deltaHi;

        //bounds of the sigma of the higher-level distribution
        real std0Low; 
        real std0Hi; 

        //bounds on the domain of the sigma of each data set
        real stdLow; 
        real stdHi; 


        //number of results for each data set. Typically 100 (10 runs of 10-folds cv)
        int<lower=2> Nsamples; 

        //number of data sets. 
        int<lower=1> q; 

        //difference of accuracy between the two classifier, on each fold of each data set.
        matrix[q,Nsamples] x;

        //correlation (1/(number of folds))
        real rho; 

        real upperAlpha;
        real lowerAlpha;
        real upperBeta;
        real lowerBeta;

         }


      transformed data {

        //vector of 1s appearing in the likelihood 
        vector[Nsamples] H;

        //vector of 0s: the mean of the mvn noise 
        vector[Nsamples] zeroMeanVec;

        /* M is the correlation matrix of the mvn noise.
        invM is its inverse, detM its determinant */
        matrix[Nsamples,Nsamples] invM;
        real detM;

        //The determinant of M is analytically known
        detM <- (1+(Nsamples-1)*rho)*(1-rho)^(Nsamples-1);

        //build H and invM. They do not depend on the data.
        for (j in 1:Nsamples){
          zeroMeanVec[j]<-0;
          H[j]<-1;
          for (i in 1:Nsamples){
            if (j==i)
              invM[j,i]<- (1 + (Nsamples-2)*rho)*pow((1-rho),Nsamples-2);
            else
              invM[j,i]<- -rho * pow((1-rho),Nsamples-2);
           }
        }
        /*at this point invM contains the adjugate of M.
        we  divide it by det(M) to obtain the inverse of M.*/
        invM <-invM/detM;
      }

      parameters {
        //mean of the  hyperprior from which we sample the delta_i
        real<lower=deltaLow,upper=deltaHi> delta0; 

        //std of the hyperprior from which we sample the delta_i
        real<lower=std0Low,upper=std0Hi> std0;

        //delta_i of each data set: vector of length q.
        vector[q] delta;

        //sigma of each data set: vector of length q.
        vector<lower=stdLow,upper=stdHi>[q] sigma;

        /* the domain of (nu - 1) starts from 0
        and can be given a gamma prior*/
        real<lower=0> nuMinusOne; 

        //parameters of the Gamma prior on nuMinusOne
        real<lower=lowerAlpha,upper=upperAlpha> gammaAlpha;
        real<lower=lowerBeta, upper=upperBeta> gammaBeta;

      }

     transformed parameters {
        //degrees of freedom
        real<lower=1> nu ;

        /*difference between the data (x matrix) and 
        the vector of the q means.*/
        matrix[q,Nsamples] diff; 

        vector[q] diagQuad;

        /*vector of length q: 
        1 over the variance of each data set*/
        vector[q] oneOverSigma2; 

        vector[q] logDetSigma;

        vector[q] logLik;

        //degrees of freedom
        nu <- nuMinusOne + 1 ;

        //1 over the variance of each data set
        oneOverSigma2 <- rep_vector(1, q) ./ sigma;
        oneOverSigma2 <- oneOverSigma2 ./ sigma;

        /*the data (x) minus a matrix formed by pasting
        the delta vector (of length q) side by side Nsamples times*/
        diff <- x - rep_matrix(delta,Nsamples); 

        //efficient matrix computation of the likelihood.
        diagQuad <- diagonal (quad_form (invM,diff'));
        logDetSigma <- 2*Nsamples*log(sigma) + log(detM) ;
        logLik <- -0.5 * logDetSigma - 0.5*Nsamples*log(6.283);  
        logLik <- logLik - 0.5 * oneOverSigma2 .* diagQuad;

      }

      model {
        /*mu0 and std0 are not explicitly sampled here.
        Stan automatically samples them: mu0 as uniform and std0 as
        uniform over its domain (std0Low,std0Hi).*/

        //sampling the degrees of freedom
        nuMinusOne ~ gamma ( gammaAlpha, gammaBeta);

        //vectorial sampling of the delta_i of each data set
        delta ~ student_t(nu, delta0, std0);

        //logLik is computed in the previous block 
        increment_log_prob(sum(logLik));   
     }
    """
    datatable = x
    std_within = np.mean(np.std(datatable, 1))

    Nsamples = len(datatable[0])
    q = len(datatable)
    if q > 1:
        std_among = np.std(np.mean(datatable, 1))
    else:
        std_among = np.mean(np.std(datatable, 1))

    #Hierarchical data in Stan
    hierachical_dat = {
        'x': datatable,
        'deltaLow': -np.max(np.abs(datatable)),
        'deltaHi': np.max(np.abs(datatable)),
        'stdLow': 0,
        'stdHi': std_within * std_upper_bound,
        'std0Low': 0,
        'std0Hi': std_among * std_upper_bound,
        'Nsamples': Nsamples,
        'q': q,
        'rho': rho,
        'upperAlpha': upperAlpha,
        'lowerAlpha': lowerAlpha,
        'upperBeta': upperBeta,
        'lowerBeta': lowerBeta
    }

    #Call to Stan code
    fit = pystan.stan(model_code=hierarchical_code,
                      data=hierachical_dat,
                      iter=1000,
                      chains=4)

    la = fit.extract(permuted=True)  # return a dictionary of arrays
    mu = la['delta0']
    stdh = la['std0']
    nu = la['nu']

    samples = np.zeros((len(mu), 3), dtype=float)
    for i in range(0, len(mu)):
        samples[i, 2] = 1 - stats.t.cdf(rope, nu[i], mu[i], stdh[i])
        samples[i, 0] = stats.t.cdf(-rope, nu[i], mu[i], stdh[i])
        samples[i, 1] = 1 - samples[i, 0] - samples[i, 2]

    return samples
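A hypothetical call, assuming diff holds cross-validated accuracy differences for 5 data sets with 100 correlated folds each (rho = 1/number of folds); averaging the returned samples estimates the three posterior probabilities:

diff = np.random.normal(0.01, 0.05, size=(5, 100))      # synthetic example data
samples = hierarchical_MC(diff, rope=0.01, rho=1.0 / 10)
p_left, p_rope, p_right = samples.mean(axis=0)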
Example #42
table1 = pd.read_csv('exon_table1.csv')

exon_dat = {
    'J': len(table),
    'N': len(table1),
    'gene': table.gene,
    'genelevel': len(pd.unique(table.gene)),
    # R-style match(gene, unique(gene)): 1-based index of each gene's level
    'index': pd.factorize(table.gene)[0] + 1,
    'y': table.envarpfc,
    'x': table.envarp,
}


fit = pystan.stan(model_code=stan_code, data=exon_dat,
                  iter=1000, chains=4)

print(fit)

eta = fit.extract(permuted=True)['eta']
np.mean(eta, axis=0)

# if matplotlib is installed (optional, not required), a visual summary and
# traceplot are available

print("plot figure")

fit.plot()
mp.savefig("StanPlot.png")

#mp.figure(figsize=(8,6))
Example #43
    vector[nobs] mu2;

    mu = x * beta;
    mu2 = to_vector(mu);                  // normal distribution
                                          // does not take matrices as input
}
model {
    for (i in 1:k){                       // diffuse normal priors for predictors
        beta[i] ~ normal(0.0, 100);
    }
    sigma ~ uniform(0, 100);              // uniform prior for standard deviation

    y ~ normal(mu2, sigma);               // likelihood function
}
"""

# Run mcmc
fit = pystan.stan(model_code=stan_code,
                  data=toy_data,
                  iter=5000,
                  chains=3,
                  n_jobs=3,
                  verbose=False)

# Output
nlines = 9  # number of lines in screen output

output = str(fit).split('\n')
for item in output[:nlines]:
    print(item)