Example #1
def _indvdl_t(hparams, std_x, n_samples, L_cov, verbose=0):
    df_L = hparams.df_indvdl
    dist_scale_indvdl = hparams.dist_scale_indvdl
    scale1 = std_x[0] * _dist_from_str('scale_mu1s', dist_scale_indvdl)
    scale2 = std_x[1] * _dist_from_str('scale_mu2s', dist_scale_indvdl)

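    # a Student-t with df_L degrees of freedom has variance df_L / (df_L - 2);
    # dividing by its square root standardizes the scales to unit variance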
    scale1 = scale1 / np.sqrt(df_L / (df_L - 2))
    scale2 = scale2 / np.sqrt(df_L / (df_L - 2))

    u1s = StudentT('u1s',
                   nu=np.float32(df_L),
                   shape=(n_samples, ),
                   dtype=floatX)
    u2s = StudentT('u2s',
                   nu=np.float32(df_L),
                   shape=(n_samples, ),
                   dtype=floatX)

    L_cov_ = cholesky(L_cov).astype(floatX)

    # cholesky() returns a lower-triangular factor, so the u2s term of mu1s_
    # vanishes (L_cov_[0, 1] == 0), matching Example #3
    mu1s_ = Deterministic(
        'mu1s_', L_cov_[0, 0] * u1s * scale1 + L_cov_[0, 1] * u2s * scale1)
    mu2s_ = Deterministic('mu2s_', L_cov_[1, 0] * u1s * scale2 +
                          L_cov_[1, 1] * u2s * scale2)

    if 10 <= verbose:
        print('StudentT for individual effect')
        print('u1s.dtype = {}'.format(u1s.dtype))
        print('u2s.dtype = {}'.format(u2s.dtype))

    return mu1s_, mu2s_
Example #2
def _indvdl_gauss(
    hparams, std_x, n_samples, L_cov, Normal, Deterministic, floatX, 
    cholesky, tt, verbose):
    scale1 = np.float32(std_x[0] * hparams['v_indvdl_1'])
    scale2 = np.float32(std_x[1] * hparams['v_indvdl_2'])

    u1s = Normal(
        'u1s', mu=np.float32(0.), tau=np.float32(1.), 
        shape=(n_samples,), dtype=floatX
    )
    u2s = Normal(
        'u2s', mu=np.float32(0.), tau=np.float32(1.), 
        shape=(n_samples,), dtype=floatX
    )
    L_cov_ = cholesky(L_cov).astype(floatX)
    # tt.set_subtensor returns a new tensor; keep the result so the scaling
    # takes effect
    L_cov_ = tt.set_subtensor(L_cov_[0, :], L_cov_[0, :] * scale1)
    L_cov_ = tt.set_subtensor(L_cov_[1, :], L_cov_[1, :] * scale2)
    mu1s_ = Deterministic('mu1s_',
                          L_cov_[0, 0] * u1s + L_cov_[0, 1] * u2s)
    mu2s_ = Deterministic('mu2s_',
                          L_cov_[1, 0] * u1s + L_cov_[1, 1] * u2s)

    if 10 <= verbose:
        print('Normal for individual effect')
        print('u1s.dtype = {}'.format(u1s.dtype))
        print('u2s.dtype = {}'.format(u2s.dtype))

    return mu1s_, mu2s_
Example #3
def _indvdl_t(
    hparams, std_x, n_samples, L_cov, StudentT, Deterministic, floatX, 
    cholesky, tt, verbose):
    df_L = hparams['df_indvdl']
    scale1 = np.float32(std_x[0] * hparams['v_indvdl_1'] / 
                        np.sqrt(df_L / (df_L - 2)))
    scale2 = np.float32(std_x[1] * hparams['v_indvdl_2'] / 
                        np.sqrt(df_L / (df_L - 2)))

    u1s = StudentT('u1s', nu=np.float32(df_L), shape=(n_samples,), 
                   dtype=floatX)
    u2s = StudentT('u2s', nu=np.float32(df_L), shape=(n_samples,), 
                   dtype=floatX)

    L_cov_ = cholesky(L_cov).astype(floatX)
    # tt.set_subtensor returns a new tensor; keep the result so the scaling
    # takes effect
    L_cov_ = tt.set_subtensor(L_cov_[0, :], L_cov_[0, :] * scale1)
    L_cov_ = tt.set_subtensor(L_cov_[1, :], L_cov_[1, :] * scale2)
    mu1s_ = Deterministic('mu1s_', 
                          L_cov_[0, 0] * u1s + L_cov_[0, 1] * u2s)
    mu2s_ = Deterministic('mu2s_', 
                          L_cov_[1, 0] * u1s + L_cov_[1, 1] * u2s)

    if 10 <= verbose:
        print('StudentT for individual effect')
        print('u1s.dtype = {}'.format(u1s.dtype))
        print('u2s.dtype = {}'.format(u2s.dtype))

    return mu1s_, mu2s_
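A minimal sketch of how this dependency-injected variant might be wired up, assuming pymc3/theano supply the injected names; the hyperparameter values, scales and correlation matrix below are placeholders:

import numpy as np
import pymc3 as pm
import theano.tensor as tt
from theano import config

hparams = {'df_indvdl': 6.0, 'v_indvdl_1': 1.0, 'v_indvdl_2': 1.0}
std_x = np.array([1.2, 0.8])                # per-variable data scales
L_cov = np.array([[1.0, 0.3], [0.3, 1.0]])  # correlation of individual effects

with pm.Model():
    mu1s_, mu2s_ = _indvdl_t(
        hparams, std_x, 100, L_cov, pm.StudentT, pm.Deterministic,
        config.floatX, np.linalg.cholesky, tt, verbose=0)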
Example #4
    def get_formula(self, input_rvs, fixed_rvs, hyperparams, problem_config):
        """
        Get seismic likelihood formula for the model built. Has to be called
        within a with model context.

        Parameters
        ----------
        input_rvs : dict
            of :class:`pymc3.distribution.Distribution`
        fixed_rvs : dict
            of :class:`numpy.array`
        hyperparams : dict
            of :class:`pymc3.distribution.Distribution`
        problem_config : :class:`config.ProblemConfig`

        Returns
        -------
        posterior_llk : :class:`theano.tensor.Tensor`
        """
        hp_specific = problem_config.dataset_specific_residual_noise_estimation

        self.input_rvs = input_rvs
        self.fixed_rvs = fixed_rvs

        logger.info('Seismic optimization on: \n '
                    ' %s' % ', '.join(self.input_rvs.keys()))

        t2 = time()
        wlogpts = []

        self.init_hierarchicals(problem_config)
        if self.config.station_corrections:
            logger.info('Initialized %i hierarchical parameters for '
                        'station corrections.' %
                        len(self.get_unique_stations()))

        for wmap in self.wavemaps:
            synths, tmins = self.synthesizers[wmap.name](self.input_rvs)

            if len(self.hierarchicals) > 0:
                tmins += self.hierarchicals[self.correction_name][
                    wmap.station_correction_idxs]

            data_trcs = self.choppers[wmap.name](tmins)
            residuals = data_trcs - synths

            logpts = multivariate_normal_chol(wmap.datasets,
                                              wmap.weights,
                                              hyperparams,
                                              residuals,
                                              hp_specific=hp_specific)

            wlogpts.append(logpts)

        t3 = time()
        logger.debug('Teleseismic forward model on test model takes: %f' %
                     (t3 - t2))

        llk = Deterministic(self._like_name, tt.concatenate(wlogpts))
        return llk.sum()
Example #5
def doMCMC(n, nxx, nxy, nyy, x):

    # Optional setting for reproducibility
    use_seed = False

    d = nxx.shape[0]
    ns = 2000
    if use_seed:
        seed = 42

    # Disable printing
    sys.stdout = open(os.devnull, 'w')

    # Sufficient statistics
    NXX = shared(nxx)
    NXY = shared(nxy)
    NYY = shared(nyy)

    # Define model and perform MCMC sampling
    with Model() as model:

        # Fixed hyperparameters for priors
        b0 = Deterministic('b0', th.zeros((d), dtype='float64'))
        ide = Deterministic('ide', th.eye(d, m=d, k=0, dtype='float64'))

        # Priors for parameters
        l0 = Gamma('l0', alpha=2.0, beta=2.0)
        l = Gamma('l', alpha=2.0, beta=2.0)
        b = MvNormal('b', mu=b0, tau=l0 * ide, shape=d)

        # Custom log likelihood
        def logp(xtx, xty, yty):
            return (n / 2.0) * th.log(l / (2 * np.pi)) + (-l / 2.0) * (
                th.dot(th.dot(b, xtx), b) - 2 * th.dot(b, xty) + yty)

        # Likelihood
        delta = DensityDist('delta',
                            logp,
                            observed={
                                'xtx': NXX,
                                'xty': NXY,
                                'yty': NYY
                            })

        # Inference
        print('doMCMC: start NUTS')
        step = NUTS()
        if use_seed:
            trace = sample(ns, step, progressbar=True, random_seed=seed)
        else:
            trace = sample(ns, step, progressbar=True)

    # Enable printing
    sys.stdout = sys.__stdout__

    # Compute prediction over posterior
    return np.mean([np.dot(x, trace['b'][i]) for i in range(ns)], 0)
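A hedged sketch of how doMCMC might be driven, assuming its module-level imports (pymc3, theano, sys, os) are in scope. The synthetic regression data below is an assumption; doMCMC itself only sees the sufficient statistics and the test inputs:

import numpy as np

rng = np.random.RandomState(0)
n, d = 200, 3
X = rng.randn(n, d)
y = X.dot(np.array([1.0, -2.0, 0.5])) + 0.1 * rng.randn(n)

nxx = X.T.dot(X)   # (d, d) sufficient statistic
nxy = X.T.dot(y)   # (d,)
nyy = y.dot(y)     # scalar
x_test = rng.randn(10, d)

y_pred = doMCMC(n, nxx, nxy, nyy, x_test)  # posterior-mean predictions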
Example #6
    def get_formula(self, input_rvs, fixed_rvs, hyperparams, problem_config):
        """
        Get geodetic likelihood formula for the model built. Has to be called
        within a with model context.
        Part of the pymc3 model.

        Parameters
        ----------
        input_rvs : dict
            of :class:`pymc3.distribution.Distribution`
        fixed_rvs : dict
            of :class:`numpy.array`
        hyperparams : dict
            of :class:`pymc3.distribution.Distribution`
        problem_config : :class:`config.ProblemConfig`

        Returns
        -------
        posterior_llk : :class:`theano.tensor.Tensor`
        """
        hp_specific = self.config.dataset_specific_residual_noise_estimation

        self.input_rvs = input_rvs
        self.fixed_rvs = fixed_rvs

        logger.info('Geodetic optimization on: \n '
                    '%s' % ', '.join(self.input_rvs.keys()))

        self.input_rvs.update(fixed_rvs)

        t0 = time()
        disp = self.get_synths(self.input_rvs)
        t1 = time()
        logger.debug('Geodetic forward model on test model takes: %f' %
                     (t1 - t0))

        los_disp = (disp * self.slos_vectors).sum(axis=1)

        residuals = self.Bij.srmap(
            tt.cast((self.sdata - los_disp) * self.sodws, tconfig.floatX))

        self.init_hierarchicals(problem_config)
        if self.config.corrections_config.has_enabled_corrections:
            logger.info('Applying corrections! ...')
            residuals = self.apply_corrections(residuals, operation='-')

        logpts = multivariate_normal_chol(self.datasets,
                                          self.weights,
                                          hyperparams,
                                          residuals,
                                          hp_specific=hp_specific)

        llk = Deterministic(self._like_name, logpts)
        return llk.sum()
Example #7
    def get_formula(self, input_rvs, fixed_rvs, hyperparams, problem_config):
        """
        Formulation of the distribution problem for the model built. Has to be
        called within a with-model-context.

        Parameters
        ----------
        input_rvs : dict
            of :class:`pymc3.distribution.Distribution`
        fixed_rvs : dict
            of :class:`numpy.array`
        hyperparams : dict
            of :class:`pymc3.distribution.Distribution`
        problem_config : :class:`config.ProblemConfig`

        Returns
        -------
        llk : :class:`theano.tensor.Tensor`
            log-likelihood for the distributed slip
        """
        logger.info("Loading %s Green's Functions" % self.name)
        self.load_gfs(crust_inds=[self.config.gf_config.reference_model_idx],
                      make_shared=True)

        hp_specific = self.config.dataset_specific_residual_noise_estimation

        self.input_rvs = input_rvs
        self.fixed_rvs = fixed_rvs
        ref_idx = self.config.gf_config.reference_model_idx

        mu = tt.zeros((self.Bij.ordering.size), tconfig.floatX)
        for var in self.slip_varnames:
            key = self.get_gflibrary_key(crust_ind=ref_idx,
                                         wavename='static',
                                         component=var)
            mu += self.gfs[key].stack_all(slips=input_rvs[var])

        residuals = self.Bij.srmap(
            tt.cast((self.sdata - mu) * self.sodws, tconfig.floatX))

        self.init_hierarchicals(problem_config)
        if len(self.hierarchicals) > 0:
            residuals = self.remove_ramps(residuals)

        logpts = multivariate_normal_chol(self.datasets,
                                          self.weights,
                                          hyperparams,
                                          residuals,
                                          hp_specific=hp_specific)

        llk = Deterministic(self._like_name, logpts)

        return llk.sum()
Example #8
    def get_hyper_formula(self, hyperparams):
        """
        Get likelihood formula for the hyper model built. Has to be called
        within a with model context.
        """

        logpts = tt.zeros((self.n_t), tconfig.floatX)
        for k in range(self.n_t):
            logpt = self._eval_prior(hyperparams[bconfig.hyper_name_laplacian],
                                     self._llks[k])
            logpts = tt.set_subtensor(logpts[k:k + 1], logpt)

        llk = Deterministic(self._like_name, logpts)
        return llk.sum()
Example #9
    def get_hyper_formula(self, hyperparams, problem_config):
        """
        Get likelihood formula for the hyper model built. Has to be called
        within a with model context.

        problem_config : :class:`config.ProblemConfig`
        """

        hp_specific = problem_config.dataset_specific_residual_noise_estimation
        logpts = hyper_normal(
            self.datasets, hyperparams, self._llks,
            hp_specific=hp_specific)
        llk = Deterministic(self._like_name, logpts)
        return llk.sum()
Example #10
def doADVI(n, xx, xy, yy, x):

    d = xx.shape[0]
    ns = 5000
    seed = 42  # for reproducibility

    # Disable printing
    sys.stdout = open(os.devnull, 'w')

    # Sufficient statistics
    NXX = shared(xx)
    NXY = shared(xy)
    NYY = shared(yy)

    # Define model and perform MCMC sampling
    with Model() as model:

        # Fixed hyperparameters for priors
        b0 = Deterministic('b0', th.zeros((d), dtype='float64'))
        ide = Deterministic('ide', th.eye(d, m=d, k=0, dtype='float64'))

        # Priors for parameters
        l0 = Gamma('l0', alpha=2.0, beta=2.0)
        l = Gamma('l', alpha=2.0, beta=2.0)
        b = MvNormal('b', mu=b0, tau=l0 * ide, shape=d)

        # Custom log likelihood
        def logp(xtx, xty, yty):
            return (n / 2.0) * th.log(l / (2 * np.pi)) + (-l / 2.0) * (
                th.dot(th.dot(b, xtx), b) - 2 * th.dot(b, xty) + yty)

        # Likelihood
        delta = DensityDist('delta',
                            logp,
                            observed={
                                'xtx': NXX,
                                'xty': NXY,
                                'yty': NYY
                            })

        # Inference
        v_params = advi(n=ns, random_seed=seed)
        trace = sample_vp(v_params, draws=ns, random_seed=seed)

    # Enable printing
    sys.stdout = sys.__stdout__

    # Compute prediction over posterior
    return np.mean([np.dot(x, trace['b'][i]) for i in range(ns)], 0)
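doADVI consumes the same sufficient statistics as doMCMC in Example #5; under the synthetic-data setup sketched there, the call is analogous:

y_pred = doADVI(n, nxx, nxy, nyy, x_test)  # ADVI posterior-mean predictions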
Example #11
 def __init__(self, mean=0, sigma=1, name="", model=None):
     super().__init__(name, model)
     self.Var("v1", Normal.dist(mu=mean, sigma=sigma))
     Normal("v2", mu=mean, sigma=sigma)
     Normal("v3", mu=mean, sigma=HalfCauchy("sd", beta=10, testval=1.0))
     Deterministic("v3_sq", self.v3 ** 2)
     Potential("p1", aet.constant(1))
Example #12
    def built_hyper_model(self):
        """
        Initialise :class:`pymc3.Model` depending on the configuration file;
        geodetic and/or seismic data are included. Estimates initial parameter
        bounds for hyperparameters.
        """

        logger.info('... Building Hyper model ...\n')

        pc = self.config.problem_config

        if len(self.hierarchicals) == 0:
            self.init_hierarchicals()

        point = self.get_random_point(include=['hierarchicals', 'priors'])
        for param in pc.priors.values():
            point[param.name] = param.testvalue

        self.update_llks(point)

        with Model() as self.model:

            self.init_hyperparams()

            total_llk = tt.zeros((1), tconfig.floatX)

            for composite in self.composites.values():
                total_llk += composite.get_hyper_formula(self.hyperparams, pc)

            like = Deterministic('tmp', total_llk)
            llk = Potential(self._like_name, like)
            logger.info('Hyper model building was successful!')
Example #13
 def __init__(self, mean=0, sigma=1, name="", model=None):
     super().__init__(name, model)
     self.register_rv(Normal.dist(mu=mean, sigma=sigma), "v1")
     Normal("v2", mu=mean, sigma=sigma)
     Normal("v3", mu=mean, sigma=Normal("sd", mu=10, sigma=1, initval=1.0))
     Deterministic("v3_sq", self.v3 ** 2)
     Potential("p1", at.constant(1))
Example #14
 def __init__(self, mean=0, sigma=1, name='', model=None):
     super().__init__(name, model)
     self.Var('v1', Normal.dist(mu=mean, sigma=sigma))
     Normal('v2', mu=mean, sigma=sigma)
     Normal('v3', mu=mean, sigma=HalfCauchy('sd', beta=10, testval=1.))
     Deterministic('v3_sq', self.v3 ** 2)
     Potential('p1', tt.constant(1))
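Examples #11, #13 and #14 show the same pattern across PyMC API generations (PyMC3/Theano, Aesara, PyMC v4): a pm.Model subclass that registers named variables in __init__. A minimal sketch in the PyMC3 style of Example #14; the class and instance names are hypothetical:

import pymc3 as pm
import theano.tensor as tt

class DocModel(pm.Model):
    def __init__(self, mean=0, sigma=1, name='', model=None):
        super().__init__(name, model)
        # the subclass instance is on the model-context stack during __init__,
        # so these calls register variables on it
        self.Var('v1', pm.Normal.dist(mu=mean, sigma=sigma))
        pm.Normal('v2', mu=mean, sigma=sigma)
        pm.Deterministic('v2_sq', self.v2 ** 2)
        pm.Potential('p1', tt.constant(1))

m = DocModel()
print(m.named_vars.keys())  # the variables registered above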
Example #15
    def get_formula(self, input_rvs, fixed_rvs, hyperparams, problem_config):
        """
        Get smoothing likelihood formula for the model built. Has to be called
        within a with model context.
        Part of the pymc3 model.

        Parameters
        ----------
        input_rvs : dict
            of :class:`pymc3.distribution.Distribution`
        fixed_rvs : dict
            of :class:`numpy.array` here only dummy
        hyperparams : dict
            of :class:`pymc3.distribution.Distribution`
        problem_config : :class:`config.ProblemConfig`
            here it is not used

        Returns
        -------
        posterior_llk : :class:`theano.tensor.Tensor`
        """
        logger.info('Initialising Laplacian smoothing operator ...')

        self.input_rvs = input_rvs
        self.fixed_rvs = fixed_rvs

        hp_name = bconfig.hyper_name_laplacian
        self.input_rvs.update(fixed_rvs)

        logpts = tt.zeros((self.n_t), tconfig.floatX)
        for l, var in enumerate(self.slip_varnames):
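            # exponent = ||L s||^2: squared roughness of the slip
            # distribution under the Laplacian smoothing operator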
            Ls = self.shared_smoothing_op.dot(input_rvs[var])
            exponent = Ls.T.dot(Ls)

            logpts = tt.set_subtensor(
                logpts[l:l + 1],
                self._eval_prior(hyperparams[hp_name], exponent=exponent))

        llk = Deterministic(self._like_name, logpts)
        return llk.sum()
Example #16
    def built_model(self):
        """
        Initialise :class:`pymc3.Model` depending on the problem composites;
        geodetic and/or seismic data are included. Composites also determine
        the problem to be solved.
        """

        logger.info('... Building model ...\n')

        pc = self.config.problem_config

        with Model() as self.model:

            self.rvs, self.fixed_params = self.get_random_variables()

            self.init_hyperparams()

            total_llk = tt.zeros((1), tconfig.floatX)

            for datatype, composite in self.composites.items():
                if datatype in bconfig.modes_catalog[pc.mode].keys():
                    input_rvs = utility.weed_input_rvs(self.rvs,
                                                       pc.mode,
                                                       datatype=datatype)
                    fixed_rvs = utility.weed_input_rvs(self.fixed_params,
                                                       pc.mode,
                                                       datatype=datatype)

                    if pc.mode == 'ffi':
                        # do the optimization only on the
                        # reference velocity model
                        logger.info("Loading %s Green's Functions" % datatype)
                        data_config = self.config[datatype + '_config']
                        composite.load_gfs(crust_inds=[
                            data_config.gf_config.reference_model_idx
                        ],
                                           make_shared=True)

                    total_llk += composite.get_formula(input_rvs, fixed_rvs,
                                                       self.hyperparams, pc)

            # deterministic RV to write out llks to file
            like = Deterministic('tmp', total_llk)

            # will overwrite deterministic name ...
            llk = Potential(self._like_name, like)
            logger.info('Model building was successful!')
Example #17
def _indvdl_gg(
    hparams, std_x, n_samples, L_cov, Normal, Gamma, Deterministic, sgn, gamma, 
    floatX, cholesky, tt, verbose):
    # Uniform distribution on sphere
    gs = Normal('gs', np.float32(0.0), np.float32(1.0), 
                shape=(n_samples, 2), dtype=floatX)
    ss = Deterministic('ss', gs + sgn(sgn(gs) + np.float32(1e-10)) * 
                             np.float32(1e-10))
    ns = Deterministic('ns', ss.norm(L=2, axis=1)[:, np.newaxis])
    us = Deterministic('us', ss / ns)

    # Scaling s.t. variance to 1
    n = 2 # dimension
    beta = np.float32(hparams['beta_coeff'])
    m = n * gamma(0.5 * n / beta) \
        / (2 ** (1 / beta) * gamma((n + 2) / (2 * beta)))
    L_cov_ = (np.sqrt(m) * cholesky(L_cov)).astype(floatX)

    # Scaling to v_indvdls
    scale1 = np.float32(std_x[0] * hparams['v_indvdl_1'])
    scale2 = np.float32(std_x[1] * hparams['v_indvdl_2'])
    # tt.set_subtensor returns a new tensor; keep the result so the scaling
    # takes effect
    L_cov_ = tt.set_subtensor(L_cov_[0, :], L_cov_[0, :] * scale1)
    L_cov_ = tt.set_subtensor(L_cov_[1, :], L_cov_[1, :] * scale2)

    # Draw samples
    ts = Gamma(
        'ts', alpha=np.float32(n / (2 * beta)), beta=np.float32(.5), 
        shape=n_samples, dtype=floatX
    )[:, np.newaxis]
    mus_ = Deterministic(
        'mus_', ts**(np.float32(0.5 / beta)) * us.dot(L_cov_)
    )
    mu1s_ = mus_[:, 0]
    mu2s_ = mus_[:, 1]

    if 10 <= verbose:
        print('GG for individual effect')
        print('gs.dtype = {}'.format(gs.dtype))
        print('ss.dtype = {}'.format(ss.dtype))
        print('ns.dtype = {}'.format(ns.dtype))
        print('us.dtype = {}'.format(us.dtype))
        print('ts.dtype = {}'.format(ts.dtype))

    return mu1s_, mu2s_
Example #18
    def built_model(self):
        """
        Initialise :class:`pymc3.Model` depending on the problem composites;
        geodetic and/or seismic data are included. Composites also determine
        the problem to be solved.
        """

        logger.info('... Building model ...\n')

        pc = self.config.problem_config

        with Model() as self.model:

            self.rvs, self.fixed_params = self.get_random_variables()

            self.init_hyperparams()

            total_llk = tt.zeros((1), tconfig.floatX)

            for datatype, composite in self.composites.items():
                if datatype in bconfig.modes_catalog[pc.mode].keys():
                    input_rvs = weed_input_rvs(self.rvs,
                                               pc.mode,
                                               datatype=datatype)
                    fixed_rvs = weed_input_rvs(self.fixed_params,
                                               pc.mode,
                                               datatype=datatype)

                else:
                    input_rvs = self.rvs
                    fixed_rvs = self.fixed_params

                total_llk += composite.get_formula(input_rvs, fixed_rvs,
                                                   self.hyperparams, pc)

            # deterministic RV to write out llks to file
            like = Deterministic('tmp', total_llk)

            # will overwrite deterministic name ...
            llk = Potential(self._like_name, like)
            logger.info('Model building was successful! \n')
Example #19
def createSignalModelExponential(data):
  """
    Toy model that treats the first ~10% of the waveform as an exponential.  Does a good job of finding the start time (t_0)
    Since I made this as a toy, its super brittle.  Waveform must be normalized
  """
  with Model() as signal_model:
    switchpoint = Uniform('switchpoint', lower=0, upper=len(data), testval=len(data)/2)
    
    noise_sigma = HalfNormal('noise_sigma', sd=1.)
    
    #Modeling these parameters this way is why wf needs to be normalized
    exp_rate = Uniform('exp_rate', lower=0, upper=.5, testval = 0.05)
    exp_scale = Uniform('exp_scale', lower=0, upper=.5, testval = 0.1)
    
    timestamp = np.arange(0, len(data), dtype=float)
    
    rate = switch(switchpoint >= timestamp, 0, exp_rate)
    
    baseline_model = Deterministic('baseline_model', exp_scale * (exp( (timestamp-switchpoint)*rate)-1.) )
    
    baseline_observed = Normal("baseline_observed", mu=baseline_model, sd=noise_sigma, observed= data )
  return signal_model
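A hedged usage sketch with a synthetic, normalized waveform; the waveform shape and all constants below are placeholders:

import numpy as np
import pymc3 as pm

n, t0 = 500, 400
wf = np.random.normal(0.0, 0.01, n)                # flat, noisy baseline
wf[t0:] += 0.1 * (np.exp(0.05 * np.arange(n - t0)) - 1.0)
wf /= wf.max()                                     # the model expects a normalized waveform

model = createSignalModelExponential(wf)
with model:
    trace = pm.sample(1000, tune=1000)
print(trace['switchpoint'].mean())                 # posterior start-time estimate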
Example #20
    def get_formula(self, input_rvs, fixed_rvs, hyperparams, problem_config):

        hp_specific = problem_config.dataset_specific_residual_noise_estimation
        tpoint = problem_config.get_test_point()

        self.input_rvs = input_rvs
        self.fixed_rvs = fixed_rvs

        logger.info(
            'Seismic optimization on: \n '
            ' %s' % ', '.join(self.input_rvs.keys()))

        self.analyse_noise(tpoint)
        self.init_weights()
        self.init_hierarchicals(problem_config)
        if self.config.station_corrections:
            logger.info(
                'Initialized %i hierarchical parameters for '
                'station corrections.' % len(self.get_unique_stations()))

        self.input_rvs.update(fixed_rvs)

        ref_idx = self.config.gf_config.reference_model_idx

        nuc_strike = input_rvs['nucleation_strike']
        nuc_dip = input_rvs['nucleation_dip']

        t2 = time()
        # convert velocities to rupture onset
        logger.debug('Fast sweeping ...')

        nuc_dip_idx, nuc_strike_idx = self.fault.fault_locations2idxs(
            positions_dip=nuc_dip,
            positions_strike=nuc_strike,
            backend='theano')

        starttimes0 = self.sweeper(
            (1. / input_rvs['velocities']), nuc_dip_idx, nuc_strike_idx)

        starttimes0 += input_rvs['nucleation_time']
        wlogpts = []
        for wmap in self.wavemaps:
            # station corrections
            if len(self.hierarchicals) > 0:
                raise NotImplementedError(
                    'Station corrections not fully implemented! for ffi!')
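                # NOTE: the block below is unreachable until the raise above
                # is removed; kept as a sketch of the intended implementation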
                starttimes = (
                    tt.tile(starttimes0, wmap.n_t) +
                    tt.repeat(self.hierarchicals[self.correction_name][
                        wmap.station_correction_idxs],
                        self.fault.npatches)).reshape(
                            wmap.n_t, self.fault.npatches)

                targetidxs = shared(
                    num.atleast_2d(num.arange(wmap.n_t)).T, borrow=True)
            else:
                starttimes = starttimes0
                targetidxs = shared(num.lib.index_tricks.s_[:], borrow=True)

            logger.debug('Stacking %s phase ...' % wmap.config.name)
            synthetics = tt.zeros(
                (wmap.n_t, wmap.config.arrival_taper.nsamples(
                    self.config.gf_config.sample_rate)),
                dtype=tconfig.floatX)

            for var in self.slip_varnames:
                logger.debug('Stacking %s variable' % var)
                key = self.get_gflibrary_key(
                    crust_ind=ref_idx, wavename=wmap.name, component=var)
                synthetics += self.gfs[key].stack_all(
                    targetidxs=targetidxs,
                    starttimes=starttimes,
                    durations=input_rvs['durations'],
                    slips=input_rvs[var],
                    interpolation=wmap.config.interpolation)

            residuals = wmap.shared_data_array - synthetics

            logger.debug('Calculating likelihoods ...')
            logpts = multivariate_normal_chol(
                wmap.datasets, wmap.weights, hyperparams, residuals,
                hp_specific=hp_specific)

            wlogpts.append(logpts)

        t3 = time()
        logger.debug(
            'Seismic formula on test model takes: %f' % (t3 - t2))

        llk = Deterministic(self._like_name, tt.concatenate(wlogpts))
        return llk.sum()
Example #21
    def __init__(self,
                 ploidy_config: PloidyModelConfig,
                 ploidy_workspace: PloidyWorkspace):
        super().__init__()

        # shorthands
        t_j = ploidy_workspace.t_j
        contig_exclusion_mask_jj = ploidy_workspace.contig_exclusion_mask_jj
        n_s = ploidy_workspace.n_s
        n_sj = ploidy_workspace.n_sj
        ploidy_k = ploidy_workspace.int_ploidy_values_k
        q_ploidy_sjk = tt.exp(ploidy_workspace.log_q_ploidy_sjk)
        eps_mapping = ploidy_config.mapping_error_rate

        register_as_global = self.register_as_global
        register_as_sample_specific = self.register_as_sample_specific

        # mean per-contig bias
        mean_bias_j = self.PositiveNormal('mean_bias_j',
                                          mu=1.0,
                                          sd=ploidy_config.mean_bias_sd,
                                          shape=(ploidy_workspace.num_contigs,))
        register_as_global(mean_bias_j)

        # contig coverage unexplained variance
        psi_j = Exponential(name='psi_j',
                            lam=1.0 / ploidy_config.psi_j_scale,
                            shape=(ploidy_workspace.num_contigs,))
        register_as_global(psi_j)

        # sample-specific contig unexplained variance
        psi_s = Exponential(name='psi_s',
                            lam=1.0 / ploidy_config.psi_s_scale,
                            shape=(ploidy_workspace.num_samples,))
        register_as_sample_specific(psi_s, sample_axis=0)

        # convert "unexplained variance" to negative binomial over-dispersion
        alpha_sj = tt.maximum(tt.inv((tt.exp(psi_j.dimshuffle('x', 0) + psi_s.dimshuffle(0, 'x')) - 1.0)),
                              _eps)

        # mean ploidy per contig per sample
        mean_ploidy_sj = tt.sum(tt.exp(ploidy_workspace.log_q_ploidy_sjk)
                                * ploidy_workspace.int_ploidy_values_k.dimshuffle('x', 'x', 0), axis=2)

        # mean-field amplification coefficient per contig
        gamma_sj = mean_ploidy_sj * t_j.dimshuffle('x', 0) * mean_bias_j.dimshuffle('x', 0)

        # gamma_rest_sj \equiv sum_{j' \neq j} gamma_sj
        gamma_rest_sj = tt.dot(gamma_sj, contig_exclusion_mask_jj)

        # NB per-contig counts
        mu_num_sjk = (t_j.dimshuffle('x', 0, 'x') * mean_bias_j.dimshuffle('x', 0, 'x')
                      * ploidy_k.dimshuffle('x', 'x', 0))
        mu_den_sjk = gamma_rest_sj.dimshuffle(0, 1, 'x') + mu_num_sjk
        eps_mapping_j = eps_mapping * t_j / tt.sum(t_j)  # average number of reads erroneously mapped to contig j

        # the switch is required for a single contig edge case
        mu_ratio_sjk = tt.switch(tt.eq(mu_den_sjk, 0.0), 0.0, mu_num_sjk / mu_den_sjk)
        mu_sjk = ((1.0 - eps_mapping) * mu_ratio_sjk
                  + eps_mapping_j.dimshuffle('x', 0, 'x')) * n_s.dimshuffle(0, 'x', 'x')

        def _get_logp_sjk(_n_sj):
            _logp_sjk = commons.negative_binomial_logp(
                mu_sjk,  # mean
                alpha_sj.dimshuffle(0, 1, 'x'),  # over-dispersion
                _n_sj.dimshuffle(0, 1, 'x'))  # contig counts
            return _logp_sjk

        DensityDist(name='n_sj_obs',
                    logp=lambda _n_sj: tt.sum(q_ploidy_sjk * _get_logp_sjk(_n_sj)),
                    observed=n_sj)

        # for log ploidy emission sampling
        Deterministic(name='logp_sjk', var=_get_logp_sjk(n_sj))
Example #22
    def get_formula(self, input_rvs, fixed_rvs, hyperparams, problem_config):
        """
        Get seismic likelihood formula for the model built. Has to be called
        within a with model context.

        Parameters
        ----------
        input_rvs : dict
            of :class:`pymc3.distribution.Distribution` of source parameters
        fixed_rvs : dict
            of :class:`numpy.array`
        hyperparams : dict
            of :class:`pymc3.distribution.Distribution`
        problem_config : :class:`config.ProblemConfig`

        Returns
        -------
        posterior_llk : :class:`theano.tensor.Tensor`
        """
        chop_bounds = ['b', 'c']  # we want llk calculation only between b c

        hp_specific = self.config.dataset_specific_residual_noise_estimation
        tpoint = problem_config.get_test_point()

        self.input_rvs = input_rvs
        self.fixed_rvs = fixed_rvs

        logger.info('Seismic optimization on: \n '
                    ' %s' % ', '.join(self.input_rvs.keys()))

        self.input_rvs.update(fixed_rvs)

        t2 = time()
        wlogpts = []

        self.init_hierarchicals(problem_config)
        self.analyse_noise(tpoint, chop_bounds=chop_bounds)
        self.init_weights()
        if self.config.station_corrections:
            logger.info('Initialized %i hierarchical parameters for '
                        'station corrections.' %
                        len(self.get_all_station_names()))

        for wmap in self.wavemaps:
            if len(self.hierarchicals) > 0:
                time_shifts = self.hierarchicals[wmap.time_shifts_id][
                    wmap.station_correction_idxs]
                self.input_rvs[self.correction_name] = time_shifts

            wc = wmap.config

            logger.info('Preparing data of "%s" for optimization' %
                        wmap._mapid)
            wmap.prepare_data(source=self.events[wc.event_idx],
                              engine=self.engine,
                              outmode='array',
                              chop_bounds=chop_bounds)

            logger.info('Initializing synthesizer for "%s"' % wmap._mapid)

            if self.nevents == 1:
                logger.info('Using all sources for wavemap %s !' % wmap._mapid)
                sources = self.sources
            else:
                logger.info('Using source based on event %i for wavemap %s!' %
                            (wc.event_idx, wmap._mapid))
                sources = [self.sources[wc.event_idx]]

            self.synthesizers[wmap._mapid] = theanof.SeisSynthesizer(
                engine=self.engine,
                sources=sources,
                targets=wmap.targets,
                event=self.events[wc.event_idx],
                arrival_taper=wc.arrival_taper,
                arrival_times=wmap._arrival_times,
                wavename=wmap.name,
                filterer=wc.filterer,
                pre_stack_cut=self.config.pre_stack_cut,
                station_corrections=self.config.station_corrections)

            synths, _ = self.synthesizers[wmap._mapid](self.input_rvs)

            residuals = wmap.shared_data_array - synths

            logpts = multivariate_normal_chol(wmap.datasets,
                                              wmap.weights,
                                              hyperparams,
                                              residuals,
                                              hp_specific=hp_specific)

            wlogpts.append(logpts)

        t3 = time()
        logger.debug('Teleseismic forward model on test model takes: %f' %
                     (t3 - t2))

        llk = Deterministic(self._like_name, tt.concatenate(wlogpts))
        return llk.sum()
Example #23
    threshold = Normal('threshold', mu=0, sd=10)
    # priors for latency
    lf = HalfNormal('lf', sd=1)
    le = HalfNormal('le', sd=1)
    # compute activation
    scaled_time = time ** (-decay)
    def compute_activation(scaled_time_vector):
        compare = tt.isinf(scaled_time_vector)
        subvector = scaled_time_vector[(1-compare).nonzero()]
        activation_from_time = tt.log(subvector.sum())
        return activation_from_time
    activation_from_time, _ = theano.scan(fn=compute_activation,
                                          sequences=scaled_time)
    # latency likelihood -- this is where pyactr is used
    pyactr_rt = actrmodel_latency(lf, le, decay, activation_from_time)
    mu_rt = Deterministic('mu_rt', pyactr_rt)
    rt_observed = Normal('rt_observed', mu=mu_rt, sd=0.01, observed=RT)
    # accuracy likelihood
    odds_reciprocal = tt.exp(-(activation_from_time - threshold)/noise)
    mu_prob = Deterministic('mu_prob', 1/(1 + odds_reciprocal))
    prob_observed = Normal('prob_observed', mu=mu_prob, sd=0.01,
                           observed=ACCURACY)
    # we start the sampling
    #step = Metropolis()
    #db = SQLite('lex_dec_pyactr_chain_no_imaginal.sqlite')
    #trace = sample(draws=60000, trace=db, njobs=1, step=step, init='auto')

with lex_decision_with_bayes:
    trace = load('./data/lex_dec_pyactr_chain_no_imaginal.sqlite')
    trace = trace[10500:]
Example #24
def hmetad_groupLevel(data: dict, sample_model: bool = True, **kwargs):
    """Compute hierachical meta-d' at the subject level.

    This is an internal function. The group level model must be
    called using :py:func:`metadPy.hierarchical.hmetad`.

    Parameters
    ----------
    data : dict
        Response data.
    sample_model : boolean
        If `False`, only the model is returned without sampling.
    **kwargs : keyword arguments
        All keyword arguments are passed to :func:`pymc3.sampling.sample`.

    Returns
    -------
    model : :py:class:`pymc3.Model` instance
        The pymc3 model. Encapsulates the variables and likelihood factors.
    trace : :py:class:`pymc3.backends.base.MultiTrace` or
        :py:class:`arviz.InferenceData`
        A `MultiTrace` or `ArviZ InferenceData` object that contains the
        samples.

    References
    ----------
    .. [#] Fleming, S.M. (2017) HMeta-d: hierarchical Bayesian estimation
    of metacognitive efficiency from confidence ratings, Neuroscience of
    Consciousness, 3(1) nix007, https://doi.org/10.1093/nc/nix007
    """
    nSubj = data["nSubj"]
    hits = data["hits"]
    falsealarms = data["falsealarms"]
    s = data["s"]
    n = data["n"]
    counts = data["counts"]
    nRatings = data["nRatings"]
    Tol = data["Tol"]
    cr = data["cr"]
    m = data["m"]

    with Model() as model:

        # hyperpriors on d, c and c2
        mu_c1 = Normal(
            "mu_c1", mu=0, tau=0.01, shape=(1), testval=np.random.rand() * 0.1
        )
        mu_c2 = Normal(
            "mu_c2", mu=0, tau=0.01, shape=(1, 1), testval=np.random.rand() * 0.1
        )
        mu_d1 = Normal(
            "mu_d1", mu=0, tau=0.01, shape=(1), testval=np.random.rand() * 0.1
        )

        sigma_c1 = HalfNormal(
            "sigma_c1", tau=0.01, shape=(1), testval=np.random.rand() * 0.1
        )
        sigma_c2 = HalfNormal(
            "sigma_c2", tau=0.01, shape=(1, 1), testval=np.random.rand() * 0.1
        )
        sigma_d1 = HalfNormal(
            "sigma_d1", tau=0.01, shape=(1), testval=np.random.rand() * 0.1
        )

        # Type 1 priors
        c1_tilde = Normal("c1_tilde", mu=0, sigma=1, shape=(nSubj, 1))
        c1 = Deterministic("c1", mu_c1 + sigma_c1 * c1_tilde)

        d1_tilde = Normal("d1_tilde", mu=0, sigma=1, shape=(nSubj, 1))
        d1 = Deterministic("d1", mu_d1 + sigma_d1 * d1_tilde)

        # TYPE 1 SDT BINOMIAL MODEL
        h = cumulative_normal(d1 / 2 - c1)
        f = cumulative_normal(-d1 / 2 - c1)
        H = Binomial("H", n=s, p=h, observed=hits)
        FA = Binomial("FA", n=n, p=f, observed=falsealarms)

        # Hyperpriors on mRatio
        mu_logMratio = Normal(
            "mu_logMratio", mu=0, tau=1, shape=(1), testval=np.random.rand() * 0.1
        )
        sigma_delta = HalfNormal("sigma_delta", tau=1, shape=(1))

        delta_tilde = Normal("delta_tilde", mu=0, sigma=1, shape=(nSubj, 1))
        delta = Deterministic("delta", sigma_delta * delta_tilde)

        epsilon_logMratio = Beta("epsilon_logMratio", 1, 1, shape=(1))
        logMratio = Deterministic("logMratio", mu_logMratio + epsilon_logMratio * delta)
        mRatio = Deterministic("mRatio", math.exp(logMratio))

        # Type 2 priors
        meta_d = Deterministic("meta_d", mRatio * d1)

        # Specify ordered prior on criteria
        # bounded above and below by Type 1 c1
        cS1_hn = Normal(
            "cS1_hn",
            mu=0,
            sigma=1,
            shape=(nSubj, nRatings - 1),
            testval=np.linspace(-1.5, -0.5, nRatings - 1)
            .reshape(1, nRatings - 1)
            .repeat(nSubj, axis=0),
        )
        cS1 = Deterministic("cS1", -mu_c2 + (cS1_hn * sigma_c2))

        cS2_hn = Normal(
            "cS2_hn",
            mu=0,
            sigma=1,
            shape=(nSubj, nRatings - 1),
            testval=np.linspace(0.5, 1.5, nRatings - 1)
            .reshape(1, nRatings - 1)
            .repeat(nSubj, axis=0),
        )
        cS2 = Deterministic("cS2", mu_c2 + (cS2_hn * sigma_c2))

        # Means of SDT distributions
        S2mu = meta_d / 2
        S1mu = -meta_d / 2

        # Calculate normalisation constants
        C_area_rS1 = cumulative_normal(c1 - S1mu)
        I_area_rS1 = cumulative_normal(c1 - S2mu)
        C_area_rS2 = 1 - cumulative_normal(c1 - S2mu)
        I_area_rS2 = 1 - cumulative_normal(c1 - S1mu)

        # Get nC_rS1 probs
        nC_rS1 = cumulative_normal(cS1 - S1mu) / C_area_rS1
        nC_rS1 = Deterministic(
            "nC_rS1",
            math.concatenate(
                (
                    [
                        cumulative_normal(cS1[:, 0].reshape((nSubj, 1)) - S1mu)
                        / C_area_rS1,
                        nC_rS1[:, 1:] - nC_rS1[:, :-1],
                        (
                            (
                                cumulative_normal(c1 - S1mu)
                                - cumulative_normal(
                                    cS1[:, nRatings - 2].reshape((nSubj, 1)) - S1mu
                                )
                            )
                            / C_area_rS1
                        ),
                    ]
                ),
                axis=1,
            ),
        )

        # Get nI_rS2 probs
        nI_rS2 = (1 - cumulative_normal(cS2 - S1mu)) / I_area_rS2
        nI_rS2 = Deterministic(
            "nI_rS2",
            math.concatenate(
                (
                    [
                        (
                            (1 - cumulative_normal(c1 - S1mu))
                            - (
                                1
                                - cumulative_normal(
                                    cS2[:, 0].reshape((nSubj, 1)) - S1mu
                                )
                            )
                        )
                        / I_area_rS2,
                        nI_rS2[:, :-1]
                        - (1 - cumulative_normal(cS2[:, 1:] - S1mu)) / I_area_rS2,
                        (
                            1
                            - cumulative_normal(
                                cS2[:, nRatings - 2].reshape((nSubj, 1)) - S1mu
                            )
                        )
                        / I_area_rS2,
                    ]
                ),
                axis=1,
            ),
        )

        # Get nI_rS1 probs
        nI_rS1 = (-cumulative_normal(cS1 - S2mu)) / I_area_rS1
        nI_rS1 = Deterministic(
            "nI_rS1",
            math.concatenate(
                (
                    [
                        cumulative_normal(cS1[:, 0].reshape((nSubj, 1)) - S2mu)
                        / I_area_rS1,
                        nI_rS1[:, :-1]
                        + (cumulative_normal(cS1[:, 1:] - S2mu)) / I_area_rS1,
                        (
                            cumulative_normal(c1 - S2mu)
                            - cumulative_normal(
                                cS1[:, nRatings - 2].reshape((nSubj, 1)) - S2mu
                            )
                        )
                        / I_area_rS1,
                    ]
                ),
                axis=1,
            ),
        )

        # Get nC_rS2 probs
        nC_rS2 = (1 - cumulative_normal(cS2 - S2mu)) / C_area_rS2
        nC_rS2 = Deterministic(
            "nC_rS2",
            math.concatenate(
                (
                    [
                        (
                            (1 - cumulative_normal(c1 - S2mu))
                            - (
                                1
                                - cumulative_normal(
                                    cS2[:, 0].reshape((nSubj, 1)) - S2mu
                                )
                            )
                        )
                        / C_area_rS2,
                        nC_rS2[:, :-1]
                        - ((1 - cumulative_normal(cS2[:, 1:] - S2mu)) / C_area_rS2),
                        (
                            1
                            - cumulative_normal(
                                cS2[:, nRatings - 2].reshape((nSubj, 1)) - S2mu
                            )
                        )
                        / C_area_rS2,
                    ]
                ),
                axis=1,
            ),
        )

        # Avoid underflow of probabilities
        nC_rS1 = math.switch(nC_rS1 < Tol, Tol, nC_rS1)
        nI_rS2 = math.switch(nI_rS2 < Tol, Tol, nI_rS2)
        nI_rS1 = math.switch(nI_rS1 < Tol, Tol, nI_rS1)
        nC_rS2 = math.switch(nC_rS2 < Tol, Tol, nC_rS2)

        # TYPE 2 SDT MODEL (META-D)
        # Multinomial likelihood for response counts ordered as c(nR_S1,nR_S2)
        Multinomial(
            "CR_counts",
            cr,
            nC_rS1,
            shape=(nSubj, nRatings),
            observed=counts[:, :nRatings],
        )
        Multinomial(
            "FA_counts",
            FA,
            nI_rS2,
            shape=(nSubj, nRatings),
            observed=counts[:, nRatings : nRatings * 2],
        )
        Multinomial(
            "M_counts",
            m,
            nI_rS1,
            shape=(nSubj, nRatings),
            observed=counts[:, nRatings * 2 : nRatings * 3],
        )
        Multinomial(
            "H_counts",
            H,
            nC_rS2,
            shape=(nSubj, nRatings),
            observed=counts[:, nRatings * 3 : nRatings * 4],
        )

        if sample_model is True:

            trace = sample(return_inferencedata=True, **kwargs)

            return model, trace

        else:
            return model
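A hypothetical skeleton of the data dict consumed above, inferred from the field accesses in the code; the values are placeholders, not real data, but the counts are made internally consistent (columns ordered as correct rejections, false alarms, misses, hits by confidence rating) so the model can at least be built:

import numpy as np

nSubj, nRatings = 20, 4
row = [40, 25, 10, 5,   # correct rejections by rating
        5,  5,  4, 6,   # false alarms
       20, 10,  6, 4,   # misses
        5, 10, 15, 30]  # hits
counts = np.tile(row, (nSubj, 1))

data = {
    "nSubj": nSubj, "nRatings": nRatings, "counts": counts, "Tol": 1e-05,
    "cr": counts[:, :nRatings].sum(axis=1),
    "falsealarms": counts[:, nRatings:2 * nRatings].sum(axis=1),
    "m": counts[:, 2 * nRatings:3 * nRatings].sum(axis=1),
    "hits": counts[:, 3 * nRatings:].sum(axis=1),
}
data["s"] = data["hits"] + data["m"]          # signal trials per subject
data["n"] = data["cr"] + data["falsealarms"]  # noise trials per subject

model = hmetad_groupLevel(data, sample_model=False)  # build without sampling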
Example #25
    #print(predicted_rts)
    return predicted_rts


stimuli_csv = load_file(SENTENCES, sep=",")  #sentences with frequencies
sentences = stimuli_csv.groupby(['item', 'label'], sort=False)

parser_with_bayes = pm.Model()
with parser_with_bayes:
    lf = HalfNormal('lf', sd=0.3)
    le = HalfNormal('le', sd=0.5)
    rf = HalfNormal('rf', sd=0.05)
    emap = HalfNormal('emap', sd=1.0)
    # latency likelihood -- this is where pyactr is used
    pyactr_rt = actrmodel_latency(lf, le, rf, emap)
    subj_mu_rt = Deterministic('subj_mu_rt', pyactr_rt[0])
    subj_rt_observed = Normal('subj_rt_observed',
                              mu=subj_mu_rt,
                              sd=10,
                              observed=subj_extraction['rt'])
    obj_mu_rt = Deterministic('obj_mu_rt', pyactr_rt[1])
    obj_rt_observed = Normal('obj_rt_observed',
                             mu=obj_mu_rt,
                             sd=10,
                             observed=obj_extraction['rt'])
    # we start the sampling
    step = Metropolis()
    db = Text('subj_obj_extraction/')
    trace = sample(draws=100, trace=db, step=step, init='auto', tune=10)
    traceplot(trace)
    plt.savefig("subj_obj_extraction_posteriors.pdf")
Example #26
# define the model
# \sig ~ exp(50)
#       why? stdev of returns is approx 0.02
#       stdev of exp(lam=50) = 0.2
# \nu ~ exp(0.1)
#       the DOF for the student T...which should be sample size
#       mean of exp(lam=0.1) = 10
# s_i ~ normal(s_i-1, \sig^-2)
# log(y_i) ~ studentT(\nu, 0, exp(-2s_i))
with Model() as sp500_model:
    nu = Exponential('nu', 1. / 10, testval=5.)  # lam=50 gives similar results
    sigma = Exponential('sigma', 1. / .02, testval=.1)
    s = GaussianRandomWalk('s', sigma**-2, shape=len(returns))
    volatility_process = Deterministic('volatility_process', exp(-2 * s))
    r = StudentT('r', nu, lam=1 / volatility_process, observed=returns)

# fit the model using NUTS
# NUTS is auto-assigned in sample()...why?
# you may get an error like:
#   WARNING (theano.gof.compilelock): Overriding existing lock by dead process '10876' (I am process '3456')
# ignore it...the process will move along
with sp500_model:
    trace = sample(2000, progressbar=False)
# plot results from model fitting...
# the slice from the 200th draw onward discards burn-in / warm-up samples
traceplot(trace[200:], [nu, sigma])

# plot the results: volatility inferred by the model
fig, ax = plt.subplots()  # figsize=(15, 8)
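The model above consumes a returns array defined elsewhere; a minimal sketch of one way to build it, with the file name and column hypothetical:

import numpy as np
import pandas as pd

sp500 = pd.read_csv('sp500.csv')                         # hypothetical file
returns = np.log(sp500['close']).diff().dropna().values  # daily log-returns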
Example #27
from pymc3 import Model, Normal, HalfNormal, Uniform, Bernoulli, find_MAP, NUTS, sample, Slice, Deterministic
from scipy import optimize
import pymc3 as pm
N = 100

basic_model = Model()
with basic_model:
    p = Uniform("freq_cheating", 0, 1)
    true_answers = Bernoulli("truths", p, shape=N)
    first_coin_flips = Bernoulli("first_flips", 0.5, shape=N)
    second_coin_flips = Bernoulli("second_flips", 0.5, shape=N)

    determin_val1 = Deterministic(
        'determin_val1', first_coin_flips * true_answers +
        (1 - first_coin_flips) * second_coin_flips)
    determin_val = determin_val1.sum() / float(N)

    start = find_MAP(fmin=optimize.fmin_powell)

    # instantiate sampler
    step = Slice(vars=[true_answers])
    # draw 100 posterior samples
    trace = sample(100, step=step, start=start)

    step = Slice(vars=[first_coin_flips])
    # draw 100 posterior samples
    trace = sample(100, step=step, start=start)

    step = Slice(vars=[second_coin_flips])
    # draw 100 posterior samples
    trace = sample(100, step=step, start=start)
Example #28
def mcmc(model_prior_params, data_prior_params, N, epsilon, Z, sensitivity,
         num_samples):

    data_dim = Z['XX'].shape[0] - 1

    if data_dim > 1:
        raise ValueError(f'MCMC only works for data dim 1! ({data_dim})')

    Z = Z.copy()
    Z['X'] = Z['XX'][:, -1][:, None]

    import pymc3 as pm
    from pymc3.distributions.continuous import InverseGamma
    from pymc3.distributions.continuous import Normal
    from pymc3.distributions.multivariate import MvNormal
    from pymc3.distributions.continuous import Laplace
    from pymc3 import Deterministic
    import theano.tensor as T

    num_tune_samples = 500
    max_treedepth = 12
    target_accept = .95

    with pm.Model():
        # data prior
        tau_squared = InverseGamma('ts',
                                   alpha=data_prior_params[2],
                                   beta=data_prior_params[3][0, 0])

        mu_x_offset = Normal('mu_x_offset', mu=0, sd=1)
        mu_x = Deterministic(
            'mu', data_prior_params[0][0, 0] + mu_x_offset *
            pm.math.sqrt(tau_squared / data_prior_params[1][0, 0]))

        x_offset = Normal('x_offset', mu=0, sd=1, shape=N)
        x_temp = Deterministic('X',
                               mu_x + x_offset * pm.math.sqrt(tau_squared))

        ones = T.shape_padright(pm.math.ones_like(x_temp))
        x = pm.math.concatenate((T.shape_padright(x_temp), ones), axis=1)

        # regression model
        sigma_squared = InverseGamma('ss',
                                     alpha=model_prior_params[2],
                                     beta=model_prior_params[3])

        L = pm.math.sqrt(sigma_squared) * np.linalg.cholesky(
            np.linalg.inv(model_prior_params[1]))
        theta_offset = MvNormal('theta_offset',
                                mu=[0] * (data_dim + 1),
                                cov=np.diag([1] * (data_dim + 1)),
                                shape=data_dim + 1)
        thetas = Deterministic(
            't',
            model_prior_params[0].flatten() + pm.math.dot(L, theta_offset))

        # response data
        y_offset = Normal('y_offset', mu=0, sd=1, shape=N)
        y = Deterministic(
            'y',
            pm.math.flatten(pm.math.dot(thetas, x.T)) +
            y_offset * pm.math.sqrt(sigma_squared))

        # noisy sufficient statistics
        noise_scale = sensitivity / epsilon
        Laplace('z-X', mu=pm.math.sum(x), b=noise_scale, observed=Z['X'])
        Laplace('z-XX',
                mu=pm.math.sum(pm.math.sqr(x)),
                b=noise_scale,
                observed=Z['XX'].flatten())
        Laplace('z-Xy',
                mu=pm.math.sum(x.T * y),
                b=noise_scale,
                observed=Z['Xy'])
        Laplace('z-yy',
                mu=pm.math.sum(pm.math.sqr(y)),
                b=noise_scale,
                observed=Z['yy'])

        trace = pm.sampling.sample(draws=num_samples,
                                   tune=num_tune_samples,
                                   nuts_kwargs={
                                       'max_treedepth': max_treedepth,
                                       'target_accept': target_accept,
                                   })

    theta = trace.get_values('t')
    sigma_squared = trace.get_values('ss')

    return theta.squeeze(), sigma_squared
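A hedged sketch of one way to call mcmc, with synthetic data; the prior parameter tuples are placeholders shaped only to match the indexing inside the function, not a principled prior choice:

import numpy as np

rng = np.random.RandomState(0)
N = 100
x = rng.randn(N, 1)
X = np.hstack([x, np.ones((N, 1))])               # data column plus intercept
y = X.dot(np.array([2.0, -1.0])) + 0.5 * rng.randn(N)

epsilon = sensitivity = 1.0
lap = lambda: rng.laplace(scale=sensitivity / epsilon)  # crude scalar noise
Z = {'XX': X.T.dot(X) + lap(),
     'Xy': X.T.dot(y) + lap(),
     'yy': y.dot(y) + lap()}

data_prior_params = (np.zeros((1, 1)), np.eye(1), 2.0, np.eye(1))
model_prior_params = (np.zeros((2, 1)), np.eye(2), 2.0, 1.0)

theta, sigma_squared = mcmc(model_prior_params, data_prior_params,
                            N, epsilon, Z, sensitivity, num_samples=500)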
Example #29
    def get_formula(self, input_rvs, fixed_rvs, hyperparams, problem_config):

        # no a, d taper bounds as GF library saved between b c
        chop_bounds = ['b', 'c']

        logger.info("Loading %s Green's Functions" % self.name)
        self.load_gfs(crust_inds=[self.config.gf_config.reference_model_idx],
                      make_shared=False)

        hp_specific = self.config.dataset_specific_residual_noise_estimation
        tpoint = problem_config.get_test_point()

        self.input_rvs = input_rvs
        self.fixed_rvs = fixed_rvs

        logger.info('Seismic optimization on: \n '
                    ' %s' % ', '.join(self.input_rvs.keys()))

        t2 = time()
        wlogpts = []

        self.analyse_noise(tpoint, chop_bounds=chop_bounds)
        for gfs in self.gfs.values():
            gfs.init_optimization()

        self.init_weights()
        self.init_hierarchicals(problem_config)
        if self.config.station_corrections:
            logger.info('Initialized %i hierarchical parameters for '
                        'station corrections.' %
                        len(self.get_all_station_names()))

        self.input_rvs.update(fixed_rvs)

        ref_idx = self.config.gf_config.reference_model_idx

        nuc_strike = input_rvs['nucleation_strike']
        nuc_dip = input_rvs['nucleation_dip']

        t2 = time()
        # convert velocities to rupture onset
        logger.debug('Fast sweeping ...')
        starttimes0 = tt.zeros((self.fault.npatches), dtype=tconfig.floatX)
        for index in range(self.fault.nsubfaults):
            nuc_dip_idx, nuc_strike_idx = self.fault.fault_locations2idxs(
                index=index,
                positions_dip=nuc_dip[index],
                positions_strike=nuc_strike[index],
                backend='theano')

            sf_patch_indexs = self.fault.cum_subfault_npatches[index:index + 2]
            starttimes_tmp = self.sweepers[index](
                (1. /
                 self.fault.vector2subfault(index, input_rvs['velocities'])),
                nuc_dip_idx, nuc_strike_idx)

            starttimes_tmp += input_rvs['time'][index]
            starttimes0 = tt.set_subtensor(
                starttimes0[sf_patch_indexs[0]:sf_patch_indexs[1]],
                starttimes_tmp)

        wlogpts = []
        for wmap in self.wavemaps:
            wc = wmap.config
            # station corrections
            if len(self.hierarchicals) > 0:
                logger.info('Applying station corrections ...')
                starttimes = (tt.tile(starttimes0, wmap.n_t) - tt.repeat(
                    self.hierarchicals[wmap.time_shifts_id][
                        wmap.station_correction_idxs],
                    self.fault.npatches)).reshape(
                        (wmap.n_t, self.fault.npatches))
            else:
                logger.info('No station corrections ...')
                starttimes = tt.tile(starttimes0, wmap.n_t).reshape(
                    (wmap.n_t, self.fault.npatches))

            targetidxs = shared(num.atleast_2d(num.arange(wmap.n_t)).T,
                                borrow=True)
            logger.debug('Stacking %s phase ...' % wc.name)
            synthetics = tt.zeros(
                (wmap.n_t,
                 wc.arrival_taper.nsamples(self.config.gf_config.sample_rate)),
                dtype=tconfig.floatX)

            # make sure data is init as array, if non-toeplitz above-traces!
            wmap.prepare_data(source=self.events[wc.event_idx],
                              engine=self.engine,
                              outmode='array',
                              chop_bounds=chop_bounds)

            for var in self.slip_varnames:
                logger.debug('Stacking %s variable' % var)
                key = self.get_gflibrary_key(crust_ind=ref_idx,
                                             wavename=wmap.name,
                                             component=var)
                synthetics += self.gfs[key].stack_all(
                    targetidxs=targetidxs,
                    starttimes=starttimes,
                    durations=input_rvs['durations'],
                    slips=input_rvs[var],
                    interpolation=wc.interpolation)

            residuals = wmap.shared_data_array - synthetics

            logger.debug('Calculating likelihoods ...')
            logpts = multivariate_normal_chol(wmap.datasets,
                                              wmap.weights,
                                              hyperparams,
                                              residuals,
                                              hp_specific=hp_specific)

            wlogpts.append(logpts)

        t3 = time()
        logger.debug('Seismic formula on test model takes: %f' % (t3 - t2))

        llk = Deterministic(self._like_name, tt.concatenate(wlogpts))
        return llk.sum()
Example #30
    lf = HalfNormal('lf', sd=1)
    le = HalfNormal('le', sd=1)
    # compute activation
    scaled_time = time**(-decay)

    def compute_activation(scaled_time_vector):
        compare = tt.isinf(scaled_time_vector)
        subvector = scaled_time_vector[(1 - compare).nonzero()]
        activation_from_time = tt.log(subvector.sum())
        return activation_from_time

    activation_from_time, _ = theano.scan(fn=compute_activation,
                                          sequences=scaled_time)
    # latency likelihood -- this is where pyactr is used
    pyactr_rt = actrmodel_latency(lf, le, decay, activation_from_time)
    mu_rt = Deterministic('mu_rt', pyactr_rt)
    rt_observed = Normal('rt_observed', mu=mu_rt, sd=0.01, observed=RT)
    # accuracy likelihood
    odds_reciprocal = tt.exp(-(activation_from_time - threshold) / noise)
    mu_prob = Deterministic('mu_prob', 1 / (1 + odds_reciprocal))
    prob_observed = Normal('prob_observed',
                           mu=mu_prob,
                           sd=0.01,
                           observed=ACCURACY)

#with lex_decision_with_bayes:
#step = pm.SMC(parallel=True)
#trace = pm.sample(draws=5000, step=step, njobs=1, cores=25)

#dump('../data/lex_dec_pyactr_no_imaginal', trace)
#######################################
# Model definition
# MEASURE = exp(ALPHA)*exp(BETA)**log(X)
# mu = log(MEASURE) = ALPHA+BETA*log(X)
#######################################

with Model() as cost_model:
    # Priors for unknown cost model parameters
    ALPHA = Normal('ALPHA', mu=0, sigma=1000)
    BETA = Normal('BETA', mu=0, sigma=1000, shape=len(ATT))
    SIGMA = HalfNormal('SIGMA', sigma=100)

    # Model
    MU = ALPHA + dot(X_INPUT, BETA)
    NU = Deterministic('NU', Exponential('nu_', 1 / 29))

    # Likelihood (sampling distribution) of observations
    #     Y_OBS = Normal('Y_OBS', mu=mu, sigma=sigma, observed=Y_OUTPUT)
    Y_OBS = StudentT('Y_OBS', mu=MU, sigma=SIGMA, observed=Y_OUTPUT, nu=NU)

with cost_model:
    TRACE = sample(SAMPLES, tune=TUNE, cores=6)
    traceplot(TRACE)

with cost_model:
    Y_PRED = sample_posterior_predictive(TRACE, 1000, cost_model)
    Y_ = Y_PRED['Y_OBS'].mean(axis=0)
    PP['model_cost'] = exp(Y_)  # depends on input/output
    SUMMARY = df_summary(TRACE)
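The cost model regresses log-measure on log-attributes (mu = ALPHA + BETA*log(X)). A hypothetical stand-in for the names the snippet leaves undefined; all values are placeholders:

import numpy as np

ATT = ['weight', 'power']                 # attribute columns (placeholder)
SAMPLES, TUNE = 2000, 1000

raw = 1.0 + np.abs(np.random.randn(40, len(ATT)))
X_INPUT = np.log(raw)                     # log-attributes
Y_OUTPUT = X_INPUT.dot([1.2, 0.4]) + 0.3  # synthetic log-measure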