Example #1
import numpy as np
import pymc


def get_lg_funs(graph, funs, beta_range, stdev):
    """Creates PyMC variables for all children of the variables specified in 'funs', using a
    linear Gaussian structural equation model. Adds the new variables to the 'funs' dictionary.

    Called while traversing the directed acyclic graph during model-building.
    Uses a structural equation model to generate data:
        child = beta1 * parent1 + beta2 * parent2 + ... + noise
        beta1, beta2, ... follow a uniform distribution over beta_range = (lower, upper)

    graph = networkx.DiGraph object specifying causal dependencies
    funs = dict of PyMC variables already created. keys: node names, values: PyMC variables
    beta_range = (lower, upper) bounds of the uniform distribution from which the linear
        coefficients are drawn
    stdev = standard deviation of the exogenous noise added to each child variable

    Returns funs = dict of all extant PyMC variables, including children of the variables
    originally in 'funs'. keys: node names, values: PyMC variables
    """

    # Nodes whose variables already exist, and nodes all of whose parents already exist.
    old_nodes = [node for node in graph if graph.node[node]['name'] in funs.keys()]
    new_nodes = [node for node in graph if node not in old_nodes and
                 set(graph.predecessors(node)) <= set(old_nodes)]
    for node in new_nodes:
        node_name = graph.node[node]['name']
        parent_names = [graph.node[parent]['name'] for parent in graph.predecessors(node)]
        parent_funs = [funs[parent_name] for parent_name in parent_names]
        if parent_funs:
            # Draw one linear coefficient per parent, then model the child as a Gaussian
            # centered on the linear combination of its parents.
            betas = np.array([np.random.uniform(*beta_range) for _ in range(len(parent_funs))])
            # Name the deterministic distinctly from the child node to avoid a name clash.
            lc = pymc.LinearCombination(node_name + '_mu', betas, parent_funs,
                                        doc='linear combination of parent variables')
            funs[node_name] = pymc.Normal(node_name, mu=lc, tau=1 / stdev ** 2)
        else:
            # Root nodes (no parents) get a standard normal.
            funs[node_name] = pymc.Normal(node_name, 0, 1)
    return funs
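
A minimal usage sketch, not from the original source: it assumes each node carries a 'name' attribute, and that 'funs' is seeded with the root variable. Because get_lg_funs only creates children whose parents all already exist, it is called repeatedly while traversing the DAG:

import networkx as nx
import pymc

# Hypothetical three-node chain a -> b -> c.
g = nx.DiGraph()
g.add_edges_from([(0, 1), (1, 2)])
for i, name in enumerate(['a', 'b', 'c']):
    g.node[i]['name'] = name

funs = {'a': pymc.Normal('a', 0, 1)}  # seed with the root variable
funs = get_lg_funs(g, funs, beta_range=(0.5, 1.5), stdev=0.1)  # adds 'b'
funs = get_lg_funs(g, funs, beta_range=(0.5, 1.5), stdev=0.1)  # adds 'c'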
Example #2
class likelihood_model:

    # This class is defined inside a method of the original analysis object, so s, b, muT,
    # sigmas, sigmab, and self.QCLs are taken from the enclosing scope.

    # Stochastic variables for signal, background, and total event rates
    #signal_rate     = pymc.Normal('signal_rate',     mu=s*muT,  tau=1/sigmas**2)
    #background_rate = pymc.Normal('background_rate', mu=b,      tau=1/sigmab**2)
    # Doh, need to use a truncated normal to prevent negative values
    signal_rate     = pymc.TruncatedNormal('signal_rate',     mu=s*muT, tau=1/sigmas**2, a=0, b=np.inf)
    background_rate = pymc.TruncatedNormal('background_rate', mu=b,     tau=1/sigmab**2, a=0, b=np.inf)

    # Deterministic variable (simply the sum of the signal and background rates)
    total_rate = pymc.LinearCombination('total_rate', [1, 1], [signal_rate, background_rate])

    # Stochastic variable for the number of observed events
    observed_events = pymc.Poisson('observed_events', mu=total_rate)

    # Deterministic variable for the test statistic
    @pymc.deterministic()
    def qCLs(n=observed_events):
        q, chi2B = self.QCLs(n, s)
        return q
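
A self-contained sketch of the same signal-plus-background construction, with hypothetical numbers standing in for s, muT, b, sigmas, and sigmab (none of these values come from the original):

import numpy as np
import pymc

s, muT, b = 10., 1., 50.  # hypothetical signal rate, signal strength, background rate
sigmas, sigmab = 2., 5.   # hypothetical uncertainties

signal_rate     = pymc.TruncatedNormal('signal_rate',     mu=s*muT, tau=1/sigmas**2, a=0, b=np.inf)
background_rate = pymc.TruncatedNormal('background_rate', mu=b,     tau=1/sigmab**2, a=0, b=np.inf)
total_rate = pymc.LinearCombination('total_rate', [1, 1], [signal_rate, background_rate])
observed_events = pymc.Poisson('observed_events', mu=total_rate)

mcmc = pymc.MCMC([signal_rate, background_rate, total_rate, observed_events])
mcmc.sample(10000, burn=1000)
print(mcmc.stats()['total_rate']['mean'])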
Example #3
import pylab as pl
import pymc as mc

data = pl.csv2rec('data.csv')

## Design matrix for intercept and intervention effect
X = [[1., t_ij] for t_ij in data.treat]

## Design matrix for cluster effect
Z = pl.zeros([len(data), 10])
for row, j in enumerate(data.j):
    if j <= 10:
        Z[row, j - 1] = 1.

## Priors
var_u = mc.Gamma('var_u', alpha=1, beta=1, value=1.)
B = mc.Normal('B', mu=[0, 0], tau=10000**-1)
U = mc.Normal('u', mu=0, tau=var_u**-1, value=pl.zeros(10))
var_e1 = mc.Uniform('var_e1', lower=0, upper=100, value=[1., 1.])

## Systematic Model
y_hat = mc.LinearCombination('y_hat', [X, Z], [B, U], doc='y_hat = X*B + Z*U')

## Stochastic Model
y_i = mc.Normal('y_i',
                value=data.y,
                mu=y_hat,
                tau=var_e1[data.treat]**-1,
                observed=True)
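
A hedged sketch of fitting this model with PyMC2 (the iteration counts are arbitrary, and 'data.csv' is assumed to contain the y, treat, and j columns used above):

m = mc.MCMC([var_u, B, U, var_e1, y_hat, y_i])
m.sample(20000, burn=5000, thin=5)
print(m.stats()['B']['mean'])  # posterior means of the intercept and intervention effect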
Example #4
    def __init__(self, name, X_matrices, betas, states, *args, **kwds):
        r"""
        Parameters
        ==========
        X_matrices: list of np.ndarrays
            Matrices for each possible value of :math:`S_t` with rows
            corresponding to :math:`x_t` in the product
            :math:`x_t^\top \beta^{(S_t)}`.
        betas: list of np.ndarrays or pymc.Stochastics
            Vectors corresponding to each :math:`\beta^{(S_t)}` in the
            product :math:`x_t^\top \beta^{(S_t)}`.
        states: np.ndarray of int
            Vector of the state sequence :math:`S_t`.
        """

        self.N_states = np.alen(X_matrices)

        if not self.N_states == np.alen(betas):
            raise ValueError("len(X_matrices) must equal len(betas)")

        if not np.count_nonzero(np.diff([np.alen(X_)
                                         for X_ in X_matrices])) == 0:
            raise ValueError("X_matrices must all have equal first dimension")

        self.N_obs = np.alen(X_matrices[0])

        if not self.N_obs == np.alen(states.value):
            raise ValueError("states do not match X_matrices dimensions")

        if not all([
                np.shape(X_)[-1] == np.alen(getattr(b_, 'value', b_))
                for X_, b_ in zip(X_matrices, betas)
        ]):
            raise ValueError("X_matrices and betas dimensions don't match")

        # TODO: Change these to indices instead of boolean masks
        self.which_k = tuple()
        self.X_k_matrices = tuple()
        self.k_prods = tuple()
        self.beta_obs_idx = dict()

        for k in xrange(self.N_states):

            def which_k_func(s_=states, k_=k):
                return np.equal(k_, s_)

            w_k = pymc.Lambda("w_{}".format(k), which_k_func, trace=False)

            def X_k_func(X_=X_matrices[k], t_=w_k):
                return X_[t_]

            X_k_mat = pymc.Lambda("X_{}".format(k), X_k_func, trace=False)

            this_beta = betas[k]
            k_p = pymc.LinearCombination("mu_{}".format(k), (X_k_mat, ),
                                         (this_beta, ),
                                         trace=False)

            self.which_k += (w_k, )
            self.X_k_matrices += (X_k_mat, )
            self.k_prods += (k_p, )

            if isinstance(this_beta, collections.Hashable):
                self.beta_obs_idx[this_beta] = w_k

        def eval_fun(which_k, k_prods):
            res = np.empty(self.N_obs, dtype=np.float)
            for idx, k_p in zip(which_k, k_prods):
                res[idx] = k_p
            return res

        parents = {'which_k': self.which_k, 'k_prods': self.k_prods}
        super(HMMLinearCombination, self).__init__(eval=eval_fun,
                                                   doc=self.__doc__,
                                                   name=name,
                                                   parents=parents,
                                                   *args,
                                                   **kwds)
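
The quantity this Deterministic evaluates can be sketched in plain NumPy. This is an illustration of the regime-switching product x_t^T beta^(S_t) from the docstring, not part of the original class; all shapes and values are made up:

import numpy as np

N, K = 100, 2                             # hypothetical number of observations and states
X_matrices = [np.random.randn(N, 3) for _ in range(K)]
betas = [np.ones(3), -np.ones(3)]
states = np.random.randint(0, K, size=N)  # the state sequence S_t

mu = np.empty(N)
for k in range(K):
    w_k = np.equal(k, states)             # boolean mask, as in which_k_func above
    mu[w_k] = X_matrices[k][w_k].dot(betas[k])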
Example #5
    def __init__(self, F, G, V, W, m_0, C_0, Y_vals = None):
        """
        D = DLM(F, G, V, W, m_0, C_0[, Y_vals])

        Returns special NormalSubmodel instance representing the dynamic
        linear model formed by F, G, V and W.

        Resulting probability model:

            theta[0] | m_0, C_0 ~ N(m_0, C_0)

            theta[t] | theta[t-1], G[t], W[t] ~ N(G[t] theta[t-1], W[t]), t = 1..T

            Y[t] | theta[t], F[t], V[t] ~ N(F[t] theta[t], V[t]), t = 0..T


        Arguments F, G, V should be dictionaries keyed by name of component.
            F[comp], G[comp], V[comp] should be lists.
                F[comp][t] should be the design vector of component 'comp' at time t.
                G[comp][t] should be the system matrix.

        Argument W should be either a number between 0 and 1 or a dictionary of lists
        like V.
            If a dictionary of lists, W[comp][t] should be the system covariance or
            variance at time t.
            If a scalar, W should be the discount factor for the DLM.

        Argument V, and Y_vals if given, should be lists.
            V[t] should be the observation covariance or variance at time t.
            Y_vals[t] should give the value of output Y at time t.

        Arguments m_0 and C_0 should be dictionaries keyed by name of component.
            m_0[comp] should be the mean of theta[comp][0].
            C_0[comp] should be the covariance or variance of theta[comp][0].

        Note: if multiple components are correlated in W or V, they should be made into
        a single component.

        D.comp is a handle to a list.
            D.comp[t] is a Stochastic representing the value of system state 'theta'
            sliced according to component 'comp' at time t.

        D.theta is a dictionary of lists analogous to F, G, V and W.

        D.Y is a list. D.Y[t] is a Stochastic representing the value of the output
        'Y' at time t.
        """

        self.comps = F.keys()

        self.F = dict_to_recarray(F)
        self.G = dict_to_recarray(G)
        self.V = pymc.ListContainer(V)
        if np.isscalar(W):
            self.discount = True
            self.delta = W
        else:
            self.W = dict_to_recarray(W)
            self.discount = False
            self.delta = None
        if self.discount:
            raise NotImplementedError("Have yet to code up the discount factor.")
        self.m_0 = dict_to_recarray(m_0)
        self.C_0 = dict_to_recarray(C_0)
        self.T = len(self.V)

        theta = {}
        theta_mean = {}

        Y_mean = []
        Y = []

        # ==============
        # = Make theta =
        # ==============
        for comp in self.comps:
            # Is diagonal the covariance or variance?
            if isinstance(self.W[comp][0], pymc.Variable):
                diag = isvector(self.W[comp][0].value)
            else:
                diag = isvector(self.W[comp][0])

            if diag:
                # Normal variates if diagonal.
                theta[comp] = [pymc.Normal('%s[0]'%comp, m_0[comp], C_0[comp])]
            else:
                # MV normal otherwise.
                theta[comp] = [pymc.MvNormal('%s[0]'%comp, m_0[comp], C_0[comp])]

            theta_mean[comp] = []

            for t in xrange(1,self.T):

                theta_mean[comp].append(pymc.LinearCombination('%s_mean[%i]'%(comp, t), [G[comp][t-1]], [theta[comp][t-1]]))

                if diag:
                    # Normal variates if diagonal.
                    theta[comp].append(pymc.Normal('%s[%i]'%(comp,t), theta_mean[comp][t-1], W[comp][t-1]))
                else:
                    # MV normal otherwise.
                    theta[comp].append(pymc.MvNormal('%s[%i]'%(comp,t), theta_mean[comp][t-1], W[comp][t-1]))


        self.theta = dict_to_recarray(theta)
        self.theta_mean = dict_to_recarray(theta_mean)


        # ==========
        # = Make Y =
        # ==========
        Y_diag = isvector(self.V.value[0])

        for t in xrange(self.T):
            x_coef = []
            y_coef = []

            for comp in self.comps:
                x_coef.append(self.F[comp][t])
                y_coef.append(theta[comp][t])

            Y_mean.append(pymc.LinearCombination('Y_mean[%i]'%t, x_coef, y_coef))
            if Y_diag:
                # Normal variates if diagonal.
                Y.append(pymc.Normal('Y[%i]'%t, Y_mean[t], V[t]))
            else:
                # MV normal otherwise.
                Y.append(pymc.MvNormal('Y[%i]'%t, Y_mean[t], V[t]))

            # If data provided, use it.
            if Y_vals is not None:
                Y[t].value = Y_vals[t]
                Y[t].observed = True

        self.Y_mean = pymc.Container(np.array(Y_mean))
        self.Y = pymc.Container(np.array(Y))

        # No sense creating a NormalSubmodel here... just stay a ListContainer.
        NormalSubmodel.__init__(self, [F,G,W,V,m_0,C_0,Y,theta,theta_mean,Y_mean])
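
A hedged sketch of the inputs for a single-component local level model over T = 3 time steps, following the docstring's conventions. The component name 'level' and all numbers are assumptions, and running it requires the module's own helpers such as dict_to_recarray and isvector:

import numpy as np

T = 3
F = {'level': [np.ones(1) for t in range(T)]}        # design vectors F[comp][t]
G = {'level': [np.eye(1) for t in range(T)]}         # system matrices G[comp][t]
V = [np.ones(1) for t in range(T)]                   # observation variances V[t]
W = {'level': [0.1 * np.ones(1) for t in range(T)]}  # system variances W[comp][t]
m_0 = {'level': np.zeros(1)}
C_0 = {'level': np.ones(1)}

D = DLM(F, G, V, W, m_0, C_0, Y_vals=[np.array([y]) for y in (0.5, 0.7, 0.4)])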