Example #1
import numpy as np
import matplotlib.pyplot as plt
# Gaussian, AnnealedGaussianModel, ParallelTempering, and the testing
# helpers are assumed to come from the surrounding pybasicbayes-based
# test module.

def test_gaussian():
    prior_data = 2 * np.random.randn(5, 2) + np.array([1., 3.])
    a = Gaussian().empirical_bayes(prior_data)

    # data = a.rvs(10)

    gibbs_statistics = []
    for itr in range(20000):
        a.resample()
        # a.resample(data)
        gibbs_statistics.append(a.mu)
    gibbs_statistics = np.array(gibbs_statistics)

    b = AnnealedGaussianModel().empirical_bayes(prior_data)
    # b.add_data(data)

    pt = ParallelTempering(b, [5.])
    pt_samples = pt.run(20000, 1)
    pt_statistics = np.array([m.mu for m in pt_samples])

    fig = plt.figure()
    testing.populations_eq_quantile_plot(gibbs_statistics, pt_statistics, fig=fig)
    plt.savefig('gaussian_test.png')

    # assert_populations_eq_moments raises an AssertionError when the two
    # populations' moments differ significantly; the trailing message tuple
    # in the original was dead code. If this fires, annealing may have
    # failed -- check the saved figure.
    testing.assert_populations_eq_moments(gibbs_statistics, pt_statistics)
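
The test above relies on pybasicbayes' testing helpers to compare the two sampler populations. A minimal self-contained sketch of the quantile-quantile comparison idea, using only NumPy and Matplotlib (illustrative names, independent of pybasicbayes):

import numpy as np
import matplotlib.pyplot as plt

def qq_compare(samples_a, samples_b, n_quantiles=100):
    """Plot matched quantiles of two 1-D sample populations.

    If both populations were drawn from the same distribution,
    the points should fall near the diagonal.
    """
    qs = np.linspace(0.01, 0.99, n_quantiles)
    qa = np.quantile(samples_a, qs)
    qb = np.quantile(samples_b, qs)
    plt.plot(qa, qb, 'o', alpha=0.5)
    lim = [min(qa.min(), qb.min()), max(qa.max(), qb.max())]
    plt.plot(lim, lim, 'k--')  # reference diagonal
    plt.xlabel('Gibbs quantiles')
    plt.ylabel('Parallel tempering quantiles')

# e.g. qq_compare(gibbs_statistics[:, 0], pt_statistics[:, 0])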
Example #2
class StickbreakingCorrelatedLDA(_LDABase):
    "Correlated LDA with the stick breaking representation"

    def __init__(self, data, T, alpha_beta):
        mu, sigma = compute_uniform_mean_psi(T)
        self.theta_prior = Gaussian(
            mu=mu, sigma=sigma, mu_0=mu, sigma_0=T * sigma / 10.0, nu_0=T / 10.0, kappa_0=1.0 / 10
        )

        self.ppgs = initialize_polya_gamma_samplers()
        self.omega = np.zeros((data.shape[0], T - 1))

        super(StickbreakingCorrelatedLDA, self).__init__(data, T, alpha_beta)

    @property
    def theta(self):
        return psi_to_pi(self.psi)

    @theta.setter
    def theta(self, theta):
        self.psi = pi_to_psi(theta)

    def initialize_theta(self):
        self.psi = np.tile(self.theta_prior.mu, (self.D, 1))

    def resample_theta(self):
        self.resample_omega()
        self.resample_psi()

    def resample(self):
        super(StickbreakingCorrelatedLDA, self).resample()
        self.resample_theta_prior()

    def resample_omega(self):
        pgdrawvpar(
            self.ppgs, N_vec(self.doc_topic_counts).astype("float64").ravel(), self.psi.ravel(), self.omega.ravel()
        )
        np.clip(self.omega, 1e-32, np.inf, out=self.omega)

    def resample_psi(self):
        Lmbda = np.linalg.inv(self.theta_prior.sigma)
        h = Lmbda.dot(self.theta_prior.mu)
        randvec = np.random.randn(self.D, self.T - 1)  # pre-generate randomness

        for d, c in enumerate(self.doc_topic_counts):
            self.psi[d] = sample_infogaussian(Lmbda + np.diag(self.omega[d]), h + kappa_vec(c), randvec[d])

    def resample_theta_prior(self):
        self.theta_prior.resample(self.psi)

    def copy_sample(self):
        new = copy.copy(self)
        new.beta = self.beta.copy()
        new.psi = self.psi.copy()
        new.theta_prior = self.theta_prior.copy_sample()
        del new.z
        del new.omega
        return new
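
The stick-breaking map behind psi_to_pi/pi_to_psi above takes a (T-1)-vector of reals to a point on the T-simplex. A hedged, self-contained sketch of that transform (illustrative only; the actual helpers live in the surrounding package):

import numpy as np

def stick_breaking(psi):
    """Map a (T-1)-vector psi to a T-dim probability vector."""
    sigma = 1.0 / (1.0 + np.exp(-psi))   # logistic of each psi_t
    pi = np.zeros(len(psi) + 1)
    stick = 1.0
    for t, s in enumerate(sigma):
        pi[t] = s * stick                # break off a fraction of the stick
        stick *= (1.0 - s)               # shrink what remains
    pi[-1] = stick                       # the last entry takes the remainder
    return pi

# e.g. stick_breaking(np.zeros(3)) -> [0.5, 0.25, 0.125, 0.125]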
Example #3
class NIWGaussianWeightDistribution(GaussianWeightDistribution, GibbsSampling):
    """
    Gaussian weight distribution with a normal inverse-Wishart prior.
    """
    # TODO: Specify the self weight parameters in the constructor
    def __init__(self, N, B=1, mu_0=None, Sigma_0=None, nu_0=None, kappa_0=None):
        super(NIWGaussianWeightDistribution, self).__init__(N)
        self.B = B

        if mu_0 is None:
            mu_0 = np.zeros(B)

        if Sigma_0 is None:
            Sigma_0 = np.eye(B)

        if nu_0 is None:
            nu_0 = B + 2

        if kappa_0 is None:
            kappa_0 = 1.0

        self._gaussian = Gaussian(mu_0=mu_0, sigma_0=Sigma_0,
                                  nu_0=nu_0, kappa_0=kappa_0)

        # Special case self-weights (along the diagonal)
        self._self_gaussian = Gaussian(mu_0=mu_0, sigma_0=Sigma_0,
                                       nu_0=nu_0, kappa_0=kappa_0)

    @property
    def Mu(self):
        mu = self._gaussian.mu
        Mu = np.tile(mu[None, None, :], (self.N, self.N, 1))

        for n in range(self.N):
            Mu[n, n, :] = self._self_gaussian.mu

        return Mu

    @property
    def Sigma(self):
        sig = self._gaussian.sigma
        Sig = np.tile(sig[None, None, :, :], (self.N, self.N, 1, 1))

        for n in range(self.N):
            Sig[n, n, :, :] = self._self_gaussian.sigma

        return Sig

    def initialize_from_prior(self):
        self._gaussian.resample()
        self._self_gaussian.resample()

    def initialize_hypers(self, W):
        # self.B = W.shape[2]
        mu_0 = W.mean(axis=(0, 1))
        sigma_0 = np.diag(W.var(axis=(0, 1)))
        self._gaussian.mu_0 = mu_0
        self._gaussian.sigma_0 = sigma_0
        self._gaussian.resample()
        # self._gaussian.nu_0 = self.B + 2

        W_self = W[np.arange(self.N), np.arange(self.N)]
        self._self_gaussian.mu_0 = W_self.mean(axis=0)
        self._self_gaussian.sigma_0 = np.diag(W_self.var(axis=0))
        self._self_gaussian.resample()
        # self._self_gaussian.nu_0 = self.B + 2

    def log_prior(self):
        from graphistician.internals.utils import normal_inverse_wishart_log_prob
        lp = 0
        lp += normal_inverse_wishart_log_prob(self._gaussian)
        lp += normal_inverse_wishart_log_prob(self._self_gaussian)

        return lp

    def sample_predictive_parameters(self):
        Murow = Mucol = np.tile(self._gaussian.mu[None, :], (self.N + 1, 1))
        Lrow = Lcol = np.tile(self._gaussian.sigma_chol[None, :, :],
                              (self.N + 1, 1, 1))

        Murow[-1, :] = self._self_gaussian.mu
        Mucol[-1, :] = self._self_gaussian.mu
        Lrow[-1, :, :] = self._self_gaussian.sigma_chol
        Lcol[-1, :, :] = self._self_gaussian.sigma_chol
        return Murow, Mucol, Lrow, Lcol

    def resample(self, data):
        # Python 3 removed tuple parameter unpacking; unpack explicitly.
        A, W = data
        # Resample the Normal-inverse Wishart prior over mu and W
        # given W for which A=1
        A_offdiag = A.copy()
        np.fill_diagonal(A_offdiag, 0)

        A_ondiag = A * np.eye(self.N)
        self._gaussian.resample(W[A_offdiag == 1])
        self._self_gaussian.resample(W[A_ondiag == 1])
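
A hedged usage sketch for the class above, assuming the surrounding package supplies its base classes; shapes follow the Mu and Sigma properties:

import numpy as np

N, B = 10, 2
dist = NIWGaussianWeightDistribution(N, B=B)

A = (np.random.rand(N, N) < 0.3).astype(int)   # binary adjacency matrix
W = np.random.randn(N, N, B)                   # a weight vector per edge

dist.resample((A, W))        # update the NIW posterior from observed weights
print(dist.Mu.shape)         # (N, N, B):    per-edge weight means
print(dist.Sigma.shape)      # (N, N, B, B): per-edge weight covariances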
Example #4
class SBMGaussianWeightDistribution(GaussianWeightDistribution, GibbsSampling):
    """
    A stochastic block model is a clustered network model with
    C:          Number of blocks
    m[c]:       Probability that a node belongs to block c
    mu[c,c']:   Mean weight from node in block c to node in block c'
    Sig[c,c']:  Cov of weight from node in block c to node in block c'

    It has hyperparameters:
    pi:         Parameter of Dirichlet prior over m
    mu0, nu0, kappa0, Sigma0: Parameters of NIW prior over (mu,Sig)
    """

    # TODO: Specify the self weight parameters in the constructor
    def __init__(self, N, B=1,
                 C=2, pi=10.0,
                 mu_0=None, Sigma_0=None, nu_0=None, kappa_0=None,
                 special_case_self_conns=True):
        """
        Initialize SBM with parameters defined above.
        """
        super(SBMGaussianWeightDistribution, self).__init__(N)
        self.B = B

        assert isinstance(C, int) and C >= 1, "C must be a positive integer number of blocks"
        self.C = C

        if isinstance(pi, (int, float)):
            self.pi = pi * np.ones(C)
        else:
            assert isinstance(pi, np.ndarray) and pi.shape == (C,), "pi must be a scalar or a C-vector"
            self.pi = pi

        self.m = np.random.dirichlet(self.pi)
        self.c = np.random.choice(self.C, p=self.m, size=self.N)

        if mu_0 is None:
            mu_0 = np.zeros(B)

        if Sigma_0 is None:
            Sigma_0 = np.eye(B)

        if nu_0 is None:
            nu_0 = B + 2

        if kappa_0 is None:
            kappa_0 = 1.0

        self._gaussians = [[Gaussian(mu_0=mu_0, nu_0=nu_0,
                                     kappa_0=kappa_0, sigma_0=Sigma_0)
                            for _ in range(C)]
                           for _ in range(C)]

        # Special case self-weights (along the diagonal)
        self.special_case_self_conns = special_case_self_conns
        if special_case_self_conns:
            self._self_gaussian = Gaussian(mu_0=mu_0, sigma_0=Sigma_0,
                                           nu_0=nu_0, kappa_0=kappa_0)

    @property
    def _Mu(self):
        return np.array([[self._gaussians[c1][c2].mu
                          for c2 in range(self.C)]
                         for c1 in range(self.C)])

    @property
    def _Sigma(self):
        return np.array([[self._gaussians[c1][c2].sigma
                          for c2 in range(self.C)]
                         for c1 in range(self.C)])

    @property
    def Mu(self):
        """
        Get the NxNxB matrix of weight means
        :return:
        """
        _Mu = self._Mu
        Mu = _Mu[np.ix_(self.c, self.c)]

        if self.special_case_self_conns:
            for n in range(self.N):
                Mu[n,n] = self._self_gaussian.mu

        return Mu

    @property
    def Sigma(self):
        """
        Get the NxNxBxB matrix of weight covariances
        :return:
        """
        _Sigma = self._Sigma
        Sigma = _Sigma[np.ix_(self.c, self.c)]

        if self.special_case_self_conns:
            for n in range(self.N):
                Sigma[n,n] = self._self_gaussian.sigma

        return Sigma

    def initialize_from_prior(self):
        self.m = np.random.dirichlet(self.pi)
        self.c = np.random.choice(self.C, p=self.m, size=self.N)

        for c1 in range(self.C):
            for c2 in range(self.C):
                self._gaussians[c1][c2].resample()
        if self.special_case_self_conns:
            self._self_gaussian.resample()

    def initialize_hypers(self, W):
        mu_0 = W.mean(axis=(0, 1))
        sigma_0 = np.diag(W.var(axis=(0, 1)))
        for c1 in range(self.C):
            for c2 in range(self.C):
                nu_0 = self._gaussians[c1][c2].nu_0
                self._gaussians[c1][c2].mu_0 = mu_0
                self._gaussians[c1][c2].sigma_0 = sigma_0 * (nu_0 - self.B - 1) / self.C
                self._gaussians[c1][c2].resample()

        if self.special_case_self_conns:
            W_self = W[np.arange(self.N), np.arange(self.N)]
            self._self_gaussian.mu_0 = W_self.mean(axis=0)
            self._self_gaussian.sigma_0 = np.diag(W_self.var(axis=0))
            self._self_gaussian.resample()

        # Cluster the neurons based on their rows and columns
        from sklearn.cluster import KMeans
        features = np.hstack((W[:, :, 0], W[:, :, 0].T))
        km = KMeans(n_clusters=self.C)
        km.fit(features)
        self.c = km.labels_.astype(int)

        print("Initial c:", self.c)

    def _get_mask(self, A, c1, c2):
        mask = (self.c == c1)[:, None] & (self.c == c2)[None, :]
        mask &= A.astype(bool)
        if self.special_case_self_conns:
            mask &= ~np.eye(self.N, dtype=bool)

        return mask

    def log_likelihood(self, data):
        # Python 3 removed tuple parameter unpacking; unpack explicitly.
        A, W = data
        N = self.N
        assert A.shape == (N, N)
        assert W.shape == (N, N, self.B)

        ll = 0
        for c1 in range(self.C):
            for c2 in range(self.C):
                mask = self._get_mask(A, c1, c2)
                ll += self._gaussians[c1][c2].log_likelihood(W[mask]).sum()

        if self.special_case_self_conns:
            mask = np.eye(self.N).astype(bool) & A.astype(bool)
            ll += self._self_gaussian.log_likelihood(W[mask]).sum()

        return ll
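
A hedged, self-contained sketch of the generative process the docstring describes (Dirichlet block probabilities, a block label per node, and a Gaussian weight per block pair); sizes are illustrative:

import numpy as np

C, N, B = 2, 10, 1
pi = 10.0 * np.ones(C)
m = np.random.dirichlet(pi)                # block probabilities
c = np.random.choice(C, p=m, size=N)       # block label for each node

mu = np.random.randn(C, C, B)              # mean weight per block pair
Sig = np.tile(np.eye(B), (C, C, 1, 1))     # covariance per block pair

W = np.zeros((N, N, B))
for i in range(N):
    for j in range(N):
        W[i, j] = np.random.multivariate_normal(mu[c[i], c[j]],
                                                Sig[c[i], c[j]])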
Example #5
class LinearDynamicalSystemBackground(Component):
    """
    Linear Dynamical System model for the background activation.
    Since the potentials for the activation are of a Gaussian form,
    we can perform conjugate Gibbs sampling or variational inference
    for a Gaussian LDS model.
    """
    def __init__(self,
                 population,
                 D=2,
                 A=None,
                 C=None,
                 sigma_states=None,
                 sigma_C=1.0):

        self.population = population
        self.activation = population.activation_model
        self.N = self.population.N
        self.D = D

        from pybasicbayes.distributions import Gaussian
        self.init_dynamics_distn = Gaussian(mu_0=np.ones(D),
                                            kappa_0=1.0,
                                            sigma_0=0.000001 * np.eye(D),
                                            nu_0=3.0)

        from autoregressive.distributions import AutoRegression
        self.dynamics_distn = AutoRegression(A=A,
                                             sigma=sigma_states,
                                             nu_0=D + 1.0,
                                             S_0=0.5 * np.eye(D),
                                             M_0=np.zeros((D, D)),
                                             K_0=0.5 * np.eye(D))

        # Initialize the emission matrix
        if C is None:
            self.C = sigma_C * np.random.randn(self.N, self.D)
        else:
            assert C.shape == (self.N, self.D)
            self.C = C

        self.sigma_C = sigma_C

    def augment_data(self, augmented_data):
        # Add a latent state sequence
        augmented_data["states"] = self.generate_states(augmented_data["T"])

    def log_likelihood(self, augmented_data):
        raise NotImplementedError

    def generate(self, T):
        states = self.generate_states(T)
        return states.dot(self.C.T)

    def generate_states(self, T):
        stateseq = np.empty((T, self.D))
        stateseq[0] = self.init_dynamics_distn.rvs()

        chol = np.linalg.cholesky(self.dynamics_distn.sigma)
        randseq = np.random.randn(T - 1, self.D)

        for t in range(1, T):
            stateseq[t] = \
                self.dynamics_distn.A.dot(stateseq[t-1]) \
                + chol.dot(randseq[t-1])

        return stateseq

    def mean_background_activation(self, augmented_data):
        return augmented_data["states"].dot(self.C.T)

    def resample(self, augmented_data_list):
        self.resample_states(augmented_data_list)
        self.resample_parameters(augmented_data_list)

    def resample_states(self, augmented_data_list):
        from pylds.lds_messages import filter_and_sample

        for data in augmented_data_list:
            # Compute the residual activation from other components
            psi = self.activation.compute_psi(data)
            psi_residual = psi - self.mean_background_activation(data)

            # Get the observed mean and variance
            mu_obs = self.activation.new_mean(data)
            prec_obs = self.activation.new_precision(data)

            # Subtract off the activation from other components
            mu_obs -= psi_residual

            # Convert prec_obs into an array of diagonal covariance matrices
            sigma_obs = np.empty((data["T"], self.N, self.N), order="C")
            for t in range(data["T"]):
                sigma_obs[t, :, :] = np.diag(1. / prec_obs[t, :])

            data["states"] = filter_and_sample(self.init_dynamics_distn.mu,
                                               self.init_dynamics_distn.sigma,
                                               self.dynamics_distn.A,
                                               self.dynamics_distn.sigma,
                                               self.C, sigma_obs, mu_obs)

    def resample_parameters(self, augmented_data_list):
        self.resample_init_dynamics_distn(augmented_data_list)
        self.resample_dynamics_distn(augmented_data_list)
        self.resample_emission_distn(augmented_data_list)

    def resample_init_dynamics_distn(self, augmented_data_list):
        states_list = [ad["states"][0] for ad in augmented_data_list]
        self.init_dynamics_distn.resample(states_list)

    def resample_dynamics_distn(self, augmented_data_list):
        from pyhsmm.util.general import AR_striding
        states_list = [ad["states"] for ad in augmented_data_list]
        strided_states_list = [AR_striding(s, 1) for s in states_list]
        self.dynamics_distn.resample(strided_states_list)

    def resample_emission_distn(self, augmented_data_list):
        """
        Resample the observation vectors. Since the emission noise is diagonal,
        we can resample the columns of C independently
        :return:
        """
        # Get the prior
        prior_precision = 1. / self.sigma_C * np.eye(self.D)
        prior_mean = np.zeros(self.D)
        prior_mean_dot_precision = prior_mean.dot(prior_precision)

        # Get the sufficient statistics from the likelihood
        lkhd_precision = np.zeros((self.N, self.D, self.D))
        lkhd_mean_dot_precision = np.zeros((self.N, self.D))

        for data in augmented_data_list:
            # Compute the residual activation from other components
            psi = self.activation.compute_psi(data)
            psi_residual = psi - self.mean_background_activation(data)

            # Get the observed mean and variance
            mu_obs = self.activation.new_mean(data)
            prec_obs = self.activation.new_precision(data)

            # Subtract off the residual
            mu_obs -= psi_residual

            # Update the sufficient statistics for each neuron
            for n in range(self.N):
                lkhd_precision[n, :, :] += (data["states"] *
                                            prec_obs[:, n][:, None]).T.dot(
                                                data["states"])
                lkhd_mean_dot_precision[n,:] += \
                    (mu_obs[:,n] * prec_obs[:,n]).T.dot(data["states"])

        # Sample each column of C
        for n in range(self.N):
            post_prec = prior_precision + lkhd_precision[n, :, :]
            post_cov = np.linalg.inv(post_prec)
            post_mu = (prior_mean_dot_precision +
                       lkhd_mean_dot_precision[n, :]).dot(post_cov)
            post_mu = post_mu.ravel()

            self.C[n, :] = np.random.multivariate_normal(post_mu, post_cov)

    ### Variational inference
    def meanfieldupdate(self, augmented_data):
        raise NotImplementedError

    def get_vlb(self, augmented_data):
        raise NotImplementedError

    def resample_from_mf(self, augmented_data):
        raise NotImplementedError

    def svi_step(self, augmented_data, minibatchfrac, stepsize):
        raise NotImplementedError
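
The emission update above is a standard conjugate Gaussian linear-regression step: with diagonal observation noise, each row of C has an independent Gaussian posterior. A hedged standalone sketch for a single row (illustrative names):

import numpy as np

def resample_emission_row(X, y, lam, sigma_C=1.0):
    """One row c_n of C given states X (T x D), targets y (T,),
    per-time precisions lam (T,), and prior c_n ~ N(0, sigma_C * I)."""
    D = X.shape[1]
    prior_prec = np.eye(D) / sigma_C
    post_prec = prior_prec + (X * lam[:, None]).T.dot(X)   # X^T diag(lam) X
    post_cov = np.linalg.inv(post_prec)
    post_mu = post_cov.dot((y * lam).dot(X))               # X^T diag(lam) y
    return np.random.multivariate_normal(post_mu, post_cov)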
Example #6
class LogisticNormalCorrelatedLDA(_LDABase):
    "Correlated LDA with the stick breaking representation"

    def __init__(self, data, T, alpha_beta):
        mu, sigma = np.zeros(T), np.eye(T)
        self.theta_prior = \
            Gaussian(
                mu=mu, sigma=sigma, mu_0=mu, sigma_0=T*sigma/10.,
                nu_0=T/10., kappa_0=10.)

        self.ppgs = initialize_polya_gamma_samplers()
        self.omega = np.zeros((data.shape[0], T))

        super(LogisticNormalCorrelatedLDA, self).__init__(data, T, alpha_beta)

    @property
    def theta(self):
        return ln_psi_to_pi(self.psi)

    @theta.setter
    def theta(self, theta):
        self.psi = ln_pi_to_psi(theta)

    def initialize_theta(self):
        self.psi = np.tile(self.theta_prior.mu, (self.D, 1))

    def resample_theta(self):
        self.resample_psi_and_omega()

    def resample(self):
        super(LogisticNormalCorrelatedLDA, self).resample()
        self.resample_theta_prior()

    def resample_psi_and_omega(self):
        Lmbda = np.linalg.inv(self.theta_prior.sigma)
        for d in range(self.D):
            N = self.data[d].sum()
            c = self.doc_topic_counts[d]
            for t in range(self.T):
                self.omega[d, t] = self.ppgs[0].pgdraw(
                    N, self._conditional_omega(d, t))

                mu_cond, sigma_cond = self._conditional_psi(d, t, Lmbda, N, c)
                self.psi[d, t] = np.random.normal(mu_cond, np.sqrt(sigma_cond))

    def _conditional_psi(self, d, t, Lmbda, N, c):
        nott = np.arange(self.T) != t
        psi = self.psi[d]
        omega = self.omega[d]
        mu = self.theta_prior.mu

        zetat = logsumexp(psi[nott])

        mut_marg = mu[t] - 1. / Lmbda[t, t] * Lmbda[t, nott].dot(psi[nott] -
                                                                 mu[nott])
        sigmat_marg = 1. / Lmbda[t, t]

        sigmat_cond = 1. / (omega[t] + 1. / sigmat_marg)

        # kappa is the mean dot precision, i.e. the sufficient statistic of a Gaussian
        # therefore we can sum over datapoints
        kappa = (c[t] - N / 2.0).sum()
        mut_cond = sigmat_cond * (kappa + mut_marg / sigmat_marg +
                                  omega[t] * zetat)

        return mut_cond, sigmat_cond

    def _conditional_omega(self, d, t):
        nott = np.arange(self.T) != t
        psi = self.psi[d]
        zetat = logsumexp(psi[nott])
        return psi[t] - zetat

    def resample_theta_prior(self):
        self.theta_prior.resample(self.psi)

    def copy_sample(self):
        new = copy.copy(self)
        new.beta = self.beta.copy()
        new.psi = self.psi.copy()
        new.theta_prior = self.theta_prior.copy_sample()
        del new.z
        del new.omega
        return new
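
A hedged sketch of the logistic-normal map assumed by ln_psi_to_pi above; the zeta = logsumexp(psi[nott]) terms in the conditionals are consistent with a softmax-style parameterization (one common convention; the actual helper lives in the surrounding package):

import numpy as np
from scipy.special import logsumexp

def softmax_pi(psi):
    # pi_t = exp(psi_t) / sum_j exp(psi_j), computed stably
    return np.exp(psi - logsumexp(psi))

# e.g. softmax_pi(np.zeros(4)) -> [0.25, 0.25, 0.25, 0.25]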
Example #7
class _IndependentGaussianMixin(_NetworkModel):
    """
    Each weight is an independent Gaussian with a shared NIW prior.
    Special case the self-connections.
    """
    def __init__(self, N, B,
                 mu_0=0.0, sigma_0=1.0, kappa_0=1.0, nu_0=3.0,
                 is_diagonal_weight_special=True,
                 **kwargs):
        super(_IndependentGaussianMixin, self).__init__(N, B)

        mu_0 = expand_scalar(mu_0, (B,))
        sigma_0 = expand_cov(sigma_0, (B,B))
        self._gaussian = Gaussian(mu_0=mu_0, sigma_0=sigma_0, kappa_0=kappa_0, nu_0=max(nu_0, B+2.))

        self.is_diagonal_weight_special = is_diagonal_weight_special
        if is_diagonal_weight_special:
            self._self_gaussian = \
                Gaussian(mu_0=mu_0, sigma_0=sigma_0, kappa_0=kappa_0, nu_0=nu_0)

    @property
    def mu_W(self):
        N, B = self.N, self.B
        mu = np.zeros((N, N, B))
        if self.is_diagonal_weight_special:
            # Set off-diagonal weights
            mask = np.ones((N, N), dtype=bool)
            mask[np.diag_indices(N)] = False
            mu[mask] = self._gaussian.mu

            # set diagonal weights
            mask = np.eye(N).astype(bool)
            mu[mask] = self._self_gaussian.mu

        else:
            mu = np.tile(self._gaussian.mu[None, None, :], (N, N, 1))
        return mu

    @property
    def sigma_W(self):
        N, B = self.N, self.B
        if self.is_diagonal_weight_special:
            sigma = np.zeros((N, N, B, B))
            # Set off-diagonal weights
            mask = np.ones((N, N), dtype=bool)
            mask[np.diag_indices(N)] = False
            sigma[mask] = self._gaussian.sigma

            # set diagonal weights
            mask = np.eye(N).astype(bool)
            sigma[mask] = self._self_gaussian.sigma

        else:
            # Tile the shared covariance (the original mistakenly tiled mu here)
            sigma = np.tile(self._gaussian.sigma[None, None, :, :], (N, N, 1, 1))
        return sigma

    def resample(self, data=[]):
        super(_IndependentGaussianMixin, self).resample(data)
        A, W = data
        N, B = self.N, self.B
        A = A.astype(bool)  # ensure a boolean adjacency mask (A may be 0/1 ints)
        if self.is_diagonal_weight_special:
            # Resample prior for off-diagonal weights
            mask = np.ones((N, N), dtype=bool)
            mask[np.diag_indices(N)] = False
            mask = mask & A
            self._gaussian.resample(W[mask])

            # Resample prior for diagonal weights
            mask = np.eye(N).astype(bool) & A
            self._self_gaussian.resample(W[mask])

        else:
            # Resample prior for all weights
            self._gaussian.resample(W[A])
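
The constructor above leans on expand_scalar and expand_cov to promote scalar hyperparameters to full arrays. A hedged sketch of what such helpers plausibly do (the real implementations live elsewhere in the package):

import numpy as np

def expand_scalar(x, shape):
    # Promote a scalar to an array of the given shape; pass arrays through.
    if np.isscalar(x):
        return x * np.ones(shape)
    x = np.asarray(x)
    assert x.shape == shape
    return x

def expand_cov(sigma, shape):
    # Promote a scalar variance to an isotropic covariance matrix.
    assert len(shape) == 2 and shape[0] == shape[1]
    if np.isscalar(sigma):
        return sigma * np.eye(shape[0])
    sigma = np.asarray(sigma)
    assert sigma.shape == shape
    return sigma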
Example #8
class _LatentDistanceModelGaussianMixin(_NetworkModel):
    """
    l_n ~ N(0, sigma^2 I)
    W_{n', n} ~ N(-||l_{n'} - l_{n}||_2^2 + b, sigma^2) for n' != n
    """
    def __init__(self,
                 N,
                 B=1,
                 dim=2,
                 b=0.5,
                 sigma=None,
                 Sigma_0=None,
                 nu_0=None,
                 mu_self=0.0,
                 eta=0.01):

        super(_LatentDistanceModelGaussianMixin, self).__init__(N, B)
        self.B = B
        self.dim = dim

        self.b = b
        self.eta = eta
        self.L = np.sqrt(eta) * np.random.randn(N, dim)

        if Sigma_0 is None:
            Sigma_0 = np.eye(B)

        if nu_0 is None:
            nu_0 = B + 2

        self.cov = GaussianFixedMean(mu=np.zeros(B),
                                     sigma=sigma,
                                     lmbda_0=Sigma_0,
                                     nu_0=nu_0)

        # Special case self-weights (along the diagonal)
        self._self_gaussian = Gaussian(mu_0=mu_self * np.ones(B),
                                       sigma_0=Sigma_0,
                                       nu_0=nu_0,
                                       kappa_0=1.0)

    @property
    def D(self):
        # return np.sqrt(((self.L[:, None, :] - self.L[None, :, :]) ** 2).sum(2))
        return ((self.L[:, None, :] - self.L[None, :, :])**2).sum(2)

    @property
    def mu_W(self):
        Mu = -self.D + self.b
        Mu = np.tile(Mu[:, :, None], (1, 1, self.B))
        for n in range(self.N):
            Mu[n, n, :] = self._self_gaussian.mu

        return Mu

    @property
    def sigma_W(self):
        sig = self.cov.sigma
        Sig = np.tile(sig[None, None, :, :], (self.N, self.N, 1, 1))

        for n in range(self.N):
            Sig[n, n, :, :] = self._self_gaussian.sigma

        return Sig

    def initialize_from_prior(self):
        self.L = np.sqrt(self.eta) * np.random.randn(self.N, self.dim)
        self.cov.resample()

    def initialize_hypers(self, W):
        # Optimize the initial locations
        self._optimize_L(np.ones((self.N, self.N)), W)

    def _hmc_log_probability(self, L, b, A, W):
        """
        Compute the log probability as a function of L.
        This allows us to take the gradients wrt L using autograd.
        :param L:
        :param A:
        :return:
        """
        assert self.B == 1
        import autograd.numpy as atnp

        # Compute pairwise distance
        L1 = atnp.reshape(L, (self.N, 1, self.dim))
        L2 = atnp.reshape(L, (1, self.N, self.dim))
        # Mu = a * anp.sqrt(anp.sum((L1-L2)**2, axis=2)) + b
        Mu = -atnp.sum((L1 - L2)**2, axis=2) + b

        Aoff = A * (1 - atnp.eye(self.N))
        X = (W - Mu[:, :, None]) * Aoff[:, :, None]

        # Get the covariance and precision
        Sig = self.cov.sigma[0, 0]
        Lmb = 1. / Sig

        lp = atnp.sum(-0.5 * X**2 * Lmb)

        # Log prior of L under spherical Gaussian prior
        lp += -0.5 * atnp.sum(L * L / self.eta)

        # Log prior of b under a standard Gaussian prior
        lp += -0.5 * b**2

        return lp

    def resample(self, data=[]):
        super(_LatentDistanceModelGaussianMixin, self).resample(data)
        A, W = data
        N, B = self.N, self.B
        self._resample_L(A, W)
        self._resample_b(A, W)
        self._resample_cov(A, W)
        self._resample_self_gaussian(A, W)
        self._resample_eta()
        # print "eta: ", self.eta, "\tb: ", self.b

    def _resample_L(self, A, W):
        """
        Resample the locations given A
        :return:
        """
        from autograd import grad
        from hips.inference.hmc import hmc

        lp = lambda L: self._hmc_log_probability(L, self.b, A, W)
        dlp = grad(lp)

        stepsz = 0.005
        nsteps = 10
        # lp0 = lp(self.L)
        self.L = hmc(lp,
                     dlp,
                     stepsz,
                     nsteps,
                     self.L.copy(),
                     negative_log_prob=False)
        # lpf = lp(self.L)
        # print "diff lp: ", (lpf - lp0)

    def _optimize_L(self, A, W):
        """
        Resample the locations given A
        :return:
        """
        import autograd.numpy as atnp
        from autograd import grad
        from scipy.optimize import minimize

        lp = lambda Lflat: -self._hmc_log_probability(
            atnp.reshape(Lflat, (self.N, 2)), self.b, A, W)
        dlp = grad(lp)

        res = minimize(lp, np.ravel(self.L), jac=dlp, method="BFGS")

        self.L = np.reshape(res.x, (self.N, 2))

    def _resample_b_hmc(self, A, W):
        """
        Resample the distance dependence offset
        :return:
        """
        # TODO: We could sample from the exact Gaussian conditional
        from autograd import grad
        from hips.inference.hmc import hmc

        lp = lambda b: self._hmc_log_probability(self.L, b, A, W)
        dlp = grad(lp)

        stepsz = 0.0001
        nsteps = 10
        b = hmc(lp,
                dlp,
                stepsz,
                nsteps,
                np.array(self.b),
                negative_log_prob=False)
        self.b = float(b)
        print("b: ", self.b)

    def _resample_b(self, A, W):
        """
        Resample the distance dependence offset
        W ~ N(mu, sigma)
          = N(-D + b, sigma)
    
        implies
        W + D ~ N(b, sigma).
    
        If b ~ N(0, 1), we can compute the Gaussian conditional
        in closed form.
        """
        D = self.D
        sigma = self.cov.sigma[0, 0]
        Aoff = (A * (1 - np.eye(self.N))).astype(bool)
        X = (W + D[:, :, None])[Aoff]

        # Now X ~ N(b, sigma)
        mu0, sigma0 = 0.0, 1.0
        N = X.size
        sigma_post = 1. / (1. / sigma0 + N / sigma)
        mu_post = sigma_post * (mu0 / sigma0 + X.sum() / sigma)

        self.b = mu_post + np.sqrt(sigma_post) * np.random.randn()
        # print "b: ", self.b

    def _resample_cov(self, A, W):
        # Resample covariance matrix (this mixin exposes the means as mu_W,
        # which the original referenced as self.Mu)
        Mu = self.mu_W
        mask = ~np.eye(self.N, dtype=bool) & A.astype(bool)
        self.cov.resample(W[mask] - Mu[mask])

    def _resample_self_gaussian(self, A, W):
        # Resample self connection
        mask = np.eye(self.N, dtype=bool) & A.astype(bool)
        self._self_gaussian.resample(W[mask])

    def _resample_eta(self):
        """
        Resample eta, the variance of the latent locations,
        under an inverse gamma prior, eta ~ IG(1,1)
        :return:
        """
        L = self.L

        a_prior = 1.0
        b_prior = 1.0

        a_post = a_prior + L.size / 2.0
        b_post = b_prior + (L**2).sum() / 2.0

        from scipy.stats import invgamma
        self.eta = invgamma.rvs(a=a_post, scale=b_post)
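
The closed-form update in _resample_b is the textbook conjugate posterior for a Gaussian mean. A hedged standalone check of the formula in that docstring (illustrative names only):

import numpy as np

def posterior_b(X, sigma, mu0=0.0, sigma0=1.0):
    """X_i ~ N(b, sigma) with prior b ~ N(mu0, sigma0) gives
    1/sigma_post = 1/sigma0 + N/sigma and
    mu_post = sigma_post * (mu0/sigma0 + sum(X)/sigma)."""
    N = X.size
    sigma_post = 1.0 / (1.0 / sigma0 + N / sigma)
    mu_post = sigma_post * (mu0 / sigma0 + X.sum() / sigma)
    return mu_post, sigma_post

# With many samples the posterior concentrates on the true b:
b_true, sigma = 0.7, 0.5
X = b_true + np.sqrt(sigma) * np.random.randn(10000)
print(posterior_b(X, sigma))   # mu_post near 0.7, sigma_post near 0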