Example #1
def sample_aux_vars(betas, num_matches, time_range, covariates=None):
    pg = PyPolyaGamma()
    if covariates is None:
        covariates = np.identity(len(betas))  # identity covariates by default

    if covariates.ndim == 2:
        num_players = len(covariates)
        aux_vars = [
            np.matrix([
                [
                    pg.pgdraw(num_matches[t][i, j],
                              (covariates[i] - covariates[j]).dot(betas[t]))
                    #entries
                    for j in range(num_players)  # columns
                ] for i in range(num_players)  # rows
            ]) for t in time_range  # index of matrix-list
        ]
    else:
        num_players = len(covariates[0])
        aux_vars = [
            np.matrix([
                [
                    pg.pgdraw(num_matches[t][i, j],
                              (covariates[t][i] - covariates[t][j]).dot(
                                  betas[t]))
                    #entries
                    for j in range(num_players)  # columns
                ] for i in range(num_players)  # rows
            ]) for t in time_range  # index of matrix-list
        ]

    return aux_vars
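A minimal invocation sketch for the helper above (not from the original source); the shapes are assumptions: betas holds one coefficient vector per time step, covariates one row per player, and num_matches[t] the pairwise match counts at time t.

import numpy as np
from pypolyagamma import PyPolyaGamma  # needed by sample_aux_vars above

rng = np.random.default_rng(0)
T, P, d = 3, 4, 2  # toy sizes: time steps, players, covariate dimension
betas = [rng.normal(size=d) for _ in range(T)]      # coefficients per period
covariates = rng.normal(size=(P, d))                # one row per player
num_matches = [rng.integers(1, 5, size=(P, P)).astype(float) for _ in range(T)]

aux = sample_aux_vars(betas, num_matches, range(T), covariates)
print(len(aux), aux[0].shape)  # 3 auxiliary-variable matrices, each 4 x 4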
Example #2
    def logisticAndReject(self, X, Y):
        pg = PyPolyaGamma()  # use N(0, I) prior
        n = X.shape[0]
        # Output layer
        #out_fit = LinearRegression(fit_intercept = False).fit(self.layers[self.nlayer-1].h, Y)
        #self.layers[self.nlayer].W = out_fit.coef_
        prior = np.random.normal(0, 1, size=self.hid_dim)
        # Polya-Gamma auxiliary variables for the output unit
        w = np.zeros(n)
        for k in range(n):
            w[k] = pg.pgdraw(
                1, np.dot(self.layers[self.nlayer - 1].h[k, :], prior))
        kappa = self.layers[self.nlayer].h[:, 0] - 0.5
        omega = np.diag(w)
        Vw = np.linalg.inv(
            np.dot(np.dot(np.transpose(self.layers[self.nlayer].h), omega),
                   self.layers[self.nlayer].h) + 1)[0]
        mw = Vw * np.dot(np.transpose(self.layers[self.nlayer].h), kappa)[0]
        self.layers[self.nlayer].W[:, 0] = np.random.normal(mw, Vw)
        # Hidden layers
        for l in range(self.nlayer - 1, 0, -1):
            for j in range(self.hid_dim):
                # Draw prior beta
                curr = np.random.normal(0, 1, size=self.hid_dim)
                for t in range(self.mc_iter):
                    # Draw latent w: w_k ~ PG(1, h_k . beta)
                    w = np.zeros(n)
                    for k in range(n):
                        w[k] = pg.pgdraw(
                            1, np.dot(self.layers[l - 1].h[k, :], curr))
                    # Draw posterior beta from its Gaussian conditional
                    kappa = self.layers[l].h[:, j] - 0.5
                    omega = np.diag(w)
                    Vw = np.linalg.inv(
                        np.dot(np.dot(np.transpose(self.layers[l].h), omega),
                               self.layers[l].h) + np.eye(self.hid_dim))
                    mw = np.dot(Vw,
                                np.dot(np.transpose(self.layers[l].h), kappa))
                    curr = np.random.multivariate_normal(mw, Vw)
                self.layers[l].W[:, j] = curr
Example #3
    def _smpl_fn(cls, rng, b, c, size):
        pg = PyPolyaGamma(rng.randint(2 ** 16))

        if not size and b.shape == c.shape == ():
            return pg.pgdraw(b, c)
        else:
            b, c = np.broadcast_arrays(b, c)
            out_shape = b.shape + tuple(size or ())
            smpl_val = np.empty(out_shape, dtype="double")
            b = np.tile(b, tuple(size or ()) + (1,))
            c = np.tile(c, tuple(size or ()) + (1,))
            pg.pgdrawv(
                np.asarray(b.flat).astype("double", copy=True),
                np.asarray(c.flat).astype("double", copy=True),
                np.asarray(smpl_val.flat),
            )
            return smpl_val
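Examples #3 and #5 wrap the same two sampler entry points for use inside a distribution's rng_fn. A stand-alone sketch of those calls (sizes and seed are arbitrary): pgdraw returns a single PG(b, c) variate, while pgdrawv fills a preallocated double array in place.

import numpy as np
from pypolyagamma import PyPolyaGamma

pg = PyPolyaGamma(seed=42)

# One draw from PG(b=1, c=0.5).
x = pg.pgdraw(1.0, 0.5)

# Vectorized draws: pgdrawv(b, c, out) writes into out.
b = np.ones(1000)
c = np.random.randn(1000)
out = np.empty(1000)
pg.pgdrawv(b, c, out)

print(x, out.mean())  # E[PG(1, 0)] = 0.25, so the mean sits near 0.25 for small |c|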
Example #4
    def _sample_reference_posterior(
        self,
        num_samples: int,
        num_observation: Optional[int] = None,
    ) -> torch.Tensor:
        from pypolyagamma import PyPolyaGamma
        from tqdm import tqdm

        self.dim_data = 10
        # stimulus_I = torch.load(self.path / "files" / "stimulus_I.pt")
        design_matrix = torch.load(self.path / "files" / "design_matrix.pt")
        true_parameters = self.get_true_parameters(num_observation)
        self.raw = True
        observation_raw = self.get_observation(num_observation)
        self.raw = False

        mcmc_num_samples_warmup = 25000
        mcmc_thinning = 25
        mcmc_num_samples = mcmc_num_samples_warmup + mcmc_thinning * num_samples

        pg = PyPolyaGamma()
        X = design_matrix.numpy()
        obs = observation_raw.numpy()
        Binv = self.prior_params["precision_matrix"].numpy()

        sample = true_parameters.numpy().reshape(-1)  # Init at true parameters
        samples = []
        for j in tqdm(range(mcmc_num_samples)):
            psi = np.dot(X, sample)
            w = np.array([pg.pgdraw(1, b) for b in psi])
            O = np.diag(w)  # noqa: E741
            V = np.linalg.inv(np.dot(np.dot(X.T, O), X) + Binv)
            m = np.dot(V, np.dot(X.T, obs.reshape(-1) - 1 * 0.5))
            sample = np.random.multivariate_normal(np.ravel(m), V)
            samples.append(sample)
        samples = np.asarray(samples).astype(np.float32)
        samples_subset = samples[mcmc_num_samples_warmup::mcmc_thinning, :]

        reference_posterior_samples = torch.from_numpy(samples_subset)

        return reference_posterior_samples
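Examples #4, #7, #8 and #10 all run the same two-step Gibbs sweep for a logistic model: draw auxiliary variables w_i ~ PG(n_i, x_i' beta), then beta ~ N(m, V) with V = inv(X' O X + Binv) and m = V X' kappa, kappa = y - n/2. A self-contained sketch on synthetic data (the prior precision, sample size, and iteration counts are arbitrary choices, not taken from any of the examples):

import numpy as np
from pypolyagamma import PyPolyaGamma

rng = np.random.default_rng(0)
pg = PyPolyaGamma(seed=0)

# Synthetic logistic-regression data.
n, d = 200, 3
X = rng.normal(size=(n, d))
beta_true = np.array([1.0, -2.0, 0.5])
y = rng.binomial(1, 1.0 / (1.0 + np.exp(-X @ beta_true)))

B_inv = np.eye(d)        # prior precision: beta ~ N(0, I)
kappa = y - 0.5          # y_i - n_i / 2 with n_i = 1 trial per row

beta = np.zeros(d)
samples = []
for _ in range(2000):
    # 1) Auxiliary variables: w_i ~ PG(1, x_i' beta)
    psi = X @ beta
    w = np.array([pg.pgdraw(1, p) for p in psi])
    # 2) Conjugate Gaussian update for beta
    V = np.linalg.inv(X.T @ (X * w[:, None]) + B_inv)
    m = V @ (X.T @ kappa)
    beta = rng.multivariate_normal(m, V)
    samples.append(beta)

print(np.mean(samples[500:], axis=0))  # roughly recovers beta_true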
Example #5
    def rng_fn(cls, rng, b, c, size):
        pg = PyPolyaGamma(rng.randint(2**16))

        if not size and b.shape == c.shape == ():
            return pg.pgdraw(b, c)
        else:
            b, c = np.broadcast_arrays(b, c)
            size = tuple(size or ())

            if len(size) > 0:
                b = np.broadcast_to(b, size)
                c = np.broadcast_to(c, size)

            smpl_val = np.empty(b.shape, dtype="double")

            pg.pgdrawv(
                np.asarray(b.flat).astype("double", copy=True),
                np.asarray(c.flat).astype("double", copy=True),
                np.asarray(smpl_val.flat),
            )
            return smpl_val
Example #6
class GibbSampler(SGCP_Sampler):

    def __init__(self, *args, **kwargs):
        super(GibbSampler, self).__init__(*args, **kwargs)

        self.pg = PyPolyaGamma(seed=np.random.randint(2 ** 16, size=None))

    def run(self):

        print('Starting Gibbs')

        latent_events = np.random.rand(self.M, self.dim) * self.diff
        latent_marks = np.random.rand(self.M, 1) * 2 ** 10  # distribute on space
        marks = np.random.rand(self.N, 1) * 2 ** 10  # distribute on space

        start = time.time()

        for k in range(self.maxiter):

            if k == 1:
                loop_start = time.time()
            if k == 2:
                print('Approximately %.2f min to go' % (loop * self.maxiter / 60))

            if self.inducing_points is not None:

                if k == 0:
                    self.events_base = self.inducing_points
                    self.K = self.cov_function(self.events_base, self.events_base, self.kernelparameter)
                    self.K += np.eye(len(self.K)) * self.noise
                    self.L = np.linalg.cholesky(self.K)
                    self.L_inv = np.linalg.solve(self.L, np.eye(self.L.shape[0]))
                    self.K_inv = self.L_inv.T @ self.L_inv

                self.sample_upper_bound(latent_marks.shape[0])
                self.sample_gaussian_induced(latent_events, marks, latent_marks)

                if self.sample_kernel_parameter:
                    if (k % 10) == 0:
                        self.sample_kernelparameter()

                intensity = self.sample_results()

                latent_events, g_M, g_N = self.sample_latent_events_induced()
                latent_marks = self.sample_latent_marks(g_M)
                marks = self.sample_marks(g_N)

            else:

                self.events_base = np.concatenate((self.observed_events, latent_events), axis=0)
                self.K = self.cov_function(self.events_base, self.events_base, self.kernelparameter)
                self.K += np.eye(len(self.K)) * self.noise
                self.L = np.linalg.cholesky(self.K)
                self.L_inv = np.linalg.solve(self.L, np.eye(self.L.shape[0]))
                self.K_inv = self.L_inv.T @ self.L_inv

                self.sample_upper_bound(latent_marks.shape[0])
                self.sample_gaussian(marks, latent_marks)

                if self.sample_kernel_parameter:
                    if (k % 10) == 0:
                        self.sample_kernelparameter()  # updates the kernels

                intensity = self.sample_results()

                latent_events, g_M = self.sample_latent_events()
                latent_marks = self.sample_latent_marks(g_M)
                marks = self.sample_marks(np.array(self.g[:self.N, :]))

            if ((k > 0) or (k == self.maxiter - 1)) and (k % 50 == 0):
                if self.inducing_points is not None:
                    print('%d   with inducing points' % k)
                else:
                    print(k)

            self.llambdas[k] = self.upper_bound
            self.latent_M[k] = latent_marks.shape[0]
            self.intensities[k, :] = intensity
            # self.log_likelihoods[k, :] = log_likelihood

            if k == 1:
                loop = time.time() - loop_start

        self.time = (time.time() - start) / 60
        print('Done in %.2f min' % self.time)
        self.mean_intensities = np.mean(self.intensities[self.burnin:], axis=0)

    ######################################################################

    def sample_gaussian_induced(self, latent_events, marks, latent_marks):
        L_ind = len(self.inducing_points)
        kN = self.cov_function(self.inducing_points, self.observed_events, self.kernelparameter)
        kM = self.cov_function(self.inducing_points, latent_events, self.kernelparameter)
        BN = kN[np.newaxis, ::] * kN[::, np.newaxis]  # (L,L,N)
        BM = kM[np.newaxis, ::] * kM[::, np.newaxis]  # (L,L,M)
        wN = np.repeat(marks, L_ind, axis=1)
        wN = np.repeat(wN[:, :, np.newaxis], L_ind, axis=2).T
        wM = np.repeat(latent_marks, L_ind, axis=1)
        wM = np.repeat(wM[:, :, np.newaxis], L_ind, axis=2).T

        B = np.sum(BN * wN, axis=2) + np.sum(BM * wM, axis=2)
        BLinv = np.linalg.solve(B + self.K, np.eye(L_ind))
        sigmaL = self.K @ BLinv @ self.K
        muL = 0.5 * self.K @ BLinv @ (np.sum(kN, axis=1, keepdims=True) - np.sum(kM, axis=1, keepdims=True))
        self.g = Utils.sample_gaussian(muL, sigmaL)  # + np.eye(L_ind) * self.noise)

    def sample_latent_events_induced(self):
        xx = 0
        while (xx == 0):
            latent_events, g_J, g_N = self.sample_latent_process_induced()
            xx = len(latent_events)
        return latent_events, g_J, g_N

    def sample_latent_process_induced(self):
        J = np.random.poisson(lam=self.vol * self.upper_bound, size=None)  # nb_events
        events = np.random.rand(J, self.dim) * self.diff
        g = self.sample_cond(np.concatenate((events, self.observed_events), axis=0))
        g_J = np.array(g[:len(events)])
        g_N = np.array(g[len(events):])
        R = np.random.rand(J) * self.upper_bound
        idx = R < self.upper_bound * SGCP_Sampler.sigmoid(-g_J.flatten())
        acc_events = events[idx, :]
        return acc_events, g_J[idx, :], g_N

    ######################################################################

    def sample_gaussian(self, marks, latent_marks):
        M = latent_marks.shape[0]
        marks_concat = np.concatenate((marks, latent_marks), axis=0)
        sigma = np.diag(1. / marks_concat.flatten())
        sigma_NM = sigma - sigma @ np.linalg.solve(sigma + self.K, np.eye(self.N + M)) @ sigma
        u = np.concatenate((np.full((self.N, 1), 1. / 2, ), np.full((M, 1), -1. / 2)), axis=0)
        mean_NM = sigma_NM @ u
        self.g = Utils.sample_gaussian(mean_NM, sigma_NM)  # + np.eye(sigma_NM.shape[0]) * self.noise)

    def sample_latent_process(self):
        J = np.random.poisson(lam=self.vol * self.upper_bound, size=None)  # nb_events
        events = np.random.rand(J, self.dim) * self.diff
        g_J = self.sample_cond(events)
        R = np.random.rand(J) * self.upper_bound
        idx = R < self.upper_bound * SGCP_Sampler.sigmoid(-g_J.flatten())
        acc_events = events[idx, :]
        return acc_events, g_J[idx, :]

    def sample_latent_events(self):
        xx = 0
        while (xx == 0):
            latent_events, g_J = self.sample_latent_process()
            xx = len(latent_events)
        return latent_events, g_J

    ######################################################################

    def sample_upper_bound(self, M):
        self.upper_bound = np.random.gamma(shape=self.alpha + M + self.N, scale=1. / (self.beta + self.vol))

    def sample_latent_marks(self, g_M):
        M = g_M.shape[0]
        latent_marks = np.zeros([M, 1])
        for i in range(M):
            latent_marks[i, :] = self.pg.pgdraw(1, g_M[i, :])
        return latent_marks

    def sample_kernelparameter(self):
        prop = np.random.randn(self.dim + 1)
        alpha = np.exp(np.log(self.kernelparameter[0]) + prop[0] * 0.05)
        beta = np.exp(np.log(self.kernelparameter[1]) + prop[1:] * 0.05)
        proposal = [alpha, beta]
        K = self.cov_function(self.events_base, self.events_base, proposal)
        K += np.eye(K.shape[0]) * self.noise
        L = np.linalg.cholesky(K)
        L_inv = np.linalg.solve(L, np.eye(L.shape[0]))
        K_inv = L_inv.T @ L_inv
        prop = - np.sum(np.log(L.diagonal())) - 0.5 * self.g.T @ K_inv @ self.g
        old = - np.sum(np.log(self.L.diagonal())) - 0.5 * self.g.T @ self.K_inv @ self.g
        A = min(0, (prop - old).item())  # .item() replaces the removed np.asscalar
        u = np.log(np.random.rand())
        if u < A:
            self.K = K
            self.L = L
            self.L_inv = L_inv
            self.K_inv = K_inv
            self.kernelparameter = proposal
            print(self.kernelparameter)

    def predict(self, Xtest):  # predict unknown function values of Xtest
        C = self.cov_function(self.events_base, Xtest, self.kernelparameter)
        K_test = self.cov_function(Xtest, Xtest, self.kernelparameter)
        mean_predict = C.T @ self.K_inv @ self.g
        cov_predict = K_test - C.T @ self.K_inv @ C
        return mean_predict, cov_predict  # posterior mean and covariance

    def sample_cond(self, Xtest):
        mean, cov = self.predict(Xtest)
        tmp = Utils.sample_gaussian(mean, cov + np.eye(cov.shape[0]) * self.noise)
        return tmp

    def sample_marks(self, g_N):
        marks = np.zeros([self.N, 1])
        for i in range(self.N):
            marks[i, :] = self.pg.pgdraw(1, g_N[i, :])
        return marks

    def sample_results(self):
        self.events.append(self.events_base)
        self.gaussians.append(self.g)
        g = self.sample_cond(self.grid_events)
        return self.upper_bound * SGCP_Sampler.sigmoid(g.flatten())
Example #7
p_true = logistic(x_true)
y = npr.binomial(N, p_true)

# Gibbs sample the posterior distribution p(x | y)
# Introduce PG(N,0) auxiliary variables to render
# the model conjugate.  First, initialize the PG
# sampler and the model parameters.
N_samples = 10000
pg = PyPolyaGamma(seed=0)
xs = np.zeros(N_samples)
omegas = np.ones(N_samples)

# Now run the Gibbs sampler
for i in range(1, N_samples):
    # Sample omega given x, y from its PG conditional
    omegas[i] = pg.pgdraw(N, xs[i-1])

    # Sample x given omega, y from its Gaussian conditional
    sigmasq_hat = 1./(1. / sigmasq + omegas[i])
    mu_hat = sigmasq_hat * (mu / sigmasq + (y - N / 2.))
    xs[i] = npr.normal(mu_hat, np.sqrt(sigmasq_hat))

# Compute the true posterior density
xx = np.linspace(x_true-3., x_true+3, 1000)
prior = 1./np.sqrt(2 * np.pi * sigmasq) * np.exp(-0.5 * (xx - mu)**2 / sigmasq)
lkhd = logistic(xx) ** y * (1-logistic(xx))**(N-y)
post = prior * lkhd
post /= np.trapz(post, xx)

# Plot the results
plt.figure(figsize=(5,4))
Example #8
def pg_mcmc(true_params, obs, duration=100, dt=1, seed=None,
            prior_dist=None):
    """Polya-Gamma sampler for GLM

    Returns
    -------
    array : samples from posterior
    """

    if prior_dist is None:
        prior_dist = smoothing_prior(n_params=true_params.size, seed=seed)

    # seeding
    np.random.seed(seed)
    pg = PyPolyaGamma()  # seed=seed

    # observation
    I = obs['I'].reshape(1,-1)
    S_obs = obs['data'].reshape(-1)

    # simulation protocol
    num_param_inf = len(true_params)
    dt = 1  # note: overrides the dt argument
    t = np.arange(0, duration, dt)

    N = 1   # Number of trials
    M = num_param_inf-1   # Length of the filter

    # build covariate matrix X, such that X * h returns convolution of x with filter h
    X = np.zeros(shape=(len(t), M))
    for j in range(M):
        X[j:,j] = I[0,0:len(t)-j]

    # prior
    # smoothing prior on h; N(0, 1) on b0. Smoothness encouraged by penalizing
    # 2nd order differences of elements of filter
    #prior_dist = prior(n_params=true_params.size, seed=seed)
    Binv = prior_dist.P

    # The sampler consists of two iterative Gibbs updates
    # 1) sample auxiliary variables: w ~ PG(N, psi)
    # 2) sample parameters: beta ~ N(m, V); V = inv(X'O X + Binv), m = V*(X'k), k = y - N/2
    nsamp = 500000   # samples to evaluate the posterior

    # add a column of 1s to the covariate matrix X, in order to model the offset too
    X = np.concatenate((np.ones(shape=(len(t), 1)), X), axis=1)

    beta = true_params*1.
    BETA = np.zeros((M+1,nsamp))

    for j in tqdm(range(1, nsamp)):
        psi = np.dot(X, beta)
        w = np.array([pg.pgdraw(N, b) for b in psi])
        O = np.diag(w)

        V = np.linalg.inv(np.dot(np.dot(X.T, O), X) + Binv)
        m = np.dot(V, np.dot(X.T, S_obs - N * 0.5))

        beta = np.random.multivariate_normal(np.ravel(m), V)

        BETA[:,j] = beta

    # burn-in
    burn_in = 100000
    BETA_sub_samp = BETA[:, burn_in:nsamp:30]

    # return sampling results
    return BETA_sub_samp
Example #9
class WEIFTM():

    NO_TOPIC = -1

    def __init__(self,
                 n_topics,
                 alpha_0=.1,
                 beta_0=.01,
                 sig_0=1,
                 topic_sparsity=.3,
                 delta_0=1):
        self.n_topics = n_topics
        self.alpha_0 = alpha_0
        self.beta_0 = beta_0
        self.sig_0 = sig_0
        self.topic_sparsity = topic_sparsity
        self.delta_0 = delta_0
        self.log_likelihoods = []
        self.accuracies = []

    def get_documents_from_directory(self, directory_path):
        self.labels = {}
        count = 0
        class_count = -1
        classes = set()
        documents = []
        for (path, dirs, files) in os.walk(directory_path):
            files.sort()
            cl = path.strip(os.path.sep).split(os.path.sep)[-1]
            for file_path in files:
                if file_path.endswith('.txt'):
                    document_path = os.path.join(path, file_path)
                    try:
                        file = open(document_path, 'r')
                        document = file.read()
                        file.close()
                        documents.append(document)
                        if cl not in classes:
                            classes.add(cl)
                            class_count += 1
                        self.labels[count] = class_count
                        count += 1
                    except Exception as e:
                        print(e)
        return documents

    def get_documents_from_csv(self,
                               csv_path,
                               text_name="text",
                               class_name="class"):
        with open(csv_path, 'r', encoding='utf8', errors='ignore') as csv_file:
            dataframe = pd.read_csv(StringIO(csv_file.read()))
            # dataframe = dataframe.iloc[np.random.permutation(dataframe.shape[0])[:10]]
            # dataframe = dataframe.reset_index()
            dataframe = dataframe.fillna(value={class_name: ''})
            dataframe[class_name] = LabelEncoder().fit_transform(
                dataframe[class_name])
            self.labels = dict(dataframe[class_name])
            return list(dataframe[text_name])

    def get_embedding_vocabulary(self, embedding_path):
        vocabulary = set()
        with open(embedding_path) as emb_file:
            for line in emb_file:
                if line != "":
                    word = line.strip().split(" ", 1)[0]
                    vocabulary.add(word)
        return vocabulary

    def load_corpus(self, documents, vocabulary, custom_stop_words=[]):
        preprocessed_documents = preprocess_tweets(documents, vocabulary,
                                                   custom_stop_words)
        self.dictionary = corpora.Dictionary(preprocessed_documents)
        self.n_words = len(self.dictionary)
        self.corpus = [
            self.dictionary.doc2bow(document)
            for document in preprocessed_documents
        ]
        self.n_documents = len(self.corpus)

    def load_embeddings(self,
                        embedding_size,
                        embedding_path,
                        corpus_dir,
                        use_pca=False,
                        pca_var=.97):
        self.embedding_size = embedding_size
        # Key the cache by the corpus name with any extension removed.
        corpus_name = os.path.splitext(
            corpus_dir.strip(os.path.sep).split(os.path.sep)[-1])[0]
        cache_dir = "./cache/{}/".format(corpus_name)
        embedding_cache_path = cache_dir + "embedding{}.npy".format(
            embedding_size)
        if os.path.isfile(embedding_cache_path):
            self.f = np.load(embedding_cache_path)
        else:
            vocabulary = set(self.dictionary.values())
            self.f = np.empty((self.n_words, self.embedding_size))
            with open(embedding_path) as emb_file:
                for line in emb_file:
                    if line != "":
                        word, str_embedding = line.strip().split(" ", 1)
                        if word in vocabulary:
                            word_index = self.dictionary.token2id[word]
                            self.f[word_index] = np.array(
                                str_embedding.split(" "), dtype=float)
            if not os.path.isdir(cache_dir):
                os.makedirs(cache_dir)
            np.save(embedding_cache_path, self.f)

        if use_pca:
            self._embedding_PCA(pca_var)

        self.f_outer = np.array([np.outer(f_v, f_v) for f_v in self.f])

    def _embedding_PCA(self, var_percent):
        self.pca = PCA(self.embedding_size)
        self.f_raw = self.f
        self.pca.fit(self.f_raw)
        n_components = np.argmax(
            np.cumsum(self.pca.explained_variance_ratio_) > var_percent)
        self.f = self.pca.transform(self.f_raw)[:, :n_components]
        self.embedding_size_raw = self.embedding_size
        self.embedding_size = n_components

    def initialize_parameters(self):
        self._init_b()
        self._init_n_m_Z()
        self._init_lamb()
        self._init_c()
        self._init_pi()
        self._init_embedding_aux_params()

    def _init_b(self):
        self.b = np.random.binomial(1, self.topic_sparsity,
                                    (self.n_topics, self.n_words))
        self.b_sum_ax1 = np.sum(self.b, axis=1)

    def _init_n_m_Z(self):
        self.n = np.zeros((self.n_topics, self.n_words))
        self.m = np.zeros((self.n_documents, self.n_topics))
        self.Z = []
        for document_index, document in enumerate(self.corpus):
            Z_document = []
            for word_occurrence_tuple in document:
                word_index = word_occurrence_tuple[0]
                count = word_occurrence_tuple[1]
                for _ in range(count):
                    nonzero_b = self.b[:, word_index].nonzero()[0]
                    if len(nonzero_b) == 0:
                        topic_assignment = WEIFTM.NO_TOPIC
                    else:
                        topic_assignment = np.random.choice(nonzero_b)
                        self.n[topic_assignment, word_index] += 1
                        self.m[document_index, topic_assignment] += 1
                    Z_document.append([word_index, topic_assignment])
            self.Z.append(Z_document)

    def _init_lamb(self):
        sig_I_lamb = self.sig_0**2 * np.eye(self.embedding_size)
        self.lamb = np.random.multivariate_normal(np.zeros(
            self.embedding_size),
                                                  sig_I_lamb,
                                                  size=self.n_topics)
        self.sig_I_lamb_inv = self.sig_0**-2 * np.eye(self.embedding_size)

    def _init_c(self):
        sig_I_c = self.sig_0**2 * np.eye(self.n_topics)
        self.c = np.random.multivariate_normal(np.zeros(self.n_topics),
                                               sig_I_c).reshape((-1, 1))

    def _init_pi(self):
        self.pi = np.matmul(self.lamb, self.f.T) + self.c

    def _init_embedding_aux_params(self):
        self.pg = PyPolyaGamma()
        self.gamma = np.empty((self.n_topics, self.n_words))
        self.gamma_sum_ax1 = np.zeros(self.n_topics)
        self.SIGMA_inv = np.empty(
            (self.n_topics, self.embedding_size, self.embedding_size))
        self.b_cgam = np.empty((self.n_topics, self.n_words))
        self.b_cgam_sum_ax1 = np.zeros(self.n_topics)
        self.MU = np.empty((self.n_topics, self.embedding_size))
        for k in range(self.n_topics):
            for word_index in range(self.n_words):
                self.gamma[k, word_index] = self.pg.pgdraw(
                    1, self.pi[k, word_index])
                self.gamma_sum_ax1[k] += self.gamma[k, word_index]

            self.SIGMA_inv[k] = np.matmul(self.f_outer.T,
                                          self.gamma[k]) + self.sig_I_lamb_inv
            self.b_cgam[k] = self.b[k] - .5 - self.c[k] * self.gamma[k]
            self.b_cgam_sum_ax1[k] = np.sum(self.b_cgam[k])

        self.b_cgam_f = np.matmul(self.b_cgam, self.f)
        for k in range(self.n_topics):
            SIGMA_k = np.linalg.inv(self.SIGMA_inv[k])
            self.MU[k] = np.matmul(SIGMA_k, self.b_cgam_f[k])

    def train(self, iters=10):
        for i in range(iters):
            start_time = time.time()
            self._gibbs_sample()
            print("gibbs", time.time() - start_time)

            # start_time = time.time()
            self.log_likelihoods.append(self._compute_total_log_likelihood())
            # print("log_likelihood", time.time() - start_time)

            self.accuracies.append(self.get_classification_accuracy())
        return self.log_likelihoods, self.accuracies

    def _gibbs_sample(self):
        # gibbs_iter_time = time.time()
        for document_index, Z_document in enumerate(self.Z):
            document_length = len(Z_document)
            for token_index, Z_token_pair in enumerate(Z_document):

                # print("gibbs iter", time.time() - gibbs_iter_time)
                # gibbs_iter_time = time.time()
                # print(token_index, "/", document_length, document_index, "/", self.n_documents)

                word_index = Z_token_pair[0]
                topic_assignment = Z_token_pair[1]
                if topic_assignment != WEIFTM.NO_TOPIC:
                    self.n[topic_assignment, word_index] -= 1
                    self.m[document_index, topic_assignment] -= 1

                # start_time = time.time()
                self._sample_b(word_index)
                # print("sample_b", time.time() - start_time)

                # start_time = time.time()
                topic_assignment = self._sample_z(document_index, word_index)
                # print("sample_z", time.time() - start_time)
                Z_token_pair[1] = topic_assignment

                if topic_assignment != WEIFTM.NO_TOPIC:
                    self.n[topic_assignment, word_index] += 1
                    self.m[document_index, topic_assignment] += 1

                # start_time = time.time()
                self._sample_embeddings(word_index)
                # print("sample_embeddings", time.time() - start_time)

    def _sample_b(self, word_index):
        b_not_v = self.b_sum_ax1 - self.b[:, word_index]

        b_not_v[b_not_v == 0] += self.delta_0
        b_not_v_beta = b_not_v * self.beta_0

        num_a = b_not_v_beta + np.sum(self.n, axis=1)
        num_b = self.beta_0
        num = beta_function(num_a, num_b)
        denom = beta_function(b_not_v_beta, self.beta_0)
        activation = sigmoid(self.pi[:, word_index])
        p_1 = num * activation / denom
        p_0 = 1 - activation
        p = p_1 / (p_1 + p_0)

        self.b_sum_ax1 -= self.b[:, word_index]
        self.b[:, word_index] |= np.random.binomial(1, p)
        self.b_sum_ax1 += self.b[:, word_index]

    def _sample_z(self, document_index, word_index):
        if self.b[:, word_index].sum() == 0:
            topic_assignment = WEIFTM.NO_TOPIC
        else:
            p = (self.alpha_0 + self.m[document_index]) * (
                self.n[:, word_index].flatten() +
                self.beta_0) / (self.n[:, word_index] +
                                self.beta_0).sum() * self.b[:, word_index]
            p /= p.sum()
            topic_assignment = np.random.multinomial(1, p).argmax()
        return topic_assignment

    def _sample_embeddings(self, word_index):
        for k in range(self.n_topics):
            # sample gamma
            old_gamma_k_word_index = self.gamma[k, word_index]
            self.gamma[k, word_index] = self.pg.pgdraw(1, self.pi[k,
                                                                  word_index])
            self.gamma_sum_ax1[k] += self.gamma[
                k, word_index] - old_gamma_k_word_index

            # sample lamb
            self.SIGMA_inv[k] += (
                self.gamma[k, word_index] -
                old_gamma_k_word_index) * self.f_outer[word_index]
            SIGMA_k = np.linalg.inv(self.SIGMA_inv[k])

            old_b_cgam_k_word_index = self.b_cgam[k, word_index]
            self.b_cgam[k, word_index] = self.b[
                k, word_index] - .5 - self.c[k] * self.gamma[k, word_index]
            self.b_cgam_sum_ax1[k] += self.b_cgam[
                k, word_index] - old_b_cgam_k_word_index

            self.b_cgam_f[k] = self.b_cgam[k, word_index] * self.f[word_index]
            self.MU[k] = np.matmul(SIGMA_k, self.b_cgam_f[k])

            self.lamb[k] = np.random.multivariate_normal(self.MU[k], SIGMA_k)

            # sample c
            sig_k = (self.gamma_sum_ax1[k] + self.sig_0**-2)**-1
            mu_k = sig_k * self.b_cgam_sum_ax1[k]
            self.c[k] = np.random.normal(mu_k, sig_k)

        # update pi
        self.pi = np.matmul(self.lamb, self.f.T) + self.c

    def dirichlet_pdf_log(self, x, alpha):
        return np.sum(np.log(np.power(x, alpha - 1))) - np.sum(
            np.log(gamma_function(alpha))) + np.log(
                gamma_function(np.sum(alpha)))

    def _compute_total_log_likelihood(self):
        log_likelihood = 0

        theta = self.get_theta()
        log_theta = np.log(theta)
        phi = self.get_phi()
        log_phi = np.log(phi)

        ALPHA = self.alpha_0 * np.ones(self.n_topics)

        for document_index in range(self.n_documents):
            # theta
            # log_likelihood += np.log(dirichlet.pdf(theta[document_index], ALPHA))
            log_likelihood += self.dirichlet_pdf_log(theta[document_index],
                                                     ALPHA)

            for token_index in range(len(self.Z[document_index])):
                word_index, topic_index = self.Z[document_index][token_index]
                if topic_index != WEIFTM.NO_TOPIC:
                    # w
                    log_likelihood += log_phi[topic_index, word_index]
                    # z
                    log_likelihood += log_theta[document_index, topic_index]

        log_likelihood += np.sum(
            np.log(bernoulli.pmf(self.b, sigmoid(self.pi))))

        for k in range(self.n_topics):
            # phi
            b_k_nonzero = self.b[k].nonzero()[0]
            BETA = self.beta_0 * np.ones(b_k_nonzero.shape[0])
            # log_likelihood += np.log(dirichlet.pdf(phi[k][b_k_nonzero], BETA))
            log_likelihood += self.dirichlet_pdf_log(phi[k][b_k_nonzero], BETA)
            # c
            log_likelihood += np.log(norm.pdf(self.c[k], 0, self.sig_0))

            for l in range(self.embedding_size):
                # lamb
                log_likelihood += np.log(
                    norm.pdf(self.lamb[k, l], 0, self.sig_0))

        return log_likelihood

    def get_phi(self):
        n_b = (self.n + self.beta_0) * self.b
        return n_b / n_b.sum(axis=1).reshape(-1, 1)

    def get_theta(self):
        return (self.m + self.alpha_0) / (self.m + self.alpha_0).sum(
            axis=1).reshape(-1, 1)

    def print_phi(self, n_words):
        phi = self.get_phi()
        for topic_index, topic in enumerate(phi):
            labelled_probabilities = [(self.dictionary[word_index], prob)
                                      for word_index, prob in enumerate(topic)]
            sorted_probabilities = sorted(labelled_probabilities,
                                          key=lambda x: x[1],
                                          reverse=True)[:n_words]
            print('Topic {}:'.format(topic_index), sorted_probabilities)

    def print_theta(self):
        theta = self.get_theta()
        for document_index, document in enumerate(theta):
            print('Document {}:'.format(document_index),
                  '; Label {}'.format(self.labels[document_index]), document)

    def get_classification_accuracy(self):
        theta = self.get_theta()
        predictions = [distribution.argmax() for distribution in theta]
        prediction_set = set(predictions)
        label_set = set(self.labels.values())
        accuracies = []

        if self.n_topics >= len(label_set):
            for tup in itertools.permutations(prediction_set, len(label_set)):
                count = 0.
                for index in self.labels:
                    if tup[self.labels[index]] == predictions[index]:
                        count += 1.
                accuracies.append(count / len(predictions))
        else:
            for tup in itertools.permutations(label_set, self.n_topics):
                count = 0.
                for index in self.labels:
                    if self.labels[index] == tup[predictions[index]]:
                        count += 1.
                accuracies.append(count / len(predictions))

        return max(accuracies)

    def plot(self, values, ylabel, path):
        title = os.path.splitext(path.strip(os.path.sep).split(os.path.sep)[-1])[0]
        plt.title(title)
        plt.xlabel('epoch')
        plt.ylabel(ylabel)
        plt.plot(values)
        plt.show()

    def __getstate__(self):
        state = self.__dict__.copy()
        state.pop("pg")
        return state

    def __setstate__(self, state):
        self.__dict__.update(state)
        # Recreate the PG sampler that __getstate__ dropped (it is not picklable).
        self.pg = PyPolyaGamma()

    def save(self, path):
        pickle.dump(self, open(path, "wb"))

    @staticmethod
    def load(path):
        return pickle.load(open(path, "rb"))
Example #10
def s_blk(g_num, b_mu_ll, q_mu, b_v, q_v, b_mat_mu, q_arr, b_mu_lk, b_mat_v,
          ob, g_ij, z_i):
    #sample b_lk q
    n_lk, n_lk1, m_l, m_l1 = get_nlk(ob, g_num, g_ij, z_i)
    for l in range(g_num):
        for k in range(l, g_num):
            samplenum = 100
            if l == k:
                b = np.zeros((samplenum, 2))
                b[0, 0] = b_mu_ll
                b[0, 1] = q_mu
                mu = np.array([b_mu_ll, q_mu])
                var = np.array(([b_v, 0], [0, q_v]))
                pg = PyPolyaGamma(seed=0)
                omegas = np.ones(2)
                x = np.array(([1, 0], [1, 1]))
                k_arr = np.array(
                    [n_lk1[l, l] - n_lk[l, l] / 2, m_l1[l] - m_l[l] / 2])

                for t in range(1, samplenum):

                    omegas[0] = pg.pgdraw(n_lk[l, l], b[t - 1, 0])
                    omegas[1] = pg.pgdraw(m_l[l], np.sum(b[t - 1, :]))
                    omega = np.array(([omegas[0], 0], [0, omegas[1]]))
                    v = inv(
                        np.dot(np.dot(np.transpose(x), omega), x) + inv(var))
                    m = np.dot(
                        v,
                        np.dot(np.transpose(x), np.transpose(k_arr)) +
                        np.dot(inv(var), mu))
                    s = npr.multivariate_normal(m, v)
                    b[t, 0] = np.copy(s[0])
                    b[t, 1] = np.copy(s[1])
                b_mat_mu[l, l] = np.sum(b[50:samplenum, 0]) / (samplenum - 50)
                q_arr[l] = np.sum(b[50:samplenum, 1]) / (samplenum - 50)

            else:
                b = np.zeros((samplenum, 2))
                b[0, 0] = b_mu_lk
                b[0, 1] = b_mu_lk
                mu = np.array([b_mu_lk, b_mu_lk])
                var = np.copy(b_mat_v[:, :, l, k])
                pg = PyPolyaGamma(seed=0)
                omegas = np.ones(2)
                k_arr = np.array([
                    n_lk1[l, k] - n_lk[l, k] / 2, n_lk1[k, l] - n_lk[k, l] / 2
                ])
                x = np.array(([1, 0], [0, 1]))
                for t in range(1, samplenum):
                    omegas[0] = pg.pgdraw(n_lk[l, k], b[t - 1, 0])
                    omegas[1] = pg.pgdraw(n_lk[k, l], b[t - 1, 1])
                    omega = np.array(([omegas[0], 0], [0, omegas[1]]))

                    v = inv(
                        np.dot(np.dot(np.transpose(x), omega), x) + inv(var))
                    m = np.dot(
                        v,
                        np.dot(np.transpose(x), np.transpose(k_arr)) +
                        np.dot(inv(var), mu))
                    s = npr.multivariate_normal(m, v)
                    b[t, 0] = np.copy(s[0])
                    b[t, 1] = np.copy(s[1])
                b_mat_mu[l, k] = np.sum(b[50:samplenum, 0]) / (samplenum - 50)
                b_mat_mu[k, l] = np.sum(b[50:samplenum, 1]) / (samplenum - 50)