Example #1
 def get_gamma_ij(self, i, j):
     # Responsibility of cluster j for sample i: density of x_i under
     # component j, normalized over all components (mixture weights omitted).
     dnm = 0
     for l in range(0, self.num_clusters):
         dnm += utils.multivariate_gaussian(self.df.values[i],
                                            self.means[l], self.vars[l])
     return utils.multivariate_gaussian(self.df.values[i], self.means[j],
                                        self.vars[j]) / dnm
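Every example on this page calls a project-local helper, utils.multivariate_gaussian(x, mean, cov), whose body is not shown. A minimal sketch of what such a helper might compute, with the signature inferred from the call sites and cov assumed to be a full (d, d) covariance matrix as in Example #6, is:

import numpy as np

def multivariate_gaussian(x, mean, cov):
    # Density of N(mean, cov) at x; x and mean are 1-D arrays of length d.
    d = mean.shape[0]
    diff = x - mean
    norm_const = 1.0 / np.sqrt(((2 * np.pi) ** d) * np.linalg.det(cov))
    return norm_const * np.exp(-0.5 * diff @ np.linalg.solve(cov, diff))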
Example #2
    def __e_step(self):
        # E-step: compute the responsibility gamma[i, j] of each component j
        # for each sample i, and accumulate the observed-data log-likelihood.
        N = self.X.shape[0]
        k = self.n_components

        self.lower_bound_ = 0
        for i in range(N):
            # Unnormalized responsibilities: weight_j * N(x_i | mean_j, cov_j)
            p = np.zeros(k)
            for j in range(k):
                p[j] = self.weights_[j] * utils.multivariate_gaussian(
                    self.X[i], self.means_[j], self.covariances_[j])

            # Normalize so gamma[i, :] sums to one
            sp = p.sum()
            for j in range(k):
                self.gamma[i, j] = p[j] / sp

            # log p(x_i) contributes to the total log-likelihood
            self.lower_bound_ += np.log(sp)

        return self
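The double loop above can also be organized one component at a time. A sketch under the same attribute names (self.X, self.weights_, self.means_, self.covariances_, self.gamma) and imports as __e_step:

    def e_step_by_component(self):
        # Same responsibilities and lower bound as __e_step, filling p
        # column by column instead of row by row.
        N, k = self.X.shape[0], self.n_components
        p = np.zeros([N, k])
        for j in range(k):
            p[:, j] = self.weights_[j] * np.array([
                utils.multivariate_gaussian(x, self.means_[j],
                                            self.covariances_[j])
                for x in self.X
            ])
        sp = p.sum(axis=1)
        self.gamma = p / sp[:, None]
        self.lower_bound_ = np.log(sp).sum()
        return self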
Example #3
 def get_L(self):
     # Average log-likelihood of the data under the current parameters.
     # Mixture weights are omitted; the trailing comment marks where they
     # would multiply in.
     n = len(self.df)
     l = 0
     for i in range(0, n):
         t = 0
         for j in range(0, self.num_clusters):
             t += utils.multivariate_gaussian(self.df.values[i],
                                              self.means[j],
                                              self.vars[j])  # * self.w[j]
         l += math.log(t)
     l /= n
     return l
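The trailing comment suggests mixture weights were dropped from the sum. A weighted variant, assuming a self.w attribute holding the weights (hypothetical; it does not appear elsewhere in these examples), might read:

 def get_weighted_L(self):
     # Average log-likelihood with the mixture weights restored
     n = len(self.df)
     total = 0
     for i in range(n):
         t = 0
         for j in range(self.num_clusters):
             t += self.w[j] * utils.multivariate_gaussian(
                 self.df.values[i], self.means[j], self.vars[j])
         total += math.log(t)
     return total / n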
Example #4
    def predict(self, X):
        # Hard cluster assignment for new samples: compute responsibilities
        # for X (not the training set self.X), then pick the most
        # responsible component per row.
        N = X.shape[0]
        k = self.n_components

        gamma = np.zeros([N, k])
        for i in range(N):
            p = np.zeros(k)
            for j in range(k):
                p[j] = self.weights_[j] * utils.multivariate_gaussian(
                    X[i], self.means_[j], self.covariances_[j])

            sp = p.sum()
            for j in range(k):
                gamma[i, j] = p[j] / sp

        return np.argmax(gamma, axis=1)
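A typical call, where model is an already-fitted instance of the class this method belongs to and X_new is an (N, d) array (both names are illustrative):

labels = model.predict(X_new)  # labels[i] = index of the most responsible component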
Example #5
beta_log = utils.beta_log(data_test, pi, mu, sigma_list, A, n_states)

# Gamma (smoothing distribution)
gamma_log = utils.gamma_log(alpha_log, beta_log)

gamma = np.exp(gamma_log)

# Csi (pair marginals): csi[t, j, i] = p(z_t = i, z_{t+1} = j | x_1, ..., x_T)
csi_log = np.zeros([time_steps - 1, n_states, n_states])
for t in range(time_steps - 1):
    # Log-normalizer over all state pairs at time t (log-sum-exp trick)
    aux = np.zeros([n_states, n_states])
    for m in range(n_states):
        for l in range(n_states):
            aux[m, l] = alpha_log[m, t] + beta_log[l, t + 1] + np.log(
                A[l, m]) + np.log(
                    utils.multivariate_gaussian(data_test[t + 1, :], mu[:, l],
                                                sigma_list[l]))
    b = np.max(aux)
    den = b + np.log(np.sum(np.exp(aux - b)))

    for i in range(n_states):
        for j in range(n_states):
            csi_log[t, j, i] = alpha_log[i, t] + beta_log[j, t + 1] + np.log(
                A[j, i]) + np.log(
                    utils.multivariate_gaussian(data_test[t + 1, :], mu[:, j],
                                                sigma_list[j])) - den

csi = np.exp(csi_log)

plt.subplot(411)
plt.plot(gamma[0, 0:100], 'c')
plt.title(r"$p(z_t|x_1, \ldots , x_T)$ - HMM (Fake parameters) - Test data")
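The b = np.max(aux) shift used to compute den above is the log-sum-exp trick: subtracting the maximum before exponentiating keeps the exponentials from overflowing. Factored into a standalone helper (a sketch, not part of utils):

import numpy as np

def log_sum_exp(a):
    # Numerically stable log(sum(exp(a)))
    b = np.max(a)
    return b + np.log(np.sum(np.exp(a - b)))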
Example #6
def gmm1(train_test,
         save_plots=True,
         n_clusters=4,
         max_it=200,
         show_plots=True,
         print_llk=False):

    data = utils.load_dataset(train_test)

    # Initialization of mu and pi with kmeans
    mu_hat, pi_hat = kmeans('train',
                            save_plots=False,
                            n_clusters=n_clusters,
                            print_results=False)

    n_samples = data.shape[0]
    dim = data.shape[1]

    mu_hat = np.transpose(mu_hat)  # mu_hat: dim x n_clusters, one mean per column

    sig_hat = 100 * np.ones([n_clusters, 1])  # isotropic variance per cluster

    tau = np.zeros([n_samples, n_clusters])  # responsibilities

    counter = 0

    # Dummy initial values so the convergence test passes on the first pass
    llik_old = 0
    llik_new = 10

    while ((counter < max_it) and np.abs(llik_new - llik_old) > 1e-8):

        llik_old = llik_new

        # E-step
        for i in range(n_samples):

            aux = np.zeros(n_clusters)

            for l in range(n_clusters):

                sig_hat_matrix = sig_hat[l] * np.eye(dim)
                aux[l] = (pi_hat[l] * utils.multivariate_gaussian(
                    np.transpose(data[i, :]), mu_hat[:, l], sig_hat_matrix))

            tau[i, :] = aux / np.sum(aux)

        # M-step
        for k in range(n_clusters):

            pi_hat[k] = np.sum(tau[:, k]) / n_samples

            # mu_hat
            weighted_samples = np.zeros([1, dim])
            for n in range(n_samples):
                tau_ = tau[n, k]
                weighted_samples += tau_ * data[n, :]

            den = np.sum(tau[:, k])
            mu_hat[:, k] = (np.transpose(weighted_samples) / den).reshape(dim)

            # sigma_hat
            weighted_sqnorm = 0
            for n in range(n_samples):

                tau_ = tau[n, k]
                diff = data[n, :].reshape([-1, 1]) - mu_hat[:, k].reshape(
                    [-1, 1])  # dim x 1
                sq_norm = np.sum(diff**2)
                weighted_sqnorm += tau_ * sq_norm

            sig_hat[k, 0] = weighted_sqnorm / (dim * den)  # isotropic MLE divides by dim

        # Log likelihood (expected complete-data log-likelihood, averaged)
        sig_hat_list = [
            sig_hat[j, 0] * np.identity(dim) for j in range(n_clusters)
        ]
        llik_new = 0.0
        for i in range(n_samples):
            for k in range(n_clusters):
                llik_new += tau[i, k] * (np.log(
                    utils.multivariate_gaussian(np.transpose(
                        data[i, :]), mu_hat[:, k], sig_hat_list[k])) +
                                         np.log(pi_hat[k]))

        llik_new = llik_new / n_samples

        counter += 1

    if (print_llk):
        print('Centroids for GMM1 on train data')
        for j in range(n_clusters):
            print('C' + str(j + 1), mu_hat[:, j])
        print('Log-likelihood for GMM1 on train data :', llik_new)

    if (train_test == 'test'):
        data = utils.load_dataset(train_test)
        n_samples = data.shape[0]

        for i in range(n_samples):

            aux = np.zeros(n_clusters)

            for l in range(n_clusters):

                aux[l] = (pi_hat[l] * utils.multivariate_gaussian(
                    np.transpose(data[i, :]), mu_hat[:, l], sig_hat_list[l]))

            tau[i, :] = aux / np.sum(aux)

        # Log likelihood
        llik_new = 0.0
        for i in range(n_samples):
            for k in range(n_clusters):
                llik_new += tau[i, k] * (np.log(
                    utils.multivariate_gaussian(np.transpose(
                        data[i, :]), mu_hat[:, k], sig_hat_list[k])) +
                                         np.log(pi_hat[k]))

        llik_new = llik_new / n_samples

        if (print_llk):
            print('Log-likelihood for GMM1 on test data:', llik_new)

    if (show_plots):

        colors = ['c', 'lightskyblue', 'mediumpurple', 'hotpink']

        Z = np.argmax(tau, 1)

        for m in range(n_clusters):
            color = colors[m]
            cluster_samples = data[np.where(Z == m)]

            plt.plot(cluster_samples[:, 0],
                     cluster_samples[:, 1],
                     'o',
                     c=color,
                     label='Cluster ' + str(m))
            plt.scatter(mu_hat[0, m],
                        mu_hat[1, m],
                        marker='x',
                        s=100,
                        c='k',  # black x for the centroid
                        linewidths=5,
                        zorder=10)

            ellipse_data = utils.plot_ellipse(x_cent=mu_hat[0, m],
                                              y_cent=mu_hat[1, m],
                                              cov=sig_hat_list[m],
                                              mass_level=0.9)
            plt.plot(ellipse_data[0], ellipse_data[1], c=color)

        plt.legend(loc='upper left', scatterpoints=1)
        plt.xlabel('Dimension 1')
        plt.ylabel('Dimension 2')

        plt.title('Gaussian Mixture Model 1 - ' + str(train_test) + ' data')

        if (save_plots):
            name = './Figures/gmm1_' + train_test + '.png'
            plt.savefig(name)

        plt.show()

        plt.clf()
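A typical invocation, fitting on the training split and then evaluating the test split ('train' and 'test' follow the strings used inside gmm1; what they resolve to depends on utils.load_dataset, which is not shown):

gmm1('train', save_plots=False, print_llk=True)
gmm1('test', save_plots=True, print_llk=True)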