import sys

import numpy as np
from IPython.core.debugger import Tracer

import distributions
import em
import kmeans

nax = np.newaxis


def cluster(X, K, divergence, debug=False):
    if divergence == 'KL':
        dist_cls = distributions.KL
        if np.any(X <= 0):  # for MFCCs...
            X = X - X.min() + 1e-8
        X = 5. * X / X.sum(1)[:,nax]
    elif divergence == 'IS':
        dist_cls = distributions.ItakuraSaito
        if np.any(X <= 0):  # for MFCCs...
            X = X - X.min() + 1e-8
        # X = X / X.sum(1)[:,nax]
    elif divergence == 'EU':
        dist_cls = distributions.SquareDistance
    else:
        print 'Unknown divergence: {}'.format(divergence)
        sys.exit(1)

    # k-means initialization, keeping the best of 10 runs under dist_cls
    assignments, centroids, _ = kmeans.kmeans_best_of_n(X, K, n_trials=10,
                                                        dist_cls=dist_cls,
                                                        debug=debug)
    init_pi = np.ones(K) / K
    init_obs_distr = centroids

    tau_em, obs_distr, pi, em_ll_train, _ = em.em(X, centroids, n_iter=10)
    # tau_hmm, A, obs_distr, pi, ll_train, _ = \
    #         hmm.em_hmm(X, init_pi, init_obs_distr, n_iter=10)
    # seq_hmm, _ = hmm.viterbi(X, pi, A, obs_distr)

    if debug:
        Tracer()()  # drop into the IPython debugger for inspection

    return {'kmeans': assignments,
            'EM': np.argmax(tau_em, axis=1),
            # 'hmm_smoothing': np.argmax(tau_hmm, axis=1),
            # 'hmm_viterbi': seq_hmm,
           }
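
# Illustrative usage sketch (added, hypothetical): exercises cluster() on
# strictly positive synthetic features so the KL branch needs no MFCC shift.
# Only the signatures already used above are assumed; _cluster_demo, rng and
# Xdemo are names introduced here for illustration.
def _cluster_demo():
    rng = np.random.RandomState(0)
    Xdemo = np.abs(rng.randn(300, 12)) + 1e-3  # toy non-negative features
    res = cluster(Xdemo, K=4, divergence='KL')
    print 'k-means cluster sizes:', np.bincount(res['kmeans'])
    print 'EM cluster sizes:     ', np.bincount(res['EM'])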
        ll_test.append(log_likelihood(lalpha_test, lbeta_test))

        if monitor:
            monitor_vals.append(monitor(A, obs_distr))

    # Tracer()()  # debugging breakpoint; uncomment to inspect the fit
    return seq, A, obs_distr, ll_test, monitor_vals


if __name__ == '__main__':
    X = np.loadtxt('EMGaussian.data')
    Xtest = np.loadtxt('EMGaussian.test')
    K = 4

    # Run simple EM (no HMM)
    iterations = 40
    assignments, centers, _ = kmeans.kmeans_best_of_n(X, K, n_trials=5)
    new_centers = [distributions.Gaussian(c.mean, np.eye(2))
                   for c in centers]
    tau, obs_distr, pi, gmm_ll_train, gmm_ll_test = \
            em.em(X, new_centers, assignments, n_iter=iterations, Xtest=Xtest)

    # example with fixed parameters: uniform off-diagonal transitions,
    # self-transition probability 0.5
    A = 1. / 6 * np.ones((K, K))
    A[np.diag(np.ones(K)) == 1] = 0.5

    lalpha, lbeta = alpha_beta(Xtest, pi, A, obs_distr)
    log_p = smoothing(lalpha, lbeta)
    p = np.exp(log_p)

    def plot_traj(p):
        plt.figure()
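# Added sanity-check sketch (hypothetical): the smoothed posteriors
# p(q_t | x_1, ..., x_T) computed above should sum to one over the K states
# at every time step.  Assumes the (T, K) layout implied by the argmax and
# plotting code; _check_smoothing is a name introduced here for illustration.
def _check_smoothing(p):
    assert np.allclose(p.sum(axis=1), 1.)
    return np.argmax(p, axis=1)  # most likely state for each frame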
    # algos to run
    algs = map(int, options.algos.split(','))
    # number of clusters
    K = options.k

    ass_plots = []
    seqs = {}
    results = {}  # data/results obtained, indexed by algorithm

    # K-means
    if options.init == 'kmeans' or algos.kmeans in algs:
        t = time.time()
        assignments, centroids, dists = \
                kmeans.kmeans_best_of_n(X, K, n_trials=4,
                                        dist_cls=distributions.KL)
        print 'K-means: {}s'.format(time.time() - t)

        results[algos.kmeans] = {
            'seq': assignments,
            'centroids': centroids,
        }
        seqs[algos.kmeans] = assignments

    # EM (initialized from the k-means centroids computed above)
    if options.init == 'em' or algos.em in algs:
        iterations = 10
        t = time.time()
        tau_em, obs_distr, pi, em_ll_train, _ = \
                em.em(X, centroids, n_iter=options.n_iter)
        print 'EM: {}s'.format(time.time() - t)
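# Added sketch (hypothetical): fitting an HMM on top of the k-means
# initialization, reusing the hmm.em_hmm / hmm.viterbi signatures that appear
# in commented-out form elsewhere in this codebase; _run_hmm is a name
# introduced here for illustration.
def _run_hmm(X, centroids, K, n_iter=10):
    init_pi = np.ones(K) / K  # uniform initial state distribution
    tau_hmm, A, obs_distr, pi, ll_train, _ = \
            hmm.em_hmm(X, init_pi, centroids, n_iter=n_iter)
    seq_hmm, _ = hmm.viterbi(X, pi, A, obs_distr)  # MAP state sequence
    return seq_hmm, np.argmax(tau_hmm, axis=1)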
    for j in range(K):
        # contour of each Gaussian component at level 0.01
        x, y = np.arange(-10., 10., 0.04), np.arange(-15., 15., 0.04)
        xx, yy = np.meshgrid(x, y)
        sx = np.sqrt(sigmas[j][0,0])
        sy = np.sqrt(sigmas[j][1,1])
        sxy = sigmas[j][1,0]
        z = bivariate_normal(xx, yy, sx, sy, means[j,0], means[j,1], sxy)
        cs = plt.contour(xx, yy, z, [0.01])


if __name__ == '__main__':
    X = np.loadtxt('EMGaussian.data')
    Xtest = np.loadtxt('EMGaussian.test')
    K = 4
    iterations = 40

    assignments, centers, _ = kmeans.kmeans_best_of_n(X, K, n_trials=5)

    # Isotropic
    for k in range(K):
        centers[k].sigma2 = 1.
    tau, obs_distr, pi, ll_train_iso, ll_test_iso = \
            em(X, centers, assignments, n_iter=iterations, Xtest=Xtest)
    plot_em(X, tau, obs_distr, contours=True)
    plt.title('EM with covariance matrices proportional to identity')

    # General
    new_centers = [distributions.Gaussian(c.mean, c.sigma2*np.eye(2))
                   for c in centers]
    tau, obs_distr, pi, ll_train_gen, ll_test_gen = \
            em(X, new_centers, assignments, n_iter=iterations, Xtest=Xtest)
    plot_em(X, tau, obs_distr, contours=True)
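
    # Added comparison sketch (hypothetical): print the final train/test
    # log-likelihoods of the two covariance models fitted above, assuming
    # em() returns per-iteration log-likelihood lists as the names suggest.
    print 'isotropic: train {}  test {}'.format(ll_train_iso[-1], ll_test_iso[-1])
    print 'general:   train {}  test {}'.format(ll_train_gen[-1], ll_test_gen[-1])
    plt.show()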