def circles_or_spirals(num_points=[200, 200], num_times=10, run_times=10):
    """Cluster two concentric circles (or, with the commented lines,
    two spirals) and compare the four methods."""
    k = 2
    n1, n2 = num_points
    X, z = data.circles([1, 3], [0.1, 0.1], [n1, n2])
    #X, z = data.spirals([1, -1], [n1, n2], noise=.2)

    #rho = lambda x, y: ke.euclidean_rho(x, y, alpha=0.5)
    #rho = lambda x, y: np.exp(-np.linalg.norm(x-y)/2/(1**2))

    # this one works for circles
    #rho = lambda x, y: np.power(np.linalg.norm(x-y), 2)*\
    #                   np.exp(-0.5*np.linalg.norm(x-y))

    # this one is working decently for spirals
    rho = lambda x, y: np.power(np.linalg.norm(x-y), 2)*\
                       .9*np.sin(np.linalg.norm(x-y)/0.9)

    #rho = lambda x, y: np.power(np.linalg.norm(x-y), 2)*\
    #                   delta(np.linalg.norm(x-y)/0.1)

    G = ke.kernel_matrix(X, rho)

    table = np.zeros((num_times, 4))
    for nt in range(num_times):
        table[nt, 0] = cost_energy(k, X, G, z, run_times=run_times)
        table[nt, 1] = kernel_energy(k, X, G, z, run_times=run_times)
        table[nt, 2] = kmeans(k, X, z, run_times=run_times)
        table[nt, 3] = gmm(k, X, z, run_times=run_times)
    return table
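# Hedged usage sketch, not part of the original experiments: every benchmark
# in this file returns a table with one row per repetition and one column per
# method, so results are naturally reported as a per-column mean with its
# standard error. Assumes numpy (np) and scipy.stats are imported at module
# level, as the printing code in mnist_pca suggests.
def summarize_table(table, names):
    """Print the mean and standard error of each column of a results table."""
    for name, col in zip(names, table.T):
        print "%-15s:" % name, col.mean(), scipy.stats.sem(col)

# Example:
#   summarize_table(circles_or_spirals(),
#                   ['energy', 'kernel energy', 'k-means', 'gmm'])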
def gauss_dimensions_pi(num_points=range(0, 180, 10), N=200, D=4, d=2,
                        run_times=3, num_experiments=10):
    """Test unbalanced clusters: sweep p, putting N - p points in one
    cluster and N + p in the other."""
    k = 2
    table = np.zeros((num_experiments*len(num_points), 5))
    count = 0
    for p in num_points:
        for i in range(num_experiments):
            # generate data
            m1 = np.zeros(D)
            s1 = np.eye(D)
            m2 = np.concatenate((1.5*np.ones(d), np.zeros(D-d)))
            s2 = np.diag(np.concatenate((.5*np.ones(d), np.ones(D-d))))
            n1 = N - p
            n2 = N + p
            X, z = data.multivariate_normal([m1, m2], [s1, s2], [n1, n2])

            rho = lambda x, y: np.linalg.norm(x-y)
            G = ke.kernel_matrix(X, rho)

            table[count, 0] = p
            table[count, 1] = cost_energy(k, X, G, z, run_times=run_times)
            table[count, 2] = kernel_energy(k, X, G, z, run_times=run_times)
            table[count, 3] = kmeans(k, X, z, run_times=run_times)
            table[count, 4] = gmm(k, X, z, run_times=run_times)
            count += 1
    return table
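# Note on the sweep above: p varies the mixing proportion, not the dimension.
# With n1 = N - p and n2 = N + p, the second cluster holds (N + p)/(2N) of
# the data, so the defaults cover proportions from 0.500 (p = 0) up to 0.925
# (p = 170). A minimal check (illustrative only):
#
#   for p in range(0, 180, 10):
#       print p, (200.0 + p)/400.0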
def normal_or_lognormal(numpoints=range(10, 100, 10), run_times=10,
                        num_experiments=10, kind='normal'):
    """Compare clustering on multivariate normal versus lognormal data,
    scoring energy clustering under three kernels against k-means and GMM."""
    table = np.zeros((num_experiments*len(numpoints), 9))
    count = 0
    k = 2
    for n in numpoints:
        for i in range(num_experiments):
            # generate data
            m1 = np.zeros(20)
            s1 = 0.5*np.eye(20)
            m2 = 0.5*np.concatenate((np.ones(5), np.zeros(15)))
            s2 = np.eye(20)
            if kind == 'normal':
                X, z = data.multivariate_normal([m1, m2], [s1, s2], [n, n])
            else:
                X, z = data.multivariate_lognormal([m1, m2], [s1, s2], [n, n])

            # three semimetrics for the energy methods
            rho = lambda x, y: np.linalg.norm(x-y)
            G = ke.kernel_matrix(X, rho)
            rho2 = lambda x, y: np.power(np.linalg.norm(x-y), 0.5)
            G2 = ke.kernel_matrix(X, rho2)
            rho3 = lambda x, y: 2 - 2*np.exp(-np.linalg.norm(x-y)/2)
            G3 = ke.kernel_matrix(X, rho3)

            table[count, 0] = n
            table[count, 1] = cost_energy(k, X, G, z, run_times=run_times)
            table[count, 2] = cost_energy(k, X, G2, z, run_times=run_times)
            table[count, 3] = cost_energy(k, X, G3, z, run_times=run_times)
            table[count, 4] = kernel_energy(k, X, G, z, run_times=run_times)
            table[count, 5] = kernel_energy(k, X, G2, z, run_times=run_times)
            table[count, 6] = kernel_energy(k, X, G3, z, run_times=run_times)
            table[count, 7] = kmeans(k, X, z, run_times=run_times)
            table[count, 8] = gmm(k, X, z, run_times=run_times)
            count += 1
    return table
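# Usage sketch (illustrative): the same function generates both the Gaussian
# baseline and the heavier-tailed lognormal variant, so a direct comparison
# is just two calls with different `kind`. Column 0 is the per-cluster sample
# size n; columns 1-3 are cost_energy under rho, rho2, rho3; columns 4-6 the
# corresponding kernel_energy runs; columns 7-8 are k-means and GMM.
#
#   table_normal = normal_or_lognormal(kind='normal')
#   table_lognormal = normal_or_lognormal(kind='lognormal')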
def detect_clusters(num_points, num_permutations):
    """Check if algorithms are detecting clusters, compared to chance.

    Fit each method once, then count how many random labelings achieve a
    better objective value than the fitted one.
    """
    # generate one-dimensional data from two well-separated uniforms
    Y1 = np.random.uniform(0, 1, num_points)
    Y2 = np.random.uniform(2, 3, num_points)
    X, z = data.shuffle_data([Y1, Y2])
    X = np.array([[x] for x in X])

    # cluster with k clusters and record the objective values
    rho = lambda x, y: np.linalg.norm(x-y)
    G = ke.kernel_matrix(X, rho)
    k = 3
    z_energy, J_energy = energy(k, X, G, run_times=5)
    z_kmeans, J_kmeans = kmeans(k, X, run_times=5)
    z_gmm, J_gmm = gmm(k, X, run_times=5)
    #print J_energy, J_kmeans, J_gmm
    #
    #k = 4
    #z_energy, J_energy = energy(k, X, G, run_times=5)
    #z_kmeans, J_kmeans = kmeans(k, X, run_times=5)
    #z_gmm, J_gmm = gmm(k, X, run_times=5)
    #
    #print J_energy, J_kmeans, J_gmm

    # randomly label the points and compare objectives against the fit
    times_energy = 0
    times_kmeans = 0
    times_gmm = 0
    for i in range(num_permutations):
        fake_z_energy = np.random.randint(0, 6, 2*num_points)
        fake_z_kmeans = np.random.randint(0, 6, 2*num_points)
        fake_z_gmm = np.random.randint(0, 6, 2*num_points)
        #fake_z_energy = np.random.choice(z_energy, len(z_energy),
        #                                 replace=False)
        fake_Z_energy = ke.ztoZ(fake_z_energy)
        #fake_z_kmeans = np.random.choice(z_kmeans, len(z_kmeans),
        #                                 replace=False)
        #fake_z_gmm = np.random.choice(z_gmm, len(z_gmm), replace=False)

        JJ_energy = ke.objective(fake_Z_energy, G)
        JJ_kmeans = objectives.kmeans(
            data.from_label_to_sets(X, fake_z_kmeans))
        JJ_gmm = objectives.loglikelihood(
            data.from_label_to_sets(X, fake_z_gmm))

        # energy and log-likelihood are maximized, k-means is minimized
        if JJ_energy > J_energy:
            times_energy += 1
        if JJ_kmeans < J_kmeans:
            times_kmeans += 1
        if JJ_gmm > J_gmm:
            times_gmm += 1
    return times_energy, times_kmeans, times_gmm
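# The counts returned by detect_clusters are the ingredients of an empirical
# p-value: the fraction of random labelings scoring at least as well as the
# fitted clustering. A minimal sketch of that conversion (illustrative only):
#
#   e, km, g = detect_clusters(num_points=100, num_permutations=1000)
#   print 'energy  p ~', e/1000.0
#   print 'k-means p ~', km/1000.0
#   print 'gmm     p ~', g/1000.0
#
# Values near zero suggest the fitted objective is far better than chance.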
def gauss_dimensions_cov(dimensions=range(2, 100, 20), num_points=[200, 200],
                         run_times=3, num_experiments=10, d=None):
    """High dimensions but with nontrivial covariance."""
    k = 2
    if not d:
        d = dimensions[0]
    n1, n2 = num_points
    table = np.zeros((num_experiments*len(dimensions), 5))
    count = 0
    for D in dimensions:
        for l in range(num_experiments):
            #m1 = np.zeros(D)
            #s1 = np.eye(D)
            #m2 = np.concatenate((np.ones(d), np.zeros(D-d)))
            #s2 = np.eye(D)
            #for a in range(int(d/2)):
            #    s2[a, a] = a + 1

            # signal lives in the first d coordinates: the means differ and
            # the variances shrink in one cluster and grow in the other
            m1 = np.zeros(D)
            m2 = np.concatenate((np.ones(d), np.zeros(D-d)))
            s1 = np.eye(D)
            s2 = np.eye(D)
            for a in range(d):
                s1[a, a] = np.power(1./(a+1), 0.5)
                s2[a, a] = np.power(a+1, 0.5)
            X, z = data.multivariate_normal([m1, m2], [s1, s2], [n1, n2])

            rho = lambda x, y: np.linalg.norm(x-y)
            G = ke.kernel_matrix(X, rho)

            table[count, 0] = D
            table[count, 1] = cost_energy(k, X, G, z, run_times=run_times)
            table[count, 2] = kernel_energy(k, X, G, z, run_times=run_times)
            table[count, 3] = kmeans(k, X, z, run_times=run_times)
            table[count, 4] = gmm(k, X, z, run_times=run_times)
            count += 1
    return table
def gauss_dimensions_mean(dimensions=range(2, 100, 20), num_points=[200, 200],
                          delta=0.7, run_times=5, num_experiments=10, d=None):
    """Keep the mean separation in the first d coordinates while increasing
    the ambient dimension D. The covariances are kept fixed at the identity.
    """
    k = 2
    if not d:
        d = dimensions[0]
    n1, n2 = num_points
    table = np.zeros((num_experiments*len(dimensions), 5))
    count = 0
    for D in dimensions:
        for i in range(num_experiments):
            # generate data
            m1 = np.zeros(D)
            s1 = np.eye(D)
            m2 = np.concatenate((delta*np.ones(d), np.zeros(D-d)))
            s2 = np.eye(D)
            X, z = data.multivariate_normal([m1, m2], [s1, s2], [n1, n2])

            rho = lambda x, y: np.linalg.norm(x-y)
            G = ke.kernel_matrix(X, rho)

            table[count, 0] = D
            table[count, 1] = cost_energy(k, X, G, z, run_times=run_times)
            table[count, 2] = kernel_energy(k, X, G, z, run_times=run_times)
            table[count, 3] = kmeans(k, X, z, run_times=run_times)
            table[count, 4] = gmm(k, X, z, run_times=run_times)
            count += 1
    return table
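# Hedged helper, not in the original suite: gauss_dimensions_mean,
# gauss_dimensions_cov, and gauss_dimensions_pi all emit tables whose first
# column is the swept parameter repeated num_experiments times, so rows can
# be grouped on it before averaging. Assumes numpy as np.
def group_by_sweep(table):
    """Return {swept value: mean score per method} from a sweep table."""
    out = {}
    for v in np.unique(table[:, 0]):
        rows = table[table[:, 0] == v]
        out[v] = rows[:, 1:].mean(axis=0)
    return out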
def mnist_pca(num_experiments=10, digits=[0, 1, 2], num_points=100,
              n_components=20, run_times=4):
    """MNIST clustering. We use Hartigan's and Lloyd's methods with
    different kernels and compare to k-means. We also project the data
    onto `n_components` principal components and repeat the comparison.
    """
    k = len(digits)

    f = gzip.open('data/mnist.pkl.gz', 'rb')
    train_set, valid_set, test_set = cPickle.load(f)
    f.close()
    images, labels = train_set

    rhos = [rho_standard, rho_half]
    rho_names = ['standard', 'half', 'rbf']

    table = []
    for i in range(num_experiments):
        # sample num_points images of each digit and shuffle them
        sampled = []
        true_labels = []
        for l, dig in enumerate(digits):
            x = np.where(labels == dig)[0]
            js = np.random.choice(x, num_points, replace=False)
            for j in js:
                sampled.append(images[j])
                true_labels.append(l)
        sampled = np.array(sampled)
        true_labels = np.array(true_labels)
        idx = range(len(sampled))
        np.random.shuffle(idx)
        X = sampled[idx]
        z = true_labels[idx]

        # PCA projection, used in the second half of this experiment
        pca = PCA(n_components=n_components)
        pca.fit(X)
        X_new = pca.transform(X)

        # RBF bandwidth: root-mean-square pairwise distance
        n = len(X)
        sigma = np.sqrt(sum([np.linalg.norm(X[ii]-X[jj])**2
                             for ii in range(n)
                             for jj in range(n)])/(n**2))
        #rho_exp2 = lambda x, y: rho_exp(x, y, sigma)
        rho_rbf2 = lambda x, y: rho_rbf(x, y, sigma)
        #rho_exp2 = lambda x, y: rho_exp(x, y, 1)
        #rho_rbf2 = lambda x, y: rho_rbf(x, y, 1)

        # build the kernels and score every method on the raw data
        Gs = [ke.kernel_matrix(X, rho) for rho in rhos]
        #Gs.append(ke.kernel_matrix(X, rho_exp2))
        Gs.append(ke.kernel_matrix(X, rho_rbf2))
        for G in Gs:
            table.append(cost_energy(k, X, G, z, run_times=run_times))
        for G in Gs:
            table.append(kernel_energy(k, X, G, z, run_times=run_times))
        table.append(kmeans(k, X, z, run_times=run_times))

        # repeat everything on the PCA projection
        X = X_new
        n = len(X)
        sigma = np.sqrt(sum([np.linalg.norm(X[ii]-X[jj])**2
                             for ii in range(n)
                             for jj in range(n)])/(n**2))
        #rho_exp2 = lambda x, y: rho_exp(x, y, sigma)
        rho_rbf2 = lambda x, y: rho_rbf(x, y, sigma)
        #rho_exp2 = lambda x, y: rho_exp(x, y, 1)
        #rho_rbf2 = lambda x, y: rho_rbf(x, y, 1)

        Gs = [ke.kernel_matrix(X, rho) for rho in rhos]
        #Gs.append(ke.kernel_matrix(X, rho_exp2))
        Gs.append(ke.kernel_matrix(X, rho_rbf2))
        for G in Gs:
            table.append(cost_energy(k, X, G, z, run_times=run_times))
        for G in Gs:
            table.append(kernel_energy(k, X, G, z, run_times=run_times))
        table.append(kmeans(k, X, z, run_times=run_times))

    # one row per experiment: [energy | kernel | k-means] twice (raw, PCA)
    table = np.array(table)
    num_kernels = len(Gs)
    table = table.reshape((num_experiments, 2*(2*num_kernels + 1)))

    num_cols = 2*num_kernels
    for j in range(num_kernels):
        vals = table[:, j]
        print "Energy %-10s:" % rho_names[j], vals.mean(), scipy.stats.sem(vals)
    for i, j in enumerate(range(num_kernels, num_cols)):
        vals = table[:, j]
        print "Kernel %-10s:" % rho_names[i], vals.mean(), scipy.stats.sem(vals)
    vals = table[:, num_cols]
    print "k-means          :", vals.mean(), scipy.stats.sem(vals)

    start = num_cols + 1
    for i, j in enumerate(range(start, start + num_kernels)):
        vals = table[:, j]
        print "Energy PCA %-10s:" % rho_names[i], vals.mean(), \
            scipy.stats.sem(vals)
    for i, j in enumerate(range(start + num_kernels, start + 2*num_kernels)):
        vals = table[:, j]
        print "Kernel PCA %-10s:" % rho_names[i], vals.mean(), \
            scipy.stats.sem(vals)
    vals = table[:, -1]
    print "k-means PCA      :", vals.mean(), scipy.stats.sem(vals)
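# The RBF bandwidth in mnist_pca is the root-mean-square pairwise distance,
# computed above with a quadratic-time Python double loop. A vectorized
# sketch using scipy.spatial.distance.pdist gives the same value; each
# unordered pair appears once in pdist, hence the factor of 2. Illustrative
# only, assuming scipy is available.
def rms_pairwise_distance(X):
    """sqrt of the mean squared distance over all ordered pairs."""
    from scipy.spatial.distance import pdist
    n = len(X)
    return np.sqrt(2.0*pdist(X, 'sqeuclidean').sum()/n**2)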
def other_examples(num_experiments=10, run_times=4):
    """Some other synthetic examples (cigars, circles, spirals)."""
    rhos = [rho_standard, rho_half]
    rho_names = ['standard', 'half', 'exp', 'rbf']

    table = []
    for i in range(num_experiments):
        ### generate data ###

        # cigars
        #m1 = [0, 0]
        #m2 = [6.5, 0]
        #s1 = np.array([[1, 0], [0, 20]])
        #s2 = np.array([[1, 0], [0, 20]])
        #X, z = data.multivariate_normal([m1, m2], [s1, s2], [200, 200])
        #rho_exp2 = lambda x, y: rho_exp(x, y, 2)
        #rho_rbf2 = lambda x, y: rho_rbf(x, y, 2)

        # 2 circles
        #X, z = data.circles([1, 3], [[0,0], [0,0]], [0.2, 0.2], [400, 400])
        #rho_exp2 = lambda x, y: rho_exp(x, y, 1)
        #rho_rbf2 = lambda x, y: rho_rbf(x, y, 1)

        # 3 circles
        X, z = data.circles([1, 3, 5], [[0, 0], [0, 0], [0, 0]],
                            [0.2, 0.2, 0.2], [400, 400, 400])
        rho_exp2 = lambda x, y: rho_exp(x, y, 2)
        rho_rbf2 = lambda x, y: rho_rbf(x, y, 2)

        # 2 spirals
        #X, z = data.spirals([1, -1], [[0,0], [0,0]], [200, 200], noise=0.1)
        #####################

        k = 3

        # build the kernels; keep the order aligned with rho_names
        Gs = [ke.kernel_matrix(X, rho) for rho in rhos]
        Gs.append(ke.kernel_matrix(X, rho_exp2))
        Gs.append(ke.kernel_matrix(X, rho_rbf2))

        for G in Gs:
            table.append(cost_energy(k, X, G, z, run_times=run_times))
        for G in Gs:
            table.append(kernel_energy(k, X, G, z, run_times=run_times))
        table.append(kmeans(k, X, z, run_times=run_times))
        table.append(gmm(k, X, z, run_times=run_times))

    # one row per experiment: [energy | kernel | k-means | gmm]
    table = np.array(table)
    num_kernels = len(Gs)
    table = table.reshape((num_experiments, 2*num_kernels + 2))

    num_cols = 2*num_kernels
    for j in range(num_kernels):
        vals = table[:, j]
        print "Energy %-10s:" % rho_names[j], vals.mean(), scipy.stats.sem(vals)
    for i, j in enumerate(range(num_kernels, num_cols)):
        vals = table[:, j]
        print "Kernel %-10s:" % rho_names[i], vals.mean(), scipy.stats.sem(vals)
    vals = table[:, -2]
    print "k-means          :", vals.mean(), scipy.stats.sem(vals)
    vals = table[:, -1]
    print "gmm              :", vals.mean(), scipy.stats.sem(vals)
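# The MNIST and shape experiments above reference rho_standard, rho_half,
# rho_exp, and rho_rbf, which this excerpt does not define. A plausible
# reconstruction, consistent with the inline kernels used earlier in this
# file, is sketched below; these are assumptions, not the original
# definitions, so they are left commented out.
#
#   def rho_standard(x, y):
#       return np.linalg.norm(x - y)
#
#   def rho_half(x, y):
#       return np.power(np.linalg.norm(x - y), 0.5)
#
#   def rho_exp(x, y, sigma):
#       return 2 - 2*np.exp(-np.linalg.norm(x - y)/sigma)
#
#   def rho_rbf(x, y, sigma):
#       return 2 - 2*np.exp(-np.linalg.norm(x - y)**2/(2*sigma**2))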