# NOTE(review): this line is whitespace-collapsed -- the original line breaks
# and indentation are not recoverable from here, so the code below is left
# exactly as found.
#
# It appears to contain two pieces:
#   1. The tail of a function whose header is outside this view (presumably
#      generate_data, which is called further along the line -- confirm).
#      It splits `total_points` into two counts with a multinomial draw over
#      [0.5, 0.5], obtains X and z from data.multivariate_normal using the
#      means [m1, m2], the second arguments [s1, s2] and those counts, and
#      returns (X, z).
#   2. An experiment loop. For each of `num_experiments` repetitions and each
#      `dim` in `dimensions` it calls generate_data(dim), builds G with
#      eclust.kernel_matrix using the Euclidean norm of x - y, runs five
#      wrapper methods (kmeans, gmm, spectral_clustering, kernel_kmeans,
#      kernel_kgroups) with `k` clusters, and appends
#      [method label, dim, metric.accuracy(z, zh)] to the list `r`.
# `k`, `total_points`, `m1`, `m2`, `s1`, `s2`, `num_experiments` and
# `dimensions` are not defined on this line.
n1, n2 = np.random.multinomial(total_points, [0.5, 0.5]) X, z = data.multivariate_normal([m1, m2], [s1, s2], [n1, n2]) return X, z r = [] for _ in range(num_experiments): for dim in dimensions: X, z = generate_data(dim) G = eclust.kernel_matrix(X, lambda x, y: np.linalg.norm(x - y)) zh = wrapper.kmeans(k, X) a = metric.accuracy(z, zh) r.append(['k-means', dim, a]) zh = wrapper.gmm(k, X) a = metric.accuracy(z, zh) r.append(['gmm', dim, a]) zh = wrapper.spectral_clustering(k, X, G) a = metric.accuracy(z, zh) r.append(['spectral clustering', dim, a]) zh = wrapper.kernel_kmeans(k, X, G) a = metric.accuracy(z, zh) r.append(['kernel k-means', dim, a]) zh = wrapper.kernel_kgroups(k, X, G) a = metric.accuracy(z, zh) r.append(['kernel k-groups', dim, a])
# NOTE(review): this line is whitespace-collapsed and ends in the middle of a
# statement (`t.add_row([algo,`), so the code below is left exactly as found.
# As literally written, everything after the first `#` on the line is a
# comment; the description below follows the apparent original statements.
#
# What the statements do:
#   * sigma2 is the sum of squared Euclidean distances over all ordered pairs
#     (x, y) drawn from `data` (pairs with x == y included), divided by
#     len(data)**2; sigma is its square root.
#   * rho_exp and rho_gauss are defined as 2 - 2*exp(-||x-y|| / (2*sigma)) and
#     2 - 2*exp(-||x-y||**2 / (2*sigma**2)); on this line they are referenced
#     only by the commented-out kernel_matrix alternatives.
#   * G is built with eclust.kernel_matrix(data, rho); `rho` itself is not
#     defined on this line.
#   * With k = 3, `r` collects the results of six wrapper calls, each with
#     run_times=5: kmeans, gmm, spectral_clustering, spectral, kernel_kmeans
#     and kernel_kgroups (the last two with ini='random'; the 'k-means++' and
#     'spectral' variants are commented out).
#   * `t` is a PrettyTable with columns Algorithm / Accuracy / A-Rand, and the
#     final loop pairs each label in `algos` with the matching entry of `r`.
#     The row contents are cut off beyond this view.
sigma2 = sum([np.linalg.norm(x-y)**2 for x in data for y in data])/(len(data)**2) sigma = np.sqrt(sigma2) rho_exp = lambda x, y: 2-2*np.exp(-np.linalg.norm(x-y)/(2*sigma)) rho_gauss = lambda x, y: 2-2*np.exp(-np.linalg.norm(x-y)**2/(2*(sigma)**2)) G = eclust.kernel_matrix(data, rho) #G = eclust.kernel_matrix(data, rho_gauss) #G = eclust.kernel_matrix(data, rho_exp) k = 3 r = [] r.append(wrapper.kmeans(k, data, run_times=5)) r.append(wrapper.gmm(k, data, run_times=5)) r.append(wrapper.spectral_clustering(k, data, G, run_times=5)) r.append(wrapper.spectral(k, data, G, run_times=5)) r.append(wrapper.kernel_kmeans(k, data, G, run_times=5, ini='random')) #r.append(wrapper.kernel_kmeans(k, data, G, run_times=5, ini='k-means++')) #r.append(wrapper.kernel_kmeans(k, data, G, run_times=5, ini='spectral')) r.append(wrapper.kernel_kgroups(k,data,G,run_times=5, ini='random')) #r.append(wrapper.kernel_kgroups(k,data,G,run_times=5, ini='k-means++')) #r.append(wrapper.kernel_kgroups(k,data,G,run_times=5, ini='spectral')) t = PrettyTable(['Algorithm', 'Accuracy', 'A-Rand']) algos = ['kmeans', 'GMM', 'spectral clustering', 'spectral', 'kernel k-means', 'kernel k-groups'] for algo, zh in zip(algos, r): t.add_row([algo,
# NOTE(review): this line is whitespace-collapsed -- the original line breaks
# and indentation are not recoverable from here, so the code below is left
# exactly as found.
#
# It appears to contain two pieces:
#   1. The tail of a function whose header is outside this view (presumably
#      generate_data, which is called further along the line -- confirm).
#      It sets s2 to the D x D identity matrix, splits `total_points` into two
#      counts with a multinomial draw over [0.5, 0.5], obtains X and z from
#      data.multivariate_normal([m1, m2], [s1, s2], [n1, n2]) and returns
#      (X, z). `D`, `m1`, `m2` and `s1` are not defined on this line.
#   2. An experiment loop. For each of `num_experiments` repetitions and each
#      `dim` in `dimensions` it calls generate_data(dim), builds G with
#      eclust.kernel_matrix using the Euclidean norm of x - y, runs five
#      wrapper methods (kmeans, gmm, spectral_clustering, kernel_kmeans,
#      kernel_kgroups) with `k` clusters, and appends
#      [method label, dim, metric.accuracy(z, zh)] to the list `r`.
s2 = np.eye(D) n1, n2 = np.random.multinomial(total_points, [0.5, 0.5]) X, z = data.multivariate_normal([m1, m2], [s1, s2], [n1, n2]) return X, z r = [] for _ in range(num_experiments): for dim in dimensions: X, z = generate_data(dim) G = eclust.kernel_matrix(X, lambda x, y: np.linalg.norm(x-y)) zh = wrapper.kmeans(k, X) a = metric.accuracy(z, zh) r.append(['k-means', dim, a]) zh = wrapper.gmm(k, X) a = metric.accuracy(z, zh) r.append(['gmm', dim, a]) zh = wrapper.spectral_clustering(k, X, G) a = metric.accuracy(z, zh) r.append(['spectral clustering', dim, a]) zh = wrapper.kernel_kmeans(k, X, G) a = metric.accuracy(z, zh) r.append(['kernel k-means', dim, a]) zh = wrapper.kernel_kgroups(k, X, G) a = metric.accuracy(z, zh) r.append(['kernel k-groups', dim, a])
# One-dimensional two-cluster experiment: obtain a sample, compare three
# clustering methods in a results table, then begin a kernel density plot.
# NOTE(review): k, m1, m2, s1, s2, n1 and n2 are not defined in this span;
# they are expected to be set earlier in the file.
X, z = data.univariate_normal([m1, m2], [s1, s2], [n1, n2])
#X, z = data.univariate_lognormal([m1, m2], [s1, s2], [n1, n2])

# Wrap every sample in its own one-element row.
Y = np.array([[x] for x in X])

bw = 0.5           # bandwidth handed to KernelDensity below
num_points = 1500  # number of points for linspace
low = -6
high = 6
# Alternative plotting range, left disabled:
#low = -2
#high = 20

### clustering
t = PrettyTable(['Method', 'Accuracy'])

# Kernel matrix built from the plain Euclidean distance between rows of Y.
G = eclust.kernel_matrix(Y, lambda x, y: np.linalg.norm(x - y))

zh_kmeans = wrapper.kmeans(k, Y)
t.add_row(['k-means', metric.accuracy(z, zh_kmeans)])

zh_gmm = wrapper.gmm(k, Y)
t.add_row(['gmm', metric.accuracy(z, zh_gmm)])

zh_kgroups = wrapper.kernel_kgroups(k, Y, G)
t.add_row(['kernel k-groups', metric.accuracy(z, zh_kgroups)])

# Call form instead of the Python-2-only `print t` statement: with a single
# argument it prints the same text on Python 2 and is valid on Python 3.
print(t)

# Evaluation grid as a column vector.
X_plot = np.linspace(low, high, num_points)[:, np.newaxis]

### kernel density estimation
# Split the sample by the labels in z (0 / 1) and reshape each part to a
# column, then fit a Gaussian KDE to the first part.
x1_true = X[np.where(z == 0)][:, np.newaxis]
x2_true = X[np.where(z == 1)][:, np.newaxis]
fig = plt.figure()
ax = fig.add_subplot(111)
kde1 = KernelDensity(kernel='gaussian', bandwidth=bw).fit(x1_true)
# Remove every row that contains a '?' entry from both the feature matrix and
# the label vector, converting the remaining features to floats.
delete_missing = np.where(data == '?')[0]
data = np.array(np.delete(data, delete_missing, axis=0), dtype=float)
z = np.delete(z, delete_missing, axis=0)

# Standardize column by column. Both statistics are taken from the
# un-centered data, as in the single-expression form.
col_mean = data.mean(axis=0)
col_std = data.std(axis=0)
data = (data - col_mean) / col_std

# Kernel matrix under `rho` (defined outside this span); the other kernels
# are left disabled.
G = eclust.kernel_matrix(data, rho)
#G = energy.eclust.kernel_matrix(data, rho_gauss)
#G = energy.eclust.kernel_matrix(data, rho_exp)

# One result per algorithm, in the same order as the labels in `algos`.
# Each call uses 6 clusters and run_times=10; disabled initialisation
# variants are kept as comments.
r = [
    wrapper.kmeans(6, data, run_times=10),
    wrapper.gmm(6, data, run_times=10),
    wrapper.spectral_clustering(6, data, G, run_times=10),
    wrapper.spectral(6, data, G, run_times=10),
    #wrapper.kernel_kmeans(6, data, G, run_times=10, ini='random'),
    wrapper.kernel_kmeans(6, data, G, run_times=10, ini='k-means++'),
    #wrapper.kernel_kmeans(6, data, G, run_times=10, ini='spectral'),
    #wrapper.kernel_kgroups(6, data, G, run_times=10, ini='random'),
    wrapper.kernel_kgroups(6, data, G, run_times=10, ini='k-means++'),
    #wrapper.kernel_kgroups(6, data, G, run_times=10, ini='spectral'),
]

t = PrettyTable(['Algorithm', 'Accuracy', 'A-Rand'])
algos = [
    'kmeans',
    'GMM',
    'spectral clustering',
    'spectral',
    'kernel k-means',
    'kernel k-groups',
]
# One-dimensional two-cluster experiment on lognormal data: obtain a sample,
# compare three clustering methods in a results table, then split the sample
# by true and by estimated labels.
# NOTE(review): `k` is not defined in this span; it is expected to be set
# earlier in the file.
n = 2000
# Random split of the n points between the two components (equal weights).
n1, n2 = np.random.multinomial(n, [0.5, 0.5])
m1 = 0
s1 = 1.5
m2 = 1.5
s2 = 0.3
#X, z = data.univariate_normal([m1, m2], [s1, s2], [n1, n2])
X, z = data.univariate_lognormal([m1, m2], [s1, s2], [n1, n2])

# Wrap every sample in its own one-element row.
Y = np.array([[x] for x in X])

### clustering
t = PrettyTable(['Method', 'Accuracy'])

# Kernel matrix built from the plain Euclidean distance between rows of Y.
G = eclust.kernel_matrix(Y, lambda x, y: np.linalg.norm(x - y))

zh_kmeans = wrapper.kmeans(k, Y)
t.add_row(['k-means', metric.accuracy(z, zh_kmeans)])

zh_gmm = wrapper.gmm(k, Y)
t.add_row(['gmm', metric.accuracy(z, zh_gmm)])

zh_kgroups = wrapper.kernel_kgroups(k, Y, G)
t.add_row(['kernel k-groups', metric.accuracy(z, zh_kgroups)])

# Call form instead of the Python-2-only `print t` statement: with a single
# argument it prints the same text on Python 2 and is valid on Python 3.
print(t)

### estimated classes
# Samples grouped by label value (0 / 1): first under the labels in z, then
# under the k-means and GMM assignments.
x1_true = X[np.where(z == 0)]
x2_true = X[np.where(z == 1)]
x1_kmeans = X[np.where(zh_kmeans == 0)]
x2_kmeans = X[np.where(zh_kmeans == 1)]
x1_gmm = X[np.where(zh_gmm == 0)]
x2_gmm = X[np.where(zh_gmm == 1)]
# NOTE(review): this line is whitespace-collapsed and ends inside an
# unterminated `algos = [` list literal, so the code below is left exactly as
# found. Because the line starts with `#`, it is syntactically one long
# comment as written; the description below follows the apparent original
# statements.
#
# What the statements do:
#   * Find the row indices where `data` equals '?', delete those rows from
#     both `data` and `z`, and convert `data` to a float array.
#   * Standardize `data` column-wise: subtract the axis-0 mean and divide by
#     the axis-0 standard deviation.
#   * Build G with eclust.kernel_matrix(data, rho); `rho` is not defined on
#     this line, and the rho_gauss / rho_exp variants are commented out.
#   * Collect in `r` the results of six wrapper calls with 6 clusters and
#     run_times=10: kmeans, gmm, spectral_clustering, spectral, kernel_kmeans
#     and kernel_kgroups (the last two with ini='k-means++'; the 'random' and
#     'spectral' variants are commented out).
#   * Create a PrettyTable with columns Algorithm / Accuracy / A-Rand and
#     begin the list of algorithm labels, which continues beyond this view.
# delete missing entries delete_missing = np.where(data == '?')[0] data = np.delete(data, delete_missing, axis=0) data = np.array(data, dtype=float) z = np.delete(z, delete_missing, axis=0) # normalize data data = (data - data.mean(axis=0)) / data.std(axis=0) G = eclust.kernel_matrix(data, rho) #G = energy.eclust.kernel_matrix(data, rho_gauss) #G = energy.eclust.kernel_matrix(data, rho_exp) r = [] r.append(wrapper.kmeans(6, data, run_times=10)) r.append(wrapper.gmm(6, data, run_times=10)) r.append(wrapper.spectral_clustering(6, data, G, run_times=10)) r.append(wrapper.spectral(6, data, G, run_times=10)) #r.append(wrapper.kernel_kmeans(6, data, G, run_times=10, ini='random')) r.append(wrapper.kernel_kmeans(6, data, G, run_times=10, ini='k-means++')) #r.append(wrapper.kernel_kmeans(6, data, G, run_times=10, ini='spectral')) #r.append(wrapper.kernel_kgroups(6,data,G,run_times=10, ini='random')) r.append(wrapper.kernel_kgroups(6, data, G, run_times=10, ini='k-means++')) #r.append(wrapper.kernel_kgroups(6,data,G,run_times=10, ini='spectral')) t = PrettyTable(['Algorithm', 'Accuracy', 'A-Rand']) algos = [ 'kmeans', 'GMM', 'spectral clustering', 'spectral', 'kernel k-means',