# Benchmark five clustering methods as a function of the data dimension.
#
# For every repetition and every entry of `dimensions`, a sample (X, z) is
# drawn with generate_data(dim), each method is run once on it, and one
# [method, dimension, accuracy] record is appended to `r`.  All records are
# finally written to `output` as CSV.
r = []
for _ in range(num_experiments):
    for dim in dimensions:
        X, z = generate_data(dim)
        # Kernel matrix built from the plain Euclidean distance.
        G = eclust.kernel_matrix(X, lambda x, y: np.linalg.norm(x - y))

        # (label, runner) pairs; they are invoked immediately and in this
        # order, so the call sequence is the same as in the unrolled version.
        methods = [
            ('k-means', lambda: wrapper.kmeans(k, X)),
            ('gmm', lambda: wrapper.gmm(k, X)),
            ('spectral clustering',
             lambda: wrapper.spectral_clustering(k, X, G)),
            ('kernel k-means', lambda: wrapper.kernel_kmeans(k, X, G)),
            ('kernel k-groups', lambda: wrapper.kernel_kgroups(k, X, G)),
        ]
        for label, run_method in methods:
            zh = run_method()
            a = metric.accuracy(z, zh)
            r.append([label, dim, a])

# Build the frame from the records themselves: going through np.array(r)
# would coerce every cell (dimension and accuracy included) to a string.
df = pd.DataFrame(r, columns=['method', 'dimension', 'accuracy'])
df.to_csv(output, index=False)
# Cluster `data` into k = 3 groups with six algorithms and print, for each
# one, the accuracy and the adjusted Rand index against the labels `z`.
G = eclust.kernel_matrix(data, rho)
# Alternative kernels, kept for experimentation:
#G = eclust.kernel_matrix(data, rho_gauss)
#G = eclust.kernel_matrix(data, rho_exp)

k = 3

# One clustering result per algorithm; the order must match `algos` below
# because the two lists are zipped together when the table is filled.
r = []
r.append(wrapper.kmeans(k, data, run_times=5))
r.append(wrapper.gmm(k, data, run_times=5))
r.append(wrapper.spectral_clustering(k, data, G, run_times=5))
r.append(wrapper.spectral(k, data, G, run_times=5))
r.append(wrapper.kernel_kmeans(k, data, G, run_times=5, ini='random'))
#r.append(wrapper.kernel_kmeans(k, data, G, run_times=5, ini='k-means++'))
#r.append(wrapper.kernel_kmeans(k, data, G, run_times=5, ini='spectral'))
r.append(wrapper.kernel_kgroups(k, data, G, run_times=5, ini='random'))
#r.append(wrapper.kernel_kgroups(k,data,G,run_times=5, ini='k-means++'))
#r.append(wrapper.kernel_kgroups(k,data,G,run_times=5, ini='spectral'))

t = PrettyTable(['Algorithm', 'Accuracy', 'A-Rand'])
algos = ['kmeans', 'GMM', 'spectral clustering', 'spectral',
         'kernel k-means', 'kernel k-groups']
for algo, zh in zip(algos, r):
    t.add_row([
        algo,
        metric.accuracy(z, zh),
        sklearn.metrics.adjusted_rand_score(z, zh),
    ])

# Call form instead of the Python 2-only print statement: with a single
# argument it prints the same text on Python 2 and also runs on Python 3.
print(t)
                         lambda x, y: np.power(np.linalg.norm(x-y), 0.5))
# NOTE(review): the line above is the tail of a call that starts outside this
# fragment (presumably the definition of G2 -- confirm).  G1, G2, X, z, k, n
# and r are also defined outside the fragment; the statements down to the
# last r.append(...) presumably sit inside a loop over `n` -- TODO confirm.

# Kernel matrix for the bounded function 2 - 2*exp(-||x - y|| / 2).
G3 = eclust.kernel_matrix(X,
                          lambda x, y: 2-2*np.exp(-np.linalg.norm(x-y)/2))

# Each stanza below runs one method, scores it against z and appends a
# [method label, n, accuracy] record to r.
zh = wrapper.kmeans(k, X)
a = metric.accuracy(z, zh)
r.append(['k-means', n, a])

zh = wrapper.gmm(k, X)
a = metric.accuracy(z, zh)
r.append(['gmm', n, a])

# Spectral clustering is run on G3 only.
zh = wrapper.spectral_clustering(k, X, G3)
a = metric.accuracy(z, zh)
r.append([r'spectral clustering $\widetilde{\rho}_1$', n, a])

# Kernel k-groups is run once per kernel matrix (G1, G2, G3); the labels
# contain LaTeX markup, presumably for later plotting.
zh = wrapper.kernel_kgroups(k, X, G1)
a = metric.accuracy(z, zh)
r.append([r'kernel k-groups $\rho_{1}$', n, a])

zh = wrapper.kernel_kgroups(k, X, G2)
a = metric.accuracy(z, zh)
r.append([r'kernel k-groups $\rho_{1/2}$', n, a])

zh = wrapper.kernel_kgroups(k, X, G3)
a = metric.accuracy(z, zh)
r.append([r'kernel k-groups $\widetilde{\rho}_{1}$', n, a])

# Collect all records into a table and write it to `output` as CSV.
# NOTE(review): np.array(r) on mixed str/number records presumably turns
# every cell into a string before the frame is built -- confirm this is
# intended.
df = pd.DataFrame(np.array(r), columns=['method', 'points', 'accuracy'])
df.to_csv(output, index=False)
# Measure clustering accuracy against data dimension for five methods.
#
# Each of the `num_experiments` repetitions draws one sample per entry of
# `dimensions`, runs every method once on it and appends a
# [method, dimension, accuracy] record to `r`; the records are then saved
# to `output` as CSV.
r = []
for _ in range(num_experiments):
    for dim in dimensions:
        X, z = generate_data(dim)
        # Euclidean-distance kernel matrix shared by the kernel methods.
        G = eclust.kernel_matrix(X, lambda x, y: np.linalg.norm(x - y))

        # Runners are called right away, one after another, in the same
        # order as the original unrolled code.
        methods = [
            ('k-means', lambda: wrapper.kmeans(k, X)),
            ('gmm', lambda: wrapper.gmm(k, X)),
            ('spectral clustering',
             lambda: wrapper.spectral_clustering(k, X, G)),
            ('kernel k-means', lambda: wrapper.kernel_kmeans(k, X, G)),
            ('kernel k-groups', lambda: wrapper.kernel_kgroups(k, X, G)),
        ]
        for label, run_method in methods:
            zh = run_method()
            a = metric.accuracy(z, zh)
            r.append([label, dim, a])

# Pass the list of records directly; wrapping it in np.array() first would
# turn the numeric dimension/accuracy cells into strings.
df = pd.DataFrame(r, columns=['method', 'dimension', 'accuracy'])
df.to_csv(output, index=False)
# Cluster `data` into 6 groups with six algorithms and print a table of
# accuracy and adjusted Rand index against the labels `z`.  The kernel
# matrix G is defined before this fragment.
# Alternative kernels, kept for experimentation:
#G = energy.eclust.kernel_matrix(data, rho_gauss)
#G = energy.eclust.kernel_matrix(data, rho_exp)

# One clustering result per algorithm; the order must match `algos` below
# because the two lists are zipped together when the table is filled.
r = []
r.append(wrapper.kmeans(6, data, run_times=10))
r.append(wrapper.gmm(6, data, run_times=10))
r.append(wrapper.spectral_clustering(6, data, G, run_times=10))
r.append(wrapper.spectral(6, data, G, run_times=10))
#r.append(wrapper.kernel_kmeans(6, data, G, run_times=10, ini='random'))
r.append(wrapper.kernel_kmeans(6, data, G, run_times=10, ini='k-means++'))
#r.append(wrapper.kernel_kmeans(6, data, G, run_times=10, ini='spectral'))
#r.append(wrapper.kernel_kgroups(6,data,G,run_times=10, ini='random'))
r.append(wrapper.kernel_kgroups(6, data, G, run_times=10, ini='k-means++'))
#r.append(wrapper.kernel_kgroups(6,data,G,run_times=10, ini='spectral'))

t = PrettyTable(['Algorithm', 'Accuracy', 'A-Rand'])
algos = ['kmeans', 'GMM', 'spectral clustering', 'spectral',
         'kernel k-means', 'kernel k-groups']
for algo, zh in zip(algos, r):
    t.add_row([
        algo,
        metric.accuracy(z, zh),
        sklearn.metrics.adjusted_rand_score(z, zh),
    ])

# Call form instead of the Python 2-only print statement: with a single
# argument it prints the same text on Python 2 and also runs on Python 3.
print(t)

# Integer array built from eclust.ztoZ(z); presumably the label vector
# turned into a membership matrix -- confirm against eclust.
Z = np.array(eclust.ztoZ(z), dtype=int)
# Cluster the one-dimensional sample X with three methods, print their
# accuracies, and start the kernel density estimates of the true classes.
# X, z and k are defined before this fragment.

# Column form of X: one single-element row per observation.
Y = np.array([[x] for x in X])

bw = 0.5  # bandwidth
num_points = 1500  # number points for linspace
low = -6
high = 6
#low = -2
#high = 20

### clustering
t = PrettyTable(['Method', 'Accuracy'])
# Kernel matrix built from the plain Euclidean distance.
G = eclust.kernel_matrix(Y, lambda x, y: np.linalg.norm(x - y))

zh_kmeans = wrapper.kmeans(k, Y)
t.add_row(['k-means', metric.accuracy(z, zh_kmeans)])

zh_gmm = wrapper.gmm(k, Y)
t.add_row(['gmm', metric.accuracy(z, zh_gmm)])

zh_kgroups = wrapper.kernel_kgroups(k, Y, G)
t.add_row(['kernel k-groups', metric.accuracy(z, zh_kgroups)])

# Call form instead of the Python 2-only print statement: with a single
# argument it prints the same text on Python 2 and also runs on Python 3.
print(t)

# Evaluation grid for the densities, as a column vector.
X_plot = np.linspace(low, high, num_points)[:, np.newaxis]

### kernel density estimation
# Observations of each true class (labels 0 and 1), as column vectors.
x1_true = X[np.where(z == 0)][:, np.newaxis]
x2_true = X[np.where(z == 1)][:, np.newaxis]

fig = plt.figure()
ax = fig.add_subplot(111)

# Gaussian KDE fitted per true class, with the shared bandwidth `bw`.
kde1 = KernelDensity(kernel='gaussian', bandwidth=bw).fit(x1_true)
log_dens1 = kde1.score_samples(X_plot)
kde2 = KernelDensity(kernel='gaussian', bandwidth=bw).fit(x2_true)
# NOTE(review): this fragment starts inside a loop body whose header is not
# visible.  The statements down to r.append(row) presumably belong to that
# loop, and the first score below is presumably for k-means (first label in
# the summary table) -- TODO confirm against the full script.
a = metric.accuracy(z, zh)
row.append(a)

# Each stanza runs one method, scores it against z and stores the accuracy
# in `row`, in the same order as the labels used for the table below.
zh = wrapper.gmm(k, X)
a = metric.accuracy(z, zh)
row.append(a)

zh = wrapper.spectral_clustering(k, X, G)
a = metric.accuracy(z, zh)
row.append(a)

zh = wrapper.kernel_kmeans(k, X, G, ini='random')
a = metric.accuracy(z, zh)
row.append(a)

zh = wrapper.kernel_kgroups(k, X, G, ini='random')
a = metric.accuracy(z, zh)
row.append(a)

# One row of accuracies per experiment.
r.append(row)

# Stack the rows so that column i holds every accuracy of method i.
r = np.array(r)

t = PrettyTable(['Method', 'Accuracy', 'Std'])
for i, m in enumerate([
        'k-means', 'gmm', 'spectral clustering', 'kernel k-means',
        'kernel k-groups'
]):
    # Column mean and sem(...) of the column.
    # NOTE(review): the header says 'Std' but the value is sem(...);
    # `sem` is defined outside this fragment (presumably scipy.stats.sem,
    # i.e. a standard error) -- confirm the intended label.
    t.add_row([m, r[:, i].mean(), sem(r[:, i])])
# NOTE(review): print statement syntax -- this only parses on Python 2.
print t
# Draw a two-class one-dimensional sample, cluster it with three methods,
# print their accuracies, and split X by true and estimated class.
# n1, n2 and k are defined before this fragment.

# Parameters passed to the sampler for the two classes.
m1 = 0
s1 = 1.5
m2 = 1.5
s2 = 0.3
#X, z = data.univariate_normal([m1, m2], [s1, s2], [n1, n2])
X, z = data.univariate_lognormal([m1, m2], [s1, s2], [n1, n2])

# Column form of X: one single-element row per observation.
Y = np.array([[x] for x in X])

### clustering
t = PrettyTable(['Method', 'Accuracy'])
# Kernel matrix built from the plain Euclidean distance.
G = eclust.kernel_matrix(Y, lambda x, y: np.linalg.norm(x-y))

zh_kmeans = wrapper.kmeans(k, Y)
t.add_row(['k-means', metric.accuracy(z, zh_kmeans)])

zh_gmm = wrapper.gmm(k, Y)
t.add_row(['gmm', metric.accuracy(z, zh_gmm)])

zh_kgroups = wrapper.kernel_kgroups(k, Y, G)
t.add_row(['kernel k-groups', metric.accuracy(z, zh_kgroups)])

# Call form instead of the Python 2-only print statement: with a single
# argument it prints the same text on Python 2 and also runs on Python 3.
print(t)

### estimated classes
# Observations with label 0 / 1 under the true labels and under each
# method's estimated labels.
x1_true = X[np.where(z==0)]
x2_true = X[np.where(z==1)]

x1_kmeans = X[np.where(zh_kmeans==0)]
x2_kmeans = X[np.where(zh_kmeans==1)]

x1_gmm = X[np.where(zh_gmm==0)]
x2_gmm = X[np.where(zh_gmm==1)]

x1_kgroups = X[np.where(zh_kgroups==0)]
x2_kgroups = X[np.where(zh_kgroups==1)]
# Cluster a labelled table into k = 3 groups with six algorithms and print
# accuracy and adjusted Rand index for each.  `df` and `classes` are defined
# before this fragment.

# Column 4 holds the class of each row; map it through `classes` to get the
# reference labels, then drop it so only the features remain.
z = np.array([classes[v] for v in df[4].values])
df = df.drop(4, axis=1)
data = df.values
# Standardise every feature column (subtract its mean, divide by its std).
data = (data - data.mean(axis=0))/data.std(axis=0)

G = eclust.kernel_matrix(data, rho_gauss)

k = 3
nt = 5  # passed as run_times to every algorithm

# One clustering result per algorithm; the order must match `algos` below
# because the two lists are zipped together when the table is filled.
r = []
r.append(wrapper.kmeans(k, data, run_times=nt))
r.append(wrapper.gmm(k, data, run_times=nt))
r.append(wrapper.spectral_clustering(k, data, G, run_times=nt))
r.append(wrapper.spectral(k, data, G, run_times=nt))
r.append(wrapper.kernel_kmeans(k, data, G, run_times=nt, ini='spectral'))
r.append(wrapper.kernel_kgroups(k, data, G, run_times=nt, ini='spectral'))

t = PrettyTable(['Algorithm', 'Accuracy', 'A-Rand'])
algos = ['kmeans', 'GMM', 'spectral clustering', 'spectral',
         'kernel k-means', 'kernel k-groups']
for algo, zh in zip(algos, r):
    t.add_row([
        algo,
        metric.accuracy(z, zh),
        sklearn.metrics.adjusted_rand_score(z, zh),
    ])

# Call form instead of the Python 2-only print statement: with a single
# argument it prints the same text on Python 2 and also runs on Python 3.
print(t)
    X, lambda x, y: np.power(np.linalg.norm(x - y), 0.5))
# NOTE(review): the line above is the tail of a call that starts outside this
# fragment (presumably the definition of G2 -- confirm).  G1, G2, X, z, k, n
# and r are also defined outside the fragment; the statements down to the
# last r.append(...) presumably sit inside a loop over `n` -- TODO confirm.

# Kernel matrix for the bounded function 2 - 2*exp(-||x - y|| / 2).
G3 = eclust.kernel_matrix(
    X, lambda x, y: 2 - 2 * np.exp(-np.linalg.norm(x - y) / 2))

# Each stanza below runs one method, scores it against z and appends a
# [method label, n, accuracy] record to r.
zh = wrapper.kmeans(k, X)
a = metric.accuracy(z, zh)
r.append(['k-means', n, a])

zh = wrapper.gmm(k, X)
a = metric.accuracy(z, zh)
r.append(['gmm', n, a])

# Spectral clustering is run on G3 only.
zh = wrapper.spectral_clustering(k, X, G3)
a = metric.accuracy(z, zh)
r.append([r'spectral clustering $\widetilde{\rho}_1$', n, a])

# Kernel k-groups is run once per kernel matrix (G1, G2, G3); the labels
# contain LaTeX markup, presumably for later plotting.
zh = wrapper.kernel_kgroups(k, X, G1)
a = metric.accuracy(z, zh)
r.append([r'kernel k-groups $\rho_{1}$', n, a])

zh = wrapper.kernel_kgroups(k, X, G2)
a = metric.accuracy(z, zh)
r.append([r'kernel k-groups $\rho_{1/2}$', n, a])

zh = wrapper.kernel_kgroups(k, X, G3)
a = metric.accuracy(z, zh)
r.append([r'kernel k-groups $\widetilde{\rho}_{1}$', n, a])

# Collect all records into a table and write it to `output` as CSV.
# NOTE(review): np.array(r) on mixed str/number records presumably turns
# every cell into a string before the frame is built -- confirm this is
# intended.
df = pd.DataFrame(np.array(r), columns=['method', 'points', 'accuracy'])
df.to_csv(output, index=False)
# Cluster `data` into 3 groups with six algorithms (kernel methods started
# with ini='spectral') and print accuracy and adjusted Rand index for each.
G = eclust.kernel_matrix(data, rho_gauss)
# Alternative kernels, kept for experimentation:
#G = energy.eclust.kernel_matrix(data, rho_gauss)
#G = energy.eclust.kernel_matrix(data, rho_exp)

# One clustering result per algorithm; the order must match `algos` below
# because the two lists are zipped together when the table is filled.
r = []
r.append(wrapper.kmeans(3, data, run_times=5))
r.append(wrapper.gmm(3, data, run_times=5))
r.append(wrapper.spectral_clustering(3, data, G, run_times=5))
r.append(wrapper.spectral(3, data, G, run_times=5))
#r.append(wrapper.kernel_kmeans(3, data, G, run_times=5, ini='random'))
#r.append(wrapper.kernel_kmeans(3, data, G, run_times=5, ini='k-means++'))
r.append(wrapper.kernel_kmeans(3, data, G, run_times=5, ini='spectral'))
#r.append(wrapper.kernel_kgroups(3,data,G,run_times=5, ini='random'))
#r.append(wrapper.kernel_kgroups(3,data,G,run_times=5, ini='k-means++'))
r.append(wrapper.kernel_kgroups(3, data, G, run_times=5, ini='spectral'))

t = PrettyTable(['Algorithm', 'Accuracy', 'A-Rand'])
algos = [
    'kmeans', 'GMM', 'spectral clustering', 'spectral', 'kernel k-means',
    'kernel k-groups'
]
for algo, zh in zip(algos, r):
    t.add_row([
        algo,
        metric.accuracy(z, zh),
        sklearn.metrics.adjusted_rand_score(z, zh)
    ])

# Call form instead of the Python 2-only print statement: with a single
# argument it prints the same text on Python 2 and also runs on Python 3.
print(t)
# NOTE(review): this fragment starts inside a loop body whose header is not
# visible (`row`, X, z, G and k are set up outside it).  The statements down
# to r.append(row) presumably belong to that loop -- TODO confirm against the
# full script.

# Each stanza runs one method, scores it against z and stores the accuracy
# in `row`, in the same order as the labels used for the table below.
zh = wrapper.kmeans(k, X)
a = metric.accuracy(z, zh)
row.append(a)

zh = wrapper.gmm(k, X)
a = metric.accuracy(z, zh)
row.append(a)

zh = wrapper.spectral_clustering(k, X, G)
a = metric.accuracy(z, zh)
row.append(a)

zh = wrapper.kernel_kmeans(k, X, G, ini='random')
a = metric.accuracy(z, zh)
row.append(a)

zh = wrapper.kernel_kgroups(k, X, G, ini='random')
a = metric.accuracy(z, zh)
row.append(a)

# One row of accuracies per experiment.
r.append(row)

# Stack the rows so that column i holds every accuracy of method i.
r = np.array(r)

t = PrettyTable(['Method', 'Accuracy', 'Std'])
for i, m in enumerate(['k-means', 'gmm', 'spectral clustering',
                       'kernel k-means', 'kernel k-groups']):
    # Column mean and sem(...) of the column.
    # NOTE(review): the header says 'Std' but the value is sem(...);
    # `sem` is defined outside this fragment (presumably scipy.stats.sem,
    # i.e. a standard error) -- confirm the intended label.
    t.add_row([m, r[:,i].mean(), sem(r[:,i])])
# NOTE(review): print statement syntax -- this only parses on Python 2.
print t
# Six-cluster comparison: run every algorithm on `data` and tabulate its
# accuracy and adjusted Rand index against the labels `z`.  The kernel
# matrix G comes from code before this fragment.
#G = energy.eclust.kernel_matrix(data, rho_gauss)
#G = energy.eclust.kernel_matrix(data, rho_exp)

# Results are listed in the same order as the names in `algos`.
r = [
    wrapper.kmeans(6, data, run_times=10),
    wrapper.gmm(6, data, run_times=10),
    wrapper.spectral_clustering(6, data, G, run_times=10),
    wrapper.spectral(6, data, G, run_times=10),
    #wrapper.kernel_kmeans(6, data, G, run_times=10, ini='random'),
    wrapper.kernel_kmeans(6, data, G, run_times=10, ini='k-means++'),
    #wrapper.kernel_kmeans(6, data, G, run_times=10, ini='spectral'),
    #wrapper.kernel_kgroups(6,data,G,run_times=10, ini='random'),
    wrapper.kernel_kgroups(6, data, G, run_times=10, ini='k-means++'),
    #wrapper.kernel_kgroups(6,data,G,run_times=10, ini='spectral'),
]

t = PrettyTable(['Algorithm', 'Accuracy', 'A-Rand'])
algos = ['kmeans', 'GMM', 'spectral clustering', 'spectral',
         'kernel k-means', 'kernel k-groups']
for algo, zh in zip(algos, r):
    # Score the result first, then add the finished row to the table.
    acc = metric.accuracy(z, zh)
    a_rand = sklearn.metrics.adjusted_rand_score(z, zh)
    t.add_row([algo, acc, a_rand])