def initialize(method, k, G, X, W):
    """Produce the starting label matrix for one clustering run.

    Parameters
    ----------
    method : str
        ``"spectral"`` uses ``init.topeigen(k, G, W)``; ``"k-means++"`` uses
        ``init.kmeans_plus(k, X)``; any other value draws uniform random
        integer labels in ``[0, k)``, one per element of ``X``.
    k : int
        Number of clusters.
    G, W
        Forwarded unchanged to ``init.topeigen`` (spectral branch only).
    X
        Data set; used by the k-means++ branch and, through ``len(X)``, by
        the random branch.

    Returns
    -------
    The label vector converted with ``eclust.ztoZ``.
    """
    if method == "k-means++":
        labels = init.kmeans_plus(k, X)
    elif method == "spectral":
        labels = init.topeigen(k, G, W)
    else:
        # Every unrecognised name (including "random") ends up here.
        labels = np.random.randint(0, k, len(X))
    return eclust.ztoZ(labels)
def spectral(k, X, G, W=None, run_times=5):
    """Repeat the ``init.topeigen`` labelling and keep the best-scoring run.

    Every run is converted with ``eclust.ztoZ`` and scored with
    ``eclust.objective``; the label vector with the largest score wins.

    Parameters
    ----------
    k : int
        Number of clusters, forwarded to ``init.topeigen``.
    X
        Data set. Only ``len(X)`` is used, to size the default ``W``.
    G
        Matrix forwarded to ``init.topeigen`` and ``eclust.objective``
        (presumably the kernel/Gram matrix -- confirm against callers).
    W : optional
        Weight matrix. When ``None`` the identity ``np.eye(len(X))`` is used.
    run_times : int
        Number of repetitions; must be at least 1.

    Returns
    -------
    The label vector of the run with the highest objective value.

    Raises
    ------
    ValueError
        If ``run_times`` is smaller than 1, or if no run produced a score
        greater than ``-inf`` (for example when every score is NaN).
    """
    if run_times < 1:
        raise ValueError("run_times must be at least 1, got %r" % (run_times,))
    # Identity check: comparing an array with ``== None`` would be elementwise.
    if W is None:
        W = np.eye(len(X))

    best_score = -np.inf
    best_z = None
    for _ in range(run_times):
        # NOTE(review): run_times is also forwarded to init.topeigen, so the
        # repetitions may be nested -- confirm this is intended.
        zh = init.topeigen(k, G, W, run_times=run_times)
        score = eclust.objective(eclust.ztoZ(zh), G, W)
        if score > best_score:
            best_score = score
            best_z = zh

    if best_z is None:
        # Previously this surfaced as an UnboundLocalError on ``best_z``.
        raise ValueError("no run produced a comparable objective value")
    return best_z
def kernel_kgroups(k, X, G, W=None, run_times=5, ini="k-means++",
                   max_iter=300):
    """Run ``eclust.kernel_kgroups`` from several starts and keep the best.

    Each repetition builds a fresh starting label matrix with
    ``initialize(ini, k, G, X, W)``, runs ``eclust.kernel_kgroups`` from it,
    and scores the outcome with ``eclust.objective``. The label vector with
    the largest score is returned.

    Parameters
    ----------
    k : int
        Number of clusters.
    X
        Data set; forwarded to ``initialize`` and used to size the default
        ``W``.
    G
        Matrix forwarded to ``initialize``, ``eclust.kernel_kgroups`` and
        ``eclust.objective`` (presumably the kernel/Gram matrix -- confirm
        against callers).
    W : optional
        Weight matrix. When ``None`` the identity ``np.eye(len(X))`` is used.
    run_times : int
        Number of restarts; must be at least 1.
    ini : str
        Initialisation name understood by ``initialize``.
    max_iter : int
        Iteration cap forwarded to ``eclust.kernel_kgroups``. Defaults to
        300, the value that used to be hard-coded.

    Returns
    -------
    The label vector of the restart with the highest objective value.

    Raises
    ------
    ValueError
        If ``run_times`` is smaller than 1, or if no restart produced a score
        greater than ``-inf`` (for example when every score is NaN).
    """
    if run_times < 1:
        raise ValueError("run_times must be at least 1, got %r" % (run_times,))
    # Identity check: comparing an array with ``== None`` would be elementwise.
    if W is None:
        W = np.eye(len(X))

    best_score = -np.inf
    best_z = None
    for _ in range(run_times):
        Z0 = initialize(ini, k, G, X, W)
        zh = eclust.kernel_kgroups(k, G, Z0, W, max_iter=max_iter)
        score = eclust.objective(eclust.ztoZ(zh), G, W)
        if score > best_score:
            best_score = score
            best_z = zh

    if best_z is None:
        # Previously this surfaced as an UnboundLocalError on ``best_z``.
        raise ValueError("no restart produced a comparable objective value")
    return best_z
# Kernel k-means and kernel k-groups results are appended to `r` (created
# before this fragment). Only the 'k-means++' initialisation is active; the
# other initialisations are left commented out so they can be switched on.
#r.append(wrapper.kernel_kmeans(6, data, G, run_times=10, ini='random'))
r.append(wrapper.kernel_kmeans(6, data, G, run_times=10, ini='k-means++'))
#r.append(wrapper.kernel_kmeans(6, data, G, run_times=10, ini='spectral'))
#r.append(wrapper.kernel_kgroups(6,data,G,run_times=10, ini='random'))
r.append(wrapper.kernel_kgroups(6, data, G, run_times=10, ini='k-means++'))
#r.append(wrapper.kernel_kgroups(6,data,G,run_times=10, ini='spectral'))

# Summary table: one row per algorithm with its accuracy and adjusted Rand
# index against the reference labels `z`.
t = PrettyTable(['Algorithm', 'Accuracy', 'A-Rand'])
algos = ['kmeans', 'GMM', 'spectral clustering', 'spectral',
         'kernel k-means', 'kernel k-groups']
# zip pairs names and results by position and stops at the shorter sequence,
# so the order of `algos` must match the order of the appends into `r`.
for algo, zh in zip(algos, r):
    t.add_row([algo,
               metric.accuracy(z, zh),
               sklearn.metrics.adjusted_rand_score(z, zh)])
# Parenthesised so the line parses under both Python 2 and Python 3.
print(t)

# `zh` still holds the last labelling visited by the loop above; it is the
# one exported as the predicted label matrix.
Z = np.array(eclust.ztoZ(z), dtype=int)
Zh = np.array(eclust.ztoZ(zh), dtype=int)
df = pd.DataFrame(Z)
df.to_csv('data/dermatology_true_label_matrix.csv', index=False, header=None)
df = pd.DataFrame(Zh)
df.to_csv('data/dermatology_pred_label_matrix.csv', index=False, header=None)
num_experiments = 10
# One row per experiment; columns 0-2 receive the accuracies computed below.
table = np.zeros((num_experiments, 5))

# Loop-invariant settings: the number of clusters and the distance handed to
# eclust.kernel_matrix.
k = 2


def rho(x, y):
    """Euclidean distance between two points."""
    return np.linalg.norm(x - y)


for i in range(num_experiments):
    X, z = data.univariate_lognormal([0, -1.5], [0.3, 1.5], [100, 100])
    #X, z = data.univariate_normal([0, 5], [1, 22], [15, 15])
    # Column form of the sample: one single-element row per observation.
    Y = np.array([[x] for x in X])

    # Column 0: 1D energy clustering on the raw sample.
    zh, cost = two_clusters1D(X)
    table[i, 0] = accuracy(z, zh)

    # Two starting points: k-means++ labels and spectral labels.
    z0 = initialization.kmeanspp(k, Y, ret='labels')
    Z0 = eclust.ztoZ(z0)
    G = eclust.kernel_matrix(Y, rho)
    z1 = initialization.spectral(k, G)
    Z1 = eclust.ztoZ(z1)

    # Columns 1 and 2: Hartigan's method from each starting point.
    zh = eclust.energy_hartigan(k, G, Z0)
    table[i, 1] = accuracy(z, zh)
    zh = eclust.energy_hartigan(k, G, Z1)
    table[i, 2] = accuracy(z, zh)

    # Standard k-means on the column-form sample.
    km = KMeans(k)
    zh = km.fit_predict(Y)
# Append the kernel k-means and kernel k-groups labellings to `r` (created
# before this fragment). The commented-out calls are alternative
# initialisations kept for quick switching.
r.append(wrapper.kernel_kmeans(6, data, G, run_times=10, ini='k-means++'))
#r.append(wrapper.kernel_kmeans(6, data, G, run_times=10, ini='spectral'))
#r.append(wrapper.kernel_kgroups(6,data,G,run_times=10, ini='random'))
r.append(wrapper.kernel_kgroups(6, data, G, run_times=10, ini='k-means++'))
#r.append(wrapper.kernel_kgroups(6,data,G,run_times=10, ini='spectral'))

# Report accuracy and adjusted Rand index of every labelling against the
# reference labels `z`.
t = PrettyTable(['Algorithm', 'Accuracy', 'A-Rand'])
algos = [
    'kmeans', 'GMM', 'spectral clustering', 'spectral', 'kernel k-means',
    'kernel k-groups'
]
# Names and results are matched by position; zip stops at the shorter of the
# two, so `algos` has to follow the order in which `r` was filled.
for algo, zh in zip(algos, r):
    t.add_row([
        algo,
        metric.accuracy(z, zh),
        sklearn.metrics.adjusted_rand_score(z, zh)
    ])
# Function-call form prints the same table on Python 2 and also parses on
# Python 3, where the bare `print t` statement is a syntax error.
print(t)

# After the loop `zh` is the last labelling that was tabulated; that one is
# written out next to the reference labels.
Z = np.array(eclust.ztoZ(z), dtype=int)
Zh = np.array(eclust.ztoZ(zh), dtype=int)
df = pd.DataFrame(Z)
df.to_csv('data/dermatology_true_label_matrix.csv', index=False, header=None)
df = pd.DataFrame(Zh)
df.to_csv('data/dermatology_pred_label_matrix.csv', index=False, header=None)