def generate_data(D):
    """Sample a balanced two-component Gaussian mixture in R^D; the second
    mean is shifted by 0.7 along the first d coordinates. `total_points`
    and the `data` module are defined at module level."""
    d = 10
    m1 = np.zeros(D)
    s1 = np.eye(D)
    m2 = np.concatenate((0.7 * np.ones(d), np.zeros(D - d)))
    s2 = np.eye(D)
    n1, n2 = np.random.multinomial(total_points, [0.5, 0.5])
    X, z = data.multivariate_normal([m1, m2], [s1, s2], [n1, n2])
    return X, z
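# The `data` module used above is internal to this repo. For reference, a
# minimal sketch of what `data.multivariate_normal(means, covs, counts)` is
# assumed to do: sample each Gaussian component and return the stacked points
# with integer component labels. `multivariate_normal_sketch` is a
# hypothetical stand-in, not the repo's actual implementation.
import numpy as np

def multivariate_normal_sketch(means, covs, counts):
    # one block of samples per mixture component
    X = np.concatenate([np.random.multivariate_normal(m, s, n)
                        for m, s, n in zip(means, covs, counts)])
    # label each point with the index of the component it came from
    z = np.concatenate([j * np.ones(n, dtype=int)
                        for j, n in enumerate(counts)])
    return X, z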
def generate_data(m):
    """Sample an unbalanced Gaussian mixture whose class proportions are
    controlled by m. D, d, and N are module-level globals."""
    m1 = np.zeros(D)
    s1 = np.eye(D)
    m2 = np.concatenate((1.5 * np.ones(d), np.zeros(D - d)))
    s2 = np.diag(np.concatenate((.5 * np.ones(d), np.ones(D - d))))
    # true division assumed (Python 3, or `from __future__ import division`)
    pi1 = (N - m) / N / 2
    pi2 = (N + m) / N / 2
    n1, n2 = np.random.multinomial(N, [pi1, pi2])
    X, z = data.multivariate_normal([m1, m2], [s1, s2], [n1, n2])
    return X, z
def generate_data(D):
    """Sample a balanced mixture whose second component has unequal
    variances along the first d coordinates."""
    d = 10
    m1 = np.zeros(D)
    s1 = np.eye(D)
    m2 = np.concatenate((np.ones(d), np.zeros(D - d)))
    s2_1 = np.array([1.367, 3.175, 3.247, 4.403, 1.249,
                     1.969, 4.035, 4.237, 2.813, 3.637])
    s2 = np.diag(np.concatenate((s2_1, np.ones(D - d))))
    n1, n2 = np.random.multinomial(total_points, [0.5, 0.5])
    X, z = data.multivariate_normal([m1, m2], [s1, s2], [n1, n2])
    return X, z
def generate_data(n):
    """Sample n points from a two-component mixture; the component
    distributions are chosen by the module-level `distr_type`."""
    m1 = np.zeros(D)
    s1 = 0.5 * np.eye(D)
    m2 = 0.5 * np.concatenate((np.ones(d), np.zeros(D - d)))
    s2 = np.eye(D)
    n1, n2 = np.random.multinomial(n, [0.5, 0.5])
    if distr_type == 'normal':
        X, z = data.multivariate_normal([m1, m2], [s1, s2], [n1, n2])
    elif distr_type == 'lognormal':
        X, z = data.multivariate_lognormal([m1, m2], [s1, s2], [n1, n2])
    return X, z
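# `data.multivariate_lognormal` is likewise repo-internal. A common
# construction, assumed in this sketch, is to exponentiate multivariate
# Gaussian samples coordinate-wise; `multivariate_lognormal_sketch` is
# hypothetical and may differ from the repo's code.

def multivariate_lognormal_sketch(means, covs, counts):
    X = np.concatenate([np.exp(np.random.multivariate_normal(m, s, n))
                        for m, s, n in zip(means, covs, counts)])
    z = np.concatenate([j * np.ones(n, dtype=int)
                        for j, n in enumerate(counts)])
    return X, z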
        # weighted cost of adding `point` to this partition
        cost = energy([point], points_in_partition) * (n / (n + 1))
        costs.append(cost)
    costs = np.array(costs)
    min_index = costs.argmin()
    min_cost = costs[min_index]
    return min_cost, min_index


###############################################################################
if __name__ == '__main__':
    import data
    from metric import accuracy

    m1 = np.array([0, 0])
    s1 = np.array([[1, 0], [0, 1]])
    n1 = 100
    m2 = np.array([3, 0])
    s2 = np.array([[1, 0], [0, 10]])
    n2 = 100
    X, true_labels = data.multivariate_normal([m1, m2], [s1, s2], [n1, n2])

    ec = EClust(n_clusters=2, max_iter=10, init='kmeans++')
    labels = ec.fit_predict(X)
    print(accuracy(labels, true_labels))

    km = KMeans(2)
    labels2 = km.fit_predict(X)
    print(accuracy(labels2, true_labels))
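# `energy(A, B)` above is the repo's two-sample energy statistic. For
# reference, the standard sample energy distance of Szekely & Rizzo is
# sketched below; `energy_distance_sketch` is illustrative and the repo's
# `energy` may use a different scaling.

def energy_distance_sketch(A, B):
    A, B = np.atleast_2d(A), np.atleast_2d(B)
    # mean pairwise Euclidean distance between two samples
    d = lambda U, V: np.mean(
        np.linalg.norm(U[:, None, :] - V[None, :, :], axis=-1))
    return 2 * d(A, B) - d(A, A) - d(B, B)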
###############################################################################
if __name__ == "__main__":
    import data
    import metric
    from prettytable import PrettyTable
    import sys

    n = 400
    d = 10
    n1, n2 = np.random.multinomial(n, [1 / 2, 1 / 2])
    m1 = np.zeros(d)
    m2 = 0.7 * np.ones(d)
    s1 = s2 = np.eye(d)
    X, z = data.multivariate_normal([m1, m2], [s1, s2], [n1, n2])

    G = eclust.kernel_matrix(X, lambda x, y: np.linalg.norm(x - y))
    W = np.eye(n)
    k = 2

    t = PrettyTable(["Method", "Accuracy"])

    zh = kernel_kmeans(k, X, G, W, run_times=5, ini="k-means++")
    a = metric.accuracy(z, zh)
    t.add_row(["Kernel k-means", a])

    zh = kernel_kgroups(k, X, G, W, run_times=5, ini="k-means++")
    a = metric.accuracy(z, zh)
    t.add_row(["Kernel k-groups", a])
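# `eclust.kernel_matrix(X, fn)` builds the n x n matrix of a pairwise
# function, as its usage above suggests. A direct (unoptimized) sketch of
# that behavior; the repo's version is likely vectorized.

def kernel_matrix_sketch(X, fn):
    n = len(X)
    G = np.zeros((n, n))
    for i in range(n):
        for j in range(n):
            G[i, j] = fn(X[i], X[j])   # G[i, j] = fn(x_i, x_j)
    return G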
import data
import eclust
import metric
import sys

table = []
for i in range(100):

    # generate data ##############
    D = 2
    n1 = 100
    n2 = 100
    m1 = 0.5 * np.ones(D)
    s1 = np.eye(D)
    m2 = 2 * np.ones(D)
    s2 = 1.2 * np.eye(D)
    X, z = data.multivariate_normal([m1, m2], [s1, s2], [n1, n2])
    k = 2
    #X, z = data.circles([1, 3], [0.1, 0.1], [200, 200])
    #G = eclust.kernel_matrix(X,
    #    lambda x, y: 2 - 2*np.exp(-1/4*np.power(np.linalg.norm(x-y), 2)))
    G = eclust.kernel_matrix(
        X, lambda x, y: np.power(np.linalg.norm(x - y), 1))
    ##############################

    results = []

    zh = kmeanspp(k, X)
    results.append(metric.accuracy(z, zh))

    zh = spectral(k, G)
    results.append(metric.accuracy(z, zh))

    zh = topeigen(k, G, run_times=10, init='k-means++')
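# `spectral(k, G)` above clusters from the Gram matrix. A minimal sketch of
# the usual recipe (embed on the top-k eigenvectors of G, then run k-means
# on the rows); the repo's `spectral` and `topeigen` may differ in details.

def spectral_sketch(k, G):
    from sklearn.cluster import KMeans
    vals, vecs = np.linalg.eigh(G)        # eigenvalues in ascending order
    Y = vecs[:, -k:]                      # top-k eigenvectors as features
    return KMeans(n_clusters=k).fit_predict(Y)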
        n_samples = X.shape[0]
        dist = np.zeros((n_samples, self.n_clusters))
        self._compute_dist(K, dist, self.within_distances_,
                           update_within=False)
        return dist.argmin(axis=1)


###############################################################################
if __name__ == '__main__':
    import energy
    import data
    from metric import accuracy
    from sklearn.cluster import KMeans

    X, z = data.multivariate_normal([[0, 0], [2, 0]],
                                    [np.eye(2), np.eye(2)],
                                    [100, 100])
    kernel = energy.energy_kernel

    km = KernelEnergy(n_clusters=2, max_iter=100, verbose=1,
                      kernel_params={'alpha': .8})
    zh = km.fit_predict(X)
    print(accuracy(z, zh))

    km = KMeans(n_clusters=2)
    zh = km.fit_predict(X)
    print(accuracy(z, zh))
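# `energy.energy_kernel` is repo-internal. In the energy-statistics
# literature the kernel induced by the semimetric |x - y|^alpha is commonly
# written as below; this sketch assumes that convention, and the repo's
# normalization may differ.

def energy_kernel_sketch(x, y, alpha=1.0):
    return 0.5 * (np.linalg.norm(x) ** alpha
                  + np.linalg.norm(y) ** alpha
                  - np.linalg.norm(x - y) ** alpha)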
###############################################################################
if __name__ == "__main__":
    # GMM was renamed GaussianMixture in modern scikit-learn
    from sklearn.mixture import GMM as sk_GMM
    import data
    import metric

    #np.random.seed(12)

    D = 10
    m1 = np.zeros(D)
    s1 = np.eye(D)
    m2 = np.ones(D)
    s2 = 2 * np.eye(D)
    X, z = data.multivariate_normal([m1, m2], [s1, s2], [100, 100])
    k = 2

    # The scikit-learn library has a better procedure to estimate the
    # covariance matrix.
    g = GMM(k)
    zh = g.fit_predict(X)
    print("GMM class:", metric.accuracy(z, zh))

    zh = gmm(k, X)
    print("GMM func:", metric.accuracy(z, zh))

    sg = sk_GMM(k)
    sg.fit(X)
    zh = sg.predict(X)
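# `metric.accuracy(z, zh)` scores a clustering against ground truth up to a
# permutation of cluster labels. A minimal sketch of that idea using the
# Hungarian algorithm; `accuracy_sketch` is hypothetical and the repo's
# implementation may differ.

def accuracy_sketch(z, zh):
    from scipy.optimize import linear_sum_assignment
    k = max(z.max(), zh.max()) + 1
    C = np.zeros((k, k), dtype=int)
    for a, b in zip(z, zh):
        C[a, b] += 1                        # overlap between label a and b
    rows, cols = linear_sum_assignment(-C)  # maximize matched overlap
    return C[rows, cols].sum() / len(z)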