def fn(pipe):
     p = pipe \
         .split(5) \
             .pipe(kmeans(clusters)) \
             .y(seeding_centroids(0.1)) \
             .y(label_consensus()) \
             .pipe(knn(neighbors)) \
             .pipe(predict()) \
             .pipe(evaluate()) \
         .merge('evaluation', average('evaluation'))
     return p
sigma2 = sum([np.linalg.norm(x-y)**2 
                for x in data for y in data])/(len(data)**2)
sigma = np.sqrt(sigma2)

rho_exp = lambda x, y: 2-2*np.exp(-np.linalg.norm(x-y)/(2*sigma))
rho_gauss = lambda x, y: 2-2*np.exp(-np.linalg.norm(x-y)**2/(2*(sigma)**2))

G = eclust.kernel_matrix(data, rho)
#G = eclust.kernel_matrix(data, rho_gauss)
#G = eclust.kernel_matrix(data, rho_exp)

k = 3

r = []
r.append(wrapper.kmeans(k, data, run_times=5))
r.append(wrapper.gmm(k, data, run_times=5))
r.append(wrapper.spectral_clustering(k, data, G, run_times=5))
r.append(wrapper.spectral(k, data, G, run_times=5))
r.append(wrapper.kernel_kmeans(k, data, G, run_times=5, ini='random'))
#r.append(wrapper.kernel_kmeans(k, data, G, run_times=5, ini='k-means++'))
#r.append(wrapper.kernel_kmeans(k, data, G, run_times=5, ini='spectral'))
r.append(wrapper.kernel_kgroups(k,data,G,run_times=5, ini='random'))
#r.append(wrapper.kernel_kgroups(k,data,G,run_times=5, ini='k-means++'))
#r.append(wrapper.kernel_kgroups(k,data,G,run_times=5, ini='spectral'))

t = PrettyTable(['Algorithm', 'Accuracy', 'A-Rand'])
algos = ['kmeans', 'GMM', 'spectral clustering', 'spectral', 
         'kernel k-means', 'kernel k-groups']

for algo, zh in zip(algos, r):
    m1 = np.zeros(D)
    s1 = np.eye(D)
    m2 = np.concatenate((0.7 * np.ones(d), np.zeros(D - d)))
    s2 = np.eye(D)
    n1, n2 = np.random.multinomial(total_points, [0.5, 0.5])
    X, z = data.multivariate_normal([m1, m2], [s1, s2], [n1, n2])
    return X, z


r = []
for _ in range(num_experiments):
    for dim in dimensions:
        X, z = generate_data(dim)
        G = eclust.kernel_matrix(X, lambda x, y: np.linalg.norm(x - y))

        zh = wrapper.kmeans(k, X)
        a = metric.accuracy(z, zh)
        r.append(['k-means', dim, a])

        zh = wrapper.gmm(k, X)
        a = metric.accuracy(z, zh)
        r.append(['gmm', dim, a])

        zh = wrapper.spectral_clustering(k, X, G)
        a = metric.accuracy(z, zh)
        r.append(['spectral clustering', dim, a])

        zh = wrapper.kernel_kmeans(k, X, G)
        a = metric.accuracy(z, zh)
        r.append(['kernel k-means', dim, a])
    d = 10
    m1 = np.zeros(D)
    s1 = np.eye(D)
    m2 = np.concatenate((0.7*np.ones(d), np.zeros(D-d)))
    s2 = np.eye(D)
    n1, n2 = np.random.multinomial(total_points, [0.5, 0.5])
    X, z = data.multivariate_normal([m1, m2], [s1, s2], [n1, n2])
    return X, z

r = []
for _ in range(num_experiments):
    for dim in dimensions:
        X, z = generate_data(dim)
        G = eclust.kernel_matrix(X, lambda x, y: np.linalg.norm(x-y))
        
        zh = wrapper.kmeans(k, X)
        a = metric.accuracy(z, zh)
        r.append(['k-means', dim, a])
        
        zh = wrapper.gmm(k, X)
        a = metric.accuracy(z, zh)
        r.append(['gmm', dim, a])
        
        zh = wrapper.spectral_clustering(k, X, G)
        a = metric.accuracy(z, zh)
        r.append(['spectral clustering', dim, a])
        
        zh = wrapper.kernel_kmeans(k, X, G)
        a = metric.accuracy(z, zh)
        r.append(['kernel k-means', dim, a])
        
Example #5
0
m2 = 1.5
s2 = 0.3
X, z = data.univariate_normal([m1, m2], [s1, s2], [n1, n2])
#X, z = data.univariate_lognormal([m1, m2], [s1, s2], [n1, n2])
Y = np.array([[x] for x in X])
bw = 0.5  # bandwidth
num_points = 1500  # number points for linspace
low = -6
high = 6
#low = -2
#high = 20

### clustering
t = PrettyTable(['Method', 'Accuracy'])
G = eclust.kernel_matrix(Y, lambda x, y: np.linalg.norm(x - y))
zh_kmeans = wrapper.kmeans(k, Y)
t.add_row(['k-means', metric.accuracy(z, zh_kmeans)])
zh_gmm = wrapper.gmm(k, Y)
t.add_row(['gmm', metric.accuracy(z, zh_gmm)])
zh_kgroups = wrapper.kernel_kgroups(k, Y, G)
t.add_row(['kernel k-groups', metric.accuracy(z, zh_kgroups)])
print t

X_plot = np.linspace(low, high, num_points)[:, np.newaxis]

### kernel density estimation
x1_true = X[np.where(z == 0)][:, np.newaxis]
x2_true = X[np.where(z == 1)][:, np.newaxis]

fig = plt.figure()
ax = fig.add_subplot(111)
# delete missing entries
delete_missing = np.where(data=='?')[0]
data = np.delete(data, delete_missing, axis=0)
data = np.array(data, dtype=float)
z = np.delete(z, delete_missing, axis=0)

# normalize data
data = (data - data.mean(axis=0))/data.std(axis=0)

G = eclust.kernel_matrix(data, rho)
#G = energy.eclust.kernel_matrix(data, rho_gauss)
#G = energy.eclust.kernel_matrix(data, rho_exp)

r = []
r.append(wrapper.kmeans(6, data, run_times=10))
r.append(wrapper.gmm(6, data, run_times=10))
r.append(wrapper.spectral_clustering(6, data, G, run_times=10))

r.append(wrapper.spectral(6, data, G, run_times=10))

#r.append(wrapper.kernel_kmeans(6, data, G, run_times=10, ini='random'))
r.append(wrapper.kernel_kmeans(6, data, G, run_times=10, ini='k-means++'))
#r.append(wrapper.kernel_kmeans(6, data, G, run_times=10, ini='spectral'))

#r.append(wrapper.kernel_kgroups(6,data,G,run_times=10, ini='random'))
r.append(wrapper.kernel_kgroups(6,data,G,run_times=10, ini='k-means++'))
#r.append(wrapper.kernel_kgroups(6,data,G,run_times=10, ini='spectral'))

t = PrettyTable(['Algorithm', 'Accuracy', 'A-Rand'])
algos = ['kmeans', 'GMM', 'spectral clustering', 'spectral', 
### generate data
k = 2
n = 2000
n1, n2 = np.random.multinomial(n, [0.5, 0.5])
m1 = 0
s1 = 1.5
m2 = 1.5
s2 = 0.3
#X, z = data.univariate_normal([m1, m2], [s1, s2], [n1, n2])
X, z = data.univariate_lognormal([m1, m2], [s1, s2], [n1, n2])
Y = np.array([[x] for x in X])

### clustering
t = PrettyTable(['Method', 'Accuracy'])
G = eclust.kernel_matrix(Y, lambda x, y: np.linalg.norm(x-y))
zh_kmeans = wrapper.kmeans(k, Y)
t.add_row(['k-means', metric.accuracy(z, zh_kmeans)])
zh_gmm = wrapper.gmm(k, Y)
t.add_row(['gmm', metric.accuracy(z, zh_gmm)])
zh_kgroups = wrapper.kernel_kgroups(k, Y, G)
t.add_row(['kernel k-groups', metric.accuracy(z, zh_kgroups)])
print t

### estimated classes
x1_true = X[np.where(z==0)]
x2_true = X[np.where(z==1)]

x1_kmeans = X[np.where(zh_kmeans==0)]
x2_kmeans = X[np.where(zh_kmeans==1)]

x1_gmm = X[np.where(zh_gmm==0)]
from pipe import Pipe
from wrapper import kmeans
from pipetools import *
from dataset import *

dataset = get_pendigits()

a = Pipe()\
    .x(dataset.X)\
    .pipe(kmeans(dataset.cluster_cnt))\
    .connect(stop())

print(a)
Example #9
0
# delete missing entries
delete_missing = np.where(data == '?')[0]
data = np.delete(data, delete_missing, axis=0)
data = np.array(data, dtype=float)
z = np.delete(z, delete_missing, axis=0)

# normalize data
data = (data - data.mean(axis=0)) / data.std(axis=0)

G = eclust.kernel_matrix(data, rho)
#G = energy.eclust.kernel_matrix(data, rho_gauss)
#G = energy.eclust.kernel_matrix(data, rho_exp)

r = []
r.append(wrapper.kmeans(6, data, run_times=10))
r.append(wrapper.gmm(6, data, run_times=10))
r.append(wrapper.spectral_clustering(6, data, G, run_times=10))

r.append(wrapper.spectral(6, data, G, run_times=10))

#r.append(wrapper.kernel_kmeans(6, data, G, run_times=10, ini='random'))
r.append(wrapper.kernel_kmeans(6, data, G, run_times=10, ini='k-means++'))
#r.append(wrapper.kernel_kmeans(6, data, G, run_times=10, ini='spectral'))

#r.append(wrapper.kernel_kgroups(6,data,G,run_times=10, ini='random'))
r.append(wrapper.kernel_kgroups(6, data, G, run_times=10, ini='k-means++'))
#r.append(wrapper.kernel_kgroups(6,data,G,run_times=10, ini='spectral'))

t = PrettyTable(['Algorithm', 'Accuracy', 'A-Rand'])
algos = [