def generate_data(D):
    # Two Gaussian clusters in R^D that differ only in their first d
    # coordinates; `total_points` and the `data` helper module are assumed
    # to be defined at module level in the source.
    d = 10
    m1 = np.zeros(D)
    s1 = np.eye(D)
    m2 = np.concatenate((0.7 * np.ones(d), np.zeros(D - d)))
    s2 = np.eye(D)
    n1, n2 = np.random.multinomial(total_points, [0.5, 0.5])
    X, z = data.multivariate_normal([m1, m2], [s1, s2], [n1, n2])
    return X, z
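The `data` module used by every example here is external and not shown. A minimal sketch of what its `multivariate_normal` helper plausibly does, inferred from the call sites alone (the repo's real implementation may differ):

import numpy as np

def multivariate_normal(means, covs, counts):
    # Draw counts[j] points from the j-th Gaussian and stack them.
    X = np.concatenate([
        np.random.multivariate_normal(m, s, n)
        for m, s, n in zip(means, covs, counts)
    ])
    # Label each point with the index of the component that produced it.
    z = np.concatenate([np.full(n, j) for j, n in enumerate(counts)])
    return X, z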
Example #4
def generate_data(m):
    # Same two-cluster setup, but `m` skews the mixture weights: e.g. with
    # N = 400 and m = 80, pi1 = (400 - 80)/400/2 = 0.4 and pi2 = 0.6, so `m`
    # controls class imbalance. `N`, `D`, and `d` are assumed module-level
    # globals.
    m1 = np.zeros(D)
    s1 = np.eye(D)
    m2 = np.concatenate((1.5 * np.ones(d), np.zeros(D - d)))
    s2 = np.diag(np.concatenate((.5 * np.ones(d), np.ones(D - d))))
    pi1 = (N - m) / N / 2
    pi2 = (N + m) / N / 2
    n1, n2 = np.random.multinomial(N, [pi1, pi2])
    X, z = data.multivariate_normal([m1, m2], [s1, s2], [n1, n2])
    return X, z
def generate_data(n):
    # As above, but the sample size `n` is the argument, and the module-level
    # `distr_type` flag switches between normal and lognormal clusters.
    m1 = np.zeros(D)
    s1 = 0.5 * np.eye(D)
    m2 = 0.5 * np.concatenate((np.ones(d), np.zeros(D - d)))
    s2 = np.eye(D)
    n1, n2 = np.random.multinomial(n, [0.5, 0.5])
    if distr_type == 'normal':
        X, z = data.multivariate_normal([m1, m2], [s1, s2], [n1, n2])
    elif distr_type == 'lognormal':
        X, z = data.multivariate_lognormal([m1, m2], [s1, s2], [n1, n2])
    return X, z
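`data.multivariate_lognormal` is likewise external; presumably it exponentiates Gaussian samples coordinate-wise, since a lognormal vector is exp of a normal one. A minimal sketch under that assumption:

import numpy as np

def multivariate_lognormal(means, covs, counts):
    # exp() of multivariate normal draws; component labels are unchanged.
    X = np.concatenate([
        np.exp(np.random.multivariate_normal(m, s, n))
        for m, s, n in zip(means, covs, counts)
    ])
    z = np.concatenate([np.full(n, j) for j, n in enumerate(counts)])
    return X, z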
Example #6
def generate_data(D):
    # Here the second cluster gets unequal variances in its first d = 10
    # coordinates, listed in s2_1.
    d = 10
    m1 = np.zeros(D)
    s1 = np.eye(D)
    m2 = np.concatenate((np.ones(d), np.zeros(D - d)))
    s2_1 = np.array([1.367, 3.175, 3.247, 4.403, 1.249,
                     1.969, 4.035, 4.237, 2.813, 3.637])
    s2 = np.diag(np.concatenate((s2_1, np.ones(D - d))))
    n1, n2 = np.random.multinomial(total_points, [0.5, 0.5])
    X, z = data.multivariate_normal([m1, m2], [s1, s2], [n1, n2])
    return X, z
Example #8
def best_assignment(point, partitions):
    # Enclosing signature/loop reconstructed from a truncated fragment.
    costs = []
    for points_in_partition in partitions:
        n = len(points_in_partition)
        cost = energy([point], points_in_partition) * (n / (n + 1))
        costs.append(cost)
    costs = np.array(costs)
    min_index = costs.argmin()
    min_cost = costs[min_index]
    return min_cost, min_index
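The `energy` call above is assumed to be the two-sample energy distance, E(A, B) = 2 E|a - b| - E|a - a'| - E|b - b'|. A minimal sketch (names and vectorization are illustrative, not the repo's code):

import numpy as np
from scipy.spatial.distance import cdist, pdist

def energy(A, B):
    # V-statistic form of the energy distance between samples A and B.
    A, B = np.atleast_2d(A), np.atleast_2d(B)
    n, m = len(A), len(B)
    between = cdist(A, B).mean()                              # E|a - b|
    within_a = 2.0 * pdist(A).sum() / n**2 if n > 1 else 0.0  # E|a - a'|
    within_b = 2.0 * pdist(B).sum() / m**2 if m > 1 else 0.0  # E|b - b'|
    return 2 * between - within_a - within_b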


###############################################################################
if __name__ == '__main__':
    import data
    from metric import accuracy

    m1 = np.array([0, 0])
    s1 = np.array([[1, 0], [0, 1]])
    n1 = 100

    m2 = np.array([3, 0])
    s2 = np.array([[1, 0], [0, 10]])
    n2 = 100

    X, true_labels = data.multivariate_normal([m1, m2], [s1, s2], [n1, n2])

    ec = EClust(n_clusters=2, max_iter=10, init='kmeans++')
    labels = ec.fit_predict(X)
    print(accuracy(labels, true_labels))

    km = KMeans(2)
    labels2 = km.fit_predict(X)
    print(accuracy(labels2, true_labels))
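`metric.accuracy` is the score used in all of these demos. A plausible sketch, assuming it is the usual clustering accuracy that matches predicted labels to true labels before counting agreements (the repo's definition may differ):

import numpy as np
from scipy.optimize import linear_sum_assignment

def accuracy(z, zh):
    # Build the confusion matrix, then find the label permutation that
    # maximizes agreement (Hungarian algorithm) and score it.
    z, zh = np.asarray(z, dtype=int), np.asarray(zh, dtype=int)
    k = max(z.max(), zh.max()) + 1
    confusion = np.zeros((k, k), dtype=int)
    for a, b in zip(z, zh):
        confusion[a, b] += 1
    rows, cols = linear_sum_assignment(-confusion)
    return confusion[rows, cols].sum() / len(z)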
###############################################################################
if __name__ == "__main__":
    
    import data
    import eclust
    import metric
    from prettytable import PrettyTable

    n = 400
    d = 10
    n1, n2 = np.random.multinomial(n, [0.5, 0.5])
    m1 = np.zeros(d)
    m2 = 0.7*np.ones(d)
    s1 = s2 = np.eye(d)
    X, z = data.multivariate_normal([m1, m2], [s1, s2], [n1, n2])

    G = eclust.kernel_matrix(X, lambda x, y: np.linalg.norm(x-y))
    W = np.eye(n)
    k = 2

    t = PrettyTable(["Method", "Accuracy"])
    
    zh = kernel_kmeans(k, X, G, W, run_times=5, ini="k-means++")
    a = metric.accuracy(z, zh)
    t.add_row(["Kernel k-means", a])
    
    zh = kernel_kgroups(k, X, G, W, run_times=5, ini="k-means++")
    a = metric.accuracy(z, zh)
    t.add_row(["Kernel k-groups", a])

    print(t)
    
Example #10
if __name__ == "__main__":

    # kmeanspp, spectral, and topeigen are clustering routines assumed to be
    # defined in this module.
    import data
    import eclust
    import metric

    table = []
    for i in range(100):

        # generate data ##############
        D = 2
        n1 = 100
        n2 = 100
        m1 = 0.5 * np.ones(D)
        s1 = np.eye(D)
        m2 = 2 * np.ones(D)
        s2 = 1.2 * np.eye(D)
        X, z = data.multivariate_normal([m1, m2], [s1, s2], [n1, n2])
        k = 2
        #X, z = data.circles([1, 3], [0.1, 0.1], [200, 200])
        #G = eclust.kernel_matrix(X,
        #    lambda x, y: 2-2*np.exp(-1/4*np.power(np.linalg.norm(x-y),2)))
        G = eclust.kernel_matrix(
            X, lambda x, y: np.power(np.linalg.norm(x - y), 1))
        ##############################

        results = []

        zh = kmeanspp(k, X)
        results.append(metric.accuracy(z, zh))
        zh = spectral(k, G)
        results.append(metric.accuracy(z, zh))
        zh = topeigen(k, G, run_times=10, init='k-means++')
        results.append(metric.accuracy(z, zh))

        table.append(results)


    # Fragment: tail of KernelEnergy's cluster-assignment step; the
    # enclosing signature was truncated and is reconstructed here.
    def predict(self, X, K):
        n_samples = X.shape[0]
        dist = np.zeros((n_samples, self.n_clusters))
        self._compute_dist(K, dist, self.within_distances_,
                           update_within=False)
        return dist.argmin(axis=1)



###############################################################################
if __name__ == '__main__':
    import energy
    import data
    from metric import accuracy
    from sklearn.cluster import KMeans

    X, z = data.multivariate_normal(
        [[0,0], [2,0]], 
        [np.eye(2), np.eye(2)],
        [100, 100]
    )

    kernel = energy.energy_kernel
    km = KernelEnergy(n_clusters=2, max_iter=100, verbose=1, 
                      kernel_params={'alpha':.8})
    zh = km.fit_predict(X)
    print(accuracy(z, zh))
    
    km = KMeans(n_clusters=2)
    zh = km.fit_predict(X)
    print(accuracy(z, zh))
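`energy.energy_kernel` is referenced but not defined in this excerpt. A sketch under the assumption that it is the standard kernel associated with the energy distance, k(x, y) = |x|^alpha + |y|^alpha - |x - y|^alpha (the exact scaling the repo uses is not shown):

import numpy as np

def energy_kernel(x, y, alpha=1.0):
    # Kernel induced by the negative-definite distance |x - y|^alpha.
    return (np.linalg.norm(x) ** alpha
            + np.linalg.norm(y) ** alpha
            - np.linalg.norm(x - y) ** alpha)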

Example #15
###############################################################################
if __name__ == "__main__":

    # Note: sklearn.mixture.GMM was removed in scikit-learn 0.20; see the
    # GaussianMixture equivalent after this example.
    from sklearn.mixture import GMM as sk_GMM

    import data
    import metric

    #np.random.seed(12)

    D = 10
    m1 = np.zeros(D)
    s1 = np.eye(D)
    m2 = np.ones(D)
    s2 = 2 * np.eye(D)
    X, z = data.multivariate_normal([m1, m2], [s1, s2], [100, 100])
    k = 2

    # The scikit-learn library has a better procedure to estimate the
    # covariance matrix.

    g = GMM(k)
    zh = g.fit_predict(X)
    print("GMM class:", metric.accuracy(z, zh))

    zh = gmm(k, X)
    print("GMM func:", metric.accuracy(z, zh))

    sg = sk_GMM(k)
    sg.fit(X)
    zh = sg.predict(X)
    # The closing comparison line was truncated in the source; presumably:
    print("GMM sklearn:", metric.accuracy(z, zh))
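On current scikit-learn the `GMM` class no longer exists; `GaussianMixture` is the direct replacement, so the last block would read:

from sklearn.mixture import GaussianMixture

sg = GaussianMixture(n_components=k)  # k = 2 as above
sg.fit(X)
zh = sg.predict(X)
print("GMM sklearn:", metric.accuracy(z, zh))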