def experiment1(n, d, repeat, threshold):
    """
    Repeat `repeat` times: measure the connected components of the
    thresholded correlation graph of a Gaussian random matrix.
    :param n: number of data points
    :param d: number of dimensions
    :param repeat: number of repetitions
    :param threshold: edge threshold on the correlation
    :return: per-repetition mean, std, min and max component sizes,
        and number of components
    """
    mu = np.zeros(repeat)
    sigma = np.zeros(repeat)
    min_size = np.zeros(repeat)
    max_size = np.zeros(repeat)
    num_groups = np.zeros(repeat)
    for x in range(repeat):
        print('epoch %d' % x)
        X = np.random.randn(n, d)
        cross_correlation, _ = utils.xcorr(np.abs(X))
        G = graph.gen_corr_graph(cross_correlation, threshold)
        # connected_component_subgraphs was removed in networkx 2.4;
        # the component node sets are enough to get the sizes
        num_nodes = np.array(
            [len(comp) for comp in nx.connected_components(G)])
        min_size[x] = min(num_nodes)
        max_size[x] = max(num_nodes)
        sigma[x] = np.std(num_nodes)
        mu[x] = np.mean(num_nodes)
        num_groups[x] = len(num_nodes)
    return mu, sigma, min_size, max_size, num_groups
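

# For reference: a self-contained sketch of the same measurement using only
# NumPy and NetworkX. utils.xcorr and graph.gen_corr_graph are project-local
# helpers, so the cosine-correlation and thresholding steps below are an
# assumption about what they compute, not the project's actual code.
import numpy as np
import networkx as nx


def component_sizes(n=200, d=50, threshold=0.75, seed=0):
    """Component sizes of a thresholded correlation graph on Gaussian data."""
    rng = np.random.RandomState(seed)
    X = rng.randn(n, d)
    # row-normalize so the Gram matrix holds pairwise cosine correlations
    Xn = X / np.linalg.norm(X, axis=1, keepdims=True)
    C = np.abs(Xn.dot(Xn.T))
    np.fill_diagonal(C, 0.0)
    # keep edge (i, j) whenever the absolute correlation exceeds the threshold
    G = nx.from_numpy_array((C > threshold).astype(int))
    return sorted(len(comp) for comp in nx.connected_components(G))
# For Gaussian data in 50 dimensions a 0.75 threshold keeps almost no edges,
# so nearly every node ends up as a singleton component.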
def split_data(X, P, split_mode):
    """
    split data for parallel processing at random
    :param X: data
    :param P: number of cores
    :param split_mode: split mode
    :return: list of P sequences
    """
    if split_mode == 'random':
        n = X.shape[0]
        random_seq = np.random.permutation(n)
        seq_par = [random_seq[x::P] for x in range(P)]
    elif split_mode == 'cross-correlation':
        cc, _ = utils.xcorr(np.abs(X))
        G = graph.gen_corr_graph(np.abs(cc))
        subGs = graph.split_evenly(G, P)
        seq_par = [list(x.nodes()) for x in subGs]
    else:
        raise ValueError('unknown split_mode: %s' % split_mode)
    return seq_par
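

# A quick usage sketch of the 'random' mode above: striding a shuffled index
# sequence (random_seq[x::P]) gives the P workers nearly equal shares.
import numpy as np

n, P = 10, 3
random_seq = np.random.permutation(n)
seq_par = [random_seq[x::P] for x in range(P)]
for worker, idx in enumerate(seq_par):
    print('worker %d: %s' % (worker, idx))
# partition sizes differ by at most one (here 4, 3, 3)
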
    print('Parallel SGD (random split) maximum learning rate=%f' % gamma2)
    print('train accuracy:%f, test accuracy:%f' %
          (accuracy(sm2.predict(X), oneHotDecode(y)),
           accuracy(sm2.predict(mnist.test.images),
                    oneHotDecode(mnist.test.labels))))

    # seq_par = sgd.split_data(X, P, 'cross-correlation')
    # gamma3 = sgd.max_learning_rate(sgd.parallel_sgd, softmax_learner, 0.001, 5, 0.1,
    #                                data_partition=seq_par, X=X, y=y, max_iter=20, tol=1e-10, P=P)
    # print('Parallel SGD (cross-correlation split) maximum learning rate=%f' % gamma3)
    # sm3, objs3, time_cost3 = sgd.parallel_sgd(softmax_learner, X, y, data_partition=seq_par, max_iter=max_iter, gamma=gamma3 * 0.8, P=P, tol=1e-3)
    # print('train accuracy:%f, test accuracy:%f' % (accuracy(sm3.predict(X), oneHotDecode(y)),
    #     accuracy(sm3.predict(mnist.test.images), oneHotDecode(mnist.test.labels))))

    max_deg = 100
    cc, _ = utils.xcorr(X)
    G, _ = graph.gen_corr_graph(cc, max_deg=max_deg)
    cg = graph.ConflictGraph(G)
    gamma4, sm4, objs4, time_cost4 = sgd.max_learning_rate(
        sgd.parallel_sgd, softmax_learner, 0.001, 5, 0.1,
        data_partition=cg, X=X, y=y, max_iter=max_iter * max_deg,
        tol=1e-10, P=P)
    print('Parallel SGD (CYCLADES) maximum learning rate=%f' % gamma4)
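

# The CYCLADES step above relies on the project-local graph.ConflictGraph.
# The idea: samples whose updates touch correlated coordinates conflict, so a
# connected component of the conflict graph must stay on a single worker. A
# minimal sketch of that scheduling, assuming nothing beyond plain NetworkX
# (hypothetical; not the project's actual implementation):
import networkx as nx


def schedule_components(G, P):
    """Assign each connected component of conflict graph G to one of P
    workers, balancing greedily by component size (largest first)."""
    comps = sorted(nx.connected_components(G), key=len, reverse=True)
    buckets = [[] for _ in range(P)]
    loads = [0] * P
    for comp in comps:
        w = loads.index(min(loads))  # least-loaded worker so far
        buckets[w].extend(comp)
        loads[w] += len(comp)
    return buckets
# Because no component is split across workers, two workers never update
# conflicting samples concurrently.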
import numpy as np
from misc import utils
from SGDs import sgd, loss_functions, graph
import matplotlib.pyplot as plt
import matplotlib
import os

if __name__ == '__main__':
    n, d = 1000, 100
    np.random.seed(0)
    X = np.random.randn(n, d)
    w = np.random.uniform(low=0.0, high=1.0, size=(d, ))
    y = np.dot(X, w)
    w0 = np.zeros(d)

    cc, ncc = utils.xcorr(X)
    max_degrees = range(0, n + 1, 20)
    max_degrees_actual = []
    ths = []
    num_edges = []
    per_edges_used = []
    for max_deg in max_degrees:
        G, th = graph.gen_corr_graph(np.abs(ncc), max_deg=max_deg)
        ths.append(th)
        max_degrees_actual.append(max(dict(G.degree()).values()))
        num_edges.append(G.number_of_edges())
        per_edges_used.append(num_edges[-1] / float(n * (n - 1) / 2))
    print(max_degrees_actual)
    print(ths)
    print(num_edges)
    print(per_edges_used)
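

# utils.xcorr is project-local; the scripts above use both of its return
# values (cc, ncc). A plausible reading consistent with those call sites is
# raw inner products plus a cosine-normalized version. This reconstruction is
# an assumption, not the project's actual code.
import numpy as np


def xcorr_sketch(X, Y=None):
    """Assumed behaviour of utils.xcorr: raw (cc) and normalized (ncc)
    cross-correlations between the rows of X and the rows of Y."""
    Y = X if Y is None else Y
    cc = X.dot(Y.T)  # raw inner products
    norms = (np.linalg.norm(X, axis=1)[:, None] *
             np.linalg.norm(Y, axis=1)[None, :])
    ncc = cc / np.maximum(norms, 1e-12)  # guard against zero-norm rows
    return cc, ncc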
"""
display 6 data points and their correlation
"""

import numpy as np
from misc import utils
from SGDs import graph
import os

# import data
from tensorflow.examples.tutorials.mnist import input_data

if __name__ == '__main__':
    mnist = input_data.read_data_sets(os.path.join('data', 'MNIST'),
                                      one_hot=True)
    n = 6
    X, y = mnist.train.next_batch(n)
    C, _ = utils.xcorr(X)
    print(C)
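

# Printing the raw matrix is hard to read; a heatmap shows the 6 x 6
# structure at a glance. A standalone sketch with random data standing in
# for the MNIST batch (the output filename is illustrative):
import numpy as np
import matplotlib.pyplot as plt

X = np.random.randn(6, 784)
Xn = X / np.linalg.norm(X, axis=1, keepdims=True)
C = Xn.dot(Xn.T)  # pairwise cosine correlations
fig, ax = plt.subplots()
im = ax.imshow(C, cmap='viridis', vmin=-1.0, vmax=1.0)
fig.colorbar(im, ax=ax, label='correlation')
fig.savefig('correlation_6points.png')  # hypothetical output path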
import os
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import networkx as nx
from misc import utils
from SGDs import graph
import time

# import data
from tensorflow.examples.tutorials.mnist import input_data

if __name__ == '__main__':
    mnist_path = os.path.join('data', 'MNIST')
    mnist = input_data.read_data_sets(mnist_path)
    # only pick the first n data
    n = 1000
    img, labels = mnist.train.next_batch(n)

    cross_correlation, cc_re = utils.xcorr(img)
    # hist, bin_edges = np.histogram(cross_correlation.ravel(), bins=200)

    # the histogram of the data
    fig = plt.figure(num=1, figsize=(20, 12))
    counts, bins, patches = plt.hist(cc_re.ravel(),
                                     bins=200,
                                     density=True,  # 'normed' was removed in matplotlib 3.x
                                     facecolor='green',
                                     alpha=0.75)

    # axis labels and tick formatting
    plt.xticks(fontsize=20)
    plt.yticks(fontsize=20)
    plt.xlabel('correlation', fontsize=20)
    plt.ylabel('frequency', fontsize=20)
    # for label in xrange(c):
    #     frames = []
    #     for i in xrange(0, len(data_by_number[label]), 100):
    #         frames.append(Image.fromarray(np.uint8(255*data_by_number[label][i].reshape((28, 28)))))
    #         frames[-1].save(os.path.join('results', 'real_data', 'MNIST',
    #                 'sample_images', 'digit_%d_%i.jpg' % (label, i)))

    # c and data_by_number are used below but never defined in this snippet;
    # a reconstruction: group the batch images by digit label
    c = 10  # number of digit classes
    data_by_number = [img[labels == d] for d in range(c)]

    # compute pairwise cross-correlations between digit classes
    # cc, ncc = [], []

    fig = plt.figure(num=1, figsize=(20, 12))
    gs = gridspec.GridSpec(c, c)
    for i in range(c):
        # cc.append([])
        # ncc.append([])
        for j in range(c):
            print('digit %d - digit %d' % (i, j))
            cc0, ncc0 = utils.xcorr(data_by_number[i], data_by_number[j])
            # cc[-1].append(cc0)
            # ncc[-1].append(ncc0)

            ax = fig.add_subplot(gs[i, j])
            utils.plot_hist(ncc0.ravel(), ax, num_bins=200)

    plt.tight_layout()
    save_path = os.path.join('results', 'real_data', 'MNIST',
                             'correlation_500', 'mnist_ncc_hist_all.pdf')
    os.makedirs(os.path.dirname(save_path), exist_ok=True)  # ensure the directory exists
    fig.savefig(save_path)


if __name__ == '__main__':
    n, d = 500, 100
    X = np.random.randn(n, d)
    cc, _ = utils.xcorr(np.abs(X))
    c_sort = np.sort(np.abs(cc.ravel()))
    threshold = 0.75
    repeat = 10
    mu, sigma, min_size, max_size, num_groups = experiment1(
        n, d, repeat, threshold)
    print(mu)
    print(sigma)
    print(min_size)
    print(max_size)
    print(num_groups)
    X = np.zeros((repeat, 5))
    X[:, 0] = mu
    X[:, 1] = sigma
    X[:, 2] = min_size
    X[:, 3] = max_size
    X[:, 4] = num_groups