Example #1
    def test_svd(self):
        A = tensor.matrix("A", dtype=self.dtype)
        U, S, VT = svd(A)
        fn = function([A], [U, S, VT])
        a = self.rng.rand(4, 4).astype(self.dtype)
        n_u, n_s, n_vt = np.linalg.svd(a)
        t_u, t_s, t_vt = fn(a)

        assert _allclose(n_u, t_u)
        assert _allclose(n_s, t_s)
        assert _allclose(n_vt, t_vt)

        fn = function([A], svd(A, compute_uv=False))
        t_s = fn(a)
        assert _allclose(n_s, t_s)
Example #2
def kmeans(train_set_x):

    if train_set_x is None:
        train_set_x = T.matrix('train_set_x')

    ########################
    # Normalize the inputs #
    ########################

    epsilon_norm = 10
    epsilon_zca = 0.015
    K = 500

    # subtract the per-column mean and divide by the standard deviation
    train_set_x = (train_set_x - T.mean(train_set_x, axis=0)) / T.sqrt(T.var(train_set_x, axis=0) + epsilon_norm)

    #####################
    # Whiten the inputs #
    #####################

    # A simple choice of whitening transform is the ZCA whitening transform.
    # epsilon_zca is a small regularization constant; for contrast-normalized data,
    # setting epsilon_zca to 0.01 for 16-by-16 pixel patches, or to 0.1 for
    # 8-by-8 pixel patches, is a good starting point.
    cov = T.dot(train_set_x, T.transpose(train_set_x)) / train_set_x.shape[1]
    U, S, V = linalg.svd(cov)
    tmp = T.dot(U, T.diag(1/T.sqrt(S + epsilon_zca)))
    tmp = T.dot(tmp, T.transpose(U))
    whitened_x = T.dot(tmp, train_set_x)

    ######################
    # Training the Model #
    ######################

    # Initialization
    dimension_size = whitened_x.shape[0]
    num_samples = whitened_x.shape[1]
    srng = RandomStreams(seed=234)

    D = srng.normal(size=(dimension_size, K))
    D = D / T.sqrt(T.sum(T.sqr(D), axis=0))

    # typically 10 iterations is enough
    num_iteration = 15

    # compute new centroids, D_new
    for i in range(num_iteration):

        dx = T.dot(D.T, whitened_x)
        arg_max_dx = T.argmax(dx, axis=0)
        s = dx[arg_max_dx, T.arange(num_samples)]

        S = T.zeros((K, num_samples))
        S = T.set_subtensor(S[arg_max_dx, T.arange(num_samples)], s)
        D = T.dot(whitened_x, T.transpose(S)) + D

        D = D / T.sqrt(T.sum(T.sqr(D), axis=0))

    return D
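The function above only builds a symbolic expression; nothing is computed until it is compiled with theano.function. Below is a minimal usage sketch (hypothetical driver code, not part of the original example), assuming the snippet's omitted imports are theano.tensor as T, theano.tensor.nlinalg as linalg, and RandomStreams from theano.tensor.shared_randomstreams:

import numpy as np
import theano
import theano.tensor as T

x = T.matrix('x')                    # rows are features, columns are samples
centroids = kmeans(x)                # build the symbolic whitening + k-means graph
get_centroids = theano.function([x], centroids)

data = np.random.rand(64, 1000).astype(theano.config.floatX)
D = get_centroids(data)              # ndarray of shape (64, 500) with unit-length columns
print(D.shape)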
Example #3
    def dimension_transform(self, X):
        # PCA-style reduction: project X onto its top self.dim principal directions
        self.mean = T.mean(X, axis=0)
        X -= self.mean
        U, s, V = linalg.svd(X, full_matrices=False)

        self.principal = V[:self.dim]

        return linalg.matrix_dot(X, T.transpose(self.principal))
Example #4
def Kmeans(X_train=None, K=300, epsilon_whitening=0.015):

    if X_train is None:
        X_train = T.matrix('X_train')

    ########################
    # Normalize the inputs #
    ########################

    # A constant added to the variance to avoid division by zero
    epsilon_norm = 10

    # We subtract from each training sample (each column in X_train) its mean
    # and divide by its standard deviation
    X_train = (X_train - T.mean(X_train, axis=0)) / T.sqrt(T.var(X_train, axis=0) + epsilon_norm)

    #####################
    # Whiten the inputs #
    #####################

    sigma = T.dot(X_train, T.transpose(X_train)) / X_train.shape[1]
    U, s, V = linalg.svd(sigma, full_matrices=False)
    tmp = T.dot(U, T.diag(1 / T.sqrt(s + epsilon_whitening)))
    tmp = T.dot(tmp, T.transpose(U))
    X_Whitened = T.dot(tmp, X_train)

    ######################
    # Training the Model #
    ######################

    # Initialization
    dimensions = X_Whitened.shape[0]
    samples = X_Whitened.shape[1]
    srng = RandomStreams(seed=234)

    # We initialize the centroids by sampling them from a normal
    # distribution, and then normalizing them to unit length
    # D \in R^{n \times k}
    D = srng.normal(size=(dimensions, K))
    D = D / T.sqrt(T.sum(T.sqr(D), axis=0))

    iterations = 30

    for i in range(iterations):

        # Initialize new point representations
        # for every pass of the algorithm
        S = T.zeros((K, samples))

        tmp = T.dot(D.T, X_Whitened)
        res = T.argmax(tmp, axis=0)
        max_values = tmp[res, T.arange(samples)]
        S = T.set_subtensor(S[res, T.arange(samples)], max_values)

        D = T.dot(X_Whitened, T.transpose(S))
        D = D / T.sqrt(T.sum(T.sqr(D), axis=0))

    return D
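As with the kmeans example above, this function only defines a symbolic graph; a hypothetical way to compile and call it is sketched below:

import numpy as np
import theano
import theano.tensor as T

X = T.matrix('X')
get_centroids = theano.function([X], Kmeans(X, K=300))
D = get_centroids(np.random.rand(64, 1000).astype(theano.config.floatX))   # shape (64, 300)

Note that, unlike the damped update in the kmeans example (which adds the previous D before normalizing), this variant recomputes D from S alone, so a centroid that receives no assignments in an iteration becomes a zero column and normalizes to NaN.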
Example #5
def Kmeans(X_train=None, K=300, epsilon_whitening=0.015):

    if X_train is None:
        X_train = T.matrix("X_train")

    ########################
    # Normalize the inputs #
    ########################

    # A constant added to the variance to avoid division by zero
    epsilon_norm = 10

    # We subtract from each training sample (each column in X_train) its mean
    # and divide by its standard deviation
    X_train = (X_train - T.mean(X_train, axis=0)) / T.sqrt(T.var(X_train, axis=0) + epsilon_norm)

    #####################
    # Whiten the inputs #
    #####################

    sigma = T.dot(X_train, T.transpose(X_train)) / X_train.shape[1]
    U, s, V = linalg.svd(sigma, full_matrices=False)
    tmp = T.dot(U, T.diag(1 / T.sqrt(s + epsilon_whitening)))
    tmp = T.dot(tmp, T.transpose(U))
    X_Whitened = T.dot(tmp, X_train)

    ######################
    # Training the Model #
    ######################

    # Initialization
    dimensions = X_Whitened.shape[0]
    samples = X_Whitened.shape[1]
    srng = RandomStreams(seed=234)

    # We initialize the centroids by sampling them from a normal
    # distribution, and then normalizing them to unit length
    # D \in R^{n \times k}
    D = srng.normal(size=(dimensions, K))
    D = D / T.sqrt(T.sum(T.sqr(D), axis=0))

    iterations = 30

    for i in range(iterations):

        # Initialize new point representations
        # for every pass of the algorithm
        S = T.zeros((K, samples))

        tmp = T.dot(D.T, X_Whitened)
        res = T.argmax(tmp, axis=0)
        max_values = tmp[res, T.arange(samples)]
        S = T.set_subtensor(S[res, T.arange(samples)], max_values)

        D = T.dot(X_Whitened, T.transpose(S))
        D = D / T.sqrt(T.sum(T.sqr(D), axis=0))

    return D
Example #6
def test_svd():
    rng = np.random.RandomState(utt.fetch_seed())
    A = tensor.matrix("A", dtype=theano.config.floatX)
    U, V, T = svd(A)
    fn = function([A], [U, V, T])
    a = rng.rand(4, 4).astype(theano.config.floatX)
    n_u, n_v, n_t = np.linalg.svd(a)
    t_u, t_v, t_t = fn(a)

    assert _allclose(n_u, t_u)
    assert _allclose(n_v, t_v)
    assert _allclose(n_t, t_t)
Example #7
def test_svd():
    rng = numpy.random.RandomState(utt.fetch_seed())
    A = tensor.matrix("A", dtype=theano.config.floatX)
    U, V, T = svd(A)
    fn = function([A], [U, V, T])
    a = rng.rand(4, 4).astype(theano.config.floatX)
    n_u, n_v, n_t = numpy.linalg.svd(a)
    t_u, t_v, t_t = fn(a)

    assert _allclose(n_u, t_u)
    assert _allclose(n_v, t_v)
    assert _allclose(n_t, t_t)
Example #8
    def __init__(self, batch_size, data=None):

        if data is None:
            self.train_set_x = T.matrix('train_set_x')
        else:
            self.train_set_x = data

        ########################
        # Normalize the inputs #
        ########################

        self.epsilon_norm = 10
        self.epsilon_zca = 0.015
        self.K = 500

        # subtract the per-column mean and divide by the standard deviation
        self.train_set_x = (self.train_set_x - T.mean(self.train_set_x, axis=0)) / T.sqrt(T.var(self.train_set_x, axis=0) + self.epsilon_norm)

        #####################
        # Whiten the inputs #
        #####################

        # A simple choice of whitening transform is the ZCA whitening transform.
        # epsilon_zca is a small regularization constant; for contrast-normalized data,
        # setting epsilon_zca to 0.01 for 16-by-16 pixel patches, or to 0.1 for
        # 8-by-8 pixel patches, is a good starting point.
        cov = T.dot(self.train_set_x, T.transpose(self.train_set_x)) / self.train_set_x.shape[1]
        U, S, V = linalg.svd(cov)
        tmp = T.dot(U, T.diag(1 / T.sqrt(S + self.epsilon_zca)))
        tmp = T.dot(tmp, T.transpose(U))
        self.whitened_x = T.dot(tmp, self.train_set_x)

        ######################
        # Training the Model #
        ######################

        # initialization
        self.dimension_size = self.train_set_x.shape[0]
        self.num_samples = batch_size
        self.srng = RandomStreams(seed=234)

        # We initialize the centroids by sampling them from a normal
        # distribution, and then normalizing them to unit length
        # D \in R^{n \times k}
        self.D = self.srng.normal(size=(self.dimension_size, self.K))
        self.D = self.D / T.sqrt(T.sum(T.sqr(self.D), axis=0))
Example #9
    def __init__(self, batch_size, data=None, K=300, epsilon_whitening=0.015):

        if data is None:
            self.X = T.matrix('X_train')
        else:
            self.X = data

        ########################
        # Normalize the inputs #
        ########################

        # A constant added to the variance to avoid division by zero
        self.epsilon_norm = 10
        self.epsilon_whitening = epsilon_whitening

        # We subtract from each training sample (each column in X_train) its mean
        # and divide by its standard deviation
        self.X = (self.X - T.mean(self.X, axis=0)) / T.sqrt(T.var(self.X, axis=0) + self.epsilon_norm)

        #####################
        # Whiten the inputs #
        #####################

        sigma = T.dot(self.X, T.transpose(self.X)) / self.X.shape[1]
        U, s, V = linalg.svd(sigma, full_matrices=False)
        tmp = T.dot(U, T.diag(1 / T.sqrt(s + self.epsilon_whitening)))
        tmp = T.dot(tmp, T.transpose(U))
        self.X = T.dot(tmp, self.X)

        ##################
        # Initialization #
        ##################
        self.K = K  # The number of clusters
        self.dimensions = self.X.shape[0]
        self.samples = batch_size
        self.srng = RandomStreams(seed=234)

        # We initialize the centroids by sampling them from a normal
        # distribution, and then normalizing them to unit length
        # D \in R^{n \times k}
        self.D = self.srng.normal(size=(self.dimensions, self.K))
        self.D = self.D / T.sqrt(T.sum(T.sqr(self.D), axis=0))
Example #10
    def __init__(self, batch_size, data=None, K=300, epsilon_whitening=0.015):

        if data is None:
            self.X = T.matrix('X_train')
        else:
            self.X = data

        ########################
        # Normalize the inputs #
        ########################

        # A constant added to the variance to avoid division by zero
        self.epsilon_norm = 10
        self.epsilon_whitening = epsilon_whitening

        # We subtract from each training sample (each column in X_train) its mean
        # and divide by its standard deviation
        self.X = (self.X - T.mean(self.X, axis=0)) / T.sqrt(T.var(self.X, axis=0) + self.epsilon_norm)

        #####################
        # Whiten the inputs #
        #####################

        sigma = T.dot(self.X, T.transpose(self.X)) / self.X.shape[1]
        U, s, V = linalg.svd(sigma, full_matrices=False)
        tmp = T.dot(U, T.diag(1/T.sqrt(s + self.epsilon_whitening)))
        tmp = T.dot(tmp, T.transpose(U))
        self.X = T.dot(tmp, self.X)

        ##################
        # Initialization #
        ##################
        self.K = K  # The number of clusters
        self.dimensions = self.X.shape[0]
        self.samples = batch_size
        self.srng = RandomStreams(seed=234)

        # We initialize the centroids by sampling them from a normal
        # distribution, and then normalizing them to unit length
        # D \in R^{n \times k}
        self.D = self.srng.normal(size=(self.dimensions, self.K))
        self.D = self.D / T.sqrt(T.sum(T.sqr(self.D), axis=0))
Example #11
    def transform(self, X):
        """
        Perform dimensionality reduction of the input matrix X

        :param X: The matrix of observations, where the training samples
        populate the rows, and the features populate the columns

        :return: Xtilde, the dimensionally reduced representation of the data
        """
        # center the data by subtracting the mean
        self.mean = T.mean(X, axis=0)
        X -= self.mean
        U, s, V = linalg.svd(X, full_matrices=False)

        # Keep track of the 'M' principal directions
        # The svd actually produces V^T, so the
        # principal directions are stored in the rows of
        # V as opposed to the columns
        self.principal = V[:self.dim]

        # Return the transformed data
        return linalg.matrix_dot(X, T.transpose(self.principal))
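For context, here is a minimal, self-contained sketch of how a method like this might be hosted and compiled. The PCA wrapper class, its dim argument, and the driver code are hypothetical illustrations, not part of the original project:

import numpy as np
import theano
import theano.tensor as T
from theano.tensor import nlinalg as linalg

class PCA(object):
    """Hypothetical host class; transform is the method shown above."""
    def __init__(self, dim):
        self.dim = dim                          # number of principal directions to keep

    def transform(self, X):
        # body as in the example above
        self.mean = T.mean(X, axis=0)
        X -= self.mean
        U, s, V = linalg.svd(X, full_matrices=False)
        self.principal = V[:self.dim]
        return linalg.matrix_dot(X, T.transpose(self.principal))

X = T.matrix('X')
reducer = PCA(dim=2)
reduce_fn = theano.function([X], reducer.transform(X))

data = np.random.rand(100, 5).astype(theano.config.floatX)
print(reduce_fn(data).shape)                    # prints (100, 2)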
Example #12
import theano
from theano.tensor.nlinalg import svd

a = theano.tensor.matrix('a')
y = svd(a)
SVD = theano.function([a], y, allow_input_downcast=True)

if __name__ == '__main__':
    import numpy as np
    N = 1000

    a = np.random.randint(0, 100, size=(N, N))
    u, s, v = SVD(a)
    print(u, s, v)
Example #13
import theano 
from theano.tensor.nlinalg import svd 

a = theano.tensor.matrix('a')
y = svd(a)
SVD = theano.function([a], y, allow_input_downcast=True)

if __name__ == '__main__':
    import numpy as np
    N = 1000

    a = np.random.randint(0, 100, size=(N, N))
    u, s, v = SVD(a)
    print(u, s, v)