Example no. 1
    def test_mean_H_given_V(self):
        tol = 1e-6

        # P(h_1 | v) / P(h_2 | v) = a
        # => exp(-E(v, h_1)) / exp(-E(v,h_2)) = a
        # => exp(E(v,h_2)-E(v,h_1)) = a
        # E(v,h_2) - E(v,h_1) = log(a)
        # also log P(h_1 | v) - log P(h_2 | v) = log(a)

        rng = N.random.RandomState([1, 2, 3])

        m = 5

        Vv = as_floatX(N.zeros((m, self.nv)) + rng.randn(self.nv))

        Hv = as_floatX(rng.randn(m, self.nh) > 0.)

        log_Pv = self.log_P_H_given_V_func(Hv, Vv)

        Ev = self.E_func(Vv, Hv)

        for i in xrange(m):
            for j in xrange(i + 1, m):
                log_a = log_Pv[i] - log_Pv[j]
                e = Ev[j] - Ev[i]

                assert abs(e-log_a) < tol
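The comment block above rests on the identity E(v, h_2) - E(v, h_1) = log(P(h_1 | v) / P(h_2 | v)), which holds because P(h | v) is proportional to exp(-E(v, h)). A standalone NumPy check of that identity, using a made-up energy table rather than the GRBM energy the test exercises:

import numpy as np

# Hypothetical energies E(v, h) for one fixed v and three hidden configurations.
E = np.array([0.3, -1.2, 2.5])

# P(h | v) is proportional to exp(-E(v, h)); normalize explicitly.
p = np.exp(-E) / np.exp(-E).sum()

# E(v, h_j) - E(v, h_i) == log(P(h_i | v) / P(h_j | v)) for every pair (i, j).
for i in range(len(E)):
    for j in range(len(E)):
        assert abs((E[j] - E[i]) - np.log(p[i] / p[j])) < 1e-10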
Example no. 2
    def test_mean_H_given_V(self):
        tol = 1e-6

        # P(h_1 | v) / P(h_2 | v) = a
        # => exp(-E(v, h_1)) / exp(-E(v,h_2)) = a
        # => exp(E(v,h_2)-E(v,h_1)) = a
        # E(v,h_2) - E(v,h_1) = log(a)
        # also log P(h_1 | v) - log P(h_2 | v) = log(a)

        rng = N.random.RandomState([1, 2, 3])

        m = 5

        Vv = as_floatX(N.zeros((m, nv)) + rng.randn(nv))

        Hv = as_floatX(rng.randn(m, nh) > 0.)

        log_Pv = log_P_H_given_V_func(Hv, Vv)

        Ev = E_func(Vv, Hv)

        for i in xrange(m):
            for j in xrange(i + 1, m):
                log_a = log_Pv[i] - log_Pv[j]
                e = Ev[j] - Ev[i]

                assert abs(e - log_a) < tol
Example no. 3
def test_triangle_code():
    rng = np.random.RandomState([20,18,9])

    m = 5
    n = 6
    k = 7

    X = as_floatX(rng.randn(m,n))
    D = as_floatX(rng.randn(k,n))

    D_norm_squared = np.sum(D**2,axis=1)
    X_norm_squared = np.sum(X**2,axis=1)
    sq_distance = -2.0 * np.dot(X,D.T) + D_norm_squared + np.atleast_2d(X_norm_squared).T
    distance = np.sqrt(sq_distance)

    mu = np.mean(distance, axis = 1)
    expected = np.maximum(0.0,mu.reshape(mu.size,1)-distance)

    Xv = T.matrix()
    Dv = T.matrix()

    code = triangle_code(X = Xv, centroids = Dv)
    actual = function([Xv,Dv],code)(X,D)

    assert np.allclose(expected, actual)
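The test spells out the expected computation in NumPy, so the encoder's semantics are clear: the "triangle" activation max(0, mean distance - distance) over Euclidean distances to the centroids. A self-contained sketch under that reading (not pylearn2's triangle_code itself, whose call signature the test shows as triangle_code(X=..., centroids=...)):

import numpy as np

def triangle_code_np(X, D):
    """Triangle activation sketch: max(0, mean distance - distance to each centroid)."""
    # Pairwise Euclidean distances between rows of X (m, n) and rows of D (k, n).
    sq_dist = (-2.0 * X.dot(D.T)
               + np.sum(D ** 2, axis=1)
               + np.sum(X ** 2, axis=1, keepdims=True))
    dist = np.sqrt(np.maximum(sq_dist, 0.0))  # clamp tiny negatives from round-off
    mu = dist.mean(axis=1, keepdims=True)     # mean distance to all centroids
    return np.maximum(0.0, mu - dist)

X = np.random.randn(5, 6)
D = np.random.randn(7, 6)
assert triangle_code_np(X, D).shape == (5, 7)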
Example no. 4
    def test_d_negent_h_d_h(self):

        "tests that the gradient of the negative entropy of h with respect to \hat{h} matches my analytical version of it "

        model = self.model
        ip = self.model.e_step
        X = self.X

        assert X.shape[0] == self.m

        H = np.cast[config.floatX](self.model.rng.uniform(0.001,.999,(self.m, self.N)))
        S = np.cast[config.floatX](self.model.rng.uniform(-5.,5.,(self.m, self.N)))

        H_var = T.matrix(name='H_var')
        H_var.tag.test_value = H
        S_var = T.matrix(name='S_var')
        S_var.tag.test_value = S


        sigma0 = ip.infer_var_s0_hat()
        Sigma1 = ip.infer_var_s1_hat()
        mu0 = T.zeros_like(model.mu)

        negent = - self.model.entropy_h( H_hat =  H_var  ).sum()

        assert len(negent.type.broadcastable) == 0

        grad_H = T.grad(negent, H_var)

        grad_func = function([H_var, S_var], grad_H, on_unused_input = 'ignore')

        grad_theano = grad_func(H,S)


        half = as_floatX(0.5)
        one = as_floatX(1.)
        two = as_floatX(2.)
        pi = as_floatX(np.pi)
        e = as_floatX(np.e)
        mu = self.model.mu
        alpha = self.model.alpha
        W = self.model.W
        B = self.model.B
        w = self.model.w

        term1 = T.log(H_var)
        term2 = -T.log(one - H_var)

        analytical = term1 + term2

        grad_analytical = function([H_var, S_var], analytical, on_unused_input = 'ignore')(H,S)

        if not np.allclose(grad_theano, grad_analytical):
            print('grad theano: ',(grad_theano.min(), grad_theano.mean(), grad_theano.max()))
            print('grad analytical: ',(grad_analytical.min(), grad_analytical.mean(), grad_analytical.max()))
            ad = np.abs(grad_theano-grad_analytical)
            print('abs diff: ',(ad.min(),ad.mean(),ad.max()))
            assert False
Example no. 5
    def test_d_negent_h_d_h(self):

        "tests that the gradient of the negative entropy of h with respect to \hat{h} matches my analytical version of it "

        model = self.model
        ip = self.model.e_step
        X = self.X

        assert X.shape[0] == self.m

        H = np.cast[config.floatX](self.model.rng.uniform(0.001,.999,(self.m, self.N)))
        S = np.cast[config.floatX](self.model.rng.uniform(-5.,5.,(self.m, self.N)))

        H_var = T.matrix(name='H_var')
        H_var.tag.test_value = H
        S_var = T.matrix(name='S_var')
        S_var.tag.test_value = S


        sigma0 = ip.infer_var_s0_hat()
        Sigma1 = ip.infer_var_s1_hat()
        mu0 = T.zeros_like(model.mu)

        negent = - self.model.entropy_h( H_hat =  H_var  ).sum()

        assert len(negent.type.broadcastable) == 0

        grad_H = T.grad(negent, H_var)

        grad_func = function([H_var, S_var], grad_H, on_unused_input = 'ignore')

        grad_theano = grad_func(H,S)


        half = as_floatX(0.5)
        one = as_floatX(1.)
        two = as_floatX(2.)
        pi = as_floatX(np.pi)
        e = as_floatX(np.e)
        mu = self.model.mu
        alpha = self.model.alpha
        W = self.model.W
        B = self.model.B
        w = self.model.w

        term1 = T.log(H_var)
        term2 = -T.log(one - H_var)

        analytical = term1 + term2

        grad_analytical = function([H_var, S_var], analytical, on_unused_input = 'ignore')(H,S)

        if not np.allclose(grad_theano, grad_analytical):
            print('grad theano: ', (grad_theano.min(), grad_theano.mean(), grad_theano.max()))
            print('grad analytical: ', (grad_analytical.min(), grad_analytical.mean(), grad_analytical.max()))
            ad = np.abs(grad_theano - grad_analytical)
            print('abs diff: ', (ad.min(), ad.mean(), ad.max()))
            assert False
Example no. 6
def test_convolutional_compatible():
    """
    VAE allows convolutional encoding networks
    """
    encoding_model = MLP(
        layers=[
            SpaceConverter(
                layer_name='conv2d_converter',
                output_space=Conv2DSpace(shape=[4, 4], num_channels=1)
            ),
            ConvRectifiedLinear(
                layer_name='h',
                output_channels=2,
                kernel_shape=[2, 2],
                kernel_stride=[1, 1],
                pool_shape=[1, 1],
                pool_stride=[1, 1],
                pool_type='max',
                irange=0.01)
            ]
    )
    decoding_model = MLP(layers=[Linear(layer_name='h', dim=16, irange=0.01)])
    prior = DiagonalGaussianPrior()
    conditional = BernoulliVector(mlp=decoding_model, name='conditional')
    posterior = DiagonalGaussian(mlp=encoding_model, name='posterior')
    vae = VAE(nvis=16, prior=prior, conditional=conditional,
              posterior=posterior, nhid=16)
    X = T.matrix('X')
    lower_bound = vae.log_likelihood_lower_bound(X, num_samples=10)
    f = theano.function(inputs=[X], outputs=lower_bound)
    rng = make_np_rng(default_seed=11223)
    f(as_floatX(rng.uniform(size=(10, 16))))
Example no. 7
    def test_unit_norm(self):
        """ Test that using std_bias = 0.0 and use_norm = True
            results in vectors having unit norm """

        tol = 1e-5

        num_examples = 5
        num_features = 10

        rng = np.random.RandomState([1, 2, 3])

        X = as_floatX(rng.randn(num_examples, num_features))

        dataset = DenseDesignMatrix(X=X)

        # the setting of subtract_mean is not relevant to the test
        # the test only applies when std_bias = 0.0 and use_std = False
        preprocessor = GlobalContrastNormalization(subtract_mean=False,
                                                   sqrt_bias=0.0,
                                                   use_std=False)

        dataset.apply_preprocessor(preprocessor)

        result = dataset.get_design_matrix()

        norms = np.sqrt(np.square(result).sum(axis=1))

        max_norm_error = np.abs(norms - 1.).max()

        tol = 3e-5

        assert max_norm_error < tol
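With subtract_mean=False, sqrt_bias=0.0 and use_std=False, the preprocessor reduces to dividing each example by its L2 norm, which is exactly what the assertion checks. A standalone NumPy sketch of that special case (not the full GlobalContrastNormalization, which also supports mean subtraction, a bias term, and std scaling):

import numpy as np

rng = np.random.RandomState(0)
X = rng.randn(5, 10)

# Special case of GCN: divide each row by sqrt(sqrt_bias + sum of squares), with sqrt_bias = 0.
norms = np.sqrt(np.square(X).sum(axis=1, keepdims=True))
X_gcn = X / norms

assert np.abs(np.sqrt(np.square(X_gcn).sum(axis=1)) - 1.).max() < 3e-5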
Example no. 8
    def gibbs_step_for_v(self, v, rng):
        # Sometimes, the number of examples in the data set is not a
        # multiple of self.batch_size.
        batch_size = v.shape[0]

        # sample h given v
        h_mean = self.mean_h_given_v(v)
        h_mean_shape = (batch_size, self.nhid)
        h_sample = as_floatX(rng.uniform(size=h_mean_shape) < h_mean)

        # sample s given (v,h)
        s_mu, s_var = self.mean_var_s_given_v_h1(v)
        #s_mu_shape = (batch_size, self.nslab)
        s_mu_shape = (16, self.nslab)  # @dave: THEANO HACK (bugfix for rita2)
        s_sample = s_mu + rng.normal(size=s_mu_shape) * tensor.sqrt(s_var)
        #s_sample=(s_sample.reshape()*h_sample.dimshuffle(0,1,'x')).flatten(2)

        # sample v given (s,h)
        v_mean, v_var = self.mean_var_v_given_h_s(h_sample, s_sample)
        #v_mean_shape = (batch_size, self.nvis)
        v_mean_shape = (16, int(self.nvis))  # @dave: THEANO HACK (bugfix for rita2)
        v_sample = rng.normal(size=v_mean_shape) * tensor.sqrt(v_var) + v_mean

        del batch_size
        return v_sample, locals()
Example no. 9
    def get_gradients(self, model, data, ** kwargs):
        
        v = data
        mean_matrix = model.propup(v)
        #======================================================
        part_j = self.p - mean_matrix.mean(axis=0)
        part_i1_matrix = mean_matrix * (1. - mean_matrix)
        #part_i = T.dot(v.T, part_i1_matrix)
        #part_orin = part_i * part_j  # multiply the matrix on the right by a row vector
        #coeff_w = -2. *  v.shape[0]
        #gW = coeff_w * part_orin  # gradient from the HL sparsity term, not including lambda_
        #=======================================================
        
        part_j1 = part_j
        part_j2 = part_i1_matrix.mean(axis=0)
        gc = -2. * part_j1 * part_j2

        W, c, b = list(model.get_params())

        #gradients = OrderedDict(izip([W, c], [1/self.p*gW, 1/self.p*gc]))
        gradients = OrderedDict(izip([c], [as_floatX(1/self.p*gc)]))

        updates = OrderedDict()

        return gradients, updates
Example no. 10
def test_convolutional_compatible():
    """
    VAE allows convolutional encoding networks
    """
    encoding_model = MLP(
        layers=[
            SpaceConverter(layer_name="conv2d_converter", output_space=Conv2DSpace(shape=[4, 4], num_channels=1)),
            ConvRectifiedLinear(
                layer_name="h",
                output_channels=2,
                kernel_shape=[2, 2],
                kernel_stride=[1, 1],
                pool_shape=[1, 1],
                pool_stride=[1, 1],
                pool_type="max",
                irange=0.01,
            ),
        ]
    )
    decoding_model = MLP(layers=[Linear(layer_name="h", dim=16, irange=0.01)])
    prior = DiagonalGaussianPrior()
    conditional = BernoulliVector(mlp=decoding_model, name="conditional")
    posterior = DiagonalGaussian(mlp=encoding_model, name="posterior")
    vae = VAE(nvis=16, prior=prior, conditional=conditional, posterior=posterior, nhid=16)
    X = T.matrix("X")
    lower_bound = vae.log_likelihood_lower_bound(X, num_samples=10)
    f = theano.function(inputs=[X], outputs=lower_bound)
    rng = make_np_rng(default_seed=11223)
    f(as_floatX(rng.uniform(size=(10, 16))))
Example no. 11
 def setup(self):
     """
     We use a small predefined 8x5 matrix for
     which we know the ZCA transform.
     """
     self.X = np.array([[-10.0, 3.0, 19.0, 9.0, -15.0],
                       [7.0, 26.0, 26.0, 26.0, -3.0],
                       [17.0, -17.0, -37.0, -36.0, -11.0],
                       [19.0, 15.0, -2.0, 5.0, 9.0],
                       [-3.0, -8.0, -35.0, -25.0, -8.0],
                       [-18.0, 3.0, 4.0, 15.0, 14.0],
                       [5.0, -4.0, -5.0, -7.0, -11.0],
                       [23.0, 22.0, 15.0, 20.0, 12.0]])
     self.dataset = DenseDesignMatrix(X=as_floatX(self.X),
                                      y=as_floatX(np.ones((8, 1))))
     self.num_components = self.dataset.get_design_matrix().shape[1] - 1
Example no. 12
    def get_monitoring_channels(self, data):
        X, Y = data
        rval = OrderedDict()

        nll = self.nll(data)
        rval['perplexity'] = as_floatX(10 ** (nll/np.log(10)))
        return rval
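Since 10 ** (nll / log(10)) equals exp(nll), the value monitored as perplexity is just e raised to the negative log-likelihood in nats. A quick NumPy check of that identity:

import numpy as np

nll = 3.7  # hypothetical mean negative log-likelihood, in nats
assert np.isclose(10 ** (nll / np.log(10)), np.exp(nll))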
Example no. 13
def create_colors(n_colors):
    """
    Create an array of n_colors evenly spaced colors.

    Parameters
    ----------
    n_colors : int
        The number of colors to create

    Returns
    -------
    colors_rgb : np.array
        An array of shape (n_colors, 3) in RGB format
    """
    # Create the list of color hue
    colors_hue = np.arange(n_colors)
    colors_hue = as_floatX(colors_hue)
    colors_hue *= 1./n_colors

    # Set the color in HSV format
    colors_hsv = np.ones((n_colors, 3))
    colors_hsv[:, 2] *= .75
    colors_hsv[:, 0] = colors_hue

    # Put in a matplotlib-friendly format
    colors_hsv = colors_hsv.reshape((1, )+colors_hsv.shape)
    # Convert to RGB
    colors_rgb = matplotlib.colors.hsv_to_rgb(colors_hsv)
    colors_rgb = colors_rgb[0]

    return colors_rgb
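A usage sketch, assuming matplotlib and the numpy/as_floatX imports used by the function are available: n evenly spaced hues go in, an (n, 3) RGB array comes out.

colors = create_colors(4)
assert colors.shape == (4, 3)
assert (colors >= 0.0).all() and (colors <= 1.0).all()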
Example no. 14
    def learning_rate_updates(self):
        """
        Compute a dictionary of shared variable updates related to annealing
        the learning rate.

        Returns
        -------
        updates : dict
            A dictionary with the shared variables representing SGD metadata
            as keys and a symbolic expression of how they are to be updated as
            values.
        learn_rates : list
            A list of symbolic expressions, one per parameter in `self.params`,
            giving that parameter's annealed learning rate.
        """
        ups = {}

        # Annealing coefficient. Here we're using a formula of
        # min(base_lr, anneal_start / (iteration + 1))
        if self.anneal_start is None:
            annealed = sharedX(self.base_lr)
        else:
            frac = self.anneal_start / (self.iteration + 1.)
            annealed = tensor.minimum(
                    as_floatX(frac),
                    self.base_lr  # maximum learning rate
                    )

        # Update the shared variable for the annealed learning rate.
        ups[self.annealed] = annealed
        ups[self.iteration] = self.iteration + 1

        # Calculate the learning rates for each parameter, in the order
        # they appear in self.params
        learn_rates = [annealed * self.learning_rates[p] for p in self.params]
        return ups, learn_rates
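The schedule implemented above keeps the rate at base_lr until anneal_start / (iteration + 1) falls below it, after which it decays as 1/t. A small NumPy sketch of the effective rate over iterations, under those assumptions:

import numpy as np

base_lr, anneal_start = 0.1, 10.0
iterations = np.arange(1000)

# min(base_lr, anneal_start / (t + 1)): flat at base_lr, then a 1/t decay.
annealed = np.minimum(base_lr, anneal_start / (iterations + 1.0))

assert annealed[0] == base_lr                  # still at the base rate
assert np.isclose(annealed[999], 10.0 / 1000)  # deep in the 1/t regime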
Example no. 15
 def setup(self):
     """
     We use a small predefined 8x5 matrix for
     which we know the ZCA transform.
     """
     self.X = np.array([[-10.0, 3.0, 19.0, 9.0, -15.0],
                        [7.0, 26.0, 26.0, 26.0, -3.0],
                        [17.0, -17.0, -37.0, -36.0, -11.0],
                        [19.0, 15.0, -2.0, 5.0, 9.0],
                        [-3.0, -8.0, -35.0, -25.0, -8.0],
                        [-18.0, 3.0, 4.0, 15.0, 14.0],
                        [5.0, -4.0, -5.0, -7.0, -11.0],
                        [23.0, 22.0, 15.0, 20.0, 12.0]])
     self.dataset = DenseDesignMatrix(X=as_floatX(self.X),
                                      y=as_floatX(np.ones((8, 1))))
     self.num_components = self.dataset.get_design_matrix().shape[1] - 1
Example no. 16
    def test_unit_norm(self):
        """ Test that using std_bias = 0.0 and use_norm = True
            results in vectors having unit norm """

        tol = 1e-5

        num_examples = 5
        num_features = 10

        rng = np.random.RandomState([1, 2, 3])

        X = as_floatX(rng.randn(num_examples, num_features))

        dataset = DenseDesignMatrix(X=X)

        # the setting of subtract_mean is not relevant to the test
        # the test only applies when std_bias = 0.0 and use_std = False
        preprocessor = GlobalContrastNormalization(subtract_mean=False,
                                                   sqrt_bias=0.0,
                                                   use_std=False)

        dataset.apply_preprocessor(preprocessor)

        result = dataset.get_design_matrix()

        norms = np.sqrt(np.square(result).sum(axis=1))

        max_norm_error = np.abs(norms - 1.).max()

        tol = 3e-5

        assert max_norm_error < tol
Example no. 17
    def learning_rate_updates(self):
        """
        Compute a dictionary of shared variable updates related to annealing
        the learning rate.

        Returns
        -------
        updates : dict
            A dictionary with the shared variables representing SGD metadata
            as keys and a symbolic expression of how they are to be updated as
            values.
        learn_rates : list
            A list of symbolic expressions, one per parameter in `self.params`,
            giving that parameter's annealed learning rate.
        """
        ups = {}

        # Annealing coefficient. Here we're using a formula of
        # min(base_lr, anneal_start / (iteration + 1))
        if self.anneal_start is None:
            annealed = sharedX(self.base_lr)
        else:
            frac = self.anneal_start / (self.iteration + 1.)
            annealed = tensor.minimum(
                as_floatX(frac),
                self.base_lr  # maximum learning rate
            )

        # Update the shared variable for the annealed learning rate.
        ups[self.annealed] = annealed
        ups[self.iteration] = self.iteration + 1

        # Calculate the learning rates for each parameter, in the order
        # they appear in self.params
        learn_rates = [annealed * self.learning_rates[p] for p in self.params]
        return ups, learn_rates
Example no. 18
 def cost(self,Y,q_h):
     z = self.score(q_h)
     z = z - z.max(axis=1).dimshuffle(0, 'x')
     log_prob = z - T.log(T.exp(z).sum(axis=1).dimshuffle(0, 'x'))
     log_prob_of = (Y * log_prob).sum(axis=1)
     assert log_prob_of.ndim == 1
     rval = as_floatX(log_prob_of.mean())
     return - rval
Example no. 19
 def cost_from_X(self, data):
     X, Y = data
     z = self.score(X)
     z = z - z.max(axis=1).dimshuffle(0, 'x')
     log_prob = z - T.log(T.exp(z).sum(axis=1).dimshuffle(0, 'x'))
     log_prob_of = (Y * log_prob).sum(axis=1)
     assert log_prob_of.ndim == 1
     rval = as_floatX(log_prob_of.mean())
     return - rval
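The z - z.max(...) shift is the usual log-sum-exp stabilization: it leaves the log-softmax unchanged while keeping exp() from overflowing. A NumPy check of both properties:

import numpy as np

def log_softmax(z):
    z = z - z.max(axis=1, keepdims=True)             # stabilizing shift
    return z - np.log(np.exp(z).sum(axis=1, keepdims=True))

z = np.random.randn(4, 3)
naive = np.log(np.exp(z) / np.exp(z).sum(axis=1, keepdims=True))
assert np.allclose(log_softmax(z), naive)            # same value when the naive form is safe
assert np.isfinite(log_softmax(z + 1000.0)).all()    # still finite where the naive form overflows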
Example no. 20
    def test_free_energy(self):

        rng = N.random.RandomState([1, 2, 3])

        m = 2**nh

        Vv = as_floatX(N.zeros((m, nv)) + rng.randn(nv))

        F, = F_func(Vv[0:1, :])

        Hv = as_floatX(N.zeros((m, nh)))

        for i in xrange(m):
            for j in xrange(nh):
                Hv[i, j] = (i & (2**j)) / (2**j)

        Ev = E_func(Vv, Hv)

        Fv = -N.log(N.exp(-Ev).sum())
        assert abs(F - Fv) < 1e-6
Example no. 21
def theano_norms(W):
    """
    .. todo::

        WRITEME properly

    returns a vector containing the L2 norm of each
    column of W, where W and the return value are symbolic
    theano variables
    """
    return T.sqrt(as_floatX(1e-8)+T.sqr(W).sum(axis=0))
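A NumPy rendering of the same expression shows what the helper returns: one L2 norm per column of W, with the small constant under the square root guarding against a zero-norm column.

import numpy as np

W = np.random.randn(6, 4)
col_norms = np.sqrt(1e-8 + np.square(W).sum(axis=0))

assert col_norms.shape == (4,)
assert np.allclose(col_norms, np.linalg.norm(W, axis=0), atol=1e-3)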
Example no. 22
    def test_score(self):
        rng = N.random.RandomState([1, 2, 3])

        m = 10

        Vv = as_floatX(rng.randn(m, nv))

        Sv = score_func(Vv)
        gSv = generic_score_func(Vv)

        assert N.allclose(Sv, gSv)
Example no. 23
    def test_free_energy(self):

        rng = N.random.RandomState([1, 2, 3])

        m = 2 ** self.nh

        Vv = as_floatX(N.zeros((m, self.nv)) + rng.randn(self.nv))

        F, = self.F_func(Vv[0:1, :])

        Hv = as_floatX(N.zeros((m, self.nh)))

        for i in xrange(m):
            for j in xrange(self.nh):
                Hv[i, j] = (i & (2 ** j)) / (2 ** j)

        Ev = self.E_func(Vv, Hv)

        Fv = -N.log(N.exp(-Ev).sum())
        assert abs(F-Fv) < 1e-6
Example no. 24
def theano_norms(W):
    """
    .. todo::

        WRITEME properly

    returns a vector containing the L2 norm of each
    column of W, where W and the return value are symbolic
    theano variables
    """
    return T.sqrt(as_floatX(1e-8)+T.sqr(W).sum(axis=0))
Example no. 25
    def test_score(self):
        rng = N.random.RandomState([1, 2, 3])

        m = 10

        Vv = as_floatX(rng.randn(m, self.nv))

        Sv = self.score_func(Vv)
        gSv = self.generic_score_func(Vv)

        assert N.allclose(Sv, gSv)
Example no. 26
    def setUpClass(cls):
        cls.test_m = 2

        cls.rng = N.random.RandomState([1, 2, 3])
        cls.nv = 3
        cls.nh = 4

        cls.vW = cls.rng.randn(cls.nv, cls.nh)
        cls.W = sharedX(cls.vW)
        cls.vbv = as_floatX(cls.rng.randn(cls.nv))
        cls.bv = T.as_tensor_variable(cls.vbv)
        cls.bv.tag.test_value = cls.vbv
        cls.vbh = as_floatX(cls.rng.randn(cls.nh))
        cls.bh = T.as_tensor_variable(cls.vbh)
        cls.bh.tag.test_value = cls.vbh
        cls.vsigma = as_floatX(cls.rng.uniform(0.1, 5))
        cls.sigma = T.as_tensor_variable(cls.vsigma)
        cls.sigma.tag.test_value = cls.vsigma

        cls.E = GRBM_Type_1(transformer=MatrixMul(cls.W),
                            bias_vis=cls.bv,
                            bias_hid=cls.bh,
                            sigma=cls.sigma)

        cls.V = T.matrix()
        cls.V.tag.test_value = as_floatX(cls.rng.rand(cls.test_m, cls.nv))
        cls.H = T.matrix()
        cls.H.tag.test_value = as_floatX(cls.rng.rand(cls.test_m, cls.nh))

        cls.E_func = function([cls.V, cls.H], cls.E([cls.V, cls.H]))
        cls.F_func = function([cls.V], cls.E.free_energy(cls.V))
        cls.log_P_H_given_V_func = \
            function([cls.H, cls.V], cls.E.log_P_H_given_V(cls.H, cls.V))
        cls.score_func = function([cls.V], cls.E.score(cls.V))

        cls.F_of_V = cls.E.free_energy(cls.V)
        cls.dummy = T.sum(cls.F_of_V)
        cls.negscore = T.grad(cls.dummy, cls.V)
        cls.score = -cls.negscore

        cls.generic_score_func = function([cls.V], cls.score)
Example no. 27
    def get_monitoring_channels(self, data):
        X, Y = data
        rval = OrderedDict()
        
        W_context = self.W
        W_target = self.W
        b = self.b
        C = self.C

        sq_W_context = T.sqr(W_context)
        # sq_W_target = T.sqr(W_target)
        sq_b = T.sqr(b)
        sq_c = T.sqr(C)

        row_norms_W_context = T.sqrt(sq_W_context.sum(axis=1))
        col_norms_W_context = T.sqrt(sq_W_context.sum(axis=0))

        # row_norms_W_target = T.sqrt(sq_W_target.sum(axis=1))
        # col_norms_W_target = T.sqrt(sq_W_target.sum(axis=0))
        
        col_norms_b = T.sqrt(sq_b.sum(axis=0))

        
        col_norms_c = T.sqrt(sq_c.sum(axis=0))

        rval = OrderedDict([
                            ('W_context_row_norms_min'  , row_norms_W_context.min()),
                            ('W_context_row_norms_mean' , row_norms_W_context.mean()),
                            ('W_context_row_norms_max'  , row_norms_W_context.max()),
                            ('W_context_col_norms_min'  , col_norms_W_context.min()),
                            ('W_context_col_norms_mean' , col_norms_W_context.mean()),
                            ('W_context_col_norms_max'  , col_norms_W_context.max()),

                            # ('W_target_row_norms_min'  , row_norms_W_target.min()),
                            # ('W_target_row_norms_mean' , row_norms_W_target.mean()),
                            # ('W_target_row_norms_max'  , row_norms_W_target.max()),
                            # ('W_target_col_norms_min'  , col_norms_W_target.min()),
                            # ('W_target_col_norms_mean' , col_norms_W_target.mean()),
                            # ('W_target_col_norms_max'  , col_norms_W_target.max()),
                            
                            ('b_col_norms_min'  , col_norms_b.min()),
                            ('b_col_norms_mean' , col_norms_b.mean()),
                            ('b_col_norms_max'  , col_norms_b.max()),

                            ('c_col_norms_min'  , col_norms_c.min()),
                            ('c_col_norms_mean' , col_norms_c.mean()),
                            ('c_col_norms_max'  , col_norms_c.max()),
                            ])
            
        nll = self.cost_from_X(data)
        
        rval['perplexity'] = as_floatX(10 ** (nll/np.log(10)))
        return rval
Example no. 28
 def normalize_image(img):
     """
     Converts an image into the format used by ``read()``.
     """
     if img.mode == 'LAB' or img.mode == 'HSV':
         raise ValueError('%s image mode is not supported' % img.mode)
     img = img.convert('RGBA')
     imarray = as_floatX(numpy.array(img)) / 255.0
     assert numpy.all(imarray >= 0.0) and numpy.all(imarray <= 1.0)
     assert len(imarray.shape) == 3
     assert imarray.shape[2] == 4
     return imarray
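A usage sketch, assuming Pillow and the pylearn2 as_floatX import are available and that the helper is reachable at module level:

from PIL import Image

img = Image.new('RGB', (4, 4), (255, 128, 0))   # small synthetic image
arr = normalize_image(img)
assert arr.shape == (4, 4, 4)                   # RGBA planes, floats in [0, 1]
assert arr.min() >= 0.0 and arr.max() <= 1.0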
Example no. 29
    def test(store_inverse):
        rng = np.random.RandomState([1, 2, 3])
        X = as_floatX(rng.randn(15, 10))
        preprocessed_X = copy.copy(X)
        preprocessor = ZCA(store_inverse=store_inverse)

        dataset = DenseDesignMatrix(X=preprocessed_X,
                                    preprocessor=preprocessor,
                                    fit_preprocessor=True)

        preprocessed_X = dataset.get_design_matrix()

        assert_allclose(X, preprocessor.inverse(preprocessed_X))
Example no. 30
    def setUpClass(cls):
        cls.test_m = 2

        cls.rng = N.random.RandomState([1, 2, 3])
        cls.nv = 3
        cls.nh = 4

        cls.vW = cls.rng.randn(cls.nv, cls.nh)
        cls.W = sharedX(cls.vW)
        cls.vbv = as_floatX(cls.rng.randn(cls.nv))
        cls.bv = T.as_tensor_variable(cls.vbv)
        cls.bv.tag.test_value = cls.vbv
        cls.vbh = as_floatX(cls.rng.randn(cls.nh))
        cls.bh = T.as_tensor_variable(cls.vbh)
        cls.bh.tag.test_value = cls.vbh
        cls.vsigma = as_floatX(cls.rng.uniform(0.1, 5))
        cls.sigma = T.as_tensor_variable(cls.vsigma)
        cls.sigma.tag.test_value = cls.vsigma

        cls.E = GRBM_Type_1(transformer=MatrixMul(cls.W), bias_vis=cls.bv,
                            bias_hid=cls.bh, sigma=cls.sigma)

        cls.V = T.matrix()
        cls.V.tag.test_value = as_floatX(cls.rng.rand(cls.test_m, cls.nv))
        cls.H = T.matrix()
        cls.H.tag.test_value = as_floatX(cls.rng.rand(cls.test_m, cls.nh))

        cls.E_func = function([cls.V, cls.H], cls.E([cls.V, cls.H]))
        cls.F_func = function([cls.V], cls.E.free_energy(cls.V))
        cls.log_P_H_given_V_func = \
            function([cls.H, cls.V], cls.E.log_P_H_given_V(cls.H, cls.V))
        cls.score_func = function([cls.V], cls.E.score(cls.V))

        cls.F_of_V = cls.E.free_energy(cls.V)
        cls.dummy = T.sum(cls.F_of_V)
        cls.negscore = T.grad(cls.dummy, cls.V)
        cls.score = - cls.negscore

        cls.generic_score_func = function([cls.V], cls.score)
Example no. 31
    def test(store_inverse):
        rng = np.random.RandomState([1, 2, 3])
        X = as_floatX(rng.randn(15, 10))
        preprocessed_X = copy.copy(X)
        preprocessor = ZCA(store_inverse=store_inverse)

        dataset = DenseDesignMatrix(X=preprocessed_X,
                                    preprocessor=preprocessor,
                                    fit_preprocessor=True)

        preprocessed_X = dataset.get_design_matrix()

        assert_allclose(X, preprocessor.inverse(preprocessed_X))
Example no. 32
 def nll(self, data):
     X, Y = data
     z = self.score(X)
     z = z - z.max(axis=1).dimshuffle(0, 'x')
     log_prob = z - T.log(T.exp(z).sum(axis=1).dimshuffle(0, 'x'))
     Y = OneHotFormatter(self.dict_size).theano_expr(Y)
     Y = Y.reshape((Y.shape[0], Y.shape[2]))
     #import ipdb
     #ipdb.set_trace()
     log_prob_of = (Y * log_prob).sum(axis=1)
     assert log_prob_of.ndim == 1
     rval = as_floatX(log_prob_of.mean())
     return - rval
Example no. 33
    def test_alpha_jump(self):


        " tests that alpha is where I think it should be "

        stats = self.stats

        mean_h = stats.d['mean_h']
        new_mu = self.model.mu
        mean_hs = stats.d['mean_hs']
        mean_sq_s = stats.d['mean_sq_s']

        one = as_floatX(1.)
        two = as_floatX(2.)
        s_denom1 = mean_sq_s
        s_denom2 = - two * new_mu * mean_hs
        s_denom3 = T.sqr(new_mu) * mean_h

        s_denom = s_denom1 + s_denom2 + s_denom3
        new_alpha =  one / s_denom
        new_alpha.name = 'new_alpha'

        f = function([], new_alpha)

        Alphav = f()
        aAlphav = self.model.alpha.get_value()


        diffs = Alphav - aAlphav
        max_diff = np.abs(diffs).max()

        if max_diff > self.tol:
            print('Actual alpha: ')
            print(aAlphav)
            print('Expected alpha: ')
            print(Alphav)
            raise Exception("alpha deviates from its correct value by at most "+str(max_diff))
Example no. 34
    def test_alpha_jump(self):

        " tests that alpha is where I think it should be "

        stats = self.stats

        mean_h = stats.d['mean_h']
        new_mu = self.model.mu
        mean_hs = stats.d['mean_hs']
        mean_sq_s = stats.d['mean_sq_s']

        one = as_floatX(1.)
        two = as_floatX(2.)
        s_denom1 = mean_sq_s
        s_denom2 = -two * new_mu * mean_hs
        s_denom3 = T.sqr(new_mu) * mean_h

        s_denom = s_denom1 + s_denom2 + s_denom3
        new_alpha = one / s_denom
        new_alpha.name = 'new_alpha'

        f = function([], new_alpha)

        Alphav = f()
        aAlphav = self.model.alpha.get_value()

        diffs = Alphav - aAlphav
        max_diff = np.abs(diffs).max()

        if max_diff > self.tol:
            print('Actual alpha: ')
            print(aAlphav)
            print('Expected alpha: ')
            print(Alphav)
            raise Exception(
                "alpha deviates from its correct value by at most " +
                str(max_diff))
Example no. 35
def test_multiple_samples_allowed():
    """
    VAE allows multiple samples per data point
    """
    encoding_model = MLP(layers=[Linear(layer_name="h", dim=10, irange=0.01)])
    decoding_model = MLP(layers=[Linear(layer_name="h", dim=10, irange=0.01)])
    prior = DiagonalGaussianPrior()
    conditional = BernoulliVector(mlp=decoding_model, name="conditional")
    posterior = DiagonalGaussian(mlp=encoding_model, name="posterior")
    vae = VAE(nvis=10, prior=prior, conditional=conditional, posterior=posterior, nhid=5)
    X = T.matrix("X")
    lower_bound = vae.log_likelihood_lower_bound(X, num_samples=10)
    f = theano.function(inputs=[X], outputs=lower_bound)
    rng = make_np_rng(default_seed=11223)
    f(as_floatX(rng.uniform(size=(10, 10))))
Example no. 36
def test_multiple_samples_allowed():
    """
    VAE allows multiple samples per data point
    """
    encoding_model = MLP(layers=[Linear(layer_name='h', dim=10, irange=0.01)])
    decoding_model = MLP(layers=[Linear(layer_name='h', dim=10, irange=0.01)])
    prior = DiagonalGaussianPrior()
    conditional = BernoulliVector(mlp=decoding_model, name='conditional')
    posterior = DiagonalGaussian(mlp=encoding_model, name='posterior')
    vae = VAE(nvis=10, prior=prior, conditional=conditional,
              posterior=posterior, nhid=5)
    X = T.matrix('X')
    lower_bound = vae.log_likelihood_lower_bound(X, num_samples=10)
    f = theano.function(inputs=[X], outputs=lower_bound)
    rng = make_np_rng(default_seed=11223)
    f(as_floatX(rng.uniform(size=(10, 10))))
Example no. 37
    def test_zero_image(self):
        """
        Test that a zero-valued image does not cause any division by zero
        """

        X = as_floatX(np.zeros((5, 32 * 32 * 3)))

        axes = ['b', 0, 1, 'c']
        view_converter = dense_design_matrix.DefaultViewConverter((32, 32, 3),
                                                                  axes)
        dataset = DenseDesignMatrix(X=X, view_converter=view_converter)
        dataset.axes = axes
        preprocessor = LeCunLCN(img_shape=[32, 32])
        dataset.apply_preprocessor(preprocessor)
        result = dataset.get_design_matrix()

        assert isfinite(result)
Example no. 38
    def test_zero_image(self):
        """
        Test that a zero-valued image does not cause any division by zero
        """

        X = as_floatX(np.zeros((5, 32 * 32 * 3)))

        axes = ['b', 0, 1, 'c']
        view_converter = dense_design_matrix.DefaultViewConverter((32, 32, 3),
                                                                  axes)
        dataset = DenseDesignMatrix(X=X, view_converter=view_converter)
        dataset.axes = axes
        preprocessor = LeCunLCN(img_shape=[32, 32])
        dataset.apply_preprocessor(preprocessor)
        result = dataset.get_design_matrix()

        assert isfinite(result)
Example no. 39
    def test_channel(self):
        """
        Test that it works with a different number of channels given as argument
        """

        rng = np.random.RandomState([1, 2, 3])
        X = as_floatX(rng.randn(5, 32 * 32 * 3))

        axes = ['b', 0, 1, 'c']
        view_converter = dense_design_matrix.DefaultViewConverter((32, 32, 3),
                                                                  axes)
        dataset = DenseDesignMatrix(X=X, view_converter=view_converter)
        dataset.axes = axes
        preprocessor = LeCunLCN(img_shape=[32, 32], channels=[1, 2])
        dataset.apply_preprocessor(preprocessor)
        result = dataset.get_design_matrix()

        assert isfinite(result)
Example no. 40
    def test_channel(self):
        """
        Test that it works with a different number of channels given as argument
        """

        rng = np.random.RandomState([1, 2, 3])
        X = as_floatX(rng.randn(5, 32 * 32 * 3))

        axes = ['b', 0, 1, 'c']
        view_converter = dense_design_matrix.DefaultViewConverter((32, 32, 3),
                                                                  axes)
        dataset = DenseDesignMatrix(X=X, view_converter=view_converter)
        dataset.axes = axes
        preprocessor = LeCunLCN(img_shape=[32, 32], channels=[1, 2])
        dataset.apply_preprocessor(preprocessor)
        result = dataset.get_design_matrix()

        assert isfinite(result)
Example no. 41
    def test_zero_vector(self):
        """ Test that passing in the zero vector does not result in
            a divide by 0 """

        dataset = DenseDesignMatrix(X=as_floatX(np.zeros((1, 1))))

        # the settings of subtract_mean and use_norm are not relevant to
        # the test
        # std_bias = 0.0 is the only value for which there should be a risk
        # of failure occurring
        preprocessor = GlobalContrastNormalization(subtract_mean=True, sqrt_bias=0.0, use_std=True)

        dataset.apply_preprocessor(preprocessor)

        result = dataset.get_design_matrix()

        assert not np.any(np.isnan(result))
        assert not np.any(np.isinf(result))
Example no. 42
def test_conditional_encode_conditional_parameters():
    """
    Conditional.encode_conditional_parameters calls its MLP's fprop method
    """
    mlp = MLP(layers=[Linear(layer_name="h", dim=5, irange=0.01, max_col_norm=0.01)])
    conditional = DummyConditional(mlp=mlp, name="conditional")
    vae = DummyVAE()
    conditional.set_vae(vae)
    input_space = VectorSpace(dim=5)
    conditional.initialize_parameters(input_space=input_space, ndim=5)

    X = T.matrix("X")
    mlp_Y1, mlp_Y2 = mlp.fprop(X)
    cond_Y1, cond_Y2 = conditional.encode_conditional_params(X)
    f = theano.function([X], [mlp_Y1, mlp_Y2, cond_Y1, cond_Y2])
    rval = f(as_floatX(numpy.random.uniform(size=(10, 5))))
    numpy.testing.assert_allclose(rval[0], rval[2])
    numpy.testing.assert_allclose(rval[1], rval[3])
Example no. 43
    def test_grad_alpha(self):
        """tests that the gradient of the log probability with respect to alpha
        matches my analytical derivation """

        #self.model.set_param_values(self.new_params)

        g = T.grad(self.prob,
                   self.model.alpha,
                   consider_constant=self.mf_obs.values())

        mu = self.model.mu
        alpha = self.model.alpha
        half = as_floatX(.5)

        mean_sq_s = self.stats.d['mean_sq_s']
        mean_hs = self.stats.d['mean_hs']
        mean_h = self.stats.d['mean_h']

        term1 = -half * mean_sq_s

        term2 = mu * mean_hs

        term3 = -half * T.sqr(mu) * mean_h

        term4 = half / alpha

        analytical = term1 + term2 + term3 + term4

        f = function([], (g, analytical))

        gv, av = f()

        assert gv.shape == av.shape

        max_diff = np.abs(gv - av).max()

        if max_diff > self.tol:
            print "gv"
            print gv
            print "av"
            print av
            raise Exception(
                "analytical gradient on alpha deviates from theano gradient on alpha by up to "
                + str(max_diff))
Example no. 44
    def gibbs_step_for_v(self, v, rng):
        """
        Do a round of block Gibbs sampling given visible configuration

        Parameters
        ----------
        v  : tensor_like
            Theano symbolic representing the visible unit states for a batch of
            training examples (or negative phase particles), with the first
            dimension indexing training examples and the second indexing data
            dimensions.
        rng : RandomStreams object
            Random number generator to use for sampling the hidden and visible
            units.

        Returns
        -------
        v_sample : tensor_like
            Theano symbolic representing the new visible unit state after one
            round of Gibbs sampling.
        locals : dict
            Contains the following auxiliary state as keys (all symbolics
            except shape tuples):
             * `h_mean`: the returned value from `mean_h_given_v`
             * `h_mean_shape`: shape tuple indicating the size of `h_mean` and
               `h_sample`
             * `h_sample`: the stochastically sampled hidden units
             * `v_mean_shape`: shape tuple indicating the shape of `v_mean` and
               `v_sample`
             * `v_mean`: the returned value from `mean_v_given_h`
             * `v_sample`: the stochastically sampled visible units
        """
        h_mean = self.mean_h_given_v(v)
        # For binary hidden units
        # TODO: factor further to extend to other kinds of hidden units
        #       (e.g. spike-and-slab)
        h_mean_shape = self.batch_size, self.nhid
        h_sample = as_floatX(rng.uniform(size=h_mean_shape) < h_mean)
        v_mean_shape = self.batch_size, self.nvis
        # v_mean is always based on h_sample, not h_mean, because we don't
        # want h transmitting more than one bit of information per unit.
        v_mean = self.mean_v_given_h(h_sample)
        v_sample = self.sample_visibles([v_mean], v_mean_shape, rng)
        return v_sample, locals()
Example no. 45
    def __init__(self, params, base_lr, anneal_start=None, **kwargs):
        """
        Construct an SGDOptimizer.

        Parameters
        ----------
        params : object or list
            Either a Model object with a .get_params() method, or a list of
            parameters to be optimized.
        base_lr : float
            The base learning rate before annealing or parameter-specific
            scaling.
        anneal_start : int
            Number of steps after which to start annealing the learning
            rate at a 1/t schedule, where t is the number of stochastic
            gradient updates.

        Notes
        -----
        The formula to compute the effective learning rate on a parameter is:
        <paramname>_lr * max(0.0, min(base_lr, lr_anneal_start/(iteration+1)))

        Parameter-specific learning rates can be set by passing keyword
        arguments <name>_lr, where name is the .name attribute of a given
        parameter.

        Parameter-specific bounding values can be specified by passing
        keyword arguments <param>_clip, which should be a (min, max) pair.
        """
        if hasattr(params, '__iter__'):
            self.params = params
        elif hasattr(params, 'get_params') and hasattr(params.get_params,
                                                       '__call__'):
            self.params = params.get_params()
        else:
            raise ValueError("SGDOptimizer couldn't figure out what to do "
                             "with first argument: '%s'" % str(params))
        if anneal_start is None:
            self.anneal_start = None
        else:
            self.anneal_start = as_floatX(anneal_start)

        # Set up the clipping values
        self.clipping_values = {}
Example no. 46
    def __init__(self, params, base_lr, anneal_start=None, **kwargs):
        """
        Construct an SGDOptimizer.

        Parameters
        ----------
        params : object or list
            Either a Model object with a .get_params() method, or a list of
            parameters to be optimized.
        base_lr : float
            The base learning rate before annealing or parameter-specific
            scaling.
        anneal_start : int
            Number of steps after which to start annealing the learning
            rate at a 1/t schedule, where t is the number of stochastic
            gradient updates.

        Notes
        -----
        The formula to compute the effective learning rate on a parameter is:
        <paramname>_lr * max(0.0, min(base_lr, lr_anneal_start/(iteration+1)))

        Parameter-specific learning rates can be set by passing keyword
        arguments <name>_lr, where name is the .name attribute of a given
        parameter.

        Parameter-specific bounding values can be specified by passing
        keyword arguments <param>_clip, which should be a (min, max) pair.
        """
        if hasattr(params, '__iter__'):
            self.params = params
        elif hasattr(params, 'get_params') and hasattr(params.get_params, '__call__'):
            self.params = params.get_params()
        else:
            raise ValueError("SGDOptimizer couldn't figure out what to do "
                             "with first argument: '%s'" % str(params))
        if anneal_start is None:
            self.anneal_start = None
        else:
            self.anneal_start = as_floatX(anneal_start)

        # Set up the clipping values
        self.clipping_values = {}
Example no. 47
    def test_zero_vector(self):
        """ Test that passing in the zero vector does not result in
            a divide by 0 """

        dataset = DenseDesignMatrix(X=as_floatX(np.zeros((1, 1))))

        # the settings of subtract_mean and use_norm are not relevant to
        # the test
        # std_bias = 0.0 is the only value for which there should be a risk
        # of failure occurring
        preprocessor = GlobalContrastNormalization(subtract_mean=True,
                                                   sqrt_bias=0.0,
                                                   use_std=True)

        dataset.apply_preprocessor(preprocessor)

        result = dataset.get_design_matrix()

        assert isfinite(result)
Example no. 48
def test_zca():
    """
    Confirm that ZCA.inv_P_ is the correct inverse of ZCA.P_.
    There's a lot else about the ZCA class that could be tested here.
    """

    rng = np.random.RandomState([1, 2, 3])
    X = as_floatX(rng.randn(15, 10))
    preprocessor = ZCA()
    preprocessor.fit(X)

    def is_identity(matrix):
        identity = np.identity(matrix.shape[0], theano.config.floatX)
        abs_difference = np.abs(identity - matrix)
        return (abs_difference < .0001).all()

    assert preprocessor.P_.shape == (X.shape[1], X.shape[1])
    assert not is_identity(preprocessor.P_)
    assert is_identity(np.dot(preprocessor.P_, preprocessor.inv_P_))
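ZCA whitening is easy to reconstruct in NumPy, which makes the P_ / inv_P_ relationship concrete. This is a sketch built from an eigendecomposition of the covariance with a small regularizer, not pylearn2's ZCA class:

import numpy as np

rng = np.random.RandomState(0)
X = rng.randn(15, 10)
Xc = X - X.mean(axis=0)

cov = Xc.T.dot(Xc) / (Xc.shape[0] - 1)
eigval, eigvec = np.linalg.eigh(cov)
eps = 1e-6  # regularizer, playing the role of a filter-bias term

# Whitening matrix and its inverse ("coloring" matrix).
P = eigvec.dot(np.diag(1.0 / np.sqrt(eigval + eps))).dot(eigvec.T)
inv_P = eigvec.dot(np.diag(np.sqrt(eigval + eps))).dot(eigvec.T)

assert np.allclose(P.dot(inv_P), np.eye(10), atol=1e-8)
assert np.allclose(np.cov(Xc.dot(P), rowvar=False), np.eye(10), atol=1e-3)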
Example no. 49
def test_conditional_encode_conditional_parameters():
    """
    Conditional.encode_conditional_parameters calls its MLP's fprop method
    """
    mlp = MLP(layers=[Linear(layer_name='h', dim=5, irange=0.01,
                             max_col_norm=0.01)])
    conditional = DummyConditional(mlp=mlp, name='conditional')
    vae = DummyVAE()
    conditional.set_vae(vae)
    input_space = VectorSpace(dim=5)
    conditional.initialize_parameters(input_space=input_space, ndim=5)

    X = T.matrix('X')
    mlp_Y1, mlp_Y2 = mlp.fprop(X)
    cond_Y1, cond_Y2 = conditional.encode_conditional_params(X)
    f = theano.function([X], [mlp_Y1, mlp_Y2, cond_Y1, cond_Y2])
    rval = f(as_floatX(numpy.random.uniform(size=(10, 5))))
    numpy.testing.assert_allclose(rval[0], rval[2])
    numpy.testing.assert_allclose(rval[1], rval[3])
Example no. 50
def test_zca():
    """
    Confirm that ZCA.inv_P_ is the correct inverse of ZCA.P_.
    There's a lot else about the ZCA class that could be tested here.
    """

    rng = np.random.RandomState([1, 2, 3])
    X = as_floatX(rng.randn(15, 10))
    preprocessor = ZCA()
    preprocessor.fit(X)

    def is_identity(matrix):
        identity = np.identity(matrix.shape[0], theano.config.floatX)
        abs_difference = np.abs(identity - matrix)
        return (abs_difference < .0001).all()

    assert preprocessor.P_.shape == (X.shape[1], X.shape[1])
    assert not is_identity(preprocessor.P_)
    assert is_identity(np.dot(preprocessor.P_, preprocessor.inv_P_))
Example no. 51
    def sample_visibles(self, params, shape, rng):
        """
        Stochastically sample the visible units given hidden unit
        configurations for a set of training examples.

        Parameters
        ----------
        params : list
            List of the necessary parameters to sample :math:`p(v|h)`. In the
            case of a binary-binary RBM this is a single-element list
            containing the symbolic representing :math:`p(v|h)`, as returned
            by `mean_v_given_h`.

        Returns
        -------
        vprime : tensor_like
            Theano symbolic representing stochastic samples from :math:`p(v|h)`
        """
        v_mean = params[0]
        return as_floatX(rng.uniform(size=shape) < v_mean)
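The uniform-threshold comparison is the standard trick for drawing Bernoulli samples with the given means; as_floatX then turns the boolean mask into 0.0/1.0 values. A NumPy sketch checking that the empirical means track v_mean:

import numpy as np

rng = np.random.RandomState(0)
p = np.array([0.1, 0.5, 0.9])                            # per-unit P(v_i = 1 | h)
samples = (rng.uniform(size=(100000, 3)) < p).astype('float32')

assert ((samples == 0.) | (samples == 1.)).all()
assert np.abs(samples.mean(axis=0) - p).max() < 0.01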
Example no. 52
def test_rgb_yuv():
    """
    Test on a random image that the preprocessor loads and works without
    any error and doesn't result in any NaN or Inf values

    """

    rng = np.random.RandomState([1, 2, 3])
    X = as_floatX(rng.randn(5, 32 * 32 * 3))

    axes = ['b', 0, 1, 'c']
    view_converter = dense_design_matrix.DefaultViewConverter((32, 32, 3),
                                                              axes)
    dataset = DenseDesignMatrix(X=X, view_converter=view_converter)
    dataset.axes = axes
    preprocessor = RGB_YUV()
    dataset.apply_preprocessor(preprocessor)
    result = dataset.get_design_matrix()

    assert isfinite(result)
Example no. 53
    def test_random_image(self):
        """
        Test on a random image that the preprocessor loads and works without
        any error and doesn't result in any NaN or Inf values

        """

        rng = np.random.RandomState([1, 2, 3])
        X = as_floatX(rng.randn(5, 32 * 32 * 3))

        axes = ['b', 0, 1, 'c']
        view_converter = dense_design_matrix.DefaultViewConverter((32, 32, 3),
                                                                  axes)
        dataset = DenseDesignMatrix(X=X, view_converter=view_converter)
        dataset.axes = axes
        preprocessor = LeCunLCN(img_shape=[32, 32])
        dataset.apply_preprocessor(preprocessor)
        result = dataset.get_design_matrix()

        assert not np.any(np.isnan(result))
        assert not np.any(np.isinf(result))
Example no. 54
    def gibbs_step_for_v(self, v, rng):
        # Sometimes, the number of examples in the data set is not a
        # multiple of self.batch_size.
        batch_size = v.shape[0]

        # sample h given v
        h_mean = self.mean_h_given_v(v)
        h_mean_shape = (batch_size, self.nhid)
        h_sample = as_floatX(rng.uniform(size=h_mean_shape) < h_mean)

        # sample s given (v,h)
        s_mu, s_var = self.mean_var_s_given_v_h1(v)
        s_mu_shape = (batch_size, self.nslab)
        s_sample = s_mu + rng.normal(size=s_mu_shape) * tensor.sqrt(s_var)
        #s_sample=(s_sample.reshape()*h_sample.dimshuffle(0,1,'x')).flatten(2)

        # sample v given (s,h)
        v_mean, v_var = self.mean_var_v_given_h_s(h_sample, s_sample)
        v_mean_shape = (batch_size, self.nvis)
        v_sample = rng.normal(size=v_mean_shape) * tensor.sqrt(v_var) + v_mean

        del batch_size
        return v_sample, locals()
Example no. 55
 def setup(self):
     rng = np.random.RandomState([1, 2, 3])
     self.dataset = DenseDesignMatrix(X=as_floatX(rng.randn(15, 10)),
                                      y=as_floatX(rng.randn(15, 1)))
     self.num_components = self.dataset.get_design_matrix().shape[1] - 1
Example no. 56
    def __init__(self,
                 params,
                 base_lr,
                 anneal_start=None,
                 use_adagrad=False,
                 **kwargs):
        """
        Construct an SGDOptimizer.

        Parameters
        ----------
        params : object or list
            Either a Model object with a .get_params() method, or a list of
            parameters to be optimized.
        base_lr : float
            The base learning rate before annealing or parameter-specific
            scaling.
        anneal_start : int
            Number of steps after which to start annealing the learning
            rate at a 1/t schedule, where t is the number of stochastic
            gradient updates.
        use_adagrad : bool
            If set to True, the 'adagrad' adaptive learning rate scheme is
            used and base_lr is used as e0.

        Notes
        -----
        The formula to compute the effective learning rate on a parameter is:
        <paramname>_lr * max(0.0, min(base_lr, lr_anneal_start/(iteration+1)))

        Parameter-specific learning rates can be set by passing keyword
        arguments <name>_lr, where name is the .name attribute of a given
        parameter.

        Parameter-specific bounding values can be specified by passing
        keyword arguments <param>_clip, which should be a (min, max) pair.

        Adagrad is recommended with sparse inputs. It normalizes the base
        learning rate of a parameter theta_i by the accumulated 2-norm of its
        gradient: e{ti} = e0 / sqrt( sum_t (dL_t / dtheta_i)^2 )
        """
        if hasattr(params, '__iter__'):
            self.params = params
        elif hasattr(params, 'get_params') and hasattr(params.get_params,
                                                       '__call__'):
            self.params = params.get_params()
        else:
            raise ValueError("SGDOptimizer couldn't figure out what to do "
                             "with first argument: '%s'" % str(params))
        if anneal_start is None:
            self.anneal_start = None
        else:
            self.anneal_start = as_floatX(anneal_start)

        # Create accumulators and epsilon0's
        self.use_adagrad = use_adagrad
        if self.use_adagrad:
            self.accumulators = {}
            self.e0s = {}
            for param in self.params:
                self.accumulators[param] = theano.shared(value=as_floatX(0.),
                                                         name='acc_%s' %
                                                         param.name)
                self.e0s[param] = as_floatX(base_lr)

        # Set up the clipping values
        self.clipping_values = {}
        # Keep track of names already seen
        clip_names_seen = set()
        for parameter in self.params:
            clip_name = '%s_clip' % parameter.name
            if clip_name in kwargs:
                if clip_name in clip_names_seen:
                    print >> sys.stderr, (
                        'Warning: In SGDOptimizer, '
                        'at least two parameters have the same name. '
                        'Both will be affected by the keyword argument '
                        '%s.' % clip_name)
                clip_names_seen.add(clip_name)
                p_min, p_max = kwargs[clip_name]
                assert p_min <= p_max
                self.clipping_values[parameter] = (p_min, p_max)

        # Check that no ..._clip keyword is being ignored
        for clip_name in clip_names_seen:
            kwargs.pop(clip_name)
        for kw in kwargs:
            if kw[-5:] == '_clip':
                print('Warning: in SGDOptimizer, '
                      'keyword argument %s will be ignored, '
                      'because no parameter was found with name %s.' %
                      (kw, kw[:-5]), file=sys.stderr)

        self.learning_rates_setup(base_lr, **kwargs)
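The accumulator/e0 bookkeeping set up above supports the Adagrad rule quoted in the docstring: each parameter's base rate e0 is divided by the accumulated 2-norm of its gradients. A minimal NumPy sketch of that per-parameter scaling (not the Theano update graph the optimizer actually builds):

import numpy as np

e0 = 0.1                      # base_lr, reused as Adagrad's e0
theta = np.zeros(3)
accumulator = np.zeros(3)     # running sum of squared gradients, one entry per parameter

for grad in [np.array([1.0, 0.1, 0.0]), np.array([1.0, 0.1, 0.0])]:
    accumulator += grad ** 2
    scaled_lr = e0 / np.sqrt(accumulator + 1e-12)   # epsilon only to avoid division by zero
    theta -= scaled_lr * grad

# Frequently/strongly updated coordinates end up with smaller effective steps.
assert scaled_lr[0] < scaled_lr[1]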
Example no. 57
import numpy as N
from theano.compat.six.moves import xrange
import theano.tensor as T
from theano import function
from pylearn2.utils import as_floatX
from pylearn2.utils import sharedX
from pylearn2.linear.matrixmul import MatrixMul

test_m = 2

rng = N.random.RandomState([1, 2, 3])
nv = 3
nh = 4

vW = rng.randn(nv, nh)
W = sharedX(vW)
vbv = as_floatX(rng.randn(nv))
bv = T.as_tensor_variable(vbv)
bv.tag.test_value = vbv
vbh = as_floatX(rng.randn(nh))
bh = T.as_tensor_variable(vbh)
bh.tag.test_value = vbh
vsigma = as_floatX(rng.uniform(0.1, 5))
sigma = T.as_tensor_variable(vsigma)
sigma.tag.test_value = vsigma

E = GRBM_Type_1(transformer=MatrixMul(W),
                bias_vis=bv,
                bias_hid=bh,
                sigma=sigma)

V = T.matrix()