Example #1
    def __init__(self):
        # Define some model hyperparameters to work with MNIST images!
        input_size = 28 * 28  # dimensions of image
        hidden_size = 1000  # number of hidden units - generally bigger than input size for DAE

        # Now, define the symbolic input to the model (Theano)
        # We use a matrix rather than a vector so that minibatch processing can be done in parallel.
        x = T.matrix("X")
        self.inputs = [x]

        # Build the model's parameters - a weight matrix and two bias vectors
        W = get_weights_uniform(shape=(input_size, hidden_size), name="W")
        b0 = get_bias(shape=input_size, name="b0")
        b1 = get_bias(shape=hidden_size, name="b1")
        self.params = [W, b0, b1]

        # Perform the computation for a denoising autoencoder!
        # first, add noise to (corrupt) the input
        corrupted_input = salt_and_pepper(input=x, noise_level=0.4)
        # next, run the hidden layer given the inputs (the encoding function)
        hiddens = tanh(T.dot(corrupted_input, W) + b1)
        # finally, create the reconstruction from the hidden layer (we tie the weights with W.T)
        reconstruction = sigmoid(T.dot(hiddens, W.T) + b0)
        # the training cost is reconstruction error - with MNIST this is binary cross-entropy
        self.train_cost = binary_crossentropy(output=reconstruction, target=x)

        # Compile everything into a Theano function for prediction!
        # When using real-world data in predictions, we wouldn't corrupt the input first.
        # Therefore, create another version of the hiddens and reconstruction without adding the noise
        hiddens_predict = tanh(T.dot(x, W) + b1)
        self.recon_predict = sigmoid(T.dot(hiddens_predict, W.T) + b0)
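The comments above mention compiling a Theano function for prediction, but the snippet itself only builds the symbolic graph. A minimal sketch of how the training and prediction functions could be compiled from these expressions, assuming `import theano` and `import theano.tensor as T` at module level, that binary_crossentropy returns a scalar mean cost, and a plain SGD update rule (none of this continuation is part of the original example):

        # hypothetical continuation of __init__ -- compile training and prediction functions
        # gradients of the scalar training cost with respect to the parameters
        gradients = T.grad(self.train_cost, wrt=self.params)
        # plain SGD updates (learning rate chosen arbitrarily for illustration)
        learning_rate = 0.1
        updates = [(param, param - learning_rate * grad)
                   for param, grad in zip(self.params, gradients)]
        self.f_train = theano.function(inputs=[x], outputs=self.train_cost, updates=updates)
        self.f_predict = theano.function(inputs=[x], outputs=self.recon_predict)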
Example #2
    def __init__(self):
        # Define some model hyperparameters to work with MNIST images!
        input_size = 28*28  # dimensions of image
        hidden_size = 1000  # number of hidden units - generally bigger than input size for DAE

        # Now, define the symbolic input to the model (Theano)
        # We use a matrix rather than a vector so that minibatch processing can be done in parallel.
        x = T.matrix("X")
        self.inputs = [x]

        # Build the model's parameters - a weight matrix and two bias vectors
        W = get_weights_uniform(shape=(input_size, hidden_size), name="W")
        b0 = get_bias(shape=input_size, name="b0")
        b1 = get_bias(shape=hidden_size, name="b1")
        self.params = [W, b0, b1]

        # Perform the computation for a denoising autoencoder!
        # first, add noise to (corrupt) the input
        corrupted_input = salt_and_pepper_custom(input=x)
        # next, run the hidden layer given the inputs (the encoding function)
        hiddens = tanh(T.dot(corrupted_input, W) + b1)
        # finally, create the reconstruction from the hidden layer (we tie the weights with W.T)
        reconstruction = sigmoid(T.dot(hiddens, W.T) + b0)
        # the training cost is reconstruction error - with MNIST this is binary cross-entropy
        self.train_cost = binary_crossentropy(output=reconstruction, target=x)

        # Compile everything into a Theano function for prediction!
        # When using real-world data in predictions, we wouldn't corrupt the input first.
        # Therefore, create another version of the hiddens and reconstruction without adding the noise
        hiddens_predict = tanh(T.dot(x, W) + b1)
        self.recon_predict = sigmoid(T.dot(hiddens_predict, W.T) + b0)
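Example #2 is the same model, but it delegates corruption to a custom routine, salt_and_pepper_custom, whose definition is not shown. Salt-and-pepper noise typically picks a random subset of pixels and forces each one to 0 or 1 with equal probability. A rough sketch of such a helper using Theano's random streams (an illustration of the idea, not the original salt_and_pepper_custom):

import theano
import theano.tensor as T
from theano.sandbox.rng_mrg import MRG_RandomStreams

def salt_and_pepper_noise(input, noise_level=0.2, rng=None):
    # illustrative salt-and-pepper corruption; the real salt_and_pepper_custom may differ
    rng = rng if rng is not None else MRG_RandomStreams(seed=23455)
    # keep mask: 1 = leave the pixel alone, 0 = corrupt it
    keep = rng.binomial(size=input.shape, n=1, p=1 - noise_level,
                        dtype=theano.config.floatX)
    # for corrupted pixels, choose salt (1) or pepper (0) with equal probability
    salt = rng.binomial(size=input.shape, n=1, p=0.5,
                        dtype=theano.config.floatX)
    return input * keep + T.eq(keep, 0) * salt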
Example #3
    def _build_rbm(self):
        """
        Creates the computation graph.

        Returns
        -------
        theano expression
            The cost expression - free energy.
        dict
            Monitors dictionary - pseudo-log likelihood and binary cross-entropy expressions for tracking training progress.
        dict
            Updates dictionary - updates from the Gibbs sampling process.
        tensor
            Last sample in the chain - last generated visible sample from the Gibbs process.
        tensor
            Last hidden sample in the chain from the Gibbs process.
        """
        # initialize from visibles if we aren't generating from some hiddens
        if self.hiddens_init is None:
            [_, v_chain, _, h_chain], updates = theano.scan(
                fn=lambda v: self._gibbs_step_vhv(v),
                outputs_info=[None, self.input, None, None],
                n_steps=self.k)
        # initialize from hiddens
        else:
            [_, v_chain, _, h_chain], updates = theano.scan(
                fn=lambda h: self._gibbs_step_hvh(h),
                outputs_info=[None, None, None, self.hiddens_init],
                n_steps=self.k)

        v_sample = v_chain[-1]
        h_sample = h_chain[-1]

        mean_v, _, _, _ = self._gibbs_step_vhv(v_sample)

        # some monitors
        # get rid of the -inf for the pseudo_log monitor (due to 0's and 1's in mean_v)
        # eps = 1e-8
        # zero_indices = T.eq(mean_v, 0.0).nonzero()
        # one_indices = T.eq(mean_v, 1.0).nonzero()
        # mean_v = T.inc_subtensor(x=mean_v[zero_indices], y=eps)
        # mean_v = T.inc_subtensor(x=mean_v[one_indices], y=-eps)
        pseudo_log = T.xlogx.xlogy0(self.input, mean_v) + T.xlogx.xlogy0(
            1 - self.input, 1 - mean_v)
        pseudo_log = pseudo_log.sum() / self.input.shape[0]
        crossentropy = T.mean(binary_crossentropy(mean_v, self.input))

        monitors = {'pseudo-log': pseudo_log, 'crossentropy': crossentropy}

        # the free-energy cost function!
        # treat v_sample as constant when computing gradients of the cost function
        # disconnected_grad removes v_sample from the gradient computation entirely; to give it a
        # zero gradient instead, use theano.gradient.zero_grad
        v_sample_constant = theano.gradient.disconnected_grad(v_sample)
        # v_sample_constant = v_sample
        cost = (self.free_energy(self.input) -
                self.free_energy(v_sample_constant)) / self.input.shape[0]

        return cost, monitors, updates, v_sample, h_sample
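_build_rbm relies on a free_energy method that is not shown here. For an RBM with binary units, the free energy of a visible configuration v is F(v) = -v.b_v - sum_j softplus(v.W + b_h)_j. A hedged sketch of what that method might look like (the attribute names self.W, self.b_v, and self.b_h are assumptions about the surrounding class; summing over the batch matches the division by self.input.shape[0] above):

    def free_energy(self, v):
        # sketch of binary-RBM free energy, summed over the minibatch;
        # attribute names (self.W, self.b_v, self.b_h) are assumed, not from the original code
        vbias_term = T.dot(v, self.b_v)
        hidden_term = T.sum(T.nnet.softplus(T.dot(v, self.W) + self.b_h), axis=1)
        return T.sum(-vbias_term - hidden_term)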
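The cost, monitors, and scan updates returned by _build_rbm still have to be wired into a callable training step. One plausible way to do this with plain SGD (a sketch; self.params, the learning rate, and the method name are assumptions, and self.input is assumed to be a symbolic Theano matrix rather than a shared variable):

    def _compile_train_function(self, learning_rate=0.1):
        # sketch: compile the CD-k cost and Gibbs-sampling updates into a training step
        cost, monitors, updates, v_sample, h_sample = self._build_rbm()
        gradients = T.grad(cost, wrt=self.params)
        # fold SGD parameter updates into the scan updates before compiling
        for param, grad in zip(self.params, gradients):
            updates[param] = param - learning_rate * grad
        return theano.function(inputs=[self.input],
                               outputs=[cost, monitors['crossentropy']],
                               updates=updates)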