def __init__(self):
    # Define some model hyperparameters to work with MNIST images!
    input_size = 28 * 28  # dimensions of an image
    hidden_size = 1000  # number of hidden units - generally bigger than input size for a DAE

    # Now, define the symbolic input to the model (Theano).
    # We use a matrix rather than a vector so that minibatch processing can be done in parallel.
    x = T.matrix("X")
    self.inputs = [x]

    # Build the model's parameters - a weight matrix and two bias vectors
    W = get_weights_uniform(shape=(input_size, hidden_size), name="W")
    b0 = get_bias(shape=input_size, name="b0")
    b1 = get_bias(shape=hidden_size, name="b1")
    self.params = [W, b0, b1]

    # Perform the computation for a denoising autoencoder!
    # first, add noise to (corrupt) the input
    corrupted_input = salt_and_pepper(input=x, noise_level=0.4)
    # next, run the hidden layer given the inputs (the encoding function)
    hiddens = tanh(T.dot(corrupted_input, W) + b1)
    # finally, create the reconstruction from the hidden layer (we tie the weights with W.T)
    reconstruction = sigmoid(T.dot(hiddens, W.T) + b0)
    # the training cost is reconstruction error - with MNIST this is binary cross-entropy
    self.train_cost = binary_crossentropy(output=reconstruction, target=x)

    # Compile everything into a Theano function for prediction!
    # When using real-world data in predictions, we wouldn't corrupt the input first.
    # Therefore, create another version of the hiddens and reconstruction without adding the noise.
    hiddens_predict = tanh(T.dot(x, W) + b1)
    self.recon_predict = sigmoid(T.dot(hiddens_predict, W.T) + b0)
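# --- Usage sketch (not part of the original code): one way the symbolic graph above could be
# --- trained and used with plain Theano. The class name `DenoisingAutoencoder` and the arrays
# --- `train_batch` / `test_batch` (shape (batch_size, 784), float32) are assumptions here;
# --- a framework optimizer could be used instead of the hand-rolled SGD updates below.
import theano
import theano.tensor as T

dae = DenoisingAutoencoder()

# simple SGD updates on the model's parameters from the reconstruction cost
learning_rate = 0.1
gradients = T.grad(dae.train_cost, wrt=dae.params)
updates = [(param, param - learning_rate * grad)
           for param, grad in zip(dae.params, gradients)]

# one compiled function for a training step, one for noise-free reconstruction
train_fn = theano.function(inputs=dae.inputs, outputs=dae.train_cost, updates=updates)
predict_fn = theano.function(inputs=dae.inputs, outputs=dae.recon_predict)

# cost = train_fn(train_batch)
# reconstructions = predict_fn(test_batch)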
def __init__(self):
    # Define some model hyperparameters to work with MNIST images!
    input_size = 28 * 28  # dimensions of an image
    hidden_size = 1000  # number of hidden units - generally bigger than input size for a DAE

    # Now, define the symbolic input to the model (Theano).
    # We use a matrix rather than a vector so that minibatch processing can be done in parallel.
    x = T.matrix("X")
    self.inputs = [x]

    # Build the model's parameters - a weight matrix and two bias vectors
    W = get_weights_uniform(shape=(input_size, hidden_size), name="W")
    b0 = get_bias(shape=input_size, name="b0")
    b1 = get_bias(shape=hidden_size, name="b1")
    self.params = [W, b0, b1]

    # Perform the computation for a denoising autoencoder!
    # first, add noise to (corrupt) the input - this time with a custom corruption function
    corrupted_input = salt_and_pepper_custom(input=x)
    # next, run the hidden layer given the inputs (the encoding function)
    hiddens = tanh(T.dot(corrupted_input, W) + b1)
    # finally, create the reconstruction from the hidden layer (we tie the weights with W.T)
    reconstruction = sigmoid(T.dot(hiddens, W.T) + b0)
    # the training cost is reconstruction error - with MNIST this is binary cross-entropy
    self.train_cost = binary_crossentropy(output=reconstruction, target=x)

    # Compile everything into a Theano function for prediction!
    # When using real-world data in predictions, we wouldn't corrupt the input first.
    # Therefore, create another version of the hiddens and reconstruction without adding the noise.
    hiddens_predict = tanh(T.dot(x, W) + b1)
    self.recon_predict = sigmoid(T.dot(hiddens_predict, W.T) + b0)
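# --- Sketch (an assumption - the original body isn't shown): one way the `salt_and_pepper_custom`
# --- corruption used above could be implemented with Theano's random streams. The default
# --- noise_level and the seed are arbitrary choices.
import theano.tensor as T
from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams

theano_rng = RandomStreams(seed=23455)

def salt_and_pepper_custom(input, noise_level=0.2):
    # keep each pixel with probability (1 - noise_level)
    keep_mask = theano_rng.binomial(size=input.shape, n=1, p=1 - noise_level, dtype='float32')
    # for the corrupted pixels, flip a fair coin between salt (1.0) and pepper (0.0)
    salt = theano_rng.binomial(size=input.shape, n=1, p=0.5, dtype='float32')
    return input * keep_mask + T.eq(keep_mask, 0) * salt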
def _build_rbm(self):
    """
    Creates the computation graph.

    Returns
    -------
    theano expression
        The cost expression - the difference in free energy between the input and the
        final sample of the Gibbs chain.
    dict
        Monitors dictionary - pseudo-log-likelihood and binary cross-entropy expressions
        to monitor training progress.
    dict
        Updates dictionary - updates from the Gibbs sampling process.
    tensor
        Last visible sample in the chain - the last generated visible sample from the Gibbs process.
    tensor
        Last hidden sample in the chain from the Gibbs process.
    """
    # initialize from visibles if we aren't generating from some hiddens
    if self.hiddens_init is None:
        [_, v_chain, _, h_chain], updates = theano.scan(
            fn=lambda v: self._gibbs_step_vhv(v),
            outputs_info=[None, self.input, None, None],
            n_steps=self.k)
    # initialize from hiddens
    else:
        [_, v_chain, _, h_chain], updates = theano.scan(
            fn=lambda h: self._gibbs_step_hvh(h),
            outputs_info=[None, None, None, self.hiddens_init],
            n_steps=self.k)

    v_sample = v_chain[-1]
    h_sample = h_chain[-1]

    mean_v, _, _, _ = self._gibbs_step_vhv(v_sample)

    # some monitors
    # get rid of the -inf for the pseudo_log monitor (due to 0's and 1's in mean_v)
    # eps = 1e-8
    # zero_indices = T.eq(mean_v, 0.0).nonzero()
    # one_indices = T.eq(mean_v, 1.0).nonzero()
    # mean_v = T.inc_subtensor(x=mean_v[zero_indices], y=eps)
    # mean_v = T.inc_subtensor(x=mean_v[one_indices], y=-eps)
    pseudo_log = T.xlogx.xlogy0(self.input, mean_v) + T.xlogx.xlogy0(1 - self.input, 1 - mean_v)
    pseudo_log = pseudo_log.sum() / self.input.shape[0]
    crossentropy = T.mean(binary_crossentropy(mean_v, self.input))
    monitors = {'pseudo-log': pseudo_log, 'crossentropy': crossentropy}

    # the free-energy cost function!
    # Consider v_sample constant when computing gradients on the cost function:
    # this keeps v_sample out of the gradient graph entirely; to set its gradient
    # to 0 instead, use theano.gradient.zero_grad.
    v_sample_constant = theano.gradient.disconnected_grad(v_sample)
    # v_sample_constant = v_sample
    cost = (self.free_energy(self.input) - self.free_energy(v_sample_constant)) / self.input.shape[0]

    return cost, monitors, updates, v_sample, h_sample
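# --- Sketch (assumed, not shown above): roughly what the helpers used by _build_rbm could look
# --- like for a binary-binary RBM. The parameter names (self.W, self.b_v, self.b_h) and the
# --- random stream self.rng are assumptions; the return order matches the scan calls above:
# --- (mean_v, v_sample, mean_h, h_sample).
import theano
import theano.tensor as T

def _gibbs_step_vhv(self, v):
    # one Gibbs step starting from visibles: sample h given v, then v given h
    mean_h = T.nnet.sigmoid(T.dot(v, self.W) + self.b_h)
    h_sample = self.rng.binomial(size=mean_h.shape, n=1, p=mean_h, dtype=theano.config.floatX)
    mean_v = T.nnet.sigmoid(T.dot(h_sample, self.W.T) + self.b_v)
    v_sample = self.rng.binomial(size=mean_v.shape, n=1, p=mean_v, dtype=theano.config.floatX)
    return mean_v, v_sample, mean_h, h_sample

def _gibbs_step_hvh(self, h):
    # the same step but starting from hiddens: sample v given h, then h given v
    mean_v = T.nnet.sigmoid(T.dot(h, self.W.T) + self.b_v)
    v_sample = self.rng.binomial(size=mean_v.shape, n=1, p=mean_v, dtype=theano.config.floatX)
    mean_h = T.nnet.sigmoid(T.dot(v_sample, self.W) + self.b_h)
    h_sample = self.rng.binomial(size=mean_h.shape, n=1, p=mean_h, dtype=theano.config.floatX)
    return mean_v, v_sample, mean_h, h_sample

def free_energy(self, v):
    # F(v) = -v . b_v - sum_j softplus(b_h[j] + (v . W)[j]), summed over the minibatch
    vbias_term = T.dot(v, self.b_v)
    hidden_term = T.sum(T.nnet.softplus(T.dot(v, self.W) + self.b_h), axis=1)
    return T.sum(-hidden_term - vbias_term)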