def cost(self, Y, Y_hat):
    """
    Cost for convnets is hardcoded to be the cost for sigmoids.

    TODO: move the cost into the non-linearity class.

    Parameters
    ----------
    Y : theano.gof.Variable
        Targets
    Y_hat : theano.gof.Variable
        Output of `fprop`

    Returns
    -------
    cost : theano.gof.Variable
        0-D tensor describing the cost

    Notes
    -----
    The cost is the mean across units and across the batch of the KL
    divergence KL(P || Q), where P is defined by Y and Q is defined by
    Y_hat:

    KL(P || Q) = p log p - p log q + (1-p) log (1-p) - (1-p) log (1-q)
    """
    assert self.nonlin.non_lin_name == "sigmoid", (
        "ConvElemwise supports the cost function only for the sigmoid "
        "nonlinearity for now.")
    batch_axis = self.output_space.get_batch_axis()
    ave_total = kl(Y=Y, Y_hat=Y_hat, batch_axis=batch_axis)
    ave = ave_total.mean()
    return ave
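
# For intuition, a minimal NumPy sketch of the scalar this cost reduces to
# when Y is binary. The helper name and eps guard are hypothetical, not part
# of pylearn2: for binary p the entropy terms p log p and (1-p) log (1-p)
# vanish, leaving the cross-entropy form -p log q - (1-p) log (1-q).
import numpy as np

def sigmoid_kl_cost_reference(Y, Y_hat, eps=1e-7):
    """Mean binary KL divergence between targets Y and sigmoid outputs Y_hat."""
    q = np.clip(Y_hat, eps, 1.0 - eps)  # keep the logs finite
    per_unit = -Y * np.log(q) - (1.0 - Y) * np.log(1.0 - q)
    return per_unit.mean()  # mean across units and across the batch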
import numpy as np
import theano
from theano.gof.op import get_debug_values

from pylearn2.expr.nnet import kl
from pylearn2.models.mlp import MLP, Sigmoid


def test_kl():
    """
    Test that kl() validates its input and raises ValueError when the
    targets fall outside [0, 1].
    """
    init_mode = theano.config.compute_test_value
    theano.config.compute_test_value = 'raise'
    try:
        mlp = MLP(layers=[Sigmoid(dim=10, layer_name='Y', irange=0.1)],
                  nvis=10)
        X = mlp.get_input_space().make_theano_batch()
        Y = mlp.get_output_space().make_theano_batch()
        X.tag.test_value = np.random.random(
            get_debug_values(X)[0].shape).astype(theano.config.floatX)
        Y_hat = mlp.fprop(X)

        # This call should not raise any error:
        ave = kl(Y, Y_hat, 1)

        # The following calls should raise ValueError exceptions,
        # since a target has been pushed outside [0, 1]:
        Y.tag.test_value[2][3] = 1.1
        np.testing.assert_raises(ValueError, kl, Y, Y_hat, 1)
        Y.tag.test_value[2][3] = -0.1
        np.testing.assert_raises(ValueError, kl, Y, Y_hat, 1)
    finally:
        theano.config.compute_test_value = init_mode
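
# The assert_raises checks above depend on kl() inspecting any debug/test
# values attached to Y. A sketch of that kind of range check follows; it is
# hedged, the real validation lives in pylearn2/expr/nnet.py and may differ
# in wording and detail.
from theano.gof.op import get_debug_values

def check_binary_targets(Y):
    """Raise ValueError if any test value of Y falls outside [0, 1]."""
    for Yv in get_debug_values(Y):
        if Yv.min() < 0.0 or Yv.max() > 1.0:
            raise ValueError("Expected Y to be between 0 and 1.")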
def kl(self, Y, Y_hat):
    """
    Computes the KL divergence.

    Parameters
    ----------
    Y : Variable
        Targets for the sigmoid outputs. Currently Y must be purely
        binary. If it's not, you'll still get the right gradient, but
        the value in the monitoring channel will be wrong.
    Y_hat : Variable
        Predictions made by the sigmoid layer. Y_hat must be generated
        by fprop, i.e., it must be a symbolic sigmoid.

    Returns
    -------
    ave : Variable
        A batch (vector) with one entry per example: the mean across
        units of the KL divergence between Y and Y_hat.

    Notes
    -----
    Warning: This function expects a sigmoid nonlinearity in the output
    layer, and it uses the kl() function from pylearn2/expr/nnet.py.

    The KL divergence KL(P || Q), where P is defined by Y and Q is
    defined by Y_hat, is

        p log p - p log q + (1-p) log (1-p) - (1-p) log (1-q)

    For binary p, the entropy terms drop out, leaving

        - p log q - (1-p) log (1-q)

    and with q = sigmoid(z), so that 1 - q = sigmoid(-z), this becomes

        - p log sigmoid(z) - (1-p) log sigmoid(-z)
        = p softplus(-z) + (1-p) softplus(z)
    """
    batch_axis = self.output_space.get_batch_axis()
    div = kl(Y=Y, Y_hat=Y_hat, batch_axis=batch_axis)
    return div
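
# A quick standalone NumPy check (a sketch, not pylearn2 code) that the
# derivation in the Notes holds: with q = sigmoid(z),
# -p log q - (1-p) log (1-q) equals p softplus(-z) + (1-p) softplus(z),
# because -log sigmoid(z) = softplus(-z) and 1 - sigmoid(z) = sigmoid(-z).
import numpy as np

def _sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def _softplus(z):
    return np.log1p(np.exp(z))

rng = np.random.RandomState(0)
z = rng.randn(5, 10)                               # pre-sigmoid activations
p = rng.randint(0, 2, size=z.shape).astype(float)  # binary targets
q = _sigmoid(z)

direct = -p * np.log(q) - (1.0 - p) * np.log(1.0 - q)
stable = p * _softplus(-z) + (1.0 - p) * _softplus(z)
assert np.allclose(direct, stable)

# Averaging across units for each example yields the batch vector that
# kl() returns.
per_example = stable.mean(axis=1)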