def test_make_symbolic_state():
    # Tests whether the returned p_sample and h_sample have the right
    # dimensions
    num_examples = 40
    theano_rng = MRG_RandomStreams(2012 + 11 + 1)

    visible_layer = BinaryVector(nvis=100)
    rval = visible_layer.make_symbolic_state(num_examples=num_examples,
                                             theano_rng=theano_rng)

    hidden_layer = BinaryVectorMaxPool(detector_layer_dim=500,
                                       pool_size=1,
                                       layer_name='h',
                                       irange=0.05,
                                       init_bias=-2.0)
    p_sample, h_sample = hidden_layer.make_symbolic_state(
        num_examples=num_examples, theano_rng=theano_rng)

    softmax_layer = Softmax(n_classes=10, layer_name='s', irange=0.05)
    h_sample_s = softmax_layer.make_symbolic_state(num_examples=num_examples,
                                                   theano_rng=theano_rng)

    required_shapes = [(40, 100), (40, 500), (40, 500), (40, 10)]
    f = function(inputs=[],
                 outputs=[rval, p_sample, h_sample, h_sample_s])

    for s, r in zip(f(), required_shapes):
        assert s.shape == r
def test_softmax_make_state():
    # Verifies that Softmax.make_state creates
    # a shared variable whose value passes check_multinomial_samples

    n = 5
    num_samples = 1000
    tol = .04

    layer = Softmax(n_classes=n, layer_name='y')

    rng = np.random.RandomState([2012, 11, 1, 11])

    z = 3 * rng.randn(n)

    mean = np.exp(z)
    mean /= mean.sum()

    layer.set_biases(z.astype(config.floatX))

    state = layer.make_state(num_examples=num_samples,
                             numpy_rng=rng)

    value = state.get_value()

    check_multinomial_samples(value, (num_samples, n), mean, tol)
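# Several tests in this file rely on check_multinomial_samples, which is
# defined elsewhere in the file and not shown in this section. The helper
# below is only an illustrative sketch of what that check is assumed to
# verify (the sample shape, that each row is a one-hot multinomial draw, and
# that the empirical class frequencies match the expected mean to within
# tol); it is not the actual implementation, and it is given a distinct name
# so it does not shadow the real helper.
def _check_multinomial_samples_sketch(value, expected_shape, expected_mean,
                                      tol):
    assert value.shape == expected_shape
    # Each row should be a one-hot vector: entries in {0, 1} summing to 1
    assert np.all((value == 0.) | (value == 1.))
    assert np.allclose(value.sum(axis=1), 1.)
    # Empirical class frequencies should be close to the expected mean
    empirical_mean = value.mean(axis=0)
    assert np.max(np.abs(empirical_mean - expected_mean)) < tol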
def test_softmax_mf_sample_consistent():
    # A test of the Softmax class
    # Verifies that the mean field update is consistent with
    # the sampling function

    # Since a Softmax layer contains only one random variable
    # (with n_classes possible values) the mean field assumption
    # does not impose any restriction, so mf_update simply gives
    # the true expected value of h given v.
    # We can thus use mf_update to compute the expected value
    # of a sample of y conditioned on v, and check that samples
    # drawn using the layer's sample method converge to that
    # value.

    rng = np.random.RandomState([2012, 11, 1, 1154])
    theano_rng = MRG_RandomStreams(2012 + 11 + 1 + 1154)
    num_samples = 1000
    tol = .042

    # Make DBM
    num_vis = rng.randint(1, 11)
    n_classes = rng.randint(1, 11)

    v = BinaryVector(num_vis)
    v.set_biases(rng.uniform(-1., 1., (num_vis,)).astype(config.floatX))

    y = Softmax(n_classes=n_classes,
                layer_name='y',
                irange=1.)
    y.set_biases(rng.uniform(-1., 1., (n_classes,)).astype(config.floatX))

    dbm = DBM(visible_layer=v,
              hidden_layers=[y],
              batch_size=1,
              niter=50)

    # Randomly pick a v to condition on
    # (Random numbers are generated via dbm.rng)
    layer_to_state = dbm.make_layer_to_state(1)
    v_state = layer_to_state[v]
    y_state = layer_to_state[y]

    # Infer P(y | v) using mean field
    expected_y = y.mf_update(state_below=v.upward_state(v_state))

    expected_y = expected_y[0, :]

    expected_y = expected_y.eval()

    # Copy the states out into a batch of size num_samples
    cause_copy = sharedX(np.zeros((num_samples,))).dimshuffle(0, 'x')
    v_state = v_state[0, :] + cause_copy
    y_state = y_state[0, :] + cause_copy

    y_samples = y.sample(state_below=v.upward_state(v_state),
                         theano_rng=theano_rng)

    y_samples = function([], y_samples)()

    check_multinomial_samples(y_samples, (num_samples, n_classes), expected_y,
                              tol)
def test_softmax_mf_energy_consistent_centering():
    # A test of the Softmax class
    # Verifies that the mean field update is consistent with
    # the energy function when using the centering trick

    # Since a Softmax layer contains only one random variable
    # (with n_classes possible values) the mean field assumption
    # does not impose any restriction so mf_update simply gives
    # the true expected value of h given v.
    # We also know P(h | v)
    #  = P(h, v) / P(v)
    #  = P(h, v) / sum_h P(h, v)
    #  = exp(-E(h, v)) / sum_h exp(-E(h, v))
    # So we can check that computing P(h | v) with both
    # methods works the same way

    rng = np.random.RandomState([2012, 11, 1, 1131])

    # Make DBM
    num_vis = rng.randint(1, 11)
    n_classes = rng.randint(1, 11)

    v = BinaryVector(num_vis, center=True)
    v.set_biases(rng.uniform(-1., 1., (num_vis,)).astype(config.floatX),
                 recenter=True)

    y = Softmax(n_classes=n_classes,
                layer_name='y',
                irange=1.,
                center=True)
    y.set_biases(rng.uniform(-1., 1., (n_classes,)).astype(config.floatX),
                 recenter=True)

    dbm = DBM(visible_layer=v,
              hidden_layers=[y],
              batch_size=1,
              niter=50)

    # Randomly pick a v to condition on
    # (Random numbers are generated via dbm.rng)
    layer_to_state = dbm.make_layer_to_state(1)
    v_state = layer_to_state[v]
    y_state = layer_to_state[y]

    # Infer P(y | v) using mean field
    expected_y = y.mf_update(state_below=v.upward_state(v_state))

    expected_y = expected_y[0, :]

    expected_y = expected_y.eval()

    # Infer P(y | v) using the energy function
    energy = dbm.energy(V=v_state, hidden=[y_state])
    unnormalized_prob = T.exp(-energy)
    assert unnormalized_prob.ndim == 1
    unnormalized_prob = unnormalized_prob[0]
    unnormalized_prob = function([], unnormalized_prob)

    def compute_unnormalized_prob(which):
        write_y = np.zeros((n_classes,))
        write_y[which] = 1.

        y_value = y_state.get_value()
        y_value[0, :] = write_y
        y_state.set_value(y_value)

        return unnormalized_prob()

    probs = [compute_unnormalized_prob(idx) for idx in xrange(n_classes)]
    denom = sum(probs)
    probs = [on_prob / denom for on_prob in probs]

    # np.asarray(probs) doesn't make a numpy vector, so I do it manually
    wtf_numpy = np.zeros((n_classes,))
    for i in xrange(n_classes):
        wtf_numpy[i] = probs[i]
    probs = wtf_numpy

    if not np.allclose(expected_y, probs):
        print 'mean field expectation of h:', expected_y
        print 'expectation of h based on enumerating energy function values:', probs
        assert False