def test_fprop_double_layer_one_weights_positive_output_values_relu(self):
    n_vis = 8
    n_hid = 2
    # Use floor division so the layer sizes stay ints under Python 3.
    hidden_layer_1 = HiddenLayer(n_vis=n_vis, n_hid=n_vis // 2, layer_name='h1',
                                 activation='relu', param_init_range=0, alpha=0)
    hidden_layer_2 = HiddenLayer(n_vis=n_vis // 2, n_hid=n_hid, layer_name='h2',
                                 activation='relu', param_init_range=0, alpha=0)
    W = theano.shared(value=np.ones((n_vis, n_vis // 2)), name='h1_W', borrow=True)
    hidden_layer_1.W = W
    W = theano.shared(value=np.ones((n_vis // 2, n_hid)), name='h2_W', borrow=True)
    hidden_layer_2.W = W
    mlp = QNetwork([hidden_layer_1, hidden_layer_2], discount=1, learning_rate=1)
    features = np.ones(n_vis)
    actual = list(mlp.fprop(features).eval())
    expected = [32., 32.]
    self.assertSequenceEqual(actual, expected)
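# Worked arithmetic for the expectation above: each of the 4 first-layer units
# sums eight 1.0 inputs through all-ones weights, so relu(8) = 8; each of the
# 2 output units then sums four 8s, so relu(32) = 32.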
def test_loss_updates_one_layer_positive_relu(self):
    n_vis = 4
    n_hid = 2
    # param_init_range=0 leaves the weights initialized to zero.
    hidden_layer = HiddenLayer(n_vis=n_vis, n_hid=n_hid, layer_name='h',
                               activation='relu', param_init_range=0, alpha=0)
    mlp = QNetwork([hidden_layer], discount=1, learning_rate=1)
    features = T.dvector('features')
    action = T.lscalar('action')
    reward = T.dscalar('reward')
    next_features = T.dvector('next_features')
    loss, updates = mlp.get_loss_and_updates(features, action, reward,
                                             next_features)
    train = theano.function(
        [features, action, reward, next_features],
        outputs=loss,
        updates=updates,
        mode='FAST_COMPILE')
    features = [1, 1, 1, 1]
    action = 0
    reward = 1
    next_features = [1, 1, 1, 1]
    actual_loss = train(features, action, reward, next_features)
    expected_loss = 0.5
    actual_weights = list(mlp.layers[0].W.eval())
    expected_weights = [[1, 0], [1, 0], [1, 0], [1, 0]]
    self.assertEqual(actual_loss, expected_loss)
    self.assertTrue(np.array_equal(actual_weights, expected_weights))
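# Why these numbers (a sketch, assuming a 0.5 * error**2 Bellman loss, which
# expected_loss = 0.5 implies): with zero weights Q(s, a) = 0 and
# max_a' Q(s', a') = 0, so the target is reward + discount * 0 = 1 and the
# error is 1, giving loss 0.5. With learning_rate=1 that error is written into
# the taken action's column (this also assumes the layer's relu passes
# gradient at a pre-activation of exactly 0), hence column 0 becomes all ones.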
def test_loss_updates_one_layer_positive_features_with_negative_weights_relu(self):
    n_vis = 4
    n_hid = 2
    hidden_layer = HiddenLayer(n_vis=n_vis, n_hid=n_hid, layer_name='h',
                               activation='relu', param_init_range=0, alpha=0)
    hidden_layer.W.set_value(np.ones((n_vis, n_hid)) * -1)
    mlp = QNetwork([hidden_layer], discount=1, learning_rate=1)
    features = T.dvector('features')
    action = T.lscalar('action')
    reward = T.dscalar('reward')
    next_features = T.dvector('next_features')
    loss, updates = mlp.get_loss_and_updates(features, action, reward,
                                             next_features)
    train = theano.function(
        [features, action, reward, next_features],
        outputs=loss,
        updates=updates,
        mode='FAST_COMPILE')
    features = [1, 1, 1, 1]
    action = 0
    reward = 1
    next_features = [1, 1, 1, 1]
    actual_loss = train(features, action, reward, next_features)
    expected_loss = 0.5
    actual_weights = mlp.layers[0].W.eval().tolist()
    expected_weights = [[-1, -1], [-1, -1], [-1, -1], [-1, -1]]
    self.assertEqual(actual_loss, expected_loss)
    self.assertSequenceEqual(actual_weights, expected_weights)
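# With all-(-1) weights the pre-activations are -4, so relu clamps both
# Q-values to 0 (the loss is again 0.5 * 1**2 = 0.5) and its gradient there
# is 0, which is why no update reaches W and the weights stay at -1.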
def test_fprop_single_layer_zero_weights_positive_input_values_relu(self):
    hidden_layer = HiddenLayer(n_vis=4, n_hid=2, layer_name='h',
                               activation='relu', param_init_range=0, alpha=0)
    mlp = QNetwork([hidden_layer], discount=1, learning_rate=1)
    features = [1, 2, 3, 4]
    actual = list(mlp.fprop(features).eval())
    expected = [0., 0.]
    self.assertSequenceEqual(actual, expected)
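# param_init_range=0 initializes W to zeros, so the pre-activations are 0 for
# any input and relu(0) = 0 for both outputs.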
def test_fprop_single_layer_one_weights_negative_output_values_relu(self):
    n_vis = 4
    n_hid = 2
    hidden_layer = HiddenLayer(n_vis=n_vis, n_hid=n_hid, layer_name='h',
                               activation='relu', param_init_range=0, alpha=0)
    W = theano.shared(value=np.ones((n_vis, n_hid)), name='h_W', borrow=True)
    hidden_layer.W = W
    mlp = QNetwork([hidden_layer], discount=1, learning_rate=1)
    features = [-1, -2, -3, 4]
    actual = list(mlp.fprop(features).eval())
    expected = [0., 0.]
    self.assertSequenceEqual(actual, expected)
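# Each output unit sums the features through all-ones weights:
# -1 - 2 - 3 + 4 = -2, and relu(-2) = 0.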
def test_fprop_double_layer_one_weights_negative_output_values_relu(self):
    n_vis = 8
    n_hid = 2
    hidden_layer_1 = HiddenLayer(n_vis=n_vis, n_hid=n_vis // 2, layer_name='h1',
                                 activation='relu', param_init_range=0, alpha=0)
    hidden_layer_2 = HiddenLayer(n_vis=n_vis // 2, n_hid=n_hid, layer_name='h2',
                                 activation='relu', param_init_range=0, alpha=0)
    W = theano.shared(value=np.ones((n_vis, n_vis // 2)), name='h1_W', borrow=True)
    hidden_layer_1.W = W
    W = theano.shared(value=np.ones((n_vis // 2, n_hid)), name='h2_W', borrow=True)
    hidden_layer_2.W = W
    mlp = QNetwork([hidden_layer_1, hidden_layer_2], discount=1, learning_rate=1)
    features = [-5, -4, -3, -2, -1, 0, 1, 2]
    actual = list(mlp.fprop(features).eval())
    expected = [0., 0.]
    self.assertSequenceEqual(actual, expected)
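# The features sum to -12, so every first-layer unit computes relu(-12) = 0;
# the second layer then sees all zeros and also outputs 0.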
def test_loss_updates_one_layer_positive_diff_action_relu(self):
    n_vis = 4
    n_hid = 2
    hidden_layer = HiddenLayer(n_vis=n_vis, n_hid=n_hid, layer_name='h',
                               activation='relu', param_init_range=0, alpha=0)
    mlp = QNetwork([hidden_layer], discount=1, learning_rate=1)
    features = T.dvector('features')
    action = T.lscalar('action')
    reward = T.dscalar('reward')
    next_features = T.dvector('next_features')
    loss, updates = mlp.get_loss_and_updates(features, action, reward,
                                             next_features)
    train = theano.function(
        [features, action, reward, next_features],
        outputs=loss,
        updates=updates,
        mode='FAST_COMPILE')
    features = [1, 1, 1, 1]
    action = 1
    reward = 1
    next_features = [1, 1, 1, 1]
    actual_loss = train(features, action, reward, next_features)
    expected_loss = 0.5
    actual_weights = list(mlp.layers[0].W.eval())
    expected_weights = [[0, 1], [0, 1], [0, 1], [0, 1]]
    self.assertEqual(actual_loss, expected_loss)
    self.assertTrue(np.array_equal(actual_weights, expected_weights))
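# Same setup as test_loss_updates_one_layer_positive_relu, but with action=1:
# the unit error should land in column 1 instead of column 0, confirming that
# the update only touches the weights feeding the taken action's Q-value.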