예제 #1
0
    def test_loss_updates_one_layer_positive_relu(self):
        n_vis = 4
        n_hid = 2
        hidden_layer = HiddenLayer(n_vis=n_vis, n_hid=n_hid, layer_name='h', activation='relu', param_init_range=0, alpha=0)
        # W = theano.shared(value=np.ones((n_vis, n_hid)), name='h_W', borrow=True)
        # hidden_layer.W = W
        mlp = QNetwork([hidden_layer], discount=1, learning_rate=1)
        
        features = T.dvector('features')
        action = T.lscalar('action')
        reward = T.dscalar('reward')
        next_features = T.dvector('next_features')
        loss, updates = mlp.get_loss_and_updates(features, action, reward, next_features)
        train = theano.function(
                    [features, action, reward, next_features],
                    outputs=loss,
                    updates=updates,
                    mode='FAST_COMPILE')

        features = [1,1,1,1]
        action = 0
        reward = 1
        next_features = [1,1,1,1]

        actual_loss = train(features, action, reward, next_features)
        expected_loss = 0.5

        actual_weights = list(mlp.layers[0].W.eval())
        expected_weights = [[1,0], [1,0], [1,0], [1,0]]

        self.assertEqual(actual_loss, expected_loss)
        self.assertTrue(np.array_equal(actual_weights, expected_weights))
예제 #2
0
    def test_loss_updates_one_layer_positive_features_with_negative_weights_relu(self):
        n_vis = 4
        n_hid = 2
        hidden_layer = HiddenLayer(n_vis=n_vis, n_hid=n_hid, layer_name='h', activation='relu', param_init_range=0, alpha=0)
        hidden_layer.W.set_value(np.ones((n_vis, n_hid)) * -1)
        mlp = QNetwork([hidden_layer], discount=1, learning_rate=1)
        
        features = T.dvector('features')
        action = T.lscalar('action')
        reward = T.dscalar('reward')
        next_features = T.dvector('next_features')
        loss, updates = mlp.get_loss_and_updates(features, action, reward, next_features)
        train = theano.function(
                    [features, action, reward, next_features],
                    outputs=loss,
                    updates=updates,
                    mode='FAST_COMPILE')

        features = [1,1,1,1]
        action = 0
        reward = 1
        next_features = [1,1,1,1]

        actual_loss = train(features, action, reward, next_features)
        expected_loss = 0.5

        actual_weights = mlp.layers[0].W.eval().tolist()
        expected_weights = [[-1,-1], [-1,-1], [-1,-1], [-1,-1]]

        self.assertEqual(actual_loss, expected_loss)
        self.assertSequenceEqual(actual_weights, expected_weights)
예제 #3
0
    def test_loss_updates_one_layer_positive_features_with_negative_weights_relu(
            self):
        n_vis = 4
        n_hid = 2
        hidden_layer = HiddenLayer(n_vis=n_vis,
                                   n_hid=n_hid,
                                   layer_name='h',
                                   activation='relu',
                                   param_init_range=0,
                                   alpha=0)
        hidden_layer.W.set_value(np.ones((n_vis, n_hid)) * -1)
        mlp = QNetwork([hidden_layer], discount=1, learning_rate=1)

        features = T.dvector('features')
        action = T.lscalar('action')
        reward = T.dscalar('reward')
        next_features = T.dvector('next_features')
        loss, updates = mlp.get_loss_and_updates(features, action, reward,
                                                 next_features)
        train = theano.function([features, action, reward, next_features],
                                outputs=loss,
                                updates=updates,
                                mode='FAST_COMPILE')

        features = [1, 1, 1, 1]
        action = 0
        reward = 1
        next_features = [1, 1, 1, 1]

        actual_loss = train(features, action, reward, next_features)
        expected_loss = 0.5

        actual_weights = mlp.layers[0].W.eval().tolist()
        expected_weights = [[-1, -1], [-1, -1], [-1, -1], [-1, -1]]

        self.assertEqual(actual_loss, expected_loss)
        self.assertSequenceEqual(actual_weights, expected_weights)
예제 #4
0
    def test_loss_updates_one_layer_positive_diff_action_relu(self):
        n_vis = 4
        n_hid = 2
        hidden_layer = HiddenLayer(n_vis=n_vis,
                                   n_hid=n_hid,
                                   layer_name='h',
                                   activation='relu',
                                   param_init_range=0,
                                   alpha=0)
        # W = theano.shared(value=np.ones((n_vis, n_hid)), name='h_W', borrow=True)
        # hidden_layer.W = W
        mlp = QNetwork([hidden_layer], discount=1, learning_rate=1)

        features = T.dvector('features')
        action = T.lscalar('action')
        reward = T.dscalar('reward')
        next_features = T.dvector('next_features')
        loss, updates = mlp.get_loss_and_updates(features, action, reward,
                                                 next_features)
        train = theano.function([features, action, reward, next_features],
                                outputs=loss,
                                updates=updates,
                                mode='FAST_COMPILE')

        features = [1, 1, 1, 1]
        action = 1
        reward = 1
        next_features = [1, 1, 1, 1]

        actual_loss = train(features, action, reward, next_features)
        expected_loss = 0.5

        actual_weights = list(mlp.layers[0].W.eval())
        expected_weights = [[0, 1], [0, 1], [0, 1], [0, 1]]

        self.assertEqual(actual_loss, expected_loss)
        self.assertTrue(np.array_equal(actual_weights, expected_weights))