Example #1
    def test_convergence_sgd_one_freeze(self):
        freeze_interval = 500
        net = q_network.DeepQLearner(self.mdp.num_states, 1,
                                     self.mdp.num_actions, 1, self.discount,
                                     self.learning_rate, 0, 0, 0,
                                     freeze_interval, 1, 'linear', 'sgd',
                                     'sum', 1.0)

        # Train for two freeze intervals so the frozen target network is
        # copied exactly once (hence "one_freeze").
        self.train(net, freeze_interval * 2)

        numpy.testing.assert_almost_equal(
            self.all_q_vals(net), [[.7, 0], [.35, .5], [0, 1.0], [0., 0.]], 3)
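With a linear (essentially tabular) network, zero-initialized weights, and a target network that is copied only once during these 1000 steps, value information can back up at most one transition per freeze interval, which is why two entries of the asserted array are still zero. A rough sketch of that expectation, assuming a discount of 0.5 and exit rewards of 0.7 (left) and 1.0 (right); these constants are inferred from the asserted values and are not part of the snippet itself:

import numpy

# Assumed constants; the real ones live in the test fixture
# (self.discount, self.mdp) and are not shown in this snippet.
discount = 0.5
left_reward, right_reward = 0.7, 1.0

# Phase 1 (target frozen at zero): only the two terminal transitions learn.
# Phase 2 (after the single target copy): values back up one more step.
expected = numpy.array([[left_reward, 0.0],
                        [discount * left_reward, discount * right_reward],
                        [0.0, right_reward],
                        [0.0, 0.0]])
print(expected)  # [[0.7, 0.], [0.35, 0.5], [0., 1.], [0., 0.]]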
Example #2
    def test_updates_sgd_no_freeze(self):
        freeze_interval = -1
        net = q_network.DeepQLearner(self.mdp.num_states, 1,
                                     self.mdp.num_actions, 1, self.discount,
                                     self.learning_rate, 0, 0, 0,
                                     freeze_interval, 1, 'linear', 'sgd',
                                     'sum', 1.0)

        mdp = self.mdp

        # Depart left:
        state = mdp.states[0]
        action_index = 0
        reward, next_state, terminal = mdp.act(state, action_index)
        net.train(state, mdp.actions[action_index], reward, next_state,
                  terminal)

        numpy.testing.assert_almost_equal(self.all_q_vals(net),
                                          [[.07, 0], [0, 0], [0, 0], [0, 0]])

        # Depart right:
        state = mdp.states[-2]
        action_index = 1
        reward, next_state, terminal = mdp.act(state, action_index)
        net.train(state, mdp.actions[action_index], reward, next_state,
                  terminal)

        numpy.testing.assert_almost_equal(self.all_q_vals(net),
                                          [[.07, 0], [0, 0], [0, .1], [0, 0]])

        # Move into leftmost state
        state = mdp.states[1]
        action_index = 0
        reward, next_state, terminal = mdp.act(state, action_index)
        net.train(state, mdp.actions[action_index], reward, next_state,
                  terminal)

        numpy.testing.assert_almost_equal(
            self.all_q_vals(net), [[.07, 0], [0.0035, 0], [0, .1], [0, 0]])
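Each assertion above follows a single SGD step on a zero-initialized linear network, so the updated entry is learning_rate * (reward + discount * max_a Q(next_state, a) - Q(state, action)), with the bootstrap term masked on terminal transitions. A minimal sketch of that arithmetic, assuming learning_rate = 0.1, discount = 0.5, and the same 0.7 / 1.0 exit rewards inferred above; the sgd_step helper is hypothetical and only mirrors what a single net.train() call should do:

# Assumed constants; the test reads them from self.learning_rate / self.discount.
learning_rate, discount = 0.1, 0.5

def sgd_step(q, state, action, reward, next_q_max, terminal):
    # One tabular Q-learning update, mirroring a single net.train() call.
    target = reward if terminal else reward + discount * next_q_max
    q[state][action] += learning_rate * (target - q[state][action])

q = [[0.0, 0.0] for _ in range(4)]
sgd_step(q, 0, 0, 0.7, 0.0, True)    # depart left        -> q[0][0] == 0.07
sgd_step(q, 2, 1, 1.0, 0.0, True)    # depart right       -> q[2][1] == 0.1
sgd_step(q, 1, 0, 0.0, 0.07, False)  # move into leftmost -> q[1][0] == 0.0035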
Example #3
    def test_convergence_random_initialization(self):
        """ This test will only pass if terminal states are handled
        correctly. Otherwise the random initialization of the value of the
        terminal state will propagate back.
        """
        freeze_interval = -1
        net = q_network.DeepQLearner(self.mdp.num_states, 1,
                                     self.mdp.num_actions, 1, self.discount,
                                     self.learning_rate, 0, 0, 0,
                                     freeze_interval, 1, 'linear', 'sgd',
                                     'sum', 1.0)

        # Randomize initial q-values:
        params = lasagne.layers.helper.get_all_param_values(net.l_out)
        rand = numpy.random.random(params[0].shape)
        rand = numpy.array(rand, dtype=theano.config.floatX)
        lasagne.layers.helper.set_all_param_values(net.l_out, [rand])

        self.train(net, 1000)

        numpy.testing.assert_almost_equal(
            self.all_q_vals(net)[0:3, :], [[.7, .25], [.35, .5], [.25, 1.0]],
            3)
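With no target freezing and 1000 training steps, the three non-terminal states should reach the Bellman fixed point, provided the bootstrap target is masked on terminal transitions; otherwise the randomly initialized value of the terminal state leaks into its neighbours. A quick check of the asserted values under the same assumed constants (discount 0.5, exit rewards 0.7 and 1.0, reward 0 for interior moves):

discount = 0.5                        # assumed, as in the sketches above
left_reward, right_reward = 0.7, 1.0  # assumed terminal exit rewards

# Bellman fixed point for the three non-terminal states, with the
# terminal state's value correctly treated as zero:
q1 = [discount * left_reward, discount * right_reward]  # middle state
q0 = [left_reward, discount * max(q1)]                  # leftmost state
q2 = [discount * max(q1), right_reward]                 # rightmost state
print(q0, q1, q2)  # [0.7, 0.25] [0.35, 0.5] [0.25, 1.0]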
Example #4
    def make_net(self, freeze_interval):
        return q_network.DeepQLearner(self.mdp.num_states, 1,
                                      self.mdp.num_actions, 1, self.discount,
                                      self.learning_rate, 0, 0, 0, 0,
                                      freeze_interval, 1, 'linear', 'sgd',
                                      'sum', np.random.RandomState(), 1.0)