Esempio n. 1
0
    def test_calc_decays(self):
        """
        Tests the sequence lengths and per-step decay factors returned by
        the `calc_sequence_decays` API for several flag configurations.
        """
        decay = 0.5
        helper_test = ComponentTest(component=SequenceHelper(),
                                    input_spaces=self.input_spaces)

        # One row per scenario: (input flags, expected lengths, expected
        # decays). In the expected values, a 1 closes the current sequence
        # and the decay exponent restarts at 0 on the following element.
        cases = (
            ([0, 0, 0, 0], [4], [1.0, 0.5, 0.25, 0.125]),
            ([0, 0, 1, 0], [3, 1], [1.0, 0.5, 0.25, 1.0]),
            ([1, 1, 1, 1], [1, 1, 1, 1], [1.0, 1.0, 1.0, 1.0]),
        )

        for flags, want_lengths, want_decays in cases:
            api_call = ("calc_sequence_decays", [np.asarray(flags), decay])
            got_lengths, got_decays = helper_test.test(api_call)

            # Check lengths and decays.
            recursive_assert_almost_equal(x=got_lengths, y=want_lengths)
            recursive_assert_almost_equal(x=got_decays, y=want_decays)
Esempio n. 2
0
    def test_bootstrapping(self):
        """
        Tests the `bootstrap_values` API (used for GAE) by comparing its
        output against the reference implementation `self.deltas`.
        """
        gamma = 0.99
        helper_test = ComponentTest(component=SequenceHelper(),
                                    input_spaces=self.input_spaces)

        # (sequence_indices, terminals) per scenario; values and rewards are
        # the same in every scenario.
        scenarios = (
            # No terminals - just boot-strap with final sequence index.
            ([0, 0, 0, 1], [0, 0, 0, 0]),
            # Final index is also terminal.
            ([0, 0, 0, 1], [0, 0, 0, 1]),
            # Mixed: i = 1 is also terminal, i = 3 is only sequence.
            ([0, 1, 0, 1], [0, 1, 0, 0]),
        )

        for sequence_flags, terminal_flags in scenarios:
            # Build fresh arrays each round so scenarios stay independent.
            values = np.asarray([1.0, 2.0, 3.0, 4.0])
            rewards = np.asarray([0, 0, 0, 0])
            sequence_indices = np.asarray(sequence_flags)
            terminals = np.asarray(terminal_flags)

            reference = self.deltas(values, rewards, gamma, terminals,
                                    sequence_indices)
            # NOTE(review): the discount is only handed to the reference
            # helper; the component call presumably relies on a matching
            # default - confirm against SequenceHelper.
            api_call = ("bootstrap_values",
                        [rewards, values, terminals, sequence_indices])
            computed = helper_test.test(api_call)
            recursive_assert_almost_equal(reference, computed, decimals=5)
Esempio n. 3
0
    def test_calc_sequence_lengths(self):
        """
        Tests the `calc_sequence_lengths` API: for each flag array the
        component must return the expected list of sequence lengths.
        """
        helper_test = ComponentTest(component=SequenceHelper(),
                                    input_spaces=self.input_spaces)

        # (input flags, expected lengths). In the expected values every 1
        # ends the sequence it belongs to.
        cases = (
            ([0, 0, 0, 0], [4]),
            ([0, 0, 1, 0], [3, 1]),
            ([1, 1, 1, 1], [1, 1, 1, 1]),
            ([1, 0, 0, 1], [1, 3]),
        )

        for flags, want_lengths in cases:
            api_call = ("calc_sequence_lengths", np.asarray(flags))
            helper_test.test(api_call, expected_outputs=want_lengths)
Esempio n. 4
0
    def test_reverse_apply_decays_to_sequence(self):
        """
        Tests that `reverse_apply_decays_to_sequence` accumulates 1-step TD
        errors backwards through a sequence, as needed for GAE.
        """
        decay = 0.5
        helper_test = ComponentTest(component=SequenceHelper(),
                                    input_spaces=self.input_spaces)

        td_errors = np.asarray([0.1, 0.2, 0.3, 0.4])
        # Only the final index is flagged.
        sequence_indices = np.array([0, 0, 0, 1])

        # Hand-computed expectation: each entry is its own TD error plus all
        # later ones, each weighted by one more power of the decay (0.5).
        manual_expectation = np.asarray([
            0.1 + 0.5 * 0.2 + 0.25 * 0.3 + 0.125 * 0.4,
            0.2 + 0.5 * 0.3 + 0.25 * 0.4,
            0.3 + 0.5 * 0.4,
            0.4,
        ])

        # Sanity check: the reference helper must agree with the manual
        # numbers before they are used to judge the component.
        numpy_expectation = self.decay_td_sequence(td_errors, decay=decay)
        recursive_assert_almost_equal(manual_expectation, numpy_expectation)

        api_call = ("reverse_apply_decays_to_sequence",
                    [td_errors, sequence_indices, decay])
        helper_test.test(api_call, expected_outputs=manual_expectation)