Exemple #1
0
    def testGamma(self):
        """Check discounted_future_sum for several gamma values.

        The third argument is held at 3 while the second argument (gamma)
        takes the values 0.0, 0.9, 1.0 and 2.0.  Each result is compared with
        a NumPy reference assembled from the input and two zero-padded,
        left-shifted copies of it.
        """
        rows = [[0, 1, 2, 3], [1, 2, 3, 4], [2, 3, 4, 5]]
        inputs = tf.constant(rows, dtype=tf.float32)

        # Build every result tensor up front; they are evaluated further down.
        gammas = (0.0, 0.9, 1.0, 2.0)
        outputs = [common.discounted_future_sum(inputs, g, 3) for g in gammas]

        base = np.array(rows)
        # Column j of ahead1 / ahead2 holds column j+1 / j+2 of base; the
        # columns that would fall past the end of a row are filled with zeros.
        ahead1 = np.pad(base[:, 1:], ((0, 0), (0, 1)), 'constant')
        ahead2 = np.pad(base[:, 2:], ((0, 0), (0, 2)), 'constant')

        # One reference per gamma, in the same order as `gammas`; the
        # coefficients are gamma**1 and gamma**2.
        references = [
            base,
            base + 0.9 * ahead1 + 0.81 * ahead2,
            base + ahead1 + ahead2,
            base + 2 * ahead1 + 4 * ahead2,
        ]

        for reference, output in zip(references, outputs):
            self.assertAllClose(reference, self.evaluate(output))
Exemple #2
0
    def testNumSteps(self):
        """Check discounted_future_sum for several step counts at gamma 1.0.

        The third argument takes the values 1, 3 and 20 on a 3x4 input whose
        rows have only 4 entries, so 20 exceeds the row length.
        """
        rows = [[0, 1, 2, 3], [1, 2, 3, 4], [2, 3, 4, 5]]
        inputs = tf.constant(rows, dtype=tf.float32)

        # (step count, expected output) pairs.
        cases = [
            # A single step leaves the input unchanged.
            (1, rows),
            # Each entry plus up to two following entries of its row.
            (3, [[3, 6, 5, 3], [6, 9, 7, 4], [9, 12, 9, 5]]),
            # Each entry plus every remaining entry of its row.
            (20, [[6, 6, 5, 3], [10, 9, 7, 4], [14, 12, 9, 5]]),
        ]

        # Build every result tensor first, then evaluate and compare.
        outputs = [
            common.discounted_future_sum(inputs, 1.0, steps)
            for steps, _ in cases
        ]
        for (_, expected), output in zip(cases, outputs):
            self.assertAllClose(expected, self.evaluate(output))