Example #1
    def test(self):
        import rewardpredictive as rp
        import numpy as np
        rp.set_seeds(12345)
        exp = rp.experiment.ExperimentCycleMDPDatasetPredictive({
            rp.experiment.ExperimentCycleMDPDatasetPredictive.HP_REPEATS: 2
        })
        exp.run()
        self.assertLessEqual(3., np.mean(exp.results['total_reward']))
        count_list = [np.shape(c)[1] for c in exp.results['count']]
        self.assertEqual(2., np.mean(count_list))
        count_sum = [np.sum(c[-1]) for c in exp.results['count']]
        self.assertTrue(np.all(np.array(count_sum) == 20))

        rp.set_seeds(12345)
        exp_reprod = rp.experiment.ExperimentCycleMDPDatasetPredictive({
            rp.experiment.ExperimentCycleMDPDatasetPredictive.HP_REPEATS: 2
        })
        exp_reprod.run()

        total_rew_exp = np.array(exp.results['total_reward'])
        total_rew_exp_reprod = np.array(exp_reprod.results['total_reward'])
        self.assertTrue(np.all(total_rew_exp == total_rew_exp_reprod))
        count_exp = np.array(exp.results['count'])
        count_exp_reprod = np.array(exp_reprod.results['count'])
        self.assertTrue(np.all(count_exp == count_exp_reprod))
        score_exp = np.array(exp.results['score'])
        score_exp_reprod = np.array(exp_reprod.results['score'])
        self.assertTrue(np.all(score_exp == score_exp_reprod))
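
The explicit key-by-key comparisons above can be collapsed into a single helper, similar to the `_test_equality` method used in Examples #2 and #5 below. The following is a minimal sketch under the assumption that `results` is a plain dict of array-like values; the helper name and signature are assumptions, not the package's actual implementation:

    def _test_equality(self, exp, exp_reprod):
        # Hypothetical sketch: repeat the array comparisons above for every
        # key logged in the results dictionary.
        import numpy as np
        for key in exp.results.keys():
            res = np.array(exp.results[key])
            res_reprod = np.array(exp_reprod.results[key])
            self.assertTrue(np.all(res == res_reprod))
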
Example #2
    def test_sftransfer(self):
        import rewardpredictive as rp
        rp.set_seeds(12345)
        exp = rp.experiment.ExperimentGuitarSFTransfer({
            rp.experiment.ExperimentGuitarSFTransfer.HP_REPEATS: 2
        })
        exp.run()

        rp.set_seeds(12345)
        exp_reprod = rp.experiment.ExperimentGuitarSFTransfer({
            rp.experiment.ExperimentGuitarSFTransfer.HP_REPEATS: 2
        })
        exp_reprod.run()

        self._test_equality(exp, exp_reprod)
Example #3
    def test(self):
        import rewardpredictive as rp
        import numpy as np
        rp.set_seeds(12345)
        exp = rp.experiment.ExperimentRepresentationEvaluation()
        exp.run()

        rp.set_seeds(12345)
        exp_reprod = rp.experiment.ExperimentRepresentationEvaluation()
        exp_reprod.run()

        self.assertTrue(np.all(exp.results['partition_list'][0] == exp_reprod.results['partition_list'][0]))
        self.assertTrue(np.all(exp.results['total_reward_list'][0] == exp_reprod.results['total_reward_list'][0]))
        exp_rew_err = exp.results['reward_prediction_error_list'][0]
        exp_reprod_rew_err = exp_reprod.results['reward_prediction_error_list'][0]
        self.assertTrue(np.all(exp_rew_err == exp_reprod_rew_err))
Example #4
    def test_qlearning(self):
        import rewardpredictive as rp
        rp.set_seeds(12345)
        exp = rp.experiment.ExperimentMazeQLearning({
            rp.experiment.ExperimentMazeQLearning.HP_NUM_EPISODES: 2,
            rp.experiment.ExperimentMazeQLearning.HP_REPEATS: 2
        })
        exp.run()

        rp.set_seeds(12345)
        exp_reprod = rp.experiment.ExperimentMazeQLearning({
            rp.experiment.ExperimentMazeQLearning.HP_NUM_EPISODES: 2,
            rp.experiment.ExperimentMazeQLearning.HP_REPEATS: 2
        })
        exp_reprod.run()

        self._test_episode_length(exp, exp_reprod)
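
The `_test_episode_length` helper presumably compares the logged per-episode lengths of the two runs. The sketch below is an assumption: the results key 'episode_length' and the helper's signature are hypothetical and may not match the package's actual implementation.

    def _test_episode_length(self, exp, exp_reprod):
        # Hypothetical sketch: 'episode_length' is an assumed key, not taken
        # from the package's actual results dictionary.
        import numpy as np
        ep_len = np.array(exp.results['episode_length'])
        ep_len_reprod = np.array(exp_reprod.results['episode_length'])
        self.assertTrue(np.all(ep_len == ep_len_reprod))
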
Example #5
    def test_predictive(self):
        import rewardpredictive as rp
        import tensorflow as tf
        tf.reset_default_graph()
        rp.set_seeds(12345)
        exp = rp.experiment.ExperimentGuitarRewardPredictive({
            rp.experiment.ExperimentGuitarRewardPredictive.HP_REPEATS: 2
        })
        exp.run()

        tf.reset_default_graph()
        rp.set_seeds(12345)
        exp_reprod = rp.experiment.ExperimentGuitarRewardPredictive({
            rp.experiment.ExperimentGuitarRewardPredictive.HP_REPEATS: 2
        })
        exp_reprod.run()

        self._test_equality(exp, exp_reprod)
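
Note that `tf.reset_default_graph()` is a TensorFlow 1.x API; under TensorFlow 2.x the call is only available through the compatibility module. A small shim such as the one below (an assumption about how one might adapt the test, not part of the package) keeps the example working under either major version:

import tensorflow as tf

def reset_tf_graph():
    # TF 1.x exposes reset_default_graph at the top level; TF 2.x only
    # provides it via tf.compat.v1.
    if hasattr(tf, 'reset_default_graph'):
        tf.reset_default_graph()
    else:
        tf.compat.v1.reset_default_graph()
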
Example #6
    def test_maximizingq(self):
        import rewardpredictive as rp
        rp.set_seeds(12345)
        exp = rp.experiment.ExperimentMazeMaximizingQLearning({
            rp.experiment.ExperimentMazeMaximizingQLearning.HP_NUM_EPISODES: 2,
            rp.experiment.ExperimentMazeMaximizingQLearning.HP_REPEATS: 2
        })
        exp.run()

        rp.set_seeds(12345)
        exp_reprod = rp.experiment.ExperimentMazeMaximizingQLearning({
            rp.experiment.ExperimentMazeMaximizingQLearning.HP_NUM_EPISODES: 2,
            rp.experiment.ExperimentMazeMaximizingQLearning.HP_REPEATS: 2
        })
        exp_reprod.run()

        self._test_episode_length(exp, exp_reprod)
        self._test_mixture_agent_results(exp, exp_reprod)
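
`_test_mixture_agent_results` presumably compares the mixture agent's bookkeeping between the two runs, for example the 'count' and 'score' arrays that Example #1 inspects directly. A minimal sketch under that assumption (the keys and signature may differ from the package):

    def _test_mixture_agent_results(self, exp, exp_reprod):
        # Hypothetical sketch: compare the mixture agent's counts and scores,
        # the same keys Example #1 checks explicitly.
        import numpy as np
        for key in ['count', 'score']:
            res = np.array(exp.results[key])
            res_reprod = np.array(exp_reprod.results[key])
            self.assertTrue(np.all(res == res_reprod))
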
Example #7
    def _test_exp_class(self, cls):
        from itertools import product
        import rewardpredictive as rp
        for task_seq, exploration in product(['slight', 'significant'], ['egreedy']):
            hparam = {
                cls.HP_NUM_EPISODES: 3,
                cls.HP_REPEATS: 2,
                cls.HP_TASK_SEQUENCE: task_seq,
                cls.HP_EXPLORATION: exploration
            }
            rp.set_seeds(12345)
            exp = cls(hparam)
            exp.run()

            rp.set_seeds(12345)
            exp_reprod = cls(hparam)
            exp_reprod.run()

            self._test_ep_len(exp, exp_reprod)
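
A concrete test then only needs to pass its experiment class to `_test_exp_class`. The call below is illustrative: whether `ExperimentMazeQLearning` actually accepts the HP_TASK_SEQUENCE and HP_EXPLORATION hyperparameters set above is an assumption.

    def test_qlearning_task_sequences(self):
        # Illustrative usage of the shared helper above.
        import rewardpredictive as rp
        self._test_exp_class(rp.experiment.ExperimentMazeQLearning)
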
Example #8
    def test_predictivesf(self):
        import rewardpredictive as rp
        import tensorflow as tf
        tf.reset_default_graph()
        rp.set_seeds(12345)
        exp = rp.experiment.ExperimentMazePredictiveSFLearning({
            rp.experiment.ExperimentMazePredictiveSFLearning.HP_NUM_EPISODES: 2,
            rp.experiment.ExperimentMazePredictiveSFLearning.HP_REPEATS: 2
        })
        exp.run()

        tf.reset_default_graph()
        rp.set_seeds(12345)
        exp_reprod = rp.experiment.ExperimentMazePredictiveSFLearning({
            rp.experiment.ExperimentMazePredictiveSFLearning.HP_NUM_EPISODES: 2,
            rp.experiment.ExperimentMazePredictiveSFLearning.HP_REPEATS: 2
        })
        exp_reprod.run()

        self._test_episode_length(exp, exp_reprod)
        self._test_mixture_agent_results(exp, exp_reprod)