Example #1
0
 def test_one_step(self):
     """A source created with steps_count=1 must yield chunks of length 1."""
     source = experience.ExperienceSource(self.envs,
                                          DummyAgent(),
                                          steps_count=1)
     # Only the very first chunk is needed for this check.
     first_chunk = next(iter(source))
     self.assertEqual(1, len(first_chunk))
Example #2
0
 def test_two_steps(self):
     """A source created with steps_count=2 must yield chunks of length 2."""
     source = experience.ExperienceSource(self.env,
                                          DummyAgent(),
                                          steps_count=2)
     # Only the very first chunk is needed for this check.
     first_chunk = next(iter(source))
     self.assertEqual(2, len(first_chunk))
Example #3
0
 def test_short_game(self):
     """Iterate a CartPole episode and stop at the first terminal experience.

     Every yielded chunk must be a tuple whose first element is an
     experience.Experience instance.
     """
     env = gym.make('CartPole-v0')
     source = experience.ExperienceSource(env, DummyAgent(), steps_count=1)
     for chunk in source:
         self.assertIsInstance(chunk, tuple)
         self.assertIsInstance(chunk[0], experience.Experience)
         if chunk[0].done:
             break
Example #4
0
 def test_one_step(self):
     """First chunk from a 1-step source: a 1-tuple holding a single
     non-terminal Experience whose reward is -1.0 (the per-step reward
     this test's environment is expected to emit)."""
     source = experience.ExperienceSource(self.env, DummyAgent(), steps_count=1)
     chunk = next(iter(source))
     self.assertEqual(1, len(chunk))
     self.assertIsInstance(chunk, tuple)
     self.assertIsInstance(chunk[0], experience.Experience)
     self.assertAlmostEqual(chunk[0].reward, -1.0)
     self.assertFalse(chunk[0].done)
Example #5
0
    def test_short_game(self):
        """A shorter-than-requested chunk signals the end of an episode.

        Walk a CartPole episode; when the source yields a chunk of
        length 1 its (only) experience must be flagged done.
        """
        env = gym.make('CartPole-v0')
        source = experience.ExperienceSource(env,
                                             dummy_agent,
                                             steps_count=1)
        for chunk in source:
            self.assertIsInstance(chunk, tuple)
            self.assertIsInstance(chunk[0], experience.Experience)

            if len(chunk) == 1:
                self.assertTrue(chunk[0].done)
                break
Example #6
0
    def test_state(self):
        """Agent state must survive across chunks.

        StatefulAgent increments its action on every call, so within any
        chunk each experience's action must be the previous one plus one,
        modulo the action-space size.  A chunk shorter than steps_count
        must end with a terminal experience.
        """
        n_actions = self.envs[0].action_space.n
        agent = StatefulAgent(self.envs[0].action_space)
        steps = 3
        source = experience.ExperienceSource(self.envs, agent, steps_count=steps)

        # Cap the run at 100 chunks to keep the test bounded.
        for _, chunk in zip(range(100), source):
            for prev, cur in zip(chunk, chunk[1:]):
                self.assertEqual(cur.action, (prev.action + 1) % n_actions)
            if len(chunk) != steps:
                self.assertTrue(chunk[-1].done)
Example #7
0
 def setUpClass(cls):
     """Create one shared ExperienceSource over MountainCar for all tests."""
     environment = gym.make("MountainCar-v0")
     cls.source = experience.ExperienceSource(environment, agent=DummyAgent())
Example #8
0
                input_shape=(1 if grayscale else 3, im_height, im_width))
    if params.cuda_enabled:
        model.cuda()

    loss_fn = nn.MSELoss(size_average=False)
    optimizer = optim.Adam(model.parameters(),
                           lr=run.getfloat("learning", "lr"))

    action_selector = ActionSelectorEpsilonGreedy(epsilon=run.getfloat(
        "defaults", "epsilon"),
                                                  params=params)
    target_net = agent.TargetNet(model)
    dqn_agent = agent.DQNAgent(dqn_model=model,
                               action_selector=action_selector)
    exp_source = experience.ExperienceSource(env=env_pool,
                                             agent=dqn_agent,
                                             steps_count=run.getint(
                                                 "defaults", "n_steps"))
    exp_replay = experience.ExperienceReplayBuffer(exp_source,
                                                   buffer_size=run.getint(
                                                       "exp_buffer", "size"))

    use_target_dqn = run.getboolean("dqn", "target_dqn", fallback=False)
    use_double_dqn = run.getboolean("dqn", "double_dqn", fallback=False)

    if use_target_dqn:
        target_model = target_net.target_model
    else:
        target_model = model

    def batch_to_train(batch):
        """