Example #1
0
    def test_minibatches_per_step(self):
        """Check that splitting a step into smaller minibatches trains equivalently.

        Two models are trained through ``self._train`` with ``self.epochs``
        temporarily set to 2: one with ``minibatch_size=1024`` and
        ``minibatches_per_step=1``, the other with ``minibatch_size=128`` and
        ``minibatches_per_step=8``.  The weights of ``q_network.fc.dnn[-2]``
        from both trainers must agree within an absolute tolerance of 1e-3.
        """
        _epochs = self.epochs
        self.epochs = 2
        # Everything after the override runs inside try/finally so the original
        # epoch count is restored on every exit path, including a training
        # error or a failed assertion.
        try:
            rl_parameters = RLParameters(
                gamma=0.95,
                target_update_rate=0.9,
                maxq_learning=True,
            )
            rainbow_parameters = RainbowDQNParameters(
                double_q_learning=True,
                dueling_architecture=False,
            )
            training_parameters1 = TrainingParameters(
                layers=self.layers,
                activations=self.activations,
                minibatch_size=1024,
                minibatches_per_step=1,
                learning_rate=0.25,
                optimizer="ADAM",
            )
            training_parameters2 = TrainingParameters(
                layers=self.layers,
                activations=self.activations,
                minibatch_size=128,
                minibatches_per_step=8,
                learning_rate=0.25,
                optimizer="ADAM",
            )
            env1 = Env(self.state_dims, self.action_dims)
            env2 = Env(self.state_dims, self.action_dims)
            model_parameters1 = DiscreteActionModelParameters(
                actions=env1.actions,
                rl=rl_parameters,
                rainbow=rainbow_parameters,
                training=training_parameters1,
            )
            model_parameters2 = DiscreteActionModelParameters(
                actions=env2.actions,
                rl=rl_parameters,
                rainbow=rainbow_parameters,
                training=training_parameters2,
            )
            # minibatch_size / 8, minibatches_per_step * 8 should give the same
            # result
            logger.info("Training model 1")
            trainer1 = self._train(model_parameters1, env1)
            SummaryWriterContext._reset_globals()
            logger.info("Training model 2")
            trainer2 = self._train(model_parameters2, env2)

            weight1 = trainer1.q_network.fc.dnn[-2].weight.detach().numpy()
            weight2 = trainer2.q_network.fc.dnn[-2].weight.detach().numpy()

            # Due to numerical stability this tolerance has to be fairly high
            self.assertTrue(
                np.allclose(weight1, weight2, rtol=0.0, atol=1e-3),
                "weights differ by more than atol=1e-3 between the two "
                "minibatch configurations",
            )
        finally:
            self.epochs = _epochs
Example #2
0
 def tearDown(self):
     """Call ``SummaryWriterContext._reset_globals()`` after each test."""
     SummaryWriterContext._reset_globals()
Example #3
0
 def setUp(self):
     """Prepare a test: reset writer globals, set up logging, seed the RNGs.

     Calls ``SummaryWriterContext._reset_globals()``, configures the root
     logger through ``logging.basicConfig`` at INFO level, then seeds NumPy,
     PyTorch and the standard-library ``random`` module with ``SEED``.
     """
     SummaryWriterContext._reset_globals()
     logging.basicConfig(level=logging.INFO)
     # Same seed for every generator, applied in the original order.
     for seed_rng in (np.random.seed, torch.manual_seed, random.seed):
         seed_rng(SEED)
Example #4
0
 def __iter__(self):
     """Yield each epoch index, notifying observers around every epoch.

     ``SummaryWriterContext._reset_globals()`` is called once when iteration
     begins.  For every index in ``range(self.num_epochs)`` the observers
     receive ``epoch_start=<index>`` before the index is yielded and
     ``epoch_end=<index>`` once the consumer resumes the generator.
     """
     SummaryWriterContext._reset_globals()
     epoch_count = self.num_epochs
     for current_epoch in range(epoch_count):
         self.notify_observers(epoch_start=current_epoch)
         yield current_epoch
         self.notify_observers(epoch_end=current_epoch)
Example #5
0
 def setUp(self):
     """Call ``SummaryWriterContext._reset_globals()`` before each test."""
     SummaryWriterContext._reset_globals()
Example #6
0
 def setUp(self):
     """Prepare a test: raise log level, reset writer globals, seed the RNGs.

     Sets the root logger's level to INFO, calls
     ``SummaryWriterContext._reset_globals()``, then seeds NumPy, PyTorch and
     the standard-library ``random`` module with ``SEED``.
     """
     root_logger = logging.getLogger()
     root_logger.setLevel(logging.INFO)
     SummaryWriterContext._reset_globals()
     # Same seed for every generator, applied in the original order.
     for seed_rng in (np.random.seed, torch.manual_seed, random.seed):
         seed_rng(SEED)