def setUp(self):
     """Configure test defaults and start from clean summary-writer state."""
     # Run an evaluation pass before any training happens.
     self.run_pre_training_eval = True
     self.num_epochs = 5
     # Enable save/load round-trip checking with a fixed tolerance.
     self.test_save_load = True
     self.check_tolerance = True
     self.tolerance_threshold = 0.1
     # Drop any SummaryWriter globals left over from a previous test.
     SummaryWriterContext._reset_globals()
Beispiel #2
0
def test_complete_experiment(env_name, config):
    """
    Smoke test that runs a small Park QOpt experiment and fails if any
    exception is raised during its execution.

    Args:
        env_name: Gym environment name; used only in the failure message.
        config: Path to a JSON file with the experiment parameters.
    """
    try:
        SummaryWriterContext._reset_globals()

        with open(config) as f:
            params = json.load(f)

        # Train the agent, collecting its replay data into an RLDataset.
        dataset = RLDataset(FILE_PATH)

        # Derive an experiment name from the config filename (strip the
        # directory prefix and the ".json" suffix) and point Tensorboard
        # logging at a matching subdirectory.
        experiment_name = config[config.rfind('/') + 1:config.rfind('.json')]
        os.environ["TENSORBOARD_DIR"] = os.path.join(EVALUATION_PATH,
                                                     experiment_name)

        # The returned training/eval statistics are not asserted on here;
        # this test only checks that the run completes without raising.
        horizon_runner.run_gym(params, False, None, -1, dataset)

        if dataset:
            dataset.save()

        SummaryWriterContext._reset_globals()
    except Exception as e:
        # Surface the underlying error so the failure is diagnosable
        # instead of swallowing it behind a generic message.
        pytest.fail('Running a small ' + str(env_name) +
                    ' experiment in Horizon failed! ' + repr(e))
Beispiel #3
0
 def setUp(self):
     """Set gridworld evaluation defaults and clear summary-writer state."""
     self.run_pre_training_eval = True
     self.num_epochs = 5
     self.test_save_load = True
     self.check_tolerance = True
     # Scale the absolute-error threshold by the squared reward scale so the
     # tolerance matches the magnitude of the scaled rewards.
     self.tolerance_threshold = (GridworldEvaluator.ABS_ERR_THRES *
                                 GridworldBase.REWARD_SCALE ** 2)
     SummaryWriterContext._reset_globals()
Beispiel #4
0
 def setUp(self):
     """Configure network architecture and data sizes, then defer to base setUp."""
     self.state_dims = 5
     self.action_dims = 2
     # NOTE(review): the -1 layer sizes are presumably replaced by the
     # state/action dimensions downstream — confirm against the trainer.
     self.layers = [-1, 128, -1]
     self.activations = ["relu", "linear"]
     # Keep the sample count an exact multiple of the minibatch size (1024).
     self.num_samples = 10 * 1024
     self.epochs = 24
     SummaryWriterContext._reset_globals()
     super().setUp()
    def test_minibatches_per_step(self):
        """Train two models whose (minibatch_size, minibatches_per_step)
        products are equal — (1024, 1) vs (128, 8) — and assert their final
        layer weights end up (approximately) identical."""
        # Shrink the epoch count for this test; restored at the end.
        _epochs = self.epochs
        self.epochs = 2
        rl_parameters = RLParameters(gamma=0.95,
                                     target_update_rate=0.9,
                                     maxq_learning=True)
        rainbow_parameters = RainbowDQNParameters(double_q_learning=True,
                                                  dueling_architecture=False)
        # One large minibatch per step...
        training_parameters1 = TrainingParameters(
            layers=self.layers,
            activations=self.activations,
            minibatch_size=1024,
            minibatches_per_step=1,
            learning_rate=0.25,
            optimizer="ADAM",
        )
        # ...versus eight minibatches 1/8 the size — same data per step.
        training_parameters2 = TrainingParameters(
            layers=self.layers,
            activations=self.activations,
            minibatch_size=128,
            minibatches_per_step=8,
            learning_rate=0.25,
            optimizer="ADAM",
        )
        env1 = Env(self.state_dims, self.action_dims)
        env2 = Env(self.state_dims, self.action_dims)
        model_parameters1 = DiscreteActionModelParameters(
            actions=env1.actions,
            rl=rl_parameters,
            rainbow=rainbow_parameters,
            training=training_parameters1,
        )
        model_parameters2 = DiscreteActionModelParameters(
            actions=env2.actions,
            rl=rl_parameters,
            rainbow=rainbow_parameters,
            training=training_parameters2,
        )
        # minibatch_size / 8, minibatches_per_step * 8 should give the same result
        logger.info("Training model 1")
        trainer1 = self._train(model_parameters1, env1)
        # Reset summary-writer globals between runs so the second training
        # starts from the same global state as the first.
        SummaryWriterContext._reset_globals()
        logger.info("Training model 2")
        trainer2 = self._train(model_parameters2, env2)

        # Compare the final fully-connected layer's weights of both networks.
        weight1 = trainer1.q_network.fc.layers[-1].weight.detach().numpy()
        weight2 = trainer2.q_network.fc.layers[-1].weight.detach().numpy()

        # Due to numerical stability this tolerance has to be fairly high
        self.assertTrue(np.allclose(weight1, weight2, rtol=0.0, atol=1e-3))
        # Restore the epoch count for any subsequent tests.
        self.epochs = _epochs
Beispiel #6
0
 def setUp(self):
     """Seed every RNG for reproducibility and reset logging/summary state."""
     logging.getLogger().setLevel(logging.INFO)
     SummaryWriterContext._reset_globals()
     # Seed all sources of randomness so each run is deterministic.
     random.seed(SEED)
     np.random.seed(SEED)
     torch.manual_seed(SEED)
 def tearDown(self):
     # Clear global SummaryWriter state so it does not leak into other tests.
     SummaryWriterContext._reset_globals()
 def setUp(self):
     # Start each test from clean global SummaryWriter state.
     SummaryWriterContext._reset_globals()
Beispiel #9
0
 def tearDown(self):
     # Clear global SummaryWriter state so it does not leak into other tests.
     SummaryWriterContext._reset_globals()
Beispiel #10
0
 def setUp(self):
     # Start each test from clean global SummaryWriter state.
     SummaryWriterContext._reset_globals()
Beispiel #11
0
 def setUp(self):
     # Emit INFO-level logs during the test and start from clean
     # global SummaryWriter state.
     logging.getLogger().setLevel(logging.INFO)
     SummaryWriterContext._reset_globals()