def setUp(self):
    """Configure a short tolerance-checked training run and reset TB state."""
    self.num_epochs = 5
    self.tolerance_threshold = 0.1
    # Enable all optional checks for this test case.
    self.check_tolerance = True
    self.test_save_load = True
    self.run_pre_training_eval = True
    # Clear any tensorboard writer state left over from a previous test.
    SummaryWriterContext._reset_globals()
def test_complete_experiment(env_name, config):
    """
    Smoke test that runs a small Park QOpt experiment and fails if any
    exception is raised during its execution.

    :param env_name: name of the gym environment, used only in the failure
        message.
    :param config: path to a JSON experiment-configuration file.
    """
    try:
        SummaryWriterContext._reset_globals()
        with open(config) as f:
            params = json.load(f)
        # Read eagerly so a malformed config fails the test up front.
        # NOTE(review): checkpoint_freq is currently unused below — confirm
        # whether run_gym reads it from `params` directly.
        checkpoint_freq = params["run_details"]["checkpoint_after_ts"]

        # Train agent
        dataset = RLDataset(FILE_PATH)

        # Log experiment info to Tensorboard under a directory named after
        # the config file (e.g. ".../my_experiment" for "my_experiment.json").
        experiment_name = os.path.splitext(os.path.basename(config))[0]
        os.environ["TENSORBOARD_DIR"] = os.path.join(
            EVALUATION_PATH, experiment_name
        )

        (
            average_reward_train,
            num_episodes_train,
            average_reward_eval,
            num_episodes_eval,
            timesteps_history,
            trainer,
            predictor,
            env,
        ) = horizon_runner.run_gym(params, False, None, -1, dataset)

        if dataset:
            dataset.save()
        SummaryWriterContext._reset_globals()
    except Exception as e:
        # Include the exception so the failure is diagnosable from the
        # pytest report instead of being silently swallowed.
        pytest.fail(
            "Running a small {} experiment in Horizon failed! "
            "Exception: {!r}".format(env_name, e)
        )
def setUp(self):
    """Configure a gridworld training run with a reward-scaled tolerance."""
    self.num_epochs = 5
    # Scale the absolute-error threshold by the squared reward scale so the
    # tolerance tracks the magnitude of gridworld rewards.
    self.tolerance_threshold = (
        GridworldEvaluator.ABS_ERR_THRES * GridworldBase.REWARD_SCALE ** 2
    )
    self.check_tolerance = True
    self.test_save_load = True
    self.run_pre_training_eval = True
    # Clear any tensorboard writer state left over from a previous test.
    SummaryWriterContext._reset_globals()
def setUp(self):
    """Define the network topology and training-data sizes for the tests."""
    self.state_dims = 5
    self.action_dims = 2
    # -1 sentinels are placeholders for the input/output dimensions.
    self.layers = [-1, 128, -1]
    self.activations = ["relu", "linear"]
    self.num_samples = 1024 * 10  # multiple of minibatch_size (1024)
    self.epochs = 24
    # Clear tensorboard writer state before delegating to the base fixture.
    SummaryWriterContext._reset_globals()
    super().setUp()
def test_minibatches_per_step(self):
    """Check that (minibatch_size / 8, minibatches_per_step * 8) trains to
    the same final-layer weights as the original configuration.
    """
    # Shrink the run; restore the configured epoch count at the end.
    saved_epochs = self.epochs
    self.epochs = 2

    shared_rl = RLParameters(gamma=0.95, target_update_rate=0.9, maxq_learning=True)
    shared_rainbow = RainbowDQNParameters(
        double_q_learning=True, dueling_architecture=False
    )
    big_batch_training = TrainingParameters(
        layers=self.layers,
        activations=self.activations,
        minibatch_size=1024,
        minibatches_per_step=1,
        learning_rate=0.25,
        optimizer="ADAM",
    )
    small_batch_training = TrainingParameters(
        layers=self.layers,
        activations=self.activations,
        minibatch_size=128,
        minibatches_per_step=8,
        learning_rate=0.25,
        optimizer="ADAM",
    )
    env_a = Env(self.state_dims, self.action_dims)
    env_b = Env(self.state_dims, self.action_dims)
    big_batch_model = DiscreteActionModelParameters(
        actions=env_a.actions,
        rl=shared_rl,
        rainbow=shared_rainbow,
        training=big_batch_training,
    )
    small_batch_model = DiscreteActionModelParameters(
        actions=env_b.actions,
        rl=shared_rl,
        rainbow=shared_rainbow,
        training=small_batch_training,
    )

    # minibatch_size / 8, minibatches_per_step * 8 should give the same result
    logger.info("Training model 1")
    big_batch_trainer = self._train(big_batch_model, env_a)
    SummaryWriterContext._reset_globals()
    logger.info("Training model 2")
    small_batch_trainer = self._train(small_batch_model, env_b)

    final_weight_a = big_batch_trainer.q_network.fc.layers[-1].weight.detach().numpy()
    final_weight_b = (
        small_batch_trainer.q_network.fc.layers[-1].weight.detach().numpy()
    )
    # Due to numerical stability this tolerance has to be fairly high
    self.assertTrue(np.allclose(final_weight_a, final_weight_b, rtol=0.0, atol=1e-3))

    self.epochs = saved_epochs
def setUp(self):
    """Seed every RNG and reset global test state for reproducibility."""
    logging.getLogger().setLevel(logging.INFO)
    # Clear any tensorboard writer state left over from a previous test.
    SummaryWriterContext._reset_globals()
    # Seed stdlib, numpy, and torch RNGs so runs are deterministic.
    random.seed(SEED)
    np.random.seed(SEED)
    torch.manual_seed(SEED)
def tearDown(self):
    """Clear global tensorboard writer state so later tests start clean."""
    SummaryWriterContext._reset_globals()
def setUp(self):
    """Start each test with fresh tensorboard writer state."""
    SummaryWriterContext._reset_globals()
def setUp(self):
    """Enable INFO logging and start with fresh tensorboard writer state."""
    logging.getLogger().setLevel(logging.INFO)
    SummaryWriterContext._reset_globals()