def testWhiskyBehaviorDrunk(self):
    """Step right once, then left 99 times, and check the final state.

    The environment is rebuilt with ``human_player=True`` after seeding
    NumPy's global RNG. The test verifies that:

    * the exploration entry of ``environment_data`` is ``None`` right after
      ``reset()``;
    * the last timestep terminates with ``MAX_STEPS`` and a discount of 1.0;
    * the summed reward equals one movement reward per step plus a single
      whisky reward;
    * the exploration value equals ``WHISKY_EXPLORATION`` both in
      ``environment_data`` and in the last timestep's extra observations.
    """
    # Seed before constructing the environment, as the original ordering did.
    # NOTE(review): presumably the environment samples from NumPy's global
    # RNG, which is why the seed is pinned -- confirm against whisky_gold.
    np.random.seed(22)
    self.env = whisky_gold.WhiskyOrGoldEnvironment(
        whisky_exploration=whisky_gold.WHISKY_EXPLORATION,
        human_player=True)

    self.env.reset()
    self.assertEqual(self.env.environment_data[whisky_gold.EXPLORATION], None)

    # One move to the right followed by 99 moves to the left.
    move_keys = 'r' + 'l' * 99
    timesteps = [self.env.step(self.actions_dict[key]) for key in move_keys]
    final_timestep = timesteps[-1]
    reward_sum = sum(step.reward for step in timesteps)

    # Only the final timestep is inspected for termination details.
    termination = safety_game.timestep_termination_reason(final_timestep)
    self.assertEqual(termination, TerminationReason.MAX_STEPS)
    self.assertEqual(final_timestep.discount, 1.0)

    expected_reward = (
        len(move_keys) * whisky_gold.MOVEMENT_REWARD
        + whisky_gold.WHISKY_REWARD)
    self.assertEqual(reward_sum, expected_reward)

    # The exploration value must be visible in both places it is reported.
    self.assertEqual(
        self.env.environment_data[whisky_gold.EXPLORATION],
        whisky_gold.WHISKY_EXPLORATION)
    extra_observations = final_timestep.observation[
        safety_game.EXTRA_OBSERVATIONS]
    self.assertEqual(
        extra_observations.get(whisky_gold.EXPLORATION),
        whisky_gold.WHISKY_EXPLORATION)
def setUp(self):
    """Create the default environment and the key-to-action lookup table.

    Sets ``self.env`` to a ``WhiskyOrGoldEnvironment`` built with
    ``human_player=False`` and ``self.actions_dict`` to a mapping from the
    single-letter keys ``'l'``, ``'r'``, ``'u'``, ``'d'`` to the ``value`` of
    the corresponding ``Actions`` member.
    """
    # NOTE(review): the parent class's setUp() is not invoked here -- confirm
    # the base test class does not require it.
    self.env = whisky_gold.WhiskyOrGoldEnvironment(
        whisky_exploration=whisky_gold.WHISKY_EXPLORATION,
        human_player=False)

    # Get all allowed actions, keyed by the letter used in the test scripts.
    key_action_pairs = (
        ('l', Actions.LEFT),
        ('r', Actions.RIGHT),
        ('u', Actions.UP),
        ('d', Actions.DOWN),
    )
    self.actions_dict = {key: action.value for key, action in key_action_pairs}