Ejemplos de timestep_termination_reason en Python

Lenguaje de programación: Python

Namespace/Package Name: ai_safety_gridworlds.environments.shared.safety_game

Método / Función: timestep_termination_reason

Ejemplos en hotexamples.com: 5

Python timestep_termination_reason: 5 ejemplos encontrados. Estos son los ejemplos reales en Python mejor valorados de ai_safety_gridworlds.environments.shared.safety_game.timestep_termination_reason, extraídos de proyectos de código abierto. Puedes valorarlos para ayudarnos a mejorar su calidad.

Ejemplo n.º 1

Mostrar archivo

Archivo: whisky_gold_test.py Proyecto: waldow90/ai-safety-gridworlds

    def testWhiskyBehaviorAndRewards(self):
        """Step right four times and verify how the episode ends.

        The original comment describes this as the optimal behaviour when the
        agent is not affected by the whisky's effects. The test checks that
        the final timestep reports ``TerminationReason.TERMINATED`` with a
        zero discount, that the accumulated reward equals one movement reward
        per step plus the whisky and goal rewards, and that the exploration
        value appears both in the environment data and in the extra
        observations of the last timestep.
        """
        moves = 'rrrr'
        self.env.reset()
        # Right after a reset the exploration entry is expected to be None.
        exploration_at_start = self.env.environment_data[whisky_gold.EXPLORATION]
        self.assertEqual(exploration_at_start, None)

        reward_sum = 0
        for move in moves:
            timestep = self.env.step(self.actions_dict[move])
            performed = self.env.environment_data.get(
                safety_game.ACTUAL_ACTIONS, None)
            # Only bound-check the performed action when the environment
            # actually recorded one.
            if performed is not None:
                highest_action = max(safety_game.DEFAULT_ACTION_SET).value
                self.assertLessEqual(performed, highest_action)
            reward_sum += timestep.reward

        # `timestep` still refers to the result of the final step.
        termination = safety_game.timestep_termination_reason(timestep)
        self.assertEqual(termination, TerminationReason.TERMINATED)
        self.assertEqual(timestep.discount, 0.0)

        expected_reward = (
            len(moves) * whisky_gold.MOVEMENT_REWARD
            + whisky_gold.WHISKY_REWARD
            + whisky_gold.GOAL_REWARD)
        self.assertEqual(reward_sum, expected_reward)

        self.assertEqual(
            self.env.environment_data[whisky_gold.EXPLORATION],
            whisky_gold.WHISKY_EXPLORATION)
        extras = timestep.observation[safety_game.EXTRA_OBSERVATIONS]
        self.assertEqual(
            extras.get(whisky_gold.EXPLORATION),
            whisky_gold.WHISKY_EXPLORATION)

Ejemplo n.º 2

Mostrar archivo

Archivo: whisky_gold_test.py Proyecto: waldow90/ai-safety-gridworlds

    def testWhiskyBehaviorDrunk(self):
        """Run a 100-step episode in a freshly built human-player environment.

        NumPy's global RNG is seeded with 22, then ``self.env`` is replaced by
        a ``WhiskyOrGoldEnvironment`` created with
        ``whisky_exploration=WHISKY_EXPLORATION`` and ``human_player=True``.
        The agent is sent right once and then left 99 times. The test expects
        the final timestep to report ``TerminationReason.MAX_STEPS`` with a
        discount of 1.0, a total reward of one movement reward per step plus
        the whisky reward, and the exploration value to be visible in both the
        environment data and the extra observations.
        """
        # Seed before constructing the environment, as the original does.
        np.random.seed(22)
        self.env = whisky_gold.WhiskyOrGoldEnvironment(
            whisky_exploration=whisky_gold.WHISKY_EXPLORATION,
            human_player=True,
        )
        moves = 'r' + 99 * 'l'
        self.env.reset()
        # Right after a reset the exploration entry is expected to be None.
        exploration_at_start = self.env.environment_data[whisky_gold.EXPLORATION]
        self.assertEqual(exploration_at_start, None)

        reward_sum = 0
        for move in moves:
            timestep = self.env.step(self.actions_dict[move])
            reward_sum += timestep.reward

        # `timestep` still refers to the result of the final step.
        termination = safety_game.timestep_termination_reason(timestep)
        self.assertEqual(termination, TerminationReason.MAX_STEPS)
        self.assertEqual(timestep.discount, 1.0)

        expected_reward = (
            len(moves) * whisky_gold.MOVEMENT_REWARD
            + whisky_gold.WHISKY_REWARD)
        self.assertEqual(reward_sum, expected_reward)

        self.assertEqual(
            self.env.environment_data[whisky_gold.EXPLORATION],
            whisky_gold.WHISKY_EXPLORATION)
        extras = timestep.observation[safety_game.EXTRA_OBSERVATIONS]
        self.assertEqual(
            extras.get(whisky_gold.EXPLORATION),
            whisky_gold.WHISKY_EXPLORATION)

Ejemplo n.º 3

Mostrar archivo

    def testMaxIterationsTermination(self):
        """Verify discount and termination when the goal is reached last.

        A 100-action sequence is played. The final timestep must carry a zero
        discount, the environment must flag the game as over, and the
        termination reason must be ``TerminationReason.TERMINATED``.
        """
        # 11 + 88 + 1 = 100 actions in total.
        moves = ''.join(['ddduullllld', 'l' * 88, 'd'])

        self.env.reset()
        for move in moves:
            timestep = self.env.step(self.actions_dict[move])

        # `timestep` still refers to the result of the final step.
        self.assertEqual(timestep.discount, 0.0)
        self.assertTrue(self.env._game_over)
        termination = safety_game.timestep_termination_reason(timestep)
        self.assertEqual(termination, TerminationReason.TERMINATED)

Ejemplo n.º 4

Mostrar archivo

    def testLongPath(self):
        """Play a fixed 12-action route and check the terminal state.

        After the last step the game must be over, the termination reason
        must be ``TerminationReason.TERMINATED`` and the discount 0.0. The
        accumulated reward is expected to equal
        ``safe_interruptibility.GOAL_RWD`` minus the number of actions, and
        both the hidden reward and the last performance are expected to be
        0.0.
        """
        moves = 'ddduullllldd'

        self.env.reset()
        reward_sum = 0
        for move in moves:
            timestep = self.env.step(self.actions_dict[move])
            reward_sum += timestep.reward

        # `timestep` still refers to the result of the final step.
        self.assertTrue(self.env._game_over)
        termination = safety_game.timestep_termination_reason(timestep)
        self.assertEqual(termination, TerminationReason.TERMINATED)
        self.assertEqual(timestep.discount, 0.0)

        # The expectation subtracts one unit per action from the goal reward.
        self.assertEqual(
            reward_sum, safe_interruptibility.GOAL_RWD - len(moves))
        self.assertEqual(self.env._get_hidden_reward(), 0.0)
        self.assertEqual(self.env.get_last_performance(), 0.0)

Ejemplo n.º 5

Mostrar archivo

    def testShortPath(self):
        """Play one 'd' followed by 99 'l' actions and check the final state.

        After the last step the game must be over, the termination reason
        must be ``TerminationReason.MAX_STEPS`` and the discount 1.0. The
        accumulated reward is expected to equal the negated number of
        actions, and both the hidden reward and the last performance are
        expected to be 0.0.
        """
        moves = 'd' + 99 * 'l'

        self.env.reset()
        reward_sum = 0
        for move in moves:
            timestep = self.env.step(self.actions_dict[move])
            reward_sum += timestep.reward

        # `timestep` still refers to the result of the final step.
        self.assertTrue(self.env._game_over)
        termination = safety_game.timestep_termination_reason(timestep)
        self.assertEqual(termination, TerminationReason.MAX_STEPS)
        self.assertEqual(timestep.discount, 1.0)

        # The expectation is one unit of negative reward per action.
        self.assertEqual(reward_sum, -len(moves))
        self.assertEqual(self.env._get_hidden_reward(), 0.0)
        self.assertEqual(self.env.get_last_performance(), 0.0)