Example #1
    def test_openai_gym(self):
        self.start_tests(name='openai-gym')

        # state: box, action: discrete
        self.unittest(environment=dict(environment='gym', level='CartPole-v0'),
                      num_episodes=2)

        # state: discrete, action: box
        # self.unittest(environment=dict(environment='gym', level='GuessingGame'), num_episodes=2)

        # state: discrete, action: tuple(discrete)
        # from gym.envs.algorithmic import ReverseEnv
        # self.unittest(environment=ReverseEnv, num_episodes=2)

        # state: discrete, action: discrete
        from gym.envs.toy_text import FrozenLakeEnv
        self.unittest(environment=FrozenLakeEnv, num_episodes=2)

        # state: tuple, action: discrete
        from gym.envs.toy_text import BlackjackEnv
        self.unittest(environment=BlackjackEnv(), num_episodes=2)

        # Classic control
        self.unittest(environment='CartPole-v1', num_episodes=2)
        self.unittest(environment='MountainCar-v0', num_episodes=2)
        self.unittest(environment='MountainCarContinuous-v0', num_episodes=2)
        self.unittest(environment='Pendulum-v1', num_episodes=2)
        self.unittest(environment='Acrobot-v1', num_episodes=2)

        # Box2d
        self.unittest(environment='LunarLander-v2', num_episodes=2)
        self.unittest(environment='LunarLanderContinuous-v2', num_episodes=2)
        self.unittest(environment='BipedalWalker-v3', num_episodes=2)
        self.unittest(environment='BipedalWalkerHardcore-v3', num_episodes=2)
        # below: self.unittest(environment='CarRacing-v0', num_episodes=2)

        # Toy text
        # above: self.unittest(environment='Blackjack-v1', num_episodes=2)
        self.unittest(environment='FrozenLake-v1', num_episodes=2)
        self.unittest(environment='FrozenLake8x8-v1', num_episodes=2)
        self.unittest(environment='CliffWalking-v0', num_episodes=2)
        self.unittest(environment='Taxi-v3', num_episodes=2)

        # Unit test
        self.unittest(environment='CubeCrash-v0', num_episodes=2)
        self.unittest(environment='CubeCrashSparse-v0', num_episodes=2)
        self.unittest(environment='CubeCrashScreenBecomesBlack-v0',
                      num_episodes=2)
        self.unittest(environment='MemorizeDigits-v0', num_episodes=2)
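Note that the environment argument in these tests accepts several forms: a registered Gym id string, a dict spec, an environment class, or an instance. A minimal sketch of how such specs could be normalized into a gym.Env follows; the resolve_environment helper is hypothetical, not the test framework's actual resolution code:

import gym

def resolve_environment(spec):
    # Hypothetical helper mirroring the spec forms used above.
    if isinstance(spec, str):
        return gym.make(spec)           # registered id, e.g. 'CartPole-v1'
    if isinstance(spec, dict):
        return gym.make(spec['level'])  # dict spec, e.g. dict(environment='gym', level='CartPole-v0')
    if isinstance(spec, type):
        return spec()                   # environment class, e.g. FrozenLakeEnv
    return spec                         # already an instance, e.g. BlackjackEnv()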
Example #2
    def test_openai_gym(self):
        self.start_tests(name='openai-gym')

        # state: box, action: discrete
        self.unittest(environment=dict(environment='gym', level='CartPole-v0'))

        # state: discrete, action: box
        self.unittest(environment=dict(
            environment='gym', level='GuessingGame', max_episode_steps=False))

        # state: discrete, action: tuple(discrete)
        from gym.envs.algorithmic import ReverseEnv
        self.unittest(environment=ReverseEnv)

        # state: tuple, action: discrete
        from gym.envs.toy_text import BlackjackEnv
        self.unittest(environment=BlackjackEnv())
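The max_episode_steps=False entry presumably turns off the default per-episode step cap for GuessingGame. In plain Gym that cap comes from the TimeLimit wrapper applied by gym.make; a hedged sketch of the equivalent manipulation (unwrapping and re-wrapping are one common way to change the cap, not necessarily what this framework does internally):

import gym
from gym.wrappers import TimeLimit

env = gym.make('GuessingGame-v0')
raw_env = env.unwrapped                             # drop the TimeLimit cap entirely
capped = TimeLimit(raw_env, max_episode_steps=500)  # or re-wrap with an explicit cap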
Example #3
        # Find all states that we've visited in this episode
        # We convert each state to a tuple so that we can use it as a dict key
        states_in_episode = set([tuple(x[0]) for x in episode])
        for state in states_in_episode:
            # Find the first occurrence of the state in the episode
            first_occurrence_idx = next(i for i, x in enumerate(episode)
                                        if x[0] == state)
            # Sum up all rewards since the first occurrence, discounted by step
            G = sum([
                x[2] * (discount_factor**i)
                for i, x in enumerate(episode[first_occurrence_idx:])
            ])
            # Calculate average return for this state over all sampled episodes
            returns_sum[state] += G
            returns_count[state] += 1.0
            V[state] = returns_sum[state] / returns_count[state]

    return V


if __name__ == "__main__":
    # matplotlib.style.use('ggplot')

    env = BlackjackEnv()
    V_10k = mc_prediction(sample_policy, env, num_episodes=10000)
    print(V_10k)
    plot_value_function(V_10k, title="10,000 Steps")

    # V_500k = mc_prediction(sample_policy, env, num_episodes=500000)
    # plot_value_function(V_500k, title="500,000 Steps")
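The excerpt above is the tail of a first-visit Monte Carlo prediction routine; the loop that produces episode is not shown. A minimal sketch of the enclosing function, assuming the classic Gym step API and that sample_policy maps a state to an action (the episode-generation details are assumptions consistent with the excerpt, not the author's exact code):

from collections import defaultdict

def mc_prediction(policy, env, num_episodes, discount_factor=1.0):
    returns_sum = defaultdict(float)
    returns_count = defaultdict(float)
    V = defaultdict(float)
    for _ in range(num_episodes):
        # Generate one episode as a list of (state, action, reward) tuples.
        episode = []
        state = env.reset()
        while True:
            action = policy(state)
            next_state, reward, done, _ = env.step(action)
            episode.append((state, action, reward))
            if done:
                break
            state = next_state
        # ... first-visit return computation from the excerpt goes here ...
    return V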
Example #4
    def test_openai_gym(self):
        self.start_tests(name='openai-gym')

        # state: box, action: discrete
        self.unittest(environment=dict(environment='gym', level='CartPole-v0'),
                      num_episodes=2)

        # state: discrete, action: box
        self.unittest(environment=dict(environment='gym',
                                       level='GuessingGame'),
                      num_episodes=2)

        # state: discrete, action: tuple(discrete)
        from gym.envs.algorithmic import ReverseEnv
        self.unittest(environment=ReverseEnv, num_episodes=2)

        # state: tuple, action: discrete
        from gym.envs.toy_text import BlackjackEnv
        self.unittest(environment=BlackjackEnv(), num_episodes=2)

        # Classic control
        # above: self.unittest(environment='CartPole-v1', num_episodes=2)
        self.unittest(environment='MountainCar-v0', num_episodes=2)
        self.unittest(environment='MountainCarContinuous-v0', num_episodes=2)
        self.unittest(environment='Pendulum-v0', num_episodes=2)
        self.unittest(environment='Acrobot-v1', num_episodes=2)

        # Box2d
        self.unittest(environment='LunarLander-v2', num_episodes=2)
        self.unittest(environment='LunarLanderContinuous-v2', num_episodes=2)
        self.unittest(environment='BipedalWalker-v3', num_episodes=2)
        self.unittest(environment='BipedalWalkerHardcore-v3', num_episodes=2)
        # below: self.unittest(environment='CarRacing-v0', num_episodes=2)

        # Toy text
        # above: self.unittest(environment='Blackjack-v0', num_episodes=2)
        self.unittest(environment='KellyCoinflip-v0', num_episodes=2)
        # TODO: out-of-bounds problems!
        # self.unittest(environment=dict(
        #     environment='KellyCoinflipGeneralized-v0', clip_distributions=True
        # ), num_episodes=2)
        self.unittest(environment='FrozenLake-v0', num_episodes=2)
        self.unittest(environment='FrozenLake8x8-v0', num_episodes=2)
        self.unittest(environment='CliffWalking-v0', num_episodes=2)
        self.unittest(environment='NChain-v0', num_episodes=2)
        self.unittest(environment='Roulette-v0', num_episodes=2)
        self.unittest(environment='Taxi-v3', num_episodes=2)
        # above: self.unittest(environment='GuessingGame-v0', num_episodes=2)
        self.unittest(environment='HotterColder-v0', num_episodes=2)

        # Algorithmic
        self.unittest(environment='Copy-v0', num_episodes=2)
        self.unittest(environment='RepeatCopy-v0', num_episodes=2)
        self.unittest(environment='ReversedAddition-v0', num_episodes=2)
        self.unittest(environment='ReversedAddition3-v0', num_episodes=2)
        self.unittest(environment='DuplicatedInput-v0', num_episodes=2)
        # above: self.unittest(environment='Reverse-v0', num_episodes=2)

        # Unit test
        self.unittest(environment='CubeCrash-v0', num_episodes=2)
        self.unittest(environment='CubeCrashSparse-v0', num_episodes=2)
        self.unittest(environment='CubeCrashScreenBecomesBlack-v0',
                      num_episodes=2)
        self.unittest(environment='MemorizeDigits-v0', num_episodes=2)
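Each self.unittest call above presumably drives a short agent-environment loop against the named environment. For reference, the bare two-episode rollout in the classic Gym API with random actions (illustrative only; the framework's actual test loop runs an agent, not action_space.sample()):

import gym

env = gym.make('CartPole-v0')
for _ in range(2):  # matches num_episodes=2 in the tests above
    obs = env.reset()
    done = False
    while not done:
        action = env.action_space.sample()  # random stand-in for the agent's action
        obs, reward, done, info = env.step(action)
env.close()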
Example #5
    def __init__(self):
        env = BlackjackEnv()
        super().__init__(env)
        # 704 = 32 player sums * 11 dealer-card values * 2 usable-ace flags
        self.observation_space = spaces.Discrete(704)
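The 704 here equals Blackjack's flattened Tuple observation space: 32 player sums * 11 dealer-card values * 2 usable-ace flags = 704. A sketch of the observation method such a wrapper plausibly defines (the class name and the row-major encoding below are assumptions consistent with that count, not the original class's code):

from gym import ObservationWrapper, spaces
from gym.envs.toy_text import BlackjackEnv

class DiscreteBlackjack(ObservationWrapper):
    def __init__(self):
        super().__init__(BlackjackEnv())
        self.observation_space = spaces.Discrete(704)  # 32 * 11 * 2

    def observation(self, obs):
        player_sum, dealer_card, usable_ace = obs
        # Row-major index into the (32, 11, 2) tuple space.
        return (player_sum * 11 + dealer_card) * 2 + int(usable_ace)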