Example #1
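Example #1 drives the same CartPole-v1 task through three wrappers: FruitAPI's GymEnvironment, TensorForce's OpenAIGym, and FruitAPI's TFEnvironment bridge. It prints each wrapper's state/action space description and the result of a reset (plus one execute() step for the TensorForce-style wrappers) so the interfaces can be compared side by side. A minimal import sketch follows; the FruitAPI module paths in particular are assumptions about the package layout and may need adjusting:

# Imports assumed by this example. The FruitAPI paths are a guess at the
# package layout; the TensorForce path matches the 0.5-style API used here.
from fruit.envs.gym import GymEnvironment       # FruitAPI Gym wrapper (assumed path)
from fruit.plugins.tf import TFEnvironment      # FruitAPI-to-TensorForce bridge (assumed path)
from tensorforce.environments import OpenAIGym  # TensorForce's own Gym wrapper
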
def compatible_2():
    print('+++++++++++++++++++++++++++++++++++++++++++++++++')
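    # FruitAPI's GymEnvironment: print its state/action space descriptions and the initial state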
    fruit_env = GymEnvironment(env_name='CartPole-v1')
    state = fruit_env.get_state_space()
    print(state.get_range())
    print(tuple(state.get_shape()))
    print(fruit_env.get_action_space().get_range())
    print(fruit_env.reset())
    print(fruit_env.get_state())
    print('+++++++++++++++++++++++++++++++++++++++++++++++++')

    print('+++++++++++++++++++++++++++++++++++++++++++++++++')
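    # The same task through TensorForce's own OpenAIGym: states(), actions(), reset() and one execute() step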
    env = OpenAIGym(level='CartPole-v1')
    state = env.states()
    print(state)
    print(env.actions())
    print(env.reset())
    print(env.execute(0))
    print(env.max_episode_timesteps())
    print('+++++++++++++++++++++++++++++++++++++++++++++++++')

    print('+++++++++++++++++++++++++++++++++++++++++++++++++')
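    # TFEnvironment bridges the FruitAPI environment to the TensorForce interface, so the same calls apply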
    env = TFEnvironment(fruit_environment=fruit_env)
    print(env.states())
    print(env.actions())
    print(env.reset())
    print(env.execute(0))
    print(env.max_episode_timesteps())
    print('+++++++++++++++++++++++++++++++++++++++++++++++++')
Example #2
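Example #2 collects three snippets written against the TensorForce 0.4-era API: a unit test that trains a PPOAgent on CartPole-v1 until the mean reward over the last 100 episodes exceeds 180, a standalone quickstart main() with the same agent configuration, and a command-line main() that wires up the environment, agent, and runner from argparse arguments. They rely roughly on the following imports (a sketch assuming the TensorForce 0.4.x module layout):

import sys
import argparse
import importlib
import numpy as np
from tensorforce.agents import Agent, PPOAgent
from tensorforce.execution import Runner
from tensorforce.contrib.openai_gym import OpenAIGym
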
    def test_quickstart(self):
        sys.stdout.write('\nQuickstart:\n')
        sys.stdout.flush()

        # Create an OpenAI-Gym environment
        environment = OpenAIGym('CartPole-v1')

        # Create the agent
        agent = PPOAgent(
            states=environment.states(),
            actions=environment.actions(),
            # Automatically configured network
            network='auto',
            # Memory sampling most recent experiences, with a capacity of 6100 timesteps
            # (6100 > [30 batch episodes] * [200 max timesteps per episode])
            memory=6100,
            # Update every 10 episodes, with a batch of 30 episodes
            update_mode=dict(unit='episodes', batch_size=30, frequency=10),
            # PPO optimizer
            step_optimizer=dict(type='adam', learning_rate=1e-3),
            # PPO multi-step optimization: 10 updates, each based on a third of the batch
            subsampling_fraction=0.33,
            optimization_steps=10,
            # MLP baseline
            baseline_mode='states',
            baseline=dict(type='network', network='auto'),
            # Baseline optimizer
            baseline_optimizer=dict(type='multi_step',
                                    optimizer=dict(type='adam',
                                                   learning_rate=1e-4),
                                    num_steps=5),
            # Other parameters
            discount=0.99,
            entropy_regularization=1e-2,
            gae_lambda=None,
            likelihood_ratio_clipping=0.2)

        # Initialize the runner
        runner = Runner(agent=agent, environment=environment)

        # Callback called after each finished episode; returning False (once the
        # mean reward over the last 100 episodes exceeds 180) stops the run early
        def callback(r):
            return float(np.mean(r.episode_rewards[-100:])) <= 180.0

        # Start the runner
        runner.run(num_episodes=1000,
                   max_episode_timesteps=200,
                   callback=callback)
        runner.close()

        if float(np.mean(runner.episode_rewards[-100:])) <= 180.0:
            sys.stdout.write('Test failed, exceeding {} episodes\n'.format(
                runner.episode))
            sys.stdout.flush()
            self.assertTrue(expr=False)
        else:
            sys.stdout.write('Test passed after {} episodes\n'.format(
                runner.episode))
            sys.stdout.flush()
            self.assertTrue(expr=True)
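
A standalone quickstart main() follows, using the same PPOAgent configuration but running for up to 1000 episodes without the early-stopping callback:
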
def main():
    # Create an OpenAI-Gym environment
    environment = OpenAIGym('CartPole-v1')

    # Create the agent
    agent = PPOAgent(
        states=environment.states(),
        actions=environment.actions(),
        # Automatically configured network
        network='auto',
        # Memory sampling most recent experiences, with a capacity of 6100 timesteps
        # (6100 > [30 batch episodes] * [200 max timesteps per episode])
        memory=6100,
        # Update every 10 episodes, with a batch of 30 episodes
        update_mode=dict(unit='episodes', batch_size=30, frequency=10),
        # PPO optimizer
        step_optimizer=dict(type='adam', learning_rate=1e-3),
        # PPO multi-step optimization: 10 updates, each based on a third of the batch
        subsampling_fraction=0.33,
        optimization_steps=10,
        # MLP baseline
        baseline_mode='states',
        baseline=dict(type='network', network='auto'),
        # Baseline optimizer
        baseline_optimizer=dict(type='multi_step',
                                optimizer=dict(type='adam',
                                               learning_rate=1e-4),
                                num_steps=5),
        # Other parameters
        discount=0.99,
        entropy_regularization=1e-2,
        gae_lambda=None,
        likelihood_ratio_clipping=0.2)

    # Initialize the runner
    runner = Runner(agent=agent, environment=environment)

    # Start the runner
    runner.run(num_episodes=1000, max_episode_timesteps=200)
    runner.close()
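
The last snippet is a separate command-line entry point: the environment, agent, and runner are configured from argparse arguments, and a callback prints running statistics every 100 episodes:
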
def main():
    parser = argparse.ArgumentParser()
    # Gym arguments
    parser.add_argument('-g', '--gym', help="Gym environment id")
    parser.add_argument('-i',
                        '--import-modules',
                        help="Import module(s) required for gym environment")
    parser.add_argument('--monitor',
                        type=str,
                        default=None,
                        help="Save results to this directory")
    parser.add_argument('--monitor-safe',
                        action='store_true',
                        default=False,
                        help="Do not overwrite previous results")
    parser.add_argument('--monitor-video',
                        type=int,
                        default=0,
                        help="Save video every x steps (0 = disabled)")
    parser.add_argument('--visualize',
                        action='store_true',
                        default=False,
                        help="Enable OpenAI Gym's visualization")
    # Agent arguments
    parser.add_argument('-a', '--agent', help="Agent configuration file")
    parser.add_argument('-n',
                        '--network',
                        default=None,
                        help="Network specification file")
    # Runner arguments
    parser.add_argument('-e',
                        '--episodes',
                        type=int,
                        default=None,
                        help="Number of episodes")
    parser.add_argument('-t',
                        '--timesteps',
                        type=int,
                        default=None,
                        help="Number of timesteps")
    parser.add_argument('-m',
                        '--max-episode-timesteps',
                        type=int,
                        default=None,
                        help="Maximum number of timesteps per episode")
    parser.add_argument('-d',
                        '--deterministic',
                        action='store_true',
                        default=False,
                        help="Choose actions deterministically")
    args = parser.parse_args()

    if args.import_modules is not None:
        for module in args.import_modules.split(','):
            importlib.import_module(name=module)

    environment = OpenAIGym(gym_id=args.gym,
                            monitor=args.monitor,
                            monitor_safe=args.monitor_safe,
                            monitor_video=args.monitor_video,
                            visualize=args.visualize)

    agent = Agent.from_spec(spec=args.agent,
                            states=environment.states(),
                            actions=environment.actions(),
                            network=args.network)

    runner = Runner(agent=agent, environment=environment)

    def callback(r):
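        # Every 100 episodes, print timing and reward statistics; returning True keeps the run going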
        if r.episode % 100 == 0:
            print("================================================\n"
                  "Average secs/episode over 100 episodes: {time:0.2f}\n"
                  "Average steps/sec over 100 episodes:    {timestep:0.2f}\n"
                  "Average reward over 100 episodes:       {reward100:0.2f}\n"
                  "Average reward over 500 episodes:       {reward500:0.2f}".
                  format(time=(sum(r.episode_times[-100:]) / 100.0),
                         timestep=(sum(r.episode_timesteps[-100:]) /
                                   sum(r.episode_times[-100:])),
                         reward100=(sum(r.episode_rewards[-100:]) /
                                    min(100.0, r.episode)),
                         reward500=(sum(r.episode_rewards[-500:]) /
                                    min(500.0, r.episode))))
        return True

    runner.run(num_timesteps=args.timesteps,
               num_episodes=args.episodes,
               max_episode_timesteps=args.max_episode_timesteps,
               deterministic=args.deterministic,
               callback=callback)

    runner.close()