#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Example on how to use the 'Cartpole' OpenAI Gym environments in PyRoboLearn using a random policy
"""

from pyrobolearn.envs import gym
from pyrobolearn.policies import RandomPolicy
from pyrobolearn.tasks import RLTask

# create env, state, and action from gym
env = gym.make('CartPole-v1')
state, action = env.state, env.action
print("State and action space: {} and {}".format(state.space, action.space))

# create policy
policy = RandomPolicy(state, action)

# create task and run it
task = RLTask(env, policy)
task.run(num_steps=1000, dt=0.02, use_terminating_condition=False, render=True)
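
# For comparison, the same random rollout written directly against the plain
# OpenAI Gym API, without PyRoboLearn (a minimal sketch assuming the classic
# gym interface, where step() returns (observation, reward, done, info)):
import gym as openai_gym

raw_env = openai_gym.make('CartPole-v1')
observation = raw_env.reset()
for _ in range(1000):
    raw_env.render()
    # sample a uniformly random action from the action space
    observation, reward, done, info = raw_env.step(raw_env.action_space.sample())
    if done:
        observation = raw_env.reset()
raw_env.close()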

# Example 2

"""Example on how to use the 'CartPole-v1' OpenAI Gym environment in PyRoboLearn with a linear policy
trained using the finite-difference (FD) algorithm.
"""

from pyrobolearn.envs import gym
from pyrobolearn.policies import LinearPolicy
from pyrobolearn.tasks import RLTask
from pyrobolearn.algos import FD

# create env, state, and action from gym
env = gym.make('CartPole-v1')
state, action = env.state, env.action
print("State and action space: {} and {}".format(state.space, action.space))

# create policy
policy = LinearPolicy(state, action)
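# a linear policy computes the action as a linear function of the state
# features, so FD only has a small parameter vector to estimate gradients for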

# create task and run it
task = RLTask(env, policy)
task.run(num_steps=1000, use_terminating_condition=True, render=True)

# create RL algo
# Note: the hyperparameters can be a little bit tricky to optimize...
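# With difference_type='central', FD approximates each partial derivative of
# the expected return J(theta) by perturbing one policy parameter at a time:
#   dJ/dtheta_i ~ (J(theta + eps * e_i) - J(theta - eps * e_i)) / (2 * eps),
# where e_i is the i-th basis vector; std_dev presumably sets the
# perturbation scale eps, and learning_rate the gradient-ascent step size.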
algo = FD(task,
          policy,
          std_dev=0.01,
          learning_rate=0.01,
          difference_type='central',
          normalize_grad=True)
rewards = algo.train(num_steps=1000,
                     num_rollouts=5,
                     num_episodes=50,
                     verbose=True)

# plot
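# (a minimal sketch, assuming `algo.train` returns one scalar return per
# training episode and that matplotlib is installed)
import matplotlib.pyplot as plt

plt.plot(rewards)
plt.xlabel('episode')
plt.ylabel('average return')
plt.title('FD on CartPole-v1 with a linear policy')
plt.show()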