Example #1
    # (the opening of this gym.envs.register call is truncated in the source;
    # the id and entry_point below are assumed, with the id taken from the
    # commented-out gym.make near the end. ENV, TIMEOUT, Info, ReinforceAgent
    # and PolicyGradientNetwork are defined elsewhere in the source file.)
    gym.envs.register(
        id='CartPole-long-v0',
        entry_point='gym.envs.classic_control:CartPoleEnv',
        tags={'wrapper_config.TimeLimit.max_episode_steps': 5000},
        reward_threshold=4750.0,
    )

    env = gym.make(ENV)

    input_shape = env.observation_space.shape[0]
    output_shape = env.action_space.n
    print('observation dim:', (input_shape, ))
    print('action dim:     ', (output_shape, ))
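    # (for a CartPole variant these print (4,) and (2,): a 4-dimensional
    # observation space and 2 discrete actions)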

    # logging/debugging via a TensorBoard SummaryWriter
    info = Info(SummaryWriter(comment="-cartpole-pg"))

    ag = ReinforceAgent(PolicyGradientNetwork(input_shape, output_shape))
    sim = pwsim.GymSimulator(env, info)
    sim.add_agent(ag)

    print('Training: ', ENV)
    for t in sim:
        # stop once enough episodes have run or the average reward is high enough
        avg_reward = info.info[info.info_labels[0]]
        if t.episode > TIMEOUT or avg_reward > 2000:
            break

    ########## TEST and render

    print("TEST!")

    #env = gym.make('CartPole-long-v0')

    env = gym.wrappers.Monitor(env, './videos', force=True)
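
    # A sketch of the missing test loop (the source ends here): render the
    # monitored env with a plain gym loop. `ag.act` is a hypothetical
    # greedy-action helper, not part of the pyworld API shown above.
    # obs = env.reset()
    # done = False
    # while not done:
    #     env.render()
    #     obs, reward, done, _ = env.step(ag.act(obs))
    # env.close()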
Example #2
"""
@author: ben
"""

from pyworld import agent as pwag
from pyworld import simulate as pwsim


class TestAgent(pwag.Agent):
    """A minimal agent that prints whatever its sensors observe."""

    def __init__(self, sensors, actuators):
        super(TestAgent, self).__init__(sensors, actuators)
        # route both sensors' callbacks to this agent's sense method
        sensors[0]._callback = self.sense
        sensors[1]._callback = self.sense

    def sense(self, obs):
        print(obs)

    def attempt(self, state):
        # act by triggering the (random) actuator
        self.actuators[0]()


sim = pwsim.GymSimulator('CartPole-v0')
ag = TestAgent(
    [pwag.EpisodicSensor(), pwag.SimpleSensor()],
    [pwag.RandomActuator(sim.env.action_space)])
sim.add_agent(ag)

for t in sim:
    if t.done:
        sim.stop()  # stop after the first episode ends
    print(t)
Example #3
    REWARD_STEPS = 6  # steps to look ahead for the discounted n-step reward (sketched below)
    GAMMA = 0.99  # reward discount factor (for infinite-horizon problems)
    SKIP = 3  # frames to skip per step (frames arrive too fast; take every nth as the observation)
    STACK = 4  # frames to stack; helps make the state space Markovian (sketched at the end)
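
    # For reference (an assumption, not code from the source): the n-step
    # discounted return that REWARD_STEPS and GAMMA describe, for a window of
    # rewards r_0..r_{n-1} collected over REWARD_STEPS steps.
    def n_step_return(rewards, gamma=GAMMA):
        # sum of gamma**i * r_i over the collected reward window
        return sum(gamma ** i * r for i, r in enumerate(rewards))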

    EPSILON_START = 1.0  # initial epsilon for the epsilon-greedy policy
    EPSILON_END = 0.02  # final epsilon after annealing
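
    # The EpsilonTracker/EpsilonGreedyActuator built from these constants
    # further down aren't shown here; a minimal sketch of the idea, assuming a
    # linear anneal over a hypothetical number of decay steps:
    import random

    def epsilon_at(step, decay_steps=100000):
        # linearly anneal from EPSILON_START down to EPSILON_END
        frac = min(step / decay_steps, 1.0)
        return EPSILON_START + frac * (EPSILON_END - EPSILON_START)

    def epsilon_greedy(q_values, step):
        # random action with probability epsilon, otherwise the greedy action
        if random.random() < epsilon_at(step):
            return random.randrange(len(q_values))
        return max(range(len(q_values)), key=lambda a: q_values[a])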

    # use CUDA (GPU compute) for faster training when available
    if torch.cuda.is_available():
        print("USING CUDA!")
        DEVICE = 'cuda'
    else:
        DEVICE = 'cpu'

    sim = pwsim.GymSimulator('Breakout-v0')

    action_shape = sim.env.action_space.n
    observation_shape = [STACK, 84, 84]  # TODO: find a way to do this nicely

    ag = DQNAgent(DEVICE, GAMMA)

    epsilon_tracker = pwag.EpsilonTracker(epsilon_start=EPSILON_START,
                                          epsilon_end=EPSILON_END)
    actuator = pwag.EpsilonGreedyActuator(epsilon_tracker)
    # sensor pipeline combining frame buffering, Atari image preprocessing
    # (84x84) and max-pooling over frames
    sensor = pwag.MaxPoolSensor(pwag.AtariImageSensor(pwag.BufferedSensor(ag)))

    ag.add_component('sensor', sensor)
    ag.add_component('actuator', actuator)
    ag.add_component('experience_replay', pwag.ExperienceReplay())
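
    # For illustration (an assumption, not the pyworld API): the frame
    # stacking implied by STACK can be done with a rolling deque; stacking
    # the last STACK frames approximates a Markovian state from single frames.
    from collections import deque
    import numpy as np

    _frames = deque(maxlen=STACK)

    def stack_observation(frame):
        # frame: a preprocessed 84x84 array; returns a [STACK, 84, 84] state
        if not _frames:
            for _ in range(STACK):
                _frames.append(frame)  # pad with the first frame of an episode
        else:
            _frames.append(frame)
        return np.stack(_frames, axis=0)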