import gym
from gym.envs.registration import register

# SummaryWriter is assumed to be tensorboardX's (torch.utils.tensorboard
# exposes the same interface); the imports for Info, ReinforceAgent and
# PolicyGradientNetwork are not shown in the original fragment.
from tensorboardX import SummaryWriter

from pyworld import simulate as pwsim

# Reconstructed head of the register(...) call (truncated in the original):
# the id 'CartPole-long-v0' is inferred from the commented-out gym.make
# below, and the entry_point is the standard classic-control CartPole.
ENV = 'CartPole-long-v0'
register(
    id=ENV,
    entry_point='gym.envs.classic_control:CartPoleEnv',
    tags={'wrapper_config.TimeLimit.max_episode_steps': 5000},
    reward_threshold=4750.0,
)

TIMEOUT = 10000  # assumed episode cap; the original value is not shown

env = gym.make(ENV)
input_shape = env.observation_space.shape[0]
output_shape = env.action_space.n
print('observation dim:', (input_shape, ))
print('action dim: ', (output_shape, ))

# logging/debug
info = Info(SummaryWriter(comment="-cartpole-pg"))

ag = ReinforceAgent(PolicyGradientNetwork(input_shape, output_shape))
sim = pwsim.GymSimulator(env, info)
sim.add_agent(ag)

print('Training: ', ENV)
for t in sim:
    avg_reward = info.info[info.info_labels[0]]  # first logged statistic: average reward
    if t.episode > TIMEOUT or avg_reward > 2000:
        break

########## TEST and render
print("TEST!")
#env = gym.make('CartPole-long-v0')
env = gym.wrappers.Monitor(env, './videos', force=True)
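# --- Sketch (not in the original) ----------------------------------------
# The script above ends right after wrapping the environment in a Monitor;
# the recorded test rollout itself is missing. Reusing the simulator-driver
# pattern from the training loop above, it might continue like this. The
# env.close() call (which flushes the Monitor's video to ./videos) is an
# assumption.
sim = pwsim.GymSimulator(env, info)
sim.add_agent(ag)
for t in sim:
    if t.done:
        sim.stop()
env.close()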
"""
@author: ben
"""
from pyworld import agent as pwag
from pyworld import simulate as pwsim


class TestAgent(pwag.Agent):
    """Minimal test agent: prints every observation and always fires its first actuator."""

    def __init__(self, sensors, actuators):
        super(TestAgent, self).__init__(sensors, actuators)
        # route both sensors' callbacks to this agent's sense method
        sensors[0]._callback = self.sense
        sensors[1]._callback = self.sense

    def sense(self, obs):
        print(obs)

    def attempt(self, state):
        self.actuators[0]()


sim = pwsim.GymSimulator('CartPole-v0')
ag = TestAgent(
    [pwag.EpisodicSensor(), pwag.SimpleSensor()],
    [pwag.RandomActuator(sim.env.action_space)])
sim.add_agent(ag)

for t in sim:
    if t.done:
        sim.stop()
    print(t)
import torch

from pyworld import agent as pwag
from pyworld import simulate as pwsim
# DQNAgent's import is not shown in the original fragment.

REWARD_STEPS = 6      # number of steps to look into the future (discounted reward)
GAMMA = 0.99          # reward discount (for infinite-horizon problems)
SKIP = 3              # frames to skip at each step (frames come in too fast; take every nth frame as the observation)
STACK = 4             # number of frames to stack; helps make the state space Markovian
EPSILON_START = 1.0   # initial epsilon for the epsilon-greedy policy
EPSILON_END = 0.02    # final epsilon

if torch.cuda.is_available():
    # use CUDA (GPU compute) for faster training!
    print("USING CUDA!")
    DEVICE = 'cuda'
else:
    DEVICE = 'cpu'

sim = pwsim.GymSimulator('Breakout-v0')
action_shape = sim.env.action_space.n
observation_shape = [STACK, 84, 84]  # TODO: find a way to do this nicely

ag = DQNAgent(DEVICE, GAMMA)
epsilon_tracker = pwag.EpsilonTracker(epsilon_start=EPSILON_START,
                                      epsilon_end=EPSILON_END)
actuator = pwag.EpsilonGreedyActuator(epsilon_tracker)
# sensor pipeline: max-pooling over Atari image preprocessing over a buffered sensor
sensor = pwag.MaxPoolSensor(pwag.AtariImageSensor(pwag.BufferedSensor(ag)))

ag.add_component('sensor', sensor)
ag.add_component('actuator', actuator)
ag.add_component('experience_replay', pwag.ExperienceReplay())
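# --- Sketch (not in the original) ----------------------------------------
# The script stops after wiring up the agent's components and never shows
# the training run. Following the driver pattern of the CartPole scripts
# above, it would presumably continue along these lines; MAX_EPISODES is a
# hypothetical name, not from the original.
MAX_EPISODES = 5000  # hypothetical episode cap

sim.add_agent(ag)
for t in sim:
    if t.episode > MAX_EPISODES:
        break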