Example #1
0
 def reset(self):
     """Reset the environment and zero this subclass's step counters.

     Delegates to ``CartPoleEnv.reset`` first, then sets both
     ``steps_beyond_done`` and ``success_steps`` to 0. The counters are
     assigned after the base reset, so these are the values in effect
     when the method returns.

     Returns:
         Whatever ``CartPoleEnv.reset`` returned (the initial observation).
     """
     initial_observation = CartPoleEnv.reset(self)
     # Chained assignment binds left to right: steps_beyond_done, then success_steps.
     self.steps_beyond_done = self.success_steps = 0
     return initial_observation
Example #2
0
    #instantiate agent
    agent = PolicyGradientAgent(state_size, action_size)

    done = True
    last_render_time = time.time()
    episode_count = -1
    #forever
    while True:

        #if episode is done
        if done:
            episode_count += 1
            logging.info("Episode: %i", episode_count)
            #reset env
            state_np = env.reset()
            #set render flag based on time
            render = (time.time() - last_render_time) > 5

        #Get an action from our agent
        action = agent.get_action(state_np)

        #take a step in the environment
        new_state_np, reward, done, info_dict = env.step(action)

        #let the agent record the experience
        agent.record_experience(state_np, action, reward, done)

        #update the state
        state_np = new_state_np