def reset(self):
    """Reset the underlying CartPole environment and zero this env's counters.

    Delegates to ``CartPoleEnv.reset`` first, then sets both
    ``steps_beyond_done`` and ``success_steps`` to 0.

    Returns:
        Whatever ``CartPoleEnv.reset(self)`` returned, passed through unchanged.
    """
    initial_observation = CartPoleEnv.reset(self)

    # Assigned after the parent reset so these zeros override any value the
    # parent reset may have stored in the same attributes.
    self.steps_beyond_done = self.success_steps = 0

    return initial_observation
# Instantiate the agent that will pick actions and store experience.
agent = PolicyGradientAgent(state_size, action_size)

# Start in the "done" state so the first loop iteration resets the environment.
done = True
last_render_time = time.time()
# Starts at -1 so the first episode is logged as episode 0.
episode_count = -1

# Interact with the environment forever; there is no exit condition here.
while True:
    if done:
        # Previous episode ended (or this is the first pass): begin a new one.
        episode_count += 1
        logging.info("Episode: %i", episode_count)
        state_np = env.reset()

    # True once more than 5 seconds have elapsed since last_render_time.
    # NOTE(review): in the visible code `render` is never read and
    # `last_render_time` is never refreshed -- confirm whether the consumer
    # lives elsewhere in the file.
    render = time.time() - last_render_time > 5

    # Ask the agent for an action given the current state.
    action = agent.get_action(state_np)

    # Advance the environment by one step using that action.
    new_state_np, reward, done, info_dict = env.step(action)

    # Record the transition against the state the action was chosen from.
    agent.record_experience(state_np, action, reward, done)

    # The resulting state becomes the current state for the next iteration.
    state_np = new_state_np