import numpy as np

import simulation  # local module providing the Simulation environment


def do_cycle(agent, return_observation=False):
    # Run one episode of at most 300 steps and return the accumulated reward.
    env = simulation.Simulation(
        [-100, 0, 500, 0, np.random.rand() * np.pi * 2],  # random initial heading
        np.deg2rad(15), dt=0.3)
    totalreward = 0
    action = 1
    observation, done = env.step(action)
    observations = [env.state]
    for _ in range(300):
        action = agent.get_action(observation)
        nextobservation, done = env.step(action)
        reward = calc_reward(observation, nextobservation)
        totalreward += reward
        if done:
            break  # note: the terminal transition is not passed to agent.update
        agent.update(observation, action, reward, nextobservation)
        observation = nextobservation
        observations.append(env.state)
    if return_observation:
        return totalreward, observations
    return totalreward
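# calc_reward is used above but not defined in this snippet. Below is a minimal
# sketch of one plausible shaping reward, assuming the goal is to close the
# distance to the origin; the [x, vx, y, vy, heading] state layout is inferred
# from the initial state above and is an assumption, not the project's actual
# definition:
def calc_reward(observation, next_observation):
    # Reward the per-step reduction in distance to the target (here: the origin).
    dist = np.hypot(observation[0], observation[2])
    next_dist = np.hypot(next_observation[0], next_observation[2])
    return dist - next_dist  # positive when the agent moved closer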
def step(self):
    """Advance the physics world by one fixed timestep, then update all agents."""
    self.world.Step(self.timestep, self.velocityIterations, self.positionIterations)
    for name, agent in self.agents.items():
        agent.update()
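# A minimal usage sketch of the stepper above, assuming a hypothetical
# Simulation class that owns the pybox2d world and the agents dict; the
# constructor arguments mirror pybox2d's conventional defaults and are
# illustrative, not the project's actual setup:
sim = Simulation(timestep=1.0 / 60.0, velocityIterations=8, positionIterations=3)
for _ in range(600):  # advance ten simulated seconds at 60 Hz
    sim.step()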
def on_update(self):
    self.player.update()
    for a_object in self.objects:
        if a_object.in_camera():
            a_object.update()
    for agent in self.agents:
        if agent.in_camera():
            agent.update()
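# in_camera is not shown in this snippet. Below is a minimal sketch of the
# viewport-culling test it implies, assuming hypothetical x/y/width/height
# attributes on the entity and a global camera rect; illustrative only:
def in_camera(self):
    # Axis-aligned bounding-box overlap test against the camera viewport.
    return (self.x < camera.x + camera.width and self.x + self.width > camera.x
            and self.y < camera.y + camera.height and self.y + self.height > camera.y)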
def main():
    teamA, teamB = initialize()
    ball = [WIDTH // 2, HEIGHT // 2]
    while True:
        team_red = [agent.get_coordinates() for agent in teamA]
        team_blue = [agent.get_coordinates() for agent in teamB]
        for agent in teamA:
            agent.update(team_own=team_red, team_opp=team_blue, ball=ball)
        for agent in teamB:
            agent.update(team_own=team_blue, team_opp=team_red, ball=ball)
        team_red = [agent.get_coordinates() for agent in teamA]
        team_blue = [agent.get_coordinates() for agent in teamB]
        field.update_positions(team_red, team_blue, ball, canvas)
        root.update()
        time.sleep(TIMESTEP)
# Choose one agent by uncommenting it:
# agent = agent.SARSAAgent(alpha, discount, env, epsilon=0.6)
# agent = agent.QLearningAgent(alpha, discount, env, epsilon=0.6)
# agent = agent.EVSarsaAgent(alpha, discount, env)

# Initialize environment state -----------
env.reset_state()
reward = 0

# Learning -----------
while True:
    env.render(agent)
    # input("=== Episode ===")  # Uncomment to inspect the agent episode by episode
    while True:
        # input("== Step ==")  # Uncomment to inspect the agent step by step
        # Get current state
        state = env.get_state()
        # Choose action
        action = agent.get_action(state)
        # Try out the action
        next_state, reward, terminal = env.step(action)
        # Update the agent's internal variables
        done = agent.update(state, action, reward, next_state, terminal)
        env.render(agent)
        if done:
            env.reset_state()
            break
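# None of the agents referenced above is defined in this snippet. Below is a
# minimal sketch of the tabular rule a QLearningAgent.update could apply
# (method shown outside its class for brevity), assuming a dict-backed Q-table
# and an env.get_possible_actions() helper, both assumptions rather than the
# project's actual API:
def update(self, state, action, reward, next_state, terminal):
    # Terminal transitions bootstrap from 0; otherwise from max_a' Q(s', a').
    best_next = 0.0 if terminal else max(
        self.qvalues[(next_state, a)] for a in self.env.get_possible_actions())
    target = reward + self.discount * best_next
    self.qvalues[(state, action)] += self.alpha * (target - self.qvalues[(state, action)])
    return terminal  # the calling loop treats the return value as "done"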
alpha = 0.2
epsilon = 0.5
discount = 0.99
action_space = env.action_space
state_space = env.state_space

# agent = agent.QLearningAgent(alpha, epsilon, discount, action_space, state_space)
agent = agent.EVSarsaAgent(alpha, epsilon, discount, action_space, state_space)

env.render(agent.qvalues)
state = env.get_state()
while True:
    possible_actions = env.get_possible_actions()
    action = agent.get_action(state, possible_actions)
    next_state, reward, done = env.step(action)
    env.render(agent.qvalues)
    next_state_possible_actions = env.get_possible_actions()
    agent.update(state, action, reward, next_state,
                 next_state_possible_actions, done)
    state = next_state
    if done:
        env.reset_state()
        env.render(agent.qvalues)
        state = env.get_state()
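# EVSarsaAgent.update is not shown here. Below is a minimal sketch of the
# expected-value SARSA rule its name suggests (method shown outside its class
# for brevity), assuming epsilon-greedy action probabilities and a dict-backed
# Q-table; illustrative only:
def update(self, state, action, reward, next_state, next_possible_actions, done):
    if done or not next_possible_actions:
        expected = 0.0
    else:
        qs = [self.qvalues[(next_state, a)] for a in next_possible_actions]
        # Every action carries epsilon/n probability; the greedy one gets the rest.
        probs = [self.epsilon / len(qs)] * len(qs)
        probs[qs.index(max(qs))] += 1.0 - self.epsilon
        expected = sum(p * q for p, q in zip(probs, qs))
    target = reward + self.discount * expected
    self.qvalues[(state, action)] += self.alpha * (target - self.qvalues[(state, action)])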
mainarg.add_argument("--update_tf_board", type=int, default=1000,
                     help="Update the TensorBoard summaries every X steps.")
args = parser.parse_args()

env = gym.make(args.game_name)
if args.mode == "test":
    env.monitor.start('./results/' + args.game_name)
    print("Generating test report.")

agent = agent.LearningAgent(env, args)
step = 0
with tf.device("/gpu:0"):
    for epoch in range(args.epochs):
        for train_step in range(args.train_steps):
            done = agent.update(step)
            step += 1
        agent.save_model()
        print("Epoch #", epoch, "has finished.")

if args.mode == "test":
    env.monitor.close()
    gym.upload('./results/' + args.game_name, api_key='sk_ppjQw9T1TYgT1LKJfSG9Q')
# (continuation of the env constructor call from the preceding lines)
                  blocked_positions, start_pos, default_reward, scale)

# Agent -------------
alpha = 0.2
epsilon = 0.25
discount = 0.99
action_space = env.action_space
state_space = env.state_space
agent = agent.QLearningAgent(alpha, epsilon, discount, env)

# Learning -----------
env.render(agent)
state = env.get_state()
while True:
    action = agent.get_explore_action(state)
    next_state, reward, done = env.step(action)
    env.render(agent)
    agent.update(state, action, reward, next_state, done)
    state = next_state
    if done:
        env.reset_state()
        env.render(agent)
        state = env.get_state()
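# get_explore_action is not shown in this snippet. Below is a minimal
# epsilon-greedy sketch of what the name implies (method shown outside its
# class for brevity), assuming env.action_space is a list of discrete actions
# and a dict-backed Q-table; illustrative only:
import random

def get_explore_action(self, state):
    if random.random() < self.epsilon:
        return random.choice(self.env.action_space)  # explore
    # Exploit: pick the action with the highest learned Q-value.
    return max(self.env.action_space, key=lambda a: self.qvalues[(state, a)])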