Example #1
import numpy as np

import simulation


def do_cycle(agent, return_observation=False):
    """Run one episode and train the agent online; returns the total reward
    (and the visited states if requested)."""
    env = simulation.Simulation(
        [-100, 0, 500, 0, np.random.rand() * np.pi * 2],
        np.deg2rad(15),
        dt=0.3)
    totalreward = 0

    action = 1
    observation, done = env.step(action)
    observations = [env.state]

    for _ in range(300):
        action = agent.get_action(observation)
        nextobservation, done = env.step(action)
        reward = calc_reward(observation, nextobservation)

        totalreward += reward

        if done:
            break

        agent.update(observation, action, reward, nextobservation)

        observation = nextobservation
        observations.append(env.state)

    if return_observation:
        return totalreward, observations
    else:
        return totalreward
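
do_cycle assumes an agent exposing get_action/update and a free function calc_reward, none of which appear in the snippet. Minimal stubs that satisfy those interfaces, purely for illustration:

class RandomAgent:
    """Placeholder matching the interface do_cycle expects."""

    def get_action(self, observation):
        return np.random.randint(2)  # assumes a small discrete action set

    def update(self, observation, action, reward, next_observation):
        pass  # a learning agent would adjust its policy here


def calc_reward(observation, next_observation):
    # Placeholder: the snippet does not show the real reward function.
    return 0.0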
Example #2
def step(self):
    """Update with a given step size."""
    self.world.Step(self.timestep,
                    self.velocityIterations,
                    self.positionIterations)
    for agent in self.agents.values():
        agent.update()
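
The Step(timestep, velocityIterations, positionIterations) call matches the pybox2d b2World API. A minimal owner class wiring up the fields this method reads; the agent dict and the iteration counts are assumptions:

from Box2D import b2World

class PhysicsLoop:
    def __init__(self, agents, timestep=1.0 / 60):
        self.world = b2World(gravity=(0, -10))  # standard downward gravity
        self.timestep = timestep
        self.velocityIterations = 10  # Box2D constraint-solver iteration counts
        self.positionIterations = 10
        self.agents = agents          # mapping of name -> agent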
Example #3
def on_update(self):
    self.player.update()
    # Only update entities that are currently visible to the camera.
    for a_object in self.objects:
        if a_object.in_camera():
            a_object.update()
    for agent in self.agents:
        if agent.in_camera():
            agent.update()
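
in_camera here acts as a visibility cull so off-screen entities skip their update. A typical implementation is an axis-aligned bounding-box test against a camera rectangle; all fields below are hypothetical, not from the source:

def in_camera(self):
    cam = self.camera  # hypothetical rectangle with left/right/bottom/top
    return (self.x + self.width >= cam.left and self.x <= cam.right and
            self.y + self.height >= cam.bottom and self.y <= cam.top)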
Example #4
import time


def main():
    teamA, teamB = initialize()
    ball = [WIDTH // 2, HEIGHT // 2]
    while True:
        team_red = [agent.get_coordinates() for agent in teamA]
        team_blue = [agent.get_coordinates() for agent in teamB]

        for agent in teamA:
            agent.update(team_own=team_red, team_opp=team_blue, ball=ball)
        for agent in teamB:
            agent.update(team_own=team_blue, team_opp=team_red, ball=ball)

        # Re-read coordinates so the render step sees the post-update positions.
        team_red = [agent.get_coordinates() for agent in teamA]
        team_blue = [agent.get_coordinates() for agent in teamB]

        field.update_positions(team_red, team_blue, ball, canvas)
        root.update()
        time.sleep(TIMESTEP)
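
The agents in Example #4 expose get_coordinates and a keyword-based update; a minimal stub of that interface, purely illustrative:

class TeamAgent:
    def __init__(self, x, y):
        self.x, self.y = x, y

    def get_coordinates(self):
        return (self.x, self.y)

    def update(self, team_own, team_opp, ball):
        # A real agent would steer using teammates, opponents, and the ball here.
        pass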
Example #5
# agent = agent.SARSAAgent(alpha, discount, env, epsilon=0.6)
# agent = agent.QLearningAgent(alpha, discount, env, epsilon=0.6)
# agent = agent.EVSarsaAgent(alpha, discount, env)

# Initialize environment state -----------
env.reset_state()

reward = 0
# Learning -----------
while True:
    env.render(agent)
    input("=== Episode === ")  # Press Enter to advance; comment out to run uninterrupted

    while True:
        # input("== Step == ")  # Uncomment to inspect the agent step by step

        # Get current state
        state = env.get_state()
        # Choose action
        action = agent.get_action(state)
        # Try out the action
        next_state, reward, terminal = env.step(action)
        # Update the agent's internal variable
        done = agent.update(state, action, reward, next_state, terminal)

        env.render(agent)

        if done:
            env.reset_state()
            break
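
The update call in Example #5 hides the actual learning rule. For reference, a minimal tabular Q-learning agent consistent with the constructor and the (state, action, reward, next_state, terminal) signature might look like this; the defaultdict storage is an illustrative assumption, and the action enumeration via env.get_possible_actions is borrowed from Example #6:

from collections import defaultdict


class QLearningAgent:
    """Illustrative tabular Q-learning agent; not the source's implementation."""

    def __init__(self, alpha, discount, env, epsilon=0.6):
        self.alpha, self.discount, self.epsilon = alpha, discount, epsilon
        self.env = env
        self.qvalues = defaultdict(float)  # keyed by (state, action)

    def update(self, state, action, reward, next_state, terminal):
        # TD target: r + gamma * max_a' Q(s', a'); future value is zero at terminal states.
        best_next = 0.0 if terminal else max(
            self.qvalues[(next_state, a)] for a in self.env.get_possible_actions())
        td_target = reward + self.discount * best_next
        self.qvalues[(state, action)] += self.alpha * (td_target - self.qvalues[(state, action)])
        return terminal  # Example #5 treats this return value as the episode-done flag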
Example #6
alpha = 0.2
epsilon = 0.5
discount = 0.99
action_space = env.action_space
state_space = env.state_space

# agent = agent.QLearningAgent(alpha, epsilon, discount, action_space, state_space)
agent = agent.EVSarsaAgent(alpha, epsilon, discount, action_space, state_space)

env.render(agent.qvalues)
state = env.get_state()

while True:

    possible_actions = env.get_possible_actions()
    action = agent.get_action(state, possible_actions)
    next_state, reward, done = env.step(action)
    env.render(agent.qvalues)

    next_state_possible_actions = env.get_possible_actions()
    agent.update(state, action, reward, next_state,
                 next_state_possible_actions, done)
    state = next_state

    if done:
        env.reset_state()
        env.render(agent.qvalues)
        state = env.get_state()
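
EVSarsaAgent in Example #6 is Expected SARSA: instead of maximizing over next actions, the target averages Q-values under the epsilon-greedy policy. A sketch of an update matching the call signature above; the internals are assumptions, not the source's code:

def update(self, state, action, reward, next_state, next_possible_actions, done):
    if done or not next_possible_actions:
        expected = 0.0
    else:
        qs = [self.qvalues[(next_state, a)] for a in next_possible_actions]
        n = len(qs)
        # Epsilon-greedy policy: each action gets epsilon/n probability,
        # and the greedy action gets the remaining (1 - epsilon) mass.
        expected = (self.epsilon / n) * sum(qs) + (1 - self.epsilon) * max(qs)
    td_target = reward + self.discount * expected
    self.qvalues[(state, action)] += self.alpha * (td_target - self.qvalues[(state, action)])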
Example #7
mainarg.add_argument("--update_tf_board", type=int, default=1000, help="Update TensorBoard every X steps.")


args = parser.parse_args()
env = gym.make(args.game_name)

if args.mode == "test":
    env.monitor.start('./results/' + args.game_name)
    print "Generating Test report."

agent = agent.LearningAgent(env, args)

step = 0
with tf.device("/gpu:0"):
    for epoch in range(args.epochs):

        for train_step in range(args.train_steps):

            done = agent.update(step)
            step += 1

        agent.save_model()

        print("Epoch #", epoch, "has finished.")

    if args.mode == "test":
        env.monitor.close()
        gym.upload('./results/' + args.game_name, api_key='sk_ppjQw9T1TYgT1LKJfSG9Q')
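
Example #7 is cut off above its parser setup. Reconstructed from the attributes the code reads (args.game_name, args.mode, args.epochs, args.train_steps) plus the imports it needs, a plausible preamble would be the following; every default below is a guess:

import argparse

import gym
import tensorflow as tf

parser = argparse.ArgumentParser()
mainarg = parser.add_argument_group("main")
mainarg.add_argument("--game_name", default="Breakout-v0", help="Gym environment id.")
mainarg.add_argument("--mode", choices=["train", "test"], default="train")
mainarg.add_argument("--epochs", type=int, default=100, help="Number of training epochs.")
mainarg.add_argument("--train_steps", type=int, default=10000, help="Agent updates per epoch.")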


Example #8
                              blocked_positions, start_pos, default_reward,
                              scale)

# Agent -------------
alpha = 0.2
epsilon = 0.25
discount = 0.99
action_space = env.action_space
state_space = env.state_space

agent = agent.QLearningAgent(alpha, epsilon, discount, env)

# Learning -----------
env.render(agent)
state = env.get_state()

while True:

    action = agent.get_explore_action(state)
    next_state, reward, done = env.step(action)
    env.render(agent)

    agent.update(state, action, reward, next_state, done)
    state = next_state

    if done:
        env.reset_state()
        env.render(agent)
        state = env.get_state()
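
get_explore_action is not defined in the snippet; a standard epsilon-greedy choice consistent with the fields set above might look like this (treating self.env.action_space as an iterable of actions is an assumption):

import random

def get_explore_action(self, state):
    actions = list(self.env.action_space)
    # Explore with probability epsilon, otherwise pick the greedy action.
    if random.random() < self.epsilon:
        return random.choice(actions)
    return max(actions, key=lambda a: self.qvalues[(state, a)])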