Example #1
import random

from env import SoccerEnv
from agents.common.training_opponent import StationaryOpponent

env = SoccerEnv()
agentOP = StationaryOpponent(env_width=env.width,
                             env_height=env.height,
                             env_goal_size=env.goal_size)

# parameters
EPISODES = 5000

for i in range(EPISODES):
    state = env.reset()

    done = False
    while not done:
        env.show()
        print()

        # agent 1 decides its action (plug in your own policy here)
        actionME = random.randint(0, env.act_dim - 1)  # placeholder: random action

        # agent 2 decides its action
        actionOP = agentOP.get_action(state)

        # perform actions on the environment
        done, reward_l, reward_r, state_, actions = env.step(
            actionME, actionOP)

        # training process of agent 1: do your training update here
        # (see the Q-learning sketch after this example)

        # training process of agent 2
        agentOP.adjust(done, reward_r, i)

        state = state_
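The two placeholders above can be filled with any learning algorithm. As one illustration (not part of the repo), a minimal tabular Q-learning fill-in could look like the following; it assumes state is an indexable sequence (as the state[4] access in Example #2 suggests) and that reward_l is agent 1's reward:

import collections
import random

ALPHA, GAMMA, EPS = 0.1, 0.99, 0.1
Q = collections.defaultdict(lambda: [0.0] * env.act_dim)

def choose_action(state):
    # epsilon-greedy over the Q-table
    key = tuple(state)
    if random.random() < EPS:
        return random.randint(0, env.act_dim - 1)
    return max(range(env.act_dim), key=lambda a: Q[key][a])

def train(state, action, reward, state_, done):
    # one-step Q-learning update toward the bootstrapped target
    key, key_ = tuple(state), tuple(state_)
    target = reward if done else reward + GAMMA * max(Q[key_])
    Q[key][action] += ALPHA * (target - Q[key][action])

Inside the loop, actionME = choose_action(state) replaces the random placeholder, and train(state, actionME, reward_l, state_, done) fills the training step.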
Example #2
import pickle
import random

# env and EPISODES are set up as in Example #1; stat is assumed to be the
# repo's statistics helper (its class is not shown here), already constructed
for i in range(EPISODES):
    state = env.reset()
    stat.set_initial_ball(state[4])

    rewardL = 0
    rewardR = 0
    done = False
    while not done:
        # agent 1 decides its action
        actionL = random.randint(0, env.act_dim-1)

        # agent 2 decides its action
        actionR = random.randint(0, env.act_dim-1)

        # perform actions on the environment
        done, reward_l, reward_r, state_, actions = env.step(actionL, actionR)

        state = state_

        rewardL += reward_l
        rewardR += reward_r

        if done:
            stat.add_stat(rewardL, rewardR)
            with open('log_files/baseline_reward.txt', 'a') as log:
                print(*stat.get_moving_avg(), file=log)

# save stats
with open('stats/baseline.pkl', 'wb') as output:
    pickle.dump(stat, output)
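To inspect the saved statistics later, the pickle can be loaded back; note that the stat object's class must be importable when unpickling (a minimal sketch):

import pickle

with open('stats/baseline.pkl', 'rb') as f:
    stat = pickle.load(f)

print(*stat.get_moving_avg())  # the most recent moving-average rewards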
Example #3
from env import SoccerEnv
from agents.common.training_opponent import StationaryOpponent, RandomSwitchOpponent, RLBasedOpponent

# integer codes for the eight directional actions
TOP = 0
TOP_RIGHT = 1
RIGHT = 2
BOTTOM_RIGHT = 3
BOTTOM = 4
BOTTOM_LEFT = 5
LEFT = 6
TOP_LEFT = 7

env = SoccerEnv()
agentOP = StationaryOpponent(env_width=env.width,
                             env_height=env.height,
                             env_goal_size=env.goal_size)

state = env.reset()

# one step of the interaction loop (repeat until done; see the sketch below)
env.show()
actionOP = agentOP.get_action(state)
print(actionOP)
# RIGHT is a placeholder: pass your own action here
done, reward_l, reward_r, state, actions = env.step(RIGHT, actionOP)

agentOP.adjust(done, reward_r, 0)  # third argument: episode index (0 outside a loop)
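Putting the pieces together, one full episode of this walkthrough might look like the following (a minimal sketch; RIGHT stands in for agent 1's action choice):

state = env.reset()
done = False
while not done:
    env.show()
    actionOP = agentOP.get_action(state)
    done, reward_l, reward_r, state, actions = env.step(RIGHT, actionOP)
    agentOP.adjust(done, reward_r, 0)  # single episode, so index 0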