```python
from env import SoccerEnv
from agents.common.training_opponent import StationaryOpponent

env = SoccerEnv()
agentOP = StationaryOpponent(env_width=env.width,
                             env_height=env.height,
                             env_goal_size=env.goal_size)

# parameters
EPISODES = 5000

for i in range(EPISODES):
    state = env.reset()
    done = False
    while not done:
        env.show()
        print()

        # agent 1 decides its action
        actionME = "choose your action here"

        # agent 2 decides its action
        actionOP = agentOP.get_action(state)

        # perform actions on the environment
        done, reward_l, reward_r, state_, actions = env.step(actionME, actionOP)

        # training process of agent 1
        """
        do some training here
        """

        # training process of agent 2
        agentOP.adjust(done, reward_r, i)

        state = state_
```
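One way to fill the two placeholders is a tabular Q-learning agent. The sketch below is illustrative only: the `QLearningAgent` class, its hyperparameters, and the `tuple(state)` hashing trick are all assumptions, not code from this repo.

```python
import random
from collections import defaultdict

class QLearningAgent:
    """Illustrative epsilon-greedy tabular Q-learning agent (not part of the repo)."""

    def __init__(self, act_dim, alpha=0.1, gamma=0.95, epsilon=0.1):
        self.act_dim = act_dim
        self.alpha = alpha      # learning rate
        self.gamma = gamma      # discount factor
        self.epsilon = epsilon  # exploration rate
        # Q-table: maps a hashable state to one value per action
        self.q = defaultdict(lambda: [0.0] * act_dim)

    def get_action(self, state):
        # epsilon-greedy: explore occasionally, otherwise pick the best known action
        if random.random() < self.epsilon:
            return random.randint(0, self.act_dim - 1)
        values = self.q[tuple(state)]
        return values.index(max(values))

    def learn(self, state, action, reward, state_, done):
        # one-step temporal-difference update toward the bootstrapped target
        q_sa = self.q[tuple(state)][action]
        target = reward if done else reward + self.gamma * max(self.q[tuple(state_)])
        self.q[tuple(state)][action] = q_sa + self.alpha * (target - q_sa)
```

Plugged into the template, `actionME = agentME.get_action(state)` replaces the string placeholder and `agentME.learn(state, actionME, reward_l, state_, done)` replaces the training docstring; agent 1 occupies the left slot of `env.step`, so it learns from `reward_l`, mirroring the baseline loop below.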
```python
import random
import pickle

# assumes `env` and `EPISODES` are defined as above and `stat` is the
# repo's stats helper, constructed beforehand
for i in range(EPISODES):
    state = env.reset()
    stat.set_initial_ball(state[4])
    rewardL = 0
    rewardR = 0
    done = False
    while not done:
        # agent 1 decides its action
        actionL = random.randint(0, env.act_dim - 1)

        # agent 2 decides its action
        actionR = random.randint(0, env.act_dim - 1)

        # perform actions on the environment
        done, reward_l, reward_r, state_, actions = env.step(actionL, actionR)

        state = state_
        rewardL += reward_l
        rewardR += reward_r

        if done:
            stat.add_stat(rewardL, rewardR)
            print(*stat.get_moving_avg(),
                  file=open('log_files/baseline_reward.txt', 'a'))

# save stats
with open('stats/baseline.pkl', 'wb') as output:
    pickle.dump(stat, output)
```
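The loop above leans on a `stat` helper whose implementation isn't shown in this snippet. Judging from the calls, it records the ball's initial position (element 4 of the state), accumulates per-episode returns, and reports a moving average. A minimal stand-in with that interface might look like the following; only the three method names come from the loop above, and the window size and every internal detail are assumptions.

```python
from collections import deque

class Stat:
    """Illustrative stand-in for the repo's stats helper (interface assumed)."""

    def __init__(self, window=100):
        self.initial_balls = []               # ball start position per episode
        self.returnsL = deque(maxlen=window)  # recent per-episode returns, left
        self.returnsR = deque(maxlen=window)  # recent per-episode returns, right

    def set_initial_ball(self, ball_pos):
        self.initial_balls.append(ball_pos)

    def add_stat(self, rewardL, rewardR):
        self.returnsL.append(rewardL)
        self.returnsR.append(rewardR)

    def get_moving_avg(self):
        # moving average of both sides' returns over the window
        n = max(len(self.returnsL), 1)
        return sum(self.returnsL) / n, sum(self.returnsR) / n
```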
```python
from env import SoccerEnv
from agents.common.training_opponent import (
    StationaryOpponent, RandomSwitchOpponent, RLBasedOpponent)

# the eight movement actions
TOP = 0
TOP_RIGHT = 1
RIGHT = 2
BOTTOM_RIGHT = 3
BOTTOM = 4
BOTTOM_LEFT = 5
LEFT = 6
TOP_LEFT = 7

env = SoccerEnv()
agentOP = StationaryOpponent(env_width=env.width,
                             env_height=env.height,
                             env_goal_size=env.goal_size)

state = env.reset()
i = 0  # episode index; normally supplied by the surrounding training loop

# loop
env.show()
actionOP = agentOP.get_action(state)
print(actionOP)
done, reward_l, reward_r, state, actions = env.step("type action here!", actionOP)
agentOP.adjust(done, reward_r, i)
```
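All three imported opponents expose the same two calls used here: `get_action(state)` to pick a move and `adjust(done, reward, episode)` to update afterwards, so swapping opponents is a one-line change. Continuing from the setup above, turning the single step into a complete episode only requires repeating those calls until `done`; the fixed `RIGHT` action below is a placeholder for your own policy, not a recommendation.

```python
# a full episode against the stationary opponent, always moving RIGHT
state = env.reset()
done = False
i = 0  # episode index fed to the opponent's update rule
while not done:
    actionOP = agentOP.get_action(state)
    # substitute your own agent's action for RIGHT here
    done, reward_l, reward_r, state, actions = env.step(RIGHT, actionOP)
    agentOP.adjust(done, reward_r, i)
print('final rewards:', reward_l, reward_r)
```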