def battle(process_number):
    """Play one 'PommeTeamCompetition-v0' match and report it to the Java side.

    The function connects to a Java gateway, announces the start of the game
    with this process's PID, runs the match with four ``MyAgentT`` agents,
    notifies the Java side whenever the length of ``state[0]['alive']``
    changes, and finally reports the four rewards.

    Args:
        process_number: Caller-supplied label; only used in log output.
    """
    # Remember the process ID; it is the key passed to the Java side.
    pid = os.getpid()
    print("battle start, process_number={}, pid={}".format(
        process_number, pid))

    # Connect to Java.
    gateway = JavaGateway()
    addition_app = gateway.entry_point

    # Announce the start of the game.
    addition_app.start_game(pid)

    # List of four agents
    agent_list = [
        MyAgentT(),
        MyAgentT(),
        MyAgentT(),
        MyAgentT(),
    ]

    env = pommerman.make('PommeTeamCompetition-v0', agent_list)

    # The environment is closed in ``finally`` so that a failure in the
    # middle of a match does not leak its handles.
    try:
        num_alive_last = 4
        state = env.reset()
        step = 0
        done = False
        while not done:
            step += 1
            actions = env.act(state)
            state, reward, done, info = env.step(actions)
            print("pid={} step={} actions={}".format(pid, step, actions))

            # presumably 'alive' lists the surviving agents -- TODO confirm
            num_alive = len(state[0]['alive'])
            if num_alive != num_alive_last:
                num_alive_last = num_alive
                addition_app.changed()

        # Print the result.
        print("battle finished, process_number={}, pid={}, reward={}".format(
            process_number, pid, reward))

        # Announce the end of the game.
        addition_app.finish_game(
            pid, reward[0], reward[1], reward[2], reward[3])
    finally:
        # Close handles that are no longer needed.
        env.close()
def battle(episode):
    """Play one 'PommeTeamCompetition-v0' match and report it via addition_app.

    Uses the module-level ``addition_app`` object: ``start_game`` is called
    with this process's PID before the match and ``finish_game`` with the PID
    and the four rewards afterwards. Both return values are printed.

    Args:
        episode: Caller-supplied episode label; only used in log output.
    """
    pid = os.getpid()
    print(episode, ", ", pid)
    res = addition_app.start_game(pid)
    print(res)

    # List of four agents. Earlier candidate line-ups were either disabled
    # or immediately overwritten, so only the effective one is kept.
    agent_list = [
        MyAgentO(),
        MyAgentT(),
        MyAgentO(),
        MyAgentT(),
    ]

    env = pommerman.make('PommeTeamCompetition-v0', agent_list)

    # Close the environment even when the match fails part-way, so repeated
    # calls do not accumulate open environments.
    try:
        state = env.reset()
        done = False
        while not done:
            actions = env.act(state)
            state, reward, done, _ = env.step(actions)

        print('Episode {} finished'.format(episode), reward)
        res = addition_app.finish_game(
            pid, reward[0], reward[1], reward[2], reward[3])
        print(res)
    finally:
        env.close()
def main(render=False, interactive=False, num_episodes=10000000):
    """Run repeated 'PommeTeamCompetition-v0' matches and print the mean reward.

    Reads the module-level flags ``verbose`` and ``forwardModelDebug``; when
    the latter is set, the environment and actions are passed to
    ``send_env`` / ``send_action`` around every step.

    Args:
        render: When true, call ``env.render()`` before every step.
        interactive: When true, read one line from stdin after every step.
        num_episodes: Number of episodes to play. Defaults to the count that
            used to be hard-coded.
    """
    # List of four agents
    agent_list = [
        MyAgentO(),
        MyAgentT(),
        MyAgentO(),
        MyAgentT(),
    ]

    # Environment of team competition
    env = pommerman.make('PommeTeamCompetition-v0', agent_list)

    # Rewards are accumulated as a running sum rather than a list: the final
    # mean is then computed from real data (an empty list would yield nan)
    # without keeping every episode's reward in memory.
    reward_totals = None
    episodes_done = 0

    try:
        for episode in range(num_episodes):
            state = env.reset()
            done = False
            step = 0
            while not done:
                if verbose:
                    print("Step: ", step)
                step += 1
                if render:
                    env.render()
                actions = env.act(state)
                if verbose:
                    print(actions[-1])

                if forwardModelDebug:
                    send_env(env, 0)
                    send_action(actions)

                # compute step()
                state, reward, done, info = env.step(actions)

                if forwardModelDebug:
                    send_env(env, 1)

                if interactive:
                    sys.stdin.readline()

            episode_reward = np.asarray(reward, dtype=float)
            if reward_totals is None:
                reward_totals = episode_reward
            else:
                reward_totals = reward_totals + episode_reward
            episodes_done += 1
            print('Episode {} finished'.format(episode), reward)

        # Per-position mean over all completed episodes.
        if episodes_done:
            print(reward_totals / episodes_done)
    finally:
        # Release the environment even if the run is interrupted.
        env.close()
def __init__(self):
    """Create a ``MyAgentT`` instance and keep it on ``self._agent``."""
    wrapped_agent = MyAgentT()
    self._agent = wrapped_agent