# NOTE(review): the statements on the next line have lost their line breaks and
# indentation. As written, everything after the first `#` is parsed as one
# comment, so the `if` has no body. Restore the original line structure before
# use; the nesting of these statements cannot be recovered from this line alone.
if win == playerID[j]: # a win earns reward 1 reword = 1 players[j].store_experience( state, targets, tr, reword, state_X, target_X, end) players[j].experience_replay() # select an action action = players[i].select_action(state, targets, players[i].exploration) # execute the action env.update(action, playerID[i]) # for log loss = players[i].current_loss Q_max, Q_action = players[i].select_enable_action( state, targets) print( "player:{:1d} | pos:{:2d} | LOSS: {:.4f} | Q_MAX: {:.4f}" .format(playerID[i], action, loss, Q_max)) # result of executing the action terminal = env.isEnd() w = env.winner() print("EPOCH: {:03d}/{:03d} | WIN: player{:1d}".format(e, n_epochs, w)) # the model that gets saved is the second-moving player, player2 players[1].save_model()
for i in range(2):
    # Read the current board (array) and the positions this player may play.
    state = env.screen
    targets = env.getEnables(playerID[i])

    if len(targets) > 0:
        # There is at least one place to put a piece.

        # Choose an action; the third argument shrinks as the epoch counter
        # `e` grows (1 at e == 0, 0.5 at e == 10, ...).
        action = players[i].selectAction(state, targets, 1 / ((e / 10) + 1))

        # Apply the action: the flat index is split into [action // 8, action % 8].
        env.doFlip(list(divmod(action, 8)), playerID[i])

        # End-of-game check.
        win = env.winner()  # player currently ahead
        end = env.isEnd()

        # Next state.
        # NOTE(review): `state` and `state_X` both come from `env.screen`; if
        # doFlip mutates the board in place they may be the same object - verify.
        state_X = env.screen
        # NOTE(review): `playerID[i + 1]` is read with i == 1 as well, so
        # playerID presumably has at least three entries - confirm.
        target_X = env.getEnables(playerID[i + 1])
        if len(target_X) == 0:
            # The other side has no move: fall back to this player's own moves.
            target_X = env.getEnables(playerID[i])

        # NOTE(review): the reward is 1 whenever the game has ended and does
        # not consult `win` - confirm this is intended.
        reward = 1 if end == True else 0
        players[i].storeExperience(state, targets, action, reward,
                                   state_X, target_X, end)

        # for log
# Command-line arguments.
parser = argparse.ArgumentParser()
parser.add_argument("-m", "--model_path")
parser.add_argument("-s", "--save", dest="save", action="store_true")
parser.set_defaults(save=False)
args = parser.parse_args()

# Environment and agent.
env = Reversi()
agent = DQNAgent(env.enable_actions, env.name,
                 env.screen_n_rows, env.screen_n_cols)
# Honour -m/--model_path when it is given; otherwise keep the historical
# default checkpoint so existing invocations behave as before.
agent.load_model(args.model_path or "models1/Reversi.ckpt")

# Game loop.
print("------------- GAME START ---------------")
while not env.isEnd():
    print("*** userターン○ ***")
    env.print_screen()
    enables = env.get_enables(1)
    if len(enables) > 0:
        # Keep prompting until the user enters one of the offered values.
        flg = False
        while not flg:
            print("番号を入力してください")
            print(enables)
            inp = input('>>> ')
            try:
                action_t = int(inp)
            except ValueError:
                # Non-numeric input used to abort the game with a traceback;
                # ask again instead.
                continue
            # Accept the number only if it is one of the listed entries.
            flg = any(action_t == candidate for candidate in enables)
parser.set_defaults(save=False) args = parser.parse_args() # environmet, agent env = Reversi() agent1 = DQNAgent(env.enable_actions, env.name, env.screen_n_rows, env.screen_n_cols) agent1.load_model("models1/Reversi.ckpt") agent2 = DQNAgent(env.enable_actions, env.name, env.screen_n_rows, env.screen_n_cols) agent2.load_model("models2/Reversi.ckpt") # game print("------------- GAME START ---------------") while not env.isEnd(): # print("*** userターン○ ***") # env.print_screen() # enables = env.get_enables(1) # if len(enables) > 0: # flg = False # while not flg: # print("番号を入力してください") # print(enables) # inp = input('>>> ') # action_t = int(inp) # for i in enables: # if action_t == i: # flg = True # break