print("番号を入力してください") print(enables) inp = input('>>> ') action_t = int(inp) for i in enables: if action_t == i: flg = True break env.update(action_t, 1) else: print("パス") if env.isEnd() == True: break print("*** AIターン● ***") env.print_screen() enables = env.get_enables(2) if len(enables) > 0: qvalue, action_t = agent.select_enable_action(env.screen, enables) print('>>> {:}'.format(action_t)) env.update(action_t, 2) else: print("パス") print("*** ゲーム終了 ***") if env.winner() == 1: print("あなたの勝ち! スコアは、{:}です。".format(env.get_score(1))) else: print("あなたの負け! AIのスコアは、{:}です。".format(env.get_score(2)))
# NOTE(review): this chunk was mangled onto a single physical line and
# is an excerpt from the training loop; the surrounding loops over
# epochs / turns (which define i, j, e, state, targets, tr, reword,
# state_X, target_X, end) are outside this chunk -- verify the flat
# reconstruction below against the full file.
if win == playerID[j]:
    # The winning player receives a reward of 1.
    # NOTE(review): `reword` is a typo for `reward`; kept as-is because
    # it is presumably initialised under that name outside this chunk.
    reword = 1
players[j].store_experience(
    state, targets, tr, reword, state_X, target_X, end)
players[j].experience_replay()

# Choose an action for the current player.
action = players[i].select_action(state, targets, players[i].exploration)
# Execute the action on the board.
env.update(action, playerID[i])

# Logging: current training loss and best Q-value over legal moves.
loss = players[i].current_loss
Q_max, Q_action = players[i].select_enable_action(state, targets)
print(
    "player:{:1d} | pos:{:2d} | LOSS: {:.4f} | Q_MAX: {:.4f}"
    .format(playerID[i], action, loss, Q_max))

# Result of the executed action.
terminal = env.isEnd()
w = env.winner()
print("EPOCH: {:03d}/{:03d} | WIN: player{:1d}".format(e, n_epochs, w))

# Only the second-mover (player 2) model is saved.
players[1].save_model()
# NOTE(review): this chunk was mangled onto a single physical line; the
# reconstruction nests everything after the legality check inside
# `if len(targets) > 0:` because `action` is only bound on that path --
# verify against the full file.
for i in range(0, 2):
    state = env.screen                     # current board state (array)
    targets = env.getEnables(playerID[i])  # legal positions for this player
    if len(targets) > 0:  # the player has at least one legal move
        # Choose an action; exploration rate decays with epoch e.
        action = players[i].selectAction(state, targets, 1 / ((e / 10) + 1))
        # Execute the action (board is addressed as an 8x8 grid).
        env.doFlip([action // 8, action % 8], playerID[i])
        # Terminal check.
        win = env.winner()  # player currently ahead
        end = env.isEnd()
        # Next state and the opponent's legal moves from it.
        state_X = env.screen
        # NOTE(review): playerID[i + 1] indexes past the end when i == 1
        # if playerID has only two entries -- confirm whether this should
        # be playerID[(i + 1) % 2].
        target_X = env.getEnables(playerID[i + 1])
        if len(target_X) == 0:
            # Opponent must pass: the same player moves again.
            target_X = env.getEnables(playerID[i])
        reward = 0
        if end:
            reward = 1
        players[i].storeExperience(state, targets, action, reward,
                                   state_X, target_X, end)