def evaluate_network():
    """Evaluate the latest network against the current best one.

    Plays EN_GAME_COUNT games between the latest and best models,
    alternating which side moves first, and promotes the latest model
    via update_best_player() when its average point exceeds 0.52.

    Returns:
        bool: True if the latest model replaced the best one.
    """
    cur_dir = Path(__file__).parent.absolute()
    # Join with pathlib instead of hard-coded '\\' separators so the
    # paths also resolve on non-Windows platforms.
    model0 = load_model(str(cur_dir / 'model' / 'latest.h5'))
    model1 = load_model(str(cur_dir / 'model' / 'best.h5'))

    # PV-MCTS action selectors for each model.
    next_action0 = pv_mcts_action(model0, EN_TEMPERATURE)
    next_action1 = pv_mcts_action(model1, EN_TEMPERATURE)
    next_actions = (next_action0, next_action1)

    # Play the evaluation games, swapping seats every other game.
    total_point = 0
    for i in range(EN_GAME_COUNT):
        if i % 2 == 0:
            total_point += play(next_actions)
        else:
            # Seats swapped: convert the result back to the latest's view.
            total_point += 1 - play(list(reversed(next_actions)))
        print('\rEvaluate {}/{}'.format(i + 1, EN_GAME_COUNT), end='')
    print('')

    # Average point of the latest model.
    average_point = total_point / EN_GAME_COUNT
    print('AveragePoint', average_point)

    # Release the Keras session and model references.
    K.clear_session()
    del model0
    del model1

    # Replace the best player when the latest clearly outperforms it.
    if average_point > 0.52:
        update_best_player()
        return True
    else:
        return False
def evaluate_network() -> bool:
    """Evaluate the latest network against the best one.

    Plays EN_GAME_COUNT games (sides alternate each game) and swaps in
    the latest model as best when its average point exceeds 0.5.
    """
    # Candidate (latest) and current champion (best).
    latest = load_model('./model/latest.h5')
    best = load_model('./model/best.h5')

    # One PV-MCTS action selector per model.
    actions = (pv_mcts_action(latest, EN_TEMPERATURE),
               pv_mcts_action(best, EN_TEMPERATURE))

    # Run the evaluation games.
    total_point = 0
    for game in range(EN_GAME_COUNT):
        if game % 2:
            # Swapped seats: convert the score back to latest's view.
            total_point += 1 - play(actions[::-1])
        else:
            total_point += play(actions)
        print(f'\rEvaluate {game + 1}/{EN_GAME_COUNT}', end='')
    print('')

    # Average point of the latest model.
    average_point = total_point / EN_GAME_COUNT
    print('AveragePoint', average_point)

    # Promote the latest model when it wins more than half the points.
    if average_point > 0.5:
        update_best_player()
        return True
    return False
def __init__(self, master=None, model=None):
    """Build the simplified-shogi UI frame: state, directions, AI policy,
    piece images, and the board canvas."""
    tk.Frame.__init__(self, master)
    self.master.title('簡易将棋')

    # Game state and current selection
    # (-1: none, 0-11: board square, 12-14: captured piece).
    self.state = State()
    self.select = -1

    # The eight single-step movement offsets (dx, dy).
    self.dxy = ((0, -1), (1, -1), (1, 0), (1, 1),
                (0, 1), (-1, 1), (-1, 0), (-1, -1))

    # Greedy (temperature 0) PV-MCTS action selector for the AI.
    self.next_action = pv_mcts_action(model, 0.0)

    # Piece images: slot 0 is a placeholder; slots 1-4 hold
    # (full, rotated 180°, 40x40, 40x40 rotated 180°) variants.
    self.images = [(None, None, None, None)]
    for idx in range(1, 5):
        img = Image.open('piece{}.png'.format(idx))
        self.images.append((
            ImageTk.PhotoImage(img),
            ImageTk.PhotoImage(img.rotate(180)),
            ImageTk.PhotoImage(img.resize((40, 40))),
            ImageTk.PhotoImage(img.resize((40, 40)).rotate(180)),
        ))

    # Board canvas; a left click is the human player's move.
    self.c = tk.Canvas(self, width=240, height=400, highlightthickness=0)
    self.c.bind('<Button-1>', self.turn_of_human)
    self.c.pack()

    # Initial rendering.
    self.on_draw()
def __init__(self, master=None, model=None):
    """Build the simplified-shogi UI frame (state, AI policy, images,
    canvas); behavior matches the original, minus backslash
    line-continuations."""
    tk.Frame.__init__(self, master)
    self.master.title('簡易将棋')

    # Game state; select is -1 (none), 0-11 (square) or 12-14 (captured).
    self.state = State()
    self.select = -1

    # Eight single-step direction offsets (dx, dy).
    self.dxy = ((0, -1), (1, -1), (1, 0), (1, 1),
                (0, 1), (-1, 1), (-1, 0), (-1, -1))

    # Greedy PV-MCTS action selector.
    self.next_action = pv_mcts_action(model, 0.0)

    # Piece images: placeholder at index 0, then per piece kind a tuple
    # of (full, rotated 180°, 40x40, 40x40 rotated 180°).
    self.images = [(None, None, None, None)]
    for piece in range(1, 5):
        img = Image.open('piece{}.format'.replace('format', 'png').format(piece)) if False else Image.open('piece{}.png'.format(piece))
        small = img.resize((40, 40))
        self.images.append((
            ImageTk.PhotoImage(img),
            ImageTk.PhotoImage(img.rotate(180)),
            ImageTk.PhotoImage(small),
            ImageTk.PhotoImage(small.rotate(180)),
        ))

    # Board canvas; left click plays the human move.
    self.c = tk.Canvas(self, width=240, height=400, highlightthickness=0)
    self.c.bind('<Button-1>', self.turn_of_human)
    self.c.pack()

    # Initial rendering.
    self.on_draw()
def __init__(self, master=None, model=None, ai_is_first=True):
    """Build the Reversi UI frame; the AI plays the opening move when
    ai_is_first is True."""
    self.ai_is_first = ai_is_first
    tk.Frame.__init__(self, master)
    self.master.title('リバーシ')

    # Game state; previous state starts out empty.
    self.state = State()
    self.prev_state = None

    # Greedy (temperature 0) PV-MCTS action selector.
    # (A plain MCS selector was used here in an earlier revision.)
    self.next_action = pv_mcts_action(model, 0.0)

    # Board canvas with an extra 40px column on the right.
    self.c = tk.Canvas(self, width=BOARD_SIZE * 40 + 40,
                       height=BOARD_SIZE * 40, highlightthickness=0)

    # Let the AI move immediately when it plays first.
    if self.ai_is_first:
        self.turn_of_ai()

    # Left click plays the human move.
    self.c.bind('<Button-1>', self.turn_of_human)
    self.c.pack()

    # Initial rendering.
    self.on_draw()
def __init__(self, master=None, model=None):
    """Build the simplified-shogi (Korean edition) UI frame."""
    tk.Frame.__init__(self, master)
    self.master.title('간이 장기')

    # Game state; selection is -1 (none), 0-11 (square), 12-14 (captured).
    self.state = State()
    self.select = -1

    # Eight single-step direction offsets (dx, dy).
    self.dxy = ((0, -1), (1, -1), (1, 0), (1, 1),
                (0, 1), (-1, 1), (-1, 0), (-1, -1))

    # AI move selection via greedy PV MCTS (temperature 0).
    self.next_action = pv_mcts_action(model, 0.0)

    # Piece images: slot 0 is a placeholder; each piece kind stores
    # (full, rotated 180°, 40x40, 40x40 rotated 180°).
    self.images = [(None, None, None, None)]
    for kind in range(1, 5):
        picture = Image.open('piece{}.png'.format(kind))
        variants = (
            ImageTk.PhotoImage(picture),
            ImageTk.PhotoImage(picture.rotate(180)),
            ImageTk.PhotoImage(picture.resize((40, 40))),
            ImageTk.PhotoImage(picture.resize((40, 40)).rotate(180)),
        )
        self.images.append(variants)

    # Board canvas; human plays with a left click.
    self.c = tk.Canvas(self, width=240, height=400, highlightthickness=0)
    self.c.bind('<Button-1>', self.turn_of_human)
    self.c.pack()

    # Initial rendering.
    self.on_draw()
def evaluate_network():
    """Evaluate the latest model against the best model.

    Plays EN_GAME_COUNT games with seats alternating, then calls
    update_best_player() and returns True when the latest model's
    average point exceeds 0.55; otherwise returns False.
    """
    # Load candidate (latest) first, champion (best) second.
    models = []
    for path in ['./model/latest.h5', './model/best.h5']:
        models.append(tf.keras.models.load_model(path))

    # One PV-MCTS selector per model; reversed order for swapped seats.
    next_actions = [pv_mcts_action(m, EN_TEMPERATURE) for m in models]
    reversed_actions = next_actions[::-1]

    total_point = 0
    for game_index in range(EN_GAME_COUNT):
        if game_index % 2 == 0:
            total_point += play(next_actions)
        else:
            # Invert the swapped-seat result back to latest's view.
            total_point += 1 - play(reversed_actions)
        print('\rEvaluate {}/{}'.format(game_index + 1, EN_GAME_COUNT), end='')
    print('')

    average_point = total_point / EN_GAME_COUNT
    print('Average point: ', average_point)

    # Drop the TF session and the model references.
    tf.keras.backend.clear_session()
    del models

    if 0.55 < average_point:
        update_best_player()
        return True
    return False
def evaluate_network():
    """Evaluate the latest player against the best player.

    Runs EN_GAME_COUNT games with seats alternating each game and
    promotes the latest model via update_best_player() when its average
    point exceeds 0.5.  (The original AlphaZero recipe swapped at 55%;
    this variant refreshes the network more eagerly.)

    Returns:
        bool: True when the best player was replaced.
    """
    # Load the latest (candidate) and best (champion) models.
    model0 = load_model('./model/latest.h5')
    model1 = load_model('./model/best.h5')

    # PV-MCTS action selectors for both players.
    next_actions = (pv_mcts_action(model0, EN_TEMPERATURE),
                    pv_mcts_action(model1, EN_TEMPERATURE))

    # Repeat the match.
    total_point = 0
    for game in range(EN_GAME_COUNT):
        if game % 2 == 0:
            # Actions in order: latest moves first.
            total_point += play(next_actions)
        else:
            # Actions reversed: invert the score back to latest's view.
            total_point += 1 - play(list(reversed(next_actions)))
        print('\rEvaluate {}/{}'.format(game + 1, EN_GAME_COUNT), end='')
    print('')

    # Average point for the latest player.
    average_point = total_point / EN_GAME_COUNT
    print('AveragePoint', average_point)

    # Discard the models and the Keras session.
    K.clear_session()
    del model0
    del model1

    # Swap in the latest player on a winning record.
    if average_point > 0.5:
        update_best_player()
        return True
    return False
def evaluate_network():
    """Evaluate two trained parameter sets against each other.

    Loads the 'normal_50' checkpoint as player 0 and the 'elmo_50'
    checkpoint as player 1, plays EN_GAME_COUNT games with seats
    alternating, and prints the running/average/total points from
    player 0's perspective.
    """
    # Evaluation is forced to CPU; the original computed
    # torch.device('cuda' ...) and then immediately overwrote it with
    # 'cpu', so the CUDA probe was dead code and has been removed.
    device = 'cpu'

    # Player 0: model trained with the normal self-play parameters.
    model0 = DualNet()
    model0.load_state_dict(torch.load('./learned_param/normal_50/best.h5'))
    model0 = model0.double()
    model0 = model0.to(device)
    model0.eval()

    # Player 1: model trained with the elmo-style parameters.
    model1 = DualNet()
    model1.load_state_dict(torch.load('./learned_param/elmo_50/best.h5'))
    model1 = model1.double()
    model1 = model1.to(device)
    model1.eval()

    # PV-MCTS action selectors for both players.
    next_action0 = pv_mcts_action(model0, EN_TEMPERATURE)
    next_action1 = pv_mcts_action(model1, EN_TEMPERATURE)
    next_actions = (next_action0, next_action1)

    # Play the games, swapping the first mover every other game.
    total_point = 0
    for i in range(EN_GAME_COUNT):
        if i % 2 == 0:
            total_point += play(next_actions)
        else:
            # Seats swapped: convert the result back to player 0's view.
            total_point += 1 - play(list(reversed(next_actions)))
        # Progress line: game count, running total, running average.
        print('\rEvaluate {}/{} {} {}'.format(i + 1, EN_GAME_COUNT, total_point, total_point/(i+1)), end='')
    print('')

    # Final averages.
    average_point = total_point / EN_GAME_COUNT
    print('AveragePoint', average_point)
    print('Point', total_point)
def evaluate_network():
    """Evaluate the network: latest player vs best player.

    Plays EN_GAME_COUNT games with alternating seats; promotes the
    latest model when its average point exceeds 0.5.

    Returns:
        bool: True if the best player was replaced.
    """
    # Load both competitors from their configured paths.
    candidate = load_model(LATEST_PATH)
    champion = load_model(BEST_PATH)

    # PV-MCTS selectors, candidate first.
    selectors = (pv_mcts_action(candidate, EN_TEMPERATURE),
                 pv_mcts_action(champion, EN_TEMPERATURE))

    # Run the match.
    total_point = 0
    for game in range(EN_GAME_COUNT):
        if game % 2:
            # Swapped seats: invert the score back to candidate's view.
            total_point += 1 - play(list(reversed(selectors)))
        else:
            total_point += play(selectors)
        print('\rEvaluate {}/{}'.format(game + 1, EN_GAME_COUNT), end='')
    print('')

    # Candidate's average point.
    average_point = total_point / EN_GAME_COUNT
    print('AveragePoint', average_point)

    # Free the session and the models.
    K.clear_session()
    del candidate
    del champion

    # Promote on a winning record.
    if average_point > 0.5:
        update_best_player()
        return True
    return False
def evaluate_best_player():
    """Pit the best model (greedy PV-MCTS) against a random player.

    Runs the 'VS_Random' evaluation via evaluate_algorithm_of(), then
    releases the Keras session and the model.
    """
    cur_dir = Path(__file__).parent.absolute()
    # Join with pathlib instead of a hard-coded '\\' separator so the
    # path also resolves on non-Windows platforms.
    model = load_model(str(cur_dir / 'model' / 'best.h5'))

    # Greedy (temperature 0) PV-MCTS selector vs the random baseline.
    next_action0 = pv_mcts_action(model, 0.0)
    next_actions = (next_action0, random_action)
    evaluate_algorithm_of('VS_Random', next_actions)

    # Release the session and the model.
    K.clear_session()
    del model
def evaluate_network():
    """Evaluate the latest player against the best player.

    Plays EN_GAME_COUNT games with seats alternating and promotes the
    latest model when its average point exceeds 0.5.

    Returns:
        bool: True when the best player was replaced.
    """
    # Forward slashes work on every platform Python supports; the
    # original 'model\\latest.h5' backslash paths broke on POSIX.
    model0 = load_model('model/latest.h5')  # latest player
    model1 = load_model('model/best.h5')    # best player

    # PV-MCTS action selectors for both players.
    next_action0 = pv_mcts_action(model0, EN_TEMPERATURE)
    next_action1 = pv_mcts_action(model1, EN_TEMPERATURE)
    next_actions = (next_action0, next_action1)

    # Repeat the match.
    total_point = 0
    for i in range(EN_GAME_COUNT):
        if i % 2 == 0:
            total_point += play(next_actions)
        else:
            # Seats swapped: invert the score back to latest's view.
            total_point += 1 - play(list(reversed(next_actions)))
        print('\rEvaluate {}/{}'.format(i + 1, EN_GAME_COUNT), end='')
    print('')

    # Average point of the latest player.
    average_point = total_point / EN_GAME_COUNT
    print('AveragePoint', average_point)

    # Discard the models and the Keras session.
    K.clear_session()
    del model0
    del model1

    # Swap in the latest player on a winning record.
    if average_point > 0.5:
        update_best_player()
        return True
    else:
        return False
def evaluate_network():
    """Evaluate the latest player against the best player.

    Plays EN_GAME_COUNT games with alternating seats and promotes the
    latest model when its average point is at least 0.4.

    NOTE(review): the 0.4 threshold replaces the best player even when
    the latest one loses on average — confirm this is intentional.

    Returns:
        bool: True if the best player was replaced.
    """
    # Candidate (latest) and champion (best).
    latest = load_model("./model/latest.h5")
    best = load_model("./model/best.h5")

    # PV-MCTS selectors, candidate first.
    pair = (pv_mcts_action(latest, EN_TEMPERATURE),
            pv_mcts_action(best, EN_TEMPERATURE))

    # Run the match.
    total_point = 0
    for game in range(EN_GAME_COUNT):
        if game % 2:
            # Swapped seats: invert the score back to latest's view.
            total_point += 1 - play(list(reversed(pair)))
        else:
            total_point += play(pair)
        print("\rEvaluate {}/{}".format(game + 1, EN_GAME_COUNT), end="")
    print("")

    # Average point of the latest player.
    average_point = total_point / EN_GAME_COUNT
    print("AveragePoint", average_point)

    # Free the session and models.
    K.clear_session()
    del latest
    del best

    # Promote when the threshold is met.
    if average_point >= 0.4:
        update_best_player()
        return True
    return False
def __init__(self, master=None, model=None):
    """Build the tic-tac-toe UI frame."""
    tk.Frame.__init__(self, master)
    self.master.title("三目並べ")

    # Fresh game state.
    self.state = State()

    # Greedy (temperature 0) PV-MCTS selector for the AI.
    self.next_action = pv_mcts_action(model, 0.0)

    # 240x240 board canvas; a left click is the human's move.
    board = tk.Canvas(self, width=240, height=240, highlightthickness=0)
    board.bind("<Button-1>", self.turn_of_human)
    board.pack()
    self.c = board

    # Initial rendering.
    self.on_draw()
def evaluate_best_player():
    """Benchmark the best model against random, alpha-beta and MCTS
    baselines via evaluate_algorithm_of()."""
    model = tf.keras.models.load_model('./model/best.h5')

    # Greedy (temperature 0) PV-MCTS selector for the best player.
    best_action = pv_mcts_action(model, 0.0)

    # One evaluation run per baseline opponent.
    matchups = [('VS_Random', random_action),
                ('VS_AlphaBeta', alpha_beta_action),
                ('VS_MCTS', mcts_action)]
    for label, baseline in matchups:
        evaluate_algorithm_of(label, [best_action, baseline])

    # Drop the TF session and the model.
    tf.keras.backend.clear_session()
    del model
def evaluate_best_player():
    """Benchmark the best model against random, alpha-beta and MCTS
    baselines."""
    model = load_model('./model/best.h5')

    # Greedy PV-MCTS selector for the best player.
    best_action = pv_mcts_action(model, 0.0)

    # Versus random.
    evaluate_algorithm_of('VS_Random', (best_action, random_action))

    # Versus alpha-beta search.
    evaluate_algorithm_of('VS_Alpha_Beta', (best_action, alpha_beta_action))

    # Versus plain Monte-Carlo tree search.
    evaluate_algorithm_of('VS_MCTS', (best_action, mcts_action))

    # Free the session and the model.
    K.clear_session()
    del model
def __init__(self, master=None, model=None):
    """Build the Connect-Four UI frame."""
    tk.Frame.__init__(self, master)
    self.master.title('컨넥트4')

    # Fresh game state.
    self.state = State()

    # Greedy (temperature 0) PV-MCTS selector for the AI player.
    self.next_action = pv_mcts_action(model, 0.0)

    # 280x240 board canvas; a left click plays the human move.
    board = tk.Canvas(self, width=280, height=240, highlightthickness=0)
    board.bind('<Button-1>', self.turn_of_human)
    board.pack()
    self.c = board

    # Initial rendering.
    self.on_draw()
def __init__(self, master=None, model=None):
    """Build the Reversi UI frame."""
    tk.Frame.__init__(self, master)
    self.master.title('リバーシ')

    # New game state.
    self.state = State()

    # Greedy (temperature 0) PV-MCTS action selector.
    self.next_action = pv_mcts_action(model, 0.0)

    # Square 240x240 canvas; the human moves with a left click.
    board = tk.Canvas(self, width=240, height=240, highlightthickness=0)
    board.bind('<Button-1>', self.turn_of_human)
    board.pack()
    self.c = board

    # Render the initial position.
    self.on_draw()
def __init__(self, master=None, model=None):
    """Build the shogi UI frame: game state, move-direction table,
    AI policy, piece images, and the board canvas."""
    tk.Frame.__init__(self, master)
    self.master.title('簡易将棋')

    # Game state generation.
    self.state = State()
    self.select = -1  # selection (-1: none, 0-11: square, 12-14: captured piece)

    # Direction constants (dx, dy): the 8 single-step neighbours, two
    # forward-jump offsets, then full-length rays along both diagonals,
    # both files and both ranks.  The table order is load-bearing for
    # the action encoding — do not reorder.
    # NOTE(review): the 8 single-step offsets also reappear inside the
    # ray sections (e.g. (1, -1), (-1, 1)) — presumably intentional for
    # the encoding; confirm against the move-generation code.
    self.dxy = ((0, -1), (1, -1), (1, 0), (1, 1), (0, 1), (-1, 1), (-1, 0), (-1, -1),
                (1, -2), (-1, -2),
                (1, -1), (2, -2), (3, -3), (4, -4), (5, -5), (6, -6), (7, -7), (8, -8),
                (-1, 1), (-2, 2), (-3, 3), (-4, 4), (-5, 5), (-6, 6), (-7, 7), (-8, 8),
                (1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6), (7, 7), (8, 8),
                (-1, -1), (-2, -2), (-3, -3), (-4, -4), (-5, -5), (-6, -6), (-7, -7), (-8, -8),
                (1, 0), (2, 0), (3, 0), (4, 0), (5, 0), (6, 0), (7, 0), (8, 0),
                (-1, 0), (-2, 0), (-3, 0), (-4, 0), (-5, 0), (-6, 0), (-7, 0), (-8, 0),
                (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6), (0, 7), (0, 8),
                (0, -1), (0, -2), (0, -3), (0, -4), (0, -5), (0, -6), (0, -7), (0, -8))
    #self.dxy = ((0, -1), (1, -1), (1, 0), (1, 1), (0, 1), (-1, 1), (-1, 0), (-1, -1))

    # Function that selects actions via PV MCTS (temperature 0 = greedy).
    self.next_action = pv_mcts_action(model, 0.0)

    # Image preparation: index 0 is a placeholder; 1-18 are piece images
    # stored as (full, rotated 180°, 40x40, 40x40 rotated 180°).
    self.images = [(None, None, None, None)]
    for i in range(1, 19):
        image = Image.open('koma_gif/piece{}.gif'.format(i))
        self.images.append(
            (ImageTk.PhotoImage(image),
             ImageTk.PhotoImage(image.rotate(180)),
             ImageTk.PhotoImage(image.resize((40, 40))),
             ImageTk.PhotoImage(image.resize((40, 40)).rotate(180))))

    # Canvas creation; a left click is the human player's move.
    self.c = tk.Canvas(self, width=720, height=800, highlightthickness=0)
    self.c.bind('<Button-1>', self.turn_of_human)
    self.c.pack()

    # Initial draw.
    self.on_draw()
def evaluate_best_player():
    """Evaluate the best-player model against a random opponent.

    The original file also contained disabled matchups (vs a past
    checkpoint, vs MCTS, vs a human, and a policy/value-dumping random
    player); only the random evaluation is active.
    """
    # Load the current best model.
    model = load_model("./model/best.h5")

    # Greedy (temperature 0) PV-MCTS selector.
    next_pv_mcts_action = pv_mcts_action(model, 0.0)
    print("load model")

    # Best player vs random baseline.
    matchup = (next_pv_mcts_action, random_action)
    evaluate_algorithm_of("first_VS_Random", matchup)

    # Discard the model and the session.
    K.clear_session()
    del model
def __init__(self, master=None, model=None):
    """Build the game window frame and wire up input handling."""
    # Run the tk.Frame initializer for this widget.
    tk.Frame.__init__(self, master)
    self.master.title("グラフィックの描画")

    # Game-state instance.
    self.state = State()

    # AI policy: greedy PV-MCTS (temperature 0).
    self.next_action = pv_mcts_action(model, 0.0)

    # 675x675 board canvas; a left click triggers the human's turn.
    canvas = tk.Canvas(self, width=675, height=675, highlightthickness=0)
    canvas.bind("<Button-1>", self.turn_of_human)
    canvas.pack()
    self.c = canvas

    # First draw.
    self.on_draw()
def evaluate_best_player():
    """Benchmark the best model against random, alpha-beta and MCTS
    baselines via evaluate_algorithm_of()."""
    # Load the best-player model.
    model = load_model('./model/best.h5')

    # Greedy PV-MCTS selector (temperature 0).
    best_action = pv_mcts_action(model, 0.0)

    # One evaluation per baseline opponent.
    for label, baseline in (('VS_Random', random_action),
                            ('VS_AlphaBeta', alpha_beta_action),
                            ('VS_MCTS', mcts_action)):
        evaluate_algorithm_of(label, (best_action, baseline))

    # Discard the model and the session.
    K.clear_session()
    del model
def evaluate_best_player():
    """Benchmark the best model against random, alpha-beta and MCTS
    opponents."""
    # Load the best-player model.
    model = load_model('./model/best.h5')

    # Greedy (temperature 0) PV-MCTS selector.
    champion = pv_mcts_action(model, 0.0)

    # Versus random play.
    evaluate_algorithm_of('VS_Random', (champion, random_action))

    # Versus alpha-beta search.
    evaluate_algorithm_of('VS_AlphaBeta', (champion, alpha_beta_action))

    # Versus Monte-Carlo tree search.
    evaluate_algorithm_of('VS_MCTS', (champion, mcts_action))

    # Free the session and the model.
    K.clear_session()
    del model
def evaluate_network():
    """Play the best model (PV MCTS) against a random player for
    EN_GAME_COUNT games and print the average point."""
    # Load the best model; its PV-MCTS selector is player 0.
    # (An earlier revision used a plain MCTS selector here.)
    best_model = load_model(BEST_PATH)
    next_actions = (pv_mcts_action(best_model, EN_TEMPERATURE), random_action)

    # Run the games, swapping seats every other game.
    total_point = 0
    for game in range(EN_GAME_COUNT):
        if game % 2:
            # Swapped seats: invert the result back to player 0's view.
            total_point += 1 - play(list(reversed(next_actions)))
        else:
            total_point += play(next_actions)
        print('\rEvaluate {}/{}'.format(game + 1, EN_GAME_COUNT), end='')
    print('')

    # Average point from player 0's perspective.
    average_point = total_point / EN_GAME_COUNT
    print('AveragePoint', average_point)
def evaluate_best_player():
    """Evaluate the best-player model against a random opponent.

    The original file also carried disabled matchups (value-greedy agent
    vs random, vs a past checkpoint, vs a human, vs MCTS); only the
    PV-MCTS-vs-random evaluation is active.
    """
    # Load the current best model.
    model = load_model("./model/best.h5")

    # Greedy (temperature 0) PV-MCTS selector.
    next_pv_mcts_action = pv_mcts_action(model, 0.0)

    # Best player vs random baseline.
    matchup = (next_pv_mcts_action, random_action)
    evaluate_algorithm_of("VS_Random", matchup)

    # Discard the model and the session.
    K.clear_session()
    del model
from kivy.animation import Animation from kivy.app import App from kivy.core.window import Window from kivy.graphics import Color, Ellipse, Line from kivy.uix.widget import Widget from tensorflow.keras.models import load_model from game import State from pv_mcts import pv_mcts_action model = load_model('./model/best.h5') state = State() next_action = pv_mcts_action(model, 0.0) class MyWedget(Widget): def reset(self): self.canvas.clear() with self.canvas: Color(1, 1, 0, .5) Line(points=[160, 0, 160, 480], width=2, close='True') Line(points=[320, 0, 320, 480], width=2, close='True') Line(points=[0, 160, 480, 160], width=2, close='True') Line(points=[0, 320, 480, 320], width=2, close='True') def on_touch_down(self, touch): ''' クリックイベント ''' self.turn_of_human(touch) # 人間のターン