def test_loss_cut_ab(seed=None):
    """Measure the winning rate of the loss-cut alpha-beta agent against a
    plain alpha-beta agent over 10 seed-paired games (each seed is played
    twice with first/second player swapped).

    Args:
        seed: base value multiplied by the game index to seed `random`.
            Defaults to a fresh `random.random()` per call (the previous
            `seed=random.random()` default was evaluated once at import
            time, so every call without an argument replayed one seed).
    """
    if seed is None:
        seed = random.random()
    create_ev_table(ev_table)
    print("seed", seed)
    winning_rate = 0.0
    draw_count = 0
    for i in range(10):
        random.seed(seed * i)
        state = State()
        est_ii_state = EstimatedState()
        est_ii_state.create_est_ii_state_from_state(state)
        while True:
            # Game over
            if state.is_done():
                if state.is_lose():
                    if state.depth % 2 == 1:
                        winning_rate += 1  # first player (loss-cut agent) wins
                else:  # draw
                    winning_rate += 0.5
                    draw_count += 1
                break
            # Select an action
            if state.is_first_player():
                action, est_ii_state = cut_loss_alpha_beta_action(
                    est_ii_state, 5)
                est_ii_state.my_real_next(state, action)
            else:
                action = alpha_beta_action(state)
                est_ii_state.enemy_real_next(action)
            print(state)
            state = state.next(action)
        # Swap first/second player and replay with the same seed.
        # NOTE(review): est_ii_state is NOT re-initialized for this second
        # game, so the updates below operate on stale state — confirm this
        # is intentional (the action choices here do not read est_ii_state).
        random.seed(seed * i)
        state = State()
        while True:
            if state.is_done():
                if state.is_lose():
                    if state.depth % 2 == 0:
                        winning_rate += 1  # second player wins
                else:  # draw
                    winning_rate += 0.5
                    draw_count += 1
                break
            # Select an action
            if state.is_first_player():
                action = alpha_beta_action(state)
                est_ii_state.enemy_real_next(action)
            else:
                action = perfect_alpha_beta_action(state, 5)
                est_ii_state.my_real_next(state, action)
            state = state.next(action)
        print(winning_rate, (i + 1) * 2, draw_count)
def exp_reduction_effect(seed=None, reduction_func=IDDFS_alpha_beta_action):
    """Compare a search-reduction alpha-beta variant against plain
    alpha-beta over 50 seed-paired games (each seed played twice with
    sides swapped) under the same time limit.

    Args:
        seed: base value multiplied by the game index to seed `random`.
            Defaults to a fresh `random.random()` per call (the previous
            default was evaluated once at import time).
        reduction_func: the reduction search function under test.
    """
    if seed is None:
        seed = random.random()
    # Build the evaluation table used by the agents.
    create_ev_table(ev_table)
    print("seed", seed)
    reduction_ab_action = time_limit_alpha_beta(reduction_func)  # agent under test
    simple_ab_action = time_limit_alpha_beta(alpha_beta_action)  # opponent
    winning_rate = 0.0
    draw_count = 0
    for i in range(50):
        random.seed(seed * i)
        state = State()
        while True:
            # Game over
            if state.is_done():
                if state.is_lose():
                    if state.depth % 2 == 1:
                        winning_rate += 1  # first player wins
                else:  # draw
                    winning_rate += 0.5
                    draw_count += 1
                break
            # Select an action
            if state.is_first_player():
                action = reduction_ab_action(state)
            else:
                action = simple_ab_action(state)
            state = state.next(action)
        # Swap first/second player and replay with the same seed
        random.seed(seed * i)
        state = State()
        while True:
            if state.is_done():
                if state.is_lose():
                    if state.depth % 2 == 0:
                        winning_rate += 1  # second player wins
                else:  # draw
                    winning_rate += 0.5
                    draw_count += 1
                break
            # Select an action
            if state.is_first_player():
                action = simple_ab_action(state)
            else:
                action = reduction_ab_action(state)
            state = state.next(action)
        print(winning_rate, (i + 1) * 2, draw_count)
def exp_search_depth_effect(seed=None, deep_depth=5, shallow_depth=3,
                            search_func=alpha_beta_action):
    """Measure the effect of search depth: a deep search plays a shallow
    search of the same algorithm over 50 seed-paired games (each seed
    played twice with sides swapped).

    Args:
        seed: base value multiplied by the game index to seed `random`.
            Defaults to a fresh `random.random()` per call (the previous
            default was evaluated once at import time).
        deep_depth: depth for the deep-search side.
        shallow_depth: depth for the shallow-search side.
        search_func: search function taking (state, depth).
    """
    if seed is None:
        seed = random.random()
    # Build the evaluation table used by the agents.
    create_ev_table(ev_table)
    print("seed", seed)
    winning_rate = 0.0
    draw_count = 0
    for i in range(50):
        random.seed(seed * i)
        state = State()
        while True:
            # Game over
            if state.is_done():
                if state.is_lose():
                    if state.depth % 2 == 1:
                        winning_rate += 1  # first player (deep) wins
                else:  # draw
                    winning_rate += 0.5
                    draw_count += 1
                break
            # Select an action
            if state.is_first_player():
                action = search_func(state, deep_depth)  # deep search
            else:
                action = search_func(state, shallow_depth)  # shallow search
            state = state.next(action)
        # Swap first/second player and replay with the same seed
        random.seed(seed * i)
        state = State()
        while True:
            if state.is_done():
                if state.is_lose():
                    if state.depth % 2 == 0:
                        winning_rate += 1  # second player (deep) wins
                else:  # draw
                    winning_rate += 0.5
                    draw_count += 1
                break
            # Select an action
            if state.is_first_player():
                action = search_func(state, shallow_depth)  # shallow search
            else:
                action = search_func(state, deep_depth)  # deep search
            state = state.next(action)
        print(winning_rate, (i + 1) * 2, draw_count)
def exp_effect_of_search_depth(func_id=2, seed=None):
    """Sweep alpha-beta search depths 2-6 against an MCTS first player,
    100 games per depth, printing the second player's winning rate.

    NOTE(review): another function with this same name is defined later in
    this file and will shadow this one at import time — confirm which
    definition is intended to be callable.

    Args:
        func_id: index passed to `select_func` to pick the evaluation
            function used to build the table.
        seed: base value multiplied by the game index to seed `random`.
            Defaults to a fresh `random.random()` per call (the previous
            default was evaluated once at import time).
    """
    if seed is None:
        seed = random.random()
    # Build the evaluation table for the selected evaluation function.
    create_ev_table(ev_table, select_func(func_id))
    print("seed", seed)
    gamma = 100000  # threshold cut disabled (effectively infinite)
    depths = [2, 3, 4, 5, 6]
    for depth in depths:
        winning_rate = 0.0
        drows_count = 0
        for i in range(100):
            random.seed(seed * i)
            state = State()
            while True:
                # Game over
                if state.is_done():
                    if state.is_lose():
                        if state.depth % 2 == 0:
                            winning_rate += 1  # second player (alpha-beta) wins
                    else:  # draw
                        drows_count += 1
                        winning_rate += 0.5
                    break
                # Select an action
                if state.is_first_player():
                    action = mcts_action(state)
                else:
                    action = alpha_beta_action(state, gamma, depth)
                state = state.next(action)
        print("勝率", winning_rate, "drows_count=", drows_count)
def exp_fair_compete(depth=5, func_id=3, seed=None):
    """Pit the action-restricted alpha-beta agent against the unrestricted
    one, 100 games per configuration, once with each side restricted.

    Args:
        depth: alpha-beta search depth for both agents.
        func_id: index passed to `select_func` for the evaluation table.
        seed: base value multiplied by the game index to seed `random`.
            Defaults to a fresh `random.random()` per call (the previous
            default was evaluated once at import time).
    """
    if seed is None:
        seed = random.random()
    gamma = 100000  # threshold cut disabled (effectively infinite)
    restricts = [True, False]
    print(seed)
    for restrict in restricts:
        create_ev_table(ev_table, select_func(func_id))
        winning_rate = 0.0
        drows_count = 0
        for i in range(100):
            random.seed(seed * i)
            state = State()
            while True:
                # Game over
                if state.is_done():
                    if state.is_lose():
                        if state.depth % 2 == 0:
                            winning_rate += 1  # second player wins
                    else:  # draw
                        winning_rate += 0.5
                        drows_count += 1
                    break
                # Select an action: first player gets the opposite
                # restriction setting of the second player.
                if state.is_first_player():
                    action = alpha_beta_action(state, gamma, depth, not restrict)
                else:
                    action = alpha_beta_action(state, gamma, depth, restrict)
                state = state.next(action)
        print("制限", restrict, "のエージェントが後手の際の勝率")
        print(winning_rate, "drows_count=", drows_count)
def exp_effect_of_action_restrict_for_compete(depth=5, func_id=2, rdm=None):
    """Measure the effect of action restriction on the alpha-beta agent
    when playing second against an imperfect-information MCTS first
    player, 100 games per restriction setting.

    Args:
        depth: alpha-beta search depth.
        func_id: index passed to `select_func` for the evaluation table.
        rdm: base value multiplied by the game index to seed `random`.
            Defaults to a fresh `random.random()` per call (the previous
            default was evaluated once at import time).
    """
    if rdm is None:
        rdm = random.random()
    gamma = 100000  # threshold cut disabled (effectively infinite)
    restricts = [True, False]
    for restrict in restricts:
        create_ev_table(ev_table, select_func(func_id))
        winning_rate = 0.0
        drows_count = 0
        for i in range(100):
            random.seed(rdm * i)
            state = State()
            while True:
                # Game over
                if state.is_done():
                    if state.is_lose():
                        if state.depth % 2 == 0:
                            winning_rate += 1  # second player (alpha-beta) wins
                    else:  # draw
                        winning_rate += 0.5
                        drows_count += 1
                    break
                # Select an action
                if state.is_first_player():
                    action = ii_mcts_action(state)
                else:
                    action = alpha_beta_action(state, gamma, depth, restrict)
                state = state.next(action)
        print("restrict", restrict)
        print(winning_rate, "drows_count=", drows_count)
def exp_effect_of_search_depth():
    """Sweep evaluation-function ids 0-7, playing a depth-5 alpha-beta
    second player against an imperfect-information MCTS first player,
    100 games per id.

    NOTE(review): this redefines (and shadows) the earlier function of the
    same name in this file — despite the name it sweeps evaluation
    functions, not depths. Consider renaming one of them.
    """
    gamma = 100000  # threshold cut disabled (effectively infinite)
    rdm = random.random()
    for func_id in range(8):
        create_ev_table(ev_table, select_func(func_id))
        winning_rate = 0.0
        drows_count = 0
        for i in range(100):
            random.seed(rdm * i)
            state = State()
            while True:
                # Game over
                if state.is_done():
                    if state.is_lose():
                        if state.depth % 2 == 0:
                            winning_rate += 1  # second player (alpha-beta) wins
                    else:  # draw
                        winning_rate += 0.5
                        drows_count += 1
                    break
                # Select an action
                if state.is_first_player():
                    action = ii_mcts_action(state)
                else:
                    action = alpha_beta_action(state, gamma, 5)
                state = state.next(action)
        print(winning_rate, "id=", func_id, "drows_count=", drows_count)
def vs_mcts(ev_func, seed, buttle_num):
    """Play a depth-5 alpha-beta agent (using `ev_func`) against MCTS for
    `buttle_num` seed-paired games (each seed played twice with sides
    swapped) and return the alpha-beta agent's accumulated score.

    Args:
        ev_func: evaluation function handed to `alpha_beta_action`.
        seed: base value multiplied by the game index to seed `random`.
        buttle_num: number of seed pairs to play.

    Returns:
        The alpha-beta agent's score (1 per win, 0.5 per draw).
    """
    winning_rate = 0.0
    draw_count = 0
    for i in range(buttle_num):
        random.seed(seed * i)
        state = State()
        while True:
            # Game over
            if state.is_done():
                if state.is_lose():
                    if state.depth % 2 == 1:
                        winning_rate += 1  # first player (alpha-beta) wins
                else:  # draw
                    winning_rate += 0.5
                    draw_count += 1
                break
            # Select an action
            if state.is_first_player():
                action = alpha_beta_action(state, ev_func, 5)
            else:
                action = mcts_action(state)
            state = state.next(action)
        # Swap first/second player and replay with the same seed
        random.seed(seed * i)
        state = State()
        while True:
            if state.is_done():
                if state.is_lose():
                    if state.depth % 2 == 0:
                        winning_rate += 1  # second player (alpha-beta) wins
                else:
                    winning_rate += 0.5
                    draw_count += 1
                break
            # Select an action
            if state.is_first_player():
                action = mcts_action(state)
            else:
                action = alpha_beta_action(state, ev_func, 5)
            state = state.next(action)
        print(winning_rate, draw_count)
    return winning_rate
def play(model, using_saved_state=False, saving_ontheway_state=False):
    """Run one self-play game with PV-MCTS and return its training data.

    Args:
        model: network handed to `pv_mcts_scores`.
        using_saved_state: resume from `load_state()` when it yields a
            state; otherwise start fresh.
        saving_ontheway_state: when True, persist the state via
            `save_state` once the game reaches 25 recorded moves.

    Returns:
        A list of [board, policy, value] training records, where value
        alternates sign per move starting from the first player's result.
    """
    history = []  # training data: [[pieces, enemy_pieces], policies, value]
    # Create (or resume) the game state.
    if using_saved_state:
        state = load_state()
        if not state:
            state = State()
    else:
        state = State()
    starttime = time.time()
    print('')
    while True:
        # Game over
        if state.is_done():
            endtime = time.time()
            print("first player is ", "lose" if state.is_lose() else "win")
            print("first player num:", state.piece_count(state.pieces))
            print('elapsed time', endtime - starttime)
            print(state)
            break
        # Probability distribution over legal actions
        scores = pv_mcts_scores(model, state, SP_TEMPERATURE)
        # Record state and policy as training data
        policies = [0] * DN_OUTPUT_SIZE
        for action, policy in zip(state.legal_actions(), scores):
            policies[action] = policy
        history.append([[state.pieces, state.enemy_pieces], policies, None])
        # Periodic progress output
        if len(history) % 10 == 0:
            print("state len: ", len(history))
            print(state)
        if saving_ontheway_state and len(history) == 25:
            save_state(state)
        # Sample an action from the policy distribution
        action = np.random.choice(state.legal_actions(), p=scores)
        # Advance to the next state
        state = state.next(action)
    # Back-fill values, flipping sign each move (alternating players).
    value = first_player_value(state)
    for i in range(len(history)):
        history[i][2] = value
        value = -value
    return history
def exp_gamma_winning_rate(depth=5, func_id=2, seed=None):
    """Sweep the threshold-cut parameter gamma from 0.0 upward in steps of
    0.1 (30 steps), playing 100 games per gamma of MCTS (first) versus
    alpha-beta (second), and print the winning rate per gamma.

    Args:
        depth: alpha-beta search depth.
        func_id: index passed to `select_func` for the evaluation table.
        seed: base value multiplied by the game index to seed `random`.
            Defaults to a fresh `random.random()` per call (the previous
            default was evaluated once at import time).
    """
    if seed is None:
        seed = random.random()
    # Build the evaluation table for the selected evaluation function.
    create_ev_table(ev_table, select_func(func_id))
    keep_gamma_winning_rate = [0] * 30
    print("seed", seed)
    gamma = 0.0
    for index, _ in enumerate(keep_gamma_winning_rate):
        winning_rate = 0.0
        for i in range(100):
            random.seed(seed * i)
            state = State()
            while True:
                # Game over
                if state.is_done():
                    if state.is_lose():
                        if state.depth % 2 == 0:
                            winning_rate += 1  # second player (alpha-beta) wins
                    else:  # draw
                        winning_rate += 0.5
                    break
                # Select an action
                if state.is_first_player():
                    action = mcts_action(state)
                else:
                    action = alpha_beta_action(state, gamma, depth, False)
                state = state.next(action)
        keep_gamma_winning_rate[index] = winning_rate
        print(keep_gamma_winning_rate)
        gamma += 0.1
    print(keep_gamma_winning_rate)
class GameUI(tk.Frame):
    """Tkinter UI for playing against the PV-MCTS agent.

    The board is drawn on a canvas of 40px cells; an extra column on the
    right acts as an "undo" click target (restores `prev_state`).
    """

    def __init__(self, master=None, model=None, ai_is_first=True):
        """Build the window, create the game state, and start the AI if it
        moves first.

        Args:
            master: parent Tk widget.
            model: network handed to `pv_mcts_action`.
            ai_is_first: True when the AI plays the first move.
        """
        self.ai_is_first = ai_is_first
        tk.Frame.__init__(self, master)
        self.master.title('リバーシ')
        # Game state, plus one-level undo history.
        self.state = State()
        self.prev_state = None
        # Action-selection function built from PV-MCTS (temperature 0).
        self.next_action = pv_mcts_action(model, 0.0)
        # Canvas: board plus one extra 40px column used as the undo area.
        self.c = tk.Canvas(self, width=BOARD_SIZE * 40 + 40,
                           height=BOARD_SIZE * 40, highlightthickness=0)
        # NOTE(review): the original comment here said "when AI is second"
        # but the condition triggers when the AI moves FIRST — confirm.
        if self.ai_is_first:
            self.turn_of_ai()
        self.c.bind('<Button-1>', self.turn_of_human)
        self.c.pack()
        self.on_draw()

    def turn_of_human(self, event):
        """Handle a board click: undo, validate, apply the human move,
        then schedule the AI's reply."""
        # Game over: reset the board on the next click.
        if self.state.is_done():
            print("first player is ",
                  "lose" if self.state.is_lose() else "win")
            self.state = State()
            self.prev_state = None
            self.on_draw()
            return
        # Ignore clicks when it is not the human's turn.
        if self.ai_is_first:
            is_human_turn = not self.state.is_first_player()
        else:
            is_human_turn = self.state.is_first_player()
        if not is_human_turn:
            return
        # Convert the click position to board coordinates.
        x = int(event.x / 40)
        y = int(event.y / 40)
        is_back = x > BOARD_SIZE - 1  # click in the extra right column = undo
        print("x y", x, y)
        if is_back and self.prev_state:
            print("check modoru")
            print("")
            self.state = self.prev_state
            self.prev_state = None
            self.on_draw()
            return
        if x < 0 or (BOARD_SIZE - 1) < x or y < 0 or (BOARD_SIZE - 1) < y:
            # Out of range
            print("範囲外")
            return
        action = x + y * BOARD_SIZE
        print("human", action, get_coodicate(action))
        # Reject illegal moves; a lone ALL_PIECES_NUM entry means "pass".
        legal_actions = self.state.legal_actions()
        if legal_actions == [ALL_PIECES_NUM]:
            action = ALL_PIECES_NUM  # pass
        if action != ALL_PIECES_NUM and not (action in legal_actions):
            return
        # Advance to the next state, keeping the current one for undo.
        self.prev_state = self.state
        self.state = self.state.next(action)
        print("check2")
        self.on_draw()
        # Hand the turn to the AI.
        self.master.after(1, self.turn_of_ai)

    def turn_of_ai(self):
        """Let the AI pick and play its move."""
        # Game over: nothing to do.
        if self.state.is_done():
            print("first player is ",
                  "lose" if self.state.is_lose() else "win")
            return
        action = self.next_action(self.state)
        print(action, get_coodicate(action))
        self.state = self.state.next(action)
        self.on_draw()

    def draw_piece(self, index, first_player):
        """Draw one stone (black for the first player, white otherwise)
        at board cell `index`."""
        x = (index % BOARD_SIZE) * 40 + (BOARD_SIZE - 1)
        y = int(index / BOARD_SIZE) * 40 + (BOARD_SIZE - 1)
        if first_player:
            self.c.create_oval(x, y, x + 30, y + 30, width=1.0,
                               outline='#000000', fill='#000000')
        else:
            self.c.create_oval(x, y, x + 30, y + 30, width=1.0,
                               outline='#000000', fill='#FFFFFF')

    def on_draw(self):
        """Redraw the board background, grid lines, and all stones."""
        self.c.delete('all')
        self.c.create_rectangle(0, 0, BOARD_SIZE * 40, BOARD_SIZE * 40,
                                width=0.0, fill='#C69C6C')
        for i in range(1, BOARD_SIZE + 2 + 1):
            self.c.create_line(0, i * 40, BOARD_SIZE * 40, i * 40,
                               width=1.0, fill='#000000')
            self.c.create_line(i * 40, 0, i * 40, BOARD_SIZE * 40,
                               width=1.0, fill='#000000')
        for i in range(ALL_PIECES_NUM):
            if self.state.pieces[i] == 1:
                self.draw_piece(i, self.state.is_first_player())
            if self.state.enemy_pieces[i] == 1:
                self.draw_piece(i, not self.state.is_first_player())
def evaluate_elite(params):
    """Evaluate an elite parameter set by round-robin games against the
    fixed `enemy_params` agent over all initial piece layouts (including
    mirrored layouts and side swaps).

    Args:
        params: parameter vector turned into an evaluation function via
            `create_ev_func`.

    Returns:
        [winning_rate, avg_num_of_turns, avg_num_of_kill_pieces].

    NOTE(review): `avg_ratio_of_move_blue_piece` is computed below but not
    included in the returned list — confirm whether it should be returned
    (it looks like an intended MAP-Elites descriptor).
    """
    # Evaluation functions for the candidate and the fixed opponent.
    ev_func = create_ev_func(params)
    enemy_ev_func = create_ev_func(enemy_params)
    buttle_piece_lists = create_buttle_piece_lists()
    num_of_wins = 0.0
    num_of_matches = 0.0
    num_of_turns = 0.0           # turns until the game was decided (MAP descriptor)
    num_of_kill_pieces = 0.0     # enemy pieces captured until the end
    num_of_blue_move_turns = 0.0 # number of moves made with a blue piece
    # i and j: pick two layouts (with repetition) from buttle_piece_lists.
    for i in buttle_piece_lists:
        # Use both i and its horizontal mirror.
        # (mirror-i vs mirror-j is equivalent to i vs j, so only i is mirrored.)
        mirror_i = invert_piece_list(i)
        for k in [i, mirror_i]:
            for j in buttle_piece_lists:
                state = State(create_pieces_matrix(k), create_pieces_matrix(j))
                while True:
                    # Game over
                    if state.is_done():
                        num_of_matches += 1
                        num_of_turns += state.depth
                        num_of_kill_pieces += check_num_of_killed_enemy_pieces(
                            state, True)
                        if state.is_lose():
                            if state.depth % 2 == 1:
                                num_of_wins += 1  # first player (candidate) wins
                        else:  # draw
                            num_of_wins += 0.5
                        break
                    # Select an action
                    if state.is_first_player():
                        action = alpha_beta_action(state, ev_func)
                        num_of_blue_move_turns += judge_move_piece_color(
                            action, state.pieces)
                    else:
                        action = alpha_beta_action(state, enemy_ev_func)
                    state = state.next(action)
                # Swap first/second player on the same board.
                state = State(create_pieces_matrix(k), create_pieces_matrix(j))
                while True:
                    if state.is_done():
                        num_of_matches += 1
                        num_of_turns += state.depth
                        num_of_kill_pieces += check_num_of_killed_enemy_pieces(
                            state, False)
                        if state.is_lose():
                            if state.depth % 2 == 0:
                                num_of_wins += 1  # second player (candidate) wins
                        else:
                            num_of_wins += 0.5
                        break
                    # Select an action
                    if state.is_first_player():
                        action = alpha_beta_action(state, enemy_ev_func)
                    else:
                        action = alpha_beta_action(state, ev_func)
                        num_of_blue_move_turns += judge_move_piece_color(
                            action, state.pieces)
                    state = state.next(action)
    winning_rate = num_of_wins / num_of_matches
    avg_num_of_turns = num_of_turns / num_of_matches
    avg_num_of_kill_pieces = num_of_kill_pieces / num_of_matches
    # The candidate plays every other turn, hence the factor of 2.
    avg_ratio_of_move_blue_piece = (2 * num_of_blue_move_turns) / num_of_turns
    return [winning_rate, avg_num_of_turns, avg_num_of_kill_pieces]
def fair_game(agent1, agent2, csv_writer, game_num=100, seed=None):
    """Play `game_num` games between two agents in seed-paired fashion
    (each seed is played twice with sides swapped), logging every game to
    CSV, and report agent1's winning rate.

    Args:
        agent1: action function for the agent being measured.
        agent2: action function for the opponent.
        csv_writer: `csv.writer`-like object receiving per-game rows.
        game_num: total number of games (played as game_num // 2 pairs).
        seed: base value multiplied by (i + 1) to seed `random`.
            Defaults to a fresh `random.random()` per call (the previous
            default was evaluated once at import time).
    """
    if seed is None:
        seed = random.random()
    create_ev_table(ev_table)  # build the evaluation-function table
    winning_rate = 0.0
    drow_count = 0
    for i in range(game_num // 2):
        csv_writer.writerow(["seed値", str(seed * (i + 1))])  # log the seed
        print(seed * (i + 1))  # shown for the supervisor
        random.seed(seed * (i + 1))
        state = State()
        csv_writer.writerow(["初期盤面"])
        write_board_csv(state, csv_writer)
        while True:
            # Game over
            if state.is_done():
                if state.is_lose():
                    if state.depth % 2 == 1:
                        csv_writer.writerow(["winner = agent1"])
                        winning_rate += 1
                    else:
                        csv_writer.writerow(["winner = agent2"])
                    csv_writer.writerow(["決まり手", state_winning_reason(state)])
                else:  # draw
                    csv_writer.writerow(["winner = none (drow) "])
                    winning_rate += 0.5
                    drow_count += 1
                csv_writer.writerow(["ターン数", state.depth])
                csv_writer.writerow(["最終盤面"])
                write_board_csv(state, csv_writer)  # log the final board
                csv_writer.writerow(
                    ["〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜"])
                break
            if state.is_first_player():
                action = agent1(state)  # first player
            else:
                action = agent2(state)  # second player
            print("state.depth", state.depth, "action", action)
            state = state.next(action)
        # Swap first/second player and replay with the same seed
        random.seed(seed * (i + 1))
        state = State()
        csv_writer.writerow(["初期盤面"])
        write_board_csv(state, csv_writer)
        while True:
            if state.is_done():
                if state.is_lose():
                    if state.depth % 2 == 0:
                        csv_writer.writerow(["winner = agent1"])
                        winning_rate += 1  # agent1 played second
                    else:
                        csv_writer.writerow(["winner = agent2"])
                    csv_writer.writerow(["決まり手", state_winning_reason(state)])
                else:  # draw
                    csv_writer.writerow(["winner = none (drow) "])
                    winning_rate += 0.5
                    drow_count += 1
                csv_writer.writerow(["ターン数", state.depth])
                csv_writer.writerow(["最終盤面"])
                write_board_csv(state, csv_writer)  # log the final board
                csv_writer.writerow(
                    ["〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜"])
                break
            if state.is_first_player():
                action = agent2(state)
            else:
                action = agent1(state)
            print("state.depth", state.depth, "action", action)
            state = state.next(action)
        print(winning_rate)
    csv_writer.writerow(
        ["agent1の勝率", str((winning_rate / game_num) * 100) + "%"])
    csv_writer.writerow(["引き分けの回数", drow_count])
def evaluate_elite(params, seed=None):
    """Evaluate an elite distance-parameter pair by playing alpha-beta
    (with a distance-based evaluation built from `params`) against MCTS
    over all initial piece layouts, mirrored layouts, and side swaps.

    NOTE(review): this redefines (and shadows) the earlier `evaluate_elite`
    in this file; also the `seed` parameter is never used in the body —
    confirm both are intentional.

    Args:
        params: [blue_coeff, red_coeff] for the 1/r^4 distance terms.
        seed: unused; kept for interface compatibility. Defaults to a
            fresh `random.random()` per call (the previous default was
            evaluated once at import time).

    Returns:
        [winning_rate, avg_num_of_turns, avg_num_of_kill_pieces].
    """
    if seed is None:
        seed = random.random()
    # Build the evaluation function from the two distance coefficients.
    blue_dis_func = lambda r: params[0] / (r**4)
    red_dis_func = lambda r: params[1] / (r**4)
    ev_func = create_ev_func(0, blue_dis_func, 7, red_dis_func)
    buttle_piece_lists = create_buttle_piece_lists()
    num_of_wins = 0.0
    num_of_matches = 0.0
    num_of_turns = 0.0        # turns until the game was decided (MAP descriptor)
    num_of_kill_pieces = 0.0  # enemy pieces captured until the end
    # i and j: pick two layouts (with repetition) from buttle_piece_lists.
    for i in buttle_piece_lists:
        # Use both i and its horizontal mirror.
        # (mirror-i vs mirror-j is equivalent to i vs j, so only i is mirrored.)
        mirror_i = invert_piece_list(i)
        for k in [i, mirror_i]:
            for j in buttle_piece_lists:
                state = State(create_pieces_matrix(k), create_pieces_matrix(j))
                while True:
                    # Game over
                    if state.is_done():
                        num_of_matches += 1
                        num_of_turns += state.depth
                        num_of_kill_pieces += check_num_of_killed_enemy_pieces(
                            state, True)
                        if state.is_lose():
                            if state.depth % 2 == 1:
                                num_of_wins += 1  # first player (candidate) wins
                        else:  # draw
                            num_of_wins += 0.5
                        break
                    # Select an action
                    if state.is_first_player():
                        action = alpha_beta_action(state, ev_func)
                    else:
                        action = mcts_action(state)
                    state = state.next(action)
                # Swap first/second player on the same board.
                state = State(create_pieces_matrix(k), create_pieces_matrix(j))
                while True:
                    if state.is_done():
                        num_of_matches += 1
                        num_of_turns += state.depth
                        num_of_kill_pieces += check_num_of_killed_enemy_pieces(
                            state, False)
                        if state.is_lose():
                            if state.depth % 2 == 0:
                                num_of_wins += 1  # second player (candidate) wins
                        else:
                            num_of_wins += 0.5
                        break
                    # Select an action
                    if state.is_first_player():
                        action = mcts_action(state)
                    else:
                        action = alpha_beta_action(state, ev_func)
                    state = state.next(action)
    winning_rate = num_of_wins / num_of_matches
    avg_num_of_turns = num_of_turns / num_of_matches
    avg_num_of_kill_pieces = num_of_kill_pieces / num_of_matches
    return [winning_rate, avg_num_of_turns, avg_num_of_kill_pieces]