Example #1
def test_loss_cut_ab(seed=random.random()):
    create_ev_table(ev_table)
    print("seed", seed)

    winning_rate = 0.0
    drow_count = 0
    for i in range(10):
        random.seed(seed * i)
        state = State()
        est_ii_state = EstimatedState()
        est_ii_state.create_est_ii_state_from_state(state)
        while True:
            # When the game ends
            if state.is_done():
                if state.is_lose():
                    if state.depth % 2 == 1:
                        winning_rate += 1  # first player wins
                else:  # draw
                    winning_rate += 0.5
                    drow_count += 1
                break

            # Get the action
            if state.is_first_player():
                action, est_ii_state = cut_loss_alpha_beta_action(
                    est_ii_state, 5)
                est_ii_state.my_real_next(state, action)
            else:
                action = alpha_beta_action(state)
                est_ii_state.enemy_real_next(action)
            print(state)
            state = state.next(action)

        # Play again under the same conditions with the players swapped
        random.seed(seed * i)
        state = State()
        # Re-initialize the estimated imperfect-information state for the swapped game
        est_ii_state = EstimatedState()
        est_ii_state.create_est_ii_state_from_state(state)
        while True:
            if state.is_done():
                if state.is_lose():
                    if state.depth % 2 == 0:
                        winning_rate += 1  # second player wins
                else:  # draw
                    winning_rate += 0.5
                    drow_count += 1
                break

            # Get the action
            if state.is_first_player():
                action = alpha_beta_action(state)
                est_ii_state.enemy_real_next(action)
            else:
                action = perfect_alpha_beta_action(state, 5)
                est_ii_state.my_real_next(state, action)
            state = state.next(action)

        print(winning_rate, (i + 1) * 2, drow_count)
Example #2
def exp_reduction_effect(
        seed=random.random(), reduction_func=IDDFS_alpha_beta_action):
    # Build the evaluation table
    create_ev_table(ev_table)
    print("seed", seed)

    reduction_ab_action = time_limit_alpha_beta(reduction_func)  # side whose win rate is measured
    simple_ab_action = time_limit_alpha_beta(alpha_beta_action)  # opponent

    winning_rate = 0.0
    drow_count = 0
    for i in range(50):
        random.seed(seed * i)
        state = State()
        while True:
            # When the game ends
            if state.is_done():
                if state.is_lose():
                    if state.depth % 2 == 1:
                        winning_rate += 1  # first player wins
                else:  # draw
                    winning_rate += 0.5
                    drow_count += 1
                break

            # Get the action
            if state.is_first_player():
                action = reduction_ab_action(state)
            else:
                action = simple_ab_action(state)
            state = state.next(action)

        # Play again under the same conditions with the players swapped
        random.seed(seed * i)
        state = State()
        while True:
            if state.is_done():
                if state.is_lose():
                    if state.depth % 2 == 0:
                        winning_rate += 1  # second player wins
                else:  # draw
                    winning_rate += 0.5
                    drow_count += 1
                break

            # Get the action
            if state.is_first_player():
                action = simple_ab_action(state)
            else:
                action = reduction_ab_action(state)
            state = state.next(action)

        print(winning_rate, (i + 1) * 2, drow_count)
Example #3
def exp_search_depth_effect(seed=random.random(),
                            deep_depth=5,
                            shallow_depth=3,
                            search_func=alpha_beta_action):
    # Build the evaluation table
    create_ev_table(ev_table)
    print("seed", seed)

    winning_rate = 0.0
    drow_count = 0
    for i in range(50):
        random.seed(seed * i)
        state = State()
        while True:
            # When the game ends
            if state.is_done():
                if state.is_lose():
                    if state.depth % 2 == 1:
                        winning_rate += 1  # first player wins
                else:  # draw
                    winning_rate += 0.5
                    drow_count += 1
                break

            # Get the action
            if state.is_first_player():
                action = search_func(state, deep_depth)  # deep search
            else:
                action = search_func(state, shallow_depth)  # shallow search
            state = state.next(action)

        # Play again under the same conditions with the players swapped
        random.seed(seed * i)
        state = State()
        while True:
            if state.is_done():
                if state.is_lose():
                    if state.depth % 2 == 0:
                        winning_rate += 1  # second player wins
                else:  # draw
                    winning_rate += 0.5
                    drow_count += 1
                break

            # Get the action
            if state.is_first_player():
                action = search_func(state, shallow_depth)  # shallow search
            else:
                action = search_func(state, deep_depth)  # deep search
            state = state.next(action)

        print(winning_rate, (i + 1) * 2, drow_count)
Example #4
def exp_effect_of_search_depth(func_id=2, seed=random.random()):
    # Build the evaluation table
    create_ev_table(ev_table, select_func(func_id))
    print("seed", seed)

    gamma = 100000  # no threshold cut
    depths = [2, 3, 4, 5, 6]
    for depth in depths:
        winning_rate = 0.0
        drows_count = 0
        for i in range(100):
            random.seed(seed * i)
            state = State()
            while True:
                # When the game ends
                if state.is_done():
                    if state.is_lose():
                        if state.depth % 2 == 0:
                            winning_rate += 1
                    else:
                        drows_count += 1
                        winning_rate += 0.5
                    break

                # Get the action
                if state.is_first_player():
                    action = mcts_action(state)
                else:
                    action = alpha_beta_action(state, gamma, depth)
                state = state.next(action)
        print("勝率", winning_rate, "drows_count=", drows_count)
Example #5
def exp_fair_compete(depth=5, func_id=3, seed=random.random()):
    gamma = 100000  # no threshold cut
    restricts = [True, False]
    print(seed)
    for restrict in restricts:
        create_ev_table(ev_table, select_func(func_id))
        winning_rate = 0.0
        drows_count = 0
        for i in range(100):
            random.seed(seed * i)
            state = State()
            while True:
                # When the game ends
                if state.is_done():
                    if state.is_lose():
                        if state.depth % 2 == 0:
                            winning_rate += 1
                    else:
                        winning_rate += 0.5
                        drows_count += 1
                    break

                # Get the action
                if state.is_first_player():
                    action = alpha_beta_action(state, gamma, depth,
                                               not restrict)
                else:
                    action = alpha_beta_action(state, gamma, depth, restrict)
                state = state.next(action)
        print("制限", restrict, "のエージェントが後手の際の勝率")
        print(winning_rate, "drows_count=", drows_count)
Example #6
def exp_effect_of_action_restrict_for_compete(depth=5,
                                              func_id=2,
                                              rdm=random.random()):
    gamma = 100000  # no threshold cut
    restricts = [True, False]
    for restrict in restricts:
        create_ev_table(ev_table, select_func(func_id))
        winning_rate = 0.0
        drows_count = 0
        for i in range(100):
            random.seed(rdm * i)
            state = State()
            while True:
                # When the game ends
                if state.is_done():
                    if state.is_lose():
                        if state.depth % 2 == 0:
                            winning_rate += 1
                    else:
                        winning_rate += 0.5
                        drows_count += 1
                    break

                # Get the action
                if state.is_first_player():
                    action = ii_mcts_action(state)
                else:
                    action = alpha_beta_action(state, gamma, depth, restrict)
                state = state.next(action)
        print("restrict", restrict)
        print(winning_rate, "drows_count=", drows_count)
Example #7
def exp_effect_of_search_depth():
    gamma = 100000  # no threshold cut
    rdm = random.random()
    for func_id in range(8):
        create_ev_table(ev_table, select_func(func_id))
        winning_rate = 0.0
        drows_count = 0
        for i in range(100):
            random.seed(rdm * i)
            state = State()
            while True:
                # When the game ends
                if state.is_done():
                    if state.is_lose():
                        if state.depth % 2 == 0:
                            winning_rate += 1
                    else:
                        winning_rate += 0.5
                        drows_count += 1
                    break

                # Get the action
                if state.is_first_player():
                    action = ii_mcts_action(state)
                else:
                    action = alpha_beta_action(state, gamma, 5)
                state = state.next(action)
        print(winning_rate, "id=", func_id, "drows_count=", drows_count)
Example #8
def vs_mcts(ev_func, seed, buttle_num):
    winning_rate = 0.0
    drow_count = 0
    for i in range(buttle_num):
        random.seed(seed * i)
        state = State()
        while True:
            # When the game ends
            if state.is_done():
                if state.is_lose():
                    if state.depth % 2 == 1:
                        winning_rate += 1  # first player wins
                else:  # draw
                    winning_rate += 0.5
                    drow_count += 1
                break

            # Get the action
            if state.is_first_player():
                action = alpha_beta_action(state, ev_func, 5)
            else:
                action = mcts_action(state)
            state = state.next(action)

        # Play again under the same conditions with the players swapped
        random.seed(seed * i)
        state = State()
        while True:
            if state.is_done():
                if state.is_lose():
                    if state.depth % 2 == 0:
                        winning_rate += 1  # second player wins
                else:
                    winning_rate += 0.5
                    drow_count += 1
                break

            # Get the action
            if state.is_first_player():
                action = mcts_action(state)
            else:
                action = alpha_beta_action(state, ev_func, 5)
            state = state.next(action)

    print(winning_rate, drow_count)
    return winning_rate
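A minimal usage sketch, not part of the original example: it drives vs_mcts over several candidate evaluation functions and collects their scores. The name compare_ev_funcs is illustrative; the only assumption is that each candidate is an evaluation function accepted by alpha_beta_action above.

def compare_ev_funcs(candidate_ev_funcs, seed=None, battles=20):
    # Hypothetical helper: run vs_mcts once per candidate with a shared seed.
    if seed is None:
        seed = random.random()
    return [vs_mcts(ev_func, seed, battles) for ev_func in candidate_ev_funcs]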
Example #9
def play(model, using_saved_state=False, saving_ontheway_state=False):
    '''
    Run one game.
    '''

    # Training data
    history = []

    # Create the state
    if using_saved_state:
        state = load_state()
        if not state:
            state = State()
    else:
        state = State()

    starttime = time.time()
    print('')
    while True:
        # When the game ends
        if state.is_done():
            endtime = time.time()
            print("first player is ", "lose" if state.is_lose() else "win")
            print("first player num:", state.piece_count(state.pieces))
            print('elapsed time', endtime - starttime)
            print(state)
            break

        # Get the probability distribution over legal moves
        scores = pv_mcts_scores(model, state, SP_TEMPERATURE)

        # Add the state and policy to the training data
        policies = [0] * DN_OUTPUT_SIZE
        for action, policy in zip(state.legal_actions(), scores):
            policies[action] = policy
        history.append([[state.pieces, state.enemy_pieces], policies, None])

        # Get the action
        if len(history) % 10 == 0:
            print("state len: ", len(history))
            print(state)

        if saving_ontheway_state and len(history) == 25:
            save_state(state)
        action = np.random.choice(state.legal_actions(), p=scores)

        # Get the next state
        state = state.next(action)

    # Add the value to the training data
    value = first_player_value(state)
    for i in range(len(history)):
        history[i][2] = value
        value = -value
    return history
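A minimal self-play sketch, assuming only play and a trained model object as used above. Each call to play returns a list of [state, policy, value] records, so repeated calls can simply be concatenated into a training set; self_play is an illustrative name, not part of the original code.

def self_play(model, game_count=10):
    # Collect training records from several self-play games.
    history = []
    for _ in range(game_count):
        history.extend(play(model))
    return history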
Example #10
def exp_gamma_winning_rate(depth=5, func_id=2, seed=random.random()):
    # Build the evaluation table
    create_ev_table(ev_table, select_func(func_id))
    keep_gamma_winning_rate = [0] * 30
    print("seed", seed)

    gamma = 0.0
    for index, _ in enumerate(keep_gamma_winning_rate):
        winning_rate = 0.0
        for i in range(100):
            random.seed(seed * i)
            state = State()
            while True:
                # When the game ends
                if state.is_done():
                    if state.is_lose():
                        if state.depth % 2 == 0:
                            winning_rate += 1  # second player wins
                        # elif state.depth % 2 == 1:
                        #     pass  # first player wins
                    else:  # draw
                        winning_rate += 0.5
                    break

                # Get the action
                if state.is_first_player():
                    # action = random_action(state)
                    action = mcts_action(state)
                else:
                    # action = alpha_beta_action(state, gamma)
                    action = alpha_beta_action(state, gamma, depth, False)
                state = state.next(action)

        keep_gamma_winning_rate[index] = winning_rate
        print(keep_gamma_winning_rate)
        gamma += 0.1
    print(keep_gamma_winning_rate)
Example #11
class GameUI(tk.Frame):
    '''
    Definition of the game UI.
    '''

    # Initialization
    def __init__(self, master=None, model=None, ai_is_first=True):
        self.ai_is_first = ai_is_first
        tk.Frame.__init__(self, master)
        self.master.title('Reversi')

        # Create the game state
        self.state = State()
        self.prev_state = None

        # Create the action-selection function using PV MCTS
        self.next_action = pv_mcts_action(model, 0.0)
        # self.next_action = mcs_action

        # Create the canvas
        self.c = tk.Canvas(self,
                           width=BOARD_SIZE * 40 + 40,
                           height=BOARD_SIZE * 40,
                           highlightthickness=0)

        # If the AI moves first (the human plays second), let the AI move now
        if self.ai_is_first:
            self.turn_of_ai()
        self.c.bind('<Button-1>', self.turn_of_human)
        self.c.pack()

        # Update the drawing
        self.on_draw()

    # Human's turn
    def turn_of_human(self, event):
        # When the game ends
        if self.state.is_done():
            print("first player is ",
                  "lose" if self.state.is_lose() else "win")
            self.state = State()
            self.prev_state = None
            self.on_draw()
            return

        # Check whose turn it is
        is_human_turn = None
        if self.ai_is_first:
            is_human_turn = not self.state.is_first_player()
        else:
            is_human_turn = self.state.is_first_player()

        if not is_human_turn:
            return

        # Convert the click position to an action
        x = int(event.x / 40)
        y = int(event.y / 40)

        is_back = x > BOARD_SIZE - 1
        print("x y", x, y)
        if is_back and self.prev_state:
            print("check modoru")
            print("")
            self.state = self.prev_state
            self.prev_state = None
            self.on_draw()
            return

        # Out of range
        if x < 0 or x > BOARD_SIZE - 1 or y < 0 or y > BOARD_SIZE - 1:
            print("out of range")
            return
        action = x + y * BOARD_SIZE
        print("human", action, get_coodicate(action))
        # When the action is not a legal move
        legal_actions = self.state.legal_actions()
        if legal_actions == [ALL_PIECES_NUM]:
            action = ALL_PIECES_NUM  # pass
        if action != ALL_PIECES_NUM and not (action in legal_actions):
            return

        # Get the next state
        self.prev_state = self.state  # save the current state
        self.state = self.state.next(action)
        print("check2")
        self.on_draw()

        # AI's turn
        self.master.after(1, self.turn_of_ai)

    # AI's turn
    def turn_of_ai(self):
        # When the game ends
        if self.state.is_done():
            print("first player is ",
                  "lose" if self.state.is_lose() else "win")
            return

        # Get the action
        action = self.next_action(self.state)
        print(action, get_coodicate(action))

        # Get the next state
        self.state = self.state.next(action)
        self.on_draw()

    # Draw a piece
    def draw_piece(self, index, first_player):
        x = (index % BOARD_SIZE) * 40 + (BOARD_SIZE - 1)
        y = int(index / BOARD_SIZE) * 40 + (BOARD_SIZE - 1)
        if first_player:
            self.c.create_oval(x,
                               y,
                               x + 30,
                               y + 30,
                               width=1.0,
                               outline='#000000',
                               fill='#000000')
        else:
            self.c.create_oval(x,
                               y,
                               x + 30,
                               y + 30,
                               width=1.0,
                               outline='#000000',
                               fill='#FFFFFF')

    # Update the drawing
    def on_draw(self):
        self.c.delete('all')

        self.c.create_rectangle(0,
                                0,
                                BOARD_SIZE * 40,
                                BOARD_SIZE * 40,
                                width=0.0,
                                fill='#C69C6C')
        for i in range(1, BOARD_SIZE + 2 + 1):
            self.c.create_line(0,
                               i * 40,
                               BOARD_SIZE * 40,
                               i * 40,
                               width=1.0,
                               fill='#000000')
            self.c.create_line(i * 40,
                               0,
                               i * 40,
                               BOARD_SIZE * 40,
                               width=1.0,
                               fill='#000000')
        for i in range(ALL_PIECES_NUM):
            if self.state.pieces[i] == 1:
                self.draw_piece(i, self.state.is_first_player())
            if self.state.enemy_pieces[i] == 1:
                self.draw_piece(i, not self.state.is_first_player())
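A minimal launch sketch, hedged: it assumes a model object has already been loaded by the surrounding project; GameUI and tk are as defined above, and launch_game_ui is an illustrative name.

def launch_game_ui(model, ai_is_first=True):
    # Build the game frame (tk.Frame creates a default root when master is None),
    # attach it, and start the Tk event loop.
    f = GameUI(model=model, ai_is_first=ai_is_first)
    f.pack()
    f.mainloop()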
Example #12
def evaluate_elite(params):
    # Build the evaluation function from params
    ev_func = create_ev_func(params)
    # Build the opponent's evaluation function
    enemy_ev_func = create_ev_func(enemy_params)

    buttle_piece_lists = create_buttle_piece_lists()

    num_of_wins = 0.0
    num_of_matches = 0.0
    num_of_turns = 0.0  # number of turns until the game ended (MAP descriptor)
    num_of_kill_pieces = 0.0  # number of opponent pieces captured before the end
    num_of_blue_move_turns = 0.0  # number of moves made with blue pieces

    # i and j: choose two patterns from buttle_piece_lists, with repetition allowed
    for i in buttle_piece_lists:
        # Prepare the original i and a horizontally mirrored version of i
        # (mirrored i vs mirrored j is equivalent to i vs j, so j need not be mirrored)
        mirror_i = invert_piece_list(i)
        for k in [i, mirror_i]:
            for j in buttle_piece_lists:
                state = State(create_pieces_matrix(k), create_pieces_matrix(j))
                while True:
                    # When the game ends
                    if state.is_done():
                        num_of_matches += 1
                        num_of_turns += state.depth
                        num_of_kill_pieces += check_num_of_killed_enemy_pieces(
                            state, True)
                        if state.is_lose():
                            if state.depth % 2 == 1:
                                num_of_wins += 1  # first player wins
                        else:  # draw
                            num_of_wins += 0.5
                        break

                    # Get the action
                    if state.is_first_player():
                        action = alpha_beta_action(state, ev_func)
                        num_of_blue_move_turns += judge_move_piece_color(
                            action, state.pieces)
                    else:
                        # action = mcts_action(state)
                        action = alpha_beta_action(state, enemy_ev_func)
                    state = state.next(action)

                # Play again with the players swapped (same board setup)
                state = State(create_pieces_matrix(k), create_pieces_matrix(j))
                while True:
                    if state.is_done():
                        num_of_matches += 1
                        num_of_turns += state.depth
                        num_of_kill_pieces += check_num_of_killed_enemy_pieces(
                            state, False)
                        if state.is_lose():
                            if state.depth % 2 == 0:
                                num_of_wins += 1  # second player wins
                        else:
                            num_of_wins += 0.5
                        break

                    # Get the action
                    if state.is_first_player():
                        # action = mcts_action(state)
                        action = alpha_beta_action(state, enemy_ev_func)
                    else:
                        action = alpha_beta_action(state, ev_func)
                        num_of_blue_move_turns += judge_move_piece_color(
                            action, state.pieces)
                    state = state.next(action)

    winning_rate = num_of_wins / num_of_matches
    avg_num_of_turns = num_of_turns / num_of_matches
    avg_num_of_kill_pieces = num_of_kill_pieces / num_of_matches
    avg_ratio_of_move_blue_piece = (2 * num_of_blue_move_turns) / num_of_turns
    return [winning_rate, avg_num_of_turns, avg_num_of_kill_pieces]
Example #13
def fair_game(agent1, agent2, csv_writer, game_num=100, seed=random.random()):
    create_ev_table(ev_table)  # build the evaluation function table
    winning_rate = 0.0
    drow_count = 0
    for i in range(game_num // 2):
        csv_writer.writerow(["seed値", str(seed * (i + 1))])  # seed値を出力
        print(seed * (i + 1))  # 先生に見せる用
        random.seed(seed * (i + 1))
        state = State()
        csv_writer.writerow(["初期盤面"])
        write_board_csv(state, csv_writer)
        while True:
            # When the game ends
            if state.is_done():
                if state.is_lose():
                    if state.depth % 2 == 1:
                        csv_writer.writerow(["winner = agent1"])
                        winning_rate += 1
                    else:
                        csv_writer.writerow(["winner = agent2"])
                    csv_writer.writerow(["決まり手", state_winning_reason(state)])
                else:  # 引き分け
                    csv_writer.writerow(["winner = none (drow) "])
                    winning_rate += 0.5
                    drow_count += 1
                csv_writer.writerow(["ターン数", state.depth])
                csv_writer.writerow(["最終盤面"])
                write_board_csv(state, csv_writer)  # 最終盤面を出力
                csv_writer.writerow(["〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜"])
                break
            if state.is_first_player():
                action = agent1(state)  # first player
            else:
                action = agent2(state)  # second player
            print("state.depth", state.depth, "action", action)
            state = state.next(action)

        # Play again under the same conditions with the players swapped
        random.seed(seed * (i + 1))
        state = State()
        csv_writer.writerow(["初期盤面"])
        write_board_csv(state, csv_writer)
        while True:
            if state.is_done():
                if state.is_lose():
                    if state.depth % 2 == 0:
                        csv_writer.writerow(["winner = agent1"])
                        winning_rate += 1  # agent1 plays second
                    else:
                        csv_writer.writerow(["winner = agent2"])
                    csv_writer.writerow(["決まり手", state_winning_reason(state)])
                else:  # 引き分け
                    csv_writer.writerow(["winner = none (drow) "])
                    winning_rate += 0.5
                    drow_count += 1
                csv_writer.writerow(["ターン数", state.depth])
                csv_writer.writerow(["最終盤面"])
                write_board_csv(state, csv_writer)  # 最終盤面を出力
                csv_writer.writerow(["〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜〜"])
                break
            if state.is_first_player():
                action = agent2(state)
            else:
                action = agent1(state)
            print("state.depth", state.depth, "action", action)
            state = state.next(action)
    print(winning_rate)
    csv_writer.writerow(
        ["agent1 win rate", str((winning_rate / game_num) * 100) + "%"])
    csv_writer.writerow(["number of draws", drow_count])
Example #14
def evaluate_elite(params, seed=random.random()):
    # Build the evaluation function from params
    blue_dis_func = lambda r: params[0] / (r**4)
    red_dis_func = lambda r: params[1] / (r**4)
    ev_func = create_ev_func(0, blue_dis_func, 7, red_dis_func)

    buttle_piece_lists = create_buttle_piece_lists()

    num_of_wins = 0.0
    num_of_matches = 0.0
    num_of_turns = 0.0  # number of turns until the game ended (MAP descriptor)
    num_of_kill_pieces = 0.0  # number of opponent pieces captured before the end

    # i and j: choose two patterns from buttle_piece_lists, with repetition allowed
    for i in buttle_piece_lists:
        # Prepare the original i and a horizontally mirrored version of i
        # (mirrored i vs mirrored j is equivalent to i vs j, so j need not be mirrored)
        mirror_i = invert_piece_list(i)
        for k in [i, mirror_i]:
            for j in buttle_piece_lists:
                state = State(create_pieces_matrix(k), create_pieces_matrix(j))
                while True:
                    # When the game ends
                    if state.is_done():
                        num_of_matches += 1
                        num_of_turns += state.depth
                        num_of_kill_pieces += check_num_of_killed_enemy_pieces(
                            state, True)
                        if state.is_lose():
                            if state.depth % 2 == 1:
                                num_of_wins += 1  # first player wins
                        else:  # draw
                            num_of_wins += 0.5
                        break

                    # Get the action
                    if state.is_first_player():
                        action = alpha_beta_action(state, ev_func)
                    else:
                        action = mcts_action(state)
                    state = state.next(action)

                # Play again with the players swapped (same board setup)
                state = State(create_pieces_matrix(k), create_pieces_matrix(j))
                while True:
                    if state.is_done():
                        num_of_matches += 1
                        num_of_turns += state.depth
                        num_of_kill_pieces += check_num_of_killed_enemy_pieces(
                            state, False)
                        if state.is_lose():
                            if state.depth % 2 == 0:
                                num_of_wins += 1  # second player wins
                        else:
                            num_of_wins += 0.5
                        break

                    # Get the action
                    if state.is_first_player():
                        action = mcts_action(state)
                    else:
                        action = alpha_beta_action(state, ev_func)
                    state = state.next(action)

    winning_rate = num_of_wins / num_of_matches
    avg_num_of_turns = num_of_turns / num_of_matches
    avg_num_of_kill_pieces = num_of_kill_pieces / num_of_matches
    return [winning_rate, avg_num_of_turns, avg_num_of_kill_pieces]
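A minimal sketch, hedged: evaluating a whole population of parameter vectors with evaluate_elite. The names evaluate_population and population are illustrative; each element is assumed to be a params list understood by the lambdas above (two scalar weights).

def evaluate_population(population, seed=None):
    # Each result is [winning_rate, avg_num_of_turns, avg_num_of_kill_pieces].
    if seed is None:
        seed = random.random()
    return [evaluate_elite(params, seed) for params in population]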