def test_loss_cut_ab(seed=random.random()):
    """Play cut_loss_alpha_beta_action against alpha_beta_action for 10
    seeded game pairs (sides swapped) and print the running score.

    NOTE(review): the default ``seed`` is evaluated once at import time,
    so repeated calls without an argument reuse the same seed.
    """
    create_ev_table(ev_table)  # build the evaluation table used by the searches
    print("seed", seed)
    winning_rate = 0.0  # 1 point per win, 0.5 per draw, for the measured agent
    drow_count = 0      # (sic) number of draws
    for i in range(10):
        random.seed(seed * i)
        state = State()
        # Imperfect-information state estimated from the true state.
        est_ii_state = EstimatedState()
        est_ii_state.create_est_ii_state_from_state(state)
        while True:
            # Game over
            if state.is_done():
                if state.is_lose():
                    if state.depth % 2 == 1:
                        winning_rate += 1  # first player won
                else:  # draw
                    winning_rate += 0.5
                    drow_count += 1
                break
            # Choose an action
            if state.is_first_player():
                action, est_ii_state = cut_loss_alpha_beta_action(
                    est_ii_state, 5)
                est_ii_state.my_real_next(state, action)
            else:
                action = alpha_beta_action(state)
                est_ii_state.enemy_real_next(action)
            print(state)
            state = state.next(action)
        # Rematch with sides swapped under the same seed.
        # NOTE(review): est_ii_state is NOT re-initialized for this second
        # game yet is still updated below — confirm this is intended.
        random.seed(seed * i)
        state = State()
        while True:
            if state.is_done():
                if state.is_lose():
                    if state.depth % 2 == 0:
                        winning_rate += 1  # second player won
                else:  # draw
                    winning_rate += 0.5
                    drow_count += 1
                break
            # Choose an action
            if state.is_first_player():
                action = alpha_beta_action(state)
                est_ii_state.enemy_real_next(action)
            else:
                action = perfect_alpha_beta_action(state, 5)
                est_ii_state.my_real_next(state, action)
            state = state.next(action)
        print(winning_rate, (i + 1) * 2, drow_count)
def exp_reduction_effect(
        seed=random.random(), reduction_func=IDDFS_alpha_beta_action):
    """Measure the score of a time-limited ``reduction_func`` against a
    time-limited plain alpha-beta over 50 seeded game pairs (sides swapped).

    NOTE(review): the default ``seed`` is evaluated once at import time.
    """
    # Build the evaluation table.
    create_ev_table(ev_table)
    print("seed", seed)
    reduction_ab_action = time_limit_alpha_beta(reduction_func)  # agent being measured
    simple_ab_action = time_limit_alpha_beta(alpha_beta_action)  # opponent
    winning_rate = 0.0  # 1 per win, 0.5 per draw, for the measured agent
    drow_count = 0      # (sic) number of draws
    for i in range(50):
        random.seed(seed * i)
        state = State()
        while True:
            # Game over
            if state.is_done():
                if state.is_lose():
                    if state.depth % 2 == 1:
                        winning_rate += 1  # first player (measured agent) won
                else:  # draw
                    winning_rate += 0.5
                    drow_count += 1
                break
            # Choose an action
            if state.is_first_player():
                action = reduction_ab_action(state)
            else:
                action = simple_ab_action(state)
            state = state.next(action)
        # Rematch with sides swapped under the same seed.
        random.seed(seed * i)
        state = State()
        while True:
            if state.is_done():
                if state.is_lose():
                    if state.depth % 2 == 0:
                        winning_rate += 1  # second player (measured agent) won
                else:  # draw
                    winning_rate += 0.5
                    drow_count += 1
                break
            # Choose an action
            if state.is_first_player():
                action = simple_ab_action(state)
            else:
                action = reduction_ab_action(state)
            state = state.next(action)
        print(winning_rate, (i + 1) * 2, drow_count)
def exp_search_depth_effect(seed=random.random(), deep_depth=5,
                            shallow_depth=3, search_func=alpha_beta_action):
    """Pit ``search_func`` at ``deep_depth`` against itself at
    ``shallow_depth`` over 50 seeded game pairs (sides swapped) and print
    the deep searcher's running score.

    NOTE(review): the default ``seed`` is evaluated once at import time.
    """
    # Build the evaluation table.
    create_ev_table(ev_table)
    print("seed", seed)
    winning_rate = 0.0  # 1 per win, 0.5 per draw, for the deep searcher
    drow_count = 0      # (sic) number of draws
    for i in range(50):
        random.seed(seed * i)
        state = State()
        while True:
            # Game over
            if state.is_done():
                if state.is_lose():
                    if state.depth % 2 == 1:
                        winning_rate += 1  # first player (deep search) won
                else:  # draw
                    winning_rate += 0.5
                    drow_count += 1
                break
            # Choose an action
            if state.is_first_player():
                action = search_func(state, deep_depth)     # deep search
            else:
                action = search_func(state, shallow_depth)  # shallow search
            state = state.next(action)
        # Rematch with sides swapped under the same seed.
        random.seed(seed * i)
        state = State()
        while True:
            if state.is_done():
                if state.is_lose():
                    if state.depth % 2 == 0:
                        winning_rate += 1  # second player (deep search) won
                else:  # draw
                    winning_rate += 0.5
                    drow_count += 1
                break
            # Choose an action
            if state.is_first_player():
                action = search_func(state, shallow_depth)  # shallow search
            else:
                action = search_func(state, deep_depth)     # deep search
            state = state.next(action)
        print(winning_rate, (i + 1) * 2, drow_count)
def exp_effect_of_search_depth(func_id=2, seed=random.random()):
    """For each search depth in 2..6, play 100 seeded games of
    mcts_action (first player) vs alpha_beta_action (second player) and
    print the alpha-beta side's score per depth.

    NOTE(review): a later zero-argument function of the same name exists
    in this file; if both live in one module, the later definition
    shadows this one. The default ``seed`` is evaluated at import time.
    """
    # Build the evaluation table with the selected evaluation function.
    create_ev_table(ev_table, select_func(func_id))
    print("seed", seed)
    gamma = 100000  # effectively disables the threshold cut
    depths = [2, 3, 4, 5, 6]
    for depth in depths:
        winning_rate = 0.0
        drows_count = 0  # (sic) number of draws
        for i in range(100):
            random.seed(seed * i)
            state = State()
            while True:
                # Game over
                if state.is_done():
                    if state.is_lose():
                        if state.depth % 2 == 0:
                            winning_rate += 1  # second player (alpha-beta) won
                    else:  # draw
                        drows_count += 1
                        winning_rate += 0.5
                    break
                # Choose an action
                if state.is_first_player():
                    action = mcts_action(state)
                else:
                    action = alpha_beta_action(state, gamma, depth)
                state = state.next(action)
        print("勝率", winning_rate, "drows_count=", drows_count)
def exp_value_changing(depth=5, func_id=3, gamma=1.0, seed=None):
    """Record board evaluation values over 100 games of
    move_ordering_alpha_beta_action (first player) vs random_action.

    For every first-player turn, the current board is converted to an
    ``AccessableState`` and its evaluation is recorded along with the
    board; one list per game is appended to the aggregate records, which
    are printed at the end.

    Fixes vs the original:
    - ``record_values.apped`` / ``record_boards.apped`` raised
      AttributeError (typo for ``append``), so the function could never
      finish a game's bookkeeping.
    - ``seed=random.random()`` was evaluated once at import time; a
      ``None`` sentinel now draws a fresh seed per call while remaining
      backward compatible for callers that pass a seed.

    NOTE(review): ``func_id`` and ``gamma`` are accepted but unused here,
    kept for signature compatibility with the sibling experiments.
    """
    if seed is None:
        seed = random.random()
    record_values = []  # per-game lists of evaluation values
    record_boards = []  # per-game lists of boards matching each value
    for i in range(100):
        random.seed(seed * (i + 1))
        state = State()
        ii_state = AccessableState()
        values = []
        boards = []
        while True:
            if state.is_done():
                break
            if state.is_first_player():
                action = move_ordering_alpha_beta_action(state, 1, depth, i)
                # Evaluate and record the current board.
                ii_state.create_ii_state_from_state(state)
                values.append(evaluate_board_state(ii_state))
                boards.append([state.pieces, state.enemy_pieces])
            else:
                action = random_action(state)
            state = state.next(action)
        record_values.append(values)
        record_boards.append(boards)
    # TODO: write to csv
    print(record_values)
    print(record_boards)
def exp_fair_compete(depth=5, func_id=3, seed=random.random()): gamma = 100000 # スレッショルドカットを実施しない restricts = [True, False] print(seed) for restrict in restricts: create_ev_table(ev_table, select_func(func_id)) winning_rate = 0.0 drows_count = 0 for i in range(100): random.seed(seed * i) state = State() while True: # ゲーム終了時 if state.is_done(): if state.is_lose(): if state.depth % 2 == 0: winning_rate += 1 else: winning_rate += 0.5 drows_count += 1 break # 行動の取得 if state.is_first_player(): action = alpha_beta_action(state, gamma, depth, not restrict) else: action = alpha_beta_action(state, gamma, depth, restrict) state = state.next(action) print("制限", restrict, "のエージェントが後手の際の勝率") print(winning_rate, "drows_count=", drows_count)
def exp_effect_of_action_restrict_for_compete(depth=5, func_id=2, rdm=random.random()): gamma = 100000 # スレッショルドカットを実施しない restricts = [True, False] for restrict in restricts: create_ev_table(ev_table, select_func(func_id)) winning_rate = 0.0 drows_count = 0 for i in range(100): random.seed(rdm * i) state = State() while True: # ゲーム終了時 if state.is_done(): if state.is_lose(): if state.depth % 2 == 0: winning_rate += 1 else: winning_rate += 0.5 drows_count += 1 break # 行動の取得 if state.is_first_player(): action = ii_mcts_action(state) else: action = alpha_beta_action(state, gamma, depth, restrict) state = state.next(action) print("restrict", restrict) print(winning_rate, "drows_count=", drows_count)
def exp_effect_of_action_restrict_for_time(depth=5, func_id=2): gamma = 100000 # スレッショルドカットを実施しない create_ev_table(ev_table, select_func(func_id)) # 評価関数は固定 state = State() restrict_time = 0.0 no_restrict_time = 0.0 while True: # ゲーム終了時 if state.is_done(): break # 行動の取得 if state.is_first_player(): action = random_action(state) # ランダム行動 else: # 行動数の削減あり start = time.time() for _ in range(50): action = alpha_beta_action(state, gamma, depth, True) restrict_time += time.time() - start # 行動数の削減なし start = time.time() for _ in range(50): action = alpha_beta_action(state, gamma, depth, False) no_restrict_time += time.time() - start action = random_action(state) # お互いにランダム行動をさせる state = state.next(action) print("restrict:", restrict_time, "no_restrict:", no_restrict_time)
def play(model):
    """Run one self-play game and return its training history.

    Each record is [[pieces, enemy_pieces], policy vector, value]; the
    value column is back-filled once the game ends.
    """
    history = []
    state = State()
    while not state.is_done():
        # Probability distribution over legal moves from PV-MCTS.
        scores = pv_mcts_scores(model, state, SP_TEMPERATURE)
        # Expand the sparse legal-move scores into a dense policy vector.
        policies = [0] * DN_OUTPUT_SIZE
        for action, policy in zip(state.legal_actions(), scores):
            policies[action] = policy
        history.append([[state.pieces, state.enemy_pieces], policies, None])
        # Sample the move to play from the search distribution.
        action = np.random.choice(state.legal_actions(), p=scores)
        state = state.next(action)
    # Back-fill the game outcome, flipping sign every ply.
    value = first_player_value(state)
    for record in history:
        record[2] = value
        value = -value
    return history
def exp_gamma_time(depth=5, func_id=2, seed=random.random()):
    """Time alpha_beta_action (100 calls per measurement) at 30 gamma
    values (0.0, 0.1, ..., 2.9) on every second-player turn of one random
    game, accumulating per-gamma totals in ``keep_gamma_time``.

    NOTE(review): the default ``seed`` is evaluated once at import time.
    """
    print("seed", seed)
    random.seed(seed)
    state = State()
    create_ev_table(ev_table, select_func(func_id))
    keep_gamma_time = [0] * 30  # one time accumulator per gamma step
    # Loop until the game ends.
    while True:
        # Game over
        if state.is_done():
            break
        # Choose an action
        if state.is_first_player():
            action = random_action(state)
        else:
            gamma = 0.0
            for index, _ in enumerate(keep_gamma_time):
                start = time.time()
                for _ in range(100):
                    # action = alpha_beta_action(state, gamma)
                    action = alpha_beta_action(state, gamma, depth, False)
                keep_gamma_time[index] += time.time() - start
                gamma += 0.1
            # Move randomly so a variety of positions is measured.
            action = random_action(state)
            print(keep_gamma_time)
        # Advance to the next state.
        state = state.next(action)
def exp_effect_of_search_depth(): gamma = 100000 # スレッショルドカットを実施しない rdm = random.random() for func_id in range(8): create_ev_table(ev_table, select_func(func_id)) winning_rate = 0.0 drows_count = 0 for i in range(100): random.seed(rdm * i) state = State() while True: # ゲーム終了時 if state.is_done(): if state.is_lose(): if state.depth % 2 == 0: winning_rate += 1 else: winning_rate += 0.5 drows_count += 1 break # 行動の取得 if state.is_first_player(): action = ii_mcts_action(state) else: action = alpha_beta_action(state, gamma, 5) state = state.next(action) print(winning_rate, "id=", func_id, "drows_count=", drows_count)
def play(model):
    """Self-play one game; return [state_array, policy, value] records."""
    history = []
    state = State()
    while not state.is_done():
        # Search distribution over the legal moves.
        scores = pv_mcts_scores(model, state, SP_TEMPERATURE)
        # Scatter the scores into a dense policy vector.
        policies = [0] * DN_OUTPUT_SIZE
        for move, prob in zip(state.legal_actions(), scores):
            policies[move] = prob
        history.append([state.pieces_array(), policies, None])
        # Sample and apply the next move.
        state = state.next(np.random.choice(state.legal_actions(), p=scores))
    # Propagate the final value back through the records, flipping sign.
    value = first_player_value(state)
    for record in history:
        record[2] = value
        value = -value
    return history
def play(model):
    """Execute one self-play game and collect training data.

    Returns a list of [state_array, policy, value] records, one per ply.
    """
    history = []
    state = State()
    while not state.is_done():
        # Move probability distribution from the PV-MCTS search.
        scores = pv_mcts_scores(model, state, SP_TEMPERATURE)
        # Build a dense policy vector from the legal-move scores.
        policies = [0] * DN_OUTPUT_SIZE
        legal = state.legal_actions()
        for act, prob in zip(legal, scores):
            policies[act] = prob
        history.append([state.pieces_array(), policies, None])
        # Sample the actual move and advance the game.
        chosen = np.random.choice(legal, p=scores)
        state = state.next(chosen)
    # Assign the outcome to every record, alternating sign per ply.
    value = first_player_value(state)
    for record in history:
        record[2] = value
        value = -value
    return history
def vs_mcts(ev_func, seed, buttle_num):
    """Play ``buttle_num`` seeded game pairs (sides swapped) of depth-5
    alpha-beta using ``ev_func`` against mcts_action; print progress and
    return the alpha-beta side's total score (1 per win, 0.5 per draw).
    """
    winning_rate = 0.0
    drow_count = 0  # (sic) number of draws
    for i in range(buttle_num):
        random.seed(seed * i)
        state = State()
        while True:
            # Game over
            if state.is_done():
                if state.is_lose():
                    if state.depth % 2 == 1:
                        winning_rate += 1  # first player (alpha-beta) won
                else:  # draw
                    winning_rate += 0.5
                    drow_count += 1
                break
            # Choose an action
            if state.is_first_player():
                action = alpha_beta_action(state, ev_func, 5)
            else:
                action = mcts_action(state)
            state = state.next(action)
        # Rematch with sides swapped under the same seed.
        random.seed(seed * i)
        state = State()
        while True:
            if state.is_done():
                if state.is_lose():
                    if state.depth % 2 == 0:
                        winning_rate += 1  # second player (alpha-beta) won
                else:
                    winning_rate += 0.5
                    drow_count += 1
                break
            # Choose an action
            if state.is_first_player():
                action = mcts_action(state)
            else:
                action = alpha_beta_action(state, ev_func, 5)
            state = state.next(action)
        print(winning_rate, drow_count)
    return winning_rate
def play(model):
    """Self-play one game and return extended training records.

    Each record is [[pieces, enemy_pieces], policies, value, values,
    action, rp], where ``rp`` lists this record plus the following two
    (padded with the last position past the end of the game).
    """
    history = []
    state = State()
    while True:
        # Game over
        if state.is_done():
            break
        # Search distribution and value estimates for the current state.
        scores, values = pv_mcts_scores(model, state, SP_TEMPERATURE)
        # Expand the sparse scores into a dense policy vector.
        policies = [0] * DN_OUTPUT_SIZE
        for action, policy in zip(state.legal_actions(), scores):
            policies[action] = policy
        # Sample the move to play.
        action = np.random.choice(state.legal_actions(), p=scores)
        # state, policy, value, search result, chosen move, future positions
        history.append([[state.pieces, state.enemy_pieces], policies,
                        None, values, action, None])
        # Advance to the next state.
        state = state.next(action)
    # Back-fill the game outcome, flipping sign every ply.
    value = first_player_value(state)
    for i in range(len(history)):
        history[i][2] = value
        value = -value
    # Keep the final position's info for padding short lookaheads.
    last_state = history[-1][0]
    last_policy = [0] * DN_OUTPUT_SIZE
    v0 = history[0][2]
    v1 = history[1][2]  # NOTE(review): IndexError if only one ply was
    # recorded — confirm that cannot happen.
    for i in range(len(history)):
        rp = []
        for inc in range(3):
            index = i + inc
            if index < len(history):
                # NOTE(review): appends the record object itself, so
                # rp[0] aliases history[i], which will itself contain rp.
                rp.append(history[i + inc])
            else:
                # Pad past game end with the last state, an alternating
                # value, and a random action.
                v = v0 if ((i + inc) % 2) == 0 else v1
                a = randint(9)  # presumably numpy.random.randint — confirm
                rp.append([last_state, last_policy, v, v, a, None])
        history[i][5] = rp
    return history
def play(model, using_saved_state=False, saving_ontheway_state=False): ''' 1ゲームの実行 ''' # 学習データ history = [] # 状態の生成 if using_saved_state: state = load_state() if not state: state = State() else: state = State() starttime = time.time() print('') while True: # ゲーム終了時 if state.is_done(): endtime = time.time() print("first player is ", "lose" if state.is_lose() else "win") print("first player num:", state.piece_count(state.pieces)) print('elapsed time', endtime - starttime) print(state) break # 合法手の確率分布の取得 scores = pv_mcts_scores(model, state, SP_TEMPERATURE) # 学習データに状態と方策を追加 policies = [0] * DN_OUTPUT_SIZE for action, policy in zip(state.legal_actions(), scores): policies[action] = policy history.append([[state.pieces, state.enemy_pieces], policies, None]) # 行動の取得 if len(history) % 10 == 0: print("state len: ", len(history)) print(state) if saving_ontheway_state and len(history) == 25: save_state(state) action = np.random.choice(state.legal_actions(), p=scores) # 次の状態の取得 state = state.next(action) # 学習データに価値を追加 value = first_player_value(state) for i in range(len(history)): history[i][2] = value value = -value return history
def play(next_actions):
    """Play one game between the two given policies; return the first
    player's point.

    Note: this variant advances via ``state.next_state``.
    """
    state = State()
    while not state.is_done():
        # Pick the policy for the side to move (index 0: first player).
        if state.is_first_player():
            chooser = next_actions[0]
        else:
            chooser = next_actions[1]
        state = state.next_state(chooser(state))
    return first_player_point(state)
def play(next_actions):
    """Play out one game between the two given policies and return the
    first player's point."""
    state = State()
    while not state.is_done():
        # Index 0 holds the first player's policy, index 1 the second's.
        if state.is_first_player():
            next_action = next_actions[0]
        else:
            next_action = next_actions[1]
        action = next_action(state)
        state = state.next(action)
    return first_player_point(state)
def play(next_actions_num):
    """Play one game between the two given policies and return the first
    player's point.

    Bug fixed: the original discarded the result of
    ``state.next(action_num)``, so the state never advanced and the loop
    could not terminate; the sibling implementations in this file all
    rebind ``state``.
    """
    state = State()
    while True:
        if state.is_done():
            break
        # Pick the policy for the side to move.
        next_action_num = next_actions_num[0] if state.is_first_player(
        ) else next_actions_num[1]
        action_num = next_action_num(state)
        # Rebind: State.next returns the successor state.
        state = state.next(action_num)
    return first_player_point(state)
def play(next_actions):
    """Simulate a complete game and return the first player's point."""
    # Current game position.
    state = State()
    # Advance until a terminal position is reached.
    while not state.is_done():
        # The pair holds (first player's policy, second player's policy).
        policy = next_actions[0] if state.is_first_player() else next_actions[1]
        state = state.next(policy(state))
    # Score from the first player's perspective.
    return first_player_point(state)
def play(next_actions):
    """Play one full game and return the first player's point."""
    state = State()  # initial game state
    # Keep playing until the game ends.
    while True:
        if state.is_done():
            break
        # Select the acting player's policy function.
        if state.is_first_player():
            act = next_actions[0]
        else:
            act = next_actions[1]
        # Apply the chosen action.
        state = state.next(act(state))
    # Score from the first player's perspective.
    return first_player_point(state)
def play(next_actions) -> float:
    """Run a single game and return the first player's point."""
    state = State()
    while not state.is_done():
        # next_actions = (first player's policy, second player's policy)
        idx = 0 if state.is_first_player() else 1
        action = next_actions[idx](state)
        state = state.next(action)
    return first_player_point(state)
def exp_move_ordering_time(depth=5, func_id=3, gamma=1.0, seed=random.random()):
    """Time move_ordering_alpha_beta_action at each move-ordering depth
    0..depth over the positions of one random game.

    Fix: the original accumulated the per-depth timings in ``timer`` but
    neither printed nor returned them, so the measurement was discarded.
    The totals are now printed and returned (callers that ignored the
    previous ``None`` return are unaffected).

    NOTE(review): ``func_id`` and ``gamma`` are accepted but unused,
    kept for signature compatibility; the default ``seed`` is evaluated
    once at import time, matching the sibling experiments.
    """
    print("seed", seed)
    timer = [0.0] * (depth + 1)  # seconds accumulated per ordering depth
    random.seed(seed)
    state = State()
    while True:
        if state.is_done():
            break
        for i in range(depth + 1):
            start = time.time()
            move_ordering_alpha_beta_action(state, 1, depth, i)
            timer[i] += time.time() - start
        # Advance the game randomly so a variety of positions is timed.
        action = random_action(state)
        state = state.next(action)
    print(timer)
    return timer
def play(model):
    """Self-play one game and return [[pieces, enemy_pieces], policies,
    value] training records.

    Fixes vs the original:
    - The result of ``state.next(action_num)`` was discarded, so the
      game never advanced (the sibling implementations all rebind
      ``state``); it is now reassigned.
    - ``action_list.txt`` was unpickled on every move; the list does not
      change mid-game, so it is loaded once before the loop.
    """
    # Load the fixed action list once; its length sizes the policy vector.
    with open('action_list.txt', 'rb') as f:
        action_list = pickle.load(f)
    history = []
    state = State()
    while True:
        if state.is_done():
            break
        # Search distribution over the legal moves.
        scores = pv_mcts_scores(model, state, SP_TEMPERATURE)
        # Scatter the legal-move scores into a dense policy vector.
        policies = np.zeros(len(action_list))
        legal_actions = state.legal_actions()
        for i in range(len(legal_actions)):
            policies[legal_actions[i]] = scores[i]
        history.append([[state.pieces, state.enemy_pieces], policies, None])
        # Sample the move and advance the game (fixed: rebind the state).
        action_num = np.random.choice(legal_actions, p=scores)
        state = state.next(action_num)
    # Back-fill the outcome, flipping sign every ply.
    value = first_player_value(state)
    for i in range(len(history)):
        history[i][2] = value
        value = -value
    return history
def exp_gamma_winning_rate(depth=5, func_id=2, seed=random.random()):
    """For 30 gamma values (0.0, 0.1, ..., 2.9), play 100 seeded games of
    mcts_action (first player) vs alpha-beta (second player) and record
    the alpha-beta side's score per gamma in ``keep_gamma_winning_rate``.

    NOTE(review): the default ``seed`` is evaluated once at import time.
    """
    # Build the evaluation table.
    create_ev_table(ev_table, select_func(func_id))
    keep_gamma_winning_rate = [0] * 30  # score per gamma step
    print("seed", seed)
    gamma = 0.0
    for index, _ in enumerate(keep_gamma_winning_rate):
        winning_rate = 0.0
        for i in range(100):
            random.seed(seed * i)
            state = State()
            while True:
                # Game over
                if state.is_done():
                    if state.is_lose():
                        if state.depth % 2 == 0:
                            winning_rate += 1  # second player won
                        # elif state.depth % 2 == 1:
                        #     pass  # first player won
                    else:  # draw
                        winning_rate += 0.5
                    break
                # Choose an action
                if state.is_first_player():
                    # action = random_action(state)
                    action = mcts_action(state)
                else:
                    # action = alpha_beta_action(state, gamma)
                    action = alpha_beta_action(state, gamma, depth, False)
                state = state.next(action)
        keep_gamma_winning_rate[index] = winning_rate
        print(keep_gamma_winning_rate)
        gamma += 0.1
    print(keep_gamma_winning_rate)
def play(model):
    """Run one self-play game with PV-MCTS and return training records.

    Each record is [[my pieces, enemy pieces], policy vector, value];
    the value column is filled in after the game ends.
    """
    history = []
    state = State()
    while True:
        if state.is_done():
            break
        # Per-move search: yields one probability per legal action
        # (temperature adds exploration noise).
        scores = pv_mcts_scores(model, state, SP_TEMPERATURE)
        # Dense policy over all DN_OUTPUT_SIZE actions; only legal
        # actions receive non-zero probability.
        policies = [0] * DN_OUTPUT_SIZE
        for move, prob in zip(state.legal_actions(), scores):
            policies[move] = prob
        # Record ([my stones, opponent stones], policy, value placeholder).
        history.append([[state.pieces, state.enemy_pieces], policies, None])
        # Sample the actual move from the search distribution and play it.
        move = np.random.choice(state.legal_actions(), p=scores)
        state = state.next(move)
    # Assign the outcome to every record, flipping sign each ply.
    value = first_player_value(state)
    for idx in range(len(history)):
        history[idx][2] = value
        value = -value
    return history
def keisoku():
    """Run 100 games where both sides act through
    check_unnecessary_action, accumulating per-action counts in
    ``counter`` and printing them after each game.

    NOTE(review): ``counter`` is re-created every game, so each printed
    array covers a single game only; ``keep_info`` is created but never
    used — confirm whether it can be removed.
    """
    buttle_num = 0  # (sic) number of finished games
    for _ in range(100):
        # Fresh state and evaluation tables per game.
        state = State()
        create_ev_table(ev_table)
        create_red_ev_table(red_ev_table)  # original note: "big experiment"
        keep_info = KeepInfo()  # unused
        counter = [0] * 148  # per-action occurrence counts
        # Loop until the game ends.
        while True:
            # Game over
            if state.is_done():
                buttle_num += 1
                break
            # Choose an action. NOTE(review): both branches are
            # identical; kept exactly as in the original.
            if state.is_first_player():
                action, counter = check_unnecessary_action(state, counter)
            else:
                action, counter = check_unnecessary_action(state, counter)
            state = state.next(action)
        print(buttle_num, "戦目")
        print(counter)
class GameUI(tk.Frame):
    """Tkinter UI for tic-tac-toe: the human plays first (circles)
    against a mini-max AI on a 240x240 canvas of 80-pixel cells."""

    def __init__(self, master=None, model=None):
        # ``model`` is accepted but unused here; the AI is mini_max_action.
        tk.Frame.__init__(self, master)
        self.master.title("三目並べ")
        self.state = State()                 # current game state
        self.next_action = mini_max_action   # AI move-selection function
        self.c = tk.Canvas(self, width=240, height=240, highlightthickness=0)
        self.c.bind("<Button-1>", self.turn_of_human)
        self.c.pack()
        self.on_draw()

    def turn_of_human(self, event):
        # Click handler. If the game is over, restart it.
        if self.state.is_done():
            self.state = State()
            self.on_draw()
            return
        # Ignore clicks when it is not the human's (first player's) turn.
        if not self.state.is_first_player():
            return
        # Convert pixel coordinates to a 3x3 cell index.
        x = int(event.x / 80)
        y = int(event.y / 80)
        if x < 0 or 2 < x or y < 0 or 2 < y:
            return
        action = x + y * 3
        # Reject illegal moves.
        if not (action in self.state.legal_actions()):
            return
        self.state = self.state.next(action)
        self.on_draw()
        # Let the UI repaint, then hand the turn to the AI.
        self.master.after(1, self.turn_of_ai)

    def turn_of_ai(self):
        # AI move; no-op if the game already ended.
        if self.state.is_done():
            return
        action = self.next_action(self.state)
        self.state = self.state.next(action)
        self.on_draw()

    def draw_piece(self, index, first_player):
        # Cell origin plus 10px padding.
        x = (index % 3) * 80 + 10
        y = int(index / 3) * 80 + 10
        if first_player:
            # First player: white circle.
            self.c.create_oval(x, y, x + 60, y + 60, width=2.0, outline="#FFFFFF")
        else:
            # Second player: gray cross.
            self.c.create_line(x, y, x + 60, y + 60, width=2.0, fill="#5D5D5D")
            self.c.create_line(x + 60, y, x, y + 60, width=2.0, fill="#5D5D5D")

    def on_draw(self):
        # Redraw the board grid and all pieces from scratch.
        self.c.delete("all")
        self.c.create_rectangle(0, 0, 240, 240, width=0.0, fill="#00A0FF")
        self.c.create_line(80, 0, 80, 240, width=2.0, fill="#0077BB")
        self.c.create_line(160, 0, 160, 240, width=2.0, fill="#0077BB")
        self.c.create_line(0, 80, 240, 80, width=2.0, fill="#0077BB")
        self.c.create_line(0, 160, 240, 160, width=2.0, fill="#0077BB")
        for i in range(9):
            if self.state.pieces[i] == 1:
                self.draw_piece(i, self.state.is_first_player())
            if self.state.enemy_pieces[i] == 1:
                self.draw_piece(i, not self.state.is_first_player())
def boltzman(xs, temperature):
    """Apply a Boltzmann-like temperature transform to ``xs`` and return
    the normalized distribution.

    Each value is raised to ``1 / temperature`` and the result is
    normalized to sum to 1.

    Fix: the original recomputed ``sum(xs)`` inside the normalizing list
    comprehension for every element (accidental O(n^2)); the total is
    now computed once. Results are unchanged.
    """
    xs = [x ** (1 / temperature) for x in xs]
    total = sum(xs)
    return [x / total for x in xs]


# Smoke test: load the newest model and let PV-MCTS play a game.
if __name__ == '__main__':
    # Load the most recent model checkpoint.
    path = sorted(Path('model').glob('*.h5'))[-1]
    model = load_model(str(path))
    # Fresh game state.
    state = State()
    # Move-selection function backed by PV-MCTS.
    next_action = pv_mcts_action(model, 1.0)
    # Play until the game ends, printing each position.
    while True:
        if state.is_done():
            break
        action = next_action(state)
        state = state.next(action)
        print(state)
class GameUI(tk.Frame):
    """Tkinter UI for mini-shogi ("animal shogi"-style 3x4 board): the
    human plays first against a PV-MCTS agent on a 240x400 canvas."""

    # Initialization
    def __init__(self, master=None, model=None):
        tk.Frame.__init__(self, master)
        self.master.title('간이 장기')
        # Game state.
        self.state = State()
        self.select = -1  # selection (-1: none, 0-11: square, 12-14: captured piece)
        # The eight move directions as (dx, dy).
        self.dxy = ((0, -1), (1, -1), (1, 0), (1, 1),
                    (0, 1), (-1, 1), (-1, 0), (-1, -1))
        # PV-MCTS move selection (temperature 0 = greedy).
        self.next_action = pv_mcts_action(model, 0.0)
        # Piece images: (mine, opponent's, captured-mine, captured-opponent's).
        self.images = [(None, None, None, None)]
        for i in range(1, 5):
            image = Image.open('piece{}.png'.format(i))
            self.images.append((
                ImageTk.PhotoImage(image),
                ImageTk.PhotoImage(image.rotate(180)),
                ImageTk.PhotoImage(image.resize((40, 40))),
                ImageTk.PhotoImage(image.resize((40, 40)).rotate(180))))
        # Canvas setup.
        self.c = tk.Canvas(self, width=240, height=400, highlightthickness=0)
        self.c.bind('<Button-1>', self.turn_of_human)
        self.c.pack()
        # Initial paint.
        self.on_draw()

    # Human's turn (click handler)
    def turn_of_human(self, event):
        # If the game is over, restart it.
        if self.state.is_done():
            self.state = State()
            self.on_draw()
            return
        # Ignore clicks when it is not the first player's turn.
        if not self.state.is_first_player():
            return
        # Collect the kinds of captured pieces held in hand.
        captures = []
        for i in range(3):
            if self.state.pieces[12 + i] >= 2:
                captures.append(1 + i)
            if self.state.pieces[12 + i] >= 1:
                captures.append(1 + i)
        # Compute selection / move target (0-11: board squares,
        # 12-13: captured pieces).
        p = int(event.x / 80) + int((event.y - 40) / 80) * 3
        if 40 <= event.y and event.y <= 360:
            select = p
        elif event.x < len(captures) * 40 and event.y > 360:
            select = 12 + int(event.x / 40)
        else:
            return
        # First click selects a piece.
        if self.select < 0:
            self.select = select
            self.on_draw()
            return
        # Second click: convert selection + destination into an action.
        action = -1
        if select < 12:
            # Moving a piece on the board.
            if self.select < 12:
                action = self.state.position_to_action(
                    p, self.position_to_direction(self.select, p))
            # Dropping a captured piece.
            else:
                action = self.state.position_to_action(
                    p, 8 - 1 + captures[self.select - 12])
        # Reject illegal moves and clear the selection.
        if not (action in self.state.legal_actions()):
            self.select = -1
            self.on_draw()
            return
        # Apply the move.
        self.state = self.state.next(action)
        self.select = -1
        self.on_draw()
        # Hand the turn to the AI.
        self.master.after(1, self.turn_of_ai)

    # AI's turn
    def turn_of_ai(self):
        # No-op if the game already ended.
        if self.state.is_done():
            return
        action = self.next_action(self.state)
        self.state = self.state.next(action)
        self.on_draw()

    # Convert a destination square into a move direction index.
    def position_to_direction(self, position_src, position_dst):
        dx = position_dst % 3 - position_src % 3
        dy = int(position_dst / 3) - int(position_src / 3)
        for i in range(8):
            if self.dxy[i][0] == dx and self.dxy[i][1] == dy:
                return i
        return 0

    # Draw one piece on the board.
    def draw_piece(self, index, first_player, piece_type):
        x = (index % 3) * 80
        y = int(index / 3) * 80 + 40
        index = 0 if first_player else 1
        self.c.create_image(x, y,
                            image=self.images[piece_type][index], anchor=tk.NW)

    # Draw the captured pieces held in hand.
    def draw_capture(self, first_player, pieces):
        # Image variant, start x, x step, and y row differ per side.
        index, x, dx, y = (2, 0, 40, 360) if first_player else (3, 200, -40, 0)
        captures = []
        for i in range(3):
            if pieces[12 + i] >= 2:
                captures.append(1 + i)
            if pieces[12 + i] >= 1:
                captures.append(1 + i)
        for i in range(len(captures)):
            self.c.create_image(x + dx * i, y,
                                image=self.images[captures[i]][index],
                                anchor=tk.NW)

    # Draw the red selection cursor as four border lines.
    def draw_cursor(self, x, y, size):
        self.c.create_line(x + 1, y + 1, x + size - 1, y + 1,
                           width=4.0, fill='#FF0000')
        self.c.create_line(x + 1, y + size - 1, x + size - 1, y + size - 1,
                           width=4.0, fill='#FF0000')
        self.c.create_line(x + 1, y + 1, x + 1, y + size - 1,
                           width=4.0, fill='#FF0000')
        self.c.create_line(x + size - 1, y + 1, x + size - 1, y + size - 1,
                           width=4.0, fill='#FF0000')

    # Repaint the whole screen.
    def on_draw(self):
        # Board background and grid lines.
        self.c.delete('all')
        self.c.create_rectangle(0, 0, 240, 400, width=0.0, fill='#EDAA56')
        for i in range(1, 3):
            self.c.create_line(i * 80 + 1, 40, i * 80, 360,
                               width=2.0, fill='#000000')
        for i in range(5):
            self.c.create_line(0, 40 + i * 80, 240, 40 + i * 80,
                               width=2.0, fill='#000000')
        # Pieces (board is mirrored when viewed from the second player).
        for p in range(12):
            p0, p1 = (p, 11 - p) if self.state.is_first_player() else (11 - p, p)
            if self.state.pieces[p0] != 0:
                self.draw_piece(p, self.state.is_first_player(),
                                self.state.pieces[p0])
            if self.state.enemy_pieces[p1] != 0:
                self.draw_piece(p, not self.state.is_first_player(),
                                self.state.enemy_pieces[p1])
        # Captured pieces for both sides.
        self.draw_capture(self.state.is_first_player(), self.state.pieces)
        self.draw_capture(not self.state.is_first_player(),
                          self.state.enemy_pieces)
        # Selection cursor.
        if 0 <= self.select and self.select < 12:
            self.draw_cursor(int(self.select % 3) * 80,
                             int(self.select / 3) * 80 + 40, 80)
        elif 12 <= self.select:
            self.draw_cursor((self.select - 12) * 40, 360, 40)