Example no. 1
def test_loss_cut_ab(seed=random.random()):
    create_ev_table(ev_table)
    print("seed", seed)

    winning_rate = 0.0
    drow_count = 0
    for i in range(10):
        random.seed(seed * i)
        state = State()
        est_ii_state = EstimatedState()
        est_ii_state.create_est_ii_state_from_state(state)
        while True:
            # When the game is over
            if state.is_done():
                if state.is_lose():
                    if state.depth % 2 == 1:
                        winning_rate += 1  # first player wins
                else:  # draw
                    winning_rate += 0.5
                    drow_count += 1
                break

            # Get the action
            if state.is_first_player():
                action, est_ii_state = cut_loss_alpha_beta_action(
                    est_ii_state, 5)
                est_ii_state.my_real_next(state, action)
            else:
                action = alpha_beta_action(state)
                est_ii_state.enemy_real_next(action)
            print(state)
            state = state.next(action)

        # Play again under the same conditions with first and second players swapped
        random.seed(seed * i)
        state = State()
        est_ii_state = EstimatedState()
        est_ii_state.create_est_ii_state_from_state(state)
        while True:
            if state.is_done():
                if state.is_lose():
                    if state.depth % 2 == 0:
                        winning_rate += 1  # second player wins
                else:  # draw
                    winning_rate += 0.5
                    drow_count += 1
                break

            # Get the action
            if state.is_first_player():
                action = alpha_beta_action(state)
                est_ii_state.enemy_real_next(action)
            else:
                action = perfect_alpha_beta_action(state, 5)
                est_ii_state.my_real_next(state, action)
            state = state.next(action)

        print(winning_rate, (i + 1) * 2, drow_count)
Example no. 2
def exp_reduction_effect(
        seed=random.random(), reduction_func=IDDFS_alpha_beta_action):
    # Generate the state
    create_ev_table(ev_table)
    print("seed", seed)

    reduction_ab_action = time_limit_alpha_beta(reduction_func)  # side whose win rate is measured
    simple_ab_action = time_limit_alpha_beta(alpha_beta_action)  # the opponent

    winning_rate = 0.0
    drow_count = 0
    for i in range(50):
        random.seed(seed * i)
        state = State()
        while True:
            # When the game is over
            if state.is_done():
                if state.is_lose():
                    if state.depth % 2 == 1:
                        winning_rate += 1  # first player wins
                else:  # draw
                    winning_rate += 0.5
                    drow_count += 1
                break

            # Get the action
            if state.is_first_player():
                action = reduction_ab_action(state)
            else:
                action = simple_ab_action(state)
            state = state.next(action)

        # Play again under the same conditions with first and second players swapped
        random.seed(seed * i)
        state = State()
        while True:
            if state.is_done():
                if state.is_lose():
                    if state.depth % 2 == 0:
                        winning_rate += 1  # second player wins
                else:  # draw
                    winning_rate += 0.5
                    drow_count += 1
                break

            # Get the action
            if state.is_first_player():
                action = simple_ab_action(state)
            else:
                action = reduction_ab_action(state)
            state = state.next(action)

        print(winning_rate, (i + 1) * 2, drow_count)
Example no. 3
def exp_search_depth_effect(seed=random.random(),
                            deep_depth=5,
                            shallow_depth=3,
                            search_func=alpha_beta_action):
    # Generate the state
    create_ev_table(ev_table)
    print("seed", seed)

    winning_rate = 0.0
    drow_count = 0
    for i in range(50):
        random.seed(seed * i)
        state = State()
        while True:
            # When the game is over
            if state.is_done():
                if state.is_lose():
                    if state.depth % 2 == 1:
                        winning_rate += 1  # first player wins
                else:  # draw
                    winning_rate += 0.5
                    drow_count += 1
                break

            # Get the action
            if state.is_first_player():
                action = search_func(state, deep_depth)  # deep search
            else:
                action = search_func(state, shallow_depth)  # shallow search
            state = state.next(action)

        # Play again under the same conditions with first and second players swapped
        random.seed(seed * i)
        state = State()
        while True:
            if state.is_done():
                if state.is_lose():
                    if state.depth % 2 == 0:
                        winning_rate += 1  # second player wins
                else:  # draw
                    winning_rate += 0.5
                    drow_count += 1
                break

            # Get the action
            if state.is_first_player():
                action = search_func(state, shallow_depth)  # shallow search
            else:
                action = search_func(state, deep_depth)  # deep search
            state = state.next(action)

        print(winning_rate, (i + 1) * 2, drow_count)
def play(next_actions_num):
    state = State()

    while True:
        if state.is_done():
            break

        next_action_num = next_actions_num[0] if state.is_first_player(
        ) else next_actions_num[1]
        action_num = next_action_num(state)

        state = state.next(action_num)

    return first_player_point(state)
Example no. 5
def exp_effect_of_search_depth(func_id=2, seed=random.random()):
    # Generate the state
    create_ev_table(ev_table, select_func(func_id))
    print("seed", seed)

    gamma = 100000  # do not apply the threshold cut
    depths = [2, 3, 4, 5, 6]
    for depth in depths:
        winning_rate = 0.0
        drows_count = 0
        for i in range(100):
            random.seed(seed * i)
            state = State()
            while True:
                # When the game is over
                if state.is_done():
                    if state.is_lose():
                        if state.depth % 2 == 0:
                            winning_rate += 1
                    else:
                        drows_count += 1
                        winning_rate += 0.5
                    break

                # Get the action
                if state.is_first_player():
                    action = mcts_action(state)
                else:
                    action = alpha_beta_action(state, gamma, depth)
                state = state.next(action)
        print("勝率", winning_rate, "drows_count=", drows_count)
Example no. 6
    def turn_of_human(self, touch):
        global state

        # When the game is over
        if state.is_done():
            state = State()
            self.reset()
            return

        # When it is not the first player's turn
        if not state.is_first_player():
            return

        # Convert the click position into an action
        x = int(touch.pos[0] / 160)
        y = int(touch.pos[1] / 160)
        action = x + y * 3

        if x < 0 or 2 < x or y < 0 or 2 < y:  # out of range
            return

        # When the action is not a legal move
        if action not in state.legal_actions():
            return

        # Get the next state
        state = state.next(action)

        # Draw the placed mark
        self.draw_piece(action)

        # AI's turn
        self.turn_of_ai()
Example no. 7
def exp_value_changing(depth=5, func_id=3, gamma=1.0, seed=random.random()):
    record_values = []  # record evaluation values
    record_boards = []  # record boards alongside the evaluation values
    for i in range(100):
        random.seed(seed * (i + 1))
        state = State()
        ii_state = AccessableState()
        values = []
        boards = []
        while True:
            if state.is_done():
                break
            if state.is_first_player():
                action = move_ordering_alpha_beta_action(state, 1, depth, i)
                # Compute and record the evaluation value of the board
                ii_state.create_ii_state_from_state(state)
                values.append(evaluate_board_state(ii_state))
                boards.append([state.pieces, state.enemy_pieces])
            else:
                action = random_action(state)
            state = state.next(action)
        record_values.append(values)
        record_boards.append(boards)

    # TODO: output to CSV
    print(record_values)
    print(record_boards)
Example no. 8
def exp_fair_compete(depth=5, func_id=3, seed=random.random()):
    gamma = 100000  # do not apply the threshold cut
    restricts = [True, False]
    print(seed)
    for restrict in restricts:
        create_ev_table(ev_table, select_func(func_id))
        winning_rate = 0.0
        drows_count = 0
        for i in range(100):
            random.seed(seed * i)
            state = State()
            while True:
                # When the game is over
                if state.is_done():
                    if state.is_lose():
                        if state.depth % 2 == 0:
                            winning_rate += 1
                    else:
                        winning_rate += 0.5
                        drows_count += 1
                    break

                # Get the action
                if state.is_first_player():
                    action = alpha_beta_action(state, gamma, depth,
                                               not restrict)
                else:
                    action = alpha_beta_action(state, gamma, depth, restrict)
                state = state.next(action)
        print("制限", restrict, "のエージェントが後手の際の勝率")
        print(winning_rate, "drows_count=", drows_count)
Example no. 9
def exp_effect_of_action_restrict_for_compete(depth=5,
                                              func_id=2,
                                              rdm=random.random()):
    gamma = 100000  # do not apply the threshold cut
    restricts = [True, False]
    for restrict in restricts:
        create_ev_table(ev_table, select_func(func_id))
        winning_rate = 0.0
        drows_count = 0
        for i in range(100):
            random.seed(rdm * i)
            state = State()
            while True:
                # When the game is over
                if state.is_done():
                    if state.is_lose():
                        if state.depth % 2 == 0:
                            winning_rate += 1
                    else:
                        winning_rate += 0.5
                        drows_count += 1
                    break

                # Get the action
                if state.is_first_player():
                    action = ii_mcts_action(state)
                else:
                    action = alpha_beta_action(state, gamma, depth, restrict)
                state = state.next(action)
        print("restrict", restrict)
        print(winning_rate, "drows_count=", drows_count)
Example no. 10
def exp_effect_of_action_restrict_for_time(depth=5, func_id=2):
    gamma = 100000  # do not apply the threshold cut
    create_ev_table(ev_table, select_func(func_id))  # fixed evaluation function
    state = State()
    restrict_time = 0.0
    no_restrict_time = 0.0
    while True:
        # When the game is over
        if state.is_done():
            break

        # Get the action
        if state.is_first_player():
            action = random_action(state)  # random action
        else:
            # With action reduction
            start = time.time()
            for _ in range(50):
                action = alpha_beta_action(state, gamma, depth, True)
            restrict_time += time.time() - start
            # Without action reduction
            start = time.time()
            for _ in range(50):
                action = alpha_beta_action(state, gamma, depth, False)
            no_restrict_time += time.time() - start
            action = random_action(state)  # have both sides play random actions
        state = state.next(action)
    print("restrict:", restrict_time, "no_restrict:", no_restrict_time)
Example no. 11
def exp_effect_of_search_depth():
    gamma = 100000  # do not apply the threshold cut
    rdm = random.random()
    for func_id in range(8):
        create_ev_table(ev_table, select_func(func_id))
        winning_rate = 0.0
        drows_count = 0
        for i in range(100):
            random.seed(rdm * i)
            state = State()
            while True:
                # When the game is over
                if state.is_done():
                    if state.is_lose():
                        if state.depth % 2 == 0:
                            winning_rate += 1
                    else:
                        winning_rate += 0.5
                        drows_count += 1
                    break

                # Get the action
                if state.is_first_player():
                    action = ii_mcts_action(state)
                else:
                    action = alpha_beta_action(state, gamma, 5)
                state = state.next(action)
        print(winning_rate, "id=", func_id, "drows_count=", drows_count)
Example no. 12
def play(model):
  history = []

  state = State()

  while True:
    if state.is_done():
      break
    
    scores = pv_mcts_scores(model, state, SP_TEMPERATURE)
    policies = [0] * DN_OUTPUT_SIZE
    for action, policy in zip(state.legal_actions(), scores):
      policies[action] = policy
    history.append([state.pieces_array(), policies, None])

    action = np.random.choice(state.legal_actions(), p=scores)

    state = state.next(action)

  value = first_player_value(state)
  for i in range(len(history)):
    history[i][2] = value
    value = -value
  
  return history
Example no. 13
def play(model):
    # Training data
    history = []

    # Generate the state
    state = State()

    while True:
        # When the game is over
        if state.is_done():
            break

        # Get the probability distribution over legal moves
        scores = pv_mcts_scores(model, state, SP_TEMPERATURE)

        # Add the state and policy to the training data
        policies = [0] * DN_OUTPUT_SIZE
        for action, policy in zip(state.legal_actions(), scores):
            policies[action] = policy
        history.append([state.pieces_array(), policies, None])

        # Get the action
        action = np.random.choice(state.legal_actions(), p=scores)

        # Get the next state
        state = state.next(action)

    # Add the value to the training data
    value = first_player_value(state)
    for i in range(len(history)):
        history[i][2] = value
        value = -value
    return history
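A usage note, not part of the example above: in AlphaZero-style pipelines a play() function like this is usually called repeatedly and the per-game histories are concatenated into one training buffer. A minimal sketch under that assumption follows; the names self_play and game_count are illustrative, not taken from the source.

def self_play(model, game_count=500):
    # Hypothetical helper: collect training data from repeated self-play games.
    histories = []
    for i in range(game_count):
        histories.extend(play(model))
        print('\rSelfPlay {}/{}'.format(i + 1, game_count), end='')
    print('')
    return histories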
Example no. 14
def play(model):
    # Training data
    history = []

    # Generate the state
    state = State()

    while True:
        # When the game is over
        if state.is_done():
            break

        # Get the probability distribution over legal moves
        scores = pv_mcts_scores(model, state, SP_TEMPERATURE)

        # Add the state and policy to the training data
        policies = [0] * DN_OUTPUT_SIZE
        for action, policy in zip(state.legal_actions(), scores):
            policies[action] = policy
        history.append([[state.pieces, state.enemy_pieces], policies, None])

        # Get the action
        action = np.random.choice(state.legal_actions(), p=scores)

        # Get the next state
        state = state.next(action)

    # Add the value to the training data
    value = first_player_value(state)
    for i in range(len(history)):
        history[i][2] = value
        value = -value
    return history
Example no. 15
def exp_gamma_time(depth=5, func_id=2, seed=random.random()):
    print("seed", seed)
    random.seed(seed)
    state = State()
    create_ev_table(ev_table, select_func(func_id))
    keep_gamma_time = [0] * 30

    # Loop until the game ends
    while True:
        # When the game is over
        if state.is_done():
            break

        # Get the action
        if state.is_first_player():
            action = random_action(state)
        else:
            gamma = 0.0
            for index, _ in enumerate(keep_gamma_time):
                start = time.time()
                for _ in range(100):
                    # action = alpha_beta_action(state, gamma)
                    action = alpha_beta_action(state, gamma, depth, False)
                keep_gamma_time[index] += time.time() - start
                gamma += 0.1

            # Take a random action to spread out the data
            action = random_action(state)
            print(keep_gamma_time)
        # Get the next state
        state = state.next(action)
Example no. 16
def vs_mcts(ev_func, seed, buttle_num):
    winning_rate = 0.0
    drow_count = 0
    for i in range(buttle_num):
        random.seed(seed * i)
        state = State()
        while True:
            # When the game is over
            if state.is_done():
                if state.is_lose():
                    if state.depth % 2 == 1:
                        winning_rate += 1  # first player wins
                else:  # draw
                    winning_rate += 0.5
                    drow_count += 1
                break

            # Get the action
            if state.is_first_player():
                action = alpha_beta_action(state, ev_func, 5)
            else:
                action = mcts_action(state)
            state = state.next(action)

        # Play again under the same conditions with first and second players swapped
        random.seed(seed * i)
        state = State()
        while True:
            if state.is_done():
                if state.is_lose():
                    if state.depth % 2 == 0:
                        winning_rate += 1  # second player wins
                else:
                    winning_rate += 0.5
                    drow_count += 1
                break

            # Get the action
            if state.is_first_player():
                action = mcts_action(state)
            else:
                action = alpha_beta_action(state, ev_func, 5)
            state = state.next(action)

    print(winning_rate, drow_count)
    return winning_rate
Example no. 17
def play(model, using_saved_state=False, saving_ontheway_state=False):
    '''
    Run one game
    '''

    # Training data
    history = []

    # Generate the state
    if using_saved_state:
        state = load_state()
        if not state:
            state = State()
    else:
        state = State()

    starttime = time.time()
    print('')
    while True:
        # When the game is over
        if state.is_done():
            endtime = time.time()
            print("first player is ", "lose" if state.is_lose() else "win")
            print("first player num:", state.piece_count(state.pieces))
            print('elapsed time', endtime - starttime)
            print(state)
            break

        # Get the probability distribution over legal moves

        scores = pv_mcts_scores(model, state, SP_TEMPERATURE)

        # Add the state and policy to the training data
        policies = [0] * DN_OUTPUT_SIZE
        for action, policy in zip(state.legal_actions(), scores):
            policies[action] = policy
        history.append([[state.pieces, state.enemy_pieces], policies, None])

        # Get the action
        if len(history) % 10 == 0:
            print("state len: ", len(history))
            print(state)

        if saving_ontheway_state and len(history) == 25:
            save_state(state)
        action = np.random.choice(state.legal_actions(), p=scores)

        # Get the next state
        state = state.next(action)

    # Add the value to the training data
    value = first_player_value(state)
    for i in range(len(history)):
        history[i][2] = value
        value = -value
    return history
Example no. 18
def play(model):
    # Training data
    history = []

    # Generate the state
    state = State()

    while True:
        # When the game is over
        if state.is_done():
            break

        # Get the probability distribution over legal moves

        scores, values = pv_mcts_scores(model, state, SP_TEMPERATURE)

        # Add the state and policy to the training data
        policies = [0] * DN_OUTPUT_SIZE
        for action, policy in zip(state.legal_actions(), scores):
            policies[action] = policy

        # Get the action
        action = np.random.choice(state.legal_actions(), p=scores)

        # state, policy, value, search result, chosen action, subsequent positions
        history.append([[state.pieces, state.enemy_pieces], policies, None,
                        values, action, None])

        # Get the next state
        state = state.next(action)

    # Add the value to the training data
    value = first_player_value(state)
    for i in range(len(history)):
        history[i][2] = value
        value = -value

    # Keep the information about the final position
    last_state = history[-1][0]
    last_policy = [0] * DN_OUTPUT_SIZE
    v0 = history[0][2]
    v1 = history[1][2]

    for i in range(len(history)):
        rp = []
        for inc in range(3):
            index = i + inc
            if index < len(history):
                rp.append(history[i + inc])
            else:
                v = v0 if ((i + inc) % 2) == 0 else v1
                a = randint(9)
                rp.append([last_state, last_policy, v, v, a, None])
        history[i][5] = rp

    return history
Example no. 19
def play(model):
    history = []
    state = State()

    while True:
        if state.is_done():
            break

        scores = pv_mcts_scores(model, state, SP_TEMPERATURE)

        with open('action_list.txt', 'rb') as f:
            action_list = pickle.load(f)

        # print('action_list:', len(action_list))

        policies = np.zeros(len(action_list))
        # for action_num, policy in zip(state.legal_actions(), scores):
        # 	policies[action_num] = policy

        # print('size check', len(policies), len(scores))

        legal_actions = state.legal_actions()

        for i in range(len(legal_actions)):
            policies[legal_actions[i]] = scores[i]
            # print(policies)
        # print('policies:', policies)
        history.append([[state.pieces, state.enemy_pieces], policies, None])

        # action_list_num = np.arange(len(action_list))
        # action_num = np.random.choice(action_list_num, p=scores)
        action_num = np.random.choice(legal_actions, p=scores)
        # print(action_num)
        state = state.next(action_num)

    value = first_player_value(state)
    for i in range(len(history)):
        history[i][2] = value
        value = -value
    return history
def play(next_actions):
  state = State()

  while True:
    if state.is_done():
      break

    next_action = next_actions[0] if state.is_first_player() else next_actions[1]

    action = next_action(state)
    state = state.next(action)

  return first_player_point(state)
def play(next_actions):
    # Generate the state
    state = State()

    # Loop until the game ends
    while True:
        # When the game is over
        if state.is_done():
            break

        # Get the action
        next_action = next_actions[0] if state.is_first_player() else next_actions[1]
        action = next_action(state)

        # Get the next state
        state = state.next(action)

    # Return the first player's point
    return first_player_point(state)
Example no. 22
def exp_move_ordering_time(depth=5,
                           func_id=3,
                           gamma=1.0,
                           seed=random.random()):
    print("seed", seed)
    timer = [0.0] * (depth + 1)

    random.seed(seed)
    state = State()
    while True:
        if state.is_done():
            break
        for i in range(depth + 1):
            start = time.time()
            for _ in range(1):
                move_ordering_alpha_beta_action(state, 1, depth, i)
            timer[i] += time.time() - start
        action = random_action(state)
        state = state.next(action)  # advance the game with random moves
    print(timer)  # report the accumulated time for each move-ordering depth
Example no. 23
def play(next_actions):
    # Generate the state
    state = State()

    # Loop until the game ends
    while True:
        # When the game is over
        if state.is_done():
            break

        # Get the action
        next_action = next_actions[0] if state.is_first_player() else next_actions[1]
        action = next_action(state)

        # Get the next state
        state = state.next(action)

    # Return the first player's point
    return first_player_point(state)
def play(next_actions) -> float:
    """1ゲームの実行"""
    state = State()  # 状態の生成

    # ゲーム終了までループ
    while True:
        if state.is_done():
            break

        # Get the action
        next_action = next_actions[0] if state.is_first_player(
        ) else next_actions[1]
        action = next_action(state)

        # Get the next state
        state = state.next(action)

    # Return the first player's point
    return first_player_point(state)
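A minimal evaluation sketch built on this play(): an assumption about how it is typically driven rather than code from the source. It averages the first player's point over several games and swaps which action function moves first each game; first_player_point is assumed to return 1 for a first-player win, 0.5 for a draw, and 0 for a loss, matching the win-rate bookkeeping in the experiments above. Any two action functions that take only a state can be passed as next_actions; the name evaluate_pair is illustrative.

def evaluate_pair(next_actions, game_count=10):
    # Hypothetical helper: average point of next_actions[0] over game_count games,
    # alternating who plays first to cancel out the first-move advantage.
    total_point = 0.0
    for i in range(game_count):
        if i % 2 == 0:
            total_point += play(next_actions)
        else:
            total_point += 1 - play(list(reversed(next_actions)))
    return total_point / game_count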
Example no. 25
def test_dual_network():

    model0 = RepNet()
    model1 = DynamicsNet()
    model2 = PredictNet()

    model0.load_state_dict(torch.load('./model/best_r.h5'))
    model1.load_state_dict(torch.load('./model/best_d.h5'))
    model2.load_state_dict(torch.load('./model/best_p.h5'))

    model0 = model0.double()
    model1 = model1.double()
    model2 = model2.double()

    state = State()
    action = 0
    next_state = state.next(action)

    file, rank, channel = DN_INPUT_SHAPE
    x = np.array([state.pieces, state.enemy_pieces])
    x = x.reshape(channel, file, rank)
    x = np.array([x])
    x = torch.tensor(x, dtype=torch.double)

    hidden = model0(x)

    action = np.array([0])
    at = action_to_tensor(action)

    hidden = model1(hidden, at)

    print("----------------------------------")

    policy, value = model2(hidden)

    print(policy.shape)
    print(value.shape)
    print(hidden.shape)
Example no. 26
def exp_gamma_winning_rate(depth=5, func_id=2, seed=random.random()):
    # Generate the state
    create_ev_table(ev_table, select_func(func_id))
    keep_gamma_winning_rate = [0] * 30
    print("seed", seed)

    gamma = 0.0
    for index, _ in enumerate(keep_gamma_winning_rate):
        winning_rate = 0.0
        for i in range(100):
            random.seed(seed * i)
            state = State()
            while True:
                # When the game is over
                if state.is_done():
                    if state.is_lose():
                        if state.depth % 2 == 0:
                            winning_rate += 1  # second player wins
                        # elif state.depth % 2 == 1:
                        #     pass  # first player wins
                    else:  # draw
                        winning_rate += 0.5
                    break

                # Get the action
                if state.is_first_player():
                    # action = random_action(state)
                    action = mcts_action(state)
                else:
                    # action = alpha_beta_action(state, gamma)
                    action = alpha_beta_action(state, gamma, depth, False)
                state = state.next(action)

        keep_gamma_winning_rate[index] = winning_rate
        print(keep_gamma_winning_rate)
        gamma += 0.1
    print(keep_gamma_winning_rate)
Example no. 27
def play(model):
    # Training data
    history = []
    # Generate the state
    state = State()
    while True:
        # When the game is over
        if state.is_done():
            break

        # Get the probability distribution over legal moves
        # (model, game state, temperature parameter: a variable used to add variability)
        # Computes a score for each node
        scores = pv_mcts_scores(model, state, SP_TEMPERATURE)

        # Add the state and policy to the training data
        policies = [0] * DN_OUTPUT_SIZE  # number of actions: 7
        # For each candidate position where a stone can be placed, fill in its score
        for action, policy in zip(state.legal_actions(), scores):
            # Set the action and policy
            # (set the policy for the corresponding column)
            policies[action] = policy
        # Record the entry ([my stones, opponent stones], policy, None (value))
        history.append([[state.pieces, state.enemy_pieces], policies, None])

        # Get the action
        action = np.random.choice(state.legal_actions(), p=scores)

        # Get the next state
        state = state.next(action)

    # Add the value to the training data
    value = first_player_value(state)
    for i in range(len(history)):
        history[i][2] = value
        value = -value
    return history
Example no. 28
    def turn_of_human(self, touch):
        global state

        # When the game is over
        if state.is_done():
            state = State()
            self.reset()
            return

        # When it is not the first player's turn
        if not state.is_first_player():
            return

        # Convert the click position into an action
        x = int(touch.pos[0] / 80)
        y = int(touch.pos[1] / 80)
        action = x + y * 6

        if x < 0 or 5 < x or y < 0 or 5 < y:  # out of range
            return

        # When the action is not a legal move
        legal_actions = state.legal_actions()
        if legal_actions == [36]:
            action = 36  # pass
        if action != 36 and action not in legal_actions:
            return

        # Get the next state
        state = state.next(action)

        # Draw the placed disc
        self.draw_piece()
        sleep(1)

        # AI's turn
        self.turn_of_ai()
Example no. 29
def keisoku():
    buttle_num = 0
    for _ in range(100):
        # Generate the state
        state = State()
        create_ev_table(ev_table)
        create_red_ev_table(red_ev_table)  # the big experiment
        keep_info = KeepInfo()
        counter = [0] * 148

        # Loop until the game ends
        while True:
            # When the game is over
            if state.is_done():
                buttle_num += 1
                break
            # Get the action
            if state.is_first_player():
                action, counter = check_unnecessary_action(state, counter)
            else:
                action, counter = check_unnecessary_action(state, counter)
            state = state.next(action)
        print(buttle_num, "games finished")
        print(counter)
Example no. 30

if __name__ == "__main__":
    os.environ["OMP_NUM_THREADS"] = "1"

    with open("config.yaml") as f:
        args = yaml.safe_load(f)
    # print(args)

    # Write experiment code here

    state = State()
    while True:
        if state.is_done():
            break
        print(state.legal_actions())
        state = state.next(random_action(state))

    # path = "models/10000.pth"
    # EvalHandyRL(100, path)
    # policies = obs_to_policy_to_use_game(agent, obs, state)

    # print(policies)

    # convert_state_to_obs(state)

    # test_predict()
    # test_cigeister()

    # Fetch the policy

    # Connection part