Esempio n. 1
0
def exp_effect_of_action_restrict_for_time(depth=5, func_id=2):
    """Compare alpha-beta search time with and without action restriction.

    Plays a single game in which both players actually move at random. On
    every turn of the second player, ``alpha_beta_action`` is run 50 times
    with its fourth argument ``True`` (action restriction on) and 50 times
    with ``False`` (restriction off); the elapsed time of each batch is
    accumulated. Both totals are printed once the game is over.

    Args:
        depth: Forwarded to ``alpha_beta_action`` as its third argument.
        func_id: Passed to ``select_func`` to choose the evaluation function
            that ``create_ev_table`` writes into ``ev_table``.
    """
    trials = 50
    # Very large gamma: per the original author's note, this keeps the
    # threshold cut from being applied.
    gamma = 100000
    create_ev_table(ev_table, select_func(func_id))  # evaluation function is fixed
    state = State()
    restrict_time = 0.0
    no_restrict_time = 0.0

    while not state.is_done():
        if not state.is_first_player():
            # perf_counter is monotonic and high resolution, unlike the
            # wall clock returned by time.time().
            # With action restriction.
            start = time.perf_counter()
            for _ in range(trials):
                alpha_beta_action(state, gamma, depth, True)
            restrict_time += time.perf_counter() - start

            # Without action restriction.
            start = time.perf_counter()
            for _ in range(trials):
                alpha_beta_action(state, gamma, depth, False)
            no_restrict_time += time.perf_counter() - start

        # Both players move randomly; the searched action is only timed,
        # never played.
        state = state.next(random_action(state))

    print("restrict:", restrict_time, "no_restrict:", no_restrict_time)
Esempio n. 2
0
def exp_gamma_time(depth=5, func_id=2, seed=random.random()):
    """Measure alpha-beta search time for a sweep of ``gamma`` values.

    Plays one game with random moves. On each turn of the second player,
    ``alpha_beta_action`` is run 100 times for each of 30 gamma values
    (starting at 0.0 and increasing by 0.1 per step). The elapsed time is
    added to the slot for that gamma, and the running totals are printed
    after each such turn.

    Args:
        depth: Forwarded to ``alpha_beta_action`` as its third argument.
        func_id: Passed to ``select_func`` to choose the evaluation function
            that ``create_ev_table`` writes into ``ev_table``.
        seed: Seed given to ``random.seed``. NOTE: the default is evaluated
            once, when the function is defined, so repeated calls without an
            explicit seed in the same process reuse the same value.
    """
    repeats = 100
    print("seed", seed)
    random.seed(seed)
    state = State()
    create_ev_table(ev_table, select_func(func_id))
    keep_gamma_time = [0] * 30

    # Loop until the game is over.
    while not state.is_done():
        if not state.is_first_player():
            gamma = 0.0
            for index in range(len(keep_gamma_time)):
                # perf_counter is monotonic and high resolution, unlike the
                # wall clock returned by time.time().
                start = time.perf_counter()
                for _ in range(repeats):
                    alpha_beta_action(state, gamma, depth, False)
                keep_gamma_time[index] += time.perf_counter() - start
                gamma += 0.1
            print(keep_gamma_time)

        # The move actually played is always random, so that the positions
        # being measured vary; the searched action is only timed.
        state = state.next(random_action(state))
Esempio n. 3
0
def exp_value_changing(depth=5, func_id=3, gamma=1.0, seed=random.random()):
    """Record how the board evaluation changes over 100 games.

    For each game, the first player moves with
    ``move_ordering_alpha_beta_action`` and the second player moves at
    random. On every first-player turn the current state is copied into an
    ``AccessableState`` and the result of ``evaluate_board_state`` is
    recorded, together with the piece layout of both sides. The collected
    values and boards are printed at the end.

    Args:
        depth: Forwarded to ``move_ordering_alpha_beta_action`` as its third
            argument.
        func_id: Not used inside this function.
        gamma: Not used inside this function (the search is called with a
            literal ``1`` as its second argument).
        seed: Base seed; game ``i`` calls ``random.seed(seed * (i + 1))``.
            NOTE: the default is evaluated once, at definition time.
    """
    record_values = []  # evaluation values, one list per game
    record_boards = []  # boards recorded alongside each evaluation value
    for i in range(100):
        random.seed(seed * (i + 1))
        state = State()
        ii_state = AccessableState()
        values = []
        boards = []
        while not state.is_done():
            if state.is_first_player():
                # NOTE(review): the game index i is passed as the fourth
                # argument; confirm that this is the intended value.
                action = move_ordering_alpha_beta_action(state, 1, depth, i)
                # Compute and record the evaluation of the current board.
                ii_state.create_ii_state_from_state(state)
                values.append(evaluate_board_state(ii_state))
                boards.append([state.pieces, state.enemy_pieces])
            else:
                action = random_action(state)
            state = state.next(action)
        # Fixed: these were misspelled as ``apped`` and raised AttributeError.
        record_values.append(values)
        record_boards.append(boards)

    # TODO: write the results out as CSV
    print(record_values)
    print(record_boards)
Esempio n. 4
0
def playout(state):
    """Play random moves from ``state`` until the game ends and score it.

    The terminal result is -1 when ``is_lose()`` holds and 0 when
    ``is_draw()`` holds. The sign is flipped once for every move played on
    the way there, so the value is expressed from the viewpoint of the
    state that was passed in.
    """
    sign = 1  # flips after every move, mirroring the negation at each ply
    while True:
        if state.is_lose():
            return -sign
        if state.is_draw():
            return 0
        state = state.next(random_action(state))
        sign = -sign
Esempio n. 5
0
 def rollout(self, state):
     """Play random legal moves on ``state`` until a winner is set.

     ``state`` is mutated in place through ``make_move``. Returns True when
     the player who was to move at the start equals ``state.winner`` at the
     end, False otherwise.
     """
     starting_player = state.player
     while state.winner == 0:
         # Rejection sampling: keep drawing until the move is legal.
         move = random_action()
         while not state.legal_move(move):
             move = random_action()
         state.make_move(move)
     return starting_player == state.winner
Esempio n. 6
0
def exp_move_ordering_time(depth=5,
                           func_id=3,
                           gamma=1.0,
                           seed=random.random()):
    """Measure search time for each move-ordering setting over one game.

    Advances a game with random moves. At every position,
    ``move_ordering_alpha_beta_action`` is run once for each
    ``i`` in ``0..depth`` (passed as its fourth argument), and the elapsed
    time is accumulated per ``i``.

    Args:
        depth: Forwarded to the search as its third argument; also sets the
            number of measured settings (``depth + 1``).
        func_id: Not used inside this function.
        gamma: Not used inside this function (the search is called with a
            literal ``1`` as its second argument).
        seed: Seed given to ``random.seed``. NOTE: the default is evaluated
            once, at definition time.

    Returns:
        A list of ``depth + 1`` floats holding the accumulated seconds for
        each setting. The same list is also printed.
    """
    print("seed", seed)
    timer = [0.0] * (depth + 1)

    random.seed(seed)
    state = State()
    while not state.is_done():
        for i in range(depth + 1):
            # perf_counter is monotonic and high resolution, unlike the
            # wall clock returned by time.time().
            start = time.perf_counter()
            move_ordering_alpha_beta_action(state, 1, depth, i)
            timer[i] += time.perf_counter() - start
        # Advance the game with a random move.
        state = state.next(random_action(state))

    # Previously the measurements were discarded; report and return them.
    print("timer", timer)
    return timer
Esempio n. 7
0
#             else:
#                 pass

if __name__ == "__main__":
    # Set OMP_NUM_THREADS=1 (conventionally limits OpenMP-backed libraries
    # to a single thread).
    os.environ["OMP_NUM_THREADS"] = "1"

    with open("config.yaml") as f:
        args = yaml.safe_load(f)
    # print(args)

    # Write experimental code here.

    # Print the legal actions of each position while playing random moves.
    # Stop once the game is done instead of stepping past the final state
    # forever.
    state = State()
    while not state.is_done():
        print(state.legal_actions())
        state = state.next(random_action(state))

    # path = "models/10000.pth"
    # EvalHandyRL(100, path)
    # policies = obs_to_policy_to_use_game(agent, obs, state)

    # print(policies)

    # convert_state_to_obs(state)

    # test_predict()
    # test_cigeister()

    # 方策を持ってくる

    # 接続部分
Esempio n. 8
0
def main():
    """Run a TCP game client that chooses moves with alpha-beta search.

    Prompts for a port on stdin, connects to ``localhost`` on that port and
    sends the red-piece setup. It then loops: build a state from the
    confirmed information, choose an action, send it, read the server's
    acknowledgement and the opponent's move, and refresh the confirmed
    information. The loop ends when the server closes the connection.

    Raises:
        ValueError: If the text typed for the port is not an integer.
        OSError: If the connection or a send/receive fails.
    """
    print(
        "Please input a port number that you want to connect. (10000 or 10001)"
    )
    # port = 10000  # first player
    # port = 10001  # second player
    port = int(input())

    # Create the client socket.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        # Choose the server.
        s.connect(("localhost", port))

        # s.connect(("15.152.47.162", port))  # 1
        # s.connect(("13.208.76.157", port))  # 2
        # s.connect(("13.208.214.66", port))  # 3
        # s.connect(("13.208.184.19", port))  # 4

        # Read the server's first message (up to 2048 bytes).
        data = s.recv(2048)
        print(repr(data))

        # Tell the server which of our pieces are red (colour value 2).
        piece_char_list = ["A", "B", "C", "D", "E", "F", "G", "H"]
        confirmed_info = KeepConfirmedInfo()
        red_piece = "".join(
            piece_char_list[i]
            for i, color in enumerate(confirmed_info.my_pieces_color)
            if color == 2
        )
        send_str = "SET:" + red_piece + "\r\n"
        s.sendall(send_str.encode(encoding="utf-8"))
        data = s.recv(2048)
        print(repr(data))

        # Compare the bytes directly instead of their repr() strings.
        if data == b"OK \r\n":
            print("駒のセット成功")
        else:
            print("駒のセット失敗")

        # Receive the next message and decode it to str.
        data = s.recv(2048)
        now_tcp_str = data.decode(encoding="utf-8")  # most recent TCP message
        print(now_tcp_str)

        # NOTE(review): rl_action is never used below. The constructor call
        # is kept in case loading the model has side effects - confirm and
        # remove if not needed.
        rl_action = HandyAction("models/108000.pth")

        turn_count = 0
        while True:
            # Build a State from the confirmed information.
            state = confirmed_info.create_state()
            print(state)
            # Decide the action from the state.
            action = alpha_beta_action(state, 5, False)
            print(action)

            # Fall back to a random move if the search result is not legal.
            if action not in state.legal_actions():
                action = random_action(state)

            # Convert our action into the bytes to send.
            sendall_str_b = confirmed_info.action_to_sendall_str(action)
            print("送信データ(行動):", str(sendall_str_b))

            s.sendall(sendall_str_b)
            data = s.recv(2048)  # OK or OKR
            if not data:
                # recv() returns b"" once the peer has closed the connection.
                print("connection closed by server")
                break
            now_tcp_str = data.decode(encoding="utf-8")
            print("データ送信の可否:", now_tcp_str)

            # Update the confirmed information from our own action.
            confirmed_info.update_coo_from_my_aciton(action, now_tcp_str)

            # Opponent's action.
            data = s.recv(2048)
            if not data:
                print("connection closed by server")
                break
            now_tcp_str = data.decode(encoding="utf-8")
            print("相手の行動:", now_tcp_str)

            # Update the confirmed information from the opponent's action.
            # confirmed_info.update_coo_from_enemy_aciton(now_tcp_str)

            # The incremental update sometimes misbehaves, so reset from the
            # received message every turn (temporary measure).
            confirmed_info.reset_keep_confirmed_info(now_tcp_str)

            # Two turns have passed (ours and the opponent's).
            turn_count += 2