예제 #1
0
                                # NOTE(review): this fragment begins mid-loop; `win`, `j`, `tr`,
                                # `end`, `state_X` and `target_X` are presumably assigned above
                                # the visible region.
                                if win == playerID[j]:
                                    # A win earns a reward of 1.
                                    # NOTE(review): `reword` looks like a misspelling of
                                    # `reward`; confirm before renaming.
                                    reword = 1

                            # Record the transition for player j, then run experience replay.
                            players[j].store_experience(
                                state, targets, tr, reword, state_X, target_X,
                                end)
                            players[j].experience_replay()

                    # Select an action.
                    action = players[i].select_action(state, targets,
                                                      players[i].exploration)
                    # Execute the action.
                    env.update(action, playerID[i])
                    # for log
                    loss = players[i].current_loss
                    Q_max, Q_action = players[i].select_enable_action(
                        state, targets)
                    print(
                        "player:{:1d} | pos:{:2d} | LOSS: {:.4f} | Q_MAX: {:.4f}"
                        .format(playerID[i], action, loss, Q_max))

                # Result of executing the action: has the game ended?
                terminal = env.isEnd()

        w = env.winner()
        print("EPOCH: {:03d}/{:03d} | WIN: player{:1d}".format(e, n_epochs, w))

    # Only player2 (the second mover) is saved.
    players[1].save_model()
예제 #2
0
            # Each player (index 0, then 1) takes one turn.
            for i in range(0, 2):
                state = env.screen  # fetch the current board state (array)
                targets = env.getEnables(
                    playerID[i])  # fetch the array of positions this player can play

                if len(targets) > 0:  # there is at least one place to play
                    # Select an action.
                    # The third argument is 1 / (e / 10 + 1), which shrinks from 1 as
                    # `e` grows -- presumably the exploration rate; confirm.
                    action = players[i].selectAction(state, targets,
                                                     1 / ((e / 10) + 1))

                    # Execute the action.
                    # `action` is split into [action // 8, action % 8] -- presumably a
                    # flat index into an 8-column board; confirm.
                    env.doFlip([action // 8, action % 8], playerID[i])

                    # End-of-game check.
                    win = env.winner()  # player currently in the lead
                    end = env.isEnd()

                    # Next state.
                    state_X = env.screen
                    # NOTE(review): for i == 1 this reads playerID[2]; verify that
                    # playerID has at least three entries.
                    target_X = env.getEnables(playerID[i + 1])
                    if len(target_X) == 0:
                        # No moves for playerID[i + 1]: fall back to playerID[i]'s moves.
                        target_X = env.getEnables(playerID[i])

                    # NOTE(review): the reward is 1 whenever the game has ended; `win`
                    # is not consulted in the visible code -- confirm this is intended.
                    reward = 0
                    if end == True:
                        reward = 1

                    players[i].storeExperience(state, targets, action, reward,
                                               state_X, target_X, end)

                    # for log
예제 #3
0
    # Parse command-line arguments.
    parser = argparse.ArgumentParser()
    parser.add_argument("-m", "--model_path")
    parser.add_argument("-s", "--save", dest="save", action="store_true")
    parser.set_defaults(save=False)
    args = parser.parse_args()

    # Environment and agent.
    env = Reversi()
    agent = DQNAgent(env.enable_actions, env.name, env.screen_n_rows,
                     env.screen_n_cols)
    # NOTE(review): the checkpoint path is hard-coded; `args.model_path` is not
    # used in the visible code -- confirm whether it should be.
    agent.load_model("models1/Reversi.ckpt")

    # Game loop.
    print("------------- GAME START ---------------")
    while not env.isEnd():
        print("*** userターン○ ***")
        env.print_screen()
        enables = env.get_enables(1)
        if len(enables) > 0:
            # Keep prompting until the entered number is one of `enables`.
            flg = False
            while not flg:
                print("番号を入力してください")
                print(enables)
                inp = input('>>>  ')
                # int() raises ValueError on non-numeric input; it is not caught here.
                action_t = int(inp)
                for i in enables:
                    if action_t == i:
                        flg = True
                        break
예제 #4
0
    parser.set_defaults(save=False)
    args = parser.parse_args()

    # Environment and two agents, each loaded from its own hard-coded checkpoint.
    env = Reversi()
    agent1 = DQNAgent(env.enable_actions, env.name, env.screen_n_rows,
                      env.screen_n_cols)
    agent1.load_model("models1/Reversi.ckpt")

    agent2 = DQNAgent(env.enable_actions, env.name, env.screen_n_rows,
                      env.screen_n_cols)
    agent2.load_model("models2/Reversi.ckpt")

    # Game loop.
    print("------------- GAME START ---------------")
    while not env.isEnd():
        # Disabled interactive user turn, kept as commented-out code; its prompt
        # strings are left exactly as written.
        # print("*** userターン○ ***")
        # env.print_screen()
        # enables = env.get_enables(1)
        # if len(enables) > 0:
        #     flg = False
        #     while not flg:
        #         print("番号を入力してください")
        #         print(enables)
        #         inp = input('>>>  ')
        #         action_t = int(inp)
        #         for i in enables:
        #             if action_t == i:
        #                 flg = True
        #                 break