def _rollout_averages(board, n, add_board_score):
    """Average the outcome of ``n`` random playouts, grouped by first move.

    Each playout starts from ``board``, plays one uniformly random first
    move, then keeps playing uniformly random moves until the game reports
    that it has ended (``end != 0``).

    Args:
        board: Starting board handed to every simulated ``Game``.
        n: Number of playouts to run.
        add_board_score: When true, ``board_score`` of the position reached
            right after the first move is added to the playout's final
            ``score_move``.

    Returns:
        A dict mapping each first direction that was sampled at least once
        to the mean playout score. Directions that were never drawn are
        omitted so that no division by zero can occur.
    """
    samples = {0: [], 1: [], 2: [], 3: []}
    for _ in range(n):
        # Fresh simulator for every playout.
        rand_game = Game(4, 4096, enable_rewrite_board=True)
        rand_game.board = board
        # Random first move; the playout is credited to this direction.
        first_direction = np.random.randint(0, 4)
        rand_game.move_and_score(first_direction)
        bonus = board_score(rand_game.board) if add_board_score else 0
        while rand_game.end == 0:
            rand_game.move_and_score(np.random.randint(0, 4))
        samples[first_direction].append(rand_game.score_move + bonus)
    return {
        direction: sum(scores) / len(scores)
        for direction, scores in samples.items()
        if scores
    }


def exploration(board):
    """Pick a move for a critical position using Monte-Carlo playouts.

    A first, coarse pass runs a number of random playouts that grows as the
    board fills up (between 400 and 1000). If the resulting averages look
    dire, a second, much larger pass is run and its result is used instead.

    Args:
        board: Current board; must provide ``flatten()``.

    Returns:
        The direction (0-3) with the highest average playout score.
    """
    # Fewer empty cells -> more severe situation -> more simulations.
    space = list(board.flatten()).count(0)
    n = max(400, (10 - space) * 100)

    score_ave = _rollout_averages(board, n, add_board_score=True)
    print(score_ave)
    # Choose the direction with the best average score.
    direction = max(score_ave, key=score_ave.get)
    max_score = score_ave[direction]
    min_score = min(score_ave.values())

    # Situation is very critical: run a much deeper search.
    if max_score < 100 or min_score < 50:
        if max_score < 50:
            # Below a survival score of 50 every step is a struggle.
            n = 8000
        elif max_score < 80:
            n = 3000
        else:
            n = 2000
        # The deep pass ranks directions by playout score alone.
        score_ave = _rollout_averages(board, n, add_board_score=False)
        print(score_ave)
        direction = max(score_ave, key=score_ave.get)
    return direction
Beispiel #2
0
def generate_fingerprint(AgentClass, **kwargs):
    """Replay the stored board cases through an agent and encode its moves.

    Builds ``AgentClass(game=game, **kwargs)``, loads the saved parameters
    for its ``net7``, ``net6``, ``net5``, ``net4`` and ``net2`` networks
    onto the CPU, then asks the agent for one move per board found in
    ``board_cases.json``.

    Returns:
        A string made of the chosen directions, one per board, in file order.
    """
    with open("board_cases.json") as cases_file:
        boards = json.load(cases_file)

    game = Game(size=4, enable_rewrite_board=True)
    agent = AgentClass(game=game, **kwargs)

    # net3 is intentionally not loaded (it was disabled in the original code).
    checkpoints = (
        ("net7", "net_3_params_7.pkl"),
        ("net6", "net_3_params_6.pkl"),
        ("net5", "net_3_params_5.pkl"),
        ("net4", "net_3_params_4.pkl"),
        ("net2", "net_3_params_2.pkl"),
    )
    for attr_name, weights_path in checkpoints:
        network = getattr(agent, attr_name)
        network.load_state_dict(torch.load(weights_path, map_location='cpu'))

    moves = []
    for layout in boards:
        game.board = np.array(layout)
        moves.append(str(agent.step()))
    return "".join(moves)
Beispiel #3
0
    def step(self):
        """Choose the next move for the current board of ``self.game``.

        The board is copied into a 4x4 nested list and passed to
        ``self.Board8``, which returns two per-candidate sequences ``P`` and
        ``Pcount`` (presumably scores and vote counts -- confirm against
        ``Board8``). Candidates are tried from highest ``Pcount`` downwards,
        with ties broken by the larger ``P``, until one actually changes the
        board when simulated.

        Returns:
            ``3 - select``, where ``select`` is the index of the chosen
            candidate.
        """
        prev_board = self.game.board
        # Plain nested-list copy of the 4x4 board for Board8.
        image = [[prev_board[i][j] for j in range(4)] for i in range(4)]
        P, Pcount = self.Board8(image)
        counter = 0
        while True:
            counter += 1
            # Find the untried candidate with the highest count; equal counts
            # are resolved in favour of the larger P value.
            select = -1
            pmax = -1
            for i in range(4):
                if pmax < Pcount[i]:
                    pmax = Pcount[i]
                    select = i
                elif pmax == Pcount[i] and P[select] < P[i]:
                    pmax = Pcount[i]
                    select = i
            # Mark this candidate as tried so later rounds rank it last.
            Pcount[select] = -1
            # Simulate the move on a scratch game to see whether it is legal.
            new_game = Game(4, enable_rewrite_board=True)
            new_game.board = prev_board
            new_game.move(3 - select)
            new_board = new_game.board
            # A move counts only if at least one cell differs afterwards.
            isMoved = not (prev_board == new_board).all()
            if isMoved: break
            # After four failed attempts, fall back to argmax over Pcount.
            if counter == 4:
                select = np.argmax(Pcount)
                break

        # Candidate indices are mirrored relative to the game's direction
        # codes, hence 3 - select.
        return (3 - select)
Beispiel #4
0
def generate_fingerprint(AgentClass, **kwargs):
    """Replay the stored board cases through an agent and encode its moves.

    Creates a rewritable 4x4 ``Game``, instantiates
    ``AgentClass(game=game, **kwargs)`` and, for every board listed in
    ``board_cases.json``, installs that board and records ``agent.step()``.

    Returns:
        A string made of the chosen directions, one per board, in file order.
    """
    with open("board_cases.json") as cases_file:
        boards = json.load(cases_file)

    game = Game(size=4, enable_rewrite_board=True)
    agent = AgentClass(game=game, **kwargs)

    moves = []
    for layout in boards:
        game.board = np.array(layout)
        moves.append(str(agent.step()))
    return "".join(moves)
Beispiel #5
0
def get_grids_next_step(grid):
    """Return the four successor boards s' of the current board s.

    For each direction 0-3 a copy of ``grid`` is loaded into a fresh
    ``Game`` created with ``random=False`` and the move is attempted.

    Args:
        grid: Current board; must provide ``copy()``.

    Returns:
        A list of four boards, indexed by direction. If a move raises, the
        board held by the game at that point is returned for that direction
        (presumably the unchanged copy -- confirm against ``Game.move``).
    """
    grids_list = []

    for movement in range(4):
        env1 = Game(4, random=False, enable_rewrite_board=True)
        env1.board = grid.copy()
        try:
            env1.move(movement)
        except Exception:
            # Best effort: a failing move keeps whatever board the game has.
            # Only ordinary errors are swallowed, so KeyboardInterrupt and
            # SystemExit still propagate.
            pass
        grids_list.append(env1.board)

    return grids_list
Beispiel #6
0
def generate_fingerprint(AgentClass, **kwargs):
    """Replay the stored board cases through a TensorFlow agent.

    Opens a TensorFlow session, builds ``AgentClass(game=game, sess=sess)``
    and, for every board listed in ``board_cases.json``, installs that board
    and records ``agent.step()``. Progress is printed for each board.

    Args:
        AgentClass: Agent constructor taking ``game`` and ``sess`` keywords
            and exposing ``build()`` and ``step()``.
        **kwargs: Accepted for signature compatibility with the other
            fingerprint helpers; not forwarded to ``AgentClass``.

    Returns:
        A string made of the chosen directions, one per board, in file order.
    """
    sess = tf.Session()
    try:
        with open("board_cases.json") as f:
            board_json = json.load(f)

        game = Game(size=4, enable_rewrite_board=True)
        agent = AgentClass(game=game, sess=sess)
        agent.build()

        trace = []
        num = len(board_json)
        for index, board in enumerate(board_json):
            print('{} left.'.format(num - index))
            game.board = np.array(board)
            trace.append(agent.step())
        return "".join(str(i) for i in trace)
    finally:
        # The agent does not outlive this call, so release the session's
        # resources even if loading or stepping fails.
        sess.close()
Beispiel #7
0
def intuition(board):
    """Pick a move for a normal position with a fast, near-greedy rule.

    Each of the four directions is tried once on a scratch game. A direction
    is rated by ``board_score`` of the resulting board plus the merge score
    it produced; a direction that produces no merge score gets a flat -100
    in place of the merge score.

    Returns:
        The direction (0-3) with the highest rating; the lowest-numbered
        direction wins ties.
    """
    simulator = Game(4, 4096, enable_rewrite_board=True)
    ratings = {}
    for move in range(4):
        # Reset the scratch game before every trial.
        simulator.board = board
        simulator.score_move = 0
        simulator.only_move(move)

        merged = simulator.score_move
        layout_value = board_score(simulator.board)
        if merged == 0:
            # Nothing merged: penalise this direction.
            ratings[move] = -100 + layout_value
        else:
            # Something merged: reward the merge plus the board quality.
            ratings[move] = merged + layout_value
    return max(ratings, key=ratings.get)