def game_loop_ai_vs_ai(n=20000):
    """Play one complete game in which every move is chosen by the move search.

    Before each move the current state is printed, then the move returned by
    ``find_best_next_move(state, n)`` is played. Once ``state.is_finished()``
    reports true, the final state and ``state.winner`` are printed.

    Args:
        n: Budget forwarded unchanged to ``find_best_next_move``.
    """
    state = GameState()

    # NOTE: an interactive branch (player 0 typing "grid_x grid_y cell_x cell_y"
    # on stdin) used to be commented out here; both players now use the search.
    while True:
        if state.is_finished():
            break
        print(state)
        chosen_move = find_best_next_move(state, n)
        state.play(*chosen_move)

    print(state)
    print(f'Winner: {state.winner}')
Beispiel #2
0
    def load_game_state(self):
        """Load and display the recorded state for ``self.turn_number``.

        Prints the turn number, parses the matching entry of
        ``self.game_states`` into ``self.game.state``, prints the last
        comma-separated field of that entry as the action taken from the
        state, and finally redraws the tiles and the score.
        """
        print(f"turn number = {self.turn_number}")
        csv_line = self.game_states[self.turn_number]
        self.game.state = GameState.from_csv_line(csv_line)

        # Only the text after the final comma is needed, so split once from
        # the right instead of splitting the whole line.
        next_action = csv_line.rsplit(',', 1)[-1].strip()
        print(f"action from this state: {next_action}")

        self.draw_game_tiles()
        self.draw_score()
Beispiel #3
0
 def play_game(self, game_dir, random_seed=None):
     """Play a single game to completion and print the final score.

     A fresh ``GameState`` is started with ``new_game(random_seed=...,
     game_dir=...)``; until ``game_over`` is set, the move returned by
     ``self.next_action`` is applied with ``move_tiles``.

     Args:
         game_dir: Forwarded to ``GameState.new_game`` as ``game_dir``.
         random_seed: Forwarded to ``GameState.new_game`` as ``random_seed``.
     """
     game_state = GameState()
     game_state.new_game(random_seed=random_seed, game_dir=game_dir)
     while True:
         if game_state.game_over:
             break
         chosen_direction = self.next_action(game_state)
         game_state.move_tiles(chosen_direction)
     print(f"final score: {game_state.score}")
Beispiel #4
0
def main(model_h5_file: str, game_file: str):
    """Replay a recorded game and print the model's preferred action per state.

    Args:
        model_h5_file: Path handed to ``load_model``.
        game_file: Text file with one recorded state per line, each parsed by
            ``GameState.from_csv_line``.
    """
    print(f"==== Loading model from {model_h5_file} ====")
    value_model = load_model(model_h5_file)

    # Parse every recorded line up front, before any prediction is made.
    with open(game_file, 'r') as game_log:
        game_states = [GameState.from_csv_line(row) for row in game_log]

    for current_state in game_states:
        # Add a leading axis so that a single board forms a batch of one.
        encoded_tiles = convert_tiles_to_bitarray(current_state.tiles)
        network_input = np.expand_dims(encoded_tiles, axis=0)
        network_output = value_model.predict(network_input)[0]
        assert len(network_output) == 4
        best_index = np.argmax(network_output)

        print(pretty_print_tiles(current_state))
        print("selected action:", ACTIONS[best_index])
        print("---")
    # NOTE(review): the `def` line that owns this body is not visible here.
    # The body reads `state` and `n`; presumably these are the parameters of
    # find_best_next_move(state, n), which other code in this file calls
    # -- confirm.

    # Split the budget `n` evenly between the candidate moves.
    # NOTE(review): raises ZeroDivisionError when there is no possible move.
    possible_moves    = list(state.get_possible_next_move())
    n_next_move       = len(possible_moves)
    run_by_move       = n // n_next_move
    player            = state.get_player()
    move_to_win_proba = {}

    for move in possible_moves:
        # Evaluate each move on a private copy so `state` itself is not mutated.
        move_state = deepcopy(state)
        move_state.play(*move)
        proba = evaluate_position(move_state, run_by_move)
        # Keep only the entry for the player who was to move before playing.
        # presumably proba is (win_0, win_1, draw) -- TODO confirm
        move_to_win_proba[move] = proba[player]

    # Rank the moves from highest to lowest stored value and print the table.
    result = sorted(move_to_win_proba.items(), key = lambda x: x[1], reverse = True)
    print(f'Number of game run by potential move: {run_by_move}')
    print(
        *(f'{xg} {yg} {xc} {yc} -> {100 * proba:5.2f}%' for (xg, yg, xc, yc), proba in result),
        sep = '\n'
    )

    # The first entry of the ranking is the best move; return the move only.
    return result[0][0]

if __name__ == '__main__':
    # Script entry point: play the move (1, 1, 1, 1) on a fresh game, then
    # print the move that find_best_next_move returns for the resulting state.
    n       = 50000  # budget passed to find_best_next_move
    state   = GameState()
    # Disabled experiment: repeatedly evaluate the starting position.
    # for i in range(30):
    #     win_0, win_1, draw = evaluate_position(state, n)
    #     print(f'[{i:2d}] {100 * win_0:5.2f}% {100 * win_1:5.2f}% {100 * draw:5.2f}%')
    state.play(1, 1, 1, 1)
    result = find_best_next_move(state, n)
    print(result)
def main():
    """Summarise a directory of saved games as a text report plus figures.

    Command-line arguments:
        game_dir: Directory that is searched for ``*.csv`` game files.
        figure_prefix: Prefix of every output filename and of the plot titles.

    Each CSV file is expected to be named ``<date>_<random_seed>.csv``. Its
    last line is parsed with ``GameState.from_csv_line``; files whose final
    state is not ``game_over`` (and empty files) are reported and skipped.

    Outputs:
        ``<figure_prefix>_summary.txt`` in the working directory, and six PNG
        figures under ``figures/`` (created if missing).

    All per-game lists (filenames, seeds, scores, lengths, max tiles) hold
    exactly one entry per *kept* game, so an index into one of them is valid
    for all the others. If no finished game is found, a message is printed
    and nothing is written.

    Raises:
        ValueError: If a filename stem contains no ``_`` separator.
    """
    parser = argparse.ArgumentParser(description='Play N games using the random bot.')
    parser.add_argument('game_dir', type=str,
        help='directory where games are saved to')
    parser.add_argument('figure_prefix', type=str,
        help='prefix of analysis figure filenames (saved to the figures directory)')
    args = parser.parse_args()

    files = glob.glob(f"{args.game_dir}/*.csv")
    seen_seeds = set()      # every seed encountered, kept or not (duplicate check)
    kept_files = []         # the lists below are index-aligned with this one
    random_seeds = []
    scores = []
    game_lens = []
    max_tile_values = []
    for filepath in files:
        filename = os.path.basename(filepath)
        root, ext = os.path.splitext(filename)
        random_seed = None
        if ext:
            _date, random_seed = root.split('_', 1)
            if random_seed in seen_seeds:
                print(f"Random seed {random_seed} already used!")
            seen_seeds.add(random_seed)
        else:
            print("Unable to parse filename extension")

        with open(filepath, 'r') as f:
            lines = f.readlines()
        if not lines:
            # An empty file has no final state to analyse.
            print(f"Game file is empty: {filepath}")
            continue
        final_game_state = GameState.from_csv_line(lines[-1])
        if not final_game_state.game_over:
            print(f"Game not over for {filepath}")
            continue

        # Record everything for a kept game in one place so that the lists
        # can never drift out of step with each other.
        kept_files.append(filepath)
        random_seeds.append(random_seed)
        max_tile_values.append(final_game_state.max_tile_value())
        scores.append(final_game_state.score)
        game_lens.append(len(lines))

    if not scores:
        # The min/max/argmax reductions below are undefined on empty input.
        print(f"No finished games found in {args.game_dir}")
        return

    scores = np.array(scores, dtype=np.uint)
    game_lens = np.array(game_lens, dtype=np.uint)
    max_tile_values = np.array(max_tile_values, dtype=np.uint)

    with open(f"{args.figure_prefix}_summary.txt", 'w') as f:
        f.write(f"max in scores = {np.amax(scores)}\n")
        f.write(f"max score filename = {kept_files[np.argmax(scores)]}\n")
        f.write(f"min in scores = {np.amin(scores)}\n")
        f.write(f"min score filename = {kept_files[np.argmin(scores)]}\n")
        f.write(f"mean in scores = {np.mean(scores)}\n")
        f.write(f"median in scores = {np.median(scores)}\n")

        f.write(f"max in game_lens = {np.amax(game_lens)}\n")
        f.write(f"longest game filename = {kept_files[np.argmax(game_lens)]}\n")
        f.write(f"min in game_lens = {np.amin(game_lens)}\n")
        f.write(f"shortest game filename = {kept_files[np.argmin(game_lens)]}\n")
        f.write(f"mean in game_lens = {np.mean(game_lens)}\n")
        f.write(f"median in game_lens = {np.median(game_lens)}\n")

    # savefig does not create missing directories.
    os.makedirs("figures", exist_ok=True)

    fig, ax = plt.subplots()
    ax.hist(scores, bins='auto')
    ax.set_xlabel("Final Scores")
    ax.set_ylabel("Number of Games")
    ax.set_title(f"{args.figure_prefix} Bot: Final Scores Distribution")
    fig.savefig(f"figures/{args.figure_prefix}_scores_hist.png")
    plt.close(fig)

    fig, ax = plt.subplots()
    ax.hist(game_lens, bins='auto')
    ax.set_xlabel("Game Lengths (# of Turns)")
    ax.set_ylabel("Number of Games")
    ax.set_title(f"{args.figure_prefix} Bot: Game Lengths Distribution")
    fig.savefig(f"figures/{args.figure_prefix}_game_lens_hist.png")
    plt.close(fig)

    # np.unique returns the distinct max-tile values in ascending order; they
    # are reused for the box plot further down.
    fig, ax = plt.subplots()
    tile_values, tile_freqs = np.unique(max_tile_values, return_counts=True)
    ax.bar(range(len(tile_values)), tile_freqs)
    plt.xticks(range(len(tile_values)), tile_values)
    ax.set_xlabel("Final Max Tile Value")
    ax.set_ylabel("Number of Games")
    ax.set_title(f"{args.figure_prefix} Bot: Final Max Tile Distribution")
    fig.savefig(f"figures/{args.figure_prefix}_max_tile_values_hist.png")
    plt.close(fig)

    fig, ax = plt.subplots()
    ax.scatter(game_lens, scores)
    ax.set_xlabel("Game Lengths (# of Turns)")
    ax.set_ylabel("Final Scores")
    ax.set_title(f"{args.figure_prefix} Bot: Score vs. Game Length")
    fig.savefig(f"figures/{args.figure_prefix}_score_vs_game_len.png")
    plt.close(fig)

    # One box per distinct max-tile value, holding the scores of the games
    # that ended with that max tile.
    fig, ax = plt.subplots()
    data = [scores[max_tile_values == tile_value] for tile_value in tile_values]
    ax.boxplot(data, labels=list(tile_values))
    ax.set_xlabel("Final Max Tile Value")
    ax.set_ylabel("Final Scores")
    ax.set_title(f"{args.figure_prefix} Bot: Score vs. Final Max Tile Value")
    fig.savefig(f"figures/{args.figure_prefix}_score_vs_max_tile_value.png")
    plt.close(fig)

    # Only games whose seed could be parsed take part in the seed plot; the
    # mask keeps x and y the same length.
    has_seed = np.array([seed is not None for seed in random_seeds], dtype=bool)
    fig, ax = plt.subplots()
    ax.scatter([seed for seed in random_seeds if seed is not None],
               scores[has_seed])
    ax.set_xlabel("Random Seeds")
    ax.set_ylabel("Final Scores")
    ax.set_title(f"{args.figure_prefix} Bot: Score vs. Random Seeds")
    fig.savefig(f"figures/{args.figure_prefix}_score_vs_random_seed.png")
    plt.close(fig)
def _expected_action_value(state, move, values, two_tile_prob):
    """Return the one-step expected value of playing *move* in *state*.

    Sums ``probability * (reward + values[successor])`` over the
    ``(probability, successor, reward)`` triples produced by
    ``state.successor_states``. Successors are looked up with
    ``state_to_string``, the same key function used to fill *values*.
    """
    return sum(
        probability * (reward + values[state_to_string(successor)])
        for probability, successor, reward in state.successor_states(
            move, prob_two_tile=two_tile_prob))


def main():
    """Enumerate the states of an NxM 2048 game and solve it by value iteration.

    Command-line options:
        -r / --num_rows: Number of board rows (default 2).
        -c / --num_cols: Number of board columns (default 2).
        -p / --two_tile_prob: Probability that a spawned tile is a 2-tile
            rather than a 4-tile (default 1.0).

    Steps:
        1. Breadth-first search from every board holding a single tile of
           value 1, recording each state and each transition in a directed
           graph.
        2. Value iteration over the enumerated states until no value rises
           by more than a fixed tolerance.
        3. Write per-state values and best moves to
           ``mdp_<rows>x<cols>_prob<p>.txt``, the graph in Graphviz dot
           format, and a PNG drawing of the graph coloured by value.

    Every transition is recorded as a graph edge, including transitions into
    states that were already expanded, because the drawing treats nodes with
    out-degree zero as terminal.
    """
    # Local import keeps this block self-contained.
    from collections import deque

    parser = argparse.ArgumentParser(
        description=
        'Build and solve an MDP that models an NxM game of 2048 (using value iteration).'
    )
    parser.add_argument('-r',
                        '--num_rows',
                        type=int,
                        default=2,
                        help='number of rows (default: 2)')
    parser.add_argument('-c',
                        '--num_cols',
                        type=int,
                        default=2,
                        help='number of columns (default: 2)')
    parser.add_argument(
        '-p',
        '--two_tile_prob',
        type=float,
        default=1.0,
        help=
        'probability of spawning a 2-tile (instead of a 4-tile) after a successful move'
    )
    args = parser.parse_args()

    num_rows = args.num_rows
    num_cols = args.num_cols
    two_tile_prob = args.two_tile_prob
    output_filename = f"mdp_{num_rows}x{num_cols}_prob{two_tile_prob}.txt"

    # Graph of all reachable states; the game parameters are stored as graph
    # attributes.
    states_graph = nx.DiGraph(num_rows=num_rows,
                              num_cols=num_cols,
                              two_tile_prob=two_tile_prob)

    # Seed the search with every board that holds one tile of value 1.
    frontier = deque()
    # NOTE(review): `visited` stays a list because membership relies on
    # GameState equality; whether GameState is hashable is not visible here.
    visited = []
    for i in range(num_rows):
        for j in range(num_cols):
            tiles = [[0] * num_cols for _ in range(num_rows)]
            tiles[i][j] = 1
            new_state = GameState(nrows=num_rows,
                                  ncols=num_cols,
                                  tiles=tiles,
                                  score=0,
                                  game_over=False)
            frontier.append(new_state)
            states_graph.add_node(state_to_string(new_state))

    # Breadth-first search to enumerate all reachable states.
    while frontier:
        state = frontier.popleft()
        if state in visited:
            continue

        visited.append(state)
        state_str = state_to_string(state)
        states_graph.add_node(state_str)

        for move in state.moves_available():
            successors = state.successor_states(move,
                                                prob_two_tile=two_tile_prob)
            for _probability, successor, _reward in successors:
                # Record the transition before the visited check, so that a
                # state whose successors were all expanded earlier still has
                # outgoing edges.
                states_graph.add_edge(state_str, state_to_string(successor))
                if successor not in visited:
                    frontier.append(successor)

    # Print all states.
    all_states = visited
    for state in all_states:
        print(state.tiles)
    print(f"num states: {len(all_states)}")

    # Largest per-state increase that still counts as "converged".
    convergence_tolerance = 1e-5  # TODO arbitrary threshold...

    with open(output_filename, "w") as output:
        output.write(f"number of rows = {num_rows}\n")
        output.write(f"number of columns = {num_cols}\n")
        output.write(f"prob of 2-tile (vs. 4-tile) = {two_tile_prob}\n\n")

        output.write(f"number of states = {len(all_states)}\n")

        # V holds the values of the previous sweep, V_new those of the
        # current sweep; both start at 0 for every state.
        V = {state_to_string(state): 0 for state in all_states}
        V_new = dict(V)

        iter_num = 1
        while True:
            print(f"========== value iteration (iter # {iter_num})")
            for state in all_states:
                moves = state.moves_available()
                if not moves:
                    # Terminal state: its value is never updated.
                    continue

                state_str = state_to_string(state)
                best_action_val = max(
                    _expected_action_value(state, move, V, two_tile_prob)
                    for move in moves)

                # Values are only ever raised, never lowered.
                if best_action_val > V[state_str]:
                    V_new[state_str] = best_action_val
                    print(
                        f"V[{state_str}] updated from {V[state_str]} to {V_new[state_str]}"
                    )

            # Converged once no value rose by more than the tolerance.
            converged = not any(
                V_new[state_str] - V[state_str] > convergence_tolerance
                for state_str in V_new)
            if converged:
                print("Value iteration converged!")
                break

            # Not converged: the new values become the baseline of the next sweep.
            V.update(V_new)
            iter_num += 1

        # Report the value of every state together with its action values.
        for state in all_states:
            state_str = state_to_string(state)
            output.write(f"state {state_str}: Value = {V[state_str]}\n")
            states_graph.nodes[state_str]['value'] = V[state_str]

            moves = state.moves_available()
            action_total = 0
            best_actions = []
            best_action_val = float('-inf')
            for move in moves:
                action_val = _expected_action_value(state, move, V,
                                                    two_tile_prob)
                output.write(f"    {move} has value {action_val}\n")
                if action_val > best_action_val:
                    best_action_val = action_val
                    best_actions = [move]
                elif action_val == best_action_val:
                    best_actions.append(move)
                action_total += action_val

            if moves:
                output.write(f"****Best move(s) = {best_actions}\n")
                # Value of choosing uniformly at random among the moves.
                random_action_value = action_total / len(moves)
                if random_action_value < V[state_str]:
                    output.write(
                        f"****Random move value = {random_action_value} (a loss of {V[state_str] - random_action_value})\n"
                    )

    # Save the graph of states in Graphviz dot format.
    nx.drawing.nx_agraph.write_dot(
        states_graph,
        f"states_graph_dot_{num_rows}x{num_cols}_prob{two_tile_prob}")

    # Draw the graph of states.
    fig, ax = plt.subplots(figsize=(25, 13))
    ax.axis('off')
    pos = graphviz_layout(states_graph,
                          prog="dot")  # "dot" is good for directed graphs

    # Colour nodes by value; nodes without outgoing edges are drawn as
    # squares (terminal), all others as circles.
    terminal_states = [
        node for node in states_graph.nodes()
        if states_graph.out_degree(node) == 0
    ]
    nonterminal_states = [
        node for node in states_graph.nodes()
        if states_graph.out_degree(node) > 0
    ]
    nx.draw_networkx_nodes(
        states_graph,
        pos,
        nodelist=terminal_states,
        node_size=200,
        node_shape='s',
        alpha=0.5,
        node_color=[V[node] for node in terminal_states],
        cmap='viridis')
    nodes_nonterminal = nx.draw_networkx_nodes(
        states_graph,
        pos,
        nodelist=nonterminal_states,
        node_size=200,
        node_shape='o',
        alpha=0.5,
        node_color=[V[node] for node in nonterminal_states],
        cmap='viridis')
    # The colour bar is built from the non-terminal node collection only.
    fig.colorbar(nodes_nonterminal)

    nx.draw_networkx_edges(states_graph, pos)
    nx.draw_networkx_labels(states_graph, pos, font_size=8)

    fig.savefig(f"states_graph_{num_rows}x{num_cols}_prob{two_tile_prob}.png")
    plt.close(fig)