def game_loop_ai_vs_ai(n=20000):
    """Play one complete game in which the AI picks the move for every turn.

    The board is printed before each move, and once more after the game has
    finished, followed by the winner.

    Args:
        n: forwarded unchanged to ``find_best_next_move`` for each move
            search (presumably the simulation budget -- confirm there).
    """
    game = GameState()

    while True:
        if game.is_finished():
            break
        print(game)
        chosen_move = find_best_next_move(game, n)
        game.play(*chosen_move)

    # Show the final position and who won it.
    print(game)
    print(f'Winner: {game.winner}')
def load_game_state(self):
    """Load the recorded state for ``self.turn_number`` and redraw the view.

    The CSV line stored at ``self.game_states[self.turn_number]`` is parsed
    into a ``GameState`` and assigned to ``self.game.state``.  The last
    comma-separated field of that line is printed as the action taken from
    this state, then the tiles and the score are redrawn.
    """
    turn = self.turn_number
    print(f"turn number = {turn}")

    csv_line = self.game_states[turn]
    self.game.state = GameState.from_csv_line(csv_line)

    # The action is whatever follows the final comma of the record.
    action_field = csv_line.rsplit(',', 1)[-1]
    print(f"action from this state: {action_field.strip()}")

    self.draw_game_tiles()
    self.draw_score()
def play_game(self, game_dir, random_seed=None):
    """Play a single game to completion and print the final score.

    Args:
        game_dir: passed to ``GameState.new_game`` as ``game_dir``.
        random_seed: passed to ``GameState.new_game`` as ``random_seed``.
    """
    game = GameState()
    game.new_game(random_seed=random_seed, game_dir=game_dir)

    # Ask this player for an action and apply it until the game reports over.
    while not game.game_over:
        game.move_tiles(self.next_action(game))

    print(f"final score: {game.score}")
def main(model_h5_file: str, game_file: str):
    """Replay a recorded game and print the model's preferred action per state.

    Args:
        model_h5_file: path handed to ``load_model``.
        game_file: text file with one CSV-encoded game state per line.

    Every line is parsed into a ``GameState`` before any prediction is made.
    For each state the tiles are converted with ``convert_tiles_to_bitarray``,
    a leading batch axis is added, and the first row of the model's prediction
    (asserted to hold exactly four entries) is arg-maxed to choose the entry
    of ``ACTIONS`` that is printed.
    """
    print(f"==== Loading model from {model_h5_file} ====")
    value_model = load_model(model_h5_file)

    with open(game_file, 'r') as game_fh:
        raw_lines = game_fh.readlines()
    recorded_states = [GameState.from_csv_line(raw) for raw in raw_lines]

    for recorded in recorded_states:
        bit_tiles = convert_tiles_to_bitarray(recorded.tiles)
        # The model predicts on batches, so wrap the single sample in one.
        batch = np.expand_dims(bit_tiles, axis=0)
        action_values = value_model.predict(batch)[0]
        assert len(action_values) == 4

        best_index = np.argmax(action_values)
        print(pretty_print_tiles(recorded))
        print("selected action:", ACTIONS[best_index])
        print("---")
possible_moves = list(state.get_possible_next_move()) n_next_move = len(possible_moves) run_by_move = n // n_next_move player = state.get_player() move_to_win_proba = {} for move in possible_moves: move_state = deepcopy(state) move_state.play(*move) proba = evaluate_position(move_state, run_by_move) move_to_win_proba[move] = proba[player] result = sorted(move_to_win_proba.items(), key = lambda x: x[1], reverse = True) print(f'Number of game run by potential move: {run_by_move}') print( *(f'{xg} {yg} {xc} {yc} -> {100 * proba:5.2f}%' for (xg, yg, xc, yc), proba in result), sep = '\n' ) return result[0][0] if __name__ == '__main__': n = 50000 state = GameState() # for i in range(30): # win_0, win_1, draw = evaluate_position(state, n) # print(f'[{i:2d}] {100 * win_0:5.2f}% {100 * win_1:5.2f}% {100 * draw:5.2f}%') state.play(1, 1, 1, 1) result = find_best_next_move(state, n) print(result)
def main():
    """Summarise a directory of saved games and plot their statistics.

    Command-line arguments:
        game_dir: directory containing the saved ``*.csv`` game files.  The
            file stem is split at its first underscore and the part after it
            is taken as that game's random seed.
        figure_prefix: prefix for ``<prefix>_summary.txt`` and for the PNG
            files written to the ``figures`` directory.

    Only games whose last recorded state is flagged ``game_over`` are
    analysed.  File names, seeds, scores, game lengths and max tile values are
    collected as index-aligned lists, so the file reported for an extreme
    score/length and the seed plotted against each score belong to the same
    game even when some files are skipped.
    """
    parser = argparse.ArgumentParser(
        description='Analyze saved games and plot summary statistics.')
    parser.add_argument('game_dir', type=str,
                        help='directory where games are saved to')
    parser.add_argument(
        'figure_prefix', type=str,
        help='prefix of analysis figure filenames '
             '(saved to the figures directory)')
    args = parser.parse_args()

    # Parallel lists: entry i of each one describes the same finished game.
    kept_files = []
    random_seeds = []
    scores = []
    game_lens = []
    max_tile_values = []

    # Every seed encountered (finished game or not), for duplicate warnings.
    seen_seeds = set()

    for filepath in glob.glob(f"{args.game_dir}/*.csv"):
        root, ext = os.path.splitext(os.path.basename(filepath))
        if not ext:
            print("Unable to parse filename extension")
            continue
        _date, separator, random_seed = root.partition('_')
        if not separator:
            print(f"Unable to parse random seed from {filepath}")
            continue
        if random_seed in seen_seeds:
            print(f"Random seed {random_seed} already used!")
        seen_seeds.add(random_seed)

        with open(filepath, 'r') as f:
            lines = f.readlines()
        if not lines:
            print(f"No game states recorded in {filepath}")
            continue

        final_game_state = GameState.from_csv_line(lines[-1])
        if not final_game_state.game_over:
            print(f"Game not over for {filepath}")
            continue

        # Record the game only once it is known to be usable, so that all
        # five lists stay the same length.
        kept_files.append(filepath)
        random_seeds.append(random_seed)
        max_tile_values.append(final_game_state.max_tile_value())
        scores.append(final_game_state.score)
        game_lens.append(len(lines))

    if not scores:
        # The reductions below are undefined for an empty sample.
        print(f"No finished games found in {args.game_dir}")
        return

    scores = np.array(scores, dtype=np.uint)
    game_lens = np.array(game_lens, dtype=np.uint)
    max_tile_values = np.array(max_tile_values, dtype=np.uint)

    with open(f"{args.figure_prefix}_summary.txt", 'w') as f:
        f.write(f"max in scores = {np.amax(scores)}\n")
        f.write(f"max score filename = {kept_files[np.argmax(scores)]}\n")
        f.write(f"min in scores = {np.amin(scores)}\n")
        f.write(f"min score filename = {kept_files[np.argmin(scores)]}\n")
        f.write(f"mean in scores = {np.mean(scores)}\n")
        f.write(f"median in scores = {np.median(scores)}\n")
        f.write(f"max in game_lens = {np.amax(game_lens)}\n")
        f.write(
            f"longest game filename = {kept_files[np.argmax(game_lens)]}\n")
        f.write(f"min in game_lens = {np.amin(game_lens)}\n")
        f.write(
            f"shortest game filename = {kept_files[np.argmin(game_lens)]}\n")
        f.write(f"mean in game_lens = {np.mean(game_lens)}\n")
        f.write(f"median in game_lens = {np.median(game_lens)}\n")

    # savefig does not create missing directories.
    os.makedirs("figures", exist_ok=True)

    def save_and_close(fig, suffix):
        """Write *fig* to the figures directory and release it."""
        fig.savefig(f"figures/{args.figure_prefix}_{suffix}.png")
        plt.close(fig)

    fig, ax = plt.subplots()
    ax.hist(scores, bins='auto')
    ax.set_xlabel("Final Scores")
    ax.set_ylabel("Number of Games")
    ax.set_title(f"{args.figure_prefix} Bot: Final Scores Distribution")
    save_and_close(fig, "scores_hist")

    fig, ax = plt.subplots()
    ax.hist(game_lens, bins='auto')
    ax.set_xlabel("Game Lengths (# of Turns)")
    ax.set_ylabel("Number of Games")
    ax.set_title(f"{args.figure_prefix} Bot: Game Lengths Distribution")
    save_and_close(fig, "game_lens_hist")

    # np.unique returns the distinct max-tile values in ascending order.
    tile_values, tile_freqs = np.unique(max_tile_values, return_counts=True)

    fig, ax = plt.subplots()
    ax.bar(range(len(tile_values)), tile_freqs)
    plt.xticks(range(len(tile_values)), tile_values)
    ax.set_xlabel("Final Max Tile Value")
    ax.set_ylabel("Number of Games")
    ax.set_title(f"{args.figure_prefix} Bot: Final Max Tile Distribution")
    save_and_close(fig, "max_tile_values_hist")

    fig, ax = plt.subplots()
    ax.scatter(game_lens, scores)
    ax.set_xlabel("Game Lengths (# of Turns)")
    ax.set_ylabel("Final Scores")
    ax.set_title(f"{args.figure_prefix} Bot: Score vs. Game Length")
    save_and_close(fig, "score_vs_game_len")

    # One box per distinct max tile, holding the scores of the games that
    # ended with that max tile.
    fig, ax = plt.subplots()
    scores_by_tile = [scores[max_tile_values == value]
                      for value in tile_values]
    ax.boxplot(scores_by_tile, labels=tile_values.tolist())
    ax.set_xlabel("Final Max Tile Value")
    ax.set_ylabel("Final Scores")
    ax.set_title(f"{args.figure_prefix} Bot: Score vs. Final Max Tile Value")
    save_and_close(fig, "score_vs_max_tile_value")

    fig, ax = plt.subplots()
    ax.scatter(random_seeds, scores)
    ax.set_xlabel("Random Seeds")
    ax.set_ylabel("Final Scores")
    ax.set_title(f"{args.figure_prefix} Bot: Score vs. Random Seeds")
    save_and_close(fig, "score_vs_random_seed")
def main():
    """Enumerate the states of an NxM 2048 board and solve them as an MDP.

    Command-line options:
        -r/--num_rows: number of board rows (default 2).
        -c/--num_cols: number of board columns (default 2).
        -p/--two_tile_prob: probability forwarded to
            ``GameState.successor_states`` as ``prob_two_tile`` (default 1.0).

    Steps:
        1. Breadth-first search from every board holding a single tile,
           recording each state's transitions and adding one graph edge per
           transition.
        2. Value iteration over the recorded transitions until no value rises
           by more than 1e-5 in a sweep.
        3. Write per-state values, per-move values, the best move(s) and the
           value of a uniformly random move to
           ``mdp_<rows>x<cols>_prob<p>.txt``.
        4. Write the state graph in Graphviz dot format and render it to a
           PNG, coloured by state value.
    """
    from collections import deque

    parser = argparse.ArgumentParser(
        description=
        'Build and solve an MDP that models an NxM game of 2048 (using value iteration).'
    )
    parser.add_argument('-r', '--num_rows', type=int, default=2,
                        help='number of rows (default: 2)')
    parser.add_argument('-c', '--num_cols', type=int, default=2,
                        help='number of columns (default: 2)')
    parser.add_argument(
        '-p', '--two_tile_prob', type=float, default=1.0,
        help=
        'probability of spawning a 2-tile (instead of a 4-tile) after a successful move'
    )
    args = parser.parse_args()
    num_rows = args.num_rows
    num_cols = args.num_cols
    two_tile_prob = args.two_tile_prob
    output_filename = f"mdp_{num_rows}x{num_cols}_prob{two_tile_prob}.txt"

    states_graph = nx.DiGraph(num_rows=num_rows, num_cols=num_cols,
                              two_tile_prob=two_tile_prob)

    # Starting states: one board per cell, with a single tile of stored
    # value 1 in that cell (presumably the 2-tile -- confirm the encoding).
    frontier = deque()
    for i in range(num_rows):
        for j in range(num_cols):
            tiles = [[0] * num_cols for _ in range(num_rows)]
            tiles[i][j] = 1
            new_state = GameState(nrows=num_rows, ncols=num_cols,
                                  tiles=tiles, score=0, game_over=False)
            frontier.append(new_state)
            states_graph.add_node(state_to_string(new_state))

    # BFS over all reachable states.  `visited` stays a list because only
    # equality comparison of states is relied on here, not hashing.
    visited = []
    # transitions[k] describes visited[k] as
    #   (state_str, [(move, [(probability, successor_str, reward), ...]), ...])
    # so later passes never have to call successor_states() again.
    transitions = []
    while frontier:
        state = frontier.popleft()
        if state in visited:
            continue
        visited.append(state)
        state_str = state_to_string(state)
        states_graph.add_node(state_str)

        actions = []
        for move in state.moves_available():
            outcomes = []
            for probability, successor, reward in state.successor_states(
                    move, prob_two_tile=two_tile_prob):
                successor_str = state_to_string(successor)
                outcomes.append((probability, successor_str, reward))
                # Record the edge even when the successor was already
                # expanded; otherwise transitions into known states would be
                # missing from the graph.
                states_graph.add_edge(state_str, successor_str)
                if successor not in visited:
                    frontier.append(successor)
            actions.append((move, outcomes))
        transitions.append((state_str, actions))

    # print all states
    all_states = visited
    for state in all_states:
        print(state.tiles)
    print(f"num states: {len(all_states)}")

    # value iteration on the MDP
    with open(output_filename, "w") as output:
        output.write(f"number of rows = {num_rows}\n")
        output.write(f"number of columns = {num_cols}\n")
        output.write(f"prob of 2-tile (vs. 4-tile) = {two_tile_prob}\n\n")
        output.write(f"number of states = {len(all_states)}\n")

        # V holds the values of the previous sweep, V_new those being built.
        V = {state_str: 0 for state_str, _ in transitions}
        V_new = dict(V)

        iter_num = 1
        while True:
            print(f"========== value iteration (iter # {iter_num})")

            for state_str, actions in transitions:
                if not actions:
                    # No moves available: the value stays at 0.
                    continue
                best_action_val = max(
                    sum(probability * (reward + V[successor_str])
                        for probability, successor_str, reward in outcomes)
                    for _move, outcomes in actions)
                if best_action_val > V[state_str]:
                    V_new[state_str] = best_action_val
                    print(
                        f"V[{state_str}] updated from {V[state_str]} to {V_new[state_str]}"
                    )

            # Converged once no value increased by more than the threshold.
            # TODO arbitrary threshold...
            if not any(V_new[key] - V[key] > 1e-5 for key in V_new):
                print("Value iteration converged!")
                break

            V.update(V_new)
            iter_num += 1

        for state_str, actions in transitions:
            output.write(f"state {state_str}: Value = {V[state_str]}\n")
            states_graph.nodes[state_str]['value'] = V[state_str]

            action_total = 0
            best_actions = []
            best_action_val = float('-inf')
            for move, outcomes in actions:
                action_val = sum(
                    probability * (reward + V[successor_str])
                    for probability, successor_str, reward in outcomes)
                output.write(f" {move} has value {action_val}\n")
                if action_val > best_action_val:
                    best_action_val = action_val
                    best_actions = [move]
                elif action_val == best_action_val:
                    best_actions.append(move)
                action_total += action_val

            if actions:
                output.write(f"****Best move(s) = {best_actions}\n")
                # Expected value of picking uniformly among available moves.
                random_action_value = action_total / len(actions)
                if random_action_value < V[state_str]:
                    output.write(
                        f"****Random move value = {random_action_value} (a loss of {V[state_str] - random_action_value})\n"
                    )

    # save graph of states to Graphviz dot format
    nx.drawing.nx_agraph.write_dot(
        states_graph,
        f"states_graph_dot_{num_rows}x{num_cols}_prob{two_tile_prob}")

    # draw the graph of states
    fig, ax = plt.subplots(figsize=(25, 13))
    ax.axis('off')
    # "dot" is good for directed graphs
    pos = graphviz_layout(states_graph, prog="dot")

    # Squares mark states with no outgoing transition, circles all others.
    terminal_states = [
        node for node in states_graph.nodes()
        if states_graph.out_degree(node) == 0
    ]
    nonterminal_states = [
        node for node in states_graph.nodes()
        if states_graph.out_degree(node) > 0
    ]

    # Use one colour scale for both node groups so that the colourbar is
    # valid for squares and circles alike.
    value_min = min(V.values(), default=0)
    value_max = max(V.values(), default=0)
    nx.draw_networkx_nodes(
        states_graph, pos, nodelist=terminal_states, node_size=200,
        node_shape='s', alpha=0.5,
        node_color=[V[node] for node in terminal_states],
        cmap='viridis', vmin=value_min, vmax=value_max)
    nodes_nonterminal = nx.draw_networkx_nodes(
        states_graph, pos, nodelist=nonterminal_states, node_size=200,
        node_shape='o', alpha=0.5,
        node_color=[V[node] for node in nonterminal_states],
        cmap='viridis', vmin=value_min, vmax=value_max)
    fig.colorbar(nodes_nonterminal)
    nx.draw_networkx_edges(states_graph, pos)
    nx.draw_networkx_labels(states_graph, pos, font_size=8)
    fig.savefig(f"states_graph_{num_rows}x{num_cols}_prob{two_tile_prob}.png")
    plt.close(fig)