def test_get_valid_actions():
    # UP, DOWN, LEFT, RIGHT are all valid
    all_board = tuple([2, 4, 8, 16, 2, 8, 16, 32, 32, 16, 8, 4, 32, 32, 4, 8])
    game = Nick2048()
    game.set_board(all_board)
    all_actions = [(a, r) for (a, r, b) in game.get_valid_actions()]
    assert game.board == all_board
    assert (game.UP, 68) in all_actions
    assert (game.DOWN, 68) in all_actions
    assert (game.RIGHT, 64) in all_actions
    assert (game.LEFT, 64) in all_actions
    for (a, r, b) in Nick2048.get_valid_actions_from_board(all_board):
        assert (a, r) in all_actions
    # No valid actions
    no_board = tuple([2, 4, 8, 16, 32, 64, 128, 256, 2, 4, 8, 16, 32, 64, 128, 256])
    game.set_board(no_board)
    no_actions = [(a, r) for (a, r, b) in game.get_valid_actions()]
    assert game.board == no_board
    assert len(no_actions) == 0
    for (a, r, b) in Nick2048.get_valid_actions_from_board(no_board):
        assert (a, r) in no_actions
    # DOWN or RIGHT is valid
    dr_board = tuple([2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
    game.set_board(dr_board)
    some_actions = [(a, r) for (a, r, b) in game.get_valid_actions()]
    assert game.board == dr_board
    assert len(some_actions) == 2
    assert (game.DOWN, 0) in some_actions
    assert (game.RIGHT, 0) in some_actions
    for (a, r, b) in Nick2048.get_valid_actions_from_board(dr_board):
        assert (a, r) in some_actions
Example #2
def _setup(self, config):
    self.params = config
    self.mlflow_client = mlflow.tracking.MlflowClient()
    self.mlflow_run = self.mlflow_client.create_run(experiment_id="0")
    self.mlflow_log_params(config)
    if "random_seed" in self.params:
        self.env = Nick2048(random_seed=self.params["random_seed"])
    else:
        self.env = Nick2048()
    self.q_models = []
    q_model = keras.Sequential([
        keras.layers.Dense(20, activation="relu"),
        keras.layers.Dense(20, activation="relu"),
        keras.layers.Dense(20, activation="relu"),
        keras.layers.Dense(1),
    ])
    for _ in range(self.env.action_space.n):
        self.q_models.append(keras.models.clone_model(q_model))
    for m in self.q_models:
        m.build(input_shape=[1, self.env.observation_space.shape[0]])
    self.loss_fn = keras.losses.mean_squared_error
    self.optimizer = keras.optimizers.Adam(lr=self.params["learning_rate"])
    self.memory = Memory(self.params["buffer_size"])
def test_randomness():
    game1 = Nick2048()
    game2 = Nick2048()
    boards1 = _run_game(game1)
    boards2 = _run_game(game2)
    # If this fails, you either got REALLY unlucky or something is broken
    assert boards1 != boards2
def test_rotate_board():
    # 2 0 4 8
    # 2 0 0 0
    # 4 4 0 0
    # 0 0 0 8
    board = (2, 0, 4, 8, 2, 0, 0, 0, 4, 4, 0, 0, 0, 0, 0, 8)
    result_90 = Nick2048.rotate_board_right(board)
    # 0 4 2 2
    # 0 4 0 0
    # 0 0 0 4
    # 8 0 0 8
    assert result_90 == (0, 4, 2, 2, 0, 4, 0, 0, 0, 0, 0, 4, 8, 0, 0, 8)
    result_180 = Nick2048.rotate_board_right(result_90)
    # 8 0 0 0
    # 0 0 4 4
    # 0 0 0 2
    # 8 4 0 2
    assert result_180 == (8, 0, 0, 0, 0, 0, 4, 4, 0, 0, 0, 2, 8, 4, 0, 2)
    result_270 = Nick2048.rotate_board_right(result_180)
    # 8 0 0 8
    # 4 0 0 0
    # 0 0 4 0
    # 2 2 4 0
    assert result_270 == (8, 0, 0, 8, 4, 0, 0, 0, 0, 0, 4, 0, 2, 2, 4, 0)
    result_360 = Nick2048.rotate_board_right(result_270)
    assert result_360 == board
def test_get_valid_actions_by_reward():
    # UP, DOWN, LEFT, RIGHT are all valid
    board = tuple([2, 4, 4, 2, 2, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0])
    game = Nick2048()
    game.set_board(board)
    action_rewards = [(a, r) for (a, r, b) in game.get_valid_actions_by_reward()]
    assert game.board == board
    left_right = [(game.LEFT, 24), (game.RIGHT, 24)]
    up_down = [(game.UP, 4), (game.DOWN, 4)]
    assert action_rewards[0] in left_right
    assert action_rewards[1] in left_right
    assert action_rewards[2] in up_down
    assert action_rewards[3] in up_down
    for (a, r, b) in Nick2048.get_valid_actions_by_reward_from_board(board):
        assert (a, r) in action_rewards
def test_board_env_step_two():
    init_state = tuple([4, 2, 2, 4, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0])
    game = Nick2048()
    game.set_board(init_state)
    assert game.board == init_state
    state, reward, done, _ = game.step(game.RIGHT)
    assert game.board[3] == 4
    assert game.board[2] == 4
    assert game.board[1] == 4
def test_get_afterstate():
    # 2 0 4 8
    # 2 0 0 0
    # 4 4 0 0
    # 0 0 0 8
    board = (2, 0, 4, 8, 2, 0, 0, 0, 4, 4, 0, 0, 0, 0, 0, 8)
    after_up, up_reward = Nick2048.get_afterstate(board, Nick2048.UP)
    assert after_up == (4, 4, 4, 16, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
    assert up_reward == 20
    after_down, down_reward = Nick2048.get_afterstate(board, Nick2048.DOWN)
    assert after_down == (0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 4, 4, 4, 16)
    assert down_reward == 20
    after_left, left_reward = Nick2048.get_afterstate(board, Nick2048.LEFT)
    assert after_left == (2, 4, 8, 0, 2, 0, 0, 0, 8, 0, 0, 0, 8, 0, 0, 0)
    assert left_reward == 8
    after_right, right_reward = Nick2048.get_afterstate(board, Nick2048.RIGHT)
    assert after_right == (0, 2, 4, 8, 0, 0, 0, 2, 0, 0, 0, 8, 0, 0, 0, 8)
    assert right_reward == 8
def test_action_history(actions, expected_score=None, expected_tile=None):
    assert expected_score or expected_tile
    test_game = Nick2048(random_seed=SEED)
    for action in actions:
        test_game.step(action)
    if expected_score:
        assert test_game.score == expected_score
    if expected_tile:
        assert max(test_game.board) == expected_tile
def bfs_search():
    search_queue = []
    game = Nick2048(random_seed=SEED)
    update_search_queue(search_queue, game, ())
    state_action_pairs = set()
    max_tile = 0
    max_tile_history = ()
    max_score = 0
    max_score_history = ()
    depth_start_time = time.time()

    curr_depth = 1

    while len(search_queue) > 0:
        board, score, action, action_history = search_queue.pop(0)
        if len(action_history) >= curr_depth:
            depth_time = round(time.time() - depth_start_time, 1)
            print(
                f"Depth: {curr_depth}:"
                f"\n\tMax Tile: {max_tile} "
                f"({get_move_string(max_tile_history)})"
                f"\n\tMax Score: {max_score} "
                f"({get_move_string(max_score_history)})"
                f"\n\tTotal State Action Pairs: {len(state_action_pairs)}"
                f"\n\tDepth Time: {depth_time} sec"
            )
            mlflow.log_metric("Max Tile", max_tile, step=curr_depth)
            mlflow.log_metric("Max Score", max_score, step=curr_depth)
            mlflow.log_metric(
                "Total State Action Pairs", len(state_action_pairs), step=curr_depth
            )
            test_action_history(max_tile_history, expected_tile=max_tile)
            test_action_history(max_score_history, expected_score=max_score)
            max_tile = 0
            max_score = 0
            depth_start_time = time.time()
            curr_depth += 1

        game.set_board(board)
        game.score = score
        game.step(action)

        state_action_pairs.add((board, action))
        action_history = (*action_history, action)

        if max(game.board) > max_tile:
            max_tile = max(game.board)
            max_tile_history = action_history
        if game.score > max_score:
            max_score = game.score
            max_score_history = action_history

        update_search_queue(search_queue, game, action_history)

        if len(action_history) > DEPTH_LIMIT:
            break
def test_boardenv_fill_on_move_logic():
    # make sure a new piece is added that is either a 2 or a 4
    init_state = tuple([2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
    game = Nick2048()
    game.set_board(init_state)
    assert game.board == init_state
    state, reward, done, _ = game.step(game.LEFT)
    assert state == game.board
    assert reward == 4
    assert len([v for v in game.board if v != 0]) == 2
def test_boardenv_move_logic_three_in_a_row():
    # make sure the behavior is correct when 3 equal tiles are in a line
    init_state = tuple([0, 2, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0])
    game = Nick2048()
    game.set_board(init_state)
    assert game.board == init_state
    state, reward, done, _ = game.step(game.DOWN)
    assert state == game.board
    assert reward == 4
    assert game.board[13] == 4
    assert game.board[9] == 2
def test_boardenv_move_logic_four_in_a_row():
    # make sure the behavior is correct when a row is full of the same value.
    init_state = tuple([2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
    game = Nick2048()
    game.set_board(init_state)
    assert game.board == init_state
    state, reward, done, _ = game.step(game.RIGHT)
    assert reward == 8
    assert game.board[2] == 4
    assert game.board[3] == 4
    state, reward, done, _ = game.step(game.RIGHT)
    assert reward == 8
    assert game.board[3] == 8
def test_board_env_step_one():
    init_state = tuple([2, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0])
    game = Nick2048()
    game.set_board(init_state)
    assert game.board == init_state
    # Note the move will add a random 2 or 4 into the board
    state, reward, done, _ = game.step(game.RIGHT)
    assert game.board[3] == 2
    assert game.board[11] == 2
    nonzeros = [v for v in game.board if v != 0]
    assert len(nonzeros) == 3
    for v in nonzeros:
        assert v in [2, 4]
def test_reflect_board():
    # 2 0 4 8
    # 2 0 0 0
    # 4 4 0 0
    # 0 0 0 8
    board = (2, 0, 4, 8, 2, 0, 0, 0, 4, 4, 0, 0, 0, 0, 0, 8)
    reflected_y_1 = Nick2048.reflect_board_across_y(board)
    # 8 4 0 2
    # 0 0 0 2
    # 0 0 4 4
    # 8 0 0 0
    assert reflected_y_1 == (8, 4, 0, 2, 0, 0, 0, 2, 0, 0, 4, 4, 8, 0, 0, 0)
    reflected_y_2 = Nick2048.reflect_board_across_y(reflected_y_1)
    assert reflected_y_2 == board
    reflected_x_1 = Nick2048.reflect_board_across_x(board)
    # 0 0 0 8
    # 4 4 0 0
    # 2 0 0 0
    # 2 0 4 8
    assert reflected_x_1 == (0, 0, 0, 8, 4, 4, 0, 0, 2, 0, 0, 0, 2, 0, 4, 8)
    reflected_x_2 = Nick2048.reflect_board_across_x(reflected_x_1)
    assert reflected_x_2 == board
Example #15
def _setup(self, config):
    self.params = config
    self.mlflow_client = mlflow.tracking.MlflowClient()
    self.mlflow_run = self.mlflow_client.create_run(experiment_id="0")
    self.mlflow_log_params(config)
    self.env = Nick2048()
    self.v_model = keras.Sequential([
        keras.layers.Dense(20, activation="relu"),
        keras.layers.Dense(20, activation="relu"),
        keras.layers.Dense(20, activation="relu"),
        keras.layers.Dense(1),
    ])
    self.v_model.build(
        input_shape=[1, self.env.observation_space.shape[0]])
    self.loss_fn = keras.losses.mean_squared_error
    self.optimizer = keras.optimizers.Adam(lr=self.params["learning_rate"])
    self.memory = Memory(self.params["buffer_size"])
def test_boardenv_done_logic():
    init_state = tuple([16, 8, 16, 4, 4, 2, 4, 8, 32, 2, 32, 4, 4, 16, 4, 8])
    game = Nick2048()
    game.set_board(init_state)
    assert game.board == init_state
    state, reward, done, _ = game.step(game.RIGHT)
    assert state == game.board
    assert state == init_state
    assert not done
    assert reward == 0
    state, reward, done, _ = game.step(game.RIGHT)
    assert state == game.board
    assert state == init_state
    assert not done
    assert reward == 0
    state, reward, done, _ = game.step(game.LEFT)
    assert state == game.board
    assert state == init_state
    assert not done
    assert reward == 0
    state, reward, done, _ = game.step(game.DOWN)
    assert done
    assert reward == 4
def dfs_search():
    start = time.time()
    search_stack = []
    game = Nick2048(random_seed=SEED)
    update_search_stack(search_stack, game, [])
    max_score = 0
    max_action_history = []
    complete_games = 0

    while len(search_stack) > 0:
        board, score, action, action_history = search_stack.pop()
        action_history = action_history[:]
        game.set_board(board)
        game.score = score
        game.step(action)
        action_history.append(action)
        update_search_stack(search_stack, game, action_history)
        if game.done:
            complete_games += 1
            if game.score > max_score:
                max_score = game.score
                max_action_history = action_history[:]
                max_elapsed_time = time.time() - start
            if complete_games % 1001 == 1000:
                print(
                    f"Random seed: {SEED}\n"
                    f"Max action history: {max_action_history}\n"
                    f"Max Score: {max_score}\n"
                    f"Max moves: {len(max_action_history)}\n"
                    f"Max found after: {round(max_elapsed_time, 2)} sec\n"
                    f"Total elapsed time: {round(time.time() - start, 2)} sec\n"
                    f"Search stack size: {len(search_stack)}\n"
                    f"Complete Games: {complete_games}\n"
                    f"Complete Games: {complete_games}\n"
                    f"Current Score: {game.score}\n")
                test_action_history(action_history, game.score)
                test_action_history(max_action_history, max_score)
def play_nick_version():
    game = Nick2048()
    run_manual_loop(game)
def test_boardenv_init():
    game = Nick2048()
    nonzero = [v for v in game.board if v != 0]
    assert len(nonzero) == 2
    for v in nonzero:
        assert v in [2, 4]
def test_set_board_makes_copy():
    init_state = tuple([2, 2, 0, 0, 2, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0])
    game = Nick2048()
    game.set_board(init_state)
    assert game.board == init_state
def play_with_seed(seed):
    game = Nick2048(random_seed=seed)
    run_manual_loop(game)
    def _train(self):
        with mlflow.start_run():
            mlflow.log_params(self.params)
            optimizer = keras.optimizers.Adam(lr=self.params["learning_rate"])
            train_acc_metric = keras.metrics.SparseCategoricalAccuracy()
            game_scores = []
            game_num_steps = []
            b = Nick2048()
            for episode_num in range(self.params["num_episodes"]):
                state = b.reset()
                game_score = 0
                # Pseudo code for our Sarsa learning algo:
                #   for each step in the rollout:
                #     action = fancy_argmax_a(q(get_afterstate(s),a))
                #     q_val = model(get_afterstate(s),action)
                #     next_s, r = b.step(a)
                #     next_action = fancy_argmax_a(q(get_afterstate(next_s), a))
                #     next_q_val = model(get_afterstate(next_s), next_action)
                #     update q_model using loss(q_val - (r + next_q_val))
                for step_num in range(self.params["max_steps_per_episode"]):
                    with tf.GradientTape() as q_tape:
                        logging.debug(
                            f"state:\n{np.asarray(state).reshape([4,4])}")
                        candidate_actions = list(range(b.action_space.n))
                        canonical_afterstates = [
                            b.get_canonical_board(
                                b.get_afterstate(state, a)[0])
                            for a in candidate_actions
                        ]
                        q_vals = [
                            tf.squeeze(self.q_models[i](np.array(
                                canonical_afterstates[i])[np.newaxis]))
                            for i in candidate_actions
                        ]
                        logging.debug(f"q_vals : {q_vals}")
                        # pick action by rolling dice according to relative values of canonical_afterstates
                        while True:
                            dice_roll = tfp.distributions.Multinomial(
                                total_count=5, probs=softmax(q_vals)).sample(1)
                            action_index = np.argmax(dice_roll)
                            action = candidate_actions[action_index]
                            next_state, reward, done, _ = b.step(action)
                            if next_state != state:  # you found a valid move
                                break
                            else:  # that wasn't a valid move, but one must exist since we weren't done after last step.
                                logging.debug(
                                    f"action {action} was invalid, removing it from candidate and rolling dice again"
                                )
                                assert (
                                    len(candidate_actions) > 1
                                ), "No actions changed the board but we are not done."
                            a_idx_pp = action_index + 1
                            q_vals = q_vals[:action_index] + q_vals[a_idx_pp:]
                            candidate_actions = (
                                candidate_actions[:action_index] +
                                candidate_actions[a_idx_pp:])
                        logging.debug(f"action: {action}")
                        logging.debug(
                            f"canonical_afterstate:\n{np.asarray(canonical_afterstates[action]).reshape([4,4])}"
                        )
                        q_val = q_vals[action_index]
                        logging.debug(f"q_val: {q_val}")
                        logging.debug(f"reward: {reward}")
                        logging.debug(
                            f"next_state:\n{np.asarray(next_state).reshape([4,4])}"
                        )

                        # update q_model via TD learning using q(s,a) (which we computed last loop iter) and q(s',a')
                        next_candidate_actions = list(range(b.action_space.n))
                        next_canonical_afterstates = [
                            b.get_canonical_board(
                                b.get_afterstate(next_state, action)[0])
                            for action in next_candidate_actions
                        ]
                        next_q_vals = [
                            tf.squeeze(self.q_models[i](np.array(
                                next_canonical_afterstates[i])[np.newaxis]))
                            for i in next_candidate_actions
                        ]
                        logging.debug(f"next_q_vals: {next_q_vals}")
                        next_action = np.argmax(next_q_vals)
                        logging.debug(f"next_action: {next_action}")
                        next_q_val = next_q_vals[next_action]
                        target_q_val = (
                            reward +
                            (1 - done) * self.params["alpha"] * next_q_val)
                        logging.debug(f"next_q_val: {next_q_val}")
                        logging.debug(f"target_q_val: {target_q_val}")
                        val_loss = tf.math.square(q_val - target_q_val)
                        print(f"loss: {val_loss}")
                    val_grads = q_tape.gradient(
                        val_loss, self.q_models[action].trainable_variables)
                    optimizer.apply_gradients(
                        zip(val_grads,
                            self.q_models[action].trainable_variables))
                    train_acc_metric.update_state(action, q_vals)
                    print(f"q_val before gradient step: {q_val}")
                    print(f"target_q_val: {target_q_val}")
                    q_for_print = np.squeeze(self.q_models[action](np.array(
                        canonical_afterstates[action])[np.newaxis]))
                    print(f"q_val after gradient step: {q_for_print}")
                    print()
                    logging.debug("\n")

                    # get ready to loop
                    state = next_state
                    game_score += reward
                    if done:
                        break
                print(
                    f"accuracy in episode {episode_num}: {train_acc_metric.result().numpy()}"
                )
                train_acc_metric.reset_states()
                game_scores.append(game_score)
                game_num_steps.append(step_num + 1)
                avg_game_score = np.mean(game_scores)
                avg_last_10 = np.mean(game_scores[-10:])
                print(
                    "%s steps in episode %s, score: %s, running_avg: %.0f, avg_last_10_games: %.0f"
                    % (
                        step_num + 1,
                        episode_num,
                        game_score,
                        avg_game_score,
                        avg_last_10,
                    ))
                # mlflow.log_metric("game scores", game_score, step=episode_num)
                # mlflow.log_metric("avg game score", avg_game_score, step=episode_num)
                # mlflow.log_metric("avg_score_last_10", avg_last_10)
                # mlflow.log_metric("game num steps", step_num + 1, step=episode_num)
                # mlflow.log_metric(
                #     "avg num steps", np.mean(game_num_steps), step=episode_num
                # )
            return {
                "avg_game_score": avg_game_score,
                "avg_num_steps": np.mean(game_num_steps),
                "episodes_total": episode_num + 1,
                "timesteps_total": np.sum(game_num_steps),
            }
def test_no_randomness():
    game1 = Nick2048(random_seed=13)
    game2 = Nick2048(random_seed=13)
    boards1 = _run_game(game1)
    boards2 = _run_game(game2)
    assert boards1 == boards2
Example #24
# logging.basicConfig(level=logging.DEBUG)
start_time = time.time()
with mlflow.start_run():
    max_depth = 15
    assert max_depth > 0
    num_random_seeds = 100
    max_max_tile = []
    max_score = []
    total_state_action_pairs = []
    for rand_seed in range(num_random_seeds):
        max_max_tile.append([0] * (max_depth + 1))
        max_score.append([0] * (max_depth + 1))
        total_state_action_pairs.append([0] * (max_depth + 1))
        state_action_scores = {}
        env = Nick2048(random_seed=rand_seed)
        actions = range(env.action_space.n)
        state_actions = (
            deque()
        )  # queue of (depth, game_score, max_tile, state, next_action)

        init_state = env.get_state()[0]
        for a in actions:
            state_actions.append(
                (1, 0, max(init_state), init_state, a))  # push initial actions

        while state_actions:
            debug_str = ""
            t = state_actions.popleft()
            debug_str += f"handling {t}\n"
            depth, game_score, max_tile, state, next_action = t
def test_get_canonical():
    for i in range(100):
        board = _generate_random_board()
        canonical = Nick2048.get_canonical_board(board)
        r90 = Nick2048.rotate_board_right(board)
        r180 = Nick2048.rotate_board_right(r90)
        r270 = Nick2048.rotate_board_right(r180)
        r360 = Nick2048.rotate_board_right(r270)
        xr0 = Nick2048.reflect_board_across_x(board)
        xr90 = Nick2048.rotate_board_right(xr0)
        xr180 = Nick2048.rotate_board_right(xr90)
        xr270 = Nick2048.rotate_board_right(xr180)
        xr360 = Nick2048.rotate_board_right(xr270)
        yr0 = Nick2048.reflect_board_across_y(board)
        yr90 = Nick2048.rotate_board_right(yr0)
        yr180 = Nick2048.rotate_board_right(yr90)
        yr270 = Nick2048.rotate_board_right(yr180)
        yr360 = Nick2048.rotate_board_right(yr270)
        assert canonical == Nick2048.get_canonical_board(r90)
        assert canonical == Nick2048.get_canonical_board(r180)
        assert canonical == Nick2048.get_canonical_board(r270)
        assert canonical == Nick2048.get_canonical_board(r360)
        assert canonical == Nick2048.get_canonical_board(xr0)
        assert canonical == Nick2048.get_canonical_board(xr90)
        assert canonical == Nick2048.get_canonical_board(xr180)
        assert canonical == Nick2048.get_canonical_board(xr270)
        assert canonical == Nick2048.get_canonical_board(xr360)
        assert canonical == Nick2048.get_canonical_board(yr0)
        assert canonical == Nick2048.get_canonical_board(yr90)
        assert canonical == Nick2048.get_canonical_board(yr180)
        assert canonical == Nick2048.get_canonical_board(yr270)
        assert canonical == Nick2048.get_canonical_board(yr360)
def play_with_lookahead():
    game = Nick2048()
    lookahead_fn = get_lookahead_fn(Nick2048, 5)
    run_manual_loop(game, lookahead_fn)
def test_set_board():
    board = tuple([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16])
    game = Nick2048()
    game.set_board(board)
    assert game.board == board
Example #28
# requires PYTHONPATH to contain the top-level directory
# (i.e. the improved-funicular directory you checked out from GitHub), or run:
# $ PYTHONPATH=. python tests/perf_benchmarks.py
#
# Alternatively, add a .pth file to site-packages, for example:
# $ echo `pwd`/ > improved-funicular/lib/python3.7/site-packages/curr_dir.pth

import time
from envs.nick_2048 import Nick2048
from strategies.random import try_random

board = [2, 0, 8, 16, 2, 4, 8, 4, 2, 0, 2, 2, 4, 0, 0, 0]
game = Nick2048()

start = time.time()

# Initial implementation: .39sec
# With squash lookup table: .22sec
for i in range(10000):
    game.set_board(board)
    game.step(game.UP)

end = time.time()

print(f"Time to set board and step: {end-start}")

start = time.time()
rollouts = 100
# Initial (with squash table): .35sec
try_random(Nick2048, rollouts)
end = time.time()

print(f"Time for {rollouts} random rollouts: {end - start}")
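The squash lookup table referenced in the timings above is not part of this snippet. As a rough, hypothetical sketch of the idea (squash_row is an invented name here, not Nick2048's actual API), caching the result of sliding and merging a single row turns repeated row computations into dictionary lookups:

from functools import lru_cache

@lru_cache(maxsize=None)
def squash_row(row):
    # Slide a 4-tuple of tiles to the left, merging equal neighbors once,
    # and return (new_row, reward). The cache acts as the lookup table.
    tiles = [v for v in row if v != 0]
    merged, reward, i = [], 0, 0
    while i < len(tiles):
        if i + 1 < len(tiles) and tiles[i] == tiles[i + 1]:
            merged.append(tiles[i] * 2)
            reward += tiles[i] * 2
            i += 2
        else:
            merged.append(tiles[i])
            i += 1
    return tuple(merged + [0] * (4 - len(merged))), reward

A full move would then apply something like squash_row to each of the four rows (or to columns, after rotating the board) and sum the per-row rewards.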
def test_action_history(action_history, expected_score):
    test_game = Nick2048(random_seed=SEED)
    for action in action_history:
        test_game.step(action)
    assert test_game.done
    assert test_game.score == expected_score