Code example #1
def test_board_env_step_three():
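    # tiles should slide to the right edge without merging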
    init_state = [
        [2.0, 0.0, 0.0, 0.0],
        [0.0, 0.0, 0.0, 0.0],
        [0.0, 2.0, 0.0, 0.0],
        [0.0, 0.0, 0.0, 0.0],
    ]
    b = BoardEnv().from_init_state(init_state)
    state, reward, done, _ = b.step(BoardEnv.RIGHT)
    assert state[0, 3] == 2.0 and state[2, 3] == 2.0, state
Code example #2
def test_board_env_step_one():
    # make sure tiles slide to the right edge without merging.
    init_state = [
        [2.0, 0.0, 0.0, 0.0],
        [0.0, 0.0, 0.0, 0.0],
        [0.0, 2.0, 0.0, 0.0],
        [0.0, 0.0, 0.0, 0.0],
    ]
    b = BoardEnv().from_init_state(init_state)
    state, reward, done, _ = b.step(BoardEnv.RIGHT)
    assert state[0, 3] == 2.0 and state[2, 3] == 2.0
Code example #3
def test_board_env_step_two():
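    # the two middle 2s merge once: [4, 2, 2, 4] -> [0, 4, 4, 4]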
    init_state = [
        [4.0, 2.0, 2.0, 4.0],
        [0.0, 0.0, 0.0, 0.0],
        [0.0, 2.0, 0.0, 0.0],
        [0.0, 0.0, 0.0, 0.0],
    ]
    b = BoardEnv().from_init_state(init_state)
    state, reward, done, _ = b.step(BoardEnv.RIGHT)
    assert state[0, 1] == 4.0
    assert state[0, 2] == 4.0
    assert state[0, 3] == 4.0
Code example #4
def test_boardenv_fill_on_move_logic():
    # make sure a new tile is spawned after a move (the game adds a 2 or a 4)
    init_state = [
        [2.0, 2.0, 0.0, 0.0],
        [0.0, 0.0, 0.0, 0.0],
        [0.0, 0.0, 0.0, 0.0],
        [0.0, 0.0, 0.0, 0.0],
    ]
    b = BoardEnv().from_init_state(init_state)
    state, reward, done, _ = b.step(BoardEnv.LEFT)
    num_non_zero_spots = (b.state != 0).sum().sum()
    assert num_non_zero_spots == 2, state
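To also verify the spawned tile's value, the test above could end with one more assertion; a hypothetical extension, assuming b.state is a NumPy array:

    # every non-zero tile should now be a 2 or a 4
    # (the merged 4 plus the newly spawned tile)
    assert all(v in (2.0, 4.0) for v in b.state.flatten() if v != 0)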
Code example #5
import numpy as np

def test_boardenv_move_logic_three_in_a_row():
    # make sure the behavior is correct when three equal tiles line up:
    # only the pair nearest the move direction merges.
    init_state = [
        [0.0, 2.0, 0.0, 0.0],
        [0.0, 2.0, 0.0, 0.0],
        [0.0, 2.0, 0.0, 0.0],
        [0.0, 0.0, 0.0, 0.0],
    ]
    b = BoardEnv().from_init_state(init_state)
    assert np.array_equal(init_state, b.state)
    state, reward, done, _ = b.step(BoardEnv.DOWN)
    assert reward == 4
    assert state[3, 1] == 4 and state[2, 1] == 2, b.state
Code example #6
import numpy as np

def test_boardenv_move_logic_four_in_a_row():
    # make sure the behavior is correct when a row is entirely the same value:
    # merged tiles don't merge again in the same move.
    init_state = [
        [2.0, 2.0, 2.0, 2.0],
        [0.0, 0.0, 0.0, 0.0],
        [0.0, 0.0, 0.0, 0.0],
        [0.0, 0.0, 0.0, 0.0],
    ]
    b = BoardEnv().from_init_state(init_state)
    assert np.array_equal(init_state, b.state)
    state, reward, done, _ = b.step(BoardEnv.RIGHT)
    assert reward == 8
    assert state[0, 2] == 4 and state[0, 3] == 4, b.state
    state, reward, done, _ = b.step(BoardEnv.RIGHT)
    assert reward >= 8
    assert state[0, 3] == 8, b.state
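The merge tests above all assume the standard 2048 rule: tiles slide toward the move direction and equal neighbors combine once per move, nearest pair first. A minimal reference sketch of that rule for a single row moved right (a hypothetical helper, not part of BoardEnv):

def merge_row_right(row):
    # slide non-zero tiles right; equal neighbors merge once per move,
    # rightmost pair first; the reward is the sum of merged values
    tiles = [v for v in row if v != 0]
    merged, reward = [], 0
    while tiles:
        v = tiles.pop()              # take tiles from the right edge
        if tiles and tiles[-1] == v:
            tiles.pop()              # fold the pair into one tile
            v *= 2
            reward += v
        merged.append(v)
    merged.reverse()
    return [0] * (len(row) - len(merged)) + merged, reward

Under this rule [2, 2, 2, 2] becomes [0, 0, 4, 4] with reward 8, matching test_boardenv_move_logic_four_in_a_row.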
Code example #7
import random

def test_boardenv_init():
    board_width = random.randint(4, 10)
    num_filled_init = random.randint(0, 4)
    b = BoardEnv(width=board_width, init_spots_filled=num_filled_init)
    num_non_zero_spots = (b.state != 0).sum().sum()
    assert num_non_zero_spots == num_filled_init, (
        "BoardEnv initializing wrong num spots %s" % num_non_zero_spots)
Code example #8
import numpy as np

class Andy2048(Base2048):
    info = "Andy's implementation of 2048"
    UP = BoardEnv.UP
    RIGHT = BoardEnv.RIGHT
    DOWN = BoardEnv.DOWN
    LEFT = BoardEnv.LEFT

    @classmethod
    def from_init_state(cls, init_state):
        andy_wrapper = cls()
        if isinstance(init_state, list) and len(init_state) == 16:
            init_state = np.array(init_state).reshape((4, 4))
        andy_wrapper.andy = BoardEnv.from_init_state(init_state)
        return andy_wrapper

    def __init__(self, random_seed=None):
        self.andy = BoardEnv(random_seed=random_seed)

    @property
    def board(self):
        board = []
        for row in self.andy.state:
            for el in row:
                board.append(int(el))
        return board

    @property
    def score(self):
        return self.andy.value

    @property
    def action_space(self):
        return self.andy.action_space

    def step(self, direction):
        _, reward, _, c = self.andy.step(direction)
        return self.board, reward, self.andy.done, c

    def get_state(self):
        return self.board, self.score, self.andy.done

    def set_board(self, board):
        self.andy.state = np.array(board[:]).reshape(4, 4)
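A minimal usage sketch of the wrapper (a hypothetical session; the flat 16-element list mirrors the board property's layout):

env = Andy2048.from_init_state([2, 0, 0, 0,
                                0, 0, 0, 0,
                                0, 2, 0, 0,
                                0, 0, 0, 0])
board, reward, done, info = env.step(Andy2048.RIGHT)
assert board[3] == 2 and board[11] == 2  # both tiles slid to the right edge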
Code example #9
import numpy as np

def test_boardenv_done_logic():
    # full board with no equal horizontal neighbors, so LEFT/RIGHT change
    # nothing; only the two 2s in column 1 can merge (via UP or DOWN).
    init_state = [
        [16.0, 8.0, 16.0, 4.0],
        [4.0, 2.0, 4.0, 8.0],
        [32.0, 2.0, 32.0, 4.0],
        [4.0, 16.0, 4.0, 8.0],
    ]
    b = BoardEnv().from_init_state(init_state)
    state, reward, done, _ = b.step(BoardEnv.RIGHT)
    assert not done and np.array_equal(state, np.array(init_state))
    assert reward == 0
    state, reward, done, _ = b.step(BoardEnv.RIGHT)
    assert not done and np.array_equal(state, np.array(init_state))
    assert reward == 0
    state, reward, done, _ = b.step(BoardEnv.LEFT)
    assert not done and np.array_equal(state, np.array(init_state))
    assert reward == 0
    state, reward, done, _ = b.step(BoardEnv.DOWN)
    assert done, state
    assert reward == 4
Code example #10
    # assumes the usual imports at module top, e.g.:
    #   import mlflow
    #   import numpy as np
    #   import tensorflow as tf
    #   import tensorflow_probability as tfp
    #   from tensorflow import keras
    mlflow.log_params(params)
    p_model = keras.Sequential([
        keras.layers.Flatten(),
        keras.layers.Dense(10, activation="relu"),
        keras.layers.Dense(4, activation="softmax"),
    ])
    q_model = keras.Sequential([
        keras.layers.Flatten(),
        keras.layers.Dense(10, activation="relu"),
        keras.layers.Dense(4),
    ])
    q_model.build(input_shape=(1, 16))
    optimizer = keras.optimizers.Adam(learning_rate=params["learning_rate"])
    p_loss_fn = keras.losses.CategoricalCrossentropy()

    b = BoardEnv()
    done = False
    for episode_num in range(params["num_episodes"]):
        state = b.reset()
        action_probs = tf.squeeze(p_model(state[np.newaxis]), axis=0)
        dice_roll = tfp.distributions.Multinomial(total_count=1,
                                                  probs=action_probs).sample(1)
        action = b.action_space[np.argmax(dice_roll)]
        game_score = 0
        for step_num in range(params["max_steps_per_episode"]):
            # compute s'
            next_state, reward, done, _ = b.step(action)
            if np.array_equal(next_state, state):  # don't keep trying dud moves
                break
            # compute a' and grad log pi(a'|s')
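The fragment breaks off at the a' computation. A hypothetical continuation (a sketch, not the author's code) that samples a' the same way as above and differentiates log pi(a'|s') with a GradientTape, assuming actions are integer indices into the policy output:

            with tf.GradientTape() as tape:
                next_probs = tf.squeeze(p_model(next_state[np.newaxis]), axis=0)
                dice_roll = tfp.distributions.Multinomial(
                    total_count=1, probs=next_probs).sample(1)
                next_action = int(np.argmax(dice_roll))
                log_prob = tf.math.log(next_probs[next_action])
            # gradient of log pi(a'|s') with respect to the policy weights
            grads = tape.gradient(log_prob, p_model.trainable_variables)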
Code example #11
import numpy as np

def test_boardenv_from_init_state():
    b = BoardEnv.from_init_state([[0, 0], [2, 0]])
    assert b.value == 0.0
    assert np.sum(b.state) == 2
    assert b.width == 2
    assert b.init_spots_filled == 1
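Taken together, these assertions suggest BoardEnv.from_init_state infers width and init_spots_filled from the supplied state rather than taking them as arguments. A rough sketch of that behavior (hypothetical, assuming a NumPy-backed state and the constructor signature from test_boardenv_init):

    @classmethod
    def from_init_state(cls, init_state):
        state = np.array(init_state, dtype=float)
        env = cls(width=state.shape[0], init_spots_filled=0)
        env.state = state
        env.init_spots_filled = int((state != 0).sum())
        return env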