Beispiel #1 (Example 1)
def play(next_actions):
    """Play a single game to completion and return the first player's point.

    ``next_actions`` is a pair of action-selection callables: index 0 is
    consulted on the first player's turns, index 1 on the second player's.
    """
    state = State()
    while not state.is_done():
        # Pick the strategy belonging to the player whose turn it is.
        chooser = next_actions[0] if state.is_first_player() else next_actions[1]
        state = state.next_state(chooser(state))
    return first_player_point(state)
Beispiel #2 (Example 2)
def play(model):
    """Self-play one game with PV-MCTS and return its training history.

    Each history entry has the form ``[[pieces, enemy_pieces], policy, value]``.
    The value slot is left as ``None`` during play and back-filled afterwards
    from the first player's game result, with the sign alternating per ply.
    """
    history = []
    state = State()
    while not state.is_done():
        scores = pv_mcts_scores(model, state, SP_TEMPERATURE)

        # Record the position together with a full-size policy vector
        # (zeros everywhere except the legal actions' MCTS scores).
        policies = [0] * DN_OUTPUT_SIZE
        for move, prob in zip(state.legal_actions(), scores):
            policies[move] = prob
        history.append([[state.pieces, state.enemy_pieces], policies, None])

        # Sample the next move from the MCTS visit distribution.
        chosen = np.random.choice(state.legal_actions(), p=scores)
        state = state.next_state(chosen)

    # Back-fill the outcome, flipping the sign for the alternating player.
    value = first_player_value(state)
    for entry in history:
        entry[2] = value
        value = -value
    return history
Beispiel #3 (Example 3)
        # PUCT child selection (AlphaZero-style): choose the child that
        # maximizes Q + U.  NOTE(review): the enclosing `def` is above this
        # view — presumably a tree-node "select child" method; confirm.
        C_PUCT = 1.0  # exploration constant balancing Q vs. the prior term
        # Total visit count across all children (the parent's visit count).
        t = sum(nodes_to_scores(self.child_nodes))
        pucb_values = [
            # Q term: negated mean value (the child's value is stored from
            # the opponent's perspective); 0.0 for an unvisited child.
            (-child_node.w / child_node.n if child_node.n > 0 else 0.0) +
            # U term: prior policy scaled by sqrt(total visits) and damped
            # by this child's own visit count.
            C_PUCT * child_node.policy * math.sqrt(t) / (1 + child_node.n)
            for child_node in self.child_nodes
        ]
        # Return the child with the highest PUCB score.
        return self.child_nodes[np.argmax(pucb_values)]


def pv_mcts_action(model, temperature=0):
    """Build an action-selection function backed by PV-MCTS.

    The returned callable takes a state, computes the MCTS score
    distribution over its legal actions with the given model and
    temperature, and samples one legal action from it.
    """
    def _choose(state):
        probabilities = pv_mcts_scores(model, state, temperature)
        return np.random.choice(state.legal_actions(), p=probabilities)

    return _choose


if __name__ == '__main__':
    # Load the newest saved model checkpoint (lexicographically last *.h5).
    model_path = sorted(Path('./model').glob('*.h5'))[-1]
    model = tf.keras.models.load_model(str(model_path))

    # Play one game where every move is chosen by PV-MCTS (temperature 1.0),
    # printing the board after each move.
    state = State()
    next_action = pv_mcts_action(model, 1.0)
    while not state.is_done():
        state = state.next_state(next_action(state))
        print(state)