Example #1
def minimax_decision(state, game):
    """Given a state in a game, calculate the best move by searching
    forward all the way to the terminal states. [Figure 5.3]"""

    player = game.to_move(state)

    def max_value(state):
        if game.terminal_test(state):
            return game.utility(state, player)
        v = -inf
        for a in game.actions(state):
            v = max(v, min_value(game.result(state, a)))
        return v

    def min_value(state):
        if game.terminal_test(state):
            return game.utility(state, player)
        v = inf
        for a in game.actions(state):
            v = min(v, max_value(game.result(state, a)))
        return v

    # Body of minimax_decision:
    return argmax(game.actions(state),
                  key=lambda a: min_value(game.result(state, a)))
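A minimal usage sketch, assuming the aima-python repository is importable (TicTacToe comes from its games.py, and the argmax and inf used above come from its utils module):

from games import TicTacToe

game = TicTacToe()
# exhaustive search of the full game tree; fine for tic-tac-toe, hopeless for larger games
move = minimax_decision(game.initial, game)
print(move)   # an optimal opening move for 'X', given as a board coordinate pair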
Example #2
def best_policy(mdp, U):
    """Given an MDP and a utility function U, determine the best policy,
    as a mapping from state to action."""

    pi = {}
    for s in mdp.states:
        pi[s] = argmax(mdp.actions(s), key=lambda a: q_value(mdp, s, a, U))
    return pi
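A short sketch of pairing best_policy with value iteration, assuming aima-python's mdp.py and its 4x3 grid example (sequential_decision_environment); q_value and argmax above are assumed to come from the same module:

from mdp import sequential_decision_environment, value_iteration

U = value_iteration(sequential_decision_environment, epsilon=0.001)  # utilities of all states
pi = best_policy(sequential_decision_environment, U)                 # greedy policy w.r.t. U
print(pi[(0, 0)])                                                    # action picked in the bottom-left state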
Example #3
    def execute(self, percept):
        """Execute the information gathering algorithm"""
        self.observation = self.integrate_percept(percept)
        vpis = self.vpi_cost_ratio(self.variables)
        # pick the index of the variable with the best VPI-to-cost ratio
        j = argmax(range(len(vpis)), key=lambda i: vpis[i])
        variable = self.variables[j]

        if self.vpi(variable) > self.cost(variable):
            return self.request(variable)

        return self.decnet.best_action()
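The selection above is a greedy arg-max over VPI-to-cost ratios; a toy sketch with hypothetical variable names and numbers, just to isolate that step:

# hypothetical data: one VPI/cost ratio per observable variable
variables = ['Weather', 'Traffic', 'Fuel']
vpis = [0.8, 2.5, 1.1]
j = max(range(len(vpis)), key=lambda i: vpis[i])  # index of the best ratio
print(variables[j])                               # 'Traffic' is the candidate to request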
Example #4
def policy_iteration(mdp):
    """Solve an MDP by policy iteration [Figure 17.7]"""

    U = {s: 0 for s in mdp.states}
    pi = {s: random.choice(mdp.actions(s)) for s in mdp.states}
    while True:
        U = policy_evaluation(pi, U, mdp)
        unchanged = True
        for s in mdp.states:
            a_star = argmax(mdp.actions(s),
                            key=lambda a: q_value(mdp, s, a, U))
            # a = argmax(mdp.actions(s), key=lambda a: expected_utility(a, s, U, mdp))
            if q_value(mdp, s, a_star, U) > q_value(mdp, s, pi[s], U):
                pi[s] = a_star
                unchanged = False
        if unchanged:
            return pi
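A usage sketch, again assuming aima-python's mdp.py (policy_evaluation, q_value, argmax and the 4x3 grid MDP are taken from that repo):

from mdp import sequential_decision_environment

pi = policy_iteration(sequential_decision_environment)
print(pi)   # maps each grid state to the action of the converged policy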
Example #5
    def __call__(self, percept):
        s1, r1 = self.update_state(percept)
        Q, Nsa, s, a, r = self.Q, self.Nsa, self.s, self.a, self.r
        alpha, gamma, terminals = self.alpha, self.gamma, self.terminals
        actions_in_state = self.actions_in_state

        if s in terminals:
            Q[s, None] = r1
        if s is not None:
            Nsa[s, a] += 1
            Q[s, a] += alpha(Nsa[s, a]) * (
                r + gamma * max(Q[s1, a1]
                                for a1 in actions_in_state(s1)) - Q[s, a])
        if s in terminals:
            self.s = self.a = self.r = None
        else:
            self.s, self.r = s1, r1
            self.a = argmax(actions_in_state(s1),
                            key=lambda a1: self.f(Q[s1, a1], Nsa[s1, a1]))
        return self.a
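The update in the middle of __call__ is the standard Q-learning rule Q[s, a] += alpha * (r + gamma * max_a' Q[s', a'] - Q[s, a]); a tiny numeric illustration with made-up values:

# made-up numbers, only to show the temporal-difference update used above
q_sa = 0.0                     # current Q[s, a]
r, gamma, alpha_n = 1.0, 0.9, 0.5
best_next = 2.0                # max over Q[s1, a1]
q_sa += alpha_n * (r + gamma * best_next - q_sa)
print(q_sa)                    # 1.4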
Example #6
def expectiminimax(state, game):
    """Return the best move for a player after dice are thrown. The game tree
    includes chance nodes along with min and max nodes. [Figure 5.11]"""
    player = game.to_move(state)

    def max_value(state):
        v = -inf
        for a in game.actions(state):
            v = max(v, chance_node(state, a))
        return v

    def min_value(state):
        v = inf
        for a in game.actions(state):
            v = min(v, chance_node(state, a))
        return v

    def chance_node(state, action):
        res_state = game.result(state, action)
        if game.terminal_test(res_state):
            return game.utility(res_state, player)
        sum_chances = 0
        num_chances = len(game.chances(res_state))
        for chance in game.chances(res_state):
            # use a separate name so res_state is not overwritten between outcomes
            outcome_state = game.outcome(res_state, chance)
            if outcome_state.to_move == player:
                util = max_value(outcome_state)
            else:
                util = min_value(outcome_state)
            sum_chances += util * game.probability(chance)
        return sum_chances / num_chances

    # Body of expectiminimax:
    return argmax(game.actions(state),
                  key=lambda a: chance_node(state, a),
                  default=None)
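chance_node accumulates utilities weighted by the probability of each dice outcome; a self-contained illustration of that weighted sum with hypothetical numbers:

# hypothetical (probability, backed-up value) pairs for one chance node
outcomes = [(0.5, 3.0), (0.5, -1.0)]
expected = sum(p * v for p, v in outcomes)
print(expected)   # 1.0, the value propagated up from the chance node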
Example #7
    def program(percept):
        belief_state.observe(program.action, percept)
        program.action = argmax(belief_state.actions(),
                                key=belief_state.expected_outcome_utility)
        return program.action
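For context: in aima-python this closure is produced by the DTAgentProgram factory, which initializes program.action before the first percept arrives; a hedged sketch of that wrapper (body elided, see the excerpt above):

def DTAgentProgram(belief_state):
    def program(percept):
        ...                   # the body shown in the excerpt above
    program.action = None     # read by observe() on the very first call
    return program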