Esempio n. 1
0
    def V(self, state, alpha=-game.INT_INF, beta=game.INT_INF, verbose=0):
        """Return the minimax value of `state` using alpha-beta pruning.

        Args:
            state: current game state.
            alpha: best value the maximizing player can guarantee so far.
            beta: best value the minimizing player can guarantee so far.
            verbose: if >= 1, print a trace line for every call.
        """
        if verbose >= 1:
            print('call V({})'.format(state))

        # If IsEnd(s): terminal state, return its utility directly.
        if game.is_end(state):
            return game.utility(state)

        # Get possible actions
        actions = game.get_possible_actions(state)
        assert len(actions) > 0

        # If player == agent (maximizing player)
        if game.get_player_from_state(state) == game.MAX_PLAYER:
            value = -game.INT_INF
            for action in actions:
                value = max(value,
                            self.V(game.get_next_state(state, action),
                                   alpha, beta, verbose=verbose))
                alpha = max(alpha, value)
                # Prune: the minimizer above will never allow value >= beta.
                if beta <= alpha: break

        # If player == opponent (minimizing player)
        else:
            value = game.INT_INF
            for action in actions:
                value = min(value,
                            self.V(game.get_next_state(state, action),
                                   alpha, beta, verbose=verbose))
                beta = min(beta, value)
                # Prune: the maximizer above already has value >= alpha.
                if beta <= alpha: break

        return value
Esempio n. 2
0
    def V(self, state, depth, verbose=0):
        """Return the depth-limited minimax value of `state`.

        `depth` counts remaining opponent plies: it is passed through
        unchanged on the maximizing player's turn and decremented on the
        minimizing player's turn, matching the sibling implementations
        in this file.

        Args:
            state: current game state.
            depth: remaining search depth; at 0, fall back to `eval(state)`.
            verbose: if >= 1, print a trace line for every call.
        """
        if verbose >= 1:
            print('call V({})'.format(state))

        # If IsEnd(s): terminal state, return its utility directly.
        if game.is_end(state):
            return game.utility(state)

        # If depth = 0: cut off and use the static evaluation function.
        # NOTE(review): `eval` here is presumably a project-level evaluation
        # helper shadowing the builtin — confirm where it is defined.
        if depth == 0:
            return eval(state)

        # Get possible actions
        actions = game.get_possible_actions(state)
        assert len(actions) > 0

        # If player == agent (maximizing player)
        if game.get_player_from_state(state) == game.MAX_PLAYER:
            value = -game.INT_INF
            for action in actions:
                value = max(value,
                            self.V(game.get_next_state(state, action),
                                   depth, verbose=verbose))

        # If player == opponent (minimizing player)
        else:
            value = game.INT_INF
            for action in actions:
                value = min(value,
                            self.V(game.get_next_state(state, action),
                                   depth - 1, verbose=verbose))

        return value
Esempio n. 3
0
    def policy(self, state):
        # BEGIN_YOUR_CODE
        # Root of an alpha-beta search: score every action's successor and
        # return the best one for the player to move.
        actions = game.get_possible_actions(state)

        alpha, beta = -game.INT_INF, game.INT_INF

        maximizing = game.get_player_from_state(state) == game.MAX_PLAYER
        scores = []
        for action in actions:
            score = self.V(game.get_next_state(state, action), alpha, beta)
            scores.append(score)
            # Tighten the window for the remaining root children.
            if maximizing:
                alpha = max(alpha, score)
            else:
                beta = min(beta, score)
            if beta <= alpha:
                break
        pick = mp.argmax(scores) if maximizing else mp.argmin(scores)
        return actions[pick]
Esempio n. 4
0
    def V(self, state, alpha=-game.INT_INF, beta=game.INT_INF):
        """Alpha-beta minimax value of `state`.

        `alpha`/`beta` bound the window of values still relevant to the
        ancestors; a branch is pruned as soon as the window closes.
        """
        # Terminal state: return its utility directly.
        if game.is_end(state):
            return game.utility(state)

        actions = game.get_possible_actions(state)
        assert len(actions) > 0

        if game.get_player_from_state(state) == game.MAX_PLAYER:
            # Maximizing player: raise alpha, prune when beta <= alpha.
            best = -game.INT_INF
            for a in actions:
                child = self.V(game.get_next_state(state, a), alpha, beta)
                if child > best:
                    best = child
                alpha = max(alpha, best)
                if beta <= alpha:
                    break
        else:
            # Minimizing player: lower beta, prune when beta <= alpha.
            best = game.INT_INF
            for a in actions:
                child = self.V(game.get_next_state(state, a), alpha, beta)
                if child < best:
                    best = child
                beta = min(beta, best)
                if beta <= alpha:
                    break

        return best
Esempio n. 5
0
    def V(self, state):
        """Plain minimax value of `state` (no pruning, no depth cutoff)."""
        # Terminal state: return its utility directly.
        if game.is_end(state):
            return game.utility(state)

        actions = game.get_possible_actions(state)
        assert len(actions) > 0

        # Recurse into every successor via game.get_next_state; the player
        # to move decides whether we take the max or the min.
        successors = (self.V(game.get_next_state(state, a)) for a in actions)
        if game.get_player_from_state(state) == game.MAX_PLAYER:
            return max(successors)
        return min(successors)
Esempio n. 6
0
    def V(self, state, alpha=-game.INT_INF, beta=game.INT_INF):
        """Alpha-beta minimax value of `state`.

        Args:
            state: current game state.
            alpha: best value the maximizing player can guarantee so far.
            beta: best value the minimizing player can guarantee so far.
        """
        # Fix: the original `print '-',` is Python 2 statement syntax and a
        # SyntaxError under Python 3 (the rest of the file uses print()).
        # `end=' '` reproduces Py2's trailing-comma soft space.
        print('-', end=' ')
        # BEGIN_YOUR_CODE
        if game.is_end(state):
            return game.utility(state)

        actions = game.get_possible_actions(state)

        player = game.get_player_from_state(state)
        if player == game.MAX_PLAYER:
            value = -game.INT_INF
            for action in actions:
                value = max(
                    value,
                    self.V(game.get_next_state(state, action), alpha, beta))
                alpha = max(alpha, value)
                # Prune: the minimizer above will never allow value >= beta.
                if beta <= alpha: break
        else:
            value = game.INT_INF
            for action in actions:
                value = min(
                    value,
                    self.V(game.get_next_state(state, action), alpha, beta))
                beta = min(beta, value)
                # Prune: the maximizer above already has value >= alpha.
                if beta <= alpha: break

        return value
Esempio n. 7
0
    def V(self, state, depth):
        """Depth-limited minimax value of `state`.

        `depth` is decremented only on the minimizing player's turn, so it
        effectively counts remaining opponent plies.
        """
        # Terminal state: exact utility, no evaluation needed.
        if game.is_end(state):
            return game.utility(state)

        # Depth exhausted: fall back to the static evaluation function.
        # NOTE(review): `eval` is presumably a project-level evaluation
        # helper shadowing the builtin — confirm where it is defined.
        if depth == 0:
            return eval(state)

        actions = game.get_possible_actions(state)
        assert len(actions) > 0

        if game.get_player_from_state(state) == game.MAX_PLAYER:
            children = (self.V(game.get_next_state(state, a), depth)
                        for a in actions)
            return max(children)
        children = (self.V(game.get_next_state(state, a), depth - 1)
                    for a in actions)
        return min(children)
Esempio n. 8
0
    def policy(self, state):
        """As MAX, pick the value-maximizing action; model the opponent
        as a uniformly random player."""
        actions = game.get_possible_actions(state)
        assert len(actions) > 0

        if game.get_player_from_state(state) != game.MAX_PLAYER:
            # Opponent turn: uniform random move.
            return random.choice(actions)
        # Agent turn: greedy on the successor values.
        return max(actions,
                   key=lambda a: self.V(game.get_next_state(state, a)))
Esempio n. 9
0
    def policy(self, state):
        """Return the action whose successor value is best for the
        player to move (max for the agent, min for the opponent)."""
        actions = game.get_possible_actions(state)
        assert len(actions) > 0

        def score(action):
            return self.V(game.get_next_state(state, action))

        if game.get_player_from_state(state) == game.MAX_PLAYER:
            return max(actions, key=score)
        return min(actions, key=score)
Esempio n. 10
0
    def policy(self, state):
        # BEGIN_YOUR_CODE
        # Agent: greedy over successor values. Opponent: uniform random.
        actions = game.get_possible_actions(state)

        if game.get_player_from_state(state) != game.MAX_PLAYER:
            import random
            return random.choice(actions)

        scores = [self.V(game.get_next_state(state, a)) for a in actions]
        return actions[mp.argmax(scores)]
Esempio n. 11
0
    def V(self, state):
        # BEGIN_YOUR_CODE
        # Plain minimax value of `state`: terminal utility, else the best
        # successor value for the player to move.
        if game.is_end(state):
            return game.utility(state)

        maximizing = game.get_player_from_state(state) == game.MAX_PLAYER
        best = -game.INT_INF if maximizing else game.INT_INF
        pick = max if maximizing else min
        for action in game.get_possible_actions(state):
            best = pick(best, self.V(game.get_next_state(state, action)))

        return best
Esempio n. 12
0
    def V(self, state, alpha=-game.INT_INF, beta=game.INT_INF):
        # BEGIN_YOUR_CODE
        # Expectimax value: the agent picks the best child, while the
        # opponent is modeled as uniformly random (mean of child values).
        # NOTE(review): alpha/beta are accepted for signature compatibility
        # with the pruning variants but are never used here.
        if game.is_end(state):
            return game.utility(state)

        actions = game.get_possible_actions(state)
        if game.get_player_from_state(state) == game.MAX_PLAYER:  # my-turn
            best = -game.INT_INF
            for action in actions:
                child = self.V(game.get_next_state(state, action))
                if child > best:
                    best = child
            return best
        # opp-turn: uniform expectation over successors.
        return sum(self.V(game.get_next_state(state, action)) / len(actions)
                   for action in actions)
Esempio n. 13
0
    def policy(self, state):

        # BEGIN_YOUR_CODE
        # Score each action by the value of its successor state, then pick
        # the argmax for the agent or the argmin for the opponent.
        actions = game.get_possible_actions(state)

        if game.get_player_from_state(state) == game.MAX_PLAYER:
            scores = [self.V(game.get_next_state(state, a)) for a in actions]
            return actions[mp.argmax(scores)]
        scores = [self.V(game.get_next_state(state, a)) for a in actions]
        return actions[mp.argmin(scores)]
Esempio n. 14
0
    def V(self, state, depth):
        # BEGIN_YOUR_CODE
        # Depth-limited minimax: depth is decremented only on the opponent's
        # turn; at depth 0 we fall back to the evaluation function.
        if game.is_end(state):
            return game.utility(state)
        if depth == 0:
            # NOTE(review): `eval` is presumably a project-level evaluation
            # helper shadowing the builtin — confirm where it is defined.
            return eval(state)

        if game.get_player_from_state(state) == game.MAX_PLAYER:  # my-turn
            best = -game.INT_INF
            for a in game.get_possible_actions(state):
                best = max(best, self.V(game.get_next_state(state, a), depth))
        else:  # opp-turn
            best = game.INT_INF
            for a in game.get_possible_actions(state):
                best = min(best,
                           self.V(game.get_next_state(state, a), depth - 1))

        return best
Esempio n. 15
0
    def V(self, state):
        """Return the plain minimax value of `state` (no pruning).

        Terminal states return their utility; otherwise the value is the
        max (agent's turn) or min (opponent's turn) over all successor
        state values.
        """
        # If IsEnd(s)
        if game.is_end(state):
            return game.utility(state)

        # Get possible actions
        actions = game.get_possible_actions(state)
        assert len(actions) > 0

        # If player == agent (maximizing player)
        if game.get_player_from_state(state) == game.MAX_PLAYER:
            value = -game.INT_INF
            for action in actions:
                value = max(value, self.V(game.get_next_state(state, action)))

        # If player == opponent (minimizing player)
        else:
            value = game.INT_INF
            for action in actions:
                value = min(value, self.V(game.get_next_state(state, action)))

        return value
Esempio n. 16
0
    def policy(self, state):
        """Root of an alpha-beta search: score every action and return the
        one that is best for the player to move."""
        actions = game.get_possible_actions(state)
        assert len(actions) > 0

        alpha, beta = -game.INT_INF, game.INT_INF
        maximizing = game.get_player_from_state(state) == game.MAX_PLAYER

        scored = []
        for action in actions:
            v = self.V(game.get_next_state(state, action), alpha, beta)
            scored.append((action, v))
            # Tighten the window for the remaining root children.
            if maximizing:
                alpha = max(alpha, v)
            else:
                beta = min(beta, v)
        choose = max if maximizing else min
        return choose(scored, key=lambda pair: pair[1])[0]
Esempio n. 17
0
    def policy(self, state):
        # BEGIN_YOUR_CODE
        # Score each action with a depth-limited search (self.max_depth)
        # from its successor, then take argmax/argmin by player.
        actions = game.get_possible_actions(state)

        if game.get_player_from_state(state) == game.MAX_PLAYER:
            scores = [self.V(game.get_next_state(state, a), self.max_depth)
                      for a in actions]
            return actions[mp.argmax(scores)]
        scores = [self.V(game.get_next_state(state, a), self.max_depth)
                  for a in actions]
        return actions[mp.argmin(scores)]