def V(self, state, alpha=-game.INT_INF, beta=game.INT_INF):
    print('-', end='')
    # BEGIN_YOUR_CODE
    if game.is_end(state):
        return game.utility(state)

    actions = game.get_possible_actions(state)
    player = game.get_player_from_state(state)
    if player == game.MAX_PLAYER:
        value = -game.INT_INF
        for action in actions:
            value = max(
                value,
                self.V(game.get_next_state(state, action), alpha, beta))
            alpha = max(alpha, value)
            if beta <= alpha:
                break
    else:
        value = game.INT_INF
        for action in actions:
            value = min(
                value,
                self.V(game.get_next_state(state, action), alpha, beta))
            beta = min(beta, value)
            if beta <= alpha:
                break
    return value
def V(self, state, alpha=-game.INT_INF, beta=game.INT_INF):
    # If IsEnd(s)
    if game.is_end(state):
        return game.utility(state)

    # Get possible actions
    actions = game.get_possible_actions(state)
    assert len(actions) > 0

    # If player == agent (maximizing player)
    if game.get_player_from_state(state) == game.MAX_PLAYER:
        value = -game.INT_INF
        for action in actions:
            value = max(value,
                        self.V(game.get_next_state(state, action), alpha, beta))
            alpha = max(alpha, value)
            if beta <= alpha:
                break
    # If player == opponent (minimizing player)
    else:
        value = game.INT_INF
        for action in actions:
            value = min(value,
                        self.V(game.get_next_state(state, action), alpha, beta))
            beta = min(beta, value)
            if beta <= alpha:
                break
    return value
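The V solutions above assume a `game` module exposing `is_end`, `utility`, `get_possible_actions`, `get_player_from_state`, `get_next_state`, `MAX_PLAYER`, and `INT_INF`, none of which is shown in this section. A minimal stub of that interface, using a hypothetical take-1-or-2 Nim game, is enough to run the recursion end to end (every name and rule in this stub is an assumption for illustration, not the actual module):

class game:
    """Hypothetical stub of the interface V relies on."""
    INT_INF = float('inf')
    MAX_PLAYER = 0
    MIN_PLAYER = 1

    @staticmethod
    def is_end(state):
        # state = (player_to_move, stones_left); the game ends on an empty pile
        return state[1] == 0

    @staticmethod
    def utility(state):
        # whoever takes the last stone wins: if the opponent is to move in the
        # terminal state, the agent moved last and scores +1
        return 1 if state[0] == game.MIN_PLAYER else -1

    @staticmethod
    def get_possible_actions(state):
        # take one or two stones, never more than remain
        return [n for n in (1, 2) if n <= state[1]]

    @staticmethod
    def get_player_from_state(state):
        return state[0]

    @staticmethod
    def get_next_state(state, action):
        return (1 - state[0], state[1] - action)

With V bound as a method of an agent class, V((game.MAX_PLAYER, 4)) evaluates to 1 (four stones is a forced win for the agent), while V((game.MAX_PLAYER, 3)) evaluates to -1.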
def create_policytable() -> Dict[GameState, Tuple[int, MoveList]]:
    """Create policy table by iterating through the game state space.

    Returns:
        Dict[GameState, Tuple[int, MoveList]]: Map of state to utility and list of moves
    """
    space = game.create_statespace()
    pol_tab = {}
    for state in space:
        move_list = []
        # Convert 1x10 vector into game state tuple
        state = (state[0], np.asarray(state[1:]).reshape(3, 3))
        # Play the game out, recording the minimax move for each state reached
        s = state
        while not game.is_terminal(s):
            next_move = minimax_search(s)
            move_list.append(next_move)
            s = game.result(s, next_move)
        u = game.utility(s)
        pol_tab[tuple(game.to_vector(state))] = (u, move_list)
    return pol_tab
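create_policytable relies on a minimax_search helper that is not defined in this section. A minimal sketch of it, assuming a game.to_move accessor like the one used by alphabeta_cutoff_search at the end of this section, a MAX_PLAYER constant as in the V snippets, and the max_value/min_value pair shown further below (all of these bindings are assumptions):

def minimax_search(state):
    # pick the minimax-optimal move for the player to move in `state`
    if game.to_move(state) == game.MAX_PLAYER:
        value, move = max_value(state)
    else:
        value, move = min_value(state)
    return move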
def V(self, state):
    # If IsEnd(s)
    if game.is_end(state):
        return game.utility(state)

    # Get possible actions
    actions = game.get_possible_actions(state)
    assert len(actions) > 0

    # If player == agent (maximizing player)
    if game.get_player_from_state(state) == game.MAX_PLAYER:
        value = -game.INT_INF
        for action in actions:
            value = max(value,
                        self.V(game.get_next_state(state, action)))  # use 'game.get_next_state'
    # If player == opponent (minimizing player)
    else:
        value = game.INT_INF
        for action in actions:
            value = min(value,
                        self.V(game.get_next_state(state, action)))  # use 'game.get_next_state'
    return value
def V(self, state, alpha=-game.INT_INF, beta=game.INT_INF, verbose=0):
    if verbose >= 1:
        print('call V({})'.format(state))

    # If IsEnd(s)
    if game.is_end(state):
        return game.utility(state)

    # Get possible actions
    actions = game.get_possible_actions(state)
    assert len(actions) > 0

    # If player == agent (maximizing player)
    if game.get_player_from_state(state) == game.MAX_PLAYER:
        value = -game.INT_INF
        for action in actions:
            value = _FILL_IN_  # HINT: use self.V (with verbose=verbose) and game.get_next_state
            alpha = _FILL_IN_
            if _FILL_IN_:
                break
    # If player == opponent (minimizing player)
    else:
        value = game.INT_INF
        for action in actions:
            value = _FILL_IN_  # HINT: use self.V (with verbose=verbose) and game.get_next_state
            beta = _FILL_IN_
            if _FILL_IN_:
                break
    return value
def V(self, state, depth):
    # If IsEnd(s)
    if game.is_end(state):
        return game.utility(state)

    # If depth = 0, fall back to the evaluation function
    if depth == 0:
        # print(game.get_board_str(state), eval(state))
        return eval(state)

    # Get possible actions
    actions = game.get_possible_actions(state)
    assert len(actions) > 0

    # If player == agent (maximizing player)
    if game.get_player_from_state(state) == game.MAX_PLAYER:
        value = -game.INT_INF
        for action in actions:
            value = max(value,
                        self.V(game.get_next_state(state, action), depth))
    # If player == opponent (minimizing player): depth is decremented only
    # after the opponent moves, so one depth unit is a full agent-opponent pair
    else:
        value = game.INT_INF
        for action in actions:
            value = min(value,
                        self.V(game.get_next_state(state, action), depth - 1))
    return value
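eval(state) above is the depth-0 evaluation function; it is never defined in this section (and, as written, it shadows Python's builtin eval). A minimal sketch of what such a heuristic could look like, assuming a hypothetical game.get_board(state) helper that returns a numpy array with +1 for agent pieces and -1 for opponent pieces (both the helper name and the encoding are assumptions):

import numpy as np

def eval(state):
    # simple material balance: positive values favor the maximizing player
    board = game.get_board(state)
    return float(np.sum(board))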
def V(self, state, depth, verbose=0):
    if verbose >= 1:
        print('call V({})'.format(state))

    # If IsEnd(s)
    if game.is_end(state):
        return game.utility(state)

    # If depth = 0, fall back to the evaluation function
    if depth == 0:
        # print(game.get_board_str(state), eval(state))
        return eval(state)

    # Get possible actions
    actions = game.get_possible_actions(state)
    assert len(actions) > 0

    # If player == agent (maximizing player)
    if game.get_player_from_state(state) == game.MAX_PLAYER:
        value = -game.INT_INF
        for action in actions:
            value = _FILL_IN_  # HINT: use self.V (with verbose=verbose) and game.get_next_state
    # If player == opponent (minimizing player)
    else:
        value = game.INT_INF
        for action in actions:
            value = _FILL_IN_  # HINT: use self.V (with verbose=verbose) and game.get_next_state
    return value
def V(self, state):
    # BEGIN_YOUR_CODE
    if game.is_end(state):
        return game.utility(state)

    player = game.get_player_from_state(state)
    if player == game.MAX_PLAYER:
        value = -game.INT_INF
        for action in game.get_possible_actions(state):
            value = max(value, self.V(game.get_next_state(state, action)))
    else:
        value = game.INT_INF
        for action in game.get_possible_actions(state):
            value = min(value, self.V(game.get_next_state(state, action)))
    return value
def V(self, state):
    # BEGIN_YOUR_CODE
    if game.is_end(state):
        return game.utility(state)

    actions = game.get_possible_actions(state)
    if game.get_player_from_state(state) == game.MAX_PLAYER:  # my turn
        value = -game.INT_INF
        for action in actions:
            value = max(value, self.V(game.get_next_state(state, action)))
    else:  # opponent's turn: average over actions (uniformly random opponent)
        value = 0
        for action in actions:
            value += self.V(game.get_next_state(state, action)) / len(actions)
    return value
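Unlike the minimax variants above, this V is an expectimax: the opponent node returns the average of its children's values (modeling a uniformly random opponent) rather than their minimum, which is also why it carries no alpha/beta parameters. A minimal numeric check of the difference, with hypothetical subtree values:

values = [1, -1, 0]                       # values of the opponent's three replies
assert min(values) == -1                  # what a minimax opponent node returns
assert sum(values) / len(values) == 0.0   # what this expectimax node returns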
def V(self, state):
    # If IsEnd(s)
    if game.is_end(state):
        return game.utility(state)

    # Get possible actions
    actions = game.get_possible_actions(state)
    assert len(actions) > 0

    # If player == agent (maximizing player)
    if game.get_player_from_state(state) == game.MAX_PLAYER:
        value = -game.INT_INF
        for _X_ in _X_:
            value = _X_  # use 'game.get_next_state'
    # If player == opponent (minimizing player)
    else:
        value = _X_
    return value
def V(self, state, depth):
    # BEGIN_YOUR_CODE
    if game.is_end(state):
        return game.utility(state)
    if depth == 0:
        return eval(state)

    if game.get_player_from_state(state) == game.MAX_PLAYER:  # my turn
        value = -game.INT_INF
        for action in game.get_possible_actions(state):
            value = max(value,
                        self.V(game.get_next_state(state, action), depth))
    else:  # opponent's turn
        value = game.INT_INF
        for action in game.get_possible_actions(state):
            value = min(value,
                        self.V(game.get_next_state(state, action), depth - 1))
    return value
def max_value(state: np.ndarray) -> Tuple[int, GameMove]:
    """Look for the move generating the maximum value.

    Args:
        state (np.ndarray): Current state

    Returns:
        Tuple[int, GameMove]: Tuple of value and move
    """
    move = None
    if game.is_terminal(state):
        return game.utility(state), move
    v = -20  # sentinel lower than any reachable utility
    for act in game.actions(state):
        v2, act2 = min_value(game.result(state, act))
        if v2 > v:
            v = v2
            move = act
    return v, move
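max_value calls a min_value counterpart that is not shown in this section. A minimal sketch that mirrors max_value exactly (the sentinel 20 plays the same role as -20 above):

def min_value(state: np.ndarray) -> Tuple[int, GameMove]:
    """Look for the move generating the minimum value.

    Args:
        state (np.ndarray): Current state

    Returns:
        Tuple[int, GameMove]: Tuple of value and move
    """
    move = None
    if game.is_terminal(state):
        return game.utility(state), move
    v = 20  # sentinel higher than any reachable utility
    for act in game.actions(state):
        v2, act2 = max_value(game.result(state, act))
        if v2 < v:
            v = v2
            move = act
    return v, move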
import random

infinity = float('inf')


def alphabeta_cutoff_search(state, game, d=4, cutoff_test=None, eval_fn=None, extra_fn=None):
    """Search game to determine best action; use alpha-beta pruning.
    This version cuts off search and uses an evaluation function."""
    player = game.to_move(state)

    # Functions used by alphabeta
    def max_value(state, alpha, beta, depth):
        if cutoff_test(state, depth):
            return eval_fn(state, player)
        v = -infinity
        for a in game.actions(state):
            v = max(v,
                    min_value(game.result(state, a, player, extra_fn),
                              alpha, beta, depth + 1))
            if v >= beta:
                return v
            alpha = max(alpha, v)
        return v

    def min_value(state, alpha, beta, depth):
        if cutoff_test(state, depth):
            return eval_fn(state, player)
        v = infinity
        for a in game.actions(state):
            v = min(v,
                    max_value(game.result(state, a, player, extra_fn),
                              alpha, beta, depth + 1))
            if v <= alpha:
                return v
            beta = min(beta, v)
        return v

    # Body of alphabeta_cutoff_search starts here:
    # The default test cuts off at depth d or at a terminal state
    cutoff_test = (cutoff_test or
                   (lambda state, depth: depth >= d or game.terminal_test(state)))
    eval_fn = eval_fn or (lambda state, player: game.utility(state, player))
    extra_fn = extra_fn or (lambda st1: st1.extra)

    best_score = -infinity
    beta = infinity
    best_action = None
    movimentos = game.actions(state)
    if len(movimentos) == 1:
        # only one legal move: no search needed
        return movimentos[0]
    else:
        random.shuffle(movimentos)  # shuffle to add variability between games
        for a in movimentos:
            v = min_value(game.result(state, a, player, extra_fn),
                          best_score, beta, 1)
            if v > best_score:
                best_score = v
                best_action = a
        return best_action
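A usage sketch, kept as comments because every name in it is an assumption; note this variant expects a four-argument game.result(state, a, player, extra_fn), unlike the stock AIMA games API it appears to be adapted from:

# best = alphabeta_cutoff_search(
#     state, game, d=6,
#     cutoff_test=lambda s, depth: depth >= 6 or game.terminal_test(s),
#     eval_fn=lambda s, player: my_heuristic(s, player))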