def V(self, state):
    """Return the minimax value of ``state``.

    A terminal state evaluates to ``game.utility(state)``. Otherwise the
    value is the maximum of the successor values when ``game.MAX_PLAYER``
    is to move, and the minimum of the successor values for any other
    player.
    """
    # BEGIN_YOUR_CODE
    if game.is_end(state):
        return game.utility(state)

    maximizing = game.get_player_from_state(state) == game.MAX_PLAYER
    # Pair the folding function with the bound it starts from.
    if maximizing:
        fold, best = max, -game.INT_INF
    else:
        fold, best = min, game.INT_INF

    for move in game.get_possible_actions(state):
        best = fold(best, self.V(game.get_next_state(state, move)))
    return best
def V(self, state, alpha=-game.INT_INF, beta=game.INT_INF, verbose=0):
    """Return the minimax value of ``state`` using alpha-beta pruning.

    Args:
        state: game state to evaluate.
        alpha: best value the maximizing player is already assured of.
        beta: best value the minimizing player is already assured of.
        verbose: when >= 1, print one trace line per call; forwarded to
            every recursive call.

    Returns:
        ``game.utility(state)`` for a terminal state; otherwise the max
        (agent to move) or min (opponent to move) over successor values,
        stopping early once ``beta <= alpha``.
    """
    if verbose >= 1:
        print('call V({})'.format(state))

    # If IsEnd(s)
    if game.is_end(state):
        return game.utility(state)

    # Get possible actions
    actions = game.get_possible_actions(state)
    assert len(actions) > 0

    # If player == agent (maximizing player)
    if game.get_player_from_state(state) == game.MAX_PLAYER:
        value = -game.INT_INF
        for action in actions:
            value = max(value, self.V(game.get_next_state(state, action),
                                      alpha, beta, verbose=verbose))
            alpha = max(alpha, value)
            # The minimizer above already has an option <= alpha, so the
            # remaining actions cannot change the result.
            if beta <= alpha:
                break
    # If player == opponent (minimizing player)
    else:
        value = game.INT_INF
        for action in actions:
            value = min(value, self.V(game.get_next_state(state, action),
                                      alpha, beta, verbose=verbose))
            beta = min(beta, value)
            # The maximizer above already has an option >= beta.
            if beta <= alpha:
                break

    return value
def V(self, state, depth, verbose=0):
    """Return the depth-limited minimax value of ``state``.

    Args:
        state: game state to evaluate.
        depth: remaining search depth. It is passed on unchanged after the
            agent's move and decremented after the opponent's move, so one
            unit of depth covers one agent move plus one opponent reply.
        verbose: when >= 1, print one trace line per call; forwarded to
            every recursive call.

    Returns:
        ``game.utility(state)`` for a terminal state, ``eval(state)`` when
        ``depth`` reaches 0, otherwise the max/min over successor values.
    """
    if verbose >= 1:
        print('call V({})'.format(state))

    # If IsEnd(s)
    if game.is_end(state):
        return game.utility(state)

    # If depth = 0
    if depth == 0:
        # NOTE(review): `eval` is presumably a module-level evaluation
        # function shadowing the builtin -- confirm.
        return eval(state)

    # Get possible actions
    actions = game.get_possible_actions(state)
    assert len(actions) > 0

    # If player == agent (maximizing player)
    if game.get_player_from_state(state) == game.MAX_PLAYER:
        value = -game.INT_INF
        for action in actions:
            value = max(value, self.V(game.get_next_state(state, action),
                                      depth, verbose=verbose))
    # If player == opponent (minimizing player)
    else:
        value = game.INT_INF
        for action in actions:
            value = min(value, self.V(game.get_next_state(state, action),
                                      depth - 1, verbose=verbose))

    return value
def policy(self, state):
    """Choose an action for the player to move in ``state``.

    Each successor is scored with ``self.V`` while the alpha-beta window
    is narrowed at the root; scoring stops once the window closes. The
    maximizing player gets the action at ``mp.argmax`` of the scores, any
    other player the action at ``mp.argmin``.
    """
    # BEGIN_YOUR_CODE
    moves = game.get_possible_actions(state)
    lower, upper = -game.INT_INF, game.INT_INF
    maximizing = game.get_player_from_state(state) == game.MAX_PLAYER

    scores = []
    for move in moves:
        score = self.V(game.get_next_state(state, move), lower, upper)
        scores.append(score)
        if maximizing:
            lower = max(lower, score)
        else:
            upper = min(upper, score)
        if upper <= lower:
            # Window closed: the remaining moves are not scored.
            break

    select = mp.argmax if maximizing else mp.argmin
    return moves[select(scores)]
def V(self, state, alpha=-game.INT_INF, beta=game.INT_INF):
    """Return the minimax value of ``state`` using alpha-beta pruning.

    ``alpha`` and ``beta`` bound the window of values that can still
    matter to the callers; a branch is abandoned as soon as
    ``beta <= alpha``.
    """
    # Terminal state: the utility is the value.
    if game.is_end(state):
        return game.utility(state)

    moves = game.get_possible_actions(state)
    assert len(moves) > 0

    if game.get_player_from_state(state) == game.MAX_PLAYER:
        # Agent to move: raise alpha as better options are found.
        best = -game.INT_INF
        for move in moves:
            child = game.get_next_state(state, move)
            best = max(best, self.V(child, alpha, beta))
            alpha = max(alpha, best)
            if beta <= alpha:
                break
        return best

    # Opponent to move: lower beta as better (smaller) options are found.
    best = game.INT_INF
    for move in moves:
        child = game.get_next_state(state, move)
        best = min(best, self.V(child, alpha, beta))
        beta = min(beta, best)
        if beta <= alpha:
            break
    return best
def V(self, state, alpha=-game.INT_INF, beta=game.INT_INF):
    """Return the minimax value of ``state`` using alpha-beta pruning.

    Writes one ``-`` marker to stdout per call, so the amount of output
    reflects how many states were visited.

    Args:
        state: game state to evaluate.
        alpha: best value the maximizing player is already assured of.
        beta: best value the minimizing player is already assured of.

    Returns:
        ``game.utility(state)`` for a terminal state; otherwise the max
        (agent to move) or min (other player to move) over successor
        values, stopping early once ``beta <= alpha``.
    """
    import sys

    # The original `print '-',` statement is a SyntaxError on Python 3;
    # writing to stdout directly works on both Python 2 and 3.
    sys.stdout.write('- ')

    # BEGIN_YOUR_CODE
    if game.is_end(state):
        return game.utility(state)

    actions = game.get_possible_actions(state)
    player = game.get_player_from_state(state)

    if player == game.MAX_PLAYER:
        value = -game.INT_INF
        for action in actions:
            value = max(
                value,
                self.V(game.get_next_state(state, action), alpha, beta))
            alpha = max(alpha, value)
            if beta <= alpha:
                break
    else:
        value = game.INT_INF
        for action in actions:
            value = min(
                value,
                self.V(game.get_next_state(state, action), alpha, beta))
            beta = min(beta, value)
            if beta <= alpha:
                break

    return value
def V(self, state):
    """Return the minimax value of ``state``.

    Terminal states yield ``game.utility(state)``; otherwise the agent
    (``game.MAX_PLAYER``) takes the largest successor value and the
    opponent takes the smallest.
    """
    if game.is_end(state):
        return game.utility(state)

    moves = game.get_possible_actions(state)
    assert len(moves) > 0

    if game.get_player_from_state(state) == game.MAX_PLAYER:
        # Agent's turn: keep the largest successor value.
        highest = -game.INT_INF
        for move in moves:
            successor = game.get_next_state(state, move)
            highest = max(highest, self.V(successor))
        return highest

    # Opponent's turn: keep the smallest successor value.
    lowest = game.INT_INF
    for move in moves:
        successor = game.get_next_state(state, move)
        lowest = min(lowest, self.V(successor))
    return lowest
def V(self, state, depth):
    """Return the depth-limited minimax value of ``state``.

    Terminal states yield ``game.utility(state)``; when ``depth`` is 0 the
    state is scored with ``eval(state)``. The agent's moves recurse with
    the same ``depth``; the opponent's moves recurse with ``depth - 1``.
    """
    if game.is_end(state):
        return game.utility(state)

    if depth == 0:
        # NOTE(review): `eval` is presumably a module-level evaluation
        # function shadowing the builtin -- confirm.
        return eval(state)

    moves = game.get_possible_actions(state)
    assert len(moves) > 0

    if game.get_player_from_state(state) == game.MAX_PLAYER:
        # Agent's turn: depth is not consumed yet.
        best = -game.INT_INF
        for move in moves:
            successor = game.get_next_state(state, move)
            best = max(best, self.V(successor, depth))
        return best

    # Opponent's turn: the reply consumes one unit of depth.
    best = game.INT_INF
    for move in moves:
        successor = game.get_next_state(state, move)
        best = min(best, self.V(successor, depth - 1))
    return best
def policy(self, state):
    """Choose an action for the player to move in ``state``.

    The maximizing player takes the action whose successor has the largest
    ``self.V``; any other player moves uniformly at random.
    """
    candidates = game.get_possible_actions(state)
    assert len(candidates) > 0

    if game.get_player_from_state(state) == game.MAX_PLAYER:

        def successor_value(move):
            return self.V(game.get_next_state(state, move))

        return max(candidates, key=successor_value)

    return random.choice(candidates)
def policy(self, state):
    """Choose the minimax-optimal action for the player to move.

    The maximizing player takes the action whose successor has the largest
    ``self.V``; any other player takes the one with the smallest.
    """
    candidates = game.get_possible_actions(state)
    assert len(candidates) > 0

    def successor_value(move):
        return self.V(game.get_next_state(state, move))

    if game.get_player_from_state(state) == game.MAX_PLAYER:
        return max(candidates, key=successor_value)
    return min(candidates, key=successor_value)
def V(self, state, depth):
    """Return the depth-limited minimax value of ``state``.

    Terminal states yield ``game.utility(state)``; at ``depth == 0`` the
    state is scored with ``eval(state)``. Successors are searched with the
    same ``depth`` on the maximizing player's turn and with ``depth - 1``
    on any other player's turn.
    """
    # BEGIN_YOUR_CODE
    if game.is_end(state):
        return game.utility(state)
    if depth == 0:
        # NOTE(review): `eval` is presumably a module-level evaluation
        # function shadowing the builtin -- confirm.
        return eval(state)

    if game.get_player_from_state(state) == game.MAX_PLAYER:
        # my-turn: maximize, depth unchanged
        fold, best, child_depth = max, -game.INT_INF, depth
    else:
        # opp-turn: minimize, one unit of depth consumed
        fold, best, child_depth = min, game.INT_INF, depth - 1

    for move in game.get_possible_actions(state):
        best = fold(best, self.V(game.get_next_state(state, move), child_depth))
    return best
def policy(self, state):
    """Choose an action for the player to move in ``state``.

    The maximizing player takes the action at ``mp.argmax`` of the
    successor values; any other player moves uniformly at random.
    """
    # BEGIN_YOUR_CODE
    moves = game.get_possible_actions(state)
    mover = game.get_player_from_state(state)

    if mover == game.MAX_PLAYER:
        scores = []
        for move in moves:
            scores.append(self.V(game.get_next_state(state, move)))
        best_index = mp.argmax(scores)
        return moves[best_index]

    # Imported only on the path that needs it.
    import random
    return random.choice(moves)
def V(self, state, alpha=-game.INT_INF, beta=game.INT_INF):
    """Return the expectimax value of ``state``.

    The maximizing player takes the largest successor value; any other
    player is modelled as choosing uniformly at random, so the value is
    the average of the successor values.

    Args:
        state: game state to evaluate.
        alpha: unused; kept so the signature stays unchanged for callers.
        beta: unused; kept so the signature stays unchanged for callers.

    Returns:
        ``game.utility(state)`` for a terminal state, otherwise the max
        (agent) or mean (other player) of the successor values.
    """
    # BEGIN_YOUR_CODE
    if game.is_end(state):
        return game.utility(state)

    actions = game.get_possible_actions(state)

    if game.get_player_from_state(state) == game.MAX_PLAYER:
        # my-turn
        value = -game.INT_INF
        for action in actions:
            value = max(value, self.V(game.get_next_state(state, action)))
        return value

    # opp-turn: divide by a float so integer utilities are not truncated
    # by floor division on Python 2.
    count = float(len(actions))
    value = 0
    for action in actions:
        value += self.V(game.get_next_state(state, action)) / count
    return value
def policy(self, state):
    """Choose the minimax-optimal action for the player to move.

    Every successor is scored with ``self.V``; the maximizing player gets
    the action at ``mp.argmax`` of the scores, any other player the action
    at ``mp.argmin``.
    """
    # BEGIN_YOUR_CODE
    moves = game.get_possible_actions(state)
    mover = game.get_player_from_state(state)

    select = mp.argmax if mover == game.MAX_PLAYER else mp.argmin
    scores = [self.V(game.get_next_state(state, move)) for move in moves]
    return moves[select(scores)]
def V(self, state):
    """Return the minimax value of ``state``.

    Args:
        state: game state to evaluate.

    Returns:
        ``game.utility(state)`` for a terminal state; otherwise the
        largest successor value when the agent (``game.MAX_PLAYER``) is to
        move, or the smallest successor value when the opponent is.
    """
    # If IsEnd(s)
    if game.is_end(state):
        return game.utility(state)

    # Get possible actions
    actions = game.get_possible_actions(state)
    assert len(actions) > 0

    # If player == agent (maximizing player)
    if game.get_player_from_state(state) == game.MAX_PLAYER:
        value = -game.INT_INF
        for action in actions:
            value = max(value, self.V(game.get_next_state(state, action)))
    # If player == opponent (minimizing player)
    else:
        # `actions` is non-empty here (asserted above), so min() has input.
        value = min(self.V(game.get_next_state(state, action))
                    for action in actions)

    return value
def policy(self, state):
    """Choose the minimax-optimal action using alpha-beta values.

    Every successor is scored with ``self.V`` while the root window is
    narrowed with each score (alpha for the maximizing player, beta
    otherwise). No action is skipped at the root. The first action with
    the best score is returned.
    """
    candidates = game.get_possible_actions(state)
    assert len(candidates) > 0

    alpha, beta = -game.INT_INF, game.INT_INF
    maximizing = game.get_player_from_state(state) == game.MAX_PLAYER

    scored = []
    for move in candidates:
        score = self.V(game.get_next_state(state, move), alpha, beta)
        scored.append((move, score))
        if maximizing:
            alpha = max(alpha, score)
        else:
            beta = min(beta, score)

    choose = max if maximizing else min
    best_move, _ = choose(scored, key=lambda pair: pair[1])
    return best_move
def policy(self, state):
    """Choose an action using the depth-limited value function.

    Every successor is scored with ``self.V(successor, self.max_depth)``;
    the maximizing player gets the action at ``mp.argmax`` of the scores,
    any other player the action at ``mp.argmin``.
    """
    # BEGIN_YOUR_CODE
    moves = game.get_possible_actions(state)
    mover = game.get_player_from_state(state)

    scores = [
        self.V(game.get_next_state(state, move), self.max_depth)
        for move in moves
    ]

    if mover == game.MAX_PLAYER:
        return moves[mp.argmax(scores)]
    return moves[mp.argmin(scores)]