def update_value_map_random(value_map, player, move):
    """Assign a zero value to every state reachable from *move*.

    Baseline policy: each (player, state) key gets value 0, so a player
    driven by this map has no preference between moves.  Recurses through
    the whole game tree, using the map itself as the visited-set.
    """
    value_map[player, move] = 0
    # A won game is terminal — nothing below it to explore.
    if is_winner(player, move):
        return
    opponent = 3 - player
    for follow_up in next_states(opponent, move):
        if (opponent, follow_up) not in value_map:
            update_value_map_random(value_map, opponent, follow_up)
def player_move_good(player, state):
    """Pick a move for *player*: win now if possible, otherwise block the
    opponent's immediate win, otherwise take the first available move.
    """
    opponent = 3 - player
    my_moves = next_states(player, state)

    # 1. Take any immediately winning move.
    wins = [m for m in my_moves if is_winner(player, m)]
    if wins:
        return random.choice(wins)

    # 2. Block squares where the opponent would win on their next turn.
    # Assumes next_states enumerates the empty cells in the same order for
    # both players, so my_moves[i] claims the square of their_moves[i] —
    # TODO(review): confirm against next_states.
    their_moves = next_states(opponent, state)
    blocks = [
        mine
        for mine, theirs in zip(my_moves, their_moves)
        if is_winner(opponent, theirs)
    ]
    if blocks:
        return random.choice(blocks)

    # 3. No tactical move: fall back to the first legal move.
    return my_moves[0]
def update_value_map_never_lose(value_map, player, move):
    """Exact game-theoretic value of *move* for *player* (negamax).

    Values: 1 for a won position, 0 for a draw (board full, no winner),
    and otherwise minus the opponent's best achievable value from here.
    Results are memoized in value_map, keyed by (player, state).
    """
    opponent = 3 - player
    replies = next_states(opponent, move)

    if is_winner(player, move):
        value_map[player, move] = 1
        return
    if not replies:
        # Board full and nobody won: a draw.
        value_map[player, move] = 0
        return

    # Evaluate every opponent reply first; the map doubles as the memo.
    for reply in replies:
        if (opponent, reply) not in value_map:
            update_value_map_never_lose(value_map, opponent, reply)
    best_reply = max(value_map[opponent, reply] for reply in replies)
    # Zero-sum game: the opponent's best outcome is our worst.
    value_map[player, move] = -best_reply
def player_move_learner(player, state, value_map, verbose=True, explore=0.1):
    """Epsilon-greedy move selection driven by a learned value map.

    With probability *explore* a uniformly random legal move is returned
    (exploration); otherwise the first move with the highest value in
    *value_map* is returned (exploitation).

    Args:
        player: player id (1 or 2).
        state: current board state.
        value_map: mapping from state to estimated win probability.
            NOTE(review): keys here are bare states, while the
            update_value_map_* helpers key on (player, state) tuples —
            confirm which map callers actually pass in.
        verbose: print a notice when an exploratory move is taken.
        explore: exploration probability in [0, 1].

    Returns:
        The chosen next state.
    """
    next_moves = next_states(player, state)
    # Value of each candidate move; unseen states default to a neutral 0.5.
    next_moves_values = [value_map.get(move, 0.5) for move in next_moves]

    # Randomly explore sometimes.
    if random.random() < explore:
        if verbose:
            # Fixed typo ("expoloratory").  NOTE(review): the ANSI color is
            # never reset (no "\033[0m"), so later output stays green.
            print("\033[1;32;40m exploratory move \n")
        return random.choice(next_moves)

    # Greedy: first move with the maximum value.  max() is hoisted out of
    # the scan — the original recomputed it for every element (O(n^2)).
    best_value = max(next_moves_values)
    return next_moves[next_moves_values.index(best_value)]
def update_value_map_good(value_map, player, move):
    """Heuristic value of *move* for *player*, recursed over the game tree.

    Values: 1 for a won position, 0 for a draw, -1 when the opponent can
    win on the very next move, and otherwise a small positional score for
    the squares *player* occupies.  Memoized in value_map, keyed by
    (player, state).
    """
    opponent = 3 - player
    replies = next_states(opponent, move)

    if is_winner(player, move):
        value_map[player, move] = 1
        return
    if not replies:
        # Board full, no winner: a draw.
        value_map[player, move] = 0
        return

    if any(is_winner(opponent, reply) for reply in replies):
        # The opponent wins from here no matter what: worst possible score.
        value_map[player, move] = -1
    else:
        # Positional heuristic: center (.1) > corners (.01) > edges (.001).
        cell_scores = (.01, .001, .01, .001, .1, .001, .01, .001, .01)
        value_map[player, move] = sum(
            weight for cell, weight in zip(move, cell_scores) if cell == player
        )

    for reply in replies:
        if (opponent, reply) not in value_map:
            update_value_map_good(value_map, opponent, reply)
def get_value_map(update_func):
    """Build a value map by applying *update_func* to every opening move.

    Starts player 1 on an empty 3x3 board (a 9-tuple of zeros) and lets
    update_func populate the map for the reachable game tree.
    """
    empty_board = (0,) * 9
    value_map = {}
    for opening in next_states(1, empty_board):
        update_func(value_map, 1, opening)
    return value_map
def player_move_random(player, state):
    """Return a uniformly random legal next state for *player*."""
    candidates = next_states(player, state)
    return random.choice(candidates)