def _genmove(self, color, game, flat_board):
    """Choose the move that most improves the model's output for `color`.

    The model is queried once for the current position and once for each
    playable, non-pass move (played on a deep copy of the board). Every
    candidate is scored by its prediction minus the current prediction.
    A pass is returned when the best score is not strictly positive.

    Column 0 of the model output is read for 'b', column 1 otherwise.
    """
    own_idx, own_stone = (0, BLACK) if color == 'b' else (1, WHITE)

    # Baseline prediction for the position as it stands.
    board_row = flat_board.reshape(1, len(flat_board))
    baseline = self.model.predict(self.board_to_input(board_row))[0, own_idx]

    # Locations that are never evaluated keep a raw score of zero.
    scores = np.zeros(game.board.shape)
    for candidate in game.get_playable_locations(color):
        if candidate.is_pass:
            continue
        target = candidate.to_matrix_location()
        trial_board = copy.deepcopy(game.board)
        trial_board.place_stone_and_capture_if_applicable_default_values(
            target, own_stone)
        outcome = self.model.predict(
            self.board_to_input(trial_board.flatten()))
        scores[target] = outcome[0, own_idx]

    # Express every score relative to the baseline.
    scores -= baseline
    row, col = np.unravel_index(scores.argmax(), scores.shape)
    best = Move(col=col, row=row)
    if scores[best.to_matrix_location()] <= 0:
        return Move(is_pass=True)
    return best
def genmove(self, color, game) -> Move:
    """Return the playable location the model scores highest, or a pass.

    The model output is reshaped to 9x9. Scores of playable, non-pass
    locations are copied into a matrix pre-filled with `dummy_value`,
    the matrix is passed through `self.softmax`, and the argmax is
    chosen.

    A pass is returned when the chosen location was never filled in,
    i.e. it still holds `dummy_value` in the pre-softmax matrix. The
    comparison is made against the pre-softmax scores because the
    softmax output presumably never equals the sentinel (assumes
    `self.softmax` returns probabilities -- TODO confirm), which made
    the previous post-softmax comparison unreachable.
    """
    # We're still interested in the playable locations
    playable_locations = game.get_playable_locations(color)

    # Format the board and make predictions
    inp = self.board_to_input(color, game.board)
    pred_moves = self.model.predict(inp).reshape(9, 9)

    dummy_value = -10
    masked_scores = np.array([[dummy_value] * 9] * 9, dtype=float)
    for move in playable_locations:
        if move.is_pass:
            continue
        loc = move.to_matrix_location()
        masked_scores[loc[0]][loc[1]] = pred_moves[loc[0]][loc[1]]

    # Hand softmax a copy so the sentinel entries stay intact for the
    # playability check below, even if softmax works in place.
    move_probs = self.softmax(masked_scores.copy())
    row, col = np.unravel_index(move_probs.argmax(), move_probs.shape)
    move = Move(col=col, row=row)
    if masked_scores[move.to_matrix_location()] == dummy_value:
        move = Move(is_pass=True)
    return move
def genmove(self, color, game) -> Move:
    """Choose the move that most improves the softmaxed model output.

    The current position and every playable, non-pass move are run
    through the model and `self.softmax`. A candidate is evaluated by
    writing this player's stone value directly into a deep copy of the
    board (no capture handling happens in this function).

    Column 0 of the prediction is read for 'b', column 1 otherwise.
    A pass is returned when no candidate scores strictly above the
    current prediction.
    """
    own_idx, own_stone = (0, BLACK) if color == 'b' else (1, WHITE)

    # We're still interested in the playable locations
    candidates = game.get_playable_locations(color)

    current = self.softmax(
        self.model.predict(self.board_to_input(color, game.board)))
    baseline = current[0, own_idx]

    deltas = np.zeros(game.board.shape)
    for candidate in candidates:
        if candidate.is_pass:
            continue
        target = candidate.to_matrix_location()
        trial_board = copy.deepcopy(game.board)
        trial_board[target] = own_stone
        outcome = self.softmax(
            self.model.predict(self.board_to_input(color, trial_board)))
        deltas[target] = outcome[0, own_idx]

    # After the subtraction `deltas` holds, per location, the predicted
    # value of playing there relative to the current prediction. The
    # sign tells whether a move is worth playing: a non-positive entry
    # does not improve on the current prediction, and the larger the
    # entry, the better the move.
    deltas -= baseline

    row, col = np.unravel_index(deltas.argmax(), deltas.shape)
    best = Move(col=col, row=row)
    if deltas[best.to_matrix_location()] <= 0:
        return Move(is_pass=True)
    return best
def genmove(self, color, game) -> Move:
    """Return the playable move with the highest policy score.

    The model output is flattened; index 81 is used as the score of
    passing, and `move.to_flat_idx()` indexes the score of every other
    move. Passing is the default and is only replaced by a move whose
    score is strictly greater than the best seen so far.

    When `self.verbose` is set, the network input and every inspected
    move are printed.
    """
    own_stone = WHITE if color == 'w' else BLACK
    net_input = self.generate_input(np.array(game.board), own_stone)
    if self.verbose:
        print(net_input)
    scores = self.model(net_input).data.numpy().flatten()

    candidates = game.get_playable_locations(color)

    # Default: passing
    chosen = Move(is_pass=True)
    chosen_score = scores[81]
    for candidate in candidates:
        if self.verbose:
            print(candidate)
        if candidate.is_pass:
            continue
        candidate_score = scores[candidate.to_flat_idx()]
        if candidate_score > chosen_score:
            chosen, chosen_score = candidate, candidate_score
    return chosen
def open(self):
    """Open the game window and run its event loop until it is closed.

    Sets up the display, registers a 'Pass' button and a turn label,
    then polls one event per iteration:

    * a mouse-button release is converted to the nearest board
      intersection and, if that lies on the board, forwarded to the
      controller; every button is also told about the release;
    * a quit event ends the loop.

    Each iteration also updates the buttons' hover state and re-renders.
    When the loop ends pygame is shut down and the process exits with
    status 0.
    """
    pygame.init()
    self.running = True
    self.screen = pygame.display.set_mode(window_size)
    pygame.display.set_caption('Go')

    pass_button = Button(
        210, 530, 80, 40, 'Pass', self.screen, self.send_pass_move)
    self.buttons.append(pass_button)
    turn_label = Label(
        100, 30, 300, 40, self.get_turn_label_text, self.screen)
    self.labels.append(turn_label)
    self.render()

    while self.running:
        event = pygame.event.poll()

        if event.type == pygame.MOUSEBUTTONUP:
            mouse_x, mouse_y = event.pos
            # Snap the pixel position to the nearest intersection.
            col = int(round(
                (mouse_x - board_top_left_coord[0]) / self.cell_size))
            row = int(round(
                (mouse_y - board_top_left_coord[1]) / self.cell_size))
            on_board = (0 <= col < self.game.size
                        and 0 <= row < self.game.size)
            if on_board:
                self.controller.receive_move_from_gui(Move(col, row))
            for button in self.buttons:
                button.check_mouse_released()

        if event.type == pygame.QUIT:
            self.running = False

        for button in self.buttons:
            button.is_mouse_over_btn()
        self.render()

    pygame.quit()
    sys.exit(0)
def _genmove(self, color, game, flat_board):
    """Generate a move - PolicyBot logic

    1. Ask the model for one score per move (index 81 is the pass slot).
    2. Return the playable move with the highest score; passing is the
       default and is only replaced by a strictly higher-scoring move.
    """
    # From here on `color` is the WHITE/BLACK constant, not 'w'/'b'.
    # NOTE(review): that constant is also what is handed to
    # get_playable_locations below -- confirm it accepts it.
    color = WHITE if color == 'w' else BLACK

    # 1. Generate move scores
    board_row = flat_board.reshape(1, len(flat_board))
    scores = self.model.predict(self.generate_nn_input(board_row, color))[0]

    # 2. Walk over the valid moves and keep the best one. A plain loop
    #    is used on purpose: easy to follow, little room for errors.
    chosen = Move(is_pass=True)
    chosen_score = scores[81]
    for candidate in game.get_playable_locations(color):
        if candidate.is_pass:
            continue
        candidate_score = scores[candidate.to_flat_idx()]
        if candidate_score > chosen_score:
            chosen, chosen_score = candidate, candidate_score
    return chosen
def genmove(self, color, game) -> Move:
    """Return the model's preferred move, restricted to valid locations.

    The prediction's index 81 is the pass slot: if it is the overall
    argmax, a pass is returned. Otherwise the first 81 entries are
    treated as the flattened board, invalid locations are overwritten
    with -1 so they cannot win the argmax, and the best remaining
    location is returned. If even the best location carries the -1
    marker (every location is invalid), a pass is returned.
    """
    input_board = self.flatten_matrix(game.board, color)
    pred = self.model.predict(np.array([input_board]).reshape(1, -1))

    # np.argmax returns a NumPy integer, so it must be compared by
    # value; an identity test (`is 81`) against a literal never matches.
    if np.argmax(pred) == 81:
        return Move(is_pass=True)

    board = pred[0][0:81]
    # set all invalid locations to -1 to avoid them being chosen
    for move in game.get_invalid_locations(color):
        board[move.to_flat_idx(game.size)] = -1

    max_idx = int(np.argmax(board))
    # if all moves are invalid, play pass
    if board[max_idx] == -1:
        return Move(is_pass=True)

    row, col = divmod(max_idx, game.size)
    return Move(col=col, row=row)
def genmove(self, color, game) -> Move:
    """Return the model's preferred move, restricted to valid locations.

    The prediction's index 0 is the pass slot: if it is the overall
    argmax, a pass is returned. Otherwise the pass slot is stripped,
    invalid locations are overwritten with 0 so they cannot win the
    argmax, and the best remaining location is returned. If the best
    remaining score is 0 (e.g. every location is invalid), a pass is
    returned instead of an unplayable move.
    """
    nn_input_board = self.flatten_matrix(game.board)
    predict = self.model.predict(np.array([nn_input_board]))

    # np.argmax returns a NumPy integer, so it must be compared by
    # value; an identity test (`is 0`) against a literal never matches.
    if np.argmax(predict) == 0:
        return Move(is_pass=True)

    board = predict[0][1:]  # strip away the pass-slot at pos zero
    # set all invalid locations to 0 to avoid them being chosen
    for move in game.get_invalid_locations(color):
        board[move.to_flat_idx(game.size)] = 0

    max_idx = int(np.argmax(board))
    # A winning score of 0 means only masked slots are left: pass.
    if board[max_idx] == 0:
        return Move(is_pass=True)

    row, col = divmod(max_idx, game.size)
    return Move(col=col, row=row)
def check_dead_group(game, col_coord, row_coord):
    """Return True when no location adjacent to the given one is EMPTY.

    Only the direct neighbours reported by the board's
    `get_adjacent_coords` for (col_coord, row_coord) are inspected.
    """
    board = game.board
    location = Move(col=col_coord, row=row_coord).to_matrix_location()
    neighbours = board.get_adjacent_coords(location)
    return not any(board[point] == EMPTY for point in neighbours)
def _genmove(self, color, game, flat_board):
    """Return the model's preferred move, restricted to valid locations.

    The last entry of the prediction is the pass slot: if it is the
    overall argmax, a pass is returned. Otherwise the pass slot is
    stripped, invalid locations are zeroed, and the best remaining
    location is returned -- or a pass if that best score is 0.
    """
    flat_board = flat_board.reshape(1, len(flat_board))
    predict = self.model.predict(flat_board)[0]

    # The pass slot is whatever `predict[:-1]` strips below, i.e. the
    # last index. The previous hard-coded 82 only agreed with that
    # slice for an 83-entry prediction.
    pass_idx = len(predict) - 1
    if np.argmax(predict) == pass_idx:
        return Move(is_pass=True)

    board = predict[:-1]  # strip away the trailing pass-slot
    # set all invalid locations to 0 to avoid them being chosen
    for move in game.get_invalid_locations(color):
        board[move.to_flat_idx(game.size)] = 0

    max_idx = np.argmax(board)
    # If this move is invalid pass!
    if board[max_idx] == 0:
        return Move(is_pass=True)
    return Move.from_flat_idx(max_idx)
def genmove(self, color, game) -> Move:
    """Return a move chosen by the policy head or the value head.

    `self.logic` selects the strategy:

    * 'policy' -- pick the playable move with the highest policy score;
      index 81 of the flattened policy is the score of passing.
    * 'value'  -- play each playable move on a deep copy of the board,
      re-evaluate the position for this player, and pick the move with
      the highest value; the value of the current position is the
      score of passing.

    Passing is the default and is only replaced by a strictly better
    move. When `self.verbose` is set, the network input and every
    inspected move are printed.

    Raises:
        ValueError: if `self.logic` is neither 'policy' nor 'value'
            (previously this surfaced as an UnboundLocalError at the
            end of the function).
    """
    if self.logic not in ('policy', 'value'):
        raise ValueError(
            "unknown logic {!r}, expected 'policy' or 'value'".format(
                self.logic))
    use_value = self.logic == 'value'

    board = np.array(game.board)
    my_value = WHITE if color == 'w' else BLACK
    inp = self.generate_input(board, my_value)
    if self.verbose:
        print(inp)
    policy, value = self.model(inp)
    policy = policy.data.numpy().flatten()
    value = value.data.numpy().flatten()

    playable_locations = game.get_playable_locations(color)

    # Default: passing
    best_move = Move(is_pass=True)
    best_score = value if use_value else policy[81]

    for move in playable_locations:
        if self.verbose:
            print(move)
        if move.is_pass:
            continue

        if use_value:
            # Play move on a test board
            test_board = copy.deepcopy(game.board)
            test_board.place_stone_and_capture_if_applicable_default_values(
                move.to_matrix_location(), my_value)
            # The resulting position is evaluated from this player's
            # own perspective, although it would be the opponent's turn.
            inp = self.generate_input(np.array(test_board), my_value)
            _, new_value = self.model(inp)
            score = new_value.data.numpy().flatten()
        else:
            score = policy[move.to_flat_idx()]

        if score > best_score:
            best_move, best_score = move, score

    return best_move
def _genmove(self, color, game, flat_board):
    """Return the model's best valid move, or a pass.

    The network input is the board with every entry mapped through
    `self.replace_entry`, followed by one extra feature: 1 when
    `color == BLACK`, -1 otherwise. Invalid locations are overwritten
    with -1 in the prediction. A pass is returned when the argmax is
    index 81 or when the winning score is -1.
    """
    rows = flat_board.reshape(1, len(flat_board)).tolist()
    features = [self.replace_entry(value) for row in rows for value in row]
    features.append(1 if color == BLACK else -1)

    scores = self.model.predict(np.array([features]).reshape(1, -1))[0]
    for illegal in game.get_invalid_locations(color):
        scores[illegal.to_flat_idx(game.size)] = -1

    best_idx = np.argmax(scores)
    if best_idx == 81 or scores[best_idx] == -1:
        return Move(is_pass=True)
    return Move.from_flat_idx(best_idx)
def genmove(self, color, game) -> Move:
    """Return the playable location the model scores highest, or a pass.

    The model output is reshaped to 9x9. Scores of playable, non-pass
    locations are copied into a matrix pre-filled with `dummy_value`,
    the matrix is passed through `self.softmax`, and the argmax is
    chosen.

    A pass is returned when the chosen location was never filled in
    (it still holds `dummy_value` before the softmax), or when its
    softmax output is below 1/81 + 0.0001, i.e. barely above a uniform
    spread over 81 locations.
    """
    # We're still interested in the playable locations
    playable_locations = game.get_playable_locations(color)

    # Format the board and make predictions
    inp = self.board_to_input(color, game.board)
    # Values go in as lazy %-style arguments; without a placeholder in
    # the message they cannot be merged into it.
    # NOTE(review): assumes bot_logger is a standard logging.Logger.
    bot_logger.debug('Input shape: %s', inp.shape)
    bot_logger.debug('Input: %s', inp)
    pred_moves = self.model.predict(inp)
    bot_logger.debug('This worked')
    bot_logger.debug('Predicted moves: %s', pred_moves)
    pred_moves = pred_moves.reshape(9, 9)

    dummy_value = -10
    masked_scores = np.array([[dummy_value] * 9] * 9, dtype=float)
    for move in playable_locations:
        if move.is_pass:
            continue
        loc = move.to_matrix_location()
        masked_scores[loc[0]][loc[1]] = pred_moves[loc[0]][loc[1]]

    # Hand softmax a copy so the sentinel entries stay intact for the
    # playability check below, even if softmax works in place.
    move_probs = self.softmax(masked_scores.copy())
    row, col = np.unravel_index(move_probs.argmax(), move_probs.shape)
    move = Move(col=col, row=row)
    loc = move.to_matrix_location()
    # The sentinel is looked up in the pre-softmax scores; presumably
    # the softmax output itself never equals it -- TODO confirm.
    if (masked_scores[loc] == dummy_value
            or move_probs[loc] < (1 / 81 + 0.0001)):
        move = Move(is_pass=True)
    return move
def genmove(color, game) -> Move:
    """Prompt on stdin until the user enters a move the game accepts.

    Each attempt reads one line, parses it with `Move().from_gtp` and
    test-plays it via `game.play(..., testing=True)`. An
    InvalidMove_Error or any other failure prints a hint and prompts
    again.

    Raises:
        EOFError: if stdin is closed; retrying could never succeed, so
            the error is propagated instead of looping forever.
    """
    move = None
    while move is None:
        try:
            print('\nsubmit your move:')
            move_str = input()
            move = Move().from_gtp(move_str, game.size)
            game.play(move, color, testing=True)
        except InvalidMove_Error:
            move = None
            print('\ninvalid move, choose another location or "pass":')
        except EOFError:
            raise
        except Exception:
            # NOTE(review): the exception type of this handler was not
            # legible in the reviewed source; a broad handler is
            # assumed so any unparsable input re-prompts -- confirm.
            move = None
            print('\nbad input, retry or "pass":')
    return move
def _genmove(self, color, game, flat_board):
    """Return the model's best valid move, or a pass.

    Invalid locations are zeroed in the prediction. A pass is returned
    when the argmax is index 81 or when the winning score is 0.
    """
    board_row = flat_board.reshape(1, len(flat_board))
    scores = self.model.predict(self.board_to_input(board_row))[0]

    # Zero out invalid moves so they cannot win the argmax.
    for illegal in game.get_invalid_locations(color):
        scores[illegal.to_flat_idx()] = 0

    best_idx = np.argmax(scores)
    may_play = not (best_idx == 81 or scores[best_idx] == 0)
    if may_play:
        return Move.from_flat_idx(best_idx)
    return Move(is_pass=True)
def _genmove(self, color, game, flat_board):
    """Generate a move - ValueBot logic

    1. Evaluate the current position with the model.
    2. Evaluate the position after each playable, non-pass move
       (played on a deep copy of the board, captures applied).
    3. Return the move with the highest evaluation, or a pass if no
       move scores strictly higher than the current position.

    Entry [0, 0] of the model output is the evaluation that is compared.
    """
    color = WHITE if color == 'w' else BLACK
    flat_board = flat_board.reshape(1, len(flat_board))
    my_value = color

    # 1. Get current win probability
    inp = self.generate_nn_input(flat_board, color)
    current_prob = self.model.predict(inp)
    # Sanity check that the output sums to one. Compared with a
    # tolerance: floating-point sums are rarely exactly 1, so an exact
    # `== 1` fails spuriously.
    total = np.sum(current_prob)
    assert np.isclose(total, 1), total

    # 2. Evaluate all possible moves
    best_win_prob = current_prob[0, 0]
    best_move = Move(is_pass=True)
    playable_locations = game.get_playable_locations(color)
    for move in playable_locations:
        if move.is_pass:
            continue

        # Play the move and evaluate the resulting board
        test_board = copy.deepcopy(game.board)
        test_board.place_stone_and_capture_if_applicable_default_values(
            move.to_matrix_location(), my_value)
        inp = self.generate_nn_input(test_board.flatten(), color)
        pred_result = self.model.predict(inp)[0, 0]

        # 3. Keep the move only if it strictly improves the evaluation
        if pred_result > best_win_prob:
            best_move = move
            best_win_prob = pred_result

    return best_move
def run(self):
    """Run the game loop between the two players, then exit.

    Every turn the current player is asked for a move with `genmove`.
    A response starting with '?' is logged as an error and aborts the
    game. Otherwise the move (the response without its two leading
    characters) is sent to the other player, played on the local game,
    the board is printed, and the players swap roles after a short
    sleep.

    After the loop `quit` is broadcast, the final board and result are
    printed, and the process exits with status 0.
    """
    self.game.start()

    while self.game.is_running:
        print('\nnext turn\n')
        response = self.wait_for_response(
            self.current_player,
            'genmove ' + self.current_player.color)

        if response.startswith('?'):
            error_text = response[2:]
            self.log_and_print(
                'player ' + self.current_player.name +
                ' responded with an error, aborting the game: ' +
                '"' + error_text + '"')
            break

        move = response[2:]  # strip away the "= "
        self.send_to_player(
            self.other_player,
            'play ' + self.current_player.color + ' ' + move)
        parsed_move = Move().from_gtp(move, self.game.size)
        self.game.play(parsed_move, self.current_player.color)

        print('\n' + str(self.game))
        time.sleep(self.end_of_turn_sleep_time)

        # swap players for next turn
        if self.current_player == self.player1:
            self.current_player, self.other_player = (
                self.player2, self.player1)
        else:
            self.current_player, self.other_player = (
                self.player1, self.player2)

    self.broadcast('quit')
    print('\n' + str(self.game))
    print('Final result:', self.game.evaluate_points())
    sys.exit(0)
def send_pass_move(self):
    """Hand a pass move to the controller as if it came from the GUI."""
    pass_move = Move(is_pass=True)
    self.controller.receive_move_from_gui(pass_move)
def init_game(game, col_coord, row_coord):
    """Play a 'w' move at the given column/row and return the game."""
    game.play(Move(col=col_coord, row=row_coord), 'w')
    return game