def main():
    model = keras.models.load_model('src/learn/RL_Atari/test_model_1.h5')
    game = Game()
    col_coord, row_coord = 1, 6
    game = init_game(game, col_coord, row_coord)
    print('new game')
    print(game)
    k = 0
    while k < 4:
        qval = model.predict(board2input(game, 'b'), batch_size=1)
        temp_qval = copy.copy(qval)
        move = np.argmax(qval)
        move = Move.from_flat_idx(move)
        location = move.to_matrix_location()
        while game.board[location] != EMPTY:
            # Arbitrarily low value so the next-highest Q-value is picked instead.
            temp_qval[0][np.argmax(temp_qval)] = -100
            move = np.argmax(temp_qval)
            move = Move.from_flat_idx(move)
            location = move.to_matrix_location()
        game.play(move, 'b')
        print(game)
        k = k + 1
def genmove(self, color, game) -> Move: # We're still interested in the playable locations playable_locations = game.get_playable_locations(color) # Format the board and make predictions inp = self.board_to_input(color, game.board) pred_moves = self.model.predict(inp) pred_moves = pred_moves.reshape(9, 9) # print(pred_moves) # print(playable_locations) dummy_value = -10 potential_moves = np.array([[dummy_value] * 9] * 9, dtype=float) for move in playable_locations: # print(move) if move.is_pass: continue loc = move.to_matrix_location() potential_moves[loc[0]][loc[1]] = pred_moves[loc[0]][loc[1]] potential_moves = self.softmax(potential_moves) row, col = np.unravel_index(potential_moves.argmax(), potential_moves.shape) move = Move(col=col, row=row) # if game.board[col,row] != 0: # move = Move(is_pass = True) # return move if potential_moves[move.to_matrix_location()] == dummy_value: move = Move(is_pass=True) return move
def _genmove(self, color, game, flat_board):
    flat_board = flat_board.reshape(1, len(flat_board))
    inp = self.board_to_input(flat_board)
    current_pred = self.model.predict(inp)
    my_index = 0 if color == 'b' else 1
    my_pred = current_pred[0, my_index]
    my_value = BLACK if color == 'b' else WHITE

    # We're still interested in the playable locations
    playable_locations = game.get_playable_locations(color)
    results = np.zeros(game.board.shape)
    for move in playable_locations:
        if move.is_pass:
            continue
        # Play the move on a copy of the board and predict the outcome.
        test_board = copy.deepcopy(game.board)
        test_board.place_stone_and_capture_if_applicable_default_values(
            move.to_matrix_location(), my_value)
        inp = self.board_to_input(test_board.flatten())
        pred_result = self.model.predict(inp)
        results[move.to_matrix_location()] = pred_result[0, my_index]

    # Compare each move's predicted win probability with the current one.
    results -= my_pred
    row, col = np.unravel_index(results.argmax(), results.shape)
    move = Move(col=col, row=row)
    if results[move.to_matrix_location()] <= 0:
        move = Move(is_pass=True)
    return move
def genmove(self, color, game) -> Move:
    my_index = 0 if color == 'b' else 1
    # We're still interested in the playable locations
    playable_locations = game.get_playable_locations(color)

    inp = self.board_to_input(color, game.board)
    current_pred = self.model.predict(inp)
    current_pred = self.softmax(current_pred)
    my_pred = current_pred[0, my_index]
    my_value = BLACK if color == 'b' else WHITE

    results = np.zeros(game.board.shape)
    for move in playable_locations:
        if move.is_pass:
            continue
        # Play the move on a copy of the board and predict the outcome.
        test_board = copy.deepcopy(game.board)
        test_board[move.to_matrix_location()] = my_value
        inp = self.board_to_input(color, test_board)
        pred_result = self.model.predict(inp)
        pred_result = self.softmax(pred_result)
        results[move.to_matrix_location()] = pred_result[0, my_index]

    results -= my_pred
    # `results` now contains our predicted win probability for each move,
    # adjusted by the current win probability. A move is only worth playing if
    # its entry is positive; the higher the value, the better the move.
    row, col = np.unravel_index(results.argmax(), results.shape)
    move = Move(col=col, row=row)
    if results[move.to_matrix_location()] <= 0:
        move = Move(is_pass=True)
    return move
def genmove(self, color, game) -> Move:
    board = np.array(game.board)
    my_value = WHITE if color == 'w' else BLACK

    inp = self.generate_input(board, my_value)
    if self.verbose:
        print(inp)
    policy = self.model(inp)
    policy = policy.data.numpy().flatten()

    playable_locations = game.get_playable_locations(color)

    # Default: passing (index 81 is the pass slot)
    policy_move = Move(is_pass=True)
    policy_move_prob = policy[81]

    for move in playable_locations:
        if self.verbose:
            print(move)
        if move.is_pass:
            continue
        if policy[move.to_flat_idx()] > policy_move_prob:
            policy_move = move
            policy_move_prob = policy[move.to_flat_idx()]

    return policy_move
def _genmove(self, color, game, flat_board):
    flat_board = flat_board.reshape(1, len(flat_board))
    X = self.board_to_input(flat_board)
    predict = self.model.predict(X)[0]

    # Set invalid moves to 0 so they cannot be chosen
    for move in game.get_invalid_locations(color):
        flat_idx = move.to_flat_idx()
        predict[flat_idx] = 0

    max_idx = np.argmax(predict)
    # Index 81 is the pass slot; if every move scored 0 we also pass.
    if max_idx == 81 or predict[max_idx] == 0:
        return Move(is_pass=True)
    else:
        return Move.from_flat_idx(max_idx)
def open(self):
    pygame.init()
    self.running = True
    self.screen = pygame.display.set_mode(window_size)
    pygame.display.set_caption('Go')
    self.buttons.append(
        Button(210, 530, 80, 40, 'Pass', self.screen, self.send_pass_move))
    self.labels.append(
        Label(100, 30, 300, 40, self.get_turn_label_text, self.screen))
    self.render()

    while self.running:
        event = pygame.event.poll()
        if event.type == pygame.MOUSEBUTTONUP:
            # Translate the pixel position of the click into board coordinates.
            x, y = event.pos
            col = int(round((x - board_top_left_coord[0]) / self.cell_size))
            row = int(round((y - board_top_left_coord[1]) / self.cell_size))
            if 0 <= col < self.game.size and 0 <= row < self.game.size:
                self.controller.receive_move_from_gui(Move(col, row))
            for btn in self.buttons:
                btn.check_mouse_released()
        if event.type == pygame.QUIT:
            self.running = False
        for btn in self.buttons:
            btn.is_mouse_over_btn()
        self.render()

    pygame.quit()
    sys.exit(0)
def _genmove(self, color, game, flat_board): """Generate a move - PolicyBot logic The logic of this bot is basically: 1. Directly generate a move 2. Take the valid move with the highest score """ color = WHITE if color == 'w' else BLACK flat_board = flat_board.reshape(1, len(flat_board)) # 1. Generate move probabilities inp = self.generate_nn_input(flat_board, color) prediction = self.model.predict(inp)[0] # 2. Look at each valid move and take the best one # Yes, this is looped, bad perf, but it is intuitively understandable # and it leaves little room for errors! playable_locations = game.get_playable_locations(color) best_move = Move(is_pass=True) best_move_prob = prediction[81] for move in playable_locations: if move.is_pass: continue if prediction[move.to_flat_idx()] > best_move_prob: best_move = move best_move_prob = prediction[move.to_flat_idx()] return best_move
def replay_game(sgf_line, func):
    """Simply recreate a game from an SGF file

    More of a proof-of-concept or example than really a necessary function.
    We will use some modified version of this to create the training data.
    """
    collection = sgf.parse(sgf_line)

    # This all only works if the SGF contains only one game
    game_tree = collection.children[0]
    game_properties = game_tree.nodes[0].properties

    # Skip games without a clear black or white result.
    if not (game_properties['RE'][0].startswith('B')
            or game_properties['RE'][0].startswith('W')):
        return None
    black_win = game_properties['RE'][0].startswith('B')

    game = Game(game_properties)
    out = []
    for n in game_tree.nodes[1:]:
        player_color = list(n.properties.keys())[0]
        move = Move.from_sgf(str(n.properties[player_color][0]))
        game.play(move, player_color.lower(), checking=False)
        out.append(func(game, player_color.lower(), black_win))
    out = np.stack(out)
    return out
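# A minimal, hypothetical usage sketch for replay_game (not part of the original
# module): it assumes `sgf_line` holds the SGF text of a single finished game and
# uses an illustrative feature function. The name `board_with_outcome` and the
# exact feature layout are assumptions, not the project's actual training pipeline.
def board_with_outcome(game, color, black_win):
    # One training row per position: the flattened 9x9 board plus the final outcome.
    features = np.array(game.board).flatten()
    label = 1 if black_win else 0
    return np.append(features, label)

# positions = replay_game(sgf_line, board_with_outcome)
# positions.shape -> (number_of_moves, 82) for a 9x9 board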
def genmove(self, color, game) -> Move:
    nn_input_board = self.flatten_matrix(game.board)
    predict = self.model.predict(np.array([nn_input_board]))
    max_idx = np.argmax(predict)
    if max_idx == 0:
        # Slot zero is the pass slot
        return Move(is_pass=True)
    else:
        board = predict[0][1:]  # strip away the pass slot at position zero
        # Set all invalid locations to 0 to avoid them being chosen.
        # Is that cheating the NN or cool?
        for move in game.get_invalid_locations(color):
            flat_idx = move.to_flat_idx(game.size)
            board[flat_idx] = 0
        max_idx = np.argmax(board)
        row = int(math.floor(max_idx / game.size))
        col = int(max_idx % game.size)
        return Move(col=col, row=row)
def genmove(self, color, game) -> Move:
    input_board = self.flatten_matrix(game.board, color)
    pred = self.model.predict(np.array([input_board]).reshape(1, -1))
    max_idx = np.argmax(pred)
    if max_idx == 81:
        # Index 81 is the pass slot
        return Move(is_pass=True)
    else:
        board = pred[0][0:81]
        # Set all invalid locations to -1 to avoid them being chosen;
        # if all moves are invalid, play pass.
        for move in game.get_invalid_locations(color):
            flat_idx = move.to_flat_idx(game.size)
            board[flat_idx] = -1
        max_idx = np.argmax(board)
        if board[max_idx] == -1:
            return Move(is_pass=True)
        row = int(math.floor(max_idx / game.size))
        col = int(max_idx % game.size)
        return Move(col=col, row=row)
def check_dead_group(game, col_coord, row_coord):
    """Return True if the stone at (col_coord, row_coord) has no empty
    adjacent point left, i.e. all of its direct liberties are filled."""
    b = game.board
    loc = Move(col=col_coord, row=row_coord).to_matrix_location()
    neighbors = b.get_adjacent_coords(loc)
    for n in neighbors:
        if b[n] == EMPTY:
            return False
    return True
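# A small usage sketch for check_dead_group, assuming the Game/init_game helpers
# from this module; the coordinates are arbitrary and only for illustration.
game = Game()
game = init_game(game, 1, 6)  # single white target stone at col=1, row=6
if check_dead_group(game, 1, 6):
    print('target stone has been captured')
else:
    print('target stone still has at least one liberty')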
def _genmove(self, color, game, flat_board):
    flat_board = flat_board.reshape(1, len(flat_board))
    predict = self.model.predict(flat_board)[0]
    max_idx = np.argmax(predict)
    if max_idx == 81:
        # The last slot (index 81) is the pass slot
        return Move(is_pass=True)
    else:
        board = predict[:-1]  # strip away the pass slot
        # Set all invalid locations to 0 to avoid them being chosen
        for move in game.get_invalid_locations(color):
            flat_idx = move.to_flat_idx(game.size)
            board[flat_idx] = 0
        max_idx = np.argmax(board)
        # If even the best remaining move is invalid, pass!
        if board[max_idx] == 0:
            return Move(is_pass=True)
        return Move.from_flat_idx(max_idx)
def genmove(self, color, game) -> Move:
    board = np.array(game.board)
    my_value = WHITE if color == 'w' else BLACK

    inp = self.generate_input(board, my_value)
    if self.verbose:
        print(inp)
    policy, value = self.model(inp)
    policy = policy.data.numpy().flatten()
    value = value.data.numpy().flatten()

    playable_locations = game.get_playable_locations(color)

    # Default: passing (policy index 81 is the pass slot)
    policy_move = value_move = Move(is_pass=True)
    policy_move_prob = policy[81]
    value_move_prob = value

    for move in playable_locations:
        if self.verbose:
            print(move)
        if move.is_pass:
            continue

        if self.logic == 'value':
            # Play the move on a test board
            test_board = copy.deepcopy(game.board)
            test_board.place_stone_and_capture_if_applicable_default_values(
                move.to_matrix_location(), my_value)
            # Ideally we would evaluate the resulting state from the enemy's
            # perspective (it is their turn) and negate their win probability;
            # for now we simply ask for our own win probability on the new board.
            inp = self.generate_input(np.array(test_board), my_value)
            _, new_value = self.model(inp)
            new_value = new_value.data.numpy().flatten()
            if new_value > value_move_prob:
                value_move = move
                value_move_prob = new_value

        if self.logic == 'policy':
            if policy[move.to_flat_idx()] > policy_move_prob:
                policy_move = move
                policy_move_prob = policy[move.to_flat_idx()]

    if self.logic == 'policy':
        out_move = policy_move
    if self.logic == 'value':
        out_move = value_move
    return out_move
def _genmove(self, color, game, flat_board):
    flat_board = flat_board.reshape(1, len(flat_board))
    input_board = flat_board.tolist()
    input_board = [
        self.replace_entry(entry) for row in input_board for entry in row
    ]
    # Append whose turn it is as an extra feature: 1 for black, -1 for white.
    if color == BLACK:
        input_board.append(1)
    else:
        input_board.append(-1)

    pred = self.model.predict(np.array([input_board]).reshape(1, -1))[0]

    # Mark invalid locations so they cannot be chosen.
    for move in game.get_invalid_locations(color):
        flat_idx = move.to_flat_idx(game.size)
        pred[flat_idx] = -1

    max_idx = np.argmax(pred)
    # Index 81 is the pass slot; also pass when every move is invalid.
    if max_idx == 81 or pred[max_idx] == -1:
        return Move(is_pass=True)
    return Move.from_flat_idx(max_idx)
def genmove(color, game) -> Move:
    move = None
    while move is None:
        try:
            print('\nsubmit your move:')
            move_str = input()
            move = Move().from_gtp(move_str, game.size)
            game.play(move, color, testing=True)
        except InvalidMove_Error:
            move = None
            print('\ninvalid move, choose another location or "pass":')
        except Exception:
            move = None
            print('\nbad input, retry or "pass":')
    return move
def _genmove(self, color, game, flat_board): """Generate a move - ValueBot logic The logic of this bot is basically: 1. Evaluate current probability of winning 2. Evaluate the probabilities of winning for each move 3. Make the best move if there is a valid move that raises the probs """ color = WHITE if color == 'w' else BLACK flat_board = flat_board.reshape(1, len(flat_board)) my_value = color # 1. Get current Win Probability inp = self.generate_nn_input(flat_board, color) current_prob = self.model.predict(inp) assert np.sum(current_prob) == 1, np.sum(current_prob) # print(current_prob) # 2. Evaluate all possible moves best_win_prob = current_prob[0, 0] best_move = Move(is_pass=True) playable_locations = game.get_playable_locations(color) for move in playable_locations: if move.is_pass: continue # Play the move and evaluate the resulting board test_board = copy.deepcopy(game.board) test_board.place_stone_and_capture_if_applicable_default_values( move.to_matrix_location(), my_value) inp = self.generate_nn_input(test_board.flatten(), color) pred_result = self.model.predict(inp)[0, 0] if pred_result > best_win_prob: best_move = move best_win_prob = pred_result return best_move
def run(self):
    self.game.start()

    while self.game.is_running:
        print('\nnext turn\n')
        response = self.wait_for_response(
            self.current_player, 'genmove ' + self.current_player.color)
        if response.startswith('?'):
            self.log_and_print(
                'player ' + self.current_player.name +
                ' responded with an error, aborting the game: ' +
                '"' + response[2:] + '"')
            break
        move = response[2:]  # strip away the "= "
        self.send_to_player(
            self.other_player,
            'play ' + self.current_player.color + ' ' + move)
        self.game.play(Move().from_gtp(move, self.game.size),
                       self.current_player.color)
        print('\n' + str(self.game))
        time.sleep(self.end_of_turn_sleep_time)

        # swap players for next turn
        if self.current_player == self.player1:
            self.current_player = self.player2
            self.other_player = self.player1
        else:
            self.current_player = self.player1
            self.other_player = self.player2

    self.broadcast('quit')
    print('\n' + str(self.game))
    print('Final result:', self.game.evaluate_points())
    sys.exit(0)
def genmove(self, color, game) -> Move: # We're still interested in the playable locations playable_locations = game.get_playable_locations(color) # Format the board and make predictions inp = self.board_to_input(color, game.board) bot_logger.debug('Input shape:', inp.shape) bot_logger.debug('Input:', inp) pred_moves = self.model.predict(inp) # pred_moves = self.model.predict(np.zeros((1, 162))) bot_logger.debug('This worked') bot_logger.debug('Predicted moves:', pred_moves) pred_moves = pred_moves.reshape(9, 9) # print(pred_moves) # print(playable_locations) dummy_value = -10 potential_moves = np.array([[dummy_value] * 9] * 9, dtype=float) for move in playable_locations: # print(move) if move.is_pass: continue loc = move.to_matrix_location() potential_moves[loc[0]][loc[1]] = pred_moves[loc[0]][loc[1]] # print([i for row in potential_moves for i in row]) potential_moves = self.softmax(potential_moves) row, col = np.unravel_index(potential_moves.argmax(), potential_moves.shape) move = Move(col=col, row=row) if (potential_moves[move.to_matrix_location()] == dummy_value or potential_moves[move.to_matrix_location()] < (1 / 81 + 0.0001)): move = Move(is_pass=True) return move
def send_pass_move(self):
    self.controller.receive_move_from_gui(Move(is_pass=True))
def main():
    # Simple fully connected Q-network: 243 input features -> 81 Q-values.
    model = Sequential()
    model.add(
        Dense(units=200,
              kernel_initializer='uniform',
              activation='relu',
              input_shape=(243, )))
    model.add(Dense(units=400, kernel_initializer='uniform', activation='relu'))
    model.add(Dense(units=200, kernel_initializer='uniform', activation='relu'))
    model.add(Dense(units=81, kernel_initializer='uniform', activation='linear'))
    rms = RMSprop()
    model.compile(loss='mse', optimizer=rms)

    col_coord, row_coord = 1, 6  # fixed target stone; could be random.randint(0, 8)
    epochs = 10
    gamma = 0.9
    epsilon = 1

    for i in range(epochs):
        game = Game()
        game = init_game(game, col_coord, row_coord)
        status = 1  # game in progress
        while status == 1:
            qval = model.predict(board2input(game, 'b'), batch_size=1)
            if random.random() < epsilon:
                # Explore: pick a random valid (non-pass) move.
                valid_moves = game.get_playable_locations('b')
                move = random.choice(valid_moves)
                while move.is_pass:
                    move = random.choice(valid_moves)
                new_game = copy.deepcopy(game)
                new_game.play(move, 'b')
                move = move.to_flat_idx()
            else:
                # Exploit: pick the highest Q-value that lands on an empty point.
                temp_qval = copy.copy(qval)
                move = np.argmax(temp_qval)
                move = Move.from_flat_idx(move)
                new_game = copy.deepcopy(game)
                location = move.to_matrix_location()
                while new_game.board[location] != EMPTY:
                    # Arbitrarily low value so the next-highest Q-value is tried.
                    temp_qval[0][np.argmax(temp_qval)] = -100
                    move = np.argmax(temp_qval)
                    move = Move.from_flat_idx(move)
                    location = move.to_matrix_location()
                new_game.play(move, 'b')
                move = move.to_flat_idx()

            if check_dead_group(new_game, col_coord, row_coord):
                reward = 10
                status = 0
            else:
                reward = -1

            # Get maxQ from the new state and build the Q-learning target:
            # terminal state -> reward, otherwise reward + gamma * maxQ.
            newQ = model.predict(board2input(new_game, 'b'), batch_size=1)
            maxQ = np.max(newQ)
            if reward == -1:  # non-terminal state
                update = reward + (gamma * maxQ)
            else:  # terminal state
                update = reward

            # y equals the old Q-values, except the chosen action gets the target.
            y = np.zeros((1, 81))
            y[:] = qval[:]
            y[0][move] = update
            model.fit(board2input(game, 'b'), y, batch_size=1, epochs=1, verbose=0)
            game = copy.copy(new_game)

        print('game ' + str(i) + ' ends here')
        if epsilon > 0.1:
            epsilon -= (1 / epochs)

    model.save('test_model_1.h5')
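# A tiny worked example of the Q-target used in the loop above (illustrative
# numbers only, not taken from a real run): a non-terminal step combines the step
# reward with the discounted best Q-value of the successor state; a terminal
# capture uses the raw reward on its own.
reward, gamma, max_q_next = -1, 0.9, 4.0
non_terminal_target = reward + gamma * max_q_next  # -1 + 0.9 * 4.0 = 2.6
terminal_target = 10                               # the capture reward itself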
def init_game(game, col_coord, row_coord):
    # Place the single white stone that black is supposed to capture.
    move = Move(col=col_coord, row=row_coord)
    game.play(move, 'w')
    return game
def main():
    # Same Q-network as before: 243 input features -> 81 Q-values.
    model = Sequential()
    model.add(
        Dense(units=200,
              kernel_initializer='uniform',
              activation='relu',
              input_shape=(243, )))
    model.add(Dense(units=400, kernel_initializer='uniform', activation='relu'))
    model.add(Dense(units=200, kernel_initializer='uniform', activation='relu'))
    model.add(Dense(units=81, kernel_initializer='uniform', activation='linear'))
    rms = RMSprop()
    model.compile(loss='mse', optimizer=rms)

    epochs = 50000
    gamma = 0.975
    epsilon = 1
    batchSize = 50
    buffer = 100
    replay = []  # experience replay memory: (state, action, reward, new_state)
    h = 0

    for i in range(epochs):
        # Place the target white stone at a random position for each game.
        col_coord, row_coord = random.randint(0, 8), random.randint(0, 8)
        game = Game()
        game = init_game(game, col_coord, row_coord)
        status = 1  # game in progress
        reward = -1  # by default at game start

        while status == 1:
            qval = model.predict(board2input(game, 'b'), batch_size=1)
            if reward == -1:
                if random.random() < epsilon:
                    # Explore: pick a random valid (non-pass) move.
                    valid_moves = game.get_playable_locations(BLACK)
                    if len(valid_moves) == 0:
                        print('end it')
                    move = random.choice(valid_moves)
                    while move.is_pass:
                        move = random.choice(valid_moves)
                    new_game = copy.deepcopy(game)
                    new_game.play(move, 'b')
                    move = move.to_flat_idx()
                else:
                    # Exploit: highest Q-value that lands on an empty point.
                    temp_qval = copy.copy(qval)
                    move = np.argmax(temp_qval)
                    move = Move.from_flat_idx(move)
                    new_game = copy.deepcopy(game)
                    location = move.to_matrix_location()
                    while new_game.board[location] != EMPTY:
                        # Arbitrarily low value so the next-highest Q-value is tried.
                        temp_qval[0][np.argmax(temp_qval)] = -100
                        move = np.argmax(temp_qval)
                        move = Move.from_flat_idx(move)
                        location = move.to_matrix_location()
                    new_game.play(move, 'b')
                    move = move.to_flat_idx()

            if check_dead_group(new_game, col_coord, row_coord):
                reward = 50
            else:
                reward = -1

            # Experience replay storage: fill the buffer first, then overwrite the
            # oldest entries ring-buffer style and train on a random minibatch.
            if len(replay) < buffer:
                replay.append((board2input(game, 'b'), move, reward,
                               board2input(new_game, 'b')))
            else:
                if h < (buffer - 1):
                    h += 1
                else:
                    h = 0
                replay[h] = (board2input(game, 'b'), move, reward,
                             board2input(new_game, 'b'))
                minibatch = random.sample(replay, batchSize)
                X_train = []
                y_train = []
                for memory in minibatch:
                    (m_game, m_move, m_reward, m_new_game) = memory
                    oldqval = model.predict(m_game, batch_size=1)
                    # Q-learning target: best Q-value of the successor state.
                    maxq = np.max(model.predict(m_new_game, batch_size=1))
                    y = np.zeros(81)
                    y[:] = oldqval
                    if m_reward == 50:  # terminal state
                        update = m_reward
                    else:
                        update = m_reward + gamma * maxq
                    y[m_move] = update
                    X_train.append(m_game)
                    y_train.append(y)
                X_train = np.stack(X_train)
                y_train = np.stack(y_train)
                model.fit(X_train, y_train, batch_size=batchSize, epochs=1, verbose=0)

            game = copy.copy(new_game)
            if reward == 50:
                status = 0
                print('game ' + str(i) + ' ends here')

        if epsilon > 0.1:
            epsilon -= (1 / epochs)

        # Save a checkpoint every 5000 games.
        if i % 5000 == 0 and i > 0:
            name = 'src/learn/RL_Atari/hard_atari_' + str(i) + '.h5'
            model.save(name)

    model.save('src/learn/RL_Atari/test_model_final.h5')