def main(self):
    """Play an interactive Connect-4 game between a human and the network.

    Reads ``self.rows``, ``self.cols``, ``self.net_file`` and
    ``self.human_side``. On the human's turn the board is printed and a
    column number is read from stdin; on the other turns the move is the
    argmax of the values returned by ``mcts.get_action_prob``. Once
    ``game.game_ended`` reports the end, the final board and the result
    are printed.
    """
    n_sims = 500  # number of simulations passed to the search per AI move
    temp = 0  # temperature passed to the search

    game = Conn4Game(self.rows, self.cols)
    input_dim = game.encoded_board_dim()
    output_dim = game.action_size()
    net = Conn4Net(input_dim, output_dim, load=True, fname=self.net_file)
    mcts = MCTS(net)

    end, winner = False, None
    while not end:
        if self.human_side == game.currentPlayer:
            print(game)
            # Re-prompt on non-numeric input instead of letting int() abort
            # the whole game with a ValueError.
            while True:
                print('Enter your move (column):')
                try:
                    move = int(input())
                except ValueError:
                    print('Please enter a column number.')
                else:
                    break
            # NOTE(review): the column is forwarded unchecked; whether an
            # illegal column is rejected depends on Conn4Game.apply_action.
            game.apply_action(move)
        else:
            p = mcts.get_action_prob(game, n_sims, temp=temp)
            move = np.argmax(p)
            game.apply_action(move)
            print('AI just played on column {}'.format(move))
        end, winner, _ = game.game_ended(1)

    print(game)
    # Any combination other than winner/human_side both in {1, 2} (including
    # a human_side outside that set) is reported as a draw.
    if winner in (1, 2) and self.human_side in (1, 2):
        print('You win!' if winner == self.human_side else 'You lose!')
    else:
        print('Draw!')
def run_match(net1, net2, n_sims, n_games, rows=6, cols=7):
    """Load two saved networks and run a pit match between them.

    Args:
        net1: Value passed as ``fname`` when loading the first network.
        net2: Value passed as ``fname`` when loading the second network.
        n_sims: Forwarded to ``Conn4Pit``.
        n_games: Forwarded to ``Conn4Pit``.
        rows: Board rows (default 6).
        cols: Board columns (default 7).

    Returns:
        The tuple ``(net1, net2, score)``, where ``score`` is whatever
        ``Conn4Pit.run()`` returns.
    """
    print('Running match between {} and {}'.format(net1, net2))

    # A throwaway game instance is only used to query the network dimensions.
    board = Conn4Game(rows=rows, cols=cols)
    dims = (board.encoded_board_dim(), board.action_size())

    first = Conn4Net(*dims, load=True, fname=net1)
    second = Conn4Net(*dims, load=True, fname=net2)

    arena = Conn4Pit(first, second, n_sims, n_games, rows, cols)
    return net1, net2, arena.run()
def play_net2_net1(self, ngames, mcts1, mcts2):
    """Play ``ngames`` games and return the score from net1's point of view.

    In these games net1 takes the turns where ``game.currentPlayer == 2``
    (searching with ``mcts1``) and net2 takes the remaining turns
    (searching with ``mcts2``).

    Args:
        ngames: Number of games to play.
        mcts1: Search object used for net1's moves.
        mcts2: Search object used for net2's moves.

    Returns:
        Number of games won by player 2 (net1) plus 0.5 per draw.
    """
    wins_net1 = 0
    draws = 0
    for _ in range(ngames):
        game = Conn4Game(self.rows, self.cols)
        end, winner = False, None
        while not end:
            # Pick the search tree directly from the side to move. Going
            # through a network-object comparison would hand mcts1 to both
            # sides whenever the two networks compare equal.
            mcts = mcts1 if game.currentPlayer == 2 else mcts2
            p = mcts.get_action_prob(game, self.n_sims, temp=0)
            game.apply_action(np.argmax(p))
            end, winner, _ = game.game_ended(1)
        if winner == 2:
            wins_net1 += 1
        elif winner is None:
            # NOTE(review): assumes game_ended reports a draw as winner None;
            # verify against Conn4Game.
            draws += 1
    return wins_net1 + draws * 0.5
def execute_episode(self):
    """Play one self-play game and return its training examples.

    On every move the search built on ``self.best_nn`` yields action
    probabilities; the encoded board and those probabilities are stored
    and the next action is sampled from them. When ``game.game_ended``
    reports the end, every stored example receives its outcome value,
    with the sign flipping from one example to the next.

    Returns:
        A list of ``[encoded_board, action_probs, value]`` lists in the
        order the moves were played.
    """
    examples = []
    game = Conn4Game(self.rows, self.cols)
    mcts = MCTS(self.best_nn)
    temp = 1  # the same temperature is used for every move

    while True:
        p_a = mcts.get_action_prob(game, self.n_sims, temp=temp)
        # The value slot is filled in once the game outcome is known.
        examples.append([game.encode_board(), p_a, None])

        a = np.random.choice(len(p_a), p=p_a)
        game.apply_action(a)

        end, winner, r = game.game_ended(game.currentPlayer)
        if not end:
            continue

        # Starting sign for the first stored example, by winner.
        if winner == 1:
            m = -1
        elif winner == 2:
            m = 1
        else:
            m = 0
        # Consecutive examples belong to alternating moves, so the sign
        # flips for each one.
        for ex in examples:
            ex[-1] = m * r
            m *= -1
        return examples
# NOTE(review): this line holds two collapsed pieces. First, the tail of an
# episode loop whose enclosing definition starts outside this line: on game end
# it writes outcome values with alternating sign into `examples` and returns
# them. Second, the script entry point: it builds a 6x7 Conn4Game to obtain the
# network dimensions, creates a Conn4Net (`load` is True only when `fname` is
# not None; it is None here) and runs PolicyIter.policy_iter() with the
# hyper-parameters given in the call.
if end: # does the first player won? if winner == 1: m = -1 elif winner == 2: m = 1 else: m = 0 for ex in examples: ex[-1] = m * r m*= -1 return examples move+= 1 if __name__ == '__main__': rows = 6 cols = 7 fname = None#'models_conv/best_model6x7_93.hdf5' game = Conn4Game(rows, cols) input_dim = game.encoded_board_dim() output_dim = game.action_size() net1 = Conn4Net(input_dim, output_dim, load=fname is not None, fname=fname) p = PolicyIter(nnet=net1, n_iters=5000, n_episodes=10, n_sims=40, n_games_pit=4, batch_size=32, win_eps=0.55, rows=rows, cols=cols) p.policy_iter()