def test_new_tree_called_once_self_play(self):
    """Self-play should reuse one shared MCTS tree: Tree.new_tree is called exactly once."""
    from engine import Tree
    original_new_tree = Tree.new_tree
    self.count = 0

    def counting_new_tree(*args, **kwargs):
        # Count invocations, then delegate to the real constructor.
        self.count += 1
        return original_new_tree(*args, **kwargs)

    # BUG FIX: the original never undid this monkey-patch, so the counting
    # wrapper leaked into every test that ran afterwards. Restore it in a
    # finally block so the patch is scoped to this test only.
    Tree.new_tree = counting_new_tree
    try:
        model = DummyModel()
        mcts_simulations = 32  # We want some mcts exploration
        play_game(model, model, mcts_simulations, conf['STOP_EXPLORATION'],
                  self_play=True, num_moves=10)
    finally:
        Tree.new_tree = original_new_tree
    self.assertEqual(self.count, 1)  # Only one tree was created
def test_new_tree_called_twice_evaluation(self):
    """Evaluation (non-self-play) should build one tree per player: exactly two calls."""
    from engine import Tree
    original_new_tree = Tree.new_tree
    self.count = 0

    def counting_new_tree(*args, **kwargs):
        # Count invocations, then delegate to the real constructor.
        self.count += 1
        return original_new_tree(*args, **kwargs)

    # BUG FIX: the original never undid this monkey-patch, so the counting
    # wrapper leaked into every test that ran afterwards. Restore it in a
    # finally block so the patch is scoped to this test only.
    Tree.new_tree = counting_new_tree
    try:
        model = DummyModel()
        mcts_simulations = 160  # We want some mcts exploration so both tree overlap
        play_game(model, model, mcts_simulations, stop_exploration=0,
                  self_play=False, num_moves=100)
    finally:
        Tree.new_tree = original_new_tree
    # This works because we deactivate exploration and dirichlet noise in order
    # to have deterministic play
    self.assertEqual(self.count, 2)  # Only 2 trees were created
def evaluate(best_model, tested_model):
    """Play EVALUATE_N_GAMES between the two models and promote tested_model.

    Colours are assigned at random each game to remove first-move bias.
    If tested_model's winrate exceeds EVALUATE_MARGIN it is elected as the
    new best model.

    Returns:
        True when tested_model was promoted, False otherwise.
    """
    total = 0
    wins = 0
    desc = "Evaluation %s vs %s" % (tested_model.name, best_model.name)
    tq = tqdm(range(EVALUATE_N_GAMES), desc=desc)
    for game in tq:
        start = datetime.datetime.now()
        # Randomise which model plays first.
        if random() < .5:
            model1, model2 = best_model, tested_model
        else:
            model2, model1 = best_model, tested_model
        game_data = play_game(model1, model2, MCTS_SIMULATIONS,
                              stop_exploration=0)
        stop = datetime.datetime.now()
        winner_model = game_data['winner_model']
        if winner_model == tested_model.name:
            wins += 1
        total += 1
        moves = len(game_data['moves'])
        # BUG FIX: `.seconds` is only the whole-second component of the
        # timedelta (fractions are dropped), so fast games always showed
        # 0.00s/move; total_seconds() keeps sub-second precision.
        new_desc = desc + " (winrate:%s%% %.2fs/move)" % (
            int(wins / total * 100), (stop - start).total_seconds() / moves)
        tq.set_description(new_desc)
        save_game_data(best_model.name, game_data)
    if wins / total > EVALUATE_MARGIN:
        print("We found a new best model : %s!" % tested_model.name)
        elect_model_as_best_model(tested_model)
        return True
    return False
def test_play(self):
    """Check winner bookkeeping and per-move value targets of a 2-move game."""
    model = DummyModel()
    mcts_simulations = 8  # mcts batch size is 8 and we need at least one batch
    game_data = play_game(model, model, mcts_simulations,
                          conf['STOP_EXPLORATION'], self_play=True,
                          num_moves=2)
    winner = game_data['winner']
    test_board1, player = game_init()
    board = game_data['moves'][0]['board']
    self.assertTrue(np.array_equal(board, test_board1))  # First board is empty
    self.assertEqual(winner, 0)  # White should win with 5.5 komi after 2 moves
    # Even-indexed moves belong to the black player (1), who lost.
    # (The unused enumerate indices from the original were removed.)
    for move_data in game_data['moves'][::2]:
        value_target = 1 if winner == move_data['player'] else -1
        self.assertEqual(move_data['player'], 1)
        self.assertEqual(value_target, -1)
    # Odd-indexed moves belong to the white player (0), who won.
    for move_data in game_data['moves'][1::2]:
        value_target = 1 if winner == move_data['player'] else -1
        self.assertEqual(move_data['player'], 0)
        self.assertEqual(value_target, 1)
def test_save_sgf(self):
    """Smoke-test SGF saving: play a short game, write the SGF, clean up."""
    model = DummyModel()
    mcts_simulations = 8  # mcts batch size is 8 and we need at least one batch
    game_data = play_game(model, model, mcts_simulations,
                          conf['STOP_EXPLORATION'], self_play=True,
                          num_moves=100)
    os.makedirs("games/test_model")
    try:
        save_game_sgf("test_model", 1, game_data)
    finally:
        # BUG FIX: clean up even when save_game_sgf raises, otherwise the
        # leftover directory makes the next run's os.makedirs fail.
        sgf_path = "games/test_model/game_001.sgf"
        if os.path.exists(sgf_path):
            os.remove(sgf_path)
        os.removedirs("games/test_model")
def evaluate(best_model, tested_model):
    """Run an evaluation match between the two models.

    Plays EVALUATE_N_GAMES with best_model moving first each game, tracking
    tested_model's winrate in the progress bar. If that winrate exceeds
    EVALUATE_MARGIN, tested_model is elected as the new best model.

    Returns:
        True when tested_model was promoted, False otherwise.
    """
    wins, total = 0, 0
    desc = "Evaluation %s vs %s" % (tested_model.name, best_model.name)
    progress = tqdm(range(EVALUATE_N_GAMES), desc=desc)
    for _ in progress:
        _, _, winner_model = play_game(best_model, tested_model,
                                       MCTS_SIMULATIONS, stop_exploration=0)
        total += 1
        if winner_model == tested_model:
            wins += 1
        progress.set_description(
            desc + " (winrate:%s%%)" % int(wins / total * 100))
    if wins / total > EVALUATE_MARGIN:
        print("We found a new best model : %s!" % tested_model.name)
        elect_model_as_best_model(tested_model)
        return True
    return False
def run(self):
    """Evaluation worker loop.

    Pins the process to its assigned GPU, then repeatedly pits the latest
    trained model against the current best model, saving each game result.
    Sleeps between rounds when running forever; otherwise exits after one
    pass.
    """
    # Pin this worker to its GPU before any CUDA context is created.
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = str(self._gpuid)
    logger.info('cuda_visible_device %s', os.environ["CUDA_VISIBLE_DEVICES"])
    if conf['THREAD_SIMULATION']:
        init_simulation_workers()
    # load models
    latest_model, best_model = self.load_model()
    while True:
        if latest_model.name != best_model.name:
            total = 0
            wins = 0
            desc = "Evaluation %s vs %s" % (latest_model.name,
                                            best_model.name)
            tq = tqdm(range(EVALUATE_N_GAMES), desc=desc)
            for game in tq:
                # BUG FIX: the original compared the tqdm object to the game
                # index (`tq != game`), which is always True, so single-game
                # mode skipped every game. Compare the requested index.
                if self._one_game_only >= 0 and self._one_game_only != game:
                    continue
                directory = os.path.join(EVAL_DIR, latest_model.name,
                                         "game_%03d" % game)
                if os.path.isdir(directory):
                    continue  # game already played (or claimed) by a worker
                try:
                    os.makedirs(directory)
                except Exception:
                    continue  # lost the claim race to another worker
                start = datetime.datetime.now()
                game_data = play_game(best_model, latest_model,
                                      MCTS_SIMULATIONS, stop_exploration=0)
                stop = datetime.datetime.now()
                # Some statistics
                winner_model = game_data['winner_model']
                if winner_model == latest_model.name:
                    wins += 1
                total += 1
                moves = len(game_data['moves'])
                # BUG FIX: total_seconds() instead of .seconds — .seconds
                # drops the fractional part, misreporting fast games as 0.00.
                new_desc = desc + " (winrate:%s%% %.2fs/move)" % (
                    int(wins / total * 100),
                    (stop - start).total_seconds() / moves)
                tq.set_description(new_desc)
                self.save_eval_game(latest_model.name, game, winner_model)
                if self._one_game_only >= 0:
                    break
        else:
            logger.info("No new trained model")
        if self._forever:
            logger.info("Sleep for %s seconds", conf['SLEEP_SECONDS'])
            time.sleep(conf['SLEEP_SECONDS'])
        if not self._forever:
            break
        latest_model, best_model = self.load_model()
    destroy_simulation_workers()
# BUG FIX: three CLI parameters occupy sys.argv[1..3], so argv needs at least
# 4 entries including the script name. The original `< 3` check let a missing
# third argument fall through to an IndexError below.
if len(sys.argv) < 4:
    print(
        'Missing parameters. 3 parameters required to start game play: num_of_game, model1_name, model2_name'
    )
else:
    model1_win = 0
    model2_win = 0
    num_games = sys.argv[1]
    model1_name = sys.argv[2]
    model2_name = sys.argv[3]
    print("Loading models...")
    model1 = load_model(os.path.join('..', conf['MODEL_DIR'], model1_name),
                        custom_objects={'loss': loss})
    print("Loaded model", model1_name)
    model2 = load_model(os.path.join('..', conf['MODEL_DIR'], model2_name),
                        custom_objects={'loss': loss})
    print("Loaded model", model2_name)
    desc = "Duel %s vs %s" % (model1.name, model2.name)
    tq = tqdm(range(int(num_games)), desc=desc)
    for game in tq:
        boards_and_policies, winner, winner_model = play_game(
            model1, model2, MCTS_SIMULATIONS, stop_exploration=0)
        # NOTE(review): this compares the returned winner against the model
        # *object*, while other call sites compare against model.name —
        # confirm play_game returns the object in this code path.
        if winner_model == model1:
            model1_win += 1
        else:
            model2_win += 1
        new_desc = desc + " (%s %d - %d %s)" % (model1_name, model1_win,
                                                model2_win, model2_name)
        tq.set_description(new_desc)