def evaluate(
        black_model: 'The path to the model to play black',
        white_model: 'The path to the model to play white',
        output_dir: 'Where to write the evaluation results' = 'sgf/evaluate',
        games: 'the number of games to play' = 16,
        verbose: 'How verbose the players should be (see selfplay)' = 1):
    """Pit two model checkpoints against each other.

    Loads the two networks, then hands the actual match play (and any
    result output under `output_dir`) off to evaluation.play_match.
    """
    utils.ensure_dir_exists(output_dir)
    with utils.logged_timer("Loading weights"):
        network_for_black = dual_net.DualNetwork(black_model)
        network_for_white = dual_net.DualNetwork(white_model)
    with utils.logged_timer("Playing game"):
        evaluation.play_match(
            network_for_black, network_for_white, games, output_dir, verbose)
def evaluate(
        black_model: 'The path to the model to play black',
        white_model: 'The path to the model to play white',
        output_dir: 'Where to write the evaluation results' = 'data/evaluate/sgf',
        readouts: 'How many readouts to make per move.' = 400,
        games: 'the number of games to play' = 16,
        verbose: 'How verbose the players should be (see selfplay)' = 1):
    """Play a match between two models and write one SGF file per game.

    Loads both checkpoints, plays `games` games via evaluation.play_match,
    then serializes each finished game to
    '<output_dir>/<black>-vs-<white>-<idx>.sgf'.
    """
    black_model = os.path.abspath(black_model)
    white_model = os.path.abspath(white_model)
    # Fix: the SGF files below were written into output_dir without it ever
    # being created, so a fresh run crashed with FileNotFoundError.
    os.makedirs(output_dir, exist_ok=True)
    with timer("Loading weights"):
        black_net = dual_net.DualNetwork(black_model)
        white_net = dual_net.DualNetwork(white_model)
    with timer("%d games" % games):
        players = evaluation.play_match(
            black_net, white_net, games, readouts, verbose)

    for idx, p in enumerate(players):
        fname = "{:s}-vs-{:s}-{:d}".format(black_net.name, white_net.name, idx)
        with open(os.path.join(output_dir, fname + '.sgf'), 'w') as f:
            # p is a sequence of player objects; index 0 holds the final
            # position and can render the result string.
            f.write(sgf_wrapper.make_sgf(
                p[0].position.recent,
                p[0].make_result_string(p[0].position),
                black_name=os.path.basename(black_model),
                white_name=os.path.basename(white_model)))
def evaluate(
        black_model: 'The path to the model to play black',
        white_model: 'The path to the model to play white',
        output_dir: 'Where to write the evaluation results'='data/evaluate/sgf',
        readouts: 'How many readouts to make per move.'=400,
        games: 'the number of games to play'=16,
        verbose: 'How verbose the players should be (see selfplay)' = 1):
    """Play a match between two models and record each game as SGF.

    Loads both checkpoints, plays `games` games via evaluation.play_match,
    and writes '<output_dir>/<black>-vs-<white>-<idx>.sgf' per game.
    """
    black_model = os.path.abspath(black_model)
    white_model = os.path.abspath(white_model)
    # Fix: output_dir was never created before the SGF writes below, so a
    # clean checkout failed with FileNotFoundError on the first open().
    os.makedirs(output_dir, exist_ok=True)
    with timer("Loading weights"):
        black_net = dual_net.DualNetwork(black_model)
        white_net = dual_net.DualNetwork(white_model)
    with timer("%d games" % games):
        players = evaluation.play_match(
            black_net, white_net, games, readouts, verbose)

    for idx, p in enumerate(players):
        fname = "{:s}-vs-{:s}-{:d}".format(black_net.name, white_net.name, idx)
        with open(os.path.join(output_dir, fname + '.sgf'), 'w') as f:
            # p[0] carries the final position and the result string.
            f.write(sgf_wrapper.make_sgf(
                p[0].position.recent,
                p[0].make_result_string(p[0].position),
                black_name=os.path.basename(black_model),
                white_name=os.path.basename(white_model)))
def evaluate(
        black_model: 'The path to the model to play black',
        white_model: 'The path to the model to play white',
        output_dir: 'Where to write the evaluation results' = 'sgf/evaluate',
        readouts: 'How many readouts to make per move.' = 400,
        games: 'the number of games to play' = 16,
        verbose: 'How verbose the players should be (see selfplay)' = 1):
    """Run an evaluation match between two model checkpoints.

    Output (SGF records etc.) lands in `output_dir`, which is created
    here if missing; the match itself is played by evaluation.play_match.
    """
    _ensure_dir_exists(output_dir)
    with timer("Loading weights"):
        black_net, white_net = (
            dual_net.DualNetwork(path) for path in (black_model, white_model))
    with timer("%d games" % games):
        evaluation.play_match(
            black_net, white_net, games, readouts, output_dir, verbose)
def evaluate(
        black_model: 'The path to the model to play black',
        white_model: 'The path to the model to play white',
        output_dir: 'Where to write the evaluation results'='sgf/evaluate',
        readouts: 'How many readouts to make per move.'=400,
        games: 'the number of games to play'=16,
        verbose: 'How verbose the players should be (see selfplay)' = 1):
    """Evaluate two checkpoints against each other over `games` games.

    Ensures `output_dir` exists, loads both networks, then delegates the
    actual play to evaluation.play_match.
    """
    _ensure_dir_exists(output_dir)
    with timer("Loading weights"):
        net_playing_black = dual_net.DualNetwork(black_model)
        net_playing_white = dual_net.DualNetwork(white_model)
    with timer("%d games" % games):
        evaluation.play_match(
            net_playing_black, net_playing_white,
            games, readouts, output_dir, verbose)
def evaluate(black_model_name, black_net, white_model_name, white_net,
             evaluate_dir, params):
    """Evaluate with two models.

    Plays a match between two already-loaded DualNetRunners and reports
    which model won. Two models play several games, and the model that
    wins by a margin of 55% will be the winner.

    Args:
      black_model_name: The name of the model playing black.
      black_net: The DualNetRunner model for black
      white_model_name: The name of the model playing white.
      white_net: The DualNetRunner model for white.
      evaluate_dir: Where to write the evaluation results. Set as
        'base_dir/sgf/evaluate/'.
      params: A MiniGoParams instance of hyperparameters for the model.

    Returns:
      The model name of the winner.

    Raises:
      ValueError: if neither `WHITE` or `BLACK` is returned.
    """
    with utils.logged_timer('{} games'.format(params.eval_games)):
        winner = evaluation.play_match(
            params, black_net, white_net, params.eval_games,
            params.eval_readouts, evaluate_dir, params.eval_verbose)

    # play_match must report one of the two canonical color names.
    if winner not in (go.WHITE_NAME, go.BLACK_NAME):
        raise ValueError('Winner should be either White or Black!')
    return black_model_name if winner == go.BLACK_NAME else white_model_name
def evaluate(black_model_name, black_net, white_model_name, white_net,
             evaluate_dir, params):
    """Evaluate with two models.

    With two DualNetRunners playing as black and white in a Go match,
    two models play several games, and the model that wins by a margin
    of 55% will be the winner.

    Args:
      black_model_name: The name of the model playing black.
      black_net: The DualNetRunner model for black
      white_model_name: The name of the model playing white.
      white_net: The DualNetRunner model for white.
      evaluate_dir: Where to write the evaluation results. Set as
        'base_dir/sgf/evaluate/'.
      params: A MiniGoParams instance of hyperparameters for the model.

    Returns:
      The model name of the winner.

    Raises:
      ValueError: if neither `WHITE` or `BLACK` is returned.
    """
    with utils.logged_timer('{} games'.format(params.eval_games)):
        winner = evaluation.play_match(
            params, black_net, white_net, params.eval_games,
            params.eval_readouts, evaluate_dir, params.eval_verbose)

    # Map the winning color straight back to a model name; anything
    # else from play_match is a programming error.
    if winner == go.BLACK_NAME:
        return black_model_name
    if winner == go.WHITE_NAME:
        return white_model_name
    raise ValueError('Winner should be either White or Black!')
def evaluate(trained_models_dir, black_model_name, white_model_name,
             evaluate_dir, params):
    """Evaluate with two models.

    With the model name, construct two DualNetRunners to play as black and
    white in a Go match. Two models play several games, and the model that
    wins by a margin of 55% will be the winner.

    Args:
      trained_models_dir: Directories where the completed generations/models
        are.
      black_model_name: The name of the model playing black.
      white_model_name: The name of the model playing white.
      evaluate_dir: Where to write the evaluation results. Set as
        'base_dir/sgf/evaluate/'.
      params: An object of hyperparameters for the model.

    Returns:
      The model name of the winner.

    Raises:
      ValueError: if neither `WHITE` or `BLACK` is returned.
    """
    # Model names are resolved relative to the trained-models directory.
    black_model = os.path.join(trained_models_dir, black_model_name)
    white_model = os.path.join(trained_models_dir, white_model_name)

    print('Evaluate models between {} and {}'.format(
        black_model_name, white_model_name))

    _ensure_dir_exists(evaluate_dir)

    with utils.logged_timer('Loading weights'):
        black_net = dualnet.DualNetRunner(black_model, params)
        white_net = dualnet.DualNetRunner(white_model, params)

    with utils.logged_timer('{} games'.format(params.eval_games)):
        winner = evaluation.play_match(params, black_net, white_net,
                                       params.eval_games, params.eval_readouts,
                                       evaluate_dir, params.eval_verbose)

    if winner != go.WHITE_NAME and winner != go.BLACK_NAME:
        raise ValueError('Winner should be either White or Black!')

    return black_model_name if winner == go.BLACK_NAME else white_model_name
def evaluate(
        black_model: 'The path to the model to play black',
        white_model: 'The path to the model to play white',
        output_dir: 'Where to write the evaluation results'='sgf/evaluate',
        readouts: 'How many readouts to make per move.'=200,
        games: 'the number of games to play'=20,
        verbose: 'How verbose the players should be (see selfplay)' = 1):
    """Play an evaluation match and return white's win rate.

    The match is timed via qmeas start/stop markers; the win rate is the
    fraction of game results in which white ('W'/'w') appears.
    """
    qmeas.start_time('evaluate')
    _ensure_dir_exists(output_dir)
    with timer("Loading weights"):
        black_net = dual_net.DualNetwork(black_model)
        white_net = dual_net.DualNetwork(white_model)
    with timer("%d games" % games):
        winners = evaluation.play_match(
            black_net, white_net, games, readouts, output_dir, verbose)
    qmeas.stop_time('evaluate')

    # A result string mentioning 'w' (either case) counts as a white win.
    white_wins = sum(1 for result in winners if 'w' in result.lower())
    return white_wins * 1.0 / games
def evaluate(
        black_model: 'The path to the model to play black',
        white_model: 'The path to the model to play white',
        output_dir: 'Where to write the evaluation results'='sgf/evaluate',
        readouts: 'How many readouts to make per move.'=200,
        games: 'the number of games to play'=20,
        verbose: 'How verbose the players should be (see selfplay)' = 1):
    """Run an evaluation match and return the fraction of games white won.

    Loads both networks, plays `games` games under qmeas timing, then
    scores each result string: any result containing 'W' or 'w' is
    counted as a win for white.
    """
    qmeas.start_time('evaluate')
    _ensure_dir_exists(output_dir)
    with timer("Loading weights"):
        net_black = dual_net.DualNetwork(black_model)
        net_white = dual_net.DualNetwork(white_model)
    with timer("%d games" % games):
        results = evaluation.play_match(
            net_black, net_white, games, readouts, output_dir, verbose)
    qmeas.stop_time('evaluate')

    # bool values sum as 0/1, giving the white-win count directly.
    white_total = sum(('W' in result or 'w' in result) for result in results)
    return white_total * 1.0 / games