def __init__(self, net, rl_flags):
    """Initialize self-play state from a network and RL configuration flags.

    Args:
        net: the neural network used to play / evaluate games.
        rl_flags: configuration object carrying the self-play hyperparameters.
    """
    self.net = net

    # --- schedule / volume parameters ---
    self.N_moves_per_train = rl_flags.N_moves_per_train
    self.N_games = rl_flags.selfplay_games_per_epoch
    self.playouts = rl_flags.num_playouts
    self.num_games_to_evaluate = rl_flags.selfplay_games_against_best_model

    # --- game state: Black always opens a fresh position ---
    self.position = go.Position(to_play=go.BLACK)
    self.final_position_collections = []

    # --- game-length limits ---
    # number of moves that is considered to resign too early
    # (attribute keeps the flag's original 'dicard' spelling for compatibility)
    self.dicard_game_threshold = rl_flags.dicard_game_threshold
    self.game_cut_off_depth = rl_flags.game_cut_off_depth

    # --- resignation bookkeeping ---
    self.resign_threshold = rl_flags.resign_threshold  # -0.25
    self.resign_delta = rl_flags.resign_delta  # 0.05
    self.false_positive_resign_ratio = rl_flags.false_positive_resign_ratio  # 0.05
    self.total_resigned_games = 0
    self.total_false_resigned_games = 0
    self.no_resign_this_game = False
def clear(self):
    """Reset the board to a fresh position that keeps the configured komi.

    NOTE(review): this reads ``self.komi``, which is not assigned in the
    visible ``__init__`` — confirm it is set elsewhere before ``clear`` is
    called, otherwise this raises AttributeError.
    NOTE(review): unlike ``reset_position``, this does not pass
    ``to_play=go.BLACK`` — verify that relying on ``go.Position``'s default
    side-to-move is intentional here.
    """
    self.position = go.Position(komi=self.komi)
import time

import utils.go as go
import utils.strategies as strategies  # fixed: was 'ytils.strategies' (typo)
import main
import Network
import utils.sgf_wrapper as sgf_wrapper
import utils.load_data_sets as load_data_sets
import utils.utils as utils

'''
This file requires model to have reinforcement learning feature, will implement in model alphago model
'''

net = Network.Network(main.args, main.hps, main.args.load_model_path)

now = time.time()
positions = [go.Position(to_play=go.WHITE) for i in range(1)]
# neural net 1 always plays "black", and variety is accomplished by
# letting white play first half the time.
strategies.simulate_many_games(net, net, positions)
print(time.time() - now)
now = time.time()


def get_winrate(final_positions):
    """Return the fraction of finished games won by Black (0.0 to 1.0)."""
    black_win = [
        utils.parse_game_result(pos.result()) == go.BLACK
        for pos in final_positions
    ]
    return sum(black_win) / len(black_win)
def reset_position(self):
    """Discard the current game and start over from an empty board with Black to move."""
    fresh_board = go.Position(to_play=go.BLACK)
    self.position = fresh_board
import time  # fixed: was missing, time.time() is used below

import utils.go as go  # fixed: was missing, go.Position / go.BLACK are used below
import main
import Network
import utils.sgf_wrapper as sgf_wrapper
import utils.load_data_sets as load_data_sets
import utils.utilities as utils
# fixed: simulate_game_mcts was called without being imported; presumably it
# lives in utils.strategies alongside the other simulate_* helpers — confirm.
from utils.strategies import simulate_many_games, simulate_many_games_mcts, simulate_game_mcts
from elo.elo import expected, elo

'''
This file requires model to have reinforcement learning feature, will implement in model alphago model
'''

net = Network.Network(main.args, main.hps, main.args.load_model_path)

now = time.time()
N_games = 1
positions = [go.Position(to_play=go.BLACK) for i in range(N_games)]
# neural net 1 always plays "black", and variety is accomplished by
# letting white play first half the time.
#simulate_many_games(net, net, positions)
#simulate_many_games_mcts(net, net, positions)
position = simulate_game_mcts(net, positions[0])
positions = [position]
print('Total Time to complete ', time.time() - now)


def get_winrate(final_positions):
    """Return the fraction of finished games won by Black (0.0 to 1.0)."""
    black_win = [
        utils.parse_game_result(pos.result()) == go.BLACK
        for pos in final_positions
    ]
    # fixed: the original omitted this return, so the function returned None;
    # the sibling scripts' get_winrate implementations confirm the intent.
    return sum(black_win) / len(black_win)
import time  # fixed: was missing, time.time() is used below

import utils.go as go  # fixed: was missing, go.Position / go.BLACK are used below
import utils.strategies as strategies
from utils.strategies import simulate_many_games, simulate_many_games_mcts
import main
import Network
import utils.sgf_wrapper as sgf_wrapper
import utils.load_data_sets as load_data_sets
import utils.utilities as utils

'''
This file requires model to have reinforcement learning feature, will implement in model alphago model
'''

net = Network.Network(main.args, main.hps, main.args.load_model_path)

now = time.time()
N_games = 1
positions = [go.Position(to_play=go.WHITE) for i in range(N_games)]
# neural net 1 always plays "black", and variety is accomplished by
# letting white play first half the time.
#simulate_many_games(net, net, positions)
simulate_many_games_mcts(net, net, positions)
print('Total Time to complete ', time.time() - now)


def get_winrate(final_positions):
    """Return the fraction of finished games won by Black (0.0 to 1.0)."""
    black_win = [
        utils.parse_game_result(pos.result()) == go.BLACK
        for pos in final_positions
    ]
    return sum(black_win) / len(black_win)