import utils.go as go


# NOTE: the enclosing class name is not shown in the original fragment;
# SelfPlayWorker is assumed here for illustration.
class SelfPlayWorker:
    def __init__(self, net, rl_flags):
        self.net = net

        self.N_moves_per_train = rl_flags.N_moves_per_train
        self.N_games = rl_flags.selfplay_games_per_epoch
        self.playouts = rl_flags.num_playouts

        self.position = go.Position(to_play=go.BLACK)
        self.komi = self.position.komi  # remembered so clear() can rebuild the position
        self.final_position_collections = []

        # a game that resigns in fewer moves than this is considered
        # to have resigned too early and is discarded
        self.dicard_game_threshold = rl_flags.dicard_game_threshold  # flag name keeps the original spelling
        self.game_cut_off_depth = rl_flags.game_cut_off_depth

        self.resign_threshold = rl_flags.resign_threshold  # -0.25
        self.resign_delta = rl_flags.resign_delta  # 0.05
        self.total_resigned_games = 0
        self.total_false_resigned_games = 0
        # acceptable fraction of false-positive resignations (e.g. 0.05),
        # used to tune resign_threshold over time
        self.false_positive_resign_ratio = rl_flags.false_positive_resign_ratio
        # set per game when resignation is disabled, so false positives can be measured
        self.no_resign_this_game = False

        self.num_games_to_evaluate = rl_flags.selfplay_games_against_best_model

    def clear(self):
        self.position = go.Position(komi=self.komi)

    def reset_position(self):
        self.position = go.Position(to_play=go.BLACK)
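
    # A hedged sketch, not part of the original fragment: one plausible way
    # the resignation counters above could feed back into resign_threshold,
    # in the AlphaGo Zero style. The exact update rule is an assumption.
    def update_resign_threshold(self):
        # Fraction of resigned games the resigning side would actually have
        # won, as measured on games where resignation was disabled.
        ratio = self.total_false_resigned_games / max(1, self.total_resigned_games)
        if ratio > self.false_positive_resign_ratio:
            # Resigning too eagerly: demand a more hopeless evaluation.
            self.resign_threshold -= self.resign_delta
        else:
            # Resignations look safe: permit slightly earlier resignation.
            self.resign_threshold += self.resign_delta
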
import time
import utils.go as go
import utils.strategies as strategies
import main
import Network
import utils.sgf_wrapper as sgf_wrapper
import utils.load_data_sets as load_data_sets
import utils.utils as utils
'''
This file requires the model to support reinforcement learning, which will be
implemented in the AlphaGo model.
'''

net = Network.Network(main.args, main.hps, main.args.load_model_path)
now = time.time()

positions = [go.Position(to_play=go.WHITE) for _ in range(1)]
# neural net 1 always plays "black", and variety is accomplished by
# letting white play first half the time.
strategies.simulate_many_games(net, net, positions)
print(time.time() - now)
now = time.time()
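

# An illustrative helper (an assumption, not in the original script) showing
# how the colour alternation described in the comment above could be realised.
def alternating_positions(n):
    # Even-indexed games start with black to move, odd-indexed with white.
    return [go.Position(to_play=go.BLACK if i % 2 == 0 else go.WHITE)
            for i in range(n)]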


def get_winrate(final_positions):
    black_win = [
        utils.parse_game_result(pos.result()) == go.BLACK
        for pos in final_positions
    ]
    return sum(black_win) / len(black_win)
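

# For reference (not in the original script): applying the helper above to
# the games simulated earlier.
print('black win rate:', get_winrate(positions))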

# Example #5
import time
import utils.go as go
# simulate_game_mcts is assumed to live in utils.strategies, matching the
# imports in the other examples in this file.
from utils.strategies import simulate_game_mcts
import main
import Network
import utils.sgf_wrapper as sgf_wrapper
import utils.load_data_sets as load_data_sets
import utils.utilities as utils
from elo.elo import expected, elo
'''
This file requires the model to support reinforcement learning, which will be
implemented in the AlphaGo model.
'''

net = Network.Network(main.args, main.hps, main.args.load_model_path)
now = time.time()

N_games = 1
positions = [go.Position(to_play=go.BLACK) for _ in range(N_games)]

# neural net 1 always plays "black", and variety is accomplished by
# letting white play first half the time.
#simulate_many_games(net, net, positions)
#simulate_many_games_mcts(net, net, positions)
position = simulate_game_mcts(net, positions[0])
positions = [position]
print('Total Time to complete ', time.time() - now)


def get_winrate(final_positions):
    black_win = [
        utils.parse_game_result(pos.result()) == go.BLACK
        for pos in final_positions
    ]
    return sum(black_win) / len(black_win)
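

# A hedged sketch (not in the original script) of how the imported elo
# helpers could score this evaluation game. It assumes the conventional
# signatures expected(rating_a, rating_b) -> expected score and
# elo(old_rating, expected_score, actual_score, k) -> new rating; the
# ratings and k-factor are illustrative values, not from the source.
best_rating, challenger_rating = 1500.0, 1500.0
score = get_winrate(positions)  # 1.0 if the challenger (black) won, else 0.0
exp = expected(challenger_rating, best_rating)
challenger_rating = elo(challenger_rating, exp, score, k=32)
print('challenger elo:', challenger_rating)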
import time
import utils.go as go
import utils.strategies as strategies
from utils.strategies import simulate_many_games, simulate_many_games_mcts
import main
import Network
import utils.sgf_wrapper as sgf_wrapper
import utils.load_data_sets as load_data_sets
import utils.utilities as utils
'''
This file requires the model to support reinforcement learning, which will be
implemented in the AlphaGo model.
'''

net = Network.Network(main.args, main.hps, main.args.load_model_path)
now = time.time()

N_games = 1
positions = [go.Position(to_play=go.WHITE) for _ in range(N_games)]

# neural net 1 always plays "black", and variety is accomplished by
# letting white play first half the time.
#simulate_many_games(net, net, positions)
simulate_many_games_mcts(net, net, positions)
print('Total Time to complete ', time.time() - now)


def get_winrate(final_positions):
    black_win = [
        utils.parse_game_result(pos.result()) == go.BLACK
        for pos in final_positions
    ]
    return sum(black_win) / len(black_win)
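

# A hedged sketch (not in the original script): one plausible way the win
# rate above could gate model promotion, following the AlphaGo Zero
# convention of replacing the best model only when the candidate wins at
# least 55% of evaluation games. The 0.55 gate is an illustrative assumption.
winrate = get_winrate(positions)
print('black (candidate) win rate:', winrate)
if winrate >= 0.55:
    print('candidate beats the best model; promote it')
else:
    print('keep the current best model')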