def get_puct_config(self):
    """Build the PUCT player configuration from ``self.game_config``.

    The number of playouts per iteration is 1 when the game config's
    ``sims_multiplier`` is zero, otherwise ``100 * sims_multiplier``.
    The generation and the depth temperature max/stop values are also
    read from the game config; everything else is fixed here.
    """
    game_config = self.game_config
    sims_multiplier = game_config.sims_multiplier

    # a zero multiplier collapses the search to a single playout
    playouts = 1 if sims_multiplier == 0 else 100 * sims_multiplier

    return confs.PUCTPlayerConfig(
        name="clx",
        generation=game_config.generation,
        verbose=True,

        playouts_per_iteration=playouts,
        playouts_per_iteration_noop=0,

        dirichlet_noise_alpha=-1,

        root_expansions_preset_visits=-1,
        fpu_prior_discount=0.25,

        puct_before_expansions=3,
        puct_before_root_expansions=4,
        puct_constant_before=3.0,
        puct_constant_after=0.75,

        choose="choose_temperature",
        temperature=1.5,
        depth_temperature_max=game_config.depth_temperature_max,
        depth_temperature_start=4,
        depth_temperature_increment=0.25,
        depth_temperature_stop=game_config.depth_temperature_stop,
        random_scale=0.9,

        max_dump_depth=2)
def test_trained():
    """Play a "simplemcts" player against a PUCT player using GOOD_GEN1."""
    mcts_player = get.get_player("simplemcts")
    mcts_player.max_run_time = 0.5

    evaluator_settings = dict(
        verbose=True,
        puct_constant=0.85,
        puct_constant_root=3.0,
        dirichlet_noise_pct=-1,
        fpu_prior_discount=0.25,
        fpu_prior_discount_root=0.15,
        choose="choose_temperature",
        temperature=2.0,
        depth_temperature_max=10.0,
        depth_temperature_start=0,
        depth_temperature_increment=0.75,
        depth_temperature_stop=1,
        random_scale=1.0,
        batch_size=1,
        max_dump_depth=1,
    )
    evaluator_conf = confs.PUCTEvaluatorConfig(**evaluator_settings)

    player_conf = confs.PUCTPlayerConfig("gzero", True, 200, 0,
                                         GOOD_GEN1, evaluator_conf)
    attrutil.pprint(player_conf)

    play(mcts_player, PUCTPlayer(player_conf))
def test_random():
    """Play a "simplemcts" player against a PUCT player using RANDOM_GEN."""
    mcts_player = get.get_player("simplemcts")
    mcts_player.max_run_time = 0.25

    # evaluator settings come from the shared template
    evaluator_conf = templates.base_puct_config(verbose=True,
                                                max_dump_depth=1)

    player_conf = confs.PUCTPlayerConfig("gzero", True, 100, 0,
                                         RANDOM_GEN, evaluator_conf)
    attrutil.pprint(player_conf)

    opponent = PUCTPlayer(player_conf)
    play(mcts_player, opponent)
def get_puct_config(gen, **kwds):
    """Create a PUCT player config for generation ``gen``, with overrides.

    Each keyword in ``kwds`` is set on the evaluator config and/or the
    player config, whichever of them ``at.has`` reports as having that
    attribute.  A keyword that matches neither is reported with a warning
    and otherwise ignored.

    Returns the player config (which holds the evaluator config).
    """
    evaluator = confs.PUCTEvaluatorConfig(
        verbose=True,
        puct_constant=0.85,
        puct_constant_root=3.0,

        dirichlet_noise_pct=-1,

        fpu_prior_discount=0.25,
        fpu_prior_discount_root=0.15,

        choose="choose_temperature",
        temperature=2.0,
        depth_temperature_max=10.0,
        depth_temperature_start=0,
        depth_temperature_increment=0.75,
        depth_temperature_stop=1,
        random_scale=1.0,

        max_dump_depth=2,

        top_visits_best_guess_converge_ratio=0.8,
        think_time=2.0,
        converged_visits=2000,

        batch_size=32)

    player = confs.PUCTPlayerConfig(
        name="puct",
        verbose=True,
        generation=gen,
        playouts_per_iteration=-1,
        playouts_per_iteration_noop=0,
        evaluator_config=evaluator)

    for key, value in kwds.items():
        applied = False

        # evaluator first, then player - a key may exist on both
        for target in (evaluator, player):
            if at.has(target, key):
                applied = True
                setattr(target, key, value)

        if not applied:
            log.warning("Unused setting %s:%s" % (key, value))

    return player
def config(gen):
    """Return a PUCT player config whose name and generation are both ``gen``."""
    player_conf = confs.PUCTPlayerConfig(
        # identity
        name=gen,
        generation=gen,
        verbose=True,

        # search budget
        playouts_per_iteration=-1,
        playouts_per_iteration_noop=0,

        # exploration
        dirichlet_noise_alpha=-1,
        root_expansions_preset_visits=1,
        fpu_prior_discount=-1,
        puct_before_expansions=3,
        puct_before_root_expansions=5,
        puct_constant_before=3.0,
        puct_constant_after=0.75,

        # move selection
        choose="choose_temperature",
        temperature=1.5,
        depth_temperature_max=3.0,
        depth_temperature_start=0,
        depth_temperature_increment=0.5,
        depth_temperature_stop=6,
        random_scale=1.00,

        max_dump_depth=2)
    return player_conf
def test_fast_plays(): ''' very fast rollouts, basically this config of puct player is a policy player ''' gm = GameMaster(get_gdl_for_game("breakthrough")) import attr conf = confs.PUCTPlayerConfig(**attr.asdict(default_puct_config)) conf.verbose = False # just checking that we haven't modified default assert not conf.verbose and default_puct_config.verbose conf.playouts_per_iteration = 1 conf.playouts_per_iteration_noop = 0 conf.dirichlet_noise_alpha = -1 print conf # add two players white = PUCTPlayer(conf=conf) black = PUCTPlayer(conf=conf) gm.add_player(white, "white") gm.add_player(black, "black") acc_black_score = 0 acc_red_score = 0 s = time.time() for _ in range(ITERATIONS): gm.start(meta_time=30, move_time=15) gm.play_to_end() acc_black_score += gm.scores["black"] acc_red_score += gm.scores["white"] print gm.get_game_depth() print "time taken", time.time() - s print "white_score", gm.players_map["white"].name, acc_red_score print "black_score", gm.players_map["black"].name, acc_black_score
# Reduce TensorFlow log output: the environment variable targets the native
# (C++) logging level, and the Python-side logger is limited to errors.
# NOTE(review): presumably '3' is the most restrictive level - confirm.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
tf.logging.set_verbosity(tf.logging.ERROR)

# Module-level PUCT player configuration.  The generation is hard-coded; the
# previously used value is left commented out just above the current one.
compete = confs.PUCTPlayerConfig(
    name="bt_config",
    ##generation="x1_109",
    generation="x1_132",
    verbose=True,
    playouts_per_iteration=-1,
    playouts_per_iteration_noop=0,
    dirichlet_noise_alpha=-1,
    root_expansions_preset_visits=-1,
    puct_before_expansions=3,
    puct_before_root_expansions=5,
    puct_constant_before=3.0,
    puct_constant_after=0.75,
    choose="choose_top_visits",
    temperature=1.5,
    depth_temperature_max=3.0,
    depth_temperature_start=0,
    depth_temperature_increment=0.5,
    depth_temperature_stop=4,
    random_scale=1.00,
    max_dump_depth=6)


def pretty_board(sm):
    ' pretty print board current state of match '
    from ggplib.util.symbols import SymbolFactory
def puct_config_template(generation, name="default"):
    """Return the PUCT player preset called ``name`` for ``generation``.

    All presets are constructed afresh on every call, so the returned
    config object is not shared between callers.  The chosen preset has
    its ``generation`` attribute set before being returned.

    Raises ``KeyError`` if ``name`` is not one of the presets.
    """
    presets = {
        "default": confs.PUCTPlayerConfig(
            name="default",
            verbose=True,
            playouts_per_iteration=2400,
            playouts_per_iteration_noop=800,
            dirichlet_noise_alpha=0.05,
            puct_before_expansions=3,
            puct_before_root_expansions=5,
            puct_constant_before=3.0,
            puct_constant_after=0.75,
            choose="choose_top_visits",
            max_dump_depth=2),

        "test": confs.PUCTPlayerConfig(
            name="test",
            verbose=True,
            playouts_per_iteration=42,
            playouts_per_iteration_noop=0,
            dirichlet_noise_alpha=0.03,
            puct_before_expansions=3,
            puct_before_root_expansions=5,
            puct_constant_before=3.0,
            puct_constant_after=0.75,
            choose="choose_top_visits",
            max_dump_depth=2),

        "compete": confs.PUCTPlayerConfig(
            name="compete",
            verbose=True,
            playouts_per_iteration=400,
            playouts_per_iteration_noop=400,
            dirichlet_noise_alpha=-1,
            puct_before_expansions=3,
            puct_before_root_expansions=5,
            puct_constant_before=3.0,
            puct_constant_after=1.00,
            temperature=2.0,
            depth_temperature_max=10.0,
            depth_temperature_start=4,
            depth_temperature_increment=0.25,
            depth_temperature_stop=100,
            random_scale=0.75,
            choose="choose_temperature",
            max_dump_depth=2),

        "compete2": confs.PUCTPlayerConfig(
            name="compete2",
            verbose=True,
            playouts_per_iteration=800,
            playouts_per_iteration_noop=800,
            root_expansions_preset_visits=7,
            resign_score_value=0.05,
            playouts_per_iteration_resign=25,
            dirichlet_noise_alpha=-1,
            puct_before_expansions=3,
            puct_before_root_expansions=5,
            puct_constant_before=3.0,
            puct_constant_after=1.00,
            temperature=1.0,
            depth_temperature_max=2.5,
            depth_temperature_start=8,
            depth_temperature_increment=0.1,
            depth_temperature_stop=60,
            random_scale=0.65,
            choose="choose_temperature",
            max_dump_depth=2),

        "policy": confs.PUCTPlayerConfig(
            name="policy-test",
            verbose=True,
            playouts_per_iteration=0,
            playouts_per_iteration_noop=0,
            dirichlet_noise_alpha=-1,
            choose="choose_top_visits",
            max_dump_depth=1),

        "policy_compete": confs.PUCTPlayerConfig(
            name="policy_compete",
            verbose=True,
            playouts_per_iteration=0,
            playouts_per_iteration_noop=0,
            dirichlet_noise_alpha=-1,
            temperature=1.0,
            depth_temperature_max=2.5,
            depth_temperature_start=8,
            depth_temperature_increment=0.1,
            depth_temperature_stop=60,
            random_scale=0.65,
            choose="choose_temperature",
            max_dump_depth=1),

        "max_score": confs.PUCTPlayerConfig(
            name="max-score",
            verbose=True,
            playouts_per_iteration=1,
            playouts_per_iteration_noop=0,
            dirichlet_noise_alpha=-1,
            puct_constant_before=0,
            puct_constant_after=0,
            choose="choose_top_visits",
            max_dump_depth=2),

        "compare": confs.PUCTPlayerConfig(
            name="compare",
            verbose=True,
            playouts_per_iteration=150,
            playouts_per_iteration_noop=1,
            dirichlet_noise_alpha=0.03,
            puct_before_expansions=3,
            puct_before_root_expansions=5,
            puct_constant_before=3.0,
            puct_constant_after=0.75,
            choose="choose_top_visits",
            max_dump_depth=2),
    }

    selected = presets[name]
    selected.generation = generation
    return selected
from ggplib.db.helper import get_gdl_for_game
from ggpzero.defs import confs
from ggpzero.player.puctplayer import PUCTPlayer
import py.test

# number of iterations used by loops that range over this constant
ITERATIONS = 1

# generation used by the default player config below
current_gen = "x6_111"

# first in the run, completely random weights
random_gen = "x5_0"

# baseline player config; only the generation and the two playout counts
# are given explicitly, every other field keeps the class default
default_puct_config = confs.PUCTPlayerConfig(generation=current_gen,
                                             playouts_per_iteration=42,
                                             playouts_per_iteration_noop=1)


def setup():
    """Initialise ggplib and the ggpzero keras utilities, then quieten TensorFlow.

    The imports are done inside the function, so tensorflow and the init
    helpers are only loaded when setup() is actually called.
    """
    import tensorflow as tf

    from ggplib.util.init import setup_once
    setup_once()

    from ggpzero.util.keras import init
    init()

    # NOTE(review): the env var is set after tensorflow has been imported and
    # initialised above - confirm it still takes effect at this point.
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    tf.logging.set_verbosity(tf.logging.ERROR)