def main():
    print('Python version:', sys.version)
    print('PyTorch version:', torch.__version__)
    print('CUDA version:', torch.version.cuda)
    print('Conda env:', os.environ.get("CONDA_DEFAULT_ENV", ""))

    # Set game to online model.
    actors = ["actor"]
    additional_to_load = {
        ("eval_" + actor_name): (
            Evaluator.get_option_spec(name="eval_" + actor_name),
            lambda object_map, actor_name=actor_name: Evaluator(
                object_map, name="eval_" + actor_name,
                actor_name=actor_name, stats=None))
        for actor_name in actors
    }
    additional_to_load.update({
        ("mi_" + name): (ModelInterface.get_option_spec(), ModelInterface)
        for name in actors
    })

    env = load_env(
        os.environ, num_models=1, additional_to_load=additional_to_load)

    GC = env["game"].initialize()
    args = env["game"].options
    model = env["model_loaders"][0].load_model(GC.params)

    for actor_name in actors:
        e = env["eval_" + actor_name]
        mi = env["mi_" + actor_name]
        mi.add_model("actor", model, cuda=(args.gpu >= 0), gpu_id=args.gpu)
        print("register " + actor_name + " for e = " + str(e))
        e.setup(sampler=env["sampler"], mi=mi)

        def actor(batch, e):
            return e.actor(batch)

        GC.reg_callback(actor_name, lambda batch, e=e: actor(batch, e))

    GC.start()
    for actor_name in actors:
        env["eval_" + actor_name].episode_start(0)

    while True:
        GC.run()

    GC.stop()
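# --- Illustration (not from the original source) ---
# GC.reg_callback pairs a batch key with a Python callable: the callable
# receives a batch and must return a reply that the game context hands back
# to C++. A minimal, hypothetical stand-in for such a callback is sketched
# below; the reply key "a" (chosen action) and the fixed action count are
# assumptions modeled on typical ELF actors, not confirmed by these files.
import torch

def _uniform_random_actor(batch, num_actions=10):
    # One random action index per sample in the batch.
    return dict(a=torch.randint(0, num_actions, (batch.batchsize,)))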
def test_mock_load_env():
    init_for_test()
    from rlpytorch import (Trainer, SingleProcessRun, ArgsProvider,
                           ModelLoader, model_loader, Sampler, ModelInterface)

    envs = os.environ
    load_module = model_loader.load_module

    defaults = dict()
    overrides = dict()
    num_models = None
    kwargs = {}

    trainer = Trainer()
    runner = SingleProcessRun()

    game = load_module(envs["game"]).Loader()
    model_file = load_module(envs["model_file"])

    if len(model_file.Models[envs["model"]]) == 2:
        model_class, method_class = model_file.Models[envs["model"]]
        sampler_class = Sampler
    else:
        model_class, method_class, sampler_class = \
            model_file.Models[envs["model"]]

    defaults.update(getattr(model_file, "Defaults", dict()))
    overrides.update(getattr(model_file, "Overrides", dict()))

    method = method_class()
    sampler = sampler_class()
    mi = ModelInterface()

    # You might want multiple models loaded.
    if num_models is None:
        model_loaders = [ModelLoader(model_class)]
    else:
        model_loaders = [
            ModelLoader(model_class, model_idx=i)
            for i in range(num_models)
        ]

    env = dict(game=game, method=method, sampler=sampler,
               model_loaders=model_loaders, mi=mi)
    env.update(kwargs)

    parser = argparse.ArgumentParser()

    # Simulate a command line.
    cmd_key = 'save_replay_prefix'
    cmd_v = '~/log/elf/'
    cmd_line = [f'--{cmd_key}', cmd_v]

    all_args = ArgsProvider.Load(parser, env, cmd_line=cmd_line,
                                 global_defaults=defaults,
                                 global_overrides=overrides)

    assert all_args[cmd_key] == cmd_v
    assert 'game' in env.keys()
def main_loop(self):
    evaluator = Evaluator(stats=False)
    # Set game to online model.
    env, args = load_env(
        os.environ, evaluator=evaluator,
        overrides=dict(num_games=1, batchsize=1, num_games_per_thread=1,
                       greedy=True, T=1,
                       additional_labels="aug_code,move_idx"))

    GC = env["game"].initialize()
    model = env["model_loaders"][0].load_model(GC.params)

    mi = ModelInterface()
    mi.add_model("model", model)
    mi.add_model("actor", model, copy=True,
                 cuda=args.gpu is not None, gpu_id=args.gpu)
    mi["model"].eval()
    mi["actor"].eval()

    self.evaluator = evaluator
    self.last_move_idx = None

    def human_actor(batch):
        print("In human_actor")
        return self.prompt("DF> ", batch)

    def actor(batch):
        return self.actor(batch)

    def train(batch):
        self.prompt("DF Train> ", batch)

    evaluator.setup(sampler=env["sampler"], mi=mi)

    GC.reg_callback_if_exists("actor", actor)
    GC.reg_callback_if_exists("human_actor", human_actor)
    GC.reg_callback_if_exists("train", train)

    GC.Start()
    evaluator.episode_start(0)

    while True:
        GC.Run()
        if self.exit:
            break

    GC.Stop()
def main():
    # Set game to online model.
    actors = ["actor_black", "actor_white"]
    additional_to_load = {
        ("eval_" + actor_name): (
            Evaluator.get_option_spec(name="eval_" + actor_name),
            lambda object_map, actor_name=actor_name: Evaluator(
                object_map, name="eval_" + actor_name,
                actor_name=actor_name, stats=None))
        for actor_name in actors
    }
    additional_to_load.update({
        ("mi_" + name): (ModelInterface.get_option_spec(), ModelInterface)
        for name in actors
    })

    env = load_env(
        os.environ, num_models=2, overrides=dict(actor_only=True),
        additional_to_load=additional_to_load)

    GC = env["game"].initialize()

    stats = [Stats(), Stats()]

    for actor_name, stat in zip(actors, stats):
        e = env["eval_" + actor_name]
        print("register " + actor_name + " for e = " + str(e))
        e.setup(sampler=env["sampler"], mi=env["mi_" + actor_name])

        def actor(batch, e, stat):
            reply = e.actor(batch)
            stat.feed(batch)
            return reply

        GC.reg_callback(
            actor_name,
            lambda batch, e=e, stat=stat: actor(batch, e, stat))

    root = os.environ.get("root", "./")
    print("Root: \"%s\"" % root)

    args = env["game"].options

    global loop_end
    loop_end = False

    def game_start(batch):
        print("In game start")
        vers = [int(batch["black_ver"][0]), int(batch["white_ver"][0])]

        # Use the version number to load models.
        for model_loader, ver, actor_name in zip(
                env["model_loaders"], vers, actors):
            if ver >= 0:
                while True:
                    try:
                        reload(env["mi_" + actor_name], model_loader,
                               GC.params, args, root, ver, actor_name)
                        break
                    except BaseException:
                        import traceback
                        traceback.print_exc()
                        time.sleep(10)

    def game_end(batch):
        global loop_end
        wr = batch.GC.getGameStats().getWinRateStats()
        win_rate = 100.0 * wr.black_wins / wr.total_games \
            if wr.total_games > 0 else 0.0
        print("%s B/W: %d/%d. Black winrate: %.2f (%d)" %
              (str(datetime.now()), wr.black_wins, wr.white_wins,
               win_rate, wr.total_games))

        if args.suicide_after_n_games > 0 and \
                wr.total_games >= args.suicide_after_n_games:
            print("#suicide_after_n_games: %d, total_games: %d" %
                  (args.suicide_after_n_games, wr.total_games))
            loop_end = True

    GC.reg_callback_if_exists("game_start", game_start)
    GC.reg_callback_if_exists("game_end", game_end)

    GC.start()

    if args.eval_model_pair:
        if args.eval_model_pair.find(",") >= 0:
            black, white = args.eval_model_pair.split(",")
        else:
            black = extract_ver(env["model_loaders"][0])
            white = extract_ver(env["model_loaders"][1])
            # Force them to reload in the future.
            for model_loader, actor_name in zip(
                    env["model_loaders"], actors):
                reload_model(model_loader, GC.params,
                             env["mi_" + actor_name], actor_name, args)

        # We just use one thread to do selfplay.
        GC.GC.setRequest(int(black), int(white),
                         env['game'].options.resign_thres, 1)

    for actor_name in actors:
        env["eval_" + actor_name].episode_start(0)

    while not loop_end:
        GC.run()

    GC.stop()
import argparse
from datetime import datetime
import sys
import os

from rlpytorch import (LSTMTrainer, Sampler, SingleProcessRun, load_env,
                       ModelLoader, ArgsProvider, ModelInterface)

if __name__ == '__main__':
    trainer = LSTMTrainer()
    runner = SingleProcessRun()
    env, all_args = load_env(os.environ, trainer=trainer, runner=runner)

    GC = env["game"].initialize()
    model = env["model_loaders"][0].load_model(GC.params)

    mi = ModelInterface()
    mi.add_model("model", model, optim_params={"lr": 0.001})
    mi.add_model("actor", model, copy=True,
                 cuda=all_args.gpu is not None, gpu_id=all_args.gpu)

    trainer.setup(sampler=env["sampler"], mi=mi, rl_method=env["method"])

    GC.reg_callback("train", trainer.train)
    GC.reg_callback("actor", trainer.actor)

    runner.setup(GC, episode_summary=trainer.episode_summary,
                 episode_start=trainer.episode_start)
    runner.run()
def main():
    print('Python version:', sys.version)
    print('PyTorch version:', torch.__version__)
    print('CUDA version:', torch.version.cuda)
    print('Conda env:', os.environ.get("CONDA_DEFAULT_ENV", ""))

    # Set game to online model.
    actors = ["actor_black", "actor_white"]
    additional_to_load = {
        ("eval_" + actor_name): (
            Evaluator.get_option_spec(name="eval_" + actor_name),
            lambda object_map, actor_name=actor_name: Evaluator(
                object_map, name="eval_" + actor_name,
                actor_name=actor_name, stats=None))
        for actor_name in actors
    }
    additional_to_load.update({
        ("mi_" + name): (ModelInterface.get_option_spec(), ModelInterface)
        for name in actors
    })

    env = load_env(
        os.environ, num_models=2, overrides={'actor_only': True},
        additional_to_load=additional_to_load)

    GC = env["game"].initialize()

    stats = [Stats(), Stats()]

    for actor_name, stat in zip(actors, stats):
        e = env["eval_" + actor_name]
        print(f'register {actor_name} for e = {e!s}')
        e.setup(sampler=env["sampler"], mi=env["mi_" + actor_name])

        def actor(batch, e, stat):
            reply = e.actor(batch)
            stat.feed(batch)
            return reply

        GC.reg_callback(
            actor_name,
            lambda batch, e=e, stat=stat: actor(batch, e, stat))

    root = os.environ.get("root", "./")
    print(f'Root: "{root}"')

    args = env["game"].options
    loop_end = False

    def game_start(batch):
        print("In game start")
        vers = [int(batch["black_ver"][0]), int(batch["white_ver"][0])]

        # Use the version number to load models.
        for model_loader, ver, actor_name in zip(
                env["model_loaders"], vers, actors):
            if ver >= 0:
                while True:
                    try:
                        reload(env["mi_" + actor_name], model_loader,
                               GC.params, args, root, ver, actor_name)
                        break
                    except BaseException:
                        import traceback
                        traceback.print_exc()
                        time.sleep(10)

    def game_end(batch):
        nonlocal loop_end
        wr = batch.GC.getClient().getGameStats().getWinRateStats()
        win_rate = (100.0 * wr.black_wins / wr.total_games
                    if wr.total_games > 0 else 0.0)
        print(f'{datetime.now()!s} B/W: {wr.black_wins}/{wr.white_wins}. '
              f'Black winrate: {win_rate:.2f} ({wr.total_games})')

        if args.suicide_after_n_games > 0 and \
                wr.total_games >= args.suicide_after_n_games:
            print(f'#suicide_after_n_games: {args.suicide_after_n_games}, '
                  f'total_games: {wr.total_games}')
            loop_end = True

    GC.reg_callback_if_exists("game_start", game_start)
    GC.reg_callback_if_exists("game_end", game_end)

    GC.start()

    if args.eval_model_pair:
        if args.eval_model_pair.find(",") >= 0:
            black, white = args.eval_model_pair.split(",")
        else:
            black = extract_ver(env["model_loaders"][0])
            white = extract_ver(env["model_loaders"][1])
            # Force them to reload in the future.
            for model_loader, actor_name in zip(
                    env["model_loaders"], actors):
                reload_model(model_loader, GC.params,
                             env["mi_" + actor_name], actor_name, args)

        # We just use one thread to do selfplay.
        GC.GC.getClient().setRequest(
            int(black), int(white),
            env['game'].options.resign_thres, 1)

    for actor_name in actors:
        env["eval_" + actor_name].episode_start(0)

    while not loop_end:
        GC.run()

    GC.stop()
import os

from rlpytorch import (LSTMTrainer, Sampler, EvalIters, load_env,
                       ModelLoader, ArgsProvider, ModelInterface)

if __name__ == '__main__':
    trainer = LSTMTrainer()
    eval_iters = EvalIters()

    env, all_args = load_env(os.environ, overrides=dict(actor_only=True),
                             trainer=trainer, eval_iters=eval_iters)

    GC = env["game"].initialize()
    model = env["model_loaders"][0].load_model(GC.params)

    mi = ModelInterface()
    mi.add_model("model", model)
    mi.add_model("actor", model, copy=True,
                 cuda=all_args.gpu is not None, gpu_id=all_args.gpu)

    # Use the locally built ModelInterface (env has no "mi" entry here).
    trainer.setup(sampler=env["sampler"], mi=mi)

    def actor(batch):
        reply = trainer.actor(batch)
        eval_iters.stats.feed_batch(batch)
        return reply

    GC.reg_callback("actor", actor)
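    # --- Plausible continuation (not in the original excerpt) ---
    # The snippet ends right after registering the callback. A run loop in
    # the style of the sibling scripts above would look like the sketch
    # below; the capitalized GCWrapper methods and the bare while-loop are
    # assumptions borrowed from those scripts, not from this file.
    GC.Start()
    trainer.episode_start(0)
    while True:
        GC.Run()
    GC.Stop()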
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# Console for DarkForest

import sys
import os

from rlpytorch import (load_env, Evaluator, ModelInterface, ArgsProvider,
                       EvalIters)

if __name__ == '__main__':
    evaluator = Evaluator(stats=False)
    # Set game to online model.
    env, args = load_env(os.environ, evaluator=evaluator,
                         overrides=dict(mode="selfplay", T=1))

    GC = env["game"].initialize()
    model = env["model_loaders"][0].load_model(GC.params)

    mi = ModelInterface()
    mi.add_model("model", model)
    mi.add_model("actor", model, copy=True,
                 cuda=args.gpu is not None, gpu_id=args.gpu)
    mi["model"].eval()
    mi["actor"].eval()

    evaluator.setup(mi=mi)

    total_batchsize = 0
    total_sel_batchsize = 0

    def actor(batch):
        global total_batchsize, total_sel_batchsize
        reply = evaluator.actor(batch)
        total_sel_batchsize += batch.batchsize
        total_batchsize += batch.max_batchsize
        # The original excerpt ends here; a callback must hand the reply
        # back to the game context.
        return reply
import os
import re

from rlpytorch import load_env, SingleProcessRun, Trainer, ModelInterface

matcher = re.compile(r"save-(\d+).bin")

if __name__ == '__main__':
    additional_to_load = {
        'trainer0': (
            Trainer.get_option_spec(),
            lambda option_map: Trainer(option_map)),
        'trainer1': (
            Trainer.get_option_spec(),
            lambda option_map: Trainer(option_map)),
        'mi0': (ModelInterface.get_option_spec(), ModelInterface),
        'mi1': (ModelInterface.get_option_spec(), ModelInterface),
        'runner': (
            SingleProcessRun.get_option_spec(),
            lambda option_map: SingleProcessRun(option_map)),
    }

    env = load_env(
        os.environ, num_models=2, additional_to_load=additional_to_load,
        overrides=dict(backprop0=False, backprop1=False,
                       mode="offline_train"))

    trainer0 = env['trainer0']
    trainer1 = env['trainer1']
    runner = env['runner']
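# --- Illustration (not from the original source) ---
# `matcher` is defined above but not used in this excerpt. A plausible use,
# consistent with the "save-<ver>.bin" checkpoint naming it encodes, is to
# pull a model version out of a filename; the helper name below is
# hypothetical.
def _ver_from_filename(filename):
    m = matcher.search(filename)
    return int(m.group(1)) if m else -1

assert _ver_from_filename("save-133.bin") == 133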
def main():
    print('Python version:', sys.version)
    print('PyTorch version:', torch.__version__)
    print('CUDA version:', torch.version.cuda)
    print('Conda env:', os.environ.get("CONDA_DEFAULT_ENV", ""))

    # Register player names.
    actors = ["actor_white", "actor_black"]

    """
    Evaluator is a pure Python class that runs a neural network in eval
    mode, returns its results, and updates some stat info.
    This will create 'eval_actor_white' and 'eval_actor_black'.
    """
    additional_to_load = {
        ("eval_" + actor_name): (
            Evaluator.get_option_spec(name="eval_" + actor_name),
            lambda object_map, actor_name=actor_name: Evaluator(
                object_map, name="eval_" + actor_name,
                actor_name=actor_name, stats=None))
        for actor_name in actors
    }

    """
    ModelInterface is a Python class holding network models. Its member
    `models` is a key-value store that looks up a CNN model by name.
    This will create 'mi_actor_white' and 'mi_actor_black'.
    """
    additional_to_load.update({
        ("mi_" + name): (ModelInterface.get_option_spec(), ModelInterface)
        for name in actors
    })

    """
    load_env:
        game          - loads the file elfgames.american_checkers.game.
        method        - loads the "method" passed via params: the file
                        model_american_checkers.py returns an array
                        [model, method] for
                        model_file=elfgames.american_checkers.model_american_checkers,
                        model=df_pred.
        model_loaders - prepares to load the "model" passed via the same
                        params (returns an instance of ModelLoader).
        sampler       - used to sample an action from a policy.
        mi            - ModelInterface, as described above.
        eval_*        - runs a neural network in eval mode, returns its
                        results, and updates some stat info.
    """
    env = load_env(
        os.environ, num_models=2, overrides={'actor_only': True},
        additional_to_load=additional_to_load)

    """
    Initializes the keys ('game_end', 'game_start', 'actor_white',
    'actor_black') used for communication between the Python and C++ code,
    defined in Game.py and GameFeature.h. Also initializes the GameContext
    from the C++ library, wrapped by GC on the Python side, and sets the
    mode parsed from the options (play/selfplay/train/offline_train).
    """
    GC = env["game"].initialize()

    """
    Registers the methods in the GameContext on the Python side. Their
    names were registered earlier, when the game was initialized (on both
    the Python and C++ sides). Now we register the methods that will be
    called when a batch is passed for evaluation from C++ to Python.
    Example: we register "human_actor" as a key along with a method on the
    Python side. When AIClientT calls act() (which takes two parameters:
    state and key), act() connects to Python and transmits the state by key
    ("human_actor", "actor_black") to these methods (the actor() function
    defined below).
    """
    # Some statistics about batch usage; more info about game stats can be
    # added here.
    stats = [Stats(), Stats()]

    for actor_name, stat in zip(actors, stats):
        evaluator = env["eval_" + actor_name]
        evaluator.setup(sampler=env["sampler"], mi=env["mi_" + actor_name])

        def actor(batch, evaluator, stat):
            reply = evaluator.actor(batch)
            stat.feed(batch)
            return reply

        # We use a lambda to bind the current evaluator and stat.
        GC.reg_callback(
            actor_name,
            lambda batch, evaluator=evaluator, stat=stat: actor(
                batch, evaluator, stat))

    # Get the directory containing the models.
    root = os.environ.get("root", "./")
    args = env["game"].options

    # Stops the client after N games, as set by --suicide_after_n_games.
    loop_end = False

    """
    Responsible for updating the model to the current one (received from
    the server) after starting. Called via the 'game_start' key from the
    C++ side.
    """
    def game_start(batch):
        logger.info("game_start() load/reload models\n")
        vers = [int(batch["white_ver"][0]), int(batch["black_ver"][0])]

        # Use the version number to load models.
        for model_loader, ver, actor_name in zip(
                env["model_loaders"], vers, actors):
            if ver >= 0:
                while True:
                    try:
                        reload(env["mi_" + actor_name], model_loader,
                               GC.params, args, root, ver, actor_name)
                        break
                    except BaseException:
                        import traceback
                        traceback.print_exc()
                        time.sleep(10)

    """
    Responsible for displaying game statistics and for stopping the client
    after N games (loop_end). Called via the 'game_end' key from the C++
    side.
    """
    def game_end(batch):
        nonlocal loop_end
        wr = batch.GC.getClient().getGameStats().getWinRateStats()
        decided = wr.black_wins + wr.white_wins
        win_rate = 100.0 * wr.black_wins / decided if decided > 0 else 0.0

        info = f'game_end()\tB/W: {wr.black_wins}/{wr.white_wins}, '
        info += f'Draw: {wr.both_lost}, '
        info += f'Black winrate: {win_rate:.2f}, '
        info += f'Total Games: {wr.total_games}'
        logger.info(info)

        if args.suicide_after_n_games > 0 and \
                wr.total_games >= args.suicide_after_n_games:
            info = f'game_end()\tTotal Games: {wr.total_games}, '
            info += f'#suicide_after_n_games: {args.suicide_after_n_games}'
            logger.info(info)
            loop_end = True

    # Register the methods described above in Python's GameContext.
    GC.reg_callback_if_exists("game_start", game_start)
    GC.reg_callback_if_exists("game_end", game_end)

    GC.start()

    """
    Upon receiving the --eval_model_pair parameter, load two models from
    file and pass their versions to the C++ side for evaluation.
    """
    if args.eval_model_pair:
        if args.eval_model_pair.find(",") >= 0:
            black, white = args.eval_model_pair.split(",")
        else:
            black = extract_ver(env["model_loaders"][0])
            white = extract_ver(env["model_loaders"][1])
            # Force them to reload in the future.
            for model_loader, actor_name in zip(
                    env["model_loaders"], actors):
                reload_model(model_loader, GC.params,
                             env["mi_" + actor_name], actor_name, args)

        # We just use one thread to do selfplay.
        GC.GC.getClient().setRequest(int(black), int(white), 1)

    # Called before each episode; resets actor_count (number of total NN
    # calls).
    for actor_name in actors:
        env["eval_" + actor_name].episode_start(0)

    while not loop_end:
        GC.run()

    GC.stop()
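# --- Illustration (not from the original source) ---
# extract_ver(), reload(), and reload_model() are used above but defined
# elsewhere in the repo. A minimal sketch of what extract_ver() plausibly
# does, assuming checkpoints follow the "save-<ver>.bin" naming that the
# regex in the offline-train snippet above encodes:
import os
import re

def extract_ver(model_loader):
    # `model_loader.options.load` holding the checkpoint path is an
    # assumption about the ModelLoader API, not confirmed by these
    # snippets.
    name = os.path.basename(model_loader.options.load)
    m = re.search(r"save-(\d+)\.bin", name)
    return int(m.group(1))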