def test_mock_load_env():
    """Smoke-test ArgsProvider.Load against a mocked training environment.

    Assembles the same game/method/sampler/model environment that the real
    training entry point builds (modules resolved from ``os.environ``),
    feeds a simulated command line through ``ArgsProvider.Load`` and checks
    that the parsed option round-trips and the env dict was populated.
    """
    init_for_test()
    from rlpytorch import (Trainer, SingleProcessRun, ArgsProvider,
                           ModelLoader, model_loader, Sampler, ModelInterface)

    envs = os.environ
    load_module = model_loader.load_module
    defaults = dict()
    overrides = dict()
    num_models = None
    kwargs = {}

    # Constructed for their side effects (argument registration); the test
    # does not drive them directly.
    trainer = Trainer()
    runner = SingleProcessRun()

    game = load_module(envs["game"]).Loader()
    model_file = load_module(envs["model_file"])

    # A model spec is either (model, method) — using the default Sampler —
    # or a full (model, method, sampler) triple. Look it up once.
    model_spec = model_file.Models[envs["model"]]
    if len(model_spec) == 2:
        model_class, method_class = model_spec
        sampler_class = Sampler
    else:
        model_class, method_class, sampler_class = model_spec

    defaults.update(getattr(model_file, "Defaults", dict()))
    overrides.update(getattr(model_file, "Overrides", dict()))

    method = method_class()
    sampler = sampler_class()
    mi = ModelInterface()

    # You might want multiple models loaded.
    if num_models is None:
        model_loaders = [ModelLoader(model_class)]
    else:
        model_loaders = [
            ModelLoader(model_class, model_idx=i) for i in range(num_models)
        ]

    env = dict(game=game, method=method, sampler=sampler,
               model_loaders=model_loaders, mi=mi)
    env.update(kwargs)

    parser = argparse.ArgumentParser()
    # Simulate a command line.
    cmd_key = 'save_replay_prefix'
    cmd_v = '~/log/elf/'
    cmd_line = [f'--{cmd_key}', cmd_v]
    all_args = ArgsProvider.Load(parser, env, cmd_line=cmd_line,
                                 global_defaults=defaults,
                                 global_overrides=overrides)
    assert all_args[cmd_key] == cmd_v
    # Membership test on the dict itself — no need for .keys().
    assert 'game' in env
"terminal" ])), reply=None) nIter = 5000 elapsed_wait_only = 0 import pickle import argparse if __name__ == '__main__': parser = argparse.ArgumentParser() loader = Loader() args = ArgsProvider.Load(parser, [loader]) def actor(batch): ''' import pdb pdb.set_trace() pickle.dump(utils_elf.to_numpy(sel), open("tmp%d.bin" % k, "wb"), protocol=2) ''' return dict(a=[0] * batch["s"].size(1)) GC = loader.initialize() GC.reg_callback("actor", actor) before = datetime.now() GC.Start()
params["hist_len"] = args.hist_len params["T"] = args.T return GCWrapper(GC, co, desc, use_numpy=False, params=params) cmd_line = "--num_games 16 --batchsize 4 --hist_len 1 --frame_skip 4 --actor_only" nIter = 5000 elapsed_wait_only = 0 if __name__ == '__main__': parser = argparse.ArgumentParser() loader = Loader() args = ArgsProvider.Load(parser, [loader], cmd_line=cmd_line.split(" ")) GC = loader.initialize() def actor(sel, sel_gpu): # pickle.dump(to_numpy(sel), open("tmp%d.bin" % k, "wb"), protocol=2) return dict(a=[0] * sel[0]["s"].size(0)) GC.reg_callback("actor", actor) reward_dist = Counter() before = datetime.now() GC.Start() import tqdm
# Benchmark knobs: iteration budget and accumulated wait time.
nIter = 5000
elapsed_wait_only = 0

import pickle
import random

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    # Use the real argv when given; otherwise fall back to a canned
    # cmd_line string defined elsewhere in this file.
    if len(sys.argv) > 1:
        cmd_line = sys.argv[1:]
    else:
        cmd_line = cmd_line.split(" ")
    loader = Loader()
    args = ArgsProvider.Load(parser, [loader], cmd_line=cmd_line)
    GC = loader.initialize()
    # Callback invocation counters (mutated via `global` in the callbacks).
    actor_count = 0
    train_count = 0

    def actor(batch):
        global actor_count, GC
        actor_count += 1
        # NOTE(review): `sel` is not defined in this scope — the parameter
        # is `batch`. This looks like a leftover from an older signature
        # and would raise NameError if executed; confirm before relying
        # on this callback.
        batchsize = sel["s"].size(1)
        # Reply a uniformly random valid action per sample.
        actions = [
            random.randint(0, GC.params["num_action"] - 1)
            for i in range(batchsize)
        ]
        reply = dict(a=actions)
        # (chunk ends here — the rest of actor() is outside the visible source)
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    # Resolve the model and game modules from environment variables.
    model_file = load_module(os.environ["model_file"])
    model_class, method_class = model_file.Models[os.environ["model"]]
    model_loader = ModelLoader(model_class)
    game = load_module(os.environ["game"]).Loader()
    # Evaluation-only run; game_multi semantics come from the game loader —
    # presumably multiple games per thread, verify against its docs.
    game.args.set_override(actor_only=True, game_multi=2)
    sampler = Sampler()
    evaluator = Evaluator(stats=False)
    eval_iters = EvalIters()
    args = ArgsProvider.Load(parser, [
        game, sampler, evaluator, model_loader, eval_iters
    ])
    GC = game.initialize()
    GC.setup_gpu(args.gpu)
    model = model_loader.load_model(GC.params)
    mi = ModelInterface()
    mi.add_model("model", model, optim_params={ "lr" : 0.001})
    # Separate actor copy of the same weights, placed on the chosen GPU.
    mi.add_model("actor", model, copy=True, cuda=True, gpu_id=args.gpu)

    def actor(batch):
        # Delegate action selection to the evaluator.
        reply = evaluator.actor(batch)
        # NOTE(review): the string below is unterminated because the chunk
        # is cut off mid-docstring; the rest of actor() is outside the
        # visible source.
        '''
        s = batch["s"][0][0]
        seq = batch["seq"][0][0]
        for i in range(s.size(0)):
    # Tail of an initialize()-style method (header outside the visible
    # source): one command group when running actor-only, two when a
    # training group is also needed.
    params.update(dict(
        num_group = 1 if args.actor_only else 2,
        T = args.T,
    ))
    return GCWrapper(GC, co, desc, gpu=args.gpu, use_numpy=False, params=params)

elapsed_wait_only = 0

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("--num_iter", type=int, default=5000)
    loader = Loader()
    # Ask the game to emit extra per-sample labels alongside the batch.
    args = ArgsProvider.Load(parser, [loader],
        global_overrides=dict(additional_labels="move_idx,game_record_idx"))
    GC = loader.initialize()
    import torch
    # Accumulators: per-bin stone-occupancy stats over the board.
    nbin = 10
    # NOTE(review): params accessed as GC["params"] here but GC.params
    # below — one of the two is probably wrong; confirm against the
    # GCWrapper API.
    board_size = GC["params"]["board_size"]
    stats = torch.FloatTensor(nbin, board_size, board_size)
    counts = torch.FloatTensor(10)
    game_records_visited = Counter()
    # Plane indices for our stones vs. the opponent's in the state tensor.
    our_idx = GC.params["our_stone_plane"]
    opp_idx = GC.params["opponent_stone_plane"]

    # (chunk ends here — the body of train() is outside the visible source)
    def train(batch):
if recorded_a != actual_a: self._debug( "%s Action was different. recorded %d, actual %d" % (prompt, recorded_a, actual_a)) # Overlapped by 1. self.id2seqs_train[id] = last_seq - 1 if __name__ == '__main__': parser = argparse.ArgumentParser() game = load_module(os.environ["game"]).Loader() collector = StatsCollector() runner = SingleProcessRun() args_providers = [game, runner] all_args = ArgsProvider.Load(parser, args_providers) GC = game.initialize() # GC.setup_gpu(0) collector.set_params(GC.params) GC.reg_callback("actor", collector.actor) GC.reg_callback("train", collector.train) GC.reg_sig_int() runner.setup(GC) runner.run()