def __init__(self): self.args = ArgsProvider( call_from = self, define_args = [ ("num_games", 1024), ("batchsize", 128), # 64 ("game_multi", dict(type=int, default=None)), ("T", 6), # 20 ("eval", dict(action="store_true")), # False ("wait_per_group", dict(action="store_true")), # False ("num_collectors", 0), ("verbose_comm", dict(action="store_true")), # False ("verbose_collector", dict(action="store_true")), # False ("mcts_threads", 0), ("mcts_rollout_per_thread", 1), ("mcts_verbose", dict(action="store_true")), # False ("mcts_save_tree_filename", ""), ("mcts_verbose_time", dict(action="store_true")), # False ("mcts_use_prior", dict(action="store_true")), # False ("mcts_pseudo_games", 0), ("mcts_pick_method", "most_visited"), ], on_get_args = self._on_get_args )
def __init__(self): self.context_args = ContextArgs() self.args = ArgsProvider(define_args=[ ("handicap_level", 0), ("latest_start", 1000), ("latest_start_decay", 0.7), ("fs_ai", 50), ("fs_opponent", 50), ("ai_type", dict(type=str, choices=[ "AI_SIMPLE", "AI_HIT_AND_RUN", "AI_NN", "AI_FLAG_NN", "AI_TD_NN" ], default="AI_NN")), ("opponent_type", dict(type=str, choices=[ "AI_SIMPLE", "AI_HIT_AND_RUN", "AI_FLAG_SIMPLE", "AI_TD_BUILT_IN" ], default="AI_SIMPLE")), ("max_tick", dict(type=int, default=30000, help="Maximal tick")), ("mcts_threads", 64), ("seed", 0), ("simple_ratio", -1), ("ratio_change", 0), ("actor_only", dict(action="store_true")) ], more_args=["batchsize", "T"], child_providers=[self.context_args.args])
def __init__(self, module):
    self.context_args = ContextArgs()
    self.module = module

    self.args = ArgsProvider(
        call_from=self,
        define_args=[
            ("handicap_level", 0),
            ("players", dict(
                type=str,
                help=";-separated player infos. For example: "
                     "type=AI_NN,fs=50,args=backup/AI_SIMPLE|decay/0.99|start/1000,fow=True;"
                     "type=AI_SIMPLE,fs=50")),
            ("max_tick", dict(type=int, default=30000, help="Maximal tick")),
            ("shuffle_player", dict(action="store_true")),
            ("mcts_threads", 64),
            ("seed", 0),
            ("actor_only", dict(action="store_true")),
            ("additional_labels", dict(
                type=str, default=None,
                help="Add additional labels in the batch. E.g., id,seq,last_terminal")),
            ("model_no_spatial", dict(action="store_true")),  # TODO, put it to model
            ("save_replay_prefix", dict(type=str, default=None)),
            ("output_file", dict(type=str, default=None)),
            ("cmd_dumper_prefix", dict(type=str, default=None)),
        ],
        more_args=["batchsize", "T", "gpu"],
        child_providers=[self.context_args.args],
    )
def __init__(self, module):
    self.context_args = ContextArgs()
    self.more_labels = MoreLabels()
    self.module = module

    basic_define_args = [
        ("handicap_level", 0),
        ("players", dict(
            type=str,
            help=";-separated player infos. For example: "
                 "type=AI_NN,fs=50,args=backup/AI_SIMPLE|decay/0.99|start/1000,fow=True;"
                 "type=AI_SIMPLE,fs=50")),
        ("max_tick", dict(type=int, default=30000, help="Maximal tick")),
        ("shuffle_player", dict(action="store_true")),  # False
        ("num_frames_in_state", 1),
        ("max_unit_cmd", 1),
        ("seed", 0),
        ("actor_only", dict(action="store_true")),  # False
        ("model_no_spatial", dict(action="store_true")),  # False. TODO, put it to model
        ("save_replay_prefix", dict(type=str, default=None)),
        ("output_file", dict(type=str, default=None)),
        ("cmd_dumper_prefix", dict(type=str, default=None)),
        ("gpu", dict(type=int, help="gpu to use", default=None)),
    ]

    self.args = ArgsProvider(
        call_from=self,
        define_args=basic_define_args + self._define_args(),
        more_args=["num_games", "batchsize", "T"],
        child_providers=[self.context_args.args, self.more_labels.args],
    )
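# Hedged sketch, not from the original source: `self._define_args()` above is
# a hook for subclasses to contribute game-specific flags on top of
# basic_define_args. A derived loader might look like the following;
# `CommonLoader` names the enclosing class and both flags are illustrative.
class MyGameLoader(CommonLoader):
    def _define_args(self):
        # Same (name, default-or-spec) tuple shape as basic_define_args.
        return [
            ("map_size", 20),
            ("use_fow", dict(action="store_true")),
        ]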
def __init__(self):
    self.pg = PolicyGradient()
    self.discounted_reward = DiscountedReward()
    self.value_matcher = ValueMatcher()

    self.args = ArgsProvider(
        call_from=self,
        define_args=[
            ("fixed_policy", dict(action="store_true")),
            ("h_match_policy", dict(action="store_true")),
            ("h_match_action", dict(action="store_true")),
            ("h_smooth", dict(action="store_true")),
            ("contrastive_V", dict(action="store_true")),
        ],
        more_args=["num_games", "batchsize", "min_prob"],
        child_providers=[
            self.pg.args,
            self.discounted_reward.args,
            self.value_matcher.args,
        ],
    )

    self.prediction_loss = nn.SmoothL1Loss().cuda()
    self.policy_match_loss = nn.SmoothL1Loss().cuda()
    self.policy_max_action_loss = nn.NLLLoss().cuda()
    self.rank_loss = nn.MarginRankingLoss().cuda()
def test_mock_load_env():
    import os
    import argparse

    init_for_test()
    from rlpytorch import (Trainer, SingleProcessRun, ArgsProvider,
                           ModelLoader, model_loader, Sampler, ModelInterface)

    envs = os.environ
    load_module = model_loader.load_module
    defaults = dict()
    overrides = dict()
    num_models = None
    kwargs = {}

    trainer = Trainer()
    runner = SingleProcessRun()

    game = load_module(envs["game"]).Loader()
    model_file = load_module(envs["model_file"])
    if len(model_file.Models[envs["model"]]) == 2:
        model_class, method_class = model_file.Models[envs["model"]]
        sampler_class = Sampler
    else:
        model_class, method_class, sampler_class = model_file.Models[envs["model"]]

    defaults.update(getattr(model_file, "Defaults", dict()))
    overrides.update(getattr(model_file, "Overrides", dict()))

    method = method_class()
    sampler = sampler_class()
    mi = ModelInterface()

    # You might want multiple models loaded.
    if num_models is None:
        model_loaders = [ModelLoader(model_class)]
    else:
        model_loaders = [
            ModelLoader(model_class, model_idx=i) for i in range(num_models)
        ]

    env = dict(game=game, method=method, sampler=sampler,
               model_loaders=model_loaders, mi=mi)
    env.update(kwargs)

    parser = argparse.ArgumentParser()

    # Simulate the command line.
    cmd_key = 'save_replay_prefix'
    cmd_v = '~/log/elf/'
    cmd_line = [f'--{cmd_key}', cmd_v]

    all_args = ArgsProvider.Load(parser, env, cmd_line=cmd_line,
                                 global_defaults=defaults,
                                 global_overrides=overrides)

    assert all_args[cmd_key] == cmd_v
    assert 'game' in env.keys()
def __init__(self, args=None):
    self.args = ArgsProvider(
        call_from=self,
        define_args=[],
        fixed_args=args,
    )

    self.policy_loss = nn.NLLLoss().cuda()
    self.value_loss = nn.MSELoss().cuda()
def __init__(self): self.args = ArgsProvider( call_from=self, define_args=[ ("multipred_no_backprop", dict(action="store_true")), ], ) self.policy_loss = nn.NLLLoss().cuda() self.value_loss = nn.MSELoss().cuda()
def __init__(self): self.args = ArgsProvider( call_from=self, define_args=[("num_games", 1024), ("batchsize", 128), ("game_multi", dict(type=int, default=None)), ("T", 6), ("eval", dict(action="store_true")), ("wait_per_group", dict(action="store_true")), ("verbose_comm", dict(action="store_true")), ("verbose_collector", dict(action="store_true"))], on_get_args=self._on_get_args)
def __init__(self): self.context_args = ContextArgs() self.args = ArgsProvider(call_from=self, define_args=[ ("actor_only", dict(action="store_true")), ("list_file", "./train.lst"), ("verbose", dict(action="store_true")) ], more_args=["batchsize", "T"], child_providers=[self.context_args.args])
def __init__(self): self.args = ArgsProvider( call_from=self, define_args=[ ("fixed_policy", dict(action="store_true")), ("h_smooth", dict(action="store_true")), ], more_args=["num_games", "batchsize", "min_prob"], child_providers=[], ) self.prediction_loss = nn.SmoothL1Loss().cuda()
def __init__(self): self.args = ArgsProvider( call_from=self, define_args=[ ("additional_labels", dict( type=str, default=None, help= "Add additional labels in the batch. E.g., id,seq,last_terminal" )), ])
def __init__(self): self.context_args = ContextArgs() self.args = ArgsProvider(call_from=self, define_args=[ ("frame_skip", 4), ("hist_len", 4), ("rom_file", "pong.bin"), ("actor_only", dict(action="store_true")), ("reward_clip", 1), ("rom_dir", os.path.dirname(__file__)) ], more_args=["batchsize", "T", "env_eval_only"], child_providers=[self.context_args.args])
def __init__(self): self.context_args = ContextArgs() self.args = ArgsProvider( call_from=self, define_args=[ ("handicap_level", 0), ("latest_start", 1000), ("latest_start_decay", 0.7), ("fs_ai", 50), ("fs_opponent", 50), ("ai_type", dict(type=str, choices=[ "AI_SIMPLE", "AI_HIT_AND_RUN", "AI_NN", "AI_FLAG_NN", "AI_TD_NN" ], default="AI_NN")), ("opponent_type", dict(type=str, choices=[ "AI_SIMPLE", "AI_HIT_AND_RUN", "AI_FLAG_SIMPLE", "AI_TD_BUILT_IN" ], default="AI_SIMPLE")), ("max_tick", dict(type=int, default=30000, help="Maximal tick")), ("mcts_threads", 64), ("seed", 0), ("without_fow", dict(action="store_true")), ("simple_ratio", -1), ("ratio_change", 0), ("actor_only", dict(action="store_true")), ("additional_labels", dict( type=str, default=None, help= "Add additional labels in the batch. E.g., id,seq,last_terminal" )), ("model_no_spatial", dict(action="store_true") ) # TODO, put it to model ], more_args=["batchsize", "T"], child_providers=[self.context_args.args])
def __init__(self): self.context_args = ContextArgs() self.more_labels = MoreLabels() self.args = ArgsProvider( call_from=self, define_args= [("actor_only", dict(action="store_true")), ("list_file", "./train.lst"), ("verbose", dict(action="store_true")), ("data_aug", dict(type=int, default=-1, help="specify data augumentation, 0-7, -1 mean random")), ("ratio_pre_moves", dict(type=float, default=0, help= "how many moves to perform in each thread, before we use the data train the model" )), ("start_ratio_pre_moves", dict( type=float, default=0.5, help= "how many moves to perform in each thread, before we use the first sgf file to train the model" )), ("num_games_per_thread", dict( type=int, default=5, help= "number of concurrent games per threads, used to increase diversity of games" )), ("move_cutoff", dict(type=int, default=-1, help="Cutoff ply in replay")), ("online", dict(action="store_true", help="Set game to online mode")), ("gpu", dict(type=int, default=None))], more_args=["batchsize", "T"], child_providers=[self.context_args.args, self.more_labels.args])
def __init__(self): self.context_args = ContextArgs() self.args = ArgsProvider( call_from=self, define_args=[ ("frame_skip", 4), ("hist_len", 4), ("rom_file", "pong.bin"), ("actor_only", dict(action="store_true")), ("reward_clip", 1), ("rom_dir", os.path.dirname(__file__)), ("additional_labels", dict( type=str, default=None, help= "Add additional labels in the batch. E.g., id,seq,last_terminal" )), ], more_args=["batchsize", "T", "env_eval_only"], child_providers=[self.context_args.args])
"terminal" ])), reply=None) nIter = 5000 elapsed_wait_only = 0 import pickle import argparse if __name__ == '__main__': parser = argparse.ArgumentParser() loader = Loader() args = ArgsProvider.Load(parser, [loader]) def actor(batch): ''' import pdb pdb.set_trace() pickle.dump(utils_elf.to_numpy(sel), open("tmp%d.bin" % k, "wb"), protocol=2) ''' return dict(a=[0] * batch["s"].size(1)) GC = loader.initialize() GC.reg_callback("actor", actor) before = datetime.now() GC.Start()
params["hist_len"] = args.hist_len params["T"] = args.T return GCWrapper(GC, co, desc, use_numpy=False, params=params) cmd_line = "--num_games 16 --batchsize 4 --hist_len 1 --frame_skip 4 --actor_only" nIter = 5000 elapsed_wait_only = 0 if __name__ == '__main__': parser = argparse.ArgumentParser() loader = Loader() args = ArgsProvider.Load(parser, [loader], cmd_line=cmd_line.split(" ")) GC = loader.initialize() def actor(sel, sel_gpu): # pickle.dump(to_numpy(sel), open("tmp%d.bin" % k, "wb"), protocol=2) return dict(a=[0] * sel[0]["s"].size(0)) GC.reg_callback("actor", actor) reward_dist = Counter() before = datetime.now() GC.Start() import tqdm
nIter = 5000
elapsed_wait_only = 0

import pickle
import random

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    if len(sys.argv) > 1:
        cmd_line = sys.argv[1:]
    else:
        cmd_line = cmd_line.split(" ")
    loader = Loader()
    args = ArgsProvider.Load(parser, [loader], cmd_line=cmd_line)
    GC = loader.initialize()

    actor_count = 0
    train_count = 0

    def actor(batch):
        global actor_count, GC
        actor_count += 1
        # The callback receives the batch directly (`batch`, not `sel`).
        batchsize = batch["s"].size(1)
        actions = [
            random.randint(0, GC.params["num_action"] - 1)
            for i in range(batchsize)
        ]
        reply = dict(a=actions)
    params.update(dict(
        num_group=1 if args.actor_only else 2,
        T=args.T,
    ))
    return GCWrapper(GC, co, desc, gpu=args.gpu, use_numpy=False, params=params)

elapsed_wait_only = 0

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("--num_iter", type=int, default=5000)
    loader = Loader()
    args = ArgsProvider.Load(
        parser, [loader],
        global_overrides=dict(additional_labels="move_idx,game_record_idx"))
    GC = loader.initialize()

    import torch
    nbin = 10
    board_size = GC.params["board_size"]  # GC.params, consistent with below
    stats = torch.FloatTensor(nbin, board_size, board_size)
    counts = torch.FloatTensor(nbin)
    game_records_visited = Counter()

    our_idx = GC.params["our_stone_plane"]
    opp_idx = GC.params["opponent_stone_plane"]

    def train(batch):
        if recorded_a != actual_a:
            self._debug("%s Action was different. recorded %d, actual %d" %
                        (prompt, recorded_a, actual_a))
        # Overlapped by 1.
        self.id2seqs_train[id] = last_seq - 1

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    game = load_module(os.environ["game"]).Loader()
    collector = StatsCollector()
    runner = SingleProcessRun()

    args_providers = [game, runner]
    all_args = ArgsProvider.Load(parser, args_providers)

    GC = game.initialize()
    # GC.setup_gpu(0)
    collector.set_params(GC.params)
    GC.reg_callback("actor", collector.actor)
    GC.reg_callback("train", collector.train)
    GC.reg_sig_int()
    runner.setup(GC)
    runner.run()
if __name__ == '__main__': parser = argparse.ArgumentParser() model_file = load_module(os.environ["model_file"]) model_class, method_class = model_file.Models[os.environ["model"]] model_loader = ModelLoader(model_class) game = load_module(os.environ["game"]).Loader() game.args.set_override(actor_only=True, game_multi=2) sampler = Sampler() evaluator = Evaluator(stats=False) eval_iters = EvalIters() args = ArgsProvider.Load(parser, [ game, sampler, evaluator, model_loader, eval_iters ]) GC = game.initialize() GC.setup_gpu(args.gpu) model = model_loader.load_model(GC.params) mi = ModelInterface() mi.add_model("model", model, optim_params={ "lr" : 0.001}) mi.add_model("actor", model, copy=True, cuda=True, gpu_id=args.gpu) def actor(batch): reply = evaluator.actor(batch) ''' s = batch["s"][0][0] seq = batch["seq"][0][0] for i in range(s.size(0)):