def initialize(self):
    job_id = os.environ.get("job_id", "local")
    opt = tutorial.getOpt(self.option_map.getOptionSpec(), job_id)

    GC = elf.GameContext(opt.base)
    game_obj = tutorial.Client(opt)
    game_obj.setGameContext(GC)
    params = game_obj.getParams()

    batchsize = getattr(self.options, "base.batchsize")

    desc = {}
    desc["actor"] = dict(
        input=["s"],
        reply=["a", "V", "pi"],
        batchsize=batchsize,
    )

    return GCWrapper(
        GC, game_obj, batchsize, desc,
        num_recv=2,
        default_gpu=(self.options.gpu
                     if (self.options.gpu is not None and self.options.gpu >= 0)
                     else None),
        use_numpy=False,
        params=params)
def initialize_reduced_service(self):
    args = self.args
    reference_name = "reference"
    train_name = "train"

    co, GC, params = self._init_gc(
        player_names=[train_name, reference_name])

    desc = {}
    # For the actor model, no reward is needed; we only want to get the input
    # and return a distribution over actions. The sampled action and value
    # will be filled in from the reply.
    desc["reduced_project"] = self._get_reduced_project()
    desc["reduced_forward"] = self._get_reduced_forward()
    desc["reduced_predict"] = self._get_reduced_predict()
    if params["players"][1]["type"] == "AI_NN":
        desc["actor"] = self._get_actor_spec()
        desc["actor"]["batchsize"] //= 2
        desc["actor"]["name"] = "reference"

    self.more_labels.add_labels(desc)
    return GCWrapper(GC, co, desc, gpu=args.gpu, use_numpy=False, params=params)
def initialize(self):
    co, GC, params = self._init_gc()
    args = self.args

    desc = {}
    # For the actor model, no reward is needed; we only want to get the input
    # and return a distribution over actions. The sampled action and value
    # will be filled in from the reply.
    # TODO: change for DQN.
    desc["actor"] = self._get_actor_q_spec()
    if not args.actor_only:
        # For training, we want the input, action (filled by the actor models),
        # value (filled by the actor models) and reward.
        # TODO: change for DQN.
        desc["train"] = self._get_train_q_spec()

    self.more_labels.add_labels(desc)

    params.update(
        dict(num_group=1 if args.actor_only else 2,
             action_batchsize=int(desc["actor"]["batchsize"]),
             train_batchsize=int(desc["train"]["batchsize"])
             if not args.actor_only else None,
             T=args.T,
             model_no_spatial=args.model_no_spatial))

    return GCWrapper(GC, co, desc, gpu=args.gpu, use_numpy=False, params=params)
def initialize_selfplay(self):
    args = self.args
    reference_name = "reference"
    train_name = "train"

    co, GC, params = self._init_gc(player_names=[train_name, reference_name])

    desc = {}
    # For the actor model, no reward is needed; we only want to get the input
    # and return a distribution over actions. The sampled action and value
    # will be filled in from the reply.
    desc["actor0"] = self._get_actor_spec()
    desc["actor1"] = self._get_actor_spec()
    desc["actor0"]["name"] = reference_name
    desc["actor1"]["name"] = train_name

    if not args.actor_only:
        # For training, we want the input, action (filled by the actor models),
        # value (filled by the actor models) and reward.
        desc["train1"] = self._get_train_spec()
        desc["train1"]["name"] = train_name

    self.more_labels.add_labels(desc)

    params.update(dict(
        num_group=1 if args.actor_only else 2,
        action_batchsize=int(desc["actor0"]["batchsize"]),
        train_batchsize=int(desc["train1"]["batchsize"])
        if not args.actor_only else None,
        T=args.T,
        model_no_spatial=args.model_no_spatial,
    ))

    return GCWrapper(GC, co, desc, gpu=args.gpu, use_numpy=False, params=params)
def initialize(self):
    args = self.args

    co = go.ContextOptions()
    self.context_args.initialize(co)

    opt = go.GameOptions()
    opt.seed = 0
    opt.list_filename = args.list_file
    opt.verbose = args.verbose

    GC = go.GameContext(co, opt)
    print("Version: ", GC.Version())

    params = GC.GetParams()
    print("Num Actions: ", params["num_action"])

    desc = {}
    # For training: group 1
    # We want input, action (filled by actor models), value (filled by actor
    # models) and reward.
    desc["train"] = dict(
        batchsize=args.batchsize,
        input=dict(T=args.T, keys=set(["features", "a"])),
        reply=None
    )

    params.update(dict(
        num_group=1 if args.actor_only else 2,
        train_batchsize=int(desc["train"]["batchsize"]),
        T=args.T,
    ))

    return GCWrapper(GC, co, desc, use_numpy=False, params=params)
def initialize(self):
    args = self.args

    co = atari.ContextOptions()
    self.context_args.initialize(co)

    opt = atari.Options()
    opt.frame_skip = args.frame_skip
    opt.rom_file = os.path.join(args.rom_dir, args.rom_file)
    opt.seed = 42
    opt.eval_only = getattr(args, "env_eval_only", 0) == 1
    opt.hist_len = args.hist_len
    opt.reward_clip = args.reward_clip

    GC = atari.GameContext(co, opt)
    print("Version: ", GC.Version())

    params = GC.GetParams()
    print("Num Actions: ", params["num_action"])

    desc = {}
    # For the actor model, no reward is needed; we only want to get the input
    # and return a distribution over actions. The sampled action and value
    # will be filled in from the reply.
    desc["actor"] = dict(
        batchsize=args.batchsize,
        input=dict(T=1, keys=set(["s", "last_r", "last_terminal"])),
        reply=dict(T=1, keys=set(["rv", "pi", "V", "a"])))

    if not args.actor_only:
        # For training: group 1
        # We want input, action (filled by actor models), value (filled by actor
        # models) and reward.
        desc["train"] = dict(
            batchsize=args.batchsize,
            input=dict(T=args.T, keys=set([
                "rv", "id", "pi", "s", "a", "last_r", "V", "seq",
                "last_terminal"
            ])),
            reply=None)

    if args.additional_labels is not None:
        extra = args.additional_labels.split(",")
        for _, v in desc.items():
            v["input"]["keys"].update(extra)

    # Initialize shared memory (between Python and C++) based on the
    # specification defined by desc.
    params["num_group"] = 1 if args.actor_only else 2
    params["action_batchsize"] = desc["actor"]["batchsize"]
    if not args.actor_only:
        params["train_batchsize"] = desc["train"]["batchsize"]
    params["hist_len"] = args.hist_len
    params["T"] = args.T

    return GCWrapper(GC, co, desc, use_numpy=False, params=params)
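# The initialize() functions in this collection only construct the GCWrapper;
# they do not run the environment loop. Below is a minimal driver sketch for
# the Atari wrapper above, under these assumptions: `runner` is an instance of
# the class that defines initialize(), `model` is a hypothetical policy/value
# network whose forward pass returns a dict with "pi" and "V" (the same
# contract RunGC.on_actor uses later in this collection), and Start()/Run()
# follow the driver fragment shown further below. Replies should cover the
# keys declared in desc; the callbacks here are illustrative placeholders.
import torch


def run_loop(runner, model, num_steps=1000):
    GC = runner.initialize()

    def actor(batch):
        # Sample an action from the model's policy for every entry in the batch.
        res = model(batch)
        m = torch.distributions.Categorical(res["pi"].data)
        return dict(a=m.sample(), pi=res["pi"].data, V=res["V"].data)

    def train(batch):
        # Placeholder: a real callback would run one optimizer step on the
        # unrolled batch here.
        pass

    GC.reg_callback("actor", actor)
    # "train" exists in desc only when actor_only is false.
    GC.reg_callback("train", train)

    GC.Start()
    for _ in range(num_steps):  # run a fixed number of steps for illustration
        GC.Run()
    GC.Stop()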
def initialize(self):
    args = self.args

    co = go.ContextOptions()
    self.context_args.initialize(co)

    opt = go.GameOptions()
    opt.seed = 0
    opt.list_filename = args.list_file
    opt.online = args.online
    opt.verbose = args.verbose
    opt.data_aug = args.data_aug
    opt.ratio_pre_moves = args.ratio_pre_moves
    opt.start_ratio_pre_moves = args.start_ratio_pre_moves
    opt.move_cutoff = args.move_cutoff
    opt.num_games_per_thread = args.num_games_per_thread

    GC = go.GameContext(co, opt)
    print("Version: ", GC.Version())

    params = GC.GetParams()
    print("Num Actions: ", params["num_action"])

    desc = {}
    if args.online:
        desc["actor"] = dict(
            batchsize=args.batchsize,
            input=dict(T=args.T, keys=set(["s"])),
            reply=dict(T=args.T, keys=set(["V", "a"])))
    else:
        desc["train"] = dict(
            batchsize=args.batchsize,
            input=dict(T=args.T, keys=set(["s", "offline_a"])),
            reply=None)

    self.more_labels.add_labels(desc)

    params.update(dict(
        num_group=1 if args.actor_only else 2,
        T=args.T,
    ))

    return GCWrapper(GC, co, desc, gpu=args.gpu, use_numpy=False, params=params)
def initialize(self):
    opt = go.getServerOpt(self.option_map.getOptionSpec())

    desc = {}
    GC = elf.GameContext(opt.common.base)

    mode = getattr(self.options, "common.mode")
    batchsize = getattr(self.options, "common.base.batchsize")

    if mode in ["train", "train_offline"]:
        game_obj = go.Server(opt)
        desc["train"] = dict(
            input=["s", "offline_a", "winner", "mcts_scores", "move_idx",
                   "selfplay_ver"],
            reply=None)
        desc["train_ctrl"] = dict(
            input=["selfplay_ver"], reply=None, batchsize=1)
    else:
        raise ValueError("No such mode: " + mode)

    game_obj.setGameContext(GC)
    params = game_obj.getParams()

    if self.options.parameter_print:
        print("**** Options ****")
        print(opt.info())
        print("*****************")
        print("Version: ", elf.version())
        print("Mode: ", mode)
        print("Num Actions: ", params["num_action"])

    self.more_labels.add_labels(desc)

    return GCWrapper(
        GC, game_obj, batchsize, desc,
        num_recv=2,
        default_gpu=(self.options.gpu
                     if (self.options.gpu is not None and self.options.gpu >= 0)
                     else None),
        use_numpy=False,
        params=params,
        verbose=self.options.parameter_print)
def initialize(self):
    co, GC, opt = self._set_params()

    params = GC.getParams()
    if self.options.parameter_print:
        print("Mode: ", opt.mode)
        print("Num Actions: ", params["num_action"])

    desc = {}
    if self.options.mode == "online":
        desc["human_actor"] = dict(
            input=["s"],
            reply=["pi", "a", "V"],
            batchsize=1,
        )
        # Used for MCTS/Direct play.
        desc["actor_black"] = dict(
            input=["s"],
            reply=["pi", "V", "a", "rv"],
            timeout_usec=10,
            batchsize=co.mcts_options.num_rollouts_per_batch)
    else:
        raise ValueError("No such mode: " + self.options.mode)

    params.update(
        dict(
            num_group=1 if self.options.actor_only else 2,
            T=self.options.T,
        ))

    self.more_labels.add_labels(desc)

    return GCWrapper(
        GC, self.max_batchsize, desc,
        num_recv=2,
        gpu=(self.options.gpu
             if (self.options.gpu is not None and self.options.gpu >= 0)
             else None),
        use_numpy=False,
        params=params,
        verbose=self.options.parameter_print)
class RunGC(object):
    @classmethod
    def get_option_spec(cls):
        spec = PyOptionSpec()
        elf.saveDefaultOptionsToArgs("", spec)
        elf.saveDefaultNetOptionsToArgs("", spec)
        spec.addIntOption(
            'gpu',
            'GPU id to use',
            -1)
        spec.addStrListOption(
            "parsed_args",
            "dummy option",
            [])
        return spec

    @auto_import_options
    def __init__(self, option_map):
        self.option_map = option_map

    def initialize(self):
        opt = elf.Options()
        net_opt = elf.NetOptions()
        opt.loadFromArgs("", self.option_map.getOptionSpec())
        net_opt.loadFromArgs("", self.option_map.getOptionSpec())

        self.rs = elf.RemoteServers(
            elf.getNetOptions(opt, net_opt), ["actor", "train"])
        GC = elf.BatchReceiver(opt, self.rs)
        GC.setMode(elf.RECV_ENTRY)
        batchsize = opt.batchsize
        print("Batchsize: %d" % batchsize)

        width = 210 // 2
        height = 160 // 2
        T = 6
        num_action = 4

        spec = {}
        spec["actor"] = dict(
            input=dict(s=("float", (3, height, width))),
            reply=dict(a=("int32_t", 1), pi=("float", num_action),
                       V=("float", 1))
        )
        '''
        spec["train"] = dict(
            input=dict(s_=(T, 3, height, width), r_=(T, 1), a_=(T, 1),
                       pi_=(T, num_action), V_=(T, 1)),
        )
        '''

        e = GC.getExtractor()
        desc = allocExtractor(e, batchsize, spec)

        params = {
            "input_dim": width * height * 3,
            "num_action": 4
        }

        print("Init GC Wrapper")
        has_gpu = self.options.gpu is not None and self.options.gpu >= 0

        self.wrapper = GCWrapper(
            GC, None, batchsize, desc, num_recv=1,
            default_gpu=(self.options.gpu if has_gpu else None),
            use_numpy=False, params=params)
        # self.wrapper.reg_callback("train", self.on_train)
        self.wrapper.reg_callback("actor", self.on_actor)

        self.model = MyModel(params)
        if has_gpu:
            self.model.cuda(self.options.gpu)
        # self.optim = torch.optim.Adam(self.model.parameters())
        self.n = 0

    def on_actor(self, batch):
        res = self.model(batch)
        m = torch.distributions.Categorical(res["pi"].data)
        self.n += 1
        if self.n == 20:
            # print(psutil.cpu_percent())      # gives a single float value
            # print(psutil.virtual_memory())   # gives an object with many fields
            self.n = 0
        return dict(a=m.sample(), pi=res["pi"].data, V=res["V"].data)

    def on_train(self, batch):
        pass
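# `MyModel` is referenced in RunGC above but not defined anywhere in this
# collection. The sketch below is one hypothetical model satisfying the
# contract that RunGC.on_actor relies on: it consumes a batch carrying an "s"
# tensor of shape (batchsize, 3, height, width) and returns a dict with a
# policy distribution "pi" over num_action actions and a scalar value "V".
# The hidden-layer size is an illustrative choice, not taken from the original.
import torch
import torch.nn as nn
import torch.nn.functional as F


class MyModel(nn.Module):
    def __init__(self, params):
        super().__init__()
        self.num_action = params["num_action"]
        # Flattened observation (3 * height * width) -> small hidden layer.
        self.trunk = nn.Sequential(
            nn.Linear(params["input_dim"], 256),
            nn.ReLU(),
        )
        self.pi_head = nn.Linear(256, self.num_action)  # policy logits
        self.v_head = nn.Linear(256, 1)                 # state value

    def forward(self, batch):
        s = batch["s"].float()
        h = self.trunk(s.view(s.size(0), -1))
        # Keys "pi" and "V" match what on_actor reads from the result.
        return dict(pi=F.softmax(self.pi_head(h), dim=1),
                    V=self.v_head(h))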
def initialize(self):
    co, GC, game_opt = self._set_params()

    params = GC.getParams()
    if self.options.parameter_print:
        print("Mode: ", game_opt.mode)
        print("checkers_num_action: ", params["checkers_num_action"])

    desc = {}
    if self.options.mode == "play":
        desc["actor_white"] = dict(
            input=["s", "game_idx"],
            reply=["pi", "a", "V"],
            batchsize=1,
        )
        desc["actor_black"] = dict(
            input=["s"],
            reply=["pi", "V", "a", "rv"],
            timeout_usec=10,
            batchsize=co.mcts_options.num_rollouts_per_batch)
    elif self.options.mode == "selfplay":
        desc["game_end"] = dict(batchsize=1)
        desc["game_start"] = dict(
            batchsize=1,
            input=["white_ver", "black_ver"],
            reply=None)
        # checkers
        desc["actor_white"] = dict(
            input=["s"],
            reply=["pi", "V", "a", "rv"],
            batchsize=self.options.batchsize2
            if self.options.batchsize2 > 0
            else self.options.batchsize,
            timeout_usec=self.options.selfplay_timeout_usec,
        )
        desc["actor_black"] = dict(
            input=["s"],
            reply=["pi", "V", "a", "rv"],
            batchsize=self.options.batchsize2
            if self.options.batchsize2 > 0
            else self.options.batchsize,
            timeout_usec=self.options.selfplay_timeout_usec,
        )
    elif self.options.mode in ["train", "offline_train"]:
        desc["train"] = dict(
            input=["s", "offline_a", "winner", "mcts_scores", "move_idx",
                   "selfplay_ver"],
            reply=None)
        desc["train_ctrl"] = dict(
            input=["selfplay_ver"], reply=None, batchsize=1)
    else:
        raise ValueError("No such mode: " + self.options.mode)

    params.update(
        dict(
            num_group=1 if self.options.actor_only else 2,
            T=self.options.T,
        ))

    self.more_labels.add_labels(desc)

    return GCWrapper(
        GC, self.max_batchsize, desc,
        num_recv=2,
        gpu=(self.options.gpu
             if (self.options.gpu is not None and self.options.gpu >= 0)
             else None),
        use_numpy=False,
        params=params,
        verbose=self.options.parameter_print)
    dict(batchsize=128,
         input=dict(T=1, keys=set(["s", "a", "last_r", "terminal"])),
         reply=dict(T=1, keys=set(["rv", "pi", "V", "a"]))),
    "reduced_forward":
        dict(batchsize=128,
             input=dict(T=1, keys=set(["s", "a", "last_r", "terminal"])),
             reply=dict(T=1, keys=set(["rv", "pi", "V", "a"]))),
    "reduced_project":
        dict(batchsize=128,
             input=dict(T=1, keys=set(["s", "a", "last_r", "terminal"])),
             reply=dict(T=1, keys=set(["rv", "pi", "V", "a"])))
}

GC = GCWrapper(GC, co, batch_descriptions, use_numpy=True, params=GC.GetParams())

GC.reg_callback("actor", actor)
GC.reg_callback("reduced_predict", reduced_predict)
GC.reg_callback("reduced_forward", reduced_forward)
GC.reg_callback("reduced_project", reduced_project)

GC.Start()

while True:
    GC.Run()

GC.Stop()
def initialize(self):
    args = self.args

    co = atari.ContextOptions()
    self.context_args.initialize(co)

    opt = atari.Options()
    opt.frame_skip = args.frame_skip
    opt.rom_file = os.path.join(args.rom_dir, args.rom_file)
    opt.seed = 42
    opt.eval_only = getattr(args, "env_eval_only", 0) == 1
    opt.hist_len = args.hist_len
    opt.reward_clip = args.reward_clip

    GC = atari.GameContext(co, opt)
    print("Version: ", GC.Version())

    num_action = GC.get_num_actions()
    print("Num Actions: ", num_action)

    desc = {}
    # For the actor model, no reward is needed; we only want to get the input
    # and return a distribution over actions. The sampled action and value
    # will be filled in from the reply.
    desc["actor"] = dict(
        input=dict(id="", s=str(args.hist_len), last_r="", last_terminal="",
                   _batchsize=str(args.batchsize), _T="1"),
        reply=dict(rv="", pi=str(num_action), V="1", a="1",
                   _batchsize=str(args.batchsize), _T="1"))

    if not args.actor_only:
        # For training: group 1
        # We want input, action (filled by actor models), value (filled by actor
        # models) and reward.
        desc["train"] = dict(
            input=dict(rv="", id="", pi=str(num_action), s=str(args.hist_len),
                       a="1", r="1", V="1", seq="", terminal="",
                       _batchsize=str(args.batchsize), _T=str(args.T)),
            reply=None)

    # Initialize shared memory (between Python and C++) based on the
    # specification defined by desc.
    params = dict()
    params["num_action"] = GC.get_num_actions()
    params["num_group"] = 1 if args.actor_only else 2
    params["action_batchsize"] = int(desc["actor"]["input"]["_batchsize"])
    if not args.actor_only:
        params["train_batchsize"] = int(
            desc["train"]["input"]["_batchsize"])
    params["hist_len"] = args.hist_len
    params["T"] = args.T

    return GCWrapper(GC, co, desc, use_numpy=False, params=params)
def initialize(self):
    args = self.args

    co = minirts.ContextOptions()
    self.context_args.initialize(co)

    opt = minirts.Options()
    opt.seed = args.seed
    opt.frame_skip_ai = args.fs_ai
    opt.frame_skip_opponent = args.fs_opponent
    opt.simulation_type = minirts.ST_NORMAL
    opt.ai_type = getattr(minirts, args.ai_type)
    if args.ai_type == "AI_NN":
        opt.backup_ai_type = minirts.AI_SIMPLE
    if args.ai_type == "AI_FLAG_NN":
        opt.backup_ai_type = minirts.AI_FLAG_SIMPLE
    opt.opponent_ai_type = getattr(minirts, args.opponent_type)
    opt.latest_start = args.latest_start
    opt.latest_start_decay = args.latest_start_decay
    opt.mcts_threads = args.mcts_threads
    opt.mcts_rollout_per_thread = 50
    opt.max_tick = args.max_tick
    opt.handicap_level = args.handicap_level
    opt.simple_ratio = args.simple_ratio
    opt.ratio_change = args.ratio_change
    # opt.output_filename = b"simulators.txt"
    # opt.cmd_dumper_prefix = b"cmd-dump"
    # opt.save_replay_prefix = b"replay"

    GC = minirts.GameContext(co, opt)
    print("Version: ", GC.Version())

    num_action = GC.get_num_actions()
    print("Num Actions: ", num_action)

    num_unittype = GC.get_num_unittype()
    print("Num unittype: ", num_unittype)

    desc = {}
    # For the actor model, no reward is needed; we only want to get the input
    # and return a distribution over actions. The sampled action and value
    # will be filled in from the reply.
    desc["actor"] = (
        dict(s=str(num_unittype + 7), r0="", r1="", last_r="",
             last_terminal="", _batchsize=str(args.batchsize), _T="1"),
        dict(rv="", pi=str(num_action), V="1", a="1",
             _batchsize=str(args.batchsize), _T="1"))

    if not args.actor_only:
        # For training, we want the input, action (filled by the actor models),
        # value (filled by the actor models) and reward.
        desc["train"] = (
            dict(rv="", pi=str(num_action), s=str(num_unittype + 7),
                 r0="", r1="", a="1", r="1", V="1", terminal="",
                 _batchsize=str(args.batchsize), _T=str(args.T)),
            None)

    if args.additional_labels is not None:
        extra = {label: "" for label in args.additional_labels.split(",")}
        for _, v in desc.items():
            v[0].update(extra)

    params = dict(
        num_action=num_action,
        num_unit_type=num_unittype,
        num_group=1 if args.actor_only else 2,
        action_batchsize=int(desc["actor"][0]["_batchsize"]),
        train_batchsize=int(desc["train"][0]["_batchsize"])
        if not args.actor_only else None,
        T=args.T)

    return GCWrapper(GC, co, desc, use_numpy=False, params=params)
def initialize(self):
    args = self.args

    co = minirts.ContextOptions()
    self.context_args.initialize(co)

    opt = minirts.Options()
    opt.seed = args.seed
    opt.frame_skip_ai = args.fs_ai
    opt.frame_skip_opponent = args.fs_opponent
    opt.simulation_type = minirts.ST_NORMAL
    opt.ai_type = getattr(minirts, args.ai_type)
    if args.ai_type == "AI_NN":
        opt.backup_ai_type = minirts.AI_SIMPLE
    if args.ai_type == "AI_FLAG_NN":
        opt.backup_ai_type = minirts.AI_FLAG_SIMPLE
    opt.opponent_ai_type = getattr(minirts, args.opponent_type)
    opt.latest_start = args.latest_start
    opt.latest_start_decay = args.latest_start_decay
    opt.mcts_threads = args.mcts_threads
    opt.mcts_rollout_per_thread = 50
    opt.max_tick = args.max_tick
    opt.handicap_level = args.handicap_level
    opt.simple_ratio = args.simple_ratio
    opt.ratio_change = args.ratio_change
    # opt.output_filename = b"simulators.txt"
    # opt.cmd_dumper_prefix = b"cmd-dump"
    # opt.save_replay_prefix = b"replay"

    GC = minirts.GameContext(co, opt)

    params = GC.GetParams()
    print("Version: ", GC.Version())
    print("Num Actions: ", params["num_action"])
    print("Num unittype: ", params["num_unit_type"])

    desc = {}
    # For the actor model, no reward is needed; we only want to get the input
    # and return a distribution over actions. The sampled action and value
    # will be filled in from the reply.
    desc["actor"] = dict(
        batchsize=args.batchsize,
        input=dict(T=1, keys=set(["s", "res", "last_r", "r0", "r1",
                                  "terminal"])),
        reply=dict(T=1, keys=set(["rv", "pi", "V", "a"])))

    if not args.actor_only:
        # For training: group 1
        # We want input, action (filled by actor models), value (filled by actor
        # models) and reward.
        desc["train"] = dict(
            batchsize=args.batchsize,
            input=dict(T=args.T, keys=set([
                "rv", "pi", "s", "res", "a", "last_r", "r0", "r1", "V",
                "terminal"
            ])),
            reply=None)

    if args.additional_labels is not None:
        extra = args.additional_labels.split(",")
        for _, v in desc.items():
            v["input"]["keys"].update(extra)

    params.update(
        dict(num_group=1 if args.actor_only else 2,
             action_batchsize=int(desc["actor"]["batchsize"]),
             train_batchsize=int(desc["train"]["batchsize"])
             if not args.actor_only else None,
             T=args.T))

    return GCWrapper(GC, co, desc, use_numpy=False, params=params)
def initialize(self):
    job_id = os.environ.get("job_id", "local")
    opt = go.getClientOpt(self.option_map.getOptionSpec(), job_id)

    mode = getattr(self.options, "common.mode")
    batchsize = getattr(self.options, "common.base.batchsize")

    GC = elf.GameContext(opt.common.base)
    if mode not in ["online", "selfplay"]:
        raise ValueError("No such mode: " + mode)

    game_obj = go.Client(opt)
    game_obj.setGameContext(GC)
    params = game_obj.getParams()

    if self.options.parameter_print:
        print("**** Options ****")
        print(opt.info())
        print("*****************")
        print("Version: ", elf.version())
        print("Mode: ", mode)
        print("Num Actions: ", params["num_action"])

    desc = {}
    if mode == "online":
        desc["human_actor"] = dict(
            input=[],
            reply=["a", "timestamp"],
            batchsize=1,
        )
        # Used for MCTS/Direct play.
        desc["actor_black"] = dict(
            input=["s"],
            reply=["pi", "V", "a", "rv"],
            timeout_usec=10,
            batchsize=getattr(self.options,
                              "common.mcts.num_rollout_per_batch"))
    elif mode == "selfplay":
        # Used for MCTS/Direct play.
        white_batchsize = self.options.white_mcts_rollout_per_batch
        if white_batchsize < 0:
            white_batchsize = batchsize

        desc["actor_black"] = dict(
            input=["s"],
            reply=["pi", "V", "a", "rv"],
            batchsize=batchsize,
            timeout_usec=self.options.selfplay_timeout_usec,
        )
        desc["actor_white"] = dict(
            input=["s"],
            reply=["pi", "V", "a", "rv"],
            batchsize=white_batchsize,
            timeout_usec=self.options.selfplay_timeout_usec,
        )
        desc["game_end"] = dict(batchsize=1)
        desc["game_start"] = dict(
            batchsize=1,
            input=["black_ver", "white_ver"],
            reply=None)

    self.more_labels.add_labels(desc)

    return GCWrapper(
        GC, game_obj, batchsize, desc,
        num_recv=8,
        default_gpu=(self.options.gpu
                     if (self.options.gpu is not None and self.options.gpu >= 0)
                     else None),
        use_numpy=False,
        params=params,
        verbose=self.options.parameter_print)
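# A sketch of how the client wrapper returned above might be driven in
# selfplay mode. The callback names mirror the desc keys declared in
# initialize(); the actor functions and the lowercase start()/run()/stop()
# method names are assumptions (the driver fragment earlier in this collection
# uses Start()/Run(); the naming differs between ELF versions), and the
# game_start/game_end placeholders would normally reload model versions and
# record game results.
def selfplay_loop(client, black_actor_fn, white_actor_fn, num_steps=1000):
    GC = client.initialize()

    GC.reg_callback("actor_black", black_actor_fn)
    GC.reg_callback("actor_white", white_actor_fn)
    GC.reg_callback("game_start", lambda batch: None)
    GC.reg_callback("game_end", lambda batch: None)

    GC.start()
    for _ in range(num_steps):  # run a fixed number of steps for illustration
        GC.run()
    GC.stop()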