def initialize(self):
    args = self.args
    co = minirts.ContextOptions()
    self.context_params.initialize(co)

    opt = minirts.Options()
    opt.seed = args.seed
    opt.frame_skip_ai = args.fs_ai
    opt.frame_skip_opponent = args.fs_opponent
    opt.simulation_type = minirts.ST_NORMAL
    opt.ai_type = getattr(minirts, args.ai_type)
    if args.ai_type == "AI_NN":
        opt.backup_ai_type = minirts.AI_SIMPLE
    if args.ai_type == "AI_FLAG_NN":
        opt.backup_ai_type = minirts.AI_FLAG_SIMPLE
    opt.opponent_ai_type = getattr(minirts, args.opponent_type)
    opt.latest_start = args.latest_start
    opt.latest_start_decay = args.latest_start_decay
    opt.mcts_threads = args.mcts_threads
    opt.mcts_rollout_per_thread = 50
    opt.max_tick = args.max_tick
    opt.handicap_level = args.handicap_level
    opt.simple_ratio = args.simple_ratio
    opt.ratio_change = args.ratio_change
    # opt.output_filename = b"simulators.txt"
    # opt.cmd_dumper_prefix = b"cmd-dump"
    # opt.save_replay_prefix = b"replay"

    GC = minirts.GameContext(co, opt)
    print("Version: ", GC.Version())

    num_action = GC.get_num_actions()
    print("Num Actions: ", num_action)

    num_unittype = GC.get_num_unittype()
    print("Num unittype: ", num_unittype)

    desc = []
    name2idx = {}

    # For the actor model: group 0.
    # No reward is needed; we only want to send the game state and receive a
    # distribution over actions. The sampled action and the value estimate
    # will be filled in from the reply.
    name2idx["actor"] = len(desc)
    desc.append((
        dict(id="", s=str(2), r0="", r1="", last_r="", last_terminal="",
             _batchsize=str(args.batchsize), _T="1"),
        dict(rv="", pi=str(num_action), V="1", a="1",
             _batchsize=str(args.batchsize), _T="1")))

    if not args.actor_only:
        # For training: group 1.
        # We want the input, the action and value (both filled in by the actor
        # model), and the reward.
        name2idx["train"] = len(desc)
        desc.append((
            dict(rv="", id="", pi=str(num_action), s=str(2), r0="", r1="",
                 a="1", r="1", V="1", seq="", terminal="",
                 _batchsize=str(args.batchsize), _T=str(args.T)),
            None))

    inputs, replies = utils_elf.init_collectors(GC, co, desc, use_numpy=False)

    params = dict(
        num_action=num_action,
        num_unit_type=num_unittype,
        num_group=1 if args.actor_only else 2,
        action_batchsize=int(desc[name2idx["actor"]][0]["_batchsize"]),
        train_batchsize=int(desc[name2idx["train"]][0]["_batchsize"]) if not args.actor_only else None,
        T=args.T)

    return utils_elf.GCWrapper(GC, inputs, replies, name2idx, params)
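# Hypothetical helper (not part of the original source) that pretty-prints one
# entry of the `desc` list built above, to make the (input_spec, reply_spec)
# convention concrete. Reading an empty-string value as "default/scalar size"
# is an assumption for illustration; only `desc` and `name2idx` come from the
# loader code above.
def describe_group(name, spec):
    """Print the input and reply keys declared for one batch group."""
    input_spec, reply_spec = spec
    print("group '%s': batchsize=%s, T=%s"
          % (name, input_spec["_batchsize"], input_spec["_T"]))
    print("  inputs :",
          {k: (v or "scalar") for k, v in input_spec.items() if not k.startswith("_")})
    if reply_spec is not None:
        print("  replies:",
              {k: (v or "scalar") for k, v in reply_spec.items() if not k.startswith("_")})

# Example (inside initialize(), after desc is built):
#   describe_group("actor", desc[name2idx["actor"]])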
def initialize(self):
    args = self.args
    co = atari.ContextOptions()
    self.context_args.initialize(co)

    opt = atari.Options()
    opt.frame_skip = args.frame_skip
    opt.rom_file = os.path.join(args.rom_dir, args.rom_file)
    opt.seed = 42
    opt.hist_len = args.hist_len
    opt.reward_clip = args.reward_clip

    GC = atari.GameContext(co, opt)
    print("Version: ", GC.Version())

    num_action = GC.get_num_actions()
    print("Num Actions: ", num_action)

    desc = []
    name2idx = {}

    # For the actor model: group 0.
    # No reward is needed; we only want to send the game state and receive a
    # distribution over actions. The sampled action and the value estimate
    # will be filled in from the reply.
    name2idx["actor"] = len(desc)

    # Descriptor of each group:
    #   desc = [(input_group0, reply_group0), (input_group1, reply_group1), ...]
    # GC.Wait(0) returns a batch of game states belonging to the same group.
    # For example, if you register group 0 as the actor group (history length 1)
    # and group 1 as the optimizer group (history length T), you can check the
    # returned batch's group_id to decide which Python procedure should handle it.
    # For self-play, we can register one group per player.
    desc.append((
        dict(id="", s=str(args.hist_len), last_r="", last_terminal="",
             _batchsize=str(args.batchsize), _T="1"),
        dict(rv="", pi=str(num_action), V="1", a="1",
             _batchsize=str(args.batchsize), _T="1")))

    if not args.actor_only:
        # For training: group 1.
        # We want the input, the action and value (both filled in by the actor
        # model), and the reward.
        name2idx["train"] = len(desc)
        desc.append((
            dict(rv="", id="", pi=str(num_action), s=str(args.hist_len),
                 a="1", r="1", V="1", seq="", terminal="",
                 _batchsize=str(args.batchsize), _T=str(args.T)),
            None))

    # Initialize the shared memory (between Python and C++) according to the
    # specification defined by desc.
    inputs, replies = utils_elf.init_collectors(GC, co, desc, use_numpy=False)

    params = dict()
    params["num_action"] = GC.get_num_actions()
    params["num_group"] = 1 if args.actor_only else 2
    params["action_batchsize"] = int(desc[name2idx["actor"]][0]["_batchsize"])
    if not args.actor_only:
        params["train_batchsize"] = int(desc[name2idx["train"]][0]["_batchsize"])
    params["hist_len"] = args.hist_len
    params["T"] = args.T

    return utils_elf.GCWrapper(GC, inputs, replies, name2idx, params)
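# Hypothetical driver sketch (not from the original source) showing how the two
# groups declared above are typically consumed: one Python callback per group,
# selected by group name/id as the comments describe. The GCWrapper method
# names used here (reg_callback, Start, Run, Stop), the batch/reply keys, and
# the `loader`/`total_iterations` names are assumptions for illustration only;
# the real API is whatever utils_elf.GCWrapper exposes.
import numpy as np

def on_actor(batch):
    # Group 0: receives states "s" with history length 1 and must return a
    # reply matching the actor spec (pi, V, a, rv). A uniform policy is used
    # here purely as a placeholder.
    batchsize = len(batch["s"])
    num_action = 6  # placeholder; the real value comes from GC.get_num_actions()
    pi = np.full((batchsize, num_action), 1.0 / num_action)
    return dict(pi=pi,
                V=np.zeros(batchsize),
                a=pi.argmax(axis=1),
                rv=np.zeros(batchsize))

def on_train(batch):
    # Group 1: batches of length T with a/r/V already filled in by the actor;
    # a real implementation would run one optimizer step here. No reply needed.
    pass

# GC = loader.initialize()             # `loader` is an instance of the class above
# GC.reg_callback("actor", on_actor)
# GC.reg_callback("train", on_train)
# GC.Start()
# for _ in range(total_iterations):
#     GC.Run()
# GC.Stop()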
def initialize(self):
    args = self.args
    co = atari.ContextOptions()
    self.context_params.initialize(co)

    opt = atari.Options()
    opt.frame_skip = args.frame_skip
    opt.rom_file = os.path.join(args.rom_dir, args.rom_file)
    opt.seed = 42
    opt.hist_len = args.hist_len
    opt.reward_clip = args.reward_clip

    GC = atari.GameContext(co, opt)
    print("Version: ", GC.Version())

    num_action = GC.get_num_actions()
    print("Num Actions: ", num_action)

    desc = []
    name2idx = {}

    # For the actor model: group 0.
    # No reward is needed; we only want to send the game state and receive a
    # distribution over actions. The sampled action and the value estimate
    # will be filled in from the reply.
    name2idx["actor"] = len(desc)
    desc.append((
        dict(id="", s=str(args.hist_len), last_r="", last_terminal="",
             _batchsize=str(args.batchsize), _T="1"),
        dict(rv="", pi=str(num_action), V="1", a="1",
             _batchsize=str(args.batchsize), _T="1")))

    if not args.actor_only:
        # For training: group 1.
        # We want the input, the action and value (both filled in by the actor
        # model), and the reward.
        name2idx["train"] = len(desc)
        desc.append((
            dict(rv="", id="", pi=str(num_action), s=str(args.hist_len),
                 a="1", r="1", V="1", seq="", terminal="",
                 _batchsize=str(args.batchsize), _T=str(args.T)),
            None))

    inputs, replies = utils_elf.init_collectors(GC, co, desc, use_numpy=False)

    params = dict()
    params["num_action"] = GC.get_num_actions()
    params["num_group"] = 1 if args.actor_only else 2
    params["action_batchsize"] = int(desc[name2idx["actor"]][0]["_batchsize"])
    if not args.actor_only:
        params["train_batchsize"] = int(desc[name2idx["train"]][0]["_batchsize"])
    params["hist_len"] = args.hist_len
    params["T"] = args.T

    return utils_elf.GCWrapper(GC, inputs, replies, name2idx, params)