def initialize(self):
    args = self.args
    co = atari.ContextOptions()
    self.context_args.initialize(co)

    opt = atari.Options()
    opt.frame_skip = args.frame_skip
    opt.rom_file = os.path.join(args.rom_dir, args.rom_file)
    opt.seed = 42
    opt.eval_only = getattr(args, "env_eval_only", 0) == 1
    opt.hist_len = args.hist_len
    opt.reward_clip = args.reward_clip

    GC = atari.GameContext(co, opt)
    print("Version: ", GC.Version())

    params = GC.GetParams()
    print("Num Actions: ", params["num_action"])

    desc = {}
    # For the actor model: no reward needed; we only want to get the input and
    # return a distribution over actions. The sampled action and value will be
    # filled in from the reply.
    desc["actor"] = dict(
        batchsize=args.batchsize,
        input=dict(T=1, keys=set(["s", "last_r", "last_terminal"])),
        reply=dict(T=1, keys=set(["rv", "pi", "V", "a"])))

    if not args.actor_only:
        # For training: group 1.
        # We want the input, action (filled by actor models), value (filled by
        # actor models) and reward.
        desc["train"] = dict(
            batchsize=args.batchsize,
            input=dict(T=args.T, keys=set([
                "rv", "id", "pi", "s", "a", "last_r", "V", "seq",
                "last_terminal"
            ])),
            reply=None)

    if args.additional_labels is not None:
        extra = args.additional_labels.split(",")
        for _, v in desc.items():
            v["input"]["keys"].update(extra)

    # Initialize shared memory (between Python and C++) based on the
    # specification defined by desc.
    params["num_group"] = 1 if args.actor_only else 2
    params["action_batchsize"] = desc["actor"]["batchsize"]
    if not args.actor_only:
        params["train_batchsize"] = desc["train"]["batchsize"]

    params["hist_len"] = args.hist_len
    params["T"] = args.T

    return GCWrapper(GC, co, desc, use_numpy=False, params=params)
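# A minimal, self-contained sketch of what the additional_labels handling above
# does: every comma-separated label is added to the input key set of every
# group. "toy_desc" is a stand-in with made-up batch sizes and keys, not the
# real descriptor built in initialize().
toy_desc = {
    "actor": dict(batchsize=16,
                  input=dict(T=1, keys=set(["s", "last_r", "last_terminal"])),
                  reply=dict(T=1, keys=set(["rv", "pi", "V", "a"]))),
    "train": dict(batchsize=16,
                  input=dict(T=6, keys=set(["s", "a", "last_r"])),
                  reply=None),
}
additional_labels = "id,seq"
extra = additional_labels.split(",")
for _, v in toy_desc.items():
    v["input"]["keys"].update(extra)
print(toy_desc["actor"]["input"]["keys"])   # now also contains "id" and "seq"
print(toy_desc["train"]["input"]["keys"])   # the same extra keys are added here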
def benchmark(ngame):
    batchsize = 16
    co, opt = initialize_game(
        batchsize, num_games=ngame, frame_skip=frame_skip, T=4)
    GC = atari.GameContext(co, opt)

    start = time.time()
    sys.stderr.write("Start benchmark ngame={}...\n".format(ngame))

    stats = Counter()
    for i in range(ngame):
        stats[i] = 0

    for k in range(Niter):
        infos = GC.Wait(0)
        # for i in range(len(infos)):
        #     stats[infos[i].meta().id] += 1
        GC.Steps(infos)

    print(stats)
    return Niter / (time.time() - start) * frame_skip * batchsize
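# A hypothetical driver for benchmark() above. It assumes frame_skip, Niter and
# initialize_game are module-level definitions provided elsewhere in this file;
# the game counts swept here are arbitrary.
if __name__ == "__main__":
    for ngame in [16, 64, 256]:
        fps = benchmark(ngame)
        print("ngame={}: {:.1f} frames/sec".format(ngame, fps))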
def initialize(self):
    args = self.args
    co = atari.ContextOptions()
    self.context_args.initialize(co)

    opt = atari.Options()
    opt.frame_skip = args.frame_skip
    opt.rom_file = os.path.join(args.rom_dir, args.rom_file)
    opt.seed = 42
    opt.hist_len = args.hist_len
    opt.reward_clip = args.reward_clip

    GC = atari.GameContext(co, opt)
    print("Version: ", GC.Version())

    num_action = GC.get_num_actions()
    print("Num Actions: ", num_action)

    # Descriptor of each group:
    #   desc = [(input_group0, reply_group0), (input_group1, reply_group1), ...]
    # GC.Wait(0) returns a batch of game states belonging to the same group.
    # For example, if you register group 0 as the actor group (history length 1)
    # and group 1 as the optimizer group (history length T), you can check a
    # returned batch's group id to decide which Python procedure should handle
    # it. For self-play, we can register one group per player.
    desc = []
    name2idx = {}

    # For the actor model: group 0.
    # No reward needed; we only want to get the input and return a distribution
    # over actions. The sampled action and value will be filled in from the
    # reply.
    name2idx["actor"] = len(desc)
    desc.append((
        dict(id="", s=str(args.hist_len), last_r="", last_terminal="",
             _batchsize=str(args.batchsize), _T="1"),
        dict(rv="", pi=str(num_action), V="1", a="1",
             _batchsize=str(args.batchsize), _T="1")))

    if not args.actor_only:
        # For training: group 1.
        # We want the input, action (filled by actor models), value (filled by
        # actor models) and reward.
        name2idx["train"] = len(desc)
        desc.append((
            dict(rv="", id="", pi=str(num_action), s=str(args.hist_len),
                 a="1", r="1", V="1", seq="", terminal="",
                 _batchsize=str(args.batchsize), _T=str(args.T)),
            None))

    # Initialize shared memory (between Python and C++) based on the
    # specification defined by desc.
    inputs, replies = utils_elf.init_collectors(GC, co, desc, use_numpy=False)

    params = dict()
    params["num_action"] = GC.get_num_actions()
    params["num_group"] = 1 if args.actor_only else 2
    params["action_batchsize"] = int(
        desc[name2idx["actor"]][0]["_batchsize"])
    if not args.actor_only:
        params["train_batchsize"] = int(
            desc[name2idx["train"]][0]["_batchsize"])

    params["hist_len"] = args.hist_len
    params["T"] = args.T

    return utils_elf.GCWrapper(GC, inputs, replies, name2idx, params)
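# A rough sketch of the dispatch pattern the comments above describe: wait for a
# batch, look at its group id, and route it to the matching Python handler. It
# assumes the object returned by GC.Wait(0) exposes the group id as .gid;
# handle_actor_batch and handle_train_batch are hypothetical callbacks.
def run_loop(GC, name2idx, handle_actor_batch, handle_train_batch):
    while True:
        infos = GC.Wait(0)
        if infos.gid == name2idx["actor"]:
            handle_actor_batch(infos)
        elif infos.gid == name2idx.get("train", -1):
            handle_train_batch(infos)
        GC.Steps(infos)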
def initialize(self):
    args = self.args
    co = atari.ContextOptions()
    self.context_args.initialize(co)

    opt = atari.Options()
    opt.frame_skip = args.frame_skip
    opt.rom_file = os.path.join(args.rom_dir, args.rom_file)
    opt.seed = 42
    opt.eval_only = getattr(args, "env_eval_only", 0) == 1
    opt.hist_len = args.hist_len
    opt.reward_clip = args.reward_clip

    GC = atari.GameContext(co, opt)
    print("Version: ", GC.Version())

    num_action = GC.get_num_actions()
    print("Num Actions: ", num_action)

    desc = {}
    # For the actor model: no reward needed; we only want to get the input and
    # return a distribution over actions. The sampled action and value will be
    # filled in from the reply.
    desc["actor"] = dict(
        input=dict(id="", s=str(args.hist_len), last_r="", last_terminal="",
                   _batchsize=str(args.batchsize), _T="1"),
        reply=dict(rv="", pi=str(num_action), V="1", a="1",
                   _batchsize=str(args.batchsize), _T="1"))

    if not args.actor_only:
        # For training: group 1.
        # We want the input, action (filled by actor models), value (filled by
        # actor models) and reward.
        desc["train"] = dict(
            input=dict(rv="", id="", pi=str(num_action), s=str(args.hist_len),
                       a="1", r="1", V="1", seq="", terminal="",
                       _batchsize=str(args.batchsize), _T=str(args.T)),
            reply=None)

    # Initialize shared memory (between Python and C++) based on the
    # specification defined by desc.
    params = dict()
    params["num_action"] = GC.get_num_actions()
    params["num_group"] = 1 if args.actor_only else 2
    params["action_batchsize"] = int(desc["actor"]["input"]["_batchsize"])
    if not args.actor_only:
        params["train_batchsize"] = int(
            desc["train"]["input"]["_batchsize"])

    params["hist_len"] = args.hist_len
    params["T"] = args.T

    return GCWrapper(GC, co, desc, use_numpy=False, params=params)
def initialize(self):
    args = self.args
    co = atari.ContextOptions()
    self.context_params.initialize(co)

    opt = atari.Options()
    opt.frame_skip = args.frame_skip
    opt.rom_file = os.path.join(args.rom_dir, args.rom_file)
    opt.seed = 42
    opt.hist_len = args.hist_len
    opt.reward_clip = args.reward_clip

    GC = atari.GameContext(co, opt)
    print("Version: ", GC.Version())

    num_action = GC.get_num_actions()
    print("Num Actions: ", num_action)

    desc = []
    name2idx = {}

    # For the actor model: group 0.
    # No reward needed; we only want to get the input and return a distribution
    # over actions. The sampled action and value will be filled in from the
    # reply.
    name2idx["actor"] = len(desc)
    desc.append((
        dict(id="", s=str(args.hist_len), last_r="", last_terminal="",
             _batchsize=str(args.batchsize), _T="1"),
        dict(rv="", pi=str(num_action), V="1", a="1",
             _batchsize=str(args.batchsize), _T="1")))

    if not args.actor_only:
        # For training: group 1.
        # We want the input, action (filled by actor models), value (filled by
        # actor models) and reward.
        name2idx["train"] = len(desc)
        desc.append((
            dict(rv="", id="", pi=str(num_action), s=str(args.hist_len),
                 a="1", r="1", V="1", seq="", terminal="",
                 _batchsize=str(args.batchsize), _T=str(args.T)),
            None))

    inputs, replies = utils_elf.init_collectors(GC, co, desc, use_numpy=False)

    params = dict()
    params["num_action"] = GC.get_num_actions()
    params["num_group"] = 1 if args.actor_only else 2
    params["action_batchsize"] = int(
        desc[name2idx["actor"]][0]["_batchsize"])
    if not args.actor_only:
        params["train_batchsize"] = int(
            desc[name2idx["train"]][0]["_batchsize"])

    params["hist_len"] = args.hist_len
    params["T"] = args.T

    return utils_elf.GCWrapper(GC, inputs, replies, name2idx, params)
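# A toy illustration of the descriptor layout built above: desc is a list of
# (input_spec, reply_spec) pairs, and name2idx maps a group name to its index in
# that list. The values here are shortened stand-ins, not the real keys.
toy_desc = []
toy_name2idx = {}
toy_name2idx["actor"] = len(toy_desc)          # group 0
toy_desc.append((dict(s="4", _batchsize="16", _T="1"),
                 dict(a="1", _batchsize="16", _T="1")))
toy_name2idx["train"] = len(toy_desc)          # group 1
toy_desc.append((dict(s="4", _batchsize="16", _T="6"), None))

actor_input, actor_reply = toy_desc[toy_name2idx["actor"]]
print(int(actor_input["_batchsize"]))          # -> 16, as read into params above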