Example #1
File: game.py  Project: StephanZheng/ELF-1
    def initialize(self):
        args = self.args
        co = atari.ContextOptions()
        self.context_args.initialize(co)

        opt = atari.Options()
        opt.frame_skip = args.frame_skip
        opt.rom_file = os.path.join(args.rom_dir, args.rom_file)
        opt.seed = 42
        opt.eval_only = getattr(args, "env_eval_only", 0) == 1
        opt.hist_len = args.hist_len
        opt.reward_clip = args.reward_clip

        GC = atari.GameContext(co, opt)
        print("Version: ", GC.Version())

        params = GC.GetParams()
        print("Num Actions: ", params["num_action"])

        desc = {}
        # For the actor model: no reward needed; we only want to feed the input and return a distribution over actions.
        # The sampled action and value will be filled in from the reply.

        desc["actor"] = dict(batchsize=args.batchsize,
                             input=dict(T=1,
                                        keys=set(
                                            ["s", "last_r", "last_terminal"])),
                             reply=dict(T=1, keys=set(["rv", "pi", "V", "a"])))

        if not args.actor_only:
            # For training: group 1
            # We want input, action (filled by actor models), value (filled by actor
            # models) and reward.
            desc["train"] = dict(batchsize=args.batchsize,
                                 input=dict(T=args.T,
                                            keys=set([
                                                "rv", "id", "pi", "s", "a",
                                                "last_r", "V", "seq",
                                                "last_terminal"
                                            ])),
                                 reply=None)

        if args.additional_labels is not None:
            extra = args.additional_labels.split(",")
            for _, v in desc.items():
                v["input"]["keys"].update(extra)

        # Initialize shared memory (between Python and C++) based on the specification defined by desc.
        params["num_group"] = 1 if args.actor_only else 2
        params["action_batchsize"] = desc["actor"]["batchsize"]
        if not args.actor_only:
            params["train_batchsize"] = desc["train"]["batchsize"]
        params["hist_len"] = args.hist_len
        params["T"] = args.T

        return GCWrapper(GC, co, desc, use_numpy=False, params=params)
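The "actor" entry above only declares which tensors cross the Python/C++ boundary each step; the policy that fills the reply lives elsewhere in the training code. Below is a minimal, hypothetical handler (the function name, the batch layout, and the uniform random policy are illustrative assumptions, not part of ELF's API) showing how the reply keys "rv", "pi", "V" and "a" could be filled for one batch of states.

import numpy as np

def actor_callback(batch, num_action=4):
    # `batch` is assumed to be a dict of numpy arrays keyed by the input names
    # declared in desc["actor"]; "s" is assumed to have shape
    # [batchsize, hist_len, H, W].  All names here are illustrative.
    batchsize = batch["s"].shape[0]

    pi = np.full((batchsize, num_action), 1.0 / num_action, dtype=np.float32)  # uniform policy
    a = np.random.randint(num_action, size=batchsize).astype(np.int64)         # actions sampled uniformly, consistent with pi
    V = np.zeros(batchsize, dtype=np.float32)                                   # value estimates
    rv = np.zeros(batchsize, dtype=np.int64)                                    # reply version tag

    # The returned dict mirrors desc["actor"]["reply"]["keys"].
    return dict(rv=rv, pi=pi, V=V, a=a)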
Example #2
import sys
import time
from collections import Counter

def benchmark(ngame):
    batchsize = 16
    co, opt = initialize_game(
        batchsize, num_games=ngame,
        frame_skip=frame_skip, T=4)
    GC = atari.GameContext(co, opt)
    start = time.time()
    sys.stderr.write("Start benchmark ngame={}...\n".format(ngame))

    stats = Counter()
    for i in range(ngame): stats[i] = 0

    for k in range(Niter):
        infos = GC.Wait(0)
        #for i in range(len(infos)):
        #    stats[infos[i].meta().id] += 1
        GC.Steps(infos)
    print(stats)
    return Niter / (time.time() - start) * frame_skip * batchsize
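Each Wait(0)/Steps(infos) round processes one batch of batchsize game states, and each state advances its emulator by frame_skip frames, which is why the function returns Niter * frame_skip * batchsize / elapsed as emulator frames per second. Note that initialize_game, frame_skip and Niter are module-level definitions in the original script and are not shown here. A small, hypothetical driver sweeping a few game counts might look like this:

if __name__ == "__main__":
    for n in (16, 64, 256):   # hypothetical sweep of game counts
        fps = benchmark(n)
        print("ngame={}: {:.1f} emulator frames/sec".format(n, fps))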
Example #3
    def initialize(self):
        args = self.args
        co = atari.ContextOptions()
        self.context_args.initialize(co)

        opt = atari.Options()
        opt.frame_skip = args.frame_skip
        opt.rom_file = os.path.join(args.rom_dir, args.rom_file)
        opt.seed = 42
        opt.hist_len = args.hist_len
        opt.reward_clip = args.reward_clip

        GC = atari.GameContext(co, opt)
        print("Version: ", GC.Version())

        num_action = GC.get_num_actions()
        print("Num Actions: ", num_action)

        desc = []
        name2idx = {}
        # For the actor model: group 0
        # No reward needed; we only want to feed the input and return a distribution over actions.
        # The sampled action and value will be filled in from the reply.
        name2idx["actor"] = len(desc)

        # Descriptor of each group.
        #     desc = [(input_group0, reply_group0), (input_group1, reply_group1), ...]
        # GC.Wait(0) returns a batch of game states that all belong to the same group.
        # For example, if group 0 is registered as the actor group (history length 1)
        # and group 1 as the optimizer group (history length T), the returned group id
        # tells you which Python procedure should handle the batch.
        # For self-play, one group can be registered per player.
        desc.append((dict(id="",
                          s=str(args.hist_len),
                          last_r="",
                          last_terminal="",
                          _batchsize=str(args.batchsize),
                          _T="1"),
                     dict(rv="",
                          pi=str(num_action),
                          V="1",
                          a="1",
                          _batchsize=str(args.batchsize),
                          _T="1")))

        if not args.actor_only:
            # For training: group 1
            # We want input, action (filled by actor models), value (filled by actor
            # models) and reward.
            name2idx["train"] = len(desc)
            desc.append((dict(rv="",
                              id="",
                              pi=str(num_action),
                              s=str(args.hist_len),
                              a="1",
                              r="1",
                              V="1",
                              seq="",
                              terminal="",
                              _batchsize=str(args.batchsize),
                              _T=str(args.T)), None))

        # Initialize shared memory (between Python and C++) based on the specification defined by desc.
        inputs, replies = utils_elf.init_collectors(GC,
                                                    co,
                                                    desc,
                                                    use_numpy=False)

        params = dict()
        params["num_action"] = GC.get_num_actions()
        params["num_group"] = 1 if args.actor_only else 2
        params["action_batchsize"] = int(
            desc[name2idx["actor"]][0]["_batchsize"])
        if not args.actor_only:
            params["train_batchsize"] = int(
                desc[name2idx["train"]][0]["_batchsize"])
        params["hist_len"] = args.hist_len
        params["T"] = args.T

        return utils_elf.GCWrapper(GC, inputs, replies, name2idx, params)
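As the comments above describe, each call to GC.Wait(0) delivers a batch from a single group, and the group id selects the Python handler. The loop below only sketches that dispatch pattern; the handler names, the attribute used to read the group id from the Wait() result, and the driver function are assumptions, not the ELF API.

def on_actor_batch(inp, reply):
    # Hypothetical actor handler: read states from `inp`, write rv/pi/V/a into `reply`.
    ...

def on_train_batch(inp, reply):
    # Hypothetical training handler: consume the T-step trajectory; no reply is needed.
    ...

def run_loop(GC, name2idx, inputs, replies, num_iters=1000):
    handlers = {name2idx["actor"]: on_actor_batch}
    if "train" in name2idx:
        handlers[name2idx["train"]] = on_train_batch

    for _ in range(num_iters):
        infos = GC.Wait(0)          # one batch of game states, all from the same group
        gid = infos.gid             # assumed attribute carrying the group id
        handlers[gid](inputs[gid], replies[gid])
        GC.Steps(infos)             # hand the (possibly filled) replies back to C++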
Example #4
    def initialize(self):
        args = self.args
        co = atari.ContextOptions()
        self.context_args.initialize(co)

        opt = atari.Options()
        opt.frame_skip = args.frame_skip
        opt.rom_file = os.path.join(args.rom_dir, args.rom_file)
        opt.seed = 42
        opt.eval_only = getattr(args, "env_eval_only", 0) == 1
        opt.hist_len = args.hist_len
        opt.reward_clip = args.reward_clip

        GC = atari.GameContext(co, opt)
        print("Version: ", GC.Version())

        num_action = GC.get_num_actions()
        print("Num Actions: ", num_action)

        desc = {}
        # For the actor model: no reward needed; we only want to feed the input and return a distribution over actions.
        # The sampled action and value will be filled in from the reply.

        desc["actor"] = dict(input=dict(id="",
                                        s=str(args.hist_len),
                                        last_r="",
                                        last_terminal="",
                                        _batchsize=str(args.batchsize),
                                        _T="1"),
                             reply=dict(rv="",
                                        pi=str(num_action),
                                        V="1",
                                        a="1",
                                        _batchsize=str(args.batchsize),
                                        _T="1"))

        if not args.actor_only:
            # For training: group 1
            # We want input, action (filled by actor models), value (filled by actor
            # models) and reward.
            desc["train"] = dict(input=dict(rv="",
                                            id="",
                                            pi=str(num_action),
                                            s=str(args.hist_len),
                                            a="1",
                                            r="1",
                                            V="1",
                                            seq="",
                                            terminal="",
                                            _batchsize=str(args.batchsize),
                                            _T=str(args.T)),
                                 reply=None)

        # Initialize shared memory (between Python and C++) based on the specification defined by desc.
        params = dict()
        params["num_action"] = GC.get_num_actions()
        params["num_group"] = 1 if args.actor_only else 2
        params["action_batchsize"] = int(desc["actor"]["input"]["_batchsize"])
        if not args.actor_only:
            params["train_batchsize"] = int(
                desc["train"]["input"]["_batchsize"])
        params["hist_len"] = args.hist_len
        params["T"] = args.T

        return GCWrapper(GC, co, desc, use_numpy=False, params=params)
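In this variant each descriptor value is a string giving that key's per-state size ("" meaning a scalar per state), while the underscore entries _batchsize and _T declare the batch dimensions. A rough illustration of the tensor shapes the "train" input descriptor implies, assuming a [T, batchsize, size] layout (the layout and the concrete numbers are assumptions for illustration only):

T, batchsize, hist_len, num_action = 6, 128, 4, 6   # hypothetical values

train_shapes = {
    "s":  (T, batchsize, hist_len),    # stacked frames per state
    "pi": (T, batchsize, num_action),  # policy filled in by the actor group
    "a":  (T, batchsize, 1),           # sampled action
    "r":  (T, batchsize, 1),           # reward
    "V":  (T, batchsize, 1),           # value estimate from the actor group
    # "rv", "id", "seq" and "terminal" are declared with size "" (one scalar per state).
}
print(train_shapes)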
Example #5
    def initialize(self):
        args = self.args
        co = atari.ContextOptions()
        self.context_params.initialize(co)

        opt = atari.Options()
        opt.frame_skip = args.frame_skip
        opt.rom_file = os.path.join(args.rom_dir, args.rom_file)
        opt.seed = 42
        opt.hist_len = args.hist_len
        opt.reward_clip = args.reward_clip

        GC = atari.GameContext(co, opt)
        print("Version: ", GC.Version())

        num_action = GC.get_num_actions()
        print("Num Actions: ", num_action)

        desc = []
        name2idx = {}
        # For the actor model: group 0
        # No reward needed; we only want to feed the input and return a distribution over actions.
        # The sampled action and value will be filled in from the reply.
        name2idx["actor"] = len(desc)
        desc.append((dict(id="",
                          s=str(args.hist_len),
                          last_r="",
                          last_terminal="",
                          _batchsize=str(args.batchsize),
                          _T="1"),
                     dict(rv="",
                          pi=str(num_action),
                          V="1",
                          a="1",
                          _batchsize=str(args.batchsize),
                          _T="1")))

        if not args.actor_only:
            # For training: group 1
            # We want input, action (filled by actor models), value (filled by actor
            # models) and reward.
            name2idx["train"] = len(desc)
            desc.append((dict(rv="",
                              id="",
                              pi=str(num_action),
                              s=str(args.hist_len),
                              a="1",
                              r="1",
                              V="1",
                              seq="",
                              terminal="",
                              _batchsize=str(args.batchsize),
                              _T=str(args.T)), None))

        inputs, replies = utils_elf.init_collectors(GC,
                                                    co,
                                                    desc,
                                                    use_numpy=False)

        params = dict()
        params["num_action"] = GC.get_num_actions()
        params["num_group"] = 1 if args.actor_only else 2
        params["action_batchsize"] = int(
            desc[name2idx["actor"]][0]["_batchsize"])
        if not args.actor_only:
            params["train_batchsize"] = int(
                desc[name2idx["train"]][0]["_batchsize"])
        params["hist_len"] = args.hist_len
        params["T"] = args.T

        return utils_elf.GCWrapper(GC, inputs, replies, name2idx, params)