Example #1
0
    def initialize(self):
        """Build the miniRTS game context plus its shared-memory collectors.

        Configures the C++ game options from ``self.args``, registers the
        batch groups (actor, and optionally train), allocates the
        Python<->C++ shared memory, and returns a ``utils_elf.GCWrapper``
        bundling the context, the input/reply batches, the
        group-name -> index map and a ``params`` dict of sizes.
        """
        args = self.args

        co = minirts.ContextOptions()
        self.context_params.initialize(co)

        opt = minirts.Options()
        opt.seed = args.seed
        opt.frame_skip_ai = args.fs_ai
        opt.frame_skip_opponent = args.fs_opponent
        opt.simulation_type = minirts.ST_NORMAL
        opt.ai_type = getattr(minirts, args.ai_type)
        # NN-driven AIs fall back on a matching rule-based backup AI.
        # The two cases are mutually exclusive, hence elif (was two
        # independent ifs; behavior is identical, intent is clearer).
        if args.ai_type == "AI_NN":
            opt.backup_ai_type = minirts.AI_SIMPLE
        elif args.ai_type == "AI_FLAG_NN":
            opt.backup_ai_type = minirts.AI_FLAG_SIMPLE
        opt.opponent_ai_type = getattr(minirts, args.opponent_type)
        opt.latest_start = args.latest_start
        opt.latest_start_decay = args.latest_start_decay
        opt.mcts_threads = args.mcts_threads
        opt.mcts_rollout_per_thread = 50  # fixed rollout budget per MCTS thread
        opt.max_tick = args.max_tick
        opt.handicap_level = args.handicap_level
        opt.simple_ratio = args.simple_ratio
        opt.ratio_change = args.ratio_change
        # Optional debug outputs; enable by uncommenting.
        # opt.output_filename = b"simulators.txt"
        # opt.cmd_dumper_prefix = b"cmd-dump"
        # opt.save_replay_prefix = b"replay"

        GC = minirts.GameContext(co, opt)
        print("Version: ", GC.Version())

        num_action = GC.get_num_actions()
        print("Num Actions: ", num_action)

        num_unittype = GC.get_num_unittype()
        print("Num unittype: ", num_unittype)

        # desc[i] = (input_spec, reply_spec) for batch group i.
        desc = []
        name2idx = {}
        # For actor model: group 0.
        # No reward needed, we only want to get input and return the
        # distribution of actions; the sampled action and value are
        # filled in from the reply.
        name2idx["actor"] = len(desc)
        desc.append((dict(id="",
                          s=str(2),
                          r0="",
                          r1="",
                          last_r="",
                          last_terminal="",
                          _batchsize=str(args.batchsize),
                          _T="1"),
                     dict(rv="",
                          pi=str(num_action),
                          V="1",
                          a="1",
                          _batchsize=str(args.batchsize),
                          _T="1")))

        if not args.actor_only:
            # For training: group 1.
            # We want input, action (filled by actor models), value
            # (filled by actor models) and reward; no reply is sent back.
            name2idx["train"] = len(desc)
            desc.append((dict(rv="",
                              id="",
                              pi=str(num_action),
                              s=str(2),
                              r0="",
                              r1="",
                              a="1",
                              r="1",
                              V="1",
                              seq="",
                              terminal="",
                              _batchsize=str(args.batchsize),
                              _T=str(args.T)), None))

        # Allocate shared memory (between Python and C++) per the specs above.
        inputs, replies = utils_elf.init_collectors(GC,
                                                    co,
                                                    desc,
                                                    use_numpy=False)

        params = dict(
            num_action=num_action,
            num_unit_type=num_unittype,
            num_group=1 if args.actor_only else 2,
            action_batchsize=int(desc[name2idx["actor"]][0]["_batchsize"]),
            train_batchsize=int(desc[name2idx["train"]][0]["_batchsize"])
            if not args.actor_only else None,
            T=args.T)

        return utils_elf.GCWrapper(GC, inputs, replies, name2idx, params)
Example #2
0
    def initialize(self):
        """Set up the Atari game context and its shared-memory collectors.

        Returns a ``utils_elf.GCWrapper`` bundling the game context, the
        per-group input/reply batches, the group-name -> index map and a
        ``params`` dict of batch/history sizes.
        """
        args = self.args
        context_opts = atari.ContextOptions()
        # NOTE(review): this example reads self.context_args while the
        # sibling examples read self.context_params — confirm which
        # attribute the enclosing class actually defines.
        self.context_args.initialize(context_opts)

        game_opts = atari.Options()
        game_opts.frame_skip = args.frame_skip
        game_opts.rom_file = os.path.join(args.rom_dir, args.rom_file)
        game_opts.seed = 42
        game_opts.hist_len = args.hist_len
        game_opts.reward_clip = args.reward_clip

        GC = atari.GameContext(context_opts, game_opts)
        print("Version: ", GC.Version())

        num_action = GC.get_num_actions()
        print("Num Actions: ", num_action)

        # Descriptor of each group:
        #     group_descs = [(input_group0, reply_group0), ...]
        # GC.Wait(i) returns a batch of game states from group i, so the
        # group id tells Python which handler to run — e.g. group 0 as
        # the actor group (history length 1) and group 1 as the optimizer
        # group (history length T). For self-play, register one group per
        # player.
        group_index = {}
        group_descs = []

        # Group 0 — actor: observations in, action distribution back.
        # No reward needed; the sampled action and value are filled in
        # from the reply.
        group_index["actor"] = len(group_descs)
        actor_input = dict(id="",
                           s=str(args.hist_len),
                           last_r="",
                           last_terminal="",
                           _batchsize=str(args.batchsize),
                           _T="1")
        actor_reply = dict(rv="",
                           pi=str(num_action),
                           V="1",
                           a="1",
                           _batchsize=str(args.batchsize),
                           _T="1")
        group_descs.append((actor_input, actor_reply))

        if not args.actor_only:
            # Group 1 — trainer: input plus the action/value filled by
            # the actor models, together with rewards; no reply.
            group_index["train"] = len(group_descs)
            train_input = dict(rv="",
                               id="",
                               pi=str(num_action),
                               s=str(args.hist_len),
                               a="1",
                               r="1",
                               V="1",
                               seq="",
                               terminal="",
                               _batchsize=str(args.batchsize),
                               _T=str(args.T))
            group_descs.append((train_input, None))

        # Allocate the Python<->C++ shared memory described above.
        inputs, replies = utils_elf.init_collectors(GC,
                                                    context_opts,
                                                    group_descs,
                                                    use_numpy=False)

        params = {
            "num_action": GC.get_num_actions(),
            "num_group": 1 if args.actor_only else 2,
            "action_batchsize": int(
                group_descs[group_index["actor"]][0]["_batchsize"]),
        }
        if not args.actor_only:
            params["train_batchsize"] = int(
                group_descs[group_index["train"]][0]["_batchsize"])
        params["hist_len"] = args.hist_len
        params["T"] = args.T

        return utils_elf.GCWrapper(GC, inputs, replies, group_index, params)
Example #3
0
    def initialize(self):
        """Create the Atari game context, register the actor/train batch
        groups, allocate the shared-memory collectors, and return the
        wrapped game context as a ``utils_elf.GCWrapper``."""
        args = self.args
        ctx_opt = atari.ContextOptions()
        self.context_params.initialize(ctx_opt)

        options = atari.Options()
        options.frame_skip = args.frame_skip
        options.rom_file = os.path.join(args.rom_dir, args.rom_file)
        options.seed = 42
        options.hist_len = args.hist_len
        options.reward_clip = args.reward_clip

        GC = atari.GameContext(ctx_opt, options)
        print("Version: ", GC.Version())

        num_action = GC.get_num_actions()
        print("Num Actions: ", num_action)

        # Hoisted spec strings reused by both group descriptors.
        hist_spec = str(args.hist_len)
        batch_spec = str(args.batchsize)

        desc = []
        # Group 0 (actor): observations in, action distribution out; no
        # reward is collected. The sampled action and value come back in
        # the reply.
        name2idx = {"actor": 0}
        desc.append((
            dict(id="",
                 s=hist_spec,
                 last_r="",
                 last_terminal="",
                 _batchsize=batch_spec,
                 _T="1"),
            dict(rv="",
                 pi=str(num_action),
                 V="1",
                 a="1",
                 _batchsize=batch_spec,
                 _T="1"),
        ))

        if not args.actor_only:
            # Group 1 (train): input plus the action/value filled by the
            # actor models, together with rewards; the trainer sends no
            # reply.
            name2idx["train"] = len(desc)
            desc.append((
                dict(rv="",
                     id="",
                     pi=str(num_action),
                     s=hist_spec,
                     a="1",
                     r="1",
                     V="1",
                     seq="",
                     terminal="",
                     _batchsize=batch_spec,
                     _T=str(args.T)),
                None,
            ))

        # Shared memory between Python and C++, shaped by desc.
        inputs, replies = utils_elf.init_collectors(
            GC, ctx_opt, desc, use_numpy=False)

        params = dict(
            num_action=GC.get_num_actions(),
            num_group=1 if args.actor_only else 2,
            action_batchsize=int(desc[name2idx["actor"]][0]["_batchsize"]),
        )
        if not args.actor_only:
            params["train_batchsize"] = int(
                desc[name2idx["train"]][0]["_batchsize"])
        params["hist_len"] = args.hist_len
        params["T"] = args.T

        return utils_elf.GCWrapper(GC, inputs, replies, name2idx, params)