Example #1
    def initialize(self):
        opt = elf.Options()
        net_opt = elf.NetOptions()

        opt.loadFromArgs("", self.option_map.getOptionSpec())
        net_opt.loadFromArgs("", self.option_map.getOptionSpec())

        self.rs = elf.RemoteServers(elf.getNetOptions(opt, net_opt), ["actor", "train"])
        GC = elf.BatchReceiver(opt, self.rs)
        GC.setMode(elf.RECV_ENTRY)
        batchsize = opt.batchsize

        print("Batchsize: %d" % batchsize)

        width = 210 // 2
        height = 160 // 2
        T = 6
        num_action = 4

        spec = {}
        spec["actor"] = dict(
            input=dict(s=("float", (3, height, width))),
            reply=dict(a=("int32_t", 1), pi=("float", num_action), V=("float", 1))
        )
        '''
        spec["train"] = dict(
            input=dict(s_=(T, 3, height, width), r_=(T, 1), a_=(T, 1), pi_=(T, num_action), V_=(T, 1)),
        )
        '''

        e = GC.getExtractor()
        desc = allocExtractor(e, batchsize, spec)

        params = {
            "input_dim": width * height * 3,
            "num_action": num_action,
        }

        print("Init GC Wrapper")
        has_gpu = self.options.gpu is not None and self.options.gpu >= 0

        self.wrapper = GCWrapper(
            GC, None, batchsize, desc, num_recv=1, default_gpu=(self.options.gpu if has_gpu else None),
            use_numpy=False, params=params)

        # wrapper.reg_callback("train", self.on_train)
        self.wrapper.reg_callback("actor", self.on_actor)
        self.model = MyModel(params)
        if has_gpu:
            self.model.cuda(self.options.gpu)
        # self.optim = torch.optim.Adam(self.model.parameters())
        self.n = 0
Example #2
    def initialize(self):
        job_id = os.environ.get("job_id", "local")
        opt = tutorial.getOpt(self.option_map.getOptionSpec(), job_id)

        GC = elf.GameContext(opt.base)
        game_obj = tutorial.Client(opt)
        game_obj.setGameContext(GC)
        params = game_obj.getParams()

        batchsize = getattr(self.options, "base.batchsize")

        desc = {}
        desc["actor"] = dict(
            input=["s"],
            reply=["a", "V", "pi"],
            batchsize=batchsize,
        )

        return GCWrapper(
            GC,
            game_obj,
            batchsize,
            desc,
            num_recv=2,
            default_gpu=(self.options.gpu
                         if (self.options.gpu is not None and self.options.gpu >= 0)
                         else None),
            use_numpy=False,
            params=params)
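The wrapper returned above still needs its per-group callbacks registered before batches can flow. Below is a minimal driver sketch (not part of the original snippet); it assumes the returned GCWrapper exposes reg_callback()/start()/run() like the run loops elsewhere on this page, and "runner", "model", and "on_actor" are illustrative names only.

import torch

# Hypothetical driver (sketch). Reply keys match desc["actor"]["reply"]: "a", "V", "pi".
def on_actor(batch):
    res = model(batch)                                   # "model": assumed policy/value net
    m = torch.distributions.Categorical(res["pi"].data)  # sample one action per game
    return dict(a=m.sample(), pi=res["pi"].data, V=res["V"].data)

wrapper = runner.initialize()             # "runner": the object defining initialize() above
wrapper.reg_callback("actor", on_actor)
wrapper.start()
while True:
    wrapper.run()                         # dispatches each incoming batch to on_actor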
Example #3
    def initialize_reduced_service(self):
        args = self.args

        reference_name = "reference"
        train_name = "train"
        co, GC, params = self._init_gc(
            player_names=[train_name, reference_name])

        desc = {}
        # For the actor model, no reward is needed; we only want to get the input and return a distribution over actions.
        # The sampled action and value will be filled in from the reply.
        desc["reduced_project"] = self._get_reduced_project()
        desc["reduced_forward"] = self._get_reduced_forward()
        desc["reduced_predict"] = self._get_reduced_predict()
        if params["players"][1]["type"] == "AI_NN":
            desc["actor"] = self._get_actor_spec()
            desc["actor"]["batchsize"] //= 2
            desc["actor"]["name"] = "reference"

        self.more_labels.add_labels(desc)

        return GCWrapper(GC,
                         co,
                         desc,
                         gpu=args.gpu,
                         use_numpy=False,
                         params=params)
Example #4
    def initialize(self):
        co, GC, params = self._init_gc()
        args = self.args

        desc = {}
        # For the actor model, no reward is needed; we only want to get the input and return a distribution over actions.
        # The sampled action and value will be filled in from the reply.
        # todo : change for dqn
        # desc["actor"] = self._get_actor_q_spec()
        desc["actor"] = self._get_actor_q_spec()

        if not args.actor_only:
            # For training, we want input, action (filled by actor models), value (filled by actor models) and reward.
            # todo : change for dqn
            # desc["train"] = self._get_train_q_spec()
            desc["train"] = self._get_train_q_spec()

        self.more_labels.add_labels(desc)

        params.update(
            dict(num_group=1 if args.actor_only else 2,
                 action_batchsize=int(desc["actor"]["batchsize"]),
                 train_batchsize=int(desc["train"]["batchsize"])
                 if not args.actor_only else None,
                 T=args.T,
                 model_no_spatial=args.model_no_spatial))

        return GCWrapper(GC,
                         co,
                         desc,
                         gpu=args.gpu,
                         use_numpy=False,
                         params=params)
Example #5
    def initialize_selfplay(self):
        args = self.args
        reference_name = "reference"
        train_name = "train"

        co, GC, params = self._init_gc(player_names=[train_name, reference_name])

        desc = {}
        # For the actor model, no reward is needed; we only want to get the input and return a distribution over actions.
        # The sampled action and value will be filled in from the reply.
        desc["actor0"] = self._get_actor_spec()
        desc["actor1"] = self._get_actor_spec()

        desc["actor0"]["name"] = reference_name
        desc["actor1"]["name"] = train_name

        if not args.actor_only:
            # For training, we want input, action (filled by actor models), value (filled by actor models) and reward.
            desc["train1"] = self._get_train_spec()
            desc["train1"]["name"] = train_name

        self.more_labels.add_labels(desc)

        params.update(dict(
            num_group=1 if args.actor_only else 2,
            action_batchsize=int(desc["actor0"]["batchsize"]),
            train_batchsize=int(desc["train1"]["batchsize"]) if not args.actor_only else None,
            T=args.T,
            model_no_spatial=args.model_no_spatial
        ))

        return GCWrapper(GC, co, desc, gpu=args.gpu, use_numpy=False, params=params)
Example #6
    def initialize(self):
        args = self.args
        co = go.ContextOptions()
        self.context_args.initialize(co)

        opt = go.GameOptions()
        opt.seed = 0
        opt.list_filename = args.list_file
        opt.verbose = args.verbose
        GC = go.GameContext(co, opt)
        print("Version: ", GC.Version())

        params = GC.GetParams()
        print("Num Actions: ", params["num_action"])

        desc = {}

        # For training: group 1
        # We want input, action (filled by actor models), value (filled by actor
        # models) and reward.
        desc["train"] = dict(
            batchsize=args.batchsize,
            input=dict(T=args.T, keys=set(["features", "a"])),
            reply=None
        )

        params.update(dict(
            num_group=1 if args.actor_only else 2,
            train_batchsize=int(desc["train"]["batchsize"]),
            T=args.T,
        ))

        return GCWrapper(GC, co, desc, use_numpy=False, params=params)
Example #7
    def initialize(self):
        args = self.args
        co = atari.ContextOptions()
        self.context_args.initialize(co)

        opt = atari.Options()
        opt.frame_skip = args.frame_skip
        opt.rom_file = os.path.join(args.rom_dir, args.rom_file)
        opt.seed = 42
        opt.eval_only = getattr(args, "env_eval_only", 0) == 1
        opt.hist_len = args.hist_len
        opt.reward_clip = args.reward_clip

        GC = atari.GameContext(co, opt)
        print("Version: ", GC.Version())

        params = GC.GetParams()
        print("Num Actions: ", params["num_action"])

        desc = {}
        # For the actor model, no reward is needed; we only want to get the input and return a distribution over actions.
        # The sampled action and value will be filled in from the reply.

        desc["actor"] = dict(batchsize=args.batchsize,
                             input=dict(T=1,
                                        keys=set(
                                            ["s", "last_r", "last_terminal"])),
                             reply=dict(T=1, keys=set(["rv", "pi", "V", "a"])))

        if not args.actor_only:
            # For training: group 1
            # We want input, action (filled by actor models), value (filled by actor
            # models) and reward.
            desc["train"] = dict(batchsize=args.batchsize,
                                 input=dict(T=args.T,
                                            keys=set([
                                                "rv", "id", "pi", "s", "a",
                                                "last_r", "V", "seq",
                                                "last_terminal"
                                            ])),
                                 reply=None)

        if args.additional_labels is not None:
            extra = args.additional_labels.split(",")
            for _, v in desc.items():
                v["input"]["keys"].update(extra)

        # Initialize shared memory (between Python and C++) based on the specification defined by desc.
        params["num_group"] = 1 if args.actor_only else 2
        params["action_batchsize"] = desc["actor"]["batchsize"]
        if not args.actor_only:
            params["train_batchsize"] = desc["train"]["batchsize"]
        params["hist_len"] = args.hist_len
        params["T"] = args.T

        return GCWrapper(GC, co, desc, use_numpy=False, params=params)
Example #8
    def initialize(self):
        args = self.args
        co = go.ContextOptions()
        self.context_args.initialize(co)

        opt = go.GameOptions()
        opt.seed = 0
        opt.list_filename = args.list_file
        opt.online = args.online
        opt.verbose = args.verbose
        opt.data_aug = args.data_aug
        opt.ratio_pre_moves = args.ratio_pre_moves
        opt.start_ratio_pre_moves = args.start_ratio_pre_moves
        opt.move_cutoff = args.move_cutoff
        opt.num_games_per_thread = args.num_games_per_thread
        GC = go.GameContext(co, opt)
        print("Version: ", GC.Version())

        params = GC.GetParams()
        print("Num Actions: ", params["num_action"])

        desc = {}
        if args.online:
            desc["actor"] = dict(batchsize=args.batchsize,
                                 input=dict(T=args.T, keys=set(["s"])),
                                 reply=dict(T=args.T, keys=set(["V", "a"])))
        else:
            desc["train"] = dict(batchsize=args.batchsize,
                                 input=dict(T=args.T,
                                            keys=set(["s", "offline_a"])),
                                 reply=None)

        self.more_labels.add_labels(desc)

        params.update(dict(
            num_group=1 if args.actor_only else 2,
            T=args.T,
        ))

        return GCWrapper(GC,
                         co,
                         desc,
                         gpu=args.gpu,
                         use_numpy=False,
                         params=params)
Example #9
    def initialize(self):
        opt = go.getServerOpt(self.option_map.getOptionSpec())

        desc = {}
        GC = elf.GameContext(opt.common.base)

        mode = getattr(self.options, "common.mode")
        batchsize = getattr(self.options, "common.base.batchsize")

        if mode in ["train", "train_offline"]:
            game_obj = go.Server(opt)
            desc["train"] = dict(input=[
                "s", "offline_a", "winner", "mcts_scores", "move_idx",
                "selfplay_ver"
            ],
                                 reply=None)
            desc["train_ctrl"] = dict(input=["selfplay_ver"],
                                      reply=None,
                                      batchsize=1)
        else:
            raise "No such mode: " + mode

        game_obj.setGameContext(GC)
        params = game_obj.getParams()

        if self.options.parameter_print:
            print("**** Options ****")
            print(opt.info())
            print("*****************")
            print("Version: ", elf.version())
            print("Mode: ", mode)
            print("Num Actions: ", params["num_action"])

        self.more_labels.add_labels(desc)
        return GCWrapper(GC,
                         game_obj,
                         batchsize,
                         desc,
                         num_recv=2,
                         default_gpu=(self.options.gpu if
                                      (self.options.gpu is not None
                                       and self.options.gpu >= 0) else None),
                         use_numpy=False,
                         params=params,
                         verbose=self.options.parameter_print)
Example #10
    def initialize(self):
        co, GC, opt = self._set_params()

        params = GC.getParams()

        if self.options.parameter_print:
            print("Mode: ", opt.mode)
            print("Num Actions: ", params["num_action"])

        desc = {}
        if self.options.mode == "online":
            desc["human_actor"] = dict(
                input=["s"],
                reply=["pi", "a", "V"],
                batchsize=1,
            )
            # Used for MCTS/Direct play.
            desc["actor_black"] = dict(
                input=["s"],
                reply=["pi", "V", "a", "rv"],
                timeout_usec=10,
                batchsize=co.mcts_options.num_rollouts_per_batch)
        else:
            raise "No such mode: " + self.options.mode

        params.update(
            dict(
                num_group=1 if self.options.actor_only else 2,
                T=self.options.T,
            ))

        self.more_labels.add_labels(desc)
        return GCWrapper(GC,
                         self.max_batchsize,
                         desc,
                         num_recv=2,
                         gpu=(self.options.gpu if
                              (self.options.gpu is not None
                               and self.options.gpu >= 0) else None),
                         use_numpy=False,
                         params=params,
                         verbose=self.options.parameter_print)
Example #11
class RunGC(object):
    @classmethod
    def get_option_spec(cls):
        spec = PyOptionSpec()
        elf.saveDefaultOptionsToArgs("", spec)
        elf.saveDefaultNetOptionsToArgs("", spec)
        spec.addIntOption(
            'gpu',
            'GPU id to use',
            -1)
        spec.addStrListOption(
            "parsed_args",
            "dummy option",
            [])

        return spec

    @auto_import_options
    def __init__(self, option_map):
        self.option_map = option_map

    def initialize(self):
        opt = elf.Options()
        net_opt = elf.NetOptions()

        opt.loadFromArgs("", self.option_map.getOptionSpec())
        net_opt.loadFromArgs("", self.option_map.getOptionSpec())

        self.rs = elf.RemoteServers(elf.getNetOptions(opt, net_opt), ["actor", "train"])
        GC = elf.BatchReceiver(opt, self.rs)
        GC.setMode(elf.RECV_ENTRY)
        batchsize = opt.batchsize

        print("Batchsize: %d" % batchsize)

        width = 210 // 2
        height = 160 // 2
        T = 6
        num_action = 4

        spec = {}
        spec["actor"] = dict(
            input=dict(s=("float", (3, height, width))),
            reply=dict(a=("int32_t", 1), pi=("float", num_action), V=("float", 1))
        )
        '''
        spec["train"] = dict(
            input=dict(s_=(T, 3, height, width), r_=(T, 1), a_=(T, 1), pi_=(T, num_action), V_=(T, 1)),
        )
        '''

        e = GC.getExtractor()
        desc = allocExtractor(e, batchsize, spec)

        params = {
            "input_dim": width * height * 3,
            "num_action": num_action,
        }

        print("Init GC Wrapper")
        has_gpu = self.options.gpu is not None and self.options.gpu >= 0

        self.wrapper = GCWrapper(
            GC, None, batchsize, desc, num_recv=1, default_gpu=(self.options.gpu if has_gpu else None),
            use_numpy=False, params=params)

        # wrapper.reg_callback("train", self.on_train)
        self.wrapper.reg_callback("actor", self.on_actor)
        self.model = MyModel(params)
        if has_gpu:
            self.model.cuda(self.options.gpu)
        # self.optim = torch.optim.Adam(self.model.parameters())
        self.n = 0

    def on_actor(self, batch):
        res = self.model(batch)
        m = torch.distributions.Categorical(res["pi"].data)
        self.n += 1
        if self.n == 20:
            # gives a single float value
            #print(psutil.cpu_percent())
            # gives an object with many fields
            #print(psutil.virtual_memory())
            self.n = 0

        return dict(a=m.sample(), pi=res["pi"].data, V=res["V"].data)

    def on_train(self, batch):
        pass
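The snippet defines the class but never drives it; a hypothetical driver might look like the sketch below. It assumes the option map is parsed from RunGC.get_option_spec() by the surrounding framework and that the wrapper exposes start()/run(), mirroring the GC.Start()/GC.Run() loop shown later on this page; none of these names beyond RunGC itself come from the original code.

# Hypothetical driver for RunGC (sketch only).
runner = RunGC(option_map)        # option_map: assumed to be built from get_option_spec()
runner.initialize()               # builds the model and registers the "actor" callback
runner.wrapper.start()
while True:
    runner.wrapper.run()          # receives remote batches and invokes on_actor()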
Example #12
    def initialize(self):
        co, GC, game_opt = self._set_params()

        params = GC.getParams()

        if self.options.parameter_print:
            print("Mode: ", game_opt.mode)
            print("checkers_num_action: ", params["checkers_num_action"])

        desc = {}

        if self.options.mode == "play":
            desc["actor_white"] = dict(
                input=["s", "game_idx"],
                reply=[
                    "pi",
                    "a",
                    "V",
                ],
                batchsize=1,
            )
            desc["actor_black"] = dict(
                input=["s"],
                reply=["pi", "V", "a", "rv"],
                timeout_usec=10,
                batchsize=co.mcts_options.num_rollouts_per_batch)
        elif self.options.mode == "selfplay":
            desc["game_end"] = dict(batchsize=1, )
            desc["game_start"] = dict(batchsize=1,
                                      input=["white_ver", "black_ver"],
                                      reply=None)

            # checkers
            desc["actor_white"] = dict(
                input=["s"],
                reply=["pi", "V", "a", "rv"],
                batchsize=self.options.batchsize2
                if self.options.batchsize2 > 0 else self.options.batchsize,
                timeout_usec=self.options.selfplay_timeout_usec,
            )
            desc["actor_black"] = dict(
                input=["s"],
                reply=["pi", "V", "a", "rv"],
                batchsize=self.options.batchsize2
                if self.options.batchsize2 > 0 else self.options.batchsize,
                timeout_usec=self.options.selfplay_timeout_usec,
            )

        elif self.options.mode == "train" or self.options.mode == "offline_train":
            desc["train"] = dict(input=[
                "s", "offline_a", "winner", "mcts_scores", "move_idx",
                "selfplay_ver"
            ],
                                 reply=None)
            desc["train_ctrl"] = dict(input=["selfplay_ver"],
                                      reply=None,
                                      batchsize=1)

        else:
            raise "No such mode: " + self.options.mode

        params.update(
            dict(
                num_group=1 if self.options.actor_only else 2,
                T=self.options.T,
            ))

        self.more_labels.add_labels(desc)
        return GCWrapper(GC,
                         self.max_batchsize,
                         desc,
                         num_recv=2,
                         gpu=(self.options.gpu if
                              (self.options.gpu is not None
                               and self.options.gpu >= 0) else None),
                         use_numpy=False,
                         params=params,
                         verbose=self.options.parameter_print)
Example #13
    dict(batchsize=128,
         input=dict(T=1, keys=set(["s", "a", "last_r", "terminal"])),
         reply=dict(T=1, keys=set(["rv", "pi", "V", "a"]))),
    "reduced_forward":
    dict(batchsize=128,
         input=dict(T=1, keys=set(["s", "a", "last_r", "terminal"])),
         reply=dict(T=1, keys=set(["rv", "pi", "V", "a"]))),
    "reduced_project":
    dict(batchsize=128,
         input=dict(T=1, keys=set(["s", "a", "last_r", "terminal"])),
         reply=dict(T=1, keys=set(["rv", "pi", "V", "a"])))
}

GC = GCWrapper(GC,
               co,
               batch_descriptions,
               use_numpy=True,
               params=GC.GetParams())

GC.reg_callback("actor", actor)
GC.reg_callback("reduced_predict", reduced_predict)
GC.reg_callback("reduced_forward", reduced_forward)
GC.reg_callback("reduced_project", reduced_project)

GC.Start()

while True:
    GC.Run()

GC.Stop()
Example #14
    def initialize(self):
        args = self.args
        co = atari.ContextOptions()
        self.context_args.initialize(co)

        opt = atari.Options()
        opt.frame_skip = args.frame_skip
        opt.rom_file = os.path.join(args.rom_dir, args.rom_file)
        opt.seed = 42
        opt.eval_only = getattr(args, "env_eval_only", 0) == 1
        opt.hist_len = args.hist_len
        opt.reward_clip = args.reward_clip

        GC = atari.GameContext(co, opt)
        print("Version: ", GC.Version())

        num_action = GC.get_num_actions()
        print("Num Actions: ", num_action)

        desc = {}
        # For the actor model, no reward is needed; we only want to get the input and return a distribution over actions.
        # The sampled action and value will be filled in from the reply.

        desc["actor"] = dict(input=dict(id="",
                                        s=str(args.hist_len),
                                        last_r="",
                                        last_terminal="",
                                        _batchsize=str(args.batchsize),
                                        _T="1"),
                             reply=dict(rv="",
                                        pi=str(num_action),
                                        V="1",
                                        a="1",
                                        _batchsize=str(args.batchsize),
                                        _T="1"))

        if not args.actor_only:
            # For training: group 1
            # We want input, action (filled by actor models), value (filled by actor
            # models) and reward.
            desc["train"] = dict(input=dict(rv="",
                                            id="",
                                            pi=str(num_action),
                                            s=str(args.hist_len),
                                            a="1",
                                            r="1",
                                            V="1",
                                            seq="",
                                            terminal="",
                                            _batchsize=str(args.batchsize),
                                            _T=str(args.T)),
                                 reply=None)

        # Initialize shared memory (between Python and C++) based on the specification defined by desc.
        params = dict()
        params["num_action"] = GC.get_num_actions()
        params["num_group"] = 1 if args.actor_only else 2
        params["action_batchsize"] = int(desc["actor"]["input"]["_batchsize"])
        if not args.actor_only:
            params["train_batchsize"] = int(
                desc["train"]["input"]["_batchsize"])
        params["hist_len"] = args.hist_len
        params["T"] = args.T

        return GCWrapper(GC, co, desc, use_numpy=False, params=params)
Example #15
    def initialize(self):
        args = self.args

        co = minirts.ContextOptions()
        self.context_args.initialize(co)

        opt = minirts.Options()
        opt.seed = args.seed
        opt.frame_skip_ai = args.fs_ai
        opt.frame_skip_opponent = args.fs_opponent
        opt.simulation_type = minirts.ST_NORMAL
        opt.ai_type = getattr(minirts, args.ai_type)
        if args.ai_type == "AI_NN":
            opt.backup_ai_type = minirts.AI_SIMPLE
        if args.ai_type == "AI_FLAG_NN":
            opt.backup_ai_type = minirts.AI_FLAG_SIMPLE
        opt.opponent_ai_type = getattr(minirts, args.opponent_type)
        opt.latest_start = args.latest_start
        opt.latest_start_decay = args.latest_start_decay
        opt.mcts_threads = args.mcts_threads
        opt.mcts_rollout_per_thread = 50
        opt.max_tick = args.max_tick
        opt.handicap_level = args.handicap_level
        opt.simple_ratio = args.simple_ratio
        opt.ratio_change = args.ratio_change
        # opt.output_filename = b"simulators.txt"
        # opt.cmd_dumper_prefix = b"cmd-dump"
        # opt.save_replay_prefix = b"replay"

        GC = minirts.GameContext(co, opt)
        print("Version: ", GC.Version())

        num_action = GC.get_num_actions()
        print("Num Actions: ", num_action)

        num_unittype = GC.get_num_unittype()
        print("Num unittype: ", num_unittype)

        desc = {}
        # For the actor model, no reward is needed; we only want to get the input and return a distribution over actions.
        # The sampled action and value will be filled in from the reply.
        desc["actor"] = (dict(s=str(num_unittype + 7),
                              r0="",
                              r1="",
                              last_r="",
                              last_terminal="",
                              _batchsize=str(args.batchsize),
                              _T="1"),
                         dict(rv="",
                              pi=str(num_action),
                              V="1",
                              a="1",
                              _batchsize=str(args.batchsize),
                              _T="1"))

        if not args.actor_only:
            # For training, we want input, action (filled by actor models), value (filled by actor models) and reward.
            desc["train"] = (dict(rv="",
                                  pi=str(num_action),
                                  s=str(num_unittype + 7),
                                  r0="",
                                  r1="",
                                  a="1",
                                  r="1",
                                  V="1",
                                  terminal="",
                                  _batchsize=str(args.batchsize),
                                  _T=str(args.T)), None)

        if args.additional_labels is not None:
            extra = {label: "" for label in args.additional_labels.split(",")}
            for _, v in desc.items():
                v[0].update(extra)

        params = dict(num_action=num_action,
                      num_unit_type=num_unittype,
                      num_group=1 if args.actor_only else 2,
                      action_batchsize=int(desc["actor"][0]["_batchsize"]),
                      train_batchsize=int(desc["train"][0]["_batchsize"])
                      if not args.actor_only else None,
                      T=args.T)

        return GCWrapper(GC, co, desc, use_numpy=False, params=params)
Example #16
    def initialize(self):
        args = self.args

        co = minirts.ContextOptions()
        self.context_args.initialize(co)

        opt = minirts.Options()
        opt.seed = args.seed
        opt.frame_skip_ai = args.fs_ai
        opt.frame_skip_opponent = args.fs_opponent
        opt.simulation_type = minirts.ST_NORMAL
        opt.ai_type = getattr(minirts, args.ai_type)
        if args.ai_type == "AI_NN":
            opt.backup_ai_type = minirts.AI_SIMPLE
        if args.ai_type == "AI_FLAG_NN":
            opt.backup_ai_type = minirts.AI_FLAG_SIMPLE
        opt.opponent_ai_type = getattr(minirts, args.opponent_type)
        opt.latest_start = args.latest_start
        opt.latest_start_decay = args.latest_start_decay
        opt.mcts_threads = args.mcts_threads
        opt.mcts_rollout_per_thread = 50
        opt.max_tick = args.max_tick
        opt.handicap_level = args.handicap_level
        opt.simple_ratio = args.simple_ratio
        opt.ratio_change = args.ratio_change
        # opt.output_filename = b"simulators.txt"
        # opt.cmd_dumper_prefix = b"cmd-dump"
        # opt.save_replay_prefix = b"replay"

        GC = minirts.GameContext(co, opt)
        params = GC.GetParams()
        print("Version: ", GC.Version())

        print("Num Actions: ", params["num_action"])
        print("Num unittype: ", params["num_unit_type"])

        desc = {}
        # For the actor model, no reward is needed; we only want to get the input and return a distribution over actions.
        # The sampled action and value will be filled in from the reply.
        desc["actor"] = dict(
            batchsize=args.batchsize,
            input=dict(T=1,
                       keys=set(["s", "res", "last_r", "r0", "r1",
                                 "terminal"])),
            reply=dict(T=1, keys=set(["rv", "pi", "V", "a"])))

        if not args.actor_only:
            # For training: group 1
            # We want input, action (filled by actor models), value (filled by actor
            # models) and reward.
            desc["train"] = dict(batchsize=args.batchsize,
                                 input=dict(T=args.T,
                                            keys=set([
                                                "rv", "pi", "s", "res", "a",
                                                "last_r", "r0", "r1", "V",
                                                "terminal"
                                            ])),
                                 reply=None)

        if args.additional_labels is not None:
            extra = args.additional_labels.split(",")
            for _, v in desc.items():
                v["input"]["keys"].update(extra)

        params.update(
            dict(num_group=1 if args.actor_only else 2,
                 action_batchsize=int(desc["actor"]["batchsize"]),
                 train_batchsize=int(desc["train"]["batchsize"])
                 if not args.actor_only else None,
                 T=args.T))

        return GCWrapper(GC, co, desc, use_numpy=False, params=params)
Example #17
    def initialize(self):
        job_id = os.environ.get("job_id", "local")
        opt = go.getClientOpt(self.option_map.getOptionSpec(), job_id)
        mode = getattr(self.options, "common.mode")
        batchsize = getattr(self.options, "common.base.batchsize")

        GC = elf.GameContext(opt.common.base)

        if mode not in ["online", "selfplay"]:
            raise "No such mode: " + mode
        game_obj = go.Client(opt)
        game_obj.setGameContext(GC)

        params = game_obj.getParams()
        if self.options.parameter_print:
            print("**** Options ****")
            print(opt.info())
            print("*****************")
            print("Version: ", elf.version())
            print("Mode: ", mode)
            print("Num Actions: ", params["num_action"])

        desc = {}
        if mode == "online":
            desc["human_actor"] = dict(
                input=[],
                reply=["a", "timestamp"],
                batchsize=1,
            )
            # Used for MCTS/Direct play.
            desc["actor_black"] = dict(
                input=["s"],
                reply=["pi", "V", "a", "rv"],
                timeout_usec=10,
                batchsize=getattr(self.options,
                                  "common.mcts.num_rollout_per_batch"))
        elif mode == "selfplay":
            # Used for MCTS/Direct play.
            white_batchsize = self.options.white_mcts_rollout_per_batch
            if white_batchsize < 0:
                white_batchsize = batchsize

            desc["actor_black"] = dict(
                input=["s"],
                reply=["pi", "V", "a", "rv"],
                batchsize=batchsize,
                timeout_usec=self.options.selfplay_timeout_usec,
            )

            desc["actor_white"] = dict(
                input=["s"],
                reply=["pi", "V", "a", "rv"],
                batchsize=white_batchsize,
                timeout_usec=self.options.selfplay_timeout_usec,
            )
            desc["game_end"] = dict(batchsize=1, )
            desc["game_start"] = dict(batchsize=1,
                                      input=["black_ver", "white_ver"],
                                      reply=None)

        self.more_labels.add_labels(desc)
        return GCWrapper(GC,
                         game_obj,
                         batchsize,
                         desc,
                         num_recv=8,
                         default_gpu=(self.options.gpu if
                                      (self.options.gpu is not None
                                       and self.options.gpu >= 0) else None),
                         use_numpy=False,
                         params=params,
                         verbose=self.options.parameter_print)