Example #1
class Loader:
    def __init__(self):
        self.context_args = ContextArgs()

        self.args = ArgsProvider(
            call_from = self,
            define_args = [
                ("actor_only", dict(action="store_true")),
                ("list_file", "./train.lst"),
                ("verbose", dict(action="store_true"))
            ],
            more_args = ["batchsize", "T"],
            child_providers = [ self.context_args.args ]
        )

    def initialize(self):
        args = self.args
        co = go.ContextOptions()
        self.context_args.initialize(co)

        opt = go.GameOptions()
        opt.seed = 0
        opt.list_filename = args.list_file
        opt.verbose = args.verbose
        GC = go.GameContext(co, opt)
        print("Version: ", GC.Version())

        params = GC.GetParams()
        print("Num Actions: ", params["num_action"])

        desc = {}

        # For training: group 1
        # We want input, action (filled by actor models), value (filled by actor
        # models) and reward.
        desc["train"] = dict(
            batchsize=args.batchsize,
            input=dict(T=args.T, keys=set(["features", "a"])),
            reply=None
        )

        params.update(dict(
            num_group = 1 if args.actor_only else 2,
            train_batchsize = int(desc["train"]["batchsize"]),
            T = args.T,
        ))

        return GCWrapper(GC, co, desc, use_numpy=False, params=params)
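A minimal usage sketch for a loader like the one above (not part of the original example): it assumes the GCWrapper returned by initialize() exposes reg_callback, start, run and stop, as ELF training scripts typically do; treat those method names and the loop structure as assumptions to adapt to your ELF version.

loader = Loader()
GC = loader.initialize()

def on_train(batch):
    # Receives one "train" batch (T timesteps x batchsize); a real trainer
    # would run an optimizer step here.
    pass

# "train" matches the desc group defined in Loader.initialize() above.
GC.reg_callback("train", on_train)
GC.start()
for _ in range(1000):   # main loop; each run() serves one batch of requests
    GC.run()
GC.stop()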
Example #2
class Loader(object):
    @classmethod
    def get_option_spec(cls):
        spec = PyOptionSpec()
        spec.addBoolOption('actor_only', 'TODO: fill this help message in',
                           False)
        spec.addStrListOption(
            'list_files', 'Provide a list of json files for offline training',
            [])
        spec.addIntOption('port', 'TODO: fill this help message in', 5556)
        spec.addStrOption('server_addr', 'TODO: fill this help message in', '')
        spec.addStrOption('server_id', 'TODO: fill this help message in', '')
        spec.addIntOption('q_min_size', 'TODO: fill this help message in', 10)
        spec.addIntOption('q_max_size', 'TODO: fill this help message in',
                          1000)
        spec.addIntOption('num_reader', 'TODO: fill this help message in', 50)
        spec.addIntOption('num_reset_ranking',
                          'TODO: fill this help message in', 5000)
        spec.addIntOption(
            'client_max_delay_sec',
            'Maximum allowed delay in seconds. If the client has not '
            'responded after that, we consider it dead.', 1200)
        spec.addBoolOption('verbose', 'TODO: fill this help message in', False)
        spec.addBoolOption('keep_prev_selfplay',
                           'TODO: fill this help message in', False)
        spec.addIntOption(
            'num_games_per_thread',
            ('For offline mode, it is the number of concurrent games per '
             'thread, used to increase diversity of games; for selfplay mode, '
             'it is the number of games played at each thread, and after that '
             'we need to call restartAllGames() to resume.'), -1)
        spec.addIntOption('expected_num_clients', 'Expected number of clients',
                          -1)
        spec.addIntOption('checkers_num_future_actions',
                          'TODO: fill this help message in', 1)
        spec.addStrOption('mode', 'TODO: fill this help message in', 'play')
        spec.addBoolOption('black_use_policy_network_only',
                           'TODO: fill this help message in', False)
        spec.addBoolOption('white_use_policy_network_only',
                           'TODO: fill this help message in', False)
        spec.addBoolOption('use_mcts', 'TODO: fill this help message in',
                           False)
        spec.addBoolOption('use_mcts_ai2', 'TODO: fill this help message in',
                           False)
        spec.addFloatOption(
            'white_puct',
            'PUCT for white when it is > 0.0. If it is -1 then we use '
            'the same puct for both sides (specified by mcts_options). '
            'A HACK to use different pucts for different models. Should '
            'be replaced by a more systematic approach.', -1.0)
        spec.addIntOption('white_mcts_rollout_per_batch',
                          'white mcts rollout per batch', -1)
        spec.addIntOption('white_mcts_rollout_per_thread',
                          'white mcts rollout per thread', -1)
        spec.addStrOption('dump_record_prefix',
                          'TODO: fill this help message in', '')
        spec.addStrOption('selfplay_records_directory',
                          'TODO: fill this help message in', '')
        spec.addStrOption('eval_records_directory',
                          'TODO: fill this help message in', '')
        spec.addStrOption('records_buffer_directory',
                          'TODO: fill this help message in', '')
        spec.addIntOption('policy_distri_cutoff',
                          'first N moves will be sampled randomly', 0)
        spec.addIntOption('selfplay_timeout_usec',
                          'TODO: fill this help message in', 0)
        spec.addIntOption('gpu', 'TODO: fill this help message in', -1)
        spec.addBoolOption('policy_distri_training_for_all',
                           'TODO: fill this help message in', False)
        spec.addBoolOption('parameter_print',
                           'TODO: fill this help message in', True)
        spec.addIntOption('batchsize', 'batch size', 128)
        spec.addIntOption('batchsize2', 'batch size', -1)
        spec.addIntOption('T', 'number of timesteps', 6)
        spec.addIntOption(
            'selfplay_init_num',
            ('Initial number of selfplay games to generate before training a '
             'new model'), 2000)
        spec.addIntOption(
            'selfplay_update_num',
            ('Additional number of selfplay games to generate after a model '
             'is updated'), 1000)
        spec.addBoolOption('selfplay_async',
                           ('Whether to use async mode in selfplay'), False)
        spec.addIntOption(
            'eval_num_games',
            ('number of evaluation games to play to decide whether a model '
             'is better than the other'), 400)
        spec.addFloatOption('eval_winrate_thres',
                            'Win rate threshold for evaluation', 0.55)
        spec.addIntOption(
            'eval_old_model',
            ('If specified, then we directly switch to evaluation mode '
             'between the loaded model and the old model specified by this '
             'switch'), -1)
        spec.addStrOption(
            'eval_model_pair',
            ('If specified for df_selfplay.py, then the two models will be '
             'evaluated on this client'), '')
        spec.addBoolOption(
            'cheat_eval_new_model_wins_half',
            'When enabled, in evaluation mode, when the game '
            'finishes, the player with the most recent model gets 100% '
            'win rate half of the time. '
            'This is used to test the framework.', False)
        spec.addBoolOption(
            'cheat_selfplay_random_result',
            'When enabled, in selfplay mode the result of the game is random. '
            'This is used to test the framework.', False)
        spec.addBoolOption('human_plays_for_black', '', False)
        spec.addIntOption(
            'suicide_after_n_games',
            'return after n games have finished, -1 means it never ends', -1)

        spec.merge(PyOptionSpec.fromClasses((ContextArgs, MoreLabels)))
        return spec

    @auto_import_options
    def __init__(self, option_map):
        self.context_args = ContextArgs(option_map)
        self.more_labels = MoreLabels(option_map)

    def _set_params(self):
        co = american_checkers.ContextOptions()
        self.context_args.initialize(co)
        co.job_id = os.environ.get("job_id", "local")
        if self.options.parameter_print:
            co.print()

        game_opt = american_checkers.GameOptions()

        game_opt.seed = 0
        game_opt.list_files = self.options.list_files

        if self.options.server_addr:
            game_opt.server_addr = self.options.server_addr
        else:
            if self.options.server_id:
                game_opt.server_addr = addrs[self.options.server_id]
                game_opt.server_id = self.options.server_id
            else:
                game_opt.server_addr = ""
                game_opt.server_id = ""

        game_opt.port = self.options.port
        game_opt.mode = self.options.mode
        game_opt.use_mcts = self.options.use_mcts
        game_opt.use_mcts_ai2 = self.options.use_mcts_ai2
        game_opt.dump_record_prefix = self.options.dump_record_prefix

        game_opt.selfplay_records_directory = \
            self.options.selfplay_records_directory
        if self.options.selfplay_records_directory and not os.path.exists(
                self.options.selfplay_records_directory):
            os.mkdir(self.options.selfplay_records_directory)

        game_opt.eval_records_directory = self.options.eval_records_directory
        if self.options.eval_records_directory and not os.path.exists(
                self.options.eval_records_directory):
            os.mkdir(self.options.eval_records_directory)

        game_opt.records_buffer_directory = \
            self.options.records_buffer_directory
        if self.options.records_buffer_directory and not os.path.exists(
                self.options.records_buffer_directory):
            os.mkdir(self.options.records_buffer_directory)

        game_opt.policy_distri_training_for_all = \
            self.options.policy_distri_training_for_all
        game_opt.verbose = self.options.verbose
        game_opt.black_use_policy_network_only = \
            self.options.black_use_policy_network_only
        game_opt.white_use_policy_network_only = \
            self.options.white_use_policy_network_only
        game_opt.q_min_size = self.options.q_min_size
        game_opt.q_max_size = self.options.q_max_size
        game_opt.num_reader = self.options.num_reader
        game_opt.checkers_num_future_actions = self.options.checkers_num_future_actions
        game_opt.num_reset_ranking = self.options.num_reset_ranking
        game_opt.policy_distri_cutoff = self.options.policy_distri_cutoff
        game_opt.num_games_per_thread = self.options.num_games_per_thread
        game_opt.keep_prev_selfplay = self.options.keep_prev_selfplay
        game_opt.expected_num_clients = self.options.expected_num_clients

        game_opt.white_puct = self.options.white_puct
        game_opt.white_mcts_rollout_per_batch = \
            self.options.white_mcts_rollout_per_batch
        game_opt.white_mcts_rollout_per_thread = \
            self.options.white_mcts_rollout_per_thread

        game_opt.client_max_delay_sec = self.options.client_max_delay_sec
        game_opt.selfplay_init_num = self.options.selfplay_init_num
        game_opt.selfplay_update_num = self.options.selfplay_update_num
        game_opt.selfplay_async = self.options.selfplay_async
        game_opt.eval_num_games = self.options.eval_num_games
        game_opt.eval_thres = self.options.eval_winrate_thres
        game_opt.cheat_eval_new_model_wins_half = \
            self.options.cheat_eval_new_model_wins_half
        game_opt.cheat_selfplay_random_result = \
            self.options.cheat_selfplay_random_result

        if self.options.human_plays_for_black:
            game_opt.human_plays_for = 0
        else:
            game_opt.human_plays_for = 1

        self.max_batchsize = max(
            self.options.batchsize, self.options.batchsize2) \
            if self.options.batchsize2 > 0 \
            else self.options.batchsize
        co.batchsize = self.max_batchsize

        GC = american_checkers.GameContext(co, game_opt)

        if self.options.parameter_print:
            print("**************** GameOptions ****************")
            print(game_opt.info())
            print("*********************************************")
            print("Version: ", GC.ctx().version())
            print("*********************************************")

        return co, GC, game_opt

    def initialize(self):
        co, GC, game_opt = self._set_params()

        params = GC.getParams()

        if self.options.parameter_print:
            print("Mode: ", game_opt.mode)
            print("checkers_num_action: ", params["checkers_num_action"])

        desc = {}

        if self.options.mode == "play":
            desc["actor_white"] = dict(
                input=["s", "game_idx"],
                reply=[
                    "pi",
                    "a",
                    "V",
                ],
                batchsize=1,
            )
            desc["actor_black"] = dict(
                input=["s"],
                reply=["pi", "V", "a", "rv"],
                timeout_usec=10,
                batchsize=co.mcts_options.num_rollouts_per_batch)
        elif self.options.mode == "selfplay":
            desc["game_end"] = dict(batchsize=1, )
            desc["game_start"] = dict(batchsize=1,
                                      input=["white_ver", "black_ver"],
                                      reply=None)

            # checkers
            desc["actor_white"] = dict(
                input=["s"],
                reply=["pi", "V", "a", "rv"],
                batchsize=self.options.batchsize2
                if self.options.batchsize2 > 0 else self.options.batchsize,
                timeout_usec=self.options.selfplay_timeout_usec,
            )
            desc["actor_black"] = dict(
                input=["s"],
                reply=["pi", "V", "a", "rv"],
                batchsize=self.options.batchsize2
                if self.options.batchsize2 > 0 else self.options.batchsize,
                timeout_usec=self.options.selfplay_timeout_usec,
            )

        elif self.options.mode == "train" or self.options.mode == "offline_train":
            desc["train"] = dict(input=[
                "s", "offline_a", "winner", "mcts_scores", "move_idx",
                "selfplay_ver"
            ],
                                 reply=None)
            desc["train_ctrl"] = dict(input=["selfplay_ver"],
                                      reply=None,
                                      batchsize=1)

        else:
            raise "No such mode: " + self.options.mode

        params.update(
            dict(
                num_group=1 if self.options.actor_only else 2,
                T=self.options.T,
            ))

        self.more_labels.add_labels(desc)
        return GCWrapper(GC,
                         self.max_batchsize,
                         desc,
                         num_recv=2,
                         gpu=(self.options.gpu if
                              (self.options.gpu is not None
                               and self.options.gpu >= 0) else None),
                         use_numpy=False,
                         params=params,
                         verbose=self.options.parameter_print)
Example #3
class Loader(object):
    @classmethod
    def get_option_spec(cls):
        spec = PyOptionSpec()
        spec.addStrOption(
            'preload_sgf',
            'TODO: fill this help message in',
            '')
        spec.addIntOption(
            'preload_sgf_move_to',
            'TODO: fill this help message in',
            -1)
        spec.addBoolOption(
            'actor_only',
            'TODO: fill this help message in',
            False)
        spec.addStrListOption(
            'list_files',
            'Provide a list of json files for offline training',
            [])
        spec.addIntOption(
            'port',
            'TODO: fill this help message in',
            5556)
        spec.addStrOption(
            'server_addr',
            'TODO: fill this help message in',
            '')
        spec.addStrOption(
            'server_id',
            'TODO: fill this help message in',
            '')
        spec.addIntOption(
            'q_min_size',
            'TODO: fill this help message in',
            10)
        spec.addIntOption(
            'q_max_size',
            'TODO: fill this help message in',
            1000)
        spec.addIntOption(
            'num_reader',
            'TODO: fill this help message in',
            50)
        spec.addIntOption(
            'num_reset_ranking',
            'TODO: fill this help message in',
            5000)
        spec.addIntOption(
            'client_max_delay_sec',
            'Maximum allowed delay in seconds. If the client has not '
            'responded after that, we consider it dead.',
            1200)
        spec.addBoolOption(
            'verbose',
            'TODO: fill this help message in',
            False)
        spec.addBoolOption(
            'keep_prev_selfplay',
            'TODO: fill this help message in',
            False)
        spec.addBoolOption(
            'print_result',
            'TODO: fill this help message in',
            False)
        spec.addIntOption(
            'data_aug',
            'specify data augmentation, 0-7, -1 means random',
            -1)
        spec.addIntOption(
            'ratio_pre_moves',
            ('how many moves to perform in each thread, before we use the '
             'data to train the model'),
            0)
        spec.addFloatOption(
            'start_ratio_pre_moves',
            ('how many moves to perform in each thread, before we use the '
             'first sgf file to train the model'),
            0.5)
        spec.addIntOption(
            'num_games_per_thread',
            ('For offline mode, it is the number of concurrent games per '
             'thread, used to increase diversity of games; for selfplay mode, '
             'it is the number of games played at each thread, and after that '
             'we need to call restartAllGames() to resume.'),
            -1)
        spec.addIntOption(
            'expected_num_clients',
            'Expected number of clients',
            -1
        )
        spec.addIntOption(
            'num_future_actions',
            'TODO: fill this help message in',
            1)
        spec.addIntOption(
            'move_cutoff',
            'Cutoff ply in replay',
            -1)
        spec.addStrOption(
            'mode',
            'TODO: fill this help message in',
            'online')
        spec.addBoolOption(
            'black_use_policy_network_only',
            'TODO: fill this help message in',
            False)
        spec.addBoolOption(
            'white_use_policy_network_only',
            'TODO: fill this help message in',
            False)
        spec.addIntOption(
            'ply_pass_enabled',
            'TODO: fill this help message in',
            0)
        spec.addBoolOption(
            'use_mcts',
            'TODO: fill this help message in',
            False)
        spec.addBoolOption(
            'use_mcts_ai2',
            'TODO: fill this help message in',
            False)
        spec.addFloatOption(
            'white_puct',
            'PUCT for white when it is > 0.0. If it is -1 then we use '
            'the same puct for both sides (specified by mcts_options). '
            'A HACK to use different pucts for different models. Should '
            'be replaced by a more systematic approach.',
            -1.0)
        spec.addIntOption(
            'white_mcts_rollout_per_batch',
            'white mcts rollout per batch',
            -1)
        spec.addIntOption(
            'white_mcts_rollout_per_thread',
            'white mcts rollout per thread',
            -1)
        spec.addBoolOption(
            'use_df_feature',
            'TODO: fill this help message in',
            False)
        spec.addStrOption(
            'dump_record_prefix',
            'TODO: fill this help message in',
            '')
        spec.addIntOption(
            'policy_distri_cutoff',
            'TODO: fill this help message in',
            0)
        spec.addFloatOption(
            'resign_thres',
            'TODO: fill this help message in',
            0.0)
        spec.addBoolOption(
            'following_pass',
            'TODO: fill this help message in',
            False)
        spec.addIntOption(
            'selfplay_timeout_usec',
            'TODO: fill this help message in',
            0)
        spec.addIntOption(
            'gpu',
            'TODO: fill this help message in',
            -1)
        spec.addBoolOption(
            'policy_distri_training_for_all',
            'TODO: fill this help message in',
            False)
        spec.addBoolOption(
            'parameter_print',
            'TODO: fill this help message in',
            True)
        spec.addIntOption(
            'batchsize',
            'batch size',
            128)
        spec.addIntOption(
            'batchsize2',
            'batch size',
            -1)
        spec.addIntOption(
            'T',
            'number of timesteps',
            6)
        spec.addIntOption(
            'selfplay_init_num',
            ('Initial number of selfplay games to generate before training a '
             'new model'),
            2000)
        spec.addIntOption(
            'selfplay_update_num',
            ('Additional number of selfplay games to generate after a model '
             'is updated'),
            1000)
        spec.addBoolOption(
            'selfplay_async',
            ('Whether to use async mode in selfplay'),
            False)
        spec.addIntOption(
            'eval_num_games',
            ('number of evaluation games to play to decide whether a model '
             'is better than the other'),
            400)
        spec.addFloatOption(
            'eval_winrate_thres',
            'Win rate threshold for evaluation',
            0.55)
        spec.addIntOption(
            'eval_old_model',
            ('If specified, then we directly switch to evaluation mode '
             'between the loaded model and the old model specified by this '
             'switch'),
            -1)
        spec.addStrOption(
            'eval_model_pair',
            ('If specified for df_selfplay.py, then the two models will be '
             'evaluated on this client'),
            '')
        spec.addStrOption(
            'comment',
            'Comment for this run',
            '')
        spec.addBoolOption(
            'cheat_eval_new_model_wins_half',
            'When enabled, in evaluation mode, when the game '
            'finishes, the player with the most recent model gets 100% '
            'win rate half of the time. '
            'This is used to test the framework.',
            False)
        spec.addBoolOption(
            'cheat_selfplay_random_result',
            'When enabled, in selfplay mode the result of the game is random. '
            'This is used to test the framework.',
            False)
        spec.addIntOption(
            'suicide_after_n_games',
            'return after n games have finished, -1 means it never ends',
            -1)

        spec.merge(PyOptionSpec.fromClasses((ContextArgs, MoreLabels)))

        return spec

    @auto_import_options
    def __init__(self, option_map):
        self.context_args = ContextArgs(option_map)
        self.more_labels = MoreLabels(option_map)

    def _set_params(self):
        co = go.ContextOptions()
        self.context_args.initialize(co)
        co.job_id = os.environ.get("job_id", "local")
        if self.options.parameter_print:
            co.print()

        opt = go.GameOptions()
        opt.seed = 0
        opt.list_files = self.options.list_files

        if self.options.server_addr:
            opt.server_addr = self.options.server_addr
        else:
            if self.options.server_id:
                opt.server_addr = addrs[self.options.server_id]
                opt.server_id = self.options.server_id
            else:
                opt.server_addr = ""
                opt.server_id = ""

        opt.port = self.options.port
        opt.mode = self.options.mode
        opt.use_mcts = self.options.use_mcts
        opt.use_mcts_ai2 = self.options.use_mcts_ai2
        opt.use_df_feature = self.options.use_df_feature
        opt.dump_record_prefix = self.options.dump_record_prefix
        opt.policy_distri_training_for_all = \
            self.options.policy_distri_training_for_all
        opt.verbose = self.options.verbose
        opt.black_use_policy_network_only = \
            self.options.black_use_policy_network_only
        opt.white_use_policy_network_only = \
            self.options.white_use_policy_network_only
        opt.data_aug = self.options.data_aug
        opt.ratio_pre_moves = self.options.ratio_pre_moves
        opt.q_min_size = self.options.q_min_size
        opt.q_max_size = self.options.q_max_size
        opt.num_reader = self.options.num_reader
        opt.start_ratio_pre_moves = self.options.start_ratio_pre_moves
        opt.ply_pass_enabled = self.options.ply_pass_enabled
        opt.num_future_actions = self.options.num_future_actions
        opt.num_reset_ranking = self.options.num_reset_ranking
        opt.move_cutoff = self.options.move_cutoff
        opt.policy_distri_cutoff = self.options.policy_distri_cutoff
        opt.num_games_per_thread = self.options.num_games_per_thread
        opt.following_pass = self.options.following_pass
        opt.resign_thres = self.options.resign_thres
        opt.preload_sgf = self.options.preload_sgf
        opt.preload_sgf_move_to = self.options.preload_sgf_move_to
        opt.keep_prev_selfplay = self.options.keep_prev_selfplay
        opt.expected_num_clients = self.options.expected_num_clients

        opt.white_puct = self.options.white_puct
        opt.white_mcts_rollout_per_batch = \
            self.options.white_mcts_rollout_per_batch
        opt.white_mcts_rollout_per_thread = \
            self.options.white_mcts_rollout_per_thread

        opt.client_max_delay_sec = self.options.client_max_delay_sec
        opt.print_result = self.options.print_result
        opt.selfplay_init_num = self.options.selfplay_init_num
        opt.selfplay_update_num = self.options.selfplay_update_num
        opt.selfplay_async = self.options.selfplay_async
        opt.eval_num_games = self.options.eval_num_games
        opt.eval_thres = self.options.eval_winrate_thres
        opt.cheat_eval_new_model_wins_half = \
            self.options.cheat_eval_new_model_wins_half
        opt.cheat_selfplay_random_result = \
            self.options.cheat_selfplay_random_result

        self.max_batchsize = max(
            self.options.batchsize, self.options.batchsize2) \
            if self.options.batchsize2 > 0 \
            else self.options.batchsize
        co.batchsize = self.max_batchsize

        GC = go.GameContext(co, opt)

        if self.options.parameter_print:
            print("**** Options ****")
            print(opt.info())
            print("*****************")
            print("Version: ", GC.ctx().version())

        return co, GC, opt

    def initialize(self):
        co, GC, opt = self._set_params()

        params = GC.getParams()

        if self.options.parameter_print:
            print("Mode: ", opt.mode)
            print("Num Actions: ", params["num_action"])

        desc = {}
        if self.options.mode == "online":
            desc["human_actor"] = dict(
                input=["s"],
                reply=["pi", "a", "V"],
                batchsize=1,
            )
            # Used for MCTS/Direct play.
            desc["actor_black"] = dict(
                input=["s"],
                reply=["pi", "V", "a", "rv"],
                timeout_usec=10,
                batchsize=co.mcts_options.num_rollouts_per_batch
            )
        elif self.options.mode == "selfplay":
            # Used for MCTS/Direct play.
            desc["actor_black"] = dict(
                input=["s"],
                reply=["pi", "V", "a", "rv"],
                batchsize=self.options.batchsize,
                timeout_usec=self.options.selfplay_timeout_usec,
            )
            desc["actor_white"] = dict(
                input=["s"],
                reply=["pi", "V", "a", "rv"],
                batchsize=self.options.batchsize2
                if self.options.batchsize2 > 0
                else self.options.batchsize,
                timeout_usec=self.options.selfplay_timeout_usec,
            )
            desc["game_end"] = dict(
                batchsize=1,
            )
            desc["game_start"] = dict(
                batchsize=1,
                input=["black_ver", "white_ver"],
                reply=None
            )
        elif self.options.mode == "train" or \
                self.options.mode == "offline_train":
            desc["train"] = dict(
                input=["s", "offline_a", "winner", "mcts_scores", "move_idx",
                       "selfplay_ver"],
                reply=None
            )
            desc["train_ctrl"] = dict(
                input=["selfplay_ver"],
                reply=None,
                batchsize=1
            )
        else:
            raise "No such mode: " + self.options.mode

        params.update(dict(
            num_group=1 if self.options.actor_only else 2,
            T=self.options.T,
        ))

        self.more_labels.add_labels(desc)
        return GCWrapper(
            GC,
            self.max_batchsize,
            desc,
            num_recv=2,
            gpu=(self.options.gpu
                 if (self.options.gpu is not None and self.options.gpu >= 0)
                 else None),
            use_numpy=False,
            params=params,
            verbose=self.options.parameter_print)
Example #4
class CommonLoader:
    def __init__(self, module):
        self.context_args = ContextArgs()
        self.more_labels = MoreLabels()
        self.module = module

        basic_define_args = [
            ("handicap_level", 0),
            ("players", dict(type=str, help=";-separated player infos. For example: type=AI_NN,fs=50,args=backup/AI_SIMPLE|decay/0.99|start/1000,fow=True;type=AI_SIMPLE,fs=50")),
            ("max_tick", dict(type=int, default=30000, help="Maximal tick")),
            ("shuffle_player", dict(action="store_true")),      # defaults to False
            ("num_frames_in_state", 1),
            ("max_unit_cmd", 1),
            ("seed", 0),
            ("actor_only", dict(action="store_true")),          # defaults to False
            ("model_no_spatial", dict(action="store_true")),    # TODO: move this into the model
            ("save_replay_prefix", dict(type=str, default=None)),
            ("output_file", dict(type=str, default=None)),
            ("cmd_dumper_prefix", dict(type=str, default=None)),
            ("gpu", dict(type=int, help="gpu to use", default=None)),
        ]

        self.args = ArgsProvider(
            call_from = self,
            define_args = basic_define_args + self._define_args(),
            more_args = ["num_games", "batchsize", "T"],
            child_providers = [ self.context_args.args, self.more_labels.args ]
        )

    def _set_key(self, ai_options, key, value):
        if not hasattr(ai_options, key):
            print("AIOptions does not have key = " + key)
            return

        # Can we automate this?
        bool_convert = dict(t=True, true=True, f=False, false=False)

        try:
            if key == "fow":
                setattr(ai_options, key, bool_convert[value.lower()])
            elif key == "name" or key == "args" or key == "type":
                setattr(ai_options, key, value)
            else:
                setattr(ai_options, key, int(value))
        except ValueError:
            print("Value error! key = " + str(key) + " value = " + str(value))
            sys.exit(1)

    def _parse_players(self, opt, player_names):
        players_str = str(self.args.players)
        if players_str[0] == "\"" and players_str[-1] == "\"":
            players_str = players_str[1:-1]

        player_infos = []
        for i, player in enumerate(players_str.split(";")):
            ai_options = self.module.AIOptions()
            ai_options.num_frames_in_state = self.args.num_frames_in_state
            info = dict()
            for item in player.split(","):
                key, value = item.split("=")
                self._set_key(ai_options, key, value)
                info[key] = value

            if player_names is not None:
                self._set_key(ai_options, "name", player_names[i])
                info["name"] = player_names[i]

            opt.AddAIOptions(ai_options)
            player_infos.append(info)

        return player_infos

    def _init_gc(self, player_names=None):
        args = self.args

        co = self.module.ContextOptions()  # see elf/python_options_utils_cpp.h
        self.context_args.initialize(co)

        opt = self.module.PythonOptions()
        opt.seed = args.seed
        opt.shuffle_player = args.shuffle_player
        opt.max_unit_cmd = args.max_unit_cmd
        opt.max_tick = args.max_tick
        # [TODO] Put it to TD.
        opt.handicap_level = args.handicap_level

        player_infos = self._parse_players(opt, player_names)  # parse the player string and register each AI

        # opt.output_filename = b"simulators.txt"
        # opt.output_filename = b"cout"
        if args.save_replay_prefix is not None:
            opt.save_replay_prefix = args.save_replay_prefix.encode('ascii')
        if args.output_file is not None:
            opt.output_filename = args.output_file.encode("ascii")
        if args.cmd_dumper_prefix is not None:
            opt.cmd_dumper_prefix = args.cmd_dumper_prefix.encode("ascii")

        print("Options:")
        opt.Print()

        print("ContextOptions:")
        co.print()

        GC = self.module.GameContext(co, opt)
        self._on_gc(GC)

        params = GC.GetParams()
        print("Version: ", GC.Version())
        print("Num Actions: ", params["num_action"])
        print("Num unittype: ", params["num_unit_type"])
        print("num planes: ", params["num_planes"])
        params["rts_engine_version"] = GC.Version()
        params["players"] = player_infos

        return co, GC, params

    def _define_args(self):
        return []

    def _on_gc(self, GC):
        pass

    @abc.abstractmethod
    def _get_train_spec(self):
        pass

    @abc.abstractmethod
    def _get_actor_spec(self):
        pass

    def initialize(self):
        co, GC, params = self._init_gc()
        args = self.args
        desc = {}
        # For the actor model, no reward is needed; we only want to get the input and
        # return the distribution over actions. The sampled action and value are filled in from the reply.
        desc["actor"] = self._get_actor_spec()

        if not args.actor_only:
            # For training, we want input, action (filled by actor models), value (filled by actor models) and reward.
            desc["train"] = self._get_train_spec()

        self.more_labels.add_labels(desc)  

        params.update(dict(
            num_group = 1 if args.actor_only else 2,
            action_batchsize = int(desc["actor"]["batchsize"]),
            train_batchsize = int(desc["train"]["batchsize"]) if not args.actor_only else None,
            T = args.T,
            model_no_spatial = args.model_no_spatial
        ))
        

        return GCWrapper(GC, co, desc, gpu=args.gpu, use_numpy=False, params=params)

    def initialize_selfplay(self):
        args = self.args
        reference_name = "reference"
        train_name = "train"

        co, GC, params = self._init_gc(player_names=[train_name, reference_name])

        desc = {}
        # For the actor model, no reward is needed; we only want to get the input and
        # return the distribution over actions. The sampled action and value are filled in from the reply.
        desc["actor0"] = self._get_actor_spec()
        desc["actor1"] = self._get_actor_spec()

        desc["actor0"]["name"] = reference_name
        desc["actor1"]["name"] = train_name

        if not args.actor_only:
            # For training, we want input, action (filled by actor models), value (filled by actor models) and reward.
            desc["train1"] = self._get_train_spec()
            desc["train1"]["name"] = train_name

        self.more_labels.add_labels(desc)

        params.update(dict(
            num_group = 1 if args.actor_only else 2,
            action_batchsize = int(desc["actor0"]["batchsize"]),
            train_batchsize = int(desc["train1"]["batchsize"]) if not args.actor_only else None,
            T = args.T,
            model_no_spatial = args.model_no_spatial
        ))

        return GCWrapper(GC, co, desc, gpu=args.gpu, use_numpy=False, params=params)

    def initialize_reduced_service(self):
        args = self.args

        reference_name = "reference"
        train_name = "train"
        co, GC, params = self._init_gc(player_names=[train_name, reference_name])

        desc = {}
        # For the actor model, no reward is needed; we only want to get the input and
        # return the distribution over actions. The sampled action and value are filled in from the reply.
        desc["reduced_project"] = self._get_reduced_project()
        desc["reduced_forward"] = self._get_reduced_forward()
        desc["reduced_predict"] = self._get_reduced_predict()
        if params["players"][1]["type"] == "AI_NN":
            desc["actor"] = self._get_actor_spec()
            desc["actor"]["batchsize"] //= 2
            desc["actor"]["name"] = "reference"

        self.more_labels.add_labels(desc)

        return GCWrapper(GC, co, desc, gpu=args.gpu, use_numpy=False, params=params)
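For reference, a hedged sketch of how a concrete subclass might fill in the abstract _get_actor_spec / _get_train_spec hooks used above. This subclass is hypothetical and not part of the original; the spec shape mirrors the batchsize/input/reply dictionaries from Example #7, and the exact keys depend on the GCWrapper version in use.

class MyGameLoader(CommonLoader):
    # Hypothetical subclass, for illustration only.
    def _get_actor_spec(self):
        # Actor group: one timestep in, policy/value/action returned in the reply.
        return dict(
            batchsize=self.args.batchsize,
            input=dict(T=1, keys=set(["s", "last_r", "last_terminal"])),
            reply=dict(T=1, keys=set(["rv", "pi", "V", "a"])))

    def _get_train_spec(self):
        # Train group: T timesteps of experience, no reply needed.
        return dict(
            batchsize=self.args.batchsize,
            input=dict(T=self.args.T,
                       keys=set(["rv", "pi", "s", "a", "last_r", "V",
                                 "last_terminal"])),
            reply=None)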
Example #5
class Loader:
    def __init__(self):
        self.context_args = ContextArgs()

        self.args = ArgsProvider(call_from=self,
                                 define_args=[
                                     ("frame_skip", 4), ("hist_len", 4),
                                     ("rom_file", "pong.bin"),
                                     ("actor_only", dict(action="store_true")),
                                     ("reward_clip", 1),
                                     ("rom_dir", os.path.dirname(__file__))
                                 ],
                                 more_args=["batchsize", "T", "env_eval_only"],
                                 child_providers=[self.context_args.args])

    def initialize(self):
        args = self.args
        co = atari.ContextOptions()
        self.context_args.initialize(co)

        opt = atari.Options()
        opt.frame_skip = args.frame_skip
        opt.rom_file = os.path.join(args.rom_dir, args.rom_file)
        opt.seed = 42
        opt.eval_only = getattr(args, "env_eval_only", 0) == 1
        opt.hist_len = args.hist_len
        opt.reward_clip = args.reward_clip

        GC = atari.GameContext(co, opt)
        print("Version: ", GC.Version())

        num_action = GC.get_num_actions()
        print("Num Actions: ", num_action)

        desc = {}
        # For the actor model, no reward is needed; we only want to get the input and
        # return the distribution over actions. The sampled action and value are filled in from the reply.

        desc["actor"] = dict(input=dict(id="",
                                        s=str(args.hist_len),
                                        last_r="",
                                        last_terminal="",
                                        _batchsize=str(args.batchsize),
                                        _T="1"),
                             reply=dict(rv="",
                                        pi=str(num_action),
                                        V="1",
                                        a="1",
                                        _batchsize=str(args.batchsize),
                                        _T="1"))

        if not args.actor_only:
            # For training: group 1
            # We want input, action (filled by actor models), value (filled by actor
            # models) and reward.
            desc["train"] = dict(input=dict(rv="",
                                            id="",
                                            pi=str(num_action),
                                            s=str(args.hist_len),
                                            a="1",
                                            r="1",
                                            V="1",
                                            seq="",
                                            terminal="",
                                            _batchsize=str(args.batchsize),
                                            _T=str(args.T)),
                                 reply=None)

        # Initialize shared memory (between Python and C++) based on the specification defined by desc.
        params = dict()
        params["num_action"] = GC.get_num_actions()
        params["num_group"] = 1 if args.actor_only else 2
        params["action_batchsize"] = int(desc["actor"]["input"]["_batchsize"])
        if not args.actor_only:
            params["train_batchsize"] = int(
                desc["train"]["input"]["_batchsize"])
        params["hist_len"] = args.hist_len
        params["T"] = args.T

        return GCWrapper(GC, co, desc, use_numpy=False, params=params)
Example #6
class Loader:
    def __init__(self):
        self.context_args = ContextArgs()

        self.args = ArgsProvider(
            call_from=self,
            define_args=[
                ("handicap_level", 0),
                ("latest_start", 1000),
                ("latest_start_decay", 0.7),
                ("fs_ai", 50),
                ("fs_opponent", 50),
                ("ai_type",
                 dict(type=str,
                      choices=[
                          "AI_SIMPLE", "AI_HIT_AND_RUN", "AI_NN", "AI_FLAG_NN",
                          "AI_TD_NN"
                      ],
                      default="AI_NN")),
                ("opponent_type",
                 dict(type=str,
                      choices=[
                          "AI_SIMPLE", "AI_HIT_AND_RUN", "AI_FLAG_SIMPLE",
                          "AI_TD_BUILT_IN"
                      ],
                      default="AI_SIMPLE")),
                ("max_tick", dict(type=int, default=30000,
                                  help="Maximal tick")),
                ("mcts_threads", 64),
                ("seed", 0),
                ("simple_ratio", -1),
                ("ratio_change", 0),
                ("actor_only", dict(action="store_true")),
                ("additional_labels",
                 dict(
                     type=str,
                     default=None,
                     help=
                     "Add additional labels in the batch. E.g., id,seq,last_terminal"
                 )),
            ],
            more_args=["batchsize", "T"],
            child_providers=[self.context_args.args])

    def initialize(self):
        args = self.args

        co = minirts.ContextOptions()
        self.context_args.initialize(co)

        opt = minirts.Options()
        opt.seed = args.seed
        opt.frame_skip_ai = args.fs_ai
        opt.frame_skip_opponent = args.fs_opponent
        opt.simulation_type = minirts.ST_NORMAL
        opt.ai_type = getattr(minirts, args.ai_type)
        if args.ai_type == "AI_NN":
            opt.backup_ai_type = minirts.AI_SIMPLE
        if args.ai_type == "AI_FLAG_NN":
            opt.backup_ai_type = minirts.AI_FLAG_SIMPLE
        opt.opponent_ai_type = getattr(minirts, args.opponent_type)
        opt.latest_start = args.latest_start
        opt.latest_start_decay = args.latest_start_decay
        opt.mcts_threads = args.mcts_threads
        opt.mcts_rollout_per_thread = 50
        opt.max_tick = args.max_tick
        opt.handicap_level = args.handicap_level
        opt.simple_ratio = args.simple_ratio
        opt.ratio_change = args.ratio_change
        # opt.output_filename = b"simulators.txt"
        # opt.cmd_dumper_prefix = b"cmd-dump"
        # opt.save_replay_prefix = b"replay"

        GC = minirts.GameContext(co, opt)
        print("Version: ", GC.Version())

        num_action = GC.get_num_actions()
        print("Num Actions: ", num_action)

        num_unittype = GC.get_num_unittype()
        print("Num unittype: ", num_unittype)

        desc = {}
        # For the actor model, no reward is needed; we only want to get the input and
        # return the distribution over actions. The sampled action and value are filled in from the reply.
        desc["actor"] = (dict(s=str(num_unittype + 7),
                              r0="",
                              r1="",
                              last_r="",
                              last_terminal="",
                              _batchsize=str(args.batchsize),
                              _T="1"),
                         dict(rv="",
                              pi=str(num_action),
                              V="1",
                              a="1",
                              _batchsize=str(args.batchsize),
                              _T="1"))

        if not args.actor_only:
            # For training, we want input, action (filled by actor models), value (filled by actor models) and reward.
            desc["train"] = (dict(rv="",
                                  pi=str(num_action),
                                  s=str(num_unittype + 7),
                                  r0="",
                                  r1="",
                                  a="1",
                                  r="1",
                                  V="1",
                                  terminal="",
                                  _batchsize=str(args.batchsize),
                                  _T=str(args.T)), None)

        if args.additional_labels is not None:
            extra = {label: "" for label in args.additional_labels.split(",")}
            for _, v in desc.items():
                v[0].update(extra)

        params = dict(num_action=num_action,
                      num_unit_type=num_unittype,
                      num_group=1 if args.actor_only else 2,
                      action_batchsize=int(desc["actor"][0]["_batchsize"]),
                      train_batchsize=int(desc["train"][0]["_batchsize"])
                      if not args.actor_only else None,
                      T=args.T)

        return GCWrapper(GC, co, desc, use_numpy=False, params=params)
Example #7
class Loader:
    def __init__(self):
        self.context_args = ContextArgs()

        self.args = ArgsProvider(
            call_from=self,
            define_args=[
                ("frame_skip", 4),
                ("hist_len", 4),
                ("rom_file", "pong.bin"),
                ("actor_only", dict(action="store_true")),
                ("reward_clip", 1),
                ("rom_dir", os.path.dirname(__file__)),
                ("additional_labels",
                 dict(
                     type=str,
                     default=None,
                     help=
                     "Add additional labels in the batch. E.g., id,seq,last_terminal"
                 )),
                ("gpu", dict(type=int, default=None)),
            ],
            more_args=["batchsize", "T", "env_eval_only"],
            child_providers=[self.context_args.args])

    def initialize(self):
        args = self.args
        co = atari.ContextOptions()
        self.context_args.initialize(co)

        opt = atari.GameOptions()
        opt.frame_skip = args.frame_skip
        opt.rom_file = os.path.join(args.rom_dir, args.rom_file)
        opt.seed = 42
        opt.eval_only = getattr(args, "env_eval_only", 0) == 1
        opt.hist_len = args.hist_len
        opt.reward_clip = args.reward_clip

        GC = atari.GameContext(co, opt)
        print("Version: ", GC.Version())

        params = GC.GetParams()
        print("Num Actions: ", params["num_action"])

        desc = {}
        # For the actor model, no reward is needed; we only want to get the input and
        # return the distribution over actions. The sampled action and value are filled in from the reply.

        desc["actor"] = dict(batchsize=args.batchsize,
                             input=dict(T=1,
                                        keys=set(
                                            ["s", "last_r", "last_terminal"])),
                             reply=dict(T=1, keys=set(["rv", "pi", "V", "a"])))

        if not args.actor_only:
            # For training: group 1
            # We want input, action (filled by actor models), value (filled by actor
            # models) and reward.
            desc["train"] = dict(batchsize=args.batchsize,
                                 input=dict(T=args.T,
                                            keys=set([
                                                "rv", "id", "pi", "s", "a",
                                                "last_r", "V", "seq",
                                                "last_terminal"
                                            ])),
                                 reply=None)

        if args.additional_labels is not None:
            extra = args.additional_labels.split(",")
            for _, v in desc.items():
                v["input"]["keys"].update(extra)

        # Initialize shared memory (between Python and C++) based on the specification defined by desc.
        params["num_group"] = 1 if args.actor_only else 2
        params["action_batchsize"] = desc["actor"]["batchsize"]
        if not args.actor_only:
            params["train_batchsize"] = desc["train"]["batchsize"]
        params["hist_len"] = args.hist_len
        params["T"] = args.T

        return GCWrapper(GC,
                         co,
                         desc,
                         gpu=args.gpu,
                         use_numpy=False,
                         params=params)
Example #8
class Loader(object):
    @classmethod
    def get_option_spec(cls):
        spec = PyOptionSpec()
        spec.addStrOption('preload_sgf', 'TODO: fill this help message in', '')
        spec.addIntOption('preload_sgf_move_to',
                          'TODO: fill this help message in', -1)
        spec.addStrOption('mode', 'TODO: fill this help message in', "online")
        spec.addBoolOption('actor_only', 'TODO: fill this help message in',
                           False)
        spec.addIntOption('num_reset_ranking',
                          'TODO: fill this help message in', 5000)
        spec.addBoolOption('verbose', 'TODO: fill this help message in', False)
        spec.addBoolOption('print_result', 'TODO: fill this help message in',
                           False)
        spec.addIntOption('data_aug',
                          'specify data augmentation, 0-7, -1 means random',
                          -1)
        spec.addIntOption(
            'num_games_per_thread',
            ('For offline mode, it is the number of concurrent games per '
             'thread, used to increase diversity of games; for selfplay mode, '
             'it is the number of games played at each thread, and after that '
             'we need to call restartAllGames() to resume.'), -1)
        spec.addIntOption('num_future_actions',
                          'TODO: fill this help message in', 1)
        spec.addIntOption('move_cutoff', 'Cutoff ply in replay', -1)
        spec.addBoolOption('black_use_policy_network_only',
                           'TODO: fill this help message in', False)
        spec.addIntOption('ply_pass_enabled',
                          'TODO: fill this help message in', 0)
        spec.addBoolOption('use_mcts', 'TODO: fill this help message in',
                           False)
        spec.addBoolOption('use_df_feature', 'TODO: fill this help message in',
                           False)
        spec.addStrOption('dump_record_prefix',
                          'TODO: fill this help message in', '')
        spec.addFloatOption('resign_thres', 'TODO: fill this help message in',
                            0.0)
        spec.addBoolOption('following_pass', 'TODO: fill this help message in',
                           False)
        spec.addIntOption('gpu', 'TODO: fill this help message in', -1)
        spec.addBoolOption('parameter_print',
                           'TODO: fill this help message in', True)
        spec.addIntOption('batchsize', 'batch size', 128)
        spec.addIntOption('batchsize2', 'batch size', -1)
        spec.addFloatOption('eval_winrate_thres',
                            'Win rate threshold for evaluation', 0.55)
        spec.addIntOption(
            'suicide_after_n_games',
            'return after n games have finished, -1 means it never ends', -1)

        spec.merge(PyOptionSpec.fromClasses((ContextArgs, MoreLabels)))

        return spec

    @auto_import_options
    def __init__(self, option_map):
        self.context_args = ContextArgs(option_map)
        self.more_labels = MoreLabels(option_map)

    def _set_params(self):
        co = go.ContextOptions()
        self.context_args.initialize(co)
        co.job_id = os.environ.get("job_id", "local")
        if self.options.parameter_print:
            co.print()

        opt = go.GameOptions()
        opt.seed = 0

        opt.mode = self.options.mode
        opt.use_mcts = self.options.use_mcts
        opt.use_df_feature = self.options.use_df_feature
        opt.dump_record_prefix = self.options.dump_record_prefix
        opt.verbose = self.options.verbose
        opt.black_use_policy_network_only = \
            self.options.black_use_policy_network_only
        opt.data_aug = self.options.data_aug
        opt.ply_pass_enabled = self.options.ply_pass_enabled
        opt.num_reset_ranking = self.options.num_reset_ranking
        opt.move_cutoff = self.options.move_cutoff
        opt.num_games_per_thread = self.options.num_games_per_thread
        opt.following_pass = self.options.following_pass
        opt.resign_thres = self.options.resign_thres
        opt.preload_sgf = self.options.preload_sgf
        opt.preload_sgf_move_to = self.options.preload_sgf_move_to

        opt.print_result = self.options.print_result

        self.max_batchsize = max(
            self.options.batchsize, self.options.batchsize2) \
            if self.options.batchsize2 > 0 \
            else self.options.batchsize
        co.batchsize = self.max_batchsize

        GC = go.GameContext(co, opt)

        if self.options.parameter_print:
            print("**** Options ****")
            print(opt.info())
            print("*****************")
            print("Version: ", GC.ctx().version())

        return co, GC, opt

    def initialize(self):
        co, GC, opt = self._set_params()

        params = GC.getParams()

        if self.options.parameter_print:
            print("Mode: ", opt.mode)
            print("Num Actions: ", params["num_action"])

        desc = {}
        if self.options.mode == "online":
            desc["human_actor"] = dict(
                input=["s"],
                reply=["pi", "a", "V"],
                batchsize=1,
            )
            # Used for MCTS/Direct play.
            desc["actor_black"] = dict(
                input=["s"],
                reply=["pi", "V", "a", "rv"],
                timeout_usec=10,
                batchsize=co.mcts_options.num_rollouts_per_batch)
        else:
            raise "No such mode: " + self.options.mode

        params.update(
            dict(
                num_group=1 if self.options.actor_only else 2,
                T=self.options.T,
            ))

        self.more_labels.add_labels(desc)
        return GCWrapper(GC,
                         self.max_batchsize,
                         desc,
                         num_recv=2,
                         gpu=(self.options.gpu if
                              (self.options.gpu is not None
                               and self.options.gpu >= 0) else None),
                         use_numpy=False,
                         params=params,
                         verbose=self.options.parameter_print)
Example #9
class CommonLoader:
    def __init__(self, module):
        self.context_args = ContextArgs()
        self.module = module

        self.args = ArgsProvider(
            call_from=self,
            define_args=[
                ("handicap_level", 0),
                ("players",
                 dict(
                     type=str,
                     help=
                     ";-separated player infos. For example: type=AI_NN,fs=50,args=backup/AI_SIMPLE|decay/0.99|start/1000,fow=True;type=AI_SIMPLE,fs=50"
                 )),
                ("max_tick", dict(type=int, default=30000,
                                  help="Maximal tick")),
                ("shuffle_player", dict(action="store_true")),
                ("mcts_threads", 64),
                ("seed", 0),
                ("actor_only", dict(action="store_true")),
                ("additional_labels",
                 dict(
                     type=str,
                     default=None,
                     help=
                     "Add additional labels in the batch. E.g., id,seq,last_terminal"
                 )),
                ("model_no_spatial",
                 dict(action="store_true")),  # TODO, put it to model
                ("save_replay_prefix", dict(type=str, default=None)),
                ("output_file", dict(type=str, default=None)),
                ("cmd_dumper_prefix", dict(type=str, default=None))
            ],
            more_args=["batchsize", "T", "gpu"],
            child_providers=[self.context_args.args])

    def _set_key(self, ai_options, key, value):
        if not hasattr(ai_options, key):
            print("AIOptions does not have key = " + key)
            return

        # Can we automate this?
        bool_convert = dict(t=True, true=True, f=False, false=False)

        try:
            if key == "fow":
                setattr(ai_options, key, bool_convert[value.lower()])
            elif key == "name" or key == "args" or key == "type":
                setattr(ai_options, key, value)
            else:
                setattr(ai_options, key, int(value))
        except ValueError:
            print("Value error! key = " + str(key) + " value = " + str(value))
            sys.exit(1)

    def _parse_players(self, opt, player_names):
        players_str = str(self.args.players)
        if players_str[0] == "\"" and players_str[-1] == "\"":
            players_str = players_str[1:-1]

        for i, player in enumerate(players_str.split(";")):
            ai_options = self.module.AIOptions()
            for item in player.split(","):
                key, value = item.split("=")
                self._set_key(ai_options, key, value)
            if player_names is not None:
                self._set_key(ai_options, "name", player_names[i])
            opt.AddAIOptions(ai_options)

    def _init_gc(self, player_names=None):
        args = self.args

        co = self.module.ContextOptions()
        self.context_args.initialize(co)

        opt = self.module.PythonOptions()
        opt.seed = args.seed
        opt.shuffle_player = args.shuffle_player
        opt.mcts_threads = args.mcts_threads
        opt.mcts_rollout_per_thread = 50
        opt.max_tick = args.max_tick
        # [TODO] Put it to TD.
        opt.handicap_level = args.handicap_level

        self._parse_players(opt, player_names)

        # opt.output_filename = b"simulators.txt"
        # opt.output_filename = b"cout"
        if args.save_replay_prefix is not None:
            opt.save_replay_prefix = args.save_replay_prefix.encode('ascii')
        if args.output_file is not None:
            opt.output_filename = args.output_file.encode("ascii")
        if args.cmd_dumper_prefix is not None:
            opt.cmd_dumper_prefix = args.cmd_dumper_prefix.encode("ascii")
        opt.Print()

        GC = self.module.GameContext(co, opt)
        params = GC.GetParams()
        print("Version: ", GC.Version())
        print("Num Actions: ", params["num_action"])
        print("Num unittype: ", params["num_unit_type"])
        params["rts_engine_version"] = GC.Version()

        return co, GC, params

    def _add_more_labels(self, desc):
        args = self.args
        if args.additional_labels is None: return

        extra = args.additional_labels.split(",")
        for _, v in desc.items():
            v["input"]["keys"].update(extra)

    def _add_player_name(self, desc, player_name):
        desc["filters"] = dict(player_name=player_name)

    def initialize(self):
        co, GC, params = self._init_gc()
        args = self.args

        desc = {}
        # For the actor model, no reward is needed; we only want to feed the input
        # and get back a distribution over actions. The sampled action and value
        # are filled in from the reply.
        desc["actor"] = self._get_actor_spec()

        if not args.actor_only:
            # For training, we want input, action (filled by actor models), value (filled by actor models) and reward.
            desc["train"] = self._get_train_spec()

        self._add_more_labels(desc)

        params.update(
            dict(num_group=1 if args.actor_only else 2,
                 action_batchsize=int(desc["actor"]["batchsize"]),
                 train_batchsize=int(desc["train"]["batchsize"])
                 if not args.actor_only else None,
                 T=args.T,
                 model_no_spatial=args.model_no_spatial))

        return GCWrapper(GC,
                         co,
                         desc,
                         gpu=args.gpu,
                         use_numpy=False,
                         params=params)

    def initialize_selfplay(self):
        args = self.args
        reference_name = "reference"
        train_name = "train"

        co, GC, params = self._init_gc(
            player_names=[train_name, reference_name])

        desc = {}
        # For the actor model, no reward is needed; we only want to feed the input
        # and get back a distribution over actions. The sampled action and value
        # are filled in from the reply.
        desc["actor0"] = self._get_actor_spec()
        desc["actor1"] = self._get_actor_spec()

        self._add_player_name(desc["actor0"], reference_name)
        self._add_player_name(desc["actor1"], train_name)

        if not args.actor_only:
            # For training, we want input, action (filled by actor models), value (filled by actor models) and reward.
            desc["train1"] = self._get_train_spec()
            self._add_player_name(desc["train1"], train_name)

        self._add_more_labels(desc)

        params.update(
            dict(num_group=1 if args.actor_only else 2,
                 action_batchsize=int(desc["actor0"]["batchsize"]),
                 train_batchsize=int(desc["train1"]["batchsize"])
                 if not args.actor_only else None,
                 T=args.T,
                 model_no_spatial=args.model_no_spatial))

        return GCWrapper(GC,
                         co,
                         desc,
                         gpu=args.gpu,
                         use_numpy=False,
                         params=params)
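The players option used by CommonLoader above packs per-player settings into a single ';'-separated string, e.g. type=AI_NN,fs=50,args=backup/AI_SIMPLE|decay/0.99|start/1000,fow=True;type=AI_SIMPLE,fs=50. Below is a self-contained sketch of how that string decomposes; it is illustrative only, since the real loader writes the values into the module's AIOptions objects as shown above.

def parse_players(players_str):
    # Split a ';'-separated player string into per-player dicts.
    # Each player is a ','-separated list of key=value pairs; nested values
    # such as the 'args' field keep their '|' and '/' separators untouched.
    players = []
    for player in players_str.strip('"').split(";"):
        info = {}
        for item in player.split(","):
            key, value = item.split("=", 1)
            info[key] = value
        players.append(info)
    return players

print(parse_players(
    "type=AI_NN,fs=50,args=backup/AI_SIMPLE|decay/0.99|start/1000,fow=True;"
    "type=AI_SIMPLE,fs=50"))
# [{'type': 'AI_NN', 'fs': '50',
#   'args': 'backup/AI_SIMPLE|decay/0.99|start/1000', 'fow': 'True'},
#  {'type': 'AI_SIMPLE', 'fs': '50'}]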
Example #10
class Loader:
    def __init__(self):
        self.context_args = ContextArgs()
        self.more_labels = MoreLabels()

        self.args = ArgsProvider(
            call_from = self,
            define_args = [
                ("actor_only", dict(action="store_true")),
                ("list_file", "./train.lst"),
                ("verbose", dict(action="store_true")),
                ("data_aug", dict(type=int, default=-1, help="specify data augumentation, 0-7, -1 mean random")),
                ("ratio_pre_moves", dict(type=float, default=0, help="how many moves to perform in each thread, before we use the data train the model")),
                ("start_ratio_pre_moves", dict(type=float, default=0.5, help="how many moves to perform in each thread, before we use the first sgf file to train the model")),
                ("num_games_per_thread", dict(type=int, default=5, help="number of concurrent games per threads, used to increase diversity of games")),
                ("move_cutoff", dict(type=int, default=-1, help="Cutoff ply in replay")),
                ("mode", "online"),
                ("use_mcts", dict(action="store_true")),
                ("gpu", dict(type=int, default=None))
            ],
            more_args = ["batchsize", "T"],
            child_providers = [ self.context_args.args, self.more_labels.args ]
        )

    def initialize(self):
        args = self.args
        co = go.ContextOptions()
        self.context_args.initialize(co)
        co.print()

        opt = go.GameOptions()
        opt.seed = 0
        opt.list_filename = args.list_file
        opt.mode = args.mode
        opt.use_mcts = args.use_mcts
        opt.verbose = args.verbose
        opt.data_aug = args.data_aug
        opt.ratio_pre_moves = args.ratio_pre_moves
        opt.start_ratio_pre_moves = args.start_ratio_pre_moves
        opt.move_cutoff = args.move_cutoff
        opt.num_games_per_thread = args.num_games_per_thread
        GC = go.GameContext(co, opt)
        print("Version: ", GC.Version())

        params = GC.GetParams()
        print("Num Actions: ", params["num_action"])

        desc = {}
        if args.mode == "online":
            desc["human_actor"] = dict(
                batchsize=args.batchsize,
                input=dict(T=1, keys=set(["s"])),
                reply=dict(T=1, keys=set(["pi", "a"])),
                name="human_actor",
            )
            # Used for MCTS/Direct play.
            desc["actor"] = dict(
                batchsize=args.batchsize,
                input=dict(T=1, keys=set(["s"])),
                reply=dict(T=1, keys=set(["pi", "V", "a"])),
                name="actor",
            )
        elif args.mode == "selfplay":
            # Used for MCTS/Direct play.
            desc["actor"] = dict(
                batchsize=args.batchsize,
                input=dict(T=1, keys=set(["s"])),
                reply=dict(T=1, keys=set(["pi", "V"])),
                name="actor",
                timeout_usec = 10,
            )
        else:
            desc["train"] = dict(
                batchsize=args.batchsize,
                input=dict(T=args.T, keys=set(["s", "offline_a"])),
                reply=None
            )

        self.more_labels.add_labels(desc)

        params.update(dict(
            num_group = 1 if args.actor_only else 2,
            T = args.T,
        ))

        return GCWrapper(GC, co, desc, gpu=args.gpu, use_numpy=False, params=params)
Example #11
class Loader:
    def __init__(self):
        self.context_args = ContextArgs()

        self.args = ArgsProvider(
            call_from = self,
            define_args = [
                ("handicap_level", 0),
                ("latest_start", 1000),
                ("latest_start_decay", 0.7),
                ("fs_ai", 50),
                ("fs_opponent", 50),
                ("ai_type", dict(type=str, choices=["AI_SIMPLE", "AI_HIT_AND_RUN", "AI_NN", "AI_FLAG_NN", "AI_TD_NN"], default="AI_NN")),
                ("opponent_type", dict(type=str, choices=["AI_SIMPLE", "AI_HIT_AND_RUN", "AI_FLAG_SIMPLE", "AI_TD_BUILT_IN"], default="AI_SIMPLE")),
                ("max_tick", dict(type=int, default=30000, help="Maximal tick")),
                ("mcts_threads", 64),
                ("seed", 0),
                ("simple_ratio", -1),
                ("ratio_change", 0),
                ("actor_only", dict(action="store_true")),
                ("additional_labels", dict(type=str, default=None, help="Add additional labels in the batch. E.g., id,seq,last_terminal")),
            ],
            more_args = ["batchsize", "T"],
            child_providers = [ self.context_args.args ]
        )

    def initialize(self):
        args = self.args

        co = minirts.ContextOptions()
        self.context_args.initialize(co)

        opt = minirts.Options()
        opt.seed = args.seed
        opt.frame_skip_ai = args.fs_ai
        opt.frame_skip_opponent = args.fs_opponent
        opt.simulation_type = minirts.ST_NORMAL
        opt.ai_type = getattr(minirts, args.ai_type)
        if args.ai_type == "AI_NN":
            opt.backup_ai_type = minirts.AI_SIMPLE
        if args.ai_type == "AI_FLAG_NN":
            opt.backup_ai_type = minirts.AI_FLAG_SIMPLE
        opt.opponent_ai_type = getattr(minirts, args.opponent_type)
        opt.latest_start = args.latest_start
        opt.latest_start_decay = args.latest_start_decay
        opt.mcts_threads = args.mcts_threads
        opt.mcts_rollout_per_thread = 50
        opt.max_tick = args.max_tick
        opt.handicap_level = args.handicap_level
        opt.simple_ratio = args.simple_ratio
        opt.ratio_change = args.ratio_change
        # opt.output_filename = b"simulators.txt"
        # opt.cmd_dumper_prefix = b"cmd-dump"
        # opt.save_replay_prefix = b"replay"

        GC = minirts.GameContext(co, opt)
        print("Version: ", GC.Version())

        num_action = GC.get_num_actions()
        print("Num Actions: ", num_action)

        num_unittype = GC.get_num_unittype()
        print("Num unittype: ", num_unittype)

        desc = {}
        # For the actor model, no reward is needed; we only want to feed the input
        # and get back a distribution over actions. The sampled action and value
        # are filled in from the reply.
        desc["actor"] = dict(
            input=dict(s=str(num_unittype+7), r0="", r1="", last_r="", last_terminal="", _batchsize=str(args.batchsize), _T="1"),
            reply=dict(rv="", pi=str(num_action), V="1", a="1", _batchsize=str(args.batchsize), _T="1")
        )

        if not args.actor_only:
            # For training, we want input, action (filled by actor models), value (filled by actor models) and reward.
            desc["train"] = dict(
                input=dict(rv="", pi=str(num_action), s=str(num_unittype+7),
                           r0="", r1="", a="1", r="1", V="1", terminal="",
                           _batchsize=str(args.batchsize), _T=str(args.T)),
                reply=None
            )

        if args.additional_labels is not None:
            extra = { label : "" for label in args.additional_labels.split(",") }
            for _, v in desc.items():
                v["input"].update(extra)

        params = dict(
            num_action = num_action,
            num_unit_type = num_unittype,
            num_group = 1 if args.actor_only else 2,
            action_batchsize = int(desc["actor"]["input"]["_batchsize"]),
            train_batchsize = int(desc["train"]["input"]["_batchsize"]) if not args.actor_only else None,
            T = args.T
        )

        return GCWrapper(GC, co, desc, use_numpy=False, params=params)
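The minirts-style desc above encodes sizes as strings: per-key channel counts plus _batchsize and _T metadata. Below is a self-contained sketch of how such an entry could be read back into concrete shapes; the (T, batchsize, channels) ordering, the "empty string means one channel" reading, and the batch size of 128 are assumptions for illustration, not taken from the library.

def spec_shapes(entry):
    # Turn a string-encoded spec entry (like desc["actor"]["input"] above)
    # into per-key shapes. Assumed convention: empty string -> 1 channel.
    batchsize = int(entry["_batchsize"])
    T = int(entry["_T"])
    shapes = {}
    for key, channels in entry.items():
        if key.startswith("_"):
            continue
        shapes[key] = (T, batchsize, int(channels) if channels else 1)
    return shapes

# With a hypothetical num_unittype of 22, the actor input above becomes:
actor_input = dict(s=str(22 + 7), r0="", r1="", last_r="", last_terminal="",
                   _batchsize="128", _T="1")
print(spec_shapes(actor_input))
# {'s': (1, 128, 29), 'r0': (1, 128, 1), 'r1': (1, 128, 1),
#  'last_r': (1, 128, 1), 'last_terminal': (1, 128, 1)}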
Example #12
class CommonLoader:
    def __init__(self, module):
        self.context_args = ContextArgs()
        self.more_labels = MoreLabels()
        self.module = module

        basic_define_args = [
            ("handicap_level", 0),
            ("players", dict(type=str, help=";-separated player infos. For example: type=AI_NN,fs=50,args=backup/AI_SIMPLE|decay/0.99|start/1000,fow=True;type=AI_SIMPLE,fs=50")),
            ("max_tick", dict(type=int, default=30000, help="Maximal tick")),
            ("shuffle_player", dict(action="store_true")),
            ("num_frames_in_state", 1),
            ("max_unit_cmd", 1),
            ("seed", 0),
            ("actor_only", dict(action="store_true")),
            ("model_no_spatial", dict(action="store_true")), # TODO, put it to model
            ("save_replay_prefix", dict(type=str, default=None)),
            ("output_file", dict(type=str, default=None)),
            ("cmd_dumper_prefix", dict(type=str, default=None)),
            ("gpu", dict(type=int, help="gpu to use", default=None)),
        ]

        self.args = ArgsProvider(
            call_from = self,
            define_args = basic_define_args + self._define_args(),
            more_args = ["num_games", "batchsize", "T"],
            child_providers = [ self.context_args.args, self.more_labels.args ]
        )

    def _set_key(self, ai_options, key, value):
        if not hasattr(ai_options, key):
            print("AIOptions does not have key = " + key)
            return

        # Can we automate this?
        bool_convert = dict(t=True, true=True, f=False, false=False)

        try:
            if key == "fow":
                setattr(ai_options, key, bool_convert[value.lower()])
            elif key == "name" or key == "args" or key == "type":
                setattr(ai_options, key, value)
            else:
                setattr(ai_options, key, int(value))
        except ValueError:
            print("Value error! key = " + str(key) + " value = " + str(value))
            sys.exit(1)

    def _parse_players(self, opt, player_names):
        players_str = str(self.args.players)
        if players_str[0] == "\"" and players_str[-1] == "\"":
            players_str = players_str[1:-1]

        player_infos = []
        for i, player in enumerate(players_str.split(";")):
            ai_options = self.module.AIOptions()
            ai_options.num_frames_in_state = self.args.num_frames_in_state
            info = dict()
            for item in player.split(","):
                key, value = item.split("=")
                self._set_key(ai_options, key, value)
                info[key] = value

            if player_names is not None:
                self._set_key(ai_options, "name", player_names[i])
                info["name"] = player_names[i]

            opt.AddAIOptions(ai_options)
            player_infos.append(info)

        return player_infos

    def _init_gc(self, player_names=None):
        args = self.args

        co = self.module.ContextOptions()
        self.context_args.initialize(co)

        opt = self.module.PythonOptions()
        opt.seed = args.seed
        opt.shuffle_player = args.shuffle_player
        opt.max_unit_cmd = args.max_unit_cmd
        opt.max_tick = args.max_tick
        # [TODO] Put it to TD.
        opt.handicap_level = args.handicap_level

        player_infos = self._parse_players(opt, player_names)

        # opt.output_filename = b"simulators.txt"
        # opt.output_filename = b"cout"
        if args.save_replay_prefix is not None:
            opt.save_replay_prefix = args.save_replay_prefix.encode('ascii')
        if args.output_file is not None:
            opt.output_filename = args.output_file.encode("ascii")
        if args.cmd_dumper_prefix is not None:
            opt.cmd_dumper_prefix = args.cmd_dumper_prefix.encode("ascii")

        print("Options:")
        opt.Print()

        print("ContextOptions:")
        co.print()

        GC = self.module.GameContext(co, opt)
        self._on_gc(GC)

        params = GC.GetParams()
        print("Version: ", GC.Version())
        print("Num Actions: ", params["num_action"])
        print("Num unittype: ", params["num_unit_type"])
        print("num planes: ", params["num_planes"])
        params["rts_engine_version"] = GC.Version()
        params["players"] = player_infos

        return co, GC, params

    def _define_args(self):
        return []

    def _on_gc(self, GC):
        pass

    @abc.abstractmethod
    def _get_train_spec(self):
        pass

    @abc.abstractmethod
    def _get_actor_spec(self):
        pass

    def initialize(self):
        co, GC, params = self._init_gc()
        args = self.args

        desc = {}
        # For the actor model, no reward is needed; we only want to feed the input
        # and get back a distribution over actions. The sampled action and value
        # are filled in from the reply.
        desc["actor"] = self._get_actor_spec()

        if not args.actor_only:
            # For training, we want input, action (filled by actor models), value (filled by actor models) and reward.
            desc["train"] = self._get_train_spec()

        self.more_labels.add_labels(desc)

        params.update(dict(
            num_group = 1 if args.actor_only else 2,
            action_batchsize = int(desc["actor"]["batchsize"]),
            train_batchsize = int(desc["train"]["batchsize"]) if not args.actor_only else None,
            T = args.T,
            model_no_spatial = args.model_no_spatial
        ))

        return GCWrapper(GC, co, desc, gpu=args.gpu, use_numpy=False, params=params)

    def initialize_selfplay(self):
        args = self.args
        reference_name = "reference"
        train_name = "train"

        co, GC, params = self._init_gc(player_names=[train_name, reference_name])

        desc = {}
        # For the actor model, no reward is needed; we only want to feed the input
        # and get back a distribution over actions. The sampled action and value
        # are filled in from the reply.
        desc["actor0"] = self._get_actor_spec()
        desc["actor1"] = self._get_actor_spec()

        desc["actor0"]["name"] = reference_name
        desc["actor1"]["name"] = train_name

        if not args.actor_only:
            # For training, we want input, action (filled by actor models), value (filled by actor models) and reward.
            desc["train1"] = self._get_train_spec()
            desc["train1"]["name"] = train_name

        self.more_labels.add_labels(desc)

        params.update(dict(
            num_group = 1 if args.actor_only else 2,
            action_batchsize = int(desc["actor0"]["batchsize"]),
            train_batchsize = int(desc["train1"]["batchsize"]) if not args.actor_only else None,
            T = args.T,
            model_no_spatial = args.model_no_spatial
        ))

        return GCWrapper(GC, co, desc, gpu=args.gpu, use_numpy=False, params=params)

    def initialize_reduced_service(self):
        args = self.args

        reference_name = "reference"
        train_name = "train"
        co, GC, params = self._init_gc(player_names=[train_name, reference_name])

        desc = {}
        # For the actor model, no reward is needed; we only want to feed the input
        # and get back a distribution over actions. The sampled action and value
        # are filled in from the reply.
        desc["reduced_project"] = self._get_reduced_project()
        desc["reduced_forward"] = self._get_reduced_forward()
        desc["reduced_predict"] = self._get_reduced_predict()
        if params["players"][1]["type"] == "AI_NN":
            desc["actor"] = self._get_actor_spec()
            desc["actor"]["batchsize"] //= 2
            desc["actor"]["name"] = "reference"

        self.more_labels.add_labels(desc)

        return GCWrapper(GC, co, desc, gpu=args.gpu, use_numpy=False, params=params)
Example #13
class Loader:
    def __init__(self):
        self.context_args = ContextArgs()

        self.args = ArgsProvider(
            call_from=self,
            define_args=[
                ("frame_skip", 4),
                ("hist_len", 4),
                ("rom_file", "pong.bin"),
                ("actor_only", dict(action="store_true")),
                ("reward_clip", 1),
                ("rom_dir", os.path.dirname(__file__)),
                ("additional_labels", dict(
                    type=str,
                    default=None,
                    help="Add additional labels in the batch."
                    " E.g., id,seq,last_terminal",
                )),
                ("gpu", dict(type=int, default=None)),
            ],
            more_args=["batchsize", "T", "env_eval_only"],
            child_providers=[self.context_args.args])

    def initialize(self):
        args = self.args
        co = atari.ContextOptions()
        self.context_args.initialize(co)

        opt = atari.GameOptions()
        opt.frame_skip = args.frame_skip
        opt.rom_file = os.path.join(args.rom_dir, args.rom_file)
        opt.seed = 42
        opt.eval_only = getattr(args, "env_eval_only", 0) == 1
        opt.hist_len = args.hist_len
        opt.reward_clip = args.reward_clip

        GC = atari.GameContext(co, opt)
        print("Version: ", GC.Version())

        params = GC.GetParams()
        print("Num Actions: ", params["num_action"])

        desc = {}
        # For the actor model, no reward is needed; we only want to feed the
        # input and get back a distribution over actions. The sampled action
        # and value are filled in from the reply.

        desc["actor"] = dict(
            batchsize=args.batchsize,
            input=dict(T=1, keys=set(["s", "last_r", "last_terminal"])),
            reply=dict(T=1, keys=set(["rv", "pi", "V", "a"])))

        if not args.actor_only:
            # For training: group 1
            # We want input, action (filled by actor models), value
            # (filled by actor models) and reward.
            desc["train"] = dict(
                batchsize=args.batchsize,
                input=dict(
                    T=args.T,
                    keys=set([
                        "rv", "id", "pi", "s", "a", "last_r", "V", "seq",
                        "last_terminal"
                    ])),
                reply=None)

        if args.additional_labels is not None:
            extra = args.additional_labels.split(",")
            for _, v in desc.items():
                v["input"]["keys"].update(extra)

        # Initialize shared memory (between Python and C++)
        # based on the specification defined by desc.
        params["num_group"] = 1 if args.actor_only else 2
        params["action_batchsize"] = desc["actor"]["batchsize"]
        if not args.actor_only:
            params["train_batchsize"] = desc["train"]["batchsize"]
        params["hist_len"] = args.hist_len
        params["T"] = args.T

        return GCWrapper(
            GC, co, desc, gpu=args.gpu, use_numpy=False, params=params)
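One invariant worth noting in the Atari example above: every field the actor is asked to reply with ("rv", "pi", "V", "a") reappears in the training input keys, since the trainer consumes batches that the actor has already filled. A tiny self-contained check of that relationship, for illustration only:

# Every actor-filled field must be available as a training input.
actor_reply_keys = {"rv", "pi", "V", "a"}
train_input_keys = {"rv", "id", "pi", "s", "a", "last_r", "V", "seq",
                    "last_terminal"}
assert actor_reply_keys <= train_input_keys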