Example #1
    def __init__(self, module):
        self.context_args = ContextArgs()
        self.more_labels = MoreLabels()
        self.module = module

        basic_define_args = [
            ("handicap_level", 0),  
            ("players", dict(type=str, help=";-separated player infos. For example: type=AI_NN,fs=50,args=backup/AI_SIMPLE|decay/0.99|start/1000,fow=True;type=AI_SIMPLE,fs=50")),
            ("max_tick", dict(type=int, default=30000, help="Maximal tick")),
            ("shuffle_player", dict(action="store_true")),    # false
            ("num_frames_in_state", 1),    
            ("max_unit_cmd", 1),
            ("seed", 0),
            ("actor_only", dict(action="store_true")),   # false
            ("model_no_spatial", dict(action="store_true")), # TODO, put it to model false
            ("save_replay_prefix", dict(type=str, default=None)),  
            ("output_file", dict(type=str, default=None)),         
            ("cmd_dumper_prefix", dict(type=str, default=None)),
            ("gpu", dict(type=int, help="gpu to use", default=None)),
        ]

        self.args = ArgsProvider(
            call_from = self,
            define_args = basic_define_args + self._define_args(),
            more_args = ["num_games", "batchsize", "T"],
            child_providers = [ self.context_args.args, self.more_labels.args ]
        )
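
The class above (shown in full as CommonLoader in Example #9) expects a subclass to supply _define_args() with game-specific options. A minimal sketch of such a subclass, assuming CommonLoader is importable; the class name, the extra option, and the module handle are illustrative only:

# Hypothetical subclass sketch; only _define_args() is overridden.
class MyGameLoader(CommonLoader):
    def _define_args(self):
        # Same (name, spec) tuple format as basic_define_args above.
        return [
            ("map_name", dict(type=str, default="default",
                              help="illustrative game-specific option")),
        ]

# loader = MyGameLoader(game_module)  # game_module: the compiled game binding (hypothetical handle)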
Example #2
    def __init__(self, module):
        self.context_args = ContextArgs()
        self.more_labels = MoreLabels()
        self.module = module

        basic_define_args = [
            ("handicap_level", 0),
            ("players", dict(type=str, help=";-separated player infos. For example: type=AI_NN,fs=50,args=backup/AI_SIMPLE|decay/0.99|start/1000,fow=True;type=AI_SIMPLE,fs=50")),
            ("max_tick", dict(type=int, default=30000, help="Maximal tick")),
            ("shuffle_player", dict(action="store_true")),
            ("num_frames_in_state", 1),
            ("max_unit_cmd", 1),
            ("seed", 0),
            ("actor_only", dict(action="store_true")),
            ("model_no_spatial", dict(action="store_true")), # TODO, put it to model
            ("save_replay_prefix", dict(type=str, default=None)),
            ("output_file", dict(type=str, default=None)),
            ("cmd_dumper_prefix", dict(type=str, default=None)),
            ("gpu", dict(type=int, help="gpu to use", default=None)),
        ]

        self.args = ArgsProvider(
            call_from = self,
            define_args = basic_define_args + self._define_args(),
            more_args = ["num_games", "batchsize", "T"],
            child_providers = [ self.context_args.args, self.more_labels.args ]
        )
Example #3
    def __init__(self):
        self.context_args = ContextArgs()
        self.more_labels = MoreLabels()

        self.args = ArgsProvider(
            call_from=self,
            define_args=
            [("actor_only", dict(action="store_true")),
             ("list_file", "./train.lst"),
             ("verbose", dict(action="store_true")),
             ("data_aug",
              dict(type=int,
                   default=-1,
                   help="specify data augumentation, 0-7, -1 mean random")),
             ("ratio_pre_moves",
              dict(type=float,
                   default=0,
                   help=
                   "how many moves to perform in each thread, before we use the data train the model"
                   )),
             ("start_ratio_pre_moves",
              dict(
                  type=float,
                  default=0.5,
                  help=
                  "how many moves to perform in each thread, before we use the first sgf file to train the model"
              )),
             ("num_games_per_thread",
              dict(
                  type=int,
                  default=5,
                  help=
                  "number of concurrent games per threads, used to increase diversity of games"
              )),
             ("move_cutoff",
              dict(type=int, default=-1, help="Cutoff ply in replay")),
             ("online",
              dict(action="store_true", help="Set game to online mode")),
             ("gpu", dict(type=int, default=None))],
            more_args=["batchsize", "T"],
            child_providers=[self.context_args.args, self.more_labels.args])
Example #4
    def __init__(self):
        self.context_args = ContextArgs()
        self.more_labels = MoreLabels()

        self.args = ArgsProvider(
            call_from = self,
            define_args = [
                ("actor_only", dict(action="store_true")),
                ("list_file", "./train.lst"),
                ("verbose", dict(action="store_true")),
                ("data_aug", dict(type=int, default=-1, help="specify data augumentation, 0-7, -1 mean random")),
                ("ratio_pre_moves", dict(type=float, default=0, help="how many moves to perform in each thread, before we use the data train the model")),
                ("start_ratio_pre_moves", dict(type=float, default=0.5, help="how many moves to perform in each thread, before we use the first sgf file to train the model")),
                ("num_games_per_thread", dict(type=int, default=5, help="number of concurrent games per threads, used to increase diversity of games")),
                ("move_cutoff", dict(type=int, default=-1, help="Cutoff ply in replay")),
                ("mode", "online"),
                ("use_mcts", dict(action="store_true")),
                ("gpu", dict(type=int, default=None))
            ],
            more_args = ["batchsize", "T"],
            child_providers = [ self.context_args.args, self.more_labels.args ]
        )
Example #5
class Loader(object):
    @classmethod
    def get_option_spec(cls):
        spec = PyOptionSpec()
        spec.addBoolOption('actor_only', 'TODO: fill this help message in',
                           False)
        spec.addStrListOption(
            'list_files', 'Provide a list of json files for offline training',
            [])
        spec.addIntOption('port', 'TODO: fill this help message in', 5556)
        spec.addStrOption('server_addr', 'TODO: fill this help message in', '')
        spec.addStrOption('server_id', 'TODO: fill this help message in', '')
        spec.addIntOption('q_min_size', 'TODO: fill this help message in', 10)
        spec.addIntOption('q_max_size', 'TODO: fill this help message in',
                          1000)
        spec.addIntOption('num_reader', 'TODO: fill this help message in', 50)
        spec.addIntOption('num_reset_ranking',
                          'TODO: fill this help message in', 5000)
        spec.addIntOption(
            'client_max_delay_sec',
            'Maximum amount of allowed delays in sec. If the client '
            'didn\'t respond after that, we think it is dead.', 1200)
        spec.addBoolOption('verbose', 'TODO: fill this help message in', False)
        spec.addBoolOption('keep_prev_selfplay',
                           'TODO: fill this help message in', False)
        spec.addIntOption(
            'num_games_per_thread',
            ('For offline mode, it is the number of concurrent games per '
             'thread, used to increase diversity of games; for selfplay mode, '
             'it is the number of games played at each thread, and after that '
             'we need to call restartAllGames() to resume.'), -1)
        spec.addIntOption('expected_num_clients', 'Expected number of clients',
                          -1)
        spec.addIntOption('checkers_num_future_actions',
                          'TODO: fill this help message in', 1)
        spec.addStrOption('mode', 'TODO: fill this help message in', 'play')
        spec.addBoolOption('black_use_policy_network_only',
                           'TODO: fill this help message in', False)
        spec.addBoolOption('white_use_policy_network_only',
                           'TODO: fill this help message in', False)
        spec.addBoolOption('use_mcts', 'TODO: fill this help message in',
                           False)
        spec.addBoolOption('use_mcts_ai2', 'TODO: fill this help message in',
                           False)
        spec.addFloatOption(
            'white_puct',
            'PUCT for white when it is > 0.0. If it is -1 then we use '
            'the same puct for both sides (specified by mcts_options). '
            'A HACK to use different pucts for different models; should '
            'be replaced by a more systematic approach.', -1.0)
        spec.addIntOption('white_mcts_rollout_per_batch',
                          'white mcts rollout per batch', -1)
        spec.addIntOption('white_mcts_rollout_per_thread',
                          'white mcts rollout per thread', -1)
        spec.addStrOption('dump_record_prefix',
                          'TODO: fill this help message in', '')
        spec.addStrOption('selfplay_records_directory',
                          'TODO: fill this help message in', '')
        spec.addStrOption('eval_records_directory',
                          'TODO: fill this help message in', '')
        spec.addStrOption('records_buffer_directory',
                          'TODO: fill this help message in', '')
        spec.addIntOption('policy_distri_cutoff',
                          'first N moves will be randomly', 0)
        spec.addIntOption('selfplay_timeout_usec',
                          'TODO: fill this help message in', 0)
        spec.addIntOption('gpu', 'TODO: fill this help message in', -1)
        spec.addBoolOption('policy_distri_training_for_all',
                           'TODO: fill this help message in', False)
        spec.addBoolOption('parameter_print',
                           'TODO: fill this help message in', True)
        spec.addIntOption('batchsize', 'batch size', 128)
        spec.addIntOption('batchsize2', 'batch size', -1)
        spec.addIntOption('T', 'number of timesteps', 6)
        spec.addIntOption(
            'selfplay_init_num',
            ('Initial number of selfplay games to generate before training a '
             'new model'), 2000)
        spec.addIntOption(
            'selfplay_update_num',
            ('Additional number of selfplay games to generate after a model '
             'is updated'), 1000)
        spec.addBoolOption('selfplay_async',
                           ('Whether to use async mode in selfplay'), False)
        spec.addIntOption(
            'eval_num_games',
            ('number of evaluation games to play to decide whether a model '
             'is better than the other'), 400)
        spec.addFloatOption('eval_winrate_thres',
                            'Win rate threshold for evaluation', 0.55)
        spec.addIntOption(
            'eval_old_model',
            ('If specified, then we directly switch to evaluation mode '
             'between the loaded model and the old model specified by this '
             'switch'), -1)
        spec.addStrOption(
            'eval_model_pair',
            ('If specified for df_selfplay.py, then the two models will be '
             'evaluated on this client'), '')
        spec.addBoolOption(
            'cheat_eval_new_model_wins_half',
            'When enabled, in evaluation mode, when the game '
            'finishes, the player with the most recent model gets 100% '
            'win rate half of the time. '
            'This is used to test the framework.', False)
        spec.addBoolOption(
            'cheat_selfplay_random_result',
            'When enabled, in selfplay mode the result of the game is random. '
            'This is used to test the framework.', False)
        spec.addBoolOption('human_plays_for_black', '', False)
        spec.addIntOption(
            'suicide_after_n_games',
            'return after n games have finished, -1 means it never ends', -1)

        spec.merge(PyOptionSpec.fromClasses((ContextArgs, MoreLabels)))
        return spec

    @auto_import_options
    def __init__(self, option_map):
        self.context_args = ContextArgs(option_map)
        self.more_labels = MoreLabels(option_map)

    def _set_params(self):
        co = american_checkers.ContextOptions()
        self.context_args.initialize(co)
        co.job_id = os.environ.get("job_id", "local")
        if self.options.parameter_print:
            co.print()

        game_opt = american_checkers.GameOptions()

        game_opt.seed = 0
        game_opt.list_files = self.options.list_files

        if self.options.server_addr:
            game_opt.server_addr = self.options.server_addr
        else:
            if self.options.server_id:
                game_opt.server_addr = addrs[self.options.server_id]
                game_opt.server_id = self.options.server_id
            else:
                game_opt.server_addr = ""
                game_opt.server_id = ""

        game_opt.port = self.options.port
        game_opt.mode = self.options.mode
        game_opt.use_mcts = self.options.use_mcts
        game_opt.use_mcts_ai2 = self.options.use_mcts_ai2
        game_opt.dump_record_prefix = self.options.dump_record_prefix

        game_opt.selfplay_records_directory = self.options.selfplay_records_directory
        if len(self.options.selfplay_records_directory
               ) != 0 and not os.path.exists(
                   self.options.selfplay_records_directory):
            os.mkdir(self.options.selfplay_records_directory)

        game_opt.eval_records_directory = self.options.eval_records_directory
        if len(self.options.eval_records_directory
               ) != 0 and not os.path.exists(
                   self.options.eval_records_directory):
            os.mkdir(self.options.eval_records_directory)

        game_opt.records_buffer_directory = self.options.records_buffer_directory
        if len(self.options.records_buffer_directory
               ) != 0 and not os.path.exists(
                   self.options.records_buffer_directory):
            os.mkdir(self.options.records_buffer_directory)


        game_opt.policy_distri_training_for_all = \
            self.options.policy_distri_training_for_all
        game_opt.verbose = self.options.verbose
        game_opt.black_use_policy_network_only = \
            self.options.black_use_policy_network_only
        game_opt.white_use_policy_network_only = \
            self.options.white_use_policy_network_only
        game_opt.q_min_size = self.options.q_min_size
        game_opt.q_max_size = self.options.q_max_size
        game_opt.num_reader = self.options.num_reader
        game_opt.checkers_num_future_actions = self.options.checkers_num_future_actions
        game_opt.num_reset_ranking = self.options.num_reset_ranking
        game_opt.policy_distri_cutoff = self.options.policy_distri_cutoff
        game_opt.num_games_per_thread = self.options.num_games_per_thread
        game_opt.keep_prev_selfplay = self.options.keep_prev_selfplay
        game_opt.expected_num_clients = self.options.expected_num_clients

        game_opt.white_puct = self.options.white_puct
        game_opt.white_mcts_rollout_per_batch = \
            self.options.white_mcts_rollout_per_batch
        game_opt.white_mcts_rollout_per_thread = \
            self.options.white_mcts_rollout_per_thread

        game_opt.client_max_delay_sec = self.options.client_max_delay_sec
        game_opt.selfplay_init_num = self.options.selfplay_init_num
        game_opt.selfplay_update_num = self.options.selfplay_update_num
        game_opt.selfplay_async = self.options.selfplay_async
        game_opt.eval_num_games = self.options.eval_num_games
        game_opt.eval_thres = self.options.eval_winrate_thres
        game_opt.cheat_eval_new_model_wins_half = \
            self.options.cheat_eval_new_model_wins_half
        game_opt.cheat_selfplay_random_result = \
            self.options.cheat_selfplay_random_result

        if self.options.human_plays_for_black:
            game_opt.human_plays_for = 0
        else:
            game_opt.human_plays_for = 1

        self.max_batchsize = max(
            self.options.batchsize, self.options.batchsize2) \
            if self.options.batchsize2 > 0 \
            else self.options.batchsize
        co.batchsize = self.max_batchsize

        GC = american_checkers.GameContext(co, game_opt)

        if self.options.parameter_print:
            print("**************** GameOptions ****************")
            print(game_opt.info())
            print("*********************************************")
            print("Version: ", GC.ctx().version())
            print("*********************************************")

        return co, GC, game_opt

    def initialize(self):
        co, GC, game_opt = self._set_params()

        params = GC.getParams()

        if self.options.parameter_print:
            print("Mode: ", game_opt.mode)
            print("checkers_num_action: ", params["checkers_num_action"])

        desc = {}

        if self.options.mode == "play":
            desc["actor_white"] = dict(
                input=["s", "game_idx"],
                reply=[
                    "pi",
                    "a",
                    "V",
                ],
                batchsize=1,
            )
            desc["actor_black"] = dict(
                input=["s"],
                reply=["pi", "V", "a", "rv"],
                timeout_usec=10,
                batchsize=co.mcts_options.num_rollouts_per_batch)
        elif self.options.mode == "selfplay":
            desc["game_end"] = dict(batchsize=1, )
            desc["game_start"] = dict(batchsize=1,
                                      input=["white_ver", "black_ver"],
                                      reply=None)

            # checkers
            desc["actor_white"] = dict(
                input=["s"],
                reply=["pi", "V", "a", "rv"],
                batchsize=self.options.batchsize2
                if self.options.batchsize2 > 0 else self.options.batchsize,
                timeout_usec=self.options.selfplay_timeout_usec,
            )
            desc["actor_black"] = dict(
                input=["s"],
                reply=["pi", "V", "a", "rv"],
                batchsize=self.options.batchsize2
                if self.options.batchsize2 > 0 else self.options.batchsize,
                timeout_usec=self.options.selfplay_timeout_usec,
            )

        elif self.options.mode == "train" or self.options.mode == "offline_train":
            desc["train"] = dict(input=[
                "s", "offline_a", "winner", "mcts_scores", "move_idx",
                "selfplay_ver"
            ],
                                 reply=None)
            desc["train_ctrl"] = dict(input=["selfplay_ver"],
                                      reply=None,
                                      batchsize=1)

        else:
            raise "No such mode: " + self.options.mode

        params.update(
            dict(
                num_group=1 if self.options.actor_only else 2,
                T=self.options.T,
            ))

        self.more_labels.add_labels(desc)
        return GCWrapper(GC,
                         self.max_batchsize,
                         desc,
                         num_recv=2,
                         gpu=(self.options.gpu if
                              (self.options.gpu is not None
                               and self.options.gpu >= 0) else None),
                         use_numpy=False,
                         params=params,
                         verbose=self.options.parameter_print)
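
A hedged sketch of how this Loader is typically driven: merge its option spec, parse the command line into an option map, and call initialize(). The spec.parse() call and the top-level script context are assumptions modelled on the ELF option utilities, not guaranteed API:

# Sketch only; PyOptionSpec.parse() is assumed to turn sys.argv into an option map.
if __name__ == "__main__":
    spec = PyOptionSpec()
    spec.merge(Loader.get_option_spec())
    option_map = spec.parse()        # assumed API

    loader = Loader(option_map)      # @auto_import_options fills in loader.options
    GC = loader.initialize()         # returns the GCWrapper constructed above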
Example #6
 def __init__(self, option_map):
     self.context_args = ContextArgs(option_map)
     self.more_labels = MoreLabels(option_map)
Example #7
 def __init__(self, option_map):
     self.context_args = ContextArgs(option_map)
     self.more_labels = MoreLabels(option_map)
Example #8
class Loader(object):
    @classmethod
    def get_option_spec(cls):
        spec = PyOptionSpec()
        spec.addStrOption(
            'preload_sgf',
            'TODO: fill this help message in',
            '')
        spec.addIntOption(
            'preload_sgf_move_to',
            'TODO: fill this help message in',
            -1)
        spec.addBoolOption(
            'actor_only',
            'TODO: fill this help message in',
            False)
        spec.addStrListOption(
            'list_files',
            'Provide a list of json files for offline training',
            [])
        spec.addIntOption(
            'port',
            'TODO: fill this help message in',
            5556)
        spec.addStrOption(
            'server_addr',
            'TODO: fill this help message in',
            '')
        spec.addStrOption(
            'server_id',
            'TODO: fill this help message in',
            '')
        spec.addIntOption(
            'q_min_size',
            'TODO: fill this help message in',
            10)
        spec.addIntOption(
            'q_max_size',
            'TODO: fill this help message in',
            1000)
        spec.addIntOption(
            'num_reader',
            'TODO: fill this help message in',
            50)
        spec.addIntOption(
            'num_reset_ranking',
            'TODO: fill this help message in',
            5000)
        spec.addIntOption(
            'client_max_delay_sec',
            'Maximum amount of allowed delays in sec. If the client '
            'didn\'t respond after that, we think it is dead.',
            1200)
        spec.addBoolOption(
            'verbose',
            'TODO: fill this help message in',
            False)
        spec.addBoolOption(
            'keep_prev_selfplay',
            'TODO: fill this help message in',
            False)
        spec.addBoolOption(
            'print_result',
            'TODO: fill this help message in',
            False)
        spec.addIntOption(
            'data_aug',
            'specify data augmentation, 0-7; -1 means random',
            -1)
        spec.addIntOption(
            'ratio_pre_moves',
            ('how many moves to perform in each thread, before we use the '
             'data to train the model'),
            0)
        spec.addFloatOption(
            'start_ratio_pre_moves',
            ('how many moves to perform in each thread, before we use the '
             'first sgf file to train the model'),
            0.5)
        spec.addIntOption(
            'num_games_per_thread',
            ('For offline mode, it is the number of concurrent games per '
             'thread, used to increase diversity of games; for selfplay mode, '
             'it is the number of games played at each thread, and after that '
             'we need to call restartAllGames() to resume.'),
            -1)
        spec.addIntOption(
            'expected_num_clients',
            'Expected number of clients',
            -1
        )
        spec.addIntOption(
            'num_future_actions',
            'TODO: fill this help message in',
            1)
        spec.addIntOption(
            'move_cutoff',
            'Cutoff ply in replay',
            -1)
        spec.addStrOption(
            'mode',
            'TODO: fill this help message in',
            'online')
        spec.addBoolOption(
            'black_use_policy_network_only',
            'TODO: fill this help message in',
            False)
        spec.addBoolOption(
            'white_use_policy_network_only',
            'TODO: fill this help message in',
            False)
        spec.addIntOption(
            'ply_pass_enabled',
            'TODO: fill this help message in',
            0)
        spec.addBoolOption(
            'use_mcts',
            'TODO: fill this help message in',
            False)
        spec.addBoolOption(
            'use_mcts_ai2',
            'TODO: fill this help message in',
            False)
        spec.addFloatOption(
            'white_puct',
            'PUCT for white when it is > 0.0. If it is -1 then we use '
            'the same puct for both sides (specified by mcts_options). '
            'A HACK to use different pucts for different models; should '
            'be replaced by a more systematic approach.',
            -1.0)
        spec.addIntOption(
            'white_mcts_rollout_per_batch',
            'white mcts rollout per batch',
            -1)
        spec.addIntOption(
            'white_mcts_rollout_per_thread',
            'white mcts rollout per thread',
            -1)
        spec.addBoolOption(
            'use_df_feature',
            'TODO: fill this help message in',
            False)
        spec.addStrOption(
            'dump_record_prefix',
            'TODO: fill this help message in',
            '')
        spec.addIntOption(
            'policy_distri_cutoff',
            'TODO: fill this help message in',
            0)
        spec.addFloatOption(
            'resign_thres',
            'TODO: fill this help message in',
            0.0)
        spec.addBoolOption(
            'following_pass',
            'TODO: fill this help message in',
            False)
        spec.addIntOption(
            'selfplay_timeout_usec',
            'TODO: fill this help message in',
            0)
        spec.addIntOption(
            'gpu',
            'TODO: fill this help message in',
            -1)
        spec.addBoolOption(
            'policy_distri_training_for_all',
            'TODO: fill this help message in',
            False)
        spec.addBoolOption(
            'parameter_print',
            'TODO: fill this help message in',
            True)
        spec.addIntOption(
            'batchsize',
            'batch size',
            128)
        spec.addIntOption(
            'batchsize2',
            'batch size',
            -1)
        spec.addIntOption(
            'T',
            'number of timesteps',
            6)
        spec.addIntOption(
            'selfplay_init_num',
            ('Initial number of selfplay games to generate before training a '
             'new model'),
            2000)
        spec.addIntOption(
            'selfplay_update_num',
            ('Additional number of selfplay games to generate after a model '
             'is updated'),
            1000)
        spec.addBoolOption(
            'selfplay_async',
            ('Whether to use async mode in selfplay'),
            False)
        spec.addIntOption(
            'eval_num_games',
            ('number of evaluation games to play to decide whether a model '
             'is better than the other'),
            400)
        spec.addFloatOption(
            'eval_winrate_thres',
            'Win rate threshold for evaluation',
            0.55)
        spec.addIntOption(
            'eval_old_model',
            ('If specified, then we directly switch to evaluation mode '
             'between the loaded model and the old model specified by this '
             'switch'),
            -1)
        spec.addStrOption(
            'eval_model_pair',
            ('If specified for df_selfplay.py, then the two models will be '
             'evaluated on this client'),
            '')
        spec.addStrOption(
            'comment',
            'Comment for this run',
            '')
        spec.addBoolOption(
            'cheat_eval_new_model_wins_half',
            'When enabled, in evaluation mode, when the game '
            'finishes, the player with the most recent model gets 100% '
            'win rate half of the time. '
            'This is used to test the framework.',
            False)
        spec.addBoolOption(
            'cheat_selfplay_random_result',
            'When enabled, in selfplay mode the result of the game is random. '
            'This is used to test the framework.',
            False)
        spec.addIntOption(
            'suicide_after_n_games',
            'return after n games have finished, -1 means it never ends',
            -1)

        spec.merge(PyOptionSpec.fromClasses((ContextArgs, MoreLabels)))

        return spec

    @auto_import_options
    def __init__(self, option_map):
        self.context_args = ContextArgs(option_map)
        self.more_labels = MoreLabels(option_map)

    def _set_params(self):
        co = go.ContextOptions()
        self.context_args.initialize(co)
        co.job_id = os.environ.get("job_id", "local")
        if self.options.parameter_print:
            co.print()

        opt = go.GameOptions()
        opt.seed = 0
        opt.list_files = self.options.list_files

        if self.options.server_addr:
            opt.server_addr = self.options.server_addr
        else:
            if self.options.server_id:
                opt.server_addr = addrs[self.options.server_id]
                opt.server_id = self.options.server_id
            else:
                opt.server_addr = ""
                opt.server_id = ""

        opt.port = self.options.port
        opt.mode = self.options.mode
        opt.use_mcts = self.options.use_mcts
        opt.use_mcts_ai2 = self.options.use_mcts_ai2
        opt.use_df_feature = self.options.use_df_feature
        opt.dump_record_prefix = self.options.dump_record_prefix
        opt.policy_distri_training_for_all = \
            self.options.policy_distri_training_for_all
        opt.verbose = self.options.verbose
        opt.black_use_policy_network_only = \
            self.options.black_use_policy_network_only
        opt.white_use_policy_network_only = \
            self.options.white_use_policy_network_only
        opt.data_aug = self.options.data_aug
        opt.ratio_pre_moves = self.options.ratio_pre_moves
        opt.q_min_size = self.options.q_min_size
        opt.q_max_size = self.options.q_max_size
        opt.num_reader = self.options.num_reader
        opt.start_ratio_pre_moves = self.options.start_ratio_pre_moves
        opt.ply_pass_enabled = self.options.ply_pass_enabled
        opt.num_future_actions = self.options.num_future_actions
        opt.num_reset_ranking = self.options.num_reset_ranking
        opt.move_cutoff = self.options.move_cutoff
        opt.policy_distri_cutoff = self.options.policy_distri_cutoff
        opt.num_games_per_thread = self.options.num_games_per_thread
        opt.following_pass = self.options.following_pass
        opt.resign_thres = self.options.resign_thres
        opt.preload_sgf = self.options.preload_sgf
        opt.preload_sgf_move_to = self.options.preload_sgf_move_to
        opt.keep_prev_selfplay = self.options.keep_prev_selfplay
        opt.expected_num_clients = self.options.expected_num_clients

        opt.white_puct = self.options.white_puct
        opt.white_mcts_rollout_per_batch = \
            self.options.white_mcts_rollout_per_batch
        opt.white_mcts_rollout_per_thread = \
            self.options.white_mcts_rollout_per_thread

        opt.client_max_delay_sec = self.options.client_max_delay_sec
        opt.print_result = self.options.print_result
        opt.selfplay_init_num = self.options.selfplay_init_num
        opt.selfplay_update_num = self.options.selfplay_update_num
        opt.selfplay_async = self.options.selfplay_async
        opt.eval_num_games = self.options.eval_num_games
        opt.eval_thres = self.options.eval_winrate_thres
        opt.cheat_eval_new_model_wins_half = \
            self.options.cheat_eval_new_model_wins_half
        opt.cheat_selfplay_random_result = \
            self.options.cheat_selfplay_random_result

        self.max_batchsize = max(
            self.options.batchsize, self.options.batchsize2) \
            if self.options.batchsize2 > 0 \
            else self.options.batchsize
        co.batchsize = self.max_batchsize

        GC = go.GameContext(co, opt)

        if self.options.parameter_print:
            print("**** Options ****")
            print(opt.info())
            print("*****************")
            print("Version: ", GC.ctx().version())

        return co, GC, opt

    def initialize(self):
        co, GC, opt = self._set_params()

        params = GC.getParams()

        if self.options.parameter_print:
            print("Mode: ", opt.mode)
            print("Num Actions: ", params["num_action"])

        desc = {}
        if self.options.mode == "online":
            desc["human_actor"] = dict(
                input=["s"],
                reply=["pi", "a", "V"],
                batchsize=1,
            )
            # Used for MCTS/Direct play.
            desc["actor_black"] = dict(
                input=["s"],
                reply=["pi", "V", "a", "rv"],
                timeout_usec=10,
                batchsize=co.mcts_options.num_rollouts_per_batch
            )
        elif self.options.mode == "selfplay":
            # Used for MCTS/Direct play.
            desc["actor_black"] = dict(
                input=["s"],
                reply=["pi", "V", "a", "rv"],
                batchsize=self.options.batchsize,
                timeout_usec=self.options.selfplay_timeout_usec,
            )
            desc["actor_white"] = dict(
                input=["s"],
                reply=["pi", "V", "a", "rv"],
                batchsize=self.options.batchsize2
                if self.options.batchsize2 > 0
                else self.options.batchsize,
                timeout_usec=self.options.selfplay_timeout_usec,
            )
            desc["game_end"] = dict(
                batchsize=1,
            )
            desc["game_start"] = dict(
                batchsize=1,
                input=["black_ver", "white_ver"],
                reply=None
            )
        elif self.options.mode == "train" or \
                self.options.mode == "offline_train":
            desc["train"] = dict(
                input=["s", "offline_a", "winner", "mcts_scores", "move_idx",
                       "selfplay_ver"],
                reply=None
            )
            desc["train_ctrl"] = dict(
                input=["selfplay_ver"],
                reply=None,
                batchsize=1
            )
        else:
            raise "No such mode: " + self.options.mode

        params.update(dict(
            num_group=1 if self.options.actor_only else 2,
            T=self.options.T,
        ))

        self.more_labels.add_labels(desc)
        return GCWrapper(
            GC,
            self.max_batchsize,
            desc,
            num_recv=2,
            gpu=(self.options.gpu
                 if (self.options.gpu is not None and self.options.gpu >= 0)
                 else None),
            use_numpy=False,
            params=params,
            verbose=self.options.parameter_print)
Example #9
class CommonLoader:
    def __init__(self, module):
        self.context_args = ContextArgs()
        self.more_labels = MoreLabels()
        self.module = module

        basic_define_args = [
            ("handicap_level", 0),  
            ("players", dict(type=str, help=";-separated player infos. For example: type=AI_NN,fs=50,args=backup/AI_SIMPLE|decay/0.99|start/1000,fow=True;type=AI_SIMPLE,fs=50")),
            ("max_tick", dict(type=int, default=30000, help="Maximal tick")),
            ("shuffle_player", dict(action="store_true")),    # false
            ("num_frames_in_state", 1),    
            ("max_unit_cmd", 1),
            ("seed", 0),
            ("actor_only", dict(action="store_true")),   # false
            ("model_no_spatial", dict(action="store_true")), # TODO, put it to model false
            ("save_replay_prefix", dict(type=str, default=None)),  
            ("output_file", dict(type=str, default=None)),         
            ("cmd_dumper_prefix", dict(type=str, default=None)),
            ("gpu", dict(type=int, help="gpu to use", default=None)),
        ]

        self.args = ArgsProvider(
            call_from = self,
            define_args = basic_define_args + self._define_args(),
            more_args = ["num_games", "batchsize", "T"],
            child_providers = [ self.context_args.args, self.more_labels.args ]
        )

    def _set_key(self, ai_options, key, value):
        if not hasattr(ai_options, key):
            print("AIOptions does not have key = " + key)
            return

        # Can we automate this?
        bool_convert = dict(t=True, true=True, f=False, false=False)

        try:
            if key == "fow":
                setattr(ai_options, key, bool_convert[value.lower()])
            elif key == "name" or key == "args" or key == "type":
                setattr(ai_options, key, value)
            else:
                setattr(ai_options, key, int(value))
        except ValueError:
            print("Value error! key = " + str(key) + " value = " + str(value))
            sys.exit(1)

    def _parse_players(self, opt, player_names):
        players_str = str(self.args.players)
        if players_str[0] == "\"" and players_str[-1] == "\"":
            players_str = players_str[1:-1]

        player_infos = []
        for i, player in enumerate(players_str.split(";")):
            ai_options = self.module.AIOptions()
            ai_options.num_frames_in_state = self.args.num_frames_in_state
            info = dict()
            for item in player.split(","):
                key, value = item.split("=")
                self._set_key(ai_options, key, value)
                info[key] = value

            if player_names is not None:
                self._set_key(ai_options, "name", player_names[i])
                info["name"] = player_names[i]

            opt.AddAIOptions(ai_options)
            player_infos.append(info)

        return player_infos

    def _init_gc(self, player_names=None):
        args = self.args

        co = self.module.ContextOptions()   # defined in elf/python_options_utils_cpp.h
        self.context_args.initialize(co)

        opt = self.module.PythonOptions()
        opt.seed = args.seed
        opt.shuffle_player = args.shuffle_player
        opt.max_unit_cmd = args.max_unit_cmd
        opt.max_tick = args.max_tick
        # [TODO] Put it to TD.
        opt.handicap_level = args.handicap_level

        player_infos = self._parse_players(opt, player_names)  # parse the players string and set up each player's AI options

        # opt.output_filename = b"simulators.txt"
        # opt.output_filename = b"cout"
        if args.save_replay_prefix is not None:
            opt.save_replay_prefix = args.save_replay_prefix.encode('ascii')
        if args.output_file is not None:
            opt.output_filename = args.output_file.encode("ascii")
        if args.cmd_dumper_prefix is not None:
            opt.cmd_dumper_prefix = args.cmd_dumper_prefix.encode("ascii")

        print("Options:")
        opt.Print()

        print("ContextOptions:")
        co.print()

        GC = self.module.GameContext(co, opt)
        self._on_gc(GC)

        params = GC.GetParams()
        print("Version: ", GC.Version())
        print("Num Actions: ", params["num_action"])
        print("Num unittype: ", params["num_unit_type"])
        print("num planes: ", params["num_planes"])
        params["rts_engine_version"] = GC.Version()
        params["players"] = player_infos

        return co, GC, params

    def _define_args(self):
        return []

    def _on_gc(self, GC):
        pass

    @abc.abstractmethod
    def _get_train_spec(self):
        pass

    @abc.abstractmethod
    def _get_actor_spec(self):
        pass

    def initialize(self):
        co, GC, params = self._init_gc()
        args = self.args
        desc = {}
        # For the actor model, no reward is needed; we only want to pass the input and get back a distribution over actions.
        # The sampled action and value will be filled in from the reply.
        desc["actor"] = self._get_actor_spec()

        if not args.actor_only:
            # For training, we want input, action (filled by actor models), value (filled by actor models) and reward.
            desc["train"] = self._get_train_spec()

        self.more_labels.add_labels(desc)  

        params.update(dict(
            num_group = 1 if args.actor_only else 2,
            action_batchsize = int(desc["actor"]["batchsize"]),
            train_batchsize = int(desc["train"]["batchsize"]) if not args.actor_only else None,
            T = args.T,
            model_no_spatial = args.model_no_spatial
        ))
        

        return GCWrapper(GC, co, desc, gpu=args.gpu, use_numpy=False, params=params)

    def initialize_selfplay(self):
        args = self.args
        reference_name = "reference"
        train_name = "train"

        co, GC, params = self._init_gc(player_names=[train_name, reference_name])

        desc = {}
        # For the actor model, no reward is needed; we only want to pass the input and get back a distribution over actions.
        # The sampled action and value will be filled in from the reply.
        desc["actor0"] = self._get_actor_spec()
        desc["actor1"] = self._get_actor_spec()

        desc["actor0"]["name"] = reference_name
        desc["actor1"]["name"] = train_name

        if not args.actor_only:
            # For training, we want input, action (filled by actor models), value (filled by actor models) and reward.
            desc["train1"] = self._get_train_spec()
            desc["train1"]["name"] = train_name

        self.more_labels.add_labels(desc)

        params.update(dict(
            num_group = 1 if args.actor_only else 2,
            action_batchsize = int(desc["actor0"]["batchsize"]),
            train_batchsize = int(desc["train1"]["batchsize"]) if not args.actor_only else None,
            T = args.T,
            model_no_spatial = args.model_no_spatial
        ))

        return GCWrapper(GC, co, desc, gpu=args.gpu, use_numpy=False, params=params)

    def initialize_reduced_service(self):
        args = self.args

        reference_name = "reference"
        train_name = "train"
        co, GC, params = self._init_gc(player_names=[train_name, reference_name])

        desc = {}
        # For the actor model, no reward is needed; we only want to pass the input and get back a distribution over actions.
        # The sampled action and value will be filled in from the reply.
        desc["reduced_project"] = self._get_reduced_project()
        desc["reduced_forward"] = self._get_reduced_forward()
        desc["reduced_predict"] = self._get_reduced_predict()
        if params["players"][1]["type"] == "AI_NN":
            desc["actor"] = self._get_actor_spec()
            desc["actor"]["batchsize"] //= 2
            desc["actor"]["name"] = "reference"

        self.more_labels.add_labels(desc)

        return GCWrapper(GC, co, desc, gpu=args.gpu, use_numpy=False, params=params)
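
The players option parsed by _parse_players/_set_key above is a ;-separated list of key=value pairs per player. The following standalone sketch replays that parsing on the sample string from the option's help text, without any framework objects:

# Standalone illustration of the conversion done in _parse_players/_set_key.
players_str = ("type=AI_NN,fs=50,args=backup/AI_SIMPLE|decay/0.99|start/1000,"
               "fow=True;type=AI_SIMPLE,fs=50")

bool_convert = dict(t=True, true=True, f=False, false=False)
player_infos = []
for player in players_str.split(";"):
    info = {}
    for item in player.split(","):
        key, value = item.split("=")
        if key == "fow":
            info[key] = bool_convert[value.lower()]     # booleans are spelled t/true/f/false
        elif key in ("name", "args", "type"):
            info[key] = value                           # kept as strings
        else:
            info[key] = int(value)                      # everything else is an int (e.g. fs)
    player_infos.append(info)

# player_infos:
# [{'type': 'AI_NN', 'fs': 50, 'args': 'backup/AI_SIMPLE|decay/0.99|start/1000', 'fow': True},
#  {'type': 'AI_SIMPLE', 'fs': 50}]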
Example #10
 def __init__(self, option_map):
     self.more_labels = MoreLabels(option_map)
     self.option_map = option_map
Example #11
class Loader(object):
    @classmethod
    def get_option_spec(cls):
        spec = PyOptionSpec()
        go.getClientPredefined(spec.getOptionSpec())

        spec.addIntOption('gpu', 'GPU id to use', -1)
        spec.addStrOption(
            'eval_model_pair',
            ('If specified for df_selfplay.py, then the two models will be '
             'evaluated on this client'), '')
        spec.addStrOption('comment', 'Comment for this run', '')

        spec.addIntOption('selfplay_timeout_usec', 'Timeout used for MCTS', 10)

        spec.addBoolOption("parameter_print", "Print parameters", True)

        spec.merge(PyOptionSpec.fromClasses((MoreLabels, )))
        return spec

    @auto_import_options
    def __init__(self, option_map):
        self.more_labels = MoreLabels(option_map)
        self.option_map = option_map

    def initialize(self):
        job_id = os.environ.get("job_id", "local")
        opt = go.getClientOpt(self.option_map.getOptionSpec(), job_id)
        mode = getattr(self.options, "common.mode")
        batchsize = getattr(self.options, "common.base.batchsize")

        GC = elf.GameContext(opt.common.base)

        if mode not in ["online", "selfplay"]:
            raise "No such mode: " + mode
        game_obj = go.Client(opt)
        game_obj.setGameContext(GC)

        params = game_obj.getParams()
        if self.options.parameter_print:
            print("**** Options ****")
            print(opt.info())
            print("*****************")
            print("Version: ", elf.version())
            print("Mode: ", mode)
            print("Num Actions: ", params["num_action"])

        desc = {}
        if mode == "online":
            desc["human_actor"] = dict(
                input=[],
                reply=["a", "timestamp"],
                batchsize=1,
            )
            # Used for MCTS/Direct play.
            desc["actor_black"] = dict(
                input=["s"],
                reply=["pi", "V", "a", "rv"],
                timeout_usec=10,
                batchsize=getattr(self.options,
                                  "common.mcts.num_rollout_per_batch"))
        elif mode == "selfplay":
            # Used for MCTS/Direct play.
            white_batchsize = self.options.white_mcts_rollout_per_batch
            if white_batchsize < 0:
                white_batchsize = batchsize

            desc["actor_black"] = dict(
                input=["s"],
                reply=["pi", "V", "a", "rv"],
                batchsize=batchsize,
                timeout_usec=self.options.selfplay_timeout_usec,
            )

            desc["actor_white"] = dict(
                input=["s"],
                reply=["pi", "V", "a", "rv"],
                batchsize=white_batchsize,
                timeout_usec=self.options.selfplay_timeout_usec,
            )
            desc["game_end"] = dict(batchsize=1, )
            desc["game_start"] = dict(batchsize=1,
                                      input=["black_ver", "white_ver"],
                                      reply=None)

        self.more_labels.add_labels(desc)
        return GCWrapper(GC,
                         game_obj,
                         batchsize,
                         desc,
                         num_recv=8,
                         default_gpu=(self.options.gpu if
                                      (self.options.gpu is not None
                                       and self.options.gpu >= 0) else None),
                         use_numpy=False,
                         params=params,
                         verbose=self.options.parameter_print)
Example #12
class Loader(object):
    @classmethod
    def get_option_spec(cls):
        spec = PyOptionSpec()
        spec.addStrOption('preload_sgf', 'TODO: fill this help message in', '')
        spec.addIntOption('preload_sgf_move_to',
                          'TODO: fill this help message in', -1)
        spec.addStrOption('mode', 'TODO: fill this help message in', "online")
        spec.addBoolOption('actor_only', 'TODO: fill this help message in',
                           False)
        spec.addIntOption('num_reset_ranking',
                          'TODO: fill this help message in', 5000)
        spec.addBoolOption('verbose', 'TODO: fill this help message in', False)
        spec.addBoolOption('print_result', 'TODO: fill this help message in',
                           False)
        spec.addIntOption('data_aug',
                          'specify data augmentation, 0-7; -1 means random',
                          -1)
        spec.addIntOption(
            'num_games_per_thread',
            ('For offline mode, it is the number of concurrent games per '
             'thread, used to increase diversity of games; for selfplay mode, '
             'it is the number of games played at each thread, and after that '
             'we need to call restartAllGames() to resume.'), -1)
        spec.addIntOption('num_future_actions',
                          'TODO: fill this help message in', 1)
        spec.addIntOption('move_cutoff', 'Cutoff ply in replay', -1)
        spec.addBoolOption('black_use_policy_network_only',
                           'TODO: fill this help message in', False)
        spec.addIntOption('ply_pass_enabled',
                          'TODO: fill this help message in', 0)
        spec.addBoolOption('use_mcts', 'TODO: fill this help message in',
                           False)
        spec.addBoolOption('use_df_feature', 'TODO: fill this help message in',
                           False)
        spec.addStrOption('dump_record_prefix',
                          'TODO: fill this help message in', '')
        spec.addFloatOption('resign_thres', 'TODO: fill this help message in',
                            0.0)
        spec.addBoolOption('following_pass', 'TODO: fill this help message in',
                           False)
        spec.addIntOption('gpu', 'TODO: fill this help message in', -1)
        spec.addBoolOption('parameter_print',
                           'TODO: fill this help message in', True)
        spec.addIntOption('batchsize', 'batch size', 128)
        spec.addIntOption('batchsize2', 'batch size', -1)
        spec.addFloatOption('eval_winrate_thres',
                            'Win rate threshold for evaluation', 0.55)
        spec.addIntOption(
            'suicide_after_n_games',
            'return after n games have finished, -1 means it never ends', -1)

        spec.merge(PyOptionSpec.fromClasses((ContextArgs, MoreLabels)))

        return spec

    @auto_import_options
    def __init__(self, option_map):
        self.context_args = ContextArgs(option_map)
        self.more_labels = MoreLabels(option_map)

    def _set_params(self):
        co = go.ContextOptions()
        self.context_args.initialize(co)
        co.job_id = os.environ.get("job_id", "local")
        if self.options.parameter_print:
            co.print()

        opt = go.GameOptions()
        opt.seed = 0

        opt.mode = self.options.mode
        opt.use_mcts = self.options.use_mcts
        opt.use_df_feature = self.options.use_df_feature
        opt.dump_record_prefix = self.options.dump_record_prefix
        opt.verbose = self.options.verbose
        opt.black_use_policy_network_only = \
            self.options.black_use_policy_network_only
        opt.data_aug = self.options.data_aug
        opt.ply_pass_enabled = self.options.ply_pass_enabled
        opt.num_reset_ranking = self.options.num_reset_ranking
        opt.move_cutoff = self.options.move_cutoff
        opt.num_games_per_thread = self.options.num_games_per_thread
        opt.following_pass = self.options.following_pass
        opt.resign_thres = self.options.resign_thres
        opt.preload_sgf = self.options.preload_sgf
        opt.preload_sgf_move_to = self.options.preload_sgf_move_to

        opt.print_result = self.options.print_result

        self.max_batchsize = max(
            self.options.batchsize, self.options.batchsize2) \
            if self.options.batchsize2 > 0 \
            else self.options.batchsize
        co.batchsize = self.max_batchsize

        GC = go.GameContext(co, opt)

        if self.options.parameter_print:
            print("**** Options ****")
            print(opt.info())
            print("*****************")
            print("Version: ", GC.ctx().version())

        return co, GC, opt

    def initialize(self):
        co, GC, opt = self._set_params()

        params = GC.getParams()

        if self.options.parameter_print:
            print("Mode: ", opt.mode)
            print("Num Actions: ", params["num_action"])

        desc = {}
        if self.options.mode == "online":
            desc["human_actor"] = dict(
                input=["s"],
                reply=["pi", "a", "V"],
                batchsize=1,
            )
            # Used for MCTS/Direct play.
            desc["actor_black"] = dict(
                input=["s"],
                reply=["pi", "V", "a", "rv"],
                timeout_usec=10,
                batchsize=co.mcts_options.num_rollouts_per_batch)
        else:
            raise "No such mode: " + self.options.mode

        params.update(
            dict(
                num_group=1 if self.options.actor_only else 2,
                T=self.options.T,
            ))

        self.more_labels.add_labels(desc)
        return GCWrapper(GC,
                         self.max_batchsize,
                         desc,
                         num_recv=2,
                         gpu=(self.options.gpu if
                              (self.options.gpu is not None
                               and self.options.gpu >= 0) else None),
                         use_numpy=False,
                         params=params,
                         verbose=self.options.parameter_print)
Example #13
class Loader:
    def __init__(self):
        self.context_args = ContextArgs()
        self.more_labels = MoreLabels()

        self.args = ArgsProvider(
            call_from = self,
            define_args = [
                ("actor_only", dict(action="store_true")),
                ("list_file", "./train.lst"),
                ("verbose", dict(action="store_true")),
                ("data_aug", dict(type=int, default=-1, help="specify data augumentation, 0-7, -1 mean random")),
                ("ratio_pre_moves", dict(type=float, default=0, help="how many moves to perform in each thread, before we use the data train the model")),
                ("start_ratio_pre_moves", dict(type=float, default=0.5, help="how many moves to perform in each thread, before we use the first sgf file to train the model")),
                ("num_games_per_thread", dict(type=int, default=5, help="number of concurrent games per threads, used to increase diversity of games")),
                ("move_cutoff", dict(type=int, default=-1, help="Cutoff ply in replay")),
                ("mode", "online"),
                ("use_mcts", dict(action="store_true")),
                ("gpu", dict(type=int, default=None))
            ],
            more_args = ["batchsize", "T"],
            child_providers = [ self.context_args.args, self.more_labels.args ]
        )

    def initialize(self):
        args = self.args
        co = go.ContextOptions()
        self.context_args.initialize(co)
        co.print()

        opt = go.GameOptions()
        opt.seed = 0
        opt.list_filename = args.list_file
        opt.mode = args.mode
        opt.use_mcts = args.use_mcts
        opt.verbose = args.verbose
        opt.data_aug = args.data_aug
        opt.ratio_pre_moves = args.ratio_pre_moves
        opt.start_ratio_pre_moves = args.start_ratio_pre_moves
        opt.move_cutoff = args.move_cutoff
        opt.num_games_per_thread = args.num_games_per_thread
        GC = go.GameContext(co, opt)
        print("Version: ", GC.Version())

        params = GC.GetParams()
        print("Num Actions: ", params["num_action"])

        desc = {}
        if args.mode == "online":
            desc["human_actor"] = dict(
                batchsize=args.batchsize,
                input=dict(T=1, keys=set(["s"])),
                reply=dict(T=1, keys=set(["pi", "a"])),
                name="human_actor",
            )
            # Used for MCTS/Direct play.
            desc["actor"] = dict(
                batchsize=args.batchsize,
                input=dict(T=1, keys=set(["s"])),
                reply=dict(T=1, keys=set(["pi", "V", "a"])),
                name="actor",
            )
        elif args.mode == "selfplay":
            # Used for MCTS/Direct play.
            desc["actor"] = dict(
                batchsize=args.batchsize,
                input=dict(T=1, keys=set(["s"])),
                reply=dict(T=1, keys=set(["pi", "V"])),
                name="actor",
                timeout_usec = 10,
            )
        else:
            desc["train"] = dict(
                batchsize=args.batchsize,
                input=dict(T=args.T, keys=set(["s", "offline_a"])),
                reply=None
            )

        self.more_labels.add_labels(desc)

        params.update(dict(
            num_group = 1 if args.actor_only else 2,
            T = args.T,
        ))

        return GCWrapper(GC, co, desc, gpu=args.gpu, use_numpy=False, params=params)
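
A hedged sketch of consuming the GCWrapper returned by initialize() above: one callback is registered per descriptor key and the context is pumped in a loop. The reg_callback/Start/Run/Stop names follow the original ELF GCWrapper and are assumptions here, as is the callback's return convention; ArgsProvider command-line parsing is elided.

# Sketch only: assumes loader.args has already been populated by the ELF
# argument-loading machinery and that GCWrapper exposes reg_callback,
# Start, Run and Stop as in the original ELF wrapper.
loader = Loader()
# ... ArgsProvider-based command-line parsing would happen here ...
GC = loader.initialize()

def actor(batch):
    # batch["s"]: stacked input features for one group of games.
    # The reply must provide the keys declared in desc["actor"] above.
    return dict(pi=..., V=..., a=...)   # placeholders, not real tensors

GC.reg_callback("actor", actor)
GC.Start()
for _ in range(100):
    GC.Run()    # collect one batch and dispatch it to the matching callback
GC.Stop()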
Example #14
class Loader(object):
    @classmethod
    def get_option_spec(cls):
        spec = PyOptionSpec()
        go.getServerPredefined(spec.getOptionSpec())

        spec.addIntOption('gpu', 'GPU id to use', -1)
        spec.addIntOption(
            'eval_old_model',
            ('If specified, then we directly switch to evaluation mode '
             'between the loaded model and the old model specified by this '
             'switch'), -1)
        spec.addStrOption('comment', 'Comment for this run', '')
        spec.addBoolOption("parameter_print", "Print parameters", True)

        spec.merge(PyOptionSpec.fromClasses((MoreLabels, )))
        return spec

    @auto_import_options
    def __init__(self, option_map):
        self.more_labels = MoreLabels(option_map)
        self.option_map = option_map

    def initialize(self):
        opt = go.getServerOpt(self.option_map.getOptionSpec())

        desc = {}
        GC = elf.GameContext(opt.common.base)

        mode = getattr(self.options, "common.mode")
        batchsize = getattr(self.options, "common.base.batchsize")

        if mode in ["train", "train_offline"]:
            game_obj = go.Server(opt)
            desc["train"] = dict(input=[
                "s", "offline_a", "winner", "mcts_scores", "move_idx",
                "selfplay_ver"
            ],
                                 reply=None)
            desc["train_ctrl"] = dict(input=["selfplay_ver"],
                                      reply=None,
                                      batchsize=1)
        else:
            raise "No such mode: " + mode

        game_obj.setGameContext(GC)
        params = game_obj.getParams()

        if self.options.parameter_print:
            print("**** Options ****")
            print(opt.info())
            print("*****************")
            print("Version: ", elf.version())
            print("Mode: ", mode)
            print("Num Actions: ", params["num_action"])

        self.more_labels.add_labels(desc)
        return GCWrapper(GC,
                         game_obj,
                         batchsize,
                         desc,
                         num_recv=2,
                         default_gpu=(self.options.gpu if
                                      (self.options.gpu is not None
                                       and self.options.gpu >= 0) else None),
                         use_numpy=False,
                         params=params,
                         verbose=self.options.parameter_print)
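For context on the "train" entry above: whichever callback is registered for it receives batches whose fields match the listed input keys. The following sketch is not part of the original source, and the assumption that the batch behaves like a dict of tensors keyed by those names (with the batch dimension leading) is for illustration only.

def on_train_batch(batch):
    s = batch["s"]                  # board features
    target_a = batch["offline_a"]   # moves recorded in the self-play games
    winner = batch["winner"]        # game outcome, used as the value target
    scores = batch["mcts_scores"]   # MCTS visit statistics, if present
    ver = batch["selfplay_ver"]     # model version that generated the games
    # ... compute policy/value losses from (s, target_a, scores, winner) ...
    return None                     # reply=None above, so nothing is sent back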
Example #15
0
class CommonLoader:
    def __init__(self, module):
        self.context_args = ContextArgs()
        self.more_labels = MoreLabels()
        self.module = module

        basic_define_args = [
            ("handicap_level", 0),
            ("players", dict(type=str, help=";-separated player infos. For example: type=AI_NN,fs=50,args=backup/AI_SIMPLE|decay/0.99|start/1000,fow=True;type=AI_SIMPLE,fs=50")),
            ("max_tick", dict(type=int, default=30000, help="Maximal tick")),
            ("shuffle_player", dict(action="store_true")),
            ("num_frames_in_state", 1),
            ("max_unit_cmd", 1),
            ("seed", 0),
            ("actor_only", dict(action="store_true")),
            ("model_no_spatial", dict(action="store_true")), # TODO, put it to model
            ("save_replay_prefix", dict(type=str, default=None)),
            ("output_file", dict(type=str, default=None)),
            ("cmd_dumper_prefix", dict(type=str, default=None)),
            ("gpu", dict(type=int, help="gpu to use", default=None)),
        ]

        self.args = ArgsProvider(
            call_from = self,
            define_args = basic_define_args + self._define_args(),
            more_args = ["num_games", "batchsize", "T"],
            child_providers = [ self.context_args.args, self.more_labels.args ]
        )

    def _set_key(self, ai_options, key, value):
        if not hasattr(ai_options, key):
            print("AIOptions does not have key = " + key)
            return

        # Can we automate this?
        bool_convert = dict(t=True, true=True, f=False, false=False)

        try:
            if key == "fow":
                setattr(ai_options, key, bool_convert[value.lower()])
            elif key == "name" or key == "args" or key == "type":
                setattr(ai_options, key, value)
            else:
                setattr(ai_options, key, int(value))
        except (ValueError, KeyError):
            print("Value error! key = " + str(key) + " value = " + str(value))
            sys.exit(1)

    def _parse_players(self, opt, player_names):
        players_str = str(self.args.players)
        if players_str[0] == "\"" and players_str[-1] == "\"":
            players_str = players_str[1:-1]

        player_infos = []
        for i, player in enumerate(players_str.split(";")):
            ai_options = self.module.AIOptions()
            ai_options.num_frames_in_state = self.args.num_frames_in_state
            info = dict()
            for item in player.split(","):
                key, value = item.split("=")
                self._set_key(ai_options, key, value)
                info[key] = value

            if player_names is not None:
                self._set_key(ai_options, "name", player_names[i])
                info["name"] = player_names[i]

            opt.AddAIOptions(ai_options)
            player_infos.append(info)

        return player_infos

    def _init_gc(self, player_names=None):
        args = self.args

        co = self.module.ContextOptions()
        self.context_args.initialize(co)

        opt = self.module.PythonOptions()
        opt.seed = args.seed
        opt.shuffle_player = args.shuffle_player
        opt.max_unit_cmd = args.max_unit_cmd
        opt.max_tick = args.max_tick
        # [TODO] Put it to TD.
        opt.handicap_level = args.handicap_level

        player_infos = self._parse_players(opt, player_names)

        # opt.output_filename = b"simulators.txt"
        # opt.output_filename = b"cout"
        if args.save_replay_prefix is not None:
            opt.save_replay_prefix = args.save_replay_prefix.encode('ascii')
        if args.output_file is not None:
            opt.output_filename = args.output_file.encode("ascii")
        if args.cmd_dumper_prefix is not None:
            opt.cmd_dumper_prefix = args.cmd_dumper_prefix.encode("ascii")

        print("Options:")
        opt.Print()

        print("ContextOptions:")
        co.print()

        GC = self.module.GameContext(co, opt)
        self._on_gc(GC)

        params = GC.GetParams()
        print("Version: ", GC.Version())
        print("Num Actions: ", params["num_action"])
        print("Num unittype: ", params["num_unit_type"])
        print("num planes: ", params["num_planes"])
        params["rts_engine_version"] = GC.Version()
        params["players"] = player_infos

        return co, GC, params

    def _define_args(self):
        return []

    def _on_gc(self, GC):
        pass

    @abc.abstractmethod
    def _get_train_spec(self):
        pass

    @abc.abstractmethod
    def _get_actor_spec(self):
        pass

    def initialize(self):
        co, GC, params = self._init_gc()
        args = self.args

        desc = {}
        # For the actor model no reward is needed; we only want to feed in the input and get back a distribution over actions.
        # The sampled action and value will be filled in from the reply.
        desc["actor"] = self._get_actor_spec()

        if not args.actor_only:
            # For training we want the input, the action and value (filled in by the actor models), and the reward.
            desc["train"] = self._get_train_spec()

        self.more_labels.add_labels(desc)

        params.update(dict(
            num_group = 1 if args.actor_only else 2,
            action_batchsize = int(desc["actor"]["batchsize"]),
            train_batchsize = int(desc["train"]["batchsize"]) if not args.actor_only else None,
            T = args.T,
            model_no_spatial = args.model_no_spatial
        ))

        return GCWrapper(GC, co, desc, gpu=args.gpu, use_numpy=False, params=params)

    def initialize_selfplay(self):
        args = self.args
        reference_name = "reference"
        train_name = "train"

        co, GC, params = self._init_gc(player_names=[train_name, reference_name])

        desc = {}
        # For the actor model no reward is needed; we only want to feed in the input and get back a distribution over actions.
        # The sampled action and value will be filled in from the reply.
        desc["actor0"] = self._get_actor_spec()
        desc["actor1"] = self._get_actor_spec()

        desc["actor0"]["name"] = reference_name
        desc["actor1"]["name"] = train_name

        if not args.actor_only:
            # For training we want the input, the action and value (filled in by the actor models), and the reward.
            desc["train1"] = self._get_train_spec()
            desc["train1"]["name"] = train_name

        self.more_labels.add_labels(desc)

        params.update(dict(
            num_group = 1 if args.actor_only else 2,
            action_batchsize = int(desc["actor0"]["batchsize"]),
            train_batchsize = int(desc["train1"]["batchsize"]) if not args.actor_only else None,
            T = args.T,
            model_no_spatial = args.model_no_spatial
        ))

        return GCWrapper(GC, co, desc, gpu=args.gpu, use_numpy=False, params=params)

    def initialize_reduced_service(self):
        args = self.args

        reference_name = "reference"
        train_name = "train"
        co, GC, params = self._init_gc(player_names=[train_name, reference_name])

        desc = {}
        # For the actor model no reward is needed; we only want to feed in the input and get back a distribution over actions.
        # The sampled action and value will be filled in from the reply.
        desc["reduced_project"] = self._get_reduced_project()
        desc["reduced_forward"] = self._get_reduced_forward()
        desc["reduced_predict"] = self._get_reduced_predict()
        if params["players"][1]["type"] == "AI_NN":
            desc["actor"] = self._get_actor_spec()
            desc["actor"]["batchsize"] //= 2
            desc["actor"]["name"] = "reference"

        self.more_labels.add_labels(desc)

        return GCWrapper(GC, co, desc, gpu=args.gpu, use_numpy=False, params=params)
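To make the abstract pieces of CommonLoader concrete, here is a minimal sketch of a subclass. It is not taken from the original source: the spec layout simply mirrors the desc entries shown in the earlier examples (a batchsize plus input/reply dicts with T and keys), and the key names "s", "a", "pi", "V", "r" as well as the minirts module are illustrative assumptions.

class MyLoader(CommonLoader):
    def _define_args(self):
        # Extra command-line arguments specific to this loader, if any.
        return []

    def _get_actor_spec(self):
        return dict(
            batchsize=self.args.batchsize,
            input=dict(T=1, keys=set(["s"])),
            reply=dict(T=1, keys=set(["pi", "V", "a"])),
            name="actor",
        )

    def _get_train_spec(self):
        return dict(
            batchsize=self.args.batchsize,
            input=dict(T=self.args.T, keys=set(["s", "a", "pi", "V", "r"])),
            reply=None,
            name="train",
        )

loader = MyLoader(minirts)    # `minirts` stands in for the game engine module
GC = loader.initialize()      # returns a GCWrapper, as in initialize() above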