def __init__(self):
    self.context_args = ContextArgs()
    self.more_labels = MoreLabels()

    self.args = ArgsProvider(
        call_from=self,
        define_args=[
            ("actor_only", dict(action="store_true")),
            ("list_file", "./train.lst"),
            ("verbose", dict(action="store_true")),
            ("data_aug", dict(type=int, default=-1,
                              help="specify data augmentation, 0-7, -1 means random")),
            ("ratio_pre_moves", dict(
                type=float, default=0,
                help="how many moves to perform in each thread, before we use the data to train the model")),
            ("start_ratio_pre_moves", dict(
                type=float, default=0.5,
                help="how many moves to perform in each thread, before we use the first sgf file to train the model")),
            ("num_games_per_thread", dict(
                type=int, default=5,
                help="number of concurrent games per thread, used to increase diversity of games")),
            ("move_cutoff", dict(type=int, default=-1, help="Cutoff ply in replay")),
            ("online", dict(action="store_true", help="Set game to online mode")),
            ("gpu", dict(type=int, default=None)),
        ],
        more_args=["batchsize", "T"],
        child_providers=[self.context_args.args, self.more_labels.args])

class Loader(object):
    @classmethod
    def get_option_spec(cls):
        spec = PyOptionSpec()
        spec.addBoolOption('actor_only', 'TODO: fill this help message in', False)
        spec.addStrListOption(
            'list_files',
            'Provide a list of json files for offline training',
            [])
        spec.addIntOption('port', 'TODO: fill this help message in', 5556)
        spec.addStrOption('server_addr', 'TODO: fill this help message in', '')
        spec.addStrOption('server_id', 'TODO: fill this help message in', '')
        spec.addIntOption('q_min_size', 'TODO: fill this help message in', 10)
        spec.addIntOption('q_max_size', 'TODO: fill this help message in', 1000)
        spec.addIntOption('num_reader', 'TODO: fill this help message in', 50)
        spec.addIntOption('num_reset_ranking', 'TODO: fill this help message in', 5000)
        spec.addIntOption(
            'client_max_delay_sec',
            'Maximum amount of allowed delays in sec. If the client '
            'didn\'t respond after that, we think it is dead.',
            1200)
        spec.addBoolOption('verbose', 'TODO: fill this help message in', False)
        spec.addBoolOption('keep_prev_selfplay', 'TODO: fill this help message in', False)
        spec.addIntOption(
            'num_games_per_thread',
            ('For offline mode, it is the number of concurrent games per '
             'thread, used to increase diversity of games; for selfplay mode, '
             'it is the number of games played at each thread, and after that '
             'we need to call restartAllGames() to resume.'),
            -1)
        spec.addIntOption('expected_num_clients', 'Expected number of clients', -1)
        spec.addIntOption('checkers_num_future_actions', 'TODO: fill this help message in', 1)
        spec.addStrOption('mode', 'TODO: fill this help message in', 'play')
        spec.addBoolOption('black_use_policy_network_only', 'TODO: fill this help message in', False)
        spec.addBoolOption('white_use_policy_network_only', 'TODO: fill this help message in', False)
        spec.addBoolOption('use_mcts', 'TODO: fill this help message in', False)
        spec.addBoolOption('use_mcts_ai2', 'TODO: fill this help message in', False)
        spec.addFloatOption(
            'white_puct',
            'PUCT for white when it is > 0.0. If it is -1 then we use '
            'the same puct for both sides (specified by mcts_options). '
            'A HACK to use different puct for different models. Should '
            'be replaced by a more systematic approach.',
            -1.0)
        spec.addIntOption('white_mcts_rollout_per_batch', 'white mcts rollout per batch', -1)
        spec.addIntOption('white_mcts_rollout_per_thread', 'white mcts rollout per thread', -1)
        spec.addStrOption('dump_record_prefix', 'TODO: fill this help message in', '')
        spec.addStrOption('selfplay_records_directory', 'TODO: fill this help message in', '')
        spec.addStrOption('eval_records_directory', 'TODO: fill this help message in', '')
        spec.addStrOption('records_buffer_directory', 'TODO: fill this help message in', '')
        spec.addIntOption('policy_distri_cutoff', 'first N moves will be played randomly', 0)
        spec.addIntOption('selfplay_timeout_usec', 'TODO: fill this help message in', 0)
        spec.addIntOption('gpu', 'TODO: fill this help message in', -1)
        spec.addBoolOption('policy_distri_training_for_all', 'TODO: fill this help message in', False)
        spec.addBoolOption('parameter_print', 'TODO: fill this help message in', True)
        spec.addIntOption('batchsize', 'batch size', 128)
        spec.addIntOption('batchsize2', 'batch size', -1)
        spec.addIntOption('T', 'number of timesteps', 6)
        spec.addIntOption(
            'selfplay_init_num',
            'Initial number of selfplay games to generate before training a new model',
            2000)
        spec.addIntOption(
            'selfplay_update_num',
            'Additional number of selfplay games to generate after a model is updated',
            1000)
        spec.addBoolOption('selfplay_async', 'Whether to use async mode in selfplay', False)
        spec.addIntOption(
            'eval_num_games',
            'number of evaluation games to be played to decide whether a model is better than the other',
            400)
        spec.addFloatOption('eval_winrate_thres', 'Win rate threshold for evaluation', 0.55)
        spec.addIntOption(
            'eval_old_model',
            ('If specified, then we directly switch to evaluation mode '
             'between the loaded model and the old model specified by this switch'),
            -1)
        spec.addStrOption(
            'eval_model_pair',
            'If specified for df_selfplay.py, then the two models will be evaluated on this client',
            '')
        spec.addBoolOption(
            'cheat_eval_new_model_wins_half',
            'When enabled, in evaluation mode, when the game finishes, the '
            'player with the most recent model gets 100% win rate half of the '
            'time. This is used to test the framework.',
            False)
        spec.addBoolOption(
            'cheat_selfplay_random_result',
            'When enabled, in selfplay mode the result of the game is random. '
            'This is used to test the framework.',
            False)
        spec.addBoolOption('human_plays_for_black', '', False)
        spec.addIntOption(
            'suicide_after_n_games',
            'return after n games have finished, -1 means it never ends',
            -1)

        spec.merge(PyOptionSpec.fromClasses((ContextArgs, MoreLabels)))
        return spec

    @auto_import_options
    def __init__(self, option_map):
        self.context_args = ContextArgs(option_map)
        self.more_labels = MoreLabels(option_map)

    def _set_params(self):
        co = american_checkers.ContextOptions()
        self.context_args.initialize(co)
        co.job_id = os.environ.get("job_id", "local")
        if self.options.parameter_print:
            co.print()

        game_opt = american_checkers.GameOptions()
        game_opt.seed = 0
        game_opt.list_files = self.options.list_files

        if self.options.server_addr:
            game_opt.server_addr = self.options.server_addr
        else:
            if self.options.server_id:
                game_opt.server_addr = addrs[self.options.server_id]
                game_opt.server_id = self.options.server_id
            else:
                game_opt.server_addr = ""
                game_opt.server_id = ""

        game_opt.port = self.options.port
        game_opt.mode = self.options.mode
        game_opt.use_mcts = self.options.use_mcts
        game_opt.use_mcts_ai2 = self.options.use_mcts_ai2
        game_opt.dump_record_prefix = self.options.dump_record_prefix

        game_opt.selfplay_records_directory = self.options.selfplay_records_directory
        if len(self.options.selfplay_records_directory) != 0 and \
                not os.path.exists(self.options.selfplay_records_directory):
            os.mkdir(self.options.selfplay_records_directory)

        game_opt.eval_records_directory = self.options.eval_records_directory
        if len(self.options.eval_records_directory) != 0 and \
                not os.path.exists(self.options.eval_records_directory):
            os.mkdir(self.options.eval_records_directory)

        game_opt.records_buffer_directory = self.options.records_buffer_directory
        if len(self.options.records_buffer_directory) != 0 and \
                not os.path.exists(self.options.records_buffer_directory):
            os.mkdir(self.options.records_buffer_directory)

        game_opt.policy_distri_training_for_all = \
            self.options.policy_distri_training_for_all
        game_opt.verbose = self.options.verbose
        game_opt.black_use_policy_network_only = \
            self.options.black_use_policy_network_only
        game_opt.white_use_policy_network_only = \
            self.options.white_use_policy_network_only
        game_opt.q_min_size = self.options.q_min_size
        game_opt.q_max_size = self.options.q_max_size
        game_opt.num_reader = self.options.num_reader
        game_opt.checkers_num_future_actions = self.options.checkers_num_future_actions
        game_opt.num_reset_ranking = self.options.num_reset_ranking
        game_opt.policy_distri_cutoff = self.options.policy_distri_cutoff
        game_opt.num_games_per_thread = self.options.num_games_per_thread
        game_opt.keep_prev_selfplay = self.options.keep_prev_selfplay
        game_opt.expected_num_clients = self.options.expected_num_clients
        game_opt.white_puct = self.options.white_puct
        game_opt.white_mcts_rollout_per_batch = \
            self.options.white_mcts_rollout_per_batch
        game_opt.white_mcts_rollout_per_thread = \
            self.options.white_mcts_rollout_per_thread
        game_opt.client_max_delay_sec = self.options.client_max_delay_sec
        game_opt.selfplay_init_num = self.options.selfplay_init_num
        game_opt.selfplay_update_num = self.options.selfplay_update_num
        game_opt.selfplay_async = self.options.selfplay_async
        game_opt.eval_num_games = self.options.eval_num_games
        game_opt.eval_thres = self.options.eval_winrate_thres
        game_opt.cheat_eval_new_model_wins_half = \
            self.options.cheat_eval_new_model_wins_half
        game_opt.cheat_selfplay_random_result = \
            self.options.cheat_selfplay_random_result

        if self.options.human_plays_for_black:
            game_opt.human_plays_for = 0
        else:
            game_opt.human_plays_for = 1

        self.max_batchsize = max(self.options.batchsize, self.options.batchsize2) \
            if self.options.batchsize2 > 0 \
            else self.options.batchsize
        co.batchsize = self.max_batchsize

        GC = american_checkers.GameContext(co, game_opt)

        if self.options.parameter_print:
            print("**************** GameOptions ****************")
            print(game_opt.info())
            print("*********************************************")
            print("Version: ", GC.ctx().version())
            print("*********************************************")

        return co, GC, game_opt

    def initialize(self):
        co, GC, game_opt = self._set_params()
        params = GC.getParams()
        if self.options.parameter_print:
            print("Mode: ", game_opt.mode)
            print("checkers_num_action: ", params["checkers_num_action"])

        desc = {}
        if self.options.mode == "play":
            desc["actor_white"] = dict(
                input=["s", "game_idx"],
                reply=["pi", "a", "V"],
                batchsize=1,
            )
            desc["actor_black"] = dict(
                input=["s"],
                reply=["pi", "V", "a", "rv"],
                timeout_usec=10,
                batchsize=co.mcts_options.num_rollouts_per_batch)
        elif self.options.mode == "selfplay":
            desc["game_end"] = dict(batchsize=1)
            desc["game_start"] = dict(
                batchsize=1,
                input=["white_ver", "black_ver"],
                reply=None)
            # checkers
            desc["actor_white"] = dict(
                input=["s"],
                reply=["pi", "V", "a", "rv"],
                batchsize=self.options.batchsize2
                if self.options.batchsize2 > 0
                else self.options.batchsize,
                timeout_usec=self.options.selfplay_timeout_usec,
            )
            desc["actor_black"] = dict(
                input=["s"],
                reply=["pi", "V", "a", "rv"],
                batchsize=self.options.batchsize2
                if self.options.batchsize2 > 0
                else self.options.batchsize,
                timeout_usec=self.options.selfplay_timeout_usec,
            )
        elif self.options.mode == "train" or self.options.mode == "offline_train":
            desc["train"] = dict(
                input=["s", "offline_a", "winner", "mcts_scores",
                       "move_idx", "selfplay_ver"],
                reply=None)
            desc["train_ctrl"] = dict(
                input=["selfplay_ver"],
                reply=None,
                batchsize=1)
        else:
            raise ValueError("No such mode: " + self.options.mode)

        params.update(dict(
            num_group=1 if self.options.actor_only else 2,
            T=self.options.T,
        ))

        self.more_labels.add_labels(desc)

        return GCWrapper(
            GC,
            self.max_batchsize,
            desc,
            num_recv=2,
            gpu=(self.options.gpu
                 if (self.options.gpu is not None and self.options.gpu >= 0)
                 else None),
            use_numpy=False,
            params=params,
            verbose=self.options.parameter_print)

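
# The function below is a minimal, hypothetical driver for the Loader above and is not part
# of the original code. It assumes that PyOptionSpec.parse() returns an option map suitable
# for the Loader constructor and that the GCWrapper returned by initialize() exposes
# reg_callback()/start()/run()/stop(); the descriptor keys ("actor_black", ...) come from
# the desc dict defined in initialize() above. Adjust names to the actual API before use.
def _example_selfplay_driver():
    spec = Loader.get_option_spec()
    option_map = spec.parse()  # assumed command-line parsing entry point
    loader = Loader(option_map)
    GC = loader.initialize()

    def on_actor_black(batch):
        # A real actor would run the policy/value network on batch["s"] here and
        # return the keys listed in desc["actor_black"]["reply"] ("pi", "V", "a", "rv").
        raise NotImplementedError

    GC.reg_callback("actor_black", on_actor_black)
    GC.start()
    try:
        while True:
            GC.run()  # pumps batches from the game threads to the registered callbacks
    finally:
        GC.stop()
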
class Loader(object):
    @classmethod
    def get_option_spec(cls):
        spec = PyOptionSpec()
        spec.addStrOption('preload_sgf', 'TODO: fill this help message in', '')
        spec.addIntOption('preload_sgf_move_to', 'TODO: fill this help message in', -1)
        spec.addBoolOption('actor_only', 'TODO: fill this help message in', False)
        spec.addStrListOption(
            'list_files',
            'Provide a list of json files for offline training',
            [])
        spec.addIntOption('port', 'TODO: fill this help message in', 5556)
        spec.addStrOption('server_addr', 'TODO: fill this help message in', '')
        spec.addStrOption('server_id', 'TODO: fill this help message in', '')
        spec.addIntOption('q_min_size', 'TODO: fill this help message in', 10)
        spec.addIntOption('q_max_size', 'TODO: fill this help message in', 1000)
        spec.addIntOption('num_reader', 'TODO: fill this help message in', 50)
        spec.addIntOption('num_reset_ranking', 'TODO: fill this help message in', 5000)
        spec.addIntOption(
            'client_max_delay_sec',
            'Maximum amount of allowed delays in sec. If the client '
            'didn\'t respond after that, we think it is dead.',
            1200)
        spec.addBoolOption('verbose', 'TODO: fill this help message in', False)
        spec.addBoolOption('keep_prev_selfplay', 'TODO: fill this help message in', False)
        spec.addBoolOption('print_result', 'TODO: fill this help message in', False)
        spec.addIntOption('data_aug', 'specify data augmentation, 0-7, -1 means random', -1)
        spec.addIntOption(
            'ratio_pre_moves',
            'how many moves to perform in each thread, before we use the data to train the model',
            0)
        spec.addFloatOption(
            'start_ratio_pre_moves',
            'how many moves to perform in each thread, before we use the first sgf file to train the model',
            0.5)
        spec.addIntOption(
            'num_games_per_thread',
            ('For offline mode, it is the number of concurrent games per '
             'thread, used to increase diversity of games; for selfplay mode, '
             'it is the number of games played at each thread, and after that '
             'we need to call restartAllGames() to resume.'),
            -1)
        spec.addIntOption('expected_num_clients', 'Expected number of clients', -1)
        spec.addIntOption('num_future_actions', 'TODO: fill this help message in', 1)
        spec.addIntOption('move_cutoff', 'Cutoff ply in replay', -1)
        spec.addStrOption('mode', 'TODO: fill this help message in', 'online')
        spec.addBoolOption('black_use_policy_network_only', 'TODO: fill this help message in', False)
        spec.addBoolOption('white_use_policy_network_only', 'TODO: fill this help message in', False)
        spec.addIntOption('ply_pass_enabled', 'TODO: fill this help message in', 0)
        spec.addBoolOption('use_mcts', 'TODO: fill this help message in', False)
        spec.addBoolOption('use_mcts_ai2', 'TODO: fill this help message in', False)
        spec.addFloatOption(
            'white_puct',
            'PUCT for white when it is > 0.0. If it is -1 then we use '
            'the same puct for both sides (specified by mcts_options). '
            'A HACK to use different puct for different models. Should '
            'be replaced by a more systematic approach.',
            -1.0)
        spec.addIntOption('white_mcts_rollout_per_batch', 'white mcts rollout per batch', -1)
        spec.addIntOption('white_mcts_rollout_per_thread', 'white mcts rollout per thread', -1)
        spec.addBoolOption('use_df_feature', 'TODO: fill this help message in', False)
        spec.addStrOption('dump_record_prefix', 'TODO: fill this help message in', '')
        spec.addIntOption('policy_distri_cutoff', 'TODO: fill this help message in', 0)
        spec.addFloatOption('resign_thres', 'TODO: fill this help message in', 0.0)
        spec.addBoolOption('following_pass', 'TODO: fill this help message in', False)
        spec.addIntOption('selfplay_timeout_usec', 'TODO: fill this help message in', 0)
        spec.addIntOption('gpu', 'TODO: fill this help message in', -1)
        spec.addBoolOption('policy_distri_training_for_all', 'TODO: fill this help message in', False)
        spec.addBoolOption('parameter_print', 'TODO: fill this help message in', True)
        spec.addIntOption('batchsize', 'batch size', 128)
        spec.addIntOption('batchsize2', 'batch size', -1)
        spec.addIntOption('T', 'number of timesteps', 6)
        spec.addIntOption(
            'selfplay_init_num',
            'Initial number of selfplay games to generate before training a new model',
            2000)
        spec.addIntOption(
            'selfplay_update_num',
            'Additional number of selfplay games to generate after a model is updated',
            1000)
        spec.addBoolOption('selfplay_async', 'Whether to use async mode in selfplay', False)
        spec.addIntOption(
            'eval_num_games',
            'number of evaluation games to be played to decide whether a model is better than the other',
            400)
        spec.addFloatOption('eval_winrate_thres', 'Win rate threshold for evaluation', 0.55)
        spec.addIntOption(
            'eval_old_model',
            ('If specified, then we directly switch to evaluation mode '
             'between the loaded model and the old model specified by this switch'),
            -1)
        spec.addStrOption(
            'eval_model_pair',
            'If specified for df_selfplay.py, then the two models will be evaluated on this client',
            '')
        spec.addStrOption('comment', 'Comment for this run', '')
        spec.addBoolOption(
            'cheat_eval_new_model_wins_half',
            'When enabled, in evaluation mode, when the game finishes, the '
            'player with the most recent model gets 100% win rate half of the '
            'time. This is used to test the framework.',
            False)
        spec.addBoolOption(
            'cheat_selfplay_random_result',
            'When enabled, in selfplay mode the result of the game is random. '
            'This is used to test the framework.',
            False)
        spec.addIntOption(
            'suicide_after_n_games',
            'return after n games have finished, -1 means it never ends',
            -1)

        spec.merge(PyOptionSpec.fromClasses((ContextArgs, MoreLabels)))
        return spec

    @auto_import_options
    def __init__(self, option_map):
        self.context_args = ContextArgs(option_map)
        self.more_labels = MoreLabels(option_map)

    def _set_params(self):
        co = go.ContextOptions()
        self.context_args.initialize(co)
        co.job_id = os.environ.get("job_id", "local")
        if self.options.parameter_print:
            co.print()

        opt = go.GameOptions()
        opt.seed = 0
        opt.list_files = self.options.list_files

        if self.options.server_addr:
            opt.server_addr = self.options.server_addr
        else:
            if self.options.server_id:
                opt.server_addr = addrs[self.options.server_id]
                opt.server_id = self.options.server_id
            else:
                opt.server_addr = ""
                opt.server_id = ""

        opt.port = self.options.port
        opt.mode = self.options.mode
        opt.use_mcts = self.options.use_mcts
        opt.use_mcts_ai2 = self.options.use_mcts_ai2
        opt.use_df_feature = self.options.use_df_feature
        opt.dump_record_prefix = self.options.dump_record_prefix
        opt.policy_distri_training_for_all = \
            self.options.policy_distri_training_for_all
        opt.verbose = self.options.verbose
        opt.black_use_policy_network_only = \
            self.options.black_use_policy_network_only
        opt.white_use_policy_network_only = \
            self.options.white_use_policy_network_only
        opt.data_aug = self.options.data_aug
        opt.ratio_pre_moves = self.options.ratio_pre_moves
        opt.q_min_size = self.options.q_min_size
        opt.q_max_size = self.options.q_max_size
        opt.num_reader = self.options.num_reader
        opt.start_ratio_pre_moves = self.options.start_ratio_pre_moves
        opt.ply_pass_enabled = self.options.ply_pass_enabled
        opt.num_future_actions = self.options.num_future_actions
        opt.num_reset_ranking = self.options.num_reset_ranking
        opt.move_cutoff = self.options.move_cutoff
        opt.policy_distri_cutoff = self.options.policy_distri_cutoff
        opt.num_games_per_thread = self.options.num_games_per_thread
        opt.following_pass = self.options.following_pass
        opt.resign_thres = self.options.resign_thres
        opt.preload_sgf = self.options.preload_sgf
        opt.preload_sgf_move_to = self.options.preload_sgf_move_to
        opt.keep_prev_selfplay = self.options.keep_prev_selfplay
        opt.expected_num_clients = self.options.expected_num_clients
        opt.white_puct = self.options.white_puct
        opt.white_mcts_rollout_per_batch = \
            self.options.white_mcts_rollout_per_batch
        opt.white_mcts_rollout_per_thread = \
            self.options.white_mcts_rollout_per_thread
        opt.client_max_delay_sec = self.options.client_max_delay_sec
        opt.print_result = self.options.print_result
        opt.selfplay_init_num = self.options.selfplay_init_num
        opt.selfplay_update_num = self.options.selfplay_update_num
        opt.selfplay_async = self.options.selfplay_async
        opt.eval_num_games = self.options.eval_num_games
        opt.eval_thres = self.options.eval_winrate_thres
        opt.cheat_eval_new_model_wins_half = \
            self.options.cheat_eval_new_model_wins_half
        opt.cheat_selfplay_random_result = \
            self.options.cheat_selfplay_random_result

        self.max_batchsize = max(self.options.batchsize, self.options.batchsize2) \
            if self.options.batchsize2 > 0 \
            else self.options.batchsize
        co.batchsize = self.max_batchsize

        GC = go.GameContext(co, opt)

        if self.options.parameter_print:
            print("**** Options ****")
            print(opt.info())
            print("*****************")
            print("Version: ", GC.ctx().version())

        return co, GC, opt

    def initialize(self):
        co, GC, opt = self._set_params()
        params = GC.getParams()
        if self.options.parameter_print:
            print("Mode: ", opt.mode)
            print("Num Actions: ", params["num_action"])

        desc = {}
        if self.options.mode == "online":
            desc["human_actor"] = dict(
                input=["s"],
                reply=["pi", "a", "V"],
                batchsize=1,
            )
            # Used for MCTS/Direct play.
            desc["actor_black"] = dict(
                input=["s"],
                reply=["pi", "V", "a", "rv"],
                timeout_usec=10,
                batchsize=co.mcts_options.num_rollouts_per_batch)
        elif self.options.mode == "selfplay":
            # Used for MCTS/Direct play.
            desc["actor_black"] = dict(
                input=["s"],
                reply=["pi", "V", "a", "rv"],
                batchsize=self.options.batchsize,
                timeout_usec=self.options.selfplay_timeout_usec,
            )
            desc["actor_white"] = dict(
                input=["s"],
                reply=["pi", "V", "a", "rv"],
                batchsize=self.options.batchsize2
                if self.options.batchsize2 > 0
                else self.options.batchsize,
                timeout_usec=self.options.selfplay_timeout_usec,
            )
            desc["game_end"] = dict(batchsize=1)
            desc["game_start"] = dict(
                batchsize=1,
                input=["black_ver", "white_ver"],
                reply=None)
        elif self.options.mode == "train" or \
                self.options.mode == "offline_train":
            desc["train"] = dict(
                input=["s", "offline_a", "winner", "mcts_scores",
                       "move_idx", "selfplay_ver"],
                reply=None)
            desc["train_ctrl"] = dict(
                input=["selfplay_ver"],
                reply=None,
                batchsize=1)
        else:
            raise ValueError("No such mode: " + self.options.mode)

        params.update(dict(
            num_group=1 if self.options.actor_only else 2,
            T=self.options.T,
        ))

        self.more_labels.add_labels(desc)

        return GCWrapper(
            GC,
            self.max_batchsize,
            desc,
            num_recv=2,
            gpu=(self.options.gpu
                 if (self.options.gpu is not None and self.options.gpu >= 0)
                 else None),
            use_numpy=False,
            params=params,
            verbose=self.options.parameter_print)

class CommonLoader:
    def __init__(self, module):
        self.context_args = ContextArgs()
        self.more_labels = MoreLabels()
        self.module = module

        basic_define_args = [
            ("handicap_level", 0),
            ("players", dict(
                type=str,
                help=";-separated player infos. For example: "
                     "type=AI_NN,fs=50,args=backup/AI_SIMPLE|decay/0.99|start/1000,fow=True;"
                     "type=AI_SIMPLE,fs=50")),
            ("max_tick", dict(type=int, default=30000, help="Maximum tick")),
            ("shuffle_player", dict(action="store_true")),  # false
            ("num_frames_in_state", 1),
            ("max_unit_cmd", 1),
            ("seed", 0),
            ("actor_only", dict(action="store_true")),  # false
            ("model_no_spatial", dict(action="store_true")),  # TODO, put it to model. false
            ("save_replay_prefix", dict(type=str, default=None)),
            ("output_file", dict(type=str, default=None)),
            ("cmd_dumper_prefix", dict(type=str, default=None)),
            ("gpu", dict(type=int, help="gpu to use", default=None)),
        ]

        self.args = ArgsProvider(
            call_from=self,
            define_args=basic_define_args + self._define_args(),
            more_args=["num_games", "batchsize", "T"],
            child_providers=[self.context_args.args, self.more_labels.args]
        )

    def _set_key(self, ai_options, key, value):
        if not hasattr(ai_options, key):
            print("AIOptions does not have key = " + key)
            return

        # Can we automate this?
        bool_convert = dict(t=True, true=True, f=False, false=False)

        try:
            if key == "fow":
                setattr(ai_options, key, bool_convert[value.lower()])
            elif key == "name" or key == "args" or key == "type":
                setattr(ai_options, key, value)
            else:
                setattr(ai_options, key, int(value))
        except ValueError:
            print("Value error! key = " + str(key) + " value = " + str(value))
            sys.exit(1)

    def _parse_players(self, opt, player_names):
        players_str = str(self.args.players)
        if players_str[0] == "\"" and players_str[-1] == "\"":
            players_str = players_str[1:-1]

        player_infos = []

        for i, player in enumerate(players_str.split(";")):
            ai_options = self.module.AIOptions()
            ai_options.num_frames_in_state = self.args.num_frames_in_state

            info = dict()
            for item in player.split(","):
                key, value = item.split("=")
                self._set_key(ai_options, key, value)
                info[key] = value

            if player_names is not None:
                self._set_key(ai_options, "name", player_names[i])
                info["name"] = player_names[i]

            opt.AddAIOptions(ai_options)
            player_infos.append(info)

        return player_infos

    def _init_gc(self, player_names=None):
        args = self.args

        co = self.module.ContextOptions()  # See elf/python_options_utils_cpp.h
        self.context_args.initialize(co)

        opt = self.module.PythonOptions()
        opt.seed = args.seed
        opt.shuffle_player = args.shuffle_player
        opt.max_unit_cmd = args.max_unit_cmd
        opt.max_tick = args.max_tick
        # [TODO] Put it to TD.
        opt.handicap_level = args.handicap_level

        # Parse the players string and register each one as an AI.
        player_infos = self._parse_players(opt, player_names)

        # opt.output_filename = b"simulators.txt"
        # opt.output_filename = b"cout"
        if args.save_replay_prefix is not None:
            opt.save_replay_prefix = args.save_replay_prefix.encode('ascii')
        if args.output_file is not None:
            opt.output_filename = args.output_file.encode("ascii")
        if args.cmd_dumper_prefix is not None:
            opt.cmd_dumper_prefix = args.cmd_dumper_prefix.encode("ascii")

        print("Options:")
        opt.Print()

        print("ContextOptions:")
        co.print()

        GC = self.module.GameContext(co, opt)
        self._on_gc(GC)

        params = GC.GetParams()
        print("Version: ", GC.Version())
        print("Num Actions: ", params["num_action"])
        print("Num unittype: ", params["num_unit_type"])
        print("num planes: ", params["num_planes"])

        params["rts_engine_version"] = GC.Version()
        params["players"] = player_infos

        return co, GC, params

    def _define_args(self):
        return []

    def _on_gc(self, GC):
        pass

    @abc.abstractmethod
    def _get_train_spec(self):
        pass

    @abc.abstractmethod
    def _get_actor_spec(self):
        pass

    def initialize(self):
        co, GC, params = self._init_gc()
        args = self.args

        desc = {}
        # For the actor model, no reward is needed; we only want to get the input and
        # return a distribution over actions. The sampled action and value will be
        # filled in from the reply.
        desc["actor"] = self._get_actor_spec()

        if not args.actor_only:
            # For training, we want input, action (filled by the actor models),
            # value (filled by the actor models) and reward.
            desc["train"] = self._get_train_spec()

        self.more_labels.add_labels(desc)

        params.update(dict(
            num_group=1 if args.actor_only else 2,
            action_batchsize=int(desc["actor"]["batchsize"]),
            train_batchsize=int(desc["train"]["batchsize"]) if not args.actor_only else None,
            T=args.T,
            model_no_spatial=args.model_no_spatial
        ))

        return GCWrapper(GC, co, desc, gpu=args.gpu, use_numpy=False, params=params)

    def initialize_selfplay(self):
        args = self.args
        reference_name = "reference"
        train_name = "train"

        co, GC, params = self._init_gc(player_names=[train_name, reference_name])

        desc = {}
        # For the actor model, no reward is needed; we only want to get the input and
        # return a distribution over actions. The sampled action and value will be
        # filled in from the reply.
        desc["actor0"] = self._get_actor_spec()
        desc["actor1"] = self._get_actor_spec()

        desc["actor0"]["name"] = reference_name
        desc["actor1"]["name"] = train_name

        if not args.actor_only:
            # For training, we want input, action (filled by the actor models),
            # value (filled by the actor models) and reward.
            desc["train1"] = self._get_train_spec()
            desc["train1"]["name"] = train_name

        self.more_labels.add_labels(desc)

        params.update(dict(
            num_group=1 if args.actor_only else 2,
            action_batchsize=int(desc["actor0"]["batchsize"]),
            train_batchsize=int(desc["train1"]["batchsize"]) if not args.actor_only else None,
            T=args.T,
            model_no_spatial=args.model_no_spatial
        ))

        return GCWrapper(GC, co, desc, gpu=args.gpu, use_numpy=False, params=params)

    def initialize_reduced_service(self):
        args = self.args
        reference_name = "reference"
        train_name = "train"

        co, GC, params = self._init_gc(player_names=[train_name, reference_name])

        desc = {}
        # For the actor model, no reward is needed; we only want to get the input and
        # return a distribution over actions. The sampled action and value will be
        # filled in from the reply.
        desc["reduced_project"] = self._get_reduced_project()
        desc["reduced_forward"] = self._get_reduced_forward()
        desc["reduced_predict"] = self._get_reduced_predict()
        if params["players"][1]["type"] == "AI_NN":
            desc["actor"] = self._get_actor_spec()
            desc["actor"]["batchsize"] //= 2
            desc["actor"]["name"] = "reference"

        self.more_labels.add_labels(desc)

        return GCWrapper(GC, co, desc, gpu=args.gpu, use_numpy=False, params=params)

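
# A minimal, hypothetical subclass of CommonLoader, shown only to illustrate the shape of
# the descriptors that _get_actor_spec()/_get_train_spec() are expected to return; it is
# not part of the original code. The dict(T=..., keys=set(...)) form mirrors the descriptors
# used by the other loaders in this file, but the exact key names ("s", "pi", "V", "a", "r",
# "terminal") depend on the concrete game module and are assumptions here.
class ExampleLoader(CommonLoader):
    def _get_actor_spec(self):
        # One-step actor batch: state in, policy/value/sampled action out.
        return dict(
            batchsize=self.args.batchsize,
            input=dict(T=1, keys=set(["s"])),
            reply=dict(T=1, keys=set(["pi", "V", "a"])),
        )

    def _get_train_spec(self):
        # T-step training batch; no reply is sent back to the game threads.
        return dict(
            batchsize=self.args.batchsize,
            input=dict(T=self.args.T, keys=set(["s", "a", "pi", "V", "r", "terminal"])),
            reply=None,
        )
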
class Loader(object):
    @classmethod
    def get_option_spec(cls):
        spec = PyOptionSpec()
        go.getClientPredefined(spec.getOptionSpec())

        spec.addIntOption('gpu', 'GPU id to use', -1)
        spec.addStrOption(
            'eval_model_pair',
            'If specified for df_selfplay.py, then the two models will be evaluated on this client',
            '')
        spec.addStrOption('comment', 'Comment for this run', '')
        spec.addIntOption('selfplay_timeout_usec', 'Timeout used for MCTS', 10)
        spec.addBoolOption("parameter_print", "Print parameters", True)

        spec.merge(PyOptionSpec.fromClasses((MoreLabels, )))
        return spec

    @auto_import_options
    def __init__(self, option_map):
        self.more_labels = MoreLabels(option_map)
        self.option_map = option_map

    def initialize(self):
        job_id = os.environ.get("job_id", "local")
        opt = go.getClientOpt(self.option_map.getOptionSpec(), job_id)

        mode = getattr(self.options, "common.mode")
        batchsize = getattr(self.options, "common.base.batchsize")

        GC = elf.GameContext(opt.common.base)

        if mode not in ["online", "selfplay"]:
            raise ValueError("No such mode: " + mode)

        game_obj = go.Client(opt)
        game_obj.setGameContext(GC)
        params = game_obj.getParams()

        if self.options.parameter_print:
            print("**** Options ****")
            print(opt.info())
            print("*****************")
            print("Version: ", elf.version())
            print("Mode: ", mode)
            print("Num Actions: ", params["num_action"])

        desc = {}
        if mode == "online":
            desc["human_actor"] = dict(
                input=[],
                reply=["a", "timestamp"],
                batchsize=1,
            )
            # Used for MCTS/Direct play.
            desc["actor_black"] = dict(
                input=["s"],
                reply=["pi", "V", "a", "rv"],
                timeout_usec=10,
                batchsize=getattr(self.options, "common.mcts.num_rollout_per_batch"))
        elif mode == "selfplay":
            # Used for MCTS/Direct play.
            white_batchsize = self.options.white_mcts_rollout_per_batch
            if white_batchsize < 0:
                white_batchsize = batchsize

            desc["actor_black"] = dict(
                input=["s"],
                reply=["pi", "V", "a", "rv"],
                batchsize=batchsize,
                timeout_usec=self.options.selfplay_timeout_usec,
            )
            desc["actor_white"] = dict(
                input=["s"],
                reply=["pi", "V", "a", "rv"],
                batchsize=white_batchsize,
                timeout_usec=self.options.selfplay_timeout_usec,
            )
            desc["game_end"] = dict(batchsize=1)
            desc["game_start"] = dict(
                batchsize=1,
                input=["black_ver", "white_ver"],
                reply=None)

        self.more_labels.add_labels(desc)

        return GCWrapper(
            GC,
            game_obj,
            batchsize,
            desc,
            num_recv=8,
            default_gpu=(self.options.gpu
                         if (self.options.gpu is not None and self.options.gpu >= 0)
                         else None),
            use_numpy=False,
            params=params,
            verbose=self.options.parameter_print)

class Loader(object):
    @classmethod
    def get_option_spec(cls):
        spec = PyOptionSpec()
        spec.addStrOption('preload_sgf', 'TODO: fill this help message in', '')
        spec.addIntOption('preload_sgf_move_to', 'TODO: fill this help message in', -1)
        spec.addStrOption('mode', 'TODO: fill this help message in', 'online')
        spec.addBoolOption('actor_only', 'TODO: fill this help message in', False)
        spec.addIntOption('num_reset_ranking', 'TODO: fill this help message in', 5000)
        spec.addBoolOption('verbose', 'TODO: fill this help message in', False)
        spec.addBoolOption('print_result', 'TODO: fill this help message in', False)
        spec.addIntOption('data_aug', 'specify data augmentation, 0-7, -1 means random', -1)
        spec.addIntOption(
            'num_games_per_thread',
            ('For offline mode, it is the number of concurrent games per '
             'thread, used to increase diversity of games; for selfplay mode, '
             'it is the number of games played at each thread, and after that '
             'we need to call restartAllGames() to resume.'),
            -1)
        spec.addIntOption('num_future_actions', 'TODO: fill this help message in', 1)
        spec.addIntOption('move_cutoff', 'Cutoff ply in replay', -1)
        spec.addBoolOption('black_use_policy_network_only', 'TODO: fill this help message in', False)
        spec.addIntOption('ply_pass_enabled', 'TODO: fill this help message in', 0)
        spec.addBoolOption('use_mcts', 'TODO: fill this help message in', False)
        spec.addBoolOption('use_df_feature', 'TODO: fill this help message in', False)
        spec.addStrOption('dump_record_prefix', 'TODO: fill this help message in', '')
        spec.addFloatOption('resign_thres', 'TODO: fill this help message in', 0.0)
        spec.addBoolOption('following_pass', 'TODO: fill this help message in', False)
        spec.addIntOption('gpu', 'TODO: fill this help message in', -1)
        spec.addBoolOption('parameter_print', 'TODO: fill this help message in', True)
        spec.addIntOption('batchsize', 'batch size', 128)
        spec.addIntOption('batchsize2', 'batch size', -1)
        spec.addFloatOption('eval_winrate_thres', 'Win rate threshold for evaluation', 0.55)
        spec.addIntOption(
            'suicide_after_n_games',
            'return after n games have finished, -1 means it never ends',
            -1)

        spec.merge(PyOptionSpec.fromClasses((ContextArgs, MoreLabels)))
        return spec

    @auto_import_options
    def __init__(self, option_map):
        self.context_args = ContextArgs(option_map)
        self.more_labels = MoreLabels(option_map)

    def _set_params(self):
        co = go.ContextOptions()
        self.context_args.initialize(co)
        co.job_id = os.environ.get("job_id", "local")
        if self.options.parameter_print:
            co.print()

        opt = go.GameOptions()
        opt.seed = 0
        opt.mode = self.options.mode
        opt.use_mcts = self.options.use_mcts
        opt.use_df_feature = self.options.use_df_feature
        opt.dump_record_prefix = self.options.dump_record_prefix
        opt.verbose = self.options.verbose
        opt.black_use_policy_network_only = \
            self.options.black_use_policy_network_only
        opt.data_aug = self.options.data_aug
        opt.ply_pass_enabled = self.options.ply_pass_enabled
        opt.num_reset_ranking = self.options.num_reset_ranking
        opt.move_cutoff = self.options.move_cutoff
        opt.num_games_per_thread = self.options.num_games_per_thread
        opt.following_pass = self.options.following_pass
        opt.resign_thres = self.options.resign_thres
        opt.preload_sgf = self.options.preload_sgf
        opt.preload_sgf_move_to = self.options.preload_sgf_move_to
        opt.print_result = self.options.print_result

        self.max_batchsize = max(self.options.batchsize, self.options.batchsize2) \
            if self.options.batchsize2 > 0 \
            else self.options.batchsize
        co.batchsize = self.max_batchsize

        GC = go.GameContext(co, opt)

        if self.options.parameter_print:
            print("**** Options ****")
            print(opt.info())
            print("*****************")
            print("Version: ", GC.ctx().version())

        return co, GC, opt

    def initialize(self):
        co, GC, opt = self._set_params()
        params = GC.getParams()
        if self.options.parameter_print:
            print("Mode: ", opt.mode)
            print("Num Actions: ", params["num_action"])

        desc = {}
        if self.options.mode == "online":
            desc["human_actor"] = dict(
                input=["s"],
                reply=["pi", "a", "V"],
                batchsize=1,
            )
            # Used for MCTS/Direct play.
            desc["actor_black"] = dict(
                input=["s"],
                reply=["pi", "V", "a", "rv"],
                timeout_usec=10,
                batchsize=co.mcts_options.num_rollouts_per_batch)
        else:
            raise ValueError("No such mode: " + self.options.mode)

        params.update(dict(
            num_group=1 if self.options.actor_only else 2,
            T=self.options.T,
        ))

        self.more_labels.add_labels(desc)

        return GCWrapper(
            GC,
            self.max_batchsize,
            desc,
            num_recv=2,
            gpu=(self.options.gpu
                 if (self.options.gpu is not None and self.options.gpu >= 0)
                 else None),
            use_numpy=False,
            params=params,
            verbose=self.options.parameter_print)

class Loader:
    def __init__(self):
        self.context_args = ContextArgs()
        self.more_labels = MoreLabels()

        self.args = ArgsProvider(
            call_from=self,
            define_args=[
                ("actor_only", dict(action="store_true")),
                ("list_file", "./train.lst"),
                ("verbose", dict(action="store_true")),
                ("data_aug", dict(type=int, default=-1,
                                  help="specify data augmentation, 0-7, -1 means random")),
                ("ratio_pre_moves", dict(
                    type=float, default=0,
                    help="how many moves to perform in each thread, before we use the data to train the model")),
                ("start_ratio_pre_moves", dict(
                    type=float, default=0.5,
                    help="how many moves to perform in each thread, before we use the first sgf file to train the model")),
                ("num_games_per_thread", dict(
                    type=int, default=5,
                    help="number of concurrent games per thread, used to increase diversity of games")),
                ("move_cutoff", dict(type=int, default=-1, help="Cutoff ply in replay")),
                ("mode", "online"),
                ("use_mcts", dict(action="store_true")),
                ("gpu", dict(type=int, default=None)),
            ],
            more_args=["batchsize", "T"],
            child_providers=[self.context_args.args, self.more_labels.args]
        )

    def initialize(self):
        args = self.args

        co = go.ContextOptions()
        self.context_args.initialize(co)
        co.print()

        opt = go.GameOptions()
        opt.seed = 0
        opt.list_filename = args.list_file
        opt.mode = args.mode
        opt.use_mcts = args.use_mcts
        opt.verbose = args.verbose
        opt.data_aug = args.data_aug
        opt.ratio_pre_moves = args.ratio_pre_moves
        opt.start_ratio_pre_moves = args.start_ratio_pre_moves
        opt.move_cutoff = args.move_cutoff
        opt.num_games_per_thread = args.num_games_per_thread

        GC = go.GameContext(co, opt)
        print("Version: ", GC.Version())

        params = GC.GetParams()
        print("Num Actions: ", params["num_action"])

        desc = {}
        if args.mode == "online":
            desc["human_actor"] = dict(
                batchsize=args.batchsize,
                input=dict(T=1, keys=set(["s"])),
                reply=dict(T=1, keys=set(["pi", "a"])),
                name="human_actor",
            )
            # Used for MCTS/Direct play.
            desc["actor"] = dict(
                batchsize=args.batchsize,
                input=dict(T=1, keys=set(["s"])),
                reply=dict(T=1, keys=set(["pi", "V", "a"])),
                name="actor",
            )
        elif args.mode == "selfplay":
            # Used for MCTS/Direct play.
            desc["actor"] = dict(
                batchsize=args.batchsize,
                input=dict(T=1, keys=set(["s"])),
                reply=dict(T=1, keys=set(["pi", "V"])),
                name="actor",
                timeout_usec=10,
            )
        else:
            desc["train"] = dict(
                batchsize=args.batchsize,
                input=dict(T=args.T, keys=set(["s", "offline_a"])),
                reply=None
            )

        self.more_labels.add_labels(desc)

        params.update(dict(
            num_group=1 if args.actor_only else 2,
            T=args.T,
        ))

        return GCWrapper(GC, co, desc, gpu=args.gpu, use_numpy=False, params=params)

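
# Hypothetical wiring for the ArgsProvider-based Loader above; it is not part of the
# original code. It assumes the old-ELF pattern in which an ArgsProvider.Load(parser,
# providers)-style helper fills loader.args from the command line, and that the returned
# GCWrapper exposes reg_callback()/Start()/Run()/Stop(); adjust these names to the actual
# entry points of the codebase before use.
def _example_offline_train_wiring():
    import argparse
    parser = argparse.ArgumentParser()

    loader = Loader()
    ArgsProvider.Load(parser, [loader])  # assumed helper that parses argv into loader.args

    GC = loader.initialize()

    def on_train(batch):
        # The batch carries the keys declared in desc["train"]["input"] ("s", "offline_a");
        # a real trainer would run a gradient step on the model here.
        pass

    GC.reg_callback("train", on_train)
    GC.Start()
    for _ in range(1000):  # arbitrary number of steps, for illustration only
        GC.Run()
    GC.Stop()
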
class Loader(object):
    @classmethod
    def get_option_spec(cls):
        spec = PyOptionSpec()
        go.getServerPredefined(spec.getOptionSpec())

        spec.addIntOption('gpu', 'GPU id to use', -1)
        spec.addIntOption(
            'eval_old_model',
            ('If specified, then we directly switch to evaluation mode '
             'between the loaded model and the old model specified by this switch'),
            -1)
        spec.addStrOption('comment', 'Comment for this run', '')
        spec.addBoolOption("parameter_print", "Print parameters", True)

        spec.merge(PyOptionSpec.fromClasses((MoreLabels, )))
        return spec

    @auto_import_options
    def __init__(self, option_map):
        self.more_labels = MoreLabels(option_map)
        self.option_map = option_map

    def initialize(self):
        opt = go.getServerOpt(self.option_map.getOptionSpec())

        desc = {}
        GC = elf.GameContext(opt.common.base)

        mode = getattr(self.options, "common.mode")
        batchsize = getattr(self.options, "common.base.batchsize")

        if mode in ["train", "train_offline"]:
            game_obj = go.Server(opt)
            desc["train"] = dict(
                input=["s", "offline_a", "winner", "mcts_scores",
                       "move_idx", "selfplay_ver"],
                reply=None)
            desc["train_ctrl"] = dict(
                input=["selfplay_ver"],
                reply=None,
                batchsize=1)
        else:
            raise ValueError("No such mode: " + mode)

        game_obj.setGameContext(GC)
        params = game_obj.getParams()

        if self.options.parameter_print:
            print("**** Options ****")
            print(opt.info())
            print("*****************")
            print("Version: ", elf.version())
            print("Mode: ", mode)
            print("Num Actions: ", params["num_action"])

        self.more_labels.add_labels(desc)

        return GCWrapper(
            GC,
            game_obj,
            batchsize,
            desc,
            num_recv=2,
            default_gpu=(self.options.gpu
                         if (self.options.gpu is not None and self.options.gpu >= 0)
                         else None),
            use_numpy=False,
            params=params,
            verbose=self.options.parameter_print)
