def get_option_spec(cls):
    spec = PyOptionSpec()
    spec.addStrListOption(
        'additional_labels',
        'add additional labels in the batch; e.g. id, seq, last_terminal',
        [])
    return spec
def get_option_spec(cls, name='eval'):
    spec = PyOptionSpec()
    spec.addStrListOption('keys_in_reply', 'keys in reply', [])
    spec.addIntOption('num_minibatch', 'number of minibatches', 5000)
    spec.addStrListOption('parsed_args', 'dummy option', [])
    spec.merge(Stats.get_option_spec(name))
    return spec
def get_option_spec(cls, name='eval'):
    # print("\u001b[31;1m|py|\u001b[0m\u001b[37m", "Evaluator::", inspect.currentframe().f_code.co_name)
    # print("\u001b[31;1m", os.path.dirname(os.path.abspath(__file__)), " - ", os.path.basename(__file__), "\u001b[0m")
    spec = PyOptionSpec()
    spec.addStrListOption('keys_in_reply', 'keys in reply', [])
    spec.addIntOption('num_minibatch', 'number of minibatches', 5000)
    spec.addStrListOption('parsed_args', 'dummy option', [])
    spec.merge(Stats.get_option_spec(name))
    return spec
def get_option_spec(cls):
    spec = PyOptionSpec()
    spec.addFloatOption(
        'entropy_ratio', 'the entropy ratio we put on PolicyGradient', 0.01)
    spec.addFloatOption('grad_clip_norm', 'gradient norm clipping', 0.0)
    spec.addFloatOption('min_prob', 'minimal probability used in training', 1e-6)
    spec.addFloatOption('ratio_clamp', 'maximum importance sampling ratio', 10.0)
    spec.addStrListOption(
        'policy_action_nodes',
        'policy/action node pairs used by PolicyGradient, e.g. pi,a',
        ['pi,a'])
    return spec
def get_option_spec(cls):
    spec = PyOptionSpec()
    test.setSpecOptions(spec.getOptionSpec())
    elf_C.setSpecELFOptions(spec.getOptionSpec())
    spec.addIntOption('gpu', 'GPU id to use', 0)
    spec.addIntOption(
        'freq_update',
        'How many updates to perform before updating the acting model',
        50)
    spec.addStrOption('distri_mode', 'server or client', "")
    spec.addIntOption('num_recv', '', 2)
    spec.addStrListOption('parsed_args', 'dummy option', [])
    spec.merge(PyOptionSpec.fromClasses((PPO, )))
    return spec
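# The spec above pulls in options from another class via PyOptionSpec.fromClasses.
# Below is a minimal, hypothetical sketch of that aggregation pattern; MiniOptionSpec
# and class A are illustrative stand-ins for this listing, not ELF's real PyOptionSpec.
class MiniOptionSpec:
    """Toy spec that stores options as name -> (help, default)."""
    def __init__(self):
        self.options = {}

    def addIntOption(self, name, help_str, default):
        self.options[name] = (help_str, default)

    def merge(self, other):
        # Later definitions win on name collisions in this toy version.
        self.options.update(other.options)

    @classmethod
    def fromClasses(cls, classes):
        # Aggregate each class's get_option_spec() into one combined spec.
        spec = cls()
        for klass in classes:
            spec.merge(klass.get_option_spec())
        return spec

class A:
    @classmethod
    def get_option_spec(cls):
        spec = MiniOptionSpec()
        spec.addIntOption('gpu', 'GPU id to use', 0)
        return spec

combined = MiniOptionSpec.fromClasses((A,))  # combined.options now contains 'gpu'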
def get_option_spec(cls):
    spec = PyOptionSpec()
    spec.addStrOption(
        'sample_policy',
        'choices of epsilon-greedy, multinomial, or uniform',
        'epsilon-greedy')
    spec.addBoolOption(
        'store_greedy',
        ('if enabled, picks maximum-probability action; '
         'otherwise, sample from distribution'),
        False)
    spec.addFloatOption('epsilon', 'used in epsilon-greedy', 0.0)
    spec.addStrListOption('sample_nodes', 'nodes to be sampled and saved', ['pi,a'])
    return spec
def get_option_spec(cls):
    spec = PyOptionSpec()
    elf.saveDefaultOptionsToArgs("", spec)
    elf.saveDefaultNetOptionsToArgs("", spec)
    spec.addIntOption('gpu', 'GPU id to use', -1)
    spec.addStrListOption('parsed_args', 'dummy option', [])
    return spec
def get_option_spec(cls):
    spec = PyOptionSpec()
    elf_C.setSpecELFOptions(spec.getOptionSpec())
    test.setSpecTSOptions(spec.getOptionSpec())
    spec.addIntOption('gpu', 'GPU id to use', 0)
    spec.addStrOption('load', 'Load old model', "")
    spec.addStrListOption('parsed_args', 'dummy option', [])
    return spec
def get_option_spec(cls, name='eval'):
    spec = PyOptionSpec()
    spec.addStrListOption('keys_in_reply', 'keys in reply', [])
    spec.addIntOption('num_minibatch', 'number of minibatches', 5000)
    spec.addStrListOption('parsed_args', 'dummy option', [])
    spec.merge(Stats.get_option_spec(name))
    return spec
def get_option_spec(cls):
    spec = PyOptionSpec()
    spec.addStrOption(
        'sample_policy',
        'choices of epsilon-greedy, multinomial, or uniform',
        'epsilon-greedy')
    spec.addBoolOption(
        'store_greedy',
        ('if enabled, picks maximum-probability action; '
         'otherwise, sample from distribution'),
        False)
    spec.addFloatOption('epsilon', 'used in epsilon-greedy', 0.0)
    spec.addStrListOption('sample_nodes', 'nodes to be sampled and saved', ['pi,a'])
    return spec
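# 'sample_nodes' entries such as 'pi,a' pair a probability node with the key the
# sampled action is stored under. A small sketch of how such entries could be
# decomposed; the sampler that consumes them is not part of this listing.
sample_nodes = ['pi,a']
node_action_pairs = [tuple(entry.split(',')) for entry in sample_nodes]
# e.g. [('pi', 'a')]: sample from the 'pi' distribution, save the result under 'a'.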
def get_option_spec(cls, model_class=None, model_idx=None):
    spec = PyOptionSpec()
    spec.addStrOption('load', 'load model', '')
    spec.addStrListOption(
        'onload',
        ('functions to call after loading. e.g., reset,zero_first_layer. '
         'These functions are specified in the model'),
        [])
    spec.addStrListOption('omit_keys', 'omitted keys when loading', [])
    spec.addStrListOption('replace_prefix', 'replace prefix', [])
    spec.addIntOption('gpu', 'which GPU to use', -1)
    spec.addBoolOption(
        'check_loaded_options',
        'Toggles consistency check of loaded vs. current model options.',
        True)
    spec.addBoolOption('use_fp16', 'use_fp16', False)
    spec.addFloatOption(
        'load_model_sleep_interval',
        ('If zero, has no effect. If positive, then before loading the '
         'model, we will sleep for an interval of '
         'duration (secs) ~ Uniform[0, load_model_sleep_interval]'),
        0.0)

    if model_class is not None and hasattr(model_class, 'get_option_spec'):
        spec.merge(model_class.get_option_spec())

    idx_suffix = '' if model_idx is None else str(model_idx)
    spec.addPrefixSuffixToOptionNames('', idx_suffix)

    return spec
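# With model_idx given, addPrefixSuffixToOptionNames('', str(model_idx)) renames every
# option so several model loaders can coexist on one command line. The suffixing below
# only illustrates the resulting names; it is not ELF's implementation.
model_idx = 1
base_names = ['load', 'gpu', 'omit_keys']
suffixed_names = [name + str(model_idx) for name in base_names]
# -> ['load1', 'gpu1', 'omit_keys1']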
def get_option_spec(cls):
    spec = PyOptionSpec()
    spec.addFloatOption(
        'entropy_ratio', 'the entropy ratio we put on PolicyGradient', 0.01)
    spec.addFloatOption('grad_clip_norm', 'gradient norm clipping', 0.0)
    spec.addFloatOption('min_prob', 'minimal probability used in training', 1e-6)
    spec.addFloatOption('ratio_clamp', 'maximum importance sampling ratio', 10.0)
    spec.addStrListOption(
        'policy_action_nodes',
        'policy/action node pairs used by PolicyGradient, e.g. pi,a',
        ['pi,a'])
    return spec
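# A sketch (not the actual PolicyGradient implementation) of how these options are
# typically applied: probabilities floored at min_prob, the importance ratio clamped
# at ratio_clamp, and an entropy bonus weighted by entropy_ratio; grad_clip_norm would
# be applied separately when clipping gradients. Tensor names below are illustrative.
import torch

def policy_gradient_loss(pi, behavior_pi, actions, advantage,
                         min_prob=1e-6, ratio_clamp=10.0, entropy_ratio=0.01):
    # pi, behavior_pi: [batch, num_actions]; actions, advantage: [batch]
    pi = pi.clamp(min=min_prob)
    behavior_pi = behavior_pi.clamp(min=min_prob)
    pi_a = pi.gather(1, actions.unsqueeze(1)).squeeze(1)
    mu_a = behavior_pi.gather(1, actions.unsqueeze(1)).squeeze(1)
    ratio = (pi_a / mu_a).detach().clamp(max=ratio_clamp)  # importance sampling ratio
    pg_loss = -(ratio * advantage * pi_a.log()).mean()
    entropy = -(pi * pi.log()).sum(dim=1).mean()
    return pg_loss - entropy_ratio * entropy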
def get_option_spec(cls):
    spec = PyOptionSpec()
    spec.addBoolOption('actor_only', 'TODO: fill this help message in', False)
    spec.addStrListOption(
        'list_files',
        'Provide a list of json files for offline training',
        [])
    spec.addIntOption('port', 'TODO: fill this help message in', 5556)
    spec.addStrOption('server_addr', 'TODO: fill this help message in', '')
    spec.addStrOption('server_id', 'TODO: fill this help message in', '')
    spec.addIntOption('q_min_size', 'TODO: fill this help message in', 10)
    spec.addIntOption('q_max_size', 'TODO: fill this help message in', 1000)
    spec.addIntOption('num_reader', 'TODO: fill this help message in', 50)
    spec.addIntOption('num_reset_ranking', 'TODO: fill this help message in', 5000)
    spec.addIntOption(
        'client_max_delay_sec',
        'Maximum allowed delay in sec. If the client '
        'didn\'t respond after that, we think it is dead.',
        1200)
    spec.addBoolOption('verbose', 'TODO: fill this help message in', False)
    spec.addBoolOption('keep_prev_selfplay', 'TODO: fill this help message in', False)
    spec.addIntOption(
        'num_games_per_thread',
        ('For offline mode, it is the number of concurrent games per '
         'thread, used to increase diversity of games; for selfplay mode, '
         'it is the number of games played at each thread, and after that '
         'we need to call restartAllGames() to resume.'),
        -1)
    spec.addIntOption('expected_num_clients', 'Expected number of clients', -1)
    spec.addIntOption('checkers_num_future_actions', 'TODO: fill this help message in', 1)
    spec.addStrOption('mode', 'TODO: fill this help message in', 'play')
    spec.addBoolOption('black_use_policy_network_only', 'TODO: fill this help message in', False)
    spec.addBoolOption('white_use_policy_network_only', 'TODO: fill this help message in', False)
    spec.addBoolOption('use_mcts', 'TODO: fill this help message in', False)
    spec.addBoolOption('use_mcts_ai2', 'TODO: fill this help message in', False)
    spec.addFloatOption(
        'white_puct',
        ('PUCT for white when it is > 0.0. If it is -1 then we use '
         'the same puct for both sides (specified by mcts_options). '
         'A HACK to use different puct for different models. Should '
         'be replaced by a more systematic approach.'),
        -1.0)
    spec.addIntOption('white_mcts_rollout_per_batch', 'white mcts rollout per batch', -1)
    spec.addIntOption('white_mcts_rollout_per_thread', 'white mcts rollout per thread', -1)
    spec.addStrOption('dump_record_prefix', 'TODO: fill this help message in', '')
    spec.addStrOption('selfplay_records_directory', 'TODO: fill this help message in', '')
    spec.addStrOption('eval_records_directory', 'TODO: fill this help message in', '')
    spec.addStrOption('records_buffer_directory', 'TODO: fill this help message in', '')
    spec.addIntOption(
        'policy_distri_cutoff',
        'first N moves will be sampled randomly from the policy distribution',
        0)
    spec.addIntOption('selfplay_timeout_usec', 'TODO: fill this help message in', 0)
    spec.addIntOption('gpu', 'TODO: fill this help message in', -1)
    spec.addBoolOption('policy_distri_training_for_all', 'TODO: fill this help message in', False)
    spec.addBoolOption('parameter_print', 'TODO: fill this help message in', True)
    spec.addIntOption('batchsize', 'batch size', 128)
    spec.addIntOption('batchsize2', 'batch size', -1)
    spec.addIntOption('T', 'number of timesteps', 6)
    spec.addIntOption(
        'selfplay_init_num',
        ('Initial number of selfplay games to generate before training a '
         'new model'),
        2000)
    spec.addIntOption(
        'selfplay_update_num',
        ('Additional number of selfplay games to generate after a model '
         'is updated'),
        1000)
    spec.addBoolOption('selfplay_async', 'Whether to use async mode in selfplay', False)
    spec.addIntOption(
        'eval_num_games',
        ('number of evaluation games to be played to decide whether a model '
         'is better than the other'),
        400)
    spec.addFloatOption('eval_winrate_thres', 'Win rate threshold for evaluation', 0.55)
    spec.addIntOption(
        'eval_old_model',
        ('If specified, then we directly switch to evaluation mode '
         'between the loaded model and the old model specified by this '
         'switch'),
        -1)
    spec.addStrOption(
        'eval_model_pair',
        ('If specified for df_selfplay.py, then the two models will be '
         'evaluated on this client'),
        '')
    spec.addBoolOption(
        'cheat_eval_new_model_wins_half',
        ('When enabled, in evaluation mode, when the game finishes, the '
         'player with the most recent model gets 100% win rate half of '
         'the time. This is used to test the framework.'),
        False)
    spec.addBoolOption(
        'cheat_selfplay_random_result',
        ('When enabled, in selfplay mode the result of the game is random. '
         'This is used to test the framework.'),
        False)
    spec.addBoolOption('human_plays_for_black', '', False)
    spec.addIntOption(
        'suicide_after_n_games',
        'return after n games have finished; -1 means it never ends',
        -1)

    spec.merge(PyOptionSpec.fromClasses((ContextArgs, MoreLabels)))

    return spec
def get_option_spec(cls):
    spec = PyOptionSpec()
    spec.addStrOption('preload_sgf', 'TODO: fill this help message in', '')
    spec.addIntOption('preload_sgf_move_to', 'TODO: fill this help message in', -1)
    spec.addBoolOption('actor_only', 'TODO: fill this help message in', False)
    spec.addStrListOption(
        'list_files',
        'Provide a list of json files for offline training',
        [])
    spec.addIntOption('port', 'TODO: fill this help message in', 5556)
    spec.addStrOption('server_addr', 'TODO: fill this help message in', '')
    spec.addStrOption('server_id', 'TODO: fill this help message in', '')
    spec.addIntOption('q_min_size', 'TODO: fill this help message in', 10)
    spec.addIntOption('q_max_size', 'TODO: fill this help message in', 1000)
    spec.addIntOption('num_reader', 'TODO: fill this help message in', 50)
    spec.addIntOption('num_reset_ranking', 'TODO: fill this help message in', 5000)
    spec.addIntOption(
        'client_max_delay_sec',
        'Maximum allowed delay in sec. If the client '
        'didn\'t respond after that, we think it is dead.',
        1200)
    spec.addBoolOption('verbose', 'TODO: fill this help message in', False)
    spec.addBoolOption('keep_prev_selfplay', 'TODO: fill this help message in', False)
    spec.addBoolOption('print_result', 'TODO: fill this help message in', False)
    spec.addIntOption(
        'data_aug',
        'specify data augmentation, 0-7, -1 means random',
        -1)
    spec.addIntOption(
        'ratio_pre_moves',
        ('how many moves to perform in each thread, before we use the '
         'data to train the model'),
        0)
    spec.addFloatOption(
        'start_ratio_pre_moves',
        ('how many moves to perform in each thread, before we use the '
         'first sgf file to train the model'),
        0.5)
    spec.addIntOption(
        'num_games_per_thread',
        ('For offline mode, it is the number of concurrent games per '
         'thread, used to increase diversity of games; for selfplay mode, '
         'it is the number of games played at each thread, and after that '
         'we need to call restartAllGames() to resume.'),
        -1)
    spec.addIntOption('expected_num_clients', 'Expected number of clients', -1)
    spec.addIntOption('num_future_actions', 'TODO: fill this help message in', 1)
    spec.addIntOption('move_cutoff', 'Cutoff ply in replay', -1)
    spec.addStrOption('mode', 'TODO: fill this help message in', 'online')
    spec.addBoolOption('black_use_policy_network_only', 'TODO: fill this help message in', False)
    spec.addBoolOption('white_use_policy_network_only', 'TODO: fill this help message in', False)
    spec.addIntOption('ply_pass_enabled', 'TODO: fill this help message in', 0)
    spec.addBoolOption('use_mcts', 'TODO: fill this help message in', False)
    spec.addBoolOption('use_mcts_ai2', 'TODO: fill this help message in', False)
    spec.addFloatOption(
        'white_puct',
        ('PUCT for white when it is > 0.0. If it is -1 then we use '
         'the same puct for both sides (specified by mcts_options). '
         'A HACK to use different puct for different models. Should '
         'be replaced by a more systematic approach.'),
        -1.0)
    spec.addIntOption('white_mcts_rollout_per_batch', 'white mcts rollout per batch', -1)
    spec.addIntOption('white_mcts_rollout_per_thread', 'white mcts rollout per thread', -1)
    spec.addBoolOption('use_df_feature', 'TODO: fill this help message in', False)
    spec.addStrOption('dump_record_prefix', 'TODO: fill this help message in', '')
    spec.addIntOption('policy_distri_cutoff', 'TODO: fill this help message in', 0)
    spec.addFloatOption('resign_thres', 'TODO: fill this help message in', 0.0)
    spec.addBoolOption('following_pass', 'TODO: fill this help message in', False)
    spec.addIntOption('selfplay_timeout_usec', 'TODO: fill this help message in', 0)
    spec.addIntOption('gpu', 'TODO: fill this help message in', -1)
    spec.addBoolOption('policy_distri_training_for_all', 'TODO: fill this help message in', False)
    spec.addBoolOption('parameter_print', 'TODO: fill this help message in', True)
    spec.addIntOption('batchsize', 'batch size', 128)
    spec.addIntOption('batchsize2', 'batch size', -1)
    spec.addIntOption('T', 'number of timesteps', 6)
    spec.addIntOption(
        'selfplay_init_num',
        ('Initial number of selfplay games to generate before training a '
         'new model'),
        2000)
    spec.addIntOption(
        'selfplay_update_num',
        ('Additional number of selfplay games to generate after a model '
         'is updated'),
        1000)
    spec.addBoolOption('selfplay_async', 'Whether to use async mode in selfplay', False)
    spec.addIntOption(
        'eval_num_games',
        ('number of evaluation games to be played to decide whether a model '
         'is better than the other'),
        400)
    spec.addFloatOption('eval_winrate_thres', 'Win rate threshold for evaluation', 0.55)
    spec.addIntOption(
        'eval_old_model',
        ('If specified, then we directly switch to evaluation mode '
         'between the loaded model and the old model specified by this '
         'switch'),
        -1)
    spec.addStrOption(
        'eval_model_pair',
        ('If specified for df_selfplay.py, then the two models will be '
         'evaluated on this client'),
        '')
    spec.addStrOption('comment', 'Comment for this run', '')
    spec.addBoolOption(
        'cheat_eval_new_model_wins_half',
        ('When enabled, in evaluation mode, when the game finishes, the '
         'player with the most recent model gets 100% win rate half of '
         'the time. This is used to test the framework.'),
        False)
    spec.addBoolOption(
        'cheat_selfplay_random_result',
        ('When enabled, in selfplay mode the result of the game is random. '
         'This is used to test the framework.'),
        False)
    spec.addIntOption(
        'suicide_after_n_games',
        'return after n games have finished; -1 means it never ends',
        -1)

    spec.merge(PyOptionSpec.fromClasses((ContextArgs, MoreLabels)))

    return spec
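# A hedged end-to-end sketch of how specs like the ones above are combined before
# parsing. It assumes PyOptionSpec exposes a parse() method that turns command-line
# arguments into an option map; that method is an assumption about ELF's option
# framework and is not shown in this listing.
spec = PyOptionSpec()
spec.merge(PyOptionSpec.fromClasses((ContextArgs, MoreLabels)))  # classes referenced above
spec.merge(Stats.get_option_spec('eval'))  # Stats is merged by the evaluator specs above
option_map = spec.parse()  # assumed API: parse argv into the options each component consumes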