class ServerConfig(object):
    game = attribute("breakthrough")
    generation_prefix = attribute("v42")

    port = attribute(9000)

    current_step = attribute(0)

    # number of samples to acquire before starting to train
    num_samples_to_train = attribute(1024)

    # maximum growth while training
    max_samples_growth = attribute(0.2)

    # the starting generation description
    base_generation_description = attribute(default=attr_factory(GenerationDescription))

    # the base network model
    base_network_model = attribute(default=attr_factory(NNModelConfig))

    # the starting training config
    base_training_config = attribute(default=attr_factory(TrainNNConfig))

    # the self play config
    self_play_config = attribute(default=attr_factory(SelfPlayConfig))

    # save the samples every n seconds
    checkpoint_interval = attribute(60.0 * 5)

    # this forces the network to be reset to random weights, every n generations
    reset_network_every_n_generations = attribute(-1)
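
# Illustrative sketch (hypothetical helper, not from the original code): one plausible
# reading of num_samples_to_train / max_samples_growth is a window check - the server
# starts training once enough samples arrive, and max_samples_growth caps how far past
# the threshold the pool may grow before training kicks off.
def ready_to_train(sample_count, conf):
    max_samples = conf.num_samples_to_train * (1 + conf.max_samples_growth)
    return conf.num_samples_to_train <= sample_count <= max_samples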
class SelfPlayConfig(object):
    # In each full game played out, will oscillate between using sample_iterations and
    # n < evals_per_move.  So if set to 25%, will take 25% of samples, and 75% will be skipped
    # using n evals.  This idea is adopted from KataGo and is NOT a full implementation of the
    # idea there.  This is just the simplest way to introduce the concept without changing much
    # code.  < 0, off.
    oscillate_sampling_pct = attribute(0.25)

    # temperature for policy (XXX remove this, I have never used it)
    temperature_for_policy = attribute(1.0)

    # sample is the actual sample we take to train for.  The focus is on good policy distribution.
    puct_config = attribute(default=attr_factory(PUCTEvaluatorConfig))
    evals_per_move = attribute(800)

    # resign - two levels, resign0 should have more freedom than resign1
    resign0_score_probability = attribute(0.9)
    resign0_pct = attribute(0.5)

    resign1_score_probability = attribute(0.975)
    resign1_pct = attribute(0.1)

    # run to end after resign - pct -> chance to actually run, score to exit on
    run_to_end_pct = attribute(0.2)
    run_to_end_evals = attribute(42)
    run_to_end_puct_config = attribute(default=attr_factory(PUCTEvaluatorConfig))
    run_to_end_early_score = attribute(0.01)
    run_to_end_minimum_game_depth = attribute(30)

    # aborts play if play depth exceeds this max_length (-1 off)
    abort_max_length = attribute(-1)
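
# Illustrative sketch (hypothetical helper, not from the original code): with
# oscillate_sampling_pct = 0.25, roughly one move in four is searched with the full
# evals_per_move budget and recorded as a training sample; the remaining moves use a
# smaller budget low_evals_n (some n < evals_per_move) and are skipped for training.
import random

def evals_for_move(conf, low_evals_n):
    take_sample = random.random() < conf.oscillate_sampling_pct
    evals = conf.evals_per_move if take_sample else low_evals_n
    return evals, take_sample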
class AllRatings(object):
    game = at.attribute("game")

    # list of PlayerRating
    players = at.attribute(default=at.attr_factory(list))

    # simple log of recent games
    log = at.attribute(default=at.attr_factory(list))
class GameDesc(object):
    game = attribute("checkers")

    # x_cords = "a b c d e f g h".split()
    x_cords = attribute(attr_factory(list))

    # y_cords = "1 2 3 4 5 6 7 8".split()
    y_cords = attribute(attr_factory(list))

    # list of BoardChannels (length kind of needs to be >= 1, or not much point using convs)
    board_channels = attribute(attr_factory(list))

    # list of list of ControlChannels
    control_channels = attribute(attr_factory(list))
class NNModelConfig(object):
    role_count = attribute(2)

    input_rows = attribute(8)
    input_columns = attribute(8)
    input_channels = attribute(8)

    residual_layers = attribute(8)
    cnn_filter_size = attribute(64)
    cnn_kernel_size = attribute(3)

    value_hidden_size = attribute(256)

    multiple_policies = attribute(False)

    # the size of policy distribution.  The size of the list will be 1 if not multiple_policies.
    policy_dist_count = attribute(default=attr_factory(list))

    l2_regularisation = attribute(False)

    # < 0 - no dropout
    dropout_rate_policy = attribute(0.333)
    dropout_rate_value = attribute(0.5)

    leaky_relu = attribute(False)
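
# Illustrative sketch (hypothetical helper, and an assumption not stated in the code
# above): a plausible reading of multiple_policies is one policy head per role, with
# policy_dist_count holding one distribution size per head; without it, the list holds
# a single combined size.
def policy_head_sizes(conf):
    expected = conf.role_count if conf.multiple_policies else 1
    assert len(conf.policy_dist_count) == expected
    return list(conf.policy_dist_count)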
class BoardTerm(object):
    ''' For nxn boards, we identify which terms we use and index into the base. '''

    term_idx = attribute(3)

    # terms = ["white", "black", "arrow"]
    terms = attribute(attr_factory(list))
class NNModelConfig(object):
    role_count = attribute(2)

    input_rows = attribute(8)
    input_columns = attribute(8)
    input_channels = attribute(8)

    residual_layers = attribute(8)
    cnn_filter_size = attribute(64)
    cnn_kernel_size = attribute(3)

    value_hidden_size = attribute(256)

    # the size of policy distribution.
    policy_dist_count = attribute(default=attr_factory(list))

    # < 0 - no dropout
    dropout_rate_policy = attribute(0.333)
    dropout_rate_value = attribute(0.5)

    leaky_relu = attribute(False)
    squeeze_excite_layers = attribute(False)
    resnet_v2 = attribute(False)
    global_pooling_value = attribute(False)
    concat_all_layers = attribute(False)
class WorkerConfig(object):
    connect_port = attribute(9000)
    connect_ip_addr = attribute("127.0.0.1")
    do_training = attribute(False)
    do_self_play = attribute(False)
    self_play_batch_size = attribute(1)

    # passed into Supervisor, used instead of hard coded value
    number_of_polls_before_dumping_stats = attribute(1024)

    # use to create SelfPlayManager
    unique_identifier = attribute("pleasesetme")

    # slow things down
    sleep_between_poll = attribute(-1)

    # send back whatever samples we have gathered at this point - sort of application level keep alive
    server_poll_time = attribute(10)

    # the minimum number of samples gathered before sending to the server
    min_num_samples = attribute(128)

    # if this is set to zero, will do inline
    num_workers = attribute(0)

    # run system commands to fetch the neural network if it isn't in data
    run_cmds_if_no_nn = attribute(default=attr_factory(list))

    # will exit if there is an update to the config
    exit_on_update_config = attribute(False)

    # don't replace the network on every new network; instead wait n generations
    replace_network_every_n_gens = attribute(1)
class TrainNNConfig(object):
    game = attribute("breakthrough")

    # the generation prefix is what defines our models (along with step).  Be careful not to
    # overwrite these.
    generation_prefix = attribute("v2_")

    # uses previous network?
    use_previous = attribute(True)
    next_step = attribute(42)
    overwrite_existing = attribute(False)
    validation_split = attribute(0.8)
    batch_size = attribute(32)
    epochs = attribute(10)

    # this is applied even if max_sample_count can't be reached
    starting_step = attribute(0)

    # one of adam / amsgrad / SGD
    compile_strategy = attribute("adam")
    learning_rate = attribute(None)

    # experimental:
    # list of tuples.  Idea is that at each epoch we take a percentage of the samples to train.
    # [(5, 1.0), (10, 0.8), (0, 0.5), (-5, 0.2)]
    # which translates into: take all samples of the first 5, 80% of the next 10, 50% of the
    # next n, and 20% of the last 5.  Also assert number of gens is more than
    # sum(abs(k) for k, _ in resample_buckets).
    resample_buckets = attribute(default=attr_factory(list))

    # set the maximum size for an epoch.  buckets will be scaled accordingly.
    max_epoch_size = attribute(-1)

    # set the initial value weight for the first epoch of training
    initial_value_weight = attribute(1.0)
class SelfPlayConfig(object):
    # -1 is off, and defaults to alpha-zero style
    max_number_of_samples = attribute(4)

    # temperature for policy
    temperature_for_policy = attribute(1.0)

    # percentage of games to play from beginning to end (using sample_xxx config)
    play_full_game_pct = attribute(-1)

    # select will get to the point where we start sampling
    select_puct_config = attribute(default=attr_factory(PUCTEvaluatorConfig))
    select_iterations = attribute(100)

    # sample is the actual sample we take to train for.  The focus is on good policy distribution.
    sample_puct_config = attribute(default=attr_factory(PUCTEvaluatorConfig))
    sample_iterations = attribute(800)

    # after samples, will play to the end using this config
    score_puct_config = attribute(default=attr_factory(PUCTEvaluatorConfig))
    score_iterations = attribute(100)

    # if the probability of losing drops below - then resign
    # and ignore resignation - and continue to end
    # two levels, resign0 should have more freedom than resign1
    resign0_score_probability = attribute(0.9)
    resign0_false_positive_retry_percentage = attribute(0.5)

    resign1_score_probability = attribute(0.975)
    resign1_false_positive_retry_percentage = attribute(0.1)

    # aborts play if play depth exceeds this max_length (-1 off)
    abort_max_length = attribute(-1)

    # lookback to see if states are a draw
    number_repeat_states_draw = attribute(-1)

    # score to back prop, to try and avoid repeat states
    repeat_states_score = attribute(0.45)

    # chance of really resigning.  Will exit collecting.
    pct_actually_resign = attribute(0.4)

    # run to end (or scoring) - pct -> chance to actually run, score to exit on
    run_to_end_early_pct = attribute(0.2)
    run_to_end_early_score = attribute(0.01)
    run_to_end_minimum_game_depth = attribute(30)
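
# Illustrative sketch (hypothetical, heavily simplified): the three puct configs above
# map to three phases of a self-play game.  'select' moves cheaply to the point where
# sampling starts, 'sample' runs full-strength searches for up to max_number_of_samples
# training samples, and 'score' plays the rest of the game out to get a result.
def phase_schedule(conf, sample_start_depth, game_length):
    ''' returns a (puct_config, iterations) pair per move, indexed by game depth '''
    schedule = []
    for depth in range(game_length):
        if depth < sample_start_depth:
            schedule.append((conf.select_puct_config, conf.select_iterations))
        elif depth < sample_start_depth + conf.max_number_of_samples:
            schedule.append((conf.sample_puct_config, conf.sample_iterations))
        else:
            schedule.append((conf.score_puct_config, conf.score_iterations))
    return schedule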
class ControlBase(object):
    ''' a control base is basically a mapping from a gdl base to a value '''

    # list of argument terms - which must match exactly
    arg_terms = attribute(attr_factory(list))

    # we set the channel to this value
    value = attribute(1)
class PUCTPlayerConfig(object):
    name = attribute("Player")
    verbose = attribute(False)

    # XXX these should be renamed, and values less abused (0, -1 have special meaning)
    playouts_per_iteration = attribute(800)
    playouts_per_iteration_noop = attribute(1)

    generation = attribute("latest")

    # one of PUCTEvaluatorConfig / PUCTEvaluatorV2Config
    evaluator_config = attribute(default=attr_factory(PUCTEvaluatorV2Config))
class SelfPlayConfig(object):
    # -1 is off, and defaults to alpha-zero style
    max_number_of_samples = attribute(4)

    # select will get to the point where we start sampling
    select_puct_config = attribute(default=attr_factory(PUCTEvaluatorConfig))
    select_iterations = attribute(100)

    # sample is the actual sample we take to train for.  The focus is on good policy distribution.
    sample_puct_config = attribute(default=attr_factory(PUCTEvaluatorConfig))
    sample_iterations = attribute(800)

    # after samples, will play to the end using this config
    score_puct_config = attribute(default=attr_factory(PUCTEvaluatorConfig))
    score_iterations = attribute(100)

    # if the probability of losing drops below - then resign
    # and ignore resignation - and continue to end
    # two levels, resign0 should have more freedom than resign1
    resign0_score_probability = attribute(0.9)
    resign0_false_positive_retry_percentage = attribute(0.5)

    resign1_score_probability = attribute(0.975)
    resign1_false_positive_retry_percentage = attribute(0.1)
class Symmetries(object):
    ''' defines the bases to which symmetries can be applied '''

    # list of ApplySymmetry
    apply_bases = attribute(attr_factory(list))

    # list of terms
    skip_bases = attribute(attr_factory(list))

    # list of ApplySymmetry
    apply_actions = attribute(attr_factory(list))

    # list of terms
    skip_actions = attribute(attr_factory(list))

    # do horizontal reflection
    do_reflection = attribute(False)

    # rotate x4
    do_rotations_90 = attribute(False)

    # rotate x2
    do_rotations_180 = attribute(False)
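
# Illustrative sketch (hypothetical, assuming channels are HxW numpy planes): the flags
# above multiply the training data.  do_reflection adds a horizontal mirror of each
# rotation, do_rotations_90 uses all four quarter turns, do_rotations_180 just the half
# turn.  Treating the two rotation flags as exclusive is an assumption.
import numpy as np

def symmetric_variants(plane, do_reflection, do_rotations_90, do_rotations_180):
    rotations = [plane]
    if do_rotations_90:
        rotations = [np.rot90(plane, k) for k in range(4)]
    elif do_rotations_180:
        rotations = [plane, np.rot90(plane, 2)]
    variants = list(rotations)
    if do_reflection:
        variants += [np.fliplr(p) for p in rotations]
    return variants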
class LGConfig(object):
    # LG convention is to postfix non-humans with "_bot"
    whoami = at.attribute("gzero_bot")

    # fetch these from your browser, after logging in
    cookie = at.attribute("login2=.......; JSESSIONID=.......")

    # dry run, don't actually send moves
    dry_run = at.attribute(True)

    # list of GameConfig
    play_games = at.attribute(default=at.attr_factory(list))

    # store game path
    store_path = at.attribute("/home/rxe/working/ggpzero/data/lg/")
class BoardChannels(object):
    ''' board channels are defined by
        (a) the base term
        (b) a cross product of the board terms

        The value set on the channel itself will be set if a matching base is set on the x/y
        coordinates.
    '''

    base_term = attribute("cell")

    # these are indexes into the term identifying the coordinates
    x_term_idx = attribute(1)
    y_term_idx = attribute(2)

    # list of BoardTerm (if any) - will result in taking a cross product if len() > 1
    board_terms = attribute(attr_factory(list))
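
# Illustrative sketch (hypothetical helper): with board_terms a list of BoardTerm, the
# number of channels contributed by one BoardChannels is the size of the cross product
# of the terms - e.g. BoardTerms with terms ["white", "black"] and ["pawn", "king"]
# give 2 x 2 = 4 channels (white/pawn, white/king, black/pawn, black/king).
from itertools import product

def channel_combinations(board_channels):
    term_lists = [bt.terms for bt in board_channels.board_terms]
    return list(product(*term_lists))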
class TrainNNConfig(object):
    game = attribute("breakthrough")

    # the generation prefix is what defines our models (along with step).  Be careful not to
    # overwrite these.
    generation_prefix = attribute("x1_")

    # uses previous network?
    use_previous = attribute(True)
    next_step = attribute(42)
    overwrite_existing = attribute(False)
    validation_split = attribute(0.8)
    batch_size = attribute(32)
    epochs = attribute(10)

    # this is applied even if max_sample_count can't be reached
    starting_step = attribute(0)

    # one of adam / amsgrad / SGD
    compile_strategy = attribute("SGD")
    learning_rate = attribute(0.01)

    l2_regularisation = attribute(0.0001)

    # list of tuples.  This is the replay buffer.
    # [(5, 1.0), (10, 0.8)]
    # Will take the first 5 generations with all data and 80% of the next 10 generations.  Every
    # generation after is ignored.
    # [(-1, 1.0)]
    # Will take all generations with 100% data.
    # XXX better name would be replay_sample_buckets
    resample_buckets = attribute(default=attr_factory(list))

    # set the maximum size for an epoch.  buckets will be scaled accordingly.
    max_epoch_size = attribute(-1)

    # set the initial value weight for the first epoch of training; on subsequent epochs the
    # value weight will automatically adjust based on whether overfitting occurs
    initial_value_weight = attribute(1.0)
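
# Illustrative sketch (hypothetical helper, not from the original code): how
# resample_buckets could be applied to a list of generations, newest first.
# [(5, 1.0), (10, 0.8)] keeps all samples from the newest 5 generations, 80% of the
# next 10, and drops everything older; a count of -1 means "all remaining generations".
import random

def resample(generations, buckets):
    ''' generations is a list of sample lists, newest generation first '''
    picked, idx = [], 0
    for count, pct in buckets:
        chunk = generations[idx:] if count < 0 else generations[idx:idx + count]
        for gen_samples in chunk:
            picked += [s for s in gen_samples if random.random() < pct]
        idx = len(generations) if count < 0 else idx + count
    return picked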
class WorkerConfig(object):
    connect_port = attribute(9000)
    connect_ip_addr = attribute("127.0.0.1")
    do_training = attribute(False)
    do_self_play = attribute(False)
    self_play_batch_size = attribute(1)

    # slow things down
    sleep_between_poll = attribute(-1)

    # send back whatever samples we have gathered at this point - sort of application level keep alive
    server_poll_time = attribute(10)

    # the minimum number of samples gathered before sending to the server
    min_num_samples = attribute(128)

    # if this is set to zero, will do inline
    num_workers = attribute(0)

    # run system commands to fetch the neural network if it isn't in data
    run_cmds_if_no_nn = attribute(default=attr_factory(list))

    # will exit if there is an update to the config
    exit_on_update_config = attribute(False)
class ApplySymmetry(object):
    base_term = attribute("cell")

    # these are indexes into the term identifying the coordinates
    x_terms_idx = attribute(attr_factory(list))
    y_terms_idx = attribute(attr_factory(list))
class WorkerConfigMsg(object):
    conf = attribute(default=attr_factory(confs.WorkerConfig))
class ConfigureSelfPlay(object):
    game = attribute("game")
    generation_name = attribute("gen0")
    self_play_conf = attribute(default=attr_factory(confs.SelfPlayConfig))
class ControlChannel(object):
    ''' Creates a single channel.  The control bases need to be mutually exclusive (ie only one
        set at a time).  If none are set, the value of the channel will be zero.  If a channel
        is set, it is the value defined in the ControlBase.
    '''

    # a list of control bases
    control_bases = attribute(attr_factory(list))
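
# Illustrative sketch (hypothetical): how a ControlChannel might be evaluated.  The
# control bases are mutually exclusive, so at most one matches the current state; its
# ControlBase.value fills the channel, and the channel is zero otherwise.  state_bases
# is assumed to be a set of argument-term tuples for the bases currently set.
def control_channel_value(channel, state_bases):
    for cb in channel.control_bases:
        if tuple(cb.arg_terms) in state_bases:
            return cb.value
    return 0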
class RequestSamples(object):
    # list of states (0/1 tuples) - to reduce duplicates
    new_states = attribute(default=attr_factory(list))
class RequestSampleResponse(object):
    # list of def.confs.Sample
    samples = attribute(default=attr_factory(list))
    duplicates_seen = attribute(0)
class RequestNetworkTrain(object):
    game = attribute("game")
    train_conf = attribute(default=attr_factory(confs.TrainNNConfig))
    network_model = attribute(default=attr_factory(confs.NNModelConfig))
    generation_description = attribute(default=attr_factory(datadesc.GenerationDescription))