Example 1
class ServerConfig(object):
    game = attribute("breakthrough")
    generation_prefix = attribute("v42")

    port = attribute(9000)

    current_step = attribute(0)

    # number of samples to acquire before starting to train
    num_samples_to_train = attribute(1024)

    # maximum growth while training
    max_samples_growth = attribute(0.2)

    # the starting generation description
    base_generation_description = attribute(default=attr_factory(GenerationDescription))

    # the base network model
    base_network_model = attribute(default=attr_factory(NNModelConfig))

    # the starting training config
    base_training_config = attribute(default=attr_factory(TrainNNConfig))

    # the self play config
    self_play_config = attribute(default=attr_factory(SelfPlayConfig))

    # save the samples every n seconds
    checkpoint_interval = attribute(60.0 * 5)

    # this forces the network to be reset to random weights, every n generations
    reset_network_every_n_generations = attribute(-1)
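
All of the examples in this listing declare config fields with the same attribute / attr_factory helpers. The sketch below shows one way such helpers could be built on top of the attrs library; the attrs-based implementation and the ExampleConfig class are assumptions for illustration, not the project's actual code.

import attr


def attribute(default=None, **kwds):
    # hypothetical wrapper: declares a field with a default value
    return attr.ib(default=default, **kwds)


def attr_factory(cls):
    # defers construction so each instance gets a fresh mutable default
    return attr.Factory(cls)


@attr.s
class ExampleConfig(object):
    port = attribute(9000)
    tags = attribute(default=attr_factory(list))


conf = ExampleConfig()
assert conf.port == 9000 and conf.tags == []
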
Example 2
class SelfPlayConfig(object):
    # Each full game played out oscillates between using the full evals_per_move search and a
    # cheaper search with n < evals_per_move evals.  So if set to 0.25, 25% of positions are
    # taken as training samples and the remaining 75% are skipped using the cheaper n evals.
    # The idea is adopted from KataGo, but this is NOT a full implementation of it - just the
    # simplest way to introduce the concept without changing much code.  < 0 turns it off.
    oscillate_sampling_pct = attribute(0.25)

    # temperature for policy (XXX remove this, I have never used it)
    temperature_for_policy = attribute(1.0)

    # sample is the actual sample we take to train on.  The focus is on a good policy distribution.
    puct_config = attribute(default=attr_factory(PUCTEvaluatorConfig))
    evals_per_move = attribute(800)

    # resign
    # two levels, resign0 should have more freedom than resign1
    resign0_score_probability = attribute(0.9)
    resign0_pct = attribute(0.5)

    resign1_score_probability = attribute(0.975)
    resign1_pct = attribute(0.1)

    # run to end after resign - pct -> chance to actually run, score to exit on
    run_to_end_pct = attribute(0.2)
    run_to_end_evals = attribute(42)
    run_to_end_puct_config = attribute(
        default=attr_factory(PUCTEvaluatorConfig))
    run_to_end_early_score = attribute(0.01)
    run_to_end_minimum_game_depth = attribute(30)

    # aborts play if play depth exceeds this max_length (-1 off)
    abort_max_length = attribute(-1)
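
A rough reading of the oscillate_sampling_pct comment above: for each move, with probability oscillate_sampling_pct the full evals_per_move search is run and the position is kept as a training sample; otherwise a much cheaper search is used and the position is skipped. The sketch below illustrates that per-move decision; the helper name and the cheap-eval count are assumptions, not the project's code.

import random

OSCILLATE_SAMPLING_PCT = 0.25   # oscillate_sampling_pct from the config above
EVALS_PER_MOVE = 800            # evals_per_move from the config above
CHEAP_EVALS = 100               # assumed value for the cheaper "n evals" search


def choose_eval_budget():
    """Return (num_evals, keep_as_sample) for one move of a self-play game."""
    if OSCILLATE_SAMPLING_PCT > 0 and random.random() < OSCILLATE_SAMPLING_PCT:
        # roughly 25% of moves: full search, position kept as a training sample
        return EVALS_PER_MOVE, True
    # remaining moves: cheap search, position not used for training
    return CHEAP_EVALS, False
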
Example 3
class AllRatings(object):
    game = at.attribute("game")

    # list of PlayerRating
    players = at.attribute(default=at.attr_factory(list))

    # simple log of recent games
    log = at.attribute(default=at.attr_factory(list))
Example 4
class GameDesc(object):
    game = attribute("checkers")

    # x_cords = "a b c d e f g h".split()
    x_cords = attribute(attr_factory(list))

    # y_cords = "1 2 3 4 5 6 7 8".split()
    y_cords = attribute(attr_factory(list))

    # list of BoardChannels (the length really needs to be >= 1, or there's not much point using convs)
    board_channels = attribute(attr_factory(list))

    # list of list of ControlChannels
    control_channels = attribute(attr_factory(list))
Example 5
class NNModelConfig(object):
    role_count = attribute(2)

    input_rows = attribute(8)
    input_columns = attribute(8)
    input_channels = attribute(8)

    residual_layers = attribute(8)
    cnn_filter_size = attribute(64)
    cnn_kernel_size = attribute(3)

    value_hidden_size = attribute(256)

    multiple_policies = attribute(False)

    # the sizes of the policy distributions.  The list will have length 1 if not multiple_policies.
    policy_dist_count = attribute(default=attr_factory(list))

    l2_regularisation = attribute(False)

    # < 0 - no dropout
    dropout_rate_policy = attribute(0.333)
    dropout_rate_value = attribute(0.5)

    leaky_relu = attribute(False)
Example 6
class BoardTerm(object):
    ''' For nxn boards, we identify which terms we use and index into the base. '''

    term_idx = attribute(3)

    # terms = ["white", "black", "arrow"]
    terms = attribute(attr_factory(list))
Example 7
class NNModelConfig(object):
    role_count = attribute(2)

    input_rows = attribute(8)
    input_columns = attribute(8)
    input_channels = attribute(8)

    residual_layers = attribute(8)
    cnn_filter_size = attribute(64)
    cnn_kernel_size = attribute(3)

    value_hidden_size = attribute(256)

    # the size of the policy distribution.
    policy_dist_count = attribute(default=attr_factory(list))

    # < 0 - no dropout
    dropout_rate_policy = attribute(0.333)
    dropout_rate_value = attribute(0.5)

    leaky_relu = attribute(False)
    squeeze_excite_layers = attribute(False)
    resnet_v2 = attribute(False)
    global_pooling_value = attribute(False)
    concat_all_layers = attribute(False)
Example 8
class WorkerConfig(object):
    connect_port = attribute(9000)
    connect_ip_addr = attribute("127.0.0.1")
    do_training = attribute(False)
    do_self_play = attribute(False)
    self_play_batch_size = attribute(1)

    # passed into the Supervisor, used instead of a hard-coded value.
    number_of_polls_before_dumping_stats = attribute(1024)

    # used to create the SelfPlayManager
    unique_identifier = attribute("pleasesetme")

    # slow things down
    sleep_between_poll = attribute(-1)

    # send back whatever samples we have gathered at this point - a sort of application-level keep alive
    server_poll_time = attribute(10)

    # the minimum number of samples gathered before sending to the server
    min_num_samples = attribute(128)

    # if this is set to zero, will do inline
    num_workers = attribute(0)

    # run system commands to get the neural network if it isn't in data
    run_cmds_if_no_nn = attribute(default=attr_factory(list))

    # will exit if there is an update to the config
    exit_on_update_config = attribute(False)

    # don't replace the network with every new generation; instead wait n generations
    replace_network_every_n_gens = attribute(1)
Example 9
class TrainNNConfig(object):
    game = attribute("breakthrough")

    # the generation prefix is what defines our models (along with step). Be careful not to
    # overwrite these.
    generation_prefix = attribute("v2_")

    # uses previous network?
    use_previous = attribute(True)
    next_step = attribute(42)
    overwrite_existing = attribute(False)
    validation_split = attribute(0.8)
    batch_size = attribute(32)
    epochs = attribute(10)

    # this is applied even if max_sample_count can't be reached
    starting_step = attribute(0)

    # one of adam / amsgrad / SGD
    compile_strategy = attribute("adam")
    learning_rate = attribute(None)

    # experimental:
    # list of tuples.  The idea is that when training we take a percentage of the samples from
    # each bucket of generations, e.g.
    # [(5, 1.0), (10, 0.8), (0, 0.5), (-5, 0.2)]
    # translates into: take all samples of the first 5 generations, 80% of the next 10, 50% of
    # the next n, and 20% of the last 5.  Also assert the number of gens is more than
    # sum(abs(k) for k, _ in resample_buckets)
    resample_buckets = attribute(default=attr_factory(list))

    # set the maximum size for an epoch.  buckets will be scaled accordingly.
    max_epoch_size = attribute(-1)

    # set the initial value weight for the first epoch of training
    initial_value_weight = attribute(1.0)
Example 10
class SelfPlayConfig(object):
    # -1 is off, and defaults to alpha-zero style
    max_number_of_samples = attribute(4)

    # temperature for policy
    temperature_for_policy = attribute(1.0)

    # percentage of games to play from beginning to end (using sample_xxx config)
    play_full_game_pct = attribute(-1)

    # select will get to the point where we start sampling
    select_puct_config = attribute(default=attr_factory(PUCTEvaluatorConfig))
    select_iterations = attribute(100)

    # sample is the actual sample we take to train on.  The focus is on a good policy distribution.
    sample_puct_config = attribute(default=attr_factory(PUCTEvaluatorConfig))
    sample_iterations = attribute(800)

    # after samples, will play to the end using this config
    score_puct_config = attribute(default=attr_factory(PUCTEvaluatorConfig))
    score_iterations = attribute(100)

    # if the probability of losing drops below - then resign
    # and ignore resignation - and continue to end
    # two levels, resign0 should have more freedom than resign1
    resign0_score_probability = attribute(0.9)
    resign0_false_positive_retry_percentage = attribute(0.5)
    resign1_score_probability = attribute(0.975)
    resign1_false_positive_retry_percentage = attribute(0.1)

    # aborts play if play depth exceeds this max_length (-1 off)
    abort_max_length = attribute(-1)

    # lookback to see if states are draw
    number_repeat_states_draw = attribute(-1)

    # score to back prop, to try and avoid repeat states
    repeat_states_score = attribute(0.45)

    # chance of really resigning.  Will exit collecting.
    pct_actually_resign = attribute(0.4)

    # run to end (or scoring) - pct -> chance to actually run, score to exit on
    run_to_end_early_pct = attribute(0.2)
    run_to_end_early_score = attribute(0.01)
    run_to_end_minimum_game_depth = attribute(30)
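
One way to read the resign settings above: resigning is considered at two thresholds, and a percentage of would-be resignations are ignored so the game runs to the end and false positives can be measured. The sketch below is an interpretation under the assumption that the score probabilities are thresholds on the probability of losing; it is not the project's actual resign logic.

import random

RESIGN0_SCORE_PROBABILITY = 0.9         # from the config above
RESIGN0_FALSE_POSITIVE_RETRY_PCT = 0.5
RESIGN1_SCORE_PROBABILITY = 0.975
RESIGN1_FALSE_POSITIVE_RETRY_PCT = 0.1


def resign_decision(losing_probability):
    """Return "resign", "play_on" (ignored resignation) or "continue"."""
    levels = [(RESIGN1_SCORE_PROBABILITY, RESIGN1_FALSE_POSITIVE_RETRY_PCT),
              (RESIGN0_SCORE_PROBABILITY, RESIGN0_FALSE_POSITIVE_RETRY_PCT)]
    for threshold, retry_pct in levels:
        if losing_probability >= threshold:
            if random.random() < retry_pct:
                # ignore the resignation and continue to the end, so that
                # false positives (games that would have been won) show up
                return "play_on"
            return "resign"
    return "continue"
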
Example 11
class ControlBase(object):
    ''' a control base is basically a mapping from a gdl base to a value '''

    # list of argument terms - which must match exactly
    arg_terms = attribute(attr_factory(list))

    # we set the channel to this value
    value = attribute(1)
Example 12
class PUCTPlayerConfig(object):
    name = attribute("Player")

    verbose = attribute(False)

    # XXX these should be renamed, and values less abused (0, -1 have special meaning)
    playouts_per_iteration = attribute(800)
    playouts_per_iteration_noop = attribute(1)

    generation = attribute("latest")

    # one of PUCTEvaluatorConfig/PUCTEvaluatorV2Config
    evaluator_config = attribute(default=attr_factory(PUCTEvaluatorV2Config))
Example 13
class SelfPlayConfig(object):
    # -1 is off, and defaults to alpha-zero style
    max_number_of_samples = attribute(4)

    # select will get to the point where we start sampling
    select_puct_config = attribute(default=attr_factory(PUCTEvaluatorConfig))
    select_iterations = attribute(100)

    # sample is the actual sample we take to train on.  The focus is on a good policy distribution.
    sample_puct_config = attribute(default=attr_factory(PUCTEvaluatorConfig))
    sample_iterations = attribute(800)

    # after samples, will play to the end using this config
    score_puct_config = attribute(default=attr_factory(PUCTEvaluatorConfig))
    score_iterations = attribute(100)

    # if the probability of losing drops below - then resign
    # and ignore resignation - and continue to end
    # two levels, resign0 should have more freedom than resign1
    resign0_score_probability = attribute(0.9)
    resign0_false_positive_retry_percentage = attribute(0.5)
    resign1_score_probability = attribute(0.975)
    resign1_false_positive_retry_percentage = attribute(0.1)
Example 14
class Symmetries(object):
    ''' defines the bases to which symmetries can be applied '''

    # list of ApplySymmetry
    apply_bases = attribute(attr_factory(list))

    # list of terms
    skip_bases = attribute(attr_factory(list))

    # list of ApplySymmetry
    apply_actions = attribute(attr_factory(list))

    # list of terms
    skip_actions = attribute(attr_factory(list))

    # do horizontal reflection
    do_reflection = attribute(False)

    # rotate x4
    do_rotations_90 = attribute(False)

    # rotate x2
    do_rotations_180 = attribute(False)
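
The do_reflection / do_rotations_90 / do_rotations_180 flags above describe which board symmetries may be used. As a purely geometric illustration (an assumption about how the flags translate to coordinate maps, expressed on (x, y) indices of an n x n board):

def reflect_horizontal(x, y, n):
    # horizontal reflection: mirror the x coordinate
    return n - 1 - x, y


def rotate_90(x, y, n):
    # one 90 degree rotation of the board
    return n - 1 - y, x


def rotate_180(x, y, n):
    # 180 degree rotation (two 90 degree rotations)
    return n - 1 - x, n - 1 - y


# e.g. on an 8x8 board, the corner (0, 0) maps to (7, 0), (7, 0) and (7, 7) respectively
assert reflect_horizontal(0, 0, 8) == (7, 0)
assert rotate_90(0, 0, 8) == (7, 0)
assert rotate_180(0, 0, 8) == (7, 7)
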
Example 15
class LGConfig(object):
    # LG convention is to postfix non-humans with "_bot"
    whoami = at.attribute("gzero_bot")

    # fetch these from your browser, after logging in
    cookie = at.attribute("login2=.......; JSESSIONID=.......")

    # dry run, don't actually send moves
    dry_run = at.attribute(True)

    # list of GameConfig
    play_games = at.attribute(default=at.attr_factory(list))

    # store game path
    store_path = at.attribute("/home/rxe/working/ggpzero/data/lg/")
Example 16
class BoardChannels(object):
    ''' board channels are defined by
        (a) the base term
        (b) a cross product of the board terms

    The value is set on the channel itself if a matching base is set at the x/y coordinates.
    '''

    base_term = attribute("cell")

    # these are indexes of the terms identifying the coordinates
    x_term_idx = attribute(1)
    y_term_idx = attribute(2)

    # list of BoardTerm (if any) - will result in taking a cross product if len() > 1
    board_terms = attribute(attr_factory(list))
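
A concrete reading of the BoardChannels / BoardTerm descriptions: a base such as (cell a 1 white) is matched against base_term, its x/y terms pick the coordinate, and the term at the BoardTerm's term_idx selects which plane gets set. The sketch below illustrates this for a single BoardTerm; the tuple representation of bases and the tiny 3x3 usage are assumptions for illustration only.

def build_planes(x_cords, y_cords, base_term, x_term_idx, y_term_idx,
                 term_idx, terms, bases):
    """One 2D plane per value in `terms`; a cell is set to 1.0 where a matching
    base (e.g. ("cell", "a", "1", "white")) names that coordinate."""
    planes = {t: [[0.0] * len(x_cords) for _ in y_cords] for t in terms}
    for base in bases:
        if base[0] != base_term:
            continue
        term = base[term_idx]
        if term in planes:
            x = x_cords.index(base[x_term_idx])
            y = y_cords.index(base[y_term_idx])
            planes[term][y][x] = 1.0
    return planes


# tiny 3x3 example: two pieces on the board
planes = build_planes("a b c".split(), "1 2 3".split(), "cell",
                      x_term_idx=1, y_term_idx=2, term_idx=3,
                      terms=["white", "black"],
                      bases=[("cell", "a", "1", "white"), ("cell", "c", "3", "black")])
assert planes["white"][0][0] == 1.0 and planes["black"][2][2] == 1.0
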
Example 17
class TrainNNConfig(object):
    game = attribute("breakthrough")

    # the generation prefix is what defines our models (along with step). Be careful not to
    # overwrite these.
    generation_prefix = attribute("x1_")

    # uses previous network?
    use_previous = attribute(True)
    next_step = attribute(42)
    overwrite_existing = attribute(False)
    validation_split = attribute(0.8)
    batch_size = attribute(32)
    epochs = attribute(10)

    # this is applied even if max_sample_count can't be reached
    starting_step = attribute(0)

    # one of adam / amsgrad / SGD
    compile_strategy = attribute("SGD")
    learning_rate = attribute(0.01)
    l2_regularisation = attribute(0.0001)

    # list of tuples.  This is the replay buffer.

    # [(5, 1.0), (10, 0.8)]
    # Will take the first 5 generations with all data and 80% of the next 10 generations.  Every
    # generation after is ignored.

    # [(-1, 1.0)]
    # Will take all generations with 100% data.

    # XXX better name would be replay_sample_buckets
    resample_buckets = attribute(default=attr_factory(list))

    # set the maximum size for an epoch.  buckets will be scaled accordingly.
    max_epoch_size = attribute(-1)

    # set the initial value weight for the first epoch of training.  On subsequent epochs the
    # value weight will automatically adjust based on whether overfitting occurs
    initial_value_weight = attribute(1.0)
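
The resample_buckets comment above can be turned into a per-generation sampling rate by walking the buckets back from the newest generation. The helper below is an illustrative interpretation of that comment (it follows the examples in the text above, but it is not the project's code):

def generation_sample_pct(buckets, gens_back):
    """Fraction of samples to keep from the generation `gens_back` steps behind
    the newest one.  A negative bucket count matches all remaining generations;
    generations past the last bucket are dropped."""
    remaining = gens_back
    for count, pct in buckets:
        if count < 0 or remaining < count:
            return pct
        remaining -= count
    return 0.0


# [(5, 1.0), (10, 0.8)]: all of the newest 5 generations, 80% of the next 10, rest ignored
buckets = [(5, 1.0), (10, 0.8)]
assert generation_sample_pct(buckets, 0) == 1.0
assert generation_sample_pct(buckets, 5) == 0.8
assert generation_sample_pct(buckets, 15) == 0.0
# [(-1, 1.0)]: all generations with 100% of the data
assert generation_sample_pct([(-1, 1.0)], 123) == 1.0
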
Example 18
class WorkerConfig(object):
    connect_port = attribute(9000)
    connect_ip_addr = attribute("127.0.0.1")
    do_training = attribute(False)
    do_self_play = attribute(False)
    self_play_batch_size = attribute(1)

    # slow things down
    sleep_between_poll = attribute(-1)

    # send back whatever samples we have gathered at this point - a sort of application-level keep alive
    server_poll_time = attribute(10)

    # the minimum number of samples gathered before sending to the server
    min_num_samples = attribute(128)

    # if this is set to zero, will do inline
    num_workers = attribute(0)

    # run system commands to get the neural network if it isn't in data
    run_cmds_if_no_nn = attribute(default=attr_factory(list))

    # will exit if there is an update to the config
    exit_on_update_config = attribute(False)
Example 19
class ApplySymmetry(object):
    base_term = attribute("cell")

    # these are indexes of the terms identifying the coordinates
    x_terms_idx = attribute(attr_factory(list))
    y_terms_idx = attribute(attr_factory(list))
Example 20
class WorkerConfigMsg(object):
    conf = attribute(default=attr_factory(confs.WorkerConfig))
Example 21
class ConfigureSelfPlay(object):
    game = attribute("game")
    generation_name = attribute("gen0")
    self_play_conf = attribute(default=attr_factory(confs.SelfPlayConfig))
Example 22
class ControlChannel(object):
    ''' Creates a single channel.  The control bases need to be mutually exclusive (ie only one
        set at a time).  If none are set the value of the channel will be zero.  If one is set,
        the channel takes the value defined in that ControlBase '''
    # a list of control bases.
    control_bases = attribute(attr_factory(list))
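
A small sketch of the rule described in the docstring above: the channel takes the value of whichever ControlBase's arg_terms are currently set, and zero if none are. The stand-in namedtuple and the example bases are assumptions for illustration only.

from collections import namedtuple

# stand-in for ControlBase, purely for illustration
FakeControlBase = namedtuple("FakeControlBase", "arg_terms value")


def control_channel_value(control_bases, set_bases):
    """The control bases are assumed mutually exclusive; return the value of the
    one that is set, or 0 if none are."""
    for cb in control_bases:
        if tuple(cb.arg_terms) in set_bases:
            return cb.value
    return 0


bases = [FakeControlBase(("control", "white"), 1),
         FakeControlBase(("control", "black"), 1)]
assert control_channel_value(bases, {("control", "white")}) == 1
assert control_channel_value(bases, set()) == 0
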
Example 23
class RequestSamples(object):
    # list of states (0/1 tuples) - to reduce duplicates
    new_states = attribute(default=attr_factory(list))
Example 24
class RequestSampleResponse(object):
    # list of def.confs.Sample
    samples = attribute(default=attr_factory(list))
    duplicates_seen = attribute(0)
Example 25
class RequestNetworkTrain(object):
    game = attribute("game")
    train_conf = attribute(default=attr_factory(confs.TrainNNConfig))
    network_model = attribute(default=attr_factory(confs.NNModelConfig))
    generation_description = attribute(default=attr_factory(datadesc.GenerationDescription))