Beispiel #1
0
def main(unused_argv):
    """Build an AlphaZero config from command-line flags and launch a run.

    Each setting is read from the attribute of ``FLAGS`` with the same name,
    except ``observation_shape`` and ``output_size``, which are passed as
    ``None``.

    Args:
        unused_argv: Ignored by this function.
    """
    # Resolve the config factory first, before any flag is read.
    make_config = alpha_zero.Config

    # Config fields whose value comes straight from the same-named flag,
    # listed in the order they are handed to the config.
    flag_backed_fields = (
        "game",
        "path",
        "learning_rate",
        "weight_decay",
        "train_batch_size",
        "replay_buffer_size",
        "replay_buffer_reuse",
        "max_steps",
        "checkpoint_freq",
        "actors",
        "evaluators",
        "uct_c",
        "max_simulations",
        "policy_alpha",
        "policy_epsilon",
        "temperature",
        "temperature_drop",
        "evaluation_window",
        "eval_levels",
        "nn_model",
        "nn_width",
        "nn_depth",
    )
    settings = {name: getattr(FLAGS, name) for name in flag_backed_fields}

    # These two are not exposed as flags here; they are explicitly left unset.
    settings.update(observation_shape=None, output_size=None)
    settings["quiet"] = FLAGS.quiet

    alpha_zero.alpha_zero(make_config(**settings))
def main(unused_argv):
  """Launch an AlphaZero run on tic-tac-toe with hard-coded settings.

  Only ``path`` is taken from ``FLAGS``; every other config value is a
  constant defined in this function. ``observation_shape`` and
  ``output_size`` are passed as ``None``.

  Args:
    unused_argv: Ignored by this function.
  """
  # Resolve the config factory first, before the path flag is read.
  make_config = alpha_zero.Config

  # Game, output location and learner/replay settings.
  learner_settings = {
      "game": "tic_tac_toe",
      "path": FLAGS.path,
      "learning_rate": 0.01,
      "weight_decay": 1e-4,
      "train_batch_size": 128,
      "replay_buffer_size": 2**14,
      "replay_buffer_reuse": 4,
      "max_steps": 25,
      "checkpoint_freq": 25,
  }

  # Actor/evaluator counts and search settings.
  search_settings = {
      "actors": 4,
      "evaluators": 4,
      "uct_c": 1,
      "max_simulations": 20,
      "policy_alpha": 0.25,
      "policy_epsilon": 1,
      "temperature": 1,
      "temperature_drop": 4,
      "evaluation_window": 50,
      "eval_levels": 7,
  }

  # Network settings; the two shape fields are explicitly left unset.
  network_settings = {
      "nn_model": "resnet",
      "nn_width": 128,
      "nn_depth": 2,
      "observation_shape": None,
      "output_size": None,
  }

  settings = {
      **learner_settings,
      **search_settings,
      **network_settings,
      "quiet": True,
  }
  alpha_zero.alpha_zero(make_config(**settings))