Example #1
def main(argv):
  # Make sure tf does not allocate gpu memory.
  tf.config.experimental.set_visible_devices([], 'GPU')
  config = FLAGS.config
  game = config.game + 'NoFrameskip-v4'
  num_actions = env_utils.get_num_actions(game)
  print(f'Playing {game} with {num_actions} actions')
  model = models.ActorCritic(num_outputs=num_actions)
  ppo_lib.train(model, config, FLAGS.workdir)
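Each variant reads its hyperparameters from FLAGS.config, e.g. config.game and, in the later examples, config.learning_rate. A minimal sketch of such a config file, assuming it is defined with ml_collections (the concrete values below are illustrative assumptions, not taken from the examples):

import ml_collections


def get_config():
  # Sketch of the config object consumed via FLAGS.config.
  config = ml_collections.ConfigDict()
  config.game = 'Pong'           # combined with 'NoFrameskip-v4' in main()
  config.learning_rate = 2.5e-4  # consumed by models.create_optimizer in later examples
  return config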
Example #2
def main(argv):
    config = FLAGS.config
    game = config.game + 'NoFrameskip-v4'
    num_actions = env_utils.get_num_actions(game)
    print(f'Playing {game} with {num_actions} actions')
    key = jax.random.PRNGKey(0)
    key, subkey = jax.random.split(key)
    model = models.create_model(subkey, num_outputs=num_actions)
    optimizer = models.create_optimizer(model,
                                        learning_rate=config.learning_rate)
    del model  # model is no longer needed directly; the optimizer created above holds it
    optimizer = ppo_lib.train(optimizer, config, FLAGS.logdir)
Example #3
def main(argv):
    # Make sure tf does not allocate gpu memory.
    tf.config.experimental.set_visible_devices([], 'GPU')
    config = FLAGS.config
    game = config.game + 'NoFrameskip-v4'
    num_actions = env_utils.get_num_actions(game)
    print(f'Playing {game} with {num_actions} actions')
    module = models.ActorCritic(num_outputs=num_actions)
    key = jax.random.PRNGKey(0)
    key, subkey = jax.random.split(key)
    initial_params = models.get_initial_params(subkey, module)
    optimizer = models.create_optimizer(initial_params, config.learning_rate)
    optimizer = ppo_lib.train(module, optimizer, config, FLAGS.logdir)
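Examples #3 and #4 construct the Flax Linen module, the initial parameters, and the optimizer as separate objects. A rough sketch of what helpers such as models.get_initial_params and models.create_optimizer might look like, assuming a Linen ActorCritic, an Atari-style observation shape, and the legacy flax.optim Adam API (all of these are assumptions, not shown in the snippets):

import jax.numpy as jnp
from flax import optim


def get_initial_params(key, module):
  # Assumed Atari input: batch of 1, 84x84 pixels, 4 stacked frames.
  dummy_obs = jnp.ones((1, 84, 84, 4), jnp.float32)
  return module.init(key, dummy_obs)['params']


def create_optimizer(params, learning_rate):
  # Legacy flax.optim interface: the OptimizerDef wraps the parameter tree.
  return optim.Adam(learning_rate=learning_rate).create(params)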
Example #4
def main(argv):
    if len(argv) > 1:
        raise app.UsageError('Too many command-line arguments.')

    FLAGS.log_dir = FLAGS.workdir
    FLAGS.stderrthreshold = 'info'
    logging.get_absl_handler().start_logging_to_file()

    # Make sure tf does not allocate gpu memory.
    tf.config.experimental.set_visible_devices([], 'GPU')
    config = FLAGS.config
    game = config.game + 'NoFrameskip-v4'
    num_actions = env_utils.get_num_actions(game)
    print(f'Playing {game} with {num_actions} actions')
    module = models.ActorCritic(num_outputs=num_actions)
    key = jax.random.PRNGKey(0)
    key, subkey = jax.random.split(key)
    initial_params = models.get_initial_params(subkey, module)
    optimizer = models.create_optimizer(initial_params, config.learning_rate)
    optimizer = ppo_lib.train(module, optimizer, config, FLAGS.workdir)
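None of the snippets show how main is invoked or where FLAGS.workdir and FLAGS.config are declared. A minimal sketch of the surrounding absl boilerplate, assuming absl-py and ml_collections.config_flags (the flag names mirror the examples above; the help strings are assumptions):

from absl import app
from absl import flags
from ml_collections import config_flags

FLAGS = flags.FLAGS

flags.DEFINE_string('workdir', None,
                    'Directory for checkpoints and summaries.')
config_flags.DEFINE_config_file('config', None,
                                'File path to the training configuration.')

if __name__ == '__main__':
  app.run(main)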