def main(argv):
    """Entry point: build an ActorCritic model for the configured Atari
    game and hand it to the PPO training loop.

    Args:
      argv: command-line arguments (unused here; absl passes them through).
    """
    # Hide GPUs from TensorFlow so it cannot grab device memory that the
    # training framework needs.
    tf.config.experimental.set_visible_devices([], 'GPU')

    config = FLAGS.config
    env_name = config.game + 'NoFrameskip-v4'
    action_count = env_utils.get_num_actions(env_name)
    print(f'Playing {env_name} with {action_count} actions')

    network = models.ActorCritic(num_outputs=action_count)
    ppo_lib.train(network, config, FLAGS.workdir)
def main(argv):
    """Entry point: initialize model parameters from a fixed PRNG seed,
    wrap them in an optimizer, and run PPO training.

    Args:
      argv: command-line arguments (unused here; absl passes them through).
    """
    config = FLAGS.config
    env_name = config.game + 'NoFrameskip-v4'
    action_count = env_utils.get_num_actions(env_name)
    print(f'Playing {env_name} with {action_count} actions')

    # Deterministic initialization: split a fixed seed for model creation.
    rng = jax.random.PRNGKey(0)
    rng, init_rng = jax.random.split(rng)
    network = models.create_model(init_rng, num_outputs=action_count)
    optimizer = models.create_optimizer(
        network, learning_rate=config.learning_rate)
    # The optimizer holds the model from here on; drop the direct reference.
    del network

    optimizer = ppo_lib.train(optimizer, config, FLAGS.logdir)
def main(argv):
    """Entry point: construct the ActorCritic module, initialize its
    parameters from a fixed seed, build an optimizer, and train with PPO.

    Args:
      argv: command-line arguments (unused here; absl passes them through).
    """
    # Hide GPUs from TensorFlow so it cannot grab device memory that the
    # training framework needs.
    tf.config.experimental.set_visible_devices([], 'GPU')

    config = FLAGS.config
    env_name = config.game + 'NoFrameskip-v4'
    action_count = env_utils.get_num_actions(env_name)
    print(f'Playing {env_name} with {action_count} actions')

    network = models.ActorCritic(num_outputs=action_count)
    # Deterministic initialization: split a fixed seed for parameter init.
    rng = jax.random.PRNGKey(0)
    rng, init_rng = jax.random.split(rng)
    params = models.get_initial_params(init_rng, network)
    optimizer = models.create_optimizer(params, config.learning_rate)

    optimizer = ppo_lib.train(network, optimizer, config, FLAGS.logdir)
def main(argv):
    """Entry point: validate CLI args, route absl logs to the work
    directory, then build the PPO model/optimizer pair and train.

    Args:
      argv: command-line arguments; anything beyond the program name is an
        error.

    Raises:
      app.UsageError: if extra positional arguments were supplied.
    """
    if len(argv) > 1:
        raise app.UsageError('Too many command-line arguments.')

    # Mirror absl file logging into the work directory at INFO level.
    FLAGS.log_dir = FLAGS.workdir
    FLAGS.stderrthreshold = 'info'
    logging.get_absl_handler().start_logging_to_file()

    # Hide GPUs from TensorFlow so it cannot grab device memory that the
    # training framework needs.
    tf.config.experimental.set_visible_devices([], 'GPU')

    config = FLAGS.config
    env_name = config.game + 'NoFrameskip-v4'
    action_count = env_utils.get_num_actions(env_name)
    print(f'Playing {env_name} with {action_count} actions')

    network = models.ActorCritic(num_outputs=action_count)
    # Deterministic initialization: split a fixed seed for parameter init.
    rng = jax.random.PRNGKey(0)
    rng, init_rng = jax.random.split(rng)
    params = models.get_initial_params(init_rng, network)
    optimizer = models.create_optimizer(params, config.learning_rate)

    optimizer = ppo_lib.train(network, optimizer, config, FLAGS.workdir)