def main(argv):
  """Builds the PPO model and optimizer for the configured Atari game, then trains."""
  config = FLAGS.config
  # The NoFrameskip variant exposes raw frames; frame skipping is handled by us.
  game = config.game + 'NoFrameskip-v4'
  num_actions = env_utils.get_num_actions(game)
  print(f'Playing {game} with {num_actions} actions')
  rng = jax.random.PRNGKey(0)
  rng, init_rng = jax.random.split(rng)
  model = models.create_model(init_rng, num_outputs=num_actions)
  optimizer = models.create_optimizer(model, learning_rate=config.learning_rate)
  del model  # the optimizer owns the model parameters from here on
  optimizer = ppo_lib.train(optimizer, config, FLAGS.logdir)
def main(argv):
  """Initializes the ActorCritic module and its optimizer, then runs PPO training."""
  # Make sure tf does not allocate gpu memory.
  tf.config.experimental.set_visible_devices([], 'GPU')
  config = FLAGS.config
  game = config.game + 'NoFrameskip-v4'
  num_actions = env_utils.get_num_actions(game)
  print(f'Playing {game} with {num_actions} actions')
  module = models.ActorCritic(num_outputs=num_actions)
  rng, init_rng = jax.random.split(jax.random.PRNGKey(0))
  initial_params = models.get_initial_params(init_rng, module)
  optimizer = models.create_optimizer(initial_params, config.learning_rate)
  optimizer = ppo_lib.train(module, optimizer, config, FLAGS.logdir)
def test_optimization_step(self):
  """A single PPO gradient step should return a flax Optimizer."""
  num_outputs = 4
  train_data = self.generate_random_data(num_actions=num_outputs)
  clip_param, vf_coeff, entropy_coeff = 0.1, 0.5, 0.01
  lr, batch_size = 2.5e-4, 256
  rng = jax.random.PRNGKey(0)
  rng, init_rng = jax.random.split(rng)
  model = models.create_model(init_rng, num_outputs)
  optimizer = models.create_optimizer(model, learning_rate=lr)
  optimizer, _ = ppo_lib.train_step(
      optimizer, train_data, clip_param, vf_coeff, entropy_coeff, lr,
      batch_size)
  self.assertIsInstance(optimizer, flax.optim.base.Optimizer)
def test_model(self):
  """The model yields one value per example and normalized log-probabilities."""
  rng = jax.random.PRNGKey(0)
  rng, init_rng = jax.random.split(rng)
  outputs = self.choose_random_outputs()
  model = models.create_model(init_rng, outputs)
  optimizer = models.create_optimizer(model, learning_rate=1e-3)
  self.assertIsInstance(model, nn.base.Model)
  self.assertIsInstance(optimizer, flax.optim.base.Optimizer)
  test_batch_size, obs_shape = 10, (84, 84, 4)
  random_input = onp.random.random(size=(test_batch_size,) + obs_shape)
  log_probs, values = optimizer.target(random_input)
  self.assertEqual(values.shape, (test_batch_size, 1))
  # exp(log_probs) must sum to 1 over the action axis.
  sum_probs = onp.exp(log_probs).sum(axis=1)
  self.assertEqual(sum_probs.shape, (test_batch_size,))
  onp_testing.assert_allclose(
      sum_probs, onp.ones((test_batch_size,)), atol=1e-6)
def test_optimization_step(self):
  """A single PPO train step on synthetic data returns a flax Optimizer.

  Builds an ActorCritic module with randomly initialized parameters, runs
  one `ppo_lib.train_step` on generated trajectory data, and checks the
  optimizer type is preserved.
  """
  num_outputs = 4
  trn_data = self.generate_random_data(num_actions=num_outputs)
  clip_param = 0.1
  vf_coeff = 0.5
  entropy_coeff = 0.01
  # Fix: the original assigned lr = 2.5e-4 twice; a single binding suffices.
  lr = 2.5e-4
  batch_size = 256
  key = jax.random.PRNGKey(0)
  key, subkey = jax.random.split(key)
  module = models.ActorCritic(num_outputs)
  initial_params = models.get_initial_params(subkey, module)
  optimizer = models.create_optimizer(initial_params, lr)
  optimizer, _ = ppo_lib.train_step(module, optimizer, trn_data, clip_param,
                                    vf_coeff, entropy_coeff, lr, batch_size)
  self.assertTrue(isinstance(optimizer, flax.optim.base.Optimizer))
def test_model(self):
  """policy_action produces per-example values and normalized log-probs."""
  rng = jax.random.PRNGKey(0)
  rng, init_rng = jax.random.split(rng)
  outputs = self.choose_random_outputs()
  module = models.ActorCritic(num_outputs=outputs)
  initial_params = models.get_initial_params(init_rng, module)
  lr = 2.5e-4
  optimizer = models.create_optimizer(initial_params, lr)
  self.assertIsInstance(optimizer, flax.optim.base.Optimizer)
  test_batch_size, obs_shape = 10, (84, 84, 4)
  random_input = np.random.random(size=(test_batch_size,) + obs_shape)
  log_probs, values = agent.policy_action(optimizer.target, module,
                                          random_input)
  self.assertEqual(values.shape, (test_batch_size, 1))
  # exp(log_probs) must sum to 1 over the action axis.
  sum_probs = np.exp(log_probs).sum(axis=1)
  self.assertEqual(sum_probs.shape, (test_batch_size,))
  np_testing.assert_allclose(
      sum_probs, np.ones((test_batch_size,)), atol=1e-6)
def main(argv):
  """Validates CLI arguments, sets up logging and TF, then runs PPO training."""
  if len(argv) > 1:
    raise app.UsageError('Too many command-line arguments.')
  # Route absl log files into the work directory before logging starts.
  FLAGS.log_dir = FLAGS.workdir
  FLAGS.stderrthreshold = 'info'
  logging.get_absl_handler().start_logging_to_file()
  # Make sure tf does not allocate gpu memory.
  tf.config.experimental.set_visible_devices([], 'GPU')
  config = FLAGS.config
  game = config.game + 'NoFrameskip-v4'
  num_actions = env_utils.get_num_actions(game)
  print(f'Playing {game} with {num_actions} actions')
  module = models.ActorCritic(num_outputs=num_actions)
  rng, init_rng = jax.random.split(jax.random.PRNGKey(0))
  initial_params = models.get_initial_params(init_rng, module)
  optimizer = models.create_optimizer(initial_params, config.learning_rate)
  optimizer = ppo_lib.train(module, optimizer, config, FLAGS.workdir)
def train(self, exp_config):
    """Trains the main model: one clean warm-up epoch, then alternating epochs.

    After the warm-up epoch on clean data, each of the remaining
    `num_epochs - 1` epochs optionally trains on a fresh noisy subsample
    before the clean data; dev evaluation runs after each such epoch and
    the test set is evaluated once at the end.

    Args:
        exp_config: experiment config dict; reads "use_noisy", "seed",
            "data_dir" and "num_epochs" here (helpers read more keys).
    """
    # making sure that all nn_models are trainable — a frozen parameter
    # would silently receive no gradient updates, so fail fast.
    for name, param in self.main_model.named_parameters():
        if not param.requires_grad:
            # Fix: raise ValueError instead of bare Exception; it is more
            # specific and still caught by existing `except Exception`.
            raise ValueError(f"Frozen: {name}, {param.data}")
    dataloader_train_clean, _ = self._load_train_clean(exp_config)
    if exp_config["use_noisy"]:
        # Dedicated RNG keeps the per-epoch noisy subsamples reproducible.
        self.noisy_subsample_random = np.random.RandomState(
            exp_config["seed"])
        self.noisy_dataset_full_size = len(
            read_sentences_from_file(exp_config["data_dir"],
                                     "train_noisy.tsv"))
    optimizer, scheduler = create_optimizer(self.main_model, exp_config)
    self.main_model.zero_grad()
    # first epoch only clean
    self.train_one_epoch(dataloader_train_clean, optimizer, scheduler,
                         exp_config, "clean")
    epoch_iterator = trange(0, int(exp_config["num_epochs"] - 1),
                            desc="Epoch")
    for epoch in epoch_iterator:
        if exp_config["use_noisy"]:
            # Re-subsample the noisy data every epoch.
            dataloader_train_noisy = self.load_train_noisy_subsample(
                exp_config)
            self.train_one_epoch(dataloader_train_noisy, optimizer,
                                 scheduler, exp_config, "noisy")
        self.train_one_epoch(dataloader_train_clean, optimizer, scheduler,
                             exp_config, "clean")
        # after each epoch, evaluate on dev and save if better than previous
        self._dev(exp_config, epoch)
    # test in the end
    self._test(exp_config)
def train(self, exp_config):
    """Trains with one clean warm-up epoch, then alternating noisy/clean epochs.

    Requires clean training data. When "use_noisy" is set, a fresh noisy
    subsample is drawn and trained on before the clean data in each later
    epoch. Dev evaluation runs after each such epoch; the test set is
    evaluated once at the end.

    Args:
        exp_config: experiment config dict; reads "use_clean", "use_noisy",
            "seed", "data_dir" and "num_epochs" here (helpers read more keys).

    Raises:
        ValueError: if clean training data is disabled in the config.
    """
    # Fix: `assert` is stripped under `python -O`; validate explicitly so the
    # precondition is always enforced.
    if not exp_config["use_clean"]:
        raise ValueError("This trainer requires exp_config['use_clean'].")
    dataloader_train_clean, _ = self._load_train_clean(exp_config)
    if exp_config["use_noisy"]:
        # Dedicated RNG keeps the per-epoch noisy subsamples reproducible.
        self.noisy_subsample_random = np.random.RandomState(
            exp_config["seed"])
        self.noisy_dataset_full_size = len(
            read_instances_from_file(exp_config["data_dir"],
                                     "train_noisy.tsv"))
    optimizer, scheduler = create_optimizer(self.main_model, exp_config)
    self.main_model.zero_grad()
    # first epoch only clean
    self.train_one_epoch(dataloader_train_clean, optimizer, scheduler,
                         exp_config, "clean - 1. epoch")
    epoch_iterator = trange(0, int(exp_config["num_epochs"] - 1),
                            desc="Epoch")
    for epoch in epoch_iterator:
        if exp_config["use_noisy"]:
            dataloader_train_noisy = self.load_train_noisy_subsample(
                exp_config)
            self.train_one_epoch(dataloader_train_noisy, optimizer,
                                 scheduler, exp_config, "noisy")
        self.train_one_epoch(dataloader_train_clean, optimizer, scheduler,
                             exp_config, "clean")
        # after each epoch, evaluate on dev and save if better than previous
        self._dev(exp_config, epoch)
    # test in the end
    self._test(exp_config)
def train(self, exp_config):
    """Trains the confusion-matrix (noise-adaptation) model.

    Estimates a label-noise matrix from paired clean / clean-with-noisy-labels
    files, optionally smooths it, wraps the main model in a CM model, and then
    trains: one clean warm-up epoch followed by `num_epochs - 1` epochs of a
    fresh noisy subsample plus the clean data, with dev evaluation after each
    epoch and a final test pass.

    Args:
        exp_config: experiment config dict; reads "seed", "data_dir",
            "labels", "num_epochs" and the optional flags "use_true_cm",
            "use_io_estimation", "matrix_smoothing_value".
    """
    # load clean dataset
    dataloader_train_clean, clean_indices = self._load_train_clean(
        exp_config)
    # load noisy dataset
    self.noisy_subsample_random = np.random.RandomState(exp_config["seed"])
    self.noisy_dataset_full_size = len(
        read_sentences_from_file(exp_config["data_dir"], "train_noisy.tsv"))
    # creating noise matrix from pairs of clean and noisy labels,
    # making sure that the noise matrix is built on the same subset indices
    # as the actual training data
    clean_sentences = read_sentences_from_file(exp_config["data_dir"],
                                               "train_clean.tsv")
    clean_noisy_labels_sentences = read_sentences_from_file(
        exp_config["data_dir"], "train_clean_noisy_labels.tsv")
    # Fix: idiomatic `not in` instead of `not "x" in` (twice below).
    if "use_true_cm" not in exp_config or not exp_config["use_true_cm"]:
        if clean_indices is None:
            logging.info(
                "Would be using the clean subsample, but the full clean data is used for the training."
            )
        else:
            logging.info(
                "Using the clean subsample to estimate the noise matrix.")
            clean_sentences = np.array(clean_sentences)[clean_indices]
            clean_noisy_labels_sentences = np.array(
                clean_noisy_labels_sentences)[clean_indices]
    else:
        logging.info(
            "Using the full clean data to estimate the noise matrix.")
    if "use_io_estimation" not in exp_config or not exp_config[
            "use_io_estimation"]:
        logging.info("Using BIO format for noise matrix estimation.")
        noise_matrix = ConfusionMatrixTrainer.compute_noise_matrix(
            clean_sentences, clean_noisy_labels_sentences,
            exp_config["labels"])
    else:
        logging.info("Using IO format for noise matrix estimation.")
        noise_matrix = ConfusionMatrixTrainer.compute_noise_matrix_io(
            clean_sentences, clean_noisy_labels_sentences,
            exp_config["labels"])
    # NOTE(review): this function mixes `logging.info` (above) and
    # `logger.info` (below); presumably both reach the same sinks — confirm
    # and unify, since changing either would alter log routing.
    logger.info(f"Using noise matrix {repr(noise_matrix)}")
    # matrix smoothing following https://arxiv.org/pdf/2003.11904.pdf
    if "matrix_smoothing_value" in exp_config:
        logger.info(
            f"Using matrix smoothing with value {exp_config['matrix_smoothing_value']}."
        )
        smoothing_beta = exp_config[
            "matrix_smoothing_value"]  # 0.8 is their chosen hyperparameter in the paper, section 4.2
        noise_matrix = noise_matrix**smoothing_beta
        # renormalize rows in place so each remains a distribution
        for row in noise_matrix:
            row_sum = np.sum(row)
            if row_sum != 0:
                row /= row_sum
    self.cm_model = create_cm_model(self.main_model, noise_matrix,
                                    exp_config)
    optimizer, scheduler = create_optimizer(self.cm_model, exp_config)
    self.cm_model.zero_grad()
    # first epoch only clean
    self.train_one_epoch(dataloader_train_clean, optimizer, scheduler,
                         exp_config, "clean")
    epoch_iterator = trange(0, int(exp_config["num_epochs"] - 1),
                            desc="Epoch")
    for epoch in epoch_iterator:
        dataloader_train_noisy = self.load_train_noisy_subsample(exp_config)
        self.train_one_epoch(dataloader_train_noisy, optimizer, scheduler,
                             exp_config, "noisy")
        self.train_one_epoch(dataloader_train_clean, optimizer, scheduler,
                             exp_config, "clean")
        # after each epoch, evaluate on dev and save if better than previous
        self._dev(exp_config, epoch)
    # test in the end
    self._test(exp_config)