Esempio n. 1
0
def main(argv):
    """Build an actor-critic model and launch PPO training.

    Args:
      argv: positional command-line arguments forwarded by absl.app (unused).
    """
    config = FLAGS.config
    # NOTE(review): NoFrameskip suggests frame skipping is done in our own
    # preprocessing — confirm against env_utils.
    game = config.game + 'NoFrameskip-v4'
    num_actions = env_utils.get_num_actions(game)
    print(f'Playing {game} with {num_actions} actions')
    # Dedicated subkey for parameter initialization.
    rng = jax.random.PRNGKey(0)
    rng, init_rng = jax.random.split(rng)
    model = models.create_model(init_rng, num_outputs=num_actions)
    optimizer = models.create_optimizer(
        model, learning_rate=config.learning_rate)
    del model  # The optimizer retains the model as its target.
    optimizer = ppo_lib.train(optimizer, config, FLAGS.logdir)
Esempio n. 2
0
def main(argv):
    """Initialize the ActorCritic module and run PPO training.

    Args:
      argv: positional command-line arguments forwarded by absl.app (unused).
    """
    # Make sure tf does not allocate gpu memory.
    tf.config.experimental.set_visible_devices([], 'GPU')

    config = FLAGS.config
    game = config.game + 'NoFrameskip-v4'
    num_actions = env_utils.get_num_actions(game)
    print(f'Playing {game} with {num_actions} actions')

    module = models.ActorCritic(num_outputs=num_actions)
    # Split the root key so initialization gets its own subkey.
    rng = jax.random.PRNGKey(0)
    rng, init_rng = jax.random.split(rng)
    initial_params = models.get_initial_params(init_rng, module)
    optimizer = models.create_optimizer(
        initial_params, config.learning_rate)
    optimizer = ppo_lib.train(module, optimizer, config, FLAGS.logdir)
Esempio n. 3
0
 def test_optimization_step(self):
     """A single PPO train step should produce a flax Optimizer."""
     num_outputs = 4
     trn_data = self.generate_random_data(num_actions=num_outputs)
     # PPO hyperparameters.
     clip_param = 0.1
     vf_coeff = 0.5
     entropy_coeff = 0.01
     lr = 2.5e-4
     batch_size = 256
     key = jax.random.PRNGKey(0)
     key, subkey = jax.random.split(key)
     model = models.create_model(subkey, num_outputs)
     optimizer = models.create_optimizer(model, learning_rate=lr)
     optimizer, _ = ppo_lib.train_step(optimizer, trn_data, clip_param,
                                       vf_coeff, entropy_coeff, lr,
                                       batch_size)
     # assertIsInstance gives a clearer failure message than
     # assertTrue(isinstance(...)).
     self.assertIsInstance(optimizer, flax.optim.base.Optimizer)
Esempio n. 4
0
 def test_model(self):
     """Model returns one value per example and log-probs summing to 1."""
     key = jax.random.PRNGKey(0)
     key, subkey = jax.random.split(key)
     outputs = self.choose_random_outputs()
     model = models.create_model(subkey, outputs)
     optimizer = models.create_optimizer(model, learning_rate=1e-3)
     # assertIsInstance reports the actual type on failure, unlike
     # assertTrue(isinstance(...)).
     self.assertIsInstance(model, nn.base.Model)
     self.assertIsInstance(optimizer, flax.optim.base.Optimizer)
     test_batch_size, obs_shape = 10, (84, 84, 4)
     random_input = onp.random.random(size=(test_batch_size, ) + obs_shape)
     log_probs, values = optimizer.target(random_input)
     self.assertEqual(values.shape, (test_batch_size, 1))
     # exp(log_probs) must form a valid probability distribution per example.
     sum_probs = onp.sum(onp.exp(log_probs), axis=1)
     self.assertEqual(sum_probs.shape, (test_batch_size, ))
     onp_testing.assert_allclose(sum_probs,
                                 onp.ones((test_batch_size, )),
                                 atol=1e-6)
Esempio n. 5
0
 def test_optimization_step(self):
     """A single PPO train step should produce a flax Optimizer."""
     num_outputs = 4
     trn_data = self.generate_random_data(num_actions=num_outputs)
     # PPO hyperparameters.
     clip_param = 0.1
     vf_coeff = 0.5
     entropy_coeff = 0.01
     lr = 2.5e-4  # Fix: lr was redundantly assigned twice in the original.
     batch_size = 256
     key = jax.random.PRNGKey(0)
     key, subkey = jax.random.split(key)
     module = models.ActorCritic(num_outputs)
     initial_params = models.get_initial_params(subkey, module)
     optimizer = models.create_optimizer(initial_params, lr)
     optimizer, _ = ppo_lib.train_step(module, optimizer, trn_data,
                                       clip_param, vf_coeff, entropy_coeff,
                                       lr, batch_size)
     # assertIsInstance gives a clearer failure message than
     # assertTrue(isinstance(...)).
     self.assertIsInstance(optimizer, flax.optim.base.Optimizer)
Esempio n. 6
0
 def test_model(self):
     """Policy returns one value per example and log-probs summing to 1."""
     key = jax.random.PRNGKey(0)
     key, subkey = jax.random.split(key)
     outputs = self.choose_random_outputs()
     module = models.ActorCritic(num_outputs=outputs)
     initial_params = models.get_initial_params(subkey, module)
     lr = 2.5e-4
     optimizer = models.create_optimizer(initial_params, lr)
     # assertIsInstance reports the actual type on failure, unlike
     # assertTrue(isinstance(...)).
     self.assertIsInstance(optimizer, flax.optim.base.Optimizer)
     test_batch_size, obs_shape = 10, (84, 84, 4)
     random_input = np.random.random(size=(test_batch_size, ) + obs_shape)
     log_probs, values = agent.policy_action(optimizer.target, module,
                                             random_input)
     self.assertEqual(values.shape, (test_batch_size, 1))
     # exp(log_probs) must form a valid probability distribution per example.
     sum_probs = np.sum(np.exp(log_probs), axis=1)
     self.assertEqual(sum_probs.shape, (test_batch_size, ))
     np_testing.assert_allclose(sum_probs,
                                np.ones((test_batch_size, )),
                                atol=1e-6)
Esempio n. 7
0
def main(argv):
    """Entry point: set up logging, build the agent, and run PPO training.

    Args:
      argv: positional command-line arguments from absl.app; only the
        program name is expected.

    Raises:
      app.UsageError: if extra positional arguments are supplied.
    """
    if len(argv) > 1:
        raise app.UsageError('Too many command-line arguments.')

    # Mirror absl logs to a file under the working directory.
    FLAGS.log_dir = FLAGS.workdir
    FLAGS.stderrthreshold = 'info'
    logging.get_absl_handler().start_logging_to_file()

    # Make sure tf does not allocate gpu memory.
    tf.config.experimental.set_visible_devices([], 'GPU')

    config = FLAGS.config
    game = config.game + 'NoFrameskip-v4'
    num_actions = env_utils.get_num_actions(game)
    print(f'Playing {game} with {num_actions} actions')

    module = models.ActorCritic(num_outputs=num_actions)
    # Split the root key so initialization gets its own subkey.
    rng = jax.random.PRNGKey(0)
    rng, init_rng = jax.random.split(rng)
    initial_params = models.get_initial_params(init_rng, module)
    optimizer = models.create_optimizer(initial_params, config.learning_rate)
    optimizer = ppo_lib.train(module, optimizer, config, FLAGS.workdir)
    def train(self, exp_config):
        """Train the main model: one clean warm-up epoch, then alternating
        noisy/clean epochs, evaluating on dev after each epoch and on test
        at the end.

        Args:
            exp_config: experiment configuration dict; reads "use_noisy",
                "seed", "data_dir", and "num_epochs" here.
        """
        # Every parameter of the main model must be trainable.
        for name, param in self.main_model.named_parameters():
            if not param.requires_grad:
                raise Exception(f"Frozen:  {name}, {param.data}")

        dataloader_train_clean, _ = self._load_train_clean(exp_config)

        if exp_config["use_noisy"]:
            # Seeded RNG so the per-epoch noisy subsample is reproducible.
            self.noisy_subsample_random = np.random.RandomState(
                exp_config["seed"])
            self.noisy_dataset_full_size = len(
                read_sentences_from_file(exp_config["data_dir"],
                                         "train_noisy.tsv"))

        optimizer, scheduler = create_optimizer(self.main_model, exp_config)
        self.main_model.zero_grad()

        # Warm-up: the first epoch uses clean data only.
        self.train_one_epoch(dataloader_train_clean, optimizer, scheduler,
                             exp_config, "clean")

        remaining_epochs = trange(0,
                                  int(exp_config["num_epochs"] - 1),
                                  desc="Epoch")
        for epoch in remaining_epochs:
            if exp_config["use_noisy"]:
                # A fresh noisy subsample is drawn every epoch.
                dataloader_train_noisy = self.load_train_noisy_subsample(
                    exp_config)
                self.train_one_epoch(dataloader_train_noisy, optimizer,
                                     scheduler, exp_config, "noisy")

            self.train_one_epoch(dataloader_train_clean, optimizer, scheduler,
                                 exp_config, "clean")

            # after each epoch, evaluate on dev and save if better than previous
            self._dev(exp_config, epoch)

        # test in the end
        self._test(exp_config)
    def train(self, exp_config):
        """Train the main model on clean data, optionally interleaving
        noisy-data epochs; evaluates on dev after each epoch, test at the end.

        Args:
            exp_config: experiment configuration dict; reads "use_clean",
                "use_noisy", "seed", "data_dir", and "num_epochs" here.
        """
        assert exp_config["use_clean"]

        dataloader_train_clean, _ = self._load_train_clean(exp_config)

        if exp_config["use_noisy"]:
            # Seeded RNG so the per-epoch noisy subsample is reproducible.
            self.noisy_subsample_random = np.random.RandomState(
                exp_config["seed"])
            self.noisy_dataset_full_size = len(
                read_instances_from_file(exp_config["data_dir"],
                                         "train_noisy.tsv"))

        optimizer, scheduler = create_optimizer(self.main_model, exp_config)
        self.main_model.zero_grad()

        # Warm-up: the first epoch uses clean data only.
        self.train_one_epoch(dataloader_train_clean, optimizer, scheduler,
                             exp_config, "clean - 1. epoch")

        remaining_epochs = trange(0,
                                  int(exp_config["num_epochs"] - 1),
                                  desc="Epoch")
        for epoch in remaining_epochs:
            if exp_config["use_noisy"]:
                # A fresh noisy subsample is drawn every epoch.
                dataloader_train_noisy = self.load_train_noisy_subsample(
                    exp_config)
                self.train_one_epoch(dataloader_train_noisy, optimizer,
                                     scheduler, exp_config, "noisy")

            self.train_one_epoch(dataloader_train_clean, optimizer, scheduler,
                                 exp_config, "clean")

            # after each epoch, evaluate on dev and save if better than previous
            self._dev(exp_config, epoch)

        # test in the end
        self._test(exp_config)
    def train(self, exp_config):
        """Train with a confusion-matrix (noise-matrix) model.

        Estimates a label-noise matrix from paired clean/noisy annotations,
        optionally smooths it, wraps the main model in a CM model, then runs
        one clean warm-up epoch followed by alternating noisy/clean epochs.
        Evaluates on dev after each epoch and on test at the end.

        Args:
            exp_config: experiment configuration dict; reads "seed",
                "data_dir", "labels", "num_epochs", and the optional keys
                "use_true_cm", "use_io_estimation",
                "matrix_smoothing_value" here.
        """
        # load clean dataset
        dataloader_train_clean, clean_indices = self._load_train_clean(
            exp_config)

        # load noisy dataset (seeded RNG for reproducible subsampling)
        self.noisy_subsample_random = np.random.RandomState(exp_config["seed"])
        self.noisy_dataset_full_size = len(
            read_sentences_from_file(exp_config["data_dir"],
                                     "train_noisy.tsv"))

        # creating noise matrix from pairs of clean and noisy labels
        # making sure that the noise matrix is built on the same subset
        # indices as the actual training data
        clean_sentences = read_sentences_from_file(exp_config["data_dir"],
                                                   "train_clean.tsv")
        clean_noisy_labels_sentences = read_sentences_from_file(
            exp_config["data_dir"], "train_clean_noisy_labels.tsv")

        # Idiom fix: `not "k" in d or not d["k"]` is equivalent to
        # `not d.get("k")` (absent key or falsy value) and avoids E713.
        if not exp_config.get("use_true_cm"):
            if clean_indices is None:
                logging.info(
                    "Would be using the clean subsample, but the full clean data is used for the training."
                )
            else:
                logging.info(
                    "Using the clean subsample to estimate the noise matrix.")
                clean_sentences = np.array(clean_sentences)[clean_indices]
                clean_noisy_labels_sentences = np.array(
                    clean_noisy_labels_sentences)[clean_indices]
        else:
            logging.info(
                "Using the full clean data to estimate the noise matrix.")

        if not exp_config.get("use_io_estimation"):
            logging.info("Using BIO format for noise matrix estimation.")
            noise_matrix = ConfusionMatrixTrainer.compute_noise_matrix(
                clean_sentences, clean_noisy_labels_sentences,
                exp_config["labels"])
        else:
            logging.info("Using IO format for noise matrix estimation.")
            noise_matrix = ConfusionMatrixTrainer.compute_noise_matrix_io(
                clean_sentences, clean_noisy_labels_sentences,
                exp_config["labels"])

        logger.info(f"Using noise matrix {repr(noise_matrix)}")

        # matrix smoothing following https://arxiv.org/pdf/2003.11904.pdf
        if "matrix_smoothing_value" in exp_config:
            logger.info(
                f"Using matrix smoothing with value {exp_config['matrix_smoothing_value']}."
            )
            # 0.8 is their chosen hyperparameter in the paper, section 4.2
            smoothing_beta = exp_config["matrix_smoothing_value"]
            noise_matrix = noise_matrix**smoothing_beta

            # renormalize rows to sum to 1 (in-place via numpy row views)
            for row in noise_matrix:
                row_sum = np.sum(row)
                if row_sum != 0:
                    row /= row_sum

        self.cm_model = create_cm_model(self.main_model, noise_matrix,
                                        exp_config)

        optimizer, scheduler = create_optimizer(self.cm_model, exp_config)

        self.cm_model.zero_grad()

        # first epoch only clean
        self.train_one_epoch(dataloader_train_clean, optimizer, scheduler,
                             exp_config, "clean")

        epoch_iterator = trange(0,
                                int(exp_config["num_epochs"] - 1),
                                desc="Epoch")
        for epoch in epoch_iterator:
            dataloader_train_noisy = self.load_train_noisy_subsample(
                exp_config)
            self.train_one_epoch(dataloader_train_noisy, optimizer, scheduler,
                                 exp_config, "noisy")

            self.train_one_epoch(dataloader_train_clean, optimizer, scheduler,
                                 exp_config, "clean")

            # after each epoch, evaluate on dev and save if better than previous
            self._dev(exp_config, epoch)

        # test in the end
        self._test(exp_config)