Example #1
    def test_retrained_model(self):
        """Test code for retrained model."""
        retrained_model, config = build_model_from_config(
            './bert_config.json',
            output_dim=2,
            seq_len=64,
            retention_configuration=[
                64, 64, 64, 32, 32, 32, 16, 16, 16, 8, 8, 8
            ],
            FLAG_EXTRACT_LAYER=2,
            TASK='cola')

        decay_steps, warmup_steps = calc_train_steps(
            8550,
            batch_size=128,
            epochs=3,
        )
        retrained_model.compile(AdamWarmup(decay_steps=decay_steps,
                                           warmup_steps=warmup_steps,
                                           lr=3e-5,
                                           lr_mult=None),
                                loss='sparse_categorical_crossentropy',
                                metrics=['accuracy'])

        print("Retrained model summary: ", retrained_model.summary())
        del retrained_model
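
The retention_configuration passed above appears to hold one entry per BERT-base encoder layer (twelve values), giving the number of word-vectors kept after that layer; in the example the values are non-increasing and never exceed seq_len. A hypothetical sanity check for such a list (the helper name and the exact constraints are assumptions, not part of the repo):

def check_retention_configuration(cfg, seq_len=64, num_layers=12):
    """Hypothetical helper: validate a retention configuration like the one above."""
    assert len(cfg) == num_layers, "one entry per encoder layer"
    assert all(isinstance(k, int) and 0 < k <= seq_len for k in cfg)
    assert all(a >= b for a, b in zip(cfg, cfg[1:])), "non-increasing retention"

check_retention_configuration([64, 64, 64, 32, 32, 32, 16, 16, 16, 8, 8, 8])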
Example #2
    def test_search_model(self):
        """Test code for configurtion search."""
        configuration_search_model, config = build_model_from_config(
            './bert_config.json',
            output_dim=2,
            seq_len=64,
            LAMBDA=3e-3,
            FLAG_EXTRACT_LAYER=1,
            TASK='cola')

        decay_steps, warmup_steps = calc_train_steps(
            8550,
            batch_size=128,
            epochs=3,
        )
        configuration_search_model.compile(
            AdamWarmup(decay_steps=decay_steps,
                       warmup_steps=warmup_steps,
                       lr=3e-5,
                       lr_mult=None),
            loss='sparse_categorical_crossentropy',
            metrics=['accuracy'])

        print("Configuration search model summary: ",
              configuration_search_model.summary())
        del configuration_search_model
Example #3
    def test_finetuned_model(self):
        """Test code for finetuning task."""

        fine_tuned_model, config = build_model_from_config(
            './bert_config.json',
            output_dim=2,
            seq_len=64,
            FLAG_EXTRACT_LAYER=0,
            TASK='cola')

        decay_steps, warmup_steps = calc_train_steps(
            8550,
            batch_size=128,
            epochs=3,
        )

        fine_tuned_model.compile(AdamWarmup(decay_steps=decay_steps,
                                            warmup_steps=warmup_steps,
                                            lr=3e-5,
                                            lr_mult=None),
                                 loss='sparse_categorical_crossentropy',
                                 metrics=['accuracy'])

        print("Fine-tuned model summary: ", fine_tuned_model.summary())
        del fine_tuned_model
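
The three tests above differ mainly in FLAG_EXTRACT_LAYER: 0 builds the plain fine-tuning model, 1 the configuration search model with Soft-Extract layers, and 2 the retrained model with Hard Extract layers. A compact sketch that exercises all three modes with the same call used in the tests (assuming build_model_from_config accepts exactly the keyword arguments shown there):

for flag, extra in [
        (0, {}),                                               # fine-tuning model
        (1, {'LAMBDA': 3e-3}),                                 # configuration search (Soft-Extract)
        (2, {'retention_configuration':
             [64, 64, 64, 32, 32, 32, 16, 16, 16, 8, 8, 8]})   # retrained (Hard Extract)
]:
    model, config = build_model_from_config('./bert_config.json',
                                            output_dim=2,
                                            seq_len=64,
                                            FLAG_EXTRACT_LAYER=flag,
                                            TASK='cola',
                                            **extra)
    model.summary()
    del model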
Example #4
    def fine_tuning_step(self, LR_BERT=0.00003):
        """
            Carries out simple fine-tuning on the data given in train_data above.
            Use it only if the model at the checkpoint path has never seen this data.
            Returns:
                A string representing the path to the fine-tuned model checkpoint.
            """

        fine_tuned_model = load_model(self.BERT_CONFIG_PATH,
                                      self.CHECKPOINT_PATH,
                                      FLAG_BERT_PRETRAINED=True,
                                      output_dim=self.NUM_CLASSES,
                                      seq_len=self.SEQ_LEN,
                                      FLAG_EXTRACT_LAYER=0,
                                      TASK=self.TASK)
        decay_steps, warmup_steps = calc_train_steps(
            self.NUM_TRAIN,
            batch_size=self.BATCH_SIZE,
            epochs=self.EPOCHS,
        )
        fine_tuned_model.compile(
            AdamWarmup(decay_steps=decay_steps,
                       warmup_steps=warmup_steps,
                       lr=LR_BERT,
                       lr_mult=None),
            loss=self.loss,
            metrics=self.metric,
        )
        print("Fine-tuned model summary: ", fine_tuned_model.summary())

        SAVE_CP_PATH = os.path.join(self.OUTPUT_DIR, "finetune.hdf5")
        checkpoint = ModelCheckpoint(SAVE_CP_PATH,
                                     monitor=self.validation_metric,
                                     verbose=1,
                                     save_best_only=True,
                                     mode='max')
        history = fine_tuned_model.fit(self.train_data[0],
                                       self.train_data[1],
                                       batch_size=self.BATCH_SIZE,
                                       epochs=self.EPOCHS,
                                       validation_data=(self.dev_data[0],
                                                        self.dev_data[1],
                                                        None),
                                       verbose=1,
                                       callbacks=[checkpoint])
        with open(self.LOGFILE_PATH, 'a') as fp:
            fp.write(
                "\n Fine-tuned model accuracies for all epochs on the Dev set:"
                + str(history.history[self.validation_metric]))

        keras.backend.clear_session()

        return SAVE_CP_PATH
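
calc_train_steps and AdamWarmup come from keras-bert. A rough, standalone reconstruction of the step arithmetic for the numbers used in the tests above (the ceiling rounding and the 0.1 warmup ratio are assumptions based on the library defaults, not values taken from this code):

import math

num_train, batch_size, epochs = 8550, 128, 3
steps_per_epoch = math.ceil(num_train / batch_size)  # 67 optimizer steps per epoch
decay_steps = steps_per_epoch * epochs                # 201 steps over which the lr decays
warmup_steps = int(decay_steps * 0.1)                 # 20 steps of linear warmup
print(decay_steps, warmup_steps)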
Example #5
    def retraining_step(self,
                        configuration_search_model_path=None,
                        retention_configuration=[],
                        LR_BERT=0.00003):
        """
            Switches the Soft-Extract layers to Hard Extract layers and trains on the given data.
            Args:
                configuration_search_model_path: Path to a checkpoint as given by
                                                 configuration_search_step().
                retention_configuration: A list of integers representing the number
                                         of word-vectors to retain after each layer.
            Returns:
                A keras.models.Model instance that contains the Hard Extract layers
                and can be used for prediction with word-vector elimination.
            """

        ## Define a PoWER-BERT model where the Soft-Extract Layers have been replaced
        ## by Extract Layers that eliminate the word-vectors
        retrained_model = load_model(
            self.BERT_CONFIG_PATH,
            self.CHECKPOINT_PATH,
            FLAG_BERT_PRETRAINED=True,
            output_dim=self.NUM_CLASSES,
            seq_len=self.SEQ_LEN,
            retention_configuration=retention_configuration,
            FLAG_EXTRACT_LAYER=2,
            TASK=self.TASK)
        decay_steps, warmup_steps = calc_train_steps(
            self.NUM_TRAIN,
            batch_size=self.BATCH_SIZE,
            epochs=self.EPOCHS,
        )
        retrained_model.load_weights(configuration_search_model_path,
                                     by_name=True)

        retrained_model.compile(
            AdamWarmup(decay_steps=decay_steps,
                       warmup_steps=warmup_steps,
                       lr=LR_BERT,
                       lr_mult=None),
            loss=self.loss,
            metrics=self.metric,
        )
        print("Re-trained model summary: ", retrained_model.summary())

        SAVE_CP_PATH = os.path.join(self.OUTPUT_DIR, "retrained.hdf5")
        checkpoint = ModelCheckpoint(SAVE_CP_PATH,
                                     monitor=self.validation_metric,
                                     verbose=1,
                                     save_best_only=True,
                                     mode='max')
        history = retrained_model.fit(self.train_data[0],
                                      self.train_data[1],
                                      batch_size=self.BATCH_SIZE,
                                      epochs=self.EPOCHS,
                                      validation_data=(self.dev_data[0],
                                                       self.dev_data[1], None),
                                      verbose=1,
                                      callbacks=[checkpoint])
        with open(self.LOGFILE_PATH, 'a') as fp:
            fp.write(
                "\n Re-trained model accuracies for all epochs on the Dev set:"
                + str(history.history[self.validation_metric]))

        return retrained_model
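
The call retrained_model.load_weights(configuration_search_model_path, by_name=True) transfers only the weights of layers whose names match between the search-model checkpoint and the Hard-Extract model; layers unique to either side keep their own values. A minimal sketch of that Keras mechanism with two toy models (the names and shapes are illustrative only, and standalone Keras 2.x is assumed, as in the snippets above):

import numpy as np
import keras

def make_model(head_name):
    inp = keras.layers.Input(shape=(4,))
    x = keras.layers.Dense(8, name='shared_dense')(inp)   # same name in both models
    out = keras.layers.Dense(2, name=head_name)(x)         # name differs between models
    return keras.models.Model(inp, out)

source = make_model('soft_extract_head')
target = make_model('hard_extract_head')
source.save_weights('source_weights.hdf5')

# Only 'shared_dense' receives weights; 'hard_extract_head' keeps its fresh initialization.
target.load_weights('source_weights.hdf5', by_name=True)
assert np.allclose(source.get_layer('shared_dense').get_weights()[0],
                   target.get_layer('shared_dense').get_weights()[0])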
Example #6
    def configuration_search_step(self,
                                  fine_tuned_model_path=None,
                                  LAMBDA=0.0001,
                                  LR_BERT=0.00003,
                                  LR_SOFT_EXTRACT=0.0001):
        """
            Searches for a good output reduction configuration on the given model and data.
            Args:
                LAMBDA: See the paper for the meaning of this hyperparameter.
                        Used for searching the configuration.
                fine_tuned_model_path: Path to a checkpoint model that has been fine-tuned
                                       on the given data. Should have been fine-tuned using
                                       fine_tuning_step() above.
            Returns:
                (String, np.array): Path to the checkpoint representing the configuration
                search model and the retention configuration for this model.
            """

        ## Define a PoWER-BERT model containing Soft-Extract Layers
        configuration_search_model = load_model(self.BERT_CONFIG_PATH,
                                                self.CHECKPOINT_PATH,
                                                FLAG_BERT_PRETRAINED=True,
                                                output_dim=self.NUM_CLASSES,
                                                seq_len=self.SEQ_LEN,
                                                LAMBDA=LAMBDA,
                                                FLAG_EXTRACT_LAYER=1,
                                                TASK=self.TASK)

        configuration_search_model.load_weights(fine_tuned_model_path,
                                                by_name=True)

        decay_steps, warmup_steps = calc_train_steps(
            self.NUM_TRAIN,
            batch_size=self.BATCH_SIZE,
            epochs=self.EPOCHS,
        )

        ## Set different learning rates for the original BERT parameters and the
        ## retention parameters of the Soft-Extract Layers
        lr_mult = {}
        for layer in configuration_search_model.layers:
            if 'Extract' in layer.name:
                lr_mult[layer.name] = 1.0
            else:
                lr_mult[layer.name] = LR_BERT / LR_SOFT_EXTRACT

        configuration_search_model.compile(
            AdamWarmup(decay_steps=decay_steps,
                       warmup_steps=warmup_steps,
                       lr=LR_SOFT_EXTRACT,
                       lr_mult=lr_mult),
            loss=self.loss,
            metrics=self.metric,
        )
        print("Configuration Search model summary: ",
              configuration_search_model.summary())

        ## Train the model
        configuration_search_model.fit(self.train_data[0],
                                       self.train_data[1],
                                       batch_size=self.BATCH_SIZE,
                                       epochs=self.EPOCHS,
                                       validation_data=(self.dev_data[0],
                                                        self.dev_data[1],
                                                        None),
                                       verbose=1)
        SAVE_CP_PATH = os.path.join(self.OUTPUT_DIR,
                                    'configuration_search_model.hdf5')
        configuration_search_model.save(SAVE_CP_PATH)

        ## Obtain the retention configuration by calculating the mass of each encoder layer
        retention_configuration = self.get_configuration(
            configuration_search_model)
        with open(self.LOGFILE_PATH, 'a') as fp:
            fp.write("\n Retention Configuration :" +
                     str(retention_configuration))

        keras.backend.clear_session()

        return SAVE_CP_PATH, retention_configuration
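
The lr_mult dictionary above gives the Soft-Extract layers a multiplier of 1.0 and every other (BERT) layer a multiplier of LR_BERT / LR_SOFT_EXTRACT, on top of the base learning rate LR_SOFT_EXTRACT passed to AdamWarmup. Assuming AdamWarmup simply scales its base rate by lr_mult[layer.name], the effective per-group rates work out as follows:

LR_BERT = 3e-5          # default LR_BERT above
LR_SOFT_EXTRACT = 1e-4  # default LR_SOFT_EXTRACT above

base_lr = LR_SOFT_EXTRACT                  # value passed as lr= to AdamWarmup
mult_extract = 1.0                         # Soft-Extract layers
mult_bert = LR_BERT / LR_SOFT_EXTRACT      # all other (BERT) layers

print(base_lr * mult_extract)  # 1e-04 -> retention parameters of the Soft-Extract layers
print(base_lr * mult_bert)     # 3e-05 -> original BERT parameters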