def test_retrained_model(self):
    """Test code for the retrained model."""
    retrained_model, config = build_model_from_config(
        './bert_config.json',
        output_dim=2,
        seq_len=64,
        retention_configuration=[64, 64, 64, 32, 32, 32, 16, 16, 16, 8, 8, 8],
        FLAG_EXTRACT_LAYER=2,
        TASK='cola')
    decay_steps, warmup_steps = calc_train_steps(
        8550,
        batch_size=128,
        epochs=3,
    )
    retrained_model.compile(AdamWarmup(decay_steps=decay_steps,
                                       warmup_steps=warmup_steps,
                                       lr=3e-5,
                                       lr_mult=None),
                            loss='sparse_categorical_crossentropy',
                            metrics=['accuracy'])
    print("Retrained model summary: ", retrained_model.summary())
    del retrained_model
def test_search_model(self):
    """Test code for configuration search."""
    configuration_search_model, config = build_model_from_config(
        './bert_config.json',
        output_dim=2,
        seq_len=64,
        LAMBDA=3e-3,
        FLAG_EXTRACT_LAYER=1,
        TASK='cola')
    decay_steps, warmup_steps = calc_train_steps(
        8550,
        batch_size=128,
        epochs=3,
    )
    configuration_search_model.compile(
        AdamWarmup(decay_steps=decay_steps,
                   warmup_steps=warmup_steps,
                   lr=3e-5,
                   lr_mult=None),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'])
    print("Configuration search model summary: ",
          configuration_search_model.summary())
    del configuration_search_model
def test_finetuned_model(self):
    """Test code for the fine-tuning task."""
    fine_tuned_model, config = build_model_from_config(
        './bert_config.json',
        output_dim=2,
        seq_len=64,
        FLAG_EXTRACT_LAYER=0,
        TASK='cola')
    decay_steps, warmup_steps = calc_train_steps(
        8550,
        batch_size=128,
        epochs=3,
    )
    fine_tuned_model.compile(AdamWarmup(decay_steps=decay_steps,
                                        warmup_steps=warmup_steps,
                                        lr=3e-5,
                                        lr_mult=None),
                             loss='sparse_categorical_crossentropy',
                             metrics=['accuracy'])
    print("Fine-tuned model summary: ", fine_tuned_model.summary())
    del fine_tuned_model
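## --- Hedged usage sketch (illustrative, not part of the original tests) ---
## The three tests above only build and compile the models. A minimal smoke
## test could also run a forward pass; this sketch assumes the keras-bert
## convention of two integer inputs (token ids and segment ids), each of shape
## (batch, seq_len). The input layout and the dummy data are assumptions, not
## confirmed by the code above.
#
# import numpy as np
# model, _ = build_model_from_config('./bert_config.json', output_dim=2,
#                                    seq_len=64, FLAG_EXTRACT_LAYER=0,
#                                    TASK='cola')
# token_ids = np.zeros((4, 64), dtype='int32')      # dummy batch of 4 sequences
# segment_ids = np.zeros((4, 64), dtype='int32')
# probs = model.predict([token_ids, segment_ids])   # expected shape: (4, 2)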
def fine_tuning_step(self, LR_BERT=0.00003):
    """
    Carries out simple fine-tuning on the data given in train_data above.
    Use it only if the model at the checkpoint path has never seen this data.

    Returns:
        A string representing the path to the fine-tuned model checkpoint.
    """
    fine_tuned_model = load_model(self.BERT_CONFIG_PATH,
                                  self.CHECKPOINT_PATH,
                                  FLAG_BERT_PRETRAINED=True,
                                  output_dim=self.NUM_CLASSES,
                                  seq_len=self.SEQ_LEN,
                                  FLAG_EXTRACT_LAYER=0,
                                  TASK=self.TASK)
    decay_steps, warmup_steps = calc_train_steps(
        self.NUM_TRAIN,
        batch_size=self.BATCH_SIZE,
        epochs=self.EPOCHS,
    )
    fine_tuned_model.compile(
        AdamWarmup(decay_steps=decay_steps,
                   warmup_steps=warmup_steps,
                   lr=LR_BERT,
                   lr_mult=None),
        loss=self.loss,
        metrics=self.metric,
    )
    print("Fine-tuned model summary: ", fine_tuned_model.summary())

    ## Save the best model (by validation metric) seen during training.
    SAVE_CP_PATH = os.path.join(self.OUTPUT_DIR, "finetune.hdf5")
    checkpoint = ModelCheckpoint(SAVE_CP_PATH,
                                 monitor=self.validation_metric,
                                 verbose=1,
                                 save_best_only=True,
                                 mode='max')

    history = fine_tuned_model.fit(self.train_data[0],
                                   self.train_data[1],
                                   batch_size=self.BATCH_SIZE,
                                   epochs=self.EPOCHS,
                                   validation_data=(self.dev_data[0],
                                                    self.dev_data[1],
                                                    None),
                                   verbose=1,
                                   callbacks=[checkpoint])
    with open(self.LOGFILE_PATH, 'a') as fp:
        fp.write(
            "\n Fine-tuned model accuracies for all epochs on the Dev set:"
            + str(history.history[self.validation_metric]))
    keras.backend.clear_session()
    return SAVE_CP_PATH
def retraining_step(self,
                    configuration_search_model_path=None,
                    retention_configuration=[],
                    LR_BERT=0.00003):
    """
    Switches the Soft-Extract layers to Hard Extract layers and trains on the
    given data. The best checkpoint (by validation metric) is saved to
    'retrained.hdf5' under OUTPUT_DIR.

    Args:
        configuration_search_model_path: Path to a checkpoint as returned by
            configuration_search_step().
        retention_configuration: A list of integers representing the number of
            word-vectors to retain after each layer.

    Returns:
        A keras.models.Model instance that contains the Hard Extract layers
        and can be used for prediction with word-vector elimination.
    """
    ## Define a PoWER-BERT model where the Soft-Extract layers have been
    ## replaced by Extract layers that eliminate word-vectors.
    retrained_model = load_model(
        self.BERT_CONFIG_PATH,
        self.CHECKPOINT_PATH,
        FLAG_BERT_PRETRAINED=True,
        output_dim=self.NUM_CLASSES,
        seq_len=self.SEQ_LEN,
        retention_configuration=retention_configuration,
        FLAG_EXTRACT_LAYER=2,
        TASK=self.TASK)
    decay_steps, warmup_steps = calc_train_steps(
        self.NUM_TRAIN,
        batch_size=self.BATCH_SIZE,
        epochs=self.EPOCHS,
    )
    retrained_model.load_weights(configuration_search_model_path,
                                 by_name=True)
    retrained_model.compile(
        AdamWarmup(decay_steps=decay_steps,
                   warmup_steps=warmup_steps,
                   lr=LR_BERT,
                   lr_mult=None),
        loss=self.loss,
        metrics=self.metric,
    )
    print("Re-trained model summary: ", retrained_model.summary())

    SAVE_CP_PATH = os.path.join(self.OUTPUT_DIR, "retrained.hdf5")
    checkpoint = ModelCheckpoint(SAVE_CP_PATH,
                                 monitor=self.validation_metric,
                                 verbose=1,
                                 save_best_only=True,
                                 mode='max')
    history = retrained_model.fit(self.train_data[0],
                                  self.train_data[1],
                                  batch_size=self.BATCH_SIZE,
                                  epochs=self.EPOCHS,
                                  validation_data=(self.dev_data[0],
                                                   self.dev_data[1],
                                                   None),
                                  verbose=1,
                                  callbacks=[checkpoint])
    with open(self.LOGFILE_PATH, 'a') as fp:
        fp.write(
            "\n Re-trained model accuracies for all epochs on the Dev set:"
            + str(history.history[self.validation_metric]))
    ## Return the trained model (final-epoch weights), as documented above;
    ## the best checkpoint is saved at SAVE_CP_PATH.
    return retrained_model
def configuration_search_step(self,
                              fine_tuned_model_path=None,
                              LAMBDA=0.0001,
                              LR_BERT=0.00003,
                              LR_SOFT_EXTRACT=0.0001):
    """
    Searches for a good word-vector retention configuration on the given model
    and data.

    Args:
        fine_tuned_model_path: Path to a checkpoint that has been fine-tuned on
            the given data. It should have been produced by fine_tuning_step()
            above.
        LAMBDA: Hyper-parameter used for searching the configuration; see the
            paper for its meaning.
        LR_BERT: Learning rate for the original BERT parameters.
        LR_SOFT_EXTRACT: Learning rate for the retention parameters of the
            Soft-Extract layers.

    Returns:
        (String, np.array): Path to the checkpoint of the configuration-search
        model and the retention configuration for this model.
    """
    ## Define a PoWER-BERT model containing Soft-Extract layers.
    configuration_search_model = load_model(self.BERT_CONFIG_PATH,
                                            self.CHECKPOINT_PATH,
                                            FLAG_BERT_PRETRAINED=True,
                                            output_dim=self.NUM_CLASSES,
                                            seq_len=self.SEQ_LEN,
                                            LAMBDA=LAMBDA,
                                            FLAG_EXTRACT_LAYER=1,
                                            TASK=self.TASK)
    configuration_search_model.load_weights(fine_tuned_model_path,
                                            by_name=True)
    decay_steps, warmup_steps = calc_train_steps(
        self.NUM_TRAIN,
        batch_size=self.BATCH_SIZE,
        epochs=self.EPOCHS,
    )

    ## Set different learning rates for the original BERT parameters and the
    ## retention parameters of the Soft-Extract layers. With the base learning
    ## rate set to LR_SOFT_EXTRACT, a multiplier of LR_BERT / LR_SOFT_EXTRACT
    ## gives the BERT layers an effective rate of LR_BERT, while the Extract
    ## layers keep LR_SOFT_EXTRACT.
    lr_mult = {}
    for layer in configuration_search_model.layers:
        if 'Extract' in layer.name:
            lr_mult[layer.name] = 1.0
        else:
            lr_mult[layer.name] = LR_BERT / LR_SOFT_EXTRACT

    configuration_search_model.compile(
        AdamWarmup(decay_steps=decay_steps,
                   warmup_steps=warmup_steps,
                   lr=LR_SOFT_EXTRACT,
                   lr_mult=lr_mult),
        loss=self.loss,
        metrics=self.metric,
    )
    print("Configuration Search model summary: ",
          configuration_search_model.summary())

    ## Train the model.
    configuration_search_model.fit(self.train_data[0],
                                   self.train_data[1],
                                   batch_size=self.BATCH_SIZE,
                                   epochs=self.EPOCHS,
                                   validation_data=(self.dev_data[0],
                                                    self.dev_data[1],
                                                    None),
                                   verbose=1)
    SAVE_CP_PATH = os.path.join(self.OUTPUT_DIR,
                                'configuration_search_model.hdf5')
    configuration_search_model.save(SAVE_CP_PATH)

    ## Obtain the retention configuration by calculating the mass of each
    ## encoder layer.
    retention_configuration = self.get_configuration(
        configuration_search_model)
    with open(self.LOGFILE_PATH, 'a') as fp:
        fp.write("\n Retention Configuration :" +
                 str(retention_configuration))
    keras.backend.clear_session()
    return SAVE_CP_PATH, retention_configuration
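## --- Hedged usage sketch (illustrative, not part of the original code) ---
## The three training steps above are intended to run in the order
## fine_tuning_step() -> configuration_search_step() -> retraining_step().
## `trainer` stands for an already-constructed instance of this class (its
## constructor arguments are not shown here); the variable names and the
## hyper-parameter values below are assumptions for illustration only.
#
# trainer = ...  # an instance of this class, built with config/checkpoint paths and data
# finetune_ckpt = trainer.fine_tuning_step(LR_BERT=3e-5)
# search_ckpt, retention_configuration = trainer.configuration_search_step(
#     fine_tuned_model_path=finetune_ckpt,
#     LAMBDA=1e-4,
#     LR_BERT=3e-5,
#     LR_SOFT_EXTRACT=1e-4)
# retrained_model = trainer.retraining_step(
#     configuration_search_model_path=search_ckpt,
#     retention_configuration=retention_configuration,
#     LR_BERT=3e-5)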