def test_encoder_decoder_from_pretrained(self):
    load_weight_prefix = TFEncoderDecoderModel.load_weight_prefix

    config = self.get_encoder_decoder_config()
    encoder_tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
    decoder_tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

    input_ids = encoder_tokenizer("who sings does he love me with reba", return_tensors="tf").input_ids
    decoder_input_ids = decoder_tokenizer("Linda Davis", return_tensors="tf").input_ids

    with tempfile.TemporaryDirectory() as tmp_dirname:
        # Since most of HF's models don't have pretrained cross-attention layers, they are randomly
        # initialized even if we create models using the `from_pretrained` method.
        # For this test, the decoder needs to be a model with pretrained cross-attention layers.
        # So we create pretrained models (without `load_weight_prefix`), save them, and later
        # load them using `from_pretrained`.
        # (We don't need to do this for the encoder, but it keeps the code similar between encoder/decoder.)
        encoder = TFAutoModel.from_pretrained("bert-base-uncased", name="encoder")
        # It's necessary to specify `add_cross_attention=True` here.
        decoder = TFAutoModelForCausalLM.from_pretrained(
            "bert-base-uncased", is_decoder=True, add_cross_attention=True, name="decoder"
        )

        pretrained_encoder_dir = os.path.join(tmp_dirname, "pretrained_encoder")
        pretrained_decoder_dir = os.path.join(tmp_dirname, "pretrained_decoder")
        encoder.save_pretrained(pretrained_encoder_dir)
        decoder.save_pretrained(pretrained_decoder_dir)
        del encoder
        del decoder

        enc_dec_model = TFEncoderDecoderModel.from_encoder_decoder_pretrained(
            pretrained_encoder_dir,
            pretrained_decoder_dir,
        )
        # check that the from_pretrained methods work
        enc_dec_model.save_pretrained(tmp_dirname)
        enc_dec_model = TFEncoderDecoderModel.from_pretrained(tmp_dirname)

        output = enc_dec_model(input_ids, decoder_input_ids=decoder_input_ids, labels=decoder_input_ids)
        loss_pretrained = output.loss
        del enc_dec_model

        # Create the model using `__init__` with the loaded pretrained encoder / decoder
        encoder = TFAutoModel.from_pretrained(
            pretrained_encoder_dir, load_weight_prefix=load_weight_prefix, name="encoder"
        )
        decoder = TFAutoModelForCausalLM.from_pretrained(
            pretrained_decoder_dir, load_weight_prefix=load_weight_prefix, name="decoder"
        )
        enc_dec_model = TFEncoderDecoderModel(config=config, encoder=encoder, decoder=decoder)

        output = enc_dec_model(input_ids, decoder_input_ids=decoder_input_ids, labels=decoder_input_ids)
        loss_init = output.loss

        max_diff = np.max(np.abs(loss_pretrained - loss_init))
        expected_diff = 0.0
        self.assertAlmostEqual(max_diff, expected_diff, places=4)
def build_model(model_id1='bert-base-multilingual-cased',
                model_id2='bert-base-multilingual-uncased',
                max_len=192, dropout=0.2, **_):
    """ build a dual TFAutoModel: two encoders whose [CLS] embeddings are summed """
    print(model_id1, model_id2)
    transformer1 = TFAutoModel.from_pretrained(model_id1)
    transformer2 = TFAutoModel.from_pretrained(model_id2)

    input_word_ids1 = Input(shape=(max_len,), dtype=tf.int32, name="input_word_ids1")
    out1 = transformer1(input_word_ids1)
    input_word_ids2 = Input(shape=(max_len,), dtype=tf.int32, name="input_word_ids2")
    out2 = transformer2(input_word_ids2)

    sequence_output1 = out1[0]
    sequence_output2 = out2[0]
    # take the [CLS] token representation from each encoder
    cls_token1 = sequence_output1[:, 0, :]
    cls_token2 = sequence_output2[:, 0, :]

    x = Dropout(dropout)(cls_token1) + Dropout(dropout)(cls_token2)
    out = Dense(1, activation='sigmoid')(x)

    model = Model(inputs=[input_word_ids1, input_word_ids2], outputs=out)
    return model
def create_model(n_dense1=64, n_dense2=16, dout_rate=0.1, **kwargs):
    # kwargs is a dict, so entries must be read with subscripting, not attribute access
    embedding_base = kwargs["embedding_base"]  # specify ProtBERT_BFD or XLNET
    categories = kwargs["categories"]  # number of labels
    # acrobatics to avoid putting a model inside a model in keras, which prevents saving the model
    if embedding_base == "ProtBERT_BFD":
        if kwargs.get("max_len"):
            assert isinstance(kwargs["max_len"], int)
            max_len = kwargs["max_len"]
        else:
            max_len = defaults.MAX_LEN
        base = TFAutoModel.from_pretrained('Rostlab/prot_bert_bfd')
        assert isinstance(base, TFBertModel)
        main_layer = base.bert
        input_ids = tf.keras.layers.Input(shape=(max_len,), name='input_ids', dtype='int32')
        mask = tf.keras.layers.Input(shape=(max_len,), name='attention_mask', dtype='int32')
        embeddings = main_layer(input_ids, attention_mask=mask)[0]
        model_inputs = (input_ids, mask)
    elif embedding_base == "XLNET":
        # TODO: probably needs debugging
        base = TFAutoModel.from_pretrained("Rostlab/prot_xlnet", from_pt=True)
        assert isinstance(base, TFXLNetForSequenceClassification)
        main_layer = base.xlnet
        inputs = tf.keras.layers.Input(shape=None, name="input_layer", ragged=True)
        embeddings = main_layer(inputs)[0]
        model_inputs = inputs
    else:
        raise ValueError(f"create_model(): invalid embedding_base {embedding_base!r}")
    del base
    # TODO: fix input tensor issue from embedding layers: [0]
    X = tf.keras.layers.GlobalMaxPooling1D()(embeddings)
    X = tf.keras.layers.BatchNormalization()(X)
    X = tf.keras.layers.Dense(n_dense1, activation='relu')(X)
    X = tf.keras.layers.Dropout(dout_rate)(X)
    X = tf.keras.layers.Dense(n_dense2, activation='relu')(X)
    y = tf.keras.layers.Dense(categories, activation='softmax', name='outputs')(X)
    # if you are going to adjust the inner workings of the classification head, do so here
    model = tf.keras.Model(inputs=model_inputs, outputs=[y])
    model.layers[2].trainable = False  # freeze the embedding base
    return model
def _embedding_from_bert():
    with tf.device("CPU:0"):
        input_pretrained_bert = TFAutoModel.from_pretrained(
            config.input_pretrained_model, trainable=False,
            name=config.input_pretrained_model)
        target_pretrained_bert = TFAutoModel.from_pretrained(
            config.target_pretrained_model, trainable=False,
            name=config.target_pretrained_model)
    # the first weight of a TF BERT model is the word-embedding matrix
    decoder_embedding = target_pretrained_bert.get_weights()[0]
    return (decoder_embedding, input_pretrained_bert, target_pretrained_bert)
def test_from_pretrained_with_tuple_values(self):
    # For the auto model mapping, FunnelConfig has two models: FunnelModel and FunnelBaseModel
    model = TFAutoModel.from_pretrained("sgugger/funnel-random-tiny")
    self.assertIsInstance(model, TFFunnelModel)

    config = copy.deepcopy(model.config)
    config.architectures = ["FunnelBaseModel"]
    model = TFAutoModel.from_config(config)
    self.assertIsInstance(model, TFFunnelBaseModel)

    with tempfile.TemporaryDirectory() as tmp_dir:
        model.save_pretrained(tmp_dir)
        model = TFAutoModel.from_pretrained(tmp_dir)
        self.assertIsInstance(model, TFFunnelBaseModel)
def __init__(
        self,
        pretrained_model_name_or_path,
        reduce_output='sum',
        trainable=True,
        num_tokens=None,
        **kwargs
):
    super(AutoTransformerEncoder, self).__init__()
    try:
        from transformers import TFAutoModel
    except ModuleNotFoundError:
        logger.error(
            ' transformers is not installed. '
            'In order to install all text feature dependencies run '
            'pip install ludwig[text]'
        )
        sys.exit(-1)

    self.transformer = TFAutoModel.from_pretrained(
        pretrained_model_name_or_path
    )
    self.reduce_output = reduce_output
    if self.reduce_output != 'cls_pooled':
        self.reduce_sequence = SequenceReducer(reduce_mode=reduce_output)
    self.transformer.trainable = trainable
    self.transformer.resize_token_embeddings(num_tokens)
def build_model(model_id='jplu/tf-xlm-roberta-large', from_pt=False,
                transformer=None, max_len=192, dropout=0.2,
                pooling='first', **_):
    """ build a TFAutoModel classifier with a configurable pooling strategy """
    if transformer is None:
        transformer = TFAutoModel.from_pretrained(model_id, from_pt=from_pt)
    input_word_ids = Input(shape=(max_len,), dtype=tf.int32, name="input_word_ids")
    sequence_output = transformer(input_word_ids)[0]
    if pooling == 'first':
        cls_token = sequence_output[:, 0, :]
    elif pooling == 'max':
        cls_token = GlobalMaxPooling1D()(sequence_output)
    elif pooling == 'avg':
        cls_token = GlobalAveragePooling1D()(sequence_output)
    elif pooling == 'GeM':
        cls_token = GeneralizedMeanPooling1D(p=3)(sequence_output)
    else:
        # the original fell through silently, leaving cls_token undefined
        raise ValueError(f"Unknown pooling mode: {pooling!r}")
    if dropout > 0:
        cls_token = Dropout(dropout)(cls_token)
    out = Dense(1, activation='sigmoid')(cls_token)
    model = Model(inputs=input_word_ids, outputs=out)
    return model
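# A minimal usage sketch for the pooling-variant builder above; the model id,
# max_len, learning rate, and loss below are illustrative assumptions, not
# values the original function prescribes.
import tensorflow as tf

clf = build_model(model_id='jplu/tf-xlm-roberta-large', max_len=128, pooling='avg')
clf.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5),
            loss='binary_crossentropy',
            metrics=['accuracy'])
clf.summary()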
def build_classifier(model_name, max_len, learning_rate, metrics):
    """ Constructing a transformer model given a configuration. """
    # Defining the encoded inputs
    input_ids = tf.keras.layers.Input(shape=(max_len,), dtype=tf.int32, name="input_ids")
    # Loading the pretrained transformer model
    transformer_model = TFAutoModel.from_pretrained(model_name)
    # Defining the data embedding using the loaded model
    transformer_embeddings = transformer_model(input_ids)[0]
    # Defining the classifier layer on the [CLS] token
    output_values = tf.keras.layers.Dense(3, activation="softmax")(
        transformer_embeddings[:, 0, :])
    # Constructing the final model along with an optimizer, loss function and metrics
    model = tf.keras.Model(inputs=input_ids, outputs=output_values)
    opt = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    # The output layer already applies softmax, so the loss must consume
    # probabilities, not logits (the original passed from_logits=True, a bug).
    loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)
    model.compile(optimizer=opt, loss=loss, metrics=metrics)
    return model
def build_model(hparams):
    bert_model = TFAutoModel.from_pretrained(hparams["bert_file_name"])
    bert_model.trainable = True
    if hparams['trainable_bert'] is not None:
        bert_model.trainable = hparams['trainable_bert']

    input_layer_ids = Input(shape=(hparams['max_sequence_length'],), dtype='int64')
    input_layer_masks = Input(shape=(hparams['max_sequence_length'],), dtype='int64')

    bert_output = bert_model([input_layer_ids, input_layer_masks])
    bert_output = bert_output[1]  # pooled output

    classifier = Dense(
        units=2,
        activation='sigmoid',
        kernel_initializer=initializers.RandomUniform(
            minval=-1 / np.sqrt(bert_output.shape[1]),
            maxval=1 / np.sqrt(bert_output.shape[1])
        ),
        bias_initializer=initializers.Zeros(),
        kernel_regularizer=regularizers.l2(hparams['l2_regularization'])
    )(bert_output)

    model = Model(inputs=[input_layer_ids, input_layer_masks], outputs=classifier)
    model.compile(
        loss=dice_loss,
        optimizer=Adam(learning_rate=hparams["learning_rate"]),
        metrics=[f1_score]
    )
    plot_model(model, "model_bert.png", show_layer_names=False)
    return model
def build_model(model_name, max_len, learning_rate, metrics):
    """ Building the Deep Learning architecture """
    # defining encoded inputs
    input_ids = Input(shape=(max_len,), dtype=tf.int32, name="input_ids")
    # defining transformer model embeddings
    transformer_model = TFAutoModel.from_pretrained(model_name)
    transformer_embeddings = transformer_model(input_ids)[0]
    # defining output layers on top of the [CLS] embedding
    output_values = Dense(512, activation="relu")(transformer_embeddings[:, 0, :])
    output_values = Dropout(0.5)(output_values)
    # output_values = Dense(32, activation="relu")(output_values)
    output_values = Dense(1, activation='sigmoid')(output_values)
    # defining model
    model = Model(inputs=input_ids, outputs=output_values)
    opt = Adam(learning_rate=learning_rate)
    loss = tf.keras.losses.BinaryCrossentropy()
    model.compile(optimizer=opt, loss=loss, metrics=metrics)
    return model
def __init__(self, model_name: str, output_dim: int) -> None:
    super(SentimentAnalysisModel, self).__init__()
    config = AutoConfig.from_pretrained(model_name)
    self.transformer = TFAutoModel.from_pretrained(model_name)

    # freeze all but the last transformer block
    layers_to_freeze = None
    if type(self.transformer) is TFGPT2Model:
        layers_to_freeze = self.transformer.layers[0].h[:-1]
    elif type(self.transformer) is TFDistilBertModel:
        layers_to_freeze = self.transformer.layers[0].transformer.layer[:-1]
    elif type(self.transformer) is TFT5Model:
        layers_to_freeze = self.transformer.layers[1].block[:-1]
        layers_to_freeze.extend(self.transformer.layers[2].block[:-1])
    if layers_to_freeze is not None:  # unknown architectures stay fully trainable
        for layer in layers_to_freeze:
            layer.trainable = False
    print(
        f'Init model: frozen {len(self.transformer.non_trainable_variables)} variables.'
    )

    self.pre_classifier = Linear(units=config.hidden_size,
                                 input_dim=config.hidden_size,
                                 activation='linear')
    self.dropout = Dropout(0.3)
    # self.classifier = Linear(units=output_dim, input_dim=config.hidden_size, activation='linear')
    self.classifier = Linear(units=1, input_dim=config.hidden_size, activation='linear')
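# A hedged sanity check for the per-architecture freezing above; the checkpoint
# name is a placeholder, and output_dim is only stored by the constructor.
model = SentimentAnalysisModel("distilbert-base-uncased", output_dim=2)
print(len(model.transformer.trainable_variables), "trainable /",
      len(model.transformer.variables), "total transformer variables")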
def __init__(
        self,
        seq_len: int = 100,
        text_model_name: str = 'bert-base-uncased',
        vision_model: tf.keras.applications = VGG19(weights="imagenet", include_top=False)
) -> None:
    super(VisionBertModel, self).__init__()
    # note: the VGG19 default argument is instantiated once, at import time
    self.text_model_layer = TFAutoModel.from_pretrained(text_model_name)
    self.text_model_layer.trainable = False
    self.vision_model = vision_model
    self.vision_model.trainable = False

    self.flatten = Flatten()
    self.dropout = Dropout(0.2)
    self.concat = Concatenate(axis=1)

    self.global_dense1 = Dense(128, activation='relu')
    self.global_dense2 = Dense(64, activation='relu')
    self.global_dense3 = Dense(1, activation='sigmoid')

    self.dense_text1 = Dense(768, activation='relu')
    self.dense_text2 = Dense(256, activation='relu')

    self.img_dense1 = Dense(512 * 8, activation='relu')
    self.img_dense2 = Dense(512 * 4, activation='relu')
    self.img_dense3 = Dense(512 * 2, activation='relu')
    self.img_dense4 = Dense(512, activation='relu')
    self.img_dense5 = Dense(256, activation='relu')
def build_transformer(transformer, max_seq_length, num_labels, tagging=True, tokenizer_only=False):
    tokenizer = AutoTokenizer_.from_pretrained(transformer)
    if tokenizer_only:
        return tokenizer
    l_bert = TFAutoModel.from_pretrained(transformer)
    l_input_ids = tf.keras.layers.Input(shape=(max_seq_length,), dtype='int32', name="input_ids")
    l_mask_ids = tf.keras.layers.Input(shape=(max_seq_length,), dtype='int32', name="mask_ids")
    l_token_type_ids = tf.keras.layers.Input(shape=(max_seq_length,), dtype='int32', name="token_type_ids")
    output = l_bert(input_ids=l_input_ids, token_type_ids=l_token_type_ids,
                    attention_mask=l_mask_ids).last_hidden_state
    if not tagging:
        # sequence classification: keep only the [CLS] position
        output = tf.keras.layers.Lambda(lambda seq: seq[:, 0, :])(output)
    logits = tf.keras.layers.Dense(num_labels)(output)
    model = tf.keras.Model(inputs=[l_input_ids, l_mask_ids, l_token_type_ids], outputs=logits)
    model.build(input_shape=(None, max_seq_length))
    return model, tokenizer
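# Hedged example of driving build_transformer for token tagging; the checkpoint,
# sequence length, and label count are placeholder assumptions.
model, tokenizer = build_transformer('bert-base-cased', max_seq_length=128,
                                     num_labels=9, tagging=True)
encoded = tokenizer("London is the capital of England", return_tensors='tf',
                    padding='max_length', max_length=128)
logits = model([encoded['input_ids'], encoded['attention_mask'], encoded['token_type_ids']])
print(logits.shape)  # (1, 128, 9): one score per label per token position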
def create_model(self, path_weights=None):
    phobert = TFAutoModel.from_pretrained("vinai/phobert-base")
    self.tokenizer = AutoTokenizer.from_pretrained("vinai/phobert-base", use_fast=False)
    MAX_LEN = 25
    # shape must be a tuple; the original `shape=(25)` is just the int 25
    ids = tf.keras.layers.Input(shape=(MAX_LEN,), dtype=tf.int32)
    mask = tf.keras.layers.Input(shape=(MAX_LEN,), name='attention_mask', dtype='int32')

    # For transformers v4.x+:
    embeddings = phobert(ids, attention_mask=mask)[0]

    X = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(128))(embeddings)
    X = tf.keras.layers.BatchNormalization()(X)
    X = tf.keras.layers.Dense(128, activation='relu')(X)
    X = tf.keras.layers.Dropout(0.1)(X)
    y = tf.keras.layers.Dense(6, activation='softmax', name='outputs')(X)

    self.model = tf.keras.models.Model(inputs=[ids, mask], outputs=[y])
    # model.summary()
    # model.layers[2].trainable = False
    # model.layers[2].roberta.embeddings.trainable = True
    # model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics='accuracy')

    if path_weights is not None:
        self.model.load_weights(path_weights)
def _embedding_from_bert():
    with tf.device("CPU:0"):
        input_pretrained_bert = TFAutoModel.from_pretrained(
            config.input_pretrained_model, trainable=False,
            name=config.input_pretrained_model)
        # a separate target-side model is only needed for translation;
        # otherwise the input model is shared
        target_pretrained_bert = TFAutoModel.from_pretrained(
            config.target_pretrained_model, trainable=False,
            name=config.target_pretrained_model
        ) if config['task'] == 'translate' else input_pretrained_bert
    decoder_embedding = target_pretrained_bert.get_weights()[0]
    log.info(f"Decoder_Embedding matrix shape '{decoder_embedding.shape}'")
    return (decoder_embedding, input_pretrained_bert, target_pretrained_bert)
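# A hedged sketch of consuming _embedding_from_bert's first return value: seeding
# a frozen Keras embedding layer for the decoder. The helper name is ours, not
# part of the original module.
import tensorflow as tf

def make_decoder_embedding_layer(decoder_embedding):
    vocab_size, d_model = decoder_embedding.shape
    return tf.keras.layers.Embedding(
        vocab_size,
        d_model,
        embeddings_initializer=tf.keras.initializers.Constant(decoder_embedding),
        trainable=False)  # keep the pretrained word-embedding matrix frozen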
def build_model(transformer_layer, max_len, learning_rate):
    # must use strategy.scope() to place the model on the TPU cores
    with strategy.scope():
        # define input(s)
        input_ids = tf.keras.Input(shape=(max_len,), dtype=tf.int32)
        print("input")
        # insert roberta layer
        roberta = TFAutoModel.from_pretrained(transformer_layer)
        roberta = roberta(input_ids)[0]
        print("roberta")
        # only need the <s> token here, so we extract it now
        out = roberta[:, 0, :]
        # add our softmax layer
        out = tf.keras.layers.Dense(3, activation='softmax')(out)
        print("dense")
        # assemble model and compile (`lr` is deprecated in favour of `learning_rate`)
        model = tf.keras.Model(inputs=input_ids, outputs=out)
        print("model")
        model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
                      loss='sparse_categorical_crossentropy',
                      metrics=['accuracy'])
    return model
def test_output_embeds_base_model(self):
    model = TFAutoModel.from_pretrained("amazon/bort")

    input_ids = tf.convert_to_tensor(
        [[0, 18077, 4082, 7804, 8606, 6195, 2457, 3321, 11, 10489, 16, 269, 2579, 328, 2]],
        dtype=tf.int32,
    )  # Schloß Nymphenburg in Munich is really nice!

    output = model(input_ids)["last_hidden_state"]
    expected_shape = tf.TensorShape((1, 15, 1024))
    self.assertEqual(output.shape, expected_shape)
    # compare the actual values for a slice.
    expected_slice = tf.convert_to_tensor(
        [[[-0.0349, 0.0436, -1.8654], [-0.6964, 0.0835, -1.7393], [-0.9819, 0.2956, -0.2868]]],
        dtype=tf.float32,
    )
    self.assertTrue(
        np.allclose(output[:, :3, :3].numpy(), expected_slice.numpy(), atol=1e-4))
def test_build_save_load_model(self):
    """Test that full model is built properly."""
    strategy = tf.distribute.MirroredStrategy(
        cross_device_ops=tf.distribute.HierarchicalCopyAllReduce())

    os.makedirs("biomed_roberta_base")
    self.model.save_pretrained("biomed_roberta_base")
    with strategy.scope():
        model = TFAutoModel.from_pretrained("biomed_roberta_base", from_pt=True)
        model = build_model(model)
    shutil.rmtree("biomed_roberta_base")
    self.assertEqual(
        str(type(model)),
        "<class 'tensorflow.python.keras.engine.training.Model'>")

    save_model(model, timed_dir_name=False, transformer_dir=self.out_dir)
    self.assertTrue(
        os.path.isfile(os.path.join(self.out_dir, 'sigmoid.pickle')))
    self.assertTrue(
        os.path.isfile(os.path.join(self.out_dir, 'config.json')))
    self.assertTrue(
        os.path.isfile(os.path.join(self.out_dir, 'tf_model.h5')))

    pickle_path = os.path.join(self.out_dir, 'sigmoid.pickle')
    model = load_model(pickle_path=pickle_path, transformer_dir=self.out_dir)
    self.assertEqual(
        str(type(model)),
        "<class 'tensorflow.python.keras.engine.training.Model'>")
def test_revision_not_found(self):
    with self.assertRaisesRegex(
        EnvironmentError,
        r"aaaaaa is not a valid git identifier \(branch name, tag name or commit id\)",
    ):
        _ = TFAutoModel.from_pretrained(DUMMY_UNKNOWN_IDENTIFIER, revision="aaaaaa")
def test_model_file_not_found(self):
    with self.assertRaisesRegex(
        EnvironmentError,
        "hf-internal-testing/config-no-model does not appear to have a file named tf_model.h5",
    ):
        _ = TFAutoModel.from_pretrained("hf-internal-testing/config-no-model")
def post_init(self):
    from transformers import TFAutoModel, AutoTokenizer

    self.tokenizer = AutoTokenizer.from_pretrained(self.base_tokenizer_model)
    self.model = TFAutoModel.from_pretrained(
        self.pretrained_model_name_or_path, output_hidden_states=True
    )
    self.to_device()
def load_model(pickle_path, transformer_dir='transformer', max_len=512):
    """Load a keras model containing a transformer layer."""
    transformer = TFAutoModel.from_pretrained(transformer_dir)
    model = build_model(transformer, max_len=max_len)
    # close the file handle instead of leaking it (the original used a bare open())
    with open(pickle_path, 'rb') as f:
        sigmoid = pickle.load(f)
    model.get_layer('sigmoid').set_weights(sigmoid)
    return model
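# Companion sketch for load_model above (hedged): one way the 'sigmoid.pickle'
# file it expects could be produced. The helper name is ours; the transformer
# weights themselves go through save_pretrained separately.
import pickle

def save_sigmoid_weights(model, pickle_path):
    # persist only the classification head's weights
    with open(pickle_path, 'wb') as f:
        pickle.dump(model.get_layer('sigmoid').get_weights(), f)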
def _compute_tensorflow(model_names, dictionary, average_over, amp):
    for c, model_name in enumerate(model_names):
        print(f"{c + 1} / {len(model_names)}")
        config = AutoConfig.from_pretrained(model_name)
        model = TFAutoModel.from_pretrained(model_name, config=config)
        tokenizer = AutoTokenizer.from_pretrained(model_name)

        # `input_text` is a module-level benchmark string
        tokenized_sequence = tokenizer.encode(input_text, add_special_tokens=False)

        max_input_size = tokenizer.max_model_input_sizes[model_name]
        batch_sizes = [1, 2, 4, 8]
        slice_sizes = [8, 64, 128, 256, 512, 1024]

        dictionary[model_name] = {"bs": batch_sizes, "ss": slice_sizes, "results": {}}
        dictionary[model_name]["results"] = {i: {} for i in batch_sizes}

        print("Using model", model)

        @tf.function
        def inference(inputs):
            return model(inputs)

        for batch_size in batch_sizes:
            for slice_size in slice_sizes:
                if max_input_size is not None and slice_size > max_input_size:
                    dictionary[model_name]["results"][batch_size][slice_size] = "N/A"
                else:
                    sequence = tf.stack(
                        [tf.squeeze(tf.constant(tokenized_sequence[:slice_size])[None, :])] * batch_size
                    )
                    try:
                        print("Going through model with sequence of shape", sequence.shape)
                        # To make sure that the model is traced + that the tensors are on the appropriate device
                        inference(sequence)
                        runtimes = timeit.repeat(lambda: inference(sequence),
                                                 repeat=average_over, number=3)
                        average_time = sum(runtimes) / float(len(runtimes)) / 3.0
                        dictionary[model_name]["results"][batch_size][slice_size] = average_time
                    except tf.errors.ResourceExhaustedError as e:
                        # (the original called torch.cuda.empty_cache() here, but torch is
                        # never imported and has no effect on a TensorFlow run)
                        print("Doesn't fit on GPU.", e)
                        dictionary[model_name]["results"][batch_size][slice_size] = "N/A"
    return dictionary
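# Hedged driver for the benchmark loop above: `input_text` must already exist at
# module level, and the single-model list here is a placeholder.
results = {}
_compute_tensorflow(["distilbert-base-uncased"], results, average_over=3, amp=False)
print(results["distilbert-base-uncased"]["results"])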
def test_cached_model_has_minimum_calls_to_head(self):
    # Make sure we have cached the model.
    _ = TFAutoModel.from_pretrained("hf-internal-testing/tiny-random-bert")
    with RequestCounter() as counter:
        _ = TFAutoModel.from_pretrained("hf-internal-testing/tiny-random-bert")
    self.assertEqual(counter.get_request_count, 0)
    self.assertEqual(counter.head_request_count, 1)
    self.assertEqual(counter.other_request_count, 0)

    # With a sharded checkpoint
    _ = TFAutoModel.from_pretrained("ArthurZ/tiny-random-bert-sharded")
    with RequestCounter() as counter:
        _ = TFAutoModel.from_pretrained("ArthurZ/tiny-random-bert-sharded")
    self.assertEqual(counter.get_request_count, 0)
    # There is no pytorch_model.bin, so we still get one call for this one.
    self.assertEqual(counter.head_request_count, 2)
    self.assertEqual(counter.other_request_count, 0)
def test_model_from_pretrained(self):
    model_name = "bert-base-cased"
    config = AutoConfig.from_pretrained(model_name)
    self.assertIsNotNone(config)
    self.assertIsInstance(config, BertConfig)

    model = TFAutoModel.from_pretrained(model_name)
    self.assertIsNotNone(model)
    self.assertIsInstance(model, TFBertModel)
def load_model(self, pickle_path: str, transformer_dir: str = 'transformer', max_len=512):
    """ Load a keras model that uses a transformer layer """
    transformer = TFAutoModel.from_pretrained(transformer_dir)
    model = self.build_model(transformer, max_len=max_len)
    # close the file handle instead of leaking it (the original used a bare open())
    with open(pickle_path, 'rb') as f:
        sigmoid = pickle.load(f)
    model.get_layer('sigmoid').set_weights(sigmoid)
    return model
def _load(self):
    """Load the BERTweet tokenizer, model, and tweet normaliser."""
    self._tokenizer = AutoTokenizer.from_pretrained("vinai/bertweet-base", use_fast=False)
    self._model = TFAutoModel.from_pretrained("vinai/bertweet-base")
    self._normalizer = TweetNormalisation()
    self.OUTPUT = "last_hidden_state"
def load_bert(bert_name):
    ###################################
    # --------- Setup BERT ---------- #
    # Load the transformers config and request hidden states from every layer
    config = AutoConfig.from_pretrained(bert_name)
    config.output_hidden_states = True
    # Load BERT tokenizer
    tokenizer = AutoTokenizer.from_pretrained(bert_name)
    # Load the Transformers BERT model with the modified config
    # (the original never passed `config`, so the flag had no effect)
    transformer_model = TFAutoModel.from_pretrained(bert_name, config=config)
    return tokenizer, transformer_model, config
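# Example wiring for load_bert (hedged): the checkpoint and max_length are
# placeholders. Because the config now requests hidden states, the outputs carry
# one tensor per layer plus the embeddings.
tokenizer, transformer_model, config = load_bert("bert-base-uncased")
enc = tokenizer("hello world", return_tensors="tf", padding="max_length", max_length=16)
outputs = transformer_model(enc["input_ids"], attention_mask=enc["attention_mask"])
print(len(outputs.hidden_states))  # 13 for bert-base: embeddings + 12 layers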
def test_rag_sequence_from_pretrained(self):
    load_weight_prefix = "tf_rag_model_1"
    rag_config = self.get_rag_config()
    rag_decoder_tokenizer = BartTokenizer.from_pretrained("facebook/bart-large-cnn")
    rag_question_encoder_tokenizer = DPRQuestionEncoderTokenizer.from_pretrained(
        "facebook/dpr-question_encoder-single-nq-base")
    rag_retriever = RagRetriever(
        rag_config,
        question_encoder_tokenizer=rag_question_encoder_tokenizer,
        generator_tokenizer=rag_decoder_tokenizer,
    )

    input_ids = rag_question_encoder_tokenizer(
        "who sings does he love me with reba", return_tensors="tf").input_ids
    decoder_input_ids = rag_decoder_tokenizer(
        "Linda Davis", return_tensors="tf").input_ids

    with tempfile.TemporaryDirectory() as tmp_dirname:
        rag_sequence = TFRagSequenceForGeneration.from_pretrained_question_encoder_generator(
            "facebook/dpr-question_encoder-single-nq-base",
            "facebook/bart-large-cnn",
            retriever=rag_retriever,
            config=rag_config,
        )
        # check that the from_pretrained methods work
        rag_sequence.save_pretrained(tmp_dirname)
        rag_sequence.from_pretrained(tmp_dirname, retriever=rag_retriever)

        output = rag_sequence(input_ids, labels=decoder_input_ids)
        loss_pretrained = output.loss
        del rag_sequence

        question_encoder = TFAutoModel.from_pretrained(
            "facebook/dpr-question_encoder-single-nq-base")
        generator = TFAutoModelForSeq2SeqLM.from_pretrained(
            "facebook/bart-large-cnn",
            load_weight_prefix=load_weight_prefix,
            name="generator")
        rag_sequence = TFRagSequenceForGeneration(
            config=rag_config,
            question_encoder=question_encoder,
            generator=generator,
            retriever=rag_retriever)

        output = rag_sequence(input_ids, labels=decoder_input_ids)
        loss_init = output.loss

        self.assertAlmostEqual(loss_pretrained, loss_init, places=4)
def load_model_and_tokenizer(model_name, tensor_type):
    from transformers import AutoTokenizer  # used below but missing from the original imports

    if tensor_type == "tf":
        from transformers import TFAutoModel as AutoModel
    elif tensor_type == "pt":
        from transformers import AutoModel
    else:
        # the original fell through silently, leaving AutoModel unbound
        raise ValueError(f"tensor_type must be 'tf' or 'pt', got {tensor_type!r}")

    model = AutoModel.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    return model, tokenizer
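# Hedged usage of load_model_and_tokenizer: the checkpoint is a placeholder; any
# hub model published with both TF and PyTorch weights should work.
tf_model, tf_tokenizer = load_model_and_tokenizer("bert-base-uncased", tensor_type="tf")
batch = tf_tokenizer(["first example", "a second, longer example"],
                     padding=True, return_tensors="tf")
hidden = tf_model(**batch).last_hidden_state
print(hidden.shape)  # (2, seq_len, hidden_size)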