def __init__(
        self,
        pretrained_model_name_or_path='gpt2',
        reduce_output='sum',
        trainable=True,
        num_tokens=None,
        **kwargs
):
    super(GPT2Encoder, self).__init__()
    try:
        from transformers import TFGPT2Model
    except ModuleNotFoundError:
        logger.error(
            ' transformers is not installed. '
            'In order to install all text feature dependencies run '
            'pip install ludwig[text]'
        )
        sys.exit(-1)
    self.transformer = TFGPT2Model.from_pretrained(
        pretrained_model_name_or_path
    )
    self.reduce_output = reduce_output
    self.reduce_sequence = SequenceReducer(reduce_mode=reduce_output)
    self.transformer.trainable = trainable
    self.transformer.resize_token_embeddings(num_tokens)

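# A minimal sketch of a matching call method, assuming Ludwig-style
# conventions; the method name, argument names, and the reliance on
# `last_hidden_state` are assumptions, not the library's actual code.
def call(self, inputs, training=None, mask=None):
    # Run the pretrained GPT-2 backbone; `last_hidden_state` has shape
    # [batch, sequence_length, hidden_size].
    transformer_outputs = self.transformer(inputs, training=training)
    hidden = transformer_outputs.last_hidden_state
    # Collapse the sequence dimension with the configured reduce mode
    # ('sum', 'mean', 'last', ...).
    return self.reduce_sequence(hidden)
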
def tl_gpt2_model(param={}):
    trainable = param['Trainable']
    max_seq_len = param['Max_length']
    inputs = Input(shape=(max_seq_len,), dtype='int64', name='inputs')
    masks = Input(shape=(max_seq_len,), dtype='int64', name='masks')
    gpt2_model = TFGPT2Model.from_pretrained('gpt2')
    gpt2_model.trainable = trainable
    gpt2_output = gpt2_model(inputs, attention_mask=masks)
    gpt2_last_hidden = gpt2_output.last_hidden_state
    # gpt2_CLS_output = gpt2_last_hidden[:, 0, :]
    x = Flatten()(gpt2_last_hidden)
    x = LayerNormalization()(x)
    x = Dense(param['first_layer'], activation='relu',
              kernel_regularizer=regularizers.l2(0.01))(x)
    x = Dropout(param['dropout'])(x)
    x = LayerNormalization()(x)
    x = Dense(param['second_layer'], activation='relu')(x)
    x = Dropout(param['dropout'])(x)
    probs = Dense(3, activation='softmax')(x)
    model = keras.Model(inputs=[inputs, masks], outputs=probs)
    model.summary()
    return model

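# Hypothetical usage sketch for tl_gpt2_model: the `param` values below are
# assumptions chosen for illustration. Note that GPT-2's tokenizer has no
# pad token by default, so one must be assigned before padding batches.
from transformers import GPT2Tokenizer

tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
tokenizer.pad_token = tokenizer.eos_token  # reuse EOS as the pad token

param = {'Trainable': False, 'Max_length': 64,
         'first_layer': 256, 'second_layer': 64, 'dropout': 0.2}
encoded = tokenizer(['an example sentence', 'another one'],
                    padding='max_length', truncation=True,
                    max_length=param['Max_length'], return_tensors='tf')

model = tl_gpt2_model(param)
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy')
probs = model.predict([encoded['input_ids'], encoded['attention_mask']])
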
def make_model(self, is_train: bool = False):
    # with tf.compat.v1.variable_scope("gpt2_encoder_" + name):
    self._make_placeholders()
    """
    GPT-2 uses the Transformer's decoder as its building block, excluding
    the encoder-decoder attention module. Thus, the only difference from
    BERT's building block (the Transformer's encoder) is the masked
    attention. However, in this implementation masked attention is already
    used by the BertEncoder, so the BertModel could be reused with its
    hyper-parameters adjusted to match those of the pretrained GPT-2 models.
    """
    cache_dir = "../resources/hugging_face/gpt2/"
    model = TFGPT2Model.from_pretrained('gpt2', cache_dir=cache_dir,
                                        return_dict=True)
    output = model(self.placeholders['tokens'], training=is_train)
    seq_token_embeddings = output.last_hidden_state
    seq_token_masks = self.placeholders['tokens_mask']
    seq_token_lengths = tf.reduce_sum(input_tensor=seq_token_masks, axis=1)  # B
    return pool_sequence_embedding(
        "weighted_mean",
        sequence_token_embeddings=seq_token_embeddings,
        sequence_lengths=seq_token_lengths,
        sequence_token_masks=seq_token_masks)

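# pool_sequence_embedding is not shown above; the sketch below is an
# assumption of what such a helper could look like, with the signature
# mirroring the call site and "weighted_mean" implemented here as a simple
# masked mean stand-in (the original may weight tokens differently).
import tensorflow as tf

def pool_sequence_embedding(pool_mode,
                            sequence_token_embeddings,
                            sequence_lengths,
                            sequence_token_masks):
    if pool_mode == "weighted_mean":
        # Zero out padded positions, then average over the true lengths.
        mask = tf.cast(tf.expand_dims(sequence_token_masks, -1),
                       sequence_token_embeddings.dtype)        # [B, T, 1]
        summed = tf.reduce_sum(sequence_token_embeddings * mask, axis=1)  # [B, H]
        lengths = tf.cast(tf.expand_dims(sequence_lengths, -1),
                          summed.dtype)                        # [B, 1]
        return summed / tf.maximum(lengths, 1.0)
    raise ValueError(f"Unsupported pool mode: {pool_mode}")
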
def gpt2_model(freeze=True, configuration=None):
    if configuration is None:
        configuration = GPT2Config()
    model = TFGPT2Model.from_pretrained('gpt2', config=configuration)
    if freeze:
        for layer in model.layers:
            layer.trainable = False
    return model

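# Quick usage check (a sketch): with freeze=True no weights should remain
# trainable, which can be verified through Keras' trainable_weights list.
frozen = gpt2_model(freeze=True)
assert len(frozen.trainable_weights) == 0
tunable = gpt2_model(freeze=False)
print(len(tunable.trainable_weights), 'trainable weight tensors')
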
def _test_TFGpt2(self, size, large=False):
    from transformers import GPT2Tokenizer, TFGPT2Model
    tokenizer = GPT2Tokenizer.from_pretrained(size)
    model = TFGPT2Model.from_pretrained(size)
    input_dict = tokenizer("Hello, my dog is cute", return_tensors="tf")
    spec, input_dict = self.spec_and_pad(input_dict)
    outputs = ["last_hidden_state"]
    self.run_test(model, input_dict, input_signature=spec,
                  outputs=outputs, large=large)

def init_network(self, input_shape, num_outputs=10):
    # network = keras.Sequential()
    # network.add(Dense(input_shape, activation="relu"))
    # network.add(Dense(units=num_outputs, activation="softmax"))
    # Keep references to the pretrained tokenizer and model so they remain
    # usable after initialization instead of being discarded.
    self.tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
    self.model = TFGPT2Model.from_pretrained('gpt2')

def __init__(self, dir_path, num_class):
    super(TFGPT2Classifier, self).__init__()
    self.gpt2 = TFGPT2Model.from_pretrained(dir_path)
    self.num_class = num_class
    self.dropout = tf.keras.layers.Dropout(
        self.gpt2.config.summary_first_dropout)
    self.classifier = tf.keras.layers.Dense(
        self.num_class,
        kernel_initializer=tf.keras.initializers.TruncatedNormal(
            stddev=self.gpt2.config.initializer_range),
        name="classifier")

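# A sketch of a matching call method (an assumption, not the original
# class): GPT-2 has no [CLS] token, so classification commonly uses the
# hidden state of the last token. This assumes inputs are not right-padded,
# or that padding is handled upstream.
def call(self, inputs, training=False):
    outputs = self.gpt2(inputs, training=training)
    hidden = outputs.last_hidden_state           # [batch, seq_len, hidden]
    pooled = hidden[:, -1, :]                    # last-token representation
    pooled = self.dropout(pooled, training=training)
    return self.classifier(pooled)               # [batch, num_class]
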
def test_3layer_gpt2(self):
    from transformers import GPT2Config, TFGPT2Model, BertTokenizer
    keras2onnx.proto.keras.backend.set_learning_phase(0)
    config = GPT2Config(n_layer=3)
    model = TFGPT2Model(config)
    tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
    text, inputs, inputs_onnx = self._prepare_inputs(tokenizer)
    inputs = tokenizer.encode_plus(text, add_special_tokens=True,
                                   return_tensors='tf')
    predictions = model.predict(inputs)
    onnx_model = keras2onnx.convert_keras(model, model.name)
    self.assertTrue(run_onnx_runtime(onnx_model.graph.name, onnx_model,
                                     inputs_onnx, predictions,
                                     self.model_files))

def get_transformer(bert_model_type, output_hidden_states=False):
    config = get_bert_config(bert_model_type, output_hidden_states)
    if bert_model_type in [
            'bert-base-uncased', 'bert-base-cased', 'bert-large-uncased',
            'bert-large-uncased-whole-word-masking',
            'bert-large-uncased-whole-word-masking-finetuned-squad'
    ]:
        return TFBertModel.from_pretrained(BERT_MODEL_FILE[bert_model_type],
                                           config=config)
    elif bert_model_type in [
            'prod-bert-base-uncased', 'tune_bert-base-uncased_nsp'
    ]:
        return TFBertModel.from_pretrained(BERT_MODEL_FILE[bert_model_type],
                                           config=config, from_pt=True)
    elif bert_model_type in [
            'roberta-base', 'roberta-large', 'roberta-large-mnli',
            'distilroberta-base'
    ]:
        return TFRobertaModel.from_pretrained(BERT_MODEL_FILE[bert_model_type],
                                              config=config)
    elif bert_model_type in ['prod-roberta-base-cased']:
        return TFRobertaModel.from_pretrained(BERT_MODEL_FILE[bert_model_type],
                                              config=config, from_pt=True)
    elif bert_model_type in ['xlnet-base-cased']:
        return TFXLNetModel.from_pretrained(BERT_MODEL_FILE[bert_model_type],
                                            config=config)
    elif bert_model_type in [
            'albert-base-v1', 'albert-large-v1', 'albert-xlarge-v1',
            'albert-xxlarge-v1'
    ]:
        return TFAlbertModel.from_pretrained(BERT_MODEL_FILE[bert_model_type],
                                             config=config)
    elif bert_model_type in ['gpt2', 'gpt2-medium']:
        return TFGPT2Model.from_pretrained(BERT_MODEL_FILE[bert_model_type],
                                           config=config)
    elif bert_model_type in ['transfo-xl']:
        return TFTransfoXLModel.from_pretrained(
            BERT_MODEL_FILE[bert_model_type], config=config)
    elif bert_model_type in [
            'distilbert-base-uncased',
            'distilbert-base-uncased-distilled-squad'
    ]:
        return TFDistilBertModel.from_pretrained(
            BERT_MODEL_FILE[bert_model_type], config=config)
    else:
        raise ValueError(
            f'`bert_model_type` not understood: {bert_model_type}')

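# Usage sketch: BERT_MODEL_FILE and get_bert_config are assumed to be
# module-level lookups mapping a model-type key to a checkpoint name and
# its configuration; under that assumption, fetching GPT-2 is one call.
gpt2 = get_transformer('gpt2')
hidden_size = gpt2.config.n_embd  # 768 for the base GPT-2 model
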
def __init__(self, num_units, output_embed_num_units):
    super().__init__()
    self.config = GPT2Config(vocab_size=1, n_positions=1024, n_ctx=1024,
                             n_embd=num_units, n_layer=6, n_head=8)
    self.input_embedding = tf.keras.layers.Dense(num_units)
    self.transformer = TFGPT2Model(self.config)
    self.output_embedding = tf.keras.layers.Dense(output_embed_num_units)
    self.text_idx_embedding = tf.keras.layers.Embedding(
        MAX_NUM_TOKENS, self.config.n_embd,
        embeddings_initializer=modeling_tf_utils.get_initializer(
            self.config.initializer_range))
    self.obj_idx_embedding = tf.keras.layers.Embedding(
        MAX_NUM_TOKENS, self.config.n_embd,
        embeddings_initializer=modeling_tf_utils.get_initializer(
            self.config.initializer_range))

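# With vocab_size=1 the transformer's own token embedding is effectively
# unused; a plausible forward pass (a sketch, not the original code) feeds
# externally computed embeddings through `inputs_embeds` instead of ids.
def call(self, features, text_idx, obj_idx, training=False):
    # Project raw features and add the two learned index embeddings.
    embeds = (self.input_embedding(features)
              + self.text_idx_embedding(text_idx)
              + self.obj_idx_embedding(obj_idx))
    hidden = self.transformer(inputs_embeds=embeds,
                              training=training).last_hidden_state
    return self.output_embedding(hidden)
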
from transformers import GPT2Tokenizer, TFGPT2Model
import tensorflow as tf

tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = TFGPT2Model.from_pretrained('gpt2', return_dict=True)

inputs = tokenizer("Hello, my dog is cute", return_tensors="tf")
outputs = model(inputs)

last_hidden_states = outputs.last_hidden_state
print(last_hidden_states)

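# The hidden states are per token; a common follow-up (shown here as a
# sketch) is to pool them into one fixed-size vector per sequence using
# the attention mask, e.g. a masked mean.
mask = tf.cast(inputs['attention_mask'][:, :, tf.newaxis], tf.float32)
sentence_embedding = (tf.reduce_sum(last_hidden_states * mask, axis=1)
                      / tf.reduce_sum(mask, axis=1))
print(sentence_embedding.shape)  # (1, 768) for the base GPT-2 model
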
# --model_type=gpt2 \
# --model_name_or_path=gpt2 \
# --length=100

## Text Generation Using GPT2
from transformers import pipeline, set_seed

generator = pipeline('text-generation', model='gpt2')
set_seed(42)
generator("Hello, I'm a language model,", max_length=30,
          num_return_sequences=5)
generator("Once upon a time, ", max_length=30, num_return_sequences=5)

## Transforming Texts into Features
# PyTorch variant, kept for reference:
# from transformers import GPT2Tokenizer, GPT2Model
# tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
# model = GPT2Model.from_pretrained('gpt2')
# text = "Replace me by any text you'd like."
# encoded_input = tokenizer(text, return_tensors='pt')  # returns PyTorch tensors
# output = model(encoded_input)

from transformers import GPT2Tokenizer, TFGPT2Model

tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = TFGPT2Model.from_pretrained('gpt2')
text = "Replace me by any text you'd like."
encoded_input = tokenizer(text, return_tensors='tf')  # returns TensorFlow tensors
output = model(encoded_input)
print(encoded_input)

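# The model output carries per-token hidden states; indexing with [0] works
# whether the output is a tuple or a model-output object, so the feature
# tensor can be inspected directly.
print(output[0].shape)  # last hidden state: (1, num_tokens, 768)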