def prepare_config_and_inputs(self):
    """Create a BlenderbotSmallConfig and a matching dictionary of inputs.

    The encoder ids are ``seq_length - 1`` columns produced by ``ids_tensor``
    and clipped to the range ``[3, vocab_size]``, followed by one final column
    filled with the value 2. The decoder ids are derived from the encoder ids
    with ``shift_tokens_right(input_ids, 1, 2)``.

    Returns:
        A ``(config, inputs_dict)`` tuple.
    """
    # NOTE(review): ids_tensor is defined elsewhere; presumably it yields
    # random token ids of the requested shape -- confirm.
    raw_ids = ids_tensor(
        [self.batch_size, self.seq_length - 1], self.vocab_size
    )
    body_ids = np.clip(raw_ids, 3, self.vocab_size)

    # Every row ends with token id 2 (presumably the EOS id -- confirm).
    last_column = 2 * np.ones((self.batch_size, 1), dtype=np.int64)
    input_ids = np.concatenate((body_ids, last_column), -1)

    decoder_input_ids = shift_tokens_right(input_ids, 1, 2)

    config_kwargs = dict(
        vocab_size=self.vocab_size,
        d_model=self.hidden_size,
        encoder_layers=self.num_hidden_layers,
        decoder_layers=self.num_hidden_layers,
        encoder_attention_heads=self.num_attention_heads,
        decoder_attention_heads=self.num_attention_heads,
        encoder_ffn_dim=self.intermediate_size,
        decoder_ffn_dim=self.intermediate_size,
        dropout=self.hidden_dropout_prob,
        attention_dropout=self.attention_probs_dropout_prob,
        max_position_embeddings=self.max_position_embeddings,
        eos_token_id=self.eos_token_id,
        bos_token_id=self.bos_token_id,
        pad_token_id=self.pad_token_id,
        initializer_range=self.initializer_range,
        use_cache=False,
    )
    config = BlenderbotSmallConfig(**config_kwargs)

    return config, prepare_blenderbot_inputs_dict(
        config, input_ids, decoder_input_ids
    )
def prepare_config_and_inputs(self):
    """Create a BlenderbotSmallConfig and a matching dictionary of inputs.

    The encoder ids come from ``ids_tensor`` with every value clamped to be
    at least 3, and the last position of each row is overwritten with
    ``self.eos_token_id``. The decoder ids are a second, independent
    ``ids_tensor`` draw of the same shape.

    Returns:
        A ``(config, inputs_dict)`` tuple, where ``inputs_dict`` is produced
        by ``prepare_blenderbot_small_inputs_dict``.
    """
    ids_shape = [self.batch_size, self.seq_length]

    # Values below 3 are raised to 3 (presumably to keep the special-token
    # ids out of the random body -- confirm against the tokenizer ids).
    input_ids = ids_tensor(ids_shape, self.vocab_size).clamp(min=3)
    input_ids[:, -1] = self.eos_token_id  # every row ends with EOS

    decoder_input_ids = ids_tensor(ids_shape, self.vocab_size)

    config = BlenderbotSmallConfig(
        vocab_size=self.vocab_size,
        d_model=self.hidden_size,
        encoder_layers=self.num_hidden_layers,
        decoder_layers=self.num_hidden_layers,
        encoder_attention_heads=self.num_attention_heads,
        decoder_attention_heads=self.num_attention_heads,
        encoder_ffn_dim=self.intermediate_size,
        decoder_ffn_dim=self.intermediate_size,
        dropout=self.hidden_dropout_prob,
        attention_dropout=self.attention_probs_dropout_prob,
        max_position_embeddings=self.max_position_embeddings,
        eos_token_id=self.eos_token_id,
        bos_token_id=self.bos_token_id,
        pad_token_id=self.pad_token_id,
    )
    inputs_dict = prepare_blenderbot_small_inputs_dict(
        config, input_ids, decoder_input_ids
    )
    return config, inputs_dict
def get_config(self):
    """Build a BlenderbotSmallConfig from this object's hyper-parameters.

    The encoder and decoder receive the same layer count, attention-head
    count and feed-forward width; ``hidden_dropout_prob`` is passed as
    ``dropout`` and ``attention_probs_dropout_prob`` as
    ``attention_dropout``.

    Returns:
        The constructed ``BlenderbotSmallConfig``.
    """
    # Settings shared by the encoder and the decoder.
    layer_count = self.num_hidden_layers
    head_count = self.num_attention_heads
    ffn_width = self.intermediate_size

    settings = {
        "vocab_size": self.vocab_size,
        "d_model": self.hidden_size,
        "encoder_layers": layer_count,
        "decoder_layers": layer_count,
        "encoder_attention_heads": head_count,
        "decoder_attention_heads": head_count,
        "encoder_ffn_dim": ffn_width,
        "decoder_ffn_dim": ffn_width,
        "dropout": self.hidden_dropout_prob,
        "attention_dropout": self.attention_probs_dropout_prob,
        "max_position_embeddings": self.max_position_embeddings,
        "eos_token_id": self.eos_token_id,
        "bos_token_id": self.bos_token_id,
        "pad_token_id": self.pad_token_id,
    }
    return BlenderbotSmallConfig(**settings)
def test_lm_uneven_forward(self):
    """Check the logits shape when encoder and decoder lengths differ.

    A small two-layer model is run on a batch of two 7-token encoder
    sequences and two 5-token decoder sequences; the logits must have the
    decoder input's shape with ``config.vocab_size`` appended.
    """
    tiny_config = BlenderbotSmallConfig(
        vocab_size=self.vocab_size,
        d_model=14,
        encoder_layers=2,
        decoder_layers=2,
        encoder_attention_heads=2,
        decoder_attention_heads=2,
        encoder_ffn_dim=8,
        decoder_ffn_dim=8,
        max_position_embeddings=48,
    )
    model = FlaxBlenderbotSmallForConditionalGeneration(tiny_config)

    encoder_ids = np.array(
        [
            [71, 82, 18, 33, 46, 91, 2],
            [68, 34, 26, 58, 30, 2, 1],
        ],
        dtype=np.int64,
    )
    decoder_ids = np.array(
        [
            [82, 71, 82, 18, 2],
            [58, 68, 2, 1, 1],
        ],
        dtype=np.int64,
    )

    result = model(input_ids=encoder_ids, decoder_input_ids=decoder_ids)

    # One vocabulary-sized score vector per decoder position.
    wanted_shape = tuple(decoder_ids.shape) + (tiny_config.vocab_size,)
    self.assertEqual(result["logits"].shape, wanted_shape)
def prepare_config_and_inputs(self):
    """Create the config and inputs for the decoder-only test setup.

    ``attention_mask`` is only generated when ``self.use_attention_mask`` is
    truthy and ``lm_labels`` only when ``self.use_labels`` is truthy;
    otherwise each is ``None``.

    Returns:
        A ``(config, input_ids, attention_mask, lm_labels)`` tuple.
    """
    ids_shape = [self.batch_size, self.decoder_seq_length]

    input_ids = ids_tensor(ids_shape, self.vocab_size)

    # A vocabulary of size 2 is requested for the mask (presumably giving
    # 0/1 entries -- confirm against ids_tensor).
    attention_mask = (
        ids_tensor(
            [self.batch_size, self.decoder_seq_length], vocab_size=2
        )
        if self.use_attention_mask
        else None
    )

    lm_labels = (
        ids_tensor(ids_shape, self.vocab_size) if self.use_labels else None
    )

    config_kwargs = dict(
        vocab_size=self.vocab_size,
        d_model=self.d_model,
        decoder_layers=self.decoder_layers,
        decoder_ffn_dim=self.decoder_ffn_dim,
        encoder_attention_heads=self.encoder_attention_heads,
        decoder_attention_heads=self.decoder_attention_heads,
        eos_token_id=self.eos_token_id,
        bos_token_id=self.bos_token_id,
        use_cache=self.use_cache,
        pad_token_id=self.pad_token_id,
        decoder_start_token_id=self.decoder_start_token_id,
        max_position_embeddings=self.max_position_embeddings,
        is_encoder_decoder=self.is_encoder_decoder,
    )
    config = BlenderbotSmallConfig(**config_kwargs)

    return config, input_ids, attention_mask, lm_labels
def _get_config_and_data(self):
    """Return a small fixed batch of token ids with a matching config.

    The batch holds 13 hard-coded rows of 7 int64 token ids. The config
    describes a two-layer encoder/decoder with eos=2, pad=1 and bos=0.

    Returns:
        A ``(config, input_ids, batch_size)`` tuple, where ``batch_size`` is
        the number of rows in ``input_ids``.
    """
    token_rows = [
        [71, 82, 18, 33, 46, 91, 2],
        [68, 34, 26, 58, 30, 82, 2],
        [5, 97, 17, 39, 94, 40, 2],
        [76, 83, 94, 25, 70, 78, 2],
        [87, 59, 41, 35, 48, 66, 2],
        [55, 13, 16, 58, 5, 2, 1],  # note padding
        [64, 27, 31, 51, 12, 75, 2],
        [52, 64, 86, 17, 83, 39, 2],
        [48, 61, 9, 24, 71, 82, 2],
        [26, 1, 60, 48, 22, 13, 2],
        [21, 5, 62, 28, 14, 76, 2],
        [45, 98, 37, 86, 59, 48, 2],
        [70, 70, 50, 9, 28, 0, 2],
    ]
    input_ids = np.array(token_rows, dtype=np.int64)
    row_count, _ = input_ids.shape

    model_config = BlenderbotSmallConfig(
        vocab_size=self.vocab_size,
        d_model=24,
        encoder_layers=2,
        decoder_layers=2,
        encoder_attention_heads=2,
        decoder_attention_heads=2,
        encoder_ffn_dim=32,
        decoder_ffn_dim=32,
        max_position_embeddings=48,
        eos_token_id=2,
        pad_token_id=1,
        bos_token_id=0,
    )
    return model_config, input_ids, row_count
from transformers import TFBlenderbotSmallModel, BlenderbotSmallConfig
import numpy as np
import tensorflow as tf
from tensorflow import keras
from config_ import cfg

# Instantiate the model from a local JSON configuration; no pretrained
# weights are loaded here.
config = BlenderbotSmallConfig.from_json_file("config.json")
blen_model = TFBlenderbotSmallModel(config=config)

# Training data: the first two arrays stored in the .npz archive.
npzfile = np.load('train_data.npz')
inputs = npzfile['arr_0']
outputs = npzfile['arr_1']

# Size of the second axis of each array (presumably the padded sequence
# length -- confirm against the script that wrote train_data.npz).
inp_shape = inputs.shape[1]
out_shape = outputs.shape[1]


class NaturalExpDecay(tf.keras.optimizers.schedules.LearningRateSchedule):
    """Natural exponential learning-rate decay.

    The learning rate at a given step is::

        initial_learning_rate * exp(-decay_rate * step / decay_steps)

    Args:
        initial_learning_rate: Learning rate at step 0.
        decay_steps: Number of steps the step counter is divided by.
        decay_rate: Multiplier applied to ``step / decay_steps`` inside the
            exponent.
    """

    def __init__(self, initial_learning_rate, decay_steps, decay_rate):
        super().__init__()
        # Stored as float32 so the arithmetic in __call__ uses one dtype.
        self.initial_learning_rate = tf.cast(initial_learning_rate,
                                             dtype=tf.float32)
        self.decay_steps = tf.cast(decay_steps, dtype=tf.float32)
        self.decay_rate = tf.cast(decay_rate, dtype=tf.float32)

    def __call__(self, step):
        """Return the float32 learning rate for ``step``."""
        # An integer step tensor (e.g. an optimizer's iteration counter)
        # cannot be divided by a float32 tensor: TensorFlow does not promote
        # mixed dtypes, so cast first.
        step = tf.cast(step, dtype=tf.float32)
        return self.initial_learning_rate * tf.math.exp(
            -self.decay_rate * (step / self.decay_steps))
# Data-preparation script: loads the pretrained
# 'facebook/blenderbot_small-90M' model, tokenizer and config, reads
# cfg.data_file, splits its contents on "\n", and scans the resulting lines to
# collect question/answer strings into the `que` and `ans` lists.
#
# A line counts as blank when it equals "" or " ". A pair is only considered
# when both data[i] and data[i + 1] are non-blank.
#
# NOTE(review): the original line breaks and indentation of this script have
# been lost, so it cannot be determined which of the statements after
# `if q is None:` belong to that branch -- recover the original layout before
# changing this code.
# NOTE(review): data[i + 1] is read for every non-blank data[i], including the
# last element; if the file's final line is non-blank this raises IndexError
# -- confirm whether the data file always ends with a newline.
# NOTE(review): `state`, `start_token` and `end_token` are assigned but not
# used in the visible portion; they may be used further down -- confirm.
from transformers import TFBlenderbotSmallModel, BlenderbotSmallTokenizer, BlenderbotSmallConfig import tensorflow as tf from tensorflow import keras from config_ import cfg import numpy as np mname = 'facebook/blenderbot_small-90M' blen_model = TFBlenderbotSmallModel.from_pretrained(mname) tokenizer = BlenderbotSmallTokenizer.from_pretrained(mname) config = BlenderbotSmallConfig.from_pretrained(mname) que = [] ans = [] start_token = 1 end_token = 2 with open(cfg.data_file) as f: data = f.read().split("\n") state = None q = None a = None for i in range(len(data)): if data[i] != " " and data[i] != "": if data[i + 1] != "" and data[i + 1] != " ": if q is None: q = data[i] a = data[i + 1] que.append(q) ans.append(a)