def __init__(self,
             vocab_size=33708,
             embedding_width=512,
             dropout_rate=0.0,
             padded_decode=False,
             decode_max_length=None,
             extra_decode_length=0,
             beam_size=4,
             alpha=0.6,
             encoder_layer=None,
             decoder_layer=None,
             dtype=tf.float32,
             eos_id=EOS_ID,
             **kwargs):
  """Initialize layers to build Transformer model.

  Args:
    vocab_size: Size of vocabulary.
    embedding_width: Size of hidden layer for embedding.
    dropout_rate: Dropout probability.
    padded_decode: Whether max_sequence_length padding is used. If set to
      False, max_sequence_length padding is not used.
    decode_max_length: Maximum number of steps to decode a sequence.
    extra_decode_length: Number of extra steps beam search runs to decode.
    beam_size: Number of beams for beam search.
    alpha: The strength of length normalization for beam search.
    encoder_layer: An initialized encoder layer.
    decoder_layer: An initialized decoder layer.
    dtype: Float dtype.
    eos_id: Id of the end-of-sentence token.
    **kwargs: Other keyword arguments.
  """
  super(Seq2SeqTransformer, self).__init__(**kwargs)
  self._vocab_size = vocab_size
  self._embedding_width = embedding_width
  self._dropout_rate = dropout_rate
  self._padded_decode = padded_decode
  self._decode_max_length = decode_max_length
  self._extra_decode_length = extra_decode_length
  self._beam_size = beam_size
  self._alpha = alpha
  self._dtype = dtype
  self._eos_id = eos_id
  # Token embedding table, initialized and scaled relative to embedding width.
  self.embedding_lookup = keras_nlp.layers.OnDeviceEmbedding(
      vocab_size=self._vocab_size,
      embedding_width=self._embedding_width,
      initializer=tf.random_normal_initializer(
          mean=0., stddev=self._embedding_width**-0.5),
      scale_factor=self._embedding_width**0.5)
  self.encoder_layer = encoder_layer
  self.decoder_layer = decoder_layer
  self.position_embedding = layers.RelativePositionEmbedding(
      hidden_size=self._embedding_width)
  self.encoder_dropout = tf.keras.layers.Dropout(rate=self._dropout_rate)
  self.decoder_dropout = tf.keras.layers.Dropout(rate=self._dropout_rate)
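
# Usage sketch (illustrative, not from the original source): the constructor
# only stores pre-built encoder/decoder layers, so any compatible pair can be
# passed in. `TransformerEncoder`/`TransformerDecoder` and their keyword
# arguments below are assumed names, not a confirmed API of the surrounding
# module.
encoder = TransformerEncoder(num_layers=6, num_attention_heads=8)  # assumed class
decoder = TransformerDecoder(num_layers=6, num_attention_heads=8)  # assumed class
model = Seq2SeqTransformer(
    vocab_size=33708,
    embedding_width=512,
    dropout_rate=0.1,
    beam_size=4,
    alpha=0.6,
    encoder_layer=encoder,
    decoder_layer=decoder)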
def __init__(self, hparams):
  """Builds the screen captioning model from the given hparams."""
  super(ScreenCaptionModel, self).__init__()
  self._hparams = hparams
  with tf.name_scope('captioning'):
    self._word_embedding_layer = EmbeddingLayer(
        name='word',
        hidden_dim=self._hparams['hidden_size'],
        embedding_file=self._hparams['embedding_file'],
        vocab_size=self._hparams['vocab_size'],
        embedding_dim=self._hparams['hidden_size'],  # Not used.
        trainable=self._hparams['glove_trainable'])
    self._position_embedding_layer = layers.RelativePositionEmbedding(
        self._hparams['hidden_size'])
    self._encoder = EncoderLayer(self._hparams, self._word_embedding_layer)
    self._decoder = DecoderLayer(self._hparams, self._word_embedding_layer,
                                 self._position_embedding_layer)
    self._word_layer = tf.keras.layers.Dense(
        units=self._hparams['vocab_size'])
  # Training metrics plus one mean metric per COCO caption score.
  self.model_metrics = {
      'loss': tf.keras.metrics.Mean(name='loss'),
      'caption_loss': tf.keras.metrics.Mean(name='caption_loss'),
      'global_norm': tf.keras.metrics.Mean(name='global_norm'),
  }
  self.caption_metrics = {}
  for score_name in self._SCORE_NAMES:
    scoped_name = 'COCO/{}'.format(score_name)
    self.caption_metrics[scoped_name] = tf.keras.metrics.Mean(
        name=scoped_name)
  # Load the word vocabulary, truncated to the first vocab_size entries.
  self._word_vocab = []
  with tf.io.gfile.GFile(self._hparams['word_vocab_path']) as f:
    for index, line in enumerate(f):
      if index >= self._hparams['vocab_size']:
        break
      self._word_vocab.append(line.strip())
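
# Usage sketch (illustrative, not from the original source): these are the
# hparams keys the constructor reads directly. EncoderLayer/DecoderLayer most
# likely consume additional keys not shown here, so this dict is a minimal
# assumption rather than the full configuration; the file paths are
# hypothetical.
hparams = {
    'hidden_size': 128,
    'vocab_size': 10000,
    'embedding_file': '/path/to/glove_embeddings.txt',   # hypothetical path
    'glove_trainable': False,
    'word_vocab_path': '/path/to/word_vocab.txt',        # hypothetical path
}
model = ScreenCaptionModel(hparams)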
def __init__(self,
             vocab_size=33708,
             hidden_size=512,
             dropout_rate=0.0,
             padded_decode=False,
             num_replicas=1,
             decode_batch_size=2048,
             decode_max_length=97,
             dtype=tf.float32,
             extra_decode_length=0,
             num_heads=8,
             num_layers=6,
             beam_size=4,
             alpha=0.6,
             encoder_layer=None,
             decoder_layer=None,
             name=None,
             **kwargs):
  """Initialize layers to build Transformer model.

  Arguments:
    vocab_size: Size of vocabulary.
    hidden_size: Size of hidden layer for embedding.
    dropout_rate: Dropout probability.
    padded_decode: Whether max_sequence_length padding is used. If set to
      False, max_sequence_length padding is not used.
    num_replicas: Number of replicas for the distribution strategy.
    decode_batch_size: Batch size for decoding.
    decode_max_length: Maximum number of steps to decode a sequence.
    dtype: Data type.
    extra_decode_length: Number of extra steps beam search runs to decode.
    num_heads: Number of attention heads.
    num_layers: Number of identical layers for the Transformer architecture.
    beam_size: Number of beams for beam search.
    alpha: The strength of length normalization for beam search.
    encoder_layer: An initialized encoder layer.
    decoder_layer: An initialized decoder layer.
    name: Name of the model.
    **kwargs: Other keyword arguments.
  """
  super(Seq2SeqTransformer, self).__init__(name=name, **kwargs)
  self._vocab_size = vocab_size
  self._hidden_size = hidden_size
  self._dropout_rate = dropout_rate
  self._padded_decode = padded_decode
  self._num_replicas = num_replicas
  self._decode_batch_size = decode_batch_size
  self._decode_max_length = decode_max_length
  self._dtype = dtype
  self._extra_decode_length = extra_decode_length
  self._num_heads = num_heads
  self._num_layers = num_layers
  self._beam_size = beam_size
  self._alpha = alpha
  # Token embedding table, initialized relative to hidden size and scaled.
  self.embedding_lookup = layers.OnDeviceEmbedding(
      vocab_size=self._vocab_size,
      embedding_width=self._hidden_size,
      initializer=tf.random_normal_initializer(
          mean=0., stddev=self._hidden_size**-0.5),
      use_scale=True)
  self.encoder_layer = encoder_layer
  self.decoder_layer = decoder_layer
  self.position_embedding = layers.RelativePositionEmbedding(
      hidden_size=self._hidden_size)
  self.encoder_dropout = tf.keras.layers.Dropout(rate=self._dropout_rate)
  self.decoder_dropout = tf.keras.layers.Dropout(rate=self._dropout_rate)
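
# Usage sketch (illustrative, not from the original source): this older
# variant also exposes decode-time shapes. When padded_decode=True the decode
# loop is expected to run with static shapes, so decode_batch_size and
# decode_max_length would need to match the actual inference batch; the
# values and the encoder/decoder classes below are assumptions.
encoder_layer = TransformerEncoder(num_layers=6, num_attention_heads=8)  # assumed class
decoder_layer = TransformerDecoder(num_layers=6, num_attention_heads=8)  # assumed class
model = Seq2SeqTransformer(
    vocab_size=33708,
    hidden_size=512,
    num_heads=8,
    num_layers=6,
    padded_decode=True,
    decode_batch_size=32,
    decode_max_length=97,
    beam_size=4,
    alpha=0.6,
    encoder_layer=encoder_layer,
    decoder_layer=decoder_layer)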