def create_model(params, is_train):
    """Builds a Keras transformer model for training or inference.

    Args:
        params: dict of hyperparameters; reads "vocab_size",
            "label_smoothing", and "enable_metrics_in_training".
        is_train: bool; True builds the training graph (loss attached),
            False builds the inference graph (outputs and scores).

    Returns:
        A tf.keras.Model. In training mode it takes [inputs, targets] and
        emits logits with the transformer loss added via add_loss(). In
        inference mode it takes inputs and emits [outputs, scores].
    """
    with tf.name_scope("model"):
        if not is_train:
            # Inference: the internal model runs autoregressive decoding and
            # returns a dict of decoded ids and their scores.
            source = tf.keras.layers.Input((None,), dtype="int64", name="inputs")
            transformer = Transformer(params, name="transformer_v2")
            decoded = transformer([source], training=is_train)
            return tf.keras.Model(source, [decoded["outputs"], decoded["scores"]])

        source = tf.keras.layers.Input((None,), dtype="int64", name="inputs")
        target = tf.keras.layers.Input((None,), dtype="int64", name="targets")
        transformer = Transformer(params, name="transformer_v2")
        logits = transformer([source, target], training=is_train)

        vocab_size = params["vocab_size"]
        label_smoothing = params["label_smoothing"]
        if params["enable_metrics_in_training"]:
            # MetricLayer passes logits through unchanged while registering
            # training metrics as a side effect.
            logits = metrics.MetricLayer(vocab_size)([logits, target])
        # Identity Lambda pins the output name and casts to float32 (needed
        # when the rest of the model runs in mixed precision).
        logits = tf.keras.layers.Lambda(
            lambda x: x, name="logits", dtype=tf.float32)(logits)

        model = tf.keras.Model([source, target], logits)
        # TODO(reedwm): Can we do this loss in float16 instead of float32?
        model.add_loss(
            metrics.transformer_loss(logits, target, label_smoothing, vocab_size))
        return model
def create_model(params, is_train):
    """Create transformer model.

    Args:
        params: hyperparameter dict; uses 'vocab_size', 'label_smoothing'
            and 'enable_metrics_in_training'.
        is_train: whether to build the training graph (with loss attached)
            or the inference graph (outputs + scores).

    Returns:
        A tf.keras.Model for the requested mode.
    """
    with tf.name_scope('model'):
        if is_train:
            src = tf.keras.layers.Input((None,), dtype='int64', name='inputs')
            tgt = tf.keras.layers.Input((None,), dtype='int64', name='targets')
            core = Transformer(params, name='transformer_v2')
            logits = core([src, tgt], training=is_train)

            vocab_size = params['vocab_size']
            label_smoothing = params['label_smoothing']
            if params['enable_metrics_in_training']:
                # Identity layer that records training metrics on the side.
                logits = metrics.MetricLayer(vocab_size)([logits, tgt])
            # Name the output tensor and force float32 for the loss.
            logits = tf.keras.layers.Lambda(
                lambda x: x, name='logits', dtype=tf.float32)(logits)

            model = tf.keras.Model([src, tgt], logits)
            loss = metrics.transformer_loss(
                logits, tgt, label_smoothing, vocab_size)
            model.add_loss(loss)
            return model

        # Inference path: decode and return both ids and their scores.
        src = tf.keras.layers.Input((None,), dtype='int64', name='inputs')
        core = Transformer(params, name='transformer_v2')
        decoded = core([src], training=is_train)
        return tf.keras.Model(src, [decoded['outputs'], decoded['scores']])
def create_model(params, mode):
    """Creates transformer model for the given mode.

    Args:
        params: hyperparameter dict; reads "enable_metrics_in_training",
            "vocab_size" and "label_smoothing".
        mode: one of 'train', 'eval', or anything else for prediction.

    Returns:
        A tf.keras.Model. For 'train'/'eval' it maps [inputs, targets] to
        logits; otherwise it maps [inputs, targets] to
        [outputs, scores, logits].
    """
    with tf.name_scope("model"):
        training_graph = mode in ('train', 'eval')
        if training_graph:
            src = tf.keras.layers.Input((None, ), dtype="int32", name="inputs")
            tgt = tf.keras.layers.Input((None, ), dtype="int32", name="targets")
            core = Transformer(params, name="transformer_v2")
            # Dropout etc. active only in actual training, not eval.
            logits = core([src, tgt], training=mode == 'train')
            if params["enable_metrics_in_training"]:
                # Pass-through layer that registers training metrics.
                logits = metrics.MetricLayer(
                    params["vocab_size"],
                    params["label_smoothing"])([logits, tgt])
            # Name the output and force float32 (mixed-precision safe).
            logits = tf.keras.layers.Lambda(
                lambda x: x, name="logits", dtype=tf.float32)(logits)
            return tf.keras.Model([src, tgt], logits)

        # Prediction path. NOTE(review): the model is invoked twice — once
        # decode-only and once teacher-forced for logits; presumably both
        # outputs are wanted downstream — confirm with the caller.
        src = tf.keras.layers.Input((None, ), dtype="int32", name="inputs")
        tgt = tf.keras.layers.Input((None, ), dtype="int32", name="targets")
        core = Transformer(params, name="transformer_v2")
        decoded = core([src], training=False)
        logits = core([src, tgt], training=False)
        return tf.keras.Model(
            [src, tgt], [decoded["outputs"], decoded["scores"], logits])
def test_metric_layer(self):
    """MetricLayer must pass logits through with an unchanged shape."""
    n_vocab = 50
    logit_input = tf.keras.layers.Input(
        (None, n_vocab), dtype="float32", name="logits")
    target_input = tf.keras.layers.Input(
        (None, ), dtype="int64", name="targets")
    passed_through = metrics.MetricLayer(n_vocab)([logit_input, target_input])
    # Shape is (batch, seq_len, vocab): only vocab is statically known.
    expected = [None, None, n_vocab]
    self.assertEqual(passed_through.shape.as_list(), expected)