Exemple #1
0
def create_model(params, is_train):
  """Builds the Keras transformer model for training or for inference.

  Args:
    params: hyperparameter mapping handed to `Transformer`. The training path
      additionally reads "vocab_size", "label_smoothing" and
      "enable_metrics_in_training" from it.
    is_train: truthy to build the training model, falsy to build the
      inference model.

  Returns:
    A `tf.keras.Model`. The training model maps [inputs, targets] to logits
    (routed through an identity layer named "logits" created with
    dtype=tf.float32) and has the transformer loss attached via `add_loss`.
    The inference model maps inputs to [outputs, scores].
  """

  def _token_ids(name):
    # Int64 Keras input declared with shape (None,).
    return tf.keras.layers.Input((None,), dtype="int64", name=name)

  with tf.name_scope("model"):
    if not is_train:
      # Inference path: only the source sequence is fed to the transformer.
      source = _token_ids("inputs")
      transformer = Transformer(params, name="transformer_v2")
      decoded = transformer([source], training=is_train)
      return tf.keras.Model(source, [decoded["outputs"], decoded["scores"]])

    # Training path: the transformer consumes both source and target ids.
    source = _token_ids("inputs")
    target = _token_ids("targets")
    transformer = Transformer(params, name="transformer_v2")
    train_logits = transformer([source, target], training=is_train)

    num_tokens = params["vocab_size"]
    smoothing = params["label_smoothing"]
    if params["enable_metrics_in_training"]:
      train_logits = metrics.MetricLayer(num_tokens)([train_logits, target])

    # Identity layer that gives the model output a stable name.
    passthrough = tf.keras.layers.Lambda(
        lambda tensor: tensor, name="logits", dtype=tf.float32)
    train_logits = passthrough(train_logits)

    train_model = tf.keras.Model([source, target], train_logits)
    # TODO(reedwm): Can we do this loss in float16 instead of float32?
    train_model.add_loss(
        metrics.transformer_loss(train_logits, target, smoothing, num_tokens))
    return train_model
Exemple #2
0
def create_model(params, is_train):
    """Assemble the transformer as a Keras model.

    Args:
        params: hyperparameter mapping passed to `Transformer`; when
            `is_train` is truthy the keys 'vocab_size', 'label_smoothing'
            and 'enable_metrics_in_training' are also read.
        is_train: selects the training model (truthy) or the inference
            model (falsy); it is also forwarded as the `training` flag.

    Returns:
        A `tf.keras.Model`: [inputs, targets] -> logits with the transformer
        loss registered through `add_loss` when training, otherwise
        inputs -> [outputs, scores].
    """
    with tf.name_scope('model'):
        if is_train:
            src_ids = tf.keras.layers.Input(
                (None,), dtype='int64', name='inputs')
            tgt_ids = tf.keras.layers.Input(
                (None,), dtype='int64', name='targets')
            network = Transformer(params, name='transformer_v2')
            net_out = network([src_ids, tgt_ids], training=is_train)

            n_vocab = params['vocab_size']
            smoothing = params['label_smoothing']
            if params['enable_metrics_in_training']:
                metric_layer = metrics.MetricLayer(n_vocab)
                net_out = metric_layer([net_out, tgt_ids])

            # Identity layer named 'logits', created with dtype=tf.float32.
            name_logits = tf.keras.layers.Lambda(
                lambda t: t, name='logits', dtype=tf.float32)
            net_out = name_logits(net_out)

            keras_model = tf.keras.Model([src_ids, tgt_ids], net_out)
            keras_model.add_loss(
                metrics.transformer_loss(net_out, tgt_ids, smoothing, n_vocab))
            return keras_model

        # Inference: the transformer is called with the source ids only and
        # returns a mapping holding 'outputs' and 'scores'.
        src_ids = tf.keras.layers.Input((None,), dtype='int64', name='inputs')
        network = Transformer(params, name='transformer_v2')
        prediction = network([src_ids], training=is_train)
        return tf.keras.Model(
            src_ids, [prediction['outputs'], prediction['scores']])
Exemple #3
0
def create_model(params, mode):
    """Builds the transformer Keras model for the requested mode.

    Args:
        params: hyperparameter mapping passed to `Transformer`. For the
            'train'/'eval' model, "enable_metrics_in_training" is read and,
            when it is truthy, "vocab_size" and "label_smoothing" as well.
        mode: 'train' or 'eval' selects the logits-only model (the
            transformer runs with training=True only for 'train'); any other
            value selects the model that also returns decoded outputs.

    Returns:
        A `tf.keras.Model` taking [inputs, targets]. For 'train'/'eval' it
        outputs logits; otherwise it outputs [outputs, scores, logits].
    """
    with tf.name_scope("model"):
        # Every mode starts from the same two int32 inputs and a single
        # Transformer instance.
        src = tf.keras.layers.Input((None, ), dtype="int32", name="inputs")
        tgt = tf.keras.layers.Input((None, ), dtype="int32", name="targets")
        transformer = Transformer(params, name="transformer_v2")

        if mode not in ('train', 'eval'):
            # Run the source-only call first, then the source+target call
            # that yields logits, both with training disabled.
            decoded = transformer([src], training=False)
            logits = transformer([src, tgt], training=False)
            return tf.keras.Model(
                [src, tgt],
                [decoded["outputs"], decoded["scores"], logits])

        logits = transformer([src, tgt], training=(mode == 'train'))
        if params["enable_metrics_in_training"]:
            metric_layer = metrics.MetricLayer(params["vocab_size"],
                                               params["label_smoothing"])
            logits = metric_layer([logits, tgt])

        # Identity layer named "logits", created with dtype=tf.float32.
        name_logits = tf.keras.layers.Lambda(lambda tensor: tensor,
                                             name="logits",
                                             dtype=tf.float32)
        logits = name_logits(logits)
        return tf.keras.Model([src, tgt], logits)
Exemple #4
0
 def test_metric_layer(self):
     """Checks that MetricLayer's output has shape [None, None, vocab_size]."""
     num_tokens = 50
     logits_in = tf.keras.layers.Input(
         (None, num_tokens), dtype="float32", name="logits")
     targets_in = tf.keras.layers.Input(
         (None, ), dtype="int64", name="targets")

     layer_out = metrics.MetricLayer(num_tokens)([logits_in, targets_in])

     # Symbolic Keras inputs: the two leading dimensions are unspecified.
     expected_shape = [None, None, num_tokens]
     self.assertEqual(layer_out.shape.as_list(), expected_shape)