def encode(self, inputs, attention_bias): """Generate continuous representation for inputs. Args: inputs: int tensor with shape [batch_size, input_length]. attention_bias: float tensor with shape [batch_size, 1, 1, input_length] Returns: float tensor with shape [batch_size, input_length, hidden_size] """ with tf.name_scope("encode"): # Prepare inputs to the layer stack by adding positional encodings and # applying dropout. embedded_inputs = self.embedding_softmax_layer(inputs) inputs_padding = model_utils.get_padding(inputs) with tf.name_scope("add_pos_encoding"): length = tf.shape(embedded_inputs)[1] pos_encoding = model_utils.get_position_encoding( length, self.params.hidden_size) encoder_inputs = embedded_inputs + pos_encoding if self.train: encoder_inputs = tf.nn.dropout( encoder_inputs, 1 - self.params.layer_postprocess_dropout) return self.encoder_stack(encoder_inputs, attention_bias, inputs_padding)
def encode(self, inputs, attention_bias): """ :param inputs: int tensor with shape [batch_size, input_length] :param attention_bias: float tensor with shape [batch_size, 1, 1, input_length] :return: float tensor with shape [batch_size, input_length, hidden_size] """ with tf.name_scope('encode'): # [batch_size, length, hidden_size] embedded_inputs = self.embedding_layer(inputs) # [batch_size, length] inputs_padding = model_utils.get_padding(inputs) with tf.name_scope('add_pos_embedding'): length = tf.shape(embedded_inputs)[1] # use sin cos calculate position embeddings pos_encoding = model_utils.get_position_encoding( length, self.params.get('hidden_size')) encoder_inputs = tf.add(embedded_inputs, pos_encoding) if self.train: encoder_inputs = tf.nn.dropout( encoder_inputs, 1 - self.params.get('encoder_decoder_dropout')) return self.encoder_stack(encoder_inputs, attention_bias, inputs_padding)
def call(self, x): """Get token embeddings of x. Args: x: An int64 tensor with shape [batch_size, length] Returns: embeddings: float32 tensor with shape [batch_size, length, embedding_size] padding: float32 tensor with shape [batch_size, length] indicating the locations of the padding tokens in x. """ with tf.name_scope("embedding"): # SSY 1 accessing embeddings embeddings = tf.gather(self.shared_weights, x) # Scale embedding by the sqrt of the hidden size # SSY 2 scaling embeddings embeddings *= self.hidden_size**0.5 # Create binary array of size [batch_size, length] # where 1 = padding, 0 = not padding padding = model_utils.get_padding(x) # Set all padding embedding values to 0 embeddings *= tf.expand_dims(1 - padding, -1) return embeddings
def encode(self, inputs, attention_bias): """Generate continuous representation for inputs. Args: inputs: int tensor with shape [batch_size, input_length]. attention_bias: float tensor with shape [batch_size, 1, 1, input_length] Returns: float tensor with shape [batch_size, input_length, hidden_size] """ with tf.compat.v1.name_scope("encode"): # Prepare inputs to the layer stack by adding positional encodings and # applying dropout. embedded_inputs = self.embedding_softmax_layer(inputs) inputs_padding = model_utils.get_padding(inputs) with tf.compat.v1.name_scope("add_pos_encoding"): length = tf.shape(input=embedded_inputs)[1] pos_encoding = model_utils.get_position_encoding( length, self.params.hidden_size) encoder_inputs = embedded_inputs + pos_encoding with tf.compat.v1.tpu.bfloat16_scope(): encoder_inputs = tf.cast(encoder_inputs, tf.bfloat16) #attention_bias = tf.cast(attention_bias, tf.bfloat16) inputs_padding = tf.cast(inputs_padding, tf.bfloat16) if self.train: mlperf_log.transformer_print( key=mlperf_log.MODEL_HP_LAYER_POSTPROCESS_DROPOUT, value=self.params.layer_postprocess_dropout) encoder_inputs = tf.nn.dropout( encoder_inputs, 1 - (1 - self.params.layer_postprocess_dropout)) #encoder_outputs = self.encoder_stack(encoder_inputs, attention_bias, inputs_padding) #return encoder_outputs # self.encoder_stack(encoder_inputs, attention_bias, inputs_padding) return self.encoder_stack(encoder_inputs, attention_bias, inputs_padding)
def encode(self, inputs, attention_bias):
  """Generate continuous representation for inputs."""
  with tf.name_scope("encode"):
    embedded_inputs = self.encoder_embedding_layer(
        inputs, not ModeKeys.is_predict_one(self.mode))
    if ModeKeys.is_predict_one(self.mode):
      inputs_padding = None
    else:
      inputs_padding = model_utils.get_padding(inputs)

    with tf.name_scope("add_pos_encoding"):
      length = tf.shape(embedded_inputs)[1]
      if ModeKeys.is_predict_one(self.mode):
        # Precompute the full table at max_length, then slice to the
        # runtime length so the exported graph keeps static shapes.
        pos_encoding = model_utils.get_position_encoding(
            self.params.max_length, self.params.hidden_size)
        pos_encoding = tf.slice(pos_encoding, [0, 0],
                                [length, self.params.hidden_size],
                                name='slice_pos_encoding')
      else:
        pos_encoding = model_utils.get_position_encoding(
            length, self.params.hidden_size)
      encoder_inputs = embedded_inputs + pos_encoding

    if self.is_train:
      encoder_inputs = tf.nn.dropout(
          encoder_inputs, 1 - self.params.layer_postprocess_dropout)

    return self.encoder_stack(encoder_inputs, attention_bias, inputs_padding)
def test_get_padding(self):
  x = tf.constant([[1, 0, 0, 0, 2],
                   [3, 4, 0, 0, 0],
                   [0, 5, 6, 0, 7]])
  padding = model_utils.get_padding(x, padding_value=0)
  with self.test_session() as sess:
    padding = sess.run(padding)

  self.assertAllEqual([[0, 1, 1, 1, 0],
                       [0, 0, 1, 1, 1],
                       [1, 0, 0, 1, 0]], padding)
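# For reference, a minimal model_utils.get_padding consistent with the test
# above (a sketch, not the verbatim library code): positions equal to
# padding_value are marked 1.0, all other positions 0.0.
def get_padding(x, padding_value=0):
  """Returns a float tensor with 1.0 at padding positions, 0.0 elsewhere."""
  with tf.name_scope("padding"):
    return tf.cast(tf.equal(x, padding_value), tf.float32)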
def _encode(self, inputs, attention_bias):
  embedded_inputs = self.embedding_layer(inputs)
  inputs_padding = model_utils.get_padding(inputs)
  # Apply embedding dropout only during training; pass the embeddings
  # through unchanged otherwise.
  encoder_inputs = embedded_inputs
  if self.is_train:
    encoder_inputs = self.encoder_embedding_dropout(embedded_inputs)
  return self.encoder_stack(encoder_inputs, attention_bias, inputs_padding)
def _encode(self, inputs, attention_bias):
  embedded_inputs = self.embedding_layer(inputs)
  # Add sinusoidal position encodings; the [max_length, num_units] table
  # broadcasts against inputs padded to max_length.
  embedded_inputs += model_utils.get_position_encoding(
      self.hparams['max_length'], self.hparams['num_units'])
  inputs_padding = model_utils.get_padding(inputs)
  # Apply embedding dropout only during training; pass the embeddings
  # through unchanged otherwise.
  encoder_inputs = embedded_inputs
  if self.is_train:
    encoder_inputs = self.encoder_embedding_dropout(embedded_inputs)
  return self.encoder_stack(encoder_inputs, attention_bias, inputs_padding)
def encode(self, inputs, attention_bias): """Generate continuous representation for inputs. Args: inputs: int tensor with shape [batch_size, input_length]. attention_bias: float tensor with shape [batch_size, 1, 1, input_length] Returns: float tensor with shape [batch_size, input_length, hidden_size] """ with tf.name_scope("encode"): # Prepare inputs to the layer stack by adding positional encodings and # applying dropout. embedded_inputs = self.encoder_embedding_layer( inputs, not ModeKeys.is_predict_one(self.mode)) if ModeKeys.is_predict_one(self.mode): inputs_padding = None else: inputs_padding = model_utils.get_padding(inputs) # add_pos_encoding with tf.name_scope("add_pos_encoding"): length = tf.shape(embedded_inputs)[1] if ModeKeys.is_predict_one(self.mode): pos_encoding = model_utils.get_position_encoding( self.params.max_length, self.params.hidden_size) pos_encoding = tf.slice(pos_encoding, [0, 0], [length, self.params.hidden_size], name='slice_pos_encoding') else: pos_encoding = model_utils.get_position_encoding( length, self.params.hidden_size) encoder_inputs = embedded_inputs + pos_encoding if self.is_train: encoder_inputs = tf.nn.dropout( encoder_inputs, 1 - self.params.layer_postprocess_dropout) return self.encoder_stack(encoder_inputs, attention_bias, inputs_padding)
def encode(self, inputs, attention_bias): """Generate continuous representation for inputs. Args: inputs: int tensor with shape [batch_size, input_length]. attention_bias: float tensor with shape [batch_size, 1, 1, input_length] Returns: float tensor with shape [batch_size, input_length, hidden_size] """ embedded_inputs = self.embedding_softmax_layer(inputs) inputs_padding = model_utils.get_padding(inputs) length = embedded_inputs.shape[1] pos_encoding = model_utils.get_position_encoding( length, self.param.hidden_size, inputs.context) encoder_inputs = embedded_inputs + pos_encoding if self.train: encoder_inputs = self.dropout_input(encoder_inputs) return self.encoder_stack(encoder_inputs, attention_bias, inputs_padding)
def call(self, x): """Get token embeddings of x. Args: x: An int64 tensor with shape [batch_size, length] Returns: embeddings: float32 tensor with shape [batch_size, length, embedding_size] padding: float32 tensor with shape [batch_size, length] indicating the locations of the padding tokens in x. """ with tf.name_scope("embedding"): embeddings = tf.gather(self.shared_weights, x) # Scale embedding by the sqrt of the hidden size embeddings *= self.hidden_size ** 0.5 # Create binary array of size [batch_size, length] # where 1 = padding, 0 = not padding padding = model_utils.get_padding(x) # Set all padding embedding values to 0 embeddings *= tf.expand_dims(1 - padding, -1) return embeddings