def encode(self, inputs, attention_bias, training):
  """Generate continuous representation for inputs.

  Args:
    inputs: int tensor with shape [batch_size, input_length].
    attention_bias: float tensor with shape [batch_size, 1, 1, input_length].
    training: boolean, whether in training mode or not.

  Returns:
    float tensor with shape [batch_size, input_length, hidden_size]
  """
  with tf.name_scope("encode"):
    # Prepare inputs to the layer stack by adding positional encodings and
    # applying dropout.
    embedded_inputs = self.embedding_softmax_layer(inputs)
    inputs_padding = model_utils.get_padding(inputs)

    with tf.name_scope("add_pos_encoding"):
      length = tf.shape(embedded_inputs)[1]
      pos_encoding = model_utils.get_position_encoding(
          length, self.params["hidden_size"])
      encoder_inputs = embedded_inputs + pos_encoding

    if training:
      encoder_inputs = tf.nn.dropout(
          encoder_inputs, rate=self.params["layer_postprocess_dropout"])

    return self.encoder_stack(
        encoder_inputs, attention_bias, inputs_padding, training=training)
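For reference, `model_utils.get_position_encoding` used above computes the standard sinusoidal position signal. The sketch below follows the shape of the helper in the TensorFlow official Transformer and assumes an even `hidden_size`; treat it as an approximation rather than a drop-in copy.

import math
import tensorflow as tf

def get_position_encoding(length, hidden_size,
                          min_timescale=1.0, max_timescale=1e4):
  """Sinusoidal position encoding with shape [length, hidden_size] (sketch)."""
  position = tf.cast(tf.range(length), tf.float32)
  num_timescales = hidden_size // 2  # assumes hidden_size is even
  # Geometrically spaced wavelengths between min_timescale and max_timescale.
  log_timescale_increment = (
      math.log(float(max_timescale) / float(min_timescale)) /
      (tf.cast(num_timescales, tf.float32) - 1))
  inv_timescales = min_timescale * tf.exp(
      tf.cast(tf.range(num_timescales), tf.float32) * -log_timescale_increment)
  scaled_time = tf.expand_dims(position, 1) * tf.expand_dims(inv_timescales, 0)
  # First half sine, second half cosine; concatenation gives hidden_size dims.
  return tf.concat([tf.sin(scaled_time), tf.cos(scaled_time)], axis=1)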
def encode_no_lookup(self, embedded_inputs, inputs_mask):
  """Encoder step for the transformer given already-embedded inputs.

  Args:
    embedded_inputs: float tensor with shape
      [batch_size, input_length, emb_size].
    inputs_mask: int tensor with shape [batch_size, input_length].

  Returns:
    float tensor with shape [batch_size, input_length, hidden_size]
  """
  with tf.name_scope("encode"):
    # Prepare inputs to the layer stack by adding positional encodings and
    # applying dropout.
    inputs_padding = model_utils.get_padding(inputs_mask)
    attention_bias = model_utils.get_padding_bias(inputs_mask)

    with tf.name_scope("add_pos_encoding"):
      length = tf.shape(embedded_inputs)[1]
      pos_encoding = model_utils.get_position_encoding(
          length, self.params.hidden_size)
      encoder_inputs = embedded_inputs + pos_encoding

    if self.train:
      encoder_inputs = tf.nn.dropout(
          encoder_inputs, 1 - self.params.layer_postprocess_dropout)

    return self.encoder_stack(encoder_inputs, attention_bias, inputs_padding)
def encode(self, inputs, attention_bias):
  """Generate continuous representation (the encoder representation) for inputs.

  Args:
    inputs: int tensor with shape [batch_size, input_length].
    attention_bias: float tensor with shape [batch_size, 1, 1, input_length]

  Returns:
    float tensor with shape [batch_size, input_length, hidden_size]
  """
  with tf.name_scope("encode"):
    # Prepare inputs to the layer stack by adding positional encodings and
    # applying dropout.
    embedded_inputs = self.embedding_softmax_layer(
        inputs)  # Convert the inputs into embeddings.
    inputs_padding = model_utils.get_padding(
        inputs)  # Get the padded positions of the inputs.

    with tf.name_scope("add_pos_encoding"):
      length = tf.shape(embedded_inputs)[1]
      pos_encoding = model_utils.get_position_encoding(  # Get the position encodings.
          length, self.params["hidden_size"])
      encoder_inputs = embedded_inputs + pos_encoding  # Sum them to form the encoder input.

    if self.train:
      encoder_inputs = tf.nn.dropout(  # Apply dropout before feeding the encoder.
          encoder_inputs,
          1 - self.params["layer_postprocess_dropout"])

    return self.encoder_stack(encoder_inputs, attention_bias, inputs_padding)
def encode(self, inputs, attention_bias):
  """Generate continuous representation for inputs.

  Args:
    inputs: int tensor with shape [batch_size, input_length].
    attention_bias: float tensor with shape [batch_size, 1, 1, input_length]

  Returns:
    float tensor with shape [batch_size, input_length, hidden_size]
  """
  with tf.name_scope("encode"):
    # Prepare inputs to the layer stack by adding positional encodings and
    # applying dropout.
    embedded_inputs = self.embedding_softmax_layer(inputs)
    inputs_padding = model_utils.get_padding(inputs)

    with tf.name_scope("add_pos_encoding"):
      length = tf.shape(embedded_inputs)[1]
      pos_encoding = model_utils.get_position_encoding(
          length, self.params.hidden_size)
      encoder_inputs = embedded_inputs + pos_encoding

    if self.train:
      encoder_inputs = tf.nn.dropout(
          encoder_inputs, 1 - self.params.layer_postprocess_dropout)

    return self.encoder_stack(encoder_inputs, attention_bias, inputs_padding)
def test_get_padding(self):
  x = tf.constant([[1, 0, 0, 0, 2], [3, 4, 0, 0, 0], [0, 5, 6, 0, 7]])
  padding = model_utils.get_padding(x, padding_value=0)
  with self.test_session() as sess:
    padding = sess.run(padding)

  self.assertAllEqual(
      [[0, 1, 1, 1, 0], [0, 0, 1, 1, 1], [1, 0, 0, 1, 0]], padding)
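This test pins down the contract of `model_utils.get_padding`: it returns 1.0 at positions equal to `padding_value` and 0.0 elsewhere. A minimal sketch consistent with that behavior (the real helper lives in the Transformer's model_utils module):

import tensorflow as tf

def get_padding(x, padding_value=0, dtype=tf.float32):
  """Return a tensor that is 1 at padding positions and 0 elsewhere (sketch)."""
  with tf.name_scope("padding"):
    return tf.cast(tf.equal(x, padding_value), dtype)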
def __call__(self, inputs):
  """Apply the encoder output layer.

  Args:
    inputs: tensor with shape [batch_size, length, 2*hidden_size]

  Returns:
    tensor with shape [batch_size, length, hidden_size]
  """
  with tf.name_scope("encoder_output_layer"):
    inputs_padding = model_utils.get_padding(inputs)
    outputs = self.feed_foward_layer(inputs, padding=inputs_padding)
    return self.output_norm_layer(outputs)
def __call__(self, inputs, embedded_inputs):
  """1. Get padding; 2. add position encoding.

  Args:
    inputs: tensor with shape [batch_size, length]
    embedded_inputs: tensor with shape [batch_size, length, hidden_size]

  Returns:
    encoder_inputs: tensor with shape [batch_size, length, hidden_size]
    inputs_padding: tensor with shape [batch_size, length]
  """
  with tf.name_scope("stack_input"):
    inputs_padding = model_utils.get_padding(inputs)
    length = tf.shape(inputs)[1]
    pos_encoding = model_utils.get_position_encoding(
        length, self.params["hidden_size"])
    encoder_inputs = embedded_inputs + pos_encoding
    if self.train:
      encoder_inputs = tf.nn.dropout(
          encoder_inputs, 1 - self.params["layer_postprocess_dropout"])
    return encoder_inputs, inputs_padding
def call(self, x):
  """Get token embeddings of x.

  Args:
    x: An int64 tensor with shape [batch_size, length]

  Returns:
    embeddings: float32 tensor with shape [batch_size, length, embedding_size],
      with the embeddings of padding tokens set to zero.
  """
  with tf.name_scope("embedding"):
    embeddings = tf.gather(self.shared_weights, x)

    # Scale embedding by the sqrt of the hidden size
    embeddings *= self.hidden_size ** 0.5

    # Create binary array of size [batch_size, length]
    # where 1 = padding, 0 = not padding
    padding = model_utils.get_padding(x)

    # Set all padding embedding values to 0
    embeddings *= tf.expand_dims(1 - padding, -1)
    return embeddings
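A small standalone illustration of the scaling and masking steps above, using made-up sizes (a toy vocabulary of 4 tokens, hidden size 8, token id 0 treated as padding):

import tensorflow as tf

hidden_size = 8
shared_weights = tf.random.normal([4, hidden_size])  # toy embedding table
x = tf.constant([[2, 3, 0, 0]])                      # one sequence; last two ids are padding

embeddings = tf.gather(shared_weights, x) * hidden_size ** 0.5  # scale by sqrt(hidden_size)
padding = tf.cast(tf.equal(x, 0), tf.float32)                   # 1 at padding, 0 elsewhere
embeddings *= tf.expand_dims(1 - padding, -1)                   # zero out padded positions

print(embeddings[0, 2:])  # the two padded rows are now all zeros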
def encode(self, inputs, attention_bias):
  """Generate continuous representation for inputs.

  Args:
    inputs: int tensor with shape [batch_size, input_length].
    attention_bias: float tensor with shape [batch_size, 1, 1, input_length].
      Non-padding positions are 0; padding positions are negative infinity.

  Returns:
    float tensor with shape [batch_size, input_length, hidden_size]
  """
  with tf.name_scope("encode"):
    # Prepare inputs to the layer stack by adding positional encodings and
    # applying dropout.
    embedded_inputs = self.embedding_softmax_layer(
        inputs)  # Embed the input token ids.

    # Padding information tensor: 1 at padding positions, 0 elsewhere,
    # with the same shape as inputs.
    inputs_padding = model_utils.get_padding(inputs)

    with tf.name_scope("add_pos_encoding"):
      # Add pos_encoding to embedded_inputs to inject order information.
      length = tf.shape(embedded_inputs)[1]
      pos_encoding = model_utils.get_position_encoding(
          length, self.params["hidden_size"])
      encoder_inputs = embedded_inputs + pos_encoding

    if self.train:  # Apply dropout only in training mode.
      encoder_inputs = tf.nn.dropout(
          encoder_inputs, 1 - self.params["layer_postprocess_dropout"])

    # encoder_inputs shape:  [batch_size, input_length, hidden_size]
    # attention_bias shape:  [batch_size, 1, 1, input_length]
    # inputs_padding shape:  [batch_size, input_length]
    return self.encoder_stack(
        encoder_inputs, attention_bias, inputs_padding)  # Return the encoder output.
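The `attention_bias` described in the docstring above (0 at real tokens, a large negative value at padding positions) is typically derived from the same padding mask. A sketch along the lines of `model_utils.get_padding_bias`, written as an assumption rather than the exact library code:

import tensorflow as tf

_NEG_INF = -1e9  # stands in for -inf so softmax assigns ~0 weight to padding

def get_padding_bias(x, padding_value=0):
  """Attention bias with shape [batch_size, 1, 1, length] (sketch)."""
  with tf.name_scope("attention_bias"):
    padding = tf.cast(tf.equal(x, padding_value), tf.float32)
    attention_bias = padding * _NEG_INF
    # Broadcast over the head and query dimensions of the attention logits.
    return tf.expand_dims(tf.expand_dims(attention_bias, axis=1), axis=1)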
def encode(self, inputs, attention_bias):
  """Generate continuous representation for inputs.

  Args:
    inputs: int tensor with shape [batch_size, input_length].
    attention_bias: float tensor with shape [batch_size, 1, 1, input_length]

  Returns:
    float tensor with shape [batch_size, input_length, hidden_size]
  """
  print('LOOK AT ME')
  print(inputs.get_shape().as_list())
  print(attention_bias.get_shape().as_list())
  with tf.name_scope("encode"):
    # Prepare inputs to the layer stack by adding positional encodings and
    # applying dropout.
    embedded_inputs = self.embedding_softmax_layer(inputs)
    inputs_padding = model_utils.get_padding(inputs)

    with tf.name_scope("add_pos_encoding"):
      length = tf.shape(embedded_inputs)[1]
      pos_encoding = model_utils.get_position_encoding(
          length, self.params["hidden_size"])
      encoder_inputs = embedded_inputs + pos_encoding

    if self.train:
      encoder_inputs = tf.nn.dropout(
          encoder_inputs, 1 - self.params["layer_postprocess_dropout"])

    print('YOOO')
    print(encoder_inputs)
    print(attention_bias)
    print(inputs_padding)
    return self.encoder_stack(encoder_inputs, attention_bias, inputs_padding)
def encode(self, inputs, attention_bias):
  """Generate continuous representation for inputs.

  Args:
    inputs: int tensor with shape [batch_size, input_length].
    attention_bias: float tensor with shape [batch_size, 1, 1, input_length]

  Returns:
    float tensor with shape [batch_size, input_length, hidden_size]
  """
  with tf.name_scope("encode"):
    # Prepare inputs to the layer stack by adding positional encodings and
    # applying dropout.
    embedded_inputs = self.embedding_softmax_layer(inputs)
    inputs_padding = model_utils.get_padding(inputs)

    with tf.name_scope("add_pos_encoding"):
      length = tf.shape(embedded_inputs)[1]
      pos_encoding = model_utils.get_position_encoding(
          length, self.params["hidden_size"])
      encoder_inputs = embedded_inputs + pos_encoding  # shape (batch_size, input_len, h_size)

    with tf.name_scope("add_vir_entities"):
      encoder_inputs = self.add_vir_entities(
          encoder_inputs)  # shape (batch_size, input_len + num_ve, h_size)

    if self.train:
      encoder_inputs = tf.nn.dropout(
          encoder_inputs, 1 - self.params["layer_postprocess_dropout"])

    x = self.encoder_stack(
        encoder_inputs, attention_bias, inputs_padding)  # shape (-1, length, h_size)

    # Remove virtual entities from the encoder output
    x = x[:, :-self.params["num_vir_entities"], :]
    return x  # shape (batch_size, input_length, hidden_size)