    def encode(self, inputs, sequence_lengths, is_training=False):
        '''
        get the high level feature representation

        Args:
            inputs: the input to the layer as a
                [batch_size, max_length, dim] tensor
            sequence_lengths: the length of the input sequences
            is_training: whether or not the network is in training mode

        Returns:
            the output of the layer as a
            [batch_size, max_length, output_dim] tensor
        '''

        outputs = inputs
        output_seq_lengths = sequence_lengths

        with tf.variable_scope('inlayer'):

            #apply the linear layer
            outputs = self.hidden_layer(outputs)

            #apply the nonlinearity
            outputs = tf.nn.relu(outputs)

            if float(self.conf['listener_dropout']) < 1 and is_training:
                outputs = tf.nn.dropout(
                    outputs, float(self.conf['listener_dropout']))

        for l in range(int(self.conf['listener_numlayers'])):

            with tf.variable_scope('layer%d' % l):

                #apply the linear layer
                hidden = self.hidden_layer(outputs)

                #apply the nonlinearity, averaged with the residual connection
                outputs = (tf.nn.relu(hidden) + outputs)/2

                if float(self.conf['listener_dropout']) < 1 and is_training:
                    outputs = tf.nn.dropout(
                        outputs, float(self.conf['listener_dropout']))

                #apply the pyramid stack
                outputs, output_seq_lengths = pyramid_stack(
                    outputs, output_seq_lengths)

        outputs = self.outlayer(outputs, output_seq_lengths)

        if float(self.conf['listener_dropout']) < 1 and is_training:
            outputs = tf.nn.dropout(
                outputs, float(self.conf['listener_dropout']))

        return outputs
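# For reference, a minimal sketch of what the pyramid stack operation used
# above might do, assuming it behaves like the pyramidal stacking in
# Listen, Attend and Spell: pairs of consecutive frames are concatenated,
# halving the time dimension and doubling the feature dimension. The name
# `pyramid_stack_sketch` is hypothetical; the real `pyramid_stack` op in
# this repo may differ in its details (e.g. padding of odd-length inputs).
def pyramid_stack_sketch(inputs, sequence_lengths):
    '''
    concatenate each pair of consecutive time steps (sketch)

    Args:
        inputs: a [batch_size, time, dim] tensor with a statically
            known feature dimension
        sequence_lengths: a [batch_size] vector of sequence lengths

    Returns:
        a [batch_size, ceil(time/2), dim*2] tensor and the halved
        sequence lengths
    '''

    shape = tf.shape(inputs)
    batch_size, max_time = shape[0], shape[1]
    dim = inputs.get_shape().as_list()[2]

    #pad the time dimension to an even length so it can be split in pairs
    padded = tf.pad(inputs, [[0, 0], [0, max_time % 2], [0, 0]])
    padded_time = max_time + max_time % 2

    #merge every pair of frames into one frame with twice the features
    stacked = tf.reshape(padded, [batch_size, padded_time // 2, dim * 2])

    #sequences shrink by a factor of two, rounding up for odd lengths
    stacked_lengths = tf.cast(
        tf.ceil(tf.cast(sequence_lengths, tf.float32) / 2), tf.int32)

    return stacked, stacked_lengths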
    def __call__(self, inputs, sequence_lengths, scope=None):
        """
        Create the variables and do the forward computation

        Args:
            inputs: a time minor tensor of shape
                [batch_size, time, input_size]
            sequence_lengths: the length of the input sequences
            scope: the variable scope sets the namespace under which
                the variables created during this call will be stored

        Returns:
            the output of the layer, the concatenated outputs of the
            forward and backward pass of shape
            [batch_size, time/2, input_size*2], and the halved
            sequence lengths
        """

        with tf.variable_scope(scope or type(self).__name__):

            #apply the blstm layer
            outputs = self.blstm(inputs, sequence_lengths)

            #stack consecutive time steps to halve the sequence length
            stacked_outputs, output_seq_lengths = ops.pyramid_stack(
                outputs, sequence_lengths)

            return stacked_outputs, output_seq_lengths
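# The `self.blstm` attribute applied above is not defined in this section.
# Below is a minimal sketch of what such a layer might look like, using
# TF1's tf.nn.bidirectional_dynamic_rnn; the class name BLSTMLayerSketch
# and its constructor are assumptions, not the repo's actual layer.
class BLSTMLayerSketch(object):
    '''a hypothetical bidirectional LSTM layer (sketch)'''

    def __init__(self, num_units):
        self.num_units = num_units

    def __call__(self, inputs, sequence_lengths, scope=None):
        with tf.variable_scope(scope or 'blstm'):

            #one LSTM cell per direction
            fw_cell = tf.contrib.rnn.LSTMCell(self.num_units)
            bw_cell = tf.contrib.rnn.LSTMCell(self.num_units)

            #run the forward and backward passes over the padded batch,
            #masking out the padded time steps with sequence_lengths
            outputs, _ = tf.nn.bidirectional_dynamic_rnn(
                fw_cell, bw_cell, inputs,
                sequence_length=sequence_lengths,
                dtype=tf.float32)

            #concatenate forward and backward outputs along the feature
            #dimension: [batch_size, time, num_units*2]
            return tf.concat(outputs, 2)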