def encode(self, inputs, input_seq_length, is_training):
    '''Create the variables and do the forward computation.

    Args:
        inputs: the inputs to the neural network, this is a dictionary
            of [batch_size x time x ...] tensors
        input_seq_length: the sequence lengths of the input utterances,
            this is a dictionary of [batch_size] vectors
        is_training: whether or not the network is in training mode

    Returns:
        - the outputs of the encoder as a dictionary of
          [batch_size x time x ...] tensors
        - the sequence lengths of the outputs as a dictionary of
          [batch_size] tensors
    '''
    encoded = {}
    encoded_seq_length = {}

    for inp in inputs:
        with tf.variable_scope(inp):
            # add input noise during training
            std_input_noise = float(self.conf['input_noise'])
            if is_training and std_input_noise > 0:
                noisy_inputs = inputs[inp] + tf.random_normal(
                    tf.shape(inputs[inp]), stddev=std_input_noise)
            else:
                noisy_inputs = inputs[inp]

            outputs = noisy_inputs
            output_seq_lengths = input_seq_length[inp]

            # stack of pyramidal BLSTM layers, each shortening the
            # time axis by a factor of pyramid_steps
            for l in range(int(self.conf['num_layers'])):
                outputs, output_seq_lengths = layer.pblstm(
                    inputs=outputs,
                    sequence_length=output_seq_lengths,
                    num_units=int(self.conf['num_units']),
                    num_steps=int(self.conf['pyramid_steps']),
                    scope='layer%d' % l)

                # note: conf['dropout'] is used as a keep probability here
                if float(self.conf['dropout']) < 1 and is_training:
                    outputs = tf.nn.dropout(
                        outputs, float(self.conf['dropout']))

            # final, non-pyramidal BLSTM layer
            outputs = layer.blstm(
                inputs=outputs,
                sequence_length=output_seq_lengths,
                num_units=int(self.conf['num_units']),
                scope='layer%d' % int(self.conf['num_layers']))

            if float(self.conf['dropout']) < 1 and is_training:
                outputs = tf.nn.dropout(
                    outputs, float(self.conf['dropout']))

            encoded[inp] = outputs
            encoded_seq_length[inp] = output_seq_lengths

    return encoded, encoded_seq_length
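# `layer.pblstm` is defined elsewhere in the repo. Below is a minimal
# sketch of a pyramidal BLSTM in the same TF1 style, assuming the usual
# Listen, Attend and Spell construction: `num_steps` consecutive frames
# are folded into the feature axis before the recurrence, so each layer
# divides the time axis (and the sequence lengths) by `num_steps`. The
# function body is an assumption, not the repo's implementation.

import tensorflow as tf

def pblstm(inputs, sequence_length, num_units, num_steps=2, scope=None):
    '''Pyramidal BLSTM sketch: stack num_steps frames, then run a BLSTM.

    Assumes the feature dimension of `inputs` is static.
    '''
    with tf.variable_scope(scope or 'pblstm'):
        batch_size = tf.shape(inputs)[0]
        max_time = tf.shape(inputs)[1]
        feat_dim = inputs.get_shape().as_list()[-1]

        # pad the time axis to a multiple of num_steps
        pad = (num_steps - max_time % num_steps) % num_steps
        inputs = tf.pad(inputs, [[0, 0], [0, pad], [0, 0]])

        # fold num_steps consecutive frames into the feature axis,
        # halving (for num_steps=2) the number of time steps
        stacked = tf.reshape(
            inputs, [batch_size, -1, feat_dim * num_steps])
        stacked_length = tf.cast(
            tf.ceil(tf.cast(sequence_length, tf.float32) / num_steps),
            tf.int32)

        fw_cell = tf.nn.rnn_cell.LSTMCell(num_units)
        bw_cell = tf.nn.rnn_cell.LSTMCell(num_units)
        outputs, _ = tf.nn.bidirectional_dynamic_rnn(
            fw_cell, bw_cell, stacked,
            sequence_length=stacked_length, dtype=tf.float32)

        # concatenate the forward and backward outputs
        return tf.concat(outputs, axis=-1), stacked_length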
def encode(self, features, len_feas):
    '''Create the variables and do the forward computation.

    Args:
        features: [batch_size x time x ...] tensor
        len_feas: [batch_size] vector of input sequence lengths

    Training mode is taken from self.is_train.

    Returns:
        - [batch_size x time x ...] tensor
        - [batch_size] tensor
    '''
    num_pblayers = self.args.model.encoder.num_pblayers
    num_blayers = self.args.model.encoder.num_blayers
    num_cell_units = self.args.model.encoder.num_cell_units
    dropout = self.args.model.encoder.dropout

    outputs = features
    output_seq_lengths = len_feas

    # stack of pyramidal BLSTM layers, each halving the time axis
    for l in range(num_pblayers):
        outputs, output_seq_lengths = layer.pblstm(
            inputs=outputs,
            sequence_length=output_seq_lengths,
            num_units=num_cell_units,
            num_steps=2,
            layer_norm=True,
            scope='en_pblstm_%d' % l)

        # note: dropout is a drop rate here, so keep_prob = 1 - dropout
        if dropout > 0 and self.is_train:
            outputs = tf.nn.dropout(outputs, keep_prob=1.0 - dropout)

    # stack of plain BLSTM layers at the reduced frame rate
    for l in range(num_blayers):
        outputs = layer.blstm(
            inputs=outputs,
            sequence_length=output_seq_lengths,
            num_units=num_cell_units,
            scope='en_blstm_%d' % (l + num_pblayers))

        if dropout > 0 and self.is_train:
            outputs = tf.nn.dropout(outputs, keep_prob=1.0 - dropout)

    return outputs, output_seq_lengths
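# Worked example of the pyramid's length arithmetic. This is a sketch:
# the ceil-based rounding matches the explicit length computation in
# the conv front-end of the next version, and is what a padding pblstm
# would produce.

import math

def pyramid_length(length, num_pblayers, num_steps=2):
    # each pblstm layer divides the number of frames by num_steps
    for _ in range(num_pblayers):
        length = math.ceil(length / num_steps)
    return length

# e.g. with num_pblayers = 3: a 1000-frame utterance leaves the pyramid
# with ceil(ceil(ceil(1000/2)/2)/2) = 125 frames; the plain blstm
# layers do not change the sequence lengths.
assert pyramid_length(1000, num_pblayers=3) == 125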
def encode(self, features, len_feas):
    '''Create the variables and do the forward computation.

    Args:
        features: [batch_size x time x ...] tensor
        len_feas: [batch_size] vector of input sequence lengths

    Training mode is taken from self.is_train.

    Returns:
        - [batch_size x time x ...] tensor
        - [batch_size] tensor
    '''
    num_pblayers = self.args.model.encoder.num_pblayers
    num_blayers = self.args.model.encoder.num_blayers
    num_cell_units = self.args.model.encoder.num_cell_units
    dropout = self.args.model.encoder.dropout
    size_feat = self.args.data.dim_input

    # the first cnn layer: treat the stacked features as a
    # [time x (size_feat/3) x 3] "image" and downsample both the time
    # and the frequency axis by 2 with a strided conv
    size_batch = tf.shape(features)[0]
    size_length = tf.shape(features)[1]
    x = tf.reshape(features,
                   [size_batch, size_length, int(size_feat / 3), 3])
    x = normal_conv(
        inputs=x,
        filter_num=64,
        kernel=(3, 3),
        stride=(2, 2),
        padding='SAME',
        use_relu=True,
        name="conv",
        w_initializer=None,
        norm_type='layer')

    # alternative conv front-end, kept disabled:
    # conv_output = tf.expand_dims(features, -1)
    # len_sequence = len_feas
    # conv_output, len_sequence, size_feat = conv_layer(
    #     inputs=conv_output,
    #     len_sequence=len_sequence,
    #     size_feat=size_feat,
    #     num_filter=64,
    #     kernel=(3, 3),
    #     stride=(2, 2),
    #     scope='en_conv_0')

    # flatten the conv output back to [batch x time x feat]; the
    # hardcoded 40 is presumably size_feat / 3 (i.e. dim_input = 120),
    # halved by the stride-2 conv and multiplied by the 64 filters
    size_feat = int(np.ceil(40 / 2)) * 64
    size_length = tf.cast(
        tf.ceil(tf.cast(size_length, tf.float32) / 2), tf.int32)
    output_seq_lengths = tf.cast(
        tf.ceil(tf.cast(len_feas, tf.float32) / 2), tf.int32)
    outputs = tf.reshape(x, [size_batch, size_length, size_feat])

    # the second part: pyramidal BLSTM layers, each halving the time axis
    for l in range(num_pblayers):
        outputs, output_seq_lengths = layer.pblstm(
            inputs=outputs,
            sequence_length=output_seq_lengths,
            num_units=num_cell_units,
            num_steps=2,
            layer_norm=True,
            scope='en_pblstm_%d' % l)

        if dropout > 0 and self.is_train:
            outputs = tf.nn.dropout(outputs, keep_prob=1.0 - dropout)

    # the third part: plain BLSTM layers at the reduced frame rate
    for l in range(num_blayers):
        outputs = layer.blstm(
            inputs=outputs,
            sequence_length=output_seq_lengths,
            num_units=num_cell_units,
            scope='en_blstm_%d' % (l + num_pblayers))

        if dropout > 0 and self.is_train:
            outputs = tf.nn.dropout(outputs, keep_prob=1.0 - dropout)

    return outputs, output_seq_lengths
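# `normal_conv` is defined elsewhere in the repo. Below is a minimal
# sketch of what the call site implies, assuming it wraps a plain 2-D
# convolution with optional layer normalization and ReLU; the argument
# names follow the call above, but the body is an assumption, not the
# repo's implementation.

import tensorflow as tf

def normal_conv(inputs, filter_num, kernel, stride, padding='SAME',
                use_relu=True, name='conv', w_initializer=None,
                norm_type=None):
    '''Conv2D + optional layer norm + optional ReLU (sketch).'''
    with tf.variable_scope(name):
        x = tf.layers.conv2d(
            inputs, filters=filter_num, kernel_size=kernel,
            strides=stride, padding=padding,
            kernel_initializer=w_initializer)
        if norm_type == 'layer':
            x = tf.contrib.layers.layer_norm(x)
        if use_relu:
            x = tf.nn.relu(x)
    return x

# Shape check under the dim_input = 120 assumption: features
# [B, T, 120] are reshaped to [B, T, 40, 3]; the stride-2 conv gives
# [B, ceil(T/2), 20, 64], which flattens to [B, ceil(T/2), 1280],
# matching size_feat = ceil(40 / 2) * 64 above.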