def encode(current_input, max_num_frames, num_mfcc_coeffs,
           state_size_1, state_size_2, state_size_3, version):
    """Build a three-layer tanh encoder and return its variables and output.

    Three weight variables (Xavier-uniform initialised) and three bias
    variables (zero initialised) are created with ``version`` appended to
    their names. Each layer computes
    ``tanh(batch_multiply_by_matrix(x, W)) + b``; note that the bias is
    added outside the tanh.

    Args:
        current_input: Tensor fed to the first layer. Presumably its last
            dimension is ``num_mfcc_coeffs`` -- TODO confirm with the caller.
        max_num_frames: Not used by this function; kept in the signature.
        num_mfcc_coeffs: Number of rows of the first weight matrix.
        state_size_1: Columns of ``W1`` and length of ``b1``.
        state_size_2: Columns of ``W2`` and length of ``b2``.
        state_size_3: Columns of ``W3`` and length of ``b3``.
        version: String suffix appended to every variable name.

    Returns:
        ``(encoder, h3)`` where ``encoder`` is the list
        ``[W1, W2, W3, b1, b2, b3]`` and ``h3`` is the last layer's output.
    """
    weight_init = tf.contrib.layers.xavier_initializer(uniform=True)

    weight_specs = (
        ("W1", (num_mfcc_coeffs, state_size_1)),
        ("W2", (state_size_1, state_size_2)),
        ("W3", (state_size_2, state_size_3)),
    )
    bias_specs = (
        ('b1', [state_size_1]),
        ('b2', [state_size_2]),
        ('b3', [state_size_3]),
    )

    # All variables are created before any layer op, weights first.
    weights = [
        tf.get_variable(name + version, shape=shape, initializer=weight_init)
        for name, shape in weight_specs
    ]
    biases = [
        tf.get_variable(name + version, shape=shape,
                        initializer=tf.constant_initializer(0))
        for name, shape in bias_specs
    ]

    activation = current_input
    for weight, bias in zip(weights, biases):
        projected = batch_multiply_by_matrix(batch=activation, matrix=weight)
        activation = tf.tanh(projected) + bias

    return weights + biases, activation
def decode(encoder, current_input, max_num_frames, num_mfcc_coeffs, state_size_1, state_size_2):
    """Decode with the transposes of the encoder's last two entries.

    The first decoder layer uses the transpose of ``encoder[-1]`` followed by
    a tanh; the second uses the transpose of ``encoder[-2]`` with no
    activation.

    Args:
        encoder: Sequence of at least two tensors/variables. It is presumably
            the ``[W1, W2]`` list returned by the matching ``encode`` --
            TODO confirm with the caller. The list is NOT modified.
        current_input: Tensor fed to the first decoder layer.
        max_num_frames: Not used by this function; kept in the signature.
        num_mfcc_coeffs: Not used by this function; kept in the signature.
        state_size_1: Not used by this function; kept in the signature.
        state_size_2: Not used by this function; kept in the signature.

    Returns:
        The output tensor of the second (linear) decoder layer.
    """
    # The previous implementation called encoder.reverse(), which reversed the
    # caller's list in place: a second decode() call on the same list (or any
    # later use of it) then saw the weights in the wrong order. Indexing from
    # the end selects exactly the same entries without that side effect.
    W1 = tf.transpose(encoder[-1])
    W2 = tf.transpose(encoder[-2])

    h1 = tf.tanh(batch_multiply_by_matrix(batch=current_input, matrix=W1))
    h2 = batch_multiply_by_matrix(batch=h1, matrix=W2)
    return h2
def encode(current_input, max_num_frames, num_mfcc_coeffs, state_size_1, state_size_2, version):
    """Build a two-layer, bias-free tanh encoder.

    Args:
        current_input: Tensor fed to the first layer. Presumably its last
            dimension is ``num_mfcc_coeffs`` -- TODO confirm with the caller.
        max_num_frames: Not used by this function; kept in the signature.
        num_mfcc_coeffs: Number of rows of the first weight matrix.
        state_size_1: Columns of the first / rows of the second weight matrix.
        state_size_2: Columns of the second weight matrix.
        version: String suffix appended to both variable names.

    Returns:
        ``(encoder, h2)`` where ``encoder`` is the list ``[W1, W2]`` of weight
        variables and ``h2`` is the output of the second tanh layer.
    """
    weight_init = tf.contrib.layers.xavier_initializer(uniform=True)

    first_weight = tf.get_variable(
        "W1" + version,
        shape=(num_mfcc_coeffs, state_size_1),
        initializer=weight_init,
    )
    second_weight = tf.get_variable(
        "W2" + version,
        shape=(state_size_1, state_size_2),
        initializer=weight_init,
    )

    hidden = tf.tanh(
        batch_multiply_by_matrix(batch=current_input, matrix=first_weight))
    code = tf.tanh(
        batch_multiply_by_matrix(batch=hidden, matrix=second_weight))

    return [first_weight, second_weight], code
def add_prediction_op(self): """Adds the core transformation for this model which transforms a batch of input data into a batch of predictions. In this case, the transformation is a linear layer plus a softmax transformation: Implements a stacked, denoising autoencoder. Autoencoder learns two mappings: (1) Encoder: input ==> hidden layer, and (2) Decoder: hidden layer ==> output layer Returns: pred: A tensor of shape (batch_size, n_classes) """ xavier = tf.contrib.layers.xavier_initializer(uniform=True) W1 = tf.get_variable("W1", shape=(self.config.num_mfcc_coeffs, self.config.state_size_1), initializer=xavier) #b1 = tf.get_variable("b1", shape=(1, self.config.state_size_1)) W2 = tf.get_variable("W2", shape=(self.config.state_size_1, self.config.state_size_2), initializer=xavier) #b2 = tf.get_variable("b2", shape=(1, self.config.state_size_2)) W3 = tf.get_variable("W3", shape=(self.config.state_size_2, self.config.num_mfcc_coeffs), initializer=xavier) #b3 = tf.get_variable("b3", shape=(1, self.config.num_mfcc_coeffs)) #W4 = tf.get_variable("W4", shape=(self.config.state_size_3, self.config.num_features), initializer=xavier) #b4 = tf.get_variable("b4", shape=(1, self.config.num_features)) #W5 = tf.get_variable("W5", shape=(self.config.state_size_4, self.config.num_features), initializer=xavier) #b5 = tf.get_variable("b5", shape=(1, self.config.num_features)) # [batch, max_num_frames, num_mfcc_coeffs] x [num_mfcc_coeffs, state_size1] = [batch, max_num_frames, state_size1] print "inputs shape: ", self.input_placeholder h1 = tf.tanh( batch_multiply_by_matrix(batch=self.input_placeholder, matrix=W1)) print "h1 shape: ", h1 # [batch, max_num_frames, state_size1] x [state_size1, state_size2] = [batch, max_num_frames, state_size2] h2 = tf.tanh(batch_multiply_by_matrix(batch=h1, matrix=W2)) print "h2 shape: ", h2 # [batch, max_num_frames, state_size2] x [state_size2, num_mfcc_coeffs] = [batch, max_num_frames, num_mfcc_coeffs] mfcc_preds = batch_multiply_by_matrix(batch=h2, 
matrix=W3) print "mfcc preds shape: ", mfcc_preds self.mfcc = mfcc_preds return mfcc_preds
def add_prediction_op(self): """Adds the core transformation for this model which transforms a batch of input data into a batch of predictions. The network is a 3-layer ANN with weights and no biases. Returns: pred: A tensor of shape (batch_size, max_num_frames, 2*num_samples_per_frame) """ xavier = tf.contrib.layers.xavier_initializer() # It's 2 * num_samples_per_frame because we're dealing with complex numbers W1 = tf.get_variable("W1", shape=(2*self.config.num_samples_per_frame, self.config.state_size_1), dtype=tf.float32, initializer=xavier) W2 = tf.get_variable("W2", shape=(self.config.state_size_1, self.config.state_size_2), dtype=tf.float32, initializer=xavier) W3 = tf.get_variable("W3", shape=(self.config.state_size_2, 2*self.config.num_samples_per_frame), dtype=tf.float32, initializer=xavier) # [batch, max_num_frames, 2*num_samples_per_frame] x [2*num_samples_per_frame, state_size1] = [batch, max_num_frames, state_size1] print "inputs shape: ", self.input_placeholder twice_as_long_input = self.complex_to_float_tensor(self.input_placeholder) print "twice_as_long_input shape: ", twice_as_long_input h1 = tf.tanh(batch_multiply_by_matrix(batch=twice_as_long_input, matrix=W1)) print "h1 shape: ", h1 # [batch, max_num_frames, state_size1] x [state_size1, state_size2] = [batch, max_num_frames, state_size2] h2 = tf.tanh(batch_multiply_by_matrix(batch=h1, matrix=W2)) print "h2 shape: ", h2 # [batch, max_num_frames, state_size2] x [state_size2, 2*num_samples_per_frame] = [batch, max_num_frames, 2*num_samples_per_frame] fft_preds_real_2x = batch_multiply_by_matrix(batch=h2, matrix=W3) print "fft preds real 2x shape: ", fft_preds_real_2x # Convert back into complex numbers fft_preds_reals = tf.slice(fft_preds_real_2x, [0, 0, 0], [-1, self.config.max_num_frames, self.config.num_samples_per_frame]) fft_preds_complexes = tf.slice(fft_preds_real_2x, [0, 0, self.config.num_samples_per_frame], [-1, self.config.max_num_frames, self.config.num_samples_per_frame]) self.fft = 
tf.complex(fft_preds_reals, fft_preds_complexes) print "fft preds complex shape: ", self.fft # Return the twice as long real-valued tensor return fft_preds_real_2x
def decode(encoder, current_input, max_num_frames, num_mfcc_coeffs,
           state_size_1, state_size_2, state_size_3, version):
    """Build a three-layer decoder whose weights are tied to the encoder.

    The decoder applies the transposes of the encoder's three weight matrices
    in reverse order (``W3^T``, ``W2^T``, ``W1^T``). The first two layers
    compute ``tanh(batch_multiply_by_matrix(x, W)) + b`` (bias added outside
    the tanh, matching the three-layer ``encode``); the last layer is linear.
    Three new zero-initialised bias variables named ``'b4'``, ``'b5'`` and
    ``'b6'`` (plus ``version``) are created.

    Fixes relative to the previous implementation:
      * The third bias was assigned to ``b2`` a second time, so ``b3`` was
        undefined and the function raised ``NameError``.
      * ``encoder.reverse()`` reversed the caller's list in place.
      * After reversing the ``[W1, W2, W3, b1, b2, b3]`` list returned by the
        three-layer ``encode``, indices 0..2 selected the biases rather than
        the weights. The weights are now taken from ``encoder[0:3]``, which
        is also identical to the old selection for a plain ``[W1, W2, W3]``
        list.
      * The first two bias shapes did not match the layer outputs when the
        state sizes differ; they now follow the tied-weight output widths.

    Args:
        encoder: Sequence whose first three entries are the encoder weights
            ``W1``, ``W2``, ``W3``. Assumes they have the shapes created by
            the three-layer ``encode``: ``(num_mfcc_coeffs, state_size_1)``,
            ``(state_size_1, state_size_2)``, ``(state_size_2, state_size_3)``.
            The sequence is NOT modified.
        current_input: Tensor fed to the first decoder layer (presumably the
            encoder output, last dimension ``state_size_3`` -- TODO confirm).
        max_num_frames: Not used by this function; kept in the signature.
        num_mfcc_coeffs: Length of the output-layer bias.
        state_size_1: Length of the second decoder bias.
        state_size_2: Length of the first decoder bias.
        state_size_3: Not used directly; kept in the signature.
        version: String suffix appended to every bias variable name.

    Returns:
        The output tensor of the final (linear) decoder layer.
    """
    # Tied weights, deepest encoder layer first; no mutation of `encoder`.
    W1 = tf.transpose(encoder[2])  # state_size_3 -> state_size_2
    W2 = tf.transpose(encoder[1])  # state_size_2 -> state_size_1
    W3 = tf.transpose(encoder[0])  # state_size_1 -> num_mfcc_coeffs

    # Each bias length equals the output width of the layer it is added to.
    b1 = tf.get_variable('b4' + version, shape=[state_size_2],
                         initializer=tf.constant_initializer(0))
    b2 = tf.get_variable('b5' + version, shape=[state_size_1],
                         initializer=tf.constant_initializer(0))
    b3 = tf.get_variable('b6' + version, shape=[num_mfcc_coeffs],
                         initializer=tf.constant_initializer(0))

    h1 = tf.tanh(batch_multiply_by_matrix(batch=current_input, matrix=W1)) + b1
    h2 = tf.tanh(batch_multiply_by_matrix(batch=h1, matrix=W2)) + b2
    h3 = batch_multiply_by_matrix(batch=h2, matrix=W3) + b3
    return h3
def forward_prop(W1, W2, W3, W4, W5, b1, b2, b3, b4, b5, inputs, num_frames, num_mfcc): h1 = tf.nn.relu(batch_multiply_by_matrix(matrix=W1, batch=inputs) + b1) print "h1 shape: ", h1 # [batch, state_size1] x [state_size1, state_size2] = [batch, state_size2] h2 = tf.nn.relu(tf.matmul(h1, W2) + b2) print "h2 shape: ", h2 # [batch, state_size2] x [state_size2, state_size3] = [batch, state_size3] h3 = tf.nn.relu(tf.matmul(h2, W3) + b3) print "h3 shape: ", h3 # [batch, state_size3] x [state_size3, max_num_frames * num_mfcc_coeffs] = [batch, max_num_frames, num_mfcc_coeffs] print "W4 shape: ", W4 h4 = tf.nn.relu(tf.matmul(h3, W4) + b4) mfcc_preds = tf.nn.relu(tf.matmul(h4, W5) + b5) mfcc_preds = tf.reshape(mfcc_preds, (-1, num_frames, num_mfcc)) print "mfcc preds shape: ", mfcc_preds return mfcc_preds