コード例 #1
0
def encode(current_input, max_num_frames, num_mfcc_coeffs, state_size_1,
           state_size_2, state_size_3, version):
    """Build a three-layer tanh encoder and return its variables and output.

    Three weight matrices (Xavier-uniform initialised) and three bias vectors
    (zero initialised) are created through `tf.get_variable`. `version` is
    appended to every variable name ("W1" + version, ..., "b3" + version).

    Args:
        current_input: batch fed into the first layer.
        max_num_frames: not used by this function.
        num_mfcc_coeffs: number of rows of the first weight matrix.
        state_size_1: number of columns of W1 / length of b1.
        state_size_2: number of columns of W2 / length of b2.
        state_size_3: number of columns of W3 / length of b3.
        version: string suffix concatenated onto each variable name.

    Returns:
        A pair `(encoder, h3)`: `encoder` is the list
        `[W1, W2, W3, b1, b2, b3]` and `h3` is the output of the last layer.
    """
    weight_init = tf.contrib.layers.xavier_initializer(uniform=True)
    # layer_dims[k - 1] -> layer_dims[k] is the (rows, cols) of weight matrix k.
    layer_dims = [num_mfcc_coeffs, state_size_1, state_size_2, state_size_3]
    layer_ids = (1, 2, 3)

    weights = [
        tf.get_variable("W%d" % layer + version,
                        shape=(layer_dims[layer - 1], layer_dims[layer]),
                        initializer=weight_init)
        for layer in layer_ids
    ]
    biases = [
        tf.get_variable("b%d" % layer + version,
                        shape=[layer_dims[layer]],
                        initializer=tf.constant_initializer(0))
        for layer in layer_ids
    ]

    activation = current_input
    for weight, bias in zip(weights, biases):
        # The bias is added to the tanh output, i.e. outside the
        # non-linearity, exactly as in the layer-by-layer formulation.
        activation = tf.tanh(
            batch_multiply_by_matrix(batch=activation, matrix=weight)) + bias

    return weights + biases, activation
コード例 #2
0
def decode(encoder, current_input, max_num_frames, num_mfcc_coeffs, state_size_1, state_size_2):
    """Decode `current_input` with the transposes of the encoder's last two entries.

    The last element of `encoder` (transposed) drives a tanh layer and the
    second-to-last element (transposed) drives the linear output layer.

    Unlike the earlier implementation, `encoder` is NOT reversed in place:
    the caller's list is left untouched, so decoding repeatedly with the same
    list always selects the same matrices.

    Args:
        encoder: list of encoder variables; only its last two entries are read.
        current_input: batch fed into the first decoding layer.
        max_num_frames: not used by this function.
        num_mfcc_coeffs: not used by this function.
        state_size_1: not used by this function.
        state_size_2: not used by this function.

    Returns:
        The output of the second (linear, no activation) decoding layer.
    """
    # Negative indexing picks the same elements that `encoder.reverse()`
    # followed by encoder[0] / encoder[1] did, without mutating the argument.
    W1 = tf.transpose(encoder[-1])
    W2 = tf.transpose(encoder[-2])

    h1 = tf.tanh(batch_multiply_by_matrix(batch=current_input, matrix=W1))
    h2 = batch_multiply_by_matrix(batch=h1, matrix=W2)

    return h2
コード例 #3
0
def encode(current_input, max_num_frames, num_mfcc_coeffs, state_size_1, state_size_2, version):
    """Build a two-layer, bias-free tanh encoder.

    Two Xavier-uniform initialised weight matrices are created through
    `tf.get_variable`, named "W1" + version and "W2" + version.

    Args:
        current_input: batch fed into the first layer.
        max_num_frames: not used by this function.
        num_mfcc_coeffs: number of rows of the first weight matrix.
        state_size_1: number of columns of W1 and rows of W2.
        state_size_2: number of columns of W2.
        version: string suffix concatenated onto each variable name.

    Returns:
        A pair `(encoder, h2)`: `encoder` is the list `[W1, W2]` and `h2` is
        the output of the second tanh layer.
    """
    weight_init = tf.contrib.layers.xavier_initializer(uniform=True)

    first_weights = tf.get_variable(
        "W1" + version,
        shape=(num_mfcc_coeffs, state_size_1),
        initializer=weight_init)
    second_weights = tf.get_variable(
        "W2" + version,
        shape=(state_size_1, state_size_2),
        initializer=weight_init)

    hidden = tf.tanh(
        batch_multiply_by_matrix(batch=current_input, matrix=first_weights))
    code = tf.tanh(
        batch_multiply_by_matrix(batch=hidden, matrix=second_weights))

    return [first_weights, second_weights], code
コード例 #4
0
    def add_prediction_op(self):
        """Adds the core transformation for this model which transforms a batch of input
				data into a batch of predictions. In this case, the transformation is a linear layer plus a
				softmax transformation:
			
				Implements a stacked, denoising autoencoder. 
				Autoencoder learns two mappings: (1) Encoder: input ==> hidden layer, and (2) Decoder: hidden layer ==> output layer
				

				Returns:
						pred: A tensor of shape (batch_size, n_classes)
				"""

        xavier = tf.contrib.layers.xavier_initializer(uniform=True)
        W1 = tf.get_variable("W1",
                             shape=(self.config.num_mfcc_coeffs,
                                    self.config.state_size_1),
                             initializer=xavier)
        #b1 = tf.get_variable("b1", shape=(1, self.config.state_size_1))
        W2 = tf.get_variable("W2",
                             shape=(self.config.state_size_1,
                                    self.config.state_size_2),
                             initializer=xavier)
        #b2 = tf.get_variable("b2", shape=(1, self.config.state_size_2))
        W3 = tf.get_variable("W3",
                             shape=(self.config.state_size_2,
                                    self.config.num_mfcc_coeffs),
                             initializer=xavier)
        #b3 = tf.get_variable("b3", shape=(1, self.config.num_mfcc_coeffs))
        #W4 = tf.get_variable("W4", shape=(self.config.state_size_3, self.config.num_features), initializer=xavier)
        #b4 = tf.get_variable("b4", shape=(1, self.config.num_features))
        #W5 = tf.get_variable("W5", shape=(self.config.state_size_4, self.config.num_features), initializer=xavier)
        #b5 = tf.get_variable("b5", shape=(1, self.config.num_features))

        # [batch, max_num_frames, num_mfcc_coeffs] x [num_mfcc_coeffs, state_size1] = [batch, max_num_frames, state_size1]
        print "inputs shape: ", self.input_placeholder
        h1 = tf.tanh(
            batch_multiply_by_matrix(batch=self.input_placeholder, matrix=W1))
        print "h1 shape: ", h1

        # [batch, max_num_frames, state_size1] x [state_size1, state_size2] = [batch, max_num_frames, state_size2]
        h2 = tf.tanh(batch_multiply_by_matrix(batch=h1, matrix=W2))
        print "h2 shape: ", h2

        # [batch, max_num_frames, state_size2] x [state_size2, num_mfcc_coeffs] = [batch, max_num_frames, num_mfcc_coeffs]
        mfcc_preds = batch_multiply_by_matrix(batch=h2, matrix=W3)
        print "mfcc preds shape: ", mfcc_preds

        self.mfcc = mfcc_preds
        return mfcc_preds
		def add_prediction_op(self): 
				"""Adds the core transformation for this model which transforms a batch of input
				data into a batch of predictions. 
			
				The network is a 3-layer ANN with weights and no biases.	

				Returns:
						pred: A tensor of shape (batch_size, max_num_frames, 2*num_samples_per_frame)
				"""

				xavier = tf.contrib.layers.xavier_initializer()

				# It's 2 * num_samples_per_frame because we're dealing with complex numbers
				W1 = tf.get_variable("W1", shape=(2*self.config.num_samples_per_frame, self.config.state_size_1), dtype=tf.float32, initializer=xavier) 
				W2 = tf.get_variable("W2", shape=(self.config.state_size_1, self.config.state_size_2), dtype=tf.float32, initializer=xavier) 
				W3 = tf.get_variable("W3", shape=(self.config.state_size_2, 2*self.config.num_samples_per_frame), dtype=tf.float32, initializer=xavier) 

				# [batch, max_num_frames, 2*num_samples_per_frame] x [2*num_samples_per_frame, state_size1] = [batch, max_num_frames, state_size1]
				print "inputs shape: ", self.input_placeholder
				twice_as_long_input = self.complex_to_float_tensor(self.input_placeholder)
				print "twice_as_long_input shape: ", twice_as_long_input
				h1 = tf.tanh(batch_multiply_by_matrix(batch=twice_as_long_input, matrix=W1))
				print "h1 shape: ", h1

				# [batch, max_num_frames, state_size1] x [state_size1, state_size2] = [batch, max_num_frames, state_size2]
				h2 = tf.tanh(batch_multiply_by_matrix(batch=h1, matrix=W2))
				print "h2 shape: ", h2
				
				# [batch, max_num_frames, state_size2] x [state_size2, 2*num_samples_per_frame] = [batch, max_num_frames, 2*num_samples_per_frame]
				fft_preds_real_2x = batch_multiply_by_matrix(batch=h2, matrix=W3) 
				print "fft preds real 2x shape: ", fft_preds_real_2x

				# Convert back into complex numbers
				fft_preds_reals = tf.slice(fft_preds_real_2x, [0, 0, 0], 
																											[-1, self.config.max_num_frames, self.config.num_samples_per_frame]) 
				fft_preds_complexes = tf.slice(fft_preds_real_2x, [0, 0, self.config.num_samples_per_frame],
																													[-1, self.config.max_num_frames, self.config.num_samples_per_frame])
				self.fft = tf.complex(fft_preds_reals, fft_preds_complexes)
				print "fft preds complex shape: ", self.fft 

				# Return the twice as long real-valued tensor
				return fft_preds_real_2x 
コード例 #6
0
def decode(encoder, current_input, max_num_frames, num_mfcc_coeffs,
           state_size_1, state_size_2, state_size_3, version):
    """Decode `current_input` with the transposes of the encoder's last three entries.

    The last, second-to-last and third-to-last elements of `encoder` are
    transposed and used, in that order, as the weights of two tanh layers and
    a final linear layer. Three zero-initialised bias variables named
    "b4" + version, "b5" + version and "b6" + version are created and added
    to the respective layer outputs (outside the tanh for the first two).

    Fixes relative to the earlier implementation:
      * the third bias was assigned to `b2`, overwriting the second bias and
        leaving `b3` undefined when the output layer was built;
      * `encoder` is no longer reversed in place, so the caller's list is
        left untouched.

    Args:
        encoder: list of encoder variables; only its last three entries are
            read.
        current_input: batch fed into the first decoding layer.
        max_num_frames: not used by this function.
        num_mfcc_coeffs: length of the output-layer bias.
        state_size_1: not used by this function.
        state_size_2: length of the second bias.
        state_size_3: length of the first bias.
        version: string suffix concatenated onto each bias variable name.

    Returns:
        The output of the final (linear, no activation) decoding layer.
    """
    # Negative indexing picks the same elements that `encoder.reverse()`
    # followed by encoder[0..2] did, without mutating the argument.
    W1 = tf.transpose(encoder[-1])
    W2 = tf.transpose(encoder[-2])
    W3 = tf.transpose(encoder[-3])

    # NOTE(review): b1/b2 are sized state_size_3/state_size_2 as before;
    # confirm these match the output widths of W1/W2 for the encoder list
    # and state sizes the caller actually passes.
    b1 = tf.get_variable('b4' + version,
                         shape=[state_size_3],
                         initializer=tf.constant_initializer(0))
    b2 = tf.get_variable('b5' + version,
                         shape=[state_size_2],
                         initializer=tf.constant_initializer(0))
    b3 = tf.get_variable('b6' + version,
                         shape=[num_mfcc_coeffs],
                         initializer=tf.constant_initializer(0))

    h1 = tf.tanh(batch_multiply_by_matrix(batch=current_input, matrix=W1)) + b1
    h2 = tf.tanh(batch_multiply_by_matrix(batch=h1, matrix=W2)) + b2
    h3 = batch_multiply_by_matrix(batch=h2, matrix=W3) + b3

    return h3
コード例 #7
0
def forward_prop(W1, W2, W3, W4, W5, b1, b2, b3, b4, b5, inputs, num_frames,
                 num_mfcc):
    h1 = tf.nn.relu(batch_multiply_by_matrix(matrix=W1, batch=inputs) + b1)
    print "h1 shape: ", h1

    # [batch, state_size1] x [state_size1, state_size2] = [batch, state_size2]
    h2 = tf.nn.relu(tf.matmul(h1, W2) + b2)
    print "h2 shape: ", h2

    # [batch, state_size2] x [state_size2, state_size3] = [batch, state_size3]
    h3 = tf.nn.relu(tf.matmul(h2, W3) + b3)
    print "h3 shape: ", h3

    # [batch, state_size3] x [state_size3, max_num_frames * num_mfcc_coeffs] = [batch, max_num_frames, num_mfcc_coeffs]
    print "W4 shape: ", W4
    h4 = tf.nn.relu(tf.matmul(h3, W4) + b4)

    mfcc_preds = tf.nn.relu(tf.matmul(h4, W5) + b5)
    mfcc_preds = tf.reshape(mfcc_preds, (-1, num_frames, num_mfcc))
    print "mfcc preds shape: ", mfcc_preds

    return mfcc_preds