Esempio n. 1
0
    def loss(self, input_batch, name='wavenet'):
        '''Creates a WaveNet network and returns the autoencoding loss.

        The variables are all scoped to the given name.
        '''
        with tf.name_scope(name):
            input_batch = mu_law_encode(input_batch,
                                        self.quantization_channels)
            encoded = self._one_hot(input_batch)
            raw_output = self._create_network(encoded)

            with tf.name_scope('loss'):
                # Shift original input left by one sample, which means that
                # each output sample has to predict the next input sample.
                shifted = tf.slice(encoded, [0, 1, 0],
                                   [-1, tf.shape(encoded)[1] - 1, -1])
                shifted = tf.pad(shifted, [[0, 0], [0, 1], [0, 0]])

                prediction = tf.reshape(raw_output,
                                        [-1, self.quantization_channels])
                loss = tf.nn.softmax_cross_entropy_with_logits(
                    prediction,
                    tf.reshape(shifted, [-1, self.quantization_channels]))
                reduced_loss = tf.reduce_mean(loss)

                tf.scalar_summary('loss', reduced_loss)

        return reduced_loss
    def testEncodeDecode(self):
        x = np.linspace(-1, 1, 1000).astype(np.float32)
        channels = 256

        # Test whether decoded signal is roughly equal to
        # what was encoded before
        with self.test_session() as sess:
            encoded = mu_law_encode(x, channels)
            x1 = sess.run(mu_law_decode(encoded, channels))

        self.assertAllClose(x, x1, rtol=1e-1, atol=0.05)

        # Make sure that re-encoding leaves the waveform invariant
        with self.test_session() as sess:
            encoded = mu_law_encode(x1, channels)
            x2 = sess.run(mu_law_decode(encoded, channels))

        self.assertAllClose(x1, x2)
def create_seed(filename,
                sample_rate,
                quantization_channels,
                window_size=WINDOW):
    audio, _ = librosa.load(filename, sr=sample_rate, mono=True)

    quantized = mu_law_encode(audio, quantization_channels)
    cut_index = tf.size(quantized) + tf.constant(window_size) - tf.constant(1)

    return quantized[:cut_index]
    def testEncodeNegativeChannelSize(self):
        np.random.seed(1944)  # For repeatability of test.

        channels = -256
        number_of_samples = 1024
        x = np.zeros(number_of_samples).astype(np.float32)
        manual_encode = manual_mu_law_encode(x, channels)

        with self.test_session() as sess:
            self.assertRaises(TypeError, sess.run(mu_law_encode(x, channels)))
    def testEncodeUniformRandomNoise(self):
        np.random.seed(42)  # For repeatability of test.

        channels = 256
        number_of_samples = 2048
        x = np.random.uniform(-1, 1, number_of_samples).astype(np.float32)
        manual_encode = manual_mu_law_encode(x, channels)

        with self.test_session() as sess:
            encode = sess.run(mu_law_encode(x, channels))

        self.assertAllEqual(manual_encode, encode)
    def testEncodeZeros(self):
        np.random.seed(1944)  # For repeatability of test.

        channels = 256
        number_of_samples = 1024
        x = np.zeros(number_of_samples).astype(np.float32)
        manual_encode = manual_mu_law_encode(x, channels)

        with self.test_session() as sess:
            encode = sess.run(mu_law_encode(x, channels))

        self.assertAllEqual(manual_encode, encode)
    def testEncodePrecomputed(self):
        channels = 256
        number_of_samples = 10
        x = np.array(
            [-1.0, 1.0, 0.6, -0.25, 0.01, 0.33, -0.9999, 0.42, 0.1,
             -0.45]).astype(np.float32)
        encoded_manual = np.array([0, 255, 243, 32, 157, 230, 0, 235, 203,
                                   18]).astype(np.int32)

        with self.test_session() as sess:
            encoded = sess.run(mu_law_encode(x, channels))

        self.assertAllEqual(encoded_manual, encoded)
    def testEncodeRamp(self):
        np.random.seed(1944)  # For repeatability of test.

        channels = 256
        number_of_samples = 1024
        number_of_steps = 2.0 / number_of_samples
        x = np.arange(-1.0, 1.0, number_of_steps).astype(np.float32)
        manual_encode = manual_mu_law_encode(x, channels)

        with self.test_session() as sess:
            encode = sess.run(mu_law_encode(x, channels))

        self.assertAllEqual(manual_encode, encode)
 def testEncodeIsSurjective(self):
     x = np.linspace(-1, 1, 10000).astype(np.float32)
     channels = 123
     with self.test_session() as sess:
         encoded = sess.run(mu_law_encode(x, channels))
     self.assertEqual(len(np.unique(encoded)), channels)