Beispiel #1
0
    def get_signal(self, amps, f0_hz, mod_amps, mod_f0_hz):
        """Render amplitude-modulated audio from frame-wise controls.

        Each control is resampled to `self.n_samples` and the resulting
        envelopes are passed to `core.modulate_amplitude`.

        Args:
          amps: Carrier amplitudes. Expected to be a strictly positive float32
            tensor of shape [batch, n_frames, 1].
          f0_hz: Carrier fundamental frequency in Hertz, shape
            [batch, n_frames, 1].
          mod_amps: Modulator amplitudes. Expected to be a strictly positive
            float32 tensor of shape [batch, n_frames, 1].
          mod_f0_hz: Modulator frequency in Hertz. Expected to be a strictly
            positive float32 tensor of shape [batch, n_frames, 1].

        Returns:
          signal: The result of `core.modulate_amplitude`, documented as a
            tensor of shape [batch, n_samples].
        """
        n_samples = self.n_samples
        amp_method = self.amp_resample_method

        # Amplitudes use the configured resampling method; frequencies use
        # the default of `core.resample`.
        carrier_amp = core.resample(amps, n_samples, method=amp_method)
        carrier_f0 = core.resample(f0_hz, n_samples)
        modulator_amp = core.resample(mod_amps, n_samples, method=amp_method)
        modulator_f0 = core.resample(mod_f0_hz, n_samples)

        return core.modulate_amplitude(amps=carrier_amp,
                                       f0_hz=carrier_f0,
                                       mod_amps=modulator_amp,
                                       mod_f0_hz=modulator_f0,
                                       sample_rate=self.sample_rate)
Beispiel #2
0
  def call(self, conditioning):
    """Run the convolutional stack on conditioning, sinusoids and noise.

    Args:
      conditioning: Mapping that holds 'f0_hz' and every key listed in
        `self.input_keys`.

    Returns:
      Dict with a single entry 'audio_tensor': the output of
      `self.dense_out` applied to the scaled sum of skip outputs.
    """
    pitch = conditioning['f0_hz']
    n_batch = pitch.shape[0]
    white_noise = tf.random.normal([n_batch, self.n_total, 1])

    # Unit-amplitude harmonic sinusoids, kept as separate channels.
    pitch_upsampled = core.resample(pitch, self.n_total)
    harmonic_freqs = core.get_harmonic_frequencies(pitch_upsampled,
                                                   self.n_harmonics)
    sinusoids = core.oscillator_bank(
        frequency_envelopes=harmonic_freqs,
        amplitude_envelopes=tf.ones_like(harmonic_freqs),
        sample_rate=self.sample_rate,
        sum_sinusoids=False)

    # Pass each conditioning input through its own stack, then bring all of
    # them to `self.n_total` timesteps.
    raw_inputs = [conditioning[key] for key in self.input_keys]
    stacked_inputs = [
        stack(value) for stack, value in zip(self.input_stacks, raw_inputs)
    ]
    upsampled_inputs = [
        core.resample(value, self.n_total) for value in stacked_inputs
    ]

    context = tf.concat(upsampled_inputs + [sinusoids, white_noise], axis=-1)

    # Conv layers: every layer sees the full context and emits a skip output.
    hidden = self.first_conv(context)
    skip_sum = 0
    for layer in self.conv_layers:
      hidden, skip = layer(hidden, context)
      skip_sum = skip_sum + skip
    skip_sum = skip_sum * tf.sqrt(1.0 / len(self.conv_layers))

    return {'audio_tensor': self.dense_out(skip_sum)}
Beispiel #3
0
  def get_signal(self, magnitudes, taus):
    """Build an impulse signal from frame-wise magnitude and tau controls.

    The magnitudes are resampled to `self.n_samples`, max-pooled over
    non-overlapping windows of `sample_rate / max_impact_frequency` samples,
    and each window's peak (height, time, tau) is turned into a basis impulse
    by `self.hertz_gaussian`. The scaled impulses are summed.

    Args:
      magnitudes: Magnitude tensor of shape [batch, n_frames, 1].
        Expects float32 that is strictly positive.
      taus: Tensor of shape [batch, n_frames, 1].
        Expects float32 that is strictly positive.

    Returns:
      signal: A tensor of the force impulse profile of shape [batch, n_samples].
    """
    # Create sample-wise envelopes.
    # Offset (in samples) of the two neighbours used when `timing_adjust` is on.
    weight_distance = 100
    # Only referenced by the disabled differencing code below.
    diff_order = 1
    # Disabled alternative: resample the (sign-flipped) finite difference of
    # the magnitudes instead of the magnitudes themselves.
    # magnitude_diffs = tf.experimental.numpy.diff(magnitudes, n=diff_order, axis=1)
    # magnitude_diffs = tf.concat((tf.zeros((tf.shape(magnitude_diffs)[0], int(math.floor(diff_order/2)), 1), dtype=tf.float32),
    #                             magnitude_diffs,
    #                             tf.zeros((tf.shape(magnitude_diffs)[0], int(math.ceil(diff_order/2)), 1), dtype=tf.float32)), axis=1)
    # magnitude_envelopes = core.resample(((-1)**diff_order) *  magnitude_diffs, self.n_samples,
    magnitude_envelopes = core.resample(magnitudes, self.n_samples,
                                        method=self.resample_method)
    taus = core.resample(taus, self.n_samples,
                          method=self.resample_method)

    # Number of samples per pooling window (one peak is kept per window).
    window_size = int(self.sample_rate / self.max_impact_frequency)
    # Add an axis so the envelope is 4-D, as the pooling op and the
    # `b,w,h,c` unpacking below require.
    magnitude_envelopes = tf.expand_dims(magnitude_envelopes, axis=1)
    # NOTE(review): by default max_pool_with_argmax returns indices flattened
    # per example (include_batch_in_index=False), while the gathers below read
    # from tensors flattened across the whole batch -- confirm this is
    # intended for batch sizes > 1.
    vals, inds = tf.nn.max_pool_with_argmax(magnitude_envelopes, window_size, window_size, 'SAME')
    # Use a weighted average of magnitude to select peak time so that things can shift around
    if self.timing_adjust:
      # Candidate indices: the peak and one neighbour on each side, clipped
      # to the valid sample range.
      augmented_inds = tf.concat([inds - weight_distance, inds, inds + weight_distance], axis=-1)
      augmented_inds = tf.clip_by_value(augmented_inds, 0, self.n_samples - 1)
      # NOTE(review): needs a fully static shape (a `None` dim breaks b*w*h*c).
      b,w,h,c = magnitude_envelopes.get_shape().as_list()
      mags_pooled = tf.gather(tf.reshape(magnitude_envelopes, shape=[b*w*h*c]), augmented_inds)
      # Magnitude-weighted mean of the candidate indices.
      weighted_inds = tf.reduce_sum(tf.cast(augmented_inds, dtype=tf.float32) * mags_pooled, axis=-1) / tf.reduce_sum(mags_pooled, axis=-1)
      # Convert sample index to seconds.
      peak_times = tf.cast(weighted_inds / self.sample_rate, dtype=tf.float32)
    else:
      # Convert the argmax sample index to seconds and drop the last axis.
      peak_times = tf.squeeze(tf.cast(inds / self.sample_rate, dtype=tf.float32), axis=3)
    
    # Peak value of each window.
    scale_heights = tf.squeeze(vals, axis=3)
    # Look up tau at each window's peak index (same flattening as above).
    taus = tf.expand_dims(taus, axis=1)
    b,w,h,c = taus.get_shape().as_list()
    taus_pooled = tf.gather(tf.reshape(taus,shape= [b*w*h*c,]),inds)
    taus_pooled = tf.squeeze(taus_pooled, axis=3)
    # One basis impulse per peak; presumably laid out so that axis 2 indexes
    # peaks -- TODO confirm against `hertz_gaussian`.
    basis_impulses = self.hertz_gaussian(peak_times, taus_pooled)
    signal = tf.reduce_sum(scale_heights * basis_impulses, axis=2)
    return signal
Beispiel #4
0
    def _default_processing(self, features):
        '''Resample the known input signals to time_steps and add scaled copies.

        Args:
            features: Mapping of feature name to tensor. Must contain "f0";
                the phase/osc entries are optional. Updated in place.

        Returns:
            The same `features` mapping with "f0_sub", "f0_additive" and the
            various "*_scaled" entries added.
        '''
        resampled_keys = (
            "f0", "phase", "phase_unwrapped", "osc", "osc_sub",
            "phase_sub", "phase_unwrapped_sub", "osc_sub_sync",
            "phase_unwrapped_sub_sync", "phase_sub_sync",
        )
        for key in resampled_keys:
            value = features.get(key)
            if value is None:
                continue
            features[key] = resample(at_least_3d(value),
                                     n_timesteps=self.time_steps)

        f0 = features["f0"]

        # Divide by denom (e.g. number of cylinders in an engine) to obtain
        # the subharmonic frequency, which also drives the additive input.
        f0_sub = f0 / self.denom
        features["f0_sub"] = f0_sub
        features["f0_additive"] = f0_sub

        # Decoder network inputs.
        features["f0_scaled"] = hz_to_midi(f0) / F0_RANGE
        features["f0_scaled_mel"] = hz_to_mel(f0) / F0_RANGE_MEL
        features["f0_sub_scaled"] = hz_to_mel(f0_sub) / F0_SUB_RANGE

        # Phases are divided by pi, then shifted and halved.
        for key in ("phase", "phase_sub", "phase_sub_sync"):
            phase = features.get(key)
            if phase is not None:
                features[key + "_scaled"] = 0.5 + 0.5 * phase / np.pi

        # Oscillator signals get the same shift-and-halve treatment.
        for key in ("osc", "osc_sub", "osc_sub_sync"):
            osc = features.get(key)
            if osc is not None:
                features[key + "_scaled"] = 0.5 + 0.5 * osc

        return features
Beispiel #5
0
 def call(self, audio, *conditioning):
   """Encode audio (plus optional conditioning) into a time-pooled latent.

   Args:
     audio: Audio passed to the spectral op selected by `self.spectral_op`.
     *conditioning: Extra tensors; each is resampled to the number of
       spectral frames and concatenated to the features on the last axis.

   Returns:
     The `self.dense_out` projection of the RNN output, averaged over
     axis 1 with sigmoid confidence weights (axis 1 is kept with size 1).

   Raises:
     ValueError: If `self.spectral_op` is neither 'compute_mfcc' nor
       'compute_logmag'.
   """
   if self.spectral_op == 'compute_mfcc':
       z = spectral_ops.compute_mfcc(
           audio,
           lo_hz=20.0,
           hi_hz=8000.0,
           fft_size=self.fft_size,
           mel_bins=128,
           mfcc_bins=30,
           overlap=self.overlap,
           pad_end=True)
   elif self.spectral_op == 'compute_logmag':
       z = spectral_ops.compute_logmag(core.tf_float32(audio), size=self.fft_size)
   else:
       # Fail loudly instead of hitting an unbound `z` further down.
       raise ValueError(
           'Unsupported spectral_op: {!r}. Expected "compute_mfcc" or '
           '"compute_logmag".'.format(self.spectral_op))

   # Normalize. A dummy axis is inserted for `z_norm` and removed afterwards.
   z = self.z_norm(z[:, :, tf.newaxis, :])[:, :, 0, :]
   n_timesteps = z.shape[1]
   conditioning = [resample(c, n_timesteps) for c in conditioning]

   z = tf.concat([z] + conditioning, axis=-1)
   # Run an RNN over the latents.
   z = self.rnn(z)
   # Bounce down to compressed z dimensions, weighting each timestep by a
   # learned confidence before averaging over time.
   w = tf.math.sigmoid(self.confidence(z))
   z = self.dense_out(z)
   z = tf.reduce_sum(z * w, axis=1, keepdims=True) / tf.reduce_sum(w, axis=1, keepdims=True)
   return z
Beispiel #6
0
    def get_controls(self, signal_one: tf.Tensor, signal_two: tf.Tensor,
                     nn_out_mix_level: tf.Tensor) -> TensorDict:
        """Standardize inputs to same length, mix_level to range [0, 1].

        Args:
          signal_one: 2-D or 3-D tensor.
          signal_two: 2-D or 3-D tensor.
          nn_out_mix_level: Tensor of shape [batch, n_time, 1] output of the
            network determining relative levels of signal one and two.

        Returns:
          Dict of control parameters with keys 'signal_one', 'signal_two' and
          'mix_level'. 'mix_level' is reshaped to [batch, n_time] where
          n_time is the length of the signals along axis 1.

        Raises:
          ValueError: If signal_one and signal_two are not the same length.
        """
        n_time_one = int(signal_one.shape[1])
        n_time_two = int(signal_two.shape[1])
        if n_time_one != n_time_two:
            # Trailing space matters: the two literals are concatenated.
            raise ValueError(
                'The two signals must have the same length instead of '
                '{} and {}'.format(n_time_one, n_time_two))

        # Squash the raw network output into [0, 1], then match the signal
        # length and drop the trailing channel axis.
        mix_level = tf.nn.sigmoid(nn_out_mix_level)
        mix_level = core.resample(mix_level, n_time_one)
        mix_level = tf.reshape(mix_level, (signal_one.shape[0], n_time_one))
        return {
            'signal_one': signal_one,
            'signal_two': signal_two,
            'mix_level': mix_level
        }
Beispiel #7
0
 def call(self, *args, **unused_kwargs):
   """Resample all inputs to the longest one and return a per-example score.

   Args:
     *args: Input tensors; each is made at least 3-D before resampling.
     **unused_kwargs: Ignored.

   Returns:
     The output of `self.compute_score`, averaged over every axis except
     the first.
   """
   expanded = [preprocessing.at_least_3d(arg) for arg in args]
   # The target length is the largest size found along axis 1.
   target_len = max(tensor.shape[1] for tensor in expanded)
   aligned = [core.resample(tensor, target_len) for tensor in expanded]
   raw_score = self.compute_score(*aligned)
   non_batch_axes = list(range(1, len(raw_score.shape)))
   return tf.reduce_mean(raw_score, axis=non_batch_axes)
Beispiel #8
0
 def estimate_spec(self, f0, amp):
   """Map f0 and amplitude controls to a 4-D grid with a residual conv.

   Args:
     f0: Pitch control; made 3-D and quantized with
       `nn.differential_onehot`.
     amp: Amplitude control; made 3-D.

   Returns:
     Tensor reshaped to [-1, t_bins, f_bins, channels] plus the output of
     `self.conv1d` applied to it.
   """
   pitch_onehot = nn.differential_onehot(
       nn.ensure_3d(f0), self.f_quantization_bins, 200)
   amp_3d = nn.ensure_3d(amp)

   # Bring both controls to `self.t_bins` timesteps and join them on the
   # last axis.
   pitch_frames = resample(pitch_onehot, self.t_bins)
   amp_frames = resample(amp_3d, self.t_bins)
   controls = tf.concat([pitch_frames, amp_frames], axis=-1)

   # Per the original comments: (batch, time, f_bins*channels) after the
   # dense layer.
   projected = self.dense(controls)
   grid = tf.reshape(projected,
                     [-1, self.t_bins, self.f_bins, self.channels])

   # Residual branch: assume close time steps interact with each other.
   return grid + self.conv1d(grid)
Beispiel #9
0
  def get_signal(self, amplitudes, frequencies):
    """Render a bank of sinusoids from frame-wise controls.

    Args:
      amplitudes: Amplitude tensor of shape [batch, n_frames, n_sinusoids].
        Expects float32 that is strictly positive.
      frequencies: Tensor of shape [batch, n_frames, n_sinusoids].
        Expects float32 in Hertz that is strictly positive.

    Returns:
      signal: Output of `core.oscillator_bank`, documented as a tensor of
        shape [batch, n_samples].
    """
    n_samples = self.n_samples

    # Upsample the controls; only amplitudes use the configured method.
    amp_env = core.resample(amplitudes, n_samples,
                            method=self.amp_resample_method)
    freq_env = core.resample(frequencies, n_samples)

    return core.oscillator_bank(frequency_envelopes=freq_env,
                                amplitude_envelopes=amp_env,
                                sample_rate=self.sample_rate)
Beispiel #10
0
    def _default_processing(self, features):
        '''Resample inputs to time_steps, fill in missing signals and scale.

        Args:
            features: Mapping of feature name to tensor. Must contain "f0";
                osc/phase entries are generated from f0 when absent. Updated
                in place.

        Returns:
            The same `features` mapping with derived and "*_scaled" entries.
        '''
        # Bring every provided input to [batch_size, n_frames, ...] and to
        # `self.time_steps` frames.
        resampled_keys = (
            "f0", "phase", "phase_unwrapped", "osc", "osc_sub",
            "phase_sub", "phase_unwrapped_sub", "osc_sub_sync",
            "phase_unwrapped_sub_sync", "phase_sub_sync",
        )
        for key in resampled_keys:
            value = features.get(key)
            if value is None:
                continue
            features[key] = resample(at_least_3d(value),
                                     n_timesteps=self.time_steps)

        # Divide by denom (e.g. number of cylinders in an engine) to obtain
        # the subharmonic frequency.
        features["f0_sub"] = features["f0"] / self.denom

        # The additive input is whichever feature `self.f0_additive` names.
        features["f0_additive"] = features[self.f0_additive]

        # Derive oscillator and phase signals from f0 / f0_sub when missing.
        for suffix in ("", "_sub"):
            f0_key = "f0" + suffix
            osc_key = "osc" + suffix
            phase_key = "phase" + suffix

            if features.get(osc_key) is None:
                unit_amps = tf.ones(tf.shape(features[f0_key]))
                osc = oscillator_bank(features[f0_key], unit_amps,
                                      sample_rate=self.rate)
                features[osc_key] = osc[:, :, tf.newaxis]

            if features.get(phase_key) is None:
                # Per-step angular increment, accumulated along axis 1.
                step = 2.0 * np.pi * features[f0_key] / float(self.rate)
                unwrapped = tf.cumsum(step, axis=1)
                features["phase_unwrapped" + suffix] = unwrapped
                # Wrap into [-pi, pi).
                features[phase_key] = (
                    tf.math.mod(unwrapped + np.pi, 2 * np.pi) - np.pi)

        # Missing "*_sync" variants fall back to the unsynced signal.
        for base in ("osc_sub", "phase_sub", "phase_unwrapped_sub"):
            sync_key = base + "_sync"
            if features.get(sync_key) is None:
                features[sync_key] = features[base]

        # Decoder network inputs.
        f0 = features["f0"]
        features["f0_scaled"] = hz_to_midi(f0) / F0_RANGE
        features["f0_scaled_mel"] = hz_to_mel(f0) / F0_RANGE_MEL
        features["f0_sub_scaled"] = (
            hz_to_mel(features["f0_sub"]) / F0_SUB_RANGE)

        for key in ("phase", "phase_sub", "phase_sub_sync"):
            phase = features.get(key)
            if phase is not None:
                features[key + "_scaled"] = 0.5 + 0.5 * phase / np.pi

        for key in ("osc", "osc_sub", "osc_sub_sync"):
            osc = features.get(key)
            if osc is not None:
                features[key + "_scaled"] = 0.5 + 0.5 * osc

        return features
Beispiel #11
0
  def create_resampled_signals(self, n_before, n_after, add_endpoint, method):
    """Resample a fixed test curve with core.resample().

    The test curve is `1 - sin(t)` for `t` evenly spaced over [0, pi].

    Args:
      n_before: Number of timesteps before resampling.
      n_after: Number of timesteps after resampling.
      add_endpoint: Forwarded to `core.resample` as `add_endpoint`.
      method: Forwarded to `core.resample` as `method`.

    Returns:
      before: Numpy array before resampling. Shape (n_before,).
      after: Numpy array after resampling, with batch and channel axes
        stripped.
    """
    angles = np.linspace(0, np.pi, n_before)
    curve = 1.0 - np.sin(angles)
    # Add batch and channel axes for the resampler.
    batched = curve[np.newaxis, :, np.newaxis]
    resampled = core.resample(
        batched, n_after, method=method, add_endpoint=add_endpoint)
    resampled = resampled.numpy()
    return batched[0, :, 0], resampled[0, :, 0]
Beispiel #12
0
    def get_signal(self, amplitudes, wavetables, f0_hz):
        """Render audio by wavetable synthesis from frame-wise controls.

        Only the wavetables are resampled here; amplitudes and f0_hz are
        passed to `core.wavetable_synthesis` unchanged.

        Args:
          amplitudes: Amplitude tensor of shape [batch, n_frames, 1]. Expects
            float32 that is strictly positive.
          wavetables: Tensor of shape [batch, n_frames, n_wavetable].
          f0_hz: The fundamental frequency in Hertz. Tensor of shape [batch,
            n_frames, 1].

        Returns:
          signal: Output of `core.wavetable_synthesis`, documented as a
            tensor of shape [batch, n_samples].
        """
        n_samples = self.n_samples
        upsampled_tables = core.resample(wavetables, n_samples)
        return core.wavetable_synthesis(amplitudes=amplitudes,
                                        wavetables=upsampled_tables,
                                        frequencies=f0_hz,
                                        n_samples=n_samples,
                                        sample_rate=self.sample_rate)
Beispiel #13
0
    def test_multi_dimensional_inputs(self, dimensions):
        """Check the resampled shape for inputs of different rank.

        Args:
          dimensions: The number of dimensions of the input test signal.
        """
        # All-ones test signal with `n_smaller` entries along every axis.
        in_shape = [self.n_smaller] * dimensions
        resampled = core.resample(np.ones(in_shape), self.n_larger)

        # Only one axis should change size: axis 0 for 1-D input, axis 1
        # otherwise.
        changed_axis = 0 if dimensions == 1 else 1
        expected_shape = list(in_shape)
        expected_shape[changed_axis] = self.n_larger

        self.assertListEqual(list(resampled.shape), expected_shape)
Beispiel #14
0
 def setUp(self):
     """Build the shared input dictionary and the preprocessor under test."""
     super().setUp()
     sample_rate = 16000
     frame_rate = 250
     frame_size = 256
     n_samples = 16000
     n_frames = 250

     # Sine test audio with a leading batch axis.
     ramp = tf.range(0, n_samples, dtype=tf.float32)
     audio = 0.5 * tf.sin(ramp)[None, :]

     # Replicate the preprocessor's power computation.
     power_db = resample(
         preprocessing.at_least_3d(
             compute_power(audio,
                           sample_rate=sample_rate,
                           frame_rate=frame_rate,
                           frame_size=frame_size)),
         n_frames)

     self.input_dict = {
         'f0_hz': tf.ones([1, n_frames]),
         'audio': audio,
         'power_db': power_db,
     }
     self.preprocessor = preprocessing.F0PowerPreprocessor(
         time_steps=n_frames, frame_rate=frame_rate, sample_rate=sample_rate)
Beispiel #15
0
    def _default_processing(self, features):
        '''Resample f0 to time_steps, divide by denom and build "f0_scaled".

        Args:
            features: Mapping that must contain "f0" and may contain "osc".
                Updated in place.

        Returns:
            The same `features` mapping with "f0", "f0_additive" and
            "f0_scaled" set.

        Raises:
            ValueError: If `self.feature_domain` is not one of "freq",
                "freq-old", "time" or "osc".
        '''
        f0 = resample(at_least_3d(features["f0"]),
                      n_timesteps=self.time_steps)

        # Divide by denom (e.g. number of cylinders in an engine) to produce
        # subharmonics.
        f0 /= self.denom
        features["f0"] = f0

        # The additive input is the divided f0.
        features["f0_additive"] = f0

        # Decoder network input, chosen by feature domain.
        domain = self.feature_domain
        if domain == "freq":
            scaled = hz_to_midi(f0) / F0_RANGE
        elif domain == "freq-old":
            # Deprecated. This option is for backward compatibility with a
            # version containing a typo.
            scaled = hz_to_midi(self.denom * f0) / F0_RANGE / self.denom
        elif domain == "time":
            unit_amps = tf.ones(tf.shape(f0))
            scaled = oscillator_bank(
                f0, unit_amps, sample_rate=self.rate)[:, :, tf.newaxis]
        elif domain == "osc":
            provided_osc = features.get("osc")
            if provided_osc is None:
                # No oscillator supplied: synthesize one at the undivided f0.
                unit_amps = tf.ones(tf.shape(f0))
                scaled = oscillator_bank(
                    self.denom * f0, unit_amps,
                    sample_rate=self.rate)[:, :, tf.newaxis]
            else:
                scaled = provided_osc[:, :, tf.newaxis]
        else:
            raise ValueError("%s is not a valid value for feature_domain." %
                             self.feature_domain)

        features["f0_scaled"] = scaled
        return features
Beispiel #16
0
    def get_signal(self, f0, op1, op2, op3, op4, modulators):
        """Render audio with the FM synthesizer from frame-wise controls.

        Every control is resampled to `self.n_samples` (honouring
        `self.add_endpoint`) and passed to `core.modulate_frequency`.

        Args:
          f0: Fundamental frequencies in hertz. Shape [batch, n_frames, 1].
          op1: Amp, idx and ADSR of operator 1. Shape [batch, n_frames, 3].
          op2: Same as op1, for operator 2.
          op3: Same as op1, for operator 3.
          op4: Same as op1, for operator 4.
          modulators: Modulation between operators. Shape
            [batch, n_frames, 6].

        Returns:
          signal: Output of `core.modulate_frequency`, documented as a tensor
            of shape [batch, n_samples].
        """

        def to_sample_rate(control):
            # All controls share the same resampling settings.
            return core.resample(control,
                                 self.n_samples,
                                 add_endpoint=self.add_endpoint)

        f0_env = to_sample_rate(f0)
        op1_env, op2_env, op3_env, op4_env = (
            to_sample_rate(operator) for operator in (op1, op2, op3, op4))
        modulators_env = to_sample_rate(modulators)

        return core.modulate_frequency(f0=f0_env,
                                       op1=op1_env,
                                       op2=op2_env,
                                       op3=op3_env,
                                       op4=op4_env,
                                       modulators=modulators_env,
                                       sample_rate=self.sample_rate)
Beispiel #17
0
    def additive_synthesis(self,
                           amplitudes,
                           frequency_shifts=None,
                           frequency_distribution=None,
                           n_samples=64000,
                           sample_rate=16000,
                           amp_resample_method="window"):
        '''Generate audio from a frame-wise oscillator bank.

        The base frequencies come from `self.get_linear_frequencies`; they
        can be perturbed with `frequency_shifts` and weighted with
        `frequency_distribution`.

        Args:
            amplitudes: Frame-wise oscillator peak amplitude. Shape [batch_size,
                n_frames, 1].
            frequency_shifts: Frequency variations, zero-centered. The total
                frequency of an oscillator is equal to (frequencies * (1 +
                frequency_shifts)). Shape [batch_size, n_frames, n_frequencies].
            frequency_distribution: Amplitude variations, ranged zero to one.
                The total amplitude of an oscillator is equal to (amplitudes *
                frequency_distribution). Shape [batch_size, n_frames,
                n_frequencies].
            n_samples: Total length of output audio. Controls are resampled to
                this length.
            sample_rate: Sample rate.
            amp_resample_method: Mode with which to resample amplitude envelopes.

        Returns:
            audio: Output of `core.oscillator_bank`; the inline comment below
                gives its shape as [batch_size, n_samples].
        '''
        amplitudes = core.tf_float32(amplitudes)
        batch_size = amplitudes.shape[0]
        n_frames = amplitudes.shape[1]

        # Cast both optional controls up front so either can be used below
        # regardless of which one fixes the number of frequencies.
        if frequency_distribution is not None:
            frequency_distribution = core.tf_float32(frequency_distribution)
        if frequency_shifts is not None:
            frequency_shifts = core.tf_float32(frequency_shifts)

        # The distribution takes precedence when deciding how many
        # frequencies to create; a single one is used when neither is given.
        if frequency_distribution is not None:
            n_frequencies = int(frequency_distribution.shape[-1])
        elif frequency_shifts is not None:
            n_frequencies = int(frequency_shifts.shape[-1])
        else:
            n_frequencies = 1

        # Create frequencies [batch_size, n_frames, n_frequencies].
        frequencies = self.get_linear_frequencies(batch_size, n_frames,
                                                  n_frequencies)
        if frequency_shifts is not None:
            frequencies *= (1.0 + frequency_shifts)

        # Create harmonic amplitudes [batch_size, n_frames, n_frequencies].
        if frequency_distribution is not None:
            frequency_amplitudes = amplitudes * frequency_distribution
        else:
            frequency_amplitudes = amplitudes

        # Create sample-wise envelopes.
        frequency_envelopes = core.resample(frequencies,
                                            n_samples)  # cycles/sec
        amplitude_envelopes = core.resample(frequency_amplitudes,
                                            n_samples,
                                            method=amp_resample_method)

        # Synthesize from harmonics [batch_size, n_samples].
        audio = core.oscillator_bank(frequency_envelopes,
                                     amplitude_envelopes,
                                     sample_rate=sample_rate)
        return audio
Beispiel #18
0
 def get_signal(self, signal_in: tf.Tensor) -> tf.Tensor:
     """Resample `signal_in` to `self.n_timesteps` timesteps.

     Args:
       signal_in: Tensor to resample.

     Returns:
       The result of `core.resample`, called with `self.n_timesteps` and
       `self.method` as its second and third positional arguments.
     """
     resampled = core.resample(signal_in, self.n_timesteps, self.method)
     return resampled