Beispiel #1
0
    def _default_processing(self, features):
        """Resample the known input signals and add scaled decoder inputs.

        Every signal key that is present (and not None) in `features` is
        passed through `at_least_3d` and resampled to `self.time_steps`.
        The entries "f0_sub", "f0_additive" and the various "*_scaled"
        keys are then written into `features`.

        Args:
            features: mapping of signal names to tensors; must contain "f0".

        Returns:
            The same `features` mapping, updated in place.
        """
        resampled_keys = (
            "f0", "phase", "phase_unwrapped", "osc", "osc_sub",
            "phase_sub", "phase_unwrapped_sub", "osc_sub_sync",
            "phase_unwrapped_sub_sync", "phase_sub_sync",
        )
        for key in resampled_keys:
            signal = features.get(key)
            if signal is None:
                continue
            features[key] = resample(at_least_3d(signal),
                                     n_timesteps=self.time_steps)

        # Sub-harmonic fundamental: f0 divided by denom (e.g. the number of
        # cylinders in an engine).
        f0 = features["f0"]
        f0_sub = f0 / self.denom
        features["f0_sub"] = f0_sub

        # The additive input is the sub-harmonic fundamental.
        features["f0_additive"] = f0_sub

        # Decoder network inputs derived from the fundamentals.
        features["f0_scaled"] = hz_to_midi(f0) / F0_RANGE
        features["f0_scaled_mel"] = hz_to_mel(f0) / F0_RANGE_MEL
        features["f0_sub_scaled"] = hz_to_mel(f0_sub) / F0_SUB_RANGE

        # Affine map that sends -pi to 0 and +pi to 1.
        for key in ("phase", "phase_sub", "phase_sub_sync"):
            phase = features.get(key)
            if phase is not None:
                features[key + "_scaled"] = 0.5 + 0.5 * phase / np.pi

        # Affine map that sends -1 to 0 and +1 to 1.
        for key in ("osc", "osc_sub", "osc_sub_sync"):
            osc = features.get(key)
            if osc is not None:
                features[key + "_scaled"] = 0.5 + 0.5 * osc

        return features
Beispiel #2
0
 def call(self, *args, **unused_kwargs):
   """Compute the score after resampling all inputs to the longest one.

   Each positional input is passed through `preprocessing.at_least_3d`,
   resampled to the largest size found along axis 1, and handed to
   `self.compute_score`. The result is averaged over every axis except
   the first.
   """
   expanded = [preprocessing.at_least_3d(arg) for arg in args]
   longest = max(tensor.shape[1] for tensor in expanded)
   resampled = [core.resample(tensor, longest) for tensor in expanded]
   raw_score = self.compute_score(*resampled)
   trailing_axes = list(range(1, len(raw_score.shape)))
   return tf.reduce_mean(raw_score, axis=trailing_axes)
Beispiel #3
0
    def _default_processing(self, features):
        """Resample inputs, synthesize missing signals and add scaled inputs.

        Steps, in order:
          1. Every known signal key that is present is passed through
             `at_least_3d` and resampled to `self.time_steps`.
          2. "f0_sub" is set to f0 divided by `self.denom`, and
             "f0_additive" to the entry named by `self.f0_additive`.
          3. Missing "osc"/"osc_sub" and "phase"/"phase_sub" entries are
             generated from the matching fundamental.
          4. Missing "*_sync" entries fall back to their non-sync versions.
          5. The "*_scaled" decoder inputs are written.

        Args:
            features: mapping of signal names to tensors; must contain "f0".

        Returns:
            The same `features` mapping, updated in place.
        """
        signal_keys = (
            "f0", "phase", "phase_unwrapped", "osc", "osc_sub",
            "phase_sub", "phase_unwrapped_sub", "osc_sub_sync",
            "phase_unwrapped_sub_sync", "phase_sub_sync",
        )
        for key in signal_keys:
            signal = features.get(key)
            if signal is None:
                continue
            features[key] = resample(at_least_3d(signal),
                                     n_timesteps=self.time_steps)

        # Sub-harmonic fundamental: f0 divided by denom (e.g. the number of
        # cylinders in an engine).
        features["f0_sub"] = features["f0"] / self.denom

        # The additive input is whichever entry self.f0_additive names.
        features["f0_additive"] = features[self.f0_additive]

        # Synthesize oscillator and phase signals that were not supplied.
        for suffix in ("", "_sub"):
            f0_key = "f0" + suffix
            osc_key = "osc" + suffix
            phase_key = "phase" + suffix

            if features.get(osc_key) is None:
                unit_amplitudes = tf.ones(tf.shape(features[f0_key]))
                synthesized = oscillator_bank(features[f0_key],
                                              unit_amplitudes,
                                              sample_rate=self.rate)
                features[osc_key] = synthesized[:, :, tf.newaxis]

            if features.get(phase_key) is None:
                # Per-step angular increment, accumulated along axis 1.
                step = 2.0 * np.pi * features[f0_key] / float(self.rate)
                unwrapped = tf.cumsum(step, axis=1)
                features["phase_unwrapped" + suffix] = unwrapped
                # Wrap the accumulated phase into [-pi, pi).
                wrapped = tf.math.mod(unwrapped + np.pi, 2 * np.pi) - np.pi
                features[phase_key] = wrapped

        # Fall back to the non-sync signal wherever a sync one is missing.
        # NOTE(review): features[base] is read unconditionally; if
        # "phase_sub" was supplied without "phase_unwrapped_sub", that key
        # may be absent here - confirm callers always provide both.
        for base in ("osc_sub", "phase_sub", "phase_unwrapped_sub"):
            sync_key = base + "_sync"
            if features.get(sync_key) is None:
                features[sync_key] = features[base]

        # Decoder network inputs derived from the fundamentals.
        f0 = features["f0"]
        features["f0_scaled"] = hz_to_midi(f0) / F0_RANGE
        features["f0_scaled_mel"] = hz_to_mel(f0) / F0_RANGE_MEL
        features["f0_sub_scaled"] = hz_to_mel(
            features["f0_sub"]) / F0_SUB_RANGE

        # Affine map that sends -pi to 0 and +pi to 1.
        for key in ("phase", "phase_sub", "phase_sub_sync"):
            phase = features.get(key)
            if phase is not None:
                features[key + "_scaled"] = 0.5 + 0.5 * phase / np.pi

        # Affine map that sends -1 to 0 and +1 to 1.
        for key in ("osc", "osc_sub", "osc_sub_sync"):
            osc = features.get(key)
            if osc is not None:
                features[key + "_scaled"] = 0.5 + 0.5 * osc

        return features
    def _default_processing(self, features):
        """Resample the MIDI signals to `self.n_timesteps` and add scaled copies.

        When both "loudness_db" and "f0_hz" are present, `features` is first
        replaced by the result of `self.df_pp(features)`. (The original
        comment says this scales loudness and f0 and ensures a batch
        dimension; that is not visible from this method.)

        Args:
            features: mapping that must contain "midi_velocity" and
                "midi_pitch".

        Returns:
            The feature mapping with "midi_velocity_scaled" and
            "midi_pitch_scaled" added.
        """
        has_audio_features = "loudness_db" in features and "f0_hz" in features
        if has_audio_features:
            features = self.df_pp(features)

        midi_keys = ('midi_velocity', 'midi_pitch')
        for key in midi_keys:
            expanded = at_least_3d(features[key])
            features[key] = ddsp.core.resample(expanded,
                                               n_timesteps=self.n_timesteps)

        # Divide by 127; the relu clamps negatives to zero to work around an
        # issue in the dataset preparation.
        for key in midi_keys:
            features[key + "_scaled"] = tf.nn.relu(features[key] / 127.0)

        return features
Beispiel #5
0
 def setUp(self):
     """Build the input dictionary and the preprocessor used by the tests."""
     super().setUp()
     sample_rate = 16000
     frame_rate = 250
     frame_size = 256
     n_samples = 16000
     n_frames = 250

     # Replicate the preprocessor computations: a half-amplitude sine with
     # a leading axis of size one, then its power resampled to n_frames.
     sample_index = tf.range(0, n_samples, dtype=tf.float32)
     audio = 0.5 * tf.sin(sample_index)[None, :]
     raw_power = compute_power(audio,
                               sample_rate=sample_rate,
                               frame_rate=frame_rate,
                               frame_size=frame_size)
     power_db = resample(preprocessing.at_least_3d(raw_power), n_frames)

     self.input_dict = dict(
         f0_hz=tf.ones([1, n_frames]),
         audio=audio,
         power_db=power_db,
     )
     self.preprocessor = preprocessing.F0PowerPreprocessor(
         time_steps=n_frames, frame_rate=frame_rate, sample_rate=sample_rate)
Beispiel #6
0
    def _default_processing(self, features):
        """Resample f0, divide it by `self.denom` and build "f0_scaled".

        "f0" is passed through `at_least_3d`, resampled to
        `self.time_steps` and divided by `self.denom` (e.g. the number of
        cylinders in an engine, to produce subharmonics). The result is also
        stored as "f0_additive". "f0_scaled" is then computed according to
        `self.feature_domain`:

          * "freq":     MIDI-scaled f0 divided by F0_RANGE.
          * "freq-old": deprecated variant kept for backward compatibility.
          * "time":     oscillator signal generated from the divided f0.
          * "osc":      the supplied "osc" entry if present, otherwise an
                        oscillator signal generated from `denom * f0`.

        Args:
            features: mapping of signal names to tensors; must contain "f0".

        Returns:
            The same `features` mapping, updated in place.

        Raises:
            ValueError: if `self.feature_domain` is not one of the values
                listed above.
        """
        features["f0"] = at_least_3d(features["f0"])
        features["f0"] = resample(features["f0"], n_timesteps=self.time_steps)

        # Divide by denom (e.g. number of cylinders in engine to produce subharmonics)
        features["f0"] /= self.denom

        # Set additive input
        features["f0_additive"] = features["f0"]

        # Prepare decoder network inputs
        if self.feature_domain == "freq":
            features["f0_scaled"] = hz_to_midi(features["f0"]) / F0_RANGE
        elif self.feature_domain == "freq-old":
            # DEPRECATED. This option exists for backward compatibility with
            # a version containing a typo.
            features["f0_scaled"] = hz_to_midi(
                self.denom * features["f0"]) / F0_RANGE / self.denom
        elif self.feature_domain == "time":
            amplitudes = tf.ones(tf.shape(features["f0"]))
            features["f0_scaled"] = oscillator_bank(
                features["f0"], amplitudes, sample_rate=self.rate)[:, :,
                                                                   tf.newaxis]
        elif self.feature_domain == "osc":
            if features.get("osc", None) is None:
                # No oscillator supplied: generate one at the undivided
                # frequency (denom * f0).
                amplitudes = tf.ones(tf.shape(features["f0"]))
                features["f0_scaled"] = oscillator_bank(
                    self.denom * features["f0"],
                    amplitudes,
                    sample_rate=self.rate)[:, :, tf.newaxis]
            else:
                features["f0_scaled"] = features["osc"][:, :, tf.newaxis]
        else:
            # Wrap the operand in a 1-tuple so that a tuple-valued
            # feature_domain still yields this ValueError rather than a
            # TypeError from the % operator.
            raise ValueError("%s is not a valid value for feature_domain." %
                             (self.feature_domain,))

        return features