def __init__(self):
        """Configure a filter-bank generator/discriminator experiment.

        Builds a ``ResidualStackFilterBankGenerator`` and a
        ``FilterBankDiscriminator``, each with its own 128-band, 511-tap
        linear-scale filter bank spanning ``20 .. nyquist - 20`` at
        ``zounds.SR22050``, and forwards them together with the training
        hyperparameters to the parent initializer.
        """
        n_mels = 128
        feature_size = 32
        sr = zounds.SR22050()
        total_samples = 8192

        freq_band = zounds.FrequencyBand(20, sr.nyquist - 20)
        n_filters = 128
        filter_taps = 511

        def build_filter_bank():
            # A fresh scale and a fresh bank per call: the generator and the
            # discriminator must not share one filter bank instance.
            # NOTE(review): 0.9 is the fourth positional FilterBank argument,
            # presumably the filter scaling factor -- confirm against zounds.
            scale = zounds.LinearScale(freq_band, n_filters)
            return zounds.learn.FilterBank(sr,
                                           filter_taps,
                                           scale,
                                           0.9,
                                           normalize_filters=True,
                                           a_weighting=False)

        gen_filter_bank = build_filter_bank()
        disc_filter_bank = build_filter_bank()

        super().__init__(generator=ResidualStackFilterBankGenerator(
            gen_filter_bank,
            feature_size,
            total_samples,
            n_mels,
            add_weight_norm=True),
                         discriminator=FilterBankDiscriminator(
                             disc_filter_bank,
                             total_samples,
                             conditioning_channels=n_mels),
                         learning_rate=1e-4,
                         feature_size=feature_size,
                         audio_repr_class=RawAudio,
                         generator_loss=mel_gan_gen_loss,
                         sub_gen_loss=least_squares_generator_loss,
                         discriminator_loss=mel_gan_disc_loss,
                         sub_disc_loss=least_squares_disc_loss,
                         g_init=weights_init,
                         d_init=weights_init,
                         feature_funcs={
                             'audio': (audio, (sr, )),
                             'spectrogram': (spectrogram, (sr, ))
                         },
                         total_samples=total_samples,
                         feature_channels=n_mels,
                         samplerate=sr,
                         inference_sequence_factor=4)
 def make_filter_bank(cls, samplerate):
     """Return a 128-band, 511-tap linear-scale filter bank for ``samplerate``.

     The scale covers ``20 .. samplerate.nyquist - 20``; filters are
     normalized and A-weighting is disabled.
     """
     band = zounds.FrequencyBand(20, samplerate.nyquist - 20)
     linear_scale = zounds.LinearScale(band, 128)
     # NOTE(review): 0.9 is the fourth positional FilterBank argument,
     # presumably the filter scaling factor -- confirm against zounds.
     return zounds.learn.FilterBank(
         samplerate,
         511,
         linear_scale,
         0.9,
         normalize_filters=True,
         a_weighting=False)
def make_filter_banks(taps, bands, sr, size, full_band_size=8192):
    """Build one linear-scale filter bank per ``(tap, band)`` pair.

    Starting from ``size`` and ``sr``, each iteration stores a filter bank
    under the current ``size`` key, then halves ``size`` and multiplies
    ``sr`` by 2 for the next pair.

    Args:
        taps: Iterable of filter lengths, one per bank.
        bands: Iterable of band counts, paired positionally with ``taps``
            (``zip`` stops at the shorter of the two).
        sr: Samplerate object for the first bank; must expose ``nyquist``
            and support ``* 2``.
        size: Integer key of the first bank.
        full_band_size: The ``size`` whose bank covers ``0 .. nyquist``;
            every other bank covers ``nyquist // 2 .. nyquist``. Defaults to
            8192, the value that was previously hard-coded.

    Returns:
        dict mapping each visited ``size`` to its ``zounds.learn.FilterBank``.
        Empty when ``taps`` or ``bands`` is empty.
    """
    out = {}
    for tap, band in zip(taps, bands):
        # Only the designated size gets the full band; the rest cover the
        # upper half of their spectrum.
        start = 0 if size == full_band_size else sr.nyquist // 2
        stop = sr.nyquist
        fb = zounds.FrequencyBand(start, stop)
        out[size] = zounds.learn.FilterBank(sr,
                                            tap,
                                            zounds.LinearScale(fb, band),
                                            0.05,
                                            normalize_filters=True,
                                            a_weighting=False)
        # Diagnostic output kept so existing callers see unchanged stdout.
        print(size, sr, out[size].scale)
        size = size // 2
        sr = sr * 2

    return out
    def __init__(self):
        """Configure a DDSP-generator experiment.

        Builds a ``DDSPGenerator`` with 128 oscillators on a linear scale
        spanning ``20 .. nyquist - 20`` at ``zounds.SR22050`` and a
        ``MultiScaleMultiResDiscriminator`` conditioned on 128 feature
        channels, then forwards both together with the training
        hyperparameters to the parent initializer.
        """
        n_mels = 128
        feature_size = 32
        samplerate = zounds.SR22050()
        total_samples = 8192

        n_osc = 128
        scale = zounds.LinearScale(
            zounds.FrequencyBand(20, samplerate.nyquist - 20), n_osc)

        # Generator first, then discriminator: same construction order as
        # when both were built inline in the super() call.
        generator = DDSPGenerator(n_osc, feature_size, n_mels,
                                  total_samples, scale, samplerate)
        discriminator = MultiScaleMultiResDiscriminator(
            total_samples,
            flatten_multiscale_features=False,
            channel_judgements=True,
            conditioning_channels=n_mels,
            decompose=True)

        super().__init__(generator=generator,
                         discriminator=discriminator,
                         learning_rate=1e-4,
                         feature_size=feature_size,
                         audio_repr_class=RawAudio,
                         generator_loss=mel_gan_gen_loss,
                         sub_gen_loss=least_squares_generator_loss,
                         discriminator_loss=mel_gan_disc_loss,
                         sub_disc_loss=least_squares_disc_loss,
                         g_init=weights_init,
                         d_init=weights_init,
                         feature_funcs={
                             'audio': (audio, (samplerate, )),
                             'spectrogram': (spectrogram, (samplerate, ))
                         },
                         total_samples=total_samples,
                         feature_channels=n_mels,
                         samplerate=samplerate,
                         inference_sequence_factor=4)
 def _scale(self, samplerate, bands, zero_start=False):
     """Return a ``bands``-band linear scale ending at ``samplerate.nyquist``.

     The scale starts at 0 when ``zero_start`` is true, otherwise at half
     of ``samplerate.nyquist``.
     """
     if zero_start:
         lower = 0
     else:
         lower = samplerate.nyquist / 2
     upper = samplerate.nyquist
     band = zounds.FrequencyBand(lower, upper)
     return zounds.LinearScale(band, bands)
Exemple #6
0
    @classmethod
    def from_audio(cls, samples, samplerate):
        """Build an instance of ``cls`` from audio samples.

        Pipeline: ``cls.batch_stft`` -> magnitude -> ``cls._embed`` -> swap
        the last two of three axes -> natural log -> ``cls._postprocess_coeffs``.
        The result is passed to ``cls`` together with ``samplerate``.
        """
        spectrum = cls.batch_stft(samples)
        magnitudes = np.abs(spectrum)
        embedded = cls._embed(magnitudes).transpose((0, 2, 1))
        # Small offset keeps the log finite where the embedding is zero.
        log_coeffs = np.log(embedded + 1e-12)
        return cls(cls._postprocess_coeffs(log_coeffs), samplerate)


# Module-level analysis configuration; the two bases computed here are used
# as the `basis` class attribute of the phase-recovery classes below.
sr = zounds.SR11025()
# Band count shared by the mel and the geometric scale.
n_bands = 256
# Both scales span 20 .. (nyquist - 20) (presumably Hz -- confirm).
mel_scale = zounds.MelScale(zounds.FrequencyBand(20, sr.nyquist - 20), n_bands)
# NOTE(review): the third argument (0.05) is presumably the bandwidth ratio
# of the geometric scale -- confirm against the zounds documentation.
geom_scale = zounds.GeometricScale(20, sr.nyquist - 20, 0.05, n_bands)
# 513 linearly spaced bands from 0 to nyquist.
# NOTE(review): 513 looks like the bin count of a 1024-point real FFT
# (1024 // 2 + 1) -- confirm it matches the STFT used elsewhere.
linear_scale = zounds.LinearScale(zounds.FrequencyBand(0, sr.nyquist), 513)
# Bases relating each scale to `linear_scale`, built with a Hanning window.
# `_basis` is a private zounds method, so this may break on library upgrades.
mel_scale_basis = mel_scale._basis(linear_scale, zounds.HanningWindowingFunc())
geom_scale_basis = geom_scale._basis(linear_scale,
                                     zounds.HanningWindowingFunc())


class MelScalePhaseRecover(BasePhaseRecovery):
    """Phase-recovery representation that uses the mel-scale basis.

    Only the ``basis`` class attribute differs from ``BasePhaseRecovery``;
    construction (``data``, ``samplerate``) is inherited unchanged, so no
    ``__init__`` override is needed.
    """

    basis = mel_scale_basis


class GeometricScalePhaseRecover(BasePhaseRecovery):
    """Phase-recovery representation that uses the geometric-scale basis."""
    basis = geom_scale_basis