Esempio n. 1
0
File: codec.py Progetto: idiap/ssp
def _harmonic_weight(hnr_value):
    """Return the harmonic amplitude weight sqrt(hnr / (hnr + 1))."""
    return np.sqrt(hnr_value / (hnr_value + 1))


def _pulse_periods(pitch, nSamples):
    """
    Yield (start, period, frame) for successive pitch periods.

    Starting at sample 0, each period is int(1.0 / pitch[frame] * r) samples
    long, where frame is the start sample divided by framePeriod.  Iteration
    stops when the next period would run past nSamples or when the frame
    index runs off the end of pitch.
    """
    start = 0
    frame = 0
    while start < nSamples and frame < len(pitch):
        period = int(1.0 / pitch[frame] * r)
        if start + period > nSamples:
            break
        yield start, period, frame
        start += period
        frame = start // framePeriod


def _weight_noise_frames(fn, hnr):
    """Scale each noise frame in place by sqrt(1 / (hnr + 1)); return fn."""
    for i in range(len(fn)):
        fn[i] *= np.sqrt(1.0 / (hnr[i] + 1))
    return fn


def _mix_and_flatten(fn, fh, frameSize):
    """
    Add the gained harmonic frames to the noise frames and flatten with AR.

    Each combined frame is Hanning-windowed to estimate an AR model, and the
    unwindowed frame is then replaced by its ssp.ARExcitation under that
    model.
    """
    assert(len(fh) == len(fn))
    hgain = ssp.parameter("HGain", 1.0)
    e = fn + fh * hgain
    hnw = np.hanning(frameSize)
    for i in range(len(e)):
        windowed = ssp.Window(e[i], hnw)
        eac = ssp.Autocorrelation(windowed)
        ea, eg = ssp.ARLevinson(eac, order=lpOrder[r])
        e[i] = ssp.ARExcitation(e[i], ea, eg)
    return e


def decode(tuple):
    """
    Decode a speech waveform.

    tuple is a sequence (ark, g, pitch, hnr) of per-frame parameters; all
    four must have the same length.  When opt.glottal is 'cepgm' and any of
    opt.encode, opt.decode or opt.pitch is set, the last two columns of ark
    are read as theta and log(magni) for the glottal pole pair and the
    remaining columns as the AR coefficients; otherwise ark is used as the
    AR coefficients directly.

    The excitation is built according to opt.glottal ('ar', 'noise',
    'robot', 'synth', 'sine', 'holp', 'shaped', 'ceplf' or 'cepgm'); any
    other value raises ValueError.  If opt.excitation is set, the flattened
    excitation divided by the frame size is returned.  Otherwise the
    excitation is passed through ssp.ARResynthesis with ar and g and then
    either overlap-added (opt.ola) or flattened.  The result is scaled by
    the "Gain" parameter.

    Relies on the module-level names opt, framePeriod, r, pcm, lpOrder and,
    for some methods, f.
    NOTE(review): r is presumably the sample rate and f the framed input
    signal -- confirm against the rest of the module.
    """
    (ark, g, pitch, hnr) = tuple
    print("Frame padding:", opt.padding)

    nFrames = len(ark)
    assert(len(g) == nFrames)
    assert(len(pitch) == nFrames)
    assert(len(hnr) == nFrames)

    # The original framer padded the ends so the number of samples to
    # synthesise is a bit less than you might think
    if opt.ola:
        frameSize = framePeriod * 2
        nSamples = framePeriod * (nFrames-1)
    else:
        frameSize = framePeriod
        nSamples = frameSize * (nFrames-1)

    ex = opt.glottal
    if opt.glottal == 'cepgm' and (opt.encode or opt.decode or opt.pitch):
        # The glottal pole parameters travel in the last two columns.
        order = ark.shape[-1] - 2
        ar = ark[:,0:order]
        theta = ark[:,-2]
        magni = np.exp(ark[:,-1])
    else:
        ar = ark

    # Use the original AR residual; it should be a very good reconstruction.
    if ex == 'ar':
        e = ssp.ARExcitation(f, ar, g)

    # Just noise.  This is effectively a whisper synthesis.
    elif ex == 'noise':
        e = np.random.normal(size=(nFrames, frameSize))

    # Just harmonics, and with a fixed F0.  This is the classic robot
    # synthesis.
    elif ex == 'robot':
        ew = np.zeros(nSamples)
        period = int(1.0 / 200 * r)
        # One impulse of height `period` at the start of every period.
        ew[::period] = period
        e = ssp.Frame(ew, size=frameSize, period=framePeriod)

    # Synthesise harmonics plus noise in the ratio suggested by the HNR.
    elif ex == 'synth':
        # Harmonic part
        mperiod = int(1.0 / np.mean(pitch) * r)
        gm = ssp.GlottalModel(ssp.parameter('Pulse', 'impulse'))
        pr, _ = ssp.pulse_response(gm, pcm, period=mperiod, order=lpOrder[r])
        h = np.zeros(nSamples)
        for start, period, frame in _pulse_periods(pitch, nSamples):
            weight = _harmonic_weight(hnr[frame])
            h[start:start+period] = gm.pulse(period, pcm) * weight
        h = ssp.ARExcitation(h, pr, 1.0)
        fh = ssp.Frame(h, size=frameSize, period=framePeriod, pad=opt.padding)

        # Noise part
        n = np.random.normal(size=nSamples)
        n = ssp.ZeroFilter(n, 1.0) # Include the radiation impedance
        fn = ssp.Frame(n, size=frameSize, period=framePeriod, pad=opt.padding)
        _weight_noise_frames(fn, hnr)

        hgain = ssp.parameter("HGain", 1.0)
        e = fn + fh * hgain

    # Like harmonics plus noise, but with explicit sinusoids instead of time
    # domain impulses.
    elif ex == 'sine':
        order = 20
        sine = ssp.Harmonics(r, order)
        h = np.zeros(nSamples)
        for start in range(0, len(h)-framePeriod, framePeriod):
            frame = start // framePeriod
            weight = _harmonic_weight(hnr[frame])
            h[start:start+framePeriod] = (
                sine.sample(pitch[frame], framePeriod) * weight)
        fh = ssp.Frame(h, size=frameSize, period=framePeriod, pad=opt.padding)
        n = np.random.normal(size=nSamples)
        fn = ssp.Frame(n, size=frameSize, period=framePeriod, pad=opt.padding)
        _weight_noise_frames(fn, hnr)
        e = fn + fh*10

    # High order linear prediction.  Synthesise the harmonics using noise to
    # excite a high order polynomial with roots resembling harmonics.
    elif ex == 'holp':
        # Some noise
        n = np.random.normal(size=nSamples)
        fn = ssp.Frame(n, size=frameSize, period=framePeriod)

        # Use the noise to excite a high order AR model
        fh = np.ndarray(fn.shape)
        for i in range(len(fn)):
            hoar = ssp.ARHarmonicPoly(pitch[i], r, 0.7)
            fh[i] = ssp.ARResynthesis(fn[i], hoar, 1.0 / linalg.norm(hoar)**2)
            print(i, pitch[i], linalg.norm(hoar), np.min(fh[i]), np.max(fh[i]))
            print(' ', np.min(hoar), np.max(hoar))

        # Weight the noise as for the other methods.  The weighted noise is
        # currently not mixed in: only the harmonic part is used.
        _weight_noise_frames(fn, hnr)
        e = fh

    # Shaped excitation.  The pulses are shaped by a filter to have a
    # rolloff, then added to the noise.  The resulting signal is
    # flattened using AR.
    elif ex == 'shaped':
        # Harmonic part
        gm = ssp.GlottalModel(ssp.parameter('Pulse', 'impulse'))
        gm.angle = pcm.hertz_to_radians(np.mean(pitch)*0.5)
        h = np.zeros(nSamples)
        for start, period, frame in _pulse_periods(pitch, nSamples):
            weight = _harmonic_weight(hnr[frame])
            h[start:start+period] = gm.pulse(period, pcm) * weight

        # Filter to mimic the glottal pulse
        hfilt = ssp.parameter("HFilt", None)
        hpole1 = ssp.parameter("HPole1", 0.98)
        # HPole2 is still looked up as before, although the 'pp' filter
        # does not use its value.
        ssp.parameter("HPole2", 0.8)
        angle = pcm.hertz_to_radians(np.mean(pitch)) * ssp.parameter("Angle", 1.0)
        if hfilt == 'pp':
            h = ssp.ZeroFilter(h, 1.0)
            h = ssp.PolePairFilter(h, hpole1, angle)
        # NOTE(review): unlike the noise frames below, this framing does not
        # pass pad=opt.padding -- confirm that is intended.
        fh = ssp.Frame(h, size=frameSize, period=framePeriod)

        # Noise part
        n = np.random.normal(size=nSamples)
        zero = ssp.parameter("NoiseZero", 1.0)
        n = ssp.ZeroFilter(n, zero) # Include the radiation impedance
        npole = ssp.parameter("NPole", None)
        nf = ssp.parameter("NoiseFreq", 4000)
        if npole is not None:
            n = ssp.PolePairFilter(n, npole, pcm.hertz_to_radians(nf))
        fn = ssp.Frame(n, size=frameSize, period=framePeriod, pad=opt.padding)
        _weight_noise_frames(fn, hnr)

        # Combination
        e = _mix_and_flatten(fn, fh, frameSize)

    elif ex == 'ceplf':
        omega, alpha = ssp.glottal_pole_lf(
            f, pcm, pitch, hnr, visual=(opt.graphic == "ceplf"))
        epsilon = ssp.parameter("Epsilon", 5000.0)
        h = np.zeros(nSamples)
        for start, period, frame in _pulse_periods(pitch, nSamples):
            weight = _harmonic_weight(hnr[frame])
            pu = np.zeros((period))
            T0 = pcm.period_to_seconds(period)
            print(T0)
            Te = ssp.lf_te(T0, alpha[frame], omega[frame], epsilon)
            # A falsy Te leaves this period silent.
            if Te:
                pu = ssp.pulse_lf(pu, T0, Te, alpha[frame], omega[frame], epsilon)
            h[start:start+period] = pu * weight
        fh = ssp.Frame(h, size=frameSize, period=framePeriod, pad=opt.padding)

        # Noise part
        n = np.random.normal(size=nSamples)
        zero = ssp.parameter("NoiseZero", 1.0)
        n = ssp.ZeroFilter(n, zero) # Include the radiation impedance
        fn = ssp.Frame(n, size=frameSize, period=framePeriod, pad=opt.padding)
        _weight_noise_frames(fn, hnr)

        # Combination
        e = _mix_and_flatten(fn, fh, frameSize)

    elif ex == 'cepgm':
        # Infer the unstable poles via complex cepstrum, then build an explicit
        # glottal model.
        if not (opt.encode or opt.decode or opt.pitch):
            theta, magni = ssp.glottal_pole_gm(
                f, pcm, pitch, hnr, visual=(opt.graphic == "cepgm"))
        h = np.zeros(nSamples)
        for start, period, frame in _pulse_periods(pitch, nSamples):
            h[start] = 1
        fh = ssp.Frame(h, size=frameSize, period=framePeriod, pad=opt.padding)
        gl = ssp.MinPhaseGlottis()
        for i in range(len(fh)):
            # This is minimum phase; the glotter will invert if required.
            # Index the pole pair by the frame being filtered; previously the
            # stale frame index left over from the pulse loop was used, so
            # every frame was filtered with the same pole pair.
            gl.setpolepair(np.abs(magni[i]), theta[i])
            fh[i] = gl.glotter(fh[i])
            # Normalise non-silent frames before applying the HNR weight.
            if linalg.norm(fh[i]) > 1e-6:
                fh[i] *= np.sqrt(len(fh[i])) / linalg.norm(fh[i])
            fh[i] *= _harmonic_weight(hnr[i])

        if (opt.graphic == "h"):
            fig = ssp.Figure(1, 1)
            hPlot = fig.subplot()
            hPlot.plot(h, 'r')
            fig.show()

        # Noise part
        n = np.random.normal(size=nSamples)
        zero = ssp.parameter("NoiseZero", 1.0)
        n = ssp.ZeroFilter(n, zero) # Include the radiation impedance
        fn = ssp.Frame(n, size=frameSize, period=framePeriod, pad=opt.padding)
        _weight_noise_frames(fn, hnr)

        # Combination
        e = _mix_and_flatten(fn, fh, frameSize)

    else:
        # A bare `exit` expression does nothing, so the old code fell through
        # with no excitation defined.  Fail explicitly instead.
        raise ValueError("Unknown synthesis method: %r" % (ex,))

    if opt.excitation:
        s = e.flatten('C')/frameSize
    else:
        s = ssp.ARResynthesis(e, ar, g)
        if opt.ola:
            # Asymmetric window for OLA
            sw = np.hanning(frameSize+1)
            sw = np.delete(sw, -1)
            s = ssp.Window(s, sw)
            s = ssp.OverlapAdd(s)
        else:
            s = s.flatten('C')

    gain = ssp.parameter("Gain", 1.0)
    return s * gain
Esempio n. 2
0
def _harmonic_weight(hnr_value):
    """Return the harmonic amplitude weight sqrt(hnr / (hnr + 1))."""
    return np.sqrt(hnr_value / (hnr_value + 1))


def _pulse_periods(pitch, nSamples):
    """
    Yield (start, period, frame) for successive pitch periods.

    Starting at sample 0, each period is int(1.0 / pitch[frame] * r) samples
    long, where frame is the start sample divided by framePeriod.  Iteration
    stops when the next period would run past nSamples or when the frame
    index runs off the end of pitch.
    """
    start = 0
    frame = 0
    while start < nSamples and frame < len(pitch):
        period = int(1.0 / pitch[frame] * r)
        if start + period > nSamples:
            break
        yield start, period, frame
        start += period
        frame = start // framePeriod


def _weight_noise_frames(fn, hnr):
    """Scale each noise frame in place by sqrt(1 / (hnr + 1)); return fn."""
    for i in range(len(fn)):
        fn[i] *= np.sqrt(1.0 / (hnr[i] + 1))
    return fn


def _mix_and_flatten(fn, fh, frameSize):
    """
    Add the gained harmonic frames to the noise frames and flatten with AR.

    Each combined frame is Hanning-windowed to estimate an AR model, and the
    unwindowed frame is then replaced by its ssp.ARExcitation under that
    model.
    """
    assert(len(fh) == len(fn))
    hgain = ssp.parameter("HGain", 1.0)
    e = fn + fh * hgain
    hnw = np.hanning(frameSize)
    for i in range(len(e)):
        windowed = ssp.Window(e[i], hnw)
        eac = ssp.Autocorrelation(windowed)
        ea, eg = ssp.ARLevinson(eac, order=lpOrder[r])
        e[i] = ssp.ARExcitation(e[i], ea, eg)
    return e


def decode(params):
    """
    Decode a speech waveform.

    params is a sequence (ar, g, pitch, hnr) of per-frame parameters; all
    four must have the same length.  It is unpacked in the body rather than
    in the signature so the function is valid on both Python 2 and Python 3.

    The excitation is built according to the "Excitation" parameter ('ar',
    'noise', 'robot', 'synth', 'sine', 'holp' or 'shaped', default 'synth');
    any other value raises ValueError.  If opt.excitation is set, the
    flattened excitation divided by the frame size is returned.  Otherwise
    the excitation is passed through ssp.ARResynthesis with ar and g and
    then either overlap-added (opt.ola) or flattened.  The result is scaled
    by the "Gain" parameter.

    Relies on the module-level names opt, framePeriod, r, pcm, lpOrder and,
    for some methods, f.
    NOTE(review): r is presumably the sample rate and f the framed input
    signal -- confirm against the rest of the module.
    """
    (ar, g, pitch, hnr) = params

    nFrames = len(ar)
    assert(len(g) == nFrames)
    assert(len(pitch) == nFrames)
    assert(len(hnr) == nFrames)

    # The original framer padded the ends so the number of samples to
    # synthesise is a bit less than you might think
    if opt.ola:
        frameSize = framePeriod * 2
        nSamples = framePeriod * (nFrames-1)
    else:
        frameSize = framePeriod
        nSamples = frameSize * (nFrames-1)

    ex = ssp.parameter('Excitation', 'synth')

    # Use the original AR residual; it should be a very good
    # reconstruction.
    if ex == 'ar':
        e = ssp.ARExcitation(f, ar, g)

    # Just noise.  This is effectively a whisper synthesis.
    elif ex == 'noise':
        e = np.random.normal(size=f.shape)

    # Just harmonics, and with a fixed F0.  This is the classic robot
    # synthesis.
    elif ex == 'robot':
        ew = np.zeros(nSamples)
        period = int(1.0 / 200 * r)
        # One impulse of height `period` at the start of every period.
        ew[::period] = period
        e = ssp.Frame(ew, size=frameSize, period=framePeriod)

    # Synthesise harmonics plus noise in the ratio suggested by the
    # HNR.
    elif ex == 'synth':
        # Harmonic part
        mperiod = int(1.0 / np.mean(pitch) * r)
        gm = ssp.GlottalModel(ssp.parameter('Pulse', 'impulse'))
        pr, _ = ssp.pulse_response(gm, pcm, period=mperiod, order=lpOrder[r])
        h = np.zeros(nSamples)
        for start, period, frame in _pulse_periods(pitch, nSamples):
            weight = _harmonic_weight(hnr[frame])
            h[start:start+period] = gm.pulse(period, pcm) * weight
        h = ssp.ARExcitation(h, pr, 1.0)
        fh = ssp.Frame(h, size=frameSize, period=framePeriod)

        # Noise part
        n = np.random.normal(size=nSamples)
        n = ssp.ZeroFilter(n, 1.0) # Include the radiation impedance
        fn = ssp.Frame(n, size=frameSize, period=framePeriod)
        _weight_noise_frames(fn, hnr)

        hgain = ssp.parameter("HGain", 1.0)
        e = fn + fh * hgain

    # Like harmonics plus noise, but with explicit sinusoids instead
    # of time domain impulses.
    elif ex == 'sine':
        order = 20
        sine = ssp.Harmonics(r, order)
        h = np.zeros(nSamples)
        for start in range(0, len(h)-framePeriod, framePeriod):
            frame = start // framePeriod
            weight = _harmonic_weight(hnr[frame])
            h[start:start+framePeriod] = (
                sine.sample(pitch[frame], framePeriod) * weight)
        fh = ssp.Frame(h, size=frameSize, period=framePeriod)
        n = np.random.normal(size=nSamples)
        fn = ssp.Frame(n, size=frameSize, period=framePeriod)
        _weight_noise_frames(fn, hnr)
        e = fn + fh*10

    # High order linear prediction.  Synthesise the harmonics using
    # noise to excite a high order polynomial with roots resembling
    # harmonics.
    elif ex == 'holp':
        # Some noise
        n = np.random.normal(size=nSamples)
        fn = ssp.Frame(n, size=frameSize, period=framePeriod)

        # Use the noise to excite a high order AR model
        fh = np.ndarray(fn.shape)
        for i in range(len(fn)):
            hoar = ssp.ARHarmonicPoly(pitch[i], r, 0.7)
            fh[i] = ssp.ARResynthesis(fn[i], hoar, 1.0 / linalg.norm(hoar)**2)
            # Formatted explicitly so the output is the same under the
            # Python 2 print statement and the Python 3 print function.
            print("%s %s %s %s %s" % (i, pitch[i], linalg.norm(hoar),
                                      np.min(fh[i]), np.max(fh[i])))
            print("  %s %s" % (np.min(hoar), np.max(hoar)))

        # Weight the noise as for the other methods.  The weighted noise is
        # currently not mixed in: only the harmonic part is used.
        _weight_noise_frames(fn, hnr)
        e = fh

    # Shaped excitation.  The pulses are shaped by a filter to have a
    # rolloff, then added to the noise.  The resulting signal is
    # flattened using AR.
    elif ex == 'shaped':
        # Harmonic part
        gm = ssp.GlottalModel(ssp.parameter('Pulse', 'impulse'))
        gm.angle = pcm.hertz_to_radians(np.mean(pitch)*0.5)
        h = np.zeros(nSamples)
        for start, period, frame in _pulse_periods(pitch, nSamples):
            weight = _harmonic_weight(hnr[frame])
            h[start:start+period] = gm.pulse(period, pcm) * weight

        # Filter to mimic the glottal pulse
        hfilt = ssp.parameter("HFilt", None)
        hpole1 = ssp.parameter("HPole1", 0.98)
        # HPole2 is still looked up as before, although the 'pp' filter
        # does not use its value.
        ssp.parameter("HPole2", 0.8)
        angle = pcm.hertz_to_radians(np.mean(pitch)) * ssp.parameter("Angle", 1.0)
        # The harmonic frames were previously never built, so the
        # combination below failed on an undefined fh.  Apply the optional
        # pole-pair shaping and frame the harmonic signal.
        # NOTE(review): restored from the other versions of this function;
        # confirm no further filter options belong here.
        if hfilt == 'pp':
            h = ssp.ZeroFilter(h, 1.0)
            h = ssp.PolePairFilter(h, hpole1, angle)
        fh = ssp.Frame(h, size=frameSize, period=framePeriod)

        # Noise part
        n = np.random.normal(size=nSamples)
        zero = ssp.parameter("NoiseZero", 1.0)
        n = ssp.ZeroFilter(n, zero) # Include the radiation impedance
        npole = ssp.parameter("NPole", None)
        nf = ssp.parameter("NoiseFreq", 4000)
        if npole is not None:
            n = ssp.PolePairFilter(n, npole, pcm.hertz_to_radians(nf))
        fn = ssp.Frame(n, size=frameSize, period=framePeriod)
        _weight_noise_frames(fn, hnr)

        # Combination
        e = _mix_and_flatten(fn, fh, frameSize)

    else:
        # A bare `exit` expression does nothing, so the old code fell through
        # with no excitation defined.  Fail explicitly instead.
        raise ValueError("Unknown synthesis method: %r" % (ex,))

    if opt.excitation:
        s = e.flatten('C')/frameSize
    else:
        s = ssp.ARResynthesis(e, ar, g)
        if opt.ola:
            # Asymmetric window for OLA
            sw = np.hanning(frameSize+1)
            sw = np.delete(sw, -1)
            s = ssp.Window(s, sw)
            s = ssp.OverlapAdd(s)
        else:
            s = s.flatten('C')

    gain = ssp.parameter("Gain", 1.0)
    return s * gain
Esempio n. 3
0
def _harmonic_weight(hnr_value):
    """Return the harmonic amplitude weight sqrt(hnr / (hnr + 1))."""
    return np.sqrt(hnr_value / (hnr_value + 1))


def _pulse_periods(pitch, nSamples):
    """
    Yield (start, period, frame) for successive pitch periods.

    Starting at sample 0, each period is int(1.0 / pitch[frame] * r) samples
    long, where frame is the start sample divided by framePeriod.  Iteration
    stops when the next period would run past nSamples or when the frame
    index runs off the end of pitch.
    """
    start = 0
    frame = 0
    while start < nSamples and frame < len(pitch):
        period = int(1.0 / pitch[frame] * r)
        if start + period > nSamples:
            break
        yield start, period, frame
        start += period
        frame = start // framePeriod


def _weight_noise_frames(fn, hnr):
    """Scale each noise frame in place by sqrt(1 / (hnr + 1)); return fn."""
    for i in range(len(fn)):
        fn[i] *= np.sqrt(1.0 / (hnr[i] + 1))
    return fn


def _mix_and_flatten(fn, fh, frameSize):
    """
    Add the gained harmonic frames to the noise frames and flatten with AR.

    Each combined frame is Hanning-windowed to estimate an AR model, and the
    unwindowed frame is then replaced by its ssp.ARExcitation under that
    model.
    """
    assert (len(fh) == len(fn))
    hgain = ssp.parameter("HGain", 1.0)
    e = fn + fh * hgain
    hnw = np.hanning(frameSize)
    for i in range(len(e)):
        windowed = ssp.Window(e[i], hnw)
        eac = ssp.Autocorrelation(windowed)
        ea, eg = ssp.ARLevinson(eac, order=lpOrder[r])
        e[i] = ssp.ARExcitation(e[i], ea, eg)
    return e


def decode(tuple):
    """
    Decode a speech waveform.

    tuple is a sequence (ark, g, pitch, hnr) of per-frame parameters; all
    four must have the same length.  When opt.glottal is 'cepgm' and any of
    opt.encode, opt.decode or opt.pitch is set, the last two columns of ark
    are read as theta and log(magni) for the glottal pole pair and the
    remaining columns as the AR coefficients; otherwise ark is used as the
    AR coefficients directly.

    The excitation is built according to opt.glottal ('ar', 'noise',
    'robot', 'synth', 'sine', 'holp', 'shaped', 'ceplf' or 'cepgm'); any
    other value raises ValueError.  If opt.excitation is set, the flattened
    excitation divided by the frame size is returned.  Otherwise the
    excitation is passed through ssp.ARResynthesis with ar and g and then
    either overlap-added (opt.ola) or flattened.  The result is scaled by
    the "Gain" parameter.

    Relies on the module-level names opt, framePeriod, r, pcm, lpOrder and,
    for some methods, f.
    NOTE(review): r is presumably the sample rate and f the framed input
    signal -- confirm against the rest of the module.
    """
    (ark, g, pitch, hnr) = tuple
    print("Frame padding:", opt.padding)

    nFrames = len(ark)
    assert (len(g) == nFrames)
    assert (len(pitch) == nFrames)
    assert (len(hnr) == nFrames)

    # The original framer padded the ends so the number of samples to
    # synthesise is a bit less than you might think
    if opt.ola:
        frameSize = framePeriod * 2
        nSamples = framePeriod * (nFrames - 1)
    else:
        frameSize = framePeriod
        nSamples = frameSize * (nFrames - 1)

    ex = opt.glottal
    if opt.glottal == 'cepgm' and (opt.encode or opt.decode or opt.pitch):
        # The glottal pole parameters travel in the last two columns.
        order = ark.shape[-1] - 2
        ar = ark[:, 0:order]
        theta = ark[:, -2]
        magni = np.exp(ark[:, -1])
    else:
        ar = ark

    # Use the original AR residual; it should be a very good reconstruction.
    if ex == 'ar':
        e = ssp.ARExcitation(f, ar, g)

    # Just noise.  This is effectively a whisper synthesis.
    elif ex == 'noise':
        e = np.random.normal(size=(nFrames, frameSize))

    # Just harmonics, and with a fixed F0.  This is the classic robot
    # synthesis.
    elif ex == 'robot':
        ew = np.zeros(nSamples)
        period = int(1.0 / 200 * r)
        # One impulse of height `period` at the start of every period.
        ew[::period] = period
        e = ssp.Frame(ew, size=frameSize, period=framePeriod)

    # Synthesise harmonics plus noise in the ratio suggested by the HNR.
    elif ex == 'synth':
        # Harmonic part
        mperiod = int(1.0 / np.mean(pitch) * r)
        gm = ssp.GlottalModel(ssp.parameter('Pulse', 'impulse'))
        pr, _ = ssp.pulse_response(gm, pcm, period=mperiod, order=lpOrder[r])
        h = np.zeros(nSamples)
        for start, period, frame in _pulse_periods(pitch, nSamples):
            weight = _harmonic_weight(hnr[frame])
            h[start:start + period] = gm.pulse(period, pcm) * weight
        h = ssp.ARExcitation(h, pr, 1.0)
        fh = ssp.Frame(h, size=frameSize, period=framePeriod, pad=opt.padding)

        # Noise part
        n = np.random.normal(size=nSamples)
        n = ssp.ZeroFilter(n, 1.0)  # Include the radiation impedance
        fn = ssp.Frame(n, size=frameSize, period=framePeriod, pad=opt.padding)
        _weight_noise_frames(fn, hnr)

        hgain = ssp.parameter("HGain", 1.0)
        e = fn + fh * hgain

    # Like harmonics plus noise, but with explicit sinusoids instead of time
    # domain impulses.
    elif ex == 'sine':
        order = 20
        sine = ssp.Harmonics(r, order)
        h = np.zeros(nSamples)
        for start in range(0, len(h) - framePeriod, framePeriod):
            frame = start // framePeriod
            weight = _harmonic_weight(hnr[frame])
            h[start:start + framePeriod] = (
                sine.sample(pitch[frame], framePeriod) * weight)
        fh = ssp.Frame(h, size=frameSize, period=framePeriod, pad=opt.padding)
        n = np.random.normal(size=nSamples)
        fn = ssp.Frame(n, size=frameSize, period=framePeriod, pad=opt.padding)
        _weight_noise_frames(fn, hnr)
        e = fn + fh * 10

    # High order linear prediction.  Synthesise the harmonics using noise to
    # excite a high order polynomial with roots resembling harmonics.
    elif ex == 'holp':
        # Some noise
        n = np.random.normal(size=nSamples)
        fn = ssp.Frame(n, size=frameSize, period=framePeriod)

        # Use the noise to excite a high order AR model
        fh = np.ndarray(fn.shape)
        for i in range(len(fn)):
            hoar = ssp.ARHarmonicPoly(pitch[i], r, 0.7)
            fh[i] = ssp.ARResynthesis(fn[i], hoar, 1.0 / linalg.norm(hoar)**2)
            print(i, pitch[i], linalg.norm(hoar), np.min(fh[i]), np.max(fh[i]))
            print(' ', np.min(hoar), np.max(hoar))

        # Weight the noise as for the other methods.  The weighted noise is
        # currently not mixed in: only the harmonic part is used.
        _weight_noise_frames(fn, hnr)
        e = fh

    # Shaped excitation.  The pulses are shaped by a filter to have a
    # rolloff, then added to the noise.  The resulting signal is
    # flattened using AR.
    elif ex == 'shaped':
        # Harmonic part
        gm = ssp.GlottalModel(ssp.parameter('Pulse', 'impulse'))
        gm.angle = pcm.hertz_to_radians(np.mean(pitch) * 0.5)
        h = np.zeros(nSamples)
        for start, period, frame in _pulse_periods(pitch, nSamples):
            weight = _harmonic_weight(hnr[frame])
            h[start:start + period] = gm.pulse(period, pcm) * weight

        # Filter to mimic the glottal pulse
        hfilt = ssp.parameter("HFilt", None)
        hpole1 = ssp.parameter("HPole1", 0.98)
        # HPole2 is still looked up as before, although the 'pp' filter
        # does not use its value.
        ssp.parameter("HPole2", 0.8)
        angle = pcm.hertz_to_radians(np.mean(pitch)) * ssp.parameter(
            "Angle", 1.0)
        if hfilt == 'pp':
            h = ssp.ZeroFilter(h, 1.0)
            h = ssp.PolePairFilter(h, hpole1, angle)
        # NOTE(review): unlike the noise frames below, this framing does not
        # pass pad=opt.padding -- confirm that is intended.
        fh = ssp.Frame(h, size=frameSize, period=framePeriod)

        # Noise part
        n = np.random.normal(size=nSamples)
        zero = ssp.parameter("NoiseZero", 1.0)
        n = ssp.ZeroFilter(n, zero)  # Include the radiation impedance
        npole = ssp.parameter("NPole", None)
        nf = ssp.parameter("NoiseFreq", 4000)
        if npole is not None:
            n = ssp.PolePairFilter(n, npole, pcm.hertz_to_radians(nf))
        fn = ssp.Frame(n, size=frameSize, period=framePeriod, pad=opt.padding)
        _weight_noise_frames(fn, hnr)

        # Combination
        e = _mix_and_flatten(fn, fh, frameSize)

    elif ex == 'ceplf':
        omega, alpha = ssp.glottal_pole_lf(f,
                                           pcm,
                                           pitch,
                                           hnr,
                                           visual=(opt.graphic == "ceplf"))
        epsilon = ssp.parameter("Epsilon", 5000.0)
        h = np.zeros(nSamples)
        for start, period, frame in _pulse_periods(pitch, nSamples):
            weight = _harmonic_weight(hnr[frame])
            pu = np.zeros((period))
            T0 = pcm.period_to_seconds(period)
            print(T0)
            Te = ssp.lf_te(T0, alpha[frame], omega[frame], epsilon)
            # A falsy Te leaves this period silent.
            if Te:
                pu = ssp.pulse_lf(pu, T0, Te, alpha[frame], omega[frame],
                                  epsilon)
            h[start:start + period] = pu * weight
        fh = ssp.Frame(h, size=frameSize, period=framePeriod, pad=opt.padding)

        # Noise part
        n = np.random.normal(size=nSamples)
        zero = ssp.parameter("NoiseZero", 1.0)
        n = ssp.ZeroFilter(n, zero)  # Include the radiation impedance
        fn = ssp.Frame(n, size=frameSize, period=framePeriod, pad=opt.padding)
        _weight_noise_frames(fn, hnr)

        # Combination
        e = _mix_and_flatten(fn, fh, frameSize)

    elif ex == 'cepgm':
        # Infer the unstable poles via complex cepstrum, then build an explicit
        # glottal model.
        if not (opt.encode or opt.decode or opt.pitch):
            theta, magni = ssp.glottal_pole_gm(f,
                                               pcm,
                                               pitch,
                                               hnr,
                                               visual=(opt.graphic == "cepgm"))
        h = np.zeros(nSamples)
        for start, period, frame in _pulse_periods(pitch, nSamples):
            h[start] = 1
        fh = ssp.Frame(h, size=frameSize, period=framePeriod, pad=opt.padding)
        gl = ssp.MinPhaseGlottis()
        for i in range(len(fh)):
            # This is minimum phase; the glotter will invert if required.
            # Index the pole pair by the frame being filtered; previously the
            # stale frame index left over from the pulse loop was used, so
            # every frame was filtered with the same pole pair.
            gl.setpolepair(np.abs(magni[i]), theta[i])
            fh[i] = gl.glotter(fh[i])
            # Normalise non-silent frames before applying the HNR weight.
            if linalg.norm(fh[i]) > 1e-6:
                fh[i] *= np.sqrt(len(fh[i])) / linalg.norm(fh[i])
            fh[i] *= _harmonic_weight(hnr[i])

        if (opt.graphic == "h"):
            fig = ssp.Figure(1, 1)
            hPlot = fig.subplot()
            hPlot.plot(h, 'r')
            fig.show()

        # Noise part
        n = np.random.normal(size=nSamples)
        zero = ssp.parameter("NoiseZero", 1.0)
        n = ssp.ZeroFilter(n, zero)  # Include the radiation impedance
        fn = ssp.Frame(n, size=frameSize, period=framePeriod, pad=opt.padding)
        _weight_noise_frames(fn, hnr)

        # Combination
        e = _mix_and_flatten(fn, fh, frameSize)

    else:
        # A bare `exit` expression does nothing, so the old code fell through
        # with no excitation defined.  Fail explicitly instead.
        raise ValueError("Unknown synthesis method: %r" % (ex, ))

    if opt.excitation:
        s = e.flatten('C') / frameSize
    else:
        s = ssp.ARResynthesis(e, ar, g)
        if opt.ola:
            # Asymmetric window for OLA
            sw = np.hanning(frameSize + 1)
            sw = np.delete(sw, -1)
            s = ssp.Window(s, sw)
            s = ssp.OverlapAdd(s)
        else:
            s = s.flatten('C')

    gain = ssp.parameter("Gain", 1.0)
    return s * gain
Esempio n. 4
0
def _harmonic_weight(hnr_value):
    """Return the harmonic amplitude weight sqrt(hnr / (hnr + 1))."""
    return np.sqrt(hnr_value / (hnr_value + 1))


def _pulse_periods(pitch, nSamples):
    """
    Yield (start, period, frame) for successive pitch periods.

    Starting at sample 0, each period is int(1.0 / pitch[frame] * r) samples
    long, where frame is the start sample divided by framePeriod.  Iteration
    stops when the next period would run past nSamples or when the frame
    index runs off the end of pitch.
    """
    start = 0
    frame = 0
    while start < nSamples and frame < len(pitch):
        period = int(1.0 / pitch[frame] * r)
        if start + period > nSamples:
            break
        yield start, period, frame
        start += period
        frame = start // framePeriod


def _weight_noise_frames(fn, hnr):
    """Scale each noise frame in place by sqrt(1 / (hnr + 1)); return fn."""
    for i in range(len(fn)):
        fn[i] *= np.sqrt(1.0 / (hnr[i] + 1))
    return fn


def _mix_and_flatten(fn, fh, frameSize):
    """
    Add the gained harmonic frames to the noise frames and flatten with AR.

    Each combined frame is Hanning-windowed to estimate an AR model, and the
    unwindowed frame is then replaced by its ssp.ARExcitation under that
    model.
    """
    assert(len(fh) == len(fn))
    hgain = ssp.parameter("HGain", 1.0)
    e = fn + fh * hgain
    hnw = np.hanning(frameSize)
    for i in range(len(e)):
        windowed = ssp.Window(e[i], hnw)
        eac = ssp.Autocorrelation(windowed)
        ea, eg = ssp.ARLevinson(eac, order=lpOrder[r])
        e[i] = ssp.ARExcitation(e[i], ea, eg)
    return e


def decode(params):
    """
    Decode a speech waveform.

    params is a sequence (ar, g, pitch, hnr) of per-frame parameters; all
    four must have the same length.  It is unpacked in the body rather than
    in the signature so the function is valid on both Python 2 and Python 3.

    The excitation is built according to the "Excitation" parameter ('ar',
    'noise', 'robot', 'synth', 'sine', 'holp' or 'shaped', default 'synth');
    any other value raises ValueError.  If opt.excitation is set, the
    flattened excitation divided by the frame size is returned.  Otherwise
    the excitation is passed through ssp.ARResynthesis with ar and g and
    then either overlap-added (opt.ola) or flattened.  The result is scaled
    by the "Gain" parameter.

    Relies on the module-level names opt, framePeriod, r, pcm, lpOrder and,
    for some methods, f.
    NOTE(review): r is presumably the sample rate and f the framed input
    signal -- confirm against the rest of the module.
    """
    (ar, g, pitch, hnr) = params

    nFrames = len(ar)
    assert(len(g) == nFrames)
    assert(len(pitch) == nFrames)
    assert(len(hnr) == nFrames)

    # The original framer padded the ends so the number of samples to
    # synthesise is a bit less than you might think
    if opt.ola:
        frameSize = framePeriod * 2
        nSamples = framePeriod * (nFrames-1)
    else:
        frameSize = framePeriod
        nSamples = frameSize * (nFrames-1)

    ex = ssp.parameter('Excitation', 'synth')

    # Use the original AR residual; it should be a very good
    # reconstruction.
    if ex == 'ar':
        e = ssp.ARExcitation(f, ar, g)

    # Just noise.  This is effectively a whisper synthesis.
    elif ex == 'noise':
        e = np.random.normal(size=f.shape)

    # Just harmonics, and with a fixed F0.  This is the classic robot
    # synthesis.
    elif ex == 'robot':
        ew = np.zeros(nSamples)
        period = int(1.0 / 200 * r)
        # One impulse of height `period` at the start of every period.
        ew[::period] = period
        e = ssp.Frame(ew, size=frameSize, period=framePeriod)

    # Synthesise harmonics plus noise in the ratio suggested by the
    # HNR.
    elif ex == 'synth':
        # Harmonic part
        mperiod = int(1.0 / np.mean(pitch) * r)
        gm = ssp.GlottalModel(ssp.parameter('Pulse', 'impulse'))
        pr, _ = ssp.pulse_response(gm, pcm, period=mperiod, order=lpOrder[r])
        h = np.zeros(nSamples)
        for start, period, frame in _pulse_periods(pitch, nSamples):
            weight = _harmonic_weight(hnr[frame])
            h[start:start+period] = gm.pulse(period, pcm) * weight
        h = ssp.ARExcitation(h, pr, 1.0)
        fh = ssp.Frame(h, size=frameSize, period=framePeriod)

        # Noise part
        n = np.random.normal(size=nSamples)
        n = ssp.ZeroFilter(n, 1.0) # Include the radiation impedance
        fn = ssp.Frame(n, size=frameSize, period=framePeriod)
        _weight_noise_frames(fn, hnr)

        hgain = ssp.parameter("HGain", 1.0)
        e = fn + fh * hgain

    # Like harmonics plus noise, but with explicit sinusoids instead
    # of time domain impulses.
    elif ex == 'sine':
        order = 20
        sine = ssp.Harmonics(r, order)
        h = np.zeros(nSamples)
        for start in range(0, len(h)-framePeriod, framePeriod):
            frame = start // framePeriod
            weight = _harmonic_weight(hnr[frame])
            h[start:start+framePeriod] = (
                sine.sample(pitch[frame], framePeriod) * weight)
        fh = ssp.Frame(h, size=frameSize, period=framePeriod)
        n = np.random.normal(size=nSamples)
        fn = ssp.Frame(n, size=frameSize, period=framePeriod)
        _weight_noise_frames(fn, hnr)
        e = fn + fh*10

    # High order linear prediction.  Synthesise the harmonics using
    # noise to excite a high order polynomial with roots resembling
    # harmonics.
    elif ex == 'holp':
        # Some noise
        n = np.random.normal(size=nSamples)
        fn = ssp.Frame(n, size=frameSize, period=framePeriod)

        # Use the noise to excite a high order AR model
        fh = np.ndarray(fn.shape)
        for i in range(len(fn)):
            hoar = ssp.ARHarmonicPoly(pitch[i], r, 0.7)
            fh[i] = ssp.ARResynthesis(fn[i], hoar, 1.0 / linalg.norm(hoar)**2)
            # Formatted explicitly so the output is the same under the
            # Python 2 print statement and the Python 3 print function.
            print("%s %s %s %s %s" % (i, pitch[i], linalg.norm(hoar),
                                      np.min(fh[i]), np.max(fh[i])))
            print("  %s %s" % (np.min(hoar), np.max(hoar)))

        # Weight the noise as for the other methods.  The weighted noise is
        # currently not mixed in: only the harmonic part is used.
        _weight_noise_frames(fn, hnr)
        e = fh

    # Shaped excitation.  The pulses are shaped by a filter to have a
    # rolloff, then added to the noise.  The resulting signal is
    # flattened using AR.
    elif ex == 'shaped':
        # Harmonic part
        gm = ssp.GlottalModel(ssp.parameter('Pulse', 'impulse'))
        gm.angle = pcm.hertz_to_radians(np.mean(pitch)*0.5)
        h = np.zeros(nSamples)
        for start, period, frame in _pulse_periods(pitch, nSamples):
            weight = _harmonic_weight(hnr[frame])
            h[start:start+period] = gm.pulse(period, pcm) * weight

        # Filter to mimic the glottal pulse; HFilt selects at most one of
        # the three shaping filters.
        hfilt = ssp.parameter("HFilt", None)
        hpole1 = ssp.parameter("HPole1", 0.98)
        hpole2 = ssp.parameter("HPole2", 0.8)
        angle = pcm.hertz_to_radians(np.mean(pitch)) * ssp.parameter("Angle", 1.0)
        if hfilt == 'pp':
            h = ssp.ZeroFilter(h, 1.0)
            h = ssp.PolePairFilter(h, hpole1, angle)
        elif hfilt == 'g':
            h = ssp.GFilter(h, hpole1, angle, hpole2)
        elif hfilt == 'p':
            h = ssp.PFilter(h, hpole1, angle, hpole2)
        fh = ssp.Frame(h, size=frameSize, period=framePeriod)

        # Noise part
        n = np.random.normal(size=nSamples)
        zero = ssp.parameter("NoiseZero", 1.0)
        n = ssp.ZeroFilter(n, zero) # Include the radiation impedance
        npole = ssp.parameter("NPole", None)
        nf = ssp.parameter("NoiseFreq", 4000)
        if npole is not None:
            n = ssp.PolePairFilter(n, npole, pcm.hertz_to_radians(nf))
        fn = ssp.Frame(n, size=frameSize, period=framePeriod)
        _weight_noise_frames(fn, hnr)

        # Combination
        e = _mix_and_flatten(fn, fh, frameSize)

    else:
        # A bare `exit` expression does nothing, so the old code fell through
        # with no excitation defined.  Fail explicitly instead.
        raise ValueError("Unknown synthesis method: %r" % (ex,))

    if opt.excitation:
        s = e.flatten('C')/frameSize
    else:
        s = ssp.ARResynthesis(e, ar, g)
        if opt.ola:
            # Asymmetric window for OLA
            sw = np.hanning(frameSize+1)
            sw = np.delete(sw, -1)
            s = ssp.Window(s, sw)
            s = ssp.OverlapAdd(s)
        else:
            s = s.flatten('C')

    gain = ssp.parameter("Gain", 1.0)
    return s * gain