예제 #1
0
def encode(a, pcm):
    """
    Encode a speech waveform into AR coefficients, gain, pitch and HNR.

    The encoding framers (frames and pitch) pad the frames so that the
    first frame is centered on sample zero.  This is consistent with
    STRAIGHT and SPTK (I hope!).  At least, it means the pitch can have
    longer frame lengths and still align with the OLA'd frames.

    Parameters:
      a    the waveform to encode; it is framed directly by ssp.Frame.
      pcm  object supplying seconds_to_period(); also handed to
           ssp.ACPitch.

    Module-level names read: opt.ola, framePeriod, lpOrder, r.
    Module-level names written: f (the analysis frames, see below).

    Returns the tuple (ar, g, pitch, hnr).

    Raises ValueError if the 'AR' parameter names an estimator that is
    not handled here.
    """
    if opt.ola:
        # 25 ms frame size for overlap-add
        frameSize = pcm.seconds_to_period(0.025, 'atleast')
    else:
        frameSize = framePeriod
    pitchSize = pcm.seconds_to_period(0.1, 'atmost')
    # A single pre-formatted string prints identically under the
    # Python 2 print statement and the Python 3 print function.
    print("Encoding with period %s size %s and pitch window %s"
          % (framePeriod, frameSize, pitchSize))

    # First the pitch as it's on the unaltered waveform.  The frame
    # should be long with no window.  1024 at 16 kHz is 64 ms.
    pf = ssp.Frame(a, size=pitchSize, period=framePeriod)
    pitch, hnr = ssp.ACPitch(pf, pcm)

    # Pre-emphasis, only when a "Pre" parameter has been supplied.
    # NOTE(review): the reason for the fixed division by 5 is not
    # visible in this function -- confirm before changing it.
    pre = ssp.parameter("Pre", None)
    if pre is not None:
        a = ssp.PoleFilter(a, pre) / 5

    # Keep f around after the function so the decoder can do a
    # reference decoding on the real excitation.
    global f
    f = ssp.Frame(a, size=frameSize, period=framePeriod)

    # Build the window with one point too many and drop the last one,
    # leaving exactly frameSize points.  (np.hanning(frameSize+1) was
    # the earlier alternative to ssp.nuttall.)
    aw = ssp.nuttall(frameSize+1)
    aw = np.delete(aw, -1)
    w = ssp.Window(f, aw)
    ac = ssp.Autocorrelation(w)

    # Choose the AR estimator.  levinson/ridge/lasso work from the
    # autocorrelation; sparse/student work from the windowed frames.
    # NOTE(review): r is a module-level index into lpOrder, presumably
    # the sample rate -- confirm.
    lp = ssp.parameter('AR', 'levinson')
    order = lpOrder[r]
    if lp == 'levinson':
        ar, g = ssp.ARLevinson(ac, order)
    elif lp == 'ridge':
        ar, g = ssp.ARRidge(ac, order, 0.03)
    elif lp == 'lasso':
        ar, g = ssp.ARLasso(ac, order, 5)
    elif lp == 'sparse':
        ar, g = ssp.ARSparse(w, order, ssp.parameter('Gamma', 1.414))
    elif lp == 'student':
        ar, g = ssp.ARStudent(w, order, ssp.parameter('DoF', 50.0))
    else:
        # Without this, ar and g stay unbound and the return statement
        # fails with an UnboundLocalError that hides the real cause.
        raise ValueError(
            "Unknown AR method %r; expected one of 'levinson', 'ridge', "
            "'lasso', 'sparse', 'student'" % (lp,)
        )

    # Debug plot of the pitch track; deliberately disabled.
    if False:
        fig = ssp.Figure(5, 1)
        sPlot = fig.subplot()
        sPlot.plot(pitch, 'c')
        sPlot.set_xlim(0, len(pitch))
        sPlot.set_ylim(0, 500)
        plt.show()

    return (ar, g, pitch, hnr)
예제 #2
0
    wa, wg = ssp.ARMatrix(f, order, method=ssp.parameter('Method', 'matrix'))
# Pick the AR variant named by t.  Every branch below leaves its
# coefficients and gain in (wa, wg).
# NOTE(review): the chain has no else; when t matches none of these
# names, wa/wg must already have been assigned earlier -- confirm.
if t == 'arwarp':
    # Warp the existing AR model (a, g); alpha is looked up in ssp.mel
    # by the PCM sample rate.
    wa, wg = ssp.ARAllPassWarp(a, g, alpha=ssp.mel[pcm.rate])
elif t == 'acwarp':
    # Warp the autocorrelation of the frames, then run Levinson on the
    # warped autocorrelation.
    ac = ssp.Autocorrelation(f)
    ac = ssp.AutocorrelationAllPassWarp(ac, alpha=ssp.mel[pcm.rate],
                                        size=order+1)
    wa, wg = ssp.ARLevinson(ac, order)
elif t == 'tdwarp':
    # Apply a warp matrix to the frames themselves, then take the
    # autocorrelation and run Levinson.
    # NOTE(review): 256 is hard-coded; presumably it has to agree with
    # the frame length of f for the dot product to be valid -- confirm.
    m = ssp.AllPassWarpMatrix(256, ssp.mel[pcm.rate])
    fw = np.dot(f,m.T)
    aw = ssp.Autocorrelation(fw)
    wa, wg = ssp.ARLevinson(aw, order)
elif t == 'ridge':
    # AR estimate from the autocorrelation with a fixed ridge of 0.01.
    ac = ssp.Autocorrelation(f)
    wa, wg = ssp.ARRidge(ac, order, ridge=0.01)
elif t == 'lasso':
    # AR estimate from the autocorrelation with a fixed ridge of 30.
    ac = ssp.Autocorrelation(f)
    wa, wg = ssp.ARLasso(ac, order, ridge=30)
elif t == 'sparse':
    # Works on the frames directly; the third argument comes from the
    # "Gamma" parameter, defaulting to 1.
    wa, wg = ssp.ARSparse(f, order, ssp.parameter("Gamma", 1))
elif t == 'student':
    # Works on the frames directly; the third argument comes from the
    # "DF" parameter, defaulting to 1.
    wa, wg = ssp.ARStudent(f, order, ssp.parameter("DF", 1))
# lap() is defined outside this excerpt; presumably it records a timing
# checkpoint under the given label -- confirm.
lap(t)
# Spectrum of the selected model, requested with nSpec=128.
ws = ssp.ARSpectrum(wa, wg, nSpec=128)
lap("Spectrum")

#llRatio = ARLogLikelihoodRatio(f, order)

# Excitation of the same frames under the original model (a, g) and
# under the variant selected above (wa, wg).
exn = ssp.ARExcitation(f, a, g)
exnw = ssp.ARExcitation(f, wa, wg)