Ejemplo n.º 1
0
def encode(a, pcm):
    """
    Encode a speech waveform.

    The encoding framers (frames and pitch) pad the frames so that the
    first frame is centered on sample zero.  This is consistent with
    STRAIGHT and SPTK (I hope!).  At least, it means the pitch can have
    longer frame lengths and still align with the OLA'd frames.

    Reads the module-level names opt, framePeriod, lpOrder and r, and
    stores the (optionally pre-emphasised) frames in the global f.

    Args:
        a: Waveform samples; handed to ssp.Frame / ssp.PoleFilter.
        pcm: Object providing seconds_to_period(); also handed to
            ssp.ACPitch.

    Returns:
        Tuple (ar, g, pitch, hnr): the two outputs of the selected
        ssp.AR* routine (presumably AR coefficients and gain -- confirm
        against ssp), followed by the two outputs of ssp.ACPitch.

    Raises:
        ValueError: If the 'AR' parameter does not name one of the
            supported methods.
    """
    # Keep f around after the function so the decoder can do a
    # reference decoding on the real excitation.
    global f

    if opt.ola:
        frameSize = pcm.seconds_to_period(0.025, 'atleast')  # 25ms frame size
    else:
        frameSize = framePeriod
    pitchSize = pcm.seconds_to_period(0.1, 'atmost')
    # A single pre-formatted argument prints the same text whether print
    # is a statement (Python 2) or a function (Python 3).
    print("Encoding with period %s size %s and pitch window %s"
          % (framePeriod, frameSize, pitchSize))

    # First the pitch as it's on the unaltered waveform.  The frame
    # should be long with no window; its size is requested above as
    # 0.1 s with the 'atmost' option.
    pf = ssp.Frame(a, size=pitchSize, period=framePeriod)
    pitch, hnr = ssp.ACPitch(pf, pcm)

    # Pre-emphasis, applied only when the "Pre" parameter is set.
    pre = ssp.parameter("Pre", None)
    if pre is not None:
        a = ssp.PoleFilter(a, pre) / 5

    f = ssp.Frame(a, size=frameSize, period=framePeriod)

    # Build the window one sample too long and drop the final sample
    # (presumably to get a periodic rather than symmetric window --
    # confirm).  np.hanning(frameSize+1) was the earlier alternative.
    aw = ssp.nuttall(frameSize+1)
    aw = np.delete(aw, -1)
    w = ssp.Window(f, aw)
    ac = ssp.Autocorrelation(w)

    # Select the AR estimator.  'sparse' and 'student' are fed the
    # windowed frames; the others are fed the autocorrelation.
    lp = ssp.parameter('AR', 'levinson')
    if lp == 'levinson':
        ar, g = ssp.ARLevinson(ac, lpOrder[r])
    elif lp == 'ridge':
        ar, g = ssp.ARRidge(ac, lpOrder[r], 0.03)
    elif lp == 'lasso':
        ar, g = ssp.ARLasso(ac, lpOrder[r], 5)
    elif lp == 'sparse':
        ar, g = ssp.ARSparse(w, lpOrder[r], ssp.parameter('Gamma', 1.414))
    elif lp == 'student':
        ar, g = ssp.ARStudent(w, lpOrder[r], ssp.parameter('DoF', 50.0))
    else:
        # Without this branch ar and g stay unbound and the failure only
        # shows up as an UnboundLocalError at the return statement.
        raise ValueError("Unknown AR method: %r" % (lp,))

    # Debug plot of the pitch track; disabled by default.
    if False:
        fig = ssp.Figure(5, 1)
        sPlot = fig.subplot()
        sPlot.plot(pitch, 'c')
        sPlot.set_xlim(0, len(pitch))
        sPlot.set_ylim(0, 500)
        plt.show()

    return (ar, g, pitch, hnr)
Ejemplo n.º 2
0
def encode(a, pcm):
    """
    Encode a speech waveform.

    The encoding framers (frames and pitch) pad the frames so that the
    first frame is centered on sample zero.  This is consistent with
    STRAIGHT and SPTK (I hope!).  At least, it means the pitch can have
    longer frame lengths and still align with the OLA'd frames.

    Reads the module-level names opt, framePeriod, lpOrder and r, and
    stores the (optionally pre-emphasised) frames in the global f.

    Args:
        a: Waveform samples; handed to ssp.Frame / ssp.PoleFilter.
        pcm: Object providing seconds_to_period(); also handed to
            ssp.ACPitch.

    Returns:
        Tuple (ar, g, pitch, hnr): the two outputs of the selected
        ssp.AR* routine (presumably AR coefficients and gain -- confirm
        against ssp), followed by the two outputs of ssp.ACPitch.

    Raises:
        ValueError: If the 'AR' parameter does not name one of the
            supported methods.
    """
    # Keep f around after the function so the decoder can do a
    # reference decoding on the real excitation.
    global f

    if opt.ola:
        frameSize = pcm.seconds_to_period(0.025, 'atleast')  # 25ms frame size
    else:
        frameSize = framePeriod
    pitchSize = pcm.seconds_to_period(0.1, 'atmost')
    # A single pre-formatted argument prints the same text whether print
    # is a statement (Python 2) or a function (Python 3).
    print("Encoding with period %s size %s and pitch window %s"
          % (framePeriod, frameSize, pitchSize))

    # First the pitch as it's on the unaltered waveform.  The frame
    # should be long with no window; its size is requested above as
    # 0.1 s with the 'atmost' option.
    pf = ssp.Frame(a, size=pitchSize, period=framePeriod)
    pitch, hnr = ssp.ACPitch(pf, pcm)

    # Pre-emphasis, applied only when the "Pre" parameter is set.
    pre = ssp.parameter("Pre", None)
    if pre is not None:
        a = ssp.PoleFilter(a, pre) / 5

    f = ssp.Frame(a, size=frameSize, period=framePeriod)

    # Build the window one sample too long and drop the final sample
    # (presumably to get a periodic rather than symmetric window --
    # confirm).  np.hanning(frameSize+1) was the earlier alternative.
    aw = ssp.nuttall(frameSize+1)
    aw = np.delete(aw, -1)
    w = ssp.Window(f, aw)
    ac = ssp.Autocorrelation(w)

    # Select the AR estimator.  'sparse' and 'student' are fed the
    # windowed frames; the others are fed the autocorrelation.
    lp = ssp.parameter('AR', 'levinson')
    if lp == 'levinson':
        ar, g = ssp.ARLevinson(ac, lpOrder[r])
    elif lp == 'ridge':
        ar, g = ssp.ARRidge(ac, lpOrder[r], 0.03)
    elif lp == 'lasso':
        ar, g = ssp.ARLasso(ac, lpOrder[r], 5)
    elif lp == 'sparse':
        ar, g = ssp.ARSparse(w, lpOrder[r], ssp.parameter('Gamma', 1.414))
    elif lp == 'student':
        ar, g = ssp.ARStudent(w, lpOrder[r], ssp.parameter('DoF', 50.0))
    else:
        # Without this branch ar and g stay unbound and the failure only
        # shows up as an UnboundLocalError at the return statement.
        raise ValueError("Unknown AR method: %r" % (lp,))

    # Debug plot of the pitch track; disabled by default.
    if False:
        fig = ssp.Figure(5, 1)
        sPlot = fig.subplot()
        sPlot.plot(pitch, 'c')
        sPlot.set_xlim(0, len(pitch))
        sPlot.set_ylim(0, 500)
        plt.show()

    return (ar, g, pitch, hnr)
Ejemplo n.º 3
0
 def setUp(self):
     """
     Build the test fixture: a 16 kHz PCM object (self.pcm) and one
     short, 64-sample frame of a 1 kHz sinusoid with a Nuttall window
     applied (self.seq).  The exact signal is not important; it only
     has to be representative of some natural signal.
     """
     self.pcm = ssp.PulseCodeModulation(16000)

     nSamples = 64
     # Period of a 1 kHz tone as returned by the PCM object.
     period = self.pcm.seconds_to_period(1.0/1000)
     sinusoid = np.array(
         [np.sin(2*np.pi * s/period) for s in range(nSamples)]
     )

     # Window built one sample too long, then trimmed by its last sample.
     window = np.delete(ssp.nuttall(nSamples+1), -1)
     self.seq = ssp.Window(sinusoid, window)
Ejemplo n.º 4
0
def get_pitch(gen_path, basefilename):
    """
    Estimate the pitch track of a wav file and save its logarithm.

    Reads gen_path + basefilename + '.wav' (which must be sampled at
    16 kHz), estimates pitch with ssp.ACPitch, pads the track with its
    last value up to the number of analysis frames, and writes the
    natural log of the track as float32 to
    gen_path + basefilename + '.lf0'.

    Uses the module-level names framePeriod, loPitch and hiPitch.

    Args:
        gen_path: Path prefix, concatenated directly with basefilename.
        basefilename: File name without extension.

    Returns:
        The (possibly padded) pitch track, not its logarithm.
    """
    stem = gen_path + basefilename

    (rate, samples) = io_wav.read(stem + '.wav')
    assert rate == 16000

    pcm = ssp.PulseCodeModulation(rate)
    frameSize = pcm.seconds_to_period(0.025, 'atleast')  # 25 ms frames
    pitchSize = pcm.seconds_to_period(0.1, 'atmost')  # 100 ms pitch window

    # Pitch is estimated on the waveform before any pre-emphasis.
    pitchFrames = ssp.Frame(samples, size=pitchSize, period=framePeriod)
    pitch, _ = ssp.ACPitch(pitchFrames, pcm, loPitch, hiPitch)

    # Optional pre-emphasis, controlled by the "Pre" parameter.
    preCoeff = ssp.parameter("Pre", None)
    if preCoeff is not None:
        samples = ssp.PoleFilter(samples, preCoeff) / 5

    # Frame, window and autocorrelate the signal; only the length of
    # the result is used below.
    frames = ssp.Frame(samples, size=frameSize, period=framePeriod)
    window = np.delete(ssp.nuttall(frameSize + 1), -1)
    ac = ssp.Autocorrelation(ssp.Window(frames, window))

    # Extend the pitch track with its final value when it is shorter
    # than the autocorrelation sequence.
    shortfall = len(ac) - len(pitch)
    if shortfall > 0:
        padding = np.ones(shortfall) * pitch[-1]
        pitch = np.hstack((pitch, padding))

    # Save log pitch as raw float32 binary.
    np.log(pitch).astype('float32').tofile(stem + '.lf0')

    return pitch
Ejemplo n.º 5
0
    print "Need one arg"
    exit(1)
file = arg[0]

import ssp
import numpy as np

# Load the wav file named on the command line and, when the 'Pre'
# parameter is set, run it through ssp.ZeroFilter.
pcm = ssp.PulseCodeModulation()
a = pcm.WavSource(file)
if ssp.parameter('Pre', None):
    a = ssp.ZeroFilter(a)

# 10 ms frame period, frames of at least 20 ms.
framePeriod = pcm.seconds_to_period(0.01)
frameSize = pcm.seconds_to_period(0.02, 'atleast')
f = ssp.Frame(a, size=frameSize, period=framePeriod)

# Window built one sample too long, then trimmed by its last sample.
w = ssp.nuttall(frameSize+1)
w = np.delete(w, -1)
wf = ssp.Window(f, w)

# Pick the spectral representation to compute into p.
# NOTE: the names `type` and `file` shadow builtins; they are kept
# because code outside this section may read them.
type = ssp.parameter('Type', 'psd')
if type == 'psd':
    p = ssp.Periodogram(wf)
    # Keep the first N/2+1 columns (presumably the non-redundant half of
    # the spectrum).  Floor division keeps the slice bound an integer
    # regardless of which division semantics are in effect.
    p = p[:, :p.shape[1] // 2 + 1]
elif type == 'ar':
    a = ssp.Autocorrelation(wf)
    a, g = ssp.ARLevinson(a, pcm.speech_ar_order())
    p = ssp.ARSpectrum(a, g, nSpec=128)
elif type == 'snr':
    p = ssp.Periodogram(wf)
    n = ssp.Noise(p)
    p = ssp.SNRSpectrum(p, n)
    # Same N/2+1 column truncation as the 'psd' branch.
    p = p[:, :p.shape[1] // 2 + 1]
Ejemplo n.º 6
0
    # Defaults for 8 kHz
    frameSize = 256
    framePeriod = 80
    lpOrder = 10

    if pcm.rate == 16000:
        frameSize = 400
        framePeriod = 160
        lpOrder = 12

    # Basic preprocessing
    g = np.ndarray((0))
    a = ssp.ZeroFilter(a)
    f = ssp.Frame(a, size=frameSize, period=framePeriod, pad=False)
    f = ssp.Window(f, ssp.nuttall(frameSize))

    # Next part depends on user
    frontend = ssp.parameter("FrontEnd", "ar")
    if frontend == "ar":
        a = ssp.Autocorrelation(f)
        a = ssp.AutocorrelationAllPassWarp(a,
                                           alpha=ssp.mel[pcm.rate],
                                           size=lpOrder + 1)
        a, g = ssp.ARLevinson(a, lpOrder)
        #    ridge = Parameter('Ridge', 0.1)
        #    a, g = ARRidge(a, lpOrder, ridge)
        #    a, g = ARLasso(a, lpOrder, ridge)
    elif frontend == "snr":
        a = ssp.Periodogram(f)
        n = ssp.Noise(a)
Ejemplo n.º 7
0
    # Defaults for 8 kHz
    frameSize = 256
    framePeriod = 80
    lpOrder = 10

    if pcm.rate == 16000:
        frameSize = 400
        framePeriod = 160
        lpOrder = 12

    # Basic preprocessing
    g = np.ndarray((0))
    a = ssp.ZeroFilter(a)
    f = ssp.Frame(a, size=frameSize, period=framePeriod, pad=False)
    f = ssp.Window(f, ssp.nuttall(frameSize))

    # Next part depends on user
    frontend = ssp.parameter("FrontEnd", "ar")
    if frontend == "ar":
        a = ssp.Autocorrelation(f)
        a = ssp.AutocorrelationAllPassWarp(a, alpha=ssp.mel[pcm.rate],
                                           size=lpOrder+1)
        a, g = ssp.ARLevinson(a, lpOrder)
        #    ridge = Parameter('Ridge', 0.1)
        #    a, g = ARRidge(a, lpOrder, ridge)
        #    a, g = ARLasso(a, lpOrder, ridge)
    elif frontend == "snr":
        a = ssp.Periodogram(f)
        n = ssp.Noise(a)
        a = ssp.SNRSpectrum(a, n * 0.1)