Example 1
def main(inputFile=demo_sound_path('ocean.wav'), H=256, N=512, stocf=.1,
         interactive=True, plotFile=False):
    """
    inputFile: input sound file (monophonic with sampling rate of 44100)
    H: hop size, N: fft size
    stocf: decimation factor used for the stochastic approximation (bigger than 0, maximum 1)
    """

    # read input sound
    (fs, x) = audio.read_wav(inputFile)

    # compute stochastic model
    stocEnv = stochastic.from_audio(x, H, N, stocf)

    # synthesize sound from stochastic model
    y = stochastic.to_audio(stocEnv, H, N)

    outputFile = 'output_sounds/' + strip_file(inputFile) + '_stochasticModel.wav'

    # write output sound
    audio.write_wav(y, fs, outputFile)

    # create figure to plot
    plt.figure(figsize=(12, 9))

    # plot the input sound
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size) / float(fs), x)
    plt.axis([0, x.size / float(fs), min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # plot stochastic representation
    plt.subplot(3, 1, 2)
    numFrames = int(stocEnv.shape[0])
    frmTime = H * np.arange(numFrames) / float(fs)
    binFreq = np.arange(stocf * (N / 2 + 1)) * float(fs) / (stocf * N)
    plt.pcolormesh(frmTime, binFreq, np.transpose(stocEnv))
    plt.autoscale(tight=True)
    plt.xlabel('time (sec)')
    plt.ylabel('frequency (Hz)')
    plt.title('stochastic approximation')

    # plot the output sound
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')

    plt.tight_layout()

    if interactive:
        plt.show()
    if plotFile:
        plt.savefig('output_plots/%s_stochastic_model.png' % files.strip_file(inputFile))
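A minimal driver for the function above (not part of the original example; all names come from the code it calls):

# Hedged usage sketch: resynthesize ocean.wav with a coarser stochastic
# approximation; stocf=0.2 keeps 20% of the spectral envelope samples.
main(inputFile=demo_sound_path('ocean.wav'), H=256, N=512, stocf=0.2,
     interactive=False, plotFile=True)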
Example 2

# This fragment assumes x, fs, mX, mYst and y from a preceding stochastic
# model analysis/synthesis run (as in Example 1).
plt.figure(1, figsize=(9, 7))
plt.subplot(411)
plt.plot(np.arange(x.size) / float(fs), x, 'b')
plt.title('x (ocean.wav)')
plt.axis([0, x.size / float(fs), min(x), max(x)])

plt.subplot(412)
numFrames = int(mX.shape[0])
frmTime = H * np.arange(numFrames) / float(fs)
binFreq = np.arange(mX.shape[1]) * float(fs) / N
plt.pcolormesh(frmTime, binFreq, np.transpose(mX))
plt.title('mX; M=512, N=512, H=256')
plt.autoscale(tight=True)

plt.subplot(413)
numFrames = int(mYst.shape[0])
frmTime = H * np.arange(numFrames) / float(fs)
binFreq = np.arange(stocf * mX.shape[1]) * float(fs) / (stocf * N)
plt.pcolormesh(frmTime, binFreq, np.transpose(mYst))
plt.title('mY (stochastic approximation); stocf=.1')
plt.autoscale(tight=True)

plt.subplot(414)
plt.plot(np.arange(y.size) / float(fs), y, 'b')
plt.title('y')
plt.axis([0, y.size / float(fs), min(y), max(y)])

plt.tight_layout()
plt.savefig('stochasticModelAnalSynth.png')
audio.write_wav(y, fs, 'ocean-synthesis.wav')
Example 3
def transformation_synthesis(inputFile,
                             fs,
                             tfreq,
                             tmag,
                             freqScaling=np.array([0, 2.0, 1, .3]),
                             timeScaling=np.array(
                                 [0, .0, .671, .671, 1.978, 1.978 + 1.0]),
                             interactive=True,
                             plotFile=False):
    """
    Transform the analysis values returned by the analysis function and synthesize the sound
    inputFile: name of input file; fs: sampling rate of input file
    tfreq, tmag: sinusoidal frequencies and magnitudes
    freqScaling: frequency scaling factors, in time-value pairs
    timeScaling: time scaling factors, in time-value pairs
    """

    # size of fft used in synthesis
    Ns = 512

    # hop size (has to be 1/4 of Ns)
    H = 128

    # frequency scaling of the sinusoidal tracks
    ytfreq = sine.scale_frequencies(tfreq, freqScaling)

    # time scale the sinusoidal tracks
    ytfreq, ytmag = sine.scale_time(ytfreq, tmag, timeScaling)

    # synthesis
    y = sine.to_audio(ytfreq, ytmag, np.array([]), Ns, H, fs)

    # write output sound
    outputFile = 'output_sounds/' + strip_file(
        inputFile) + '_sineModelTransformation.wav'
    audio.write_wav(y, fs, outputFile)

    # create figure to plot
    plt.figure(figsize=(12, 6))

    # frequency range to plot
    maxplotfreq = 15000.0

    # plot the transformed sinusoidal frequencies
    if (ytfreq.shape[1] > 0):
        plt.subplot(2, 1, 1)
        tracks = np.copy(ytfreq)
        tracks = tracks * np.less(tracks, maxplotfreq)
        tracks[tracks <= 0] = np.nan
        numFrames = int(tracks.shape[0])
        frmTime = H * np.arange(numFrames) / float(fs)
        plt.plot(frmTime, tracks)
        plt.title('transformed sinusoidal tracks')
        plt.autoscale(tight=True)

    # plot the output sound
    plt.subplot(2, 1, 2)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()

    if interactive:
        plt.show()
    if plotFile:
        plt.savefig('output_plots/%s_sine_transformation_synthesis.png' %
                    files.strip_file(inputFile))
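This function consumes the tfreq/tmag tracks produced by a sine-model analysis; a hedged sketch of the full pipeline, assuming an analysis step like the analysis() of Example 29:

# Hypothetical chaining: analyze first, then scale frequencies/time and resynthesize.
inputFile, fs, tfreq, tmag = analysis()  # sine-model analysis (see Example 29)
transformation_synthesis(inputFile, fs, tfreq, tmag,
                         freqScaling=np.array([0, 2.0, 1, .3]),
                         timeScaling=np.array([0, .0, .671, .671, 1.978, 2.978]),
                         interactive=False, plotFile=True)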
Example 4
def main(inputFile=demo_sound_path('rain.wav'), stocf=0.1, timeScaling=np.array([0, 0, 1, 2]),
         interactive=True, plotFile=False):
    """
    function to perform a time scaling using the stochastic model
    inputFile: name of input sound file
    stocf: decimation factor used for the stochastic approximation
    timeScaling: time scaling factors, in time-value pairs
    """

    # hop size
    H = 128

    # read input sound
    (fs, x) = audio.read_wav(inputFile)

    # perform stochastic analysis
    mYst = stochastic.from_audio(x, H, H * 2, stocf)

    # perform time scaling of stochastic representation
    ystocEnv = stochastic.scale_time(mYst, timeScaling)

    # synthesize output sound
    y = stochastic.to_audio(ystocEnv, H, H * 2)

    # write output sound
    outputFile = 'output_sounds/' + strip_file(inputFile) + '_stochasticModelTransformation.wav'
    audio.write_wav(y, fs, outputFile)

    # create figure to plot
    plt.figure(figsize=(12, 9))

    # plot the input sound
    plt.subplot(4, 1, 1)
    plt.plot(np.arange(x.size) / float(fs), x)
    plt.axis([0, x.size / float(fs), min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # plot stochastic representation
    plt.subplot(4, 1, 2)
    numFrames = int(mYst.shape[0])
    frmTime = H * np.arange(numFrames) / float(fs)
    binFreq = np.arange(stocf * H) * float(fs) / (stocf * 2 * H)
    plt.pcolormesh(frmTime, binFreq, np.transpose(mYst))
    plt.autoscale(tight=True)
    plt.xlabel('time (sec)')
    plt.ylabel('frequency (Hz)')
    plt.title('stochastic approximation')

    # plot modified stochastic representation
    plt.subplot(4, 1, 3)
    numFrames = int(ystocEnv.shape[0])
    frmTime = H * np.arange(numFrames) / float(fs)
    binFreq = np.arange(stocf * H) * float(fs) / (stocf * 2 * H)
    plt.pcolormesh(frmTime, binFreq, np.transpose(ystocEnv))
    plt.autoscale(tight=True)
    plt.xlabel('time (sec)')
    plt.ylabel('frequency (Hz)')
    plt.title('modified stochastic approximation')

    # plot the output sound
    plt.subplot(4, 1, 4)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')

    plt.tight_layout()

    if interactive:
        plt.show()
    if plotFile:
        plt.savefig('output_plots/%s_stochastic_transformation.png' % files.strip_file(inputFile))
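The timeScaling break-points are (input time, output time) pairs, so the default [0, 0, 1, 2] maps input second 1 to output second 2 and doubles the duration. A hedged variant of the call above:

# Hypothetical call: stretch rain.wav to four times its length.
main(inputFile=demo_sound_path('rain.wav'), stocf=0.1,
     timeScaling=np.array([0, 0, 1, 4]),
     interactive=False, plotFile=True)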
Example 5
def main(inputFile=demo_sound_path('bendir.wav'), window='hamming', M=2001, N=2048, t=-80, minSineDur=0.02,
         maxnSines=150, freqDevOffset=10, freqDevSlope=0.001,
         interactive=True, plotFile=False):
    """
    Perform analysis/synthesis using the sinusoidal model
    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size; N: fft size (power of two, greater than or equal to M)
    t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks
    maxnSines: maximum number of parallel sinusoids
    freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0
    freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation
    """

    # size of fft used in synthesis
    Ns = 512

    # hop size (has to be 1/4 of Ns)
    H = 128

    # read input sound
    fs, x = audio.read_wav(inputFile)

    # compute analysis window
    w = get_window(window, M)

    # analyze the sound with the sinusoidal model
    tfreq, tmag, tphase = sine.from_audio(x, fs, w, N, H, t, maxnSines, minSineDur, freqDevOffset, freqDevSlope)

    # synthesize the output sound from the sinusoidal representation
    y = sine.to_audio(tfreq, tmag, tphase, Ns, H, fs)

    # output sound file name
    outputFile = 'output_sounds/' + strip_file(inputFile) + '_sineModel.wav'

    # write the synthesized sound obtained from the sinusoidal synthesis
    audio.write_wav(y, fs, outputFile)

    # create figure to show plots
    plt.figure(figsize=(12, 9))

    # frequency range to plot
    maxplotfreq = 5000.0

    # plot the input sound
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size) / float(fs), x)
    plt.axis([0, x.size / float(fs), min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # plot the sinusoidal frequencies
    plt.subplot(3, 1, 2)
    if (tfreq.shape[1] > 0):
        numFrames = tfreq.shape[0]
        frmTime = H * np.arange(numFrames) / float(fs)
        tfreq[tfreq <= 0] = np.nan
        plt.plot(frmTime, tfreq)
        plt.axis([0, x.size / float(fs), 0, maxplotfreq])
        plt.title('frequencies of sinusoidal tracks')

    # plot the output sound
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()

    if interactive:
        plt.show()
    if plotFile:
        plt.savefig('output_plots/%s_sine_model.png' % files.strip_file(inputFile))
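A minimal usage sketch (not part of the original example); all parameters are the ones documented in the docstring above:

# Hypothetical call: stricter peak threshold and fewer simultaneous sinusoids.
main(inputFile=demo_sound_path('bendir.wav'), window='hamming', M=2001, N=2048,
     t=-90, minSineDur=0.02, maxnSines=50, interactive=False, plotFile=True)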
Example 6
def main(inputFile1=demo_sound_path('ocean.wav'),
         inputFile2=demo_sound_path('speech-male.wav'),
         window1='hamming',
         window2='hamming',
         M1=1024,
         M2=1024,
         N1=1024,
         N2=1024,
         H1=256,
         smoothf=.5,
         balancef=0.2,
         interactive=True,
         plotFile=False):
    """
    Function to perform a morph between two sounds
    inputFile1: name of input sound file to be used as source
    inputFile2: name of input sound file to be used as filter
    window1 and window2: windows for both files
    M1 and M2: window sizes for both files
    N1 and N2: fft sizes for both sounds
    H1: hop size for sound 1 (the one for sound 2 is computed automatically)
    smoothf: smoothing factor to be applied to the magnitude spectrum of sound 2 before morphing
    balancef: balance factor between both sounds, 0 is sound 1 and 1 is sound 2
    """

    # read input sounds
    (fs, x1) = audio.read_wav(inputFile1)
    (fs, x2) = audio.read_wav(inputFile2)

    # compute analysis windows
    w1 = get_window(window1, M1)
    w2 = get_window(window2, M2)

    # perform morphing
    y = stft.morph(x1, x2, fs, w1, N1, w2, N2, H1, smoothf, balancef)

    # compute the magnitude and phase spectrogram of input sound (for plotting)
    mX1, pX1 = stft.from_audio(x1, w1, N1, H1)

    # compute the magnitude and phase spectrogram of output sound (for plotting)
    mY, pY = stft.from_audio(y, w1, N1, H1)

    # write output sound
    outputFile = 'output_sounds/' + os.path.basename(
        inputFile1)[:-4] + '_stftMorph.wav'
    audio.write_wav(y, fs, outputFile)

    # create figure to plot
    plt.figure(figsize=(12, 9))

    # frequency range to plot
    maxplotfreq = 10000.0

    # plot sound 1
    plt.subplot(4, 1, 1)
    plt.plot(np.arange(x1.size) / float(fs), x1)
    plt.axis([0, x1.size / float(fs), min(x1), max(x1)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # plot magnitude spectrogram of sound 1
    plt.subplot(4, 1, 2)
    numFrames = int(mX1.shape[0])
    frmTime = H1 * np.arange(numFrames) / float(fs)
    binFreq = fs * np.arange(N1 * maxplotfreq / fs) / N1
    plt.pcolormesh(frmTime, binFreq,
                   np.transpose(mX1[:, :int(N1 * maxplotfreq / fs) + 1]))
    plt.xlabel('time (sec)')
    plt.ylabel('frequency (Hz)')
    plt.title('magnitude spectrogram of x')
    plt.autoscale(tight=True)

    # plot magnitude spectrogram of morphed sound
    plt.subplot(4, 1, 3)
    numFrames = int(mY.shape[0])
    frmTime = H1 * np.arange(numFrames) / float(fs)
    binFreq = fs * np.arange(N1 * maxplotfreq / fs) / N1
    plt.pcolormesh(frmTime, binFreq,
                   np.transpose(mY[:, :int(N1 * maxplotfreq / fs) + 1]))
    plt.xlabel('time (sec)')
    plt.ylabel('frequency (Hz)')
    plt.title('magnitude spectrogram of y')
    plt.autoscale(tight=True)

    # plot the morphed sound
    plt.subplot(4, 1, 4)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()

    if interactive:
        plt.show()
    if plotFile:
        plt.savefig(
            'output_plots/%s_%s_stft_morph.png' %
            (files.strip_file(inputFile1), files.strip_file(inputFile2)))
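A hedged sketch of a call that weights the morph toward the second sound (per the docstring, balancef=0 keeps sound 1 and balancef=1 keeps sound 2):

# Hypothetical call: let the speech 'filter' sound dominate the morph.
main(inputFile1=demo_sound_path('ocean.wav'),
     inputFile2=demo_sound_path('speech-male.wav'),
     smoothf=.5, balancef=0.7, interactive=False, plotFile=True)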
Example 7

# This fragment assumes x, fs, w, N, H, t, nH, minf0, maxf0, f0et, minSineDur
# and Ns from an earlier harmonic-model setup (vignesh.wav).
harmDevSlope = 0.01
hfreq, hmag, hphase = harmonic.from_audio(x, fs, w, N, H, t, nH, minf0, maxf0,
                                          f0et, harmDevSlope, minSineDur)
y = sine.to_audio(hfreq, hmag, hphase, Ns, H, fs)

numFrames = int(hfreq.shape[0])
frmTime = H * np.arange(numFrames) / float(fs)

plt.figure(1, figsize=(9, 7))

plt.subplot(3, 1, 1)
plt.plot(np.arange(x.size) / float(fs), x, 'b')
plt.axis([0, x.size / float(fs), min(x), max(x)])
plt.title('x (vignesh.wav)')

plt.subplot(3, 1, 2)
yhfreq = np.copy(hfreq)  # copy so the zeros in hfreq are not overwritten
yhfreq[hfreq == 0] = np.nan
plt.plot(frmTime, yhfreq, lw=1.2)
plt.axis([0, y.size / float(fs), 0, 8000])
plt.title('f_h, harmonic frequencies')

plt.subplot(3, 1, 3)
plt.plot(np.arange(y.size) / float(fs), y, 'b')
plt.axis([0, y.size / float(fs), min(y), max(y)])
plt.title('yh')

plt.tight_layout()
audio.write_wav(y, fs, 'vignesh-harmonic-synthesis.wav')
plt.savefig('harmonicModel-analysis-synthesis.png')
Example 8
def transformation_synthesis(inputFile, fs, hfreq, hmag, mYst,
                             freqScaling=np.array([0, 1.2, 2.01, 1.2, 2.679, .7, 3.146, .7]),
                             freqStretching=np.array([0, 1, 2.01, 1, 2.679, 1.5, 3.146, 1.5]), timbrePreservation=1,
                             timeScaling=np.array([0, 0, 2.138, 2.138 - 1.0, 3.146, 3.146]),
                             interactive=True, plotFile=False):
    """
    transform the analysis values returned by the analysis function and synthesize the sound
    inputFile: name of input file
    fs: sampling rate of input file
    hfreq, hmag: harmonic frequencies and magnitudes
    mYst: stochastic residual
    freqScaling: frequency scaling factors, in time-value pairs (value of 1 no scaling)
    freqStretching: frequency stretching factors, in time-value pairs (value of 1 no stretching)
    timbrePreservation: 1 preserves original timbre, 0 it does not
    timeScaling: time scaling factors, in time-value pairs
    """

    # size of fft used in synthesis
    Ns = 512

    # hop size (has to be 1/4 of Ns)
    H = 128

    # frequency scaling of the harmonics
    hfreqt, hmagt = harmonic.scale_frequencies(hfreq, hmag, freqScaling, freqStretching, timbrePreservation, fs)

    # time scaling the sound
    yhfreq, yhmag, ystocEnv = hps.scale_time(hfreqt, hmagt, mYst, timeScaling)

    # synthesis from the transformed hps representation
    y, yh, yst = hps.to_audio(yhfreq, yhmag, np.array([]), ystocEnv, Ns, H, fs)

    # write output sound
    outputFile = 'output_sounds/' + strip_file(inputFile) + '_hpsModelTransformation.wav'
    audio.write_wav(y, fs, outputFile)

    # create figure to plot
    plt.figure(figsize=(12, 6))

    # frequency range to plot
    maxplotfreq = 15000.0

    # plot spectrogram of the transformed stochastic component
    plt.subplot(2, 1, 1)
    numFrames = int(ystocEnv.shape[0])
    sizeEnv = int(ystocEnv.shape[1])
    frmTime = H * np.arange(numFrames) / float(fs)
    binFreq = (.5 * fs) * np.arange(sizeEnv * maxplotfreq / (.5 * fs)) / sizeEnv
    plt.pcolormesh(frmTime, binFreq, np.transpose(ystocEnv[:, :int(sizeEnv * maxplotfreq / (.5 * fs)) + 1]))
    plt.autoscale(tight=True)

    # plot transformed harmonic on top of stochastic spectrogram
    if (yhfreq.shape[1] > 0):
        harms = yhfreq * np.less(yhfreq, maxplotfreq)
        harms[harms == 0] = np.nan
        numFrames = int(harms.shape[0])
        frmTime = H * np.arange(numFrames) / float(fs)
        plt.plot(frmTime, harms, color='k', ms=3, alpha=1)
        plt.xlabel('time (sec)')
        plt.ylabel('frequency (Hz)')
        plt.autoscale(tight=True)
        plt.title('harmonics + stochastic spectrogram')

    # plot the output sound
    plt.subplot(2, 1, 2)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()

    if interactive:
        plt.show()
    if plotFile:
        plt.savefig('output_plots/%s_hps_transformation_synthesis.png' % files.strip_file(inputFile))
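This function expects the values returned by an HPS analysis step such as the analysis() of Example 19; a hedged sketch of the two-stage pipeline:

# Hypothetical chaining of analysis and transformation (names as in Example 19).
inputFile, fs, hfreq, hmag, mYst = analysis()
transformation_synthesis(inputFile, fs, hfreq, hmag, mYst,
                         interactive=False, plotFile=True)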
Example 9
plt.figure(1, figsize=(9.5, 7))
plt.subplot(411)
plt.plot(np.arange(x.size) / float(fs), x, 'b')
plt.title('x (piano.wav)')
plt.axis([0, x.size / float(fs), min(x), max(x)])

plt.subplot(412)
numFrames = int(mX.shape[0])
frmTime = H * np.arange(numFrames) / float(fs)
binFreq = np.arange(mX.shape[1]) * float(fs) / N
plt.pcolormesh(frmTime, binFreq, np.transpose(mX))
plt.title('mX, M=1024, N=1024, H=512')
plt.autoscale(tight=True)

plt.subplot(413)
numFrames = int(pX.shape[0])
frmTime = H * np.arange(numFrames) / float(fs)
binFreq = np.arange(pX.shape[1]) * float(fs) / N
plt.pcolormesh(frmTime, binFreq, np.diff(np.transpose(pX), axis=0))
plt.title('pX derivative, M=1024, N=1024, H=512')
plt.autoscale(tight=True)

plt.subplot(414)
plt.plot(np.arange(y.size) / float(fs), y, 'b')
plt.axis([0, y.size / float(fs), min(y), max(y)])
plt.title('y')

plt.tight_layout()
plt.savefig('stft-system.png')
audio.write_wav(y, fs, 'piano-stft.wav')
Example 10
def main(inputFile=demo_sound_path('piano.wav'),
         window='hamming',
         M=1024,
         N=1024,
         H=512,
         interactive=True,
         plotFile=False):
    """
    analysis/synthesis using the STFT
    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (choice of rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size
    N: fft size (power of two, greater than or equal to M)
    H: hop size (at most 1/2 of the analysis window size, to get good overlap-add)
    """

    # read input sound (monophonic with sampling rate of 44100)
    fs, x = audio.read_wav(inputFile)

    # compute analysis window
    w = get_window(window, M)

    # compute the magnitude and phase spectrogram
    mX, pX = stft.from_audio(x, w, N, H)

    # perform the inverse stft
    y = stft.to_audio(mX, pX, M, H)

    # output sound file (monophonic with sampling rate of 44100)
    outputFile = 'output_sounds/' + strip_file(inputFile) + '_stft.wav'

    # write the sound resulting from the inverse stft
    audio.write_wav(y, fs, outputFile)

    # create figure to plot
    plt.figure(figsize=(12, 9))

    # frequency range to plot
    maxplotfreq = 5000.0

    # plot the input sound
    plt.subplot(4, 1, 1)
    plt.plot(np.arange(x.size) / float(fs), x)
    plt.axis([0, x.size / float(fs), min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # plot magnitude spectrogram
    plt.subplot(4, 1, 2)
    numFrames = int(mX.shape[0])
    frmTime = H * np.arange(numFrames) / float(fs)
    binFreq = fs * np.arange(N * maxplotfreq / fs) / N
    plt.pcolormesh(frmTime, binFreq,
                   np.transpose(mX[:, :int(N * maxplotfreq / fs) + 1]))
    plt.xlabel('time (sec)')
    plt.ylabel('frequency (Hz)')
    plt.title('magnitude spectrogram')
    plt.autoscale(tight=True)

    # plot the phase spectrogram
    plt.subplot(4, 1, 3)
    numFrames = int(pX.shape[0])
    frmTime = H * np.arange(numFrames) / float(fs)
    binFreq = fs * np.arange(N * maxplotfreq / fs) / N
    plt.pcolormesh(
        frmTime, binFreq,
        np.transpose(np.diff(pX[:, :int(N * maxplotfreq / fs) + 1], axis=1)))
    plt.xlabel('time (sec)')
    plt.ylabel('frequency (Hz)')
    plt.title('phase spectrogram (derivative)')
    plt.autoscale(tight=True)

    # plot the output sound
    plt.subplot(4, 1, 4)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()

    if interactive:
        plt.show()
    if plotFile:
        plt.savefig('output_plots/%s_stft_model.png' %
                    files.strip_file(inputFile))
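A small round-trip check (not part of the original example) built only from the calls used above; it measures how closely the inverse STFT reconstructs the input:

# Hedged sketch: analysis followed by synthesis, then the peak sample error.
fs, x = audio.read_wav(demo_sound_path('piano.wav'))
w = get_window('hamming', 1024)
mX, pX = stft.from_audio(x, w, 1024, 512)
y = stft.to_audio(mX, pX, 1024, 512)
n = min(x.size, y.size)
print('max reconstruction error: %.2e' % np.max(np.abs(x[:n] - y[:n])))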
Example 11
plt.pcolormesh(frmTime, binFreq, np.transpose(np.diff(pX[:, :int(N * maxplotfreq / fs) + 1], axis=1)))
plt.autoscale(tight=True)

harms = hfreq * np.less(hfreq, maxplotfreq)
harms[harms == 0] = np.nan
numFrames = int(harms.shape[0])
frmTime = H * np.arange(numFrames) / float(fs)
plt.plot(frmTime, harms, color='k', ms=3, alpha=1)
plt.autoscale(tight=True)
plt.title('pX + harmonics')

plt.subplot(223)
numFrames = int(mXr.shape[0])
frmTime = H * np.arange(numFrames) / float(fs)
binFreq = fs * np.arange(Ns * maxplotfreq / fs) / Ns
plt.pcolormesh(frmTime, binFreq, np.transpose(mXr[:, :int(Ns * maxplotfreq / fs) + 1]))
plt.autoscale(tight=True)
plt.title('mXr')

plt.subplot(224)
numFrames = int(pXr.shape[0])
frmTime = H * np.arange(numFrames) / float(fs)
binFreq = fs * np.arange(Ns * maxplotfreq / fs) / Ns
plt.pcolormesh(frmTime, binFreq, np.transpose(np.diff(pXr[:, :int(Ns * maxplotfreq / fs) + 1], axis=1)))
plt.autoscale(tight=True)
plt.title('pXr')

plt.tight_layout()
plt.savefig('hprModelAnal-flute.png')
audio.write_wav(5 * xr, fs, 'flute-residual.wav')
Example 12

# This fragment assumes x, fs, w, N, H and a filter under construction
# (filt, startBin, nBins, bandpass) defined earlier.
filt[startBin:startBin + nBins] = bandpass
y = stft.filter(x, fs, w, N, H, filt)
mX, pX = stft.from_audio(x, w, N, H)
mY, pY = stft.from_audio(y, w, N, H)

plt.figure(1, figsize=(12, 9))
plt.subplot(311)
numFrames = int(mX.shape[0])
frmTime = H * np.arange(numFrames) / float(fs)
binFreq = np.arange(mX.shape[1]) * float(fs) / N
plt.pcolormesh(frmTime, binFreq, np.transpose(mX))
plt.title('mX (orchestra.wav)')
plt.autoscale(tight=True)

plt.subplot(312)
plt.plot(fs * np.arange(mX.shape[1]) / float(N), filt, 'k', lw=1.3)
plt.axis([0, fs / 2, -60, 7])
plt.title('filter shape')

plt.subplot(313)
numFrames = int(mY.shape[0])
frmTime = H * np.arange(numFrames) / float(fs)
binFreq = np.arange(mY.shape[1]) * float(fs) / N
plt.pcolormesh(frmTime, binFreq, np.transpose(mY))
plt.title('mY')
plt.autoscale(tight=True)

plt.tight_layout()
audio.write_wav(y, fs, 'orchestra-stft-filtering.wav')
plt.savefig('stftFiltering-orchestra.png')
Example 13
hfreq, hmag, hphase, xr = hpr.from_audio(x, fs, w, N, H, t, minSineDur, nH,
                                         minf0, maxf0, f0et, harmDevSlope)

mXr, pXr = stft.from_audio(xr, w, N, H)

freqScaling = np.array([0, 1.5, 1, 1.5])
freqStretching = np.array([0, 1.1, 1, 1.1])
timbrePreservation = 1

hfreqt, hmagt = harmonic.scale_frequencies(hfreq, hmag, freqScaling,
                                           freqStretching, timbrePreservation,
                                           fs)

y, yh = hpr.to_audio(hfreqt, hmagt, np.array([]), xr, Ns, H, fs)

audio.write_wav(y, fs, 'hpr-freq-transformation.wav')

plt.figure(figsize=(12, 9))

maxplotfreq = 15000.0

plt.subplot(4, 1, 1)
plt.plot(np.arange(x.size) / float(fs), x)
plt.axis([0, x.size / float(fs), min(x), max(x)])
plt.title('x (flute-A4.wav)')

plt.subplot(4, 1, 2)
maxplotbin = int(N * maxplotfreq / fs)
numFrames = int(mXr.shape[0])
frmTime = H * np.arange(numFrames) / float(fs)
binFreq = np.arange(maxplotbin + 1) * float(fs) / N
Example 14
contours_bins, contours_saliences, contours_start_times, duration = run_pitch_contours(
    pool['allframes_salience_peaks_bins'],
    pool['allframes_salience_peaks_saliences'])
pitch, confidence = run_pitch_contours_melody(contours_bins,
                                              contours_saliences,
                                              contours_start_times, duration)

yf0 = synth.synthesize_sinusoid(pitch, .6, hopSize, sampleRate)

figure(1, figsize=(9, 6))

# x: the input signal loaded earlier (renamed from 'audio' so it does not
# shadow the audio module used for writing below)
mX, pX = stft.from_audio(x, hamming(frameSize), frameSize, hopSize)
maxplotfreq = 3000.0
numFrames = int(mX.shape[0])
frmTime = hopSize * arange(numFrames) / float(sampleRate)
binFreq = sampleRate * arange(frameSize * maxplotfreq / sampleRate) / frameSize
plt.pcolormesh(frmTime, binFreq,
               np.transpose(mX[:, :int(frameSize * maxplotfreq / sampleRate) + 1]))
plt.autoscale(tight=True)

offset = .5 * frameSize / sampleRate
time = hopSize * arange(size(pitch)) / float(sampleRate)
pitch[pitch == 0] = nan
plot(time, pitch, color='k', linewidth=2)

plt.title('mX + prominent melody (carnatic.wav)')
tight_layout()
savefig('predominantmelody-2.png')
audio.write_wav(yf0, sampleRate, 'predominantmelody-2.wav')
Example 15
def main(inputFile=demo_sound_path('bendir.wav'),
         window='hamming',
         M=2001,
         N=2048,
         t=-80,
         minSineDur=0.02,
         maxnSines=150,
         freqDevOffset=10,
         freqDevSlope=0.001,
         stocf=0.2,
         interactive=True,
         plotFile=False):
    """
    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size; N: fft size (power of two, greater than or equal to M)
    t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks
    maxnSines: maximum number of parallel sinusoids
    freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0
    freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation
    stocf: decimation factor used for the stochastic approximation
    """

    # size of fft used in synthesis
    Ns = 512

    # hop size (has to be 1/4 of Ns)
    H = 128

    # read input sound
    (fs, x) = audio.read_wav(inputFile)

    # compute analysis window
    w = get_window(window, M)

    # perform sinusoidal+stochastic analysis
    tfreq, tmag, tphase, stocEnv = sps.from_audio(x, fs, w, N, H, t,
                                                  minSineDur, maxnSines,
                                                  freqDevOffset, freqDevSlope,
                                                  stocf)

    # synthesize sinusoidal+stochastic model
    y, ys, yst = sps.to_audio(tfreq, tmag, tphase, stocEnv, Ns, H, fs)

    # output sound file (monophonic with sampling rate of 44100)
    baseFileName = strip_file(inputFile)
    outputFileSines, outputFileStochastic, outputFile = [
        'output_sounds/%s_spsModel%s.wav' % (baseFileName, i)
        for i in ('_sines', '_stochastic', '')
    ]

    # write sounds files for sinusoidal, residual, and the sum
    audio.write_wav(ys, fs, outputFileSines)
    audio.write_wav(yst, fs, outputFileStochastic)
    audio.write_wav(y, fs, outputFile)

    # create figure to plot
    plt.figure(figsize=(12, 9))

    # frequency range to plot
    maxplotfreq = 10000.0

    # plot the input sound
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size) / float(fs), x)
    plt.axis([0, x.size / float(fs), min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    plt.subplot(3, 1, 2)
    numFrames = int(stocEnv.shape[0])
    sizeEnv = int(stocEnv.shape[1])
    frmTime = H * np.arange(numFrames) / float(fs)
    binFreq = (.5 * fs) * np.arange(sizeEnv * maxplotfreq /
                                    (.5 * fs)) / sizeEnv
    plt.pcolormesh(
        frmTime, binFreq,
        np.transpose(stocEnv[:, :int(sizeEnv * maxplotfreq / (.5 * fs)) + 1]))
    plt.autoscale(tight=True)

    # plot sinusoidal frequencies on top of stochastic component
    if (tfreq.shape[1] > 0):
        sines = tfreq * np.less(tfreq, maxplotfreq)
        sines[sines == 0] = np.nan
        numFrames = int(sines.shape[0])
        frmTime = H * np.arange(numFrames) / float(fs)
        plt.plot(frmTime, sines, color='k', ms=3, alpha=1)
        plt.xlabel('time(s)')
        plt.ylabel('Frequency(Hz)')
        plt.autoscale(tight=True)
        plt.title('sinusoidal + stochastic spectrogram')

    # plot the output sound
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()

    if interactive:
        plt.show()
    if plotFile:
        plt.savefig('output_plots/%s_sps_model.png' %
                    files.strip_file(inputFile))
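A minimal usage sketch (not part of the original example); a larger stocf keeps a finer stochastic envelope for the residual:

# Hypothetical call: finer stochastic approximation of the non-sinusoidal part.
main(inputFile=demo_sound_path('bendir.wav'), stocf=0.5,
     interactive=False, plotFile=True)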
Example 16

if __name__ == '__main__':
    (fs, x) = audio.read_wav('../../../sounds/bendir.wav')

    plt.figure(1, figsize=(9, 7))
    N = 2048
    H = 256
    w = hamming(2048)
    mX, pX = stft.from_audio(x, w, N, H)
    maxplotfreq = 2000.0
    frmTime = H * np.arange(mX.shape[0]) / float(fs)
    binFreq = fs * np.arange(N * maxplotfreq / fs) / N
    plt.pcolormesh(frmTime, binFreq,
                   np.transpose(mX[:, :int(N * maxplotfreq / fs) + 1]))

    N = 2048
    minf0 = 130
    maxf0 = 300
    H = 256
    f0 = f0Yin(x, N, H, minf0, maxf0)
    yf0 = synth.synthesize_sinusoid(f0, .8, H, fs)
    frmTime = H * np.arange(f0.size) / float(fs)
    plt.plot(frmTime, f0, linewidth=2, color='k')
    plt.autoscale(tight=True)
    plt.title('mX + f0 (bendir.wav), YIN: N=2048, H=256')

    plt.tight_layout()
    plt.savefig('f0Yin.png')
    audio.write_wav(yf0, fs, 'f0Yin.wav')
Example 17
                                                  harmDevSlope1, minSineDur1,
                                                  Ns, stocf)
hfreq2, hmag2, hphase2, stocEnv2 = hps.from_audio(x2, fs2, w2, N2, H, t2, nH,
                                                  minf02, maxf02, f0et2,
                                                  harmDevSlope2, minSineDur2,
                                                  Ns, stocf)

hfreqIntp = np.array([0, .5, 1, .5])
hmagIntp = np.array([0, .5, 1, .5])
stocIntp = np.array([0, .5, 1, .5])
yhfreq, yhmag, ystocEnv = hps.morph(hfreq1, hmag1, stocEnv1, hfreq2, hmag2,
                                    stocEnv2, hfreqIntp, hmagIntp, stocIntp)

y, yh, yst = hps.to_audio(yhfreq, yhmag, np.array([]), ystocEnv, Ns, H, fs1)

audio.write_wav(y, fs1, 'hps-morph.wav')

plt.figure(figsize=(12, 9))
frame = 200

plt.subplot(2, 3, 1)
plt.vlines(hfreq1[frame, :], -100, hmag1[frame, :], lw=1.5, color='b')
plt.axis([0, 5000, -80, -15])
plt.title('x1: harmonics')

plt.subplot(2, 3, 2)
plt.vlines(hfreq2[frame, :], -100, hmag2[frame, :], lw=1.5, color='r')
plt.axis([0, 5000, -80, -15])
plt.title('x2: harmonics')

plt.subplot(2, 3, 3)
Example 18

def main(inputFile=demo_sound_path('vignesh.wav'),
         window='blackman',
         M=1201,
         N=2048,
         t=-90,
         minSineDur=0.1,
         nH=100,
         minf0=130,
         maxf0=300,
         f0et=7,
         harmDevSlope=0.01,
         interactive=True,
         plotFile=False):
    """
    Analysis and synthesis using the harmonic model
    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size; N: fft size (power of two, greater than or equal to M)
    t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks
    nH: maximum number of harmonics; minf0: minimum fundamental frequency in sound
    maxf0: maximum fundamental frequency in sound; f0et: maximum error accepted in f0 detection algorithm
    harmDevSlope: allowed deviation of harmonic tracks, higher harmonics could have higher allowed deviation
    """

    # size of fft used in synthesis
    Ns = 512

    # hop size (has to be 1/4 of Ns)
    H = 128

    # read input sound
    (fs, x) = audio.read_wav(inputFile)

    # compute analysis window
    w = get_window(window, M)

    # detect harmonics of input sound
    hfreq, hmag, hphase = harmonic.from_audio(x, fs, w, N, H, t, nH, minf0,
                                              maxf0, f0et, harmDevSlope,
                                              minSineDur)

    # synthesize the harmonics
    y = sine.to_audio(hfreq, hmag, hphase, Ns, H, fs)

    # output sound file (monophonic with sampling rate of 44100)
    outputFile = 'output_sounds/' + files.strip_file(
        inputFile) + '_harmonicModel.wav'

    # write the sound resulting from harmonic analysis
    audio.write_wav(y, fs, outputFile)

    # create figure to show plots
    plt.figure(figsize=(12, 9))

    # frequency range to plot
    maxplotfreq = 5000.0

    # plot the input sound
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size) / float(fs), x)
    plt.axis([0, x.size / float(fs), min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # plot the harmonic frequencies
    plt.subplot(3, 1, 2)
    if (hfreq.shape[1] > 0):
        numFrames = hfreq.shape[0]
        frmTime = H * np.arange(numFrames) / float(fs)
        hfreq[hfreq <= 0] = np.nan
        plt.plot(frmTime, hfreq)
        plt.axis([0, x.size / float(fs), 0, maxplotfreq])
        plt.title('frequencies of harmonic tracks')

    # plot the output sound
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()

    if interactive:
        plt.show()
    if plotFile:
        plt.savefig('output_plots/%s_harmonic_model.png' %
                    files.strip_file(inputFile))
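A minimal usage sketch (not part of the original example), narrowing the f0 search range, which per the docstring bounds the fundamental the detector may report:

# Hypothetical call: constrain the f0 detector around the expected pitch.
main(inputFile=demo_sound_path('vignesh.wav'), window='blackman', M=1201,
     N=2048, t=-90, minf0=200, maxf0=300, interactive=False, plotFile=True)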
Example 19
def analysis(inputFile=demo_sound_path('sax-phrase-short.wav'), window='blackman', M=601, N=1024, t=-100,
             minSineDur=0.1, nH=100, minf0=350, maxf0=700, f0et=5, harmDevSlope=0.01, stocf=0.1,
             interactive=True, plotFile=False):
    """
    Analyze a sound with the harmonic plus stochastic model
    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size
    N: fft size (power of two, greater than or equal to M)
    t: magnitude threshold of spectral peaks
    minSineDur: minimum duration of sinusoidal tracks
    nH: maximum number of harmonics
    minf0: minimum fundamental frequency in sound
    maxf0: maximum fundamental frequency in sound
    f0et: maximum error accepted in f0 detection algorithm
    harmDevSlope: allowed deviation of harmonic tracks, higher harmonics have higher allowed deviation
    stocf: decimation factor used for the stochastic approximation
    returns inputFile: input file name; fs: sampling rate of input file,
            hfreq, hmag: harmonic frequencies, magnitude; mYst: stochastic residual
    """

    # size of fft used in synthesis
    Ns = 512

    # hop size (has to be 1/4 of Ns)
    H = 128

    # read input sound
    (fs, x) = audio.read_wav(inputFile)

    # compute analysis window
    w = get_window(window, M)

    # compute the harmonic plus stochastic model of the whole sound
    hfreq, hmag, hphase, mYst = hps.from_audio(x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope, minSineDur,
                                               Ns, stocf)

    # synthesize the harmonic plus stochastic model without original phases
    y, yh, yst = hps.to_audio(hfreq, hmag, np.array([]), mYst, Ns, H, fs)

    # write output sound
    outputFile = 'output_sounds/' + strip_file(inputFile) + '_hpsModel.wav'
    audio.write_wav(y, fs, outputFile)

    # create figure to plot
    plt.figure(figsize=(12, 9))

    # frequency range to plot
    maxplotfreq = 15000.0

    # plot the input sound
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size) / float(fs), x)
    plt.axis([0, x.size / float(fs), min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # plot spectrogram of the stochastic component
    plt.subplot(3, 1, 2)
    numFrames = int(mYst.shape[0])
    sizeEnv = int(mYst.shape[1])
    frmTime = H * np.arange(numFrames) / float(fs)
    binFreq = (.5 * fs) * np.arange(sizeEnv * maxplotfreq / (.5 * fs)) / sizeEnv
    plt.pcolormesh(frmTime, binFreq, np.transpose(mYst[:, :int(sizeEnv * maxplotfreq / (.5 * fs)) + 1]))
    plt.autoscale(tight=True)

    # plot harmonic on top of stochastic spectrogram
    if (hfreq.shape[1] > 0):
        harms = hfreq * np.less(hfreq, maxplotfreq)
        harms[harms == 0] = np.nan
        numFrames = int(harms.shape[0])
        frmTime = H * np.arange(numFrames) / float(fs)
        plt.plot(frmTime, harms, color='k', ms=3, alpha=1)
        plt.xlabel('time (sec)')
        plt.ylabel('frequency (Hz)')
        plt.autoscale(tight=True)
        plt.title('harmonics + stochastic spectrogram')

    # plot the output sound
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()

    if interactive:
        plt.show(block=False)
    if plotFile:
        plt.savefig('output_plots/%s_hps_transformation_analysis.png' % files.strip_file(inputFile))

    return inputFile, fs, hfreq, hmag, mYst
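The returned tuple is shaped to feed an HPS transformation directly; a hedged sketch using the transformation_synthesis of Example 8:

# Hypothetical pipeline: analyze, then time/frequency scale and resynthesize.
params = analysis(interactive=False)
transformation_synthesis(*params, interactive=False, plotFile=True)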
Example 20
                                                  harmDevSlope1, minSineDur1,
                                                  Ns, stocf)
hfreq2, hmag2, hphase2, stocEnv2 = hps.from_audio(x2, fs2, w2, N2, H, t2, nH,
                                                  minf02, maxf02, f0et2,
                                                  harmDevSlope2, minSineDur2,
                                                  Ns, stocf)

hfreqIntp = np.array([0, 0, .1, 0, .9, 1, 1, 1])
hmagIntp = np.array([0, 0, .1, 0, .9, 1, 1, 1])
stocIntp = np.array([0, 0, .1, 0, .9, 1, 1, 1])
yhfreq, yhmag, ystocEnv = hps.morph(hfreq1, hmag1, stocEnv1, hfreq2, hmag2,
                                    stocEnv2, hfreqIntp, hmagIntp, stocIntp)

y, yh, yst = hps.to_audio(yhfreq, yhmag, np.array([]), ystocEnv, Ns, H, fs1)

audio.write_wav(y, fs1, 'hps-morph-total.wav')

plt.figure(figsize=(12, 9))

# frequency range to plot
maxplotfreq = 15000.0

# plot spectrogram stochastic component of sound 1
plt.subplot(3, 1, 1)
numFrames = int(stocEnv1.shape[0])
sizeEnv = int(stocEnv1.shape[1])
frmTime = H * np.arange(numFrames) / float(fs1)
binFreq = (.5 * fs1) * np.arange(sizeEnv * maxplotfreq / (.5 * fs1)) / sizeEnv
plt.pcolormesh(
    frmTime, binFreq,
    np.transpose(stocEnv1[:, :int(sizeEnv * maxplotfreq / (.5 * fs1)) + 1]))
Example 21
def main(inputFile=demo_sound_path('sax-phrase-short.wav'),
         window='blackman',
         M=601,
         N=1024,
         t=-100,
         minSineDur=0.1,
         nH=100,
         minf0=350,
         maxf0=700,
         f0et=5,
         harmDevSlope=0.01,
         interactive=True,
         plotFile=False):
    """
    Perform analysis/synthesis using the harmonic plus residual model
    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size; N: fft size (power of two, greater than or equal to M)
    t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks
    nH: maximum number of harmonics; minf0: minimum fundamental frequency in sound
    maxf0: maximum fundamental frequency in sound; f0et: maximum error accepted in f0 detection algorithm
    harmDevSlope: allowed deviation of harmonic tracks, higher harmonics have higher allowed deviation
    """

    # size of fft used in synthesis
    Ns = 512

    # hop size (has to be 1/4 of Ns)
    H = 128

    # read input sound
    (fs, x) = audio.read_wav(inputFile)

    # compute analysis window
    w = get_window(window, M)

    # find harmonics and residual
    hfreq, hmag, hphase, xr = hpr.from_audio(x, fs, w, N, H, t, minSineDur, nH,
                                             minf0, maxf0, f0et, harmDevSlope)

    # compute spectrogram of residual
    mXr, pXr = stft.from_audio(xr, w, N, H)

    # synthesize hpr model
    y, yh = hpr.to_audio(hfreq, hmag, hphase, xr, Ns, H, fs)

    # output sound file (monophonic with sampling rate of 44100)
    baseFileName = files.strip_file(inputFile)
    outputFileSines, outputFileResidual, outputFile = [
        'output_sounds/%s_hprModel%s.wav' % (baseFileName, i)
        for i in ('_sines', '_residual', '')
    ]

    # write sounds files for harmonics, residual, and the sum
    audio.write_wav(yh, fs, outputFileSines)
    audio.write_wav(xr, fs, outputFileResidual)
    audio.write_wav(y, fs, outputFile)

    # create figure to plot
    plt.figure(figsize=(12, 9))

    # frequency range to plot
    maxplotfreq = 5000.0

    # plot the input sound
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size) / float(fs), x)
    plt.axis([0, x.size / float(fs), min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # plot the magnitude spectrogram of residual
    plt.subplot(3, 1, 2)
    maxplotbin = int(N * maxplotfreq / fs)
    numFrames = int(mXr.shape[0])
    frmTime = H * np.arange(numFrames) / float(fs)
    binFreq = np.arange(maxplotbin + 1) * float(fs) / N
    plt.pcolormesh(frmTime, binFreq, np.transpose(mXr[:, :maxplotbin + 1]))
    plt.autoscale(tight=True)

    # plot harmonic frequencies on residual spectrogram
    if (hfreq.shape[1] > 0):
        harms = hfreq * np.less(hfreq, maxplotfreq)
        harms[harms == 0] = np.nan
        numFrames = int(harms.shape[0])
        frmTime = H * np.arange(numFrames) / float(fs)
        plt.plot(frmTime, harms, color='k', ms=3, alpha=1)
        plt.xlabel('time(s)')
        plt.ylabel('frequency(Hz)')
        plt.autoscale(tight=True)
        plt.title('harmonics + residual spectrogram')

    # plot the output sound
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()

    if interactive:
        plt.show()
    if plotFile:
        plt.savefig('output_plots/%s_hpr_model.png' %
                    files.strip_file(inputFile))
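A minimal usage sketch (not part of the original example); a longer minSineDur discards short-lived tracks so more of the signal ends up in the residual:

# Hypothetical call: keep only harmonic tracks lasting at least 0.2 s.
main(inputFile=demo_sound_path('sax-phrase-short.wav'), minSineDur=0.2,
     interactive=False, plotFile=True)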
Example 22
numFrames = int(mX.shape[0])
frmTime = H * np.arange(numFrames) / float(fs)
binFreq = np.arange(maxplotbin + 1) * float(fs) / N
plt.pcolormesh(frmTime, binFreq, np.transpose(mX[:, :maxplotbin + 1]))
plt.autoscale(tight=True)

plt.subplot(4, 1, 3)
numFrames = int(ytfreq.shape[0])
frmTime = H * np.arange(numFrames) / float(fs)
tracks = ytfreq * np.less(ytfreq, maxplotfreq)
tracks[tracks <= 0] = np.nan
plt.plot(frmTime, tracks, color='k', lw=1)
plt.autoscale(tight=True)
plt.title('mY + time-scaled sine frequencies')

maxplotbin = int(N * maxplotfreq / fs)
numFrames = int(mY.shape[0])
frmTime = H * np.arange(numFrames) / float(fs)
binFreq = np.arange(maxplotbin + 1) * float(fs) / N
plt.pcolormesh(frmTime, binFreq, np.transpose(mY[:, :maxplotbin + 1]))
plt.autoscale(tight=True)

plt.subplot(4, 1, 4)
plt.plot(np.arange(y.size) / float(fs), y, 'b')
plt.axis([0, y.size / float(fs), min(y), max(y)])
plt.title('y')

plt.tight_layout()
audio.write_wav(y, fs, 'mridangam-sineModelTimeScale.wav')
plt.savefig('sineModelTimeScale-mridangam.png')
Example 23
def transformation_synthesis(inputFile1,
                             fs,
                             hfreq1,
                             hmag1,
                             stocEnv1,
                             inputFile2,
                             hfreq2,
                             hmag2,
                             stocEnv2,
                             hfreqIntp=np.array([0, 0, .1, 0, .9, 1, 1, 1]),
                             hmagIntp=np.array([0, 0, .1, 0, .9, 1, 1, 1]),
                             stocIntp=np.array([0, 0, .1, 0, .9, 1, 1, 1]),
                             interactive=True,
                             plotFile=False):
    """
    Transform the analysis values returned by the analysis function and synthesize the sound
    inputFile1: name of input file 1
    fs: sampling rate of input file	1
    hfreq1, hmag1, stocEnv1: hps representation of sound 1
    inputFile2: name of input file 2
    hfreq2, hmag2, stocEnv2: hps representation of sound 2
    hfreqIntp: interpolation factor between the harmonic frequencies of the two sounds, 0 is sound 1 and 1 is sound 2 (time,value pairs)
    hmagIntp: interpolation factor between the harmonic magnitudes of the two sounds, 0 is sound 1 and 1 is sound 2  (time,value pairs)
    stocIntp: interpolation factor between the stochastic representation of the two sounds, 0 is sound 1 and 1 is sound 2  (time,value pairs)
    """

    # size of fft used in synthesis
    Ns = 512
    # hop size (has to be 1/4 of Ns)
    H = 128

    # morph the two sounds
    yhfreq, yhmag, ystocEnv = hps.morph(hfreq1, hmag1, stocEnv1, hfreq2, hmag2,
                                        stocEnv2, hfreqIntp, hmagIntp,
                                        stocIntp)

    # synthesis
    y, yh, yst = hps.to_audio(yhfreq, yhmag, np.array([]), ystocEnv, Ns, H, fs)

    # write output sound
    outputFile = 'output_sounds/' + os.path.basename(
        inputFile1)[:-4] + '_hpsMorph.wav'
    audio.write_wav(y, fs, outputFile)

    # create figure to plot
    plt.figure(figsize=(12, 9))

    # frequency range to plot
    maxplotfreq = 15000.0

    # plot spectrogram of the transformed stochastic component
    plt.subplot(2, 1, 1)
    numFrames = int(ystocEnv.shape[0])
    sizeEnv = int(ystocEnv.shape[1])
    frmTime = H * np.arange(numFrames) / float(fs)
    binFreq = (.5 * fs) * np.arange(sizeEnv * maxplotfreq /
                                    (.5 * fs)) / sizeEnv
    plt.pcolormesh(
        frmTime, binFreq,
        np.transpose(ystocEnv[:, :int(sizeEnv * maxplotfreq / (.5 * fs)) + 1]))
    plt.autoscale(tight=True)

    # plot transformed harmonic on top of stochastic spectrogram
    if (yhfreq.shape[1] > 0):
        harms = np.copy(yhfreq)
        harms = harms * np.less(harms, maxplotfreq)
        harms[harms == 0] = np.nan
        numFrames = int(harms.shape[0])
        frmTime = H * np.arange(numFrames) / float(fs)
        plt.plot(frmTime, harms, color='k', ms=3, alpha=1)
        plt.xlabel('time (sec)')
        plt.ylabel('frequency (Hz)')
        plt.autoscale(tight=True)
        plt.title('harmonics + stochastic spectrogram')

    # plot the output sound
    plt.subplot(2, 1, 2)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()

    if interactive:
        plt.show()
    if plotFile:
        plt.savefig(
            'output_plots/%s_%s_hps_morph_synthesis.png' %
            (files.strip_file(inputFile1), files.strip_file(inputFile2)))
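The interpolation envelopes are (time, factor) pairs with factor 0 selecting sound 1 and 1 selecting sound 2 (per the docstring), so the defaults hold sound 1 until 0.1, cross-fade to sound 2 by 0.9, then hold it. A hedged sketch of a constant 50/50 morph (the same envelope used in Example 17), assuming the two analyses of Example 20:

# Hypothetical call: time-invariant half-way morph between the two sounds.
half = np.array([0, .5, 1, .5])
transformation_synthesis(inputFile1, fs1, hfreq1, hmag1, stocEnv1,
                         inputFile2, hfreq2, hmag2, stocEnv2,
                         hfreqIntp=half, hmagIntp=half, stocIntp=half,
                         interactive=False, plotFile=True)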
Example 24
H = Ns // 4  # integer division: hop size has to be 1/4 of Ns
tfreq, tmag, tphase = sine.from_audio(x1, fs, w, N, H, t, maxnSines, minSineDur, freqDevOffset, freqDevSlope)
y = sine.to_audio(tfreq, tmag, tphase, Ns, H, fs)

numFrames = int(tfreq.shape[0])
frmTime = H * np.arange(numFrames) / float(fs)
maxplotfreq = 3000.0

plt.figure(1, figsize=(9, 7))

plt.subplot(3, 1, 1)
plt.plot(np.arange(x1.size) / float(fs), x1, 'b', lw=1.5)
plt.axis([0, x1.size / float(fs), min(x1), max(x1)])
plt.title('x (bendir.wav)')

plt.subplot(3, 1, 2)
tracks = tfreq * np.less(tfreq, maxplotfreq)
tracks[tracks <= 0] = np.nan
plt.plot(frmTime, tracks, color='k', lw=1.5)
plt.autoscale(tight=True)
plt.title('f_t, sine frequencies')

plt.subplot(3, 1, 3)
plt.plot(np.arange(y.size) / float(fs), y, 'b', lw=1.5)
plt.axis([0, y.size / float(fs), min(y), max(y)])
plt.title('y')

plt.tight_layout()
audio.write_wav(y, fs, 'bendir-sine-synthesis.wav')
plt.savefig('sineModel-anal-synth.png')
Example 25
N = 2048
t = -90
minf0 = 100
maxf0 = 300
f0et = 1
maxnpeaksTwm = 4
H = 128
x1 = x[int(1.5 * fs):int(1.8 * fs)]

plt.figure(1, figsize=(9, 7))
mX, pX = stft.from_audio(x, w, N, H)
f0 = harmonic.find_fundamental_freq(x, fs, w, N, H, t, minf0, maxf0, f0et)
f0 = peaks.clean_sinusoid_track(f0, 5)
yf0 = synth.synthesize_sinusoid(f0, .8, H, fs)
f0[f0 == 0] = np.nan
maxplotfreq = 800.0
numFrames = int(mX.shape[0])
frmTime = H * np.arange(numFrames) / float(fs)
binFreq = fs * np.arange(N * maxplotfreq / fs) / N
plt.pcolormesh(frmTime, binFreq,
               np.transpose(mX[:, :int(N * maxplotfreq / fs) + 1]))
plt.autoscale(tight=True)

plt.plot(frmTime, f0, linewidth=2, color='k')
plt.autoscale(tight=True)
plt.title('mX + f0 (piano.wav), TWM')

plt.tight_layout()
plt.savefig('f0Twm-piano.png')
audio.write_wav(yf0, fs, 'f0Twm-piano.wav')
Example 26
def main(inputFile=demo_sound_path('bendir.wav'),
         window='hamming',
         M=2001,
         N=2048,
         t=-80,
         minSineDur=0.02,
         maxnSines=150,
         freqDevOffset=10,
         freqDevSlope=0.001,
         interactive=True,
         plotFile=False):
    """
    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size
    N: fft size (power of two, greater than or equal to M)
    t: magnitude threshold of spectral peaks
    minSineDur: minimum duration of sinusoidal tracks
    maxnSines: maximum number of parallel sinusoids
    freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0
    freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation
    """

    # size of fft used in synthesis
    Ns = 512

    # hop size (has to be 1/4 of Ns)
    H = 128

    # read input sound
    (fs, x) = audio.read_wav(inputFile)

    # compute analysis window
    w = get_window(window, M)

    # perform sinusoidal plus residual analysis
    tfreq, tmag, tphase, xr = spr.from_audio(x, fs, w, N, H, t, minSineDur,
                                             maxnSines, freqDevOffset,
                                             freqDevSlope)

    # compute spectrogram of residual
    mXr, pXr = stft.from_audio(xr, w, N, H)

    # sum sinusoids and residual
    y, ys = spr.to_audio(tfreq, tmag, tphase, xr, Ns, H, fs)

    # output sound file (monophonic with sampling rate of 44100)
    baseFileName = strip_file(inputFile)
    outputFileSines, outputFileResidual, outputFile = [
        'output_sounds/%s_sprModel%s.wav' % (baseFileName, i)
        for i in ('_sines', '_residual', '')
    ]

    # write sounds files for sinusoidal, residual, and the sum
    audio.write_wav(ys, fs, outputFileSines)
    audio.write_wav(xr, fs, outputFileResidual)
    audio.write_wav(y, fs, outputFile)

    # create figure to show plots
    plt.figure(figsize=(12, 9))

    # frequency range to plot
    maxplotfreq = 5000.0

    # plot the input sound
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size) / float(fs), x)
    plt.axis([0, x.size / float(fs), min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # plot the magnitude spectrogram of residual
    plt.subplot(3, 1, 2)
    maxplotbin = int(N * maxplotfreq / fs)
    numFrames = int(mXr.shape[0])
    frmTime = H * np.arange(numFrames) / float(fs)
    binFreq = np.arange(maxplotbin + 1) * float(fs) / N
    plt.pcolormesh(frmTime, binFreq, np.transpose(mXr[:, :maxplotbin + 1]))
    plt.autoscale(tight=True)

    # plot the sinusoidal frequencies on top of the residual spectrogram
    if (tfreq.shape[1] > 0):
        tracks = tfreq * np.less(tfreq, maxplotfreq)
        tracks[tracks <= 0] = np.nan
        plt.plot(frmTime, tracks, color='k')
        plt.title('sinusoidal tracks + residual spectrogram')
        plt.autoscale(tight=True)

    # plot the output sound
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()

    if interactive:
        plt.show()
    if plotFile:
        plt.savefig('output_plots/%s_spr_model.png' %
                    files.strip_file(inputFile))
Example 27
harmDevSlope = 0.01
stocf = 0.1

Ns = 512
H = 128

(fs, x) = audio.read_wav(inputFile)
w = get_window(window, M)
hfreq, hmag, hphase, mYst = hps.from_audio(x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope, minSineDur,
                                           Ns, stocf)
timeScaling = np.array([0, 0, 2.138, 2.138 - 1.5, 3.146, 3.146])
yhfreq, yhmag, ystocEnv = hps.scale_time(hfreq, hmag, mYst, timeScaling)

y, yh, yst = hps.to_audio(yhfreq, yhmag, np.array([]), ystocEnv, Ns, H, fs)

audio.write_wav(y, fs, 'hps-transformation.wav')

plt.figure(figsize=(12, 9))

maxplotfreq = 14900.0

# plot the input sound
plt.subplot(4, 1, 1)
plt.plot(np.arange(x.size) / float(fs), x)
plt.axis([0, x.size / float(fs), min(x), max(x)])
plt.title('x (sax-phrase-short.wav)')

# plot spectrogram of the stochastic component
plt.subplot(4, 1, 2)
numFrames = int(mYst.shape[0])
sizeEnv = int(mYst.shape[1])
Example 28
plt.plot(np.arange(x.size) / float(fs), x, 'b')
plt.autoscale(tight=True)
plt.title('x (sax-phrase-short.wav)')

plt.subplot(312)
numFrames = int(mYst.shape[0])
sizeEnv = int(mYst.shape[1])
frmTime = H * np.arange(numFrames) / float(fs)
binFreq = (.5 * fs) * np.arange(sizeEnv * maxplotfreq / (.5 * fs)) / sizeEnv
plt.pcolormesh(frmTime, binFreq,
               np.transpose(mYst[:, :int(sizeEnv * maxplotfreq / (.5 * fs)) + 1]))

harms = hfreq * np.less(hfreq, maxplotfreq)
harms[harms == 0] = np.nan
numFrames = int(harms.shape[0])
frmTime = H * np.arange(numFrames) / float(fs)
plt.plot(frmTime, harms, color='k', ms=3, alpha=1)
plt.autoscale(tight=True)
plt.title('harmonics + stochastic')

plt.subplot(313)
plt.plot(np.arange(y.size) / float(fs), y, 'b')
plt.autoscale(tight=True)
plt.title('y')

plt.tight_layout()
plt.savefig('hpsModel-sax-phrase.png')
audio.write_wav(y, fs, 'sax-phrase-hps-synthesis.wav')
audio.write_wav(yh, fs, 'sax-phrase-harmonic.wav')
audio.write_wav(yst, fs, 'sax-phrase-stochastic.wav')
Example 29
def analysis(inputFile=demo_sound_path('mridangam.wav'),
             window='hamming',
             M=801,
             N=2048,
             t=-90,
             minSineDur=0.01,
             maxnSines=150,
             freqDevOffset=20,
             freqDevSlope=0.02,
             interactive=True,
             plotFile=False):
    """
    Analyze a sound with the sine model
    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size; N: fft size (power of two, greater than or equal to M)
    t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks
    maxnSines: maximum number of parallel sinusoids
    freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0
    freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation
    returns inputFile: input file name; fs: sampling rate of input file,
            tfreq, tmag: sinusoidal frequencies and magnitudes
    """

    # size of fft used in synthesis
    Ns = 512

    # hop size (has to be 1/4 of Ns)
    H = 128

    # read input sound
    (fs, x) = audio.read_wav(inputFile)

    # compute analysis window
    w = get_window(window, M)

    # compute the sine model of the whole sound
    tfreq, tmag, tphase = sine.from_audio(x, fs, w, N, H, t, maxnSines,
                                          minSineDur, freqDevOffset,
                                          freqDevSlope)

    # synthesize the sines without original phases
    y = sine.to_audio(tfreq, tmag, np.array([]), Ns, H, fs)

    # output sound file (monophonic with sampling rate of 44100)
    outputFile = 'output_sounds/' + strip_file(inputFile) + '_sineModel.wav'

    # write the sound resulting from the sinusoidal synthesis
    audio.write_wav(y, fs, outputFile)

    # create figure to show plots
    plt.figure(figsize=(12, 9))

    # frequency range to plot
    maxplotfreq = 5000.0

    # plot the input sound
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size) / float(fs), x)
    plt.axis([0, x.size / float(fs), min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # plot the sinusoidal frequencies
    if (tfreq.shape[1] > 0):
        plt.subplot(3, 1, 2)
        tracks = np.copy(tfreq)
        tracks = tracks * np.less(tracks, maxplotfreq)
        tracks[tracks <= 0] = np.nan
        numFrames = int(tracks.shape[0])
        frmTime = H * np.arange(numFrames) / float(fs)
        plt.plot(frmTime, tracks)
        plt.axis([0, x.size / float(fs), 0, maxplotfreq])
        plt.title('frequencies of sinusoidal tracks')

    # plot the output sound
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()

    if interactive:
        plt.show(block=False)
    if plotFile:
        plt.savefig('output_plots/%s_sine_transformation_analysis.png' %
                    files.strip_file(inputFile))

    return inputFile, fs, tfreq, tmag
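As with the HPS version, the return tuple matches the signature of the sine-model transformation_synthesis of Example 3; a hedged sketch:

# Hypothetical pipeline: sine analysis followed by frequency/time scaling.
inputFile, fs, tfreq, tmag = analysis(interactive=False)
transformation_synthesis(inputFile, fs, tfreq, tmag,
                         interactive=False, plotFile=True)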
Example 30

# This fragment assumes x1, x2, fs, mX, mX2, mY, y, H1, N1 and maxplotfreq
# from a preceding STFT morph run (as in Example 6).
frmTime = H1 * np.arange(numFrames) / float(fs)
binFreq = fs * np.arange(N1 * maxplotfreq / fs) / N1
plt.pcolormesh(frmTime, binFreq,
               np.transpose(mX[:, :int(N1 * maxplotfreq / fs) + 1]))
plt.title('mX (orchestra.wav)')
plt.autoscale(tight=True)

plt.subplot(312)
numFrames = int(mX2.shape[0])
frmTime = H1 * np.arange(numFrames) / float(fs)

N = 2 * mX2.shape[1]
binFreq = fs * np.arange(N * maxplotfreq / fs) / N
plt.pcolormesh(frmTime, binFreq,
               np.transpose(mX2[:, :int(N * maxplotfreq / fs) + 1]))
plt.title('mX2 (speech-male.wav)')
plt.autoscale(tight=True)

plt.subplot(313)
numFrames = int(mY.shape[0])
frmTime = H1 * np.arange(numFrames) / float(fs)
binFreq = fs * np.arange(N1 * maxplotfreq / fs) / N1
plt.pcolormesh(frmTime, binFreq,
               np.transpose(mY[:, :int(N1 * maxplotfreq / fs) + 1]))
plt.title('mY')
plt.autoscale(tight=True)

plt.tight_layout()
audio.write_wav(y, fs, 'orchestra-speech-stftMorph.wav')
plt.savefig('stftMorph-orchestra.png')