Example #1
import math

import numpy as np
from scipy.signal import get_window

from smst.utils import audio
from smst.models import sine
# sound_path and rmse are helpers from the project's test utilities
# (their import path is not shown in this snippet).


def test_reconstruct_sound():
    fs, x = audio.read_wav(sound_path("sax-phrase-short.wav"))

    window_size, fft_size, hop_size = 4001, 4096, 2048
    window = get_window('hamming', window_size)

    xtfreq, xtmag, xtphase = sine.from_audio(
        x, fs, window, fft_size, hop_size,
        t=-80, maxnSines=100, minSineDur=.01, freqDevOffset=20, freqDevSlope=0.01)
    x_reconstructed = sine.to_audio(xtfreq, xtmag, xtphase, fft_size, hop_size, fs)

    assert 138746 == len(x)

    expected_frame_count = int(math.ceil(float(len(x)) / hop_size))
    assert expected_frame_count == len(xtfreq)
    assert expected_frame_count == len(xtmag)
    assert expected_frame_count == len(xtphase)

    assert xtfreq.shape[1] <= 100

    # statistics of the model for regression testing without explicitly storing the whole data
    assert np.allclose(945.892990545, xtfreq.mean())
    assert np.allclose(-30.3138495002, xtmag.mean())
    assert np.allclose(1.34449391701, xtphase.mean())

    # TODO: this is completely off, it should be equal to len(x)!
    assert 69 * 2048 == len(x_reconstructed)

    assert np.allclose(0.010812475879315771, rmse(x, x_reconstructed[:len(x)]))
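The final assertion compares the reconstruction against the original signal through an rmse helper. A minimal sketch of what such a root-mean-square-error helper is assumed to compute (the actual helper lives in the project's utility modules):

import numpy as np

def rmse(x, y):
    # root mean square error between two equal-length signals (assumed helper)
    return np.sqrt(np.mean((x - y) ** 2))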
Example #2
import matplotlib.pyplot as plt
import numpy as np
from scipy.signal import get_window

from smst.utils import audio
from smst.models import sine
# demo_sound_path, strip_file and files are helpers from the project's
# utility modules (their import path is not shown in this snippet).


def analysis(inputFile=demo_sound_path('mridangam.wav'),
             window='hamming',
             M=801,
             N=2048,
             t=-90,
             minSineDur=0.01,
             maxnSines=150,
             freqDevOffset=20,
             freqDevSlope=0.02,
             interactive=True,
             plotFile=False):
    """
    Analyze a sound with the sine model
    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size; N: fft size (power of two, greater than or equal to M)
    t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks
    maxnSines: maximum number of parallel sinusoids
    freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0
    freqDevSlope: slope of the frequency deviation; higher frequencies are allowed a larger deviation
    returns inputFile: input file name; fs: sampling rate of the input file;
            tfreq, tmag: sinusoidal track frequencies and magnitudes
    """

    # size of fft used in synthesis
    Ns = 512

    # hop size (has to be 1/4 of Ns)
    H = 128

    # read input sound
    (fs, x) = audio.read_wav(inputFile)

    # compute analysis window
    w = get_window(window, M)

    # compute the sine model of the whole sound
    tfreq, tmag, tphase = sine.from_audio(x, fs, w, N, H, t, maxnSines,
                                          minSineDur, freqDevOffset,
                                          freqDevSlope)

    # synthesize the sines without original phases
    y = sine.to_audio(tfreq, tmag, np.array([]), Ns, H, fs)

    # output sound file (monophonic with sampling rate of 44100)
    outputFile = 'output_sounds/' + strip_file(inputFile) + '_sineModel.wav'

    # write the sound resulting from the sinusoidal synthesis
    audio.write_wav(y, fs, outputFile)

    # create figure to show plots
    plt.figure(figsize=(12, 9))

    # frequency range to plot
    maxplotfreq = 5000.0

    # plot the input sound
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size) / float(fs), x)
    plt.axis([0, x.size / float(fs), min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # plot the sinusoidal frequencies
    if (tfreq.shape[1] > 0):
        plt.subplot(3, 1, 2)
        tracks = np.copy(tfreq)
        tracks = tracks * np.less(tracks, maxplotfreq)
        tracks[tracks <= 0] = np.nan
        numFrames = int(tracks.shape[0])
        frmTime = H * np.arange(numFrames) / float(fs)
        plt.plot(frmTime, tracks)
        plt.axis([0, x.size / float(fs), 0, maxplotfreq])
        plt.title('frequencies of sinusoidal tracks')

    # plot the output sound
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()

    if interactive:
        plt.show(block=False)
    if plotFile:
        plt.savefig('output_plots/%s_sine_transformation_analysis.png' %
                    files.strip_file(inputFile))

    return inputFile, fs, tfreq, tmag
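A minimal sketch of how this interface function might be invoked non-interactively, assuming the demo sound as well as the output_sounds/ and output_plots/ directories exist ('my_sound.wav' is a hypothetical input file):

# analyze the default demo sound and save the plot instead of showing it
inputFile, fs, tfreq, tmag = analysis(interactive=False, plotFile=True)

# or analyze a custom recording with a longer minimum track duration
inputFile, fs, tfreq, tmag = analysis(inputFile='my_sound.wav', minSineDur=0.05,
                                      interactive=False)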
Example #3

import matplotlib.pyplot as plt
import numpy as np

from smst.utils import audio
from smst.models import sine, stft

plt.figure(1, figsize=(9, 7))

plt.subplot(211)
(fs, x) = audio.read_wav('../../../sounds/vibraphone-C6.wav')
w = np.blackman(401)
N = 512
H = 100
t = -100
minSineDur = .02
maxnSines = 150
freqDevOffset = 20
freqDevSlope = 0.01
mX, pX = stft.from_audio(x, w, N, H)
tfreq, tmag, tphase = sine.from_audio(x, fs, w, N, H, t, maxnSines, minSineDur,
                                      freqDevOffset, freqDevSlope)

maxplotfreq = 10000.0
maxplotbin = int(N * maxplotfreq / fs)
numFrames = int(mX.shape[0])
frmTime = H * np.arange(numFrames) / float(fs)
binFreq = np.arange(maxplotbin + 1) * float(fs) / N
plt.pcolormesh(frmTime, binFreq, np.transpose(mX[:, :maxplotbin + 1]))
plt.autoscale(tight=True)

tracks = tfreq * np.less(tfreq, maxplotfreq)
tracks[tracks <= 0] = np.nan
plt.plot(frmTime, tracks, color='k', lw=1.5)
plt.autoscale(tight=True)
plt.title('mX + sine frequencies (vibraphone-C6.wav)')
Example #4

import numpy as np
from scipy.interpolate import interp1d

from smst.utils import audio
from smst.models import sine

(fs, x) = audio.read_wav('../../../sounds/mridangam.wav')
x1 = x[:int(1.49 * fs)]
w = np.hamming(801)
N = 2048
t = -90
minSineDur = .005
maxnSines = 150
freqDevOffset = 20
freqDevSlope = 0.02
Ns = 512
H = Ns // 4  # hop size (has to be 1/4 of Ns)
sfreq, smag, sphase = sine.from_audio(x1, fs, w, N, H, t, maxnSines,
                                      minSineDur, freqDevOffset, freqDevSlope)
timeScale = np.array([
    .01, .0, .03, .03, .335, .8, .355, .82, .671, 1.0, .691, 1.02, .858, 1.1,
    .878, 1.12, 1.185, 1.8, 1.205, 1.82, 1.49, 2.0
])
L = sfreq.shape[0]  # number of input frames
maxInTime = max(timeScale[::2])  # maximum value used as input times
maxOutTime = max(timeScale[1::2])  # maximum value used in output times
outL = int(L * maxOutTime / maxInTime)  # number of output frames
inFrames = L * timeScale[::2] / maxInTime  # input time values in frames
outFrames = outL * timeScale[1::2] / maxOutTime  # output time values in frames
timeScalingEnv = interp1d(outFrames, inFrames,
                          fill_value=0)  # interpolation function
indexes = timeScalingEnv(
    np.arange(outL))  # generate frame indexes for the output
ysfreq = sfreq[int(round(indexes[0])), :]  # first output frame
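The snippet stops after building the first output frame. A sketch of one way to finish the time scaling under the same assumptions (not necessarily the original continuation): stack an analysis frame of sfreq/smag for every interpolated index and resynthesize without the original phases.

ysmag = smag[int(round(indexes[0])), :]  # first output magnitude frame
for ind in indexes[1:]:
    # pick the analysis frame closest to each interpolated index
    ysfreq = np.vstack((ysfreq, sfreq[int(round(ind)), :]))
    ysmag = np.vstack((ysmag, smag[int(round(ind)), :]))
y = sine.to_audio(ysfreq, ysmag, np.array([]), Ns, H, fs)  # synthesize without original phases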
Example #5
import matplotlib as mpl

mpl.use('Agg')
import matplotlib.pyplot as plt
import numpy as np

from smst.utils import audio
from smst.models import sine, stft

(fs, x) = audio.read_wav('../../../sounds/flute-A4.wav')
w = np.blackman(601)
N = 1024
H = 150
t = -80
minSineDur = .1
maxnSines = 150
mX, pX = stft.from_audio(x, w, N, H)
tfreq, tmag, tphase = sine.from_audio(x, fs, w, N, H, t, maxnSines, minSineDur)

plt.figure(1, figsize=(9.5, 5))
maxplotfreq = 5000.0
maxplotbin = int(N * maxplotfreq / fs)
numFrames = int(mX.shape[0])
frmTime = H * np.arange(numFrames) / float(fs)
binFreq = np.arange(maxplotbin + 1) * float(fs) / N
plt.pcolormesh(frmTime, binFreq, np.transpose(mX[:, :maxplotbin + 1]))
plt.autoscale(tight=True)

tracks = tfreq * np.less(tfreq, maxplotfreq)
tracks[tracks <= 0] = np.nan
plt.plot(frmTime, tracks, color='k', lw=1.5)
plt.autoscale(tight=True)
plt.title('mX + sinusoidal tracks (flute-A4.wav)')
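Because the non-interactive Agg backend is selected, the figure is rendered off-screen and nothing is displayed; it only materializes if saved explicitly. A purely illustrative ending (the output file name is hypothetical):

plt.tight_layout()
plt.savefig('sine-tracks-flute-A4.png')  # hypothetical output file name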
Example #6
import matplotlib.pyplot as plt
import numpy as np
from scipy.signal import get_window

from smst.utils import audio
from smst.models import sine
# demo_sound_path, strip_file and files are helpers from the project's
# utility modules (their import path is not shown in this snippet).


def main(inputFile=demo_sound_path('bendir.wav'), window='hamming', M=2001, N=2048, t=-80, minSineDur=0.02,
         maxnSines=150, freqDevOffset=10, freqDevSlope=0.001,
         interactive=True, plotFile=False):
    """
    Perform analysis/synthesis using the sinusoidal model
    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size; N: fft size (power of two, greater than or equal to M)
    t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks
    maxnSines: maximum number of parallel sinusoids
    freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0
    freqDevSlope: slope of the frequency deviation; higher frequencies are allowed a larger deviation
    """

    # size of fft used in synthesis
    Ns = 512

    # hop size (has to be 1/4 of Ns)
    H = 128

    # read input sound
    fs, x = audio.read_wav(inputFile)

    # compute analysis window
    w = get_window(window, M)

    # analyze the sound with the sinusoidal model
    tfreq, tmag, tphase = sine.from_audio(x, fs, w, N, H, t, maxnSines, minSineDur, freqDevOffset, freqDevSlope)

    # synthesize the output sound from the sinusoidal representation
    y = sine.to_audio(tfreq, tmag, tphase, Ns, H, fs)

    # output sound file name
    outputFile = 'output_sounds/' + strip_file(inputFile) + '_sineModel.wav'

    # write the synthesized sound obtained from the sinusoidal synthesis
    audio.write_wav(y, fs, outputFile)

    # create figure to show plots
    plt.figure(figsize=(12, 9))

    # frequency range to plot
    maxplotfreq = 5000.0

    # plot the input sound
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size) / float(fs), x)
    plt.axis([0, x.size / float(fs), min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # plot the sinusoidal frequencies
    plt.subplot(3, 1, 2)
    if (tfreq.shape[1] > 0):
        numFrames = tfreq.shape[0]
        frmTime = H * np.arange(numFrames) / float(fs)
        tfreq[tfreq <= 0] = np.nan
        plt.plot(frmTime, tfreq)
        plt.axis([0, x.size / float(fs), 0, maxplotfreq])
        plt.title('frequencies of sinusoidal tracks')

    # plot the output sound
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()

    if interactive:
        plt.show()
    if plotFile:
        plt.savefig('output_plots/%s_sine_model.png' % files.strip_file(inputFile))
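A short sketch of calling this function from a script, assuming the demo sound and the output_sounds/ and output_plots/ directories exist ('my_recording.wav' is a hypothetical file name):

if __name__ == '__main__':
    # run the full analysis/synthesis on the default demo sound
    main()

    # or process another recording without blocking on the plot window
    main(inputFile='my_recording.wav', minSineDur=0.05,
         interactive=False, plotFile=True)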