def test_reconstruct_sound():
    # Regression test: harmonic analysis + sinusoidal resynthesis of a short sax phrase.
    fs, x = audio.read_wav(sound_path("sax-phrase-short.wav"))
    window_size, fft_size, hop_size = 4001, 4096, 2048
    window = get_window('hamming', window_size)

    xtfreq, xtmag, xtphase = harmonic.from_audio(
        x, fs, window, fft_size, hop_size,
        t=-80, nH=20, minf0=100, maxf0=2000, f0et=5,
        harmDevSlope=0.01, minSineDur=.02)
    x_reconstructed = sine.to_audio(xtfreq, xtmag, xtphase, fft_size, hop_size, fs)

    assert 138746 == len(x)

    # every model output has one frame per hop
    expected_frame_count = int(math.ceil(float(len(x)) / hop_size))
    for model_output in (xtfreq, xtmag, xtphase):
        assert expected_frame_count == len(model_output)
    assert xtfreq.shape[1] <= 100

    # statistics of the model for regression testing without explicitly storing the whole data
    assert np.allclose(1738.618043903208, xtfreq.mean())
    assert np.allclose(-64.939768348945279, xtmag.mean())
    assert np.allclose(1.6687005886001871, xtphase.mean())

    # TODO: this is completely off, it should be equal to len(x)!
    assert 69 * 2048 == len(x_reconstructed)
    assert np.allclose(0.036941947007791701, rmse(x, x_reconstructed[:len(x)]))
def test_reconstruct_sound():
    # Regression test: sine-model analysis + resynthesis of a short sax phrase.
    fs, x = audio.read_wav(sound_path("sax-phrase-short.wav"))
    window_size, fft_size, hop_size = 4001, 4096, 2048
    window = get_window('hamming', window_size)

    xtfreq, xtmag, xtphase = sine.from_audio(
        x, fs, window, fft_size, hop_size,
        t=-80, maxnSines=100, minSineDur=.01,
        freqDevOffset=20, freqDevSlope=0.01)
    x_reconstructed = sine.to_audio(xtfreq, xtmag, xtphase, fft_size, hop_size, fs)

    assert 138746 == len(x)

    # every model output has one frame per hop
    expected_frame_count = int(math.ceil(float(len(x)) / hop_size))
    for model_output in (xtfreq, xtmag, xtphase):
        assert expected_frame_count == len(model_output)
    assert xtfreq.shape[1] <= 100

    # statistics of the model for regression testing without explicitly storing the whole data
    assert np.allclose(945.892990545, xtfreq.mean())
    assert np.allclose(-30.3138495002, xtmag.mean())
    assert np.allclose(1.34449391701, xtphase.mean())

    # TODO: this is completely off, it should be equal to len(x)!
    assert 69 * 2048 == len(x_reconstructed)
    assert np.allclose(0.010812475879315771, rmse(x, x_reconstructed[:len(x)]))
def analysis(inputFile=demo_sound_path('mridangam.wav'), window='hamming', M=801, N=2048, t=-90,
             minSineDur=0.01, maxnSines=150, freqDevOffset=20, freqDevSlope=0.02,
             interactive=True, plotFile=False):
    """
    Analyze a sound with the sine model
    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size; N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks
    maxnSines: maximum number of parallel sinusoids
    freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0
    freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation
    returns inputFile: input file name; fs: sampling rate of input file, tfreq, tmag: sinusoidal frequencies and magnitudes
    """
    Ns = 512  # size of fft used in synthesis
    H = 128   # hop size (has to be 1/4 of Ns)

    # load the input sound and build the analysis window
    fs, x = audio.read_wav(inputFile)
    w = get_window(window, M)

    # sinusoidal model of the whole sound, then resynthesis without the original phases
    tfreq, tmag, tphase = sine.from_audio(x, fs, w, N, H, t, maxnSines, minSineDur, freqDevOffset, freqDevSlope)
    y = sine.to_audio(tfreq, tmag, np.array([]), Ns, H, fs)

    # write the synthesized sound (monophonic with sampling rate of 44100)
    outputFile = 'output_sounds/' + strip_file(inputFile) + '_sineModel.wav'
    audio.write_wav(y, fs, outputFile)

    plt.figure(figsize=(12, 9))
    maxplotfreq = 5000.0  # upper frequency limit shown in the track plot

    # input waveform
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size) / float(fs), x)
    plt.axis([0, x.size / float(fs), min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # sinusoidal track frequencies (mask a copy so tfreq is returned untouched)
    if (tfreq.shape[1] > 0):
        plt.subplot(3, 1, 2)
        tracks = np.copy(tfreq)
        tracks = tracks * np.less(tracks, maxplotfreq)
        tracks[tracks <= 0] = np.nan
        numFrames = int(tracks.shape[0])
        frmTime = H * np.arange(numFrames) / float(fs)
        plt.plot(frmTime, tracks)
        plt.axis([0, x.size / float(fs), 0, maxplotfreq])
        plt.title('frequencies of sinusoidal tracks')

    # output waveform
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()
    if interactive:
        plt.show(block=False)
    if plotFile:
        plt.savefig('output_plots/%s_sine_transformation_analysis.png' % files.strip_file(inputFile))

    return inputFile, fs, tfreq, tmag
def transformation_synthesis(inputFile, fs, tfreq, tmag,
                             freqScaling=np.array([0, 2.0, 1, .3]),
                             timeScaling=np.array([0, .0, .671, .671, 1.978, 1.978 + 1.0]),
                             interactive=True, plotFile=False):
    """
    Transform the analysis values returned by the analysis function and synthesize the sound
    inputFile: name of input file; fs: sampling rate of input file
    tfreq, tmag: sinusoidal frequencies and magnitudes
    freqScaling: frequency scaling factors, in time-value pairs
    timeScaling: time scaling factors, in time-value pairs
    """
    Ns = 512  # size of fft used in synthesis
    H = 128   # hop size (has to be 1/4 of Ns)

    # apply the frequency scaling, then the time scaling, to the sinusoidal tracks
    ytfreq = sine.scale_frequencies(tfreq, freqScaling)
    ytfreq, ytmag = sine.scale_time(ytfreq, tmag, timeScaling)

    # synthesize and save the transformed sound
    y = sine.to_audio(ytfreq, ytmag, np.array([]), Ns, H, fs)
    outputFile = 'output_sounds/' + strip_file(inputFile) + '_sineModelTransformation.wav'
    audio.write_wav(y, fs, outputFile)

    plt.figure(figsize=(12, 6))
    maxplotfreq = 15000.0  # upper frequency limit shown in the track plot

    # transformed sinusoidal track frequencies (mask a copy, leave ytfreq intact)
    if (ytfreq.shape[1] > 0):
        plt.subplot(2, 1, 1)
        tracks = np.copy(ytfreq)
        tracks = tracks * np.less(tracks, maxplotfreq)
        tracks[tracks <= 0] = np.nan
        numFrames = int(tracks.shape[0])
        frmTime = H * np.arange(numFrames) / float(fs)
        plt.plot(frmTime, tracks)
        plt.title('transformed sinusoidal tracks')
        plt.autoscale(tight=True)

    # output waveform
    plt.subplot(2, 1, 2)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()
    if interactive:
        plt.show()
    if plotFile:
        plt.savefig('output_plots/%s_sine_transformation_synthesis.png' % files.strip_file(inputFile))
def main(inputFile=demo_sound_path('bendir.wav'), window='hamming', M=2001, N=2048, t=-80,
         minSineDur=0.02, maxnSines=150, freqDevOffset=10, freqDevSlope=0.001,
         interactive=True, plotFile=False):
    """
    Perform analysis/synthesis using the sinusoidal model
    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size; N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks
    maxnSines: maximum number of parallel sinusoids
    freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0
    freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation
    """
    # size of fft used in synthesis
    Ns = 512
    # hop size (has to be 1/4 of Ns)
    H = 128

    # read input sound and compute the analysis window
    fs, x = audio.read_wav(inputFile)
    w = get_window(window, M)

    # analyze the sound with the sinusoidal model
    tfreq, tmag, tphase = sine.from_audio(x, fs, w, N, H, t, maxnSines, minSineDur, freqDevOffset, freqDevSlope)

    # synthesize the output sound from the sinusoidal representation
    y = sine.to_audio(tfreq, tmag, tphase, Ns, H, fs)

    # write the synthesized sound obtained from the sinusoidal synthesis
    outputFile = 'output_sounds/' + strip_file(inputFile) + '_sineModel.wav'
    audio.write_wav(y, fs, outputFile)

    # create figure to show plots
    plt.figure(figsize=(12, 9))
    # frequency range to plot
    maxplotfreq = 5000.0

    # plot the input sound
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size) / float(fs), x)
    plt.axis([0, x.size / float(fs), min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # plot the sinusoidal frequencies
    plt.subplot(3, 1, 2)
    if (tfreq.shape[1] > 0):
        numFrames = tfreq.shape[0]
        frmTime = H * np.arange(numFrames) / float(fs)
        # mask zeros on a copy: the original code set NaNs into tfreq itself,
        # mutating the model data during plotting (analysis() above copies first)
        tracks = np.copy(tfreq)
        tracks[tracks <= 0] = np.nan
        plt.plot(frmTime, tracks)
        plt.axis([0, x.size / float(fs), 0, maxplotfreq])
        plt.title('frequencies of sinusoidal tracks')

    # plot the output sound
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()
    if interactive:
        plt.show()
    if plotFile:
        plt.savefig('output_plots/%s_sine_model.png' % files.strip_file(inputFile))
# Harmonic-model analysis/resynthesis demo on vignesh.wav.
(fs, x) = audio.read_wav('../../../sounds/vignesh.wav')
w = np.blackman(1201)   # analysis window
N = 2048                # FFT size
t = -90                 # peak magnitude threshold (dB)
nH = 100                # maximum number of harmonics
minf0 = 130             # minimum fundamental frequency (Hz)
maxf0 = 300             # maximum fundamental frequency (Hz)
f0et = 7                # maximum error accepted in f0 detection
Ns = 512                # synthesis FFT size
H = Ns // 4             # hop size must be an int; "/" would give a float on Python 3
minSineDur = .1
harmDevSlope = 0.01

hfreq, hmag, hphase = harmonic.from_audio(x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope, minSineDur)
y = sine.to_audio(hfreq, hmag, hphase, Ns, H, fs)
numFrames = int(hfreq.shape[0])
frmTime = H * np.arange(numFrames) / float(fs)

plt.figure(1, figsize=(9, 7))
plt.subplot(3, 1, 1)
plt.plot(np.arange(x.size) / float(fs), x, 'b')
plt.axis([0, x.size / float(fs), min(x), max(x)])
plt.title('x (vignesh.wav)')

plt.subplot(3, 1, 2)
# np.copy, not a bare assignment: "yhfreq = hfreq" only aliased the array,
# so writing NaNs into it clobbered hfreq as well
yhfreq = np.copy(hfreq)
yhfreq[yhfreq == 0] = np.nan
plt.plot(frmTime, yhfreq, lw=1.2)
from smst.models import sine

# Sinusoidal-model demo on the first 50000 samples of bendir.wav.
(fs, x) = audio.read_wav('../../../sounds/bendir.wav')
x1 = x[0:50000]
w = np.blackman(2001)   # analysis window
N = 2048                # FFT size
t = -90                 # peak magnitude threshold (dB)
minSineDur = .01
maxnSines = 150
freqDevOffset = 20
freqDevSlope = 0.02
Ns = 512                # synthesis FFT size
# hop size (has to be 1/4 of Ns); integer division keeps it an int on Python 3.
# The original also assigned H = 500 first, which was immediately overwritten.
H = Ns // 4

tfreq, tmag, tphase = sine.from_audio(x1, fs, w, N, H, t, maxnSines, minSineDur, freqDevOffset, freqDevSlope)
y = sine.to_audio(tfreq, tmag, tphase, Ns, H, fs)
numFrames = int(tfreq.shape[0])
frmTime = H * np.arange(numFrames) / float(fs)
maxplotfreq = 3000.0

plt.figure(1, figsize=(9, 7))
plt.subplot(3, 1, 1)
plt.plot(np.arange(x1.size) / float(fs), x1, 'b', lw=1.5)
plt.axis([0, x1.size / float(fs), min(x1), max(x1)])
plt.title('x (bendir.wav)')

plt.subplot(3, 1, 2)
# keep only tracks below maxplotfreq; NaNs make matplotlib skip the gaps
tracks = tfreq * np.less(tfreq, maxplotfreq)
tracks[tracks <= 0] = np.nan
# Time-scaling demo with the sine model; assumes fs and x (mridangam.wav)
# were loaded earlier in the script.
w = np.hamming(801)     # analysis window
N = 2048                # FFT size
t = -90                 # peak magnitude threshold (dB)
minSineDur = .005
maxnSines = 150
freqDevOffset = 20
freqDevSlope = 0.02
Ns = 512                # synthesis FFT size
H = Ns // 4             # hop size must be an int; "/" would give a float on Python 3

mX, pX = stft.from_audio(x, w, N, H)
tfreq, tmag, tphase = sine.from_audio(x, fs, w, N, H, t, maxnSines, minSineDur, freqDevOffset, freqDevSlope)

# time-scaling envelope as (input time, output time) pairs
timeScale = np.array(
    [.01, .0, .03, .03, .335, .4, .355, .42, .671, .8, .691, .82,
     .858, 1.2, .878, 1.22, 1.185, 1.6, 1.205, 1.62, 1.497, 2.0,
     1.517, 2.02, 1.686, 2.4, 1.706, 2.42, 1.978, 2.8])
ytfreq, ytmag = sine.scale_time(tfreq, tmag, timeScale)

# resynthesize without the original phases and analyze the result
y = sine.to_audio(ytfreq, ytmag, np.array([]), Ns, H, fs)
mY, pY = stft.from_audio(y, w, N, H)

plt.figure(1, figsize=(12, 9))
maxplotfreq = 4000.0

plt.subplot(4, 1, 1)
plt.plot(np.arange(x.size) / float(fs), x, 'b')
plt.axis([0, x.size / float(fs), min(x), max(x)])
plt.title('x (mridangam.wav)')

plt.subplot(4, 1, 2)
numFrames = int(tfreq.shape[0])
frmTime = H * np.arange(numFrames) / float(fs)
# keep only tracks below maxplotfreq; NaNs make matplotlib skip the gaps
tracks = tfreq * np.less(tfreq, maxplotfreq)
tracks[tracks <= 0] = np.nan
plt.plot(frmTime, tracks, color='k', lw=1)
def main(inputFile=demo_sound_path('vignesh.wav'), window='blackman', M=1201, N=2048, t=-90,
         minSineDur=0.1, nH=100, minf0=130, maxf0=300, f0et=7, harmDevSlope=0.01,
         interactive=True, plotFile=False):
    """
    Analysis and synthesis using the harmonic model
    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size; N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks
    nH: maximum number of harmonics; minf0: minimum fundamental frequency in sound
    maxf0: maximum fundamental frequency in sound; f0et: maximum error accepted in f0 detection algorithm
    harmDevSlope: allowed deviation of harmonic tracks, higher harmonics could have higher allowed deviation
    """
    # size of fft used in synthesis
    Ns = 512
    # hop size (has to be 1/4 of Ns)
    H = 128

    # read input sound and compute the analysis window
    (fs, x) = audio.read_wav(inputFile)
    w = get_window(window, M)

    # detect harmonics of input sound
    hfreq, hmag, hphase = harmonic.from_audio(x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope, minSineDur)

    # synthesize the harmonics
    y = sine.to_audio(hfreq, hmag, hphase, Ns, H, fs)

    # write the sound resulting from harmonic analysis (monophonic, 44100 Hz)
    outputFile = 'output_sounds/' + files.strip_file(inputFile) + '_harmonicModel.wav'
    audio.write_wav(y, fs, outputFile)

    # create figure to show plots
    plt.figure(figsize=(12, 9))
    # frequency range to plot
    maxplotfreq = 5000.0

    # plot the input sound
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size) / float(fs), x)
    plt.axis([0, x.size / float(fs), min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # plot the harmonic frequencies
    plt.subplot(3, 1, 2)
    if (hfreq.shape[1] > 0):
        numFrames = hfreq.shape[0]
        frmTime = H * np.arange(numFrames) / float(fs)
        # mask zeros on a copy: the original code set NaNs into hfreq itself,
        # mutating the model data during plotting
        tracks = np.copy(hfreq)
        tracks[tracks <= 0] = np.nan
        plt.plot(frmTime, tracks)
        plt.axis([0, x.size / float(fs), 0, maxplotfreq])
        plt.title('frequencies of harmonic tracks')

    # plot the output sound
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()
    if interactive:
        plt.show()
    if plotFile:
        plt.savefig('output_plots/%s_harmonic_model.png' % files.strip_file(inputFile))