Ejemplo n.º 1
0
def main(inputFile=demo_sound_path('ocean.wav'), H=256, N=512, stocf=.1,
         interactive=True, plotFile=False):
    """
    Analyze a sound with the stochastic model, resynthesize it, write the
    result to disk and plot input, stochastic envelope and output.

    inputFile: input sound file (monophonic with sampling rate of 44100)
    H: hop size, N: fft size
    stocf: decimation factor used for the stochastic approximation (bigger than 0, maximum 1)
    interactive: if True, display the figure with plt.show()
    plotFile: if True, save the figure as a PNG under output_plots/
    """

    # read input sound
    (fs, x) = audio.read_wav(inputFile)

    # compute stochastic model
    stocEnv = stochastic.from_audio(x, H, N, stocf)

    # synthesize sound from stochastic model
    y = stochastic.to_audio(stocEnv, H, N)

    outputFile = 'output_sounds/' + strip_file(inputFile) + '_stochasticModel.wav'

    # write output sound
    audio.write_wav(y, fs, outputFile)

    # create figure to plot
    plt.figure(figsize=(12, 9))

    # plot the input sound
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size) / float(fs), x)
    # array methods avoid a Python-level loop over every sample
    plt.axis([0, x.size / float(fs), x.min(), x.max()])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # plot stochastic representation
    plt.subplot(3, 1, 2)
    numFrames = int(stocEnv.shape[0])
    frmTime = H * np.arange(numFrames) / float(fs)
    binFreq = np.arange(stocf * (N / 2 + 1)) * float(fs) / (stocf * N)
    plt.pcolormesh(frmTime, binFreq, np.transpose(stocEnv))
    plt.autoscale(tight=True)
    plt.xlabel('time (sec)')
    plt.ylabel('frequency (Hz)')
    plt.title('stochastic approximation')

    # plot the output sound
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), y.min(), y.max()])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()

    # Save before showing: a blocking plt.show() closes the figure once the
    # window is dismissed, so a later savefig would write an empty image.
    if plotFile:
        plt.savefig('output_plots/%s_stochastic_model.png' % files.strip_file(inputFile))
    if interactive:
        plt.show()
Ejemplo n.º 2
0
def analysis(inputFile1=demo_sound_path('violin-B3.wav'),
             window1='blackman',
             M1=1001,
             N1=1024,
             t1=-100,
             minSineDur1=0.05,
             nH=60,
             minf01=200,
             maxf01=300,
             f0et1=10,
             harmDevSlope1=0.01,
             stocf=0.1,
             inputFile2=demo_sound_path('soprano-E4.wav'),
             window2='blackman',
             M2=901,
             N2=1024,
             t2=-100,
             minSineDur2=0.05,
             minf02=250,
             maxf02=500,
             f0et2=10,
             harmDevSlope2=0.01,
             interactive=True,
             plotFile=False):
    """
    Analyze two sounds with the harmonic plus stochastic model

    The parameters below exist once per sound, with suffix 1 or 2
    (nH and stocf are shared by both analyses).

    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size
    N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks
    minSineDur: minimum duration of sinusoidal tracks
    nH: maximum number of harmonics
    minf0: minimum fundamental frequency in sound
    maxf0: maximum fundamental frequency in sound
    f0et: maximum error accepted in f0 detection algorithm
    harmDevSlope: allowed deviation of harmonic tracks, higher harmonics have higher allowed deviation
    stocf: decimation factor used for the stochastic approximation
    interactive: if True, display the figure without blocking
    plotFile: if True, save the figure as a PNG under output_plots/

    returns the tuple
        (inputFile1, fs1, hfreq1, hmag1, stocEnv1,
         inputFile2, hfreq2, hmag2, stocEnv2)
    i.e. input file names, the sampling rate of the first file only, and for
    each sound the harmonic frequencies, harmonic magnitudes and stochastic
    envelope produced by hps.from_audio.
    """

    # size of fft used in synthesis
    Ns = 512
    # hop size (has to be 1/4 of Ns)
    H = 128
    # read input sounds
    (fs1, x1) = audio.read_wav(inputFile1)
    (fs2, x2) = audio.read_wav(inputFile2)
    # compute analysis windows
    w1 = get_window(window1, M1)
    w2 = get_window(window2, M2)
    # compute the harmonic plus stochastic models
    hfreq1, hmag1, hphase1, stocEnv1 = hps.from_audio(x1, fs1, w1, N1, H, t1,
                                                      nH, minf01, maxf01,
                                                      f0et1, harmDevSlope1,
                                                      minSineDur1, Ns, stocf)
    hfreq2, hmag2, hphase2, stocEnv2 = hps.from_audio(x2, fs2, w2, N2, H, t2,
                                                      nH, minf02, maxf02,
                                                      f0et2, harmDevSlope2,
                                                      minSineDur2, Ns, stocf)

    # create figure to plot
    plt.figure(figsize=(12, 9))

    # frequency range to plot
    maxplotfreq = 15000.0

    # sound 1: stochastic spectrogram with harmonics on top
    plt.subplot(2, 1, 1)
    _plot_hps_analysis(stocEnv1, hfreq1, fs1, H, maxplotfreq,
                       'harmonics + stochastic spectrogram of sound 1')

    # sound 2: stochastic spectrogram with harmonics on top
    plt.subplot(2, 1, 2)
    _plot_hps_analysis(stocEnv2, hfreq2, fs2, H, maxplotfreq,
                       'harmonics + stochastic spectrogram of sound 2')

    plt.tight_layout()

    if interactive:
        plt.show(block=False)
    if plotFile:
        plt.savefig(
            'output_plots/%s_%s_hps_morph_analysis.png' %
            (files.strip_file(inputFile1), files.strip_file(inputFile2)))

    return inputFile1, fs1, hfreq1, hmag1, stocEnv1, inputFile2, hfreq2, hmag2, stocEnv2


def _plot_hps_analysis(stocEnv, hfreq, fs, H, maxplotfreq, title):
    """Draw a stochastic envelope and its harmonic tracks in the current axes.

    stocEnv: 2-D array indexed as [frame, envelope bin]
    hfreq: 2-D array of harmonic frequencies indexed as [frame, harmonic]
    fs: sampling rate; H: hop size
    maxplotfreq: highest frequency (Hz) to display
    title: axes title, set only when there is at least one harmonic track
    """
    numFrames = int(stocEnv.shape[0])
    sizeEnv = int(stocEnv.shape[1])
    frmTime = H * np.arange(numFrames) / float(fs)

    # Slice bounds must be integers (a float bound raises TypeError), and the
    # frequency axis must have exactly as many points as the columns plotted.
    maxplotbin = min(int(sizeEnv * maxplotfreq / (.5 * fs)), sizeEnv - 1)
    binFreq = (.5 * fs) * np.arange(maxplotbin + 1) / sizeEnv
    plt.pcolormesh(frmTime, binFreq,
                   np.transpose(stocEnv[:, :maxplotbin + 1]))
    plt.autoscale(tight=True)

    # plot harmonics on top of the stochastic spectrogram
    if hfreq.shape[1] > 0:
        # the product is a new array, so hfreq itself is left untouched
        harms = hfreq * np.less(hfreq, maxplotfreq)
        # zeros (silent or out-of-range harmonics) become gaps in the lines
        harms[harms == 0] = np.nan
        numFrames = int(harms.shape[0])
        frmTime = H * np.arange(numFrames) / float(fs)
        plt.plot(frmTime, harms, color='k', ms=3, alpha=1)
        plt.xlabel('time (sec)')
        plt.ylabel('frequency (Hz)')
        plt.autoscale(tight=True)
        plt.title(title)
Ejemplo n.º 3
0
def main(inputFile1=demo_sound_path('ocean.wav'),
         inputFile2=demo_sound_path('speech-male.wav'),
         window1='hamming',
         window2='hamming',
         M1=1024,
         M2=1024,
         N1=1024,
         N2=1024,
         H1=256,
         smoothf=.5,
         balancef=0.2,
         interactive=True,
         plotFile=False):
    """
    Function to perform a morph between two sounds
    inputFile1: name of input sound file to be used as source
    inputFile2: name of input sound file to be used as filter
    window1 and window2: windows for both files
    M1 and M2: window sizes for both files
    N1 and N2: fft sizes for both sounds
    H1: hop size for sound 1 (the one for sound 2 is computed automatically)
    smoothf: smoothing factor to be applied to magnitude spectrum of sound 2 before morphing
    balancef: balance factor between both sounds, 0 is sound 1 and 1 is sound 2
    interactive: if True, display the figure with plt.show()
    plotFile: if True, save the figure as a PNG under output_plots/
    """

    # read input sounds
    # NOTE: fs ends up holding the sampling rate of the second file; the rest
    # of the function uses that single value for both sounds.
    (fs, x1) = audio.read_wav(inputFile1)
    (fs, x2) = audio.read_wav(inputFile2)

    # compute analysis windows
    w1 = get_window(window1, M1)
    w2 = get_window(window2, M2)

    # perform morphing
    y = stft.morph(x1, x2, fs, w1, N1, w2, N2, H1, smoothf, balancef)

    # compute the magnitude and phase spectrogram of input sound (for plotting)
    mX1, pX1 = stft.from_audio(x1, w1, N1, H1)

    # compute the magnitude and phase spectrogram of output sound (for plotting)
    mY, pY = stft.from_audio(y, w1, N1, H1)

    # write output sound
    outputFile = 'output_sounds/' + os.path.basename(
        inputFile1)[:-4] + '_stftMorph.wav'
    audio.write_wav(y, fs, outputFile)

    # create figure to plot
    plt.figure(figsize=(12, 9))

    # frequency range to plot
    maxplotfreq = 10000.0

    # plot sound 1
    plt.subplot(4, 1, 1)
    plt.plot(np.arange(x1.size) / float(fs), x1)
    plt.axis([0, x1.size / float(fs), x1.min(), x1.max()])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # plot magnitude spectrogram of sound 1
    plt.subplot(4, 1, 2)
    numFrames = int(mX1.shape[0])
    frmTime = H1 * np.arange(numFrames) / float(fs)
    # Slice bounds must be integers (a float bound raises TypeError); the
    # frequency axis is sized to match the number of columns plotted.
    maxplotbin = min(int(N1 * maxplotfreq / fs), int(mX1.shape[1]) - 1)
    binFreq = fs * np.arange(maxplotbin + 1) / N1
    plt.pcolormesh(frmTime, binFreq,
                   np.transpose(mX1[:, :maxplotbin + 1]))
    plt.xlabel('time (sec)')
    plt.ylabel('frequency (Hz)')
    plt.title('magnitude spectrogram of x')
    plt.autoscale(tight=True)

    # plot magnitude spectrogram of morphed sound
    plt.subplot(4, 1, 3)
    numFrames = int(mY.shape[0])
    frmTime = H1 * np.arange(numFrames) / float(fs)
    maxplotbin = min(int(N1 * maxplotfreq / fs), int(mY.shape[1]) - 1)
    binFreq = fs * np.arange(maxplotbin + 1) / N1
    plt.pcolormesh(frmTime, binFreq,
                   np.transpose(mY[:, :maxplotbin + 1]))
    plt.xlabel('time (sec)')
    plt.ylabel('frequency (Hz)')
    plt.title('magnitude spectrogram of y')
    plt.autoscale(tight=True)

    # plot the morphed sound
    plt.subplot(4, 1, 4)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), y.min(), y.max()])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()

    # Save before showing: a blocking plt.show() closes the figure once the
    # window is dismissed, so a later savefig would write an empty image.
    if plotFile:
        plt.savefig(
            'output_plots/%s_%s_stft_morph.png' %
            (files.strip_file(inputFile1), files.strip_file(inputFile2)))
    if interactive:
        plt.show()
Ejemplo n.º 4
0
def main(inputFile=demo_sound_path('sax-phrase-short.wav'),
         window='blackman',
         M=601,
         N=1024,
         t=-100,
         minSineDur=0.1,
         nH=100,
         minf0=350,
         maxf0=700,
         f0et=5,
         harmDevSlope=0.01,
         interactive=True,
         plotFile=False):
    """
    Perform analysis/synthesis using the harmonic plus residual model
    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size; N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks
    nH: maximum number of harmonics; minf0: minimum fundamental frequency in sound
    maxf0: maximum fundamental frequency in sound; f0et: maximum error accepted in f0 detection algorithm
    harmDevSlope: allowed deviation of harmonic tracks, higher harmonics have higher allowed deviation
    interactive: if True, display the figure with plt.show()
    plotFile: if True, save the figure as a PNG under output_plots/

    Three sound files are written under output_sounds/: the harmonic
    component, the residual, and their sum.
    """

    # size of fft used in synthesis
    Ns = 512

    # hop size (has to be 1/4 of Ns)
    H = 128

    # read input sound
    (fs, x) = audio.read_wav(inputFile)

    # compute analysis window
    w = get_window(window, M)

    # find harmonics and residual
    hfreq, hmag, hphase, xr = hpr.from_audio(x, fs, w, N, H, t, minSineDur, nH,
                                             minf0, maxf0, f0et, harmDevSlope)

    # compute spectrogram of residual
    mXr, pXr = stft.from_audio(xr, w, N, H)

    # synthesize hpr model
    y, yh = hpr.to_audio(hfreq, hmag, hphase, xr, Ns, H, fs)

    # output sound file (monophonic with sampling rate of 44100)
    baseFileName = files.strip_file(inputFile)
    outputFileSines, outputFileResidual, outputFile = [
        'output_sounds/%s_hprModel%s.wav' % (baseFileName, i)
        for i in ('_sines', '_residual', '')
    ]

    # write sounds files for harmonics, residual, and the sum
    audio.write_wav(yh, fs, outputFileSines)
    audio.write_wav(xr, fs, outputFileResidual)
    audio.write_wav(y, fs, outputFile)

    # create figure to plot
    plt.figure(figsize=(12, 9))

    # frequency range to plot
    maxplotfreq = 5000.0

    # plot the input sound
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size) / float(fs), x)
    # array methods avoid a Python-level loop over every sample
    plt.axis([0, x.size / float(fs), x.min(), x.max()])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # plot the magnitude spectrogram of residual
    plt.subplot(3, 1, 2)
    maxplotbin = int(N * maxplotfreq / fs)
    numFrames = int(mXr.shape[0])
    frmTime = H * np.arange(numFrames) / float(fs)
    binFreq = np.arange(maxplotbin + 1) * float(fs) / N
    plt.pcolormesh(frmTime, binFreq, np.transpose(mXr[:, :maxplotbin + 1]))
    plt.autoscale(tight=True)

    # plot harmonic frequencies on residual spectrogram
    if (hfreq.shape[1] > 0):
        # the product is a new array, so hfreq itself is left untouched
        harms = hfreq * np.less(hfreq, maxplotfreq)
        # zeros become NaN so they show up as gaps instead of lines at 0 Hz
        harms[harms == 0] = np.nan
        numFrames = int(harms.shape[0])
        frmTime = H * np.arange(numFrames) / float(fs)
        plt.plot(frmTime, harms, color='k', ms=3, alpha=1)
        plt.xlabel('time(s)')
        plt.ylabel('frequency(Hz)')
        plt.autoscale(tight=True)
        plt.title('harmonics + residual spectrogram')

    # plot the output sound
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), y.min(), y.max()])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()

    # Save before showing: a blocking plt.show() closes the figure once the
    # window is dismissed, so a later savefig would write an empty image.
    if plotFile:
        plt.savefig('output_plots/%s_hpr_model.png' %
                    files.strip_file(inputFile))
    if interactive:
        plt.show()
Ejemplo n.º 5
0
    def initUI(self):
        """Create and lay out the widgets of the stochastic model panel.

        Builds, in order: the input file row (path entry, browse and preview
        buttons), the hop size / FFT size / decimation factor entries, the
        compute button, the playback button for the synthesized output, and
        the option dictionary stored in self.file_opt.
        """
        root = self.parent

        def add_label(text, row, pady=(10, 2)):
            # left-aligned caption in column 0 of the given row
            Label(root, text=text).grid(row=row,
                                        column=0,
                                        sticky=W,
                                        padx=5,
                                        pady=pady)

        def add_entry(row, left_pad, initial):
            # small centred text field placed to the right of its caption
            field = Entry(root, justify=CENTER, width=5)
            field.grid(row=row,
                       column=0,
                       sticky=W,
                       padx=(left_pad, 5),
                       pady=(10, 2))
            field.delete(0, END)
            field.insert(0, initial)
            return field

        add_label("Input file (.wav, mono and 44100 sampling rate):", 0)

        # text box holding the path of the sound file
        path_box = Entry(root, width=25)
        path_box.focus_set()
        path_box.grid(row=1, column=0, sticky=W, padx=10)
        path_box.delete(0, END)
        path_box.insert(0, 'sounds/ocean.wav')
        self.filelocation = path_box

        # browse button, placed beside the path text box
        self.open_file = Button(root,
                                text="Browse...",
                                command=self.browse_file)
        self.open_file.grid(row=1, column=0, sticky=W, padx=(220, 6))

        # button that plays the selected input file
        self.preview = Button(
            root,
            text=">",
            command=lambda: audio.play_wav(self.filelocation.get()),
            bg="gray30",
            fg="white")
        self.preview.grid(row=1, column=0, sticky=W, padx=(306, 6))

        ## stochastic model parameters

        add_label("Hop size (H):", 2)
        self.H = add_entry(2, 90, "256")

        add_label("FFT size (N):", 3)
        self.N = add_entry(3, 90, "512")

        add_label("Decimation factor (bigger than 0, max of 1):", 4)
        self.stocf = add_entry(4, 285, "0.1")

        # button that runs the computation
        self.compute = Button(root,
                              text="Compute",
                              command=self.compute_model,
                              bg="dark red",
                              fg="white")
        self.compute.grid(row=5, column=0, padx=5, pady=(10, 2), sticky=W)

        # button that plays the synthesized output
        add_label("Stochastic:", 6, pady=(10, 15))
        self.output = Button(
            root,
            text=">",
            command=lambda: audio.play_wav('output_sounds/' + strip_file(
                self.filelocation.get()) + '_stochasticModel.wav'),
            bg="gray30",
            fg="white")
        self.output.grid(row=6,
                         column=0,
                         padx=(80, 5),
                         pady=(10, 15),
                         sticky=W)

        # options for the open-file dialog
        self.file_opt = {
            'defaultextension': '.wav',
            'filetypes': [('All files', '.*'), ('Wav files', '.wav')],
            'initialdir': 'sounds/',
            'title':
            'Open a mono audio file .wav with sample frequency 44100 Hz',
        }
Ejemplo n.º 6
0
def transformation_synthesis(inputFile,
                             fs,
                             tfreq,
                             tmag,
                             freqScaling=np.array([0, 2.0, 1, .3]),
                             timeScaling=np.array(
                                 [0, .0, .671, .671, 1.978, 1.978 + 1.0]),
                             interactive=True,
                             plotFile=False):
    """
    Transform the analysis values returned by the analysis function and synthesize the sound
    inputFile: name of input file; fs: sampling rate of input file
    tfreq, tmag: sinusoidal frequencies and magnitudes
    freqScaling: frequency scaling factors, in time-value pairs
    timeScaling: time scaling factors, in time-value pairs
    interactive: if True, display the figure with plt.show()
    plotFile: if True, save the figure as a PNG under output_plots/

    The synthesized sound is written under output_sounds/; nothing is returned.
    """

    # size of fft used in synthesis
    Ns = 512

    # hop size (has to be 1/4 of Ns)
    H = 128

    # frequency scaling of the sinusoidal tracks
    ytfreq = sine.scale_frequencies(tfreq, freqScaling)

    # time scale the sinusoidal tracks
    ytfreq, ytmag = sine.scale_time(ytfreq, tmag, timeScaling)

    # synthesis (an empty array is passed in place of phases)
    y = sine.to_audio(ytfreq, ytmag, np.array([]), Ns, H, fs)

    # write output sound
    outputFile = 'output_sounds/' + strip_file(
        inputFile) + '_sineModelTransformation.wav'
    audio.write_wav(y, fs, outputFile)

    # create figure to plot
    plt.figure(figsize=(12, 6))

    # frequency range to plot
    maxplotfreq = 15000.0

    # plot the transformed sinusoidal frequencies
    if (ytfreq.shape[1] > 0):
        plt.subplot(2, 1, 1)
        tracks = np.copy(ytfreq)
        tracks = tracks * np.less(tracks, maxplotfreq)
        # non-positive values become NaN so they show up as gaps in the lines
        tracks[tracks <= 0] = np.nan
        numFrames = int(tracks.shape[0])
        frmTime = H * np.arange(numFrames) / float(fs)
        plt.plot(frmTime, tracks)
        plt.title('transformed sinusoidal tracks')
        plt.autoscale(tight=True)

    # plot the output sound
    plt.subplot(2, 1, 2)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()

    # Save before showing: a blocking plt.show() closes the figure once the
    # window is dismissed, so a later savefig would write an empty image.
    if plotFile:
        plt.savefig('output_plots/%s_sine_transformation_synthesis.png' %
                    files.strip_file(inputFile))
    if interactive:
        plt.show()
Ejemplo n.º 7
0
    def initUI(self):
        """Create and lay out the widgets of the SPS model panel.

        Builds, in order: the input file row (path entry, browse and preview
        buttons), the window type menu, the numeric parameter entries, the
        compute button, three playback buttons (sinusoidal, stochastic and
        full output), and the option dictionary stored in self.file_opt.
        """
        root = self.parent

        def add_label(text, row, pady=(10, 2)):
            # left-aligned caption in column 0 of the given row
            Label(root, text=text).grid(row=row,
                                        column=0,
                                        sticky=W,
                                        padx=5,
                                        pady=pady)

        def add_entry(caption, row, left_pad, initial):
            # caption followed by a small centred text field on the same row
            add_label(caption, row)
            field = Entry(root, justify=CENTER, width=5)
            field.grid(row=row,
                       column=0,
                       sticky=W,
                       padx=(left_pad, 5),
                       pady=(10, 2))
            field.delete(0, END)
            field.insert(0, initial)
            return field

        def add_player(caption, row, pady, suffix):
            # caption followed by a button that plays the output file whose
            # name ends with the given suffix
            add_label(caption, row, pady=pady)
            button = Button(
                root,
                text=">",
                command=lambda: audio.play_wav('output_sounds/' + strip_file(
                    self.filelocation.get()) + suffix),
                bg="gray30",
                fg="white")
            button.grid(row=row, column=0, padx=(80, 5), pady=pady, sticky=W)
            return button

        add_label("Input file (.wav, mono and 44100 sampling rate):", 0)

        # text box holding the path of the sound file
        path_box = Entry(root, width=25)
        path_box.focus_set()
        path_box.grid(row=1, column=0, sticky=W, padx=10)
        path_box.delete(0, END)
        path_box.insert(0, 'sounds/bendir.wav')
        self.filelocation = path_box

        # browse button, placed beside the path text box
        self.open_file = Button(root,
                                text="Browse...",
                                command=self.browse_file)
        self.open_file.grid(row=1, column=0, sticky=W, padx=(220, 6))

        # button that plays the selected input file
        self.preview = Button(
            root,
            text=">",
            command=lambda: audio.play_wav(self.filelocation.get()),
            bg="gray30",
            fg="white")
        self.preview.grid(row=1, column=0, sticky=W, padx=(306, 6))

        ## SPS model parameters

        # analysis window type, "hamming" selected initially
        add_label("Window type:", 2)
        self.w_type = StringVar()
        self.w_type.set("hamming")
        window_menu = OptionMenu(root, self.w_type, "rectangular", "hanning",
                                 "hamming", "blackman", "blackmanharris")
        window_menu.grid(row=2,
                         column=0,
                         sticky=W,
                         padx=(95, 5),
                         pady=(10, 2))

        self.M = add_entry("Window size (M):", 3, 115, "2001")
        self.N = add_entry("FFT size (N) (power of two bigger than M):", 4,
                           270, "2048")
        self.t = add_entry("Magnitude threshold (t) (in dB):", 5, 205, "-80")
        self.minSineDur = add_entry("Minimum duration of sinusoidal tracks:",
                                    6, 250, "0.02")
        self.maxnSines = add_entry("Maximum number of parallel sinusoids:", 7,
                                   250, "150")
        self.freqDevOffset = add_entry(
            "Max frequency deviation in sinusoidal tracks (at freq 0):", 8,
            350, "10")
        self.freqDevSlope = add_entry(
            "Slope of the frequency deviation (as function of freq):", 9, 340,
            "0.001")
        self.stocf = add_entry("Stochastic approximation factor:", 10, 210,
                               "0.2")

        # button that runs the computation
        self.compute = Button(root,
                              text="Compute",
                              command=self.compute_model,
                              bg="dark red",
                              fg="white")
        self.compute.grid(row=11, column=0, padx=5, pady=(10, 2), sticky=W)

        # playback buttons; self.output is rebound each time, so it ends up
        # referring to the last one created
        self.output = add_player("Sinusoidal:", 12, (10, 0),
                                 '_spsModel_sines.wav')
        self.output = add_player("Stochastic:", 22, (5, 0),
                                 '_spsModel_stochastic.wav')
        self.output = add_player("Output:", 23, (5, 15), '_spsModel.wav')

        # options for the open-file dialog
        self.file_opt = {
            'defaultextension': '.wav',
            'filetypes': [('All files', '.*'), ('Wav files', '.wav')],
            'initialdir': 'sounds/',
            'title':
            'Open a mono audio file .wav with sample frequency 44100 Hz',
        }
Ejemplo n.º 8
0
    def initUI(self):
        """
        Build the sine-model transformation panel inside self.parent.

        Widgets are created from top to bottom: the input-file row, the
        analysis parameters, the analysis button with its playback button,
        a separator line, the frequency/time scaling entries and finally the
        transformation button with its playback button.  The options used
        when opening a file are stored in self.file_opt at the end.
        """
        master = self.parent
        row_pad = (10, 2)  # vertical padding shared by most parameter rows

        def caption(text, row, padx, pady=row_pad):
            # Static text; no reference to the Label is kept.
            Label(master, text=text).grid(row=row,
                                          column=0,
                                          sticky=W,
                                          padx=padx,
                                          pady=pady)

        def text_field(value, row, padx, pady=row_pad, width=5, sticky=W):
            # Centered entry pre-filled with its default value.
            field = Entry(master, justify=CENTER)
            field["width"] = width
            field.grid(row=row, column=0, sticky=sticky, padx=padx, pady=pady)
            field.delete(0, END)
            field.insert(0, value)
            return field

        def play_button(action, row, padx, pady=row_pad):
            # Small ">" button that triggers the given playback callback.
            button = Button(master,
                            text=">",
                            command=action,
                            bg="gray30",
                            fg="white")
            button.grid(row=row, column=0, sticky=W, padx=padx, pady=pady)
            return button

        def play_output(suffix):
            # The path is resolved at click time so that it follows the
            # current content of the file location entry.
            return lambda: audio.play_wav('output_sounds/' + strip_file(
                self.filelocation.get()) + suffix)

        # --- row 0: path of the sound file, browse and preview buttons ---
        caption("inputFile:", 0, 5)

        self.filelocation = Entry(master)
        self.filelocation.focus_set()
        self.filelocation["width"] = 32
        self.filelocation.grid(row=0,
                               column=0,
                               sticky=W,
                               padx=(70, 5),
                               pady=row_pad)
        self.filelocation.delete(0, END)
        self.filelocation.insert(0, 'sounds/mridangam.wav')

        # placed beside the file location entry; see browse_file
        browse = Button(master, text="...", command=self.browse_file)
        browse.grid(row=0, column=0, sticky=W, padx=(340, 6), pady=row_pad)

        play_button(lambda: audio.play_wav(self.filelocation.get()), 0,
                    (385, 6))

        # --- row 1: analysis window type, window size, fft size, threshold ---
        caption("window:", 1, 5)
        self.w_type = StringVar()
        self.w_type.set("hamming")  # initially selected window
        window_names = ("rectangular", "hanning", "hamming", "blackman",
                        "blackmanharris")
        OptionMenu(master, self.w_type, *window_names).grid(row=1,
                                                            column=0,
                                                            sticky=W,
                                                            padx=(65, 5),
                                                            pady=row_pad)

        caption("M:", 1, (180, 5))
        self.M = text_field("801", 1, (200, 5))

        caption("N:", 1, (255, 5))
        self.N = text_field("2048", 1, (275, 5))

        caption("t:", 1, (330, 5))
        self.t = text_field("-90", 1, (348, 5))

        # --- rows 2-3: sinusoidal tracking parameters ---
        caption("minSineDur:", 2, (5, 5))
        self.minSineDur = text_field("0.01", 2, (87, 5))

        caption("maxnSines:", 2, (145, 5))
        self.maxnSines = text_field("150", 2, (220, 5))

        caption("freqDevOffset:", 2, (280, 5))
        self.freqDevOffset = text_field("20", 2, (372, 5))

        caption("freqDevSlope:", 3, (5, 5))
        self.freqDevSlope = text_field("0.02", 3, (98, 5))

        # --- row 4: run the analysis/synthesis and play its output ---
        self.compute = Button(master,
                              text="Analysis/Synthesis",
                              command=self.analysis,
                              bg="dark red",
                              fg="white")
        self.compute.grid(row=4, column=0, sticky=W, padx=5, pady=(10, 5))

        self.output = play_button(play_output('_sineModel.wav'), 4, (145, 5),
                                  (10, 5))

        # --- row 5: separation line ---
        separator = Frame(master, height=1, width=50, bg="black")
        separator.grid(row=5, pady=5, sticky=W + E)

        # --- rows 6-9: transformation envelopes ---
        caption("Frequency scaling factors (time, value pairs):", 6, 5, (5, 2))
        self.freqScaling = text_field("[0, 2.0, 1, .3]",
                                      7,
                                      5, (0, 2),
                                      width=35,
                                      sticky=W + E)

        caption("Time scaling factors (in time, value pairs):", 8, 5, (5, 2))
        self.timeScaling = text_field("[0, .0, .671, .671, 1.978, 1.978+1.0]",
                                      9,
                                      5, (0, 2),
                                      width=35,
                                      sticky=W + E)

        # --- row 13: apply the transformation and play its output ---
        # NOTE: self.compute is rebound here, so the attribute no longer
        # refers to the "Analysis/Synthesis" button created above.
        self.compute = Button(master,
                              text="Apply Transformation",
                              command=self.transformation_synthesis,
                              bg="dark green",
                              fg="white")
        self.compute.grid(row=13, column=0, sticky=W, padx=5, pady=(10, 15))

        self.transf_output = play_button(
            play_output('_sineModelTransformation.wav'), 13, (165, 5),
            (10, 15))

        # options used when opening a file
        options = {
            'defaultextension': '.wav',
            'filetypes': [('All files', '.*'), ('Wav files', '.wav')],
            'initialdir': 'sounds/',
            'title':
            'Open a mono audio file .wav with sample frequency 44100 Hz',
        }
        self.file_opt = options
Ejemplo n.º 9
0
def analysis(inputFile=demo_sound_path('sax-phrase-short.wav'), window='blackman', M=601, N=1024, t=-100,
             minSineDur=0.1, nH=100, minf0=350, maxf0=700, f0et=5, harmDevSlope=0.01, stocf=0.1,
             interactive=True, plotFile=False):
    """
    Analyze a sound with the harmonic plus stochastic model
    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size
    N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks
    minSineDur: minimum duration of sinusoidal tracks
    nH: maximum number of harmonics
    minf0: minimum fundamental frequency in sound
    maxf0: maximum fundamental frequency in sound
    f0et: maximum error accepted in f0 detection algorithm
    harmDevSlope: allowed deviation of harmonic tracks, higher harmonics have higher allowed deviation
    stocf: decimation factor used for the stochastic approximation
    interactive: if True, show the figure (non-blocking)
    plotFile: if True, save the figure under output_plots/
    returns inputFile: input file name; fs: sampling rate of input file,
            hfreq, hmag: harmonic frequencies, magnitude; mYst: stochastic residual
    Side effect: the re-synthesized sound is written under output_sounds/.
    """

    # size of fft used in synthesis
    Ns = 512

    # hop size (has to be 1/4 of Ns)
    H = 128

    # read input sound
    (fs, x) = audio.read_wav(inputFile)

    # compute analysis window
    w = get_window(window, M)

    # compute the harmonic plus stochastic model of the whole sound
    hfreq, hmag, hphase, mYst = hps.from_audio(x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope, minSineDur, Ns,
                                              stocf)

    # synthesize the harmonic plus stochastic model without original phases
    y, yh, yst = hps.to_audio(hfreq, hmag, np.array([]), mYst, Ns, H, fs)

    # write output sound
    outputFile = 'output_sounds/' + strip_file(inputFile) + '_hpsModel.wav'
    audio.write_wav(y, fs, outputFile)

    # create figure to plot
    plt.figure(figsize=(12, 9))

    # frequency range to plot
    maxplotfreq = 15000.0

    # plot the input sound
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size) / float(fs), x)
    plt.axis([0, x.size / float(fs), min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # plot spectrogram stochastic component
    plt.subplot(3, 1, 2)
    numFrames = int(mYst.shape[0])
    sizeEnv = int(mYst.shape[1])
    frmTime = H * np.arange(numFrames) / float(fs)
    # Slice bounds must be integers: the former float expression raises a
    # TypeError on current NumPy versions.
    maxplotbin = int(sizeEnv * maxplotfreq / (.5 * fs))
    stocPlot = mYst[:, :maxplotbin + 1]
    # Derive the frequency axis from the data actually plotted so that both
    # always have the same number of bins.
    binFreq = (.5 * fs) * np.arange(stocPlot.shape[1]) / sizeEnv
    plt.pcolormesh(frmTime, binFreq, np.transpose(stocPlot))
    plt.autoscale(tight=True)

    # plot harmonic on top of stochastic spectrogram
    if (hfreq.shape[1] > 0):
        # zero out the values above the plotted range, then hide all zeros
        harms = hfreq * np.less(hfreq, maxplotfreq)
        harms[harms == 0] = np.nan
        numFrames = int(harms.shape[0])
        frmTime = H * np.arange(numFrames) / float(fs)
        plt.plot(frmTime, harms, color='k', ms=3, alpha=1)
        plt.xlabel('time (sec)')
        plt.ylabel('frequency (Hz)')
        plt.autoscale(tight=True)
        plt.title('harmonics + stochastic spectrogram')

    # plot the output sound
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()

    if interactive:
        # non-blocking, so the figure is still available for savefig below
        plt.show(block=False)
    if plotFile:
        plt.savefig('output_plots/%s_hps_transformation_analysis.png' % files.strip_file(inputFile))

    return inputFile, fs, hfreq, hmag, mYst
Ejemplo n.º 10
0
def transformation_synthesis(inputFile, fs, hfreq, hmag, mYst,
                             freqScaling=np.array([0, 1.2, 2.01, 1.2, 2.679, .7, 3.146, .7]),
                             freqStretching=np.array([0, 1, 2.01, 1, 2.679, 1.5, 3.146, 1.5]), timbrePreservation=1,
                             timeScaling=np.array([0, 0, 2.138, 2.138 - 1.0, 3.146, 3.146]),
                             interactive=True, plotFile=False):
    """
    transform the analysis values returned by the analysis function and synthesize the sound
    inputFile: name of input file
    fs: sampling rate of input file
    hfreq, hmag: harmonic frequencies and magnitudes
    mYst: stochastic residual
    freqScaling: frequency scaling factors, in time-value pairs (value of 1 no scaling)
    freqStretching: frequency stretching factors, in time-value pairs (value of 1 no stretching)
    timbrePreservation: 1 preserves original timbre, 0 it does not
    timeScaling: time scaling factors, in time-value pairs
    interactive: if True, show the figure (blocking)
    plotFile: if True, save the figure under output_plots/
    Side effect: the transformed sound is written under output_sounds/.
    """

    # size of fft used in synthesis
    Ns = 512

    # hop size (has to be 1/4 of Ns)
    H = 128

    # frequency scaling of the harmonics
    hfreqt, hmagt = harmonic.scale_frequencies(hfreq, hmag, freqScaling, freqStretching, timbrePreservation, fs)

    # time scaling the sound
    yhfreq, yhmag, ystocEnv = hps.scale_time(hfreqt, hmagt, mYst, timeScaling)

    # synthesis from the transformed hps representation
    y, yh, yst = hps.to_audio(yhfreq, yhmag, np.array([]), ystocEnv, Ns, H, fs)

    # write output sound
    outputFile = 'output_sounds/' + strip_file(inputFile) + '_hpsModelTransformation.wav'
    audio.write_wav(y, fs, outputFile)

    # create figure to plot
    plt.figure(figsize=(12, 6))

    # frequency range to plot
    maxplotfreq = 15000.0

    # plot spectrogram of transformed stochastic component
    plt.subplot(2, 1, 1)
    numFrames = int(ystocEnv.shape[0])
    sizeEnv = int(ystocEnv.shape[1])
    frmTime = H * np.arange(numFrames) / float(fs)
    # Slice bounds must be integers: the former float expression raises a
    # TypeError on current NumPy versions.
    maxplotbin = int(sizeEnv * maxplotfreq / (.5 * fs))
    stocPlot = ystocEnv[:, :maxplotbin + 1]
    # Derive the frequency axis from the data actually plotted so that both
    # always have the same number of bins.
    binFreq = (.5 * fs) * np.arange(stocPlot.shape[1]) / sizeEnv
    plt.pcolormesh(frmTime, binFreq, np.transpose(stocPlot))
    plt.autoscale(tight=True)

    # plot transformed harmonic on top of stochastic spectrogram
    if (yhfreq.shape[1] > 0):
        # zero out the values above the plotted range, then hide all zeros
        harms = yhfreq * np.less(yhfreq, maxplotfreq)
        harms[harms == 0] = np.nan
        numFrames = int(harms.shape[0])
        frmTime = H * np.arange(numFrames) / float(fs)
        plt.plot(frmTime, harms, color='k', ms=3, alpha=1)
        plt.xlabel('time (sec)')
        plt.ylabel('frequency (Hz)')
        plt.autoscale(tight=True)
        plt.title('harmonics + stochastic spectrogram')

    # plot the output sound
    plt.subplot(2, 1, 2)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()

    # Save before showing: once a blocking show() returns the figure has been
    # closed, and a later savefig would write an empty image.
    if plotFile:
        plt.savefig('output_plots/%s_hps_transformation_synthesis.png' % files.strip_file(inputFile))
    if interactive:
        plt.show()
Ejemplo n.º 11
0
def main(inputFile=demo_sound_path('bendir.wav'),
         window='hamming',
         M=2001,
         N=2048,
         t=-80,
         minSineDur=0.02,
         maxnSines=150,
         freqDevOffset=10,
         freqDevSlope=0.001,
         interactive=True,
         plotFile=False):
    """
    Analysis and synthesis using the sinusoidal plus residual model
    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size
    N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks
    minSineDur: minimum duration of sinusoidal tracks
    maxnSines: maximum number of parallel sinusoids
    freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0
    freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation
    interactive: if True, show the figure (blocking)
    plotFile: if True, save the figure under output_plots/
    Side effect: the sinusoidal, residual and summed sounds are written
    under output_sounds/.
    """

    # size of fft used in synthesis
    Ns = 512

    # hop size (has to be 1/4 of Ns)
    H = 128

    # read input sound
    (fs, x) = audio.read_wav(inputFile)

    # compute analysis window
    w = get_window(window, M)

    # perform sinusoidal plus residual analysis
    tfreq, tmag, tphase, xr = spr.from_audio(x, fs, w, N, H, t, minSineDur,
                                             maxnSines, freqDevOffset,
                                             freqDevSlope)

    # compute spectrogram of residual
    mXr, pXr = stft.from_audio(xr, w, N, H)

    # sum sinusoids and residual
    y, ys = spr.to_audio(tfreq, tmag, tphase, xr, Ns, H, fs)

    # output sound file (monophonic with sampling rate of 44100)
    baseFileName = strip_file(inputFile)
    outputFileSines, outputFileResidual, outputFile = [
        'output_sounds/%s_sprModel%s.wav' % (baseFileName, i)
        for i in ('_sines', '_residual', '')
    ]

    # write sounds files for sinusoidal, residual, and the sum
    audio.write_wav(ys, fs, outputFileSines)
    audio.write_wav(xr, fs, outputFileResidual)
    audio.write_wav(y, fs, outputFile)

    # create figure to show plots
    plt.figure(figsize=(12, 9))

    # frequency range to plot
    maxplotfreq = 5000.0

    # plot the input sound
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size) / float(fs), x)
    plt.axis([0, x.size / float(fs), min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # plot the magnitude spectrogram of residual
    plt.subplot(3, 1, 2)
    maxplotbin = int(N * maxplotfreq / fs)
    numFrames = int(mXr.shape[0])
    frmTime = H * np.arange(numFrames) / float(fs)
    binFreq = np.arange(maxplotbin + 1) * float(fs) / N
    plt.pcolormesh(frmTime, binFreq, np.transpose(mXr[:, :maxplotbin + 1]))
    plt.autoscale(tight=True)

    # plot the sinusoidal frequencies on top of the residual spectrogram
    if (tfreq.shape[1] > 0):
        # zero out the values above the plotted range, then hide them
        tracks = tfreq * np.less(tfreq, maxplotfreq)
        tracks[tracks <= 0] = np.nan
        plt.plot(frmTime, tracks, color='k')
        plt.title('sinusoidal tracks + residual spectrogram')
        plt.autoscale(tight=True)

    # plot the output sound
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()

    # Save before showing: once a blocking show() returns the figure has been
    # closed, and a later savefig would write an empty image.
    if plotFile:
        plt.savefig('output_plots/%s_spr_model.png' %
                    files.strip_file(inputFile))
    if interactive:
        plt.show()
Ejemplo n.º 12
0
def main(inputFile=demo_sound_path('vignesh.wav'),
         window='blackman',
         M=1201,
         N=2048,
         t=-90,
         minSineDur=0.1,
         nH=100,
         minf0=130,
         maxf0=300,
         f0et=7,
         harmDevSlope=0.01,
         interactive=True,
         plotFile=False):
    """
    Analysis and synthesis using the harmonic model
    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size; N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks
    nH: maximum number of harmonics; minf0: minimum fundamental frequency in sound
    maxf0: maximum fundamental frequency in sound; f0et: maximum error accepted in f0 detection algorithm
    harmDevSlope: allowed deviation of harmonic tracks, higher harmonics could have higher allowed deviation
    interactive: if True, show the figure (blocking)
    plotFile: if True, save the figure under output_plots/
    Side effect: the synthesized sound is written under output_sounds/.
    """

    # size of fft used in synthesis
    Ns = 512

    # hop size (has to be 1/4 of Ns)
    H = 128

    # read input sound
    (fs, x) = audio.read_wav(inputFile)

    # compute analysis window
    w = get_window(window, M)

    # detect harmonics of input sound
    hfreq, hmag, hphase = harmonic.from_audio(x, fs, w, N, H, t, nH, minf0,
                                              maxf0, f0et, harmDevSlope,
                                              minSineDur)

    # synthesize the harmonics
    y = sine.to_audio(hfreq, hmag, hphase, Ns, H, fs)

    # output sound file (monophonic with sampling rate of 44100)
    outputFile = 'output_sounds/' + files.strip_file(
        inputFile) + '_harmonicModel.wav'

    # write the sound resulting from harmonic analysis
    audio.write_wav(y, fs, outputFile)

    # create figure to show plots
    plt.figure(figsize=(12, 9))

    # frequency range to plot
    maxplotfreq = 5000.0

    # plot the input sound
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size) / float(fs), x)
    plt.axis([0, x.size / float(fs), min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # plot the harmonic frequencies
    plt.subplot(3, 1, 2)
    if (hfreq.shape[1] > 0):
        numFrames = hfreq.shape[0]
        frmTime = H * np.arange(numFrames) / float(fs)
        # hide non-positive frequencies; the synthesis above is already
        # done, so modifying hfreq in place only affects the plot
        hfreq[hfreq <= 0] = np.nan
        plt.plot(frmTime, hfreq)
        plt.axis([0, x.size / float(fs), 0, maxplotfreq])
        plt.title('frequencies of harmonic tracks')

    # plot the output sound
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()

    # Save before showing: once a blocking show() returns the figure has been
    # closed, and a later savefig would write an empty image.
    if plotFile:
        plt.savefig('output_plots/%s_harmonic_model.png' %
                    files.strip_file(inputFile))
    if interactive:
        plt.show()
Ejemplo n.º 13
0
def main(inputFile=demo_sound_path('piano.wav'),
         window='hamming',
         M=1024,
         N=1024,
         H=512,
         interactive=True,
         plotFile=False):
    """
    analysis/synthesis using the STFT
    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (choice of rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size
    N: fft size (power of two, bigger or equal than M)
    H: hop size (at least 1/2 of analysis window size to have good overlap-add)
    interactive: if True, show the figure (blocking)
    plotFile: if True, save the figure under output_plots/
    Side effect: the re-synthesized sound is written under output_sounds/.
    """

    # read input sound (monophonic with sampling rate of 44100)
    fs, x = audio.read_wav(inputFile)

    # compute analysis window
    w = get_window(window, M)

    # compute the magnitude and phase spectrogram
    mX, pX = stft.from_audio(x, w, N, H)

    # perform the inverse stft
    y = stft.to_audio(mX, pX, M, H)

    # output sound file (monophonic with sampling rate of 44100)
    outputFile = 'output_sounds/' + strip_file(inputFile) + '_stft.wav'

    # write the sound resulting from the inverse stft
    audio.write_wav(y, fs, outputFile)

    # create figure to plot
    plt.figure(figsize=(12, 9))

    # frequency range to plot
    maxplotfreq = 5000.0

    # Highest bin to plot. Slice bounds must be integers: the former float
    # expression raises a TypeError on current NumPy versions.
    maxplotbin = int(N * maxplotfreq / fs)

    # plot the input sound
    plt.subplot(4, 1, 1)
    plt.plot(np.arange(x.size) / float(fs), x)
    plt.axis([0, x.size / float(fs), min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # plot magnitude spectrogram
    plt.subplot(4, 1, 2)
    numFrames = int(mX.shape[0])
    frmTime = H * np.arange(numFrames) / float(fs)
    mXplot = mX[:, :maxplotbin + 1]
    # frequency axis sized from the plotted data so both always agree
    binFreq = float(fs) * np.arange(mXplot.shape[1]) / N
    plt.pcolormesh(frmTime, binFreq, np.transpose(mXplot))
    plt.xlabel('time (sec)')
    plt.ylabel('frequency (Hz)')
    plt.title('magnitude spectrogram')
    plt.autoscale(tight=True)

    # plot the phase spectrogram
    plt.subplot(4, 1, 3)
    numFrames = int(pX.shape[0])
    frmTime = H * np.arange(numFrames) / float(fs)
    # np.diff removes one bin along the frequency axis, so the frequency
    # axis is rebuilt from the differentiated data to keep shapes matching.
    pXdiff = np.diff(pX[:, :maxplotbin + 1], axis=1)
    binFreq = float(fs) * np.arange(pXdiff.shape[1]) / N
    plt.pcolormesh(frmTime, binFreq, np.transpose(pXdiff))
    plt.xlabel('time (sec)')
    plt.ylabel('frequency (Hz)')
    plt.title('phase spectrogram (derivative)')
    plt.autoscale(tight=True)

    # plot the output sound
    plt.subplot(4, 1, 4)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()

    # Save before showing: once a blocking show() returns the figure has been
    # closed, and a later savefig would write an empty image.
    if plotFile:
        plt.savefig('output_plots/%s_stft_model.png' %
                    files.strip_file(inputFile))
    if interactive:
        plt.show()
Ejemplo n.º 14
0
def transformation_synthesis(inputFile1,
                             fs,
                             hfreq1,
                             hmag1,
                             stocEnv1,
                             inputFile2,
                             hfreq2,
                             hmag2,
                             stocEnv2,
                             hfreqIntp=np.array([0, 0, .1, 0, .9, 1, 1, 1]),
                             hmagIntp=np.array([0, 0, .1, 0, .9, 1, 1, 1]),
                             stocIntp=np.array([0, 0, .1, 0, .9, 1, 1, 1]),
                             interactive=True,
                             plotFile=False):
    """
    Morph two harmonic plus stochastic (hps) analyses, synthesize the result and plot it.

    The synthesized sound is written to
    'output_sounds/<base name of inputFile1 without its last 4 characters>_hpsMorph.wav'.

    inputFile1: name of input file 1
    fs: sampling rate of input file 1
    hfreq1, hmag1, stocEnv1: hps representation of sound 1
    inputFile2: name of input file 2 (only used to name the saved plot)
    hfreq2, hmag2, stocEnv2: hps representation of sound 2
    hfreqIntp: interpolation factor between the harmonic frequencies of the two sounds,
               0 is sound 1 and 1 is sound 2 (time, value pairs)
    hmagIntp: interpolation factor between the harmonic magnitudes of the two sounds,
              0 is sound 1 and 1 is sound 2 (time, value pairs)
    stocIntp: interpolation factor between the stochastic representation of the two sounds,
              0 is sound 1 and 1 is sound 2 (time, value pairs)
    interactive: when True the figure is displayed with plt.show()
    plotFile: when True the figure is saved under 'output_plots/'
    """

    # size of fft used in synthesis
    Ns = 512
    # hop size (has to be 1/4 of Ns)
    H = 128

    # morph the two sounds
    yhfreq, yhmag, ystocEnv = hps.morph(hfreq1, hmag1, stocEnv1, hfreq2, hmag2,
                                        stocEnv2, hfreqIntp, hmagIntp,
                                        stocIntp)

    # synthesis (the separate harmonic and stochastic signals are not needed here)
    y, _, _ = hps.to_audio(yhfreq, yhmag, np.array([]), ystocEnv, Ns, H, fs)

    # write output sound
    outputFile = 'output_sounds/' + os.path.basename(
        inputFile1)[:-4] + '_hpsMorph.wav'
    audio.write_wav(y, fs, outputFile)

    # create figure to plot
    plt.figure(figsize=(12, 9))

    # frequency range to plot
    maxplotfreq = 15000.0

    # plot spectrogram of transformed stochastic component
    plt.subplot(2, 1, 1)
    numFrames = int(ystocEnv.shape[0])
    sizeEnv = int(ystocEnv.shape[1])
    frmTime = H * np.arange(numFrames) / float(fs)
    nyquist = .5 * fs
    # Number of envelope bins up to maxplotfreq. It must be an int because it
    # is used as a slice bound, and it is capped at sizeEnv so that the
    # frequency axis and the sliced envelope always have the same length.
    numBins = min(int(sizeEnv * maxplotfreq / nyquist) + 1, sizeEnv)
    binFreq = nyquist * np.arange(numBins) / sizeEnv
    plt.pcolormesh(frmTime, binFreq, np.transpose(ystocEnv[:, :numBins]))
    plt.autoscale(tight=True)

    # plot transformed harmonics on top of the stochastic spectrogram
    if yhfreq.shape[1] > 0:
        # zero out everything at or above maxplotfreq, then hide the zeros
        harms = yhfreq * np.less(yhfreq, maxplotfreq)
        harms[harms == 0] = np.nan
        harmTime = H * np.arange(int(harms.shape[0])) / float(fs)
        plt.plot(harmTime, harms, color='k', ms=3, alpha=1)
        plt.xlabel('time (sec)')
        plt.ylabel('frequency (Hz)')
        plt.autoscale(tight=True)
        plt.title('harmonics + stochastic spectrogram')

    # plot the output sound
    plt.subplot(2, 1, 2)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()

    # Save before showing: once a blocking plt.show() returns, the window has
    # been closed and plt.savefig would write an empty figure.
    if plotFile:
        plt.savefig(
            'output_plots/%s_%s_hps_morph_synthesis.png' %
            (files.strip_file(inputFile1), files.strip_file(inputFile2)))
    if interactive:
        plt.show()
Ejemplo n.º 15
0
    def initUI(self):
        """
        Build the STFT window: input-file chooser with browse and preview
        buttons, the analysis parameter fields (window type, M, N, H), the
        compute button, the output player and the file-dialog options.
        """
        root = self.parent
        # every widget lives in column 0 and is anchored to the west side
        west = dict(column=0, sticky=W)
        gap = (10, 2)  # vertical padding shared by most rows

        def caption(text, row, pady=gap):
            # static text label at the left edge of the given row
            Label(root, text=text).grid(row=row, padx=5, pady=pady, **west)

        def small_field(row, left, initial):
            # centred 5-character entry pre-filled with its default value
            box = Entry(root, justify=CENTER)
            box.configure(width=5)
            box.grid(row=row, padx=(left, 5), pady=gap, **west)
            box.delete(0, END)
            box.insert(0, initial)
            return box

        def play_input():
            # play the file currently named in the path entry
            return audio.play_wav(self.filelocation.get())

        def play_output():
            # play the result written for the current input file
            return audio.play_wav('output_sounds/' +
                                  strip_file(self.filelocation.get()) +
                                  '_stft.wav')

        caption("Input file (.wav, mono and 44100 sampling rate):", 0)

        # text box holding the path of the sound file
        path_box = Entry(root)
        path_box.focus_set()
        path_box.configure(width=25)
        path_box.grid(row=1, padx=10, **west)
        path_box.delete(0, END)
        path_box.insert(0, 'sounds/piano.wav')
        self.filelocation = path_box

        # browse button, placed beside the path text box
        self.open_file = Button(root, text="Browse...",
                                command=self.browse_file)
        self.open_file.grid(row=1, padx=(220, 6), **west)

        # preview button for the input sound
        self.preview = Button(root, text=">", command=play_input,
                              bg="gray30", fg="white")
        self.preview.grid(row=1, padx=(306, 6), **west)

        # --- STFT parameters ---

        # analysis window type
        caption("Window type:", 2)
        self.w_type = StringVar()
        self.w_type.set("hamming")  # initial value
        window_names = ("rectangular", "hanning", "hamming", "blackman",
                        "blackmanharris")
        window_menu = OptionMenu(root, self.w_type, *window_names)
        window_menu.grid(row=2, padx=(95, 5), pady=gap, **west)

        # window size
        caption("Window size (M):", 3)
        self.M = small_field(3, 115, "1024")

        # fft size
        caption("FFT size (N) (power of two bigger than M):", 4)
        self.N = small_field(4, 270, "1024")

        # hop size
        caption("Hop size (H):", 5)
        self.H = small_field(5, 95, "512")

        # button that runs the computation
        self.compute = Button(root, text="Compute",
                              command=self.compute_model,
                              bg="dark red", fg="white")
        self.compute.grid(row=6, padx=5, pady=gap, **west)

        # button that plays the output
        caption("Output:", 7, pady=(10, 15))
        self.output = Button(root, text=">", command=play_output,
                             bg="gray30", fg="white")
        self.output.grid(row=7, padx=(60, 5), pady=(10, 15), **west)

        # options handed to the file-open dialog
        self.file_opt = {
            'defaultextension': '.wav',
            'filetypes': [('All files', '.*'), ('Wav files', '.wav')],
            'initialdir': 'sounds/',
            'title': 'Open a mono audio file .wav with sample frequency 44100 Hz',
        }
0
def main(inputFile=demo_sound_path('bendir.wav'), window='hamming', M=2001, N=2048, t=-80, minSineDur=0.02,
         maxnSines=150, freqDevOffset=10, freqDevSlope=0.001,
         interactive=True, plotFile=False):
    """
    Perform analysis/synthesis using the sinusoidal model

    The synthesized sound is written to 'output_sounds/<name>_sineModel.wav'.

    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size; N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks
    maxnSines: maximum number of parallel sinusoids
    freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0
    freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation
    interactive: when True the figure is displayed with plt.show()
    plotFile: when True the figure is saved under 'output_plots/'
    """

    # size of fft used in synthesis
    Ns = 512

    # hop size (has to be 1/4 of Ns)
    H = 128

    # read input sound
    fs, x = audio.read_wav(inputFile)

    # compute analysis window
    w = get_window(window, M)

    # analyze the sound with the sinusoidal model
    tfreq, tmag, tphase = sine.from_audio(x, fs, w, N, H, t, maxnSines, minSineDur, freqDevOffset, freqDevSlope)

    # synthesize the output sound from the sinusoidal representation
    y = sine.to_audio(tfreq, tmag, tphase, Ns, H, fs)

    # output sound file name
    outputFile = 'output_sounds/' + strip_file(inputFile) + '_sineModel.wav'

    # write the synthesized sound obtained from the sinusoidal synthesis
    audio.write_wav(y, fs, outputFile)

    # create figure to show plots
    plt.figure(figsize=(12, 9))

    # frequency range to plot
    maxplotfreq = 5000.0

    # plot the input sound
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size) / float(fs), x)
    plt.axis([0, x.size / float(fs), min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # plot the sinusoidal frequencies
    plt.subplot(3, 1, 2)
    if tfreq.shape[1] > 0:
        numFrames = tfreq.shape[0]
        frmTime = H * np.arange(numFrames) / float(fs)
        # hide non-positive frequencies in the plot; work on a copy so the
        # analysis result itself is left untouched
        plotFreq = np.where(tfreq > 0, tfreq, np.nan)
        plt.plot(frmTime, plotFreq)
        plt.axis([0, x.size / float(fs), 0, maxplotfreq])
        plt.title('frequencies of sinusoidal tracks')

    # plot the output sound
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()

    # Save before showing: once a blocking plt.show() returns, the window has
    # been closed and plt.savefig would write an empty figure.
    if plotFile:
        plt.savefig('output_plots/%s_sine_model.png' % files.strip_file(inputFile))
    if interactive:
        plt.show()
    def initUI(self):
        """
        Build the harmonic-model transformation window.

        The upper part holds the input-file chooser and the analysis
        parameters with their Analysis/Synthesis button; below a separator
        line come the transformation factors with their Apply Transformation
        button. Finally the file-dialog options are stored in self.file_opt.
        """
        root = self.parent
        std_pady = (10, 2)

        def caption(text, row, padx, pady=std_pady):
            # static text label, always in column 0 and anchored west
            Label(root, text=text).grid(row=row, column=0, sticky=W,
                                        padx=padx, pady=pady)

        def field(row, padx, initial, width=5, sticky=W, pady=std_pady):
            # centred entry of the given width, pre-filled with its default
            box = Entry(root, justify=CENTER)
            box["width"] = width
            box.grid(row=row, column=0, sticky=sticky, padx=padx, pady=pady)
            box.delete(0, END)
            box.insert(0, initial)
            return box

        def player(suffix=None):
            # Return a callback that plays the input file (no suffix) or the
            # output file written for it with the given name suffix.
            if suffix is None:
                return lambda: audio.play_wav(self.filelocation.get())
            return lambda: audio.play_wav('output_sounds/' + strip_file(
                self.filelocation.get()) + suffix)

        caption("inputFile:", 0, 5)

        # text box holding the path of the sound file
        self.filelocation = Entry(root)
        self.filelocation.focus_set()
        self.filelocation["width"] = 32
        self.filelocation.grid(row=0, column=0, sticky=W, padx=(70, 5),
                               pady=std_pady)
        self.filelocation.delete(0, END)
        self.filelocation.insert(0, 'sounds/vignesh.wav')

        # browse button, placed beside the path text box
        browse_btn = Button(root, text="...", command=self.browse_file)
        browse_btn.grid(row=0, column=0, sticky=W, padx=(340, 6),
                        pady=std_pady)

        # preview button for the input sound
        preview_btn = Button(root, text=">", command=player(),
                             bg="gray30", fg="white")
        preview_btn.grid(row=0, column=0, sticky=W, padx=(385, 6),
                         pady=std_pady)

        # --- harmonic analysis parameters ---

        # analysis window type
        caption("window:", 1, 5)
        self.w_type = StringVar()
        self.w_type.set("blackman")  # initial value
        window_names = ("rectangular", "hanning", "hamming", "blackman",
                        "blackmanharris")
        window_menu = OptionMenu(root, self.w_type, *window_names)
        window_menu.grid(row=1, column=0, sticky=W, padx=(65, 5),
                         pady=std_pady)

        # window size
        caption("M:", 1, (180, 5))
        self.M = field(1, (200, 5), "1201")

        # fft size
        caption("N:", 1, (255, 5))
        self.N = field(1, (275, 5), "2048")

        # threshold magnitude
        caption("t:", 1, (330, 5))
        self.t = field(1, (348, 5), "-90")

        # min duration of sinusoidal tracks
        caption("minSineDur:", 2, (5, 5))
        self.minSineDur = field(2, (87, 5), "0.1")

        # max number of harmonics
        caption("nH:", 2, (145, 5))
        self.nH = field(2, (172, 5), "100")

        # min fundamental frequency
        caption("minf0:", 2, (227, 5))
        self.minf0 = field(2, (275, 5), "130")

        # max fundamental frequency
        caption("maxf0:", 2, (330, 5))
        self.maxf0 = field(2, (380, 5), "300")

        # max error accepted
        caption("f0et:", 3, 5)
        self.f0et = field(3, (42, 5), "7", width=3)

        # allowed deviation of harmonic tracks
        caption("harmDevSlope:", 3, (90, 5))
        self.harmDevSlope = field(3, (190, 5), "0.01")

        # button that runs the analysis of the sound
        self.compute = Button(root, text="Analysis/Synthesis",
                              command=self.analysis,
                              bg="dark red", fg="white")
        self.compute.grid(row=4, column=0, padx=5, pady=(10, 5), sticky=W)

        # button that plays the analysis/synthesis output
        self.output = Button(root, text=">",
                             command=player('_harmonicModel.wav'),
                             bg="gray30", fg="white")
        self.output.grid(row=4, column=0, padx=(145, 5), pady=(10, 5),
                         sticky=W)

        # separation line between the analysis and transformation sections
        separator = Frame(root, height=1, width=50, bg="black")
        separator.grid(row=5, pady=5, sticky=W + E)

        # --- transformation parameters ---

        # frequency scaling factors
        caption("Frequency scaling factors (time, value pairs):", 6, 5,
                pady=(5, 2))
        self.freqScaling = field(7, 5, "[0, 2.0, 1, 0.3]", width=35,
                                 sticky=W + E, pady=(0, 2))

        # frequency stretching factors
        caption("Frequency stretching factors (time, value pairs):", 8, 5,
                pady=(5, 2))
        self.freqStretching = field(9, 5, "[0, 1, 1, 1.5]", width=35,
                                    sticky=W + E, pady=(0, 2))

        # timbre preservation (label and field share row 10)
        caption(
            "Timbre preservation (1 preserves original timbre, 0 it does not):",
            10, 5, pady=(5, 2))
        self.timbrePreservation = field(10, (395, 5), "1", width=2,
                                        sticky=W + E, pady=(5, 2))

        # time scaling factors
        caption("Time scaling factors (time, value pairs):", 11, 5,
                pady=(5, 2))
        self.timeScaling = field(12, 5,
                                 "[0, 0, 0.671, 0.671, 1.978, 1.978+1.0]",
                                 width=35, sticky=W + E, pady=(0, 2))

        # button that runs the synthesis (rebinds self.compute)
        self.compute = Button(root, text="Apply Transformation",
                              command=self.transformation_synthesis,
                              bg="dark green", fg="white")
        self.compute.grid(row=13, column=0, padx=5, pady=(10, 15), sticky=W)

        # button that plays the transformation synthesis output
        self.transf_output = Button(
            root, text=">",
            command=player('_harmonicModelTransformation.wav'),
            bg="gray30", fg="white")
        self.transf_output.grid(row=13, column=0, padx=(165, 5),
                                pady=(10, 15), sticky=W)

        # options handed to the file-open dialog
        self.file_opt = {
            'defaultextension': '.wav',
            'filetypes': [('All files', '.*'), ('Wav files', '.wav')],
            'initialdir': 'sounds/',
            'title': 'Open a mono audio file .wav with sample frequency 44100 Hz',
        }
Ejemplo n.º 18
0
    def initUI(self):
        """
        Build the stochastic-model transformation window: input-file chooser
        with browse and preview buttons, the stocf and time-scaling fields,
        the Apply Transformation button with its output player, and the
        file-dialog options.
        """
        frame = self.parent
        top_pad = (10, 2)
        bottom_pad = (10, 15)
        player_colors = {"bg": "gray30", "fg": "white"}

        # --- row 0: input file ---
        file_caption = Label(frame, text="inputFile:")
        file_caption.grid(row=0, column=0, sticky=W, padx=5, pady=top_pad)

        # text box holding the path of the sound file
        path_entry = Entry(frame)
        path_entry.focus_set()
        path_entry.configure(width=25)
        path_entry.grid(row=0, column=0, sticky=W, padx=(70, 5), pady=top_pad)
        path_entry.delete(0, END)
        path_entry.insert(0, 'sounds/rain.wav')
        self.filelocation = path_entry

        # browse button, placed beside the path text box
        browse_btn = Button(frame, text="...", command=self.browse_file)
        browse_btn.grid(row=0, column=0, sticky=W, padx=(280, 6),
                        pady=top_pad)

        # preview button for the input sound
        def play_input():
            return audio.play_wav(self.filelocation.get())

        preview_btn = Button(frame, text=">", command=play_input,
                             **player_colors)
        preview_btn.grid(row=0, column=0, sticky=W, padx=(325, 6),
                         pady=top_pad)

        # --- row 1: decimation factor ---
        stocf_caption = Label(frame, text="stocf:")
        stocf_caption.grid(row=1, column=0, sticky=W, padx=(5, 5),
                           pady=top_pad)
        stocf_entry = Entry(frame, justify=CENTER)
        stocf_entry.configure(width=5)
        stocf_entry.grid(row=1, column=0, sticky=W, padx=(47, 5),
                         pady=top_pad)
        stocf_entry.delete(0, END)
        stocf_entry.insert(0, "0.1")
        self.stocf = stocf_entry

        # --- rows 2-3: time scaling factors ---
        scaling_caption = Label(
            frame, text="Time scaling factors (time, value pairs):")
        scaling_caption.grid(row=2, column=0, sticky=W, padx=5, pady=(5, 2))
        scaling_entry = Entry(frame, justify=CENTER)
        scaling_entry.configure(width=35)
        scaling_entry.grid(row=3, column=0, sticky=W + E, padx=5,
                           pady=(0, 2))
        scaling_entry.delete(0, END)
        scaling_entry.insert(0, "[0, 0, 1, 2]")
        self.timeScaling = scaling_entry

        # --- row 13: run the synthesis and play its result ---
        self.compute = Button(frame, text="Apply Transformation",
                              command=self.transformation_synthesis,
                              bg="dark green", fg="white")
        self.compute.grid(row=13, column=0, padx=5, pady=bottom_pad,
                          sticky=W)

        def play_output():
            return audio.play_wav(
                'output_sounds/' + strip_file(self.filelocation.get()) +
                '_stochasticModelTransformation.wav')

        self.transf_output = Button(frame, text=">", command=play_output,
                                    **player_colors)
        self.transf_output.grid(row=13, column=0, padx=(165, 5),
                                pady=bottom_pad, sticky=W)

        # options handed to the file-open dialog
        self.file_opt = {
            'defaultextension': '.wav',
            'filetypes': [('All files', '.*'), ('Wav files', '.wav')],
            'initialdir': 'sounds/',
            'title': 'Open a mono audio file .wav with sample frequency 44100 Hz',
        }
Ejemplo n.º 19
0
def main(inputFile=demo_sound_path('piano.wav'),
         window='blackman',
         M=511,
         N=1024,
         time=.2,
         interactive=True,
         plotFile=False):
    """
    Analyze one fragment of a sound with the DFT, resynthesize it and plot the result.

    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (choice of rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size (odd integer value)
    N: fft size (power of two, bigger or equal than M)
    time: time to start analysis (in seconds)
    interactive: when True the figure is displayed with plt.show()
    plotFile: when True the figure is saved under 'output_plots/'

    Raises ValueError if the M-sample fragment starting at `time` does not lie
    completely inside the sound.
    """

    # read input sound (monophonic with sampling rate of 44100)
    fs, x = audio.read_wav(inputFile)

    # compute analysis window
    w = get_window(window, M)

    # get a fragment of the input sound of size M
    sample = int(time * fs)
    # The fragment x[sample:sample + M] is complete as long as it ends at or
    # before the last sample, so only sample + M > x.size is out of bounds.
    if sample < 0 or sample + M > x.size:
        raise ValueError("Time outside sound boundaries")
    x_frame = x[sample:sample + M]

    # compute the dft of the sound fragment
    mX, pX = dft.from_audio(x_frame, w, N)

    # compute the inverse dft of the spectrum
    y = dft.to_audio(mX, pX, w.size) * sum(w)

    # create figure
    plt.figure(figsize=(12, 9))

    # time axis shared by the input and output fragment plots
    fragTime = time + np.arange(M) / float(fs)
    fragEnd = time + M / float(fs)

    # plot the sound fragment
    plt.subplot(4, 1, 1)
    plt.plot(fragTime, x_frame)
    plt.axis([time, fragEnd, min(x_frame), max(x_frame)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # plot the magnitude spectrum
    plt.subplot(4, 1, 2)
    plt.plot(float(fs) * np.arange(mX.size) / float(N), mX, 'r')
    plt.axis([0, fs / 2.0, min(mX), max(mX)])
    plt.title('magnitude spectrum: mX')
    plt.ylabel('amplitude (dB)')
    plt.xlabel('frequency (Hz)')

    # plot the phase spectrum
    plt.subplot(4, 1, 3)
    plt.plot(float(fs) * np.arange(pX.size) / float(N), pX, 'c')
    plt.axis([0, fs / 2.0, min(pX), max(pX)])
    plt.title('phase spectrum: pX')
    plt.ylabel('phase (radians)')
    plt.xlabel('frequency (Hz)')

    # plot the sound resulting from the inverse dft
    plt.subplot(4, 1, 4)
    plt.plot(fragTime, y)
    plt.axis([time, fragEnd, min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()

    # Save before showing: once a blocking plt.show() returns, the window has
    # been closed and plt.savefig would write an empty figure.
    if plotFile:
        plt.savefig('output_plots/%s_dft_model.png' %
                    files.strip_file(inputFile))
    if interactive:
        plt.show()
Ejemplo n.º 20
0
def main(inputFile=demo_sound_path('rain.wav'), stocf=0.1, timeScaling=np.array([0, 0, 1, 2]),
         interactive=True, plotFile=False):
    """
    Time-scale a sound using the stochastic model, write the result and plot it.

    inputFile: name of input sound file
    stocf: decimation factor used for the stochastic approximation
    timeScaling: time scaling factors, in time-value pairs
    interactive: when true, display the figure on screen
    plotFile: when true, save the figure as a png under output_plots/
    """

    # hop size; the size handed to the stochastic model is twice this value
    H = 128

    # read input sound
    (fs, x) = audio.read_wav(inputFile)

    # perform stochastic analysis
    mYst = stochastic.from_audio(x, H, H * 2, stocf)

    # perform time scaling of stochastic representation
    ystocEnv = stochastic.scale_time(mYst, timeScaling)

    # synthesize output sound
    y = stochastic.to_audio(ystocEnv, H, H * 2)

    # write output sound
    outputFile = 'output_sounds/' + strip_file(inputFile) + '_stochasticModelTransformation.wav'
    audio.write_wav(y, fs, outputFile)

    # frequency axis shared by both stochastic plots (depends only on stocf, H and fs)
    binFreq = np.arange(stocf * H) * float(fs) / (stocf * 2 * H)

    # create figure to plot
    plt.figure(figsize=(12, 9))

    # plot the input sound
    plt.subplot(4, 1, 1)
    plt.plot(np.arange(x.size) / float(fs), x)
    plt.axis([0, x.size / float(fs), min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # plot stochastic representation
    plt.subplot(4, 1, 2)
    numFrames = int(mYst.shape[0])
    frmTime = H * np.arange(numFrames) / float(fs)
    plt.pcolormesh(frmTime, binFreq, np.transpose(mYst))
    plt.autoscale(tight=True)
    plt.xlabel('time (sec)')
    plt.ylabel('frequency (Hz)')
    plt.title('stochastic approximation')

    # plot modified stochastic representation
    plt.subplot(4, 1, 3)
    numFrames = int(ystocEnv.shape[0])
    frmTime = H * np.arange(numFrames) / float(fs)
    plt.pcolormesh(frmTime, binFreq, np.transpose(ystocEnv))
    plt.autoscale(tight=True)
    plt.xlabel('time (sec)')
    plt.ylabel('frequency (Hz)')
    plt.title('modified stochastic approximation')

    # plot the output sound
    plt.subplot(4, 1, 4)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')

    plt.tight_layout()

    # Save before showing: plt.show() blocks until the window is closed, and
    # saving after that would write a blank figure instead of this one.
    if plotFile:
        plt.savefig('output_plots/%s_stochastic_transformation.png' % files.strip_file(inputFile))
    if interactive:
        plt.show()
Ejemplo n.º 21
0
    def initUI(self):
        """Create and lay out every widget of the panel.

        Builds, in column 0 of the parent grid: the sound-file chooser
        (path entry, browse and preview buttons), the window-type menu,
        one text entry per analysis parameter, the compute button and
        three buttons that play the generated output files. Finally
        stores the options used by the file-open dialog in self.file_opt.
        """
        parent = self.parent

        def add_label(text, row, pady=(10, 2)):
            # left-aligned caption placed at the start of the given row
            Label(parent, text=text).grid(row=row,
                                          column=0,
                                          sticky=W,
                                          padx=5,
                                          pady=pady)

        def add_entry(text, row, left_pad, initial):
            # caption followed, on the same row, by a 5-char centered entry
            add_label(text, row)
            field = Entry(parent, justify=CENTER)
            field["width"] = 5
            field.grid(row=row,
                       column=0,
                       sticky=W,
                       padx=(left_pad, 5),
                       pady=(10, 2))
            field.delete(0, END)
            field.insert(0, initial)
            return field

        # --- input file selection ---
        add_label("Input file (.wav, mono and 44100 sampling rate):", 0)

        # text box holding the path of the sound file
        self.filelocation = Entry(parent)
        self.filelocation.focus_set()
        self.filelocation["width"] = 25
        self.filelocation.grid(row=1, column=0, sticky=W, padx=10)
        self.filelocation.delete(0, END)
        self.filelocation.insert(0, 'sounds/sax-phrase-short.wav')

        # browse button, placed beside the path text box
        self.open_file = Button(parent,
                                text="Browse...",
                                command=self.browse_file)
        self.open_file.grid(row=1, column=0, sticky=W, padx=(220, 6))

        # button that plays the selected sound file
        self.preview = Button(
            parent,
            text=">",
            command=lambda: audio.play_wav(self.filelocation.get()),
            bg="gray30",
            fg="white")
        self.preview.grid(row=1, column=0, sticky=W, padx=(306, 6))

        # --- model parameters ---

        # analysis window type, "blackman" selected initially
        add_label("Window type:", 2)
        self.w_type = StringVar()
        self.w_type.set("blackman")
        window_option = OptionMenu(parent, self.w_type, "rectangular",
                                   "hanning", "hamming", "blackman",
                                   "blackmanharris")
        window_option.grid(row=2,
                           column=0,
                           sticky=W,
                           padx=(95, 5),
                           pady=(10, 2))

        # window size
        self.M = add_entry("Window size (M):", 4, 115, "601")

        # fft size
        self.N = add_entry("FFT size (N) (power of two bigger than M):", 5,
                           270, "1024")

        # magnitude threshold
        self.t = add_entry("Magnitude threshold (t) (in dB):", 6, 205, "-100")

        # minimum duration of sinusoidal tracks
        self.minSineDur = add_entry("Minimum duration of sinusoidal tracks:",
                                    7, 250, "0.1")

        # maximum number of harmonics
        self.nH = add_entry("Maximum number of harmonics:", 8, 215, "100")

        # minimum fundamental frequency
        self.minf0 = add_entry("Minimum fundamental frequency:", 9, 220, "350")

        # maximum fundamental frequency
        self.maxf0 = add_entry("Maximum fundamental frequency:", 10, 220,
                               "700")

        # maximum error accepted in f0 detection
        self.f0et = add_entry("Maximum error in f0 detection algorithm:", 11,
                              265, "5")

        # allowed deviation of harmonic tracks
        self.harmDevSlope = add_entry(
            "Max frequency deviation in harmonic tracks:", 12, 285, "0.01")

        # stochastic decimation factor
        self.stocf = add_entry("Stochastic approximation factor:", 13, 210,
                               "0.2")

        # --- actions ---

        # button that runs the computation
        self.compute = Button(parent,
                              text="Compute",
                              command=self.compute_model,
                              bg="dark red",
                              fg="white")
        self.compute.grid(row=14, column=0, padx=5, pady=(10, 2), sticky=W)

        # one play button per generated file: (row, caption, suffix, pady);
        # self.output ends up referring to the last button created
        playback_rows = (
            (15, "Sinusoidal:", '_hpsModel_sines.wav', (10, 0)),
            (16, "Stochastic:", '_hpsModel_stochastic.wav', (5, 0)),
            (17, "Output:", '_hpsModel.wav', (5, 15)),
        )
        for row, caption, suffix, pady in playback_rows:
            add_label(caption, row, pady)
            # bind suffix as a default so each button keeps its own file name
            self.output = Button(
                parent,
                text=">",
                command=lambda suffix=suffix: audio.play_wav(
                    'output_sounds/' + strip_file(self.filelocation.get()) +
                    suffix),
                bg="gray30",
                fg="white")
            self.output.grid(row=row,
                             column=0,
                             padx=(80, 5),
                             pady=pady,
                             sticky=W)

        # options for the file-open dialog
        self.file_opt = {
            'defaultextension': '.wav',
            'filetypes': [('All files', '.*'), ('Wav files', '.wav')],
            'initialdir': 'sounds/',
            'title': 'Open a mono audio file .wav with sample frequency 44100 Hz',
        }
Ejemplo n.º 22
0
def analysis(inputFile=demo_sound_path('mridangam.wav'),
             window='hamming',
             M=801,
             N=2048,
             t=-90,
             minSineDur=0.01,
             maxnSines=150,
             freqDevOffset=20,
             freqDevSlope=0.02,
             interactive=True,
             plotFile=False):
    """
    Run the sine model on a sound, resynthesize it, write and plot the result.

    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size; N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks
    maxnSines: maximum number of parallel sinusoids
    freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0
    freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation
    interactive: when true, display the figure without blocking
    plotFile: when true, save the figure as a png under output_plots/
    returns inputFile: input file name; fs: sampling rate of input file,
            tfreq, tmag: sinusoidal frequencies and magnitudes
    """

    # synthesis fft size and hop size (the hop has to be 1/4 of Ns)
    Ns = 512
    H = 128

    # upper limit of the frequency axis in the tracks plot
    maxplotfreq = 5000.0

    # load the sound and build the analysis window
    fs, x = audio.read_wav(inputFile)
    w = get_window(window, M)

    # sine model of the whole sound; the returned phases are not used
    tfreq, tmag, _ = sine.from_audio(x, fs, w, N, H, t, maxnSines, minSineDur,
                                     freqDevOffset, freqDevSlope)

    # resynthesize from frequencies and magnitudes only (empty phase array)
    y = sine.to_audio(tfreq, tmag, np.array([]), Ns, H, fs)

    # write the synthesized sound
    outputFile = 'output_sounds/' + strip_file(inputFile) + '_sineModel.wav'
    audio.write_wav(y, fs, outputFile)

    # time axes and durations, in seconds
    rate = float(fs)
    inputTime = np.arange(x.size) / rate
    inputDur = x.size / rate
    outputTime = np.arange(y.size) / rate
    outputDur = y.size / rate

    plt.figure(figsize=(12, 9))

    # top: input sound
    plt.subplot(3, 1, 1)
    plt.plot(inputTime, x)
    plt.axis([0, inputDur, min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # middle: sinusoidal tracks, drawn only when at least one track exists
    if tfreq.shape[1] > 0:
        plt.subplot(3, 1, 2)
        # zero out values at or above maxplotfreq, then blank every
        # non-positive value with nan so it is not drawn
        tracks = tfreq * np.less(tfreq, maxplotfreq)
        tracks[tracks <= 0] = np.nan
        frmTime = H * np.arange(int(tracks.shape[0])) / rate
        plt.plot(frmTime, tracks)
        plt.axis([0, inputDur, 0, maxplotfreq])
        plt.title('frequencies of sinusoidal tracks')

    # bottom: synthesized sound
    plt.subplot(3, 1, 3)
    plt.plot(outputTime, y)
    plt.axis([0, outputDur, min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()

    if interactive:
        plt.show(block=False)
    if plotFile:
        plt.savefig('output_plots/%s_sine_transformation_analysis.png' %
                    files.strip_file(inputFile))

    return inputFile, fs, tfreq, tmag
Ejemplo n.º 23
0
def main(inputFile=demo_sound_path('bendir.wav'),
         window='hamming',
         M=2001,
         N=2048,
         t=-80,
         minSineDur=0.02,
         maxnSines=150,
         freqDevOffset=10,
         freqDevSlope=0.001,
         stocf=0.2,
         interactive=True,
         plotFile=False):
    """
    Analyze and resynthesize a sound with the sinusoidal plus stochastic model,
    write the three resulting sounds and plot the result.

    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size; N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks
    maxnSines: maximum number of parallel sinusoids
    freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0
    freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation
    stocf: decimation factor used for the stochastic approximation
    interactive: when true, display the figure on screen
    plotFile: when true, save the figure as a png under output_plots/
    """

    # size of fft used in synthesis
    Ns = 512

    # hop size (has to be 1/4 of Ns)
    H = 128

    # read input sound
    (fs, x) = audio.read_wav(inputFile)

    # compute analysis window
    w = get_window(window, M)

    # perform sinusoidal+stochastic analysis
    tfreq, tmag, tphase, stocEnv = sps.from_audio(x, fs, w, N, H, t,
                                                  minSineDur, maxnSines,
                                                  freqDevOffset, freqDevSlope,
                                                  stocf)

    # synthesize sinusoidal+stochastic model
    y, ys, yst = sps.to_audio(tfreq, tmag, tphase, stocEnv, Ns, H, fs)

    # output sound files (monophonic with sampling rate of 44100)
    baseFileName = strip_file(inputFile)
    outputFileSines, outputFileStochastic, outputFile = [
        'output_sounds/%s_spsModel%s.wav' % (baseFileName, i)
        for i in ('_sines', '_stochastic', '')
    ]

    # write sound files for sinusoidal, stochastic, and the sum
    audio.write_wav(ys, fs, outputFileSines)
    audio.write_wav(yst, fs, outputFileStochastic)
    audio.write_wav(y, fs, outputFile)

    # create figure to plot
    plt.figure(figsize=(12, 9))

    # frequency range to plot
    maxplotfreq = 10000.0

    # plot the input sound
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size) / float(fs), x)
    plt.axis([0, x.size / float(fs), min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # plot the stochastic envelope up to maxplotfreq
    plt.subplot(3, 1, 2)
    numFrames = int(stocEnv.shape[0])
    sizeEnv = int(stocEnv.shape[1])
    frmTime = H * np.arange(numFrames) / float(fs)
    # Number of envelope bins to draw. It must be an integer, because a float
    # slice bound raises TypeError. It is clamped to the envelope width so the
    # frequency axis always has as many points as the slice has columns.
    numBins = min(sizeEnv, int(sizeEnv * maxplotfreq / (.5 * fs)) + 1)
    binFreq = (.5 * fs) * np.arange(numBins) / sizeEnv
    plt.pcolormesh(frmTime, binFreq, np.transpose(stocEnv[:, :numBins]))
    plt.autoscale(tight=True)

    # plot sinusoidal frequencies on top of stochastic component
    if tfreq.shape[1] > 0:
        # zero out values at or above maxplotfreq, then blank zeros with nan
        sines = tfreq * np.less(tfreq, maxplotfreq)
        sines[sines == 0] = np.nan
        numFrames = int(sines.shape[0])
        frmTime = H * np.arange(numFrames) / float(fs)
        plt.plot(frmTime, sines, color='k', ms=3, alpha=1)
        plt.xlabel('time(s)')
        plt.ylabel('Frequency(Hz)')
        plt.autoscale(tight=True)
        plt.title('sinusoidal + stochastic spectrogram')

    # plot the output sound
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()

    # Save before showing: plt.show() blocks until the window is closed, and
    # saving after that would write a blank figure instead of this one.
    if plotFile:
        plt.savefig('output_plots/%s_sps_model.png' %
                    files.strip_file(inputFile))
    if interactive:
        plt.show()