Esempio n. 1
0
def _zero_spectra(n_bins):
    """Return three independent zero arrays of length ``n_bins``."""
    return [np.zeros(n_bins) for _ in range(3)]


def _pad_channel(signal, hM1, hM2):
    """Flatten ``signal`` and pad it with ``hM2`` leading / ``hM1`` trailing zeros."""
    return np.concatenate((np.zeros(hM2), np.ravel(signal), np.zeros(hM1)))


def _trim_padding(y, hM1, hM2):
    """Return a copy of ``y`` without the ``hM2`` leading / ``hM1`` trailing samples."""
    return np.array(y[hM2:y.size - hM1])


def _panning_window(pan_ind, centre):
    """Gaussian weight around ``centre`` with floor ``v`` (module-level ``v``, ``E``)."""
    return v + (1 - v) * np.exp(
        np.multiply((-1 / (2 * E)), np.square(np.subtract(pan_ind, centre))))


def stft_anal_synth(s1,
                    s2,
                    fs,
                    w,
                    N,
                    H,
                    m_phi=None,
                    p_phi=None,
                    m_sim=None,
                    p_sim=None,
                    plot_frames=True):
    """
    STFT analysis-synthesis for ambience extraction and front-image upmixing.

    Both channels are processed frame by frame. For every frame the function
    derives two rear (ambience) spectra by weighting each input channel with a
    coherence-dependent gain, and three front spectra (left, centre, right) by
    weighting the complex sum of both channels with normalised panning
    windows. All five spectra are then scaled by one common power ratio and
    overlap-added into the output signals.

    s1: stereo left channel
    s2: stereo right channel (framing is driven by s1; s2 is expected to have
        the same length)
    fs: not used by this function; kept for interface compatibility
    w: analysis window, N: FFT size, H: hop size
    m_phi, p_phi: running state handed to ``coherence`` together with the
        module-level ``lamda``; each is a list of three arrays. When omitted,
        three zero arrays of length ``1 + N // 2`` are created per call.
    m_sim, p_sim: same as above, but used with a factor of 1.0 for the
        similarity / panning-index computation.
    plot_frames: when true (default, the historical behaviour) the input
        magnitude of s1 and the centre-channel magnitude are plotted for every
        frame and ``plt.show()`` is called. Pass False for batch processing.

    returns (yL, yR, yL_F, yC, yR_F): rear-left, rear-right, front-left,
        front-centre and front-right output sounds, with the analysis padding
        removed.

    Depends on names defined elsewhere in the module: ``DFT``, ``coherence``,
    ``c_add``, ``plt``, and the constants ``lamda``, ``u0``, ``u1``, ``sigma``,
    ``phi0``, ``v``, ``E``, ``si_l``, ``si_c``, ``si_r``.
    """
    # Build the default state per call from the N that was actually passed.
    # Defaults written in the signature would be evaluated once, against a
    # global N, and shared between calls.
    n_bins = 1 + N // 2
    if m_phi is None:
        m_phi = _zero_spectra(n_bins)
    if p_phi is None:
        p_phi = _zero_spectra(n_bins)
    if m_sim is None:
        m_sim = _zero_spectra(n_bins)
    if p_sim is None:
        p_sim = _zero_spectra(n_bins)

    M = w.size  # size of analysis window
    hM1 = (M + 1) // 2  # half analysis window size by rounding
    hM2 = M // 2  # half analysis window size by floor

    # Zeros at the beginning centre the first window at sample 0; zeros at
    # the end allow the last sample to be analysed.
    s1 = _pad_channel(s1, hM1, hM2)
    s2 = _pad_channel(s2, hM1, hM2)

    pin = hM1  # sound pointer starts in the middle of the first window
    pend = s1.size - hM1  # last sample to start a frame
    w = w / sum(w)  # normalize analysis window

    yL = np.zeros(s1.size)  # rear-left output
    yR = np.zeros(s2.size)  # rear-right output
    yL_F = np.zeros(s2.size)  # front-left output
    yC = np.zeros(s2.size)  # front-centre output
    yR_F = np.zeros(s2.size)  # front-right output

    while pin <= pend:
        frame = slice(pin - hM1, pin + hM2)  # samples covered by this frame

        #-----------------analysis---------------------------------
        mX1, pX1 = DFT.dftAnal(s1[frame], w, N)
        mX2, pX2 = DFT.dftAnal(s2[frame], w, N)

        # The inverse of the 20*log10 applied before synthesis below; the
        # spectral processing in between works on these converted values.
        mX1 = 10**(mX1 / 20)
        mX2 = 10**(mX2 / 20)

        #----------------spectral transformations------------------

        #-----inter-channel short-time coherence------
        m_phi, p_phi = coherence(mX1, pX1, mX2, pX2, m_phi, p_phi, lamda)
        phi = np.divide(m_phi[2], np.sqrt(np.multiply(m_phi[0], m_phi[1])))

        if np.sum(p_phi[0]) != 0 or np.sum(p_phi[1]) != 0:
            print("coh_phases not cancelling")

        # Per-bin gain that moves between u0 and u1 as phi crosses phi0.
        tau = ((u1 - u0) / 2) * np.tanh(sigma * np.pi *
                                        (phi0 - phi)) + ((u1 + u0) / 2)

        mY1 = mX1 * tau
        mY2 = mX2 * tau

        #--similarity for identifying panned sources and unmixing them--
        # Same coherence helper, called with 1.0 instead of lamda.
        m_sim, p_sim = coherence(mX1, pX1, mX2, pX2, m_sim, p_sim, 1.0)
        sim = 2 * np.divide(m_sim[2], np.add(m_sim[0],
                                             m_sim[1]))  # similarity function

        if np.sum(p_sim[0]) != 0 or np.sum(p_sim[1]) != 0:
            print("sim_phases not cancelling")

        sim_0 = np.divide(m_sim[2], m_sim[0])  # partial similarity, left
        sim_1 = np.divide(m_sim[2], m_sim[1])  # partial similarity, right

        diff = sim_0 - sim_1  # equation 8 in the report
        # equation 9: +1 where diff > 0, -1 where diff < 0, 0 otherwise
        delta = (diff > 0).astype(int) - (diff < 0).astype(int)
        pan_ind = (1 - sim) * delta  # equation 10

        # One window per front channel, centred on si_l / si_c / si_r, then
        # normalised so the three weights sum to one in every bin.
        gwf_l = _panning_window(pan_ind, si_l)
        gwf_c = _panning_window(pan_ind, si_c)
        gwf_r = _panning_window(pan_ind, si_r)
        gwf_sum = gwf_l + gwf_c + gwf_r
        gwf_l = gwf_l / gwf_sum
        gwf_c = gwf_c / gwf_sum
        gwf_r = gwf_r / gwf_sum

        # complex addition of both channels
        mX_sumLR, pX_sumLR = c_add(mX1, pX1, mX2, pX2)
        # equation 13
        mY_L = gwf_l * mX_sumLR
        mY_C = gwf_c * mX_sumLR
        mY_R = gwf_r * mX_sumLR

        #-----power compensation-----
        # The ratio compares the stereo input power with the power of the
        # three FRONT channels only; the resulting gain is nevertheless
        # applied to all five output spectra.
        total_pow = np.sum(np.square(mX1)) + np.sum(np.square(mX2))
        new_pow = (np.sum(np.square(mY_L)) + np.sum(np.square(mY_C)) +
                   np.sum(np.square(mY_R)))
        pow_ratio = np.sqrt(total_pow / new_pow)

        mY1 = pow_ratio * mY1  # rear-left
        mY2 = pow_ratio * mY2  # rear-right
        mY_L = pow_ratio * mY_L  # front-left
        mY_C = pow_ratio * mY_C  # front-center
        mY_R = pow_ratio * mY_R  # front-right

        # Debug aid: plt.show() is called once per frame.
        if plot_frames:
            plt.plot(mX1)
            plt.plot(mY_C)
            plt.show()

        # Apply 20*log10 before handing the spectra to dftSynth.
        mY1 = 20 * np.log10(mY1)
        mY2 = 20 * np.log10(mY2)
        mY_L = 20 * np.log10(mY_L)
        mY_C = 20 * np.log10(mY_C)
        mY_R = 20 * np.log10(mY_R)

        #-------------------------synthesis-----------------------------

        #----ambience: rear left and right speakers-----
        # Each rear channel is resynthesised with its own input phase.
        yL[frame] += H * DFT.dftSynth(mY1, pX1, M)  # overlap-add
        yR[frame] += H * DFT.dftSynth(mY2, pX2, M)

        #----front image: Left, Center, Right speakers-----
        # All front channels share the phase of the summed input.
        yL_F[frame] += H * DFT.dftSynth(mY_L, pX_sumLR, M)
        yC[frame] += H * DFT.dftSynth(mY_C, pX_sumLR, M)
        yR_F[frame] += H * DFT.dftSynth(mY_R, pX_sumLR, M)

        #----hopping----
        pin += H  # advance sound pointer

    # Remove the zero padding that was added before the analysis.
    yL = _trim_padding(yL, hM1, hM2)
    yR = _trim_padding(yR, hM1, hM2)
    yL_F = _trim_padding(yL_F, hM1, hM2)
    yR_F = _trim_padding(yR_F, hM1, hM2)
    yC = _trim_padding(yC, hM1, hM2)

    return yL, yR, yL_F, yC, yR_F
Esempio n. 2
0
import math
import os
import sys

import matplotlib.pyplot as plt
import numpy as np
from scipy import signal
from scipy.fftpack import fft
from scipy.signal import butter, lfilter, freqz
from scipy.signal import get_window
from scipy.signal import triang

import stft as DFT

sys.path.append('/Users/mac/git/sms-tools/software/models')
import utilFunctions as UF

# Plot the spectrum of a Blackman-Harris window as seen through a Hamming
# analysis window.
M = 2048  # length of both windows, in samples
N = 2048  # third argument of dftAnal (FFT size)
fs = 44100  # presumably the sampling rate in Hz; not used below
w = get_window('hamming', M)  # analysis window handed to dftAnal
x = get_window('blackmanharris', M)  # the "signal" being analysed
# Alternative test signal: x = signal.chebwin(M, at=100), optionally
# normalised with x = x / sum(x).

m, p = DFT.dftAnal(x, w, N)
# NOTE(review): assumes dftAnal returns the magnitude in dB, so this converts
# it to linear scale - confirm against the stft module.
m = 10**(m / 20)
# Parenthesised single-argument print behaves the same on Python 2 and 3.
print(np.shape(m))
plt.plot(m)
plt.show()

# Resynthesis, currently disabled:
# y = DFT.dftSynth(m, p, w.size) * sum(w)  # sum(w) is to normalize