Esempio n. 1
0
def process_audio(infile):
    '''Load an audio file and derive five log-scaled mel feature matrices

    :parameters:
    - infile : str
        path to the audio, loaded with librosa at sample rate SR

    :returns:
    - S : mel features of the full power spectrogram
    - Harm : mel features of the first output of the external `hpss` helper
    - Perc : mel features of the second output of the external `hpss` helper
    - Lowrank : mel features of the first RPCA output (clipped at zero)
    - Sparse : mel features of the second RPCA output (clipped at zero)
    '''

    y, sr = librosa.load(infile, sr=SR)

    def _log_mel(power):
        # Log scaling is relative to the matrix's own maximum and is applied
        # BEFORE the result is passed to melspectrogram as a precomputed S.
        scaled = librosa.logamplitude(power, ref_power=power.max())
        return librosa.feature.melspectrogram(S=scaled,
                                              sr=sr,
                                              n_mels=N_MELS,
                                              fmax=FMAX)

    # 1. Magnitude spectrogram of the full signal
    magnitude = np.abs(librosa.stft(y, n_fft=N_FFT, hop_length=HOP))

    # 2. HPSS (helper defined elsewhere; called on the waveform itself)
    harmonic, percussive = hpss(y)

    # 3. RPCA on the magnitude spectrogram; the third output is discarded
    low_rank, sparse, _ = rpca.robust_pca(magnitude, max_iter=RPCA_MAX_ITER)

    # Negative entries in the RPCA outputs are clipped to zero
    low_rank = np.maximum(0.0, low_rank)
    sparse = np.maximum(0.0, sparse)

    # Square every component first, then convert each one, in return order
    components = (magnitude, harmonic, percussive, low_rank, sparse)
    powers = [np.abs(component) ** 2 for component in components]

    return tuple(_log_mel(power) for power in powers)
Esempio n. 2
0
def decompose(filename, offset=0, duration=30, voice=True):
    '''Decompose a song into two complex spectrogram components

    :parameters:
    - filename : str
        path to the audio
    - offset : float
        initial offset for loading audio
    - duration : float
        maximum amount of audio to load
    - voice : bool
        if True, split with RPCA applied to the lowest three quarters of
        the frequency bins; if False, split with median-filtering HPSS

    :returns:
    - D : np.array, dtype=complex
        STFT of the full signal (not normalized)
    - D1 : np.array, dtype=complex
        first component (first RPCA output, or first HPSS output),
        recombined with the phase of D
    - D2 : np.array, dtype=complex
        second component (second RPCA output, or second HPSS output),
        recombined with the phase of D

    Both components are built from the magnitude normalized by its
    maximum, so their scale differs from that of D.
    '''
    y, sr = librosa.load(filename, sr=SR, offset=offset, duration=duration)

    # Step 1: compute STFT
    D = librosa.stft(y, n_fft=N_FFT,
                     hop_length=HOP_LENGTH).astype(np.complex64)

    # Step 2: separate magnitude and phase
    S, P = librosa.magphase(D)
    S = S / S.max()

    if voice:
        # Floor division keeps tau an int: a float slice bound raises
        # TypeError on Python 3.
        tau = (D.shape[0] * 3) // 4

        # Step 3: RPCA to separate voice and background
        S1, S2, _ = rpca.robust_pca(S[:tau, :], max_iter=25)
        S1, S2 = rpca_correct(S[:tau, :], S1, S2)

        # Bins from tau upward are not decomposed; the same rows are
        # appended unchanged to both components.
        S1 = np.vstack((S1, S[tau:, :]))
        S2 = np.vstack((S2, S[tau:, :]))
    else:
        S1, S2 = librosa.hpss.hpss_median(S,
                                          win_H=WIN_HPSS,
                                          win_P=WIN_HPSS,
                                          p=1.0)

    # Step 4: recombine with phase
    return D, S1 * P, S2 * P
Esempio n. 3
0
def process_audio(infile):
    '''Compute log-scaled mel features for a signal and four decompositions

    :parameters:
    - infile : str
        path to the audio file (loaded at sample rate SR)

    :returns:
    - S, Harm, Perc, Lowrank, Sparse
        mel features of, respectively: the full power spectrogram, the two
        outputs of the external `hpss` helper, and the first two outputs
        of `rpca.robust_pca` (clipped at zero)
    '''

    y, sr = librosa.load(infile, sr=SR)

    def mel_of(power):
        # Reference power for the log scaling is the matrix's own maximum;
        # the log-scaled matrix is then passed to melspectrogram as S.
        return librosa.feature.melspectrogram(
            S=librosa.logamplitude(power, ref_power=power.max()),
            sr=sr,
            n_mels=N_MELS,
            fmax=FMAX)

    # 1. Compute magnitude spectrogram
    spec = np.abs(librosa.stft(y, n_fft=N_FFT, hop_length=HOP))

    # 2. Compute HPSS (helper defined elsewhere, applied to the waveform)
    harm, perc = hpss(y)

    # 3. Compute RPCA; the third return value is unused
    lowrank, sparse, _ = rpca.robust_pca(spec, max_iter=RPCA_MAX_ITER)

    # Clip the RPCA outputs at zero, then square everything
    stack = [spec,
             harm,
             perc,
             np.maximum(0.0, lowrank),
             np.maximum(0.0, sparse)]
    stack = [np.abs(layer) ** 2 for layer in stack]

    S, Harm, Perc, Lowrank, Sparse = [mel_of(layer) for layer in stack]

    return S, Harm, Perc, Lowrank, Sparse
Esempio n. 4
0
def decompose(filename, offset=0, duration=30, voice=True):
    '''Decompose a song into two complex spectrogram components

    :parameters:
    - filename : str
        path to the audio
    - offset : float
        initial offset for loading audio
    - duration : float
        maximum amount of audio to load
    - voice : bool
        True: RPCA on the lowest three quarters of the frequency bins.
        False: median-filtering HPSS on the whole magnitude spectrogram.

    :returns:
    - D : np.array, dtype=complex
        STFT of the full signal (not normalized)
    - D1 : np.array, dtype=complex
        first RPCA/HPSS output multiplied by the phase of D
    - D2 : np.array, dtype=complex
        second RPCA/HPSS output multiplied by the phase of D

    The components are derived from the peak-normalized magnitude, so
    they are not on the same scale as D.
    '''
    y, sr = librosa.load(filename, sr=SR, offset=offset, duration=duration)

    # Step 1: compute STFT
    D = librosa.stft(y, n_fft=N_FFT, hop_length=HOP_LENGTH).astype(np.complex64)

    # Step 2: separate magnitude and phase
    S, P = librosa.magphase(D)
    S = S / S.max()

    if voice:
        # Must be an int to be usable as a slice bound; `/` yields a float
        # on Python 3 and makes the slices below raise TypeError.
        tau = (D.shape[0] * 3) // 4

        # Step 3: RPCA to separate voice and background
        S1, S2, _ = rpca.robust_pca(S[:tau, :], max_iter=25)
        S1, S2 = rpca_correct(S[:tau, :], S1, S2)

        # Rows at and above tau bypass RPCA and are copied into both outputs
        S1 = np.vstack((S1, S[tau:, :]))
        S2 = np.vstack((S2, S[tau:, :]))
    else:
        S1, S2 = librosa.hpss.hpss_median(S, win_H=WIN_HPSS, win_P=WIN_HPSS, p=1.0)

    # Step 4: recombine with phase
    return D, S1 * P, S2 * P
Esempio n. 5
0
    def detect_anom_local(self, x, plot=False):
        """Return the indices of anomalous samples in the 1-D series ``x``.

        The series is reshaped into ``self.period`` rows, split with
        ``robust_pca`` into two parts (L and S), and the flattened S part is
        passed to ``pyasl.generalizedESD`` to select outliers.

        Parameters
        ----------
        x : np.ndarray
            Input series; its length must be a multiple of ``self.period``.
        plot : bool
            When exactly ``True``, plot x, L and S with the anomalies marked.

        Returns
        -------
        The second element of the ``generalizedESD`` result (presumably the
        outlier indices -- see the PyAstronomy documentation).

        Raises
        ------
        AssertionError
            If ``len(x)`` is not a multiple of ``self.period``.
        """
        n_samples = x.shape[0]
        assert n_samples % self.period == 0, \
            "length of x must be a multiple of self.period"

        # Floor division: reshape needs integer dimensions, and `/` produces
        # a float on Python 3 (TypeError).
        X = x.reshape([self.period, n_samples // self.period])

        # rpca -- base lambda is 1/sqrt(largest dimension of x), scaled by
        # the instance's lamb_rate
        lamb_base = max(x.shape) ** -0.5
        L, S = robust_pca(X, lamb=lamb_base * self.lamb_rate)
        L = L.reshape([n_samples])
        S = S.reshape([n_samples])

        # select anomaly -- the outlier budget is a fraction (esd_rate) of
        # the series length
        ret = pyasl.generalizedESD(S, int(n_samples * self.esd_rate))
        anom_ind = ret[1]

        if plot is True:
            # Anomaly values are only needed for plotting
            anom_val = np.array([x[k] for k in anom_ind])
            plot_verticle([x, L, S], anom_ind, anom_val)

        return anom_ind
Esempio n. 6
0
# Compute the STFT of `data` with the library's default parameters.
# `signal` is presumably scipy.signal -- confirm against the file's imports.
f, t, data_stft = signal.stft(data)

# Plot the magnitude of the STFT on a logarithmic frequency axis.
plt.figure()
plt.pcolormesh(t, f, np.abs(data_stft), vmin=0, vmax=amp)
# The lower limit is f[1], not f[0] -- presumably to keep a zero-frequency
# bin off the log-scaled axis.
plt.ylim([f[1], f[-1]])
plt.title('STFT Magnitude')
plt.ylabel('Frequency [Hz]')
plt.xlabel('Time [sec]')
plt.yscale('log')
plt.show()

# Compute RPCA of the complex STFT; it yields two matrices, L and S.
L, S = rpca.robust_pca(data_stft)

# Plot the magnitude of L with the same axes and colour range.
# NOTE(review): the title is the same as the first figure's although this
# one shows L -- confirm whether that is intended.
plt.figure()
plt.pcolormesh(t, f, np.abs(L), vmin=0, vmax=amp)
plt.ylim([f[1], f[-1]])
plt.title('STFT Magnitude')
plt.ylabel('Frequency [Hz]')
plt.xlabel('Time [sec]')
plt.yscale('log')
plt.show()

# Masking
if masking:
    # 0.0/1.0 mask selecting the bins where |S| exceeds gain * |L|.
    # `double` is not a Python builtin, so the comparison result is
    # converted to float64 explicitly.
    mask = (np.abs(S) > (gain * np.abs(L))).astype(np.float64)
    # Masked bins go to S_mask; everything else stays in L_mask, so the two
    # always sum back to data_stft.
    S_mask = mask * data_stft
    L_mask = data_stft - S_mask
Esempio n. 7
0
import gc
import time

import cv2
import numpy as np

from rpca import robust_pca

gc.collect()

# Number of images passed to loadimgs
imgNo = 700
# imgNo = 1700
A, X, Y, snapshots, x_pix, y_pix = loadimgs(num=imgNo, filepath='D:/input_hw/')
n = A.shape[1]
m = A.shape[0]

#############################################
# rdmd & background/foreground extraction

# time.clock() was removed in Python 3.8; perf_counter() is the documented
# replacement for measuring elapsed time.
start = time.perf_counter()
L, S = robust_pca(A)

print(L)
print(S.shape)
print(S)
# Scale S by 10**7
S = S * np.power(10, 7)
# print(S.reshape((2240,100)))
# S= S * np.power(10,4.5)
# print(S.reshape((2240,100)))

# Dstart = 0
# Dend = batchsize
# subStart = 0
# subEnd = batchsize - 1
# rank_new = int((rank + p) * batchsize / n)
# errors = 0