Python istftの例、pyroomacoustics.istft Pythonの例

コード例 #1

0

ファイルを表示

 def convergence_callback(Y):
     """Score the current demixed STFT ``Y`` and log the result.

     Every channel ``Y[:, :, ch]`` is taken back to the time domain with
     ``pra.istft`` and compared with the module-level ``separate_recordings``
     through ``bss_eval_sources``.  The SDR and SIR values that come back are
     appended to the module-level ``SDR`` and ``SIR`` containers.
     """
     global SDR, SIR
     from mir_eval.separation import bss_eval_sources

     reference = np.moveaxis(separate_recordings, 1, 2)

     # Inverse STFT, one output channel at a time.
     time_signals = []
     for ch in range(Y.shape[2]):
         time_signals.append(
             pra.istft(Y[:, :, ch], L, L, transform=np.fft.irfft,
                       zp_front=L // 2, zp_back=L // 2))
     estimate = np.array(time_signals)

     # The estimate is read starting L // 2 samples in; both slices are cut
     # so that they cover the same span.
     ref_part = reference[:, :estimate.shape[1] - L // 2, 0]
     est_part = estimate[:, L // 2:reference.shape[1] + L // 2]
     sdr, sir, _sar, _perm = bss_eval_sources(ref_part, est_part)

     SDR.append(sdr)
     SIR.append(sir)

コード例 #2

0

ファイルを表示

    def test_stft_nowindow(self):
        """Round-trip random signals through ``pra.stft`` / ``pra.istft``.

        No analysis or synthesis window is passed.  FFT sizes 128, 256 and
        512 are combined with hops of ``n`` and ``n // 2``; each combination
        is tried on 10 random signals and the reconstruction must match the
        input (``np.allclose``).
        """
        n_frames = 100
        n_trials = 10

        for fft_len in (128, 256, 512):
            for divisor in (1, 2):
                hop = fft_len // divisor
                n_samples = n_frames * fft_len // divisor + fft_len - hop

                for _ in range(n_trials):
                    x = np.random.randn(n_samples)
                    spectrum = pra.stft(x, fft_len, hop, transform=np.fft.rfft)
                    y = pra.istft(spectrum, fft_len, hop,
                                  transform=np.fft.irfft)

                    # Overlapping frames add up in the interior of the
                    # signal, so that part is rescaled by the overlap factor.
                    y[hop:-hop] /= divisor
                    self.assertTrue(np.allclose(x, y))

コード例 #3

0

ファイルを表示

ファイル: bss_iva.py プロジェクト: xinkez/pyroomacoustics

                 zp_front=L // 2,
                 zp_back=L // 2) for ch in mics_signals
    ])
    X = np.moveaxis(X, 0, 2)

    # Run AuxIVA
    Y = pra.bss.auxiva(X,
                       n_iter=30,
                       proj_back=True,
                       callback=convergence_callback)

    # run iSTFT
    y = np.array([
        pra.istft(Y[:, :, ch],
                  L,
                  L,
                  transform=np.fft.irfft,
                  zp_front=L // 2,
                  zp_back=L // 2) for ch in range(Y.shape[2])
    ])

    # Compare SIR
    #############
    sdr, sir, sar, perm = bss_eval_sources(ref[:, :y.shape[1] - L // 2, 0],
                                           y[:, L // 2:ref.shape[1] + L // 2])

    print('SDR:', sdr)
    print('SIR:', sir)

    import matplotlib.pyplot as plt
    plt.figure()
    plt.subplot(2, 2, 1)

コード例 #4

0

ファイルを表示

ファイル: test_sparseauxiva.py プロジェクト: virgile-hernicot/pyroomacoustics

def test_sparseauxiva():
    """Simulate a two-source, two-microphone mixture and separate it.

    The sources listed in the module-level ``wav_files`` are placed in a
    ``pra.ShoeBox`` room, recorded one at a time, mixed, transformed with
    ``pra.stft``, separated with ``pra.bss.sparseauxiva`` and brought back to
    the time domain with ``pra.istft``.  SDR and SIR from ``bss_eval_images``
    are printed.  Several wav files (samples, mixes, demixed outputs and
    peak-normalized variants) are written to the current directory.
    """
    fs = 16000

    # One signal per source: the files of each group are read and concatenated.
    signals = [
        np.concatenate([
            wavfile.read(f)[1].astype(np.float32, order='C')
            for f in source_files
        ]) for source_files in wav_files
    ]

    # Save the (concatenated) source signals as 16-bit wav files.
    wavfile.write('sample1.wav', fs, np.asarray(signals[0], dtype=np.int16))
    wavfile.write('sample2.wav', fs, np.asarray(signals[1], dtype=np.int16))

    # Define the room environment, as well as the microphone array and source locations.

    # Room of 8 by 9 (2D)
    room_dim = [8, 9]
    # source locations and delays
    locations = [[2.5, 3], [2.5, 6]]
    delays = [1., 0.]
    # create the room (max_order=15, absorption=0.35) that will hold sources and mics
    room = pra.ShoeBox(room_dim,
                       fs=16000,
                       max_order=15,
                       absorption=0.35,
                       sigma2_awgn=1e-8)

    # Add the sources to the room.
    # Every source starts with an all-zero signal of the right length; the
    # real signal is written in later, one source at a time.
    for sig, d, loc in zip(signals, delays, locations):
        room.add_source(loc, signal=np.zeros_like(sig), delay=d)

    # add microphone array (two microphones)

    room.add_microphone_array(
        pra.MicrophoneArray(np.c_[[6.5, 4.49], [6.5, 4.51]], room.fs))

    # Compute the RIRs as in the Room Impulse Response generation section.

    # compute RIRs
    room.compute_rir()

    # Record each source separately

    separate_recordings = []
    for source, signal in zip(room.sources, signals):
        # activate only this source, simulate, then silence it again
        source.signal[:] = signal
        room.simulate()
        separate_recordings.append(room.mic_array.signals)
        source.signal[:] = 0.
    separate_recordings = np.array(separate_recordings)

    # Mix down the recorded signals (sum over the source axis)
    mics_signals = np.sum(separate_recordings, axis=0)

    # save mixed signals as wav files
    wavfile.write('mix1.wav', fs, np.asarray(mics_signals[0].T,
                                             dtype=np.int16))
    wavfile.write('mix2.wav', fs, np.asarray(mics_signals[1].T,
                                             dtype=np.int16))
    # same mixes, rescaled so that the peak sits at 32767 (int16 full scale)
    wavfile.write(
        'mix1_norm.wav', fs,
        np.asarray(mics_signals[0].T / np.max(np.abs(mics_signals[0].T)) *
                   32767,
                   dtype=np.int16))
    wavfile.write(
        'mix2_norm.wav', fs,
        np.asarray(mics_signals[1].T / np.max(np.abs(mics_signals[1].T)) *
                   32767,
                   dtype=np.int16))

    # STFT frame length
    L = 2048

    # START BSS
    ###########

    # Preprocessing
    # Observation vector in the STFT domain
    # (frame length L, hop L, L // 2 zeros padded at the front and at the back)
    X = np.array([
        pra.stft(ch,
                 L,
                 L,
                 transform=np.fft.rfft,
                 zp_front=L // 2,
                 zp_back=L // 2) for ch in mics_signals
    ])
    # move the channel axis from first to last position
    X = np.moveaxis(X, 0, 2)

    # Reference signal to calculate performance of BSS
    ref = np.moveaxis(separate_recordings, 1, 2)

    # Select the set S of frequency bins handed to SparseAuxIVA: the fraction
    # `ratio` of bins with the largest magnitude of the mean STFT coefficient.
    ratio = 0.35
    average = np.abs(np.mean(np.mean(X, axis=2), axis=0))
    k = np.int_(average.shape[0] * ratio)
    S = np.argpartition(average, -k)[-k:]
    S = np.sort(S)
    n_iter = 30

    # Run SparseAuxIva
    Y = pra.bss.sparseauxiva(X, S, n_iter, lasso=True)

    # run iSTFT (same parameters as the forward transform), one channel at a time
    y = np.array([
        pra.istft(Y[:, :, ch],
                  L,
                  L,
                  transform=np.fft.irfft,
                  zp_front=L // 2,
                  zp_back=L // 2) for ch in range(Y.shape[2])
    ])

    # Compare SIR and SDR with our reference signal
    # (the separated signals are read starting L // 2 samples in)
    sdr, isr, sir, sar, perm = bss_eval_images(
        ref[:, :y.shape[1] - L // 2, 0], y[:, L // 2:ref.shape[1] + L // 2])
    print('SDR: {0}, SIR: {1}'.format(sdr, sir))

    # save the separated signals, raw and peak-normalized
    wavfile.write('demix1.wav', fs, np.asarray(y[0].T, dtype=np.int16))
    wavfile.write('demix2.wav', fs, np.asarray(y[1].T, dtype=np.int16))
    wavfile.write(
        'demix1_norm.wav', fs,
        np.asarray(y[0].T / np.max(np.abs(y[0].T)) * 32767, dtype=np.int16))
    wavfile.write(
        'demix2_norm.wav', fs,
        np.asarray(y[1].T / np.max(np.abs(y[1].T)) * 32767, dtype=np.int16))

コード例 #5

0

ファイルを表示

def test_ilrma():
    """Separate a simulated two-source mixture with ILRMA and check the error.

    The sources listed in the module-level ``wav_files`` are placed in an
    anechoic ``pra.ShoeBox`` room, recorded one at a time by a two-microphone
    array and mixed.  The mixture is transformed with ``pra.stft``, separated
    with ``pra.bss.ilrma`` (with projection back) and inverted with
    ``pra.istft``.  The test passes when the mean squared error against the
    separate recordings, relative to the variance of the input signals, is
    below 1e-5.
    """

    # STFT frame length
    L = 256

    # Room of 8 by 9 (2D)
    room_dim = [8, 9]

    # create an anechoic room with sources and mics
    room = pra.ShoeBox(room_dim, fs=16000, max_order=0, sigma2_awgn=1e-8)

    # get signals: the files of each group are read and concatenated
    signals = [
        np.concatenate(
            [wavfile.read(f)[1].astype(np.float32) for f in source_files])
        for source_files in wav_files
    ]
    delays = [1., 0.]
    locations = [[2.5, 3], [2.5, 6]]

    # Add the sources with all-zero signals of the right length; the real
    # signal is written in later, one source at a time.
    for sig, d, loc in zip(signals, delays, locations):
        room.add_source(loc, signal=np.zeros_like(sig), delay=d)

    # add microphone array (two microphones)
    room.add_microphone_array(
        pra.MicrophoneArray(np.c_[[6.5, 4.49], [6.5, 4.51]], fs=room.fs))

    # compute RIRs
    room.compute_rir()

    # Record each source separately
    separate_recordings = []
    for src, signal in zip(room.sources, signals):
        # activate only this source, simulate, then silence it again
        src.signal[:] = signal
        room.simulate()
        separate_recordings.append(room.mic_array.signals)
        src.signal[:] = 0.
    separate_recordings = np.array(separate_recordings)

    # Mix down the recorded signals (sum over the source axis)
    mics_signals = np.sum(separate_recordings, axis=0)

    # START BSS
    ###########

    # shape == (n_chan, n_frames, n_freq)
    X = np.array([
        pra.stft(ch,
                 L,
                 L,
                 transform=np.fft.rfft,
                 zp_front=L // 2,
                 zp_back=L // 2) for ch in mics_signals
    ])
    # move the channel axis from first to last position
    X = np.moveaxis(X, 0, 2)

    # Run ILRMA
    Y = pra.bss.ilrma(X, n_iter=30, n_components=30, proj_back=True)

    # run iSTFT (same parameters as the forward transform), one channel at a time
    y = np.array([
        pra.istft(Y[:, :, ch],
                  L,
                  L,
                  transform=np.fft.irfft,
                  zp_front=L // 2,
                  zp_back=L // 2) for ch in range(Y.shape[2])
    ])

    # Compare with the separate recordings
    ######################################
    ref = np.moveaxis(separate_recordings, 1, 2)
    # the separated signals are read starting L // 2 samples in
    y_aligned = y[:, L // 2:ref.shape[1] + L // 2]

    mse = np.mean((ref[:, :, 0] - y_aligned)**2)
    input_variance = np.var(np.concatenate(signals))
    relative_mse = mse / input_variance

    print('Relative MSE (expect less than 1e-5):', relative_mse)

    assert relative_mse < 1e-5

コード例 #6

0

ファイルを表示

ファイル: multinmf_conv_mu.py プロジェクト: wjliu0215/separake

def multinmf_conv_mu_wrapper(x,
                             n_src,
                             n_latent_var,
                             stft_win_len,
                             partial_rirs=None,
                             W_dict=None,
                             n_iter=500,
                             l1_reg=0.,
                             random_seed=0,
                             verbose=False):
    '''
    A wrapper around multichannel NMF using MU updates to use with pyroomacoustics.
    Performs the STFT, initializes the NMF parameters, runs the decomposition and
    brings the estimated source images back to the time domain.

    Parameters
    ----------
    x: ndarray
        (n_samples x n_channel) array of time domain samples
    n_src: int
        The number of sources
    n_latent_var: int
        The number of latent variables in the NMF
    stft_win_len:
        The length of the STFT window
    partial_rirs: array_like, optional
        (n_channel x n_src x n_bins) array of partial TF. If provided, Q is not optimized.
    W_dict: array_like, optional
        A dictionary of atoms that can be used in the NMF. If provided, W is not optimized.
        It must have either n_latent_var columns (it is then tiled n_src times)
        or n_src * n_latent_var columns.
    n_iter: int, optional
        The number of iterations of NMF (default 500)
    l1_reg: float, optional
        The weight of the l1 regularization term for the activations (default 0., not regularized)
    random_seed: unsigned int, optional
        The seed to provide to the RNG prior to initialization of NMF parameters. This allows to use
        repeatable initialization.
    verbose: bool, optional
        When true, prints convergence info of NMF (default False)

    Returns
    -------
    ndarray
        One entry per source; each entry is the transposed stack of the
        per-channel inverse STFTs of that source image
        (presumably (n_src, n_samples, n_channel) -- confirm against pra.istft).

    Raises
    ------
    ValueError
        If W_dict has neither n_latent_var nor n_src * n_latent_var columns.
    '''

    n_channel = x.shape[1]

    # STFT
    window = np.sqrt(pra.cosine(stft_win_len))  # use sqrt because of synthesis
    # X is (n_channel, n_frame, n_bin)
    X = np.array([
        pra.stft(x[:, ch],
                 stft_win_len,
                 stft_win_len // 2,
                 win=window,
                 transform=np.fft.rfft) for ch in range(n_channel)
    ])
    # move axes to match Ozerov's order (n_bin, n_frame, n_channel)
    X = np.moveaxis(X, [0, 1, 2], [2, 1, 0])
    n_bin = X.shape[0]
    n_frame = X.shape[1]

    # Squared magnitude, normalized to unit mean; only used for initialization
    V = np.abs(X)**2
    V /= np.mean(V)

    # Random initialization of multichannel NMF parameters
    np.random.seed(random_seed)

    K = n_latent_var * n_src
    # Row j holds the indices of the NMF components assigned to source j.
    # The builtin int replaces the np.int alias, which was removed from NumPy.
    source_NMF_ind = []
    if n_src > 0:
        source_NMF_ind = np.reshape(np.arange(K, dtype=int), (n_src, -1))

    mix_psd = np.mean(V, axis=(1, 2))
    # W is initialized so that its energy follows mixture PSD
    if W_dict is None:
        W_init = 0.5 * ((np.abs(np.random.randn(n_bin, K)) + np.ones(
            (n_bin, K))) * (mix_psd[:, np.newaxis] * np.ones((1, K))))
        fix_W = False
    else:
        if W_dict.shape[1] == n_latent_var:
            # one shared dictionary: repeat it for every source
            W_init = np.tile(W_dict, n_src)
        elif W_dict.shape[1] == n_src * n_latent_var:
            W_init = W_dict
        else:
            raise ValueError(
                'Mismatch between dictionary size and latent variables')
        fix_W = True

    # follow average activations
    mix_act = np.mean(V, axis=(0, 2))
    H_init = 0.5 * (np.abs(np.random.randn(K, n_frame)) + np.ones(
        (K, n_frame))) * mix_act[np.newaxis, :]

    if partial_rirs is not None:
        # squared mag partial rirs (n_bin, n_channel, n_src)
        Q_init = np.moveaxis(np.abs(partial_rirs)**2, [2], [0])
        # scale by the maximum over the frequency axis
        Q_init /= np.max(Q_init, axis=0)[None, :, :]
        fix_Q = True
    else:
        # random initialization
        Q_shape = (n_bin, n_channel, n_src)
        Q_init = (0.5 * (1.9 * np.abs(np.random.randn(*Q_shape)) +
                         0.1 * np.ones(Q_shape)))**2
        fix_Q = False

    # RUN NMF
    # NOTE: the decomposition receives the un-normalized power spectrogram,
    # not V.
    W_MU, H_MU, Q_MU, cost = \
        multinmf_conv_mu(
                np.abs(X)**2, W_init, H_init, Q_init, source_NMF_ind,
                n_iter=n_iter, fix_Q=fix_Q, fix_W=fix_W,
                H_l1_reg=l1_reg,
                verbose=verbose)

    # Computation of the spatial source images
    Im = multinmf_recons_im(X, W_MU, H_MU, Q_MU, source_NMF_ind)

    sep_sources = []
    # Inverse STFT
    for j in range(n_src):
        # channel-wise istft with synthesis window
        ie_MU = [
            pra.istft(Im[:, :, j, ch].T,
                      stft_win_len,
                      stft_win_len // 2,
                      win=window,
                      transform=np.fft.irfft) for ch in range(n_channel)
        ]

        sep_sources.append(np.array(ie_MU).T)

    return np.array(sep_sources)

コード例 #7

0

ファイルを表示

def test_sparseauxiva():
    """Run SparseAuxIVA on a simulated two-microphone mixture and check the error.

    The sources listed in the module-level ``wav_files`` are placed in a
    ``pra.ShoeBox`` room, recorded one at a time and mixed.  The mixture is
    transformed with ``pra.stft``, separated with ``pra.bss.sparseauxiva`` on a
    selected subset of frequency bins and inverted with ``pra.istft``.  The
    test passes when the mean squared error against the separate recordings,
    relative to the variance of the input signals, is below 1e-3.
    """
    # One signal per source: read every file of a group and join them.
    signals = []
    for source_files in wav_files:
        pieces = [wavfile.read(f)[1].astype(np.float32, order='C')
                  for f in source_files]
        signals.append(np.concatenate(pieces))

    # Room, sources and microphones
    ###########
    # Room dimensions in meters
    room = pra.ShoeBox([8, 9], fs=16000, max_order=15, absorption=0.35,
                       sigma2_awgn=1e-8)

    # Sources start out silent (all-zero signals of the right length); the
    # real signal is written in later, one source at a time.
    source_positions = [[2.5, 3], [2.5, 6]]
    source_delays = [1., 0.]
    for sig, delay, position in zip(signals, source_delays, source_positions):
        room.add_source(position, signal=np.zeros_like(sig), delay=delay)

    # Two-microphone array
    mic_positions = np.c_[[6.5, 4.49], [6.5, 4.51]]
    room.add_microphone_array(pra.MicrophoneArray(mic_positions, room.fs))

    room.compute_rir()

    # Record each source on its own: switch it on, simulate, switch it off.
    recordings = []
    for src, sig in zip(room.sources, signals):
        src.signal[:] = sig
        room.simulate()
        recordings.append(room.mic_array.signals)
        src.signal[:] = 0.
    recordings = np.array(recordings)

    # The mixture is the sum over the source axis.
    mixture = recordings.sum(axis=0)

    # STFT frame length and the amount of zero padding on each side
    L = 2048
    half = L // 2

    # Observation vector in the STFT domain, channel axis moved last
    spectra = [
        pra.stft(ch, L, L, transform=np.fft.rfft, zp_front=half, zp_back=half)
        for ch in mixture
    ]
    X = np.moveaxis(np.array(spectra), 0, 2)

    # START BSS
    ###########
    # Keep the fraction `ratio` of bins whose mean STFT coefficient has the
    # largest magnitude, in increasing index order.
    ratio = 0.35
    average = np.abs(X.mean(axis=2).mean(axis=0))
    k = np.int_(average.shape[0] * ratio)
    top_bins = np.argpartition(average, -k)[-k:]
    S = np.sort(top_bins)

    Y = pra.bss.sparseauxiva(X, S)

    # Back to the time domain, one channel at a time
    outputs = []
    for ch in range(Y.shape[2]):
        outputs.append(
            pra.istft(Y[:, :, ch], L, L, transform=np.fft.irfft,
                      zp_front=half, zp_back=half))
    y = np.array(outputs)

    # Compare with the separate recordings
    #############
    ref = np.moveaxis(recordings, 1, 2)
    # the separated signals are read starting L // 2 samples in
    y_aligned = y[:, half:ref.shape[1] + half]

    residual = ref[:, :, 0] - y_aligned
    mse = np.mean(residual ** 2)
    input_variance = np.var(np.concatenate(signals))

    print('Relative MSE (expect less than 1e-3):', mse / input_variance)

    assert (mse / input_variance) < 1e-3

コード例 #8

0

ファイルを表示

def multinmf_conv_em_wrapper(
        x, n_src, stft_win_len, n_latent_var, n_iter=500,
        A_init=None, W_init=None, H_init=None,
        update_a=True, update_w=True, update_h=True,
        verbose=False):
    '''
    A wrapper around multichannel NMF using EM updates to use with pyroomacoustics.
    Performs the STFT, initializes whatever parameters were not supplied, runs
    the decomposition and brings the estimated source images back to the time
    domain.

    Parameters
    ----------
    x: ndarray
        (n_samples x n_chan) array of time domain samples
    n_src: int
        number of sources
    stft_win_len: int
        length of the STFT window; the hop is half of it
    n_latent_var: int
        number of latent variables in the NMF
    n_iter: int, optional
        number of EM iterations, forwarded as ``iter_num`` (default 500)
    A_init: array_like, optional
        initial mixing parameters; axis 2 is moved to the front before use
        (to get (n_bin, n_chan, n_src)). When omitted, a random
        initialization is drawn and ``update_a`` is forced to True.
    W_init: array_like, optional
        initial dictionary. When given, its last dimension sets the total
        number of components; otherwise it is n_latent_var * n_src and W is
        drawn at random, scaled by the mixture PSD.
    H_init: array_like, optional
        initial activations; drawn at random when omitted
    update_a, update_w, update_h: bool, optional
        forwarded to ``multinmf_conv_em`` (default True)
    verbose: bool, optional
        forwarded to ``multinmf_conv_em``; also prints a progress message
        (default False)

    Returns
    -------
    ndarray
        One entry per source; each entry is the transposed stack of the
        per-channel inverse STFTs of that source image
        (presumably (n_src, n_samples, n_chan) -- confirm against pra.istft).
    '''

    n_chan = x.shape[1]

    # STFT
    window = np.sqrt(pra.cosine(stft_win_len))  # use sqrt because of synthesis
    # X is (n_chan, n_frame, n_bin)
    X = np.array([
        pra.stft(x[:, ch], stft_win_len, stft_win_len // 2,
                 win=window, transform=np.fft.rfft)
        for ch in range(n_chan)
    ])
    # move axes to match Ozerov's order (n_bin, n_frame, n_chan)
    X = np.moveaxis(X, [0, 1, 2], [2, 1, 0])
    n_bin = X.shape[0]
    n_frame = X.shape[1]

    if W_init is None:
        K = n_latent_var * n_src
    else:
        K = W_init.shape[-1]

    # Row j holds the indices of the NMF components assigned to source j.
    # The builtin int replaces the np.int alias, which was removed from NumPy.
    source_NMF_ind = []
    if n_src > 0:
        source_NMF_ind = np.reshape(np.arange(K, dtype=int), (n_src, -1))

    mix_psd = 0.5 * (np.mean(np.sum(np.abs(X)**2, axis=2), axis=1))
    if A_init is None:
        # random initialization; A must then be optimized
        # (`random` is resolved at module level, outside this block)
        update_a = True
        A_init = (0.5 *
                  (1.9 * np.abs(random.randn(n_bin, n_chan, n_src))
                   + 0.1 * np.ones((n_bin, n_chan, n_src)))
                  * np.sign(random.randn(n_bin, n_chan, n_src)
                            + 1j * random.randn(n_bin, n_chan, n_src)))
    else:
        # reshape the partial rir input (n_bin, n_chan, n_src)
        A_init = np.moveaxis(A_init, [2], [0])

    # W is initialized so that its energy follows mixture PSD
    if W_init is None:
        W_init = 0.5 * (
            (np.abs(np.random.randn(n_bin, K)) + np.ones((n_bin, K)))
            * (mix_psd[:, np.newaxis] * np.ones((1, K)))
        )
    if H_init is None:
        H_init = 0.5 * (np.abs(np.random.randn(K, n_frame))
                        + np.ones((K, n_frame)))

    Sigma_b_init = mix_psd / 100

    W_EM, H_EM, Ae_EM, Sigma_b_EM, Se_EM, log_like_arr = \
        multinmf_conv_em(X, W_init, H_init, A_init, Sigma_b_init, source_NMF_ind,
            iter_num=n_iter, update_a=update_a, update_w=update_w, update_h=update_h, verbose=verbose)

    # put the frequency axis of the estimated mixing parameters last again
    Ae_EM = np.moveaxis(Ae_EM, [0], [2])

    # Computation of the spatial source images
    if verbose:
        print('Computation of the spatial source images\n')
    # The builtin complex replaces the removed np.complex alias.
    Ie_EM = np.zeros((n_bin, n_frame, n_src, n_chan), dtype=complex)
    for j in range(n_src):
        for f in range(n_bin):
            Ie_EM[f, :, j, :] = np.outer(Se_EM[f, :, j], Ae_EM[:, j, f])

    sep_sources = []

    # Inverse STFT
    for j in range(n_src):
        # channel-wise istft with synthesis window
        ie_EM = [
            pra.istft(Ie_EM[:, :, j, ch].T, stft_win_len, stft_win_len // 2,
                      win=window, transform=np.fft.irfft)
            for ch in range(n_chan)
        ]
        sep_sources.append(np.array(ie_EM).T)

    return np.array(sep_sources)

コード例 #9

0

ファイルを表示

def example_usage_multinmf_conv_em():
    """Demonstrate the EM multichannel NMF on a convolutive mixture.

    Reads ``data/Speech/3sources_3channels_mix.wav``, computes its STFT,
    draws a random initialization of the NMF parameters, runs 300 iterations
    of ``multinmf_conv_em``, writes one wav file per estimated source image
    and plots the estimated W, H and the returned ``log_like_arr``.
    """
    #
    # example_usage_multinmf_conv_em()
    #
    # Example of usage of EM algorithm for multichannel NMF decomposition in
    #   convolutive mixture
    #
    #
    # input
    # -----
    #
    # ...
    #
    # output
    # ------
    #
    # estimated source images are written in the results_dir
    #
    ##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    # Copyright 2017 Robin Scheibler, adapted to Python
    # Copyright 2010 Alexey Ozerov
    # (alexey.ozerov -at- irisa.fr)
    #
    # This software is distributed under the terms of the GNU Public License
    # version 3 (http://www.gnu.org/licenses/gpl.txt)
    #
    # If you use this code please cite this paper
    #
    # A. Ozerov and C. Fevotte,
    # "Multichannel nonnegative matrix factorization in convolutive mixtures for audio source separation,"
    # IEEE Trans. on Audio, Speech and Lang. Proc. special issue on Signal Models and Representations
    # of Musical and Environmental Sounds, vol. 18, no. 3, pp. 550-563, March 2010.
    # Available: http://www.irisa.fr/metiss/ozerov/Publications/OzerovFevotte_IEEE_TASLP10.pdf
    ##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

    NMF_CompPerSrcNum = 4
    nsrc = 3
    stft_win_len = 2048

    data_dir = 'data/Speech/'
    results_dir = 'data/Speech/'
    file_prefix = '3sources_3channels'

    # Input time-frequency representation
    print('Input time-frequency representation')
    fs, x = wavfile.read(data_dir + file_prefix + '_mix.wav')
    # scale by 2**15 (presumably the file holds 16-bit samples -- confirm)
    x = x / (2**15)
    nchan = x.shape[1]

    # TODO STFT
    window = pra.cosine(stft_win_len)
    # X is (nchan, nframe, nbin)
    X = np.array([
        pra.stft(x[:, ch],
                 stft_win_len,
                 stft_win_len // 2,
                 win=window,
                 transform=np.fft.rfft) for ch in range(nchan)
    ])
    # move axes to match Ozerov's order (nbin, nfram, nchan)
    X = np.moveaxis(X, [0, 1, 2], [2, 1, 0])
    nbin = X.shape[0]
    nfram = X.shape[1]

    # Random initialization of multichannel NMF parameters
    print('Random initialization of multichannel NMF parameters')
    K = NMF_CompPerSrcNum * nsrc
    # entry j lists the NMF component indices assigned to source j
    source_NMF_ind = []
    for j in range(nsrc):
        source_NMF_ind.append(
            np.arange(NMF_CompPerSrcNum) + j * NMF_CompPerSrcNum)
    # Mixture PSD: power summed over channels and averaged over frames.
    # The power has to be taken before the channel sum (|X|**2, not X**2),
    # otherwise the complex squares partially cancel each other.
    mix_psd = 0.5 * (np.mean(np.sum(np.abs(X)**2, axis=2), axis=1))
    # unit-modulus random complex numbers
    random_phases = random.randn(nchan, nsrc,
                                 nbin) + 1j * random.randn(nchan, nsrc, nbin)
    random_phases /= np.abs(random_phases)
    A_init = (0.5 *
              (1.9 * np.abs(random.randn(nchan, nsrc, nbin)) + 0.1 * np.ones(
                  (nchan, nsrc, nbin))) * random_phases)
    # W is initialized so that its energy follows mixture PSD
    W_init = 0.5 * ((np.abs(random.randn(nbin, K)) + np.ones(
        (nbin, K))) * (mix_psd[:, np.newaxis] * np.ones((1, K))))
    # W_init = np.load("W_dictionary_em.npy")
    # print(W_init.shape)
    # K = W_init.shape[1]
    H_init = 0.5 * (np.abs(random.randn(K, nfram)) + np.ones((K, nfram)))
    Sigma_b_init = mix_psd / 100

    # run 300 iterations of multichannel NMF EM algorithm
    # (the frequency axis of A_init is moved to the front first)
    A_init = np.moveaxis(A_init, [2], [0])

    W_EM, H_EM, Ae_EM, Sigma_b_EM, Se_EM, log_like_arr = \
        multinmf_conv_em(X, W_init, H_init, A_init, Sigma_b_init, source_NMF_ind, iter_num=300)

    # put the frequency axis of the estimated mixing parameters last again
    Ae_EM = np.moveaxis(Ae_EM, [0], [2])

    # Computation of the spatial source images
    print('Computation of the spatial source images\n')
    # The builtin complex replaces the np.complex alias, which was removed
    # from NumPy.
    Ie_EM = np.zeros((nbin, nfram, nsrc, nchan), dtype=complex)
    for j in range(nsrc):
        for f in range(nbin):
            Ie_EM[f, :, j, :] = np.outer(Se_EM[f, :, j], Ae_EM[:, j, f])

    # Inverse STFT
    for j in range(nsrc):
        # channel-wise istft with synthesis window
        ie_EM = []
        for ch in range(nchan):
            ie_EM.append(
                pra.istft(Ie_EM[:, :, j, ch].T,
                          stft_win_len,
                          stft_win_len // 2,
                          win=window,
                          transform=np.fft.irfft))
        # write the separated source to a wav file
        out_filename = results_dir + '_sim_EM_' + str(j) + '.wav'
        wavfile.write(out_filename, fs, np.array(ie_EM).T)

    # Plot estimated W and H
    print('Plot estimated W and H')
    plt.figure()
    plot_ind = 1
    for k in range(NMF_CompPerSrcNum):
        for j in range(nsrc):
            plt.subplot(NMF_CompPerSrcNum, nsrc, plot_ind)
            # floor at 1e-40 so that the logarithm stays finite
            plt.plot(np.log10(np.maximum(W_EM[:, source_NMF_ind[j][k]],
                                         1e-40)))
            plt.title('Source_{}, log10(W_{})'.format(j, k))
            plot_ind += 1
    plt.tight_layout()

    plt.figure()
    plot_ind = 1
    for k in range(NMF_CompPerSrcNum):
        for j in range(nsrc):
            plt.subplot(NMF_CompPerSrcNum, nsrc, plot_ind)
            plt.plot(H_EM[source_NMF_ind[j][k], :])
            plt.title('Source_{}, H_{}'.format(j, k))
            plot_ind = plot_ind + 1
    plt.tight_layout()

    plt.show()

    # values returned by the EM routine as log_like_arr
    plt.figure()
    plt.plot(log_like_arr)
    plt.show()

コード例 #10

0

ファイルを表示

# Benchmark of the one-shot pra.stft / pra.istft functions: the complete
# analysis + synthesis round trip over all microphones is repeated
# num_times and the elapsed time is averaged.
print()
print("----- MULTIPLE FRAMES AT A TIME -----")
print("One shot function : ", end="")
start = time.time()
for k in range(num_times):

    # Forward transform of every microphone signal (one column of `signals`
    # per microphone).  The `k` inside each comprehension is local to that
    # comprehension and does not disturb the outer loop variable.
    y_mic_stft = np.array([
        pra.stft(signals[:, k],
                 block_size,
                 hop,
                 transform=np.fft.rfft,
                 win=win).T for k in range(num_mic)
    ])
    # Inverse transform; each slice is transposed back to the layout that
    # pra.stft returned.
    x_r = np.array([
        pra.istft(y_mic_stft[k, :, :].T,
                  block_size,
                  hop,
                  transform=np.fft.irfft) for k in range(num_mic)
    ])
avg_time = (time.time() - start) / num_times
print("%0.3f sec" % avg_time)
# Largest absolute reconstruction error in dB, leaving out `hop` samples at
# each end of the reconstructed span.
err_dB = 20 * np.log10(
    np.max(
        np.abs(signals[hop:x_r.shape[1] - hop, ] -
               x_r.T[hop:x_r.shape[1] - hop, ])))
print("Error [dB] : %0.3f" % err_dB)

warnings.filterwarnings(
    "ignore")  # to avoid warning of appending zeros to be printed
print("With STFT object (not fixed) : ", end="")
stft = STFT(block_size,
            hop=hop,