Beispiel #1
0
def _test(method='AuxLaplaceIVA'):
    """Separate a simulated mixture with one IVA variant and save the results.

    The mixture is produced by ``_convolve_mird`` for the titles
    'man-16000' and 'woman-16000', transformed with ``stft``, separated by
    the selected IVA object, and brought back to the time domain with
    ``istft``. One WAV file per title and a loss-curve PNG are written under
    ``data/IVA/<method>/``.

    Args:
        method: 'GradLaplaceIVA', 'NaturalGradLaplaceIVA' or 'AuxLaplaceIVA'.

    Raises:
        ValueError: If ``method`` is not one of the names above.
    """
    np.random.seed(111)

    # Simulated recording: fixed titles, source angles and microphone layout.
    sr = 16000
    titles = ['man-16000', 'woman-16000']
    n_sources = len(titles)
    mixed_signal = _convolve_mird(
        titles,
        reverb=0.16,
        degrees=[60, 300],
        mic_intervals=[8, 8, 8, 8, 8, 8, 8],
        mic_indices=[2, 5],
        samples=int(0.5 * sr)
    )
    # Only the sample count is needed later; the unpack still requires 2-D.
    _, n_samples = mixed_signal.shape

    # STFT (the same parameters are reused for the inverse transform)
    stft_params = dict(fft_size=2048, hop_size=1024)
    mixture = stft(mixed_signal, **stft_params)

    # Choose the separator together with its iteration budget.
    lr = 0.1
    if method == 'GradLaplaceIVA':
        iva, iteration = GradLaplaceIVA(lr=lr), 5000
    elif method == 'NaturalGradLaplaceIVA':
        iva, iteration = NaturalGradLaplaceIVA(lr=lr), 200
    elif method == 'AuxLaplaceIVA':
        iva, iteration = AuxLaplaceIVA(), 50
    else:
        raise ValueError("Not support method {}".format(method))

    estimation = iva(mixture, iteration=iteration)
    estimated_signal = istft(estimation, length=n_samples, **stft_params)

    print("Mixture: {}, Estimation: {}".format(mixed_signal.shape, estimated_signal.shape))

    # One output file per source title.
    for idx in range(n_sources):
        wav_path = "data/IVA/{}/mixture-{}_estimated-iter{}-{}.wav".format(method, sr, iteration, idx)
        write_wav(wav_path, signal=estimated_signal[idx], sr=sr)

    # Loss recorded by the IVA object over the iterations.
    loss_path = 'data/IVA/{}/loss.png'.format(method)
    plt.figure()
    plt.plot(iva.loss, color='black')
    plt.xlabel('Iteration')
    plt.ylabel('Loss')
    plt.savefig(loss_path, bbox_inches='tight')
    plt.close()
def _test(method, n_bases=10, partitioning=False):
    """Separate a simulated mixture with Gauss- or t-ILRMA and save the results.

    The mixture is produced by ``_convolve_mird`` for the titles
    'man-16000' and 'woman-16000', transformed with ``stft``, separated by
    the selected ILRMA object for 200 iterations, and brought back to the
    time domain with ``istft``. One WAV file per title and a loss-curve PNG
    are written under ``data/ILRMA/<method>ILMRA/partitioning<0|1>/``.

    Args:
        method: 'Gauss' for ``GaussILRMA`` or 't' for ``tILRMA``.
        n_bases: Forwarded to the ILRMA constructor.
        partitioning: Forwarded to the ILRMA constructor; its ``int`` value
            also appears in the output directory name.

    Raises:
        ValueError: If ``method`` is neither 'Gauss' nor 't'.
    """
    np.random.seed(111)

    # Room impulse response
    sr = 16000
    reverb = 0.16
    duration = 0.5
    samples = int(duration * sr)
    mic_intervals = [8, 8, 8, 8, 8, 8, 8]
    mic_indices = [2, 5]
    degrees = [60, 300]
    titles = ['man-16000', 'woman-16000']

    mixed_signal = _convolve_mird(titles, reverb=reverb, degrees=degrees, mic_intervals=mic_intervals, mic_indices=mic_indices, samples=samples)

    # Only the sample count is needed; the first axis of the mixture is not
    # the number of sources, so it is deliberately not bound to such a name.
    _, T = mixed_signal.shape

    # STFT
    fft_size, hop_size = 2048, 1024
    mixture = stft(mixed_signal, fft_size=fft_size, hop_size=hop_size)

    # ILRMA
    n_sources = len(titles)  # one separated output per source title
    iteration = 200

    if method == 'Gauss':
        ilrma = GaussILRMA(n_bases=n_bases, partitioning=partitioning)
    elif method == 't':
        ilrma = tILRMA(n_bases=n_bases, partitioning=partitioning)
    else:
        raise ValueError("Not support {}-ILRMA.".format(method))
    estimation = ilrma(mixture, iteration=iteration)

    estimated_signal = istft(estimation, fft_size=fft_size, hop_size=hop_size, length=T)

    print("Mixture: {}, Estimation: {}".format(mixed_signal.shape, estimated_signal.shape))

    # NOTE(review): 'ILMRA' in the output paths looks like a transposition of
    # 'ILRMA'; it is kept so files continue to land where they do today.
    for idx in range(n_sources):
        _estimated_signal = estimated_signal[idx]
        write_wav("data/ILRMA/{}ILMRA/partitioning{}/mixture-{}_estimated-iter{}-{}.wav".format(method, int(partitioning), sr, iteration, idx), signal=_estimated_signal, sr=sr)

    # Loss recorded by the ILRMA object over the iterations.
    plt.figure()
    plt.plot(ilrma.loss, color='black')
    plt.xlabel('Iteration')
    plt.ylabel('Loss')
    plt.savefig('data/ILRMA/{}ILMRA/partitioning{}/loss.png'.format(method, int(partitioning)), bbox_inches='tight')
    plt.close()
def _test(method='DSBF'):
    """Beamform a simulated 8-microphone mixture and save spectrograms and audio.

    The mixture is produced by ``_convolve_mird``, transformed with ``stft``,
    and passed to a beamformer built from plane-wave steering vectors for the
    source directions in ``degrees``. A log-power spectrogram PNG is saved
    for every output of the beamformer and one WAV file per source direction
    is written under ``data/Beamform/<method>/``.

    ``sound_speed`` is read from the enclosing scope.

    Args:
        method: 'DSBF' for ``DelaySumBeamformer`` or 'MVDR' for
            ``MVDRBeamformer``.

    Raises:
        NotImplementedError: If ``method`` is neither 'DSBF' nor 'MVDR'.
    """
    # Room impulse response
    sr = 16000
    reverb = 0.16
    duration = 0.5
    samples = int(duration * sr)
    mic_intervals = [3, 3, 3, 8, 3, 3, 3]
    mic_indices = [0, 1, 2, 3, 4, 5, 6, 7]
    mic_position = np.array([[0.13, 0], [0.10, 0], [0.07, 0], [0.04, 0],
                             [-0.04, 0], [-0.07, 0], [-0.10, 0], [-0.13, 0]])
    degrees = [0, 90]
    titles = ['man-16000', 'woman-16000']

    n_sources = len(degrees)
    mixed_signal = _convolve_mird(titles,
                                  reverb=reverb,
                                  degrees=degrees,
                                  mic_intervals=mic_intervals,
                                  mic_indices=mic_indices,
                                  samples=samples)
    _, T = mixed_signal.shape

    # STFT
    fft_size, hop_size = 2048, 1024
    n_bins = fft_size // 2 + 1
    frequency = np.arange(0, n_bins) * sr / fft_size
    mixture = stft(mixed_signal, fft_size=fft_size,
                   hop_size=hop_size)  # (n_channels, n_bins, n_frames)

    # Steering vectors
    degrees = np.array(degrees) / 180 * np.pi
    x_source, y_source = np.sin(degrees), np.cos(degrees)  # (n_sources,)
    source_position = np.vstack([x_source,
                                 y_source]).transpose(1, 0)  # (n_sources, 2)
    # Inner product of every microphone position with every source direction,
    # turned into a per-frequency phase term.
    steering_vector = np.exp(
        2j * np.pi * frequency[:, np.newaxis, np.newaxis] *
        np.sum(source_position * mic_position[:, np.newaxis, :], axis=2) /
        sound_speed)  # (n_bins, n_channels, n_sources)
    steering_vector = steering_vector / np.sqrt(len(mic_indices))

    if method == 'DSBF':
        beamformer = DelaySumBeamformer(steering_vector=steering_vector)
    elif method == 'MVDR':
        beamformer = MVDRBeamformer(steering_vector=steering_vector)
    else:
        raise NotImplementedError("Not support {} beamformer".format(method))

    estimation = beamformer(mixture)

    # Floor the power before taking the logarithm: a bin with exactly zero
    # energy would otherwise become -inf and break the colour scaling.
    power = np.abs(estimation)**2
    power_floor = np.finfo(np.float64).eps
    log_spectrogram = 10 * np.log10(np.maximum(power, power_floor))
    N, F_bin, T_bin = log_spectrogram.shape
    # pcolormesh is given cell edges, hence one more coordinate than cells.
    t = np.arange(T_bin + 1)
    f = np.arange(F_bin + 1)

    # NOTE(review): 'specrtogram' in the file name looks like a typo for
    # 'spectrogram'; it is kept so existing output names do not change.
    for n in range(N):
        plt.figure()
        plt.pcolormesh(t, f, log_spectrogram[n], cmap='jet')
        plt.savefig("data/Beamform/{}/specrtogram-{}.png".format(method, n),
                    bbox_inches='tight')
        plt.close()

    estimated_signal = istft(estimation,
                             fft_size=fft_size,
                             hop_size=hop_size,
                             length=T)

    print("Mixture: {}, Estimation: {}".format(mixed_signal.shape,
                                               estimated_signal.shape))

    for idx in range(n_sources):
        _estimated_signal = estimated_signal[idx]
        write_wav("data/Beamform/{}/mixture-{}_estimated-{}.wav".format(
            method, sr, idx),
                  signal=_estimated_signal,
                  sr=sr)
Beispiel #4
0
def _test(metric='EUC'):
    """Factorise a music power spectrogram with NMF and save the results.

    Reads ``data/single-channel/music-8000.wav``, fits the selected NMF model
    to its power spectrogram for 100 iterations with 6 bases, and writes:

    * the signal resynthesised from all bases,
    * the log-power spectrogram of the input (``data/NMF/spectrogram.png``),
    * for every base, its resynthesised signal and log-power spectrogram,
    * the loss curve recorded by the NMF object.

    Resynthesis scales the mixture spectrogram by the ratio of estimated to
    observed amplitude, so the mixture phase is reused.

    Args:
        metric: 'EUC' for ``EUCNMF``, 'IS' for ``ISNMF`` or 'KL' for
            ``KLNMF``.

    Raises:
        NotImplementedError: If ``metric`` is none of the names above.
    """
    np.random.seed(111)

    fft_size, hop_size = 1024, 256
    n_bases = 6
    iteration = 100

    signal, sr = read_wav("data/single-channel/music-8000.wav")

    T = len(signal)

    spectrogram = stft(signal, fft_size=fft_size, hop_size=hop_size)
    amplitude = np.abs(spectrogram)
    power = amplitude**2

    if metric == 'EUC':
        nmf = EUCNMF(n_bases)
    elif metric == 'IS':
        nmf = ISNMF(n_bases)
    elif metric == 'KL':
        nmf = KLNMF(n_bases)
    else:
        raise NotImplementedError("Not support {}-NMF".format(metric))

    nmf.update(power, iteration=iteration)

    # Keep the denominator of the amplitude ratio away from zero.
    amplitude[amplitude < EPS] = EPS

    def _resynthesize(estimated_power):
        # Turn an estimated power spectrogram into a peak-normalised signal.
        ratio = np.sqrt(estimated_power) / amplitude
        estimated_signal = istft(ratio * spectrogram,
                                 fft_size=fft_size,
                                 hop_size=hop_size,
                                 length=T)
        peak = np.abs(estimated_signal).max()
        # A silent reconstruction has peak 0; dividing would yield NaNs.
        if peak > 0:
            estimated_signal = estimated_signal / peak
        return estimated_signal

    def _save_log_spectrogram(values, path):
        # Plot 10*log10 of a power spectrogram, floored at EPS, to path.
        plt.figure()
        plt.pcolormesh(10 * np.log10(np.maximum(values, EPS)), cmap='jet')
        plt.colorbar()
        plt.savefig(path, bbox_inches='tight')
        plt.close()

    # Reconstruction from all bases. The sample rate returned by read_wav is
    # used so the output stays correct if the input file is replaced.
    write_wav("data/NMF/{}/music-8000-estimated-iter{}.wav".format(
        metric, iteration),
              signal=_resynthesize(nmf.base @ nmf.activation),
              sr=sr)

    _save_log_spectrogram(power, 'data/NMF/spectrogram.png')

    # Reconstruction from each base on its own (rank-1 outer product).
    for idx in range(n_bases):
        base_power = nmf.base[:, idx:idx + 1] @ nmf.activation[idx:idx + 1, :]

        write_wav("data/NMF/{}/music-8000-estimated-iter{}-base{}.wav".format(
            metric, iteration, idx),
                  signal=_resynthesize(base_power),
                  sr=sr)

        _save_log_spectrogram(
            base_power,
            'data/NMF/{}/estimated-spectrogram-iter{}-base{}.png'.format(
                metric, iteration, idx))

    plt.figure()
    plt.plot(nmf.loss, color='black')
    plt.xlabel('Iteration')
    plt.ylabel('Loss')
    plt.savefig('data/NMF/{}/loss.png'.format(metric), bbox_inches='tight')
    plt.close()