def voiceMusicSeparation(audio, masktype=1, lamb=1.25, gain=1.25):
    import stft
    # stft
    specgram = stft.spectrogram(audio)
    # rpca
    D = abs(specgram)
    angle = np.angle(specgram)
    A_mag, E_mag, numiter = ialmRPCA(D, lamb)
    A = A_mag * np.exp(1j * angle)
    E = E_mag * np.exp(1j * angle)
    # binary mask
    if masktype:
        m = 1.0 * (abs(E_mag) > abs(gain * A_mag))
        Emask = m * specgram
        Amask = specgram - Emask
    else:
        Emask = E
        Amask = A
    # istft
    outputA = stft.ispectrogram(Amask)
    outputE = stft.ispectrogram(Emask)
    #output
    wavoutA = np.array(outputA[:len(audio)], dtype=np.int16)
    wavoutE = np.array(outputE[:len(audio)], dtype=np.int16)
    return wavoutA, wavoutE
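
A minimal driver for the separation function above, sketched under the assumption that `ialmRPCA`, `numpy`, `scipy`, and the `stft` package are importable as in the snippet; the file names are illustrative. Following the RPCA convention used above, the low-rank component A is taken as the music estimate and the sparse component E as the voice estimate.

# Hypothetical usage sketch for voiceMusicSeparation (file names are illustrative).
import numpy as np
import scipy.io.wavfile as wavfile

fs, audio = wavfile.read('mixture.wav')         # 16-bit mono mixture (assumed)
music, voice = voiceMusicSeparation(audio)      # A (low rank) -> music, E (sparse) -> voice
wavfile.write('music_estimate.wav', fs, music)
wavfile.write('voice_estimate.wav', fs, voice)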
Example #2
def test_maxdim():
    a = numpy.random.random((512, 2, 2))

    with pytest.raises(ValueError):
        stft.spectrogram(a)

    b = numpy.random.random((512, 2, 2, 3))
    with pytest.raises(ValueError):
        stft.ispectrogram(b)
Example #3
def createMatrix():
    # spectrogram_arguments = {'framelength': 512, 'overlap': 512, 'window': scipy.signal.hamming(512)}
    def saveFile(fn, data):
        with open(fn, 'wb') as f:
            pickle.dump(data, f)

    fs1, data1 = wavfile.read(raw1)
    fs2, data2 = wavfile.read(raw2)

    minlen = min(len(data1), len(data2))
    data1 = data1[:minlen]
    data2 = data2[:minlen]

    spec1 = stft.spectrogram(data1)
    spec2 = stft.spectrogram(data2)

    # Reduce dimension
    spec1 = squeeze(spec1)
    spec2 = squeeze(spec2)

    # same dimensions
    a = np.zeros(spec1.shape)
    b = np.zeros(spec2.shape)

    # hard
    for i in range(len(spec1)):
        for j in range(len(spec1[0])):
            if abs(spec1[i][j]) < abs(spec2[i][j]):
                b[i][j] = 1.0
            else:
                a[i][j] = 1.0

    # soft
    # for i in range(len(spec1)):
    # 	for j in range(len(spec1[0])):
    # 		if (abs(spec1[i][j]) + abs(spec2[i][j])) == 0:
    # 			continue
    # 		a[i][j] = abs(spec1[i][j]) / (abs(spec1[i][j]) + abs(spec2[i][j]))
    # 		b[i][j] = abs(spec2[i][j]) / (abs(spec1[i][j]) + abs(spec2[i][j]))

    fs, data = wavfile.read(merged)
    spec = stft.spectrogram(data)
    spec = squeeze(spec)

    output_a = createSpectrogram(np.multiply(a, spec), spec)
    output_b = createSpectrogram(np.multiply(b, spec), spec)

    output_a2 = stft.ispectrogram(output_a)
    output_b2 = stft.ispectrogram(output_b)

    writeWav(separated_dir + "a.wav", fs1, output_a2)
    writeWav(separated_dir + "b.wav", fs1, output_b2)

    return
Example #4
File: __init__.py Project: audiolabs/mdct
def imdst(
    X,
    odd=True,
    transforms=None,
    **kwargs
):
    """ Calculate lapped inverse MDST of input signal

    Parameters
    ----------
    X : array_like
        The input signal
    odd : boolean, optional
        Switch to oddly stacked transform. Defaults to :code:`True`.
    transforms : module, optional
        Module reference to core transforms. Mostly used to replace
        fast with slow core transforms, for testing. Defaults to
        :mod:`mdct.fast`
    **kwargs, optional
        Additional keyword arguments passed to :code:`stft.ispectrogram`

    Returns
    -------
    out : array_like
        The output signal

    See Also
    --------
    mdct.fast.transforms.imdst : inverse MDST

    """
    if transforms is None:
        transforms = transforms_default

    kwargs.setdefault('framelength', 2048)

    if not odd:
        return stft.ispectrogram(
            X,
            transform=[
                functools.partial(transforms.imdst, odd=False),
                functools.partial(transforms.imdct, odd=False),
            ],
            halved=False,
            **kwargs
        )
    else:
        return stft.ispectrogram(
            X,
            transform=transforms.imdst,
            halved=False,
            **kwargs
        )
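
A round-trip sketch for the inverse above. It assumes the matching forward transform `mdct.mdst` from the same audiolabs/mdct package and uses the `outlength` argument documented for the lapped transforms to crop away the centering padding.

# Round-trip sketch; mdct.mdst is assumed to be the matching forward transform.
import numpy as np
import mdct

sig = np.random.randn(2048 * 16)
spec = mdct.mdst(sig)                        # forward lapped MDST, framelength 2048 by default
rec = mdct.imdst(spec, outlength=len(sig))   # inverse shown above, cropped to the input length
print(np.max(np.abs(sig - rec)))             # expected to be near machine precision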
Example #5
def test_maxdim():
    """
    Test if breaking elementary limitations (2D signal, 3D spectrogram at most)
    are caught appropriately

    """
    a = numpy.random.random((512, 2, 2))

    with pytest.raises(ValueError):
        stft.spectrogram(a)

    b = numpy.random.random((512, 2, 2, 3))
    with pytest.raises(ValueError):
        # we cannot infer data from a NumPy array, so we set framelengt here
        stft.ispectrogram(b, framelength=1024)
Example #7
def icmdct(X, odd=True, transforms=None, **kwargs):
    """ Calculate lapped inverse complex MDCT/MCLT of input signal

    Parameters
    ----------
    X : array_like
        The input signal
    odd : boolean, optional
        Switch to oddly stacked transform. Defaults to :code:`True`.
    transforms : module, optional
        Module reference to core transforms. Mostly used to replace
        fast with slow core transforms, for testing. Defaults to
        :mod:`mdct.fast`
    **kwargs, optional
        Additional keyword arguments passed to :code:`stft.ispectrogram`

    Returns
    -------
    out : array_like
        The output signal

    See Also
    --------
    mdct.fast.transforms.icmdct : inverse complex MDCT

    """
    if transforms is None:
        transforms = transforms_default

    return stft.ispectrogram(X,
                             transform=functools.partial(transforms.icmdct,
                                                         odd=odd),
                             halved=False,
                             **kwargs)
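
A brief sketch pairing this inverse with its forward complex transform, assuming `mdct.cmdct` exists alongside it in the audiolabs/mdct package and that `outlength` is forwarded to :code:`stft.ispectrogram` as in the other examples.

# Sketch; mdct.cmdct is assumed to be the matching forward MCLT.
import numpy as np
import mdct

sig = np.random.randn(1024 * 8)
spec = mdct.cmdct(sig)                              # complex-valued lapped transform
rec = mdct.icmdct(spec, outlength=len(sig)).real    # keep the real part of the reconstruction
print(np.max(np.abs(sig - rec)))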
Example #8
def test_issue_autoinverse_defaults(signal):
    """
    Using defaults in inverse did not work because there were none in place

    """
    x = numpy.array(stft.spectrogram(signal))
    y = stft.ispectrogram(x)
Example #10
def test_issue_autoinverse_values(signal, framelength):
    """
    Passing values to inverse on a plain array failed as the values were
    not actually used

    """
    x = numpy.array(stft.spectrogram(signal, framelength=framelength))
    y = stft.ispectrogram(x, framelength=framelength)
Example #12
def imdst(X, odd=True, transforms=None, **kwargs):
    """ Calculate lapped inverse MDST of input signal

    Parameters
    ----------
    X : array_like
        The input signal
    odd : boolean, optional
        Switch to oddly stacked transform. Defaults to :code:`True`.
    transforms : module, optional
        Module reference to core transforms. Mostly used to replace
        fast with slow core transforms, for testing. Defaults to
        :mod:`mdct.fast`
    **kwargs, optional
        Additional keyword arguments passed to :code:`stft.ispectrogram`

    Returns
    -------
    out : array_like
        The output signal

    See Also
    --------
    mdct.fast.transforms.imdst : inverse MDST

    """
    if transforms is None:
        transforms = transforms_default

    kwargs.setdefault('framelength', 2048)

    if not odd:
        return stft.ispectrogram(X,
                                 transform=[
                                     functools.partial(transforms.imdst,
                                                       odd=False),
                                     functools.partial(transforms.imdct,
                                                       odd=False),
                                 ],
                                 halved=False,
                                 **kwargs)
    else:
        return stft.ispectrogram(X,
                                 transform=transforms.imdst,
                                 halved=False,
                                 **kwargs)
Example #13
def compute_inverse_spectrogram(reals, ims=None):
    if ims is not None:
        specgram = reals + 1j * ims
    else:
        specgram = reals
    output = stft.ispectrogram(specgram,
                               framelength=SEG_SIZE,
                               overlap=OVER_LAP)
    return output
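
For context, a possible forward counterpart, sketched under the assumption that SEG_SIZE and OVER_LAP are the same module-level constants used above and that the caller wants the real and imaginary planes separately (e.g. as model inputs).

# Hypothetical forward counterpart (SEG_SIZE / OVER_LAP come from the surrounding module).
import numpy as np
import stft

def compute_spectrogram(audio):
    specgram = stft.spectrogram(audio,
                                framelength=SEG_SIZE,
                                overlap=OVER_LAP)
    # split into real/imaginary parts so compute_inverse_spectrogram can rebuild the complex STFT
    return np.real(specgram), np.imag(specgram)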
Example #14
def test_multiple_transforms(signal):
    """
    Test if giving multiple different transforms works OK

    """
    a = signal

    x = stft.spectrogram(a, transform=[scipy.fftpack.fft, numpy.fft.fft])
    y = stft.ispectrogram(x, transform=[scipy.fftpack.ifft, numpy.fft.ifft])

    assert numpy.allclose(a, y)
Example #15
File: test_things.py Project: faroit/stft
def test_real(signal):
    """
    Test if real valued input results in real valued output

    """
    a = signal

    x = stft.spectrogram(a)
    y = stft.ispectrogram(x)

    assert y.dtype == numpy.float64
Example #16
def divide():
    def loadFile(fn):
        with open(fn, 'rb') as f:
            return pickle.load(f)

    fs, data = wavfile.read(merged)
    spec = stft.spectrogram(data, framelength=512)
    spec = squeeze(spec)
    Ma = loadFile(m_dir + "M_" + raw1[:-4])
    Mb = loadFile(m_dir + "M_" + raw2[:-4])
    a = createSpectrogram(np.dot(Ma, spec), spec)
    b = createSpectrogram(np.dot(Mb, spec), spec)

    output_a = stft.ispectrogram(a)
    output_b = stft.ispectrogram(b)

    writeWav(separated_dir + "a.wav", fs, output_a)
    writeWav(separated_dir + "b.wav", fs, output_b)
Example #18
def test_precision(channels, padding, signal, framelength):
    """
    Test if transform-inverse identity holds

    """
    a = signal

    x = stft.spectrogram(a, framelength=framelength, padding=padding)
    y = stft.ispectrogram(x, framelength=framelength, padding=padding)

    # Crop first and last frame
    assert numpy.allclose(a, y)
Example #19
def test_overriding(channels, padding, signal, framelength):
    """
    Test if overriding transform settings works

    """
    a = signal

    x = stft.spectrogram(a, framelength=framelength, padding=padding)
    y = stft.ispectrogram(x, framelength=framelength)

    # We were using no overlap during inverse, so our output is twice as long
    assert numpy.allclose(a, y)
Example #20
def test_rms(channels, padding, signal, framelength):
    """
    Test if transform-inverse identity holds

    """
    a = signal

    x = stft.spectrogram(a, framelength=framelength, padding=padding)
    y = stft.ispectrogram(x, framelength=framelength, padding=padding)

    # Crop first and last frame
    assert numpy.sqrt(numpy.mean((a - y) ** 2)) < 1e-8
Example #21
def _istft(stft_matrix_list):
    '''
    Inverse Short-Time Fourier Transform
    '''
    audios = []
    for sm in stft_matrix_list:
        sm = np.transpose(sm)
        ad = stft.ispectrogram(sm,
                               framelength=config.STFT_POINT,
                               overlap=config.STFT_OVERLAP)
        audios.append(ad)
    return audios
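
A forward counterpart sketched to mirror _istft above; it assumes the same config.STFT_POINT / config.STFT_OVERLAP settings and transposes each spectrogram so the inverse can transpose it back.

# Hypothetical forward counterpart to _istft (config constants come from the project).
def _stft(audio_list):
    '''
    Short-Time Fourier Transform for a list of audio signals
    '''
    stft_matrix_list = []
    for ad in audio_list:
        sm = stft.spectrogram(ad,
                              framelength=config.STFT_POINT,
                              overlap=config.STFT_OVERLAP)
        stft_matrix_list.append(np.transpose(sm))  # time-major, matching what _istft expects
    return stft_matrix_list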
Example #23
def test_rms(channels, padding, signal, framelength, halved):
    """
    Test if transform-inverse identity holds

    """
    a = signal

    x = stft.spectrogram(
        a, framelength=framelength, padding=padding, halved=halved
    )
    y = stft.ispectrogram(x)

    assert numpy.sqrt(numpy.mean((a - y) ** 2)) < 1e-8
Example #24
def test_precision(channels, padding, signal, framelength, halved):
    """
    Test if transform-inverse identity holds

    """
    a = signal

    x = stft.spectrogram(
        a, framelength=framelength, padding=padding, halved=halved
    )
    y = stft.ispectrogram(x)

    assert numpy.allclose(a, y)
Example #25
File: test_things.py Project: faroit/stft
def test_complex(signal):
    """
    Test transform-inverse works for complex input

    """
    a = signal

    # create complex test vectors by adding random phase
    c = a + 1j*numpy.random.random(a.shape)
    x = stft.spectrogram(c, halved=False)
    y = stft.ispectrogram(x, halved=False)

    assert c.dtype == y.dtype
    assert numpy.allclose(c, y)
Example #26
def test_precision(channels, padding, signal, framelength, halved):
    """
    Test if transform-inverse identity holds

    """
    a = signal

    x = stft.spectrogram(a,
                         framelength=framelength,
                         padding=padding,
                         halved=halved)
    y = stft.ispectrogram(x)

    assert numpy.allclose(a, y)
Example #27
def test_rms(channels, padding, signal, framelength, halved):
    """
    Test if transform-inverse identity holds

    """
    a = signal

    x = stft.spectrogram(a,
                         framelength=framelength,
                         padding=padding,
                         halved=halved)
    y = stft.ispectrogram(x)

    assert numpy.sqrt(numpy.mean((a - y)**2)) < 1e-8
Example #28
File: __init__.py Project: audiolabs/mdct
def icmdct(
    X,
    odd=True,
    transforms=None,
    **kwargs
):
    """ Calculate lapped inverse complex MDCT/MCLT of input signal

    Parameters
    ----------
    X : array_like
        The input signal
    odd : boolean, optional
        Switch to oddly stacked transform. Defaults to :code:`True`.
    transforms : module, optional
        Module reference to core transforms. Mostly used to replace
        fast with slow core transforms, for testing. Defaults to
        :mod:`mdct.fast`
    **kwargs, optional
        Additional keyword arguments passed to :code:`stft.ispectrogram`

    Returns
    -------
    out : array_like
        The output signal

    See Also
    --------
    mdct.fast.transforms.icmdct : inverse complex MDCT

    """
    if transforms is None:
        transforms = transforms_default

    return stft.ispectrogram(
        X,
        transform=functools.partial(transforms.icmdct, odd=odd),
        halved=False,
        **kwargs
    )
Example #29
def writeWav(fn, fs, data):
    wavfile.write(fn, fs, data)


if __name__ == '__main__':
    spectrogram_args = {'framelength': 512}
    rate_clean, data_clean = wavfile.read(CLEAN_FILE)
    rate_noise, data_noise = wavfile.read(NOISE_FILE)

    data_len = len(data_clean)
    data_noise = data_noise[:data_len]

    print(data_clean.dtype)
    print(data_noise.dtype)

    data_combined = np.array(
        [s1 / 2 + s2 / 2 for (s1, s2) in zip(data_clean, data_noise)],
        dtype=np.int16)
    # data_combined = data_noise

    print(data_combined.dtype)

    wavfile.write('%scombined.wav' % (OUTPUT_DIR), rate_clean, data_combined)

    Sx_clean = stft.spectrogram(data_clean, **spectrogram_args)
    Sx_noise = stft.spectrogram(data_noise, **spectrogram_args)

    reverted_clean = stft.ispectrogram(Sx_clean)
    reverted_noise = stft.ispectrogram(Sx_noise)

    writeWav('%soriginal_clean.wav' % (OUTPUT_DIR), rate_clean, reverted_clean)
    writeWav('%soriginal_noise.wav' % (OUTPUT_DIR), rate_noise, reverted_noise)
Example #30
File: __init__.py Project: audiolabs/mdct
def imdct(
    X,
    odd=True,
    transforms=None,
    **kwargs
):
    """ Calculate lapped inverse MDCT of input signal

    Parameters
    ----------
    X : array_like
        The spectrogram to be inverted. May be a 2D matrix for single channel
        or a 3D tensor for multi channel data. In case of a mono signal, the
        data must be in the shape of :code:`bins x frames`. In case of a multi
        channel signal, the data must be in the shape of :code:`bins x frames x
        channels`.
    odd : boolean, optional
        Switch to oddly stacked transform. Defaults to :code:`True`.
    framelength : int
        The signal frame length. Defaults to infer from data.
    hopsize : int
        The signal frame hopsize. Defaults to infer from data. Setting this
        value will override :code:`overlap`.
    overlap : int
        The signal frame overlap coefficient. Value :code:`x` means
        :code:`1/x` overlap. Defaults to infer from data. Note that anything
        but :code:`2` will result in a filterbank without perfect
        reconstruction.
    centered : boolean
        Pad input signal so that the first and last window are centered around
        the beginning of the signal. Defaults to infer from data.
        The first and last half-frame will have aliasing, so using
        centering during forward MDCT is recommended.
    window : callable, array_like
        Window to be used for deringing. Can be :code:`False` to disable
        windowing. Defaults to infer from data.
    halved : boolean
        Switch to reconstruct the other half of the spectrum if the forward
        transform has been truncated. Defaults to infer from data.
    transforms : module, optional
        Module reference to core transforms. Mostly used to replace
        fast with slow core transforms, for testing. Defaults to
        :mod:`mdct.fast`
    padding : int
        Zero-pad signal with x times the number of samples. Defaults to infer
        from data.
    outlength : int
        Crop output signal to length. Useful when the input length of the
        spectrogram did not fit into framelength and the input data had to be
        padded. Not setting this value will disable cropping; the output data
        may be longer than expected.

    Returns
    -------
    out : array_like
        The output signal

    See Also
    --------
    mdct.fast.transforms.imdct : inverse MDCT

    """
    if transforms is None:
        transforms = transforms_default

    kwargs.setdefault('framelength', 2048)

    if not odd:
        return stft.ispectrogram(
            X,
            transform=[
                functools.partial(transforms.imdct, odd=False),
                functools.partial(transforms.imdst, odd=False),
            ],
            halved=False,
            **kwargs
        )
    else:
        return stft.ispectrogram(
            X,
            transform=transforms.imdct,
            halved=False,
            **kwargs
        )
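
A short usage example tied to the docstring above. It assumes the forward `mdct.mdct` from the same audiolabs/mdct package and that a samples x channels array is accepted, as the underlying stft package allows; `outlength` crops the padding introduced by centering so the round trip can be compared sample by sample.

# Usage sketch (the forward transform mdct.mdct is assumed from the same package).
import numpy as np
import mdct

sig = np.random.randn(2048 * 32, 2)          # stereo: samples x channels
spec = mdct.mdct(sig)                        # bins x frames x channels, framelength 2048 by default
rec = mdct.imdct(spec, outlength=len(sig))   # crop the centering padding
print(np.max(np.abs(sig - rec)))             # near zero when perfect reconstruction holds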
Example #31
def imdct(X, odd=True, transforms=None, **kwargs):
    """ Calculate lapped inverse MDCT of input signal

    Parameters
    ----------
    X : array_like
        The spectrogram to be inverted. May be a 2D matrix for single channel
        or a 3D tensor for multi channel data. In case of a mono signal, the
        data must be in the shape of :code:`bins x frames`. In case of a multi
        channel signal, the data must be in the shape of :code:`bins x frames x
        channels`.
    odd : boolean, optional
        Switch to oddly stacked transform. Defaults to :code:`True`.
    framelength : int
        The signal frame length. Defaults to infer from data.
    hopsize : int
        The signal frame hopsize. Defaults to infer from data. Setting this
        value will override :code:`overlap`.
    overlap : int
        The signal frame overlap coefficient. Value :code:`x` means
        :code:`1/x` overlap. Defaults to infer from data. Note that anything
        but :code:`2` will result in a filterbank without perfect
        reconstruction.
    centered : boolean
        Pad input signal so that the first and last window are centered around
        the beginning of the signal. Defaults to infer from data.
        The first and last half-frame will have aliasing, so using
        centering during forward MDCT is recommended.
    window : callable, array_like
        Window to be used for deringing. Can be :code:`False` to disable
        windowing. Defaults to infer from data.
    halved : boolean
        Switch to reconstruct the other half of the spectrum if the forward
        transform has been truncated. Defaults to infer from data.
    transforms : module, optional
        Module reference to core transforms. Mostly used to replace
        fast with slow core transforms, for testing. Defaults to
        :mod:`mdct.fast`
    padding : int
        Zero-pad signal with x times the number of samples. Defaults to infer
        from data.
    outlength : int
        Crop output signal to length. Useful when the input length of the
        spectrogram did not fit into framelength and the input data had to be
        padded. Not setting this value will disable cropping; the output data
        may be longer than expected.

    Returns
    -------
    out : array_like
        The output signal

    See Also
    --------
    mdct.fast.transforms.imdct : inverse MDCT

    """
    if transforms is None:
        transforms = transforms_default

    kwargs.setdefault('framelength', 2048)

    if not odd:
        return stft.ispectrogram(X,
                                 transform=[
                                     functools.partial(transforms.imdct,
                                                       odd=False),
                                     functools.partial(transforms.imdst,
                                                       odd=False),
                                 ],
                                 halved=False,
                                 **kwargs)
    else:
        return stft.ispectrogram(X,
                                 transform=transforms.imdct,
                                 halved=False,
                                 **kwargs)
Example #32
#    np.concatenate(((np.zeros((65,i+1))),test_data[i+1,0,:,:]),axis=1).shape
    add = (np.concatenate((add, np.zeros((65, 1))), axis=1)
           + np.concatenate((np.zeros((65, i + 1)), pred_data[i + 1, :, :]), axis=1))

avg_out = add/20.0
alpha = 0.5
Male_binary_out = np.array(avg_out > alpha)#,dtype=int)
Female_binary_out = np.array(avg_out < (1-alpha))#,dtype=int)

xf_test = xf_deci[newfrate*120:xfnor.size]  # original samples, not normalized
xm_test = xm_deci[newfrate*120:xfnor.size]
mix_test = np.short(xf_test + xm_test)
mixspec_test = stft.spectrogram(mix_test, framelength=128, hopsize=16,
                                window=scipy.signal.hanning)

Male_output = Male_binary_out*(mixspec_test)
Female_output = Female_binary_out*(mixspec_test)

male_audio_recover = stft.ispectrogram(Male_output, framelength=128, hopsize=16,
                                       window=scipy.signal.hanning)
female_audio_recover = stft.ispectrogram(Female_output, framelength=128, hopsize=16,
                                         window=scipy.signal.hanning)

writewave('./male_recovered.wav',male_audio_recover,f1rate,2,1)
writewave('./female_recovered2.wav',np.short(female_audio_recover),f1rate,2,1)

#pylab.pcolormesh(Male_binary_out*(10*np.log10(xmixspectest[:,1:-3])))
#pylab.pcolormesh(np.nan_to_num(10*np.log10(Female_output)))
################################################

Example #33
def createMatrix():
    # spectrogram_arguments = {'framelength': 512, 'overlap': 512, 'window': scipy.signal.hamming(512)}
    def saveFile(fn, data):
        with open(fn, 'wb') as f:
            pickle.dump(data, f)

    fs1, data1 = wavfile.read(raw1)
    fs2, data2 = wavfile.read(raw2)

    minlen = min(len(data1), len(data2))
    data1 = data1[:minlen]
    data2 = data2[:minlen]

    spec1 = stft.spectrogram(data1)
    spec2 = stft.spectrogram(data2)

    # Reduce dimension
    spec1 = squeeze(spec1)
    spec2 = squeeze(spec2)

    # same dimensions
    a = np.zeros(spec1.shape)
    b = np.zeros(spec2.shape)

    # hard
    for i in range(len(spec1)):
        for j in range(len(spec1[0])):
            if abs(spec1[i][j]) < abs(spec2[i][j]):
                b[i][j] = 1.0
            else:
                a[i][j] = 1.0

    # soft
    # for i in range(len(spec1)):
    # 	for j in range(len(spec1[0])):
    # 		if (abs(spec1[i][j]) + abs(spec2[i][j])) == 0:
    # 			continue
    # 		a[i][j] = abs(spec1[i][j]) / (abs(spec1[i][j]) + abs(spec2[i][j]))
    # 		b[i][j] = abs(spec2[i][j]) / (abs(spec1[i][j]) + abs(spec2[i][j]))

    def plotfft(data, sr, ylim=None):
        plt.plot(np.abs(data))
        if ylim is not None:
            plt.ylim(ylim)
        plt.show()

    fs, data = wavfile.read(merged)
    spec = stft.spectrogram(data)
    spec = squeeze(spec)

    # ax1 = plt.subplot(211)
    time = np.arange(0, 7.6382, 0.0001)
    # plt.plot(time, data1)
    plt.xlim([0, 2])
    # plt.subplot(212)
    Pxx, freqs, bins, im = plt.specgram(data,
                                        NFFT=200,
                                        Fs=fs,
                                        noverlap=100,
                                        cmap=plt.cm.gist_heat)
    plt.show()

    # NOTE: early return used while inspecting the plots above;
    # the separation code below never runs as written
    return

    output_a = createSpectrogram(np.multiply(a, spec), spec)
    output_b = createSpectrogram(np.multiply(b, spec), spec)

    output_a2 = stft.ispectrogram(output_a)
    output_b2 = stft.ispectrogram(output_b)

    writeWav(separated_dir + "a.wav", fs1, output_a2)
    writeWav(separated_dir + "b.wav", fs1, output_b2)

    return
Example #34
def wav_from_magnitude_phase(magnitude, phase, dtype):
    fourier = magnitude * phase
    return ispectrogram(fourier).real.astype(dtype)
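
A hedged usage sketch for the helper above: it derives magnitude and unit-magnitude phase from a forward stft.spectrogram of a hypothetical input file, then reconstructs 16-bit samples. The file names and the np.exp(1j * angle) phase representation are assumptions.

# Usage sketch (file names are illustrative; ispectrogram is what the helper above calls).
import numpy as np
import scipy.io.wavfile as wavfile
from stft import spectrogram, ispectrogram

fs, audio = wavfile.read('input.wav')
spec = spectrogram(audio)
magnitude = np.abs(spec)
phase = np.exp(1j * np.angle(spec))          # unit-magnitude complex phase
samples = wav_from_magnitude_phase(magnitude, phase, audio.dtype)
wavfile.write('reconstructed.wav', fs, samples)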
Example #35
import stft
import scipy.io.wavfile as wav

fs, audio = wav.read('nto2.wav')
specgram = stft.spectrogram(audio)
output = stft.ispectrogram(specgram)
print(output)
Example #36
rows2, columns2 = spectragram2.shape

for r in range(0, rows2):
    if spectral_fit_predict_reversed[r] == 0:
        for c in range(0, columns2):
            spectragram_db[r, c] = 0
            spectragram2[r, c] = 0

directory = '01_spectral_clustering_spec/result01/'
output_file = directory + 'output.wav'
plot_file = directory + 'spectral.png'

if not os.path.exists(directory):
    os.makedirs(directory)

output = stft.ispectrogram(spectragram2)
wavfile.write(output_file, fs, output)

plt.figure(1).set_size_inches(12, 8)
plt.figure(1).subplots_adjust(left=0.05,
                              bottom=0.1,
                              right=0.95,
                              top=0.9,
                              wspace=0.6,
                              hspace=0.8)
plt.pcolormesh(spectragram_db, cmap="YlGnBu")
plt.ylabel('Frequency [Hz]')
plt.xlabel('Samples')
plt.savefig(plot_file, dpi=300)

plt.figure(2).set_size_inches(12, 8)
Example #37
def model_test(test_input):
    test_rate, test_audio = wavfile.read(test_input)
    clean_rate, clean_audio = wavfile.read(CLEAN_FILE)
    noise_rate, noise_audio = wavfile.read(NOISE_FILE)

    length = len(clean_audio)
    noise_audio = noise_audio[:length]

    clean_spec = stft.spectrogram(clean_audio)
    noise_spec = stft.spectrogram(noise_audio)
    test_spec = stft.spectrogram(test_audio)

    reverted_clean = stft.ispectrogram(clean_spec)
    reverted_noise = stft.ispectrogram(noise_spec)

    test_data = np.array([test_spec.transpose() / 100000])  # make data a batch of 1

    with tf.Graph().as_default():
        model = SeparationModel()
        saver = tf.train.Saver(tf.trainable_variables())

        with tf.Session() as session:
            ckpt = tf.train.get_checkpoint_state('checkpoints/')
            if ckpt:
                print("Reading model parameters from %s" %
                      ckpt.model_checkpoint_path)
                saver.restore(session, ckpt.model_checkpoint_path)
            else:
                print("Created model with fresh parameters.")
                session.run(tf.initialize_all_variables())

            test_data_shape = np.shape(test_data)
            dummy_target = np.zeros((test_data_shape[0], test_data_shape[1],
                                     2 * test_data_shape[2]))

            output, _, _ = model.train_on_batch(session,
                                                test_data,
                                                dummy_target,
                                                train=False)

            num_freq_bin = output.shape[2] // 2  # integer division: used below as a slice index
            clean_output = output[0, :, :num_freq_bin]
            noise_output = output[0, :, num_freq_bin:]

            clean_mask, noise_mask = create_mask(clean_output, noise_output)

            clean_spec = createSpectrogram(
                np.multiply(clean_mask.transpose(), test_spec),
                test_spec.stft_settings)
            noise_spec = createSpectrogram(
                np.multiply(noise_mask.transpose(), test_spec),
                test_spec.stft_settings)

            clean_wav = stft.ispectrogram(clean_spec)
            noise_wav = stft.ispectrogram(noise_spec)

            sdr, sir, sar, _ = bss_eval_sources(
                np.array([reverted_clean, reverted_noise]),
                np.array([clean_wav, noise_wav]), False)
            print(sdr, sir, sar)

            writeWav('data/test_combined/output_clean.wav', 44100, clean_wav)
            writeWav('data/test_combined/output_noise.wav', 44100, noise_wav)
Example #38
def model_batch_test():
    test_batch = h5py.File('%stest_batch' % (DIR), 'r')
    data = test_batch['data'][()]

    with open('%stest_settings.pkl' % (DIR), 'rb') as f:
        settings = pickle.load(f)

    # print(settings[:2])

    combined, clean, noise = zip(data)
    combined = combined[0]
    clean = clean[0]
    noise = noise[0]
    target = np.concatenate((clean, noise), axis=2)

    # test_rate, test_audio = wavfile.read('data/test_combined/combined.wav')
    # test_spec = stft.spectrogram(test_audio)

    combined_batch, target_batch = create_batch(combined, target, 50)

    original_combined_batch = [
        copy.deepcopy(batch) for batch in combined_batch
    ]

    with tf.Graph().as_default():
        model = SeparationModel()
        saver = tf.train.Saver(tf.trainable_variables())

        with tf.Session() as session:
            ckpt = tf.train.get_checkpoint_state('checkpoints/')
            if ckpt:
                print("Reading model parameters from %s" %
                      ckpt.model_checkpoint_path)
                saver.restore(session, ckpt.model_checkpoint_path)
            else:
                print("Created model with fresh parameters.")
                session.run(tf.initialize_all_variables())

            curr_mask_array = []
            prev_mask_array = None
            diff = float('inf')
            iters = 0

            while True:
                iters += 1
                output, _, _ = model.train_on_batch(session,
                                                    combined_batch[0],
                                                    target_batch[0],
                                                    train=False)

                num_freq_bin = output.shape[2] // 2  # integer division: used below as a slice index
                clean_outputs = output[:, :, :num_freq_bin]
                noise_outputs = output[:, :, num_freq_bin:]

                # clean = [target[:,:num_freq_bin] for target in target_batch]
                # noise = [target[:,num_freq_bin:] for target in target_batch]

                num_outputs = len(clean_outputs)

                results = []

                for i in xrange(num_outputs):
                    orig_clean_output = clean_outputs[i]
                    orig_noise_output = noise_outputs[i]

                    stft_settings = copy.deepcopy(settings[i])
                    orig_length = stft_settings['orig_length']
                    stft_settings.pop('orig_length', None)
                    clean_output = orig_clean_output[-orig_length:]
                    noise_output = orig_noise_output[-orig_length:]

                    clean_mask, noise_mask = create_mask(
                        clean_output, noise_output)
                    orig_clean_mask, orig_noise_mask = create_mask(
                        orig_clean_output, orig_noise_output)

                    curr_mask_array.append(clean_mask)
                    # if i == 0:
                    # print clean_mask[10:20,10:20]
                    curr_mask_array.append(noise_mask)

                    clean_spec = createSpectrogram(
                        np.multiply(
                            clean_mask.transpose(), original_combined_batch[0]
                            [i][-orig_length:].transpose()), settings[i])
                    noise_spec = createSpectrogram(
                        np.multiply(
                            noise_mask.transpose(), original_combined_batch[0]
                            [i][-orig_length:].transpose()), settings[i])

                    # print '-' * 20
                    # print original_combined_batch[0][i]
                    # print '=' * 20
                    combined_batch[0][i] += np.multiply(
                        orig_clean_mask, original_combined_batch[0][i]) * 0.1
                    # print combined_batch[0][i]
                    # print '=' * 20
                    # print original_combined_batch[0][i]
                    # print '-' * 20

                    estimated_clean_wav = stft.ispectrogram(clean_spec)
                    estimated_noise_wav = stft.ispectrogram(noise_spec)

                    reference_clean_wav = stft.ispectrogram(
                        SpectrogramArray(clean[i][-orig_length:],
                                         stft_settings).transpose())
                    reference_noise_wav = stft.ispectrogram(
                        SpectrogramArray(noise[i][-orig_length:],
                                         stft_settings).transpose())

                    try:
                        sdr, sir, sar, _ = bss_eval_sources(
                            np.array(
                                [reference_clean_wav, reference_noise_wav]),
                            np.array(
                                [estimated_clean_wav, estimated_noise_wav]),
                            False)
                        results.append(
                            (sdr[0], sdr[1], sir[0], sir[1], sar[0], sar[1]))
                        # print('%f, %f, %f, %f, %f, %f' % (sdr[0], sdr[1], sir[0], sir[1], sar[0], sar[1]))
                    except ValueError:
                        print('error')
                        continue
                break

                # diff = 1
                # if prev_mask_array is not None:
                #     # print curr_mask_array[0]
                #     # print prev_mask_array[0]
                #     diff = sum(np.sum(np.abs(curr_mask_array[i] - prev_mask_array[i])) for i in xrange(len(prev_mask_array)))
                #     print('Changes after iteration %d: %d' % (iters, diff))

                # sdr_cleans, sdr_noises, sir_cleans, sir_noises, sar_cleans, sar_noises = zip(*results)
                # print('Avg sdr_cleans: %f, sdr_noises: %f, sir_cleans: %f, sir_noises: %f, sar_cleans: %f, sar_noises: %f' % (np.mean(sdr_cleans), np.mean(sdr_noises), np.mean(sir_cleans), np.mean(sir_noises), np.mean(sar_cleans), np.mean(sar_noises)))

                # prev_mask_array = [copy.deepcopy(mask[:,:]) for mask in curr_mask_array]

                # if diff == 0:
                #     break

            results_filename = '%sresults_%d_%f' % (
                'data/results/', Config.num_layers, Config.lr)
            # results_filename += 'freq_weighted'

            with open(results_filename + '.csv', 'w+') as f:
                for sdr_1, sdr_2, sir_1, sir_2, sar_1, sar_2 in results:
                    f.write('%f,%f,%f,%f,%f,%f\n' %
                            (sdr_1, sdr_2, sir_1, sir_2, sar_1, sar_2))