コード例 #1
0
def run_NMFdiag():
    """Run ``NMFdiag`` on the bees (source) / Beatles (target) example pair.

    Both recordings are read from ``../data/``, reduced to one channel,
    converted from int16 PCM to float32 and transformed with ``forwardSTFT``.
    The templates are the source magnitude spectrogram with every column
    divided by its sum, and the activations are loaded from ``H0.mat`` so
    that the run starts from the same matrix that was generated on the
    MATLAB side.

    Returns
    -------
    dict
        ``'nmfdiagW_div'`` and ``'nmfdiagH_div'``: the two values returned
        by ``NMFdiag`` (in that order).
    """
    inpPath = '../data/'
    matlabMatricesPath = 'matrices/NMFdiag/'

    filenameSource = 'Bees_Buzzing.wav'
    filenameTarget = 'Beatles_LetItBe.wav'

    # read signals; the sample rates are not used anywhere below
    _, xs = wav.read(os.path.join(inpPath, filenameSource))
    _, xt = wav.read(os.path.join(inpPath, filenameTarget))

    # make monaural if necessary, then convert from int16 to float32
    xs = pcmInt16ToFloat32Numpy(make_monaural(xs))
    xt = pcmInt16ToFloat32Numpy(make_monaural(xt))

    blockSize = 2048
    paramSTFT = {
        'blockSize': blockSize,
        'hopSize': 1024,
        'winFunc': np.hanning(blockSize),
        'reconstMirror': True,
        'appendFrame': True,
        # the same settings (with the target length) are used for both signals
        'numSamples': len(xt),
    }

    # STFT computation; only the magnitude spectrograms are needed
    _, As, _ = forwardSTFT(xs, paramSTFT)
    _, At, _ = forwardSTFT(xt, paramSTFT)

    # activations: load the randomly initialized matrix created on MATLAB
    H0 = load_matlab_dict(os.path.join(matlabMatricesPath, 'H0.mat'), 'H0')

    # templates: source frames, each column scaled by its sum (EPS keeps the
    # denominator away from zero)
    W0 = As * 1. / (EPS + np.sum(As, axis=0))

    paramNMFdiag = {
        'fixW': True,
        'numOfIter': 3,
        'continuity': {
            'polyphony': 10,
            'length': 7,
            'grid': 1,
            'sparsen': [1, 7],
        },
    }

    # call the reference implementation as provided by Jonathan Driedger
    # with divergence update rules
    nmfdiagW_div, nmfdiagH_div = NMFdiag(At, W0, H0, paramNMFdiag)

    return {
        'nmfdiagW_div': nmfdiagW_div,
        'nmfdiagH_div': nmfdiagH_div,
    }
コード例 #2
0
def run_logFreqLogMag():
    """Compute the log-frequency / log-magnitude view of the example mixture.

    The mixture recording is read from ``../data/``, reduced to one channel,
    converted from int16 PCM to float32 and transformed with ``forwardSTFT``.
    Its magnitude spectrogram is then handed to ``logFreqLogMag`` together
    with the spectral resolution ``fs / blockSize``.

    Returns
    -------
    dict
        ``'logFreqLogMagA'`` and ``'logFreqAxis'``: the two values returned
        by ``logFreqLogMag`` (in that order).
    """
    wavFile = os.path.join('../data/', 'runningExample_IGotYouMixture.wav')

    # load, down-mix if necessary and convert int16 -> float32
    sampleRate, signal = wav.read(wavFile)
    signal = pcmInt16ToFloat32Numpy(make_monaural(signal))

    # spectral parameters
    blockSize = 2048
    stftSettings = {
        'blockSize': blockSize,
        'hopSize': 512,
        'winFunc': np.hanning(blockSize),
        'reconstMirror': True,
        'appendFrame': True,
        'numSamples': len(signal),
    }

    # only the magnitude spectrogram is needed here
    _, magnitude, _ = forwardSTFT(signal, stftSettings)

    # frequency resolution of one STFT bin
    freqResolution = sampleRate / stftSettings['blockSize']

    # logarithmically-spaced frequency axis version for visualization
    logMagnitude, logAxis = logFreqLogMag(magnitude, freqResolution)

    return {'logFreqLogMagA': logMagnitude, 'logFreqAxis': logAxis}
コード例 #3
0
def run_LSEE_MSTFTM_GriffinLim():
    """Run ``LSEE_MSTFTM_GriffinLim`` on the example mixture's magnitude STFT.

    The mixture recording is read from ``../data/``, reduced to one channel,
    converted from int16 PCM to float32 and transformed with ``forwardSTFT``.
    The magnitude spectrogram and the STFT settings are then passed to
    ``LSEE_MSTFTM_GriffinLim``.

    Returns
    -------
    dict
        ``'res'``: the third value returned by ``LSEE_MSTFTM_GriffinLim``.
    """
    wavFile = os.path.join('../data/', 'runningExample_IGotYouMixture.wav')

    # load (sample rate is not needed), down-mix, convert int16 -> float32
    _, signal = wav.read(wavFile)
    signal = pcmInt16ToFloat32Numpy(make_monaural(signal))

    # spectral parameters
    blockSize = 2048
    stftSettings = {
        'blockSize': blockSize,
        'hopSize': 512,
        'winFunc': np.hanning(blockSize),
        'reconstMirror': True,
        'appendFrame': True,
        'numSamples': len(signal),
    }

    # only the magnitude spectrogram is fed into the reconstruction
    _, magnitude, _ = forwardSTFT(signal, stftSettings)

    # spectrogram and phase outputs are discarded; keep the third output only
    _, _, reconstructed = LSEE_MSTFTM_GriffinLim(magnitude, stftSettings)

    return {'res': reconstructed}
コード例 #4
0
def LSEE_MSTFTM_GriffinLim(X, parameter=None):
    """Iterate the phase reconstruction algorithm described in [2].

    Each iteration performs an iSTFT (LSEE-MSTFT) followed by an STFT of the
    resynthesized signal, and then combines the magnitude of the input ``X``
    with the newly obtained phase. The number of iterations is taken from
    ``parameter['numIterGriffinLim']`` after ``init_parameters`` has
    processed ``parameter``.

    References
    ----------
    [2] Daniel W. Griffin and Jae S. Lim, Signal estimation
    from modified short-time fourier transform, IEEE
    Transactions on Acoustics, Speech and Signal Processing,
    vol. 32, no. 2, pp. 236-243, Apr 1984.

    Parameters
    ----------
    X: array-like
        The STFT spectrogram to iterate upon. Must be two-dimensional; its
        first dimension is passed to ``init_parameters`` as the bin count.

    parameter: dict
        Settings passed through ``init_parameters`` and then forwarded to
        ``inverseSTFT`` and ``forwardSTFT``. The only key read directly in
        this function is ``numIterGriffinLim``.
        NOTE(review): the previous docstring listed blockSize, hopSize,
        anaWinFunc, synWinFunc, reconstMirror, appendFrames and targetEnv,
        while the callers in this file pass blockSize, hopSize, winFunc,
        reconstMirror, appendFrame and numSamples -- confirm the accepted
        keys against ``init_parameters`` and the STFT helpers.

    Returns
    -------
    Xout: array-like
        The spectrogram after iSTFT->STFT processing (input magnitude
        combined with the phase of the last iteration)

    Pout: array-like
        The phase spectrogram after iSTFT->STFT processing

    res: array-like
        Reconstructed time-domain signal obtained via iSTFT in the last
        iteration

    Raises
    ------
    ValueError
        If ``numIterGriffinLim`` is smaller than 1. Without at least one
        iteration there is no phase or time-domain signal to return.
    """

    numBins, _ = X.shape
    parameter = init_parameters(parameter, numBins)

    numIter = parameter['numIterGriffinLim']
    if numIter < 1:
        # Pout and res are only produced inside the loop; fail with a clear
        # message instead of an UnboundLocalError at the return statement.
        raise ValueError(
            "parameter['numIterGriffinLim'] must be at least 1, got "
            "{!r}".format(numIter))

    Xout = deepcopy(X)

    # the magnitude stays fixed; only the phase is re-estimated
    A = abs(Xout)

    for _iteration in range(numIter):
        # perform inverse STFT
        res, _ = inverseSTFT(Xout, parameter)

        # perform forward STFT, keeping only the phase
        _, _, Pout = forwardSTFT(res.squeeze(), parameter)

        # enforce the original magnitude with the updated phase
        Xout = A * np.exp(1j * Pout)

    return Xout, Pout, res
コード例 #5
0
def run_initActivations():
    """Build pitched, drum and uniform activations for the example mixture.

    The mixture recording and its melody / drum transcription text files are
    read from ``../data/``. The STFT of the mixture supplies the number of
    frames, and ``hopSize / fs`` supplies the time resolution handed to
    ``initActivations``.

    Returns
    -------
    dict
        ``'pitchedH'``, ``'drumsH'`` and ``'uniformH'``: the results of
        ``initActivations`` for the strategies ``'pitched'``, ``'drums'``
        and ``'uniform'``.
    """
    dataDir = '../data/'

    # load the mixture, down-mix if necessary and convert int16 -> float32
    sampleRate, signal = wav.read(
        os.path.join(dataDir, 'runningExample_IGotYouMixture.wav'))
    signal = pcmInt16ToFloat32Numpy(make_monaural(signal))

    # corresponding transcription files
    melodyNotes = np.loadtxt(
        os.path.join(dataDir, 'runningExample_IGotYouMelody.txt'))
    drumHits = np.loadtxt(
        os.path.join(dataDir, 'runningExample_IGotYouDrums.txt'))

    # spectral parameters
    blockSize = 2048
    stftSettings = {
        'blockSize': blockSize,
        'hopSize': 512,
        'winFunc': np.hanning(blockSize),
        'reconstMirror': True,
        'appendFrame': True,
        'numSamples': len(signal),
    }

    # only the shape of the complex spectrogram is needed
    spectrogram, _, _ = forwardSTFT(signal, stftSettings)
    _, frameCount = spectrogram.shape
    timeResolution = stftSettings['hopSize'] / sampleRate

    # score-informed activations for the melodic part
    # (transcription columns: 0 -> onsets, 1 -> pitches, 2 -> durations)
    scoreSettings = {
        'deltaT': timeResolution,
        'numFrames': frameCount,
        'pitches': melodyNotes[:, 1],
        'onsets': melodyNotes[:, 0],
        'durations': melodyNotes[:, 2],
    }
    pitchedActivations = initActivations(scoreSettings, 'pitched')

    # score-informed activations for the drum part; the very same dict is
    # extended, so the melody entries other than 'onsets' stay in place
    scoreSettings.update({
        'drums': drumHits[:, 1],
        'onsets': drumHits[:, 0],
        'decay': 0.75,
    })
    drumActivations = initActivations(scoreSettings, 'drums')

    # uniform activations from a fresh parameter set
    uniformSettings = {'numComp': 30, 'numFrames': frameCount}
    uniformActivations = initActivations(uniformSettings, 'uniform')

    return {
        'pitchedH': pitchedActivations,
        'drumsH': drumActivations,
        'uniformH': uniformActivations,
    }
コード例 #6
0
def run_initTemplates():
    """Build pitched, drum and uniform templates for the example mixture.

    The mixture recording and its melody transcription text file are read
    from ``../data/``. The STFT of the mixture supplies the number of bins,
    and ``fs / blockSize`` supplies the spectral resolution handed to
    ``initTemplates``.

    Returns
    -------
    dict
        ``'pitchedW'``, ``'drumsW'`` and ``'uniformW'``: the results of
        ``initTemplates`` for the strategies ``'pitched'``, ``'drums'`` and
        ``'uniform'``.
    """
    dataDir = '../data/'

    # load the mixture, down-mix if necessary and convert int16 -> float32
    sampleRate, signal = wav.read(
        os.path.join(dataDir, 'runningExample_IGotYouMixture.wav'))
    signal = pcmInt16ToFloat32Numpy(make_monaural(signal))

    # corresponding transcription file
    melodyNotes = np.loadtxt(
        os.path.join(dataDir, 'runningExample_IGotYouMelody.txt'))

    # spectral parameters
    blockSize = 2048
    stftSettings = {
        'blockSize': blockSize,
        'hopSize': 512,
        'winFunc': np.hanning(blockSize),
        'reconstMirror': True,
        'appendFrame': True,
        'numSamples': len(signal),
    }

    # only the shape of the complex spectrogram is needed
    spectrogram, _, _ = forwardSTFT(signal, stftSettings)
    binCount, _ = spectrogram.shape
    freqResolution = sampleRate / stftSettings['blockSize']

    # score-informed templates for the melodic part
    # (column 1 of the transcription holds the pitches)
    templateSettings = {
        'deltaF': freqResolution,
        'numBins': binCount,
        'numTemplateFrames': 8,
        'pitches': melodyNotes[:, 1],
    }
    pitchedTemplates = initTemplates(templateSettings, 'pitched')

    # audio-informed templates for the drum part: same dict plus a
    # component count
    templateSettings['numComp'] = 3
    drumTemplates = initTemplates(templateSettings, 'drums')

    # uniform templates, again with the same dict
    uniformTemplates = initTemplates(templateSettings, 'uniform')

    return {
        'pitchedW': pitchedTemplates,
        'drumsW': drumTemplates,
        'uniformW': uniformTemplates,
    }
コード例 #7
0
def run_HPSS_KAM():
    """Run ``HPSS_KAM_Fitzgerald`` twice on the example mixture's magnitude.

    The mixture recording is read from ``../data/``, reduced to one channel,
    converted from int16 PCM to float32 and transformed with ``forwardSTFT``.
    ``HPSS_KAM_Fitzgerald`` is then called once with its fourth argument set
    to ``True`` and once with ``False``; judging by the result names these
    select a median-based and a convolution-based variant (TODO confirm
    against ``HPSS_KAM_Fitzgerald``).

    Returns
    -------
    dict
        ``'medFitzGeraldA'``, ``'Kern'`` and ``'KernOrd'`` from the first
        call and ``'convFitzGeraldA'`` (first output only) from the second.
    """
    wavFile = os.path.join('../data/', 'runningExample_IGotYouMixture.wav')

    # load (sample rate is not needed), down-mix, convert int16 -> float32
    _, signal = wav.read(wavFile)
    signal = pcmInt16ToFloat32Numpy(make_monaural(signal))

    blockSize = 2048
    stftSettings = {
        'blockSize': blockSize,
        'hopSize': 512,
        'winFunc': np.hanning(blockSize),
        'reconstMirror': True,
        'appendFrame': True,
        'numSamples': len(signal),
    }

    # only the magnitude spectrogram is processed
    _, magnitude, _ = forwardSTFT(signal, stftSettings)

    iterations = 1

    # first variant: keep all three outputs
    medianResult, kernel, kernelOrder = HPSS_KAM_Fitzgerald(
        magnitude, iterations, 15, True, 2)

    # second variant: keep the first output only
    # WARNING!: conv2 on MATLAB and convolve2d on python don't give the
    # same result!!
    convResult, _, _ = HPSS_KAM_Fitzgerald(
        magnitude, iterations, 15, False, 2)

    return {
        'medFitzGeraldA': medianResult,
        'convFitzGeraldA': convResult,
        'Kern': kernel,
        'KernOrd': kernelOrder,
    }
コード例 #8
0
def run_NMFconv():
    """Run ``NMFconv`` on the magnitude STFT of the Amen break example.

    The recording is read from ``../data``, reduced to one channel,
    converted from int16 PCM to float32 and transformed with ``forwardSTFT``.
    Initial templates come from ``initTemplates(..., 'drums')`` and initial
    activations from ``initActivations(..., 'uniform')``.

    Returns
    -------
    dict
        ``'nmfconvW'``, ``'nmfconvH'`` and ``'nmfconvV'``: the first three
        values returned by ``NMFconv``; ``'divBeta'``: the fourth value,
        reshaped to a single row.
    """
    inpPath = '../data'
    filename = 'runningExample_AmenBreak.wav'

    # read signal
    fs, x = wav.read(os.path.join(inpPath, filename))

    # make monaural if necessary, then convert from int16 to float32
    x = pcmInt16ToFloat32Numpy(make_monaural(x))

    blockSize = 2048
    paramSTFT = {
        'blockSize': blockSize,
        'hopSize': 512,
        'winFunc': np.hanning(blockSize),
        'reconstMirror': True,
        'appendFrame': True,
        'numSamples': len(x),
    }

    # STFT computation; the phase is not needed
    X, A, _ = forwardSTFT(x, paramSTFT)

    # get dimensions and frequency resolution
    numBins, numFrames = X.shape
    deltaF = fs / paramSTFT['blockSize']

    # set common parameters
    numComp = 3
    numIter = 3
    numTemplateFrames = 8

    # generate initial guess for templates
    paramTemplates = {
        'deltaF': deltaF,
        'numComp': numComp,
        'numBins': numBins,
        'numTemplateFrames': numTemplateFrames,
    }
    initW = initTemplates(paramTemplates, 'drums')

    # generate initial activations
    paramActivations = {
        'numComp': numComp,
        'numFrames': numFrames,
    }
    initH = initActivations(paramActivations, 'uniform')

    # NMFconv parameters
    paramNMFconv = {
        'numComp': numComp,
        'numFrames': numFrames,
        'numIter': numIter,
        'numTemplateFrames': numTemplateFrames,
        'initW': initW,
        'initH': initH,
        'beta': 0,
    }

    # NMFconv core method
    nmfconvW, nmfconvH, nmfconvV, divBeta = NMFconv(A, paramNMFconv)

    return {
        'nmfconvW': nmfconvW,
        'nmfconvH': nmfconvH,
        'nmfconvV': nmfconvV,
        # stored as a single row
        'divBeta': divBeta.reshape(1, -1),
    }
コード例 #9
0
def run_NMF():
    """Run ``NMF`` with three cost functions on the Amen break example.

    The recording is read from ``../data/``, reduced to one channel,
    converted from int16 PCM to float32 and transformed with ``forwardSTFT``.
    A first ``NMFconv`` pass (templates from ``initTemplates(..., 'drums')``,
    activations from ``initActivations(..., 'uniform')``) produces templates
    that are concatenated along axis 1 and used as the initial ``W`` for
    ``NMF``. ``NMF`` is then run once per cost function, always with the same
    parameter dict in which only ``'costFunc'`` is replaced.

    Returns
    -------
    dict
        For each cost function ``<c>`` in ``'EucDist'``, ``'KLDiv'``,
        ``'ISDiv'`` (in this order) the keys ``'nmf<c>W'``, ``'nmf<c>H'`` and
        ``'nmf<c>V'`` holding the three values returned by ``NMF``.
    """
    inpPath = '../data/'
    filename = 'runningExample_AmenBreak.wav'

    # read signal
    fs, x = wav.read(os.path.join(inpPath, filename))

    # make monaural if necessary, then convert from int16 to float32
    x = pcmInt16ToFloat32Numpy(make_monaural(x))

    # spectral parameters
    blockSize = 2048
    paramSTFT = {
        'blockSize': blockSize,
        'hopSize': 512,
        'winFunc': np.hanning(blockSize),
        'reconstMirror': True,
        'appendFrame': True,
        'numSamples': len(x),
    }

    # STFT computation; the phase is not needed
    X, A, _ = forwardSTFT(x, paramSTFT)

    # get dimensions and frequency resolution
    numBins, numFrames = X.shape
    deltaF = fs / paramSTFT['blockSize']

    # ---- stage 1: NMFconv to obtain templates ----
    numConvComp = 3
    numIter = 3
    numTemplateFrames = 8

    # generate initial guess for templates
    paramTemplates = {
        'deltaF': deltaF,
        'numComp': numConvComp,
        'numBins': numBins,
        'numTemplateFrames': numTemplateFrames,
    }
    initW = initTemplates(paramTemplates, 'drums')

    # generate initial activations
    paramConvActivations = {
        'numComp': numConvComp,
        'numFrames': numFrames,
    }
    initConvH = initActivations(paramConvActivations, 'uniform')

    # NMFconv parameters
    paramNMFconv = {
        'numComp': numConvComp,
        'numFrames': numFrames,
        'numIter': numIter,
        'numTemplateFrames': numTemplateFrames,
        'initW': initW,
        'initH': initConvH,
        'beta': 0,
    }

    # NMFconv core method; activations and divergence are not needed
    nmfconvW, _, nmfconvV, _ = NMFconv(A, paramNMFconv)

    # alpha-Wiener filtering
    # NOTE(review): neither output is used below; the call is kept because
    # side effects of alphaWienerFilter cannot be ruled out from here.
    _, _ = alphaWienerFilter(A, nmfconvV, 1)

    # ---- stage 2: NMF initialised with the NMFconv templates ----
    # join the per-component templates along axis 1
    W0 = np.concatenate(nmfconvW, axis=1)
    numNMFComp = W0.shape[1]

    # generate initial activations for the enlarged component count
    paramActivations = {
        'numComp': numNMFComp,
        'numFrames': numFrames,
    }
    initH = initActivations(paramActivations, 'uniform')

    # store common parameters
    paramNMF = {
        'numComp': numNMFComp,
        'numFrames': numFrames,
        'numIter': numIter,
        'initW': W0,
        'initH': initH,
    }

    # one NMF run per cost function, reusing the same parameter dict
    python_res = {}
    for costFunc in ('EucDist', 'KLDiv', 'ISDiv'):
        paramNMF['costFunc'] = costFunc
        nmfW, nmfH, nmfV = NMF(A, paramNMF)

        prefix = 'nmf' + costFunc
        python_res[prefix + 'W'] = nmfW
        python_res[prefix + 'H'] = nmfH
        python_res[prefix + 'V'] = nmfV

    return python_res