import numpy as np

import essentia
from essentia import standard
from essentia.standard import Windowing

import utils  # project-local helper providing progress()

# `kwargs` is expected to be a module-level dict holding the NSGIConstantQ
# configuration; it must at least contain 'inputSize'.


def inver_cqt(cqt, dcs, nys, h_size):
    CQIStand = standard.NSGIConstantQ(**kwargs)
    recFrame = []

    # Invert the CQT frame by frame (each frame spans h_size time bins).
    for j, i in enumerate(range(0, cqt.shape[0], h_size)):
        cqt_frame = cqt[i:i + h_size]
        inv_cqt_frame = CQIStand(cqt_frame.T, dcs[j], nys[j])
        recFrame.append(inv_cqt_frame)
        utils.progress(j, int(cqt.shape[0] / h_size), "Inverse Done")

    frameSize = kwargs['inputSize']

    y = recFrame[0]

    # Triangular synthesis window (computed here but not applied below).
    invWindow = Windowing(type='triangular', normalized=False,
                          zeroPhase=False)(essentia.array(np.ones(frameSize)))

    # Overlap-add the reconstructed frames with a hop of frameSize // 2.
    for i in range(1, len(recFrame)):
        y = np.hstack([y, np.zeros(frameSize // 2)])
        y[-frameSize:] = y[-frameSize:] + recFrame[i]
        utils.progress(i, len(recFrame), "Overlap Done")

    # Discard the leading half frame.
    y = y[frameSize // 2:]

    return y
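
# A minimal usage sketch for the function above (hypothetical driver code, not
# from the original source): the forward NSGConstantQ is applied to 50%
# overlapping frames, the per-frame coefficients are stacked along the time
# axis, and inver_cqt() undoes the process. The file name and CQ parameters
# are illustrative.
kwargs = {'inputSize': 4096, 'minFrequency': 65.41, 'maxFrequency': 6000,
          'binsPerOctave': 48, 'sampleRate': 44100}
CQStand = standard.NSGConstantQ(**kwargs)

audio = standard.MonoLoader(filename='input.wav', sampleRate=44100)()

cqt_frames, dcs, nys = [], [], []
hop = kwargs['inputSize'] // 2
for start in range(0, len(audio) - kwargs['inputSize'], hop):
    cq, dc, ny = CQStand(audio[start:start + kwargs['inputSize']])
    cqt_frames.append(cq.T)  # (frequency x time) -> (time x frequency)
    dcs.append(dc)
    nys.append(ny)

h_size = cqt_frames[0].shape[0]   # time bins per frame
cqt = np.vstack(cqt_frames)       # stacked along the time axis
reconstructed = inver_cqt(cqt, dcs, nys, h_size)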
Example #2
import numpy as np
import essentia.standard as es

# __inverseTukeyWindow__ is assumed to be defined in the same module
# (as in essentia.pytools.spectral).


def nsgicqgram(cq,
               dc,
               nf,
               frameSize=8192,
               transitionSize=1024,
               minFrequency=65.41,
               maxFrequency=6000,
               binsPerOctave=48,
               sampleRate=44100,
               rasterize='full',
               phaseMode='global',
               gamma=0,
               normalize='none',
               window='hannnsgcq'):
    """Frame-wise invertible Constant-Q synthesis.
    This code replicates the Sli-CQ algorithm from [1]. An inverse Tukey window
    is used to resynthetise the original audio signal from the `nsgcqgram`
    representation using the `NSGIConstantQ` algorithm.

    References:
      [1] Velasco, G. A., Holighaus, N., Dörfler, M., & Grill, T. (2011).
        "Constructing an invertible constant-Q transform with non-stationary
        Gabor frames". Proceedings of DAFX11, Paris, 93-99.

    Args:
        (list of 2D complex arrays): Time / frequency complex matrices representing the NSGCQ `constantq` coefficients for each `frameSize // 2` samples jump.
        (list of complex vectors): Complex vectors representing the NSGCQ `constantqdc` coefficients for each `frameSize // 2` samples jump.
        (list of complex vectors): Complex vectors representing the NSGCQ `constantqnf` coefficients for each `frameSize // 2` samples jump.
    Returns:
        audio (vector): The synthetized audio.
    """

    hopSize = frameSize // 2
    halfTransitionSize = transitionSize // 2

    cqSize = cq[0].shape[1]
    dcSize = len(dc[0])
    nfSize = len(nf[0])

    NSGICQS = es.NSGIConstantQ(inputSize=frameSize,
                               minFrequency=minFrequency,
                               maxFrequency=maxFrequency,
                               binsPerOctave=binsPerOctave,
                               sampleRate=sampleRate,
                               rasterize=rasterize,
                               phaseMode=phaseMode,
                               gamma=gamma,
                               normalize=normalize,
                               window=window,
                               minimumWindow=8)

    # Tukey inverse window.
    window = np.zeros(frameSize)
    window[np.arange((hopSize + transitionSize) // 2,
                     (3 * hopSize - transitionSize) //
                     2)] = np.ones(hopSize - transitionSize)

    window[np.hstack([
        np.arange((hopSize - transitionSize) // 2,
                  (hopSize + transitionSize) // 2),
        np.arange((3 * hopSize - transitionSize) // 2,
                  (3 * hopSize + transitionSize) // 2)
    ])] = __inverseTukeyWindow__(
        np.arange(-transitionSize, transitionSize) / transitionSize)
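
    # The resulting window is flat over `hopSize - transitionSize` samples
    # around the frame centre and tapers over `transitionSize` samples on each
    # side, so the 50% overlap-add below crossfades consecutive frames and
    # compensates the Tukey windowing applied on the analysis (sliCQ) side.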

    # Undo the frame centering.
    cqShiftEven = np.hstack(
        [np.arange(3 * cqSize // 4, cqSize),
         np.arange(0, 3 * cqSize // 4)])
    dcShiftEven = np.hstack(
        [np.arange(3 * dcSize // 4, dcSize),
         np.arange(0, 3 * dcSize // 4)])
    nfShiftEven = np.hstack(
        [np.arange(3 * nfSize // 4, nfSize),
         np.arange(0, 3 * nfSize // 4)])

    cqShiftOdd = np.hstack(
        [np.arange(cqSize // 4, cqSize),
         np.arange(0, cqSize // 4)])
    dcShiftOdd = np.hstack(
        [np.arange(dcSize // 4, dcSize),
         np.arange(0, dcSize // 4)])
    nfShiftOdd = np.hstack(
        [np.arange(nfSize // 4, nfSize),
         np.arange(0, nfSize // 4)])

    cqShift = [
        cq[i][:, cqShiftEven] if i % 2 else cq[i][:, cqShiftOdd]
        for i in range(len(cq))
    ]
    dcShift = [
        dc[i][dcShiftEven] if i % 2 else dc[i][dcShiftOdd]
        for i in range(len(dc))
    ]
    nfShift = [
        nf[i][nfShiftEven] if i % 2 else nf[i][nfShiftOdd]
        for i in range(len(nf))
    ]

    # Synthesize one audio frame per set of CQ coefficients.
    frames = [
        NSGICQS(cqShift[i], dcShift[i], nfShift[i]) for i in range(len(cq))
    ]

    # Overlap-add.
    audio = np.zeros((len(frames) + 1) * hopSize)

    for kk in range(len(frames)):
        audio[np.arange(int(np.ceil((kk - .5) * hopSize)),
                        int(np.ceil((kk + 1.5) * hopSize)))] += np.roll(
                            frames[kk], int(np.floor(
                                ((-1)**kk) * hopSize / 2))) * window

    return audio[hopSize:]
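
# A minimal round-trip sketch (illustrative, not from the original source). It
# assumes an analysis counterpart `nsgcqgram()` as referenced in the docstring
# above, returning the per-frame (cq, dc, nf) lists for the same frameSize and
# CQ parameters; the file name is a placeholder.
audio = es.MonoLoader(filename='input.wav', sampleRate=44100)()

cq, dc, nf = nsgcqgram(audio, frameSize=8192)
reconstructed = nsgicqgram(cq, dc, nf, frameSize=8192)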