def inver_cqt(cqt, dcs, nys, h_size):
    """Invert a framewise constant-Q transform and overlap-add the frames.

    NOTE(review): relies on the module-level ``kwargs`` dict for the
    ``NSGIConstantQ`` parameters (including ``inputSize``) — verify it is
    defined before this is called; consider passing it explicitly.

    Args:
        cqt: 2D array of stacked CQT frames (rows are time, grouped in
            chunks of ``h_size`` rows per frame).
        dcs: per-frame DC-channel coefficient vectors.
        nys: per-frame Nyquist-channel coefficient vectors.
        h_size: number of CQT rows belonging to each frame.

    Returns:
        1D numpy array with the reconstructed signal (the leading
        half-frame of synthesis padding is dropped).
    """
    inverter = standard.NSGIConstantQ(**kwargs)

    # Inverse-transform each h_size-row chunk of the CQT matrix.
    rec_frames = []
    n_frames = int(cqt.shape[0] / h_size)
    for j, start in enumerate(range(0, cqt.shape[0], h_size)):
        cqt_frame = cqt[start:start + h_size]
        rec_frames.append(inverter(cqt_frame.T, dcs[j], nys[j]))
        utils.progress(j, n_frames, "Inverse Done")

    frameSize = kwargs['inputSize']
    hop = int(frameSize / 2)

    # Overlap-add with a hop of frameSize/2: grow the output by one hop
    # per frame and add the new frame onto the trailing frameSize samples.
    y = rec_frames[0]
    for i in range(1, len(rec_frames)):
        y = np.hstack([y, np.zeros(hop)])
        y[-frameSize:] = y[-frameSize:] + rec_frames[i]
        utils.progress(i, len(rec_frames), "Overlap Done")

    # Discard the first half-frame introduced by the overlap scheme.
    return y[hop:]
def nsgicqgram(cq, dc, nf, frameSize=8192, transitionSize=1024,
               minFrequency=65.41, maxFrequency=6000, binsPerOctave=48,
               sampleRate=44100, rasterize='full', phaseMode='global',
               gamma=0, normalize='none', window='hannnsgcq'):
    """Frame-wise invertible Constant-Q synthesis.

    This code replicates the Sli-CQ algorithm from [1]. An inverse Tukey
    window is used to resynthesize the original audio signal from the
    `nsgcqgram` representation using the `NSGIConstantQ` algorithm.

    References:
        [1] Velasco, G. A., Holighaus, N., Dörfler, M., & Grill, T. (2011).
        "Constructing an invertible constant-Q transform with non-stationary
        Gabor frames". Proceedings of DAFX11, Paris, 93-99.

    Args:
        cq (list of 2D complex arrays): Time / frequency complex matrices
            representing the NSGCQ `constantq` coefficients for each
            `frameSize // 2` samples jump.
        dc (list of complex vectors): Complex vectors representing the NSGCQ
            `constantqdc` coefficients for each `frameSize // 2` samples jump.
        nf (list of complex vectors): Complex vectors representing the NSGCQ
            `constantqnf` coefficients for each `frameSize // 2` samples jump.

    Returns:
        audio (vector): The synthesized audio.
    """
    hopSize = frameSize // 2
    cqSize = cq[0].shape[1]
    dcSize = len(dc[0])
    nfSize = len(nf[0])

    NSGICQS = es.NSGIConstantQ(
        inputSize=frameSize, minFrequency=minFrequency,
        maxFrequency=maxFrequency, binsPerOctave=binsPerOctave,
        sampleRate=sampleRate, rasterize=rasterize, phaseMode=phaseMode,
        gamma=gamma, normalize=normalize, window=window, minimumWindow=8)

    # Inverse Tukey window: a flat unity region in the frame center with
    # transition ramps of `transitionSize` samples on each side.
    # NOTE(review): this rebinding shadows the `window` parameter, which was
    # already consumed by NSGIConstantQ above — harmless but confusing.
    window = np.zeros(frameSize)
    window[np.arange((hopSize + transitionSize) // 2,
                     (3 * hopSize - transitionSize) // 2)] = np.ones(
                         hopSize - transitionSize)
    window[np.hstack([
        np.arange((hopSize - transitionSize) // 2,
                  (hopSize + transitionSize) // 2),
        np.arange((3 * hopSize - transitionSize) // 2,
                  (3 * hopSize + transitionSize) // 2)
    ])] = __inverseTukeyWindow__(
        np.arange(-transitionSize, transitionSize) / transitionSize)

    # Undo the frame centering: alternate frames were circularly shifted by
    # 1/4 vs. 3/4 of their length, so build both index permutations.
    cqShiftEven = np.hstack(
        [np.arange(3 * cqSize // 4, cqSize), np.arange(0, 3 * cqSize // 4)])
    dcShiftEven = np.hstack(
        [np.arange(3 * dcSize // 4, dcSize), np.arange(0, 3 * dcSize // 4)])
    nfShiftEven = np.hstack(
        [np.arange(3 * nfSize // 4, nfSize), np.arange(0, 3 * nfSize // 4)])

    cqShiftOdd = np.hstack(
        [np.arange(cqSize // 4, cqSize), np.arange(0, cqSize // 4)])
    dcShiftOdd = np.hstack(
        [np.arange(dcSize // 4, dcSize), np.arange(0, dcSize // 4)])
    nfShiftOdd = np.hstack(
        [np.arange(nfSize // 4, nfSize), np.arange(0, nfSize // 4)])

    # Frames at odd indices take the "Even" permutation and vice versa,
    # matching the original implementation's `if i % 2` convention.
    cqShift = [
        cq[i][:, cqShiftEven] if i % 2 else cq[i][:, cqShiftOdd]
        for i in range(len(cq))
    ]
    dcShift = [
        dc[i][dcShiftEven] if i % 2 else dc[i][dcShiftOdd]
        for i in range(len(dc))
    ]
    nfShift = [
        nf[i][nfShiftEven] if i % 2 else nf[i][nfShiftOdd]
        for i in range(len(nf))
    ]

    # Synthesize each frame back to the time domain.
    frames = [
        NSGICQS(cqShift[i], dcShift[i], nfShift[i]) for i in range(len(cq))
    ]

    # Overlap-add. For kk == 0 the index range starts negative, so the
    # frame head wraps onto the tail of `audio` — presumably intentional
    # circular padding; TODO(review) confirm against the reference Sli-CQ.
    audio = np.zeros((len(frames) + 1) * hopSize)
    for kk in range(len(frames)):
        idx = np.arange(int(np.ceil((kk - .5) * hopSize)),
                        int(np.ceil((kk + 1.5) * hopSize)))
        shift = int(np.floor(((-1) ** kk) * hopSize / 2))
        audio[idx] += np.roll(frames[kk], shift) * window

    # Drop the leading hop of synthesis padding.
    return audio[hopSize:]