def test_oct(self):
    """Round-trip a random signal through an octave-scaled NSGT.

    The signal length, frequency range and bins per octave are drawn at
    random on every run; the reconstructed signal must match the input to
    within an absolute tolerance of 1e-07.
    """
    # Signal length is log-uniform between 1e4 and 1e6 samples.
    n_samples = int(10**np.random.uniform(4, 6))
    signal = np.random.random(n_samples)

    # fmin lies in [20, 220), fmax in [fmin, 22048), bins/octave in 1..24.
    fmin = np.random.random() * 200 + 20
    fmax = np.random.random() * (22048 - fmin) + fmin
    bins_per_octave = np.random.randint(24) + 1

    transform = NSGT(
        OctScale(fmin, fmax, bins_per_octave),
        fs=44100,
        Ls=len(signal),
    )
    reconstructed = transform.backward(transform.forward(signal))

    self.assertTrue(np.allclose(signal, reconstructed, atol=1e-07))
def getiNSGT(C, L, Fs, resol=24):
    """
    Invert a Nonstationary Gabor Transform back to audio
    :param C: An NBinsxNFrames CQT array
    :param L: Number of samples in audio file
    :param Fs: Sample rate
    :param resol: Number of CQT bins per octave
    :returns: The result of ``NSGT.backward`` applied to C
    """
    from nsgt import NSGT, OctScale

    # Octave scale starting at 50 and using Fs itself as the upper bound,
    # i.e. the same scale parameters that getNSGT builds.
    octave_scale = OctScale(50, Fs, resol)
    transform = NSGT(octave_scale, Fs, L, matrixform=True)
    return transform.backward(C)
def getNSGT(X, Fs, resol=24):
    """
    Compute a CQT via the Nonstationary Gabor Transform
    :param X: A 1D array of audio samples
    :param Fs: Sample rate
    :param resol: Number of CQT bins per octave
    :returns: The forward transform coefficients as a numpy array
    """
    from nsgt import NSGT, OctScale

    # Octave scale starting at 50 and using Fs itself as the upper bound;
    # the transform length is tied to the number of input samples.
    octave_scale = OctScale(50, Fs, resol)
    transform = NSGT(octave_scale, Fs, len(X), matrixform=True)
    coefficients = transform.forward(X)
    return np.array(coefficients)
def runit(self, siglen, fmin, fmax, obins, sllen, trlen, real):
    """Check that a sliced NSGT round trip reproduces the input signal.

    The first ``siglen`` samples of ``rndsig`` are pushed through
    ``NSGT_sliced`` (octave scale ``fmin``..``fmax`` with ``obins`` bins per
    octave, slice length ``sllen``, transition area ``trlen``) and back.
    The reconstruction, truncated to the input length, must agree with the
    input to within an absolute tolerance of 1e-3; on failure the offending
    parameters and the deviation are printed before the assertion fires.
    """
    sig = rndsig[:siglen]
    transform = NSGT_sliced(
        OctScale(fmin, fmax, obins),
        fs=44100,
        sl_len=sllen,
        tr_area=trlen,
        real=real,
    )

    # forward() takes an iterable of signal blocks; the whole signal is
    # passed as a single block.
    coefficients = transform.forward((sig,))
    slices = transform.backward(coefficients)

    # Join the reconstructed slices and drop anything beyond the input length.
    s_r = np.concatenate([list(piece) for piece in slices])[:len(sig)]

    close = np.allclose(sig, s_r, atol=1.e-3)
    if not close:
        print("Failing params:", siglen, fmin, fmax, obins, sllen, trlen, real)
        dev = np.abs(s_r - sig)
        print("Error", np.where(dev > 1.e-3), np.max(dev))
    self.assertTrue(close)
def getiNSGTGriffinLim(C, L, Fs, resol=24, randPhase=False, NIters=20):
    """
    Invert NSGT/CQT coefficients to audio with Griffin-Lim style iterations,
    keeping the magnitudes of C fixed and re-estimating the phases
    :param C: A 2D coefficient array (indexed as C.shape[0] x C.shape[1])
    :param L: Number of samples in audio file
    :param Fs: Sample rate
    :param resol: Number of CQT bins per octave
    :param randPhase: If True, multiply C by random unit-modulus phasors
        before iterating
    :param NIters: Number of projection iterations to run
    :returns: The real part of the inverse transform of the final estimate
    """
    from nsgt import NSGT, OctScale

    scl = OctScale(50, Fs, resol)
    nsgt = NSGT(scl, Fs, L, matrixform=True)
    eps = 2.2204e-16
    if randPhase:
        # NOTE(review): the phases are drawn from [0, 1) radians rather than
        # the full circle; kept as-is, confirm whether 2*pi scaling is wanted.
        # 1j replaces np.complex(0, 1), which no longer exists in NumPy >= 1.24.
        C = np.exp(1j * np.random.rand(C.shape[0], C.shape[1])) * C

    # The magnitudes imposed after every projection never change.
    target_mag = np.abs(C)
    # Builtin complex replaces the removed np.complex alias.
    A = np.array(C, dtype=complex)
    for i in range(NIters):
        print("iNSGT Griffin Lim Iteration %i of %i" % (i + 1, NIters))
        # Re-analyse the CURRENT estimate; feeding C here would make every
        # iteration identical and NIters meaningless.
        Ai = np.array(nsgt.forward(nsgt.backward(A)))
        # Explicitly complex buffer so the phases survive even when C is
        # real-valued. The re-analysed coefficients fill the leading columns;
        # presumably forward() may return fewer frames than C has.
        A = np.zeros(C.shape, dtype=complex)
        A[:, 0:Ai.shape[1]] = Ai
        # Normalise to unit modulus, leaving (near-)zero bins untouched.
        Norm = np.abs(A)
        Norm[Norm < eps] = 1
        A = target_mag * (A / Norm)
    X = nsgt.backward(A)
    return np.real(X)
def main():
    """Split an audio file into harmonic and percussive WAV files.

    Command-line arguments select the mask strategy (``--mask``), the output
    directory (``--outdir``), the simulated stream size (``--stream-size``)
    and the input file. The input is processed in overlapping slices of
    ``4 * stream_size`` samples advanced by ``stream_size`` samples; each
    slice is transformed with ``NSGT_sliced``, masked using median-filtered
    magnitudes, transformed back and written into the output buffers.

    Returns:
        0 once both output files have been written.
    """
    parser = ArgumentParser()
    parser.add_argument(
        "--mask",
        type=str,
        default="soft",
        choices=("hard", "soft"),
        help="mask strategy",
    )
    parser.add_argument("--outdir", type=str, default="./", help="output directory")
    parser.add_argument(
        "--stream-size",
        type=int,
        default=1024,
        help="stream size for simulated realtime from wav (default=%(default)s)",
    )
    parser.add_argument("input", type=str, help="input file")
    args = parser.parse_args()

    # Output names reuse the part of the input file name before the first "_".
    # basename() handles the platform's path separators, unlike split("/").
    prefix = os.path.basename(args.input).split("_")[0]
    harm_out = os.path.join(args.outdir, prefix + "_harmonic.wav")
    perc_out = os.path.join(args.outdir, prefix + "_percussive.wav")
    print("writing files to {0}, {1}".format(harm_out, perc_out))

    # Median filter lengths applied along axis 1 (lharm) and axis 2 (lperc).
    lharm = 17
    lperc = 7

    # calculate transform parameters
    nsgt_scale = OctScale(80, 20000, 12)
    trlen = args.stream_size  # transition length
    sllen = 4 * args.stream_size  # slice length

    x, fs = librosa.load(args.input, sr=None)
    xh = numpy.zeros_like(x)
    xp = numpy.zeros_like(x)

    hop = trlen
    n_chunks = x.shape[0] // hop
    # The last hop is skipped, and very short inputs yield no iterations.
    n_iters = max(n_chunks - 1, 0)

    eps = numpy.finfo(numpy.float32).eps

    slicq = NSGT_sliced(
        nsgt_scale,
        sllen,
        trlen,
        fs,
        real=True,
        matrixform=True,
    )

    total_time = 0.0

    for chunk in range(n_iters):
        t1 = cputime()

        start = chunk * hop
        end = start + sllen
        # Near the end of the file this slice is shorter than sllen.
        s = x[start:end]
        signal = (s,)

        c = slicq.forward(signal)
        c = list(c)
        C = numpy.asarray(c)

        Cmag = numpy.abs(C)
        H = scipy.ndimage.median_filter(Cmag, size=(1, lharm, 1))
        P = scipy.ndimage.median_filter(Cmag, size=(1, 1, lperc))

        # NOTE(review): the soft branch derives Mp from H and Mh from P, while
        # the hard branch derives Mh from H and Mp from P, so the two branches
        # give H and P opposite roles. Confirm against the coefficient axis
        # layout which one is intended before changing either.
        if args.mask == "soft":
            # soft mask first
            tot = numpy.power(H, 2.0) + numpy.power(P, 2.0) + eps
            Mp = numpy.divide(numpy.power(H, 2.0), tot)
            Mh = numpy.divide(numpy.power(P, 2.0), tot)
        else:
            Mh = numpy.divide(H, P + eps) > 2.0
            Mp = numpy.divide(P, H + eps) >= 2.0

        Cp = numpy.multiply(Mp, C)
        Ch = numpy.multiply(Mh, C)

        # generator for backward transformation
        outseq_h = slicq.backward(Ch)
        outseq_p = slicq.backward(Cp)

        # make single output array from iterator
        sh_r = next(reblock(outseq_h, len(s), fulllast=False))
        sh_r = sh_r.real

        sp_r = next(reblock(outseq_p, len(s), fulllast=False))
        sp_r = sp_r.real

        # Later slices overwrite the overlapping tail of earlier ones.
        xh[start:end] = sh_r
        xp[start:end] = sp_r

        t2 = cputime()
        total_time += t2 - t1

    # Average over the iterations that actually ran; avoid dividing by zero
    # when the input is too short to produce any.
    avg_time = total_time / n_iters if n_iters else 0.0
    print("Calculation time per iter: %fs" % avg_time)

    scipy.io.wavfile.write(harm_out, fs, xh)
    scipy.io.wavfile.write(perc_out, fs, xp)

    return 0