def test_oct(self):
    """Round-trip random noise through an octave-scale NSGT.

    The signal length (10**4 up to 10**6 samples), the frequency bounds and
    the bin count (1 to 24) are drawn at random on every run. The signal
    rebuilt by ``backward(forward(sig))`` must match the input with an
    absolute tolerance of 1e-07.
    """
    # The random draws are kept in their original order.
    n_samples = int(10 ** np.random.uniform(4, 6))
    signal = np.random.random(n_samples)
    low_freq = np.random.random() * 200 + 20
    high_freq = np.random.random() * (22048 - low_freq) + low_freq
    n_bins = np.random.randint(24) + 1

    transform = NSGT(
        OctScale(low_freq, high_freq, n_bins),
        fs=44100,
        Ls=len(signal),
    )
    coefficients = transform.forward(signal)
    reconstructed = transform.backward(coefficients)

    is_close = np.allclose(signal, reconstructed, atol=1e-07)
    self.assertTrue(is_close)
def test_oct(self):
    """Check that forward followed by backward NSGT reproduces the input.

    A noise signal of random length is analysed on an ``OctScale`` whose
    bounds and bin count are themselves random, then resynthesised. The
    comparison uses the default tolerances of ``np.allclose``.
    """
    # Same sequence of random draws as before: length, samples, fmin, fmax, bins.
    length = int(10 ** np.random.uniform(4, 6))
    noise = np.random.random(length)
    f_lo = np.random.random() * 200 + 20
    f_hi = np.random.random() * (22048 - f_lo) + f_lo
    bin_count = np.random.randint(24) + 1

    octave_scale = OctScale(f_lo, f_hi, bin_count)
    gabor_transform = NSGT(octave_scale, fs=44100, Ls=len(noise))

    round_trip = gabor_transform.backward(gabor_transform.forward(noise))
    self.assertTrue(np.allclose(noise, round_trip))
def getiNSGT(C, L, Fs, resol=24):
    """
    Invert a Nonstationary Gabor Transform
    :param C: An NBinsxNFrames CQT array
    :param L: Number of samples in audio file
    :param Fs: Sample rate
    :param resol: Number of CQT bins per octave
    :returns: The result of NSGT.backward applied to C
    """
    from nsgt import NSGT, OctScale

    # The scale runs from 50 up to Fs, matching the forward helper's settings.
    transform = NSGT(OctScale(50, Fs, resol), Fs, L, matrixform=True)
    return transform.backward(C)
def getNSGT(X, Fs, resol=24):
    """
    Compute a CQT through the Nonstationary Gabor Transform
    :param X: A 1D array of audio samples
    :param Fs: Sample rate
    :param resol: Number of CQT bins per octave
    :returns: The forward transform coefficients converted with np.array
    """
    from nsgt import NSGT, OctScale

    # The transform length is taken from the signal itself.
    transform = NSGT(OctScale(50, Fs, resol), Fs, len(X), matrixform=True)
    coefficients = transform.forward(X)
    return np.array(coefficients)
def _compute_hdf5_row(tup):
    """Build paired magnitude-spectrogram rows for one (mix, reference) pair.

    Both files are loaded as mono audio at ``sample_rate``, zero-padded to a
    whole number of ``chunk_size`` blocks and transformed block by block with
    a real, matrix-form NSGT of length ``chunk_size``.

    :param tup: ``(mix, ref)`` pair; each item is handed to ``librosa.load``
    :returns: list holding one array per processed chunk, made of the mix
        magnitudes and the reference magnitudes concatenated along axis 1
    :raises AssertionError: if the two loaded signals differ in shape
    """
    mix, ref = tup
    x_mix, _ = librosa.load(mix, sr=sample_rate, mono=True)
    x_ref, _ = librosa.load(ref, sr=sample_rate, mono=True)
    assert x_mix.shape == x_ref.shape

    n_samples = x_mix.shape[0]
    n_chunks = int(numpy.ceil(n_samples / chunk_size))
    n_pad = n_chunks * chunk_size - n_samples

    # Both signals have the same length, so they share one block of padding.
    padding = numpy.zeros(n_pad)
    x_mix = numpy.concatenate((x_mix, padding))
    x_ref = numpy.concatenate((x_ref, padding))

    # One transform object is reused for every chunk.
    nsgt = NSGT(nsgt_scale, sample_rate, chunk_size, real=True,
                matrixform=True)

    all_ndarray_rows = []
    # NOTE(review): this range stops before the final chunk, the only one that
    # can contain padding. Presumably that is deliberate, to keep padded audio
    # out of the rows. Confirm, because a full-length final chunk is dropped
    # as well when no padding was needed.
    for chunk in range(n_chunks - 1):
        start = chunk * chunk_size
        stop = start + chunk_size

        # Forward transform of each signal, keeping magnitudes only.
        mag_mix = numpy.abs(numpy.asarray(nsgt.forward(x_mix[start:stop])))
        mag_ref = numpy.abs(numpy.asarray(nsgt.forward(x_ref[start:stop])))

        # Build the row straight away instead of holding two intermediate
        # lists of spectrograms and zipping them afterwards.
        all_ndarray_rows.append(
            numpy.concatenate((mag_mix, mag_ref), axis=1))

    return all_ndarray_rows
def getiNSGTGriffinLim(C, L, Fs, resol=24, randPhase=False, NIters=20):
    """
    Perform an inverse Nonstationary Gabor Transform with Griffin-Lim style
    phase refinement: the estimate is repeatedly inverted, re-analysed and
    reset to the magnitudes of C while keeping the re-analysed phase.
    :param C: An NBinsxNFrames CQT array whose magnitudes are kept
    :param L: Number of samples in audio file
    :param Fs: Sample rate
    :param resol: Number of CQT bins per octave
    :param randPhase: If True, C is first multiplied by exp(1j * r) where r
        comes from np.random.rand (one value per coefficient)
    :param NIters: Number of refinement iterations
    :returns: Real part of the inverse transform of the final estimate
    """
    from nsgt import NSGT, OctScale
    scl = OctScale(50, Fs, resol)
    nsgt = NSGT(scl, Fs, L, matrixform=True)
    # Magnitudes below this threshold are treated as zero when normalising.
    eps = 2.2204e-16
    if randPhase:
        # np.complex no longer exists in current NumPy; 1j is the same value.
        C = np.exp(1j * np.random.rand(C.shape[0], C.shape[1])) * C
    target_mag = np.abs(C)
    A = np.array(C, dtype=complex)
    for i in range(NIters):
        print("iNSGT Griffin Lim Iteration %i of %i" % (i + 1, NIters))
        # Re-project the CURRENT estimate. Using the constant C here made
        # every iteration produce the same result.
        Ai = np.array(nsgt.forward(nsgt.backward(A)))
        # Allocate a complex buffer explicitly: np.zeros_like(C) is real when
        # C is real, and assigning into it would discard the phase.
        A = np.zeros(np.shape(C), dtype=complex)
        # The re-analysis may return fewer frames than C; the rest stay zero.
        A[:, 0:Ai.shape[1]] = Ai
        Norm = np.abs(A)
        Norm[Norm < eps] = 1  # avoid dividing by (near) zero
        A = target_mag * (A / Norm)
    X = nsgt.backward(A)
    return np.real(X)
def gabor(s, args):
    """Run the forward NSGT on ``s`` with settings unpacked from ``args``.

    TODO: add default parameters to args

    ``args`` must unpack to nine values, in this order: fmin, fmax, real,
    matrixform, reducedform, rate, l_scale, bins and a timing flag.
    ``l_scale`` picks the scale class by key ('log', 'lin', 'mel' or 'oct');
    any other key raises ``KeyError`` from the lookup. Warnings recorded
    while building the scale, building the transform and running it are
    passed to ``userWarning_action``.

    Returns whatever ``NSGT.forward`` produces for ``s``.
    """
    (fmin, fmax, real, matrixform, reducedform,
     rate, l_scale, bins, timed) = args

    # Map the scale name onto the class implementing it.
    scale_cls = {
        'log': LogScale,
        'lin': LinScale,
        'mel': MelScale,
        'oct': OctScale,
    }[l_scale]

    n_samples = len(s)

    if timed:
        started = cputime()

    # Build the scale and the transform, reporting warnings after each step.
    # NOTE(review): the source arrived without indentation; the simplefilter
    # calls are assumed to sit at the end of each `with` body - confirm.
    with warnings.catch_warnings(record=True) as caught:
        scl = scale_cls(fmin, fmax, bins)
        userWarning_action(caught, 'scl raises UserWarning:')
        nsgt = NSGT(scl, rate, n_samples, real, matrixform, reducedform)
        userWarning_action(caught, 'nsgt raises UserWarning:')
        warnings.simplefilter("ignore")

    # Forward transform, with the same warning reporting.
    with warnings.catch_warnings(record=True) as caught:
        c = nsgt.forward(s)
        userWarning_action(
            caught, 'UserWarning raised during forward transform:')
        warnings.simplefilter("ignore")

    logger.debug('Gabor transform performed on {} samples.'.format(n_samples))

    if timed:
        finished = cputime()
        print('Gabor transform performed in {} seconds'.format(
            finished - started))

    return c
def build_cqt_nsgt_pipeline(self):
    """Set up the pre/post processing pipelines around an NSGT transform.

    Optional attributes are read with defaults: ``nsgt_scale`` ('log'),
    ``fmin`` (20), ``fmax`` (half of ``sample_rate``), ``n_bins`` (96),
    ``real`` (False), ``matrix_form`` (True), ``reduced_form`` (False) and
    ``fold_cqt`` (False). ``sample_rate`` and ``audio_length`` are required.

    Side effects: sets ``n_bins``, ``n_frames`` and ``output_shape``, calls
    the ``_add_*`` helpers, appends to ``pre_pipeline`` and inserts at the
    front of ``post_pipeline``.
    """
    from nsgt import NSGT, LogScale, LinScale, MelScale, OctScale

    print("")
    print("Configuring cqt_NSGT pipeline...")

    scale_by_name = {
        'log': LogScale,
        'lin': LinScale,
        'mel': MelScale,
        'oct': OctScale,
    }
    scale_cls = scale_by_name[getattr(self, 'nsgt_scale', 'log')]
    scale = scale_cls(
        getattr(self, 'fmin', 20),
        getattr(self, 'fmax', self.sample_rate / 2),
        getattr(self, 'n_bins', 96),
    )
    transform = NSGT(
        scale,
        self.sample_rate,
        self.audio_length,
        real=getattr(self, 'real', False),
        matrixform=getattr(self, 'matrix_form', True),
        reducedform=getattr(self, 'reduced_form', False),
    )

    # From here on n_bins holds the number of windows of the transform,
    # not the value that was handed to the scale above.
    self.n_bins = len(transform.wins)
    self.n_frames = transform.ncoefs
    self.output_shape = (2, int(self.n_bins / 2), int(self.n_frames))

    self._add_audio_loader()
    self._add_signal_zeropadding()
    self._add_fade_out()
    self._add_norm()

    # Flatten the input before the forward transform; the inverse transform
    # goes first in the post pipeline.
    self.pre_pipeline.extend([lambda x: x.reshape(-1), transform.forward])
    self.post_pipeline.insert(0, transform.backward)

    self._add_mag_phase()
    self._add_log_mag()
    self._add_ifreq()

    # Add folded cqt
    if getattr(self, 'fold_cqt', False):
        self.pre_pipeline.append(fold_cqt)
        self.post_pipeline.insert(0, unfold_cqt)
        # Recomputed from the attributes rather than cached, as before.
        self.output_shape = (4, int(self.n_bins / 2), int(self.n_frames))
trlen = args.trlen if not args.nonsliced: slicq = NSGT_sliced(scl, sllen, trlen, fs, real=True, matrixform=True, multichannel=True, device="cpu") else: slicq = NSGT(scl, fs, signal.shape[-1], real=True, matrixform=True, multichannel=True, device="cpu") # total number of coefficients to represent input signal #ncoefs = int(sf.frames*slicq.coef_factor) # generator for forward transformation if args.nonsliced: c = slicq.forward(signal) else: c = slicq.forward((signal, )) # add a batch c = torch.unsqueeze(c, dim=0)
except KeyError: parser.error('scale unknown') scl = scale(args.fmin, args.fmax, args.bins) times = [] for _ in range(args.time or 1): t1 = cputime() # calculate transform parameters Ls = len(s) nsgt = NSGT(scl, fs, Ls, real=args.real, matrixform=args.matrixform, reducedform=args.reducedform) # forward transform c = nsgt.forward(s) # c = N.array(c) # print "c",len(c),N.array(map(len,c)) # inverse transform s_r = nsgt.backward(c) t2 = cputime() times.append(t2 - t1)
try: scale = scales[options.scale] except KeyError: parser.error('scale unknown') scl = scale(options.fmin,options.fmax,options.bins) times = [] for _ in xrange(options.time or 1): t1 = cputime() # calculate transform parameters Ls = len(s) nsgt = NSGT(scl,fs,Ls,real=options.real,matrixform=options.matrixform,reducedform=options.reducedform) # forward transform c = nsgt.forward(s) # c = N.array(c) # print "c",len(c),N.array(map(len,c)) # inverse transform s_r = nsgt.backward(c) t2 = cputime() times.append(t2-t1) norm = lambda x: N.sqrt(N.sum(N.abs(N.square(x)))) rec_err = norm(s-s_r)/norm(s)
def xtract_mixin(x, instrumental=False, single_model=False,
                 pretrained_model_dir=None):
    """Split ``x`` into harmonic, percussive and vocal signals.

    The signal is zero-padded to a whole number of ``chunk_size`` blocks.
    Each block is transformed with a real, matrix-form NSGT, its magnitude is
    fed to the three models, and the results are transformed back and written
    to the matching position of the outputs.

    :param x: 1-D array of audio samples
    :param instrumental: when true the vocal model is not run and the vocal
        output stays all zeros
    :param single_model: when true each predicted magnitude is combined with
        the original phase through ``_pol2cart``; otherwise the predicted
        magnitudes are turned into soft masks (squared magnitude divided by
        the sum of the three squared magnitudes plus ``K.epsilon()``) that
        are applied to the original coefficients
    :param pretrained_model_dir: directory holding ``model_percussive.h5``,
        ``model_harmonic.h5`` and ``model_vocal.h5``; when None the paths are
        read from ``components``
    :returns: ``(harmonic, percussive, vocal)`` float32 arrays with the same
        number of samples as ``x``
    """
    if pretrained_model_dir is None:
        p_model = components["percussive"]["model_file"]
        h_model = components["harmonic"]["model_file"]
        v_model = components["vocal"]["model_file"]
    else:
        p_model = os.path.join(pretrained_model_dir, "model_percussive.h5")
        h_model = os.path.join(pretrained_model_dir, "model_harmonic.h5")
        v_model = os.path.join(pretrained_model_dir, "model_vocal.h5")

    print("Loading models from:\n\t{0}\n\t{1}\n\t{2}".format(
        h_model, p_model, v_model))

    percussive_model = Model(p_model).model
    harmonic_model = Model(h_model).model
    vocal_model = Model(v_model).model

    n_samples = x.shape[0]
    n_chunks = int(numpy.ceil(n_samples / chunk_size))
    n_pad = n_chunks * chunk_size - n_samples
    x = numpy.concatenate((x, numpy.zeros(n_pad)))

    x_out_h = numpy.zeros_like(x)
    x_out_p = numpy.zeros_like(x)
    x_out_v = numpy.zeros_like(x)

    # One transform object, sized for a single chunk, serves all chunks.
    nsgt = NSGT(nsgt_scale, sample_rate, chunk_size, real=True,
                matrixform=True)

    # Process every chunk, including the zero-padded final one. Stopping at
    # n_chunks - 1 left the tail of each output silent and returned all
    # zeros for inputs shorter than one chunk.
    for chunk in range(n_chunks):
        start = chunk * chunk_size
        stop = start + chunk_size

        # forward transform
        C = numpy.asarray(nsgt.forward(x[start:stop]))
        Cmag_orig, Cphase_orig = librosa.magphase(C)
        Cmag_for_nn = numpy.reshape(Cmag_orig, (1, dim_1, dim_2, 1))

        # inference from model
        Cmag_p = numpy.reshape(
            percussive_model.predict(Cmag_for_nn), (dim_1, dim_2))
        Cmag_h = numpy.reshape(
            harmonic_model.predict(Cmag_for_nn), (dim_1, dim_2))
        if instrumental:
            Cmag_v = numpy.zeros_like(Cmag_h)
        else:
            Cmag_v = numpy.reshape(
                vocal_model.predict(Cmag_for_nn), (dim_1, dim_2))

        if single_model:
            # Predicted magnitude combined with the phase of the mixture.
            Ch_desired = _pol2cart(Cmag_h, Cphase_orig)
            Cp_desired = _pol2cart(Cmag_p, Cphase_orig)
            if not instrumental:
                Cv_desired = _pol2cart(Cmag_v, Cphase_orig)
        else:
            # Soft masks built from the squared predicted magnitudes; the
            # epsilon keeps the denominator away from zero.
            pow_p = numpy.power(Cmag_p, 2.0)
            pow_h = numpy.power(Cmag_h, 2.0)
            pow_v = numpy.power(Cmag_v, 2.0)
            tot = pow_p + pow_h + pow_v + K.epsilon()

            Cp_desired = numpy.multiply(numpy.divide(pow_p, tot), C)
            Ch_desired = numpy.multiply(numpy.divide(pow_h, tot), C)
            Cv_desired = numpy.multiply(numpy.divide(pow_v, tot), C)

        # inverse transform
        s_p = nsgt.backward(Cp_desired)
        s_h = nsgt.backward(Ch_desired)
        if instrumental:
            s_v = numpy.zeros_like(s_h)
        else:
            s_v = nsgt.backward(Cv_desired)

        x_out_p[start:stop] = s_p
        x_out_v[start:stop] = s_v
        x_out_h[start:stop] = s_h

    # strip off padding
    if n_pad > 0:
        x_out_p = x_out_p[:-n_pad]
        x_out_h = x_out_h[:-n_pad]
        x_out_v = x_out_v[:-n_pad]

    x_out_h = x_out_h.astype(numpy.float32)
    x_out_p = x_out_p.astype(numpy.float32)
    x_out_v = x_out_v.astype(numpy.float32)

    return x_out_h, x_out_p, x_out_v