Ejemplo n.º 1
0
def getiNSGTGriffinLim(C, L, Fs, resol=24, randPhase=False, NIters=20):
    """
    Invert a Nonstationary Gabor Transform using Griffin-Lim style
    iterations: repeatedly go back to the time domain and forward again,
    keeping the resulting phase while re-imposing the magnitude of C.
    :param C: An NBinsxNFrames CQT array whose magnitude is to be matched
    :param L: Number of samples in audio file
    :param Fs: Sample rate
    :param resol: Number of CQT bins per octave
    :param randPhase: If True, multiply C by random unit-magnitude phasors
        before iterating
    :param NIters: Number of Griffin-Lim iterations
    :returns: The real part of the inverse transform of the final estimate
    """
    from nsgt import NSGT, OctScale
    scl = OctScale(50, Fs, resol)
    nsgt = NSGT(scl, Fs, L, matrixform=True)
    eps = 2.2204e-16
    if randPhase:
        # NOTE(review): np.random.rand is uniform on [0, 1), so the phase is
        # drawn from [0, 1) radians rather than the full circle -- confirm
        # whether a 2*pi factor was intended.
        C = np.exp(1j * np.random.rand(C.shape[0], C.shape[1])) * C
    target_mag = np.abs(C)
    # The builtin ``complex`` replaces the removed ``np.complex`` alias.
    A = np.array(C, dtype=complex)
    for i in range(NIters):
        print("iNSGT Griffin Lim Iteration %i of %i" % (i + 1, NIters))
        # Project the *current* estimate; projecting the constant C would
        # make every iteration produce the same result.
        Ai = np.array(nsgt.forward(nsgt.backward(A)))
        # Force a complex buffer so the phase survives even if C is real.
        A = np.zeros_like(C, dtype=complex)
        # The round trip may return fewer frames than C has; the remaining
        # columns stay zero.
        A[:, 0:Ai.shape[1]] = Ai
        Norm = np.abs(A)
        Norm[Norm < eps] = 1  # avoid dividing by (near) zero
        A = target_mag * (A / Norm)
    X = nsgt.backward(A)
    return np.real(X)
Ejemplo n.º 2
0
 def test_oct(self):
     """Round-trip a random signal through an octave-scale NSGT and check
     that the reconstruction is close to the input."""
     n_samples = int(10 ** np.random.uniform(4, 6))
     signal = np.random.random(n_samples)
     # Lower frequency in [20, 220); upper frequency between it and 22048.
     f_low = 20 + 200 * np.random.random()
     f_high = f_low + (22048 - f_low) * np.random.random()
     bins = 1 + np.random.randint(24)
     transform = NSGT(OctScale(f_low, f_high, bins),
                      fs=44100,
                      Ls=n_samples)
     coeffs = transform.forward(signal)
     restored = transform.backward(coeffs)
     self.assertTrue(np.allclose(signal, restored))
Ejemplo n.º 3
0
 def test_oct(self):
     """Round-trip a random signal through an octave-scale NSGT and check
     that the reconstruction matches the input within atol=1e-07."""
     n_samples = int(10 ** np.random.uniform(4, 6))
     signal = np.random.random(n_samples)
     # Lower frequency in [20, 220); upper frequency between it and 22048.
     f_low = 20 + 200 * np.random.random()
     f_high = f_low + (22048 - f_low) * np.random.random()
     bins = 1 + np.random.randint(24)
     transform = NSGT(OctScale(f_low, f_high, bins),
                      fs=44100,
                      Ls=n_samples)
     coeffs = transform.forward(signal)
     restored = transform.backward(coeffs)
     self.assertTrue(np.allclose(signal, restored, atol=1e-07))
Ejemplo n.º 4
0
def getiNSGT(C, L, Fs, resol=24):
    """
    Reconstruct a signal from Nonstationary Gabor Transform coefficients
    :param C: NBins x NFrames array of CQT coefficients
    :param L: Length of the audio signal, in samples
    :param Fs: Sampling rate
    :param resol: CQT bins per octave
    :returns: The result of the inverse transform applied to C
    """
    from nsgt import NSGT, OctScale
    # Octave scale built from 50, Fs and resol, in matrix form.
    transform = NSGT(OctScale(50, Fs, resol), Fs, L, matrixform=True)
    return transform.backward(C)
Ejemplo n.º 5
0
    nsgt = NSGT(scl,
                fs,
                Ls,
                real=args.real,
                matrixform=args.matrixform,
                reducedform=args.reducedform)

    # forward transform
    c = nsgt.forward(s)

    #        c = N.array(c)
    #        print "c",len(c),N.array(map(len,c))

    # inverse transform
    s_r = nsgt.backward(c)

    t2 = cputime()
    times.append(t2 - t1)

def norm(x):
    """Return the square root of the summed absolute squares of x."""
    return np.sqrt(np.sum(np.abs(np.square(x))))


# Relative reconstruction error of the round trip.
rec_err = norm(s - s_r) / norm(s)
print("Reconstruction error: %.3e" % rec_err)
# Timing summary over all recorded runs.
print("Calculation time: %.3f±%.3fs (min=%.3f s)" % (
    np.mean(times),
    np.std(times) / 2,
    np.min(times),
))

if args.output:
    print("Writing audio file '%s'" % args.output)
    sf = Sndfile(args.output,
                 mode='w',
                 format=Format('wav', 'pcm24'),
Ejemplo n.º 6
0
    for _ in xrange(options.time or 1):
        t1 = cputime()
        
        # calculate transform parameters
        Ls = len(s)
        
        nsgt = NSGT(scl,fs,Ls,real=options.real,matrixform=options.matrixform,reducedform=options.reducedform)
        
        # forward transform 
        c = nsgt.forward(s)

#        c = N.array(c)
#        print "c",len(c),N.array(map(len,c))
    
        # inverse transform 
        s_r = nsgt.backward(c)
 
        t2 = cputime()
        times.append(t2-t1)

    norm = lambda x: N.sqrt(N.sum(N.abs(N.square(x))))
    rec_err = norm(s-s_r)/norm(s)
    print "Reconstruction error: %.3e"%rec_err
    print "Calculation time: %.3f +- %.3f s (min=%.3f s)"%(N.mean(times),N.std(times)/2,N.min(times))

    if options.output:
        print "Written audio file",options.output
        sf = Sndfile(options.output,mode='w',format=Format('wav','pcm24'),channels=1,samplerate=fs)
        sf.write_frames(s_r)
        sf.close()
        print "Done"
Ejemplo n.º 7
0
def xtract_mixin(x,
                 instrumental=False,
                 single_model=False,
                 pretrained_model_dir=None):
    """Split a 1-D signal into harmonic, percussive and vocal estimates.

    The signal is zero-padded to a whole number of ``chunk_size`` chunks.
    Each chunk is transformed with the NSGT, its magnitude is passed to the
    models, the resulting coefficients are inverted, and the pieces are
    written back at the chunk's position. The padding is removed before
    returning.

    :param x: 1-D array of samples (it is concatenated with a 1-D zero pad).
    :param instrumental: if True, the vocal model is not run and the vocal
        output stays all zeros.
    :param single_model: if True, each output combines that model's
        predicted magnitude with the original phase; otherwise the
        predictions are turned into soft masks (ratios of squared
        magnitudes) applied to the original coefficients.
    :param pretrained_model_dir: directory containing
        ``model_percussive.h5``, ``model_harmonic.h5`` and
        ``model_vocal.h5``; when None, the paths come from the module-level
        ``components`` mapping.
    :returns: tuple ``(harmonic, percussive, vocal)`` of float32 arrays with
        the same length as the input.
    """
    if pretrained_model_dir is None:
        p_model = components["percussive"]["model_file"]
        h_model = components["harmonic"]["model_file"]
        v_model = components["vocal"]["model_file"]
    else:
        p_model = os.path.join(pretrained_model_dir, "model_percussive.h5")
        h_model = os.path.join(pretrained_model_dir, "model_harmonic.h5")
        v_model = os.path.join(pretrained_model_dir, "model_vocal.h5")

    print("Loading models from:\n\t{0}\n\t{1}\n\t{2}".format(
        h_model, p_model, v_model))
    percussive_model = Model(p_model).model
    harmonic_model = Model(h_model).model
    vocal_model = Model(v_model).model

    n_samples = x.shape[0]
    n_chunks = int(numpy.ceil(n_samples / chunk_size))
    n_pad = n_chunks * chunk_size - n_samples

    # Pad so the signal divides evenly into chunks.
    x = numpy.concatenate((x, numpy.zeros(n_pad)))
    x_out_h = numpy.zeros_like(x)
    x_out_p = numpy.zeros_like(x)
    x_out_v = numpy.zeros_like(x)

    # One transform object is reused for every chunk (all have equal length).
    nsgt = NSGT(nsgt_scale, sample_rate, chunk_size, real=True,
                matrixform=True)

    # Process every chunk, including the last (padded) one; stopping at
    # n_chunks - 1 would leave the tail of the outputs as silence.
    for chunk in range(n_chunks):
        span = slice(chunk * chunk_size, (chunk + 1) * chunk_size)
        s = x[span]

        # forward transform
        C = numpy.asarray(nsgt.forward(s))

        Cmag_orig, Cphase_orig = librosa.magphase(C)
        Cmag_for_nn = numpy.reshape(Cmag_orig, (1, dim_1, dim_2, 1))

        # inference from model
        Cmag_p = percussive_model.predict(Cmag_for_nn)
        Cmag_p = numpy.reshape(Cmag_p, (dim_1, dim_2))

        Cmag_h = harmonic_model.predict(Cmag_for_nn)
        Cmag_h = numpy.reshape(Cmag_h, (dim_1, dim_2))

        # Without a vocal prediction the vocal magnitude is all zeros.
        Cmag_v = numpy.zeros_like(Cmag_h)
        if not instrumental:
            Cmag_v = vocal_model.predict(Cmag_for_nn)
            Cmag_v = numpy.reshape(Cmag_v, (dim_1, dim_2))

        if single_model:
            # Use each predicted magnitude directly with the original phase.
            Ch_desired = _pol2cart(Cmag_h, Cphase_orig)
            Cp_desired = _pol2cart(Cmag_p, Cphase_orig)

            if not instrumental:
                Cv_desired = _pol2cart(Cmag_v, Cphase_orig)
        else:
            # Soft masks: each component's squared magnitude over the total;
            # the epsilon keeps the denominator non-zero.
            tot = (numpy.power(Cmag_p, 2.0) + numpy.power(Cmag_h, 2.0) +
                   numpy.power(Cmag_v, 2.0) + K.epsilon())
            Mp = numpy.divide(numpy.power(Cmag_p, 2.0), tot)
            Mh = numpy.divide(numpy.power(Cmag_h, 2.0), tot)
            Mv = numpy.divide(numpy.power(Cmag_v, 2.0), tot)

            Cp_desired = numpy.multiply(Mp, C)
            Ch_desired = numpy.multiply(Mh, C)
            Cv_desired = numpy.multiply(Mv, C)

        # inverse transform
        s_p = nsgt.backward(Cp_desired)
        s_h = nsgt.backward(Ch_desired)

        s_v = numpy.zeros_like(s_h)
        if not instrumental:
            s_v = nsgt.backward(Cv_desired)

        x_out_p[span] = s_p
        x_out_v[span] = s_v
        x_out_h[span] = s_h

    # strip off padding
    if n_pad > 0:
        x_out_p = x_out_p[:-n_pad]
        x_out_h = x_out_h[:-n_pad]
        x_out_v = x_out_v[:-n_pad]

    x_out_h = x_out_h.astype(numpy.float32)
    x_out_p = x_out_p.astype(numpy.float32)
    x_out_v = x_out_v.astype(numpy.float32)

    return x_out_h, x_out_p, x_out_v