コード例 #1
0
def test_melraw_bel_60_9():
    """Print loudness features of one audio segment next to reference means.

    Reads the first line of a reference ``.stat`` file, reshapes it to
    (9, 560) and treats the first 32 columns of every row as reference
    means.  It then computes ``AudioFeatures.loudness`` on the first 60
    frames (1024 samples each) of an example clip and prints the mean of
    those features over axis 0, the first reference row, and the difference
    between the two.

    Only the returned sample rate is asserted; the feature comparison is
    printed for manual inspection.
    """
    target_file = "/u/bergstrj/cvs/bergstrj/articles/06_memoir/data/feat_melraw_bel_60_9.stat"

    # Only the first line is parsed.  The context manager closes the handle
    # deterministically instead of leaving it to the garbage collector.
    with open(target_file) as stat_file:
        first_line = stat_file.readline()
    file1_features = numpy.asarray(
        [float(token) for token in first_line.split()])
    file1_features = file1_features.reshape((9, 560))

    # Columns 32 onward of each row are not used by this check.
    masters_means = file1_features[:, :32]

    idx = theano.tensor.lscalar()
    path, _label = tzanetakis.tzanetakis_example(idx)
    samples, sr = wavread.wav_read_double(path)
    # 640 frames of 1024 samples each.
    frames = (samples[:1024 * 640]).reshape((640, 1024))

    segment1_frames = frames[:60, :]
    AF = AudioFeatures(22050,
                       1024,
                       n_bands=32,
                       scale_to_int_range=False,
                       critical_band_fn=mel.MelMasters.warp_dense,
                       power_sqr_abs=False,
                       noise_level=1.0e-4)  # 1/10000

    AF.f = theano.Method([idx], [AF.loudness(segment1_frames), sr])

    af = AF.make()

    # Evaluate the method for example index 0.
    segment1_features, actual_sr = af.f(0)

    assert actual_sr == 22050

    # Compute the mean once and reuse it for both printouts.
    feature_means = numpy.mean(segment1_features, axis=0)

    print(feature_means)
    print(masters_means[0])

    print(feature_means - masters_means[0])
コード例 #2
0
def test_audiofeatures_fft():
    """Check the full-length ``AudioFeatures.fft`` against ``numpy.fft.fft``.

    A module exposing ``AudioFeatures.fft`` with ``half_fft=False`` is fed
    100 random frames of 1024 samples; the result must be
    ``numpy.allclose`` to numpy's length-1024 FFT taken along axis 1.
    """
    module = theano.Module()
    module.audiofeatures = AudioFeatures(sample_rate=22050, frame_len=1024)

    frames = theano.tensor.dmatrix('x')
    fft_expr = module.audiofeatures.fft(frames, half_fft=False)
    module.fft = theano.Method([frames], fft_expr)
    compiled = theano.make_init(module)

    # A fixed seed keeps the random input reproducible.
    noise = numpy.random.RandomState(4234).randn(100, 1024)

    actual = compiled.fft(noise)
    expected = numpy.fft.fft(noise, n=1024, axis=1)
    assert numpy.allclose(actual, expected)
コード例 #3
0
def test_mfcc_vs_dan_ellis():
    """Compare cepstral coefficients with reference values from MATLAB.

    The first 100 frames returned by ``load_blues_frames()`` are pushed
    through fft -> power spectrum -> auditory spectrum (40 bands) ->
    13 cepstral coefficients.  The coefficients of the first four frames
    must agree with the reference table to within a maximum relative error
    of 0.01; on failure the intermediate values are printed first.
    """
    # Reference table, produced in MATLAB with:
    #   ref_mfcc = melfcc(d, sr, 'wintime', 0.04645, 'hoptime', 0.04645,
    #                     'sumpower', 1, 'preemph', 0,
    #                     'lifterexp', 0, 'fbtype', 'htkmel');
    #   ref_mfcc(:,1:4)
    ref_mfcc = numpy.asarray([
        [133.7590, 136.6686, 146.2690, 144.5163],
        [5.3952, 9.0799, 5.2408, 4.4615],
        [3.2458, 6.9730, 2.7541, 3.0766],
        [2.9110, 3.6644, 4.5135, 5.8142],
        [-1.7893, -1.8694, 0.1458, -0.1025],
        [-1.8752, -2.1330, -1.2386, -0.6199],
        [1.7157, 1.3612, -0.3314, -0.1711],
        [0.8243, -0.9234, -1.5373, -1.0007],
        [-1.4298, -1.8253, -1.1328, -0.8016],
        [1.8595, 1.7053, 0.6195, 0.3924],
        [0.0345, -0.7927, -1.7412, -0.9397],
        [0.4872, -2.0002, 0.7030, -0.1565],
        [1.5356, -0.3391, -0.2883, -0.8777],
    ])

    module = theano.Module()
    module.audiofeatures = AudioFeatures(sample_rate=22050,
                                         frame_len=1024,
                                         n_bands=40,
                                         critical_band_fn=mel.melhtk_4k)
    print(module.audiofeatures.n_bands)

    frames = theano.tensor.dmatrix('frames')
    spectrum = module.audiofeatures.fft(frames, half_fft=True, hamming=True)
    power = module.audiofeatures.powspec_from_fft(spectrum,
                                                  scale_to_int_range=True)
    audspec = module.audiofeatures.audspec_from_power(power)
    print(module.audiofeatures.n_bands)

    # NOTE(review): audcc_from_power is handed the auditory spectrum rather
    # than the raw power spectrum -- confirm that this is the intended input.
    cepstra = module.audiofeatures.audcc_from_power(audspec, n_audcc=13)
    module.mfcc = theano.Method([frames], outputs=[cepstra, audspec])
    compiled = theano.make_init(module)

    mfcc_out, audspec_out = compiled.mfcc(load_blues_frames()[:100, :])
    assert audspec_out.shape == (100, 40)
    assert mfcc_out.shape == (100, 13)

    # Transpose so that rows are coefficients and columns are frames,
    # matching the layout of the reference table.
    my_mfcc = mfcc_out[0:4, :13].T

    # The small constant guards against division by zero.
    rel_err_matrix = (abs(ref_mfcc - my_mfcc) /
                      (abs(ref_mfcc) + abs(my_mfcc) + 1.0e-8))
    rel_err = numpy.max(rel_err_matrix)
    if rel_err >= 0.01:
        # Dump diagnostics before the assertion below fails.
        print(' '.join(['BLAH SHAPE', str(mfcc_out.shape)]))
        print(audspec_out[0:5, :13].T)
        print(my_mfcc)
        print(rel_err_matrix)
    assert rel_err < 0.01
コード例 #4
0
def test_audspec_vs_dan_ellis():
    """Compare the 12-band auditory spectrum with reference MATLAB values.

    The first 100 frames returned by ``load_blues_frames()`` go through
    fft -> power spectrum -> auditory spectrum.  The first 10 bands of the
    first 5 frames must match the reference table to within a maximum
    relative error of 0.01.
    """
    # Reference table, produced in MATLAB with:
    #   [d, sr] = wavread('blues.00000.wav')
    #   ref_spectrum = powspec(d, sr, .04645, .04645, 0);
    #   size(ref_spectrum)
    #   --> [513  646]
    #   ref_audspec = audspec(ref_spectrum, sr, 12, 'htkmel');
    ref_audspec = 1.0e+11 * numpy.asarray([
        [0.4252, 5.8783, 6.5783, 5.2775, 4.9325],
        [0.3338, 1.0942, 1.6309, 0.9345, 1.2804],
        [0.0944, 0.1080, 0.2539, 0.1605, 0.7033],
        [0.0216, 0.0193, 0.3786, 0.2363, 0.4438],
        [0.1064, 0.0938, 0.6994, 0.9129, 0.5748],
        [0.2204, 0.2589, 2.2318, 0.9404, 0.7342],
        [0.0296, 0.0305, 0.2253, 0.2793, 0.2129],
        [0.0376, 0.0411, 0.1984, 0.1598, 0.0992],
        [0.0191, 0.0368, 0.2006, 0.0545, 0.0567],
        [0.0094, 0.0163, 0.1646, 0.0354, 0.0636],
    ])

    module = theano.Module()
    module.audiofeatures = AudioFeatures(sample_rate=22050,
                                         frame_len=1024,
                                         n_bands=12)

    frames = theano.tensor.dmatrix('frames')
    spectrum = module.audiofeatures.fft(frames, half_fft=True, hamming=True)
    power = module.audiofeatures.powspec_from_fft(spectrum,
                                                  scale_to_int_range=True)
    module.audspec = theano.Method(
        [frames], outputs=module.audiofeatures.audspec_from_power(power))
    compiled = theano.make_init(module)

    aspec = compiled.audspec(load_blues_frames()[:100, :])

    # Transpose so that rows are bands and columns are frames, matching the
    # layout of the reference table.
    audspec = aspec[0:5, 0:10].T

    print(audspec)
    rel_err_matrix = abs(ref_audspec - audspec) / (ref_audspec + audspec)
    max_rel_err = numpy.max(rel_err_matrix)
    assert max_rel_err < 0.01
コード例 #5
0
def test_powspec_vs_dan_ellis():
    """Compare the power spectrum with reference MATLAB values.

    The first 100 frames returned by ``load_blues_frames()`` go through a
    Hamming-windowed fft (``half_fft=False``) and ``powspec_from_fft``.
    The first 10 bins of the first 5 frames must match the reference table
    to within a maximum relative error of 0.01.
    """
    # Reference table, produced in MATLAB with:
    #   [d, sr] = wavread('blues.00000.wav')
    #   ref_spectrum = powspec(d, sr, .04645, .04645, 0);
    #   size(ref_spectrum)
    #   --> [513  646]
    #   ref_spectrum(1:10, 1:5)
    ref_spectrum = 1.0e+11 * numpy.asarray([
        [0.0157, 0.0206, 0.0052, 0.00002, 0.0048],
        [0.0475, 0.0024, 0.0418, 0.0227, 0.0690],
        [0.2779, 0.2738, 0.1758, 0.3320, 0.0483],
        [0.0818, 0.7200, 1.2436, 1.1854, 1.2359],
        [0.0105, 0.0273, 0.2763, 0.1390, 0.6413],
        [0.0748, 0.8080, 1.5515, 1.2973, 1.7614],
        [0.0346, 3.5484, 2.9804, 3.0852, 0.0576],
        [0.0131, 0.7809, 0.0450, 0.1524, 0.9599],
        [0.0161, 0.5228, 0.4678, 0.3170, 0.5822],
        [0.1017, 0.6875, 0.4384, 0.3327, 0.7367],
    ])

    module = theano.Module()
    module.audiofeatures = AudioFeatures(sample_rate=22050, frame_len=1024)

    frames = theano.tensor.dmatrix('frames')
    windowed_fft = module.audiofeatures.fft(frames,
                                            half_fft=False,
                                            hamming=True)
    power = module.audiofeatures.powspec_from_fft(fft=windowed_fft,
                                                  scale_to_int_range=True)
    module.powspec = theano.Method([frames], power)
    compiled = theano.make_init(module)

    pspec = compiled.powspec(load_blues_frames()[:100, :])

    # Transpose so that rows are bins and columns are frames, matching the
    # layout of the reference table.
    spectrum = pspec[0:5, 0:10].T

    print(spectrum)
    rel_err_matrix = abs(ref_spectrum - spectrum) / (ref_spectrum + spectrum)
    print(rel_err_matrix)
    assert numpy.max(rel_err_matrix) < 0.01
コード例 #6
0
import theano
import theano.tensor as T

# Demo: nesting one Module inside another and updating a nested member
# through a Method of the outer module.

# Inner module with three double-vector members.
M = theano.Module()
M.a, M.b, M.c = [T.dvector() for i in (1, 2, 3)]

# Outer module; assigning M as an attribute nests it.
P = theano.Module()
P.m = M
x = T.dvector()
# No outputs (None); the update dictionary replaces M.b with M.b + x.
P.f = theano.Method([x], None, {M.b: M.b + x})

# Making P also converts M, because M is nested within P.
p = P.make()
p.m.b = [4, 5, 6]
p.f(3)
print(p.m.b)
# prints  array([7.,8.,9.])