Exemple #1
0
 def test_raw(self):
     # Regression check of snack_pitch against previously generated data,
     # to catch breakage as well as cross-platform or snack version issues.
     # The reference data was generated by snack 2.2.10-r5 on Gentoo Linux.
     frame_len = 0.001
     window_len = 0.025
     for wav_fn in wav_fns:
         F0, V = snack_pitch(wav_fn,
                             frame_length=frame_len,
                             window_length=window_len)

         # Voicing values are 0 or 1, so an exact comparison should
         # (hopefully) be free of floating point rounding issues.
         expected_V = get_sample_data(wav_fn, 'V', 'sf0', '1ms')
         self.assertEqual(V, expected_V)

         expected_F0 = get_sample_data(wav_fn, 'sF0', 'sf0', '1ms')
         # The lengths are asserted equal first, so the pairwise walk
         # below covers every frame of both sequences.
         self.assertEqual(len(F0), len(expected_F0))
         for actual, expected in zip(F0, expected_F0):
             # F0 floats are unlikely to compare exactly across different
             # machines.  NB: Voicesauce reports to 3 places.
             self.assertAlmostEqual(actual, expected, places=4)
    def test_pitch_against_voicesauce_data(self):
        # Test OpenSauce Snack pitch output (F0 and voicing) against Snack
        # data generated by VoiceSauce.
        # The data was generated on VoiceSauce v1.31 on Windows 7
        #
        # Passing '-v' as the first command line argument prints each F0
        # discrepancy found by the strict check.  sys.argv may contain only
        # the program name (e.g. when started by a test runner), so guard
        # the lookup instead of letting it raise IndexError.
        verbose = len(sys.argv) > 1 and sys.argv[1] == '-v'
        for fn in wav_fns:
            f_len = 1

            # Need ns (number of samples) and sampling rate (Fs) from wav file
            # to compute data length
            sound_file = SoundFile(fn)
            data_len = np.int_(np.floor(sound_file.ns / sound_file.fs / f_len * 1000))

            # Compute OpenSauce Snack F0 and V
            F0_os, V_os = snack_pitch(fn, snack_method, data_len, frame_shift=f_len, window_size=25, max_pitch=500, min_pitch=40, tcl_shell_cmd=tcl_cmd)

            # Get VoiceSauce data
            # NB: It doesn't matter which output file we use, the sF0 column is
            # the same in all of them.
            F0_vs = get_raw_data(fn, 'sF0', 'strF0', 'FMTs', 'estimated')
            V_vs = get_raw_data(fn, 'sV', 'strF0', 'FMTs', 'estimated')

            # Either corresponding entries for OpenSauce and VoiceSauce data
            # have to both be nan, or they need to be "close" enough in
            # floating precision
            self.assertAllClose(V_os, V_vs, rtol=1e-05, atol=1e-08, equal_nan=True)

            # Element-wise strict comparison, computed once.  equal_nan=True
            # treats a pair of NaNs as matching.
            strict_ok = np.isclose(F0_os, F0_vs, rtol=1e-05, atol=1e-08, equal_nan=True)
            if strict_ok.all():
                self.assertAllClose(F0_os, F0_vs, rtol=1e-05, atol=1e-08, equal_nan=True)
                continue

            # The strict check failed: report the offending entries (when
            # verbose) and redo the check with a looser relative tolerance
            # (rtol raised from 1e-05 to 3e-05).
            if verbose:
                idx = np.where(~strict_ok)[0]
                print('\nChecking F0 data using rtol=1e-05, atol=1e-08 in {}:'.format(fn))
                print('Out of {} array entries in F0 snack data, discrepancies in these indices'.format(len(F0_os)))
                for i in idx:
                    print('idx {}, OpenSauce F0 = {}, VoiceSauce F0 = {}'.format(i, F0_os[i], F0_vs[i]))
                print('Reducing relative tolerance to rtol=3e-05 and redoing check:')
            self.assertAllClose(F0_os, F0_vs, rtol=3e-05, atol=1e-08, equal_nan=True)
            if verbose:
                print('OK')
Exemple #3
0
 def test_against_voicesauce_data(self):
     # Compare the F0 track from snack_pitch with VoiceSauce output data.
     # XXX These are believed to be the voicesauce defaults (voicesauce
     # expresses them in ms, but snack expects seconds).
     frame_len = 0.001
     window_len = 0.025
     for wav_fn in wav_fns:
         # XXX Voicesauce can apparently emit the Voice data as well, but
         # there is no example output for it, so the voicing result is
         # ignored for now.
         F0, _voicing = snack_pitch(wav_fn,
                                    frame_length=frame_len,
                                    window_length=window_len)

         # The first samples in all of our test data yield 0.
         self.assertEqual(F0[:10], [0.0] * 10)

         # The snack esps data starts in the middle of the first window,
         # whereas the voicesauce output counts frames from 0, so prepend
         # invalid (NaN) frames to line the two up.
         # XXX It is unclear why the -1 makes the data match up better;
         # voicesauce doesn't have the -1 term.
         pad_frames = int(math.floor(window_len / frame_len / 2) - 1)
         F0[:0] = [float('NaN')] * pad_frames

         # NB: It doesn't matter which output file we use, the sF0 column
         # is the same in all of them.
         reference = get_test_data(wav_fn, 'sF0', 'sf0', '1ms')

         # One (OpenSauce, VoiceSauce) pair per reference row.  The
         # OpenSauce value is looked up by the row's ms value and rounded
         # to three places, because voicesauce rounds to three places.
         pairs = [(round(F0[int(ms)], 3), vs_f0) for ms, vs_f0 in reference]

         for row, (os_f0, vs_f0) in enumerate(pairs):
             diff = abs(os_f0 - vs_f0)
             # Rows where one side is exactly 0 and the values are at least
             # 40 apart are skipped (presumably the two tools disagree on
             # voicing there -- TODO confirm).
             if diff >= 40 and (os_f0 == 0 or vs_f0 == 0):
                 continue
             self.assertLess(diff, 0.6, "row %s" % row)
Exemple #4
0
    def test_pitch_against_voicesauce_data(self):
        # Test OpenSauce Snack pitch output (F0 and voicing) against Snack
        # data generated by VoiceSauce.
        # The data was generated on VoiceSauce v1.31 on Windows 7
        #
        # Passing '-v' as the first command line argument prints each F0
        # discrepancy found by the strict check.  sys.argv may contain only
        # the program name (e.g. when started by a test runner), so guard
        # the lookup instead of letting it raise IndexError.
        verbose = len(sys.argv) > 1 and sys.argv[1] == '-v'
        for fn in wav_fns:
            f_len = 1

            # Need ns (number of samples) and sampling rate (Fs) from wav file
            # to compute data length
            sound_file = SoundFile(fn)
            data_len = np.int_(
                np.floor(sound_file.ns / sound_file.fs / f_len * 1000))

            # Compute OpenSauce Snack F0 and V
            F0_os, V_os = snack_pitch(fn,
                                      snack_method,
                                      data_len,
                                      frame_shift=f_len,
                                      window_size=25,
                                      max_pitch=500,
                                      min_pitch=40,
                                      tcl_shell_cmd=tcl_cmd)

            # Get VoiceSauce data
            # NB: It doesn't matter which output file we use, the sF0 column is
            # the same in all of them.
            F0_vs = get_raw_data(fn, 'sF0', 'strF0', 'FMTs', 'estimated')
            V_vs = get_raw_data(fn, 'sV', 'strF0', 'FMTs', 'estimated')

            # Either corresponding entries for OpenSauce and VoiceSauce data
            # have to both be nan, or they need to be "close" enough in
            # floating precision
            self.assertAllClose(V_os,
                                V_vs,
                                rtol=1e-05,
                                atol=1e-08,
                                equal_nan=True)

            # Element-wise strict comparison, computed once.  equal_nan=True
            # treats a pair of NaNs as matching.
            strict_ok = np.isclose(F0_os,
                                   F0_vs,
                                   rtol=1e-05,
                                   atol=1e-08,
                                   equal_nan=True)
            if strict_ok.all():
                self.assertAllClose(F0_os,
                                    F0_vs,
                                    rtol=1e-05,
                                    atol=1e-08,
                                    equal_nan=True)
                continue

            # The strict check failed: report the offending entries (when
            # verbose) and redo the check with a looser relative tolerance
            # (rtol raised from 1e-05 to 3e-05).
            if verbose:
                idx = np.where(~strict_ok)[0]
                print(
                    '\nChecking F0 data using rtol=1e-05, atol=1e-08 in {}:'
                    .format(fn))
                print(
                    'Out of {} array entries in F0 snack data, discrepancies in these indices'
                    .format(len(F0_os)))
                for i in idx:
                    print('idx {}, OpenSauce F0 = {}, VoiceSauce F0 = {}'.
                          format(i, F0_os[i], F0_vs[i]))
                print(
                    'Reducing relative tolerance to rtol=3e-05 and redoing check:'
                )
            self.assertAllClose(F0_os,
                                F0_vs,
                                rtol=3e-05,
                                atol=1e-08,
                                equal_nan=True)
            if verbose:
                print('OK')