def test_raw(self):
    # Regression check against previously generated reference data, to
    # catch breakage as well as cross-platform or snack-version drift.
    # Data was generated by snack 2.2.10-r5 on Gentoo Linux.
    frame_len = 0.001
    window_len = 0.025
    for wav_fn in wav_fns:
        F0, V = snack_pitch(wav_fn, frame_length=frame_len,
                            window_length=window_len)

        # Voice is 0 or 1, so (hopefully) no FP rounding issues and an
        # exact comparison is fine.
        expected_voicing = get_sample_data(wav_fn, 'V', 'sf0', '1ms')
        self.assertEqual(V, expected_voicing)

        expected_f0 = get_sample_data(wav_fn, 'sF0', 'sf0', '1ms')
        # Lengths are checked first so the pairwise walk below cannot
        # silently stop short on a truncated result.
        self.assertEqual(len(F0), len(expected_f0))
        for actual, expected in zip(F0, expected_f0):
            # The F0 float values are unlikely to compare exactly across
            # different machines. NB: Voicesauce reports to 3 places.
            self.assertAlmostEqual(actual, expected, places=4)
def test_pitch_against_voicesauce_data(self):
    # Test against Snack data generated by VoiceSauce
    # The data was generated on VoiceSauce v1.31 on Windows 7
    #
    # When the first command-line argument is '-v', every F0 entry that
    # fails the strict tolerance is printed before the looser re-check.

    # Guard the lookup: sys.argv has no index 1 when the tests are run
    # without any command-line argument.
    verbose = len(sys.argv) > 1 and sys.argv[1] == '-v'

    # Frame shift handed to snack_pitch; the data_len formula below
    # treats it as milliseconds (seconds * 1000 / f_len).
    f_len = 1

    for fn in wav_fns:
        # Need ns (number of samples) and sampling rate (Fs) from wav file
        # to compute data length
        sound_file = SoundFile(fn)
        data_len = np.int_(
            np.floor(sound_file.ns / sound_file.fs / f_len * 1000))

        # Compute OpenSauce Snack F0 and V
        F0_os, V_os = snack_pitch(fn, snack_method, data_len,
                                  frame_shift=f_len, window_size=25,
                                  max_pitch=500, min_pitch=40,
                                  tcl_shell_cmd=tcl_cmd)

        # Get VoiceSauce data
        # NB: It doesn't matter which output file we use, the sF0 column is
        # the same in all of them.
        F0_vs = get_raw_data(fn, 'sF0', 'strF0', 'FMTs', 'estimated')
        V_vs = get_raw_data(fn, 'sV', 'strF0', 'FMTs', 'estimated')

        # Either corresponding entries for OpenSauce and VoiceSauce data
        # have to both be nan, or they need to be "close" enough in
        # floating precision
        self.assertAllClose(V_os, V_vs, rtol=1e-05, atol=1e-08,
                            equal_nan=True)

        # Element-wise agreement at the strict tolerance, computed once and
        # reused for both the branch decision and the discrepancy report.
        matches = (np.isclose(F0_os, F0_vs, rtol=1e-05, atol=1e-08)
                   | (np.isnan(F0_os) & np.isnan(F0_vs)))

        if matches.all():
            self.assertAllClose(F0_os, F0_vs, rtol=1e-05, atol=1e-08,
                                equal_nan=True)
            continue

        # The strict check failed: report the offending entries (verbose
        # only) and redo the check with a looser relative tolerance
        # (rtol goes from 1e-05 to 3e-05).
        if verbose:
            idx = np.where(~matches)[0]
            print('\nChecking F0 data using rtol=1e-05, atol=1e-08 in {}:'.format(fn))
            print('Out of {} array entries in F0 snack data, discrepancies in these indices'.format(len(F0_os)))
            for i in idx:
                print('idx {}, OpenSauce F0 = {}, VoiceSauce F0 = {}'.format(i, F0_os[i], F0_vs[i]))
            print('Reducing relative tolerance to rtol=3e-05 and redoing check:')
        self.assertAllClose(F0_os, F0_vs, rtol=3e-05, atol=1e-08,
                            equal_nan=True)
        if verbose:
            print('OK')
def test_against_voicesauce_data(self):
    # Compare snack_pitch F0 output with the sF0 column of the VoiceSauce
    # reference data, row by row.
    # XXX I think these are the voicesauce defaults (vs expresses them
    # in ms, but snack expects seconds).
    frame_len = 0.001
    window_len = 0.025
    for wav_fn in wav_fns:
        # XXX I think voicesauce will optionally emit the Voice data, but I
        # don't have example output data for it, so I'm ignoring it for now.
        F0, _voicing = snack_pitch(wav_fn, frame_length=frame_len,
                                   window_length=window_len)

        # The first samples in all of our test data yield 0.
        self.assertEqual(F0[:10], [0.0]*10)

        # The snack esps data starts in the middle of the first window,
        # but the voicesauce output data starts counting frames from 0,
        # so pad our F0 with invalid frames.
        # XXX I'm not sure why -1 seems to make the data match up better,
        # voicesauce doesn't have the -1 term.
        pad_frames = int(math.floor(window_len/frame_len/2)-1)
        F0[:0] = [float('NaN')] * pad_frames

        # NB: It doesn't matter which output file we use, the sF0 column is
        # the same in all of them.
        voicesauce_data = get_test_data(wav_fn, 'sF0', 'sf0', '1ms')

        # Pair each reference value with our F0 at the same frame index.
        # Voicesauce rounds to three places, so ours is rounded to match.
        pairs = [(round(F0[int(ms)], 3), fs0)
                 for ms, fs0 in voicesauce_data]

        for row, (ours, theirs) in enumerate(pairs):
            gap = abs(ours - theirs)
            # Skip rows where exactly-zero F0 on one side accounts for a
            # large gap (presumably the two tools disagreed on whether the
            # frame carries pitch at all -- TODO confirm).
            if gap >= 40 and (ours == 0 or theirs == 0):
                continue
            self.assertLess(gap, 0.6, "row %s" % row)
def test_pitch_against_voicesauce_data(self):
    # Test against Snack data generated by VoiceSauce
    # The data was generated on VoiceSauce v1.31 on Windows 7
    #
    # Verbose reporting of F0 discrepancies is enabled when the first
    # command-line argument is '-v'. Slicing (rather than indexing)
    # sys.argv keeps this from raising IndexError when the tests are run
    # with no command-line arguments at all.
    verbose = sys.argv[1:2] == ['-v']

    strict = dict(rtol=1e-05, atol=1e-08)

    for fn in wav_fns:
        f_len = 1
        # Need ns (number of samples) and sampling rate (Fs) from wav file
        # to compute data length
        sound_file = SoundFile(fn)
        data_len = np.int_(
            np.floor(sound_file.ns / sound_file.fs / f_len * 1000))

        # Compute OpenSauce Snack F0 and V
        F0_os, V_os = snack_pitch(fn,
                                  snack_method,
                                  data_len,
                                  frame_shift=f_len,
                                  window_size=25,
                                  max_pitch=500,
                                  min_pitch=40,
                                  tcl_shell_cmd=tcl_cmd)

        # Get VoiceSauce data
        # NB: It doesn't matter which output file we use, the sF0 column is
        # the same in all of them.
        F0_vs = get_raw_data(fn, 'sF0', 'strF0', 'FMTs', 'estimated')
        V_vs = get_raw_data(fn, 'sV', 'strF0', 'FMTs', 'estimated')

        # Either corresponding entries for OpenSauce and VoiceSauce data
        # have to both be nan, or they need to be "close" enough in
        # floating precision
        self.assertAllClose(V_os, V_vs, equal_nan=True, **strict)

        # True wherever the two F0 tracks agree at the strict tolerance
        # (or are both nan). Built once, then used for the branch and for
        # locating the discrepancies.
        both_nan = np.isnan(F0_os) & np.isnan(F0_vs)
        agrees = np.isclose(F0_os, F0_vs, **strict) | both_nan

        if not agrees.all():
            # The strict check failed; retry with a looser relative
            # tolerance (rtol 3e-05 instead of 1e-05).
            if verbose:
                idx = np.where(~agrees)[0]
                print(
                    '\nChecking F0 data using rtol=1e-05, atol=1e-08 in {}:'
                    .format(fn))
                print(
                    'Out of {} array entries in F0 snack data, discrepancies in these indices'
                    .format(len(F0_os)))
                for i in idx:
                    print('idx {}, OpenSauce F0 = {}, VoiceSauce F0 = {}'.
                          format(i, F0_os[i], F0_vs[i]))
                print(
                    'Reducing relative tolerance to rtol=3e-05 and redoing check:'
                )
            self.assertAllClose(F0_os,
                                F0_vs,
                                rtol=3e-05,
                                atol=1e-08,
                                equal_nan=True)
            if verbose:
                print('OK')
        else:
            self.assertAllClose(F0_os, F0_vs, equal_nan=True, **strict)