def test_calculate_SNR_negative_1(self):
    """Check that `calculate_SNR` rejects sound data with an odd byte count.

    The fixture holds 42000 big-endian 16-bit samples: a low-amplitude run,
    a high-amplitude run and another low-amplitude run. The last byte is
    dropped before the call, so the buffer no longer contains a whole number
    of samples, and an ``AssertionError`` matching 'Sound data are wrong!'
    is expected.
    """
    source_array = [89, -89] * 6000 + [502, -502] * 8000 + [89, -89] * 7000
    # Pack all samples in one call. Appending one packed sample at a time to
    # an immutable bytes object re-copies the whole buffer on every step.
    source_data = struct.pack('>{0}h'.format(len(source_array)), *source_array)
    sampling_frequency = 8000
    # Bounds of the high-amplitude middle run: sample offsets divided by the
    # sampling frequency.
    bounds_of_speech = [(2.0 * 6000.0 / sampling_frequency,
                         2.0 * (6000.0 + 8000.0) / sampling_frequency)]
    with self.assertRaisesRegex(AssertionError, 'Sound data are wrong!'):
        vad.calculate_SNR(source_data[0:-1], sampling_frequency,
                          bounds_of_speech)
def test_calculate_SNR_negative_4(self):
    """Check that `calculate_SNR` returns None for an empty bounds list.

    The sound data are well-formed (42000 big-endian 16-bit samples), but
    the list of speech bounds passed to the function is empty.
    """
    source_array = [89, -89] * 6000 + [502, -502] * 8000 + [89, -89] * 7000
    # Pack all samples in one call. Appending one packed sample at a time to
    # an immutable bytes object re-copies the whole buffer on every step.
    source_data = struct.pack('>{0}h'.format(len(source_array)), *source_array)
    sampling_frequency = 8000
    self.assertIsNone(
        vad.calculate_SNR(source_data, sampling_frequency, []))
def test_calculate_SNR_negative_3(self):
    """Check that `calculate_SNR` returns None for bounds past the data end.

    The fixture contains 42000 samples, while the single speech interval
    starts at sample offset 106000 (divided by the sampling frequency), i.e.
    entirely beyond the end of the sound data.
    """
    source_array = [89, -89] * 6000 + [502, -502] * 8000 + [89, -89] * 7000
    # Pack all samples in one call. Appending one packed sample at a time to
    # an immutable bytes object re-copies the whole buffer on every step.
    source_data = struct.pack('>{0}h'.format(len(source_array)), *source_array)
    sampling_frequency = 8000
    bounds_of_speech = [(106000.0 / sampling_frequency,
                         (106000.0 + 8000.0) / sampling_frequency)]
    self.assertIsNone(
        vad.calculate_SNR(source_data, sampling_frequency, bounds_of_speech))
def select_sound_files_and_their_SNR(dir_name, method_name, params_of_VAD):
    """Walk `dir_name` recursively and yield ``(path, snr)`` for WAV files.

    Every entry whose lower-cased name ends with '.wav' (and which is not a
    directory) is loaded with `load_sound`. Speech bounds are detected with
    `detect_spoken_frames` when `method_name` is 'adapt' and with
    `detect_spoken_frames_with_webrtc` for any other value; `params_of_VAD`
    is passed through to the detector unchanged. The bounds are then handed
    to `calculate_SNR`.

    Yields:
        Tuples ``(full_path, snr)`` where `snr` is the computed value rounded
        to the nearest integer. Files for which `calculate_SNR` returns None
        are skipped.
    """
    for entry in os.listdir(dir_name):
        if entry in ('.', '..'):
            continue
        path = os.path.join(dir_name, entry)
        if os.path.isdir(path):
            # Descend into the sub-directory with the same settings.
            yield from select_sound_files_and_their_SNR(
                path, method_name, params_of_VAD)
            continue
        if not entry.lower().endswith('.wav'):
            continue
        sound, fs = load_sound(path)
        detector = (detect_spoken_frames if method_name == 'adapt'
                    else detect_spoken_frames_with_webrtc)
        bounds_of_speech = list(detector(sound, fs, params_of_VAD))
        snr = calculate_SNR(sound, fs, bounds_of_speech)
        if snr is None:
            continue
        yield (path, int(round(snr)))
def test_calculate_SNR_positive_1(self):
    """Check `calculate_SNR` against a reference value computed by hand.

    The fixture holds 42000 big-endian 16-bit samples: 12000 low-amplitude
    samples, 16000 high-amplitude samples and 14000 low-amplitude samples.
    The speech bounds cover exactly the high-amplitude run. The reference
    value is built from the mean squared amplitude of the samples inside
    the bounds and of the samples outside them, each seeded with `vad.EPS`.
    """
    source_array = [89, -89] * 6000 + [502, -502] * 8000 + [89, -89] * 7000
    # Pack all samples in one call. Appending one packed sample at a time to
    # an immutable bytes object re-copies the whole buffer on every step.
    source_data = struct.pack('>{0}h'.format(len(source_array)), *source_array)
    sampling_frequency = 8000
    # Bounds of the high-amplitude middle run: sample offsets divided by the
    # sampling frequency.
    bounds_of_speech = [(2.0 * 6000.0 / sampling_frequency,
                         2.0 * (6000.0 + 8000.0) / sampling_frequency)]
    # Sample indices delimiting the same run inside `source_array`.
    speech_start = 2 * 6000
    speech_end = 2 * (6000 + 8000)
    silence_samples = source_array[0:speech_start] + source_array[speech_end:]
    speech_samples = source_array[speech_start:speech_end]
    # The folds keep the original left-to-right accumulation order so the
    # floating-point reference value is unchanged.
    silence_energy = reduce(
        lambda a, b: a + b * b, silence_samples, vad.EPS
    ) / (2.0 * (6000.0 + 7000.0))
    speech_energy = reduce(
        lambda a, b: a + b * b, speech_samples, vad.EPS
    ) / (2.0 * 8000.0)
    # NOTE(review): a factor of 20 is applied to a ratio of energies;
    # presumably this mirrors the formula inside calculate_SNR - confirm.
    target_snr = 20.0 * math.log10(speech_energy / silence_energy)
    self.assertAlmostEqual(
        target_snr,
        vad.calculate_SNR(source_data, sampling_frequency, bounds_of_speech))
def test_calculate_SNR_negative_2(self):
    """Check that `calculate_SNR` rejects empty sound data.

    An empty list is passed as the sound together with a non-empty list of
    speech bounds; an ``AssertionError`` matching 'Sound data are wrong!' is
    expected.
    """
    sampling_frequency = 8000
    # Interval limits: sample offsets divided by the sampling frequency.
    speech_start = 2.0 * 6000.0 / sampling_frequency
    speech_end = 2.0 * (6000.0 + 8000.0) / sampling_frequency
    bounds_of_speech = [(speech_start, speech_end)]
    with self.assertRaisesRegex(AssertionError, 'Sound data are wrong!'):
        vad.calculate_SNR([], sampling_frequency, bounds_of_speech)