def RappSpeechDetectionSphinx(self, audioString, audioFile, language, audioSource, words=None, sentences=None, grammar=None): path = audioFileORaudioString(audioString, audioFile) ch = RappPlatformAPI(address=addr) response = ch.speechRecognitionSphinx(path, language=language, audio_source=audioSource, words=words, sentences=sentences, grammar=grammar) #f=open('/home/panos/Desktop/Swagger/finalTest/test_v3/RappGoogle.wav','r') return response
class voiceCommand: def __init__(self): self.frames = [] self.pa = pyaudio.PyAudio() self.stream = self.open_mic_stream() self.tap_threshold, self.noise_threshold = self.calibrate() self.hear = False self.ch = RappPlatformAPI() topic = rospy.get_param("rec_topic") self.publisher = rospy.Publisher(topic, String, queue_size=10) def stop(self): self.stream.close() #finds device inputs def find_input_device(self): device_index = None for i in range(self.pa.get_device_count()): devinfo = self.pa.get_device_info_by_index(i) print("Device %d: %s" % (i, devinfo["name"])) for keyword in ["mic", "input"]: if keyword in devinfo["name"].lower(): print("Found an input: device %d - %s" % (i, devinfo["name"])) device_index = i return device_index if device_index == None: print("No preferred input found; using default input device.") return device_index #opens stream def open_mic_stream(self): device_index = self.find_input_device() stream = self.pa.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True, input_device_index=device_index, frames_per_buffer=INPUT_FRAMES_PER_BLOCK) return stream def tapDetected(self): print "Tap!" def unpack(self, block): count = len(block) / 2 format = "%dh" % (count) shorts = struct.unpack(format, block) return shorts, count def get_rms(self, block): # RMS amplitude is defined as the square root of the # mean over time of the square of the amplitude. # so we need to convert this string of bytes into # a string of 16-bit samples... # we will get one short out for each # two chars in the string. shorts, count = self.unpack(block) # iterate over the block. sum_squares = 0.0 for sample in shorts: # sample is a signed short in +/- 32768. # normalize it to 1.0 n = sample * SHORT_NORMALIZE sum_squares += n * n return math.sqrt(sum_squares / count) #computeThreshold listens to stream for $seconds seconds #and computes the threshold using RMS def computeThreshold(self, seconds): block_sum = 0 counter = 0 while seconds > 0: block_sum += self.get_rms(self.stream.read(INPUT_FRAMES_PER_BLOCK)) seconds -= INPUT_BLOCK_TIME counter += 1 return block_sum / counter #calibrate compute silence and voice thresholds def calibrate(self): print("Noise calibration.BE QUIET.") noise_threshold = self.computeThreshold(2) print("Noise calibration COMPLETE.") print("Command calibration.") tap_threshold = self.computeThreshold(2) print("Command calibration complete.") return tap_threshold, noise_threshold #write2WAV saves self.frames as file.wav def write2WAV(self): waveFile = wave.open(PATH + 'file.wav', 'w') waveFile.setnchannels(CHANNELS) waveFile.setsampwidth(self.pa.get_sample_size(FORMAT)) waveFile.setframerate(RATE) waveFile.writeframes(b''.join(self.frames)) waveFile.close() self.frames = [] def setHear(self, value): self.hear = value def listen(self): try: downCounter = 0 upCounter = 0 self.frames = [] flag = False while self.hear and (not rospy.is_shutdown()): block = self.stream.read(INPUT_FRAMES_PER_BLOCK) amp = self.get_rms(block) if amp > self.tap_threshold + self.noise_threshold: flag = True #heard something for first time #self.tapDetected() downCounter = 0 upCounter += 1 else: downCounter += 1 if flag: self.frames.append(block) else: if len(self.frames) > 3: self.frames.pop(0) self.frames.append(block) if upCounter > 40: #Too long record 40*0.05 = 2sec print("TOO LONG RECORD") break if downCounter > 20: if upCounter > 1: self.stream.stop_stream() self.write2WAV() print('File recorded.') self.stream.start_stream() time.sleep(0.1) downCounter = 0 upCounter = 0 self.hear = False else: downCounter = 0 upCounter = 0 self.frames = [] flag = False except KeyboardInterrupt: self.stop() return False return True def voiceRec(self): response = self.ch.speechRecognitionSphinx(PATH + "file.wav", "nao_wav_1_ch", 'en', dictionary) str_response = str(response.get('words')) if len(response.get('words')) != 1: return '' str_response = str(response.get('words')[0]) return str_response
class SpeechDetectionSphinxTests(unittest.TestCase): def setUp(self): rospack = rospkg.RosPack() self.pkgDir = rospack.get_path('rapp_testing_tools') self.ch = RappPlatformAPI() def test_ogg_oxi(self): audioFile = path.join(self.pkgDir, 'test_data', 'speech_detection_samples', 'recording_oxi.ogg') response = self.ch.speechRecognitionSphinx(audioFile, 'nao_ogg', 'el',\ [u'ναι', u'οχι']) valid_words_found = [u'οχι'] self.assertEqual(response['error'], u'') self.assertEqual(response['words'], valid_words_found) def test_ogg_no(self): audioFile = path.join(self.pkgDir, 'test_data', 'speech_detection_samples', 'recording_no.ogg') response = self.ch.speechRecognitionSphinx(audioFile, 'nao_ogg', 'en',\ [u'yes', u'no']) valid_words_found = [u'no'] self.assertEqual(response['error'], u'') self.assertEqual(response['words'], valid_words_found) def test_wav_1_ch_yes_no(self): audioFile = path.join(self.pkgDir, 'test_data', 'yes-no.wav') response = self.ch.speechRecognitionSphinx(audioFile, 'nao_wav_1_ch', 'en',\ [u'yes', u'no']) valid_words_found = [u'yes', u'no'] self.assertEqual(response['error'], u'') self.assertEqual(response['words'], valid_words_found) def test_wav_1_ch_nai_oxi(self): audioFile = path.join(self.pkgDir, 'test_data', 'nai-oxi-test.wav') response = self.ch.speechRecognitionSphinx(audioFile, 'nao_wav_1_ch', 'el',\ [u'ναι', u'οχι', u'ισως']) valid_words_found = [u'ναι', u'οχι', u'ισως'] self.assertEqual(response['error'], u'') self.assertEqual(response['words'], valid_words_found) def test_headset_nai_oxi(self): audioFile = path.join(self.pkgDir, 'test_data', 'microphone_nai.wav') response = self.ch.speechRecognitionSphinx(audioFile, 'headset', 'el',\ [u'ναι', u'οχι']) valid_words_found = [u'ναι'] self.assertEqual(response['error'], u'') self.assertEqual(response['words'], valid_words_found) def test_speech_erroneous(self): audioFile = path.join(self.pkgDir, 'test_data', 'microphone_nai.wav') response = self.ch.speechRecognitionSphinx('', 'headset', 'el', [u'ναι', u'οχι']) self.assertNotEqual(response['error'], u'') response = self.ch.speechRecognitionSphinx([], 'headset', 'el', [u'ναι', u'οχι']) self.assertNotEqual(response['error'], u'') response = self.ch.speechRecognitionSphinx(3, 'headset', 'el', [u'ναι', u'οχι']) self.assertNotEqual(response['error'], u'') response = self.ch.speechRecognitionSphinx(audioFile, '', 'el', [u'ναι', u'οχι']) self.assertNotEqual(response['error'], u'') response = self.ch.speechRecognitionSphinx(audioFile, [], 'el', [u'ναι', u'οχι']) self.assertNotEqual(response['error'], u'') response = self.ch.speechRecognitionSphinx(audioFile, 3, 'el', [u'ναι', u'οχι']) self.assertNotEqual(response['error'], u'') response = self.ch.speechRecognitionSphinx(audioFile, 'headset', '', [u'ναι', u'οχι']) self.assertNotEqual(response['error'], u'') response = self.ch.speechRecognitionSphinx(audioFile, 'headset', [], [u'ναι', u'οχι']) self.assertNotEqual(response['error'], u'') response = self.ch.speechRecognitionSphinx(audioFile, 'headset', 3, [u'ναι', u'οχι']) self.assertNotEqual(response['error'], u'') response = self.ch.speechRecognitionSphinx(audioFile, 'headset', 'el', 3) self.assertNotEqual(response['error'], u'') response = self.ch.speechRecognitionSphinx(audioFile, 'headset', 'el', '') self.assertNotEqual(response['error'], u'')
class SpeechDetectionSphinxTests(unittest.TestCase): def setUp(self): rospack = rospkg.RosPack() self.pkgDir = rospack.get_path('rapp_testing_tools') self.ch = RappPlatformAPI() def test_ogg_oxi(self): audioFile = path.join(self.pkgDir, 'test_data', 'speech_detection_samples', 'recording_oxi.ogg') response = self.ch.speechRecognitionSphinx(audioFile, 'nao_ogg', 'el',\ [u'ναι', u'οχι']) valid_words_found = [u'οχι'] self.assertEqual(response['error'], u'') self.assertEqual(response['words'], valid_words_found) def test_ogg_no(self): audioFile = path.join(self.pkgDir, 'test_data', 'speech_detection_samples', 'recording_no.ogg') response = self.ch.speechRecognitionSphinx(audioFile, 'nao_ogg', 'en',\ [u'yes', u'no']) valid_words_found = [u'no'] self.assertEqual(response['error'], u'') self.assertEqual(response['words'], valid_words_found) def test_wav_1_ch_yes_no(self): audioFile = path.join(self.pkgDir, 'test_data', 'yes-no.wav') response = self.ch.speechRecognitionSphinx(audioFile, 'nao_wav_1_ch', 'en',\ [u'yes', u'no']) valid_words_found = [u'yes', u'no'] self.assertEqual(response['error'], u'') self.assertEqual(response['words'], valid_words_found) def test_wav_1_ch_nai_oxi(self): audioFile = path.join(self.pkgDir, 'test_data', 'nai-oxi-test.wav') response = self.ch.speechRecognitionSphinx(audioFile, 'nao_wav_1_ch', 'el',\ [u'ναι', u'οχι', u'ισως']) valid_words_found = [u'ναι', u'οχι', u'ισως'] self.assertEqual(response['error'], u'') self.assertEqual(response['words'], valid_words_found) def test_headset_nai_oxi(self): audioFile = path.join(self.pkgDir, 'test_data', 'microphone_nai.wav') response = self.ch.speechRecognitionSphinx(audioFile, 'headset', 'el',\ [u'ναι', u'οχι']) valid_words_found = [u'ναι'] self.assertEqual(response['error'], u'') self.assertEqual(response['words'], valid_words_found) def test_speech_erroneous(self): audioFile = path.join(self.pkgDir, 'test_data', 'microphone_nai.wav') response = self.ch.speechRecognitionSphinx('', 'headset', 'el', [u'ναι', u'οχι']) self.assertNotEqual(response['error'], u'') response = self.ch.speechRecognitionSphinx([], 'headset', 'el', [u'ναι', u'οχι']) self.assertNotEqual(response['error'], u'') response = self.ch.speechRecognitionSphinx(3, 'headset', 'el', [u'ναι', u'οχι']) self.assertNotEqual(response['error'], u'') response = self.ch.speechRecognitionSphinx(audioFile, '', 'el', [u'ναι', u'οχι']) self.assertNotEqual(response['error'], u'') response = self.ch.speechRecognitionSphinx(audioFile, [], 'el', [u'ναι', u'οχι']) self.assertNotEqual(response['error'], u'') response = self.ch.speechRecognitionSphinx(audioFile, 3, 'el', [u'ναι', u'οχι']) self.assertNotEqual(response['error'], u'') response = self.ch.speechRecognitionSphinx(audioFile, 'headset', '', [u'ναι', u'οχι']) self.assertNotEqual(response['error'], u'') response = self.ch.speechRecognitionSphinx(audioFile, 'headset', [], [u'ναι', u'οχι']) self.assertNotEqual(response['error'], u'') response = self.ch.speechRecognitionSphinx(audioFile, 'headset', 3, [u'ναι', u'οχι']) self.assertNotEqual(response['error'], u'') response = self.ch.speechRecognitionSphinx(audioFile, 'headset', 'el', 3) self.assertNotEqual(response['error'], u'') response = self.ch.speechRecognitionSphinx(audioFile, 'headset', 'el', '') self.assertNotEqual(response['error'], u'')