Ejemplo n.º 1
0
    def test_lm(self):
        """Decode with a defective dictionary, then with the 'turtle' LM.

        The hypothesis is expected to stay empty until the two words are
        added to the dictionary by hand.
        """
        decoder = Pocketsphinx(
            dic='deps/pocketsphinx/test/data/defective.dic', mmap=False)

        # First pass: only the 'defective' dictionary is loaded.
        decoder.decode()
        self.assertEqual(decoder.hypothesis(), '')

        # Register the 'turtle' n-gram model and make it the active search.
        turtle_model = NGramModel(
            decoder.get_config(),
            decoder.get_logmath(),
            'deps/pocketsphinx/test/data/turtle.lm.bin',
        )
        decoder.set_lm('turtle', turtle_model)
        decoder.set_search('turtle')

        # Second pass: 'turtle' model active, dictionary still defective.
        decoder.decode()
        self.assertEqual(decoder.hypothesis(), '')

        # 'meters' is missing from the loaded dictionary, so add the words
        # manually (the third argument is passed through to add_word as-is).
        for word, phones, update in (
                ('foobie', 'F UW B IY', False),
                ('meters', 'M IY T ER Z', True)):
            decoder.add_word(word, phones, update)

        # Third pass: the added words now show up in the hypothesis.
        decoder.decode()
        self.assertEqual(decoder.hypothesis(), 'foobie meters meters')
Ejemplo n.º 2
0
    def test_lm(self):
        """Verify hypotheses across a language-model switch and word additions."""
        ps = Pocketsphinx(
            dic='deps/pocketsphinx/test/data/defective.dic',
            mmap=False,
        )

        def decoded_text():
            # Run one decode pass and hand back the resulting hypothesis.
            ps.decode()
            return ps.hypothesis()

        # With only the 'defective' dictionary nothing is recognised.
        self.assertEqual(decoded_text(), '')

        # Install the 'turtle' language model and select it as the search.
        lm_path = 'deps/pocketsphinx/test/data/turtle.lm.bin'
        ps.set_lm('turtle',
                  NGramModel(ps.get_config(), ps.get_logmath(), lm_path))
        ps.set_search('turtle')

        # Still empty: the dictionary has not changed yet.
        self.assertEqual(decoded_text(), '')

        # The word 'meters' isn't in the loaded dictionary; add it manually.
        ps.add_word('foobie', 'F UW B IY', False)
        ps.add_word('meters', 'M IY T ER Z', True)

        # Now the 'turtle' model can produce the added words.
        self.assertEqual(decoded_text(), 'foobie meters meters')
Ejemplo n.º 3
0
def transform_audio_to_text(filename):
    """Decode an audio file with PocketSphinx and return the recognised text.

    The language model and dictionary are read from
    ``~/DTAI_Internship/src/speech_recognizer_node/data/``; the acoustic
    model is the ``en-us`` directory under ``get_model_path()``.

    :param filename: audio file name, resolved relative to ``get_data_path()``.
    :return: the hypothesis produced by the decoder (also printed).
    """
    path = expanduser("~") + "/DTAI_Internship/src/speech_recognizer_node/data/"

    config = {
        'hmm': os.path.join(get_model_path(), 'en-us'),
        # These two are already rooted at the home directory, so they are
        # passed as-is instead of being joined onto the model path (joining
        # an absolute path onto another one just returns the absolute path).
        'lm': path + "generated_language_model.lm",
        'dict': path + "generated_dictionary.dic",
    }

    ps = Pocketsphinx(**config)
    ps.decode(audio_file=os.path.join(get_data_path(), filename),
              buffer_size=2048,
              no_search=False,
              full_utt=False)

    text = ps.hypothesis()
    print(text)
    return text
Ejemplo n.º 4
0
class TestRawDecoder(TestCase):
    """Checks the results of one decode with a default-configured decoder."""

    @classmethod
    def setUpClass(cls):
        # Build and run the decoder once for the whole class. Doing this in
        # __init__ would repeat the decode for every test method, and at
        # collection time rather than when the tests actually run.
        cls.ps = Pocketsphinx()
        cls.ps.decode()

    def test_raw_decoder_lookup_word(self):
        """A known word yields its phones; an unknown word yields None."""
        self.assertEqual(self.ps.lookup_word('hello'), 'HH AH L OW')
        self.assertIsNone(self.ps.lookup_word('abcdf'))

    def test_raw_decoder_hypothesis(self):
        """The hypothesis text, score and confidence match the reference."""
        self.assertEqual(self.ps.hypothesis(), 'go forward ten meters')
        self.assertEqual(self.ps.score(), -7066)
        self.assertEqual(self.ps.confidence(), 0.04042641466841839)

    def test_raw_decoder_segments(self):
        """The segment list includes the sentence and silence markers."""
        self.assertEqual(self.ps.segments(), [
            '<s>', '<sil>', 'go', 'forward', 'ten', 'meters', '</s>'
        ])

    def test_raw_decoder_best_hypothesis(self):
        """best() returns the expected (text, score) pairs in order."""
        self.assertEqual(self.ps.best(), [
            ('go forward ten meters', -28034),
            ('go for word ten meters', -28570),
            ('go forward and majors', -28670),
            ('go forward and meters', -28681),
            ('go forward and readers', -28685),
            ('go forward ten readers', -28688),
            ('go forward ten leaders', -28695),
            ('go forward can meters', -28695),
            ('go forward and leaders', -28706),
            ('go for work ten meters', -28722)
        ])
Ejemplo n.º 5
0
def pocket():
    """Decode one WAV file twice and print both results.

    The file is first decoded through a raw ``Decoder`` configured with the
    acoustic model, language model and dictionary that sit next to this
    script, then through a default ``Pocketsphinx`` instance.

    Relies on the module-level names ``s_dir``, ``sr`` and ``r``.
    """
    ps = Pocketsphinx()

    language_directory = os.path.dirname(os.path.realpath(__file__))
    print(language_directory)

    acoustic_parameters_directory = os.path.join(language_directory, "acoustic-model")
    language_model_file = os.path.join(language_directory, "language-model.lm.bin")
    phoneme_dictionary_file = os.path.join(language_directory, "pronounciation-dictionary.dict")

    config = Decoder.default_config()
    # Path of the hidden Markov model (HMM) parameter files.
    config.set_string("-hmm", acoustic_parameters_directory)
    config.set_string("-lm", language_model_file)
    config.set_string("-dict", phoneme_dictionary_file)

    decoder = Decoder(config)

    wav_name = "a bad situation could become dramatically worse. /a bad situation could become dramatically worse. .wav"

    with sr.AudioFile(s_dir + "/" + wav_name) as source:
        audio_data = r.record(source)

    # process_raw() needs a buffer of PCM bytes, not the AudioData wrapper.
    # 16 kHz / 16-bit is requested because no other sample rate is configured
    # on the decoder above -- NOTE(review): confirm against the acoustic model.
    raw_pcm = audio_data.get_raw_data(convert_rate=16000, convert_width=2)

    decoder.start_utt()
    decoder.process_raw(raw_pcm, False, True)
    decoder.end_utt()

    print(decoder.hyp())

    ps.decode(
        audio_file=os.path.join(s_dir, wav_name),
        buffer_size=2048,
        no_search=False,
        full_utt=False)
    print(ps.hypothesis())  # the recognised text
#pocket()
Ejemplo n.º 6
0
class TestRawDecoder(TestCase):
    """Regression checks on the output of a default Pocketsphinx decode."""

    @classmethod
    def setUpClass(cls):
        # One decoder and one decode pass are shared by every test. Creating
        # it in __init__ would redo the work per test method, and would do so
        # while the test objects are being constructed.
        decoder = Pocketsphinx()
        decoder.decode()
        cls.ps = decoder

    def test_raw_decoder_lookup_word(self):
        """lookup_word returns the phones, or None for an unknown word."""
        self.assertEqual(self.ps.lookup_word('hello'), 'HH AH L OW')
        self.assertIsNone(self.ps.lookup_word('abcdf'))

    def test_raw_decoder_hypothesis(self):
        """Hypothesis text, score and confidence equal the reference values."""
        self.assertEqual(self.ps.hypothesis(), 'go forward ten meters')
        self.assertEqual(self.ps.score(), -7066)
        self.assertEqual(self.ps.confidence(), 0.04042641466841839)

    def test_raw_decoder_segments(self):
        """segments() lists the words plus sentence/silence markers."""
        self.assertEqual(
            self.ps.segments(),
            ['<s>', '<sil>', 'go', 'forward', 'ten', 'meters', '</s>'])

    def test_raw_decoder_best_hypothesis(self):
        """best() yields the expected ranked (text, score) pairs."""
        expected = [('go forward ten meters', -28034),
                    ('go for word ten meters', -28570),
                    ('go forward and majors', -28670),
                    ('go forward and meters', -28681),
                    ('go forward and readers', -28685),
                    ('go forward ten readers', -28688),
                    ('go forward ten leaders', -28695),
                    ('go forward can meters', -28695),
                    ('go forward and leaders', -28706),
                    ('go for work ten meters', -28722)]
        self.assertEqual(self.ps.best(), expected)
Ejemplo n.º 7
0
	def __init__(self, mode):

		# state
		self.micbuf = np.zeros((0, 4), 'uint16')
		self.outbuf = None
		self.buffer_stuff = 0
		self.mode = mode
		self.playchan = 0
		self.playsamp = 0
		
		# check mode
		if not (mode == "echo" or mode == "record" or mode == "record4"):
			error("argument not recognised")

		# robot name
		topic_base_name = "/" + os.getenv("MIRO_ROBOT_NAME")

		# publish
		topic = topic_base_name + "/control/stream"
		print ("publish", topic)
		self.pub_stream = rospy.Publisher(topic, Int16MultiArray, queue_size=0)

		# subscribe
		topic = topic_base_name + "/sensors/stream"
		print ("subscribe", topic)
		self.sub_stream = rospy.Subscriber(topic, UInt16MultiArray, self.callback_stream, queue_size=1, tcp_nodelay=True)

		# subscribe
		topic = topic_base_name + "/sensors/mics"
		print ("subscribe", topic)
		self.sub_mics = rospy.Subscriber(topic, Int16MultiArray, self.callback_mics, queue_size=5, tcp_nodelay=True)
		
		# report
		print "recording from 4 microphones for", RECORD_TIME, "seconds..."


		####### Speech Recongnition using Pocket-Sphinx #########  
		

		model_path = get_model_path()
		data_path = get_data_path()

		config = {

		'hmm' : os.path.join(model_path, 'en-us'), # Hidden Markov Model, Speech Recongnition model - trained probability scoring system
		'lm': os.path.join(model_path, 'en-us.lm.bin'), #language model
		'dict' : os.path.join(model_path, 'cmudict-en-us.dict') # language dictionary
		}

		ps = Pocketsphinx(**config)
		ps.decode(
		audio_file=("/tmp/input.wav"), #add temp input.wav file
		buffer_size=2048,
		no_search= False,
		full_utt=False)

		print("Recognized: ")
		print((ps.hypothesis())) ## output
		print("END")
Ejemplo n.º 8
0
    def test_jsgf(self):
        """The same hypothesis is produced by the 'turtle' LM and a JSGF grammar."""
        expected = 'go forward ten meters'
        decoder = Pocketsphinx(
            lm='deps/pocketsphinx/test/data/turtle.lm.bin',
            dic='deps/pocketsphinx/test/data/turtle.dic',
        )

        # Baseline decode with the 'turtle' language model.
        decoder.decode()
        self.assertEqual(decoder.hypothesis(), expected)

        # Build an FSG from one rule of the grammar file and activate it.
        grammar = Jsgf('deps/pocketsphinx/test/data/goforward.gram')
        move_rule = grammar.get_rule('goforward.move2')
        decoder.set_fsg(
            'goforward',
            grammar.build_fsg(move_rule, decoder.get_logmath(), 7.5))
        decoder.set_search('goforward')

        # Decode again, this time constrained by the 'goforward' grammar.
        decoder.decode()
        self.assertEqual(decoder.hypothesis(), expected)
Ejemplo n.º 9
0
 def test_cep_decoder_hypothesis(self):
     """Feed cepstral data from goforward.mfc and check the decode results."""
     decoder = Pocketsphinx()
     with open('deps/pocketsphinx/test/data/goforward.mfc', 'rb') as mfc:
         with decoder.start_utterance():
             # The first 4 bytes are discarded (presumably a file header).
             mfc.read(4)
             cepstra = mfc.read(13780)
             decoder.process_cep(cepstra, False, True)
     self.assertEqual(decoder.hypothesis(), 'go forward ten meters')
     self.assertEqual(decoder.score(), -7095)
     self.assertEqual(decoder.probability(), -32715)
Ejemplo n.º 10
0
 def test_cep_decoder_hypothesis(self):
     """Decode pre-computed cepstra and compare text, score and probability."""
     mfc_path = 'deps/pocketsphinx/test/data/goforward.mfc'
     ps = Pocketsphinx()
     # The file is opened first and the utterance started second, so the
     # utterance is ended before the file is closed.
     with open(mfc_path, 'rb') as f, ps.start_utterance():
         f.read(4)  # skip the leading 4 bytes
         ps.process_cep(f.read(13780), False, True)
     self.assertEqual(ps.hypothesis(), 'go forward ten meters')
     self.assertEqual(ps.score(), -7095)
     self.assertEqual(ps.probability(), -32715)
Ejemplo n.º 11
0
    def test_jsgf(self):
        """Decode with the 'turtle' LM, then with the 'goforward' JSGF grammar."""
        ps = Pocketsphinx(lm='deps/pocketsphinx/test/data/turtle.lm.bin',
                          dic='deps/pocketsphinx/test/data/turtle.dic')

        def decoded_text():
            # One decode pass followed by a read of the hypothesis.
            ps.decode()
            return ps.hypothesis()

        # 'turtle' language model
        self.assertEqual(decoded_text(), 'go forward ten meters')

        # Switch to the JSGF grammar: rule -> FSG -> active search.
        jsgf = Jsgf('deps/pocketsphinx/test/data/goforward.gram')
        fsg = jsgf.build_fsg(jsgf.get_rule('goforward.move2'),
                             ps.get_logmath(), 7.5)
        ps.set_fsg('goforward', fsg)
        ps.set_search('goforward')

        # 'goforward' grammar
        self.assertEqual(decoded_text(), 'go forward ten meters')
Ejemplo n.º 12
0
class TestPhoneme(TestCase):
    """Checks phoneme-level decoding driven by the allphone language model."""

    @classmethod
    def setUpClass(cls):
        # Decode once per class. Doing it in __init__ would repeat the decode
        # for each test method, at the time the test objects are constructed.
        cls.ps = Pocketsphinx(
            lm=False,
            dic=False,
            allphone='deps/pocketsphinx/model/en-us/en-us-phone.lm.bin',
            lw=2.0,
            pip=0.3,
            beam=1e-200,
            pbeam=1e-20,
            mmap=False
        )
        cls.ps.decode()

    def test_phoneme_hypothesis(self):
        """The hypothesis is the expected space-separated phone string."""
        self.assertEqual(
            self.ps.hypothesis(),
            'SIL G OW F AO R W ER D T AE N M IY IH ZH ER Z S V SIL'
        )

    def test_phoneme_best_phonemes(self):
        """segments() returns the same phones as individual list items."""
        self.assertEqual(self.ps.segments(), [
            'SIL', 'G', 'OW', 'F', 'AO', 'R', 'W',
            'ER', 'D', 'T', 'AE', 'N', 'M', 'IY',
            'IH', 'ZH', 'ER', 'Z', 'S', 'V', 'SIL'
        ])
Ejemplo n.º 13
0
class TestPhoneme(TestCase):
    """Regression checks for phoneme recognition with the allphone model."""

    @classmethod
    def setUpClass(cls):
        # The decoder is built and run once here instead of in __init__, so
        # the decode is not repeated for each test method and does not run
        # while the test objects are merely being constructed.
        decoder = Pocketsphinx(
            lm=False,
            dic=False,
            allphone='deps/pocketsphinx/model/en-us/en-us-phone.lm.bin',
            lw=2.0,
            pip=0.3,
            beam=1e-200,
            pbeam=1e-20,
            mmap=False
        )
        decoder.decode()
        cls.ps = decoder

    def test_phoneme_hypothesis(self):
        """hypothesis() gives the expected phone sequence as one string."""
        self.assertEqual(
            self.ps.hypothesis(),
            'SIL G OW F AO R D T AE N NG IY ZH ER S SIL'
        )

    def test_phoneme_best_phonemes(self):
        """segments() gives the expected phone sequence as a list."""
        self.assertEqual(self.ps.segments(), [
            'SIL', 'G', 'OW', 'F', 'AO', 'R', 'D', 'T',
            'AE', 'N', 'NG', 'IY', 'ZH', 'ER', 'S', 'SIL'
        ])
    full_utt=False
)

#print(ps.segments()) # => ['<s>', '<sil>', 'go', 'forward', 'ten', 'meters', '</s>']
#print('Detailed segments:', *ps.segments(detailed=True), sep='\n') # => [
#     word, prob, start_frame, end_frame
#     ('<s>', 0, 0, 24)
#     ('<sil>', -3778, 25, 45)
#     ('go', -27, 46, 63)
#     ('forward', -38, 64, 116)
#     ('ten', -14105, 117, 152)
#     ('meters', -2152, 153, 211)
#     ('</s>', 0, 212, 260)
# ]

print("hypothesis:\n" + ps.hypothesis())  # => go forward ten meters
print("probablity of correct:\n"+ str(ps.probability())) # => -32079
print("score:\n" + str(ps.score()))       # => -7066
print("confidence:\n" + str(ps.confidence()))  # => 0.04042641466841839

#print(*ps.best(count=10), sep='\n') # => [
#     ('go forward ten meters', -28034)
#     ('go for word ten meters', -28570)
#     ('go forward and majors', -28670)
#     ('go forward and meters', -28681)
#     ('go forward and readers', -28685)
#     ('go forward ten readers', -28688)
#     ('go forward ten leaders', -28695)
#     ('go forward can meters', -28695)
#     ('go forward and leaders', -28706)
#     ('go for work ten meters', -28722)
Ejemplo n.º 15
0
class HotwordRecognizer:
    """Hotword (wake-word) recognizer, a thin wrapper around |pocketsphinx|.

    The default hotwords are `'阿Q'` and `'R-cute'`.

    To define custom hotwords, see https://blog.51cto.com/feature09/2300352

    .. |pocketsphinx| raw:: html

        <a href='https://github.com/bambocher/pocketsphinx-python' target='blank'>pocketsphinx</a>

    .. |config| raw:: html

        <a href='https://github.com/bambocher/pocketsphinx-python#default-config' target='blank'>pocketsphinx Default config</a>

    :param hotword: a hotword or a list of hotwords, defaults to `['阿Q', 'R-cute']`
    :type hotword: str / list, optional
    :param hmm: see |config|
    :type hmm: str, optional
    :param lm: see |config|
    :type lm: str, optional
    :param dic: see |config|
    :type dic: str, optional
    """
    def __init__(self, **kwargs):
        self._no_search = False
        self._full_utt = False
        hotword = kwargs.pop('hotword', ['阿Q', 'R-cute'])
        self._hotwords = hotword if isinstance(hotword, list) else [hotword]

        model_path = get_model_path()
        opt = {
            'verbose': False,
            'hmm': os.path.join(model_path, 'en-us'),
            'lm': util.resource('sphinx/rcute.lm'),
            'dic': util.resource('sphinx/rcute.dic'),
        }
        # Caller-supplied options override the defaults above.
        opt.update(kwargs)
        self._rec = Pocketsphinx(**opt)

    def recognize(self, stream, timeout=None):
        """Listen on *stream* until one of the hotwords is recognized.

        :param stream: audio source; must provide `raw_read()` and, when a
            timeout is used, a `frame_duration` attribute
        :param timeout: maximum time to listen, in seconds; defaults to
            `None`, meaning listen until a hotword is recognized
        :type timeout: float, optional
        :return: the recognized hotword, or `None` if the timeout elapsed
            before any hotword was recognized
        :rtype: str
        :raises RuntimeError: if :meth:`cancel` is called while listening
        """
        self._cancel = False
        elapsed = 0.0
        in_speech = False
        with self._rec.start_utterance():
            while True:
                data = stream.raw_read()
                self._rec.process_raw(data, self._no_search, self._full_utt)
                # Only act when the in-speech state flips.
                if in_speech != self._rec.get_in_speech():
                    in_speech = not in_speech
                    # Speech just ended and there is a hypothesis to inspect.
                    if not in_speech and self._rec.hyp():
                        with self._rec.end_utterance():
                            hyp = self._rec.hypothesis()
                            if hyp in self._hotwords:
                                return hyp
                if self._cancel:
                    raise RuntimeError(
                        'Hotword detection cancelled by another thread')
                elif timeout:
                    # The duration comes from the `stream` argument; the
                    # previous code read an undefined name here and raised
                    # NameError whenever a timeout was given.
                    elapsed += stream.frame_duration
                    if elapsed > timeout:
                        return None

    def cancel(self):
        """Stop an ongoing :meth:`recognize` call."""
        self._cancel = True
Ejemplo n.º 16
0
)

#print(ps.segments())

# Save the detailed segments of the words; each segment
# contains the word, its probability, start_time and end_time.
#print('Detailed segments:', *ps.segments(detailed=True), sep='\n')

# with open('output_segments_obama_farewell_speech.txt', 'a') as f:
#     print(*ps.segments(detailed=True), sep='\n', file=f)
    
# Append one detailed segment per line to the segments file.
with open(filename_output_segments, 'a') as f:
    print(*ps.segments(detailed=True), sep='\n', file=f)

# Convert the audio to text and save it.
text = ps.hypothesis()

# The context manager closes the file even if the write fails.
with open(filename_sphinx, "w") as file1:
    file1.write(text)

# Load the segments into a dataframe.
# The file saved above must first be edited by hand (remove '(', ')' and ')
# and stored as the modified file that is read here.
#df = pd.read_csv('output_segments_donaldTrump_modified.txt', sep=",", header=None)
df = pd.read_csv(filename_output_segments_mod, sep=",", header=None)
df.columns = ["word", "prob","startTime", "endTime"]
df.head()
# Time taken for each word.
df['time_taken']=df['endTime'] - df['startTime']
df.head(20)
Ejemplo n.º 17
0
class SpeechToText:
    ''' Speech recognition with PocketSphinx.
    1. mode - one of two values: from_file or from_microphone
    1.1. from_file - recognise speech from a .wav file (sample rate >=16kHz, 16bit, mono)
    1.2. from_microphone - recognise speech from the microphone
    2. name_dataset - name of the dataset the language model was built from: plays_ru, subtitles_ru or conversations_ru '''
    def __init__(self, mode='from_microphone', name_dataset='plays_ru'):
        self.current_dirname = os.path.dirname(os.path.realpath(__file__))
        self.work_mode = mode
        model_path = get_model_path()

        if name_dataset not in ('plays_ru', 'subtitles_ru', 'conversations_ru'):
            print(
                '\n[E] Неверное значение name_dataset. Возможные варианты: plays_ru, subtitles_ru или conversations_ru\n'
            )
            return

        # All three model files live under the PocketSphinx model directory.
        hmm_path = os.path.join(model_path, 'zero_ru.cd_cont_4000')
        lm_path = os.path.join(model_path, 'ru_bot_' + name_dataset + '.lm')
        dic_path = os.path.join(model_path, 'ru_bot_' + name_dataset + '.dic')

        if self.work_mode == 'from_file':
            config = {'hmm': hmm_path, 'lm': lm_path, 'dict': dic_path}
            self.speech_from_file = Pocketsphinx(**config)
        elif self.work_mode == 'from_microphone':
            self.speech_from_microphone = LiveSpeech(
                verbose=False,
                sampling_rate=16000,
                buffer_size=2048,
                no_search=False,
                full_utt=False,
                hmm=hmm_path,
                lm=lm_path,
                dic=dic_path)
        else:
            print(
                '[E] Неподдерживаемый режим работы, проверьте значение аргумента mode.'
            )

    @staticmethod
    def _split_audio_name(f_name_audio):
        ''' Return (raw_name, wav_name, extension) derived from an audio file name.
        Only the last extension is replaced, so dots elsewhere in the name or path are kept. '''
        stem, ext = os.path.splitext(f_name_audio)
        return stem + '.raw', stem + '.wav', ext[1:]

    # TODO: add noise filters, e.g. with sox
    def get(self, f_name_audio=None):
        ''' Recognise speech with PocketSphinx. The mode is chosen when the object is created (from a file or from the microphone).
        1. f_name_audio - name of the .wav or .opus file with speech (for from_file mode; sample rate >=16kHz, 16bit, mono)
        2. returns a string with the recognised speech, 'error' if the .opus conversion failed, or None if f_name_audio is missing in from_file mode '''

        if self.work_mode == 'from_file':
            if f_name_audio is None:
                print(
                    '[E] В режиме from_file необходимо указывать имя .wav или .opus файла.'
                )
                return
            filename_audio_raw, filename_audio_wav, audio_format = \
                self._split_audio_name(f_name_audio)

            # Convert the .opus file to .wav
            if audio_format == 'opus':
                # An argument list (no shell) keeps quotes or shell
                # metacharacters in the file name from being interpreted;
                # -y overwrites an existing output file without prompting.
                command = ['ffmpeg', '-nostdin', '-y', '-i', f_name_audio,
                           filename_audio_wav]
                try:
                    proc = subprocess.Popen(command,
                                            stdout=subprocess.PIPE,
                                            stderr=subprocess.PIPE)
                except OSError:
                    # ffmpeg could not be started at all
                    return 'error'
                out, err = proc.communicate()
                # ffmpeg reports input problems as "<input name>: <reason>"
                if f_name_audio + ':' in err.decode():
                    return 'error'

            # Convert the .wav file to .raw
            # NOTE(review): ffmpeg writes the .wav relative to the working
            # directory, while it is read from this module's directory here.
            audio_file = AudioSegment.from_wav(self.current_dirname + '/' +
                                               filename_audio_wav)
            audio_file = audio_file.set_frame_rate(16000)
            audio_file.export(self.current_dirname + '/' + filename_audio_raw,
                              format='raw')

            # Decode the raw file and return the hypothesis
            self.speech_from_file.decode(audio_file=self.current_dirname +
                                         '/' + filename_audio_raw,
                                         buffer_size=2048,
                                         no_search=False,
                                         full_utt=False)
            return self.speech_from_file.hypothesis()
        elif self.work_mode == 'from_microphone':
            # Return the first phrase produced by the live recogniser.
            for phrase in self.speech_from_microphone:
                return str(phrase)
    p = pyaudio.PyAudio()  # Create an interface to PortAudio

    # See PyAudio Documentation
    stream = p.open(format=sample_format,
                    channels=channels,
                    rate=fs,
                    frames_per_buffer=chunk,
                    input=True)

    print("Running as a daemon")
    print("Recording")

    #this is the only one that needs PyAudio it seems
    while True:
        ps.start_utt()
        # When there is silence, assume they stopped speaking
        while stream.get_read_available() > 0:
            data = stream.read(chunk)
            ps.process_raw(data, False, False)
            frames.append(data)
        ps.end_utt()  # This is not part of the While loop

        # This prevents it from printing silence
        if (ps.hypothesis() != ''):
            print(ps.hypothesis())
            frames = []

    stream.stop_stream()
    stream.close()
    p.terminate()
          full_utt=False)

# => ['<s>', '<sil>', 'go', 'forward', 'ten', 'meters', '</s>']
print(ps.segments())
print('Detailed segments:', *ps.segments(detailed=True), sep='\n')  # => [
#     word, prob, start_frame, end_frame
#     ('<s>', 0, 0, 24)
#     ('<sil>', -3778, 25, 45)
#     ('go', -27, 46, 63)
#     ('forward', -38, 64, 116)
#     ('ten', -14105, 117, 152)
#     ('meters', -2152, 153, 211)
#     ('</s>', 0, 212, 260)
# ]

print(ps.hypothesis())  # => go forward ten meters
print(ps.probability())  # => -32079
print(ps.score())  # => -7066
print(ps.confidence())  # => 0.04042641466841839

print(*ps.best(count=10), sep='\n')  # => [
#     ('go forward ten meters', -28034)
#     ('go for word ten meters', -28570)
#     ('go forward and majors', -28670)
#     ('go forward and meters', -28681)
#     ('go forward and readers', -28685)
#     ('go forward ten readers', -28688)
#     ('go forward ten leaders', -28695)
#     ('go forward can meters', -28695)
#     ('go forward and leaders', -28706)
#     ('go for work ten meters', -28722)
Ejemplo n.º 20
0
	def __init__(self):
		"""Set up buffers and ROS topics, then record and transcribe one utterance.

		The recorded audio is written to ./tmp/input.wav and that same file
		is decoded with PocketSphinx; the hypothesis is printed.
		"""
		# state
		self.micbuf = np.zeros((0, 4), 'uint16')
		self.spkrbuf = None
		self.buffer_stuff = 0

		# robot name
		topic_base = "/" + os.getenv("MIRO_ROBOT_NAME") + "/"

		# publish
		topic = topic_base + "control/stream"
		print ("publish", topic)
		self.pub_stream = rospy.Publisher(topic, Int16MultiArray, queue_size=0)

		# subscribe
		topic = topic_base + "sensors/stream"
		print ("subscribe", topic)
		self.sub_stream = rospy.Subscriber(topic, UInt16MultiArray, self.callback_stream)

		# subscribe
		topic = topic_base + "sensors/mics"
		print ("subscribe", topic)
		self.sub_mics = rospy.Subscriber(topic, Int16MultiArray, self.callback_mics)

		# report
		print("recording on 4 microphone channels...")

		####### Speech recognition using PocketSphinx #########

		# obtain audio from microphone
		# NOTE(review): `sr.callback_mics` does not look like a
		# speech_recognition audio source; confirm which source is intended.
		r = sr.Recognizer()
		with sr.callback_mics() as source:
			print("Say Hello")
			audio = r.listen(source)

		# One path is used for both writing and decoding, so the decoder
		# reads the file that was just recorded.
		wav_path = "./tmp/input.wav"

		# write audio as a wav file
		with open(wav_path, "wb") as f:
			f.write(audio.get_wav_data())

		model_path = get_model_path()

		config = {
			'hmm': os.path.join(model_path, 'en-us'),  # acoustic model (HMM)
			'lm': os.path.join(model_path, 'en-us.lm.bin'),  # language model
			'dict': os.path.join(model_path, 'cmudict-en-us.dict')  # pronunciation dictionary
		}

		ps = Pocketsphinx(**config)
		ps.decode(
			audio_file=wav_path,
			buffer_size=2048,
			no_search=False,
			full_utt=False
		)

		print(ps.hypothesis())  # output
Ejemplo n.º 21
0
# Code retested by KhalsaLabs
# You can use your own audio file in code
# Raw or wav files would work perfectly
# For mp3 files, you need to modify the code (add a codec)

from __future__ import print_function
import os
from pocketsphinx import Pocketsphinx, get_model_path, get_data_path

model_path = get_model_path()
data_path = get_data_path()

# Decoder options: each one is a resource inside the model directory
# (acoustic model, language model, pronunciation dictionary).
config = {
    option: os.path.join(model_path, resource)
    for option, resource in (
        ('hmm', 'en-us'),
        ('lm', 'en-us.lm.bin'),
        ('dict', 'cmudict-en-us.dict'),
    )
}

ps = Pocketsphinx(**config)
# Replace 'test1.wav' with your own audio file from the data directory.
ps.decode(audio_file=os.path.join(data_path, 'test1.wav'),
          buffer_size=2048,
          no_search=False,
          full_utt=False)

# Print the recognised text.
print(ps.hypothesis())
Ejemplo n.º 22
0
    def loop(self):
        """Poll until ROS shuts down; transcribe each finished recording.

        Whenever ``self.outbuf`` is set, it is written to /tmp/input.wav,
        converted with ffmpeg, decoded with PocketSphinx, answered through
        ``mml.say`` for two known phrases, and the recording state is reset.
        """
        while not rospy.core.is_shutdown():
            # A non-None outbuf means a recording has finished.
            if self.outbuf is not None:
                print("writing output file")
                outfilename = '/tmp/input.wav'
                wav_out = wave.open(outfilename, 'wb')
                # (nchannels, sampwidth, framerate, nframes, comptype, compname)
                wav_out.setparams((1, 4, 20000, 0, 'NONE', 'not compressed'))
                print("Starting Reshape")
                # Column 0 is selected twice, then everything is flattened.
                samples = np.reshape(self.outbuf[:, [0, 0]], (-1))
                print("writing frames")
                print(len(samples))
                # Each sample is packed as a little-endian signed 16-bit
                # value and the whole buffer is written in one call.
                wav_out.writeframes(
                    b''.join(struct.pack('<h', s) for s in samples))

                print("Closing file")
                wav_out.close()

                model_path = get_model_path()
                data_path = get_data_path()

                # acoustic model (HMM), language model, pronunciation dictionary
                hmm_dir = os.path.join(model_path, 'en-us')
                lm_file = os.path.join(model_path, 'en-us.lm.bin')
                dict_file = os.path.join(model_path, 'cmudict-en-us.dict')
                config = {'hmm': hmm_dir, 'lm': lm_file, 'dict': dict_file}

                print("Converting via FFMPEG")
                cmd = "ffmpeg -y -i /tmp/input.wav -f s16le -acodec pcm_s16le -ar 16000 -af 'aresample=20000' -ac 1 /tmp/inputConv.wav -loglevel quiet"
                os.system(cmd)

                print("Decoding Via Pocketsphinx")
                ps = Pocketsphinx(**config)
                # decode the converted recording
                ps.decode(audio_file="/tmp/inputConv.wav",
                          buffer_size=8192,
                          no_search=False,
                          full_utt=False)

                print("Recognized: ")
                print(ps.hypothesis())  # output

                # Reply to the phrases that are recognised.
                if ps.hypothesis() == "hello":
                    mml.say("Hello there human")  # Change this to whatever
                elif "how are you" in ps.hypothesis():
                    mml.say("I'm always good")
                print("END")

                # Reset the recording and playback state for the next round.
                self.micbuf = np.zeros((0, 4), 'uint16')
                self.outbuf = None
                self.buffer_stuff = 0
                self.playchan = 0
                self.playsamp = 0

            # short pause between polls
            time.sleep(0.02)