def raw_audio2wav(raw_audio: bytes, pyaudio_config: dict) -> bytes or None:
    '''
    Wrap raw audio frames into an in-memory WAV file.

    Args:
        raw_audio: raw audio frames, written to the WAV data chunk as is
        pyaudio_config: pyaudio configuration; the keys read here are
            'format' (a pyaudio sample format constant), 'channels'
            and 'rate'

    Returns:
        bytes of the complete WAV file: if converted
        None: if failed (the exception is passed to ErrorLogger)
    '''
    try:
        # The module-level helper returns the sample width without creating
        # a PyAudio() instance that would never be terminated.
        samp_size = pyaudio.get_sample_size(pyaudio_config['format'])
        with io.BytesIO() as buffer:
            # Closing the writer finalizes the WAV header; the buffer stays
            # open because the writer did not open it itself.
            with wave.open(buffer, 'wb') as wave_writer:
                wave_writer.setnchannels(pyaudio_config['channels'])
                wave_writer.setsampwidth(samp_size)
                wave_writer.setframerate(pyaudio_config['rate'])
                wave_writer.writeframes(raw_audio)
            return buffer.getvalue()
    except Exception as e:
        ErrorLogger(__file__, e)
        return None
def _parse_intent(self, intent: Dict) -> Dict:
    '''
    Extract the bot answer and the command description from an api.ai response

    Args:
        intent: decoded response received from api.ai bot

    Returns:
        dictionary = {
            'text': text,
            'intent_name': intent_name,
            'action': {'name': name, 'parameters': parameters}
        }
        'name' and 'parameters' stay empty strings unless the response
        carries 'actionIncomplete' set to False.
        None: if the response could not be parsed
    '''
    try:
        result = intent['result']
        parsed = {
            'text': result['fulfillment']['speech'],
            'intent_name': result['metadata']['intentName'],
            'action': {'name': '', 'parameters': ''},
        }
        # The command is reported only when all of its required fields are
        # filled, i.e. the flag is present and is exactly False.
        if result.get('actionIncomplete') is False:
            parsed['action']['name'] = result['action']
            parsed['action']['parameters'] = result['parameters']
        return parsed
    except Exception as e:
        ErrorLogger(__file__, e)
        return None
def request(self, msg: str) -> Dict or None:
    '''
    Send a text query to the api.ai bot and return its parsed answer

    Args:
        msg: message to send to api.ai bot; a value that is not a str
            is replaced by '.'

    Returns:
        result of self._parse_intent for the decoded response:
            if data received
        None: if failed
    '''
    try:
        api_request = self._ai.text_request()
        api_request.lang = 'ru'
        api_request.session_id = self.session_id
        if isinstance(msg, str):
            api_request.query = msg
        else:
            api_request.query = '.'
        raw_body = api_request.getresponse().read()
        decoded = json.loads(raw_body.decode('utf-8'))
        return self._parse_intent(decoded)
    except Exception as e:
        ErrorLogger(__file__, e)
        return None
def __init__(self, pyaudio_config: dict, min_rms=100):
    '''
    Constructor. Stores the audio settings, opens an input stream and
    starts it.

    Args:
        pyaudio_config: pyaudio configuration in format
            dictionary = {
                'format': audio_format,
                'channels': channels_number,
                'rate': rate,
                'frames_per_buffer': frames_per_buffer,
                'input_device': input_device_number
            }
            The input device number is also accepted under the key
            'device'; 'input_device' wins when both are present.
        min_rms: minimal sound rms value

    No exception is raised: any error is passed to ErrorLogger, which can
    leave the instance without an opened stream.
    '''
    try:
        self.set_min_rms(min_rms=min_rms)
        # set up pyaudio configuration and start audio stream
        self.format = pyaudio_config['format']
        self.channels = pyaudio_config['channels']
        self.rate = pyaudio_config['rate']
        self.chunk_size = pyaudio_config['frames_per_buffer']
        # The configuration format was documented with the key 'device'
        # while the code only read 'input_device'; accept both so that a
        # config in the documented format no longer fails with KeyError.
        if 'input_device' in pyaudio_config:
            self.input_device = pyaudio_config['input_device']
        else:
            self.input_device = pyaudio_config['device']
        self.audio = pyaudio.PyAudio()
        self.stream = self.audio.open(format=self.format,
                                      channels=self.channels,
                                      rate=self.rate,
                                      input=True,
                                      input_device_index=self.input_device,
                                      frames_per_buffer=self.chunk_size)
        self.stream.start_stream()
    except Exception as e:
        ErrorLogger(__file__, e)
def listen_audio(self, timeout=1000000) -> bytes or None:
    '''
    Wait for sound on the input stream and record it until silence

    Chunks are read from self.stream until one of them reaches
    self.min_rms (rms is computed with a sample width of 2 bytes); that
    chunk is then handed to self.record_audio.

    Args:
        timeout: seconds to wait for a sound before giving up

    Returns:
        result of self.record_audio: if a sound was detected
        None: if the timeout elapsed or an error occurred
    '''
    try:
        started_at = time.time()
        while time.time() - started_at < timeout:
            frame = self.stream.read(self.chunk_size,
                                     exception_on_overflow=False)
            loud_enough = audioop.rms(frame, 2) >= self.min_rms
            if loud_enough:
                # sound detected: record raw data until silence
                return self.record_audio(last_chunk=frame)
        return None
    except Exception as e:
        ErrorLogger(__file__, e)
        return None
def read_pyaudio_config() -> dict or None:
    '''
    Fetch the 'pyaudio' parameter from ROS Parameter Server

    Returns:
        the parameter itself: if it is a dictionary
        the result of json.loads: if it is a string
        None: if the parameter is absent, has another type or the string
            could not be decoded (the error is passed to ErrorLogger)
    '''
    if not rospy.has_param('pyaudio'):
        return None

    raw_config = rospy.get_param('pyaudio')
    if isinstance(raw_config, dict):
        return raw_config
    if not isinstance(raw_config, str):
        return None

    try:
        return json.loads(raw_config)
    except Exception as e:
        ErrorLogger(__file__, e)
        return None
def text_to_speech(self, text: str) -> bytes or None:
    '''
    Translate text to speech

    Requests tts.voicetech.yandex.net with the URL-quoted text and
    self.yandex_voice_key, asking for wav format, lang=ru and
    speaker=ermil.

    Args:
        text: text to translate

    Returns:
        body of the HTTP response as bytes: if translated
        None: if failed (the error is passed to ErrorLogger)
    '''
    try:
        url = 'https://tts.voicetech.yandex.net/generate?text='
        url += parse.quote(text)
        url += '&format=wav&lang=ru&speaker=ermil&key=' + self.yandex_voice_key
        # The context manager closes the HTTP response (and its connection)
        # even when reading the body fails; it used to be left open.
        with request.urlopen(url) as response:
            print('yandex!')  # progress trace kept from the original code
            return response.read()
    except Exception as e:
        ErrorLogger(__file__, e)
        return None
def play_audio(self, audio_data_src: http.client.HTTPResponse or io.BytesIO, format: str = 'wav') -> None:
    '''
    Plays audio data through a pyaudio output stream

    Args:
        audio_data_src: binary audio source, read with the wave module
        format: audio format; only 'wav' is played, any other value
            makes the call return without doing anything

    Errors are not raised: any exception is passed to ErrorLogger. The
    output stream and the PyAudio instance are released even when
    playback fails.
    '''
    if format != 'wav':
        return

    p = None
    stream = None
    try:
        # number of frames read and written per iteration
        chunk = 1024
        # The reader is closed on exit; it does not close audio_data_src
        # itself when it is given an already opened file-like object.
        with wave.open(audio_data_src, 'rb') as f:
            p = pyaudio.PyAudio()
            stream = p.open(format=p.get_format_from_width(f.getsampwidth()),
                            channels=f.getnchannels(),
                            rate=f.getframerate(),
                            output=True)
            data = f.readframes(chunk)
            # readframes returns empty bytes once the source is exhausted
            while data:
                stream.write(data)
                data = f.readframes(chunk)
            stream.stop_stream()
    except Exception as e:
        ErrorLogger(__file__, e)
    finally:
        # Release audio resources on every path; previously an error during
        # playback left the stream and the PyAudio instance open.
        if stream is not None:
            try:
                stream.close()
            except Exception as e:
                ErrorLogger(__file__, e)
        if p is not None:
            try:
                p.terminate()
            except Exception as e:
                ErrorLogger(__file__, e)
def record_audio(self, last_chunk, silence_timeout=1.2):
    '''Records speech until silence

    Chunks are read from self.stream. A chunk is kept when its rms reaches
    a third of self.min_rms; a chunk whose rms reaches self.min_rms also
    restarts the silence timer. Rms is computed with a sample width of
    2 bytes.

    Args:
        last_chunk: last chunk before record starting, it becomes the
            beginning of the recording
        silence_timeout: seconds without a chunk reaching self.min_rms
            after which the recording stops

    Returns:
        buf: contains recorded audio data, bytes type
        None: if failed (the error is passed to ErrorLogger)
    '''
    print('rec')  # progress trace kept from the original code
    try:
        # bytearray grows in place; repeated bytes += copies the whole
        # recording on every chunk.
        buf = bytearray(last_chunk)
        silence_start = time.time()
        while (time.time() - silence_start) < silence_timeout:
            chunk = self.stream.read(self.chunk_size,
                                     exception_on_overflow=False)
            level = audioop.rms(chunk, 2)
            if level >= self.min_rms / 3:
                buf += chunk
            if level >= self.min_rms:
                silence_start = time.time()
        return bytes(buf)
    except Exception as e:
        ErrorLogger(__file__, e)
        return None
def recognize_speech(self, wav_data: bytes) -> str or None:
    '''
    Recognize speech in wave format audio data

    Posts the audio to asr.yandex.net and picks, among the 'variant'
    children of the XML response, the first one with the highest
    'confidence' attribute; variants whose confidence is not above 0 are
    never picked.

    Args:
        wav_data: wave audio data with speech

    Returns:
        text of the chosen variant: if data received
        None: if no variant was chosen or the request failed
    '''
    try:
        endpoint = ('https://asr.yandex.net/asr_xml?uuid=' + self._make_uuid()
                    + '&key=' + self.yandex_voice_key + '&topic=queries')
        request_headers = {
            'Host': 'asr.yandex.net',
            'Content-Type': 'audio/x-wav'
        }
        response = requests.post(endpoint, wav_data, headers=request_headers)
        print(response.text)

        best_text = None
        best_conf = 0
        for node in ET.fromstring(response.text):
            if node.tag != 'variant':
                continue
            confidence = float(node.attrib['confidence'])
            if confidence > best_conf:
                best_conf, best_text = confidence, node.text
        return best_text
    except Exception as e:
        ErrorLogger(__file__, e)
        return None