def main():
    rospy.init_node('beep_publisher')
    # parameters
    loop_rate = rospy.get_param('~loop_rate', 100)
    buffer_size = rospy.get_param('~buffer_size', 4096)
    use_audio_common = rospy.get_param('~use_audio_common', False)
    topicname_data = '~audio'
    topicname_info = '~info'
    # import message modules
    if use_audio_common:
        from audio_common_msgs.msg import AudioData
        from audio_stream_msgs.msg import AudioInfo
    else:
        from audio_stream_msgs.msg import AudioData, AudioInfo
    # publishers
    publisher_data = rospy.Publisher(topicname_data, AudioData, queue_size=1000)
    publisher_info = rospy.Publisher(topicname_info, AudioInfo, queue_size=10)
    # messages
    msg_data = AudioData()
    msg_info = AudioInfo()
    # generate a default buffer
    msg_info.channels = 1
    msg_info.sampling_rate = 16000
    msg_info.width = 2
    frequency_f0 = 440  # fundamental frequency [Hz]
    frequency_beep = 1.0  # beep period [s]
    buffer_beep = generateBeep(
        0.1 * ((2**8)**2),
        [440, 480, 500, 520, 400, 300, 600, 800],
        frequency_beep,
        msg_info.sampling_rate,
        10.0,
        0.5).astype(np.uint16).tobytes()
    r = rospy.Rate(loop_rate)
    index = 0
    time_played = 0.0  # sec
    time_start = time.time()
    while not rospy.is_shutdown():
        rospy.loginfo('publish')
        if index > len(buffer_beep):
            index = 0
        msg_info.header.stamp = rospy.Time.now()
        if (len(buffer_beep) - index) < buffer_size:
            msg_data.data = buffer_beep[index:]
            time_played += 1.0 * (len(buffer_beep) - index) / (msg_info.width * msg_info.sampling_rate)
        else:
            msg_data.data = buffer_beep[index:index + buffer_size]
            time_played += 1.0 * buffer_size / (msg_info.width * msg_info.sampling_rate)
        # sleep until wall-clock time catches up with the amount of audio published
        time_passed = time.time() - time_start
        while time_played - time_passed > 0:
            time_passed = time.time() - time_start
            if rospy.is_shutdown():
                return
            r.sleep()
        publisher_data.publish(msg_data)
        publisher_info.publish(msg_info)
        index += buffer_size
def audio_cb(self, data):
    #: :type data: AudioBuffer
    # AudioBuffer contains (in the 'data' field): int16[] data
    rospy.loginfo("Callback received!")
    ad = AudioData()  # AudioData is: uint8[] data
    data_uint8 = numpy.array(data.data, dtype=numpy.uint8)
    ad.data = data_uint8.tolist()
    self.pub.publish(ad)
def sample_audio_test(self, in_data, frame_count, time_info, status):
    # data_int = np.fromstring(in_data, dtype=np.int16)
    # msg_audio = AudioData16()
    # msg_audio.data = data_int
    msg_audio = AudioData()
    msg_audio.data = in_data
    self.raw_pub.publish(msg_audio)
    return None, pyaudio.paContinue
def tts(self, words):
    result = self.client.synthesis(words, 'zh', 1, {
        'vol': 5,
    })
    if isinstance(result, dict):
        rospy.logerr(result)
        return None
    audio_data = AudioData()
    audio_data.data = result
    return audio_data
def read_from_cache(text):
    if not os.path.exists("/tmp/xiaoqiang_tts"):
        os.mkdir("/tmp/xiaoqiang_tts")
        return None
    if os.path.exists(os.path.join("/tmp/xiaoqiang_tts", text.data)):
        with open(os.path.join("/tmp/xiaoqiang_tts", text.data), "rb") as audio_file:
            audio_data = AudioData()
            audio_data.data = audio_file.read()
            return audio_data
    return None
def on_audio(self, data, channel):
    self.pub_audios[channel].publish(AudioData(data=data))
    if channel == self.main_channel:
        self.pub_audio.publish(AudioData(data=data))
        if self.is_speeching:
            if len(self.speech_audio_buffer) == 0:
                self.speech_audio_buffer = self.speech_prefetch_buffer
            self.speech_audio_buffer += data
        else:
            self.speech_prefetch_buffer += data
            self.speech_prefetch_buffer = self.speech_prefetch_buffer[-self.speech_prefetch_bytes:]
def stream_audio(self):
    reccmd = self.arecord_command
    self.p = subprocess.Popen(reccmd, stdout=subprocess.PIPE)
    while not rospy.is_shutdown():
        self.last_data = self.p.stdout.read(self.block_size)
        # stdout.read() returns b'' on EOF, never None, so test for truthiness
        if self.last_data:
            ad = AudioData()
            ad.data = tuple(np.frombuffer(self.last_data, dtype=np.uint8))
            self.pub.publish(ad)
    self.close()
def run(self):
    try:
        data = self.stream.read(self.CHUNK)
    except IOError as ex:
        # PyAudio raises IOError with the error code as the second argument
        if ex.args[1] != pyaudio.paInputOverflowed:
            raise
        print("ERROR")
        return
    amplitude = np.frombuffer(data, np.uint8)
    print(type(amplitude))
    msg = AudioData()
    msg.data = amplitude.tolist()
    self.pub.publish(msg)
def read_from_cache(text):
    m = hashlib.md5()
    m.update(text.data)
    audio_filename = m.hexdigest()
    if not os.path.exists("xiaoqiang_tts"):
        os.mkdir("xiaoqiang_tts")
        return None
    if os.path.exists(os.path.join("xiaoqiang_tts", audio_filename)) and \
            os.path.getsize(os.path.join("xiaoqiang_tts", audio_filename)) > 0:
        with open(os.path.join(
                "xiaoqiang_tts", audio_filename), "rb") as audio_file:
            audio_data = AudioData()
            audio_data.data = audio_file.read()
            return audio_data
    return None
def topic_cb(self, msg):
    # Retrieve 1 channel of audio data
    tmp = list(msg.data)
    tmp = tmp[0::len(msg.channelMap)]
    # Convert audio format: int16 -> little-endian byte buffer (uint8 pairs)
    dataBuff = ""
    for i in range(0, len(tmp)):
        if tmp[i] < 0:
            tmp[i] = tmp[i] + 65536
        dataBuff = dataBuff + chr(tmp[i] % 256)
        dataBuff = dataBuff + chr((tmp[i] - (tmp[i] % 256)) // 256)
    # Publish audio topic
    audio_data = AudioData()
    audio_data.data = dataBuff
    self.audiodata_pub.publish(audio_data)
def main():
    rospy.init_node('AudioCapturer', anonymous=True)
    publisher = rospy.Publisher("rawAudioChunk", AudioData, queue_size=20)

    p = pyaudio.PyAudio()
    stream = p.open(
        input_device_index=None,  # Default device
        format=FORMAT,
        channels=CHANNELS,
        rate=RATE,
        input=True,
        frames_per_buffer=CHUNK_SIZE)

    print("*Recording*")

    # Loop while the node is not closed and audio is working. Nothing like
    # `spin()` or `loop_sleep()` is needed here because PyAudio (presumably)
    # blocks on IO and puts the process/thread to sleep, and there are no
    # ROS callbacks to service.
    while stream.is_active() and not rospy.is_shutdown():
        try:
            in_data = stream.read(CHUNK_SIZE, exception_on_overflow=False)
            publisher.publish(AudioData(data=in_data))
        except IOError as e:
            print("I/O error({0}): {1}".format(e.errno, e.strerror))

    if not stream.is_active():
        print("Stream was not active.")

    stream.stop_stream()
    stream.close()
    p.terminate()
def onInitialize(self):
    # rospy.loginfo(self._properties.getProperty("type_name") + " version " + self._properties.getProperty("version"))
    rospy.loginfo("Copyright (C) 2010-2011 Yosuke Matsusaka")
    rospy.loginfo("Copyright (C) 2017 Isao Hara")
    rospy.loginfo("Copyright (C) 2020 Isao Hara")
    self._prevtime = time.clock()

    # configuration parameters
    self._samplerate = [16000, ]
    self.bindParameter("rate", self._samplerate, 16000)
    self._character = ["male", ]
    self.bindParameter("character", self._character, "male")
    self._sampling_rate = [0, ]
    self.bindParameter("sampling_rate", self._sampling_rate, 0)

    # create inport
    self._tts_sub = rospy.Subscriber('/tts/data', String, self.onData)

    # create outport for wave data
    self._audio_out = rospy.Publisher('/audio_play/audio', AudioData, queue_size=1)
    self._outdata = AudioData()

    self._status = "ready"
    self._is_active = False
    return True
def publish(self):
    # pop input data from the head of the buffer
    input_data = self.audio_data_buffer[:self.length_queue_popup]
    self.audio_data_buffer = self.audio_data_buffer[self.length_queue_popup:]
    try:
        is_speech = self._vad.is_speech(
            input_data, self._audio_info.sample_rate)
    except Exception as e:
        rospy.logerr('Got an error while processing. {}'.format(e))
        return
    self._pub_is_speech.publish(Bool(is_speech))
    if self._current_speaking and is_speech:
        self._speech_audio_buffer += input_data
    elif not self._current_speaking and is_speech:
        self._speech_audio_buffer = input_data
        self._current_speaking = True
    elif self._current_speaking and not is_speech:
        self._speech_audio_buffer = self._speech_audio_buffer + input_data
        # 2 bytes per sample (16-bit audio)
        speech_duration = (len(self._speech_audio_buffer) / 2.0) / self._audio_info.sample_rate
        if speech_duration > self._minimum_duration:
            self._pub_speech_audio.publish(
                AudioData(self._speech_audio_buffer))
        else:
            rospy.logwarn(
                'speech duration: {} dropped'.format(speech_duration))
        self._current_speaking = False
        self._speech_audio_buffer = None
        rospy.logwarn('Cleared speech_audio_buffer.')
def on_timer(self, event):
    stamp = event.current_real or rospy.Time.now()
    is_voice = self.respeaker.is_voice()
    doa_rad = math.radians(self.respeaker.direction - 180.0)
    doa_rad = angles.shortest_angular_distance(
        doa_rad, math.radians(self.doa_yaw_offset))
    doa = math.degrees(doa_rad)

    # vad
    if is_voice != self.prev_is_voice:
        self.pub_vad.publish(Bool(data=is_voice))
        self.prev_is_voice = is_voice

    # doa
    if doa != self.prev_doa:
        self.pub_doa_raw.publish(Int32(data=doa))
        self.prev_doa = doa

        msg = PoseStamped()
        msg.header.frame_id = self.sensor_frame_id
        msg.header.stamp = stamp
        ori = T.quaternion_from_euler(math.radians(doa), 0, 0)
        msg.pose.position.x = self.doa_xy_offset * np.cos(doa_rad)
        msg.pose.position.y = self.doa_xy_offset * np.sin(doa_rad)
        msg.pose.orientation.w = ori[0]
        msg.pose.orientation.x = ori[1]
        msg.pose.orientation.y = ori[2]
        msg.pose.orientation.z = ori[3]
        self.pub_doa.publish(msg)

    if self.asr_engine != "silent":
        # cut out the speech-only audio to send to the ASR service:
        # record the timestamp of the last time the mic array detected voice
        if is_voice and not self.pepper_is_speaking:
            self.speech_stopped = stamp
        # - Pepper is speaking: fall through to the cut branch (correct)
        # - Pepper is not speaking and the grace period has expired: the second
        #   condition fails, so fall through to the cut branch (correct)
        # - Pepper is not speaking and we are within the grace period: keep
        #   is_speaking = True and do not cut the audio yet (correct)
        if not self.pepper_is_speaking and (stamp - self.speech_stopped < rospy.Duration(
                self.speech_continuation)):
            # grace period `speech_continuation` during which we just keep "speaking"
            self.is_speaking = True
        elif self.is_speaking:
            # no speech for too long while we were speaking, so cut the audio here
            buf = self.speech_audio_buffer
            self.speech_audio_buffer = str()
            self.is_speaking = False
            duration = 8.0 * len(buf) * self.respeaker_audio.bitwidth
            duration = duration / self.respeaker_audio.rate / self.respeaker_audio.bitdepth
            rospy.loginfo("Speech detected for %.3f seconds" % duration)
            if self.speech_min_duration <= duration < self.speech_max_duration:
                self.pub_speech_audio.publish(AudioData(data=buf))
def on_audio(self, data):
    self.pub_audio.publish(AudioData(data=data))
    if self.is_speeching:
        if len(self.speech_audio_buffer) == 0:
            self.speech_audio_buffer = self.speech_prefetch_buffer
        self.speech_audio_buffer += data
    else:
        self.speech_prefetch_buffer += data
        self.speech_prefetch_buffer = self.speech_prefetch_buffer[-self.speech_prefetch_bytes:]
def read_from_cache(text):
    global locale
    m = hashlib.md5()
    m.update(text.data)
    audio_filename = m.hexdigest()
    if not os.path.exists("xiaoqiang_tts"):
        os.mkdir("xiaoqiang_tts")
        return None
    # compatibility with earlier versions
    if os.path.exists(os.path.join("xiaoqiang_tts", audio_filename)) and \
            os.path.getsize(os.path.join("xiaoqiang_tts", audio_filename)) > 0 and locale == "zh-cn":
        with open(os.path.join("xiaoqiang_tts", audio_filename), "rb") as audio_file:
            audio_data = AudioData()
            audio_data.data = audio_file.read()
            return audio_data
    if not os.path.exists(os.path.join("xiaoqiang_tts", locale)):
        os.mkdir(os.path.join("xiaoqiang_tts", locale))
        return None
    # read from the path for the current locale
    if os.path.exists(os.path.join("xiaoqiang_tts", locale, audio_filename)) and \
            os.path.getsize(os.path.join("xiaoqiang_tts", locale, audio_filename)) > 0:
        with open(os.path.join("xiaoqiang_tts", locale, audio_filename), "rb") as audio_file:
            audio_data = AudioData()
            audio_data.data = audio_file.read()
            return audio_data
    return None
def on_audio(self, data):
    speech_data = None
    for i, ch in enumerate(self.channels):
        chan_data = data[:, ch].tostring()
        if not speech_data:
            speech_data = chan_data
        self.pub_audio[i].publish(AudioData(data=chan_data))
    if self.is_speeching:
        if len(self.speech_audio_buffer) == 0:
            self.speech_audio_buffer = self.speech_prefetch_buffer
        self.speech_audio_buffer += speech_data
    else:
        self.speech_prefetch_buffer += speech_data
        self.speech_prefetch_buffer = self.speech_prefetch_buffer[-self.speech_prefetch_bytes:]
def on_timer(self, event):
    stamp = event.current_real or rospy.Time.now()
    is_voice = self.respeaker.is_voice()
    doa_rad = math.radians(self.respeaker.direction - 180.0)
    doa_rad = angles.shortest_angular_distance(
        doa_rad, math.radians(self.doa_yaw_offset))
    doa = math.degrees(doa_rad)

    # vad
    if is_voice != self.prev_is_voice:
        self.pub_vad.publish(Bool(data=is_voice))
        self.prev_is_voice = is_voice

    # doa
    if doa != self.prev_doa:
        self.pub_doa_raw.publish(Int32(data=doa))
        self.prev_doa = doa

        msg = PoseStamped()
        msg.header.frame_id = self.sensor_frame_id
        msg.header.stamp = stamp
        ori = T.quaternion_from_euler(math.radians(doa), 0, 0)
        msg.pose.position.x = self.doa_xy_offset * np.cos(doa_rad)
        msg.pose.position.y = self.doa_xy_offset * np.sin(doa_rad)
        msg.pose.orientation.w = ori[0]
        msg.pose.orientation.x = ori[1]
        msg.pose.orientation.y = ori[2]
        msg.pose.orientation.z = ori[3]
        self.pub_doa.publish(msg)

    # speech audio
    if is_voice:
        self.speech_stopped = stamp
    if stamp - self.speech_stopped < rospy.Duration(
            self.speech_continuation):
        self.is_speeching = True
    elif self.is_speeching:
        buf = self.speech_audio_buffer
        self.speech_audio_buffer = str()
        self.is_speeching = False
        duration = 8.0 * len(buf) * self.respeaker_audio.bitwidth
        duration = duration / self.respeaker_audio.rate / self.respeaker_audio.bitdepth
        rospy.loginfo("Speech detected for %.3f seconds" % duration)
        if self.speech_min_duration <= duration < self.speech_max_duration:
            self.pub_speech_audio.publish(AudioData(data=buf))
def onInitialize(self): rospy.loginfo("GoogleTextToSpeechRos version " + __version__) rospy.loginfo("Copyright (C) 2019 Isao Hara") # # create inport for audio stream self._inport = rospy.Subscriber('/tts/data', String, self.onData) # # create outport for result self._outdata = AudioData() self._outport = rospy.Publisher('/audio_play/audio', AudioData, queue_size=10) # # self.show_copyrights() return True
def send_audio(self):
    # print bytearray(buff)
    # print len(buff)
    # print len(buff)/200
    with contextlib.closing(wave.open("test.wav", 'wb')) as wf:
        wf.setnchannels(1)
        wf.setsampwidth(2)
        wf.setframerate(16000)
        wf.writeframes(bytearray(self.tmp_buff))
    tmp = []
    with contextlib.closing(open("test.wav", 'rb')) as wf:
        ba = wf.read()
        for c in ba:
            value = struct.unpack('B', c)[0]
            tmp.append(value)
    self.pub_audio.publish(AudioData(tmp))
    self.tmp_buff = None
def main():
    rospy.init_node('colored_noise_publisher')
    # parameters
    loop_rate = int(rospy.get_param('~loop_rate', 100))
    buffer_size = int(rospy.get_param('~buffer_size', 4096))
    use_audio_common = bool(rospy.get_param('~use_audio_common', False))
    beta = float(rospy.get_param('~exponent', 1))
    level = float(rospy.get_param('~level', 0.5))
    # import message modules
    if use_audio_common:
        from audio_common_msgs.msg import AudioData
        from audio_stream_msgs.msg import AudioInfo
    else:
        from audio_stream_msgs.msg import AudioData, AudioInfo
    # publishers
    publisher_data = rospy.Publisher('~audio', AudioData, queue_size=1000)
    publisher_info = rospy.Publisher('~info', AudioInfo, queue_size=10)
    # gen a default buffer
    msg_info = AudioInfo()
    msg_data = AudioData()
    msg_info.channels = 1
    msg_info.sampling_rate = 16000
    msg_info.width = 2
    A = level * ((2**8)**2)
    buffer_wave = generateColoredNoise(
        beta, A, msg_info.sampling_rate, 30).astype(np.uint16).tobytes()
    r = rospy.Rate(loop_rate)
    index = 0
    time_played = 0.0  # sec
    time_start = time.time()
    while not rospy.is_shutdown():
        rospy.loginfo('publish')
        # update msg_info header
        msg_info.header.stamp = rospy.Time.now()
        # fill msg_data.data (wrap around the end of the buffer)
        if (len(buffer_wave) - index) < buffer_size:
            msg_data.data = buffer_wave[index:] + buffer_wave[:index + buffer_size - len(buffer_wave)]
            time_played += 1.0 * (len(buffer_wave) - index) / (
                msg_info.width * msg_info.sampling_rate)
            index = index + buffer_size - len(buffer_wave)
        else:
            msg_data.data = buffer_wave[index:index + buffer_size]
            time_played += 1.0 * buffer_size / (msg_info.width *
                                                msg_info.sampling_rate)
            index = index + buffer_size
        time_passed = time.time() - time_start
        while time_played > time_passed:
            time_passed = time.time() - time_start
            if rospy.is_shutdown():
                return
            r.sleep()
        # publish them
        publisher_data.publish(msg_data)
        publisher_info.publish(msg_info)
def on_raw_audio(self, data, mic):
    pub = self.raw_audio_pubs[mic]
    pub.publish(AudioData(data=data))
def __init__(self):
    self.sub_audio = rospy.Subscriber('/audio', AudioData, self.audioCB)
    self.audio = AudioData()
import time
import pickle
import argparse

import numpy as np
import rospy
from audio_common_msgs.msg import AudioData
from pyhelper_fns.vis_utils import MyAnimationMulti


def gray_rgb(image):
    return np.concatenate([np.expand_dims(image, 2),
                           np.expand_dims(image, 2),
                           np.expand_dims(image, 2)], axis=2)


vis_tool = MyAnimationMulti(None, numPlots=6, isIm=np.ones(6))

if __name__ == '__main__':
    rospy.init_node('traj_replay_audio', anonymous=True)
    pub = rospy.Publisher('replay_audio/audio', AudioData, queue_size=10)
    msg = AudioData()

    parser = argparse.ArgumentParser()
    parser.add_argument('path', type=str, help='path to pickle file')
    args = parser.parse_args()
    FILE_NAME = args.path

    with open(FILE_NAME, "rb") as handle:
        data_dict = pickle.load(handle)
    states = data_dict["state_list"]
    actions = data_dict["action_list"]
    images = data_dict["image_list"]
    audio = data_dict["audio_list"]
    print("len of states {} len of actions {} len of images {}".format(
        len(states), len(actions), len(images)))
def main():
    rospy.init_node('audio_stream_publisher')
    # parameters
    loop_rate = rospy.get_param('~loop_rate', 100)
    is_loop = rospy.get_param('~is_loop', False)
    filename = rospy.get_param('~filename')
    buffer_size = rospy.get_param('~buffer_size', 4096)
    max_precede = rospy.get_param('~max_precede', 10.0)
    use_audio_common = rospy.get_param('~use_audio_common', False)
    topicname_data = '~audio'
    topicname_info = '~info'
    # import message modules
    if use_audio_common:
        from audio_common_msgs.msg import AudioData
        from audio_stream_msgs.msg import AudioInfo
    else:
        from audio_stream_msgs.msg import AudioData, AudioInfo
    # publishers
    publisher_data = rospy.Publisher(topicname_data, AudioData, queue_size=1)
    publisher_info = rospy.Publisher(topicname_info, AudioInfo, queue_size=1)
    # messages
    msg_data = AudioData()
    msg_info = AudioInfo()
    # open wave file
    wavefile = wave.open(filename, 'r')
    wave_channels = wavefile.getnchannels()
    wave_width = wavefile.getsampwidth()
    wave_framerate = wavefile.getframerate()
    wave_framenum = wavefile.getnframes()
    wave_data = wavefile.readframes(wave_framenum)
    infostr = ('== Wave file info ==\n'
               'channels: {}\n'
               'sample size [byte]: {}\n'
               'sampling rate [hz]: {}\n'
               'number of frames: {} ( {} secs )').format(
                   wave_channels,
                   wave_width,
                   wave_framerate,
                   wave_framenum,
                   wave_framenum * 1.0 / wave_framerate)
    rospy.loginfo(infostr)
    if wave_channels != 1 or not (wave_width == 1 or wave_width == 2):
        rospy.logerr('Currently only 8-bit or 16-bit monaural wave files are supported.')
        sys.exit(1)
    if buffer_size % wave_width > 0:
        rospy.logerr('buffer_size must be a multiple of wave_width')
        sys.exit(1)
    msg_info.channels = wave_channels
    msg_info.sampling_rate = wave_framerate
    msg_info.width = wave_width
    msg_info.header.stamp = rospy.Time.now()
    publisher_info.publish(msg_info)
    r = rospy.Rate(loop_rate)
    time_played = 0.0  # sec
    time_start = time.time()
    index = 0
    while not rospy.is_shutdown():
        # when index runs off wave_data, return to the start (loop mode) or stop
        if index > wave_framenum:
            if is_loop:
                index = 0
            else:
                break
        # preparing msg data
        msg_info.header.stamp = rospy.Time.now()
        if (wave_framenum - index) < buffer_size:
            msg_data.data = wave_data[index:]
            time_played += 1.0 * (wave_framenum - index) / (wave_width * wave_framerate)
        else:
            msg_data.data = wave_data[index:index + buffer_size]
            time_played += 1.0 * buffer_size / (wave_width * wave_framerate)
        # sleep if played time is too far ahead of passed time
        time_passed = time.time() - time_start
        while time_played - time_passed > max_precede:
            time_passed = time.time() - time_start
            if rospy.is_shutdown():
                return
            r.sleep()
        # publishing data
        publisher_data.publish(msg_data)
        publisher_info.publish(msg_info)
        # debug print
        rospy.logdebug('time played: {}, time passed {}'.format(time_played, time_passed))
        # increase index
        index += buffer_size
def publishAudio(self):
    if self.chunk_count > self.MIN_CHUNKS_AUDIO_LENGTH:
        self.publisher.publish(AudioData(self.voiced_frames))
    self.discardAudio()
def on_timer(self, event):
    stamp = event.current_real or rospy.Time.now()
    is_voice = self.respeaker.is_voice()
    doa_rad = math.radians(self.respeaker.direction - 180.0)
    doa_rad = angles.shortest_angular_distance(
        doa_rad, math.radians(self.doa_yaw_offset))
    doa = math.degrees(doa_rad)

    if self.is_active:
        self.wait_command_count += 1
        if self.wait_command_count > self.wait_command_thres:
            self.is_active = False
            self.respeaker.set_led_off()
    if self.is_waiting_response:
        self.wait_response_count += 1
        if self.wait_response_count > self.wait_response_thres:
            self.is_waiting_response = False
            self.respeaker.set_led_off()

    # vad
    # if is_voice != self.prev_is_voice:
    #     self.pub_vad.publish(Bool(data=is_voice))
    #     self.prev_is_voice = is_voice

    # doa
    if doa != self.prev_doa:
        self.pub_doa_raw.publish(Int32(data=doa))
        self.prev_doa = doa

    # msg = PoseStamped()
    # msg.header.frame_id = self.sensor_frame_id
    # msg.header.stamp = stamp
    # ori = T.quaternion_from_euler(math.radians(doa), 0, 0)
    # msg.pose.position.x = self.doa_xy_offset * np.cos(doa_rad)
    # msg.pose.position.y = self.doa_xy_offset * np.sin(doa_rad)
    # msg.pose.orientation.w = ori[0]
    # msg.pose.orientation.x = ori[1]
    # msg.pose.orientation.y = ori[2]
    # msg.pose.orientation.z = ori[3]
    # self.pub_doa.publish(msg)

    # speech audio
    if is_voice:
        self.speech_stopped = stamp
    if stamp - self.speech_stopped < rospy.Duration(self.speech_continuation):
        self.is_speeching = True
    elif self.is_speeching:
        buf = self.speech_audio_buffer
        self.speech_audio_buffer = str()
        self.is_speeching = False
        duration = 8.0 * len(buf) * self.respeaker_audio.bitwidth
        duration = duration / self.respeaker_audio.rate / self.respeaker_audio.bitdepth
        rospy.loginfo("Speech detected for %.3f seconds" % duration)
        status = self.detector.RunDetection(buf)
        if status > 0:
            print("found kws")
            self.is_active = True
            self.wait_command_count = 0
            self.respeaker.set_led_listen()
            return
        else:
            print("not kws")
            # self.respeaker.set_led_trace()
        if self.is_active:
            self.is_active = False
            if self.speech_min_duration <= duration < self.speech_max_duration:
                self.respeaker.set_led_think()
                self.is_waiting_response = True
                self.wait_response_count = 0
                self.pub_speech_audio.publish(AudioData(data=buf))
def subscribe(self):
    pass

def unsubscribe(self):
    pass


if __name__ == '__main__':
    rospy.init_node('audio_capture_microphone')
    s = SPH0645Audio()
    r = rospy.Rate(float(s.rate) / s.frame_per_buffer)
    signal.signal(signal.SIGINT, s.kill)
    while s.stream.is_active():
        if s._connection_status is not True:
            # If not subscribed, only read the stream to avoid
            # 'IOError: [Errno -9981] Input overflowed'
            # https://stackoverflow.com/questions/10733903/pyaudio-input-overflowed
            in_data = np.frombuffer(s.stream.read(1024, exception_on_overflow=False), np.int32)
            r.sleep()
            continue
        # Convert input data to 16 bit
        in_data = np.frombuffer(s.stream.read(1024), np.int32)
        in_data = in_data >> 14  # this 18-bit integer is raw data from the microphone
        int16_data = (in_data >> 2).astype(np.int16)
        # Retrieve 1 channel data
        chunk_per_channel = len(int16_data) / s.channels
        channel_data = int16_data[s.channel::s.channels]
        # Publish audio topic
        s.pub_audio.publish(AudioData(data=channel_data.tostring()))
        r.sleep()
def on_audio(self, data):
    self.pub_audio.publish(AudioData(data=data))
#
# Author: Randoms
#
import rospy
from audio_common_msgs.msg import AudioData
from engines.baidu_tts import BaiduTTS as bd_client
from engines.xunfei_tts import XunfeiTTS as xf_client
from std_msgs.msg import String, Bool
import time
import struct
import numpy
import matplotlib.pyplot as plt
from scipy import signal

AUDIO_CACHE = AudioData()
CURRENT_AUDIO = AudioData()
NEW_AUDIO_FLAG = False
MIN_VOLUM = 2000
AUDIO_STATUS = False

# 3rd-order Butterworth high-pass filter, normalized cutoff 0.10
b, a = signal.butter(3, 0.10, 'high')


def is_end(audio_data):
    # last two seconds of 16 kHz, 16-bit (2 bytes/sample) audio
    last_two_seconds = audio_data.data[2 * -16000 * 2:]
    if len(last_two_seconds) < 2 * 16000 * 2:
        return False
    last_two_seconds = [float(struct.unpack("<h", "".join(
        last_two_seconds[2 * x:2 * x + 2]))[0]) for x in range(0, 2 * 16000)]
    sf = signal.filtfilt(b, a, last_two_seconds)
            break
    sounds = sounds[0]
    positions = positions[0]
    rate = rospy.Rate(MSG_FREQ)
    sound = np.int16(sounds.mean(0) * 32768)
    delete_all = Marker()
    delete_all.action = Marker.DELETEALL
    delete_all.ns = 'sound_sources'
    delete_all = MarkerArray(markers=[delete_all])
    for t in xrange((sound.shape[0] + frames_per_msg - 1) / frames_per_msg):
        begin = t * frames_per_msg
        end = min((t + 1) * frames_per_msg, sound.shape[0])
        audio_pub.publish(AudioData(data=sound[begin:end].tostring()))
        markers = MarkerArray()
        for i, (snd, pos) in enumerate(zip(sounds, positions)):
            scale = np.abs(snd[begin:end]).max()
            if scale == 0.:
                continue
            marker = Marker()
            marker.header.stamp = rospy.Time.now()
            marker.header.frame_id = 'base_footprint'
            marker.type = Marker.SPHERE
            marker.ns = 'sound_sources'
            marker.id = i
            marker.pose.position.x = (pos[0] - MAP_SIZE / 2. + .5) * resolution
            marker.pose.position.y = (pos[1] - MAP_SIZE / 2. + .5) * resolution
            marker.pose.orientation.w = 1.
def callback(self, data):
    rospy.loginfo("Callback received!")
    ad = AudioData()
    data_uint8 = numpy.array(data.data, dtype=numpy.uint8)
    ad.data = data_uint8.tolist()
    self.pub.publish(ad)