def rasa_nlu():
    """Demo pipeline: microphone -> Google ASR -> Rasa NLU, with print callbacks."""
    mic = MicrophoneModule(5000)
    asr = GoogleASRModule("en-US")
    asr_printer = CallbackModule(
        callback=lambda iu: print("%s (%f) - %s" % (iu.text, iu.confidence, iu.final))
    )
    nlu = RasaNLUModule("data/rasa/models/nlu/default/current")
    nlu_printer = CallbackModule(callback=lambda iu: print(iu.act, "-", iu.concepts))

    mic.subscribe(asr)
    asr.subscribe(asr_printer)
    asr.subscribe(nlu)
    nlu.subscribe(nlu_printer)

    # Explicit setup first (same order as before), then run with run_setup=False.
    for module in (nlu, asr, mic, asr_printer, nlu_printer):
        module.setup()
    print("All setup")

    pipeline = (mic, asr, asr_printer, nlu, nlu_printer)
    for module in pipeline:
        module.run(run_setup=False)

    input()  # block until the user presses Enter

    for module in pipeline:
        module.stop()
def google_asr():
    """Demo pipeline: microphone -> Google ASR, printing incremental hypotheses."""
    mic = MicrophoneModule(5000)
    asr = GoogleASRModule("en-US")  # en-US or de-DE or ....
    printer = CallbackModule(
        callback=lambda iu: print("%s (%f) - %s" % (iu.text, iu.stability, iu.final))
    )

    mic.subscribe(asr)
    asr.subscribe(printer)

    pipeline = (mic, asr, printer)
    for module in pipeline:
        module.run()
    input()  # keep running until Enter is pressed
    for module in pipeline:
        module.stop()
def repeat_demo():
    """Demo: echo what the user says (mic -> ASR -> text dispatch -> TTS -> speaker)."""
    mic = MicrophoneModule(5000)
    asr = GoogleASRModule("en-US")
    text_dispatcher = TextDispatcherModule()
    tts = GoogleTTSModule("en-US", "en-US-Wavenet-A")
    audio_dispatcher = AudioDispatcherModule(5000)
    speaker = StreamingSpeakerModule(5000)

    # Linear chain: each module feeds the next one.
    chain = (mic, asr, text_dispatcher, tts, audio_dispatcher, speaker)
    for upstream, downstream in zip(chain, chain[1:]):
        upstream.subscribe(downstream)

    for module in chain:
        module.setup()
    print("All setup")

    for module in chain:
        module.run(run_setup=False)

    input()  # block until the user presses Enter

    for module in chain:
        module.stop()
def audio_demo():
    """Demo: loop microphone audio straight to the speakers, in two run/stop cycles."""
    mic = MicrophoneModule(5000)
    speaker = StreamingSpeakerModule(5000)
    mic.subscribe(speaker)

    # First cycle: run until Enter, then stop.
    mic.run()
    speaker.run()
    input()
    mic.stop()
    speaker.stop()

    # Second cycle: wait for Enter, restart, run until Enter again.
    input()
    mic.run()
    speaker.run()
    input()
    mic.stop()
    speaker.stop()
def audio():
    """Demo: mic -> speaker passthrough with a CustomModule tap on the mic stream.

    Streams 100 ms chunks of 16 kHz audio from the microphone to both the
    speaker and a custom processing module until the user presses Enter.
    """
    rate = 16000      # sample rate in Hz
    chunk_time = 0.1  # seconds of audio per chunk
    chunk_size = int(rate * chunk_time)
    # (removed unused `sample_width = 2` — it was never passed to any module)

    mic = MicrophoneModule(chunk_size, rate=rate)
    speaker = StreamingSpeakerModule(chunk_size, rate)
    # tap = DebugModule()  # alternative: plain debug sink
    tap = CustomModule(chunk_size, rate=rate)

    mic.subscribe(speaker)
    mic.subscribe(tap)

    mic.run()
    speaker.run()
    tap.run()
    input()  # keep running until Enter is pressed
    mic.stop()
    speaker.stop()
    tap.stop()
# Stand-alone ROS wiring: publish mic and (incremental) ASR output as ROS topics.
# NOTE(review): `mic`, `asr`, and `iasr` are not defined in this chunk — they are
# presumably created earlier in the file; verify before running.
rospy.init_node("Robot_Ready_SDS_RosNode", anonymous=True)

mic_publisher = RosPublisher('mic')
asr_publisher = RosPublisher('asr')
# asr_subscriber = RosSubscriber('asr',debug=True)
debug = DebugModule()

# hook modules up to each other
mic.subscribe(asr)
mic.subscribe(mic_publisher)
asr.subscribe(iasr)
iasr.subscribe(asr_publisher)
iasr.subscribe(debug)

# start modules
mic.run()
mic_publisher.run()
asr.run()
asr_publisher.run()
# asr_subscriber.run()
iasr.run()
debug.run()

input()  # keep things running

# rosnode.stop()
mic.stop()
mic_publisher.stop()
asr.stop()
asr_publisher.stop()
# asr_subscriber.stop()
iasr.stop()   # fix: iasr was run() above but was never stopped
debug.stop()  # fix: debug was run() above but was never stopped
class Hearing(object):
    """
    The Hearing component of the Spoken Dialog System.

    Wires a microphone into voice-activity detection and, optionally, Google
    ASR, incremental ASR, on-disk audio recording, and debug output.

    Components:
        - MicrophoneModule (or MicrophoneOutputBypass when bypass=True)
        - VADFrames
        - GoogleASRModule (optional, use_asr)
        - IncrementalizeASRModule (optional, use_iasr)
        - AudioRecorderModule (optional, record)
    """

    # Default cache location; instances may override via the cache_dir argument.
    CACHE_DIR = "/tmp"

    def __init__(
        self,
        chunk_time=None,
        chunk_size=None,
        sample_rate=16000,
        bytes_per_sample=2,
        language="en-US",
        nchunks=20,
        use_asr=True,
        use_iasr=False,
        record=False,
        debug=False,
        bypass=False,
        cache_dir="/tmp",
    ):
        """Build and wire the sub-modules.

        Exactly one of chunk_time (seconds) or chunk_size (samples) must be
        given; the other is derived from sample_rate. use_iasr implies
        use_asr. When record is True, user audio is written under
        <cache_dir>/hearing/user_audio.wav.
        """
        self.sample_rate = sample_rate
        assert (chunk_time is not None or chunk_size is not None), "please provide either chunk_time or chunk_size"
        if chunk_size is not None:
            self.chunk_size = int(chunk_size)
            self.chunk_time = chunk_size / sample_rate
        else:
            self.chunk_time = chunk_time
            self.chunk_size = int(chunk_time * sample_rate)
        self.bytes_per_sample = bytes_per_sample
        self.use_asr = use_asr
        self.use_iasr = use_iasr
        self.record = record
        self.debug = debug
        self.cache_dir = cache_dir

        # Incremental ASR consumes the base ASR output, so force it on.
        if self.use_iasr:
            self.use_asr = True

        # Components that are always used
        if bypass:
            self.in_mic = MicrophoneOutputBypass(
                chunk_size=self.chunk_size,
                rate=self.sample_rate,
                sample_width=self.bytes_per_sample,
            )
        else:
            self.in_mic = MicrophoneModule(
                chunk_size=self.chunk_size,
                rate=self.sample_rate,
                sample_width=self.bytes_per_sample,
            )
        self.vad_frames = VADFrames(
            chunk_time=self.chunk_time,
            sample_rate=self.sample_rate,
            mode=3,  # presumably the WebRTC-VAD aggressiveness level — TODO confirm
            debug=debug,
        )
        self.in_mic.subscribe(self.vad_frames)

        # Optional Components
        if self.use_asr:
            self.asr = GoogleASRModule(
                language=language,
                nchunks=nchunks,  # m chunks to trigger a new prediction
                rate=self.sample_rate,
            )
            self.in_mic.subscribe(self.asr)
            if self.use_iasr:
                self.iasr = IncrementalizeASRModule(
                    threshold=0.8
                )  # Gets only the newly added words at each increment
                self.asr.subscribe(self.iasr)

        if self.record:
            self.cache_dir = join(cache_dir, "hearing")
            makedirs(self.cache_dir, exist_ok=True)
            print("Hearing: ", self.cache_dir)
            wav_filename = join(self.cache_dir, "user_audio.wav")
            self.audio_record = AudioRecorderModule(
                wav_filename, rate=sample_rate, sample_width=bytes_per_sample)
            self.in_mic.subscribe(self.audio_record)

        if self.debug:
            self.asr_debug = ASRDebugModule()
            if self.use_asr:
                self.asr.subscribe(self.asr_debug)

        logging.info(f"{self.name}: Initialized @ {time.time()}")

    @property
    def name(self):
        """Class name, used as the logging tag."""
        return self.__class__.__name__

    def __repr__(self):
        """Multi-line summary of the active configuration."""
        s = "\n" + "=" * 40
        s += "\n" + self.__class__.__name__
        s += f"\nsample_rate: {self.sample_rate}"
        s += f"\nchunk_time: {self.chunk_time}"
        s += f"\nchunk_size: {self.chunk_size}"
        s += f"\nbytes_per_sample: {self.bytes_per_sample}"
        s += f"\nrecord: {self.record}"
        s += f"\nuse_asr: {self.use_asr}"
        s += f"\ndebug: {self.debug}"
        s += "\n" + "=" * 40
        return s

    def setup(self, **kwargs):
        """Set up the active sub-modules.

        NOTE(review): self.vad_frames (and self.asr_debug when debug is on)
        are not set up here although run()/stop() do manage them — confirm
        whether run(**kwargs) performs their setup implicitly.
        """
        self.in_mic.setup(**kwargs)
        if self.use_asr:
            self.asr.setup(**kwargs)
        if self.use_iasr:
            self.iasr.setup(**kwargs)
        if self.record:
            self.audio_record.setup(**kwargs)
        logging.info(f"{self.name}: Setup")

    def run(self, **kwargs):
        """Start all active sub-modules."""
        self.in_mic.run(**kwargs)
        self.vad_frames.run(**kwargs)
        if self.use_asr:
            self.asr.run(**kwargs)
        if self.use_iasr:
            self.iasr.run(**kwargs)
        if self.record:
            self.audio_record.run(**kwargs)
        if self.debug:
            if self.use_asr:
                self.asr_debug.run(**kwargs)
            # self.iasr_debug.run(run_setup=run_setup)
        logging.info(f"{self.name}: run @ {time.time()}")

    def stop(self, **kwargs):
        """Stop all active sub-modules (mirror of run())."""
        self.in_mic.stop(**kwargs)
        self.vad_frames.stop(**kwargs)
        if self.use_asr:
            self.asr.stop(**kwargs)
        if self.use_iasr:
            self.iasr.stop(**kwargs)
        if self.record:
            self.audio_record.stop(**kwargs)
        if self.debug:
            if self.use_asr:
                self.asr_debug.stop(**kwargs)
        logging.info(f"{self.name}: stop_components @ {time.time()}")
def init_all(robot: cozmo.robot.Robot):
    """Instantiate, wire, start, and (after Enter is pressed) stop all modules
    of the Cozmo spoken-dialog pipeline: mic -> ASR -> incremental ASR ->
    words-as-classifiers -> dialogue manager -> robot, plus a camera ->
    object-detection -> feature-extraction vision path feeding back into them.
    """
    # Local resource paths for the dialogue domain and the vision models.
    domain_dir = '/home/casey/git/retico/data/cozmo/dm/dialogue.xml'
    # aod_* are only used by the commented-out Azure object detector below.
    aod_endpoint = ""
    aod_key = ""
    wac_dir = '/home/casey/git/retico/data/wac/subset'
    mask_rcnn_labels = '/home/casey/git/retico/data/maskrcnn/label_map.pbtxt'
    mask_rcnn_model = '/home/casey/git/retico/data/maskrcnn/frozen_inference_graph.pb'

    # Variables exposed to the OpenDial dialogue manager (producer noted per line).
    opendial_variables = ['face_count',      # CozmoStateModule
                          'num_objs',        # ObjectDetector
                          'near_object',     # CozmoRefer
                          'exploring',       # CozmoRefer
                          'aligned',         # CozmoRefer
                          'word_to_find',    # WordsAsClassifiersModule
                          'best_object',     # WordsAsClassifiersModule
                          'obj_confidence']  # WordsAsClassifiersModule

    #
    # INSTANTIATE MODULES
    #
    # mic = RespeakerMicrophoneModule('192.168.0.102:8000')
    # asr = GoogleASRModule(rate=16000)
    mic = MicrophoneModule(1000)
    asr = GoogleASRModule()
    iasr = IncrementalizeASRModule()
    dm = OpenDialModule(domain_dir=domain_dir, variables=opendial_variables)
    cozmo_refer = CozmoReferModule(robot)
    cozmo_camera = CozmoCameraModule(robot)
    # cozmo_camera = WebcamModule()
    # cozmo_state = CozmoStateModule(robot)
    # object_detector = AzureObjectDetectionModule(aod_key, aod_endpoint)
    object_detector = MaskrRCNNObjectDetection(mask_rcnn_labels, mask_rcnn_model)
    feature_extractor = KerasObjectFeatureExtractorModule()
    wac = WordsAsClassifiersModule(wac_dir=wac_dir)
    debug = DebugModule()

    # psi related modules
    # WriterSingleton should use the *source* ip address (i.e., this machine's)
    # WriterSingleton(ip='192.168.0.101', port='12346')  # create the zeromq writer
    # psi = ZeroMQWriter(topic='retico')

    # mic as input
    mic.subscribe(asr)
    asr.subscribe(iasr)
    iasr.subscribe(wac)
    wac.subscribe(dm)
    dm.subscribe(cozmo_refer)

    # robot state as input
    # cozmo_state.subscribe(dm)
    wac.subscribe(cozmo_refer)
    object_detector.subscribe(dm)
    object_detector.subscribe(cozmo_refer)
    cozmo_refer.subscribe(dm)

    # robot camera as input
    cozmo_camera.subscribe(object_detector)
    object_detector.subscribe(feature_extractor)
    feature_extractor.subscribe(wac)
    #
    feature_extractor.subscribe(debug)
    # iasr.subscribe(psi)
    # wac.subscribe(psi)
    # dm.subscribe(psi)
    # feature_extractor.subscribe(psi)
    # object_detector.subscribe(psi)

    #
    # INITIALIZE MODULES
    #
    mic.run()
    asr.run()
    iasr.run()
    dm.run()
    cozmo_refer.run()  # IF I MAKE THIS RUN EVERYTHING SLOWS DOWN
    # cozmo_state.run()
    cozmo_camera.run()
    object_detector.run()
    feature_extractor.run()
    wac.run()
    debug.run()
    # psi.run()

    input()  # keep everything running

    mic.stop()
    asr.stop()
    iasr.stop()
    cozmo_refer.stop()
    # cozmo_state.stop()
    dm.stop()
    cozmo_camera.stop()
    object_detector.stop()
    feature_extractor.stop()
    wac.stop()
    debug.stop()
# NOTE(review): the statements below were orphaned from an unseen function
# (a duplicate of audio()'s epilogue). At module level they referenced
# m1/m2/db before any definition and would raise NameError on import, so
# they were removed:
#   m2.run(); db.run(); input(); m1.stop(); m2.stop(); db.stop()

if __name__ == "__main__":
    # google_asr()
    m1 = MicrophoneModule(5000)
    m2 = GoogleASRModule("en-US")  # en-US or de-DE or ....
    m3 = CustomTextModule()

    m1.subscribe(m2)
    m2.subscribe(m3)

    m1.run()
    m2.run()
    m3.run()
    input()  # keep running until Enter is pressed
    m1.stop()
    m2.stop()
    m3.stop()