# Example 1
def rasa_nlu():
    """Demo pipeline: microphone -> Google ASR -> Rasa NLU, with print callbacks."""
    mic = MicrophoneModule(5000)
    asr = GoogleASRModule("en-US")
    asr_printer = CallbackModule(callback=lambda x: print("%s (%f) - %s" % (
        x.text, x.confidence, x.final)))
    nlu = RasaNLUModule("data/rasa/models/nlu/default/current")
    nlu_printer = CallbackModule(callback=lambda x: print(x.act, "-", x.concepts))

    mic.subscribe(asr)
    asr.subscribe(asr_printer)
    asr.subscribe(nlu)
    nlu.subscribe(nlu_printer)

    # Set everything up first (NLU before ASR before mic), then run without
    # re-triggering setup.
    for module in (nlu, asr, mic, asr_printer, nlu_printer):
        module.setup()

    print("All setup")

    pipeline = (mic, asr, asr_printer, nlu, nlu_printer)
    for module in pipeline:
        module.run(run_setup=False)

    input()  # block until the user presses Enter

    for module in pipeline:
        module.stop()
# Example 2
def google_asr():
    """Stream microphone audio through Google ASR and print each hypothesis."""
    mic = MicrophoneModule(5000)
    asr = GoogleASRModule("en-US")  # en-US or de-DE or ....
    printer = CallbackModule(callback=lambda x: print("%s (%f) - %s" % (
        x.text, x.stability, x.final)))

    mic.subscribe(asr)
    asr.subscribe(printer)

    pipeline = (mic, asr, printer)
    for module in pipeline:
        module.run()

    input()  # run until Enter is pressed

    for module in pipeline:
        module.stop()
# Example 3
def repeat_demo():
    """Echo demo: recognize the user's speech and speak it right back.

    Chain: mic -> ASR -> text dispatcher -> Google TTS -> audio dispatcher
    -> streaming speaker.
    """
    mic = MicrophoneModule(5000)
    asr = GoogleASRModule("en-US")
    text = TextDispatcherModule()
    tts = GoogleTTSModule("en-US", "en-US-Wavenet-A")
    audio_out = AudioDispatcherModule(5000)
    speaker = StreamingSpeakerModule(5000)

    pipeline = (mic, asr, text, tts, audio_out, speaker)

    # Subscribe each module to its successor in the chain.
    for src, dst in zip(pipeline, pipeline[1:]):
        src.subscribe(dst)

    for module in pipeline:
        module.setup()

    print("All setup")

    for module in pipeline:
        module.run(run_setup=False)

    input()  # run until Enter is pressed

    for module in pipeline:
        module.stop()
# Example 4
def audio_demo():
    """Loop microphone audio to the speakers; Enter pauses, resumes, then quits."""
    mic = MicrophoneModule(5000)
    speaker = StreamingSpeakerModule(5000)

    mic.subscribe(speaker)

    def start():
        mic.run()
        speaker.run()

    def halt():
        mic.stop()
        speaker.stop()

    start()
    input()  # Enter: pause the loopback
    halt()

    input()  # Enter: resume
    start()

    input()  # Enter: shut down
    halt()
# Example 5
def audio():
    """Loop the microphone to the speakers while tapping the stream with a CustomModule.

    Runs at 16 kHz with 100 ms chunks until the user presses Enter.
    """
    rate = 16000      # sampling rate in Hz
    chunk_time = 0.1  # seconds of audio per chunk
    chunk_size = int(rate * chunk_time)
    # FIX: removed unused local `sample_width` (assigned but never read).

    mic = MicrophoneModule(chunk_size, rate=rate)
    speaker = StreamingSpeakerModule(chunk_size, rate)
    # tap = DebugModule()
    tap = CustomModule(chunk_size, rate=rate)

    # Fan the mic audio out to both the speaker and the custom tap.
    mic.subscribe(speaker)
    mic.subscribe(tap)

    for module in (mic, speaker, tap):
        module.run()

    input()  # run until Enter is pressed

    for module in (mic, speaker, tap):
        module.stop()
# Example 6
# ROS bridge demo: publish mic audio and incremental ASR output as ROS topics.
# NOTE(review): `mic`, `asr` and `iasr` are assumed to be constructed earlier
# (e.g. MicrophoneModule / GoogleASRModule / IncrementalizeASRModule) — their
# creation is not visible in this snippet; confirm against the full script.
rospy.init_node("Robot_Ready_SDS_RosNode", anonymous=True)
mic_publisher = RosPublisher('mic')
asr_publisher = RosPublisher('asr')
# asr_subscriber = RosSubscriber('asr',debug=True)

debug = DebugModule()

# hook modules up to each other
mic.subscribe(asr)
mic.subscribe(mic_publisher)
asr.subscribe(iasr)
iasr.subscribe(asr_publisher)
iasr.subscribe(debug)

# initialize modules
mic.run()
mic_publisher.run()
asr.run()
asr_publisher.run()
# asr_subscriber.run()
iasr.run()
debug.run()

input()  # keep things running

# rosnode.stop()
mic.stop()
mic_publisher.stop()
asr.stop()
asr_publisher.stop()
# asr_subscriber.stop()
iasr.stop()   # FIX: was run() above but never stopped
debug.stop()  # FIX: was run() above but never stopped
# Example 7
class Hearing(object):
    """
    The Hearing component of the Spoken Dialog System.

    Components:
        - MicrophoneModule
        - ASRModule
        - IncrementalizeASRModule
    """

    # Default base directory for cached artifacts (recordings etc.).
    CACHE_DIR = "/tmp"

    def __init__(
        self,
        chunk_time=None,
        chunk_size=None,
        sample_rate=16000,
        bytes_per_sample=2,
        language="en-US",
        nchunks=20,
        use_asr=True,
        use_iasr=False,
        record=False,
        debug=False,
        bypass=False,
        cache_dir="/tmp",
    ):
        """Build the hearing pipeline.

        Args:
            chunk_time: Chunk duration in seconds. Exactly one of chunk_time
                or chunk_size must be provided.
            chunk_size: Chunk size in samples.
            sample_rate: Audio sampling rate in Hz.
            bytes_per_sample: Sample width in bytes.
            language: ASR language code (e.g. "en-US").
            nchunks: Number of chunks that trigger a new ASR prediction.
            use_asr: Attach a GoogleASRModule to the microphone.
            use_iasr: Attach an IncrementalizeASRModule (forces use_asr=True).
            record: Record user audio to <cache_dir>/hearing/user_audio.wav.
            debug: Attach an ASRDebugModule to the ASR output.
            bypass: Use MicrophoneOutputBypass instead of MicrophoneModule.
            cache_dir: Base directory for recordings.
        """
        self.sample_rate = sample_rate
        assert (chunk_time is not None or chunk_size
                is not None), "please provide either chunk_time or chunk_size"

        # Derive whichever of chunk_time / chunk_size was not provided.
        if chunk_size is not None:
            self.chunk_size = int(chunk_size)
            self.chunk_time = chunk_size / sample_rate
        else:
            self.chunk_time = chunk_time
            self.chunk_size = int(chunk_time * sample_rate)

        self.bytes_per_sample = bytes_per_sample
        self.use_asr = use_asr
        self.use_iasr = use_iasr
        self.record = record
        self.debug = debug

        self.cache_dir = cache_dir

        # Incremental ASR consumes ASR output, so it implies ASR.
        if self.use_iasr:
            self.use_asr = True

        # Components that are always used
        if bypass:
            self.in_mic = MicrophoneOutputBypass(
                chunk_size=self.chunk_size,
                rate=self.sample_rate,
                sample_width=self.bytes_per_sample,
            )
        else:
            self.in_mic = MicrophoneModule(
                chunk_size=self.chunk_size,
                rate=self.sample_rate,
                sample_width=self.bytes_per_sample,
            )
        self.vad_frames = VADFrames(
            chunk_time=self.chunk_time,
            sample_rate=self.sample_rate,
            mode=3,  # most aggressive VAD mode
            debug=debug,
        )
        self.in_mic.subscribe(self.vad_frames)

        # Optional Components
        if self.use_asr:
            self.asr = GoogleASRModule(
                language=language,
                nchunks=nchunks,  # m chunks to trigger a new prediction
                rate=self.sample_rate,
            )
            self.in_mic.subscribe(self.asr)

        if self.use_iasr:
            self.iasr = IncrementalizeASRModule(
                threshold=0.8
            )  # Gets only the newly added words at each increment
            self.asr.subscribe(self.iasr)

        if self.record:
            self.cache_dir = join(cache_dir, "hearing")
            makedirs(self.cache_dir, exist_ok=True)
            print("Hearing: ", self.cache_dir)
            wav_filename = join(self.cache_dir, "user_audio.wav")
            self.audio_record = AudioRecorderModule(
                wav_filename, rate=sample_rate, sample_width=bytes_per_sample)
            self.in_mic.subscribe(self.audio_record)

        if self.debug:
            self.asr_debug = ASRDebugModule()
            if self.use_asr:
                self.asr.subscribe(self.asr_debug)

        logging.info(f"{self.name}: Initialized @ {time.time()}")

    @property
    def name(self):
        """Class name, used as a logging prefix."""
        return self.__class__.__name__

    def __repr__(self):
        s = "\n" + "=" * 40
        s += "\n" + self.__class__.__name__
        s += f"\nsample_rate: {self.sample_rate}"
        s += f"\nchunk_time: {self.chunk_time}"
        s += f"\nchunk_size: {self.chunk_size}"
        s += f"\nbytes_per_sample: {self.bytes_per_sample}"
        s += f"\nrecord: {self.record}"
        s += f"\nuse_asr: {self.use_asr}"
        s += f"\ndebug: {self.debug}"
        s += "\n" + "=" * 40
        return s

    def setup(self, **kwargs):
        """Set up every active sub-module (mirrors run()/stop())."""
        self.in_mic.setup(**kwargs)
        # FIX: vad_frames is run() and stop()ped but was never set up here.
        self.vad_frames.setup(**kwargs)
        if self.use_asr:
            self.asr.setup(**kwargs)
        if self.use_iasr:
            self.iasr.setup(**kwargs)
        if self.record:
            self.audio_record.setup(**kwargs)
        if self.debug and self.use_asr:
            # FIX: asr_debug is run() and stop()ped but was never set up here.
            self.asr_debug.setup(**kwargs)
        logging.info(f"{self.name}: Setup")

    def run(self, **kwargs):
        """Start every active sub-module."""
        self.in_mic.run(**kwargs)
        self.vad_frames.run(**kwargs)

        if self.use_asr:
            self.asr.run(**kwargs)

        if self.use_iasr:
            self.iasr.run(**kwargs)

        if self.record:
            self.audio_record.run(**kwargs)

        if self.debug:
            if self.use_asr:
                self.asr_debug.run(**kwargs)
                # self.iasr_debug.run(run_setup=run_setup)
        logging.info(f"{self.name}: run @ {time.time()}")

    def stop(self, **kwargs):
        """Stop every active sub-module."""
        self.in_mic.stop(**kwargs)
        self.vad_frames.stop(**kwargs)

        if self.use_asr:
            self.asr.stop(**kwargs)

        if self.use_iasr:
            self.iasr.stop(**kwargs)

        if self.record:
            self.audio_record.stop(**kwargs)

        if self.debug:
            if self.use_asr:
                self.asr_debug.stop(**kwargs)
        logging.info(f"{self.name}: stop_components @ {time.time()}")
# Example 8
def init_all(robot : cozmo.robot.Robot):
    """Wire up and run the full Cozmo reference-resolution pipeline until Enter."""
    domain_dir = '/home/casey/git/retico/data/cozmo/dm/dialogue.xml'
    aod_endpoint = ""
    aod_key = ""
    wac_dir = '/home/casey/git/retico/data/wac/subset'
    mask_rcnn_labels = '/home/casey/git/retico/data/maskrcnn/label_map.pbtxt'
    mask_rcnn_model = '/home/casey/git/retico/data/maskrcnn/frozen_inference_graph.pb'

    # Variables the OpenDial domain reads, annotated with the producing module.
    opendial_variables = [
        'face_count',      # CozmoStateModule
        'num_objs',        # ObjectDetector
        'near_object',     # CozmoRefer
        'exploring',       # CozmoRefer
        'aligned',         # CozmoRefer
        'word_to_find',    # WordsAsClassifiersModule
        'best_object',     # WordsAsClassifiersModule
        'obj_confidence',  # WordsAsClassifiersModule
    ]

    #
    # INSTANTIATE MODULES
    #
    mic = MicrophoneModule(1000)
    asr = GoogleASRModule()
    iasr = IncrementalizeASRModule()
    dm = OpenDialModule(domain_dir=domain_dir, variables=opendial_variables)
    cozmo_refer = CozmoReferModule(robot)
    cozmo_camera = CozmoCameraModule(robot)
    object_detector = MaskrRCNNObjectDetection(mask_rcnn_labels, mask_rcnn_model)
    feature_extractor = KerasObjectFeatureExtractorModule()
    wac = WordsAsClassifiersModule(wac_dir=wac_dir)
    debug = DebugModule()

    # speech as input
    mic.subscribe(asr)
    asr.subscribe(iasr)
    iasr.subscribe(wac)
    wac.subscribe(dm)
    dm.subscribe(cozmo_refer)

    # robot state as input
    wac.subscribe(cozmo_refer)
    object_detector.subscribe(dm)
    object_detector.subscribe(cozmo_refer)
    cozmo_refer.subscribe(dm)

    # robot camera as input
    cozmo_camera.subscribe(object_detector)
    object_detector.subscribe(feature_extractor)
    feature_extractor.subscribe(wac)

    #
    # INITIALIZE MODULES
    #
    # cozmo_refer.run(): IF I MAKE THIS RUN EVERYTHING SLOWS DOWN
    for module in (mic, asr, iasr, dm, cozmo_refer, cozmo_camera,
                   object_detector, feature_extractor, wac, debug):
        module.run()

    input()  # keep everything running

    # Shutdown order deliberately stops cozmo_refer before dm.
    for module in (mic, asr, iasr, cozmo_refer, dm, cozmo_camera,
                   object_detector, feature_extractor, wac, debug):
        module.stop()
# Example 9
    # NOTE(review): fragment — the enclosing function header and the creation
    # of m1/m2/db are missing from this scraped example (compare audio() in
    # Example 5 for a complete version). Left unchanged.
    m2.run()
    db.run()

    input()

    m1.stop()
    m2.stop()
    db.stop()


if __name__ == "__main__":

    # google_asr()

    # Mic audio -> Google ASR -> custom text consumer, until Enter is pressed.
    mic = MicrophoneModule(5000)
    asr = GoogleASRModule("en-US")  # en-US or de-DE or ....
    consumer = CustomTextModule()

    mic.subscribe(asr)
    asr.subscribe(consumer)

    pipeline = (mic, asr, consumer)
    for module in pipeline:
        module.run()

    input()

    for module in pipeline:
        module.stop()