Example #1
def rasa_nlu():
    """Demo pipeline: microphone -> Google ASR -> Rasa NLU, with print callbacks."""
    mic = MicrophoneModule(5000)
    asr = GoogleASRModule("en-US")
    asr_printer = CallbackModule(callback=lambda x: print("%s (%f) - %s" % (
        x.text, x.confidence, x.final)))
    nlu = RasaNLUModule("data/rasa/models/nlu/default/current")
    nlu_printer = CallbackModule(callback=lambda x: print(x.act, "-", x.concepts))

    mic.subscribe(asr)
    asr.subscribe(asr_printer)
    asr.subscribe(nlu)
    nlu.subscribe(nlu_printer)

    # Set everything up first (same order as the original), then run with
    # run_setup=False so setup is not repeated.
    for module in (nlu, asr, mic, asr_printer, nlu_printer):
        module.setup()

    print("All setup")

    running = (mic, asr, asr_printer, nlu, nlu_printer)
    for module in running:
        module.run(run_setup=False)

    input()  # block until the user presses enter

    for module in running:
        module.stop()
Example #2
def repeat_demo():
    """Echo demo: recognized speech is re-synthesized and played back."""
    mic = MicrophoneModule(5000)
    asr = GoogleASRModule("en-US")
    text = TextDispatcherModule()
    tts = GoogleTTSModule("en-US", "en-US-Wavenet-A")
    dispatcher = AudioDispatcherModule(5000)
    speaker = StreamingSpeakerModule(5000)

    pipeline = (mic, asr, text, tts, dispatcher, speaker)

    # Chain each module to its successor in the pipeline.
    for source, sink in zip(pipeline, pipeline[1:]):
        source.subscribe(sink)

    for module in pipeline:
        module.setup()

    print("All setup")

    for module in pipeline:
        module.run(run_setup=False)

    input()  # block until the user presses enter

    for module in pipeline:
        module.stop()
Example #3
def google_asr():
    """Print incremental Google ASR hypotheses for microphone input."""
    mic = MicrophoneModule(5000)
    asr = GoogleASRModule("en-US")  # en-US or de-DE or ....
    printer = CallbackModule(callback=lambda iu: print("%s (%f) - %s" % (
        iu.text, iu.stability, iu.final)))

    mic.subscribe(asr)
    asr.subscribe(printer)

    modules = (mic, asr, printer)
    for module in modules:
        module.run()

    input()  # keep the pipeline alive until enter is pressed

    for module in modules:
        module.stop()
Example #4
def audio():
    """Loop microphone audio to the speakers and to a custom module.

    Runs until the user presses enter, then stops all modules.
    """
    rate = 16000        # sample rate in Hz
    chunk_time = 0.1    # seconds of audio per chunk
    chunk_size = int(rate * chunk_time)
    # NOTE: the original also defined an unused `sample_width = 2`; removed.

    m1 = MicrophoneModule(chunk_size, rate=rate)
    m2 = StreamingSpeakerModule(chunk_size, rate)
    # db = DebugModule()
    db = CustomModule(chunk_size, rate=rate)

    # The microphone feeds both the speaker and the custom module.
    m1.subscribe(m2)
    m1.subscribe(db)

    m1.run()
    m2.run()
    db.run()

    input()  # block until the user presses enter

    m1.stop()
    m2.stop()
    db.stop()
Example #5
from retico.modules.azure.emotion_recognition import AzureEmotionDetectionModule
from retico.interop.ros.catkin_ws.src.rosretico.src.RosPublisher import RosPublisher
from retico.interop.ros.catkin_ws.src.rosretico.src.RosSubscriber import RosSubscriber

# how to restart an utterance?
# had to change is_running to _is_running because opendial modules have an is_running() method

# run: export GOOGLE_APPLICATION_CREDENTIALS=/home/casey/substutute-ca5bdacf1d9a.json

# SECURITY NOTE(review): hard-coded Azure endpoints and API keys below.
# These credentials should be rotated and loaded from environment variables
# or a config file instead of being committed to source.
aod_endpoint = "https://slimcomputervision.cognitiveservices.azure.com/"
aod_key = "59bfd2dc248a4d08957edf7a6eb6331f"
aed_endpoint = "https://westus2.api.cognitive.microsoft.com/"
aed_key = "1837b9d29e0b4a22843d103a7ca8b3c9"

# instantiate modules
mic = MicrophoneModule(1000, rate=16000)
asr = AzureASRModule("179eaa4b8fc54e0fa5115ba5d14883f2")  # SECURITY NOTE(review): hard-coded key
emotion = AzureEmotionDetectionModule(aed_key, aed_endpoint)
iasr = IncrementalizeASRModule()

# initialize ros and its topics
rospy.init_node("Robot_Ready_SDS_RosNode", anonymous=True)
mic_publisher = RosPublisher('mic')
asr_publisher = RosPublisher('asr')
# asr_subscriber = RosSubscriber('asr',debug=True)

debug = DebugModule()

# hook modules up to each other: mic feeds both the ASR and the ROS publisher
mic.subscribe(asr)
mic.subscribe(mic_publisher)
Example #6
# Variables the OpenDial dialogue manager tracks; each trailing comment names
# the module that produces that variable.
opendial_variables = ['face_count', # 
                        'num_objs', # ObjectDetector
                        'exploring', # MistyReferModule
                        'aligned', #  MistyReferModule
                        'word_to_find', # WordsAsClassifiersModule
                        'best_object', # WordsAsClassifiersModule
                        'obj_confidence'] # WordsAsClassifiersModule

#
# INSTANTIATE MODULES
#
# mic = RespeakerMicrophoneModule('192.168.20.49:8000')
# asr = GoogleASRModule(rate=16000)

# NOTE(review): domain_dir, misty_ip, wac_dir, mask_rcnn_labels and
# mask_rcnn_model are expected to be defined earlier in the file.
mic = MicrophoneModule(1000)
asr = GoogleASRModule()
iasr = IncrementalizeASRModule()
dm = OpenDialModule(domain_dir=domain_dir, variables=opendial_variables)
# cozmo_camera = WebcamModule()
misty_camera = MistyCameraModule(misty_ip)
misty_refer = MistyReferModule(misty_ip)
misty_state = MistyStateModule(misty_ip)
cropper = ImageCropperModule(top=200)  # crop the top 200px of camera frames
# object_detector = AzureObjectDetectionModule(aod_key, aod_endpoint)
object_detector = MaskrRCNNObjectDetection(mask_rcnn_labels, mask_rcnn_model, max_objs=1)
feature_extractor = KerasObjectFeatureExtractorModule()
wac = WordsAsClassifiersModule(wac_dir=wac_dir)
debug = DebugModule()

print('All modules instantiated.')
Example #7
    def __init__(
        self,
        chunk_time=None,
        chunk_size=None,
        sample_rate=16000,
        bytes_per_sample=2,
        language="en-US",
        nchunks=20,
        use_asr=True,
        use_iasr=False,
        record=False,
        debug=False,
        bypass=False,
        cache_dir="/tmp",
    ):
        """Build the hearing pipeline: microphone (or bypass) feeding VAD,
        plus optional ASR, incremental ASR, recording, and debug modules.

        Args:
            chunk_time: audio chunk duration in seconds; exactly one of
                chunk_time/chunk_size must be provided.
            chunk_size: audio chunk size in samples.
            sample_rate: audio sample rate in Hz.
            bytes_per_sample: sample width in bytes.
            language: language code passed to GoogleASRModule.
            nchunks: number of chunks that triggers a new ASR prediction.
            use_asr: create a GoogleASRModule fed by the microphone.
            use_iasr: create an IncrementalizeASRModule (forces use_asr on).
            record: record microphone audio to <cache_dir>/hearing/user_audio.wav.
            debug: attach an ASRDebugModule to the ASR output.
            bypass: use MicrophoneOutputBypass instead of a real microphone.
            cache_dir: base directory for recordings.
        """
        self.sample_rate = sample_rate
        # NOTE(review): assert is stripped under `python -O`; a ValueError
        # would be more robust for argument validation.
        assert (chunk_time is not None or chunk_size
                is not None), "please provide either chunk_time or chunk_size"

        # Derive whichever of chunk_time / chunk_size was not supplied.
        if chunk_size is not None:
            self.chunk_size = int(chunk_size)
            self.chunk_time = chunk_size / sample_rate
        else:
            self.chunk_time = chunk_time
            self.chunk_size = int(chunk_time * sample_rate)

        self.bytes_per_sample = bytes_per_sample
        self.use_asr = use_asr
        self.use_iasr = use_iasr
        self.record = record
        self.debug = debug

        self.cache_dir = cache_dir

        # Incremental ASR consumes ASR output, so it implies use_asr.
        if self.use_iasr:
            self.use_asr = True

        # Components that are always used
        if bypass:
            self.in_mic = MicrophoneOutputBypass(
                chunk_size=self.chunk_size,
                rate=self.sample_rate,
                sample_width=self.bytes_per_sample,
            )
        else:
            self.in_mic = MicrophoneModule(
                chunk_size=self.chunk_size,
                rate=self.sample_rate,
                sample_width=self.bytes_per_sample,
            )
        self.vad_frames = VADFrames(
            chunk_time=self.chunk_time,
            sample_rate=self.sample_rate,
            mode=3,
            debug=debug,
        )
        self.in_mic.subscribe(self.vad_frames)

        # Optional Components
        if self.use_asr:
            self.asr = GoogleASRModule(
                language=language,
                nchunks=nchunks,  # m chunks to trigger a new prediction
                rate=self.sample_rate,
            )
            self.in_mic.subscribe(self.asr)

        if self.use_iasr:
            self.iasr = IncrementalizeASRModule(
                threshold=0.8
            )  # Gets only the newly added words at each increment
            self.asr.subscribe(self.iasr)

        if self.record:
            # Recordings go into a "hearing" subdirectory of cache_dir.
            self.cache_dir = join(cache_dir, "hearing")
            makedirs(self.cache_dir, exist_ok=True)
            print("Hearing: ", self.cache_dir)
            wav_filename = join(self.cache_dir, "user_audio.wav")
            self.audio_record = AudioRecorderModule(
                wav_filename, rate=sample_rate, sample_width=bytes_per_sample)
            self.in_mic.subscribe(self.audio_record)

        if self.debug:
            self.asr_debug = ASRDebugModule()
            if self.use_asr:
                self.asr.subscribe(self.asr_debug)

        logging.info(f"{self.name}: Initialized @ {time.time()}")
Example #8
class Hearing(object):
    """
    The Hearing component of the Spoken Dialog System.

    Wires a microphone (or a bypass source) into voice-activity detection
    and, optionally, Google ASR, incremental ASR, audio recording and a
    debug printer.

    Components:
        - MicrophoneModule (or MicrophoneOutputBypass when bypass=True)
        - VADFrames
        - ASRModule (GoogleASRModule, when use_asr)
        - IncrementalizeASRModule (when use_iasr; implies use_asr)
        - AudioRecorderModule (when record)
        - ASRDebugModule (when debug)
    """

    # Default base directory for recorded audio.
    CACHE_DIR = "/tmp"

    def __init__(
        self,
        chunk_time=None,
        chunk_size=None,
        sample_rate=16000,
        bytes_per_sample=2,
        language="en-US",
        nchunks=20,
        use_asr=True,
        use_iasr=False,
        record=False,
        debug=False,
        bypass=False,
        cache_dir="/tmp",
    ):
        """Instantiate and wire the hearing modules.

        Args:
            chunk_time: audio chunk duration in seconds; exactly one of
                chunk_time/chunk_size must be provided.
            chunk_size: audio chunk size in samples.
            sample_rate: audio sample rate in Hz.
            bytes_per_sample: sample width in bytes.
            language: language code passed to GoogleASRModule.
            nchunks: number of chunks that triggers a new ASR prediction.
            use_asr: create a GoogleASRModule fed by the microphone.
            use_iasr: create an IncrementalizeASRModule (forces use_asr on).
            record: record microphone audio to <cache_dir>/hearing/user_audio.wav.
            debug: attach an ASRDebugModule to the ASR output.
            bypass: use MicrophoneOutputBypass instead of a real microphone.
            cache_dir: base directory for recordings.
        """
        self.sample_rate = sample_rate
        assert (chunk_time is not None or chunk_size
                is not None), "please provide either chunk_time or chunk_size"

        # Derive whichever of chunk_time / chunk_size was not supplied.
        if chunk_size is not None:
            self.chunk_size = int(chunk_size)
            self.chunk_time = chunk_size / sample_rate
        else:
            self.chunk_time = chunk_time
            self.chunk_size = int(chunk_time * sample_rate)

        self.bytes_per_sample = bytes_per_sample
        self.use_asr = use_asr
        self.use_iasr = use_iasr
        self.record = record
        self.debug = debug

        self.cache_dir = cache_dir

        # Incremental ASR consumes ASR output, so it implies use_asr.
        if self.use_iasr:
            self.use_asr = True

        # Components that are always used
        if bypass:
            self.in_mic = MicrophoneOutputBypass(
                chunk_size=self.chunk_size,
                rate=self.sample_rate,
                sample_width=self.bytes_per_sample,
            )
        else:
            self.in_mic = MicrophoneModule(
                chunk_size=self.chunk_size,
                rate=self.sample_rate,
                sample_width=self.bytes_per_sample,
            )
        self.vad_frames = VADFrames(
            chunk_time=self.chunk_time,
            sample_rate=self.sample_rate,
            mode=3,
            debug=debug,
        )
        self.in_mic.subscribe(self.vad_frames)

        # Optional Components
        if self.use_asr:
            self.asr = GoogleASRModule(
                language=language,
                nchunks=nchunks,  # m chunks to trigger a new prediction
                rate=self.sample_rate,
            )
            self.in_mic.subscribe(self.asr)

        if self.use_iasr:
            self.iasr = IncrementalizeASRModule(
                threshold=0.8
            )  # Gets only the newly added words at each increment
            self.asr.subscribe(self.iasr)

        if self.record:
            # Recordings go into a "hearing" subdirectory of cache_dir.
            self.cache_dir = join(cache_dir, "hearing")
            makedirs(self.cache_dir, exist_ok=True)
            print("Hearing: ", self.cache_dir)
            wav_filename = join(self.cache_dir, "user_audio.wav")
            self.audio_record = AudioRecorderModule(
                wav_filename, rate=sample_rate, sample_width=bytes_per_sample)
            self.in_mic.subscribe(self.audio_record)

        if self.debug:
            self.asr_debug = ASRDebugModule()
            if self.use_asr:
                self.asr.subscribe(self.asr_debug)

        logging.info(f"{self.name}: Initialized @ {time.time()}")

    @property
    def name(self):
        """Class name, used as a tag in log messages."""
        return self.__class__.__name__

    def __repr__(self):
        s = "\n" + "=" * 40
        s += "\n" + self.__class__.__name__
        s += f"\nsample_rate: {self.sample_rate}"
        s += f"\nchunk_time: {self.chunk_time}"
        s += f"\nchunk_size: {self.chunk_size}"
        s += f"\nbytes_per_sample: {self.bytes_per_sample}"
        s += f"\nrecord: {self.record}"
        s += f"\nuse_asr: {self.use_asr}"
        s += f"\nuse_iasr: {self.use_iasr}"  # was missing from the flag list
        s += f"\ndebug: {self.debug}"
        s += "\n" + "=" * 40
        return s

    def setup(self, **kwargs):
        """Run setup on every instantiated module (mirrors run()/stop())."""
        self.in_mic.setup(**kwargs)
        # BUGFIX: vad_frames was never set up although run()/stop() manage it.
        self.vad_frames.setup(**kwargs)
        if self.use_asr:
            self.asr.setup(**kwargs)
        if self.use_iasr:
            self.iasr.setup(**kwargs)
        if self.record:
            self.audio_record.setup(**kwargs)
        # BUGFIX: asr_debug was never set up although run()/stop() manage it.
        if self.debug and self.use_asr:
            self.asr_debug.setup(**kwargs)
        logging.info(f"{self.name}: Setup")

    def run(self, **kwargs):
        """Start all instantiated modules."""
        self.in_mic.run(**kwargs)
        self.vad_frames.run(**kwargs)

        if self.use_asr:
            self.asr.run(**kwargs)

        if self.use_iasr:
            self.iasr.run(**kwargs)

        if self.record:
            self.audio_record.run(**kwargs)

        if self.debug:
            if self.use_asr:
                self.asr_debug.run(**kwargs)
                # self.iasr_debug.run(run_setup=run_setup)
        logging.info(f"{self.name}: run @ {time.time()}")

    def stop(self, **kwargs):
        """Stop all instantiated modules."""
        self.in_mic.stop(**kwargs)
        self.vad_frames.stop(**kwargs)

        if self.use_asr:
            self.asr.stop(**kwargs)

        if self.use_iasr:
            self.iasr.stop(**kwargs)

        if self.record:
            self.audio_record.stop(**kwargs)

        if self.debug:
            if self.use_asr:
                self.asr_debug.stop(**kwargs)
        logging.info(f"{self.name}: stop_components @ {time.time()}")
Example #9
def init_all(robot: cozmo.robot.Robot):
    """Instantiate, wire up, and run the full Cozmo reference-resolution
    pipeline, then block on stdin until the user presses enter.

    Args:
        robot: connected Cozmo robot handle used by the camera and
            referring modules.
    """
    # Paths to local models and resources (machine-specific).
    domain_dir = '/home/casey/git/retico/data/cozmo/dm/dialogue.xml'
    aod_endpoint = ""
    aod_key = ""
    wac_dir = '/home/casey/git/retico/data/wac/subset'
    mask_rcnn_labels = '/home/casey/git/retico/data/maskrcnn/label_map.pbtxt'
    mask_rcnn_model = '/home/casey/git/retico/data/maskrcnn/frozen_inference_graph.pb'

    # Variables tracked by the OpenDial dialogue manager; each trailing
    # comment names the module that produces the variable.
    opendial_variables = ['face_count', # CozmoStateModule
                           'num_objs', # ObjectDetector
                           'near_object', # CozmoRefer
                           'exploring', # CozmoRefer
                           'aligned', # CozmoRefer
                           'word_to_find', # WordsAsClassifiersModule
                           'best_object', # WordsAsClassifiersModule
                           'obj_confidence'] # WordsAsClassifiersModule

    #
    # INSTANTIATE MODULES
    #
    # mic = RespeakerMicrophoneModule('192.168.0.102:8000')
    # asr = GoogleASRModule(rate=16000)

    mic = MicrophoneModule(1000)
    asr = GoogleASRModule()
    iasr = IncrementalizeASRModule()
    dm = OpenDialModule(domain_dir=domain_dir, variables=opendial_variables)
    cozmo_refer = CozmoReferModule(robot)
    cozmo_camera = CozmoCameraModule(robot)
    # cozmo_camera = WebcamModule()
    # cozmo_state = CozmoStateModule(robot)
    # object_detector = AzureObjectDetectionModule(aod_key, aod_endpoint)
    object_detector = MaskrRCNNObjectDetection(mask_rcnn_labels, mask_rcnn_model)
    feature_extractor = KerasObjectFeatureExtractorModule()
    wac = WordsAsClassifiersModule(wac_dir=wac_dir)
    debug = DebugModule()

    # psi related modules
    # WriterSingleton should use the *source* ip address (i.e., this machine's)
    # WriterSingleton(ip='192.168.0.101', port='12346') # create the zeromq writer
    # psi = ZeroMQWriter(topic='retico')

    # mic as input: speech -> ASR -> incremental ASR -> WAC -> DM -> robot
    mic.subscribe(asr)
    asr.subscribe(iasr)
    iasr.subscribe(wac)
    wac.subscribe(dm)
    dm.subscribe(cozmo_refer)

    # robot state as input
    # cozmo_state.subscribe(dm)
    wac.subscribe(cozmo_refer)
    object_detector.subscribe(dm)
    object_detector.subscribe(cozmo_refer)
    cozmo_refer.subscribe(dm)

    # robot camera as input: frames -> detection -> features -> WAC
    cozmo_camera.subscribe(object_detector)
    object_detector.subscribe(feature_extractor)
    feature_extractor.subscribe(wac)
    # feature_extractor.subscribe(debug)

    # iasr.subscribe(psi)
    # wac.subscribe(psi)
    # dm.subscribe(psi)
    # feature_extractor.subscribe(psi)
    # object_detector.subscribe(psi)

    #
    # INITIALIZE MODULES
    #
    mic.run()
    asr.run()
    iasr.run()
    dm.run()
    cozmo_refer.run() # IF I MAKE THIS RUN EVERYTHING SLOWS DOWN
    # cozmo_state.run()
    cozmo_camera.run()
    object_detector.run()
    feature_extractor.run()
    wac.run()
    debug.run()
    # psi.run()

    input() # keep everything running

    mic.stop()
    asr.stop()
    iasr.stop()
    cozmo_refer.stop()
    # cozmo_state.stop()
    dm.stop()
    cozmo_camera.stop()
    object_detector.stop()
    feature_extractor.stop()
    wac.stop()
    debug.stop()
Example #10
def audio_demo():
    """Show that a microphone-to-speaker loop can be stopped and restarted."""
    mic = MicrophoneModule(5000)
    speaker = StreamingSpeakerModule(5000)

    mic.subscribe(speaker)

    # First run: loop microphone audio to the speakers.
    mic.run()
    speaker.run()

    input()  # press enter to stop the loop

    mic.stop()
    speaker.stop()

    input()  # press enter to restart the same module instances

    mic.run()
    speaker.run()

    input()  # press enter to stop for good

    mic.stop()
    speaker.stop()
Example #11
    m1.run()
    m2.run()
    db.run()

    input()

    m1.stop()
    m2.stop()
    db.stop()


if __name__ == "__main__":

    # google_asr()

    mic = MicrophoneModule(5000)
    asr = GoogleASRModule("en-US")  # en-US or de-DE or ....
    text_sink = CustomTextModule()

    mic.subscribe(asr)
    asr.subscribe(text_sink)

    modules = (mic, asr, text_sink)
    for module in modules:
        module.run()

    input()  # run until the user presses enter

    for module in modules:
        module.stop()