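# Imports required by the class below. The standard-library and core ROS
# imports are standard; the message/service packages are assumptions based
# on the topics used here, and the local helpers (SoundFile, AnimationRunner,
# BaseVisemes) must be imported from wherever they live in your tree.
import os
import time
import logging
import tempfile
import threading
from Queue import Queue  # Python 2; on Python 3 use: from queue import Queue

import rospy
from std_msgs.msg import String
from topic_tools.srv import MuxSelect
from blender_api_msgs.msg import Viseme      # assumed package for Viseme
from basic_head_api.msg import MakeFaceExpr  # assumed package for MakeFaceExpr
# e.g. (adjust to the actual module layout):
# from ttsserver.sound_file import SoundFile
# from ttsserver.animation import AnimationRunner, BaseVisemes

logger = logging.getLogger(__name__)
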
class TTSExecutor(object):
    def __init__(self):
        self._locker = threading.RLock()
        self.interrupt = threading.Event()
        self.sound = SoundFile()

        self.lipsync_enabled = rospy.get_param('lipsync', True)
        self.lipsync_blender = rospy.get_param('lipsync_blender', True)

        tts_control = rospy.get_param('tts_control', 'tts_control')
        rospy.Subscriber(tts_control, String, self.tts_control)
        self.speech_active = rospy.Publisher('speech_events',
                                             String,
                                             queue_size=10)
        self.expr_topic = rospy.Publisher('make_face_expr',
                                          MakeFaceExpr,
                                          queue_size=0)
        self.vis_topic = rospy.Publisher('/blender_api/queue_viseme',
                                         Viseme,
                                         queue_size=0)
        self.mux = rospy.ServiceProxy('lips_pau_select', MuxSelect)
        self.blink_publisher = rospy.Publisher('chatbot_blink',
                                               String,
                                               queue_size=1)

        self.animation_queue = Queue()
        self.animation_runner = AnimationRunner(self.animation_queue)
        self.animation_runner.start()

    def enable_execute_marker(self, enable):
        self.animation_runner.enable_execute_marker(enable)

    def tts_control(self, msg):
        if msg.data == 'shutup':
            logger.info("Shut up!!")
            self.interrupt.set()
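
    # To trigger this interrupt from the command line (the topic name is
    # resolved from the 'tts_control' param above):
    #   rostopic pub -1 tts_control std_msgs/String "data: 'shutup'"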

    def _startLipSync(self):
        self.speech_active.publish("start")
        if self.lipsync_enabled and not self.lipsync_blender:
            try:
                self.mux("lipsync_pau")
            except Exception as ex:
                logger.error(ex)

    def _stopLipSync(self):
        self.speech_active.publish("stop")
        if self.lipsync_enabled and not self.lipsync_blender:
            try:
                self.mux("head_pau")
            except Exception as ex:
                logger.error(ex)

    def _threadsafe(f):
        # Decorator: serialize the wrapped method through the instance RLock.
        def wrap(self, *args, **kwargs):
            with self._locker:
                return f(self, *args, **kwargs)

        return wrap

    @_threadsafe
    def execute(self, response):
        self.interrupt.clear()
        fd, wavfile = tempfile.mkstemp(prefix='tts')
        os.close(fd)  # mkstemp returns an open fd; close it to avoid leaking it
        success = response.write(wavfile)
        if not success:
            logger.error("No sound file")
            os.remove(wavfile)
            return

        # Start audio playback on a short delay so it lines up with the
        # viseme/marker timing loop below.
        threading.Timer(0.1, self.sound.play, (wavfile,)).start()

        duration = response.get_duration()
        self._startLipSync()
        self.speech_active.publish("duration:%f" % duration)

        phonemes = response.response['phonemes']
        markers = response.response['markers']
        words = response.response['words']
        visemes = response.response['visemes']

        # Merge the three streams into a single timeline; on equal start
        # times a marker fires before a word, and a word before a viseme.
        typeorder = {'marker': 1, 'word': 2, 'viseme': 3}
        nodes = markers + words + visemes
        nodes = sorted(nodes, key=lambda x: (x['start'], typeorder[x['type']]))

        # Overwrite visemes during vocal gestures
        in_gesture = False
        vocal_gesture_nodes = []
        for node in nodes:
            if node['type'] == 'marker':
                if node['name'] == 'CPRC_GESTURE_START':
                    in_gesture = True
                if node['name'] == 'CPRC_GESTURE_END':
                    in_gesture = False
            if node['type'] == 'viseme' and in_gesture:
                vocal_gesture_nodes.append(node)
        if len(vocal_gesture_nodes) > 0:
            if len(vocal_gesture_nodes) > 1:
                # Open the mouth ('A-I') for the first half of the gesture,
                # then close it ('M') for the second half.
                mid = len(vocal_gesture_nodes) // 2  # floor division: a slice index must be an int
                for node in vocal_gesture_nodes[:mid]:
                    node['name'] = 'A-I'
                for node in vocal_gesture_nodes[mid:]:
                    node['name'] = 'M'
            else:
                vocal_gesture_nodes[0]['name'] = 'A-I'

        start = time.time()
        end = start + duration + 1  # hard deadline: audio duration plus 1s slack

        for i, node in enumerate(nodes):
            # Busy-wait in 1 ms steps until this node's start time (or the
            # deadline, whichever comes first).
            while time.time() < end and time.time() < start + node['start']:
                time.sleep(0.001)
            if self.interrupt.is_set():
                logger.info("Interrupt is set")
                # Keep playing the remaining visemes so the mouth finishes the
                # current sound; break once a 'word' or 'marker' is reached.
                if node['type'] != 'viseme':
                    logger.info("Interrupt at {}".format(node))
                    break
            if node['type'] == 'marker':
                logger.info("marker {}".format(node))
                # 'cp*' markers are vendor-internal, not animation cues.
                if node['name'].startswith('cp'):
                    continue
                self.animation_queue.put(node)
            elif node['type'] == 'word':
                logger.info("word {}".format(node))
                continue
            elif node['type'] == 'viseme':
                logger.debug("viseme {}".format(node))
                self.sendVisime(node)

        elapsed = time.time() - start
        supposed = nodes[-1]['end'] if nodes else 0  # guard: nodes may be empty
        logger.info("Elapsed {}, nodes duration {}".format(elapsed, supposed))

        if self.interrupt.is_set():
            self.interrupt.clear()
            self.sound.interrupt()
            logger.info("Interrupt flag is cleared")

        self.sendVisime({'name': 'Sil'})
        self._stopLipSync()
        os.remove(wavfile)

    def sendVisime(self, visime):
        if self.lipsync_enabled and self.lipsync_blender and (visime['name'] !=
                                                              'Sil'):
            # Needs a global shapekey_store class.
            params = BaseVisemes.visemes_param[visime['name']]
            msg = Viseme()
            # Visemes are meant to overlap, so the duration is scaled by a
            # per-viseme factor; from_sec() keeps secs/nsecs normalized even
            # for durations of 1s or more (setting .nsecs alone does not).
            msg.duration = rospy.Duration.from_sec(
                visime['duration'] * params['duration'])
            msg.name = visime['name']
            msg.magnitude = params['magnitude']
            msg.rampin = params['rampin']
            msg.rampout = params['rampout']
            self.vis_topic.publish(msg)
        if self.lipsync_enabled and not self.lipsync_blender:
            msg = MakeFaceExpr()
            msg.exprname = 'vis_' + visime['name']
            msg.intensity = 1.0
            self.expr_topic.publish(msg)
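
A minimal usage sketch follows (assumptions: this file is run as a ROS node, and `response` is a TTS backend result exposing write(path), get_duration() and a `response` dict with 'phonemes'/'markers'/'words'/'visemes', as consumed by execute() above):

if __name__ == '__main__':
    rospy.init_node('tts_executor')  # node name is illustrative
    executor = TTSExecutor()
    # A backend response would be obtained here, then played back with:
    # executor.execute(response)
    rospy.spin()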