Example #1
0
    def synthesize(self, text: str, solution=None) -> sr.AudioData:
        """Turn ``text`` into speech using the selected synthesis solution.

        Args:
            text: The text to speak.
            solution: Solution description; it is indexed by ``"name"`` and,
                for the RHVoice backend, by ``"voice"``. When ``None`` the
                instance-wide ``self._solution`` is used.

        Returns:
            An ``sr.AudioData`` wrapping the synthesized audio.

        Raises:
            Exception: If the solution name matches neither
                ``SOLUTION_RHVOICE`` nor ``SOLUTION_GOOGLE``.
        """
        chosen = self._solution if solution is None else solution
        backend = chosen["name"]

        if backend == SOLUTION_RHVOICE["name"]:
            # RHVoice output is wrapped as 24000 Hz audio with 2-byte samples.
            engine = RHVoice(default_voice=chosen["voice"])
            return sr.AudioData(engine.get_audio(text),
                                sample_rate=24000,
                                sample_width=2)

        if backend == SOLUTION_GOOGLE["name"]:
            # gTTS produces MP3, so decode it into an AudioSegment first ...
            with io.BytesIO() as mp3_buffer:
                gTTS(text, lang="ru").write_to_fp(mp3_buffer)
                mp3_buffer.seek(0)
                segment = AudioSegment.from_mp3(mp3_buffer)

            # ... then export it as raw samples, which is what AudioData takes.
            with io.BytesIO() as raw_buffer:
                segment.export(raw_buffer, format="raw")
                raw_buffer.seek(0)
                pcm = raw_buffer.read()
            return sr.AudioData(pcm,
                                sample_rate=segment.frame_rate,
                                sample_width=segment.sample_width)

        raise Exception("There is no such solution.")
	def read_file(self, fname, save_path='C:/Users/Brett/Desktop/data.np'):
		"""Load a WAV file, dump its bytes to a NumPy file and transcribe it.

		Args:
			fname: Path of the WAV file to read. An empty (or ``None``) name
				makes the method return ``None`` without doing anything.
			save_path: Where ``np.save`` writes the byte array. Defaults to the
				path that used to be hard-coded; pass ``None`` to skip saving.

		Returns:
			The transcript returned by ``self.r.recognize_google``, or ``None``
			when no file name was given.

		Side effects:
			Sets ``self.w`` (the opened audio source) and ``self.np_arr`` (a
			``uint8`` array holding the WAV-encoded bytes of the audio).
		"""
		if not fname:
			return None
		self.w = sr.WavFile(fname)
		with self.w as inf:
			ad = sr.AudioData(inf.stream.read(), inf.SAMPLE_RATE, inf.SAMPLE_WIDTH)
		# np.fromstring() on binary input is deprecated, and its default float
		# dtype reinterprets the bytes as float64 (failing outright when the
		# length is not a multiple of 8). Keep a lossless byte view instead.
		self.np_arr = np.frombuffer(ad.get_wav_data(), dtype=np.uint8)
		if save_path is not None:
			np.save(save_path, self.np_arr)
		# Recognize from the AudioData built from the file's own frames; the
		# old code wrapped the ndarray (WAV header included) as frame data.
		words = self.r.recognize_google(ad)
		print(words)
		return words
Example #3
0
 async def record_recog(self, ctx):
     """Speech recognize the current voice recording.
     Usage: recording recog"""
     # Permission gate: the helper is given the three permission names below.
     or_check_perms(ctx,
                    ['manage_server', 'manage_channels', 'move_members'])
     with assert_msg(ctx, '**The bot owner has not set up this feature!**'):
         check(self.opus_decoder != None)
     with assert_msg(ctx, '**This server does not have a recording!**'):
         server_id = ctx.message.server.id
         check(server_id in self.bot.pcm_data)
     # NOTE(review): presence is checked in self.bot.pcm_data but the audio is
     # read from self.recording_data below -- confirm both stay in sync.

     status = await self.bot.say('Hmm, let me think... 🌚')
     # Progress indicator running next to the recognition, capped at 30 s.
     progress = asyncio.wait_for(self.progress(status, 'Hmm, let me think'),
                                 timeout=30,
                                 loop=self.loop)
     pg_task = self.loop.create_task(progress)

     # The recording is wrapped as 48000 Hz audio with 2-byte samples.
     clip = sr.AudioData(self.recording_data[server_id], 48000, 2)

     timed_out = False
     final = None
     try:
         # Sphinx runs in the default executor; give up after 16 seconds.
         with async_timeout.timeout(16):
             final = await self.loop.run_in_executor(
                 None, r.recognize_sphinx, clip)
     except asyncio.TimeoutError:
         timed_out = True

     pg_task.cancel()
     if timed_out:
         await self.bot.edit_message(
             status, '**It took too long to recognize your recording!**')
         return
     # Only the first 2000 characters of the transcript are sent.
     await self.bot.edit_message(status,
                                 'I think you said: ' + final[:2000])
    def callback(self, data):
        """Transcribe an incoming audio message with Houndify and publish it.

        ``data.data`` is converted to bytes with ``tobytes()`` and wrapped in
        an ``sr.AudioData`` using the sample rate and width reported by a
        freshly opened ``sr.Microphone``. On success the transcript is stored
        in ``self.text`` and published through ``self.text_pub``; recognition
        failures are only printed.

        Args:
            data: Message whose ``data`` attribute exposes ``tobytes()``.
        """
        # Placeholders -- replace with real Houndify credentials. The second
        # assignment used to repeat HOUNDIFY_CLIENT_ID, which left
        # HOUNDIFY_CLIENT_KEY undefined and raised NameError on every call.
        HOUNDIFY_CLIENT_ID = "INSERT HOUNDIFY CLIENT ID HERE"
        HOUNDIFY_CLIENT_KEY = "INSERT HOUNDIFY CLIENT KEY HERE"

        r = sr.Recognizer()
        # The microphone is opened only to read its sample rate and width.
        with sr.Microphone() as source2:

            numpydata = data.data

            audio = sr.AudioData(numpydata.tobytes(), source2.SAMPLE_RATE,
                                 source2.SAMPLE_WIDTH)

            try:
                self.text = r.recognize_houndify(
                    audio,
                    client_id=HOUNDIFY_CLIENT_ID,
                    client_key=HOUNDIFY_CLIENT_KEY)

            except sr.UnknownValueError:
                print("Houndify could not understand audio")

            except sr.RequestError as e:
                print("Could not request results from Houndify service; {0}".
                      format(e))

            else:
                # Publish only when recognition succeeded.
                self.text_pub.publish(str(self.text))
    def callback(self, data):
        """Transcribe ``data`` with Google Cloud Speech and publish the text.

        ``data.data`` is converted to bytes with ``tobytes()`` and wrapped in
        an ``sr.AudioData`` using the sample rate and width reported by a
        freshly opened ``sr.Microphone``. On success the transcript is stored
        in ``self.text`` and published through ``self.text_pub``; recognition
        failures are only printed.
        """
        # Placeholder; must be replaced with the real credentials JSON text.
        credentials = r"""INSERT THE CONTENTS OF THE GOOGLE CLOUD SPEECH JSON CREDENTIALS FILE HERE"""
        recognizer = sr.Recognizer()

        with sr.Microphone() as mic:
            clip = sr.AudioData(data.data.tobytes(), mic.SAMPLE_RATE,
                                mic.SAMPLE_WIDTH)
            try:
                self.text = recognizer.recognize_google_cloud(
                    clip, credentials_json=credentials)
            except sr.UnknownValueError:
                print("Google Cloud Speech could not understand audio")
                return
            except sr.RequestError as err:
                print(
                    "Could not request results from Google Cloud Speech service; {0}"
                    .format(err))
                return

            # Reached only when recognition succeeded.
            self.text_pub.publish(str(self.text))
Example #6
0
    def recognize(self, va):
        """Listen on the audio device and dispatch recognized phrases to ``va``.

        Chunks are read continuously. When a chunk's RMS reaches ``THRESHOLD``
        an utterance is collected until ``SILENCE_DETECTION`` consecutive
        chunks fall below the threshold. The utterance is then sent to Google
        Speech Recognition and the transcript is handed to ``va.command``.
        While ``LISTENING`` is truthy every chunk read is also written back to
        the stream.

        Args:
            va: Object whose ``command(text)`` method receives each transcript.

        Raises:
            KeyboardInterrupt: Propagated unchanged when the user interrupts.

        The stream and the PyAudio instance are released on every exit path,
        not only on ``KeyboardInterrupt``.
        """
        with noalsaerr():
            p = pyaudio.PyAudio()

        try:
            stream = p.open(format=FORMAT,
                            channels=CHANNELS,
                            rate=RATE,
                            input=True,
                            output=True,
                            frames_per_buffer=CHUNK)
            try:
                sample_width = p.get_sample_size(FORMAT)
                data = stream.read(CHUNK)
                # stream.read() returns bytes, so the old `data != ''` test was
                # always true on Python 3; truthiness expresses the intent.
                while data:
                    if audioop.rms(data, 2) >= THRESHOLD:
                        # Collect chunks in a list and join once instead of
                        # repeatedly concatenating bytes.
                        chunks = [data]
                        silence_counter = 0
                        while silence_counter < SILENCE_DETECTION:
                            data = stream.read(CHUNK)
                            if LISTENING:
                                stream.write(data, CHUNK)
                            chunks.append(data)

                            if audioop.rms(data, 2) < THRESHOLD:
                                silence_counter += 1
                            else:
                                silence_counter = 0

                        # Pause capture while the utterance is processed.
                        stream.stop_stream()

                        audio_data = sr.AudioData(b''.join(chunks), RATE,
                                                  sample_width)
                        try:
                            com = self.recognizer.recognize_google(audio_data)
                            # The command runs in its own thread, but is waited
                            # for before listening resumes.
                            t = Thread(target=va.command, args=(com, ))
                            t.start()
                            t.join()
                        except sr.UnknownValueError:
                            print(
                                'Google Speech Recognition could not understand audio'
                            )
                        except sr.RequestError as e:
                            print(
                                f'Could not request results from Google Speech Recognition service; {e}'
                            )

                        stream.start_stream()
                        self.reset()

                    data = stream.read(CHUNK)
                    if LISTENING:
                        stream.write(data, CHUNK)
            finally:
                stream.stop_stream()
                stream.close()
        finally:
            p.terminate()
Example #7
0
    def audio_cb(self, msg):
        """Run speech recognition on an incoming audio message and log it.

        Does nothing except log when ``self.is_canceling`` is set. Otherwise
        ``msg.data`` is wrapped in an ``SR.AudioData`` and its raw bytes are
        passed to ``self.recognizer.asr``. A result with a truthy ``err_no``
        is dropped silently; otherwise the candidates in ``result["result"]``
        are logged joined by ``;``. Publishing of the result is currently
        disabled in this callback.
        """
        if self.is_canceling:
            rospy.loginfo("Speech is cancelled")
            return

        clip = SR.AudioData(msg.data, self.sample_rate, self.sample_width)
        try:
            pcm = clip.get_raw_data()
            rospy.loginfo("Waiting for result %d" % len(pcm))
            # The request always declares 'pcm' at 16000 regardless of
            # self.sample_rate. NOTE(review): dev_pid 1936 looks like a model
            # selector (1536 was a previous value) -- confirm with the ASR docs.
            result = self.recognizer.asr(pcm, 'pcm', 16000, {'dev_pid': 1936})
            if not result['err_no']:
                rospy.loginfo(";".join(result["result"]))
        except (SR.UnknownValueError, SR.RequestError) as e:
            rospy.logerr("Failed to recognize: %s" % str(e))
Example #8
0
 async def recognize(self, data):
     """Transcribe ``data`` with Google Cloud Speech off the event loop.

     The audio is first reduced to one channel, then wrapped in an
     ``AudioData`` and recognized in the loop's default executor.
     ``self.CREDENTIALS`` is passed as the second positional argument of
     ``recognize_google_cloud``.

     Returns:
         Whatever ``recognize_google_cloud`` returns.
     """
     # Factors (1, 0): keep the left channel, discard the right one.
     left_only = audioop.tomono(data, self.SAMPLE_WIDTH, 1, 0)
     clip = speech_recognition.AudioData(left_only, self.SAMPLE_RATE,
                                         self.SAMPLE_WIDTH)
     pending = self.loop.run_in_executor(
         None, self._recognizer.recognize_google_cloud, clip,
         self.CREDENTIALS)
     return await pending
Example #9
0
    def storeWav(self,
                 fileName=None,
                 maxDuration=None,
                 useCycleBufferLen=0,
                 response=None):
        """Store the buffered frames (optionally truncated) through pluginEcho.

        Args:
            fileName: Passed through to the pluginEcho store function.
            maxDuration: If given, only the first
                ``round(maxDuration / self.seconds_per_buffer)`` frames are
                kept; a count that is not strictly between 0 and
                ``len(self.frames)`` means "keep everything".
            useCycleBufferLen: When greater than 0 the cycle-buffer variant
                ``echoStoreWavCycleBuffer`` is used with ``"./"`` as its third
                argument; otherwise ``echoStoreWav`` is used.
            response: Passed through to the pluginEcho store function.
        """
        # None as the slice bound means "take every frame".
        frame_limit = None
        if maxDuration is not None:
            wanted = round(maxDuration / self.seconds_per_buffer)
            if 0 < wanted < len(self.frames):
                frame_limit = wanted

        selected = self.frames[:frame_limit]
        audio = sr.AudioData(getByteArray(selected), self.sample_rate,
                             self.sample_width)

        if useCycleBufferLen > 0:
            pluginEcho.echoStoreWavCycleBuffer(audio, fileName, "./",
                                               useCycleBufferLen, response)
            return
        pluginEcho.echoStoreWav(audio, fileName, response)
Example #10
0
def audio(meta_data):
    """Recognize an uploaded audio blob with Sphinx and emit the text.

    Args:
        meta_data: Mapping read at the keys ``"wav blob"`` (audio bytes),
            ``"sampleRate"`` and ``"language"``.

    The blob is treated as audio with 2-byte samples. A non-empty transcript
    is passed through ``punctate`` and gets its first character upper-cased
    before being emitted on the ``"textarea_text"`` socket event.
    """
    clip = sr.AudioData(meta_data["wav blob"], meta_data["sampleRate"], 2)
    transcript = r.recognize_sphinx(clip, language=meta_data["language"])
    if transcript:
        punctuated = punctate(transcript)
        # Upper-case only the first character; the rest stays untouched.
        transcript = punctuated[0].upper() + punctuated[1:]
    socketio.emit("textarea_text", transcript)
Example #11
0
    def record(self, stream, rate):
        """Wait for sound above a threshold, then record until it goes quiet.

        Args:
            stream: Object whose ``read(n)`` returns the next audio chunk.
            rate: Sample rate stored in the returned ``AudioData``.

        Returns:
            ``sr.AudioData`` built from the recorded chunks with a sample
            width of 2. The chunk that triggered the recording is not
            included.
        """
        noise_threshold = 10
        normalize = (1.0 / 32768.0)
        read_size = 1024
        sample_width = 2
        quiet_timeout = 1

        # Discard chunks until one of them is louder than the threshold.
        while not (self.rms(stream.read(read_size), sample_width, normalize)
                   > noise_threshold):
            pass

        # Message: "Noise detected, recording started".
        print('Шум обнаружен, началась запись')
        logging.info('Шум обнаружен, началась запись')

        if self.tray_interface is not None:
            self.tray_interface.set_correct()

        chunks = collections.deque()
        now = time.time()
        deadline = time.time() + quiet_timeout

        # Every loud chunk pushes the deadline back; the recording also stops
        # once more than 124 chunks have been collected.
        while now <= deadline and len(chunks) <= 124:
            piece = stream.read(read_size)
            if self.rms(piece, sample_width, normalize) >= noise_threshold:
                deadline = time.time() + quiet_timeout
            now = time.time()
            chunks.append(piece)

        if self.tray_interface is not None:
            self.tray_interface.set_default()

        # Message: "Recording stopped".
        logging.info('Запись остановлена')
        return sr.AudioData(b"".join(chunks), rate, sample_width)
    def add_data(self, data):
        """Run Sphinx on a chunk of audio and fire the callback on a match.

        Args:
            data: Raw audio for one chunk; it is wrapped in an ``AudioData``
                using ``self.audioSampleRate`` and ``self.audioSampleSize``.

        When the transcript equals ``self.phrase`` and no phrase has been
        seen yet, ``self.have_phrase`` is set and ``self.callback()`` is
        invoked. A failed recognition is printed and treated as "no match".
        """
        r = sr.Recognizer()

        # The data already comes from the microphone, so it is wrapped
        # directly instead of going through an audio source.
        audio = sr.AudioData(data, self.audioSampleRate, self.audioSampleSize)

        # Stays None when recognition fails; previously `text` was unbound in
        # that case and the comparison below raised UnboundLocalError.
        text = None
        try:
            text = r.recognize_sphinx(audio)
            print("Sphinx thinks you said " + text)
        except sr.UnknownValueError:
            print("Sphinx could not understand audio")
        except sr.RequestError as e:
            print("Sphinx error; {0}".format(e))

        if not self.have_phrase and text is not None and text == self.phrase:
            logger.info("clap detected")
            self.have_phrase = True
            self.callback()

        # AudioData is not subscriptable, so `audio[-1]` always raised
        # TypeError; remember the last element of the incoming data instead.
        # NOTE(review): prev_sample looks like a leftover from clap detection
        # -- confirm it is still needed.
        if len(data) > 0:
            self.prev_sample = data[-1]
    def callback(self, data):
        """Transcribe ``data`` with Microsoft Bing and publish the text.

        ``data.data`` is converted to bytes with ``tobytes()`` and wrapped in
        an ``sr.AudioData`` using the sample rate and width reported by a
        freshly opened ``sr.Microphone``. On success the transcript is stored
        in ``self.text`` and published through ``self.text_pub``; recognition
        failures are only printed.
        """
        # Placeholder; must be replaced with a real API key.
        bing_key = "INSERT BING API KEY HERE"
        recognizer = sr.Recognizer()

        with sr.Microphone() as mic:
            clip = sr.AudioData(data.data.tobytes(), mic.SAMPLE_RATE,
                                mic.SAMPLE_WIDTH)
            try:
                self.text = recognizer.recognize_bing(clip, key=bing_key)
            except sr.UnknownValueError:
                print(
                    "Microsoft Bing Voice Recognition could not understand audio"
                )
                return
            except sr.RequestError as err:
                print(
                    "Could not request results from Microsoft Bing Voice Recognition service; {0}"
                    .format(err))
                return

            # Reached only when recognition succeeded.
            self.text_pub.publish(str(self.text))
Example #14
0
    def build(self, file_name=''):
        """Initialise recording parameters, the article text and audio helpers.

        Args:
            file_name: Base name of the output file (``".wav"`` is appended).
                When empty, the current timestamp with ``:`` and ``.`` removed
                is used as the base name.

        Raises:
            IndexError: If no fetched article text is longer than 80
                characters.
        """
        # Recording parameters.
        self.chunk = 1024
        self.format = pyaudio.paInt16
        self.channels = 1
        self.rate = 44100
        if file_name:
            self.file_name = file_name + ".wav"
        else:
            # The old pattern '[:punc:]|\.' was a plain character class plus an
            # invalid escape in a non-raw string; on a timestamp it only ever
            # removed ':' and '.', which this raw pattern does explicitly.
            # pd.datetime was removed in pandas 2.0; Timestamp.today() renders
            # the same "YYYY-MM-DD HH:MM:SS.ffffff" text.
            self.file_name = re.sub(r'[:.]', '',
                                    str(pd.Timestamp.today())) + ".wav"

        self.news_article, self.news_article_label = get_url_content()
        # Drop short entries (80 characters or fewer), e.g. titles.
        self.news_article = [x for x in self.news_article if len(x) > 80]
        self.random_article = 0
        self.build_init = True

        self.clean_text = self.news_article[self.random_article]

        # Audio helpers; audio_data starts as an empty placeholder clip.
        self.port_audio = pyaudio.PyAudio()
        self.record_seconds = 0
        self.sound_data = []
        self.audio_data = sr.AudioData(b'', 1, 1)
        self.recog = sr.Recognizer()
Example #15
0
def recognize(stream_text):
    """Transcribe a chunk of radio audio with Google and log the transcript.

    Args:
        stream_text: Raw audio bytes; wrapped as audio with 2-byte samples at
            the module-level ``audio_rate``.

    The transcript is printed, checked for the phrase ``"hello satellite"``
    and appended with a timestamp to ``radio_log.txt``. Audio that cannot be
    understood is ignored; request errors are printed.
    """

    def logger(s):
        # The context manager closes the file even if a write fails.
        with open('radio_log.txt', 'a+', encoding='utf-8') as f:
            f.write(datetime.datetime.now().strftime("[ %d-%b-%Y %H:%M:%S ] "))
            f.write(s)
            f.write("\x0A")

    audio_data = sr.AudioData(stream_text, audio_rate, 2)
    try:
        result = recognizer.recognize_google(audio_data, language=args.lang)
        print(result + " kk")
        command1 = "hello satellite"
        # `find(...) > 0` missed the phrase when the transcript started with
        # it (index 0); a membership test covers every position.
        if command1 in result:
            print("<<<<<<Hellow Satellite was found>>>>>>>")
        logger(result)
    except sr.UnknownValueError:
        pass
    except sr.RequestError as e:
        print("Could not request results from GSR service; {0}".format(e))
Example #16
0
def analiseSpeech(audioWav):
    """Send a WAV recording to Google Cloud Speech and report if speech exists.

    Args:
        audioWav: In-memory buffer exposing ``getvalue()`` that holds a
            complete WAV file.

    Returns:
        ``(result, hasSpoken)`` on success, where ``result`` is the full
        recognizer response (``show_all=True``) and ``hasSpoken`` is ``False``
        when that response is empty. On any error the tuple
        ``(jsonify({'errorMessage': ...}), 400)`` is returned instead.
    """
    # Instantiate the speech recognition module.
    r = sr.Recognizer()

    # Retrieve the Google credentials from S3 as a string.
    # NOTE(review): this reads the private `_raw_stream` attribute of the
    # response body -- confirm there is no public accessor to use instead.
    credentials_json = retrieveS3File(
        "apneasleepbucket", "googleCredentials/ApenaSleep-c58f74b11fb6.json")
    credentialsStr = credentials_json['Body']._raw_stream.data.decode()

    try:
        # Decode the WAV container so that only the sample bytes are wrapped.
        # Passing the whole file would feed the header (and any extra chunks)
        # to the recognizer as if it were audio.
        samplerate, data = wavfile.read(io.BytesIO(audioWav.getvalue()))
        audio = sr.AudioData(data.tobytes(), samplerate,
                             data.dtype.itemsize)

        # Message: "Converting audio to text".
        print("Convertendo Audio para Texto ..... ")

        # Ask Google Cloud for every recognition candidate (pt-BR).
        result = r.recognize_google_cloud(audio,
                                          credentials_json=credentialsStr,
                                          language="pt-BR",
                                          show_all=True)

        # An empty response means no speech was found.
        hasSpoken = len(result) != 0
        return result, hasSpoken

    except Exception as e:
        print("Error: ", e)
        return jsonify({'errorMessage': str(e)}), 400
Example #17
0
def convert():
    """Recognize base64-encoded audio from the request form with Sphinx.

    Form fields read: ``language``, ``hot_words`` (a JSON list),
    ``frame_data_base64``, ``sample_rate`` and ``sample_width``.

    Returns:
        ``Response(transcript, 200)`` on success, ``Response('', 200)`` when
        the audio could not be understood, and ``Response('', 500)`` on any
        other error.
    """
    form = request.form
    language = form['language']
    # Every hot word is paired with the module-level ACCURACY value.
    hot_words = [(word, ACCURACY) for word in json.loads(form['hot_words'])]
    recognizer = sr.Recognizer()
    frame_data = base64.b64decode(form['frame_data_base64'])
    sample_rate = int(form['sample_rate'])
    sample_width = int(form['sample_width'])

    logger.debug(
        f'Recieved request. Language: {language}. Hot word: {hot_words}.')

    clip = sr.AudioData(frame_data, sample_rate, sample_width)
    try:
        transcript = recognizer.recognize_sphinx(clip,
                                                 language=language,
                                                 keyword_entries=hot_words)
    except sr.UnknownValueError:
        logger.debug('Uknown value')
        return Response('', 200)
    except Exception as err:
        logger.error(f'Error while recognizing. Error: {err}')
        return Response('', 500)

    # Any failure while logging or building the response propagates.
    logger.debug(f'Transcript: {transcript}')
    return Response(transcript, 200)
Example #18
0
    def performAnalysis(self, expectedStr):
        """Decode the recorded frames and score them against ``expectedStr``.

        Args:
            expectedStr: The sentence the speaker was expected to say.

        Returns:
            ``(self.percentageAccuracy, wpm)`` where ``wpm`` comes from
            ``self.getWordsperMin()``.

        Side effects:
            Prints the hypothesis and updates ``self.numword``.
        """
        # Normalise the expected sentence to drop words that cannot be
        # recognised correctly.
        formatter = STTFormatter()
        expected_words = formatter.performOperations(
            self.convertSentenceToWords(expectedStr))

        # Build audio data from the buffered frames and decode it.
        recorded = sr.AudioData(self.frames.getvalue(), self.m.SAMPLE_RATE,
                                self.m.SAMPLE_WIDTH)
        self.r.decodeAudio(self.r.getRawData(recorded))
        hypothesis = self.r.genHypothesis()
        print(hypothesis)
        self.numword = len(hypothesis.split())

        # Score the accuracy.
        heard_words = self.convertSentenceToWords(hypothesis)
        self.compareSentences(expected_words, heard_words)

        # Use the smaller of the two lengths for the words-per-minute figure.
        self.numword = min(len(expected_words), len(heard_words))

        wpm = self.getWordsperMin()
        return self.percentageAccuracy, wpm
    def __hotword_detected(self, hotword_index):
        """
        Hotword detected process:
         1) record audio until timeout
         2) convert audio to AudioData instance
         3) send AudioData to STT provider
         4) finally send event to raspiot

        No event is sent when nothing was recorded or when the STT provider
        fails to return a command.

        Args:
            hotword_index (int): indicate voice_model detected in case of multiple models (not implemented yet)
        """
        # record audio until timeout
        self.logger.debug('Recordings audio during %d seconds...' % self.record_duration)
        start = time.time()
        # collect the chunks and join once instead of growing a bytes object
        chunks = []
        while True:
            data = self.buffer.get()
            if len(data) > 0:
                chunks.append(data)

            # check timeout
            if time.time() > (start + self.record_duration):
                self.logger.debug(u'Stop recording')
                break

            # short pause between two buffer reads
            time.sleep(0.03)
        recorded_data = b''.join(chunks)
        self.logger.debug(u'Recording finished')

        # check recorded data
        if len(recorded_data) == 0:
            # nothing to recognize, so no request is sent to the provider
            self.logger.debug('Nothing said during recording')
            return
        if self.logger.getEffectiveLevel() == logging.DEBUG:
            # write audio to file only during debug
            self.__write_wav(recorded_data, '/tmp/recording.wav')

        # speech to text recording
        try:
            # create audiodata object (2-byte samples) for the library
            audio = speechrecognition.AudioData(recorded_data, self.rate, 2)

            # recognize audio
            self.logger.debug(u'Recognizing audio...')
            command = self.recognizer.recognize_bing(audio, key=self.provider_token, language='fr-FR')
            self.logger.debug(u'Done: command=%s' % command)

        except speechrecognition.UnknownValueError:
            # `command` was left unbound here, so the event code crashed
            self.logger.warning(u'STT provider doesn\'t understand command')
            return

        except speechrecognition.RequestError as e:
            self.logger.error(u'STT provider service seems to be unreachable: %s' % str(e))
            return

        # send event
        params = {
            u'hotword': 'hello',
            u'command': command
        }
        self.command_event.send(params=params)
Example #20
0
    def callback(self, data):
        """Transcribe ``data`` with IBM Speech to Text and publish the text.

        ``data.data`` is converted to bytes with ``tobytes()`` and wrapped in
        an ``sr.AudioData`` using the sample rate and width reported by a
        freshly opened ``sr.Microphone``. On success the transcript is stored
        in ``self.text`` and published through ``self.text_pub``; recognition
        failures are only printed.
        """
        # Masked placeholders; real IBM credentials must be supplied.
        ibm_username = "******"
        ibm_password = "******"
        recognizer = sr.Recognizer()

        with sr.Microphone() as mic:
            clip = sr.AudioData(data.data.tobytes(), mic.SAMPLE_RATE,
                                mic.SAMPLE_WIDTH)
            try:
                self.text = recognizer.recognize_ibm(clip,
                                                     username=ibm_username,
                                                     password=ibm_password)
            except sr.UnknownValueError:
                print("IBM Speech to Text could not understand audio")
                return
            except sr.RequestError as err:
                print(
                    "Could not request results from IBM Speech to Text service; {0}"
                    .format(err))
                return

            # Reached only when recognition succeeded.
            self.text_pub.publish(str(self.text))
def recognize(stream_text):
    """Transcribe a chunk of radio audio, log the text and save the audio.

    Args:
        stream_text: Raw audio bytes; wrapped as audio with 2-byte samples at
            the module-level ``audio_rate``.

    The transcript is printed and written to a log file whose name starts
    with the time of logging. Whether or not recognition succeeds, the audio
    is written as WAV to a file whose name starts with the time this function
    was entered.
    """
    date_of_file = datetime.datetime.now().strftime("[ %d-%b-%Y %H:%M:%S ] ")

    def logger(s):
        date = datetime.datetime.now().strftime("[ %d-%b-%Y %H:%M:%S ] ")
        # The context manager closes the file even if a write fails.
        # NOTE(review): the timestamp prefix yields a new log file for every
        # distinct second and contains ':' -- confirm this is intended.
        with open(date + 'radio_log.txt', 'a+', encoding='utf-8') as f:
            f.write(date)
            f.write(s)
            f.write("\x0A")

    audio_data = sr.AudioData(stream_text, audio_rate, 2)
    try:
        result = recognizer.recognize_google(audio_data, language=args.lang)
        print(result)
        logger(result)
    except sr.UnknownValueError:
        pass
    except sr.RequestError as e:
        print("Could not request results from GSR service; {0}".format(e))

    with open(date_of_file + "recording_of_radio_station.wav", "wb") as f:
        f.write(audio_data.get_wav_data())
Example #22
0
def speech_recog(audio, sr):
    """Transcribe ``audio`` with Sphinx, returning ``""`` if not understood.

    Args:
        audio: Object whose ``tobytes()`` yields the samples; they are
            wrapped as audio with 2-byte samples.
        sr: Sample rate of ``audio`` (a number here, not the library module).

    Returns:
        The transcript in the language given by ``args.sr_lan``, or an empty
        string when Sphinx raises ``UnknownValueError``.
    """
    recognizer = sprecog.Recognizer()
    clip = sprecog.AudioData(audio.tobytes(), sr, 2)
    try:
        return recognizer.recognize_sphinx(clip,
                                           language=args.sr_lan,
                                           show_all=False)
    except sprecog.UnknownValueError:
        return ""
Example #23
0
 async def recognize(self, data):
     """Transcribe ``data`` with Google Speech Recognition off the event loop.

     The audio is first reduced to one channel, wrapped in an ``AudioData``
     and recognized in the loop's default executor with ``self.KEY`` as the
     API key.

     Returns:
         Whatever ``recognize_google`` returns.
     """
     # Factors (1, 0): keep the left channel, discard the right one.
     left_only = audioop.tomono(data, self.SAMPLE_WIDTH, 1, 0)
     clip = speech_recognition.AudioData(left_only, self.SAMPLE_RATE,
                                         self.SAMPLE_WIDTH)
     # The executor accepts positional arguments only, so the keyword
     # argument is bound beforehand with a partial.
     job = functools.partial(self._recognizer.recognize_google,
                             clip,
                             key=self.KEY)
     pending = self.loop.run_in_executor(None, job)
     return await pending
Example #24
0
    def raw_speech_to_audio_data(self, raw_data):
        """Wrap raw speech bytes in an ``AudioData`` at ``self.rate``.

        The sample width is assumed to be 2 bytes, a value taken from example
        code rather than verified. It may be wrong and should be tested;
        per the ``AudioData`` source the possible widths are 1 to 4.
        """
        assumed_width = 2
        return speech.AudioData(frame_data=raw_data,
                                sample_rate=self.rate,
                                sample_width=assumed_width)
Example #25
0
 def callback(self, data):
     """Wrap the incoming audio, then load samples from ``myfile.raw``.

     ``data.data`` is wrapped in an ``sr.AudioData`` using the sample rate
     and width of a freshly opened ``sr.Microphone``. Afterwards
     ``myfile.raw`` is read as one channel at 44100 Hz with subtype
     ``'FLOAT'``. Neither result is used or returned by this method.
     """
     recognizer = sr.Recognizer()
     with sr.Microphone() as mic:
         samples = data.data
         audio = sr.AudioData(samples.tobytes(), mic.SAMPLE_RATE,
                              mic.SAMPLE_WIDTH)
         # The parameter is rebound to the samples read from the raw file.
         data, samplerate = sf.read('myfile.raw',
                                    channels=1,
                                    samplerate=44100,
                                    subtype='FLOAT')
Example #26
0
def speech_recognition(samples,rate=16000):
    """Report whether Google Speech Recognition finds speech in ``samples``.

    Args:
        samples: Array-like exposing ``tobytes()`` and ``dtype.itemsize``;
            the item size is used as the sample width.
        rate: Sample rate of ``samples``.

    Returns:
        ``1`` when the full recognizer response (``show_all=True``) is
        non-empty, otherwise ``0``.
    """
    recognizer = sr.Recognizer()
    clip = sr.AudioData(samples.tobytes(),
                        sample_rate=rate,
                        sample_width=samples.dtype.itemsize)
    response = recognizer.recognize_google(clip, show_all=True)
    return 0 if len(response) == 0 else 1
Example #27
0
def test_live(message):
    """Recognize a base64-encoded audio message and broadcast the transcript.

    Args:
        message: Mapping read at the keys ``'data'`` (base64 audio),
            ``'sample_rate'`` and ``'sample_width'``.

    The transcript (language ``ms-MY``) is emitted on the ``'speech_update'``
    event to all clients. Recognition errors propagate to the caller.
    """
    payload = base64.b64decode(message['data'])
    clip = sr.AudioData(payload, message['sample_rate'],
                        message['sample_width'])
    transcript = r.recognize_google(clip, language = 'ms-MY')
    emit('speech_update', {'text': transcript}, broadcast = True)
Example #28
0
 def recognize(self, frames, begin, end):
     """Recognize the frames in ``frames[begin:end]``.

     Returns:
         ``(start_time, end_time, text)`` where the times come from
         ``self.position2time`` and ``text`` is ``'###'`` when the audio
         could not be understood.
     """
     clip = sr.AudioData(b''.join(frames[begin:end]), self.rate, self.width)
     try:
         text = self._recognize(clip, language=self.language)
     except sr.UnknownValueError:
         # Marker for a segment that could not be understood.
         text = '###'
     return self.position2time(begin), self.position2time(end), text
Example #29
0
def rcgn_google(frame_data, sample_rate, sample_size_bytes):
    """Best-effort Russian transcription with Google Speech Recognition.

    Args:
        frame_data: Raw audio bytes.
        sample_rate: Sample rate of ``frame_data``.
        sample_size_bytes: Width of one sample in bytes.

    Returns:
        The transcript, or ``''`` when recognition fails for any reason.
    """
    import logging

    r = sr.Recognizer()
    audio = sr.AudioData(frame_data, sample_rate, sample_size_bytes)
    try:
        return r.recognize_google(audio, language='ru-RU', show_all=False)
    except Exception as e:
        # Deliberately best-effort: callers always get a string back. The
        # failure is recorded at debug level instead of vanishing silently.
        logging.getLogger(__name__).debug(
            "Google recognition failed: %r", e)
        return ''
Example #30
0
    def get_msg(self, data, rate, sample_width, number_channels):
        """Recognize ``data`` with Google, falling back to Sphinx.

        Args:
            data: Raw audio bytes.
            rate: Sample rate of ``data``.
            sample_width: Width of one sample in bytes.
            number_channels: Not used by this method.

        Returns:
            The full Google response (``show_all=True``) when it is truthy,
            otherwise the result of ``recognize_sphinx`` on the same audio.
        """
        clip = sr.AudioData(data, rate, sample_width)
        return (self.recognizer.recognize_google(clip,
                                                 key=self._api_key,
                                                 show_all=True)
                or self.recognizer.recognize_sphinx(clip))