Example #1
0
def gat_with_breakdown(path_to_file, recognizer: sr.Recognizer, breakdown_duration=10, tempfile_name='temp.wav'):
    """
    gat = get audio transcript

    Transcribe an audio file chunk by chunk, yielding one record for every
    chunk in which speech was recognized. This breakdown is effective for
    writing down SRT(s).

    :param path_to_file: audio file handed to ``chop_chop`` for slicing.
    :param recognizer: recognizer used to record and transcribe each chunk.
    :param breakdown_duration: chunk length forwarded to ``chop_chop``.
    :param tempfile_name: scratch file ``chop_chop`` writes each chunk to.
    :return: generator of dicts with the keys ``start`` and ``end`` (formatted
        by ``format_to_ffmpeg_duration``), ``text``, ``total`` and ``delta``.
        ``delta`` is the span from the end of the previously yielded chunk
        (or the start of the first chunk) to the end of this one, so chunks
        skipped as unintelligible are still accounted for.
    """
    # Initialised once, outside the loop: resetting it on every iteration made
    # the "last_yield = end" bookkeeping below meaningless.
    last_yield = None
    for start, end, total in chop_chop(path_to_file, breakdown_duration, output_file=tempfile_name):
        if last_yield is None:
            last_yield = start
        # Read the chunk fully, then release the temp file before handing
        # control back to the consumer.
        with sr.AudioFile(tempfile_name) as source:
            audio = recognizer.record(source)
        try:
            content = recognizer.recognize_google(audio)
        except sr.UnknownValueError:
            # Nothing intelligible in this chunk; move on to the next one.
            continue
        if content:
            yield {
                'start': format_to_ffmpeg_duration(start),
                'end': format_to_ffmpeg_duration(end),
                'text': content,
                'total': total,
                'delta': end - last_yield,
            }
            last_yield = end
def GetTextInAudio(audio_msg, chat_id, bot):
    """Transcribe an audio message and send the resulting text to a chat.

    The attachment is downloaded into ``dirLoc``, converted to WAV with
    pydub, recognized as Japanese (``ja-JP``) and passed through ``t_j2k``
    before being sent. When the speech cannot be understood the literal
    string ``'None'`` is sent instead.

    :param audio_msg: mapping with at least ``file_id`` and ``mime_type``.
    :param chat_id: chat that receives the reply.
    :param bot: object providing ``download_file`` and ``sendMessage``.
    """
    from os.path import exists

    from pydub import AudioSegment
    from speech_recognition import AudioFile, Recognizer, UnknownValueError

    rec = Recognizer()

    file_id = audio_msg['file_id']
    tempAudio = dirLoc + file_id + '.' + audio_msg['mime_type'].split('/')[1]
    file_loc = dirLoc + file_id + '.wav'

    try:
        bot.download_file(file_id, tempAudio)

        sound = AudioSegment.from_file(tempAudio)
        sound.export(file_loc, format='wav')

        with AudioFile(file_loc) as AudioSrc:
            content = rec.record(AudioSrc)
        try:
            text = rec.recognize_google(
                audio_data=content,
                language='ja-JP',
            )
        except UnknownValueError:
            bot.sendMessage(chat_id, 'None')
            return

        bot.sendMessage(chat_id, t_j2k(text))
    finally:
        # Always clean up, including on the early return above. The two
        # paths are identical when the upload is already ".wav", so
        # de-duplicate and skip anything that was never created.
        for leftover in dict.fromkeys((tempAudio, file_loc)):
            if exists(leftover):
                remove(leftover)
Example #3
0
class MicrophoneMonitor(object):
    """Listen on the microphone until a phrase containing a hotword is heard."""

    def __init__(self) -> None:
        self.micro = Recognizer()
        self.micro.energy_threshold = 4000
        self.micro.pause_threshold = 0.5

    def monitor_microphone(self, hotword='jarvis'):
        """Block until a recognized phrase contains ``hotword``.

        Phrases are recognized with ``language='pt'``. Unintelligible audio
        and request failures are reported on stdout and listening resumes.

        :param hotword: word to wait for; compared case-insensitively.
        :return: the recognized phrase that contained the hotword.
        """
        wanted = hotword.lower()
        with Microphone() as source:
            self.micro.adjust_for_ambient_noise(source, duration=0.5)
            while True:
                print('Aguardando comando: ')
                audio = self.micro.listen(source)
                try:
                    trigger = self.micro.recognize_google(audio, language='pt')
                except UnknownValueError:
                    print(
                        "Google Speech Recognition could not understand audio")
                    continue
                except RequestError as e:
                    print(
                        "Could not request results from Google Speech Recognition service; {0}"
                        .format(e))
                    continue
                print(trigger)
                # Lower-case both sides so "JARVIS", "Jarvis" and "jarvis"
                # all match, not just the two spellings checked before.
                if wanted in trigger.lower():
                    print('Comando: ', trigger)
                    return trigger
Example #4
0
class Ear:  # pylint: disable=too-few-public-methods
    """
    listen() -- take audio from microphone, recognize it and return as string
    """
    def __init__(self):
        self.recognizer = Recognizer()

    def listen(self):
        """
        Listen on the microphone until a phrase is successfully recognized.

        Each attempt uses a fresh recognizer, calibrates for ambient noise and
        records with ``phrase_time_limit=6``. Unintelligible audio and request
        failures are reported on stdout and the attempt is repeated.

        :return: the recognized text (``language="tr"``), lower-cased
        """
        # A loop instead of self-recursion: a long run of failures (e.g. the
        # network being down) can no longer exhaust the call stack.
        while True:
            self.recognizer = Recognizer()
            with Microphone() as source:
                self.recognizer.adjust_for_ambient_noise(source)
                print("I am listening you...")
                audio = self.recognizer.listen(source, phrase_time_limit=6)

            try:
                # Uses the default API key
                # To use another API key:
                # `r.recognize_google(audio, key="GOOGLE_SPEECH_RECOGNITION_API_KEY")`
                data = self.recognizer.recognize_google(audio, language="tr")
            except UnknownValueError:
                print("Google Speech Recognition could not understand audio")
                continue
            except RequestError as exception:
                print("Could not request results from "
                      "Google Speech Recognition service; {0}".format(exception))
                continue

            print("You said : " + data)
            return data.lower()
Example #5
0
    def listen(self) -> str:
        """Record one phrase from the microphone and return its transcription.

        An empty string is returned when the ``listen`` call times out
        (``timeout=10``), when the audio cannot be understood, or when the
        recognition request fails; each case is logged.
        """
        engine = Recognizer()
        with Microphone() as mic:
            self.log.debug('Listening on microphone...')
            try:
                captured = engine.listen(mic, timeout=10)
            except WaitTimeoutError:
                self.log.debug('Listen timed out.')
                return ''

        # Google Speech Recognition with the library's default API key.
        transcript = ''
        try:
            transcript = engine.recognize_google(captured)
        except UnknownValueError:
            self.log.debug(
                'Google Speech Recognition could not understand audio')
        except RequestError as error:
            self.log.error(
                f'Could not request results from Google Speech Recognition service: {error}'
            )
        else:
            self.log.debug(f'You said: "{transcript}"')

        return str(transcript)
Example #6
0
def Audio_file_Read(filename):
    """Transcribe a Korean audio file and count four filler words in it.

    :param filename: path of the audio file to transcribe (``ko-KR``).
    :return: dict with ``text`` (the transcript) and ``data`` (occurrence
        counts of "어", "아니", "근데", "이제", in that order). When the speech
        cannot be understood, ``text`` is a fixed message and ``data`` is
        all zeros.
    """
    fillers = ("어", "아니", "근데", "이제")
    recog = Recognizer()
    try:
        with sr.AudioFile(filename) as source:
            audio = recog.record(source)
        recognized = recog.recognize_google(audio, language="ko-KR")
    except UnknownValueError:
        return {'text': "당신이 말한 문장이 없습니다.", 'data': [0, 0, 0, 0]}

    # Counter returns 0 for absent words, so no membership checks are needed.
    counts = collections.Counter(text_to_word_sequence(recognized))
    return {'text': recognized, 'data': [counts[word] for word in fillers]}
Example #7
0
def wav_to_text(wav_file_path, language="es-ES", show_all=False):
    """Transcribe a WAV file with Google speech recognition.

    :param wav_file_path: path of the WAV file to read.
    :param language: language tag forwarded to ``recognize_google``.
    :param show_all: forwarded to ``recognize_google`` unchanged.
    :return: whatever ``recognize_google`` returns for the recorded audio.
    :raises GolemException: when the audio cannot be understood; the
        underlying ``UnknownValueError`` is kept as ``__cause__``.
    """
    r = Recognizer()
    with WavFile(wav_file_path) as source:
        audio = r.record(source)
    try:
        return r.recognize_google(audio_data=audio, language=language, show_all=show_all)
    except UnknownValueError as error:
        # Chain explicitly so the original failure stays in the traceback.
        raise GolemException("Could not understand audio") from error
Example #8
0
class SimpleSTT(object):
    """Minimal speech-to-text helper that owns a single Recognizer."""

    def __init__(self):
        self.recognizer = Recognizer()

    def transcribe(self, path_to_source):
        """Return the Google transcription of the audio file at ``path_to_source``.

        NOTE(review): audio is captured with ``listen`` rather than ``record``;
        presumably only the first detected phrase is transcribed -- confirm
        this is intended.
        """
        engine = self.recognizer
        with AudioFile(path_to_source) as audio_file:
            captured = engine.listen(audio_file)
        transcript = engine.recognize_google(captured)
        return transcript
Example #9
0
def VoiceInput():
    """Record a phrase from microphone device 1 and return its transcription.

    If the first recording cannot be understood the user is asked (via
    ``Response``) to try again and a second recording is made.

    :return: the recognized text, or ``None`` when the recognition service
        could not be reached.
    :raises UnknownValueError: when the second attempt is unintelligible too.
    """
    recog = Recognizer()
    mic = Microphone(device_index=1)

    with mic:
        audio = recog.listen(mic)
    try:
        try:
            recognized = recog.recognize_google(audio)
        except UnknownValueError:
            Response("Try Again")
            with mic:
                audio = recog.listen(mic)
            recognized = recog.recognize_google(audio)
    except RequestError:
        # Previously this fell through to print/return with `recognized`
        # unbound (UnboundLocalError), and a failure during the retry was not
        # handled at all.
        Response("Sorry my service is down")
        return None
    print(recognized)
    return recognized
Example #10
0
def dothis(message):
    """
    From speech to text.

    Generator. With audio attachments, each one is downloaded into ``temp/``
    under the current directory, converted to WAV with ffmpeg when needed,
    transcribed, and the accumulated answer is yielded after every
    attachment and once more at the end. Without attachments it either
    closes the active command or prompts for audio.

    :param message: object exposing ``get_session``, ``get_setting``,
        ``add_setting``, ``delete_active``, ``attachments`` and ``params``
    :return: text (or a dict with ``msg`` and ``keyboard`` for the prompts)
    """
    session = message.get_session()
    ans = ''
    current_cmd = message.get_setting(session, 'active')

    if not message.attachments['sound']:
        if 'Выход' in message.params and current_cmd:
            message.delete_active(session)
            yield {'msg': 'Успешно!', 'keyboard': [[], False]}
        else:
            if current_cmd is None:
                message.add_setting(session, 'active', 'stt')
            yield {'msg': 'Прикрепите аудио или напишите Выход',
                   'keyboard': [[[('Выход', 'negative')]], False]
                   }
        return

    try:
        recognizer = Recognizer()
        mode = 'google'
        lang = 'ru-RUS'
        ans = ''
        for attachment in message.attachments['sound']:
            ext = attachment[1]
            # Timestamped path ending in "." so an extension can be appended.
            stem = (os.path.abspath(os.curdir) + '/temp/'
                    + time.strftime("%Y%m%d-%H%M%S") + '.')
            source_path = stem + ext
            wav_path = stem + 'wav'
            urllib.request.urlretrieve(attachment[0], source_path)  # getting file

            if ext != 'wav':
                subprocess.run(['ffmpeg', '-i', source_path, wav_path])
                os.remove(source_path)

            with AudioFile(wav_path) as source:
                song = recognizer.record(source)
            os.remove(wav_path)

            if "en" in message.params:
                lang = 'en-EN'
            if 'wit' in message.params:
                mode = 'wit'

            if mode == 'google':
                recg = recognizer.recognize_google(song, language=lang)
            else:
                recg = recognizer.recognize_wit(song, witkey)
            ans += f">>>>>>{recg}\n\n"
            yield ans
    except Exception as error:
        ans += "Произошла непредвиденная ошибка: " + str(error) + "\n"
    finally:
        if current_cmd:
            message.delete_active(session)
        yield str(ans)
 def listen(self):
     """Record one phrase and return its lower-cased transcription.

     Recognition uses ``self.lang``. An empty string is returned when the
     audio is not understood or the recognition request fails.
     """
     try:
         with Microphone() as mic:
             engine = Recognizer()
             engine.adjust_for_ambient_noise(mic)
             captured = engine.listen(mic)
             heard = engine.recognize_google(captured, language=self.lang)
             return heard.lower()
     except (UnknownValueError, RequestError):
         return ''
Example #12
0
def gettingWordsFromMic():
    """Record a phrase, wait for the user to press Enter, then print the transcript.

    The list of microphone names is printed before recording starts.
    """
    device = Microphone()
    engine = Recognizer()
    print("Say something...")
    print(Microphone.list_microphone_names())
    with device as stream:
        engine.adjust_for_ambient_noise(stream)
        captured = engine.listen(stream)

    # Recognition only starts once the user confirms.
    input("Press a key to process")
    transcript = engine.recognize_google(captured)
    print(transcript)
Example #13
0
def speech_recog(file_name="input_sample/audio/introduction_ml.mp3",
                 duration=10):
    """Convert an MP3 to WAV and print the transcript of its first part.

    :param file_name: path of the MP3 file to transcribe. Previously this
        argument was ignored and the default path was always used.
    :param duration: forwarded to ``Recognizer.record`` as ``duration``.
    """
    import os.path

    convert_mp3_to_wav(file_name)
    # NOTE(review): assumes convert_mp3_to_wav writes the WAV next to the
    # input with only the extension changed -- this matches the paths that
    # were hard-coded here before; confirm against the helper.
    wav_path = os.path.splitext(file_name)[0] + ".wav"
    class_audio = AudioFile(wav_path)
    print(type(class_audio))

    recognizer = Recognizer()

    with class_audio as src_audio:
        audio = recognizer.record(src_audio, duration=duration)
    print(recognizer.recognize_google(audio))
Example #14
0
def on_listen(recognizer: sr.Recognizer, audio: sr.AudioData):
    """Print diagnostics for a captured phrase followed by its transcript.

    The first part of the line shows the phrase length (frame bytes divided
    by sample rate and sample width), the value returned by ``energy`` and
    the recognizer's current energy threshold. The recognized text
    (``ru-RU``, lower-cased) and the recognition time are appended; if the
    phrase is not understood the line is simply terminated.
    """
    seconds = len(audio.frame_data) / audio.sample_rate / audio.sample_width
    level, _ = energy(recognizer, audio)
    print(f'+{seconds:6.2f}s {level:6.2f}; [{recognizer.energy_threshold:.2f}]: ', end='')
    try:
        started = time()
        text = recognizer.recognize_google(audio, language='ru-RU').lower()
    except sr.UnknownValueError:
        print()
        return
    elapsed = time() - started
    print(f'Вы сказали ({elapsed:5.2f}s): {text}')
def start(data):
    """Transcribe an audio file and save the text next to it as ``.txt``.

    :param data: path of the audio file. Paths ending in ``.wav`` are first
        passed through ``converter``; for anything else a notice is printed.
    """
    import os.path

    if data.endswith('.wav'):
        # NOTE(review): converting only when the file already is a .wav looks
        # inverted, but converter() is not visible here -- left unchanged.
        data = converter(data)
    else:
        print("Need Conversion")
    r = Recognizer()
    with AudioFile(data) as source:
        audio = r.listen(source)
    print("Status: Working\r", end="")
    query = r.recognize_google(audio)
    # splitext only strips the final extension, so paths such as
    # "./clip.wav" or "take.1.wav" no longer lose everything after the first dot.
    transcript_path = os.path.splitext(data)[0] + ".txt"
    with open(transcript_path, 'w') as file:
        file.write(query)
    print(query)
def audio_to_text(message_input):
    """Record a phrase and put its transcription into ``message_input``.

    The microphone named "sysdefault" is used when present (the last one if
    several match); otherwise the library's default device is used instead
    of failing on an unbound variable.

    :param message_input: widget-like object exposing ``setText``.
    """
    # initialise the recognizer
    r = Recognizer()
    # None lets Microphone choose its default input device.
    device_index = None
    for i, microphone_name in enumerate(Microphone.list_microphone_names()):
        if microphone_name == "sysdefault":
            device_index = i
    with Microphone(device_index=device_index) as source:
        # Extract the audio
        audio = r.listen(source)
    # recognize speech using Google Speech Recognition and add it to the text input area
    try:
        message_input.setText(r.recognize_google(audio))
    except UnknownValueError:
        message_input.setText('The audio was not understood')
Example #17
0
class Listener:
    """Captures a phrase from the microphone and stores it as lower-case text."""

    def __init__(self) -> None:
        super().__init__()
        recognizer = Recognizer()
        recognizer.energy_threshold = 4000
        self._recognizer = recognizer

    def listen(self) -> None:
        """Listen on the microphone and store the recognized text in ``self.text``."""
        engine = self._recognizer
        with Microphone() as mic:
            print('listening...')
            engine.adjust_for_ambient_noise(mic)
            captured = engine.listen(mic)
            heard: Any = engine.recognize_google(captured)
            self.text: str = heard.lower()
def get_audio():
    """Record a phrase (``phrase_time_limit=5``) and return its transcription.

    :return: the recognized text (``en-US``), or ``0`` after telling the user
        via ``chatbot_speaks`` that the audio could not be understood.
    """
    rObject = Recognizer()
    with Microphone() as source:
        print("Speak...")

        # recording the audio using speech recognition
        audio = rObject.listen(source, phrase_time_limit=5)
    print("Stop.")  # limit 5 secs
    try:
        text = rObject.recognize_google(audio, language='en-US')
    except Exception:
        # Narrowed from a bare `except:` so Ctrl-C and SystemExit still
        # propagate instead of being reported as a recognition failure.
        chatbot_speaks("Could not understand your audio, Please try again !")
        return 0
    print("You: ", text)
    return text
Example #19
0
 def myCommand(self):
     """Listen for a spoken command until one is recognized.

     Each attempt uses a fresh recognizer with ``pause_threshold = 1`` and
     ambient-noise calibration (``duration=1``).

     :return: the recognized command, lower-cased.
     """
     # Loop rather than recurse: repeated unrecognizable speech can no longer
     # grow the call stack without bound.
     while True:
         r = Recognizer()
         with Microphone() as source:
             print('Say something...')
             r.pause_threshold = 1
             r.adjust_for_ambient_noise(source, duration=1)
             audio = r.listen(source)
         try:
             command = r.recognize_google(audio).lower()
         except UnknownValueError:
             # unrecognizable speech: keep listening
             print('....')
             continue
         print('You said: ' + command + '\n')
         return command
Example #20
0
class VoiceService:
    """Thin wrapper around a Recognizer for capturing and transcribing speech."""

    def __init__(self):
        self.client = Recognizer()

    def capture_audio(self):
        """Record from the microphone and return what ``listen`` captured."""
        with Microphone() as device:
            return self.client.listen(device)

    def get_transcript(self, audio, show_all=False):
        """Return the result of ``recognize_google`` for ``audio``."""
        recognizer = self.client
        return recognizer.recognize_google(audio, show_all=show_all)

    def capture_and_transcribe_audio(self):
        """Record from the microphone and return the transcript in upper case."""
        recording = self.capture_audio()
        text = self.get_transcript(recording, show_all=False)
        return text.upper()
Example #21
0
class Listener(object):
    """Turns audio from a file or from the microphone into text."""

    def __init__(self):
        self.__recognizer = Recognizer()

    def __get_audio_from_file(self, filename, timeout=None, on_listen=None):
        with AudioFile(filename) as source:
            return self.__get_audio_from_source(source, timeout, on_listen)

    def __get_audio_from_microphone(self, timeout=None, on_listen=None):
        with Microphone() as source:
            self.__recognizer.adjust_for_ambient_noise(source, duration=1.0)
            return self.__get_audio_from_source(source, timeout, on_listen)

    def __get_audio_from_source(self, source, timeout=None, on_listen=None):
        # Optional hook fired just before listening starts.
        if callable(on_listen): on_listen()
        return self.__recognizer.listen(source, timeout=timeout)

    def __recognize(self, audio, language, on_recognition=None):
        # Optional hook fired just before the recognition request.
        if callable(on_recognition): on_recognition()
        return self.__recognizer.recognize_google(audio, language=language)

    def get_text_from_audio(self,
                            filename,
                            language="en-us",
                            timeout=None,
                            on_listen=None,
                            on_recognition=None):
        """Return the text recognized in an audio file, or ``""`` on failure.

        :param filename: path of the audio file.
        :param language: language tag passed to ``recognize_google``.
        :param timeout: forwarded to ``Recognizer.listen``.
        :param on_listen: optional callable invoked before listening.
        :param on_recognition: optional callable invoked before recognition.
        :raises FileNotFoundError: propagated unchanged.
        """
        try:
            audio = self.__get_audio_from_file(filename, timeout, on_listen)
            return self.__recognize(audio, language, on_recognition)
        except FileNotFoundError:
            raise
        except Exception:
            # `except Exception` (not bare) so KeyboardInterrupt/SystemExit
            # are not silently turned into an empty result.
            return str()

    def speech_to_text(self,
                       language="en-us",
                       timeout=None,
                       on_listen=None,
                       on_recognition=None):
        """Return the text recognized from the microphone, or ``""`` on failure.

        :param language: language tag passed to ``recognize_google``.
        :param timeout: forwarded to ``Recognizer.listen``.
        :param on_listen: optional callable invoked before listening.
        :param on_recognition: optional callable invoked before recognition.
        """
        try:
            audio = self.__get_audio_from_microphone(timeout, on_listen)
            return self.__recognize(audio, language, on_recognition)
        except Exception:
            return str()
Example #22
0
def listening_callback(recognizer: sr.Recognizer, audio) -> None:
    """
    Handle a recorded phrase: transcribe it and forward the text.

    The phrase is recognized with Google speech recognition in the language
    given by ``config.recognizer_lang``; the text is logged and handed to
    ``send_text_to_server``. Unintelligible phrases and request failures are
    logged at ERROR level.

    :param recognizer: sr.Recognizer to be used
    :param audio: phrase as audio
    """
    try:
        heard = recognizer.recognize_google(
            audio, language=config.recognizer_lang)
        log(f"text is: {heard}", remote=False)
        send_text_to_server(heard)
    except sr.UnknownValueError:
        # empty phrase (silence) or speech that could not be recognized
        log("Unknown value", level=logging.ERROR, remote=False)
    except sr.RequestError:
        log("Request error", level=logging.ERROR)
Example #23
0
def read_mic_input(r: sr.Recognizer, mic: sr.Microphone) -> Dict:
    """Record from ``mic``, transcribe with ``r`` and report the outcome.

    The returned dictionary has three keys:

    "success": ``False`` only when the recognition request itself failed;
               it stays ``True`` when the speech was merely not understood.
    "error": ``None`` if no error occurred, otherwise a message string.
    "transcription": ``None`` if nothing was transcribed, otherwise the
                     lower-cased text with "-", "/" and "\\" replaced by
                     spaces and " 00" expanded to " 0 0".
    """
    with mic as source:
        # calibrate against ambient noise, then capture the voice input
        r.adjust_for_ambient_noise(source, duration=0.3)
        audio = r.listen(source)

    success, error, transcription = True, None, None
    try:
        heard = r.recognize_google(audio).lower()
        # clean up the transcription of coordinates and measurements
        for separator in ("-", "/", "\\"):
            heard = heard.replace(separator, " ")
        transcription = heard.replace(" 00", " 0 0")
    except sr.RequestError:
        # API was unreachable or unresponsive
        success = False
        error = "Error occurred with the API request."
    except sr.UnknownValueError:
        # speech could not be transcribed
        error = "Unable to recognize speech."

    return {"success": success, "error": error, "transcription": transcription}
Example #24
0
def listen():
    """Record one phrase from the microphone and return what was heard.

    Returns the recognized text, the string 'None' when the audio was not
    understood, or the string 'Broken' when the recognition request failed.
    """
    engine = Recognizer()
    with Microphone() as mic:
        # calibrate for ambient sound (duration=0.5), then capture a phrase
        engine.adjust_for_ambient_noise(mic, duration=0.5)
        captured = engine.listen(mic)

    try:
        heard = engine.recognize_google(captured)
    except UnknownValueError:
        # Google could not make sense of the audio
        print('I didn\'t get that')
        return 'None'
    except RequestError as error:
        # connection or API unavailable
        print('Api or connection is not working.\n The error is {0}'.format(
            error))
        return 'Broken'

    print('You said: ' + heard)
    return heard
Example #25
0
class RecognizerOfAudioSource(AudioSource):
    """Represent recogniser of audio source."""

    def __init__(self) -> None:
        self._recogniser = Recognizer()

    def __enter__(self) -> Recognizer:
        return self._recogniser.__enter__()

    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
        return self._recogniser.__exit__(exc_type, exc_val, exc_tb)

    @property
    def pause(self) -> int:
        """Return seconds of non-speaking audio before a phrase is considered complete"""
        return self._recogniser.pause_threshold

    @pause.setter
    def pause(self, seconds: int) -> None:
        """Set the pause threshold; raise ``TypeError`` unless ``seconds`` is an int."""
        if not isinstance(seconds, int):
            raise TypeError('Seconds should be <int> data type.')
        # The setter used to validate only and silently discard the value.
        self._recogniser.pause_threshold = seconds

    def configure_noise(self, source: Microphone, duration: int = 1) -> None:
        """Calibrate the recogniser against ambient noise read from ``source``."""
        self._recogniser.adjust_for_ambient_noise(source, duration)

    def listen(
            self,
            source: Microphone,
            timeout: int = None,
            phrase_time_limit: int = None,
            config: str= None
    ) -> AudioData:
        """Record a phrase from ``source``; arguments are forwarded to ``Recognizer.listen``."""
        return self._recogniser.listen(source, timeout, phrase_time_limit, config)

    def recognise_google(
            self,
            audio_data: AudioData,
            key: int = None,
            language: str = "en-US",
            show_all: bool = False
    ) -> str:
        """Return the lower-cased Google transcription of ``audio_data``."""
        # Keyword arguments keep each value bound to the intended parameter
        # even if the library's positional order differs between releases.
        return self._recogniser.recognize_google(
            audio_data, key=key, language=language, show_all=show_all).lower()
def record_and_recognise(r: sr.Recognizer) -> str:
    """Record up to one phrase from the microphone and return its text.

    The recognizer's ``pause_threshold`` is set to 1 and it is calibrated
    for ambient noise before listening with ``phrase_time_limit=5``.

    :param r: recognizer used for recording and recognition (``ru-RU``).
    :return: the recognized text (list results are joined with spaces), or
        ``None`` when the audio was not understood or the request failed.
    """
    with sr.Microphone() as source:
        print("Слушаю..")
        r.pause_threshold = 1
        r.adjust_for_ambient_noise(source, duration=1)
        audio = r.listen(source, phrase_time_limit=5)

    try:
        recognized = r.recognize_google(audio, language='ru-RU')
    except sr.UnknownValueError:
        print(
            "Unknown sounds came from your mouth. Or perhaps we can't hear you."
        )
        return None
    except sr.RequestError as e:
        print("Sound recognition exception; {0}".format(e))
        return None

    if isinstance(recognized, list):
        return " ".join(recognized)
    return recognized
Example #27
0
async def process_audio(chat_id, msg):
    """Transcribe a voice message and pass the text on for processing.

    The voice file is downloaded as ``dest.ogg``, converted to
    ``dest.flac``, recognized, and the text handed to ``process_result``.
    If the audio cannot be understood the chat is told to retry. Both
    temporary files are removed even when a step fails.

    NOTE(review): the file names are fixed, so two messages handled at the
    same time would overwrite each other's audio -- confirm calls cannot
    overlap.

    :param chat_id: chat that receives replies.
    :param msg: message mapping with ``voice.file_id`` and ``from.first_name``.
    """
    filename = "dest.ogg"
    dest = "dest.flac"
    await bot.download_file(msg['voice']['file_id'], "./dest.ogg")
    try:
        sound = AudioSegment.from_ogg(filename)
    finally:
        # The source file is no longer needed whether or not decoding worked.
        os.unlink(filename)

    r = Recognizer()
    try:
        sound.export(dest, format="flac")
        with AudioFile(dest) as source:
            # listen for the data (load audio to memory)
            audio_data = r.record(source)
        # recognize (convert from speech to text)
        try:
            text = r.recognize_google(audio_data)
        except UnknownValueError:
            await bot.sendMessage(chat_id, 'This audio is too short or corrupted, retry!')
        else:
            print(f"VOICE LOG - {msg['from']['first_name']}: {text}")
            await process_result(chat_id, text)
    finally:
        try:
            os.unlink(dest)
        except (FileNotFoundError, PermissionError):
            # Nothing to remove, or the file is still held open; do not let
            # cleanup mask the outcome of the work above.
            pass
Example #28
0
def SpeechToText():
    """Transcribe the recording at ``WAVE_OUTPUT_FILENAME`` (``ko-KR``).

    Returns the recognized text as a string, or ``False`` after printing an
    error marker when the speech could not be understood.
    """
    try:
        engine = Recognizer()
        with AudioFile(WAVE_OUTPUT_FILENAME) as wav_source:
            recording = engine.record(wav_source)

        mainText = str(engine.recognize_google(
            audio_data=recording,
            language='ko-KR',
        ))
        print('main-> ', mainText)
        return mainText
    except UnknownValueError:
        print("SPEECH ERROR!")
        return False
    def solve(self):
        """Drive the on-screen audio challenge via pyautogui and report the outcome.

        Returns ``self.done`` or ``self.error``; on some failure paths
        ``self.error[1]`` is set to "Captcha" first. Every lookup failure is
        handled through bare ``except`` blocks, so the statement order below
        is significant.
        """
        try:
            sleep(3)
            # Scrolling Down the Page
            pyautogui.scroll(-1000)
            # Locating and Clicking Captcha button on Page
            cap = pyautogui.locateCenterOnScreen("files/captcha.png")
            pyautogui.click(cap)
            try:
                sleep(3)
                # Locating and Clicking Headphones button on Page
                voi = pyautogui.locateCenterOnScreen("files/voice.png")
                pyautogui.click(voi)
            except:
                # Headphones button step failed: error if the gray variant is
                # located without raising, done if that lookup raises.
                try:
                    # Move out mouse from the voice button
                    pyautogui.moveTo(200, 200)
                    # Locate Voice2 Button that is little Gray
                    pyautogui.locateCenterOnScreen("files/voice2.png")
                    return self.error
                except:
                    return self.done
        except:
            # Captcha button step failed: same check, but tag the error.
            try:
                sleep(2)
                # Move out mouse from the voice button
                pyautogui.moveTo(200, 200)
                # Locate Voice2 Button that is little Gray
                pyautogui.locateCenterOnScreen("files/voice2.png")
                self.error[1] = "Captcha"
                return self.error
            except:
                return self.done
        sleep(2)
        try:
            # Locate and Right Click on the Download Button
            down = pyautogui.locateCenterOnScreen("files/down.png")
            pyautogui.rightClick(down)
            sleep(1)
            # Press down button 5 times
            pyautogui.press(['down'] * 5)
            sleep(1)
            # Press Enter (Cursor will be Copy Link Address)
            pyautogui.press('enter')
            # Download and Save that audio file from Link copied from above code
            # NOTE(review): presumably ge is an HTTP GET and paste reads the
            # clipboard -- neither is defined in this block; confirm.
            with open('files/audio.mp3', 'wb') as file:
                r = ge(paste())
                file.write(r.content)

            sleep(2)
            if path.exists('files/audio.mp3'):
                # Convert that mp3 file into wav using ffmpeg
                call([
                    'files/ffmpeg.exe', '-i', 'files/audio.mp3', '-y',
                    'files/audio.wav'
                ])
                sleep(2)
                AUDIO_FILE = 'files/audio.wav'

                # Code to Send That Audio File to Google and Recognize The Audio
                r = Recognizer()
                with AudioFile(AUDIO_FILE) as source:
                    audio = r.record(source)
                try:
                    # Get the Recognized Text
                    capSolved = r.recognize_google(audio)
                    # Go to the text field and write it there
                    pyautogui.hotkey('shift', 'tab')
                    pyautogui.typewrite(capSolved)
                    pyautogui.press('enter')
                    sleep(5)
                    return self.done
                except UnknownValueError as e:
                    return self.error
                except RequestError as e:
                    return self.error
            # If the mp3 is missing, execution falls off the end and the
            # method returns None.
        except:
            self.error[1] = "Captcha"
            return self.error
from speech_recognition import Microphone, RequestError, Recognizer, UnknownValueError

import pyaudio
import webbrowser as wb
from urllib.parse import quote_plus

# Voice-driven search script: if the first spoken phrase contains "video",
# a second phrase is recorded and opened as a YouTube search in the browser.
# (The stray `dir(sr)` call was dropped: `sr` is never imported here.)

r1 = Recognizer()  # this is the recognizer class from speechRecognition
r2 = Recognizer()
r3 = Recognizer()

# Microphone must be instantiated; the bare class is not a context manager.
with Microphone() as source:
    print('[search Google: search YouTube]')
    print('Speak Now!!')
    audio = r3.listen(source)

# The first recognition is guarded as well, so an unclear phrase or a network
# failure no longer ends the script with a traceback.
try:
    wants_video = 'video' in r1.recognize_google(audio)
except UnknownValueError:
    print('error ')
    wants_video = False
except RequestError as e:
    print('failed: {0}'.format(e))
    wants_video = False

if wants_video:
    r1 = Recognizer()
    url = 'https://www.youtube.com/results?search_query='
    # The first microphone context is already closed before this one opens.
    with Microphone() as source:
        print('search your query')
        audio = r1.listen(source)

    try:
        get = r1.recognize_google(audio)
        print(get)
        # Encode the query so spaces, '&' and '#' stay inside the parameter.
        wb.get().open_new(url + quote_plus(get))
    except UnknownValueError:
        print('error ')
    except RequestError as e:
        # The message previously had no placeholder, so the error was dropped.
        print('failed: {0}'.format(e))
Example #31
0
from speech_recognition import Microphone, Recognizer

# Minimal script: record one phrase from the microphone and print the
# transcription returned by recognize_google.
recog=Recognizer()
mic=Microphone()

# The microphone is only open while the phrase is being captured.
with mic:
    print("talk")
    audio=recog.listen(mic)

# No error handling: exceptions raised by recognize_google propagate.
recognize=recog.recognize_google(audio)

print(recognize)