Example #1
0
def record(filename, duration):
    """Record from the microphone and save the capture as a WAV file.

    Args:
        filename: Path of the file to write; it is opened in binary mode and
            overwritten.
        duration: Forwarded to ``Recognizer.record`` as its ``duration``
            argument.
    """
    recognizer = Recognizer()
    with MutableMicrophone() as source:
        audio = recognizer.record(source, duration=duration)
    # The capture is fully in memory here, so the microphone is released
    # before any disk I/O happens.
    with open(filename, 'wb') as f:
        f.write(audio.get_wav_data())
Example #2
0
def Audio_file_Read(filename):
    """Transcribe a Korean audio file and count a fixed set of filler words.

    Args:
        filename: Path of the audio file handed to ``sr.AudioFile``.

    Returns:
        dict: ``{'text': <transcript>, 'data': [n0, n1, n2, n3]}`` where the
        four integers are how often "어", "아니", "근데" and "이제" (in that
        order) occur among the tokens produced by ``text_to_word_sequence``.
        When the recognizer raises ``UnknownValueError`` the text is a fixed
        notice and every count is 0.
    """
    filler_words = ("어", "아니", "근데", "이제")
    recog = Recognizer()
    try:
        with sr.AudioFile(filename) as source:
            audio = recog.record(source)
        # The audio is already in memory, so the file is closed before the
        # recognition request is made.
        recognized = recog.recognize_google(audio, language="ko-KR")
    except UnknownValueError:
        return {'text': "당신이 말한 문장이 없습니다.", 'data': [0, 0, 0, 0]}

    # Counter yields 0 for absent keys, so no membership checks are needed.
    counts = collections.Counter(text_to_word_sequence(recognized))
    return {'text': recognized, 'data': [counts[word] for word in filler_words]}
Example #3
0
def record(filename, duration):
    """Capture microphone input and store it in ``filename`` as WAV data.

    ``duration`` is passed straight through to ``Recognizer.record``.
    """
    microphone = MutableMicrophone()
    capturer = Recognizer()
    with microphone as mic_source:
        captured = capturer.record(mic_source, duration=duration)
        # The output file is written while the microphone is still open.
        with open(filename, 'wb') as wav_file:
            wav_bytes = captured.get_wav_data()
            wav_file.write(wav_bytes)
def GetTextInAudio(audio_msg, chat_id, bot):
    """Transcribe a Japanese audio message and send the converted text back.

    The attachment is downloaded next to ``dirLoc``, converted to WAV with
    pydub, transcribed with Google speech recognition (``ja-JP``), passed
    through ``t_j2k`` and sent to ``chat_id``.  When the speech cannot be
    understood the literal message ``'None'`` is sent instead.

    Args:
        audio_msg: Mapping with at least ``'file_id'`` and ``'mime_type'``.
        chat_id: Chat that receives the reply.
        bot: Object providing ``download_file`` and ``sendMessage``.
    """
    from pydub import AudioSegment
    from speech_recognition import AudioFile, Recognizer, UnknownValueError

    rec = Recognizer()

    file_id = audio_msg['file_id']
    tempAudio = dirLoc + file_id + '.' + audio_msg['mime_type'].split('/')[1]
    file_loc = dirLoc + file_id + '.wav'

    try:
        bot.download_file(file_id, tempAudio)

        sound = AudioSegment.from_file(tempAudio)
        sound.export(file_loc, format='wav')

        with AudioFile(file_loc) as AudioSrc:
            content = rec.record(AudioSrc)

        try:
            text = rec.recognize_google(
                audio_data=content,
                language='ja-JP',
            )
        except UnknownValueError:
            bot.sendMessage(chat_id, 'None')
            return

        bot.sendMessage(chat_id, t_j2k(text))
    finally:
        # Always drop the temporary files, including on the early return and
        # on errors.  A set is used because both paths coincide when the
        # upload is already a ".wav" file.
        for leftover in {tempAudio, file_loc}:
            try:
                remove(leftover)
            except FileNotFoundError:
                # Nothing to clean up: the step that creates it never ran.
                pass
Example #5
0
def wav_to_text(wav_file_path, language="es-ES", show_all=False):
    """Transcribe a WAV file with Google speech recognition.

    Args:
        wav_file_path: Path handed to ``WavFile``.
        language: Language tag forwarded to ``recognize_google``.
        show_all: Forwarded to ``recognize_google`` unchanged.

    Returns:
        Whatever ``recognize_google`` returns for the recorded audio.

    Raises:
        GolemException: When the recognizer raises ``UnknownValueError``.
    """
    recognizer = Recognizer()
    with WavFile(wav_file_path) as wav_source:
        recorded = recognizer.record(wav_source)
    try:
        transcript = recognizer.recognize_google(
            audio_data=recorded,
            language=language,
            show_all=show_all,
        )
    except UnknownValueError:
        raise GolemException("Could not understand audio")
    else:
        return transcript
Example #6
0
def dothis(message):
    """
    From speech to text.

    Generator.  With sound attachments, each one is downloaded into
    ``<cwd>/temp/``, converted to WAV with ffmpeg when needed, transcribed,
    and the accumulated answer is yielded after every attachment and once
    more at the end (also after an error).  Without attachments it either
    leaves the active command (``'Выход'`` in ``message.params``) or asks
    for an audio file.

    :param message: incoming message object (session, settings, params and
        attachments are read from it)
    :return: yields text for transcriptions, or a dict with ``'msg'`` and
        ``'keyboard'`` for the prompt/exit replies
    """
    session = message.get_session()
    ans = ''
    current_cmd = message.get_setting(session, 'active')
    if message.attachments['sound']:
        try:
            r = Recognizer()
            mode = 'google'
            lang = 'ru-RUS'
            ans = ''
            for attachment in message.attachments['sound']:
                ext = attachment[1]
                # Timestamped base name ending in '.', the extension is
                # appended to it.
                base = (os.path.abspath(os.curdir) + '/temp/'
                        + time.strftime("%Y%m%d-%H%M%S") + '.')
                downloaded = base + ext
                wav_path = base + 'wav'
                try:
                    urllib.request.urlretrieve(
                        attachment[0], downloaded)  # getting file

                    if ext != 'wav':
                        subprocess.run(['ffmpeg', '-i', downloaded, wav_path])

                    with AudioFile(wav_path) as source:
                        song = r.record(source)
                finally:
                    # Remove the temporary files even when download,
                    # conversion or reading fails.  A set is used because
                    # both paths coincide for '.wav' uploads.
                    for leftover in {downloaded, wav_path}:
                        if os.path.exists(leftover):
                            os.remove(leftover)

                if "en" in message.params:
                    lang = 'en-EN'
                if 'wit' in message.params:
                    mode = 'wit'
                if mode == 'google':
                    recg = r.recognize_google(song, language=lang)
                else:
                    recg = r.recognize_wit(song, witkey)
                ans += f">>>>>>{recg}\n\n"
                yield ans
        except Exception as f:
            ans += "Произошла непредвиденная ошибка: " + str(f) + "\n"
        finally:
            if current_cmd:
                message.delete_active(session)
            # NOTE(review): yielding inside ``finally`` means closing the
            # generator early raises RuntimeError; kept because consumers
            # may rely on this final yield.
            yield str(ans)
    elif 'Выход' in message.params and current_cmd:
        message.delete_active(session)
        yield {'msg': 'Успешно!', 'keyboard': [[], False]}
    else:
        if current_cmd is None:
            message.add_setting(session, 'active', 'stt')
        yield {'msg': 'Прикрепите аудио или напишите Выход',
               'keyboard': [[[('Выход', 'negative')]], False]
               }
Example #7
0
def speech_recog(file_name="input_sample/audio/introduction_ml.mp3",
                 duration=10):
    """Convert an MP3 to WAV and print the Google transcription of its start.

    Args:
        file_name: Path of the MP3 file to transcribe.  Previously this
            argument was ignored and the default path was always used.
        duration: Forwarded to ``Recognizer.record`` as ``duration``.
    """
    import os

    convert_mp3_to_wav(file_name)
    # NOTE(review): assumes convert_mp3_to_wav writes the WAV next to the
    # MP3 with the same stem, as the previously hard-coded paths implied.
    wav_path = os.path.splitext(file_name)[0] + ".wav"
    class_audio = AudioFile(wav_path)
    print(type(class_audio))

    recognizer = Recognizer()

    with class_audio as src_audio:
        audio = recognizer.record(src_audio, duration=duration)
    print(recognizer.recognize_google(audio))
Example #8
0
def gat_with_breakdown(path_to_file, recognizer: sr.Recognizer, breakdown_duration=10, tempfile_name='temp.wav'):
    """
    gat = get audio transcript

    This breakdown is effective for writing down SRT(s).

    Iterates over the ``(start, end, total)`` triples produced by
    ``chop_chop`` (which is asked to write each piece to ``tempfile_name``),
    transcribes every piece and yields one dict per piece that produced
    text.  Pieces the recognizer cannot understand are skipped.

    Yields:
        dict with ``'start'`` / ``'end'`` (formatted by
        ``format_to_ffmpeg_duration``), ``'text'``, ``'total'`` and
        ``'delta'``: ``end`` minus the end of the previously yielded piece
        (or minus the first piece's start when nothing was yielded yet).
    """
    # Tracked across iterations; resetting it per piece made the update
    # after each yield a dead store.
    last_yield = None
    for start, end, total in chop_chop(path_to_file, breakdown_duration, output_file=tempfile_name):
        if last_yield is None:
            last_yield = start
        with sr.AudioFile(tempfile_name) as source:
            audio = recognizer.record(source)
        # Recognise and yield with the temp file closed, so it is not held
        # open while the consumer is busy with the yielded item.
        try:
            content = recognizer.recognize_google(audio)
        except sr.UnknownValueError:
            continue
        if content:
            yield {'start': format_to_ffmpeg_duration(start), 'end': format_to_ffmpeg_duration(end), 'text': content, 'total': total, 'delta': end - last_yield}
            last_yield = end
Example #9
0
async def process_audio(chat_id, msg):
    """Transcribe a voice message and hand the text to ``process_result``.

    The voice file is downloaded as OGG, converted to FLAC with pydub and
    sent to Google speech recognition.  If the speech cannot be understood
    the user is asked to retry.

    Args:
        chat_id: Chat that receives the answer.
        msg: Message mapping; ``msg['voice']['file_id']`` and
            ``msg['from']['first_name']`` are read.
    """
    import shutil
    import tempfile

    # A private directory per call: fixed names in the working directory
    # would be overwritten by another voice message handled concurrently.
    workdir = tempfile.mkdtemp(prefix="voice_")
    ogg_path = os.path.join(workdir, "dest.ogg")
    flac_path = os.path.join(workdir, "dest.flac")
    try:
        await bot.download_file(msg['voice']['file_id'], ogg_path)

        sound = AudioSegment.from_ogg(ogg_path)
        sound.export(flac_path, format="flac")

        r = Recognizer()
        with AudioFile(flac_path) as source:
            # listen for the data (load audio to memory)
            audio_data = r.record(source)

        # recognize (convert from speech to text); only this call is
        # guarded so errors from process_result are not misreported.
        try:
            text = r.recognize_google(audio_data)
        except UnknownValueError:
            await bot.sendMessage(chat_id, 'This audio is too short or corrupted, retry!')
            return

        print(f"VOICE LOG - {msg['from']['first_name']}: {text}")
        await process_result(chat_id, text)
    finally:
        # Best-effort cleanup (the original also tolerated PermissionError).
        shutil.rmtree(workdir, ignore_errors=True)
Example #10
0
def SpeechToText():
    """Transcribe ``WAVE_OUTPUT_FILENAME`` as Korean speech.

    Returns:
        The recognized text as ``str``, or ``False`` when the recognizer
        raises ``UnknownValueError``.
    """
    try:
        recognizer = Recognizer()
        with AudioFile(WAVE_OUTPUT_FILENAME) as wav_source:
            recorded = recognizer.record(wav_source)
        mainText = str(recognizer.recognize_google(
            audio_data=recorded,
            language='ko-KR',
        ))
    except UnknownValueError:
        print("SPEECH ERROR!")
        return False
    else:
        print('main-> ', mainText)
        # NOTE: stripping a START_WORDS prefix from the text was sketched
        # here earlier but is disabled; the full text is returned.
        return mainText
Example #11
0
async def speech_to_text(event):
    """Transcribe the voice message that ``event`` replies to.

    The replied-to message must carry an audio document flagged as a voice
    note.  It is downloaded into ``TEMP_DL_DIR``, converted to WAV, sent to
    Google speech recognition, and the outcome (or a matching error text
    from ``msgRep``) is written by editing ``event``.

    Note: telethon may borrow a different DC id to download audio
    """
    if not event.reply_to_msg_id:
        await event.edit(msgRep.REPLY_TO_VM)
        return
    msg = await event.get_reply_message()

    is_audio_document = (
        msg.media and hasattr(msg.media, "document") and
        isinstance(msg.media.document, Document) and
        msg.media.document.mime_type.startswith("audio")
    )
    if not is_audio_document:
        await event.edit(msgRep.REPLY_TO_VM)
        return

    file_format = None
    voice_note = False
    for attribute in msg.media.document.attributes:
        if isinstance(attribute, DocumentAttributeAudio):
            if not voice_note:  # set only if not True already
                voice_note = attribute.voice
        if isinstance(attribute, DocumentAttributeFilename):
            if not file_format:  # set only if none
                file_format = attribute.file_name.split(".")[-1]
    if not voice_note:
        await event.edit(msgRep.WORKS_WITH_VM_ONLY)
        return
    if not file_format:  # alternative way
        file_format = msg.media.document.mime_type.split("/")[1]

    filename = join(TEMP_DL_DIR, "audio." + file_format)
    # Defined before the try block so cleanup can always refer to it.
    audio_wav = join(TEMP_DL_DIR, "audio.wav")

    await event.edit(msgRep.CONVERT_STT)
    try:
        await msg.download_media(file=filename)
    except Exception as e:
        log.warning(e)
        await event.edit(msgRep.FAILED_LOAD_AUDIO)
        return

    try:
        audio_file = AudioSegment.from_file(filename, file_format)
        audio_file.export(audio_wav, "wav")

        r = Recognizer()
        with AudioFile(audio_wav) as source:
            audio = r.record(source)
        result = r.recognize_google(audio)
        text = f"**{msgRep.STT}**\n\n"
        text += f"{msgRep.STT_TEXT}:\n"
        text += f"__{result}__"
        await event.edit(text)
    except UnknownValueError:
        await event.edit(msgRep.STT_NOT_RECOGNIZED)
    except RequestError:
        await event.edit(msgRep.STT_REQ_FAILED)
    except MessageTooLongError:
        await event.edit(msgRep.STT_OUTPUT_TOO_LONG)
    except Exception as e:
        log.warning(e)
        await event.edit(msgRep.UNABLE_TO_STT)

    # Remove each file independently so one failure does not skip the
    # other; the set collapses the two paths when the upload is a ".wav".
    for leftover in {filename, audio_wav}:
        try:
            remove(leftover)
        except FileNotFoundError:
            # Never created (e.g. the conversion failed): nothing to delete.
            pass
        except Exception as e:
            log.warning(f"Unable to delete audio(s): {e}")
    return
Example #12
0
def gettingWordsFromAudio():
    """Print a walkthrough of file transcription with ``Recognizer``.

    Reads ``harvard.wav`` and ``jackhammer.wav`` from the working directory
    and prints the Google transcription for whole-file, duration-limited,
    sequential, offset, noisy and noise-calibrated recordings, followed by
    the full alternatives for the last recording.
    """
    def announce(*headings):
        # Every demo after the first is preceded by two empty lines.
        print("")
        print("")
        for heading in headings:
            print(heading)

    print(version)

    recognizer = Recognizer()

    print("captures any speech")
    harvard = AudioFile('harvard.wav')
    with harvard as clip:
        whole_clip = recognizer.record(clip)
    print(type(whole_clip))
    print(recognizer.recognize_google(whole_clip))

    announce("captures any speech in the first four seconds of the file")
    with harvard as clip:
        first_four = recognizer.record(clip, duration=4)
    print(recognizer.recognize_google(first_four))

    announce(
        "The record() method, when used inside a with block, always moves ahead in the file stream."
    )
    with harvard as clip:
        first_part = recognizer.record(clip, duration=4)
        second_part = recognizer.record(clip, duration=4)
    print(recognizer.recognize_google(first_part))
    print(recognizer.recognize_google(second_part))

    announce(
        "To capture only the second phrase in the file, you could start with an offset of four seconds and record for, say, three seconds."
    )
    with harvard as clip:
        second_phrase = recognizer.record(clip, offset=4, duration=3)
    print(recognizer.recognize_google(second_phrase))

    announce("****************", "noisy audio")
    jackhammer = AudioFile('jackhammer.wav')
    with jackhammer as clip:
        noisy = recognizer.record(clip)
    print(recognizer.recognize_google(noisy))

    announce(
        "The adjust_for_ambient_noise() method reads the first second of the file stream and calibrates the recognizer to the noise level of the audio."
    )
    with jackhammer as clip:
        recognizer.adjust_for_ambient_noise(clip, duration=1)
        calibrated = recognizer.record(clip)
    print(recognizer.recognize_google(calibrated))

    announce("Prints all json alternatives")
    print(recognizer.recognize_google(calibrated, show_all=True))
Example #13
0
def get_audio_transcript(path_to_file, recognizer: sr.Recognizer):
    """Return the Google transcription of the audio file at ``path_to_file``.

    The whole file is recorded with ``recognizer`` and recognized while the
    file is still open.
    """
    audio_file = sr.AudioFile(path_to_file)
    with audio_file as source:
        recorded = recognizer.record(source)
        transcript = recognizer.recognize_google(recorded)
    return transcript
Example #14
0
import os
import subprocess
from speech_recognition import (
    AudioFile,
    Recognizer,
)

# Convert "<unique_id>.ogg" to WAV with ffmpeg, print its Russian
# transcription and remove the intermediate WAV file.
unique_id = 'file_name'

ogg_file_path = f'{unique_id}.ogg'
wav_file_path = f'{unique_id}.wav'

recognizer = Recognizer()

try:
    # check=True: fail here with the ffmpeg exit status instead of later
    # with a confusing error about a missing or unreadable WAV file.
    process = subprocess.run(
        ['ffmpeg', '-i', ogg_file_path, wav_file_path],
        check=True,
    )

    with AudioFile(wav_file_path) as audio_file:
        audio_content = recognizer.record(audio_file)

    print(recognizer.recognize_google(audio_content, language="ru-RU"))
finally:
    # Remove the intermediate WAV even when conversion or recognition fails.
    if os.path.exists(wav_file_path):
        os.remove(wav_file_path)
Example #15
0
def process_voice(update: Update, context: CallbackContext):
    """Handle a Telegram voice message: transcribe it and answer a command.

    The sender is looked up (or created) as a ``TelegramUser``; inactive
    users are rejected.  The voice file is downloaded as OGG into
    ``settings.VOICE_TEMP_FOLDER``, converted to WAV with ffmpeg and sent to
    Google speech recognition.  Every ``CommandModel`` is scored against
    the transcript with ``calculate_score``; the best one answers with its
    ``responce_message`` when its score is at least 0.8.

    Args:
        update: Incoming update; ``update.message`` is read and replied to.
        context: Unused.

    Returns:
        The result of ``reply_text`` on the rejection / repeat paths,
        otherwise ``None``.
    """
    voice = update.message.voice
    chat_id = update.message.chat_id
    username = update.message.from_user.username

    user, _ = TelegramUser.objects.get_or_create(
        chat_id=chat_id,
        defaults={
            'username': username,
        }
    )

    if user.is_active != TelegramUser.ACTIVE:
        return update.message.reply_text(
            text="You are not registered!\n\nСontact the administrator."
        )

    file_ = voice.get_file()
    unique_id = file_.file_unique_id

    ogg_file_path = f'{settings.VOICE_TEMP_FOLDER}/{unique_id}.ogg'
    wav_file_path = f'{settings.VOICE_TEMP_FOLDER}/{unique_id}.wav'

    recognizer = Recognizer()

    try:
        file_.download(ogg_file_path)
        subprocess.run(['ffmpeg', '-i', ogg_file_path, wav_file_path])

        with AudioFile(wav_file_path) as audio_file:
            audio_content = recognizer.record(audio_file)
    finally:
        # Remove the temporary files even when download, conversion or
        # reading fails.
        for temp_path in (ogg_file_path, wav_file_path):
            if os.path.exists(temp_path):
                os.remove(temp_path)

    try:
        result = recognizer.recognize_google(audio_content, language=settings.LANGUAGE_OF_RECOGNION)
    except UnknownValueError:
        return update.message.reply_text(
            text=REPEAT_MESSAGE
        )

    # Score each command once and keep the object, so the winner does not
    # have to be fetched from the database a second time.
    scored = [
        (calculate_score(result, command.key), command)
        for command in CommandModel.objects.all()
    ]

    # No commands configured: max() would raise on an empty sequence.
    if not scored:
        return update.message.reply_text(
            text=REPEAT_MESSAGE
        )

    # Compare on the score only; on ties the first command wins, as before.
    total_score, command = max(scored, key=operator.itemgetter(0))

    if total_score < 0.8:
        return update.message.reply_text(
            text=REPEAT_MESSAGE
        )

    # Sending the command's signal to a Broadlink device is disabled:
    # broadlink.setup(
    #     os.environ.get('BROADLINK_WIFI_SSID'),
    #     os.environ.get('BROADLINK_WIFI_PASSWORD'),
    #     int(os.environ.get('BROADLINK_WIFI_SECURITY_MODE')),
    # )

    # device = broadlink.discover(
    #     timeout=5,
    #     discover_ip_address=os.environ.get('BROADLINK_WIFI_DISCOVER_IP_ADDRESS')
    # )[0]
    # device.auth()
    # device.send_data(
    #     binascii.unhexlify(command.signal_to_broadlink.encode())
    # )

    update.message.reply_text(
        text=command.responce_message
    )
    # NOTE(review): `x` is not defined anywhere in the visible code; this
    # looks like the tail of a loop whose header is missing — confirm.
    # Remembers the request URL of an entry whose params type is 'Media'.
    try:
        if x['params']['type'] == 'Media':
            lastaudio = x['params']['request']['url']
    except:
        # Bare except: entries lacking these keys (and any other error) are
        # silently ignored.
        pass

# Download the audio found at `lastaudio` and store it as '1.mpeg'.
with open('1.mpeg', 'wb') as f:
    f.write(get(lastaudio).content)
    
# Re-encode the download as WAV so AudioFile can read it.
sound = AudioSegment.from_mp3("1.mpeg")
sound.export("transcript.wav", format="wav")
                  
r = Recognizer()

# Record the whole file and transcribe it with Google speech recognition.
with AudioFile('transcript.wav') as source:
        audio = r.record(source)
        transcript =  r.recognize_google(audio)
        print("Transcription: " + transcript)

# Type the transcript into the second <input> of the last iframe on the
# page and press ENTER.
driver.switch_to.default_content()
iframe = driver.find_elements_by_tag_name('iframe')[-1]
driver.switch_to.frame(iframe)
# NOTE(review): `i` is never used afterwards in the visible code.
i = driver.find_elements_by_tag_name('input')[1].send_keys(transcript + Keys.ENTER)

# Wait, then click the form input addressed by the absolute XPath below.
sleep(5)
driver.switch_to.default_content()
driver.find_element_by_xpath('/html/body/div[1]/form/fieldset/ul/li[6]/input').click()



Example #17
0
def googleRecognition(audioname):
    """Record the whole audio file ``audioname`` into memory.

    NOTE(review): the recorded audio is not recognized or returned in the
    visible code; the function returns ``None``.
    """
    recognizer = Recognizer()
    with sr.AudioFile(audioname) as opened_file:
        audio = recognizer.record(opened_file)
Example #18
0
def recognize_wav(filename, language="en-US", show_all=True):
    """Recognize the speech stored in the WAV file ``filename``.

    Args:
        filename: Path handed to ``WavFile``.
        language: Passed to the ``Recognizer`` constructor.
        show_all: Passed positionally to ``Recognizer.recognize``.

    Returns:
        Whatever ``Recognizer.recognize`` returns for the recorded audio.
    """
    engine = Recognizer(language=language)
    wav_source = WavFile(filename)
    with wav_source as opened:
        captured = engine.record(opened)
    outcome = engine.recognize(captured, show_all)
    return outcome
Example #19
0
class GoogleSTTEndpoint(PublicEndpoint):
    """Endpoint to send a flac audio file with voice and get back a utterance"""

    def __init__(self):
        super().__init__()
        self.google_stt_key = self.config['GOOGLE_STT_KEY']
        self.recognizer = Recognizer()
        # Account owning the calling device; stays None when it is unknown.
        self.account = None
        # True once an OPEN_DATASET agreement is found on the account.
        self.account_shares_data = False

    def post(self):
        """Transcribe the posted audio and return the transcripts with 200."""
        self._authenticate()
        self._get_account()
        self._check_for_open_dataset_agreement()
        self._write_flac_audio_file()
        stt_response = self._call_google_stt()
        response = self._build_response(stt_response)
        self._write_stt_result_file(response)

        return response, HTTPStatus.OK

    def _get_account(self):
        """Look up the account that owns the calling device, if any."""
        if self.device_id is not None:
            account_repo = AccountRepository(self.db)
            self.account = account_repo.get_account_by_device_id(
                self.device_id)

    def _check_for_open_dataset_agreement(self):
        """Flag the request when the account has an open dataset agreement."""
        # The account is legitimately None when no device id is present
        # (see _get_account); there is nothing to inspect in that case.
        if self.account is None:
            return
        if any(agreement.type == OPEN_DATASET
               for agreement in self.account.agreements):
            self.account_shares_data = True

    def _write_flac_audio_file(self):
        """Save the audio file for STT tagging"""
        self._write_open_dataset_file(self.request.data, file_type='flac')

    def _write_stt_result_file(self, stt_result):
        """Save the STT results for tagging."""
        file_contents = '\n'.join(stt_result)
        self._write_open_dataset_file(file_contents.encode(), file_type='stt')

    def _write_open_dataset_file(self, content, file_type):
        """Write ``content`` to ``<account id>_<time>.<file_type>``.

        NOTE(review): files are written for every known account;
        ``account_shares_data`` is not consulted here — confirm whether
        writing should be limited to accounts with the agreement.
        """
        if self.account is not None:
            file_name = '{account_id}_{time}.{file_type}'.format(
                account_id=self.account.id, file_type=file_type, time=time())
            file_path = os.path.join(SELENE_DATA_DIR, file_name)
            with open(file_path, 'wb') as flac_file:
                flac_file.write(content)

    def _call_google_stt(self):
        """Use the audio data from the request to call the Google STT API

        We need to replicate the first 16 bytes in the audio due a bug with
        the Google speech recognition library that removes the first 16 bytes
        from the flac file we are sending.
        """
        lang = self.request.args['lang']
        audio = self.request.data
        with AudioFile(BytesIO(audio[:16] + audio)) as source:
            recording = self.recognizer.record(source)
        response = self.recognizer.recognize_google(recording,
                                                    key=self.google_stt_key,
                                                    language=lang,
                                                    show_all=True)

        return response

    def _build_response(self, stt_response):
        """Build the response to return to the device.

        Return the ``limit`` transcripts with the highest confidence.  That
        is useful when an ambiguous voice file is sent and the correct
        utterance is not the one with the highest confidence.

        Returns:
            list[str]: transcripts ordered by descending confidence when any
            alternative carries one, else only the first transcript; empty
            when the API response is not a dict or has no alternatives.
        """
        limit = int(self.request.args['limit'])
        if not isinstance(stt_response, dict):
            return []

        alternatives = stt_response.get("alternative") or []
        if not alternatives:
            return []

        # "alternative" is a list of dicts, so the confidence key has to be
        # looked for inside its items; testing the list itself never matched.
        if any('confidence' in alt for alt in alternatives):
            # Items without a confidence rank last; the sort is stable, so
            # they keep the order the API returned them in.
            ranked = sorted(alternatives,
                            key=lambda alt: alt.get('confidence', 0),
                            reverse=True)
            return [alt['transcript'] for alt in ranked][:limit]

        return [alternatives[0]['transcript']]
    def solve(self):
        """Drive an on-screen audio captcha with pyautogui and submit its text.

        Steps: click the captcha and voice buttons located by screenshot,
        copy the audio download link via the context menu, download it to
        ``files/audio.mp3``, convert it to WAV with the bundled ffmpeg,
        transcribe it with Google speech recognition and type the result.

        Returns ``self.done`` or ``self.error`` depending on the branch
        taken (``self.error[1]`` is set to ``"Captcha"`` on some failure
        paths).  Returns ``None`` implicitly if ``files/audio.mp3`` does not
        exist after the download step.

        NOTE(review): the branching relies on the pyautogui locate/click
        calls raising when an image is not found — confirm this for the
        installed pyautogui version.
        """
        try:
            sleep(3)
            # Scrolling Down the Page
            pyautogui.scroll(-1000)
            # Locating and Clicking Captcha button on Page
            cap = pyautogui.locateCenterOnScreen("files/captcha.png")
            pyautogui.click(cap)
            try:
                sleep(3)
                # Locating and Clicking Headphones button on Page
                voi = pyautogui.locateCenterOnScreen("files/voice.png")
                pyautogui.click(voi)
            except:
                # Voice button step failed: the outcome depends on whether
                # the gray voice button lookup below raises.
                try:
                    # Move out mouse from the voice button
                    pyautogui.moveTo(200, 200)
                    # Locate Voice2 Button that is little Gray
                    pyautogui.locateCenterOnScreen("files/voice2.png")
                    return self.error
                except:
                    return self.done
        except:
            # Captcha button step failed: same gray-button probe, but the
            # error is tagged with "Captcha" before being returned.
            try:
                sleep(2)
                # Move out mouse from the voice button
                pyautogui.moveTo(200, 200)
                # Locate Voice2 Button that is little Gray
                pyautogui.locateCenterOnScreen("files/voice2.png")
                self.error[1] = "Captcha"
                return self.error
            except:
                return self.done
        sleep(2)
        try:
            # Locate and Right Click on the Download Button
            down = pyautogui.locateCenterOnScreen("files/down.png")
            pyautogui.rightClick(down)
            sleep(1)
            # Press down button 5 times
            pyautogui.press(['down'] * 5)
            sleep(1)
            # Press Enter (Cursor will be Copy Link Address)
            pyautogui.press('enter')
            # Download and Save that audio file from Link copied from above code
            with open('files/audio.mp3', 'wb') as file:
                # The URL is taken from whatever paste() returns.
                r = ge(paste())
                file.write(r.content)

            sleep(2)
            if path.exists('files/audio.mp3'):
                # Convert that mp3 file into wav using ffmpeg
                # ('-y' overwrites an existing files/audio.wav)
                call([
                    'files/ffmpeg.exe', '-i', 'files/audio.mp3', '-y',
                    'files/audio.wav'
                ])
                sleep(2)
                AUDIO_FILE = 'files/audio.wav'

                # Code to Send That Audio File to Google and Recognize The Audio
                # (r is rebound here from the download response to a Recognizer)
                r = Recognizer()
                with AudioFile(AUDIO_FILE) as source:
                    audio = r.record(source)
                try:
                    # Get the Recognized Text
                    capSolved = r.recognize_google(audio)
                    # Go to the text field and write it there
                    pyautogui.hotkey('shift', 'tab')
                    pyautogui.typewrite(capSolved)
                    pyautogui.press('enter')
                    sleep(5)
                    return self.done
                except UnknownValueError as e:
                    return self.error
                except RequestError as e:
                    return self.error
        except:
            # Any other failure in the download/convert/type sequence is
            # reported as a "Captcha" error.
            self.error[1] = "Captcha"
            return self.error
Example #21
0
# Click the button addressed by the absolute XPath below.
driver.find_element_by_xpath("/html/body/div/div/div[3]/div/button").click()

try:
    # Download the audio referenced by the "audio-source" element.
    src = driver.find_element_by_id("audio-source").get_attribute("src")
    print(src)
    urllib.request.urlretrieve(src, path+"\\audio.mp3")

    # Convert the MP3 to WAV; `sound` holds the return value of export() and
    # is not used afterwards in the visible code.
    sound = pydub.AudioSegment.from_mp3(
        path+"\\audio.mp3").export(path+"\\audio.wav", format="wav")

    recognizer = Recognizer()

    recaptcha_audio = AudioFile(path+"\\audio.wav")

    with recaptcha_audio as source:
        audio = recognizer.record(source)

    # Transcribe as German speech.
    text = recognizer.recognize_google(audio, language="de-DE")

    print(text)

    # Type the lower-cased transcript into the response field and submit.
    inputfield = driver.find_element_by_id("audio-response")
    inputfield.send_keys(text.lower())

    inputfield.send_keys(Keys.ENTER)

    sleep(10)
    print("Success")
    driver.quit()
# NOTE(review): only NameError is handled, so browser, download and
# recognition errors propagate, and driver.quit() is skipped on every
# failure — confirm this is intended.
except NameError:
    print("Failed")