Example #1
0
from os import path

import os

# Map recognized transcript text -> relative path of the matching audio file.
# (Renamed from `dict`, which shadowed the builtin.)
transcripts = {}

speechSource = 'filtered'

r = sr.Recognizer()

# Loop through the folder of filtered audios
for root, dirs, filenames in os.walk(speechSource):
    for file in filenames:
        if file != ".DS_Store":
            with sr.AudioFile("filtered/" + file) as sourcenew:
                audio = r.record(sourcenew)
                try:
                    # Send the audio to the offline Sphinx recognizer.
                    # Bug fix: recognize_sphinx already returns str on
                    # Python 3 — calling .decode('utf-8') on it raised
                    # AttributeError.
                    udata = r.recognize_sphinx(audio)
                except sr.UnknownValueError:
                    continue

                # Strip any non-ASCII characters (round-trip through ASCII,
                # ignoring what can't be encoded) so keys stay plain str.
                asciidata = udata.encode("ascii", "ignore").decode("ascii")

                # Key: normalized transcript; value: relative audio path.
                transcripts[asciidata.lower()] = "filtered/" + file
                print("Process...")

transcripts["*pause"] = "filtered/pause.wav"
Example #2
0
import datetime
import speech_recognition as sr
# Compute "now" once so the recording name and transcript name are built
# from the same instant (the original called datetime.now() twice, which
# could disagree across a midnight boundary).
now = datetime.datetime.now()
filename = f"Recording{now.strftime('%Y%m%d')}.wav"

# initialize the recognizer
r = sr.Recognizer()

# load the recording and convert it to text
with sr.AudioFile(filename) as source:
    # read the entire file
    audio_data = r.record(source)
    # transcribe with the free Google Web Speech API
    text = r.recognize_google(audio_data)

# Persist the transcript.
# NOTE(review): the original used '%y%m%d' (two-digit year) here while the
# .wav name used '%Y%m%d' — kept as-is to preserve behavior, but this looks
# like an unintended inconsistency worth confirming.
with open(f"{now.strftime('%y%m%d')}.txt", mode='w') as file:
    file.write(text)
 def test_sphinx_english(self):
     """Recognize the English fixture offline with CMU Sphinx."""
     recognizer = sr.Recognizer()
     with sr.AudioFile(self.AUDIO_FILE_EN) as audio_source:
         captured = recognizer.record(audio_source)
     self.assertEqual(recognizer.recognize_sphinx(captured), "wanted to three")
Example #4
0
#-------------------------------------------------------------------------------
# Name:        Battleships - Blind Edition
# Purpose:     To help blind people
#
# Author:      Olipus & Mastercoder27
#
# Created:     30-01-2019
# Copyright:   (c) OLIVER 2019
# Licence:     Unspecified (no licence declared)
#-------------------------------------------------------------------------------
import speech_recognition as sr
from pygame import mixer

# Play the clip aloud, then transcribe that same file with Google's web API.
mixer.init()
mixer.music.load("coords6.wav")
mixer.music.play()

r = sr.Recognizer()

with sr.AudioFile('coords6.wav') as clip_source:
    audio = r.record(clip_source)

print(r.recognize_google(audio))
Example #5
0
def throw_to_google():
    """Record the whole of recording.wav from the current directory.

    NOTE(review): this snippet appears truncated — the recorded audio is
    never sent to a recognizer and nothing is returned.
    """
    recognizer = sr.Recognizer()
    wav_path = path.join(current_dir, "recording.wav")
    with sr.AudioFile(wav_path) as src:
        audio = recognizer.record(src)  # read the entire audio file
def download():
    """Handle an uploaded audio file: split it on silence, run STT on each
    chunk, classify the speaker per chunk, and write transcripts to disk.

    Returns the download page template, or the upload page when no file was
    posted (None for non-POST requests, matching the original behavior).
    """
    if request.method == 'POST':
        f = request.files['file']
        if not f:
            return render_template('upload.html')

        f_path = os.path.splitext(str(f))
        f_path = os.path.split(f_path[0])
        normalizedsound = normalized_sound(f)
        audio_chunks = split_slience(normalizedsound)

        save_script = ''
        female_list = []
        male_list = []

        for i, chunk in enumerate(audio_chunks):
            speaker_stt = []
            out_file = "chunk.wav"
            # Fix: define the over-5s filename up front so the cleanup below
            # can never hit an unbound name (the original only bound it in
            # the short-chunk branch).
            out_file_over5s = "chunk_over_5s.wav"
            chunk.export(out_file, format='wav')
            aaa = sr.AudioFile(out_file)
            try:
                stt_text = STT(aaa)
                speaker_stt.append(str(stt_text))
                y, sample_rate = librosa.load(out_file, sr=22050)

                if len(y) >= 22050 * 5:
                    # Chunk is at least 5 s: clip to exactly 5 s for the
                    # speaker-gender model.
                    y = y[:22050 * 5]
                    speaker = predict_speaker(y, sample_rate)
                else:
                    # Chunk is shorter than 5 s: tile it (3 doublings = x8)
                    # so the model still sees a full 5-second window.
                    audio_copy = AudioSegment.from_wav(out_file)
                    audio_copy = copy.deepcopy(audio_copy)
                    for num in range(3):
                        audio_copy = audio_copy.append(
                            copy.deepcopy(audio_copy), crossfade=0)
                    audio_copy.export(out_file_over5s, format='wav')
                    y_copy, sample_rate = librosa.load(out_file_over5s,
                                                       sr=22050)
                    y_copy = y_copy[:22050 * 5]
                    speaker = predict_speaker(y_copy, sample_rate)

                speaker_stt.append(str(speaker))
                print(speaker_stt[1] + " : " + speaker_stt[0])
                if speaker == '여자':
                    female_list.append(str(speaker_stt[0]))
                else:
                    male_list.append(str(speaker_stt[0]))

                save_script += speaker_stt[1] + " : " + speaker_stt[0] + '\n\n'

                # Clean up the temporary chunk files.
                if os.path.isfile(out_file):
                    os.remove(out_file)
                if os.path.isfile(out_file_over5s):
                    os.remove(out_file_over5s)
            except Exception as e:
                # Was a bare `except: pass`; keep the best-effort behavior
                # per chunk but at least surface the failure.
                print(e)

        # Write each transcript file once, after the loop. (The original
        # re-opened all three files in 'wt' mode on every iteration —
        # truncating them each time and leaking handles on exceptions — so
        # the final on-disk content is identical.)
        with open('c:/nmb/nada/web/static/test.txt', 'wt',
                  encoding='utf-8') as full_f, \
             open('c:/nmb/nada/web/static/test_female.txt', 'wt',
                  encoding='utf-8') as female_f, \
             open('c:/nmb/nada/web/static/test_male.txt', 'wt',
                  encoding='utf-8') as male_f:
            full_f.writelines(save_script)
            female_f.writelines('\n\n'.join(female_list))
            male_f.writelines('\n\n'.join(male_list))

        return render_template('/download.html')
import speech_recognition as sr
import sounddevice as sd
from scipy.io.wavfile import write

# Capture a short clip from the default microphone, save it to disk, then
# transcribe the saved file with the Google Web Speech API.
fs = 44100  # Sample rate
seconds = 3  # Duration of recording

myrecording = sd.rec(int(seconds * fs), samplerate=fs, channels=2)
sd.wait()  # block until the capture completes
write('output.wav', fs, myrecording)  # Save as WAV file

sound = "output.wav"
recognizer = sr.Recognizer()

with sr.AudioFile(sound) as source:
    recognizer.adjust_for_ambient_noise(source)
    print("Converting audio file to text...")
    audio = recognizer.listen(source)
    try:
        text = recognizer.recognize_google(audio)
        print("The converted text:" + text)
    except Exception as e:
        # Broad catch at the script boundary: just report the failure.
        print(e)
 def test_google_chinese(self):
     """Recognize the Chinese fixture with the Google Web Speech API."""
     recognizer = sr.Recognizer()
     with sr.AudioFile(self.AUDIO_FILE_ZH) as audio_source:
         captured = recognizer.record(audio_source)
     self.assertEqual(recognizer.recognize_google(captured, language="zh-CN"), u"砸自己的脚")
 def test_wit_english(self):
     """Recognize the English fixture via the Wit.ai API."""
     recognizer = sr.Recognizer()
     with sr.AudioFile(self.AUDIO_FILE_EN) as audio_source:
         captured = recognizer.record(audio_source)
     self.assertEqual(recognizer.recognize_wit(captured, key=os.environ["WIT_AI_KEY"]), "one two three")
import speech_recognition as sr
import time
r = sr.Recognizer()
audio_file = sr.AudioFile('sample.aiff')


def get_transcript(audio_file=audio_file):
    """Record the whole clip and return Google's transcription of it."""
    with audio_file as src:
        captured = r.record(src)
    return r.recognize_google(captured)


if __name__ == "__main__":
    print(get_transcript())
 def test_google_french(self):
     """Recognize the French fixture with the Google Web Speech API."""
     recognizer = sr.Recognizer()
     with sr.AudioFile(self.AUDIO_FILE_FR) as audio_source:
         captured = recognizer.record(audio_source)
     self.assertEqual(recognizer.recognize_google(captured, language="fr-FR"), u"et c'est la dictée numéro 1")
Example #12
0
        try:
            talk("What is the message")
            content = voice_data.split("for")[-1]
            to = "*****@*****.**"
            server = smtplib.SMTP('smtp.gmail.com', 587)
	        server.ehlo()
	        server.starttls()
	        server.login('*****@*****.**', 'your-password')
	        server.sendmail('*****@*****.**', to, content)
	        server.close()
            speak("Email sent!")
        except Exception as e:
            print(e)
            speak("Sorry sir, there was errors in connectivity.")

time.sleep(1)
person_obj = person()
# Main assistant loop: capture a phrase from the mic and dispatch a response.
while(1):
    voice_data = record_audio() # get the voice input
    respond(voice_data) # respond


# NOTE(review): everything below is unreachable — the while(1) loop above
# never terminates. Kept verbatim from the original snippet.
r = sr.Recognizer()
file = sr.AudioFile('one.wav')
with file as source:
 r.adjust_for_ambient_noise(source)
 audio = r.record(source,duration=5)
 result = r.recognize_google(audio,language='es')
print(result)

Example #13
0
import speech_recognition as sr
import os

# Point the Google Cloud client at the service-account credentials file.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "SERVICE_ACCOUNT_KEY.json"

recognizer = sr.Recognizer()

# Read the entire clip, then transcribe it with Google Cloud Speech.
with sr.AudioFile('FILE_NAME.wav') as src:
    captured = recognizer.record(src)

try:
    recog = recognizer.recognize_google_cloud(captured, language='en-US')
    print("You said: " + recog)
except sr.UnknownValueError as u:
    print(u)
    print("Google Speech Recognition could not understand audio")
except sr.RequestError as e:
    print(
        "Could not request results from Google Speech Recognition service; {0}"
        .format(e))
Example #14
0
#!/usr/bin/env python
# coding: utf-8
import speech_recognition
from pydub import AudioSegment
# Convert the MP3 to FLAC, then transcribe the FLAC with Google's web API.
sound = AudioSegment.from_file("temp/test.mp3")
sound.export("temp/test.flac", format="flac", bitrate="128k")
r = speech_recognition.Recognizer()
# Bug fix: the FLAC was exported to temp/test.flac but the original opened
# "test.flac" (wrong directory); also converted the Python 2 print
# statement to the print() function used everywhere else in this file.
with speech_recognition.AudioFile("temp/test.flac") as source:
    audio = r.record(source)
    print(r.recognize_google(audio, language='zh-cn'))
Example #15
0
mic = sr.Microphone()

sr.Microphone.list_microphone_names()  #To get a list of microphone names

# Bug fix: the original snippet used `r` below without ever defining it.
r = sr.Recognizer()

#Recognizer class has seven methods for recognizing speech
#recognize_bing(): Microsoft Bing Speech
#recognize_google(): Google Web Speech API
#recognize_google_cloud(): Google Cloud Speech - requires installation of the google-cloud-speech package
#recognize_houndify(): Houndify by SoundHound
#recognize_ibm(): IBM Speech to Text
#recognize_sphinx(): CMU Sphinx - requires installing PocketSphinx
#recognize_wit(): Wit.ai

#Using audio file as input
song = sr.AudioFile(
    'D:\\Speech_Recognition\\Avicii - Hey Brother.wav'
)  #context manager opens the file and reads its contents, storing the data in an AudioFile instance called source
with song as source:
    r.adjust_for_ambient_noise(source, duration=0.5)
    audio = r.record(
        source
    )  #record() method records the data from the entire file into an AudioData instance.

r.recognize_google(audio, show_all=True)

#Using mic as input
with mic as source:
    r.adjust_for_ambient_noise(source)
    audio = r.listen(
        source, timeout=3)  #listen method to capture input from microphone
 def test_bing_english(self):
     """Recognize the English fixture via Microsoft Bing Speech."""
     recognizer = sr.Recognizer()
     with sr.AudioFile(self.AUDIO_FILE_EN) as audio_source:
         captured = recognizer.record(audio_source)
     self.assertEqual(recognizer.recognize_bing(captured, key=os.environ["BING_KEY"]), "123.")
Example #17
0
def recognize(sphfile):
    """Transcribe the audio file at *sphfile* with the Google Web Speech API."""
    recognizer = sr.Recognizer()
    with sr.AudioFile(sphfile) as src:
        captured = recognizer.record(src)
        return recognizer.recognize_google(captured)
 def test_bing_french(self):
     """Recognize the French fixture via Microsoft Bing Speech."""
     recognizer = sr.Recognizer()
     with sr.AudioFile(self.AUDIO_FILE_FR) as audio_source:
         captured = recognizer.record(audio_source)
     self.assertEqual(recognizer.recognize_bing(captured, key=os.environ["BING_KEY"], language="fr-FR"), u"Essaye la dictée numéro un.")
Example #19
0
def VideoToText(input):
    """Extract the audio track from the video at *input*, split it on
    silence, transcribe each chunk with the Google Web Speech API, write
    the combined transcript to result.txt, and return it.

    Side effects: drives the module-level `pb` progress bar, writes
    audio.wav and audio-chunks/chunk*.wav, writes result.txt, and shows a
    messagebox when done. NOTE(review): the parameter shadows the builtin
    `input`; kept for caller compatibility.
    """
    pb.start()
    pb['value'] = 10
    pb.update()
    # NOTE(review): named mp3_file, but the target is actually a WAV.
    mp3_file = "audio.wav"
    videoClip = VideoFileClip(input)
    pb['value'] = 20
    pb.update()
    # Strip the audio track out of the video.
    audioclip = videoClip.audio
    audioclip.write_audiofile(mp3_file)
    pb['value'] = 30
    pb.update()
    audioclip.close()
    videoClip.close()
    pb['value'] = 40
    pb.update()
    time.sleep(1)
    path = "audio.wav"
    pb['value'] = 50
    pb.update()
    r = sr.Recognizer()
    sound = AudioSegment.from_wav(path)
    # Cut the recording wherever there is at least 1 s of silence,
    # using a threshold 14 dB below the clip's average loudness.
    chunks = split_on_silence(
        sound,
        min_silence_len=1000,
        silence_thresh=sound.dBFS - 14,
        keep_silence=1000,
    )
    folder_name = "audio-chunks"
    # create a directory to store the audio chunks
    if not os.path.isdir(folder_name):
        os.mkdir(folder_name)
    whole_text = ""
    # process each chunk: export it, transcribe it, and append the result
    for i, audio_chunk in enumerate(chunks, start=1):
        chunk_filename = os.path.join(folder_name, f"chunk{i}.wav")
        audio_chunk.export(chunk_filename, format="wav")
        with sr.AudioFile(chunk_filename) as source:
            audio_listened = r.record(source)
            try:
                text = r.recognize_google(audio_listened)
            except sr.UnknownValueError as e:
                # Unintelligible chunk: report it and move on.
                print("Error:", str(e))
            else:
                text = f"{text.capitalize()}. "
                print(chunk_filename, ":", text)
                whole_text += text
    pb['value'] = 60
    pb.update()
    time.sleep(1)
    pb['value'] = 70
    pb.update()
    # Persist the full transcript.
    with open('result.txt', mode='w') as file:
        pb['value'] = 80
        pb.update()
        pb['value'] = 90
        pb.update()
        file.write(whole_text)
        print("\nText file generated!")
    pb['value'] = 100
    pb.update()
    time.sleep(0.5)
    pb.stop()
    messagebox.showinfo('Info', "File uploaded")
    # return the text for all chunks detected
    return whole_text
 def test_bing_chinese(self):
     """Recognize the Chinese fixture via Microsoft Bing Speech."""
     recognizer = sr.Recognizer()
     with sr.AudioFile(self.AUDIO_FILE_ZH) as audio_source:
         captured = recognizer.record(audio_source)
     self.assertEqual(recognizer.recognize_bing(captured, key=os.environ["BING_KEY"], language="zh-CN"), u"砸自己的脚。")
Example #21
0
    def recognizerWithAudioFile(fname):
        """Transcribe an uploaded WAV in one-minute chunks (with 1.5 s
        overlap), write the raw transcript to recognized.txt, then punctuate
        it and summarize it via external web APIs.

        Returns the list of summary sentences from the Aylien API, or the
        string "Could not recognize" on any failure in post-processing.
        """
        '''chdir(r"static/uploads")
        audio_file=path.join(path.dirname(path.realpath(__file__)),"{}".format(fname))'''


        audio = AudioSegment.from_wav(r"static/uploads/{}".format(fname))
        n=len(audio)
        counter=1
        print("function works")
        fh = open("recognized.txt", "w+")
        # Chunk geometry: 60 s windows that overlap by 1.5 s so words at a
        # boundary are not lost.
        interval = 60*1000
        overlap = 1.5*1000
        start=0
        end=0
        # When audio reaches its end, flag is set to 1 and we break
        flag = 0
        for i in range(0,2*n,interval):
            if i == 0:
                start=0
                end=interval
            else:
                start=end-overlap
                end=start+interval
            # When end becomes greater than the file length,
            # end is set to the file length
            # flag is set to 1 to indicate break.
            if end>=n:
                end=n
                flag=1
            chunk =audio[start:end]
            filename = 'chunk'+str(counter)+'.wav'
            chunk.export(r'audio_chunks/{}'.format(filename), format ="wav")
            print("Processing chunk "+str(counter)+". Start = "
                        +str(start)+" end = "+str(end))
            counter = counter + 1

            AUDIO_FILE = filename


            r=sr.Recognizer()
            with sr.AudioFile(r'audio_chunks/{}'.format(AUDIO_FILE)) as source:
            # remove this if it is not working
            # correctly.
                r.adjust_for_ambient_noise(source)
                audio_listened = r.listen(source)

            try:
            # try converting it to text
                rec = r.recognize_google(audio_listened)
            # write the output to the file.
                fh.write(rec+" ")

            # catch any errors.
            except sr.UnknownValueError:
                print("Could not understand audio")

            except sr.RequestError as e:
                print("Could not request results. check your internet connection")

            # Final chunk processed: close the transcript and stop looping.
            if flag==1:
                fh.close()
                break










        try:
            # Re-read the transcript written above.
            # NOTE(review): this handle is never explicitly closed.
            output = open("recognized.txt", "r")
            #output=r.recognize_google(audio)#show all = true will show all possibilites of how google translates this audio to text
            #print("Over")
            #print("output received from google speech api ")

            #sending output to punctuator api using post request
            url_punctuator="http://bark.phon.ioc.ee/punctuator"
            data={'text':'{}'.format(output.read())}
            response_from_punctuator=requests.request("POST",url_punctuator,data=data)
            #print("text returned from punctuator api:",response_from_punctuator.text)

            punctuated_text=response_from_punctuator.text
            print("output received from punctuator api ")
            # Summarize the punctuated transcript (10 sentences) via Aylien.
            response = requests.post("https://api.aylien.com/api/v1/summarize?title='text'&text={}&sentences_number=10".format(punctuated_text),
            headers={
                "X-AYLIEN-TextAPI-Application-Key":"{}".format(params["aylien-api-key"]),
                "X-AYLIEN-TextAPI-Application-ID":"{}".format(params["aylien-app-id"])
            }
            )


            #don't uncomment this meaning cloud
            #sending the punctuated text to meaningcloud api using the api key
            '''    url = "https://api.meaningcloud.com/summarization-1.0"

                payload = "key=440a3e9fde785d6b5aa4bd1595052891&txt={}&url=&doc=&sentences=10".format(punctuated_text)
                headers = {'content-type': 'application/x-www-form-urlencoded'}
                #application/x-www-form-urlencoded

                response = requests.request("POST", url, data=payload,headers=headers)'''

            #headers=headers
            #dont uncomment :D


            #printing json object
           # print("text returned from meaning cloud api:",response.json()['summary'])
            text=response.json()['sentences']
            print("output received from aylien")



        # pprint("original output:",output)

            #output also shows the accuracy of its conversion...check key confidence and transcript in output means the difference conversions of the audio file

        except:
            # NOTE(review): bare except — swallows every failure in the
            # whole post-processing chain, not just recognition errors.
            return "Could not recognize" #exception if google api doesn't understand

        return text
 def test_houndify_english(self):
     """Recognize the English fixture via the Houndify API."""
     recognizer = sr.Recognizer()
     with sr.AudioFile(self.AUDIO_FILE_EN) as audio_source:
         captured = recognizer.record(audio_source)
     self.assertEqual(recognizer.recognize_houndify(captured, client_id=os.environ["HOUNDIFY_CLIENT_ID"], client_key=os.environ["HOUNDIFY_CLIENT_KEY"]), "one two three")
Example #23
0
import speech_recognition as sr
import pyttsx3

r = sr.Recognizer()


def speak(command):
    """Read *command* aloud through the pyttsx3 text-to-speech engine."""
    engine = pyttsx3.init()
    engine.say(command)
    engine.runAndWait()


# Transcribe test1.wav, echo the result, and read it back aloud.
# while 1:
try:
    with sr.AudioFile("test1.wav") as source2:
        # r.adjust_for_ambient_noise(source2,duration=1)
        audio2 = r.listen(source2)
        text = r.recognize_google(audio2)
        text = text.lower()
        print("Did you say :" + text)
        speak(text)
except sr.RequestError as e:
    # Bug fix: the original mixed an f-string with str.format —
    # f"...{0}" substituted the literal 0 immediately, so the actual
    # error `e` was never shown.
    print("Couldn't request result : {0}".format(e))
except sr.UnknownValueError:
    print("Unknown error")
 def test_ibm_english(self):
     """Recognize the English fixture via IBM Speech to Text."""
     recognizer = sr.Recognizer()
     with sr.AudioFile(self.AUDIO_FILE_EN) as audio_source:
         captured = recognizer.record(audio_source)
     self.assertEqual(recognizer.recognize_ibm(captured, username=os.environ["IBM_USERNAME"], password=os.environ["IBM_PASSWORD"]), "one two three ")
Example #25
0
def func(url_to_audio=None, isBase64=False, base64Data=None):
    """Fetch or decode an audio clip, recognize the speech in it, and map
    the transcript either to a pre-rendered ISL gif or to a generated
    letter-by-letter video.

    Exactly one source is used: base64 payload when isBase64 is true,
    otherwise the URL. Returns the path to the matching gif, or to
    output.avi spelling the phrase from letter images.
    """
    if isBase64:
        # Bug fix: base64.decodestring was deprecated and removed in
        # Python 3.9; decodebytes is the direct replacement.
        img_64 = base64.decodebytes(base64Data)
        with open(os.path.join(dir_path, 'test.wav'), 'wb') as img_file:
            img_file.write(img_64)

    if url_to_audio is not None and isBase64 is False:
        # Stream the remote file to disk chunk by chunk.
        r = requests.get(url_to_audio, allow_redirects=True, stream=True)
        with open(os.path.join(dir_path, 'test.wav'), 'wb') as f:
            for chunk in r.iter_content():
                f.write(chunk)

    r = sr.Recognizer()
    # Phrases that have a dedicated pre-rendered gif.
    isl_gif = [
        'all the best', 'any questions', 'are you angry', 'are you busy',
        'are you hungry', 'are you sick', 'be careful', 'can we meet tomorrow',
        'did you book tickets', 'did you finish homework',
        'do you go to office', 'do you have money',
        'do you want something to drink', 'do you want tea or coffee',
        'do you watch TV', 'dont worry', 'flower is beautiful',
        'good afternoon', 'good evening', 'good morning', 'good night',
        'good question', 'had your lunch', 'happy journey',
        'hello what is your name', 'how many people are there in your family',
        'i am a clerk', 'i am bore doing nothing', 'i am fine', 'i am sorry',
        'i am thinking', 'i am tired', 'i dont understand anything',
        'i go to a theatre', 'i love to shop',
        'i had to say something but i forgot', 'i have headache',
        'i like pink colour', 'i live in nagpur', 'lets go for lunch',
        'my mother is a homemaker', 'my name is john', 'nice to meet you',
        'no smoking please', 'open the door', 'please call an ambulance',
        'please call me later', 'please clean the room',
        'please give me your pen', 'please use dustbin dont throw garbage',
        'please wait for sometime', 'shall I help you',
        'shall we go together tommorow', 'sign language interpreter',
        'sit down', 'stand up', 'take care', 'there was traffic jam',
        'wait I am thinking', 'what are you doing', 'what is the problem',
        'what is todays date', 'what is your age', 'what is your father do',
        'what is your job', 'what is your mobile number', 'what is your name',
        'whats up', 'when is your interview', 'when we will go',
        'where do you stay', 'where is the bathroom',
        'where is the police station', 'you are wrong', 'address', 'agra',
        'ahemdabad', 'all', 'april', 'assam', 'august', 'australia', 'badoda',
        'banana', 'banaras', 'banglore', 'bihar', 'bihar', 'bridge', 'cat',
        'chandigarh', 'chennai', 'christmas', 'church', 'clinic', 'coconut',
        'crocodile', 'dasara', 'deaf', 'december', 'deer', 'delhi', 'dollar',
        'duck', 'febuary', 'friday', 'fruits', 'glass', 'grapes', 'gujrat',
        'hello', 'hindu', 'hyderabad', 'india', 'january', 'jesus', 'job',
        'july', 'july', 'karnataka', 'kerala', 'krishna', 'litre', 'mango',
        'may', 'mile', 'monday', 'mumbai', 'museum', 'muslim', 'nagpur',
        'october', 'orange', 'pakistan', 'pass', 'police station',
        'post office', 'pune', 'punjab', 'rajasthan', 'ram', 'restaurant',
        'saturday', 'september', 'shop', 'sleep', 'southafrica', 'story',
        'sunday', 'tamil nadu', 'temperature', 'temple', 'thursday', 'toilet',
        'tomato', 'town', 'tuesday', 'usa', 'village', 'voice', 'wednesday',
        'weight'
    ]

    # Letters with a corresponding image under letters/.
    arr = [
        'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
        'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'
    ]
    with sr.AudioFile(os.path.join(dir_path, 'test.wav')) as source:
        r.adjust_for_ambient_noise(source)
        i = 0
        r.pause_threshold = 3
        audio = r.listen(source)

        # recognize speech using the Google Web Speech API
        a = r.recognize_google(audio)
        print("you said " + a.lower())

        # Strip punctuation before any lookup.
        for c in string.punctuation:
            a = a.replace(c, "")

        if a.lower() in isl_gif:
            print(os.path.join(dir_path, 'ISL_Gifs/{0}.gif'.format(a.lower())))
            return os.path.join(dir_path, 'ISL_Gifs/{0}.gif'.format(a.lower()))
        else:
            # No gif for the phrase: spell it out as a video of letter images.
            images = []
            for i in range(len(a)):
                if a[i] in arr:
                    ImageAddress = os.path.join(dir_path,
                                                'letters/' + a[i] + '.jpg')
                    images.append(ImageAddress)
                    # ImageItself = Image.open(ImageAddress)
                    # ImageNumpyFormat = np.asarray(ImageItself)
                    # plt.imshow(ImageNumpyFormat)
                    # plt.draw()
                    # plt.pause(0.8) # pause how many seconds
                    #plt.close()
                else:
                    continue
            video_name = 'output.avi'
            # NOTE(review): crashes if the phrase contained no a-z letters
            # (images is then empty); kept as-is to preserve behavior.
            frame = cv2.imread(images[0])
            height, width, layers = frame.shape

            video = cv2.VideoWriter(video_name,
                                    cv2.VideoWriter_fourcc(*'XVID'), 30,
                                    (width, height))

            # Hold each letter for 20 frames (2/3 s at 30 fps).
            for image in images:
                for _ in range(20):
                    video.write(cv2.imread(image))

            cv2.destroyAllWindows()
            video.release()
            print(os.path.join(dir_path, 'output.avi'))
            return os.path.join(dir_path, 'output.avi')
 def test_ibm_french(self):
     """Recognize the French fixture via IBM Speech to Text."""
     recognizer = sr.Recognizer()
     with sr.AudioFile(self.AUDIO_FILE_FR) as audio_source:
         captured = recognizer.record(audio_source)
     self.assertEqual(recognizer.recognize_ibm(captured, username=os.environ["IBM_USERNAME"], password=os.environ["IBM_PASSWORD"], language="fr-FR"), u"si la dictée numéro un ")
Example #27
0
import speech_recognition as sr

AUDIO_FILE = "./sample.wav"

# use the audio file as the audio source
r = sr.Recognizer()
with sr.AudioFile(AUDIO_FILE) as source:
    audio = r.record(source)  # read the entire audio file

try:
    # Bug fix: recognition itself raises UnknownValueError/RequestError, so
    # it must run inside the try block — the original called it before the
    # try, making both handlers unreachable.
    result = r.recognize_google(audio, language='ja-JP')
    print(result)
except sr.UnknownValueError:
    print("Google Speech Recognition could not understand audio")
except sr.RequestError as e:
    print("Could not request results from Google Speech Recognition service; {0}".format(e))
 def test_ibm_chinese(self):
     """Recognize the Chinese fixture via IBM Speech to Text."""
     recognizer = sr.Recognizer()
     with sr.AudioFile(self.AUDIO_FILE_ZH) as audio_source:
         captured = recognizer.record(audio_source)
     self.assertEqual(recognizer.recognize_ibm(captured, username=os.environ["IBM_USERNAME"], password=os.environ["IBM_PASSWORD"], language="zh-CN"), u"砸 自己 的 脚 ")
 def test_google_english(self):
     """Recognize the English fixture with the Google Web Speech API."""
     recognizer = sr.Recognizer()
     with sr.AudioFile(self.AUDIO_FILE_EN) as audio_source:
         captured = recognizer.record(audio_source)
     self.assertEqual(recognizer.recognize_google(captured), "one two three")
Example #30
0
 def get_test_sound_file(self):
     """Return an AudioFile wrapping the bundled hello.wav fixture."""
     fixture = sr.AudioFile('hello.wav')
     return fixture