# Build a lookup table mapping recognized speech text -> audio file path, by
# running CMU Sphinx over every audio file in the "filtered" folder.
# NOTE(review): `dict` shadows the builtin; `sr` (speech_recognition) is
# presumably imported elsewhere in this file -- confirm.
# NOTE(review): `.decode('utf-8')` on the Sphinx result implies Python 2
# (in Python 3 recognize_sphinx returns str, which has no .decode) -- confirm
# the target interpreter.
from os import path
import os

dict = {}  # recognized text (lower-cased, ascii) -> relative audio path
speechSource = 'filtered'
r = sr.Recognizer()

# Loop through the folder of filtered audios
for root, dirs, filenames in os.walk(speechSource):
    for file in filenames:
        if file != ".DS_Store":  # skip macOS metadata files
            with sr.AudioFile("filtered/" + file) as sourcenew:
                audio = r.record(sourcenew)
            try:
                # Send the audio file to API and get data
                udata = r.recognize_sphinx(audio).decode('utf-8')
            except sr.UnknownValueError:
                continue  # skip audio Sphinx could not transcribe
            # Convert utf-8 to asciidata
            asciidata = udata.encode("ascii", "ignore")
            # Create a dictionary with keys being the text data that was
            # received and with values being relative addresses of audios
            dict[asciidata.lower()] = "filtered/" + file
            print("Process...")

# Sentinel entry -- presumably lets callers insert a pause between played
# words; confirm against the consumer of this mapping.
dict["*pause"] = "filtered/pause.wav"
"""Transcribe today's WAV recording into a dated text file via Google's API."""
import datetime

import speech_recognition as sr

filename = f"Recording{datetime.datetime.now().strftime('%Y%m%d')}.wav"

# intialize the recognizer
r = sr.Recognizer()

# load and convert it to a text file
with sr.AudioFile(filename) as audio_source:
    # listen for the data
    audio_data = r.record(audio_source)
    # convert to text
    text = r.recognize_google(audio_data)

# Write the transcript to a short-date (YYMMDD) text file.
with open(f"{datetime.datetime.now().strftime('%y%m%d')}.txt", mode='w') as out:
    out.write(text)
def test_sphinx_english(self):
    """CMU Sphinx should transcribe the English fixture as 'wanted to three'."""
    recognizer = sr.Recognizer()
    with sr.AudioFile(self.AUDIO_FILE_EN) as audio_source:
        captured = recognizer.record(audio_source)
    self.assertEqual(recognizer.recognize_sphinx(captured), "wanted to three")
#-------------------------------------------------------------------------------
# Name:        Battleships - Blind Edition
# Purpose:     To help blind people
#
# Author:      Olipus & Mastercoder27
#
# Created:     30-01-2019
# Copyright:   (c) OLIVER 2019
# Licence:     none specified
#-------------------------------------------------------------------------------
import speech_recognition as sr
from pygame import mixer

# Play the coordinates prompt aloud...
mixer.init()
mixer.music.load("coords6.wav")
mixer.music.play()

# ...and transcribe the very same file with Google's web API.
recognizer = sr.Recognizer()
test = sr.AudioFile('coords6.wav')
with test as audio_source:
    captured = recognizer.record(audio_source)
print(recognizer.recognize_google(captured))
def throw_to_google():
    """Capture the entire contents of recording.wav from the current dir.

    NOTE(review): the captured audio is neither returned nor recognized
    here -- presumably the Google call lives elsewhere; confirm.
    """
    wav_path = path.join(current_dir, "recording.wav")
    recognizer = sr.Recognizer()
    with sr.AudioFile(wav_path) as audio_source:
        # read the entire audio file
        audio = recognizer.record(audio_source)
def download():
    # Handle an uploaded audio file: normalize it, split it on silence, run
    # STT on each chunk, classify the speaker's gender per chunk, and write
    # combined / female-only / male-only transcripts to static text files.
    # NOTE(review): the uploaded file handle `f` is rebound to an open text
    # file inside the loop, and all three output files are reopened
    # (truncated) on every chunk -- looks unintentional; confirm.
    if request.method == 'POST':
        f = request.files['file']
        if not f:
            # No file supplied: send the user back to the upload form.
            return render_template('upload.html')
        f_path = os.path.splitext(str(f))
        f_path = os.path.split(f_path[0])
        normalizedsound = normalized_sound(f)
        audio_chunks = split_slience(normalizedsound)
        save_script = ''
        female_list = list()  # transcripts attributed to female speakers
        male_list = list()    # transcripts attributed to male speakers
        for i, chunk in enumerate(audio_chunks):
            speaker_stt = list()  # [transcript, predicted gender] per chunk
            out_file = "chunk.wav"
            chunk.export(out_file, format='wav')
            aaa = sr.AudioFile(out_file)
            try:
                # Output files: full script, female-only, male-only.
                f = open('c:/nmb/nada/web/static/test.txt', 'wt', encoding='utf-8')
                ff = open('c:/nmb/nada/web/static/test_female.txt', 'wt', encoding='utf-8')
                fm = open('c:/nmb/nada/web/static/test_male.txt', 'wt', encoding='utf-8')
                stt_text = STT(aaa)
                speaker_stt.append(str(stt_text))
                y, sample_rate = librosa.load(out_file, sr=22050)
                if len(y) >= 22050 * 5:
                    # Chunk is >= 5 s: truncate to exactly 5 s for the
                    # speaker-gender model.
                    y = y[:22050 * 5]
                    speaker = predict_speaker(y, sample_rate)
                    speaker_stt.append(str(speaker))
                    print(speaker_stt[1], " : ", speaker_stt[0])
                    if speaker == '여자':  # '여자' == "female"
                        female_list.append(str(speaker_stt[0]))
                    else:
                        male_list.append(str(speaker_stt[0]))
                else:
                    # Chunk shorter than 5 s: double it three times (8x
                    # length) so the model still gets a full 5 s window.
                    audio_copy = AudioSegment.from_wav(out_file)
                    audio_copy = copy.deepcopy(audio_copy)
                    for num in range(3):
                        audio_copy = audio_copy.append(
                            copy.deepcopy(audio_copy), crossfade=0)
                    out_file_over5s = "chunk_over_5s.wav"
                    audio_copy.export(out_file_over5s, format='wav')
                    y_copy, sample_rate = librosa.load(out_file_over5s, sr=22050)
                    y_copy = y_copy[:22050 * 5]
                    speaker = predict_speaker(y_copy, sample_rate)
                    speaker_stt.append(str(speaker))
                    print(speaker_stt[1] + " : " + speaker_stt[0])
                    if speaker == '여자':
                        female_list.append(str(speaker_stt[0]))
                    else:
                        male_list.append(str(speaker_stt[0]))
                # Accumulate "<gender> : <transcript>" and rewrite all three
                # output files with everything seen so far.
                save_script += speaker_stt[1] + " : " + speaker_stt[0] + '\n\n'
                f.writelines(save_script)
                ff.writelines('\n\n'.join(female_list))
                fm.writelines('\n\n'.join(male_list))
                # Clean up temporary chunk WAVs.
                if os.path.isfile(out_file):
                    os.remove(out_file)
                if os.path.isfile(out_file_over5s):
                    os.remove(out_file_over5s)
            except:
                # NOTE(review): bare except silently swallows every error
                # (including the NameError when out_file_over5s was never
                # created on the >= 5 s path) -- confirm this is intended.
                pass
            f.close()
            ff.close()
            fm.close()
        return render_template('/download.html')
"""Record three seconds from the microphone, save it, and transcribe it."""
import speech_recognition as sr
import sounddevice as sd
from scipy.io.wavfile import write

# --- recording from the microphone ---
fs = 44100   # Sample rate
seconds = 3  # Duration of recording
myrecording = sd.rec(int(seconds * fs), samplerate=fs, channels=2)
sd.wait()  # Wait until recording is finished
write('output.wav', fs, myrecording)  # Save as WAV file

# --- transcribe the saved WAV with Google's web API ---
sound = "output.wav"
recognizer = sr.Recognizer()
with sr.AudioFile(sound) as audio_source:
    recognizer.adjust_for_ambient_noise(audio_source)
    print("Converting audio file to text...")
    audio = recognizer.listen(audio_source)
    try:
        text = recognizer.recognize_google(audio)
        print("The converted text:" + text)
    except Exception as e:
        print(e)
def test_google_chinese(self):
    """Google should transcribe the Chinese fixture to the expected phrase."""
    recognizer = sr.Recognizer()
    with sr.AudioFile(self.AUDIO_FILE_ZH) as audio_source:
        captured = recognizer.record(audio_source)
    self.assertEqual(
        recognizer.recognize_google(captured, language="zh-CN"), u"砸自己的脚")
def test_wit_english(self):
    """Wit.ai should transcribe the English fixture as 'one two three'."""
    recognizer = sr.Recognizer()
    with sr.AudioFile(self.AUDIO_FILE_EN) as audio_source:
        captured = recognizer.record(audio_source)
    self.assertEqual(
        recognizer.recognize_wit(captured, key=os.environ["WIT_AI_KEY"]),
        "one two three")
"""Transcribe sample.aiff with Google's free web speech API."""
import speech_recognition as sr
import time

r = sr.Recognizer()
audio_file = sr.AudioFile('sample.aiff')


def get_transcript(audio_file=audio_file):
    """Record the whole of *audio_file* and return Google's transcript."""
    with audio_file as src:
        captured = r.record(src)
    return r.recognize_google(captured)


if __name__ == "__main__":
    print(get_transcript())
def test_google_french(self):
    """Google should transcribe the French fixture to the expected phrase."""
    recognizer = sr.Recognizer()
    with sr.AudioFile(self.AUDIO_FILE_FR) as audio_source:
        captured = recognizer.record(audio_source)
    self.assertEqual(
        recognizer.recognize_google(captured, language="fr-FR"),
        u"et c'est la dictée numéro 1")
# NOTE(review): this looks like the tail of a larger voice-assistant file --
# talk/speak/record_audio/respond/person and voice_data are defined
# elsewhere; confirm.
try:
    talk("What is the message")
    # Everything after the last "for" in the spoken command becomes the body.
    content = voice_data.split("for")[-1]
    to = "*****@*****.**"
    # Send the message through Gmail's SMTP server with STARTTLS.
    server = smtplib.SMTP('smtp.gmail.com', 587)
    server.ehlo()
    server.starttls()
    server.login('*****@*****.**', 'your-password')
    server.sendmail('*****@*****.**', to, content)
    server.close()
    speak("Email sent!")
except Exception as e:
    print(e)
    speak("Sorry sir, there was errors in connectivity.")
time.sleep(1)
person_obj = person()
# Main loop: keep listening and responding forever.
while(1):
    voice_data = record_audio()  # get the voice input
    respond(voice_data)  # respond
# NOTE(review): the statements below are unreachable -- the loop above never
# exits. Confirm whether this recognition demo belongs before the loop.
r = sr.Recognizer()
file = sr.AudioFile('one.wav')
with file as source:
    r.adjust_for_ambient_noise(source)
    audio = r.record(source,duration=5)
result = r.recognize_google(audio,language='es')
print(result)
"""Transcribe a WAV file with the Google Cloud Speech API."""
import speech_recognition as sr
import os

# Point the Google client library at the service-account credentials file.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "SERVICE_ACCOUNT_KEY.json"

recognizer = sr.Recognizer()
file = sr.AudioFile('FILE_NAME.wav')
with file as audio_source:
    captured = recognizer.record(audio_source)

try:
    recog = recognizer.recognize_google_cloud(captured, language='en-US')
    print("You said: " + recog)
except sr.UnknownValueError as u:
    print(u)
    print("Google Speech Recognition could not understand audio")
except sr.RequestError as e:
    print(
        "Could not request results from Google Speech Recognition service; {0}"
        .format(e))
#!/usr/bin/env python
# coding: utf-8
"""Convert temp/test.mp3 to FLAC and transcribe it with Google (zh-cn)."""
import speech_recognition
from pydub import AudioSegment

# Google's free web API handles FLAC/WAV, so convert the MP3 first.
sound = AudioSegment.from_file("temp/test.mp3")
sound.export("temp/test.flac", format="flac", bitrate="128k")

r = speech_recognition.Recognizer()
# BUG FIX: the FLAC was exported to temp/test.flac, but the original opened
# "test.flac" from the working directory, which was never created.
with speech_recognition.AudioFile("temp/test.flac") as source:
    audio = r.record(source)

# BUG FIX: `print r.recognize_google(...)` is Python-2 statement syntax; the
# call form works on both Python 2 and 3.
print(r.recognize_google(audio, language='zh-cn'))
# Demo of the two input paths of the speech_recognition library: an audio
# file and the microphone.
# NOTE(review): the Recognizer `r` is created elsewhere in this file --
# confirm.
mic = sr.Microphone()
sr.Microphone.list_microphone_names()  #To get a list of microphone names

#Recognizer class has seven methods for recognizing speech
#recognize_bing(): Microsoft Bing Speech
#recognize_google(): Google Web Speech API
#recognize_google_cloud(): Google Cloud Speech - requires installation of the google-cloud-speech package
#recognize_houndify(): Houndify by SoundHound
#recognize_ibm(): IBM Speech to Text
#recognize_sphinx(): CMU Sphinx - requires installing PocketSphinx
#recognize_wit(): Wit.ai

#Using audio file as input
song = sr.AudioFile(
    'D:\\Speech_Recognition\\Avicii - Hey Brother.wav'
)  #context manager opens the file and reads its contents, storing the data in an AudioFile instance called source
with song as source:
    # Calibrate the energy threshold on the first 0.5 s of the file.
    r.adjust_for_ambient_noise(source, duration=0.5)
    audio = r.record(
        source
    )  #record() method records the data from the entire file into an AudioData instance.
# show_all=True returns every alternative Google offers (result is unused
# here -- this is demo code).
r.recognize_google(audio, show_all=True)

#Using mic as input
with mic as source:
    r.adjust_for_ambient_noise(source)
    audio = r.listen(
        source, timeout=3)  #listen method to capture input from microphone
def test_bing_english(self):
    """Bing should transcribe the English fixture as '123.'."""
    recognizer = sr.Recognizer()
    with sr.AudioFile(self.AUDIO_FILE_EN) as audio_source:
        captured = recognizer.record(audio_source)
    self.assertEqual(
        recognizer.recognize_bing(captured, key=os.environ["BING_KEY"]),
        "123.")
def recognize(sphfile):
    """Transcribe the audio file at *sphfile* with Google's web API."""
    recognizer = sr.Recognizer()
    audiofile = sr.AudioFile(sphfile)
    with audiofile as src:
        captured = recognizer.record(src)
    return recognizer.recognize_google(captured)
def test_bing_french(self):
    """Bing should transcribe the French fixture to the expected phrase."""
    recognizer = sr.Recognizer()
    with sr.AudioFile(self.AUDIO_FILE_FR) as audio_source:
        captured = recognizer.record(audio_source)
    self.assertEqual(
        recognizer.recognize_bing(
            captured, key=os.environ["BING_KEY"], language="fr-FR"),
        u"Essaye la dictée numéro un.")
def VideoToText(input):
    # Extract the audio track from the video at *input*, split it on
    # silence, transcribe each chunk with Google, write the combined
    # transcript to result.txt, and return it.  `pb` is a progress-bar
    # widget defined elsewhere; its value is bumped at each stage for UI
    # feedback.
    # NOTE(review): `input` shadows the builtin, and `mp3_file` actually
    # holds a WAV path -- confirm before renaming either.
    pb.start()
    pb['value'] = 10
    pb.update()
    mp3_file = "audio.wav"
    videoClip = VideoFileClip(input)
    pb['value'] = 20
    pb.update()
    audioclip = videoClip.audio
    audioclip.write_audiofile(mp3_file)
    pb['value'] = 30
    pb.update()
    audioclip.close()
    videoClip.close()
    pb['value'] = 40
    pb.update()
    time.sleep(1)
    path = "audio.wav"
    pb['value'] = 50
    pb.update()
    r = sr.Recognizer()
    sound = AudioSegment.from_wav(path)
    # Split wherever silence lasts >= 1 s; the threshold tracks the clip's
    # own loudness (14 dB below average); keep 1 s of silence on the edges
    # so chunks don't sound clipped.
    chunks = split_on_silence(
        sound,
        min_silence_len=1000,
        silence_thresh=sound.dBFS - 14,
        keep_silence=1000,
    )
    folder_name = "audio-chunks"
    # create a directory to store the audio chunks
    if not os.path.isdir(folder_name):
        os.mkdir(folder_name)
    whole_text = ""
    # process each chunk
    for i, audio_chunk in enumerate(chunks, start=1):
        chunk_filename = os.path.join(folder_name, f"chunk{i}.wav")
        audio_chunk.export(chunk_filename, format="wav")
        with sr.AudioFile(chunk_filename) as source:
            audio_listened = r.record(source)
            try:
                text = r.recognize_google(audio_listened)
            except sr.UnknownValueError as e:
                # Unintelligible chunk: log and skip it.
                print("Error:", str(e))
            else:
                text = f"{text.capitalize()}. "
                print(chunk_filename, ":", text)
                whole_text += text
    pb['value'] = 60
    pb.update()
    time.sleep(1)
    pb['value'] = 70
    pb.update()
    with open('result.txt', mode='w') as file:
        pb['value'] = 80
        pb.update()
        pb['value'] = 90
        pb.update()
        file.write(whole_text)
        print("\nText file generated!")
    pb['value'] = 100
    pb.update()
    time.sleep(0.5)
    pb.stop()
    messagebox.showinfo('Info', "File uploaded")
    # return the text for all chunks detected
    return whole_text
def test_bing_chinese(self):
    """Bing should transcribe the Chinese fixture to the expected phrase."""
    recognizer = sr.Recognizer()
    with sr.AudioFile(self.AUDIO_FILE_ZH) as audio_source:
        captured = recognizer.record(audio_source)
    self.assertEqual(
        recognizer.recognize_bing(
            captured, key=os.environ["BING_KEY"], language="zh-CN"),
        u"砸自己的脚。")
def recognizerWithAudioFile(fname):
    # Transcribe an uploaded WAV by slicing it into overlapping ~60 s chunks
    # (so words at chunk borders are not lost), recognizing each with
    # Google, punctuating the combined text with the punctuator web service,
    # and summarizing it with the Aylien API.  Returns the summary
    # sentences, or "Could not recognize" on any failure.
    '''chdir(r"static/uploads")
    audio_file=path.join(path.dirname(path.realpath(__file__)),"{}".format(fname))'''
    audio = AudioSegment.from_wav(r"static/uploads/{}".format(fname))
    n = len(audio)  # total length in milliseconds
    counter = 1
    print("function works")
    fh = open("recognized.txt", "w+")
    interval = 60 * 1000   # chunk size: 60 s
    overlap = 1.5 * 1000   # consecutive chunks overlap by 1.5 s
    start = 0
    end = 0
    # When audio reaches its end, flag is set to 1 and we break
    flag = 0
    for i in range(0, 2 * n, interval):
        if i == 0:
            start = 0
            end = interval
        else:
            # Each subsequent chunk starts slightly before the previous end.
            start = end - overlap
            end = start + interval
        # When end becomes greater than the file length,
        # end is set to the file length
        # flag is set to 1 to indicate break.
        if end >= n:
            end = n
            flag = 1
        chunk = audio[start:end]
        filename = 'chunk' + str(counter) + '.wav'
        chunk.export(r'audio_chunks/{}'.format(filename), format="wav")
        print("Processing chunk " + str(counter) + ". Start = " +
              str(start) + " end = " + str(end))
        counter = counter + 1
        AUDIO_FILE = filename
        r = sr.Recognizer()
        with sr.AudioFile(r'audio_chunks/{}'.format(AUDIO_FILE)) as source:
            # remove this if it is not working
            # correctly.
            r.adjust_for_ambient_noise(source)
            audio_listened = r.listen(source)
        try:
            # try converting it to text
            rec = r.recognize_google(audio_listened)
            # write the output to the file.
            fh.write(rec + " ")
        # catch any errors.
        except sr.UnknownValueError:
            print("Could not understand audio")
        except sr.RequestError as e:
            print("Could not request results. "
                  "check your internet connection")
        if flag == 1:
            fh.close()
            break
    try:
        output = open("recognized.txt", "r")
        #output=r.recognize_google(audio)#show all = true will show all possibilites of how google translates this audio to text
        #print("Over")
        #print("output received from google speech api ")
        # sending output to punctuator api using post request
        url_punctuator = "http://bark.phon.ioc.ee/punctuator"
        data = {'text': '{}'.format(output.read())}
        response_from_punctuator = requests.request("POST", url_punctuator, data=data)
        #print("text returned from punctuator api:",response_from_punctuator.text)
        punctuated_text = response_from_punctuator.text
        print("output received from punctuator api ")
        # Ask Aylien for a 10-sentence summary of the punctuated transcript.
        response = requests.post(
            "https://api.aylien.com/api/v1/summarize?title='text'&text={}&sentences_number=10".format(punctuated_text),
            headers={
                "X-AYLIEN-TextAPI-Application-Key": "{}".format(params["aylien-api-key"]),
                "X-AYLIEN-TextAPI-Application-ID": "{}".format(params["aylien-app-id"])
            }
        )
        #don't uncomment this meaning cloud
        #sending the punctuated text to meaningcloud api using the api key
        # NOTE(review): the dead code below embeds a hardcoded API key --
        # consider removing it entirely.
        '''
        url = "https://api.meaningcloud.com/summarization-1.0"
        payload = "key=440a3e9fde785d6b5aa4bd1595052891&txt={}&url=&doc=&sentences=10".format(punctuated_text)
        headers = {'content-type': 'application/x-www-form-urlencoded'}
        #application/x-www-form-urlencoded
        response = requests.request("POST", url, data=payload,headers=headers)'''
        #headers=headers  #dont uncomment :D
        #printing json object
        # print("text returned from meaning cloud api:",response.json()['summary'])
        text = response.json()['sentences']
        print("output received from aylien")
        # pprint("original output:",output)
        # output also shows the accuracy of its conversion...check key
        # confidence and transcript in output means the difference
        # conversions of the audio file
    except:
        # NOTE(review): bare except hides which of the three web calls
        # failed -- consider catching requests exceptions explicitly.
        return "Could not recognize"  # exception if google api doesn't understand
    return text
def test_houndify_english(self):
    """Houndify should transcribe the English fixture as 'one two three'."""
    recognizer = sr.Recognizer()
    with sr.AudioFile(self.AUDIO_FILE_EN) as audio_source:
        captured = recognizer.record(audio_source)
    self.assertEqual(
        recognizer.recognize_houndify(
            captured,
            client_id=os.environ["HOUNDIFY_CLIENT_ID"],
            client_key=os.environ["HOUNDIFY_CLIENT_KEY"]),
        "one two three")
"""Transcribe test1.wav with Google and speak the result back via TTS."""
import speech_recognition as sr
import pyttsx3

r = sr.Recognizer()


def speak(command):
    """Speak *command* aloud through the pyttsx3 TTS engine."""
    eng = pyttsx3.init()
    eng.say(command)
    eng.runAndWait()


# while 1:
try:
    with sr.AudioFile("test1.wav") as source2:
        # r.adjust_for_ambient_noise(source2,duration=1)
        audio2 = r.listen(source2)
        text = r.recognize_google(audio2)
        text = text.lower()
        print("Did you say :" + text)
        speak(text)
except sr.RequestError as e:
    # BUG FIX: the original wrote f"...{0}".format(e) -- the f-string
    # substituted the literal 0 immediately, so the error was never shown.
    print("Couldn't request result : {0}".format(e))
except sr.UnknownValueError:
    print("Unknown error")
def test_ibm_english(self):
    """IBM should transcribe the English fixture as 'one two three '."""
    recognizer = sr.Recognizer()
    with sr.AudioFile(self.AUDIO_FILE_EN) as audio_source:
        captured = recognizer.record(audio_source)
    self.assertEqual(
        recognizer.recognize_ibm(
            captured,
            username=os.environ["IBM_USERNAME"],
            password=os.environ["IBM_PASSWORD"]),
        "one two three ")
def func(url_to_audio=None, isBase64=False, base64Data=None):
    # Fetch (from URL) or decode (from base64) an audio clip to test.wav,
    # transcribe it with Google, and return either the path to a matching
    # pre-rendered ISL (Indian Sign Language) phrase GIF or the path to a
    # generated video that spells the text out letter by letter.
    if isBase64:
        # NOTE(review): base64.decodestring is deprecated (removed in
        # Python 3.9; use decodebytes) -- confirm the target version.
        img_64 = base64.decodestring(base64Data)
        with open(os.path.join(dir_path, 'test.wav'), 'wb') as img_file:
            img_file.write(img_64)
    if url_to_audio is not None and isBase64 is False:
        # Stream the remote file to disk chunk by chunk.
        r = requests.get(url_to_audio, allow_redirects=True, stream=True)
        with open(os.path.join(dir_path, 'test.wav'), 'wb') as f:
            for chunk in r.iter_content():
                f.write(chunk)
    r = sr.Recognizer()
    # Phrases for which a pre-rendered GIF exists under ISL_Gifs/.
    isl_gif = [
        'all the best', 'any questions', 'are you angry', 'are you busy',
        'are you hungry', 'are you sick', 'be careful',
        'can we meet tomorrow', 'did you book tickets',
        'did you finish homework', 'do you go to office',
        'do you have money', 'do you want something to drink',
        'do you want tea or coffee', 'do you watch TV', 'dont worry',
        'flower is beautiful', 'good afternoon', 'good evening',
        'good morning', 'good night', 'good question', 'had your lunch',
        'happy journey', 'hello what is your name',
        'how many people are there in your family', 'i am a clerk',
        'i am bore doing nothing', 'i am fine', 'i am sorry',
        'i am thinking', 'i am tired', 'i dont understand anything',
        'i go to a theatre', 'i love to shop',
        'i had to say something but i forgot', 'i have headache',
        'i like pink colour', 'i live in nagpur', 'lets go for lunch',
        'my mother is a homemaker', 'my name is john', 'nice to meet you',
        'no smoking please', 'open the door', 'please call an ambulance',
        'please call me later', 'please clean the room',
        'please give me your pen', 'please use dustbin dont throw garbage',
        'please wait for sometime', 'shall I help you',
        'shall we go together tommorow', 'sign language interpreter',
        'sit down', 'stand up', 'take care', 'there was traffic jam',
        'wait I am thinking', 'what are you doing', 'what is the problem',
        'what is todays date', 'what is your age', 'what is your father do',
        'what is your job', 'what is your mobile number',
        'what is your name', 'whats up', 'when is your interview',
        'when we will go', 'where do you stay',
        'where is the bathroom', 'where is the police station',
        'you are wrong', 'address', 'agra', 'ahemdabad', 'all', 'april',
        'assam', 'august', 'australia', 'badoda', 'banana', 'banaras',
        'banglore', 'bihar', 'bihar', 'bridge', 'cat', 'chandigarh',
        'chennai', 'christmas', 'church', 'clinic', 'coconut', 'crocodile',
        'dasara', 'deaf', 'december', 'deer', 'delhi', 'dollar', 'duck',
        'febuary', 'friday', 'fruits', 'glass', 'grapes', 'gujrat',
        'hello', 'hindu', 'hyderabad', 'india', 'january', 'jesus', 'job',
        'july', 'july', 'karnataka', 'kerala', 'krishna', 'litre', 'mango',
        'may', 'mile', 'monday', 'mumbai', 'museum', 'muslim', 'nagpur',
        'october', 'orange', 'pakistan', 'pass', 'police station',
        'post office', 'pune', 'punjab', 'rajasthan', 'ram', 'restaurant',
        'saturday', 'september', 'shop', 'sleep', 'southafrica', 'story',
        'sunday', 'tamil nadu', 'temperature', 'temple', 'thursday',
        'toilet', 'tomato', 'town', 'tuesday', 'usa', 'village', 'voice',
        'wednesday', 'weight'
    ]
    # Letters that have individual sign images under letters/.
    arr = [
        'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
        'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'
    ]
    with sr.AudioFile(os.path.join(dir_path, 'test.wav')) as source:
        r.adjust_for_ambient_noise(source)
        i = 0
        r.pause_threshold = 3
        audio = r.listen(source)
        # recognize speech using Sphinx
        # NOTE(review): comment says Sphinx but the call below uses Google.
        a = r.recognize_google(audio)
        print("you said " + a.lower())
        # Strip punctuation so lookup/spelling is letters-only.
        for c in string.punctuation:
            a = a.replace(c, "")
        if a.lower() in isl_gif:
            # Known phrase: return the pre-rendered GIF path.
            print(os.path.join(dir_path, 'ISL_Gifs/{0}.gif'.format(a.lower())))
            return os.path.join(dir_path, 'ISL_Gifs/{0}.gif'.format(a.lower()))
        else:
            # Unknown phrase: collect a letter image per character...
            images = []
            for i in range(len(a)):
                if a[i] in arr:
                    ImageAddress = os.path.join(dir_path, 'letters/' + a[i] + '.jpg')
                    images.append(ImageAddress)
                    # ImageItself = Image.open(ImageAddress)
                    # ImageNumpyFormat = np.asarray(ImageItself)
                    # plt.imshow(ImageNumpyFormat)
                    # plt.draw()
                    # plt.pause(0.8) # pause how many seconds
                    #plt.close()
                else:
                    continue
            # ...and assemble a 30 fps video holding each letter 20 frames.
            video_name = 'output.avi'
            frame = cv2.imread(images[0])
            height, width, layers = frame.shape
            video = cv2.VideoWriter(video_name, cv2.VideoWriter_fourcc(*'XVID'), 30, (width, height))
            for image in images:
                for _ in range(20):
                    video.write(cv2.imread(image))
            cv2.destroyAllWindows()
            video.release()
            print(os.path.join(dir_path, 'output.avi'))
            return os.path.join(dir_path, 'output.avi')
def test_ibm_french(self):
    """IBM should transcribe the French fixture to the expected phrase."""
    recognizer = sr.Recognizer()
    with sr.AudioFile(self.AUDIO_FILE_FR) as audio_source:
        captured = recognizer.record(audio_source)
    self.assertEqual(
        recognizer.recognize_ibm(
            captured,
            username=os.environ["IBM_USERNAME"],
            password=os.environ["IBM_PASSWORD"],
            language="fr-FR"),
        u"si la dictée numéro un ")
"""Transcribe ./sample.wav with Google's web API in Japanese."""
import speech_recognition as sr

AUDIO_FILE = "./sample.wav"

# use the audio file as the audio source
r = sr.Recognizer()
with sr.AudioFile(AUDIO_FILE) as source:
    audio = r.record(source)  # read the entire audio file

# BUG FIX: recognize_google() is what raises UnknownValueError /
# RequestError; the original called it *before* entering the try block, so
# neither handler could ever fire and the script crashed on bad audio.
try:
    result = r.recognize_google(audio, language='ja-JP')
    print(result)
except sr.UnknownValueError:
    print("Google Speech Recognition could not understand audio")
except sr.RequestError as e:
    print("Could not request results from Google Speech Recognition service; {0}".format(e))
def test_ibm_chinese(self):
    """IBM should transcribe the Chinese fixture to the expected phrase."""
    recognizer = sr.Recognizer()
    with sr.AudioFile(self.AUDIO_FILE_ZH) as audio_source:
        captured = recognizer.record(audio_source)
    self.assertEqual(
        recognizer.recognize_ibm(
            captured,
            username=os.environ["IBM_USERNAME"],
            password=os.environ["IBM_PASSWORD"],
            language="zh-CN"),
        u"砸 自己 的 脚 ")
def test_google_english(self):
    """Google should transcribe the English fixture as 'one two three'."""
    recognizer = sr.Recognizer()
    with sr.AudioFile(self.AUDIO_FILE_EN) as audio_source:
        captured = recognizer.record(audio_source)
    self.assertEqual(recognizer.recognize_google(captured), "one two three")
def get_test_sound_file(self):
    """Return an AudioFile source wrapping the hello.wav fixture."""
    fixture = sr.AudioFile('hello.wav')
    return fixture