def main():
    while True:
        record()
        try:
            text = recognize_audio_file()
            print(text)
            if text.strip() == "play music":
                speak("Which song do you want to play?")
                record(seconds=5)
                try:
                    text = recognize_audio_file()
                    speak(f'Finished recording. Playing {text}')
                    play_music(text)
                except sr.UnknownValueError:
                    speak("I didn't recognise the text. Say play music if you want to play music")
            elif text.strip() == 'stop':
                # elif: otherwise a recognised song title of "stop" would exit the loop
                speak("I'm outta here")
                break
            elif ''.join(text.strip().split()[:2]) == 'setvolume':
                # matches "set volume <level>" by joining the first two words
                m = alsaaudio.Mixer()
                m.setvolume(int(text.strip().split()[-1]))
        except sr.UnknownValueError:
            continue
def record(file='test.wav'):
    rgb.write()
    audio.recorder_init()
    rgb[0] = (255, 0, 0)  # use the LED to signal recording start/end
    rgb.write()
    audio.record(file, 1)
    rgb[0] = (0, 0, 0)
    rgb.write()
    audio.recorder_deinit()
def main():
    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "./resources/credentials/e3682f457e02.json"
    os.system("cls")
    project_id = "recruitertest-dd3ab"
    session_id = str(uuid.uuid4().hex[:12])
    language_code = "en-US"
    input_file_path = "./resources/audio/subject_input.wav"

    # [START DIALOG]
    complete_transcript = [[], []]

    # [QUICK DIALOG]
    # complete_transcript = detect_intent_texts(project_id, session_id, [
    #     "DEMO_START", "Diana Moon", "18", "Adaptable, ambitious, clever and blunt",
    #     "I have a bachelor in mathematics",
    #     "I have worked for Google as a data scientist for the past 3 years",
    #     "I am good at analysis and computers", "My communication is not great",
    #     "40 hours", "No"], language_code)

    # [NORMAL DIALOG]
    detect_intent_texts(project_id, session_id, ["DEMO_START"], language_code)
    while True:
        # text_input = input("Text input: ")
        # partial_transcript = detect_intent_texts(project_id, session_id,
        #                                          [text_input], language_code)
        audio.record(input_file_path)
        partial_transcript = detect_intent_audio(project_id, session_id,
                                                 input_file_path, language_code)
        # audio.record(input_file_path)
        # partial_transcript = detect_intent_stream(project_id, session_id,
        #                                           input_file_path, language_code)
        complete_transcript[0] = complete_transcript[0] + partial_transcript[0]
        complete_transcript[1] = complete_transcript[1] + partial_transcript[1]
        if poke(project_id, session_id, language_code):
            break
    # [END DIALOG]

    # [DATA]
    subject_info = get_subject_info(project_id, session_id, language_code)
    clean_subject_info = data.clean(subject_info)
    match_scores = data.match(subject_info)
    report.create(clean_subject_info, match_scores, complete_transcript, session_id)
def activeListen(self, use_local=None):
    self.stop_itunes()
    if use_local is None:
        use_local = self.use_local
    audio.play(fullpath('static/beep_hi.wav'))
    if self.prompt:
        self.userinput = raw_input("YOU: ")
    else:
        self.wav, energies = audio.record(
            verbose=True,
            threshold=self.threshold,
            emptyFrames=20
        )
        with open('test.wav', 'wb') as fp:
            fp.write(self.wav)
        if sum(energies) / len(energies) < self.threshold:
            self.undo_itunes()
            return ""
        try:
            if use_local:
                self.userinput = self.localSTT.transcribe(self.wav)
            else:
                self.userinput = self.cloudSTT.transcribe(self.wav)
        except Exception:
            print "Something went wrong"
            return ''
    print "YOU:", self.userinput
    audio.play(fullpath('static/beep_lo.wav'))
    self.undo_itunes()
    return self.userinput
def run(self):
    # set up presentation window color and size
    bgcolor = 'black'
    txtcolor = 'white'
    self.win = visual.Window(fullscr=True, color=bgcolor)
    # self.win = visual.Window((1200, 900), color=bgcolor)  # temporary window setup; swap for the line above when running the actual experiment
    self.text = visual.TextStim(self.win, color=txtcolor)
    words = [
        audio.read(self.stimuli_prefix + str(i + 1) + '.wav')
        for i in range(24)
    ]
    recordings = []
    self.text.text = '||'
    self.text.draw()
    self.win.flip()
    audio.play(audio.read(self.instructions_folder + self.mode + '.wav'), wait=True)
    key = event.waitKeys(keyList=['return'])
    for word in words:
        self.text.text = '+'
        self.text.draw()
        self.win.flip()
        audio.play(word, wait=True)
        self.text.text = '-'
        self.text.draw()
        self.win.flip()
        # record for the stimulus duration plus one second (44100 Hz samples)
        recordings += [audio.record((len(word) / 44100) + 1, wait=True)]
    for i in range(len(words)):
        audio.write(self.log_prefix + str(i + 1) + '.wav', recordings[i])
    self.win.close()
def run(self):
    # set up presentation window color and size
    bgcolor = 'black'
    txtcolor = 'white'
    self.win = visual.Window(fullscr=True, color=bgcolor)
    # self.win = visual.Window((1200, 900), color=bgcolor)  # temporary window setup; swap for the line above when running the actual experiment
    self.text = visual.TextStim(self.win, color=txtcolor)
    # countdown: a shrinking row of plus signs, one second per step
    for i in range(5, 0, -1):
        self.text.text = '+' * (2 * i - 1)
        self.text.draw()
        self.win.flip()
        core.wait(1)
    self.text.text = '-'
    self.text.draw()
    self.win.flip()
    # record
    samples = audio.record(60, wait=True)
    self.text.text = '+'
    self.text.draw()
    self.win.flip()
    audio.write(self.log_fname, samples)
    core.wait(3)
    self.win.close()
def test_trial(self, trial):
    # present instruction trial
    self.image.image = self.stimuli_folder + trial['Picture']
    self.image.draw()
    self.win.callOnFlip(self.clock.reset)
    self.isi.complete()
    self.win.flip()
    if trial['trialAudio'] != '':
        audio.play(self.instructions[trial['trialAudio']], wait=True)
    if trial['answer_type'] == 'spoken':
        audio.write(self.log_prefix + '_' + trial['Picture'][:-4] + '.wav',
                    audio.record(25, wait=True))
    else:
        keys = event.waitKeys(keyList=['escape'] + trial['keyboard'].split(' '),
                              timeStamped=self.clock)
        trial['keypress'], trial['RT'] = keys[0]
        if trial['keypress'] == 'escape':
            core.quit()
        if trial['keypress'] == trial['key']:
            trial['ACC'] = 1
        else:
            trial['ACC'] = 0
    # flip buffer again and start ISI timer
    self.win.callOnFlip(self.isi.start, float(trial['ITI']) / 1000 - self.frame_dur)
    self.win.flip()
    return trial
def speechToText():
    url = ('https://speech.platform.bing.com/speech/recognition/interactive/'
           'cognitiveservices/v1?language=en-GB&format=simple')
    data = open(audio.record(), 'rb').read()
    headers = {
        'Ocp-Apim-Subscription-Key': 'e5f16159f94c40f78cc5e720421ea231',
        'Content-type': 'audio/wav; codec=audio/pcm; samplerate=16000'
    }
    res = requests.post(url=url, data=data, headers=headers)
    print(res.json())
    text = res.json()['DisplayText'].lower()
    if reset(text):
        return [-1]
    if setRainbow(text):
        return [1]  # mode for rainbow
    c = getTheColor(text)
    if c != []:
        return c
    f = setFlash(text)
    if f != -1:
        return [3, f]
    l = setLevel(text)
    if l != -1:
        return [4, l]
    sl = setStartFlash(text)
    if sl != -1:
        return [5, sl]
    return []
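# A hypothetical dispatcher sketch for speechToText()'s return codes, shown
# only to document the protocol above. The handler names (do_reset,
# set_rainbow_mode, set_color, set_flash_rate, set_brightness_level,
# set_start_flash) are illustrative assumptions, not the original module's
# API, and the ordering assumes color values never start with 3, 4, or 5.
def dispatch(result):
    if result == [-1]:
        do_reset()                       # "reset" command
    elif result == [1]:
        set_rainbow_mode()               # rainbow mode
    elif result[:1] == [3]:
        set_flash_rate(result[1])        # [3, f] from setFlash
    elif result[:1] == [4]:
        set_brightness_level(result[1])  # [4, l] from setLevel
    elif result[:1] == [5]:
        set_start_flash(result[1])       # [5, sl] from setStartFlash
    elif result:
        set_color(result)                # a color value from getTheColor
    # an empty list means the utterance was not recognised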
def rec(i, text_output, filename=""):
    """
    Initiate the loop of recording and analyzing audio for I iterations.

    If TEXT_OUTPUT, an output file containing an integer, named FILENAME,
    is created.
    """
    iteration = 0
    while iteration < i:
        record()
        loudness = str(int(analyze()))
        print("Loudness measure: " + loudness)
        if text_output:
            with open(filename, 'w', encoding='utf-8') as file:
                file.write(loudness)
        iteration += 1
    terminate()
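# A minimal usage sketch: three record/analyze passes, rewriting the current
# loudness value to a text file on each pass (the file name is illustrative).
rec(3, text_output=True, filename="loudness.txt")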
def passing_var(audio_input, passing):
    if audio_input in keyword_list and passing == 0:
        programs.tts(response)
        if arg == '-t':
            command(input("> ").lower())
        else:
            reg_input = audio.record(command_seconds, 1)
            # check the sentinel first; otherwise any non-empty result,
            # including "invalid input", would be re-dispatched below
            if reg_input == "invalid input":
                audio.tts(reg_input)
            elif reg_input is not None and reg_input is not False:
                passing_var(reg_input, 1)
    elif audio_input in running_list and passing == 0:
        command(audio_input)
    elif audio_input not in keyword_list and passing == 0:
        listen()
    elif passing == 1:
        command(audio_input)
def run(self):
    # set up presentation window color and size
    bgcolor = 'black'
    txtcolor = 'white'
    self.win = visual.Window(fullscr=True, color=bgcolor)
    # self.win = visual.Window((1200, 900), color=bgcolor)  # temporary window setup; swap for the line above when running the actual experiment
    self.text = visual.TextStim(self.win, color=txtcolor)
    with open(self.trials_fname, 'rU') as trial_file:
        # read trial structure
        trials = list(csv.DictReader(trial_file, delimiter='\t'))
    # preload stimuli
    stimuli = [audio.read(self.stimuli_folder + trial['stimulus'])
               for trial in trials]
    recordings = []
    self.text.text = '||'
    self.text.draw()
    self.win.flip()
    audio.play(audio.read(self.instructions_folder + self.mode + '.wav'), wait=True)
    key = event.waitKeys(keyList=['return'])
    self.win.flip()
    for stimulus in stimuli:
        self.text.text = '+'
        self.text.draw()
        self.win.flip()
        audio.play(stimulus, wait=True)
        self.text.text = '-'
        self.text.draw()
        self.win.flip()
        # record for the stimulus duration plus one second (44100 Hz samples)
        recordings += [audio.record((len(stimulus) / 44100.0) + 1, wait=True)]
        keys = event.getKeys(['escape'])
        if 'escape' in keys:
            break
    for i in range(len(recordings)):
        audio.write(self.log_prefix + trials[i]['stimulus'], recordings[i])
    self.win.close()
def semiActiveListen(self, listen_time, use_local=None):
    self.stop_itunes()
    if use_local is None:
        use_local = self.use_local
    if self.prompt:
        self.userinput = raw_input("YOU: ")
    else:
        frames, self.threshold = audio.keyRecording(
            LISTEN_TIME=listen_time,
            verbose=True
        )
        if not frames:
            self.undo_itunes()
            return ''
        self.wav, energies = audio.record(
            verbose=True,
            threshold=self.threshold,
            initialFrames=frames,
            emptyFrames=20
        )
        if use_local:
            self.userinput = self.localSTT.transcribe(self.wav)
        else:
            self.userinput = self.cloudSTT.transcribe(self.wav)
    self.undo_itunes()
    return self.userinput
import numpy as np
import tensorflow as tf

import audio
import constant
from mfcc import mfcc


def predict(model_name, filename):
    samples, sample_rate = audio.decode_wav(filename)
    coefficients = mfcc(samples, sample_rate)
    coefficients = tf.reshape(tf.cast(coefficients, tf.float32), [1, 98, 13, 1])
    model = tf.keras.models.load_model(model_name)
    prediction = model.predict(coefficients)
    # np.argmax gives the index of the best class; compare the probability
    # itself (np.max) against the confidence threshold
    if np.max(prediction) >= 0.8:
        output = constant.train_commands[np.argmax(prediction)]
    else:
        output = None
    return output


if __name__ == "__main__":
    audio.record("test.wav")
    prediction = predict('./models/model2.h5', 'test.wav')
    print(prediction)
def audioIn(loopback_Fs, loopback_Fc, upsample_factor, duration=5.):
    # renamed from `input` to avoid shadowing the builtin
    samples = audio.record(int(loopback_Fs * duration), loopback_Fs)
    return processInput(samples, loopback_Fs, loopback_Fc, upsample_factor)
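# Usage sketch: capture five seconds of loopback audio and demodulate it.
# The values mirror the argparse defaults in the CLI snippet further below
# (sample rate 48 kHz, carrier 19 kHz, upsample factor 16).
payload = audioIn(loopback_Fs=48000., loopback_Fc=19000., upsample_factor=16)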
# Please make a selection from below:       #
#                                           #
#   1. Record audio                         #
#   2. List files                           #
#   3. Upload audio                         #
#   X. Exit                                 #
#                                           #
#############################################
""")
selection = input("Selection: ")
if selection == "1":
    os.system("clear")
    os.chdir(LOCAL_PATH)  # os.system("cd ...") would only change the subshell's directory
    print("Recording... Press Enter to end recording.")
    audio.record()
    wavFileName = input("Enter a filename: ")
    if wavFileName[-4:] != '.wav':
        wavFileName = wavFileName + '.wav'
    wavFileName = wavFileName.replace(' ', '')
    audio.saveAudio(wavFileName)
if selection == "2":
    os.system("clear")
    for root, dirs, files in os.walk(LOCAL_PATH):
        for file in files:
            if file.endswith(".wav"):
                print(os.path.join(root, file))
if selection == "3":
def recv(self, duration):
    return list(audio.record(duration * self.samplerate, self.samplerate))
def recordAudio(self, duration, file):
    audio.record(duration, file)
def record_audio():
    record("record_one")
    # open the recording once, in a context manager, instead of leaking
    # an unused file handle and reopening the file inline
    with open("./recordings/record_one", "rb") as f:
        requests.post("http://" + PI_IP + ":8000/upload",
                      files={'upload': f})
def startListening():
    audio.record(ContinuousReceiver(), Fs)
def answer(driver: webdriver.Chrome, ans: str) -> None:
    """Gives an answer."""
    box = driver.find_element_by_id("qpAnswerInput")
    box.send_keys(ans)
    box.send_keys(Keys.RETURN)


if __name__ == "__main__":
    driver = login()
    enter_game(driver, input("Room name? "), int(input("Room number? ")),
               input("Room password? "))
    while True:
        try:
            block_recording(driver)
            print("starting recording...")
            data = audio.record(LEN)
            audio.sd.wait()  # block on the recording
            print("processing...")
            vol1, clip = audio.preprocess(data)
            ans = main.find_song(vol1, clip, VERBOSE)
            if audio.np.max(clip) == 128:  # 0 is at 128 because of the scaling
                print("Clip is silent. Are you sure loopback is working?")
            answer(driver, ans)
        except KeyboardInterrupt:
            driver.quit()
            exit()
        except Exception as e:
            print(e)
            ans = input("quit driver?\n")
            if len(ans) > 0 and ans[0] == "y":
                driver.quit()
parser.add_argument('--Fs', '-s', metavar='fs', type=float,
                    help='audio sample rate in Hz', default=48000.)
parser.add_argument('--Fc', '-c', metavar='fc', type=float,
                    help='carrier (center) frequency in Hz', default=19000.)
parser.add_argument('--rate', '-r', metavar='rate', type=int,
                    help='modulation and coding scheme (MCS) index, from 0 to 7',
                    choices=range(8), default=0)


class BandwidthAction(argparse.Action):
    def __call__(self, parser, namespace, values, option_string=None):
        namespace.upsample_factor = int(round(namespace.Fs / values))


parser.add_argument('-b', '--bandwidth', metavar='bw', type=float,
                    action=BandwidthAction, dest='bandwidth', default=3000.0,
                    help='desired bandwidth in Hz; if used with a non-default '
                         '--Fs, this option must follow that one')
parser.add_argument('--rx', action='store_true', default=False,
                    help='listen for incoming Blurts')
parser.add_argument('--tx', action='store_false', dest='rx',
                    help='send a Blurt')
parser.add_argument('message', nargs='?', default='Hello, world!')
args = parser.parse_args(namespace=argparse.Namespace(upsample_factor=16))
bandwidth = args.Fs / args.upsample_factor
if args.Fc + bandwidth * .5 > args.Fs * .5:
    parser.error('Center frequency plus half of bandwidth cannot exceed '
                 'half of sampling frequency (Nyquist criterion)')
if args.rx:
    print('Listening for transmissions with a center frequency of %.0f Hz '
          'and a bandwidth of %.0f Hz (sample rate %.0f)'
          % (args.Fc, args.Fs / args.upsample_factor, args.Fs))
    audio.record(ContinuousReceiver(Fs=args.Fs, Fc=args.Fc,
                                    upsample_factor=args.upsample_factor),
                 args.Fs)
else:
    print('Transmitting %r with a center frequency of %.0f Hz and a '
          'bandwidth of %.0f Hz (sample rate %.0f)'
          % (args.message, args.Fc, args.Fs / args.upsample_factor, args.Fs))
    # list() keeps this working on Python 3, where map() returns an iterator
    input_octets = np.array(list(map(ord, args.message)), dtype=np.uint8)
    output = wifi.encode(input_octets, args.rate)
    audioLoopback.audioOut(output, args.Fs, args.Fc, args.upsample_factor, None)
import argparse

from audio import play, record
from create import create_envs

if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument(
        '-o', '--option', dest='option',
        help="To Play Audio, python audio.py --option play\n"
             "To Record Audio, python audio.py --option record")
    arg = parser.parse_args()
    if arg.option is None:
        create_envs()
        # print_help() writes to stdout itself and returns None,
        # so wrapping it in print() would also emit a stray "None"
        parser.print_help()
        print("To activate env : conda activate py36")
    elif arg.option == "play":
        play()
    elif arg.option == "record":
        record()
    else:
        parser.print_help()