def record(self, time):
    audio = PyAudio()
    stream = audio.open(input_device_index=self.device_index,
                        output_device_index=self.device_index,
                        format=self.format,
                        channels=self.channel,
                        rate=self.rate,
                        input=True,
                        frames_per_buffer=self.chunk)
    print "Recording..."
    frames = []
    for i in range(0, self.rate / self.chunk * time):
        data = stream.read(self.chunk)
        frames.append(data)
    stream.stop_stream()
    print "Recording Complete"
    stream.close()
    audio.terminate()

    write_frames = open_audio(self.file, 'wb')
    write_frames.setnchannels(self.channel)
    write_frames.setsampwidth(audio.get_sample_size(self.format))
    write_frames.setframerate(self.rate)
    write_frames.writeframes(''.join(frames))
    write_frames.close()
    self.convert()
def record(self, time, device_i=None):
    audio = PyAudio()
    print(audio.get_device_info_by_index(1))
    stream = audio.open(input_device_index=device_i,
                        output_device_index=device_i,
                        format=self.format,
                        channels=self.channel,
                        rate=self.rate,
                        input=True,
                        frames_per_buffer=self.chunk)
    print("REC: ")
    frames = []
    for i in range(0, int(self.rate / self.chunk * time)):
        data = stream.read(self.chunk)
        frames.append(data)
    stream.stop_stream()
    print("END")
    stream.close()
    audio.terminate()

    write_frames = open_audio(self.file, 'wb')
    write_frames.setnchannels(self.channel)
    write_frames.setsampwidth(audio.get_sample_size(self.format))
    write_frames.setframerate(self.rate)
    write_frames.writeframes(b''.join(frames))
    write_frames.close()
    self.convert()
class AudioRecorder():
    # Audio class based on PyAudio and wave

    def __init__(self, filename):
        self.open = True
        self.rate = 44100
        self.frames_per_buffer = 1024
        self.channels = 2
        self.startTime = 0
        self.endTime = 0
        self.duration = 0
        self.format = 8  # pyaudio.paInt16
        self.audio_filename = filename + ".wav"
        self.audio = PyAudio()
        self.stream = self.audio.open(format=self.format,
                                      channels=self.channels,
                                      rate=self.rate,
                                      input=True,
                                      frames_per_buffer=self.frames_per_buffer)
        self.audio_frames = []

    # Audio starts being recorded in a separate thread
    def record(self):
        self.stream.start_stream()
        while self.open == True:
            data = self.stream.read(self.frames_per_buffer)
            self.audio_frames.append(data)
            if self.open == False:
                break

    # Finishes the audio recording, and therefore the thread too
    def stop(self):
        if self.open == True:
            self.open = False
            time.sleep(0.5)
            self.stream.stop_stream()
            self.stream.close()
            self.audio.terminate()
            self.endTime = time.time()
            self.duration = self.endTime - self.startTime
            time.sleep(0.5)

            waveFile = wave.open(self.audio_filename, 'wb')
            waveFile.setnchannels(self.channels)
            waveFile.setsampwidth(self.audio.get_sample_size(self.format))
            waveFile.setframerate(self.rate)
            waveFile.writeframes(b''.join(self.audio_frames))  # audio written out at this point
            waveFile.close()
            time.sleep(0.5)
        return self.duration

    # Launches the audio recording function using a thread
    def start(self):
        self.startTime = time.time()
        self.open = True
        self.audio_thread = threading.Thread(target=self.record)
        self.audio_thread.start()
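# Usage sketch (not from the original source): one way to drive the threaded
# AudioRecorder above. Assumes a working default input device; the file name
# "session" and the five-second duration are illustrative only.
recorder = AudioRecorder("session")  # will write session.wav on stop()
recorder.start()                     # recording runs in a background thread
time.sleep(5)                        # capture roughly five seconds
duration = recorder.stop()           # frames are flushed to session.wav here
print("Recorded %.1f seconds" % duration)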
def Microphone(File, Seconds):
    CHUNK = 1024
    FORMAT = paInt16
    CHANNELS = 2
    RATE = 44100
    RECORD_SECONDS = float(Seconds)
    WAVE_OUTPUT_FILENAME = File

    p = PyAudio()
    stream = p.open(format=FORMAT,
                    channels=CHANNELS,
                    rate=RATE,
                    input=True,
                    frames_per_buffer=CHUNK)
    frames = []
    for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
        data = stream.read(CHUNK)
        frames.append(data)
    stream.stop_stream()
    stream.close()
    p.terminate()

    wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
    wf.setnchannels(CHANNELS)
    wf.setsampwidth(p.get_sample_size(FORMAT))
    wf.setframerate(RATE)
    wf.writeframes(b''.join(frames))
    wf.close()
def save_wav(filename, audio_frames, audio: pyaudio.PyAudio):
    waveFile = wave.open(filename, 'wb')
    waveFile.setnchannels(cfg['audio_channels'])
    waveFile.setsampwidth(audio.get_sample_size(pyaudio.paInt16))
    waveFile.setframerate(cfg['sample_rate'])
    waveFile.writeframes(b''.join(audio_frames))
    waveFile.close()
def pyrec(file_name, CHUNK=1024, FORMAT=paInt16, CHANNELS=2, RATE=16000, RECORD_SECONDS=2):
    p = PyAudio()
    stream = p.open(format=FORMAT,
                    channels=CHANNELS,
                    rate=RATE,
                    input=True,
                    frames_per_buffer=CHUNK)
    print("Recording started, please speak...")
    frames = []
    for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
        data = stream.read(CHUNK)
        frames.append(data)
    print("Recording finished.")
    stream.stop_stream()
    stream.close()
    p.terminate()

    wf = wave.open(file_name, 'wb')
    wf.setnchannels(CHANNELS)
    wf.setsampwidth(p.get_sample_size(FORMAT))
    wf.setframerate(RATE)
    wf.writeframes(b''.join(frames))
    wf.close()
    return
def start_recording(db, global_config):
    """
    Main routine to perform environment records.

    :param db: DB connection to Pony
    :param global_config: Global configuration dict
    :return: Record object, or -1 on error
    """
    if not path.isdir(global_config['record_path']):
        return -1

    audio = PyAudio()
    frames = list()
    stream = audio.open(format=FORM_1,
                        rate=SAMPLE_RATE,
                        channels=CHANNELS,
                        input_device_index=DEVICE_INDEX,
                        input=True,
                        frames_per_buffer=CHUNK)
    record_uuid = uuid4()
    record_file = get_filename(file_name_parts=[
        str(record_uuid)[35 - 11:],
        str(int(datetime.now().timestamp()))
    ])

    # Start the recording
    time_start = datetime.now()
    for _ in range(0, (SAMPLE_RATE // CHUNK) * int(global_config['record_period'])):
        data = stream.read(CHUNK, exception_on_overflow=False)
        frames.append(data)
    stream.stop_stream()
    stream.close()
    audio.terminate()
    time_end = datetime.now()
    # Finish the recording

    input_file = global_config['record_path'] + record_file
    wavefile = wave.open(input_file, 'wb')
    wavefile.setnchannels(CHANNELS)
    wavefile.setsampwidth(audio.get_sample_size(FORM_1))
    wavefile.setframerate(SAMPLE_RATE)
    wavefile.writeframes(b''.join(frames))
    wavefile.close()

    record_new = db.Record(start=time_start,
                           end=time_end,
                           size=stat(input_file).st_size / (1024 * 1024),
                           path=input_file,
                           status='recorded')
    return record_new
class QAudio:
    def __init__(self):
        self.CHUNK = Para.CHUNK
        self.FORMAT = paInt16
        self.CHANNELS = Para.CHANNELS
        self.RATE = Para.RATE
        self.RECORD_SECONDS = Para.RECORD_SECONDS
        self.SavePath = Para.SaveAudioMP3
        self.paudio = PyAudio()
        self.stream = self.paudio.open(format=self.FORMAT,
                                       channels=self.CHANNELS,
                                       rate=self.RATE,
                                       input=True,
                                       frames_per_buffer=self.CHUNK)

    def au_st(self):
        return self.paudio, self.stream

    def read(self):
        data = self.stream.read(self.CHUNK)
        return data

    # def close(self):
    #     self.stream.close()
    #     self.paudio.terminate()

    def ReFrames(self, Render, w, h):
        Frames = []
        N = int(self.RATE / self.CHUNK * self.RECORD_SECONDS)
        PerAng = 360 / N
        for i in range(0, N):
            start = i * PerAng
            end = (i + 1) * PerAng
            Render = cv2ImgAddText(Render, Para.Listen, int(w * 6 / 17), int(h * 6 / 21), (0, 0, 0), 100)
            cv2.ellipse(Render, (int(w / 2), int(h / 3)), (250, 250), 0, start, end, (255, 255, 0), -1)
            cv2.imshow("Render", Render)
            cv2.waitKey(1)
            Data = self.stream.read(self.CHUNK)
            Frames.append(Data)
        # Visualization still needs to be added here, driven by the speech duration
        # self.stream.stop_stream()
        # self.stream.close()
        # self.paudio.terminate()
        wf = wave.open(self.SavePath, "wb")
        wf.setnchannels(self.CHANNELS)
        wf.setsampwidth(self.paudio.get_sample_size(self.FORMAT))
        wf.setframerate(self.RATE)
        wf.writeframes(b''.join(Frames))
        wf.close()
        print("Saved audio successfully")
def record_wave(to_dir=None):
    '''to_dir: directory for the wave file'''
    if to_dir is None:
        to_dir = './'
    pa = PyAudio()
    stream = pa.open(format=FORMAT,
                     channels=CHANNELS,
                     rate=RATE,
                     input=True,
                     stream_callback=callback_record)

    # Blocking recording (alternative):
    # print '***Recording***'
    # for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
    #     audio_data = stream.read(CHUNK)
    #     save_buffer.append(audio_data)
    # print '***Record Finished***'

    # Non-blocking recording
    stream.start_stream()
    try:
        while stream.is_active():
            get = raw_input("Type '\q' or 'q' to stop:\n")
            if 'q' in get or '\q' in get:
                stream.stop_stream()
            time.sleep(0.1)
    finally:
        # Use pyaudio.Stream.stop_stream() to pause playing/recording and
        # pyaudio.Stream.close() to terminate the stream. Finally, terminate
        # the PortAudio session using pyaudio.PyAudio.terminate().
        stream.close()
        pa.terminate()

    # wave_name = time.strftime("%Y%m%d_%H%M", time.localtime()) + '.wav'
    wave_name = 'file.wav'
    if to_dir.endswith('/'):
        wave_path = to_dir + wave_name
    else:
        wave_path = to_dir + '/' + wave_name

    # save wave file
    wf = wave.open(wave_path, 'wb')
    wf.setnchannels(CHANNELS)
    wf.setsampwidth(pa.get_sample_size(FORMAT))
    wf.setframerate(RATE)
    wf.writeframes("".join(save_buffer))
    wf.close()

    # return
    print wave_name + ' saved'
    return wave_path
def record_audio(outfile_name=None, record_seconds=1, chunk=1024, channels=1, rate=22050):
    """
    Opens the microphone (the OS may ask for permission) and records
    <record_seconds> of audio. If an outfile_name is provided, the recording
    is saved into that file.

    returns: A normalized numpy array of the waveform.
    """
    format_ = paInt16
    p = PyAudio()
    stream = p.open(format=format_,
                    channels=channels,
                    rate=rate,
                    input=True,
                    frames_per_buffer=chunk)
    print("Recording Audio")
    frames_for_file = []
    frames_for_np = []
    mic_fudge = 1  # in chunks; delay start of recording to avoid dead time
    for i in range(0, int(rate / chunk * record_seconds) + mic_fudge + 1):
        if i < mic_fudge:
            # discard first chunk(s)
            stream.read(chunk)
            continue
        data = stream.read(chunk)
        frames_for_file.append(data)
        frames_for_np.append(np.frombuffer(data, dtype=np.int16))
    print("* Done Recording")
    stream.stop_stream()
    stream.close()
    p.terminate()

    if outfile_name is not None:
        with wave.open(outfile_name, 'wb') as wf:
            wf.setnchannels(channels)
            wf.setsampwidth(p.get_sample_size(format_))
            wf.setframerate(rate)
            wf.writeframes(b''.join(frames_for_file))

    frames_for_np = np.array(frames_for_np).flatten()
    return frames_for_np / np.max(frames_for_np)
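# Usage sketch (not part of the original snippet; the file name "probe.wav" is
# illustrative): record one second from the default microphone and inspect the
# normalized waveform, whose peak value is 1.0 by construction.
samples = record_audio(outfile_name="probe.wav", record_seconds=1)
print(samples.shape, samples.min(), samples.max())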
def run(self):
    audio = PyAudio()
    # Note: the wave module only supports 'rb'/'wb'; append mode is not available.
    wavfile = wave.open(self.audiofile, 'wb')
    wavfile.setnchannels(self.channels)
    wavfile.setsampwidth(audio.get_sample_size(self.format))
    wavfile.setframerate(self.rate)
    wavstream = audio.open(format=self.format,
                           channels=self.channels,
                           rate=self.rate,
                           input=True,
                           frames_per_buffer=self.chunk)
    while self.bRecord:
        wavfile.writeframes(wavstream.read(self.chunk))
    wavstream.stop_stream()
    wavstream.close()
    wavfile.close()
    audio.terminate()
def read_data(self):
    pa = PyAudio()
    stream = pa.open(format=paInt16,
                     channels=1,
                     rate=16000,
                     input=True,
                     frames_per_buffer=1)
    count = 0
    while True:
        if count >= 2:
            break
        print('.' + str(count))
        data = stream.read(16000)
        count += 1
        data = audioop.lin2lin(data, pa.get_sample_size(paInt16), TARGET_WIDTH)
        yield data
def button_record_Click():
    global sampwidth, wav_in, stream_in, state
    state = 1  # change the state flag to "recording"

    # update button states
    button_stop.configure(state=NORMAL)
    button_play.configure(state=DISABLED)
    button_back.configure(state=DISABLED)
    button_next.configure(state=DISABLED)
    button_record.configure(state=DISABLED)

    wav_in = PyAudio()
    stream_in = wav_in.open(format=FORMAT,
                            channels=CHANNELS,
                            rate=RATE,
                            input=True,
                            frames_per_buffer=CHUNK)
    sampwidth = wav_in.get_sample_size(FORMAT)
    echo_text.configure(text="Recording...", bg='red', fg='white', font=("Helvetica", 50))
    record_wav()
def liveListen(self) -> Path:
    """
    Records `self.recordLen` (default: 2) seconds of live audio to be tested.
    Returns the path to which the audio was saved.
    """
    # The audio is saved in `../data/tmp` because the output is temporary
    # and changes constantly.
    SAVE_PATH = Path("../data/tmp/llout.wav")

    # Parameters needed for recording the audio
    CHUNK = 1024
    FORMAT = paInt16
    CHANNELS = 2
    RATE = 44100
    RECORD_SECONDS = self.recordLen
    OUTPUT = SAVE_PATH

    pa = PyAudio()
    with self.__openstream(pa, FORMAT, CHANNELS, RATE, True, CHUNK) as stream:
        print("Recording...")
        frames = list()
        for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
            data = stream.read(CHUNK)
            frames.append(data)
        stream.stop_stream()
        stream.close()
        pa.terminate()
        print("Done!")

    with self.__openwf(str(OUTPUT), "wb") as wf:
        wf.setnchannels(CHANNELS)
        wf.setsampwidth(pa.get_sample_size(FORMAT))
        wf.setframerate(RATE)
        wf.writeframes(b"".join(frames))

    del pa
    return OUTPUT
def bytes_to_wav(data: bytes, audio: pyaudio.PyAudio) -> bytes:
    """Convert a raw byte sequence into WAV file data."""
    wav_file = io.BytesIO()
    wf = wave.open(wav_file, 'wb')
    wf.setnchannels(CHANNELS)
    wf.setsampwidth(audio.get_sample_size(FORMAT))
    wf.setframerate(INPUT_RATE)
    wf.writeframes(data)
    wf.close()
    if INPUT_RATE != OUTPUT_RATE:
        wav_file = io.BytesIO(wav_file.getvalue())
        seg = pydub.AudioSegment.from_file(wav_file)
        seg = seg.set_frame_rate(OUTPUT_RATE)
        wav_file = io.BytesIO()
        seg.export(wav_file, format='wav')
    return wav_file.getvalue()
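# Usage sketch (not from the original source): capture about two seconds of raw
# input at INPUT_RATE using the same module-level CHANNELS/FORMAT constants the
# function above relies on, then wrap the bytes into WAV data (resampled to
# OUTPUT_RATE if the rates differ). The 1024-frame chunk size is illustrative.
audio = pyaudio.PyAudio()
stream = audio.open(format=FORMAT, channels=CHANNELS, rate=INPUT_RATE,
                    input=True, frames_per_buffer=1024)
raw = b''.join(stream.read(1024) for _ in range(int(INPUT_RATE / 1024 * 2)))
stream.stop_stream()
stream.close()
wav_bytes = bytes_to_wav(raw, audio)
audio.terminate()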
def record(self, time=5):
    audio = PyAudio()
    stream = audio.open(format=self.format,
                        channels=self.channel,
                        rate=self.rate,
                        input=True,
                        frames_per_buffer=self.chunk)
    print "RECORDING START"
    frames = []
    for i in range(0, self.rate / self.chunk * time):
        data = stream.read(self.chunk)
        frames.append(data)
    stream.stop_stream()
    stream.close()
    audio.terminate()
    print "RECORDING STOP"

    write_frames = open_audio(self.audio_file, 'wb')
    write_frames.setnchannels(self.channel)
    write_frames.setsampwidth(audio.get_sample_size(self.format))
    write_frames.setframerate(self.rate)
    write_frames.writeframes(''.join(frames))
    write_frames.close()
    self.convert()
def run(self):
    audio = PyAudio()
    print("Sound device:", self.dev_idx)
    device_info = audio.get_device_info_by_index(self.dev_idx)
    self.channels = device_info["maxInputChannels"] if (
        device_info["maxOutputChannels"] < device_info["maxInputChannels"]
    ) else device_info["maxOutputChannels"]
    self.rate = int(device_info["defaultSampleRate"])
    print(color.yellow(str(device_info)))
    wavstream = audio.open(format=self.format,
                           channels=self.channels,
                           rate=self.rate,
                           input=True,
                           frames_per_buffer=self.chunk,
                           input_device_index=device_info["index"],
                           as_loopback=True)
    # wavstream = audio.open(format=self.format,
    #                        channels=self.channels,
    #                        rate=self.rate,
    #                        input=True,
    #                        frames_per_buffer=self.chunk)
    # If nothing is playing back, the loopback device produces no data and read() blocks.
    # Read the input stream in a loop.
    while self.bRecord:
        data = wavstream.read(self.chunk)
        self._frames.append(data)
    self._status = 1
    wavstream.stop_stream()
    wavstream.close()
    # Save to file
    print("Saving ....", self.audiofile)
    with wave.open(self.audiofile, 'wb') as wavfile:
        wavfile.setnchannels(self.channels)
        wavfile.setsampwidth(audio.get_sample_size(self.format))
        wavfile.setframerate(self.rate)
        wavfile.writeframes(b''.join(self._frames))
    audio.terminate()
    self._status = 2
def record():
    # global signal
    CHUNK = 1024
    FORMAT = paInt16
    CHANNELS = 1
    RATE = 16000
    RECORD_SECONDS = 10  # can in theory be any value, but must comfortably exceed the longest recording you actually need

    if os.path.exists("latestSpeech/") == False:
        os.mkdir("latestSpeech/")
    WAVE_OUTPUT_FILENAME = "latestSpeech/output.wav"

    p = PyAudio()
    stream = p.open(format=FORMAT,
                    channels=CHANNELS,
                    rate=RATE,
                    input=True,
                    frames_per_buffer=CHUNK)
    frames = []
    begin = time.time()
    for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
        if signal == 'n':  # the state of the flag decides when recording stops
            break
        data = stream.read(CHUNK)
        frames.append(data)
    end = time.time()
    print('Recording finished, duration: %s seconds' % round((end - begin), 2))

    stream.stop_stream()
    stream.close()
    p.terminate()

    wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
    wf.setnchannels(CHANNELS)
    wf.setsampwidth(p.get_sample_size(FORMAT))
    wf.setframerate(RATE)
    wf.writeframes(b''.join(frames))
    wf.close()
def record_audio(self):
    """Record audio data from the microphone."""
    self.is_alive = True
    self.done = False
    current_datetime = time.strftime("%Y%m%d%H%M%S", time.localtime())
    output_file_path = os.path.join(config.VOICE_PATH, current_datetime + '.wav')

    recorder = PyAudio()
    audio_stream = recorder.open(format=self.format,
                                 channels=self.channels,
                                 rate=self.rate,
                                 input=True,
                                 frames_per_buffer=self.chunk)
    audio_frames = []

    # begin recording
    while self.is_alive:
        audio_data = audio_stream.read(self.chunk)
        audio_frames.append(audio_data)

    # close stream
    audio_stream.stop_stream()
    audio_stream.close()
    recorder.terminate()

    # write into file
    wave_file = wave.open(output_file_path, 'wb')
    wave_file.setnchannels(self.channels)
    wave_file.setsampwidth(recorder.get_sample_size(self.format))
    wave_file.setframerate(self.rate)
    wave_file.writeframes(b''.join(audio_frames))
    wave_file.close()

    self.last_path = output_file_path
    if self.translate:
        self.last_translate_result = self.translate_to_text(self.last_path)
        os.remove(self.last_path)  # remove wave file
        self.last_path = None
    self.done = True
def Record(RECORD_SECONDS, FILENAME):
    '''Record a RECORD_SECONDS-long wav audio clip and return the .wav file path.'''
    CHUNK = 512
    FORMAT = paInt16
    CHANNELS = 2
    RATE = 44100
    # Use the current time as the file name:
    # WAVE_OUTPUT_FILENAME = strftime('%m_%d_%H_%M_%S', localtime(time())) + '.wav'
    WAVE_OUTPUT_FILENAME = FILENAME
    OUTPUT_PATH = 'recordings'

    p = PyAudio()
    stream = p.open(format=FORMAT,
                    channels=CHANNELS,
                    rate=RATE,
                    input=True,
                    frames_per_buffer=CHUNK)
    print('*' * 30 + 'Recording' + '*' * 30)
    frames = []
    # Read RATE * recording-time samples in total; dividing by CHUNK gives the frame count
    for i in range(0, int(RATE * RECORD_SECONDS / CHUNK)):
        data = stream.read(CHUNK)
        frames.append(data)
    print('*' * 30 + 'Done recording' + '*' * 30)

    stream.stop_stream()
    stream.close()
    p.terminate()

    wf = wave.open(path.join(OUTPUT_PATH, WAVE_OUTPUT_FILENAME + '.wav'), 'wb')
    wf.setnchannels(CHANNELS)
    wf.setsampwidth(p.get_sample_size(FORMAT))
    wf.setframerate(RATE)
    wf.writeframes(b''.join(frames))
    wf.close()
    return path.join(OUTPUT_PATH, WAVE_OUTPUT_FILENAME)
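# Usage sketch (illustrative values, not from the original source): record five
# seconds into recordings/test.wav and keep the returned path. Note that the
# function above returns the path without the .wav extension, exactly as written.
wav_path = Record(5, 'test')
print(wav_path)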
def run(self):
    # Get data from the parsed arguments
    CHUNK = self.args.chunk
    FORMAT = self.args.pa_format
    CHANNELS = self.args.channels
    RATE = self.args.rate
    DEVICE_INDEX = self.args.device
    if DEVICE_INDEX < 0:
        DEVICE_INDEX = None
    WAVE_OUTPUT_FILENAME = self.args.output

    # Initialise a PyAudio connection and open a stream.
    p = PyAudio()
    stream = p.open(format=FORMAT,
                    channels=CHANNELS,
                    rate=RATE,
                    input=True,
                    input_device_index=DEVICE_INDEX,
                    frames_per_buffer=CHUNK)

    # Set up the output wave file
    wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
    wf.setnchannels(CHANNELS)
    wf.setsampwidth(p.get_sample_size(FORMAT))
    wf.setframerate(RATE)

    # Record into the specified file (or stdout) until interrupted.
    self._recording = True
    while self._recording:
        wf.writeframes(stream.read(CHUNK))
        time.sleep(0.05)  # not sure if we need this

    # Stop and close the stream, close the file and terminate the PyAudio connection.
    stream.stop_stream()
    stream.close()
    wf.close()
    p.terminate()
def record(self, time, device_i=None):
    audio = PyAudio()
    print audio.get_device_info_by_index(1)
    stream = audio.open(input_device_index=device_i,
                        output_device_index=device_i,
                        format=self.format,
                        channels=self.channel,
                        rate=self.rate,
                        input=True,
                        frames_per_buffer=self.chunk)
    playDing()
    print "REC: "
    frames = []
    for i in range(0, self.rate / self.chunk * time):
        data = stream.read(self.chunk)
        frames.append(data)
    stream.stop_stream()
    print "END"
    stream.close()
    playDing()
    audio.terminate()

    write_frames = open_audio(self.file, 'wb')
    write_frames.setnchannels(self.channel)
    write_frames.setsampwidth(audio.get_sample_size(self.format))
    write_frames.setframerate(self.rate)
    write_frames.writeframes(''.join(frames))
    write_frames.close()
class recgnition:
    def __init__(self):
        self.pa = PyAudio()
        self.NUM_SAMPLES = 2000     # pyaudio internal buffer size
        self.SAMPLING_RATE = 8000   # sampling rate
        self.LEVEL = 1500           # threshold above which sound is saved
        self.COUNT_NUM = 20         # record if COUNT_NUM of NUM_SAMPLES samples exceed LEVEL
        self.SAVE_LENGTH = 20       # minimum recording length: SAVE_LENGTH * NUM_SAMPLES samples
        self.TIME_COUNT = 60        # recording time, in seconds
        self.Voice_String = []

    def getToken(self, type):
        if not os.path.exists('./temp' + str(type) + '.txt'):
            token, expires = self.requestToken(type)
        else:
            f = open('./temp' + str(type) + '.txt', 'r').readlines()
            if float(list(f)[1].replace('\n', '')) > time.time():
                token = list(f)[0].replace('\n', '')
            else:
                token, _ = self.requestToken(type)
        return token

    def requestToken(self, type=0):
        url = "https://openapi.baidu.com/oauth/2.0/token"
        if type == 0:
            values = {
                'grant_type': 'client_credentials',
                'client_id': 'gD5I9FwGLvIdcX55jXZrzT0Y',
                'client_secret': 'Sh43Mr460Kwh2ibHNPlwkzwBltfYiG1T'
            }
        else:
            values = {
                'grant_type': 'client_credentials',
                'client_id': 'MbktLVPac7Ywh8v8FlHqNUIL',
                'client_secret': 'REGs8YHi9nDg9HGnGv8mYLFSafoTQ6Tv'
            }
        data = urllib.urlencode(values)
        req = urllib2.Request(url, data)
        resp = urllib2.urlopen(req)
        result = resp.read()
        json_data = json.loads(result)
        token = json_data['access_token']
        expires = json_data['expires_in'] + time.time()
        f = open('./temp' + str(type) + '.txt', 'w')
        f.write(str(token) + '\n')
        f.write(str(expires) + '\n')
        f.close()
        return token, expires

    # def dump_res(self, buf):
    #     print buf

    def savewav(self, filename):
        wf = wave.open(filename, 'wb')
        # print self.Voice_String
        wf.setnchannels(1)
        wf.setsampwidth(self.pa.get_sample_size(paInt16))
        wf.setframerate(self.SAMPLING_RATE)
        # Voice_String is a list of raw chunks, so it must be joined before writing.
        wf.writeframes("".join(self.Voice_String))
        wf.close()

    def speech(self, text, cuid):
        token = self.getToken(0)
        url = ("http://tsn.baidu.com/text2audio?tex=" + text.decode('utf-8') +
               "&lan=zh&per=0&pit=1&spd=7&rate=8000&cuid=" + cuid +
               "&ctp=1&tok=" + token)
        os.system('mpg123 "%s"' % (url))
class AudioTool:
    '''
    This class provides recording and playback. If you want to play a file
    while recording, set play to True.

    Sample rate: 44100, bit depth: 16.
    '''

    def __init__(self):
        self.chunk = 1024
        self.channels = 2
        self.samplerate = 44100
        self.format = paInt16
        # open audio stream
        self.pa = PyAudio()
        self.save_buffer = []

    def record_play(self, seconds, play=False, file_play_path=None, file_save_path=None):
        NUM = int((self.samplerate / float(self.chunk)) * seconds)
        if play is True:
            swf = wave.open(file_play_path, 'rb')
        stream = self.pa.open(
            format=self.format,
            channels=self.channels,
            rate=self.samplerate,
            input=True,
            output=play,
            frames_per_buffer=self.chunk
        )
        # wave_data = []
        while NUM:
            data = stream.read(self.chunk)
            self.save_buffer.append(data)
            wave_data = np.fromstring(data, dtype=np.short)
            wave_data.shape = -1, 2
            wave_data = wave_data.T  # transpose
            # multiprocessing.Process
            # print int(data)
            print wave_data
            NUM -= 1
            if play is True:
                data = swf.readframes(self.chunk)
                stream.write(data)
                if not data:
                    break
        if play is True:
            swf.close()

        # stop stream
        stream.stop_stream()
        stream.close()

        # save wav file
        def _save_wave_file(filename, data):
            wf_save = wave.open(filename, 'wb')
            wf_save.setnchannels(self.channels)
            wf_save.setsampwidth(self.pa.get_sample_size(self.format))
            wf_save.setframerate(self.samplerate)
            wf_save.writeframes("".join(data))
            wf_save.close()

        _save_wave_file(file_save_path, self.save_buffer)
        del self.save_buffer[:]
        print file_save_path, " Record Successful!"

    def play(self, filepath):
        wf = wave.open(filepath, 'rb')
        stream = self.pa.open(
            format=self.pa.get_format_from_width(wf.getsampwidth()),
            channels=wf.getnchannels(),
            rate=wf.getframerate(),
            output=True,
        )
        NUM = int(wf.getframerate() / self.chunk * 15)
        print "playing.."
        while NUM:
            data = wf.readframes(self.chunk)
            if not data:
                break
            stream.write(data)
            NUM -= 1
        stream.stop_stream()
        del data
        stream.close()

    def close(self):
        self.pa.terminate()
def main():
    class Beatcounter(object):
        def __init__(self, threshold=THRES, dtype=FORMAT):
            self.prev_envelope = 0
            self.envelope = 0

        def onset(self, signal):
            signal = fromstring(signal, FORMAT)
            self.envelope = 0
            for i in arange(len(signal)):
                sample = signal[i]
                self.envelope += abs(sample)
            if self.envelope - self.prev_envelope > THRES:
                self.prev_envelope = self.envelope
                return True
            else:
                self.prev_envelope = self.envelope
                return False

    def callback(in_data, frame_count, time_info, flag):
        if flag:
            print("Playback Error: %i" % flag)
        played_frames = callback.counter
        callback.counter += frame_count
        wf.writeframes(b''.join(in_data))
        if beatcounter.onset(in_data):
            callback.tempo = 60 / (time_info['current_time'] - callback.prev_time)
            if callback.tempo > 250:
                return in_data, paContinue
            callback.tapcounter += 1
            callback.prev_time = time_info['current_time']
            if callback.tapcounter != 1:
                print callback.tapcounter, callback.tempo
            else:
                print callback.tapcounter, "N/A"
            if callback.tapcounter >= 4:
                return in_data, paComplete
        return in_data, paContinue

    callback.counter = 0
    callback.tapcounter = 0
    callback.prev_time = 0

    pa = PyAudio()
    beatcounter = Beatcounter(THRES)
    wf = wave.open(WAVE_TEMPO_FILENAME, 'wb')
    wf.setnchannels(CHANNELS)
    wf.setsampwidth(pa.get_sample_size(paInt16))
    wf.setframerate(FS)
    sleep(0.5)
    print("Tap four beat\n============")
    stream = pa.open(format=paInt16,
                     channels=CHANNELS,
                     input=True,
                     rate=FS,
                     frames_per_buffer=BLOCK_LENGTH,
                     output=False,
                     stream_callback=callback)
    while stream.is_active():
        sleep(0.1)
    stream.close()
    pa.terminate()
    wf.close()

    sleep(60 / callback.tempo)
    print("Record after four beat\n============")
    pa2 = PyAudio()
    wf2 = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
    wf2.setnchannels(CHANNELS)
    wf2.setsampwidth(pa.get_sample_size(paInt16))
    wf2.setframerate(FS)
    for i in arange(4):
        print('%d\a' % (4 - i))
        sleep(60 / callback.tempo)
    print("Go\n=======")
    RECORD_SECONDS = 60 / callback.tempo * 8
    stream2 = pa2.open(format=paInt16,
                       channels=CHANNELS,
                       input=True,
                       rate=FS,
                       frames_per_buffer=BLOCK_LENGTH,
                       output=False)
    print("* recording")
    frames = []
    for i in range(0, int(FS / BLOCK_LENGTH * RECORD_SECONDS)):
        data = stream2.read(BLOCK_LENGTH)
        frames.append(data)
    print("* done recording")
    stream2.stop_stream()
    stream2.close()
    pa2.terminate()
    wf2.writeframes(b''.join(frames))
    wf2.close()
class Listener(object):
    paudio = None
    aformat = None
    stream = None
    nchannels = 1
    bitrate = 16000
    chunk = 1024
    # Size of one second of recorded sound.
    fragment = 16000 / 1024
    max_silence_secs = 2
    max_buffer_secs = 1
    threshold_sound = 13000
    # threshold_filter = 10000
    min_cmd_secs = 1.8
    max_cmd_secs = 4.0
    data_silence_name = "resources/sound/silence.wav"
    data_silence = None

    def __init__(self):
        super(Listener, self).__init__()
        self.paudio = PyAudio()
        self.aformat = paInt16
        self.stream = self.paudio.open(format=self.aformat,
                                       channels=self.nchannels,
                                       rate=self.bitrate,
                                       input=True,
                                       frames_per_buffer=self.chunk)
        self.check_data_silence()
        return

    def __del__(self):
        self.stream.stop_stream()
        self.stream.close()
        self.paudio.terminate()
        return

    def listen(self, worker):
        # f = wave.open("chunk.wav", 'rb')
        # data_chunk = f.readframes(f.getnframes())
        # f.close()
        # count_filter = 0

        # Container for the sound levels of the previous `max_silence_secs`
        # seconds. If the sound level within this time frame is above the
        # specified threshold value, the user is considered to be speaking.
        Q_silence = deque(maxlen=self.fragment * self.max_silence_secs)

        # Container for the sound of the previous `max_buffer_secs` seconds
        # before recording starts. Needed because the user's voice does not
        # start at or above the threshold value, but from a lower sound level.
        # Also adds some sound padding to the beginning of the sound data.
        Q_buffer = deque(maxlen=self.fragment * self.max_buffer_secs)

        # Container for the sound data that will be processed.
        data = []
        # Time per iteration of the loop, i.e. recorded time.
        time_per_iter = float(self.chunk) / self.bitrate
        # Total recording time.
        time_total = 0.0
        # Specifies whether the sound is currently being recorded.
        is_recording = False

        while True:
            # Read a sample of sound of time `time_per_iter`.
            sample = self.stream.read(self.chunk)

            # Root Mean Squared of the sample is taken to determine the
            # sound level of the sample.
            # level = abs(rms(sample, 2))
            # Q_silence.append(level)
            Q_silence.append(abs(rms(sample, 2)))

            # Only add the current sample if the system is not currently
            # recording, since it is not needed otherwise.
            if not is_recording:
                # if level > self.threshold_filter:
                #     Q_buffer.append(sample)
                # else:
                #     if count_filter < 5:
                #         count_filter += 1
                #         Q_buffer.append(sample)
                #     else:
                #         count_filter = 0
                #         count_push = 0
                #         for i in range(0, 5):
                #             if len(Q_buffer) > 0:
                #                 Q_buffer.pop()
                #                 count_push += 1
                #         for i in range(0, count_push):
                #             Q_buffer.append(data_chunk)
                Q_buffer.append(sample)

            # If the maximum sound level is above the sound threshold, then
            # the user is speaking and the sound needs to be recorded.
            if max(Q_silence) >= self.threshold_sound:
                # The first few samples of sound tend to contain incorrect
                # sound level data, so wait until the silence queue has
                # filled before trying to process the data.
                if len(Q_silence) < Q_silence.maxlen:
                    continue

                # If the user has now started to speak, start recording.
                if not is_recording:
                    print "***** recording *****", max(Q_silence)
                    is_recording = True
                    # The user has now started speaking, so save the sound
                    # data that is stored in the buffer, i.e. the previous
                    # `max_buffer_secs` seconds.
                    for pos in range(0, len(Q_buffer)):
                        data.insert(pos, Q_buffer.popleft())
                else:
                    # Add the current sample to the data since the user
                    # already started speaking.
                    # if level > self.threshold_filter:
                    #     data.append(sample)
                    # else:
                    #     if count_filter < 5:
                    #         count_filter += 1
                    #     else:
                    #         count_filter = 0
                    #         count_push = 0
                    #         for i in range(0, 5):
                    #             if len(Q_buffer) > 0:
                    #                 Q_buffer.pop()
                    #                 count_push += 1
                    #         for i in range(0, count_push):
                    #             Q_buffer.append(data_chunk)
                    data.append(sample)

                # We are now recording the samples, so increment the
                # recording time.
                time_total += time_per_iter

                # If the recording time is greater than the max allowed
                # command time, then the data needs to be truncated.
                if time_total > self.max_cmd_secs:
                    print "\t***** too long, dropping beginning *****"
                    # The silence queue size could be greater than the
                    # maximum command recording time; in that case the
                    # sound level data from the beginning second that will
                    # be deleted is still in the queue.
                    if Q_silence.maxlen >= (time_total - 1) * self.fragment:
                        # Remove 1 second of data from the queue.
                        for i in range(0, Q_silence.maxlen - self.fragment):
                            Q_silence.popleft()
                    # Remove 1 second of sound from the data.
                    data = data[self.fragment:]
                    time_total -= 1.0

            # If we are recording the user's speech and the user has stopped
            # speaking, then we need to process the data.
            elif is_recording:
                # The first few samples of sound tend to contain incorrect
                # sound level data, so wait until the silence queue has
                # filled before trying to process the data.
                if len(Q_silence) < Q_silence.maxlen:
                    data = []
                    continue

                # The sound level has been below the specified level for
                # `max_silence_secs`, so the sound data needs to be
                # processed and recording stopped.
                print "***** done recording *****", time_total, max(Q_silence)
                is_recording = False

                # If the total recording time is less than the minimum
                # command time, then we should not process the data.
                if time_total >= self.min_cmd_secs:
                    self.process_data_sound(data, worker)

                # Reset the time, silence queue, and data.
                time_total = 0.0
                Q_silence.clear()
                data = []

        return True

    def process_data_sound(self, data, worker):
        # Add silence to the end of the recording since some speech at the
        # end tends to get cut off during the recognition process.
        data.extend(self.data_silence)
        self.save_wav(data)
        flac = self.convert_wav_flac()
        res = self.stt_google(flac)
        if res:
            # if res[0]['confidence'] > 0.75:
            #     print res[0]
            #     return worker.transition(
            #         [res[0]["utterance"].lower()], True)
            # else:
            utters = []
            for item in res:
                utters.append(item["utterance"].lower())
            return worker.transition(utters, True)
        return False

    def stt_google(self, data):
        lang_code = 'en-US'
        googl_speech_url = ('https://www.google.com/speech-api/v1/recognize?'
                            'xjerr=1&client=chromium&pfilter=2&lang=%s&maxresults=6' % (lang_code))
        hrs = {"User-Agent": "Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.7 "
                             "(KHTML, like Gecko) Chrome/16.0.912.63 Safari/535.7",
               'Content-type': 'audio/x-flac; rate=16000'}
        req = urllib2.Request(googl_speech_url, data=data, headers=hrs)
        p = urllib2.urlopen(req)
        j = json.loads(p.read())
        if "hypotheses" in j and len(j["hypotheses"]) > 0:
            return j["hypotheses"]
        return ""

    def save_wav(self, data, filename="temp.wav"):
        f = wave.open(filename, 'wb')
        f.setnchannels(self.nchannels)
        f.setsampwidth(self.paudio.get_sample_size(self.aformat))
        f.setframerate(self.bitrate)
        f.writeframes(b''.join(data))
        f.close()
        return True

    def convert_wav_flac(self, filename="temp.wav"):
        name, ext = path.splitext(filename)
        flac = "%s.flac" % name
        audiotools.open(filename).convert(flac, audiotools.FlacAudio)
        f = open(flac)
        data = f.read()
        f.close()
        return data

    def check_data_silence(self):
        if not path.exists(self.data_silence_name):
            print "Warning: Silence buffer not found! Generate one."
            return False
        f = wave.open(self.data_silence_name, 'rb')
        self.data_silence = f.readframes(f.getnframes())
        f.close()
        return True

    def generate_data_silence(self):
        data = []
        print "***** recording " + "*" * 64
        for i in range(0, self.fragment):
            data.append(self.stream.read(self.chunk))  # read from the instance stream
        print "***** done recording " + "*" * 59
        return self.save_wav(data, self.data_silence_name)
class AudioRecorder:
    def __init__(self, rate):
        self.pa = PyAudio()
        self.stream = self.pa.open(format=paInt16,
                                   channels=1,
                                   rate=rate,
                                   input=True,
                                   frames_per_buffer=conf.block_size)
        self.stream.stop_stream()
        self.rate = rate
        self.format = paInt16

    def __enter__(self, *args):
        return self

    def __exit__(self, *args):
        self.close()

    def start_recording(self):
        self.lock = Lock()
        self._do_stop_recording = False

        def record_func(self):
            data = b''
            while True:
                with self.lock:
                    if self._do_stop_recording:
                        break
                data += self.record(conf.block_size)
            with self.lock:
                self._do_stop_recording = False
                self._recorded_data = data

        self._recorder_thread = Thread(target=record_func, args=(self, ))
        self._recorder_thread.daemon = True
        self._recorder_thread.start()

    def finish_recording(self):
        sys.stdout.flush()
        with self.lock:
            sys.stdout.flush()
            self._do_stop_recording = True
            sys.stdout.flush()
        self._recorder_thread.join()
        sys.stdout.flush()
        return self._recorded_data

    def record(self, length):
        self.stream.start_stream()
        data = self.stream.read(length)
        self.stream.stop_stream()
        return data

    def bytes_to_numseq(self, b):
        size = self.pa.get_sample_size(self.format)
        i = 0
        while i < len(b):
            yield int.from_bytes(b[i:i + size], signed=True, byteorder='little')
            i += size

    def close(self):
        self.stream.close()
        self.pa.terminate()
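# Usage sketch (not from the original source): conf.block_size comes from the
# snippet's own configuration, "time" is assumed to be imported, and the
# 16000 Hz rate and three-second pause are illustrative only.
with AudioRecorder(rate=16000) as rec:
    rec.start_recording()                     # background thread keeps pulling blocks
    time.sleep(3)                             # capture roughly three seconds
    raw = rec.finish_recording()              # joins the thread and returns raw bytes
    samples = list(rec.bytes_to_numseq(raw))  # decode to signed integers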
def listen(self, level=1000, timeout=1, ignore_shoter_than=0.5, ignore_longer_than=5,
           language="sv_SE", device_i=None):
    audio = PyAudio()
    # print audio.get_device_info_by_index(1)
    stream = audio.open(input_device_index=device_i,
                        output_device_index=device_i,
                        format=self.format,
                        channels=self.channel,
                        rate=self.rate,
                        input=True,
                        frames_per_buffer=self.chunk)
    timeout_chunks = self.rate / self.chunk * timeout
    minmessage = self.rate / self.chunk * ignore_shoter_than
    maxmessage = self.rate / self.chunk * ignore_longer_than
    try:
        while True:
            print "Start listening... "
            frames = []
            data = ""
            olddata = ""
            self.count_silence = 0
            self.active = False
            while True:
                # for i in range(0, self.rate / self.chunk * time):
                data = stream.read(self.chunk)
                rms = audioop.rms(data, 2)
                # print str(rms) + '\r'
                # There is some noise, start recording
                if rms > level:
                    self.count_silence = 0
                    if self.active == False:
                        print "Recording..."
                        self.active = True
                        self.count_silence = 0
                        frames.append(olddata)
                if self.active:
                    frames.append(data)
                if rms < level and self.active:
                    self.count_silence += 1
                # If we have enough silence, send for processing
                if (self.count_silence > timeout_chunks) and self.active == True:
                    self.active = False
                    # print len(frames)          # 10 12
                    # print self.count_silence   # 8
                    if not len(frames) > self.count_silence + minmessage:
                        print "Disregarding noise"
                        frames = []
                        continue
                    if len(frames) > self.count_silence + maxmessage:
                        print "Ignoring too long recording"
                        frames = []
                        continue
                    print "Processing..."
                    break
                olddata = data

            write_frames = open_audio(self.file, 'wb')
            write_frames.setnchannels(self.channel)
            write_frames.setsampwidth(audio.get_sample_size(self.format))
            write_frames.setframerate(self.rate)
            write_frames.writeframes(''.join(frames))
            write_frames.close()
            self.convert()
            try:
                phrase, complete_response = self.speech_to_text(language)  # select the language
            except:
                phrase = ""
            print phrase
    except KeyboardInterrupt:
        # quit
        stream.stop_stream()
        # print "END"
        stream.close()
        audio.terminate()
        sys.exit()
    return
def record(fname="output",
           T=10,
           ch=2,
           sr=44100,
           chunk=1024,
           gdir=r"C:\Users\Usuario\Documents\Git\Público\GrupoFWP"):  # What is the chunk for, exactly?
    """Records a wav file named fname.wav in gdir.

    Variables:
    >> fname (str) [file name, without extension]
    >> T (int, float) [recording duration]
    >> ch (int) [number of channels]
    >> sr (int) [sampling rate]
    >> chunk (int) [???]
    >> gdir (str) [directory in which to save the file]
    """
    from os import getcwd, makedirs, chdir
    from os.path import isdir, isfile
    from pyaudio import PyAudio, paInt16
    import wave

    home = getcwd()

    if isdir(gdir) == False:
        makedirs(gdir)
    chdir(gdir)

    p = PyAudio()
    stream = p.open(format=paInt16,
                    channels=ch,
                    rate=sr,
                    input=True,
                    frames_per_buffer=chunk)
    print("* recording")
    frames = []
    for i in range(0, int(sr / chunk * T)):
        data = stream.read(chunk)
        frames.append(data)
    print("* done recording")

    stream.stop_stream()
    stream.close()
    p.terminate()

    fnameg = fname
    if isfile(fnameg + '.wav') == True:
        fnameg = fnameg + ' (2)'

    wf = wave.open((fnameg + '.wav'), 'wb')
    wf.setnchannels(ch)
    wf.setsampwidth(p.get_sample_size(paInt16))
    wf.setframerate(sr)
    wf.writeframes(b''.join(frames))
    wf.close()

    chdir(home)
class AudioRecorder(DIWA_THREAD):
    """
    A thread for capturing audio continuously.

    It keeps a buffer that can be saved to a file. By convention AudioRecorder
    is usually written in mixed case even though we prefer upper case for
    threading types.

    :param parent: Parent of the thread.
    :type parent: :py:class:`diwacs.GraphicalUserInterface`
    """

    def __init__(self, parent):
        DIWA_THREAD.__init__(self, name='AudioRecorder')
        self.parent = parent
        self.py_audio = PyAudio()
        self.stream = self.open_mic_stream()
        self.buffer = deque(maxlen=diwavars.MAX_LENGTH)

    def stop(self):
        """Stop the audio recorder thread."""
        DIWA_THREAD.stop(self)
        sleep(0.1)
        self.stream.close()

    def find_input_device(self):
        """Find a microphone device."""
        for i in range(self.py_audio.get_device_count()):
            # Internationalization hack...
            # LOGGER.debug("Selecting audio device %s / %s " %
            #              (str(i), str(self.py_audio.get_device_count())))
            # device_index = i
            # return device_index
            devinfo = self.py_audio.get_device_info_by_index(i)
            for keyword in ['microphone']:
                if keyword in devinfo['name'].lower():
                    return i
        default_device = self.py_audio.get_default_input_device_info()
        if default_device:
            return default_device['index']
        return None

    def open_mic_stream(self):
        """Open the stream object for the microphone."""
        device_index = None
        # uncomment the next line to search for a device.
        # device_index = self.find_input_device()
        stream = self.py_audio.open(
            format=diwavars.FORMAT,
            channels=diwavars.CHANNELS,
            rate=diwavars.RATE,
            input=True,
            input_device_index=device_index,
            frames_per_buffer=diwavars.INPUT_FRAMES_PER_BLOCK)
        return stream

    def run(self):
        """
        Continuously record from the microphone into the buffer.

        The buffer size is limited by the diwavars.MAX_LENGTH constant; only
        the most recent data is kept when there is too much to store.
        """
        while not self._stop.is_set():
            try:
                data = self.stream.read(diwavars.INPUT_FRAMES_PER_BLOCK)
                while len(self.buffer) >= self.buffer.maxlen:
                    element = self.buffer.popleft()
                    del element
                self.buffer.append(data)
            except IOError as excp:
                _logger().exception('Error recording: {0!s}'.format(excp))

    def save(self, event_id, path):
        """Save the buffer to a file."""
        try:
            _logger().debug('Saving audio buffer')
            date_string = datetime.now().strftime('%d%m%Y%H%M')
            filename = '{0}_{1}.wav'.format(event_id, date_string)
            filepath = os.path.join(path, 'Audio')
            if not os.path.exists(filepath):
                os.makedirs(filepath)
            filepath = os.path.join(filepath, filename)
            sample_size = self.py_audio.get_sample_size(diwavars.FORMAT)
            wave_file = wave.open(filepath, 'wb')
            wave_file.setnchannels(diwavars.CHANNELS)
            wave_file.setsampwidth(sample_size)
            wave_file.setframerate(diwavars.RATE)
            wave_file.writeframes(b''.join(self.buffer))
            wave_file.close()
        except:
            _logger().exception('audio save exception')
        # CallAfter(self.parent.ClearStatusText)
        self.parent.diwa_state.remove_from_swnp_data('audio')
        CallAfter(self.parent.UpdateScreens(update=True))
def make_wav_file(frames: List, wav_file: str = 'intercom.wav'):
    with wave.open(wav_file, 'wb') as wavFile:
        wavFile.setnchannels(CHANNELS)
        # get_sample_size is an instance method on PyAudio; use the module-level
        # helper instead (assumes `import pyaudio`).
        wavFile.setsampwidth(pyaudio.get_sample_size(FORMAT))
        wavFile.setframerate(RATE)
        wavFile.writeframes(b''.join(frames))
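# Usage sketch (not from the original source): capture roughly two seconds of
# raw frames with the same CHANNELS/FORMAT/RATE constants the helper expects,
# then write them out. The 1024-frame chunk size is illustrative.
pa = pyaudio.PyAudio()
stream = pa.open(format=FORMAT, channels=CHANNELS, rate=RATE,
                 input=True, frames_per_buffer=1024)
frames = [stream.read(1024) for _ in range(int(RATE / 1024 * 2))]
stream.stop_stream()
stream.close()
pa.terminate()
make_wav_file(frames)  # writes intercom.wav by default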
# zplot.add_img_plot(zname="blah", zdata=mag_vec)  # z, ydata=linspace(0, len(anr)-1, len(anr)), xdata=linspace(0, len(yok)-1, len(yok)))
# plot.add_plot("cross_sec", yname="Macvec1", ydata=c)
# plot.add_plot("cross_se2", yname="Macvec2", ydata=mag_vec[:, 75])
plot.show()

for x in xrange(NUMBEROFFRAMES):
    WAVEDATA = WAVEDATA + chr(c[x])

# fill remainder of frameset with silence
for x in xrange(RESTFRAMES):
    WAVEDATA = WAVEDATA + chr(128)

p = PyAudio()
FORMAT = p.get_format_from_width(1)
stream = p.open(format=p.get_format_from_width(1),
                channels=1,
                rate=BITRATE,
                output=True)
stream.write(WAVEDATA)
stream.stop_stream()
stream.close()
p.terminate()

if 0:
    import wave
    wf = wave.open('short_pulse.wav', 'wb')
    wf.setnchannels(1)
    wf.setsampwidth(p.get_sample_size(FORMAT))
    wf.setframerate(BITRATE)
    wf.writeframes(WAVEDATA)
    wf.close()
# INIT
p = PyAudio()
stream = p.open(format=FORMAT,
                channels=1,
                rate=RATE,
                input=True,
                output=True,
                frames_per_buffer=CHUNK_SIZE)
plot_range = CHUNK_SIZE / 2 + 1

# RECORD
print 'recording started'
while record:
    chunk = stream.read(CHUNK_SIZE)
    npdata = fromstring(chunk, dtype=int16)
    sample_width = p.get_sample_size(FORMAT)
    # print npdata, sample_width

    # silence check
    npdata = silence(npdata)

    Y = fft.rfft(npdata, CHUNK_SIZE)
    # print X, len(X)
    Y_abs = nplog(absolute(Y) + 1)
    # print Y_abs, len(Y_abs)
    # print max(Y_abs)

    record = quit()