def fun2(hk, fp):
    """Record audio via hotkey *hk* into file *fp*, print the best song match
    for its transcription, and print the elapsed wall-clock seconds.

    Args:
        hk: hotkey passed through to rec() to trigger/stop recording.
        fp: path of the recording file to (re)create and transcribe.
    """
    start = dt.now()
    # Bug fix: the original called os.remove(filepath) on an undefined global;
    # the intent is to clear out the previous recording at `fp`. Guard the
    # remove so a missing file doesn't raise FileNotFoundError.
    if os.path.exists(fp):
        os.remove(fp)
    rec(hk, fp)
    # Match the transcription against song names with the 4-char ".mp3"
    # extension stripped (same convention as searchThenPlay).
    print(search.searchA(transcribe(fp), [s[:-4] for s in songs]))
    elapsed = dt.now() - start
    print(elapsed.total_seconds())
def searchThenPlay():
    """Record a voice query, transcribe it, find the matching song in the
    global playlist, and advance playback to it.

    Side effects: updates the global `songnum` and calls the module-level
    `next()` helper to start the found song.
    """
    global songnum
    lap()  # reset the lap timer
    rec(searchHotkey, recordingfile)
    print('listening took ', lap(), ' seconds')
    transcription = transcribe(recordingfile)
    print('transcribing took ', lap(), ' seconds and I think you said ', transcription)
    found = search(transcription, [s[:-4] for s in songs])
    print('searching took ', lap(), ' seconds and found the song ', found)
    # Bug fix: list.index() always returns an int (or raises ValueError), so
    # the original `isinstance(index, int)` guard was always true and a
    # missing song crashed with an uncaught ValueError. Handle the miss
    # explicitly instead.
    try:
        index = songs.index(found + '.mp3')
    except ValueError:
        return  # transcription matched nothing in the playlist
    # next() advances playback; it increments songnum first, so point one
    # before the target.
    songnum = index - 1
    next()
def voice_recognize():
    """Record a short utterance, run it through the Baidu ASR client, and
    return a human-readable result string (Chinese, as the UI expects).

    Returns:
        str: '识别结果:...' with the recognized text and elapsed seconds on
        success, or '识别失败' when the service reports an error.
    """
    record.rec('test1.wav')   # capture audio to a wav file
    time_start = time.time()  # start timing the recognition round-trip
    # The ASR API consumes raw pcm, so convert the wav first.
    pcm_file = wav2pcm.wav_to_pcm('test1.wav')
    try:
        # Recognize the local file: 16000 Hz sample rate, dev_pid 1536
        # selects the Mandarin model.
        result = client.asr(get_file_content(pcm_file), 'pcm', 16000, {
            'dev_pid': 1536,
        })
        if result.get("err_no") == 0:
            time_end = time.time()  # stop timing
            res_str = '识别结果:%s,耗时%.2fs' % (result.get("result")[0],
                                             (time_end - time_start))
        else:
            res_str = "识别失败"
    finally:
        # Always delete the temporary pcm file (the original duplicated this
        # call in both branches; finally also covers an asr() exception).
        wav2pcm.deletef(pcm_file)
    return res_str
def main():
    """Estimate heart rate from a face video via per-channel brightness FFT.

    Pipeline: run the external tracker batch file, open its output video,
    average the R/G/B pixel intensity inside the tracked face box for each
    frame, FFT each channel, and report the dominant frequency within the
    physiologically plausible 40-120 bpm band.
    """
    face_c = cv2.CascadeClassifier(
        "classifier/haarcascade_frontalface_default.xml")
    recs = record.rec()  # per-frame face rectangles from the recorder module
    # Regenerate the results video; silence the tool's console output.
    subprocess.call("reproduce_results.bat",
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.DEVNULL)
    f = glob.glob("Results/*.avi")[0]
    vid = cv2.VideoCapture(f)
    samples = 750  # number of frames to analyze
    avg_r = np.zeros(samples)
    avg_g = np.zeros(samples)
    avg_b = np.zeros(samples)
    print("Starting ... ")
    if not vid.isOpened():
        print("not opened")
        input()
    for i in range(samples):
        ret, img = vid.read()
        # Light blur to suppress per-pixel sensor noise before averaging.
        blur = cv2.GaussianBlur(img, (7, 7), 0, 0)
        # NOTE(review): rectangles appear to be stored as (x, y, h, w) —
        # confirm against record.rec(); the slicing below assumes that order.
        x, y, h, w = recs[i]
        face_r = blur[y:(y + h), x:(x + w), 0]
        face_g = blur[y:(y + h), x:(x + w), 1]
        face_b = blur[y:(y + h), x:(x + w), 2]
        # Mean channel intensity over the face box for this frame.
        avg_r[i] = np.sum(face_r) / (w * h)
        avg_g[i] = np.sum(face_g) / (w * h)
        avg_b[i] = np.sum(face_b) / (w * h)
        cv2.rectangle(blur, (x, y), (x + w, y + h), (255, 0, 0), 1)
        printProgressBar(i, samples - 1, prefix='Progress:',
                         suffix='Complete', length=50)
        cv2.imshow('vid', blur)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    print("... Done")
    vid.release()
    cv2.destroyAllWindows()
    # Raw per-channel brightness traces over time.
    plt.plot(np.arange(len(avg_r)), avg_r, color="r")
    plt.plot(np.arange(len(avg_g)), avg_g, color="g")
    plt.plot(np.arange(len(avg_b)), avg_b, color="b")
    plt.show()
    fftr = np.abs(np.fft.rfft(avg_r))
    fftg = np.abs(np.fft.rfft(avg_g))
    fftb = np.abs(np.fft.rfft(avg_b))
    # Map FFT bin indices to beats per minute (factor 30*30 kept as in the
    # original — presumably 30 fps scaled to per-minute; TODO confirm).
    rrange = np.arange(0, len(fftr)) * 30 * 30 / len(fftr)
    grange = np.arange(0, len(fftg)) * 30 * 30 / len(fftg)
    brange = np.arange(0, len(fftb)) * 30 * 30 / len(fftb)
    # Restrict the peak search to the 40-120 bpm window.
    minf = np.where(rrange - 40 > 0)[0][0]
    maxf = np.where(rrange > 120)[0][0]
    # npmax is a module-level helper (likely an argmax alias — TODO confirm).
    ri = npmax(fftr[minf:maxf]) + minf
    gi = npmax(fftg[minf:maxf]) + minf
    bi = npmax(fftb[minf:maxf]) + minf
    print("\n\nRed max freq is {} bpm\n".format(rrange[ri]))
    print("Green max freq is {} bpm\n".format(rrange[gi]))
    print("Blue max freq is {} bpm\n".format(rrange[bi]))
    # Bug fix: the original averaged (red + blue + blue) / 3, counting the
    # blue peak twice and omitting the green one entirely.
    print("Average max freq is {} bpm\n".format(
        (rrange[ri] + rrange[gi] + rrange[bi]) / 3))
    plt.plot(rrange, fftr)
    plt.plot(grange, fftg)
    plt.plot(brange, fftb)
    # Combined spectrum of all three channels.
    plt.plot(np.arange(0, len(fftr)) * 30 * 30 / len(fftr),
             fftr[:] + fftg[:] + fftb[:])
    plt.show()
# Voice-assistant script: record a question, transcribe it, send the text to
# the Turing chatbot, synthesize the reply, and play the generated audio.
import record
import wav2pcm
import sound2word
import tuling
import speakout

record.rec("1k.wav")  # record from the microphone into 1k.wav
# pcm_file = wav2pcm.wav_to_pcm("1k.wav")  # convert the wav audio to pcm format
words = sound2word.asr_main("1k.wav")  # transcribe the recording via the speech API
new_words = tuling.Tuling(words)  # converse with the Turing chatbot
speakout.tts_main(new_words)  # synthesize the bot's reply text to speech
wav2pcm.play_mp3("test.mp3")  # play back the bot's audio file
def recordcommand():
    """Capture a single voice command using the module-level recorder."""
    recorder = rec()
    recorder.record()