def testLabelWav(self):
    tmp_dir = self.get_temp_dir()
    wav_data = self._getWavData()
    wav_filename = os.path.join(tmp_dir, "wav_file.wav")
    self._saveTestWavFile(wav_filename, wav_data)
    input_name = "test_input"
    output_name = "test_output"
    graph_filename = os.path.join(tmp_dir, "test_graph.pb")
    # Build a stub graph (string placeholder in, constant zeros out) and
    # serialize it so label_wav can load it like a real frozen model.
    with tf.Session() as sess:
        tf.placeholder(tf.string, name=input_name)
        tf.zeros([1, 3], name=output_name)
        with open(graph_filename, "wb") as f:
            f.write(sess.graph.as_graph_def().SerializeToString())
    labels_filename = os.path.join(tmp_dir, "test_labels.txt")
    with open(labels_filename, "w") as f:
        f.write("a\nb\nc\n")
    label_wav.label_wav(wav_filename, labels_filename, graph_filename,
                        input_name + ":0", output_name + ":0", 3)
def get_result(self, filename):
    # graph_name = './866_895.pb'
    # labels_name = './conv_labels.txt'
    wav = filename
    input_name = 'wav_data:0'
    output_name = 'labels_softmax:0'
    how_many_labels = 1
    result_dict = label_wav.label_wav(wav, self.labels, self.graph,
                                      input_name, output_name, how_many_labels)
    return result_dict
def infer(wav):
    ## wav should be the name, as a string, of a file under recorder/
    ## Put the wav file you want recognized into the recorder directory.
    ## It must be an uncompressed 16000 Hz, 1 s clip of a single English word.
    ## e.g. wav = '0b40aa8e_nohash_0.wav'
    labels = nlp_path + '/speech_commands_train/conv_labels.txt'  # xxx sets where the labels file is generated
    graph = nlp_path + '/graph/my_frozen_graph.pb'  # freeze sets where the graph is generated
    input_name = 'wav_data:0'
    output_name = 'labels_softmax:0'
    how_many_labels = 1
    wav = nlp_path + "/recorder/" + wav
    return label_wav.label_wav(wav, labels, graph, input_name, output_name, how_many_labels)
def recog(request):
    # Resample the recording to 16 kHz, the rate the frozen graph expects.
    sound = AudioSegment.from_wav("documents/theRecog.wav")
    sound = sound.set_frame_rate(16000)
    sound.export("documents/theNewRecog.wav", format="wav")
    result = label_wav(
        wav='documents/theNewRecog.wav',
        graph='my_frozen_graph.pb',
        labels='conv_labels.txt',
        input_name='wav_data:0',
        output_name='labels_softmax:0',
        how_many_labels=3,
    )
    notResult = ",".join(result)
    strResult = "\"" + notResult + "\""
    print(strResult)
    return render(request, 'recorder.html', {'recogResult': strResult})
def recog(request):
    # ctx['rlt'] is assumed to be defined elsewhere in this module.
    # Resample the recording to 16 kHz before classification.
    sound = AudioSegment.from_wav("documents/the_" + ctx['rlt'] + "_Recog.wav")
    sound = sound.set_frame_rate(16000)
    sound.export("documents/the_" + ctx['rlt'] + "_NewRecog.wav", format="wav")
    result = label_wav(
        wav='documents/the_' + ctx['rlt'] + '_NewRecog.wav',
        graph='my_frozen_graph.pb',
        labels='conv_labels.txt',
        input_name='wav_data:0',
        output_name='labels_softmax:0',
        how_many_labels=3,
    )
    notResult = ",".join(result)
    strResult = "\"" + notResult + "\""
    print(strResult)
    return HttpResponse(strResult)
def post(self):
    # Save the uploaded blob, then classify the extracted wav chunk
    # unless get_chunk reports that the recording is silence.
    audio_webm = request.files['blob']
    audio_webm.save(os.path.join(app.config['UPLOAD_FOLDER'], str(timestamp) + ".webm"))
    try:
        if get_chunk(str(timestamp) + ".webm") == "silence":
            raise FileExistsError
        else:
            top = label_wav("uploads/chunk0.wav",
                            "sp_train/conv_labels.txt",
                            "model/my_frozen_graph.pb",
                            "wav_data:0", "labels_softmax:0", 4)
            print(top)
            return make_response(jsonify(top), 200)
    except FileExistsError:
        return make_response(jsonify("silence"), 200)
def listen_label(graph, labels, wav_file_path):
    # listen for 3s
    RECORDING = True
    while RECORDING:
        inp = input("standby: ")
        if inp == 'r':
            print("\n*listening")
            frames = []
            # open stream
            stream = p.open(format=FORMAT,
                            channels=CHANNELS,
                            rate=RATE,
                            input=True,
                            frames_per_buffer=CHUNK)
            for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
                data = stream.read(CHUNK)
                frames.append(data)
            print("the chord is: ")
            stream.stop_stream()
            stream.close()
            # write out file
            WAVE_OUTPUT_FILENAME = "temp.wav"
            wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
            wf.setnchannels(CHANNELS)
            wf.setsampwidth(p.get_sample_size(FORMAT))
            wf.setframerate(RATE)
            wf.writeframes(b''.join(frames))
            wf.close()
            chord = lw.label_wav(wav_file_path, labels, graph,
                                 'wav_data:0', 'labels_softmax:0', 1)
            return chord
        elif inp == 'q':
            print("\n*exiting")
            RECORDING = False
            p.terminate()
def check_new_files():
    while True:
        next_wav_id = r.lpop('wav_id')
        if not next_wav_id:
            return
        next_file = f'/data/{next_wav_id}.wav'
        logger.info(f'processing {next_file}')
        result, error = None, None
        try:
            result = label_wav.label_wav(next_file)
            logger.info('result from label_wav: ' + str(result))
        except Exception as e:
            error = e
            logger.info('error from label_wav: ' + str(error))
            continue
        for k, v in result.items():
            label = k
            label_prob = v
            if label_prob > LABEL_PROB_THRESHOLD:
                notifications.push_police_notification({
                    'wav_id': next_wav_id,
                    'label': label,
                    'label_prob': str(label_prob),
                })
def listen(graph, labels, wav_file_path):
    frames = []
    stream = p.open(format=FORMAT,
                    channels=CHANNELS,
                    rate=RATE,
                    input=True,
                    frames_per_buffer=CHUNK)
    for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
        data = stream.read(CHUNK)
        frames.append(data)
    stream.stop_stream()
    stream.close()
    WAVE_OUTPUT_FILENAME = "temp.wav"
    wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
    wf.setnchannels(CHANNELS)
    wf.setsampwidth(p.get_sample_size(FORMAT))
    wf.setframerate(RATE)
    wf.writeframes(b''.join(frames))
    wf.close()
    chord = lw.label_wav(wav_file_path, labels, graph,
                         'wav_data:0', 'labels_softmax:0', 1)
    return str(chord)
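# The recording helpers above (listen_label and listen) rely on module-level
# PyAudio state (p, FORMAT, CHANNELS, RATE, CHUNK, RECORD_SECONDS) that the
# snippets do not show. Below is a minimal sketch of the setup they appear to
# assume; the concrete values (16 kHz mono, 3-second window, 1024-frame
# buffers) and the local import of label_wav are illustrative assumptions,
# not taken from the original sources.
import pyaudio
import wave

import label_wav as lw  # local copy of TensorFlow's speech_commands/label_wav.py

FORMAT = pyaudio.paInt16      # 16-bit samples
CHANNELS = 1                  # mono input
RATE = 16000                  # sample rate the speech_commands models expect
CHUNK = 1024                  # frames read per buffer
RECORD_SECONDS = 3            # length of each recording window
p = pyaudio.PyAudio()

# Example call: record from the microphone, then classify the saved clip.
# chord = listen('my_frozen_graph.pb', 'conv_labels.txt', 'temp.wav')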