def inference_wav(wav_file):
    """Run the trained audio classifier on a single wav file and report accuracy.

    Args:
        wav_file: path to a 16-bit PCM wav file; its ground-truth class index
            is parsed from the UrbanSound8K-style filename via util.urban_labels.

    Returns:
        float: accuracy of the per-frame predictions against the single
        ground-truth label (every VGGish frame of the file shares it).

    Raises:
        ValueError: if VGGish feature extraction yields nothing.
    """
    # Ground-truth class index, derived from the filename.
    label = util.urban_labels([wav_file])[0]
    graph = tf.Graph()
    with tf.Session(graph=graph, config=SESS_CONFIG) as sess:
        # Extract one VGGish embedding per audio frame of the file.
        with VGGishExtractor(VGGISH_CKPT,
                             VGGISH_PCA,
                             audio_params.VGGISH_INPUT_TENSOR_NAME,
                             audio_params.VGGISH_OUTPUT_TENSOR_NAME) as ve:
            vggish_features = ve.wavfile_to_features(wav_file)
            # Explicit check instead of `assert`, which is stripped under -O.
            if vggish_features is None:
                raise ValueError(
                    'failed to extract VGGish features from {}'.format(wav_file))
            # Every frame of the file shares the same ground-truth label.
            labels = [label] * vggish_features.shape[0]

        # Restore the classifier weights into the freshly built graph.
        _restore_from_defined_and_ckpt(sess, CKPT)

        # Look up the classifier's input/output tensors by their graph names.
        inputs = graph.get_tensor_by_name(audio_params.AUDIO_INPUT_TENSOR_NAME)
        outputs = graph.get_tensor_by_name(audio_params.AUDIO_OUTPUT_TENSOR_NAME)

        predictions = sess.run(outputs, feed_dict={inputs: vggish_features})
        idxes = np.argmax(predictions, 1)  # predicted class per frame
        probs = np.max(predictions, 1)     # confidence of each prediction
        print(predictions)
        print(idxes)
        print(labels)
        print(probs)
        acc = accuracy_score(labels, idxes)
        print('acc:', acc)
        return acc
def arange_urban_sound_file_by_class():
    """Arrange UrbanSound8K wav files into one sub-directory per class.

    Walks the source audio directory, derives each file's class index from
    its filename (via urban_labels) and copies it into
    ``<dst_dir>/<class_name>/``.
    """
    # NOTE(review): directory layout is hard-coded for a specific machine.
    src_dir = '/data1/data/UrbanSound8K-16bit/audio'
    dst_dir = '/data1/data/UrbanSound8K-16bit/audio-classfied'
    CLASSES = [
        'air conditioner', 'car horn', 'children playing', 'dog bark',
        'drilling', 'engine idling', 'gun shot', 'jackhammer',
        'siren', 'street music'
    ]
    # Directory-safe class names (spaces -> underscores).
    CLASSES_STRIPED = [c.replace(' ', '_') for c in CLASSES]
    # BUG FIX: the original iterated over the directory path *string*
    # (`for src in src_paths:`), yielding single characters instead of
    # file paths. Walk the tree instead (UrbanSound8K ships its audio in
    # fold sub-directories) and process every wav file found.
    for root, _dirs, filenames in os.walk(src_dir):
        for filename in filenames:
            if not filename.lower().endswith('.wav'):
                continue  # skip metadata/non-audio files
            src = os.path.join(root, filename)
            lbl = urban_labels([src])[0]
            dst = '{dir}/{label}'.format(dir=dst_dir, label=CLASSES_STRIPED[lbl])
            maybe_create_directory(dst)
            maybe_copy_file(
                src, '{dst}/{name}'.format(dst=dst, name=os.path.split(src)[-1]))
def close(self):
    """Release the underlying TensorFlow session held by this extractor."""
    self.sess.close()


if __name__ == '__main__':
    # Ad-hoc smoke test for VGGishExtractor against a local UrbanSound8K copy.
    import audio_params
    import vggish_params
    import timeit
    from audio_util import urban_labels

    # NOTE(review): machine-specific test paths — adjust before running.
    wav_file = 'F:/3rd-datasets/UrbanSound8K-16bit/audio-classified/siren/90014-8-0-1.wav'
    wav_dir = 'F:/3rd-datasets/UrbanSound8K-16bit/audio-classified/siren'
    wav_filenames = os.listdir(wav_dir)
    wav_files = [os.path.join(wav_dir, wav_filename) for wav_filename in wav_filenames]
    # Ground-truth labels parsed from the UrbanSound8K-style filenames.
    wav_labels = urban_labels(wav_files)

    # test VGGishExtractor
    time_start = timeit.default_timer()
    with VGGishExtractor(audio_params.VGGISH_CHECKPOINT,
                         audio_params.VGGISH_PCA_PARAMS,
                         vggish_params.INPUT_TENSOR_NAME,
                         vggish_params.OUTPUT_TENSOR_NAME) as ve:
        # Per-frame VGGish embeddings for a single file.
        vggish_features = ve.wavfile_to_features(wav_file)
        print(vggish_features, vggish_features.shape)
        # Write the first 10 files (with their labels) to a TFRecord file.
        ve.create_records('./vggish_test.records', wav_files[:10], wav_labels[:10])
    time_end = timeit.default_timer()
    # print('cost time: {}s, {}s/wav'.format((time_end-time_start), (time_end-time_start)/(i+1)))