def inference_wav(wav_file):
    """Run the trained audio classifier on a single wav file and print results.

    Extracts VGGish features for *wav_file*, restores the classifier graph
    from ``CKPT``, predicts a class for every feature frame, and prints the
    frame-level accuracy against the file's ground-truth label.

    Args:
        wav_file: path to a wav file whose class can be derived by
            ``util.urban_labels`` (UrbanSound8K naming — TODO confirm).

    Returns:
        Frame-level accuracy (float) of the predictions.

    Raises:
        ValueError: if VGGish feature extraction yields no features.
    """
    # Ground-truth class index derived from the file path/name.
    label = util.urban_labels([wav_file])[0]
    graph = tf.Graph()
    with tf.Session(graph=graph, config=SESS_CONFIG) as sess:
        with VGGishExtractor(VGGISH_CKPT,
                             VGGISH_PCA,
                             audio_params.VGGISH_INPUT_TENSOR_NAME,
                             audio_params.VGGISH_OUTPUT_TENSOR_NAME) as ve:
            vggish_features = ve.wavfile_to_features(wav_file)
        # Fail loudly: an ``assert`` here would be stripped under ``python -O``.
        if vggish_features is None:
            raise ValueError('VGGish feature extraction failed for %s' % wav_file)
        # One ground-truth label per extracted feature frame.
        labels = [label] * vggish_features.shape[0]

        # Restore the classifier weights into this graph.
        # _restore_from_meta_and_ckpt(sess, META, CKPT)
        _restore_from_defined_and_ckpt(sess, CKPT)

        # Look up the classifier's input/output tensors by their graph names.
        inputs = graph.get_tensor_by_name(audio_params.AUDIO_INPUT_TENSOR_NAME)
        outputs = graph.get_tensor_by_name(audio_params.AUDIO_OUTPUT_TENSOR_NAME)

        predictions = sess.run(outputs, feed_dict={inputs: vggish_features})
        idxes = np.argmax(predictions, 1)  # predicted class per frame
        probs = np.max(predictions, 1)     # confidence of each prediction
        print(predictions)
        print(idxes)
        print(labels)
        print(probs)
        acc = accuracy_score(labels, idxes)
        print('acc:', acc)
        return acc
def arange_urban_sound_file_by_class():
    """Arrange UrbanSound8K wav files into per-class subdirectories.

    Walks the source audio tree, derives each wav file's class index via
    ``urban_labels``, and copies the file into ``<dst_dir>/<class_name>/``.
    """
    # BUG FIX: the original assigned a single path *string* to ``src_paths``
    # and then iterated over it, which loops over the characters of the
    # path, not over files. Walk the directory tree instead.
    src_dir = '/data1/data/UrbanSound8K-16bit/audio'
    dst_dir = '/data1/data/UrbanSound8K-16bit/audio-classfied'
    classes = [
        'air conditioner', 'car horn', 'children playing', 'dog bark',
        'drilling', 'engine idling', 'gun shot', 'jackhammer', 'siren',
        'street music'
    ]
    # Directory-safe class names (spaces -> underscores).
    class_dirs = [c.replace(' ', '_') for c in classes]
    for root, _dirs, filenames in os.walk(src_dir):
        for filename in filenames:
            if not filename.lower().endswith('.wav'):
                continue
            src = os.path.join(root, filename)
            # ``urban_labels`` returns one class index per input path.
            lbl = urban_labels([src])[0]
            dst = '{dir}/{label}'.format(dir=dst_dir, label=class_dirs[lbl])
            maybe_create_directory(dst)
            maybe_copy_file(
                src, '{dst}/{name}'.format(dst=dst, name=filename))
# NOTE(review): removed a stray paste artifact ("Esempio n. 3" / "0" — snippet-site
# page text) that was not Python and broke the file's syntax.
    def close(self):
        """Close the wrapped session held in ``self.sess``."""
        self.sess.close()


if __name__ == '__main__':
    import audio_params
    import vggish_params
    import timeit
    from audio_util import urban_labels

    wav_file = 'F:/3rd-datasets/UrbanSound8K-16bit/audio-classified/siren/90014-8-0-1.wav'
    wav_dir = 'F:/3rd-datasets/UrbanSound8K-16bit/audio-classified/siren'
    # Keep only wav files: ``os.listdir`` may also return non-audio entries.
    wav_filenames = [n for n in os.listdir(wav_dir) if n.lower().endswith('.wav')]
    wav_files = [os.path.join(wav_dir, wav_filename) for wav_filename in wav_filenames]
    wav_labels = urban_labels(wav_files)

    # Smoke-test VGGishExtractor: extract features for one file, then write
    # a small tfrecord set for the first 10 files.
    time_start = timeit.default_timer()
    with VGGishExtractor(audio_params.VGGISH_CHECKPOINT,
                         audio_params.VGGISH_PCA_PARAMS,
                         vggish_params.INPUT_TENSOR_NAME,
                         vggish_params.OUTPUT_TENSOR_NAME) as ve:

        vggish_features = ve.wavfile_to_features(wav_file)
        print(vggish_features, vggish_features.shape)

        ve.create_records('./vggish_test.records', wav_files[:10], wav_labels[:10])

    time_end = timeit.default_timer()
    # FIX: the original timing print was commented out and referenced an
    # undefined ``i``; report per-wav cost over the files actually processed.
    elapsed = time_end - time_start
    n_wavs = min(10, len(wav_files))
    print('cost time: {}s, {}s/wav'.format(elapsed, elapsed / max(n_wavs, 1)))