Example #1
0
def get_model_output_10_classes(filename):
    """Run a frozen TF graph over the audio dataset and dump features to CSV.

    Loads the serialized ``GraphDef`` at ``filename``, feeds every ``*.wav``
    file found under ``Constants.AUDIO_DATA_FOLDER`` through the graph up to
    the ``Minimum_3`` op, post-processes the collected outputs (fixed
    sequence length, PCA, flattening) and writes them with their labels to
    ``audio10classes.csv``.

    Only files whose numeric basename is >= 1000 are used; the label is the
    basename minus 1000 (presumably files 1000..1009 map to classes 0..9 —
    TODO confirm against the dataset layout).
    """
    with tf.gfile.GFile(filename, 'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())

    with tf.Graph().as_default() as graph:
        tf.import_graph_def(graph_def, name='')  # return value was unused
        with tf.Session(graph=graph) as sess:
            output_op = graph.get_operation_by_name('Minimum_3').outputs[0]
            xs = []
            ys = []
            filename_list = []
            # Loop variable renamed from `filename`, which shadowed the
            # graph-file parameter of the same name.
            for idx, wav_path in enumerate(glob.glob(os.path.join(Constants.AUDIO_DATA_FOLDER, '*wav*', '*.wav'))):
                # BUG FIX: str.strip('.wav') strips any of the characters
                # '.', 'w', 'a', 'v' from both ends rather than removing the
                # suffix; os.path.splitext drops the extension safely.
                stem = os.path.splitext(os.path.basename(wav_path))[0]
                if int(stem) < 1000:
                    continue
                y = str(int(stem) - 1000)
                # CSV key is "<parent-dir>/<basename-without-extension>".
                name = '/'.join([os.path.basename(os.path.dirname(wav_path)), stem])
                filename_list.append(name)
                if idx % 50 == 0:  # progress heartbeat every 50 files
                    print(name)
                fs, audio = wav.read(wav_path)
                x = audioToInputVector(audio, fs, N_FEATURES, N_CONTEXT)
                out = sess.run(output_op, {'input_node:0': [
                               x], 'input_lengths:0': [len(x)]})
                xs.append(out)
                ys.append(y)
            xs = fix_seq_length(xs, length=20)
            xs = apply_pca(xs, n_components=25)
            xs = np.array([np.ravel(x) for x in xs])
            to_csv(xs, ys, os.path.join(Constants.DATA_FOLDER, 'audio10classes.csv'),
                   filename_list=filename_list)
Example #2
0
def audiofile_to_input_vector(audio_filename, numcep, numcontext):
    r"""Compute the MFCC-based feature matrix for one WAV file.

    Reads the audio at ``audio_filename`` and delegates to
    ``audioToInputVector``, producing ``numcep`` MFCC features at every
    0.01 s time step (window length 0.025 s) with ``numcontext`` context
    frames appended to the left and right of each step, as a numpy array.
    """
    sample_rate, samples = wav.read(audio_filename)
    return audioToInputVector(samples, sample_rate, numcep, numcontext)
Example #3
0
def audiofile_to_input_vector(audio_filename, numcep, numcontext):
    r"""Load a WAV file and turn it into the model's input features.

    Yields ``numcep`` MFCC features per 0.01 s step (0.025 s window), with
    ``numcontext`` frames of left/right context, as a numpy array.

    NOTE(review): this is byte-for-byte the same function as the one defined
    earlier in this file — consider keeping a single definition.
    """
    # Decode the WAV file, then hand the raw samples and sample rate on.
    rate, audio_data = wav.read(audio_filename)
    features = audioToInputVector(audio_data, rate, numcep, numcontext)
    return features
Example #4
0
def infer_audio(input_file_path):
    """Run a single WAV file through the frozen acoustic model.

    Loads the graph at the module-level ``model_path``, computes the MFCC
    input vector for ``input_file_path``, and evaluates the network's raw
    ``prefix/logits:0`` tensor.

    Parameters
    ----------
    input_file_path : str
        Path to the WAV file to run inference on.

    Returns
    -------
    The value of the ``prefix/logits:0`` tensor (presumably a numpy array
    of per-timestep logits — TODO confirm against the exported graph).
    """
    initialize_globals()
    n_input = 26   # MFCC features per frame
    n_context = 9  # context frames on each side

    graph = load_graph(model_path)

    # Input/output tensors of the frozen graph. The 'prefix/output_node:0'
    # tensor was previously looked up into an unused local (`op`) and never
    # evaluated, so that dead lookup has been removed.
    inp = graph.get_tensor_by_name('prefix/input_node:0')
    inp_len = graph.get_tensor_by_name('prefix/input_lengths:0')
    logits = graph.get_tensor_by_name('prefix/logits:0')

    with tf.Session(graph=graph) as session:
        fs, audio = wav.read(input_file_path)
        mfcc = audioToInputVector(audio, fs, n_input, n_context)

        output = session.run(logits,
                             feed_dict={
                                 inp: [mfcc],
                                 inp_len: [len(mfcc)],
                             })

    return output