Ejemplo n.º 1
0
def load_data(filenames_batch, labels_batch):
    """Build one minibatch of examples and matching labels from WAV files.

    Each file is converted with ``vggish_input.wavfile_to_examples`` and may
    yield several examples; the file's one-hot label is repeated once per
    example so that features and labels stay aligned along axis 0.

    Parameters
    ----------
    filenames_batch : iterable
        WAV files to load, passed one by one to
        ``vggish_input.wavfile_to_examples``.
    labels_batch : iterable
        Scalar labels paired positionally with ``filenames_batch``. Pairing
        uses ``zip``, so surplus entries in the longer input are ignored.

    Returns
    -------
    tuple
        ``(minibatch_X, minibatch_Y)``, each concatenated along axis 0, or
        ``(None, None)`` when there is nothing to load.
    """
    example_chunks = []
    label_chunks = []

    for filename, scalar_label in zip(filenames_batch, labels_batch):
        examples_of_wav_file = np.array(
            vggish_input.wavfile_to_examples(filename))

        # Convert the label to a one-hot vector, then repeat it for each
        # sub-example created from this file.
        # NOTE(review): repeating along axis 0 presumes
        # convert_scalar_to_one_hot returns a 2-D row vector - confirm.
        repeated_label = np.repeat(convert_scalar_to_one_hot(scalar_label),
                                   examples_of_wav_file.shape[0],
                                   axis=0)

        example_chunks.append(examples_of_wav_file)
        label_chunks.append(repeated_label)

    if not example_chunks:
        return None, None

    # Concatenate once at the end: appending inside the loop would re-copy
    # the whole accumulated batch for every file.
    minibatch_X = np.concatenate(example_chunks, axis=0)
    minibatch_Y = np.concatenate(label_chunks, axis=0)

    return minibatch_X, minibatch_Y
Ejemplo n.º 2
0
def make_prediction(wav_file, checkpoint):
    """Run the model on one WAV file using weights restored from a checkpoint.

    Prints the raw prediction and the lines produced by ``format_top5``.

    Parameters
    ----------
    wav_file
        Audio input passed to ``vggish_input.wavfile_to_examples``.
    checkpoint : str
        Checkpoint name; it is appended directly to
        ``params.CHECKPOINT_FOLDER`` (plain string concatenation, so the
        folder constant is expected to carry any needed separator).

    Returns
    -------
    tuple
        ``(argmax_predict, top_5_predict)``: the evaluated ``prediction`` and
        ``softmax_prediction`` tensors returned by ``model``.
    """
    # NOTE(review): the False argument presumably selects inference mode -
    # confirm against model().
    graph, prediction, softmax_prediction = model(False)

    class_map, _, _ = utils.read_csv()

    with graph.as_default(), tf.Session(graph=graph) as sess:
        # Use a Saver to restore every global variable of the graph.
        saver = tf.train.Saver(tf.global_variables(),
                               name='model_load_pretrained',
                               write_version=1)

        checkpoint_path = params.CHECKPOINT_FOLDER + checkpoint

        saver.restore(sess, checkpoint_path)

        input_data = vggish_input.wavfile_to_examples(wav_file)

        features_tensor = sess.graph.get_tensor_by_name(
            vggish_params.INPUT_TENSOR_NAME)

        # Fetch both outputs in a single run so the forward pass over the
        # input is executed once instead of twice.
        argmax_predict, top_5_predict = sess.run(
            [prediction, softmax_prediction],
            feed_dict={features_tensor: input_data})

        print(argmax_predict)

        print("\n".join(format_top5(class_map, top_5_predict)))

        return argmax_predict, top_5_predict
    write("WaveFiles/xtrain_"+str(i)+".wav", SAMPLE_RATE, (32768*X_train[i]).astype(np.int16))

print("Done!")
'''

# Convert the WAV files found in the WaveFiles/ directory into batches of
# VGGish input examples, printing progress every 100 files.
print("Processing .wav files...")

wav_file_direc = "WaveFiles/"
wav_files = listdir(wav_file_direc)

# Collect one examples batch per WAV file listed in wav_files.
batches = []
count = 0
for wav_file in wav_files:
    # NOTE(review): this is a substring test, so any entry whose name
    # contains "wav" is processed - confirm an extension check was not meant.
    if "wav" in wav_file:
        examples_batch = vggish_input.wavfile_to_examples(
            join(wav_file_direc, wav_file))
        batches.append(examples_batch)
        count += 1
        if count % 100 == 0:
            # N is defined outside this excerpt; presumably the expected
            # number of files - TODO confirm.
            print("At File ", count, "/", N)

print("Done!")

print("Computing Tensorflow Embeddings...")
# Prepare a postprocessor to munge the model embeddings.
# pca_params is defined outside this excerpt.
pproc = vggish_postprocess.Postprocessor(pca_params)

# Filled in later, outside this excerpt.
output_sequences = []

with tf.Graph().as_default(), tf.Session() as sess:
    # Define the model in inference mode, load the checkpoint, and
def main(_):
    """Run VGGish on one audio file and optionally store the embeddings.

    The input is ``FLAGS.wav_file`` when set; otherwise a 5-second, 1 kHz sine
    wave sampled at 44.1 kHz is synthesized into an in-memory WAV file. The
    input examples, raw embeddings, postprocessed embeddings and the resulting
    ``SequenceExample`` are printed. When ``FLAGS.tfrecord_file`` is set, the
    serialized ``SequenceExample`` is also written to that TFRecord file.

    Parameters
    ----------
    _
        Unused positional argument.
    """
    # In this simple example, we run the examples from a single audio file
    # through the model. If none is provided, we generate a synthetic input.
    if FLAGS.wav_file:
        wav_file = FLAGS.wav_file
    else:
        # Write a WAV of a sine wav into an in-memory file object.
        num_secs = 5
        freq = 1000
        sr = 44100
        t = np.linspace(0, num_secs, int(num_secs * sr))
        x = np.sin(2 * np.pi * freq * t)
        # Convert to signed 16-bit samples.
        samples = np.clip(x * 32768, -32768, 32767).astype(np.int16)
        wav_file = six.BytesIO()
        wavfile.write(wav_file, sr, samples)
        # Rewind so the reader starts at the beginning of the buffer.
        wav_file.seek(0)
    examples_batch = vggish_input.wavfile_to_examples(wav_file)
    print(examples_batch)

    # Prepare a postprocessor to munge the model embeddings.
    pproc = vggish_postprocess.Postprocessor(FLAGS.pca_params)

    # If needed, prepare a record writer to store the postprocessed embeddings.
    writer = tf.python_io.TFRecordWriter(
        FLAGS.tfrecord_file) if FLAGS.tfrecord_file else None

    # try/finally guarantees the record writer is closed even when inference
    # or serialization raises.
    try:
        with tf.Graph().as_default(), tf.Session() as sess:
            # Define the model in inference mode, load the checkpoint, and
            # locate input and output tensors.
            vggish_slim.define_vggish_slim(training=False)
            vggish_slim.load_vggish_slim_checkpoint(sess, FLAGS.checkpoint)
            features_tensor = sess.graph.get_tensor_by_name(
                vggish_params.INPUT_TENSOR_NAME)
            embedding_tensor = sess.graph.get_tensor_by_name(
                vggish_params.OUTPUT_TENSOR_NAME)

            # Run inference and postprocessing.
            [embedding_batch
             ] = sess.run([embedding_tensor],
                          feed_dict={features_tensor: examples_batch})
            print(embedding_batch)
            postprocessed_batch = pproc.postprocess(embedding_batch)
            print(postprocessed_batch)

            # Write the postprocessed embeddings as a SequenceExample, in a
            # similar format as the features released in AudioSet. Each row of
            # the batch of embeddings corresponds to roughly a second of audio
            # (96 10ms frames), and the rows are written as a sequence of
            # bytes-valued features, where each feature value contains the 128
            # bytes of the whitened quantized embedding.
            seq_example = tf.train.SequenceExample(
                feature_lists=tf.train.FeatureLists(
                    feature_list={
                        vggish_params.AUDIO_EMBEDDING_FEATURE_NAME:
                        tf.train.FeatureList(feature=[
                            tf.train.Feature(bytes_list=tf.train.BytesList(
                                value=[embedding.tobytes()]))
                            for embedding in postprocessed_batch
                        ])
                    }))
            print(seq_example)
            if writer:
                writer.write(seq_example.SerializeToString())
    finally:
        if writer:
            writer.close()
def make_extract_vggish_embedding(frame_duration,
                                  hop_duration,
                                  input_op_name='vggish/input_features',
                                  output_op_name='vggish/embedding',
                                  embedding_size=128,
                                  resources_dir=None):
    """
    Creates a coroutine generator for extracting and saving VGGish embeddings

    The generator must be primed (advanced to its first ``yield``) and is then
    driven with ``send((audio_path, output_path))``. For each pair it computes
    the embeddings of ``audio_path``, postprocesses them, casts them to
    ``float32`` and dumps the array into a gzip file at ``output_path``.
    Pairs whose ``output_path`` already exists are skipped, as are audio files
    for which ``vggish_input.wavfile_to_examples`` raises ``ValueError``.

    The model, tensors and postprocessor are set up once when the generator
    is primed, so configuration errors (missing checkpoint or PCA file, wrong
    op names) surface at that point.

    Parameters
    ----------
    frame_duration
        Forwarded to the VGGish helpers as ``frame_win_sec``.
    hop_duration
        Forwarded to the VGGish helpers as ``frame_hop_sec``.
    input_op_name
        Name of the input op; ``':0'`` is appended to obtain the tensor name.
    output_op_name
        Name of the embedding op; ``':0'`` is appended to obtain the tensor
        name.
    embedding_size
        Forwarded to the VGGish helpers as ``embedding_size``.
    resources_dir
        Directory containing ``vggish_pca_params.npz`` and
        ``vggish_model.ckpt``. Defaults to ``vggish/resources`` next to this
        module.

    Returns
    -------
    coroutine

    """
    params = {
        'frame_win_sec': frame_duration,
        'frame_hop_sec': hop_duration,
        'embedding_size': embedding_size
    }

    if not resources_dir:
        resources_dir = os.path.join(os.path.dirname(__file__),
                                     'vggish/resources')

    pca_params_path = os.path.join(resources_dir, 'vggish_pca_params.npz')
    model_path = os.path.join(resources_dir, 'vggish_model.ckpt')

    try:
        with tf.Graph().as_default(), tf.Session() as sess:
            # Define the model in inference mode, load the checkpoint, and
            # locate input and output tensors.
            vggish_slim.define_vggish_slim(training=False, **params)
            vggish_slim.load_vggish_slim_checkpoint(sess, model_path, **params)

            features_tensor = sess.graph.get_tensor_by_name(
                input_op_name + ':0')
            embedding_tensor = sess.graph.get_tensor_by_name(
                output_op_name + ':0')

            # Prepare a postprocessor to munge the model embeddings. It is
            # built once here instead of once per file.
            # NOTE(review): reuse assumes postprocess() keeps no per-call
            # state on the instance - confirm.
            pproc = vggish_postprocess.Postprocessor(
                pca_params_path, **params)

            while True:
                # We use a coroutine to more easily keep open the Tensorflow
                # contexts without having to constantly reload the model
                audio_path, output_path = (yield)

                # Never recompute an embedding that is already on disk.
                if os.path.exists(output_path):
                    continue

                try:
                    examples_batch = vggish_input.wavfile_to_examples(
                        audio_path, **params)
                except ValueError:
                    print("Error opening {}. Skipping...".format(audio_path))
                    continue

                # Run inference and postprocessing.
                [embedding_batch
                 ] = sess.run([embedding_tensor],
                              feed_dict={features_tensor: examples_batch})

                emb = pproc.postprocess(embedding_batch,
                                        **params).astype(np.float32)

                with gzip.open(output_path, 'wb') as f:
                    emb.dump(f)

    except GeneratorExit:
        # Raised when the caller closes the generator; leaving the ``with``
        # block above releases the session.
        pass