Exemple #1
0
def vggish(model_path):
    """Build VGGish in a fresh graph and load its weights from ``model_path``.

    A new ``tf.Graph`` is made the default graph while the session is created,
    the model is defined in inference mode (``training=False``) and the
    checkpoint is loaded into the session.

    Returns:
        A 4-tuple ``(sess, embedding_tensor, embedding_tensor,
        features_tensor)``. The session is left open; nothing in this
        function closes it.
    """
    fresh_graph = tf.Graph()
    with fresh_graph.as_default():
        # Created while `fresh_graph` is the default, so the session binds to it.
        sess = tf.Session()
        vggish_slim.define_vggish_slim(training=False)
        vggish_slim.load_vggish_slim_checkpoint(sess, model_path)

        session_graph = sess.graph
        features_tensor = session_graph.get_tensor_by_name(
            vggish_params.INPUT_TENSOR_NAME)
        embedding_tensor = session_graph.get_tensor_by_name(
            vggish_params.OUTPUT_TENSOR_NAME)

    # NOTE(review): the embedding tensor appears twice in the result; kept
    # as-is because callers may unpack four values -- confirm intent.
    return sess, embedding_tensor, embedding_tensor, features_tensor
def create_vggish_network(sess, config):
    """Define VGGish in inference mode, load its checkpoint, expose its tensors.

    Args:
        sess: Session whose graph receives the checkpoint weights and is
            searched for the input/output tensors.
        config: Object providing ``vggish_hop_size`` and
            ``vggish_model_checkpoint_path``.

    Returns:
        A dict with keys ``'features'`` (the tensor named
        ``vggish_params.INPUT_TENSOR_NAME``) and ``'embedding'`` (the tensor
        named ``vggish_params.OUTPUT_TENSOR_NAME``).

    Side effects:
        Overwrites the module-level ``vggish_params.EXAMPLE_HOP_SECONDS`` with
        ``config.vggish_hop_size``.
    """
    vggish_slim.define_vggish_slim(training=False)
    vggish_params.EXAMPLE_HOP_SECONDS = config.vggish_hop_size

    checkpoint_path = config.vggish_model_checkpoint_path
    vggish_slim.load_vggish_slim_checkpoint(sess, checkpoint_path)

    graph = sess.graph
    return {
        'features': graph.get_tensor_by_name(vggish_params.INPUT_TENSOR_NAME),
        'embedding': graph.get_tensor_by_name(
            vggish_params.OUTPUT_TENSOR_NAME),
    }
        if count % 100 == 0:
            print("At File ", count, "/", N)

print("Done!")

print("Computing Tensorflow Embeddings...")
# Postprocessor applied to every raw embedding batch produced by the model.
pproc = vggish_postprocess.Postprocessor(pca_params)

# One postprocessed batch is appended per entry of `batches`.
output_sequences = []

with tf.Graph().as_default(), tf.Session() as sess:
    # Build VGGish in inference mode and restore its weights, then look up
    # the input placeholder and the embedding output by name.
    vggish_slim.define_vggish_slim(training=False)
    vggish_slim.load_vggish_slim_checkpoint(sess, checkpoint)
    features_tensor = sess.graph.get_tensor_by_name(
        vggish_params.INPUT_TENSOR_NAME)
    embedding_tensor = sess.graph.get_tensor_by_name(
        vggish_params.OUTPUT_TENSOR_NAME)

    # Pre-set so `count` is defined (as 0) even when `batches` is empty.
    count = 0
    for count, batch in enumerate(batches, start=1):
        # Inference followed by postprocessing of this batch.
        embedding_batch = sess.run(embedding_tensor,
                                   feed_dict={features_tensor: batch})
        postprocessed_batch = pproc.postprocess(embedding_batch)
        output_sequences.append(postprocessed_batch)
        # Progress report every 100 batches.
        if count % 100 == 0:
            print("At Embedding ", count, "/", N)
def train(X_train,
          Y_train,
          X_test,
          Y_test,
          test_fold,
          num_epochs=100,
          minibatch_size=params.BATCH_SIZE,
          save_checkpoint=True):
    """Train the classifier built by ``model`` and evaluate it every epoch.

    Variables are initialised once, the pre-trained VGGish checkpoint
    (``FLAGS.checkpoint``) is loaded on top, and, if a training checkpoint
    for this fold exists, it is restored last so it takes precedence.

    Args:
        X_train: Training features fed to the VGGish input tensor.
        Y_train: Training labels fed to ``mymodel/train/labels:0``.
        X_test: Features evaluated once at the end of every epoch.
        Y_test: Labels evaluated once at the end of every epoch.
        test_fold: Fold identifier; appended to the summary log directories
            and to ``params.CHECKPOINT_FOLDER``.
        num_epochs: Number of passes over the training data.
        minibatch_size: Batch size handed to ``utils.random_mini_batches``.
        save_checkpoint: When true, a checkpoint is written on every epoch
            that is a positive multiple of 200.
    """
    graph, accuracy_tensor, _ = model(learning_rate=0.01)

    with graph.as_default(), tf.Session(graph=graph) as sess:
        # Initialise everything exactly once, THEN load the pre-trained
        # VGGish weights. Running the global initializer again after this
        # point would silently reset the loaded weights.
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        vggish_slim.load_vggish_slim_checkpoint(sess, FLAGS.checkpoint)

        # Locate all the tensors and ops needed for the training loop.
        features_tensor = sess.graph.get_tensor_by_name(
            vggish_params.INPUT_TENSOR_NAME)
        labels_tensor = sess.graph.get_tensor_by_name('mymodel/train/labels:0')
        global_step_tensor = sess.graph.get_tensor_by_name(
            'mymodel/train/global_step:0')
        loss_tensor = sess.graph.get_tensor_by_name('mymodel/train/loss_op:0')
        train_op = sess.graph.get_operation_by_name('mymodel/train/train_op')

        merged = tf.summary.merge_all()
        saver = tf.train.Saver()

        # Resume from the newest training checkpoint of this fold, if any.
        checkpoint = tf.train.latest_checkpoint(
            checkpoint_dir=params.CHECKPOINT_FOLDER + str(test_fold))
        if checkpoint is not None:
            print("Checkpoint exists. Loading from disk..")
            saver.restore(sess, checkpoint)

        # Defined up front so the per-epoch test summary still has a step
        # value when an epoch yields no minibatches.
        num_steps = sess.run(global_step_tensor)

        train_writer = tf.summary.FileWriter(
            "./logs/train/fold_" + str(test_fold), sess.graph)
        test_writer = tf.summary.FileWriter(
            "./logs/test/fold_" + str(test_fold), sess.graph)

        try:
            for epoch in range(num_epochs):
                minibatch_cost = 0.
                batch_accuracy_average = 0
                report_accuracy = epoch % 10 == 0
                print("Epoch: %d" % epoch)

                minibatches = utils.random_mini_batches(
                    X_train, Y_train, minibatch_size)
                num_minibatches = len(minibatches)

                for minibatch_X, minibatch_Y in minibatches:
                    feed = {
                        features_tensor: minibatch_X,
                        labels_tensor: minibatch_Y
                    }
                    summary_train, num_steps, loss, _ = sess.run(
                        [merged, global_step_tensor, loss_tensor, train_op],
                        feed_dict=feed)

                    # Running mean of the loss over this epoch's minibatches.
                    minibatch_cost += loss / num_minibatches

                    print('Step %d: loss %g minibatch_cost: %g' %
                          (num_steps, loss, minibatch_cost))

                    if report_accuracy:
                        # Measured after the update step, on the same batch.
                        accuracy = sess.run(accuracy_tensor, feed_dict=feed)
                        batch_accuracy_average += accuracy / num_minibatches

                    train_writer.add_summary(summary_train, num_steps)
                    train_writer.flush()

                summary_test, test_accuracy = sess.run(
                    [merged, accuracy_tensor],
                    feed_dict={
                        features_tensor: X_test,
                        labels_tensor: Y_test
                    })
                test_writer.add_summary(summary_test, num_steps)

                print("batch cost: %g" % minibatch_cost)

                if report_accuracy:
                    print("batch accuracy: %g" % batch_accuracy_average)

                print("test_acc: %g" % test_accuracy)

                # NOTE(review): with the default num_epochs=100 this condition
                # is never true, so no checkpoint is written -- confirm the
                # intended save interval.
                if save_checkpoint and epoch % 200 == 0 and epoch > 0:
                    saver.save(
                        sess, params.CHECKPOINT_FOLDER + str(test_fold) +
                        "/checkpoint.ckpt", num_steps)
                    print("Checkpoint saved")
        finally:
            # Closing flushes pending events (the test writer is otherwise
            # never flushed) and releases the event files.
            train_writer.close()
            test_writer.close()

        print("Training has finished!")
def make_extract_vggish_embedding(frame_duration,
                                  hop_duration,
                                  input_op_name='vggish/input_features',
                                  output_op_name='vggish/embedding',
                                  embedding_size=128,
                                  resources_dir=None):
    """
    Creates a coroutine generator for extracting and saving VGGish embeddings

    The generator must be primed with ``next()``; priming builds the model,
    loads the checkpoint and the PCA postprocessor once. Afterwards each
    ``send((audio_path, output_path))`` processes one file. Files whose
    ``output_path`` already exists, or whose audio raises ``ValueError``
    while being read, are skipped.

    Parameters
    ----------
    frame_duration
        Forwarded to the VGGish helpers as ``frame_win_sec``.
    hop_duration
        Forwarded to the VGGish helpers as ``frame_hop_sec``.
    input_op_name
        Name of the graph op whose first output (``<name>:0``) is fed with
        the input examples.
    output_op_name
        Name of the graph op whose first output (``<name>:0``) is fetched
        as the embedding.
    embedding_size
        Forwarded to the VGGish helpers as ``embedding_size``.
    resources_dir
        Directory containing ``vggish_pca_params.npz`` and
        ``vggish_model.ckpt``. Defaults to ``vggish/resources`` next to
        this module.

    Returns
    -------
    coroutine
        Generator accepting ``(audio_path, output_path)`` tuples via
        ``send``. For each processed file the postprocessed embedding is
        cast to ``float32`` and written with ``ndarray.dump`` into a gzip
        file at ``output_path``.

    """
    params = {
        'frame_win_sec': frame_duration,
        'frame_hop_sec': hop_duration,
        'embedding_size': embedding_size
    }

    if not resources_dir:
        resources_dir = os.path.join(os.path.dirname(__file__),
                                     'vggish/resources')

    pca_params_path = os.path.join(resources_dir, 'vggish_pca_params.npz')
    model_path = os.path.join(resources_dir, 'vggish_model.ckpt')

    try:
        with tf.Graph().as_default(), tf.Session() as sess:
            # Define the model in inference mode and load the checkpoint.
            vggish_slim.define_vggish_slim(training=False, **params)
            vggish_slim.load_vggish_slim_checkpoint(sess, model_path, **params)

            # Everything below is identical for every file, so it is set up
            # once here instead of once per processed file.
            features_tensor = sess.graph.get_tensor_by_name(
                input_op_name + ':0')
            embedding_tensor = sess.graph.get_tensor_by_name(
                output_op_name + ':0')
            pproc = vggish_postprocess.Postprocessor(pca_params_path, **params)

            while True:
                # We use a coroutine to more easily keep open the Tensorflow
                # contexts without having to constantly reload the model
                audio_path, output_path = (yield)

                # Never overwrite an existing result.
                if os.path.exists(output_path):
                    continue

                try:
                    examples_batch = vggish_input.wavfile_to_examples(
                        audio_path, **params)
                except ValueError:
                    print("Error opening {}. Skipping...".format(audio_path))
                    continue

                # Run inference and postprocessing.
                [embedding_batch] = sess.run(
                    [embedding_tensor],
                    feed_dict={features_tensor: examples_batch})

                emb = pproc.postprocess(embedding_batch,
                                        **params).astype(np.float32)

                with gzip.open(output_path, 'wb') as f:
                    emb.dump(f)

    except GeneratorExit:
        # Raised at the pending `yield` when the caller closes the
        # generator; the `with` block above has already released the
        # session by the time we get here.
        pass
Exemple #6
0
def train(filenames,
          file_labels,
          num_epochs=100,
          minibatch_size=params.BATCH_SIZE):
    """Train the classifier built by ``model`` from files loaded batch-wise.

    Variables are initialised once, the pre-trained VGGish checkpoint
    (``FLAGS.checkpoint``) is loaded on top, and, if a training checkpoint
    exists in ``params.CHECKPOINT_FOLDER``, it is restored last so it takes
    precedence.

    Args:
        filenames: Identifiers passed to ``utils.make_random_batches`` and,
            per batch, to ``utils.load_data``.
        file_labels: Labels matching ``filenames``, handled the same way.
        num_epochs: Number of passes over the data.
        minibatch_size: Batch size handed to ``utils.make_random_batches``.

    Returns:
        The ``prediction_op`` returned by ``model()``.
    """
    graph, prediction_op = model()

    with graph.as_default(), tf.Session(graph=graph) as sess:
        # Initialise everything exactly once, THEN load the pre-trained
        # VGGish weights. Running the global initializer again after this
        # point would silently reset the loaded weights.
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        vggish_slim.load_vggish_slim_checkpoint(sess, FLAGS.checkpoint)

        # Locate all the tensors and ops needed for the training loop.
        features_tensor = sess.graph.get_tensor_by_name(
            vggish_params.INPUT_TENSOR_NAME)
        labels_tensor = sess.graph.get_tensor_by_name('mymodel/train/labels:0')
        global_step_tensor = sess.graph.get_tensor_by_name(
            'mymodel/train/global_step:0')
        loss_tensor = sess.graph.get_tensor_by_name('mymodel/train/loss_op:0')
        train_op = sess.graph.get_operation_by_name('mymodel/train/train_op')

        # Diagnostic dump of every node name in the graph.
        all_tensors = [
            n.name for n in tf.get_default_graph().as_graph_def().node
        ]
        print(all_tensors)

        summary = tf.summary.merge_all()
        saver = tf.train.Saver()

        # Resume from the newest training checkpoint, if any.
        checkpoint = tf.train.latest_checkpoint(
            checkpoint_dir=params.CHECKPOINT_FOLDER)
        if checkpoint is not None:
            print("Checkpoint exists. Loading from disk..")
            saver.restore(sess, checkpoint)

        summary_writer = tf.summary.FileWriter(
            "./logs/train/fold" + str(params.TEST_FOLD), sess.graph)

        try:
            for epoch in range(num_epochs):
                minibatches = utils.make_random_batches(
                    filenames, file_labels, minibatch_size)

                for filenames_batch, labels_batch in minibatches:
                    # Data is loaded lazily, one batch at a time.
                    minibatch_X, minibatch_Y = utils.load_data(
                        filenames_batch, labels_batch)
                    summary_str, num_steps, loss, _ = sess.run(
                        [summary, global_step_tensor, loss_tensor, train_op],
                        feed_dict={
                            features_tensor: minibatch_X,
                            labels_tensor: minibatch_Y
                        })

                    summary_writer.add_summary(summary_str, num_steps)
                    summary_writer.flush()

                    print('Step %d: loss %g ' % (num_steps, loss))

                # Checkpoints are numbered by epoch, not by global step.
                if epoch % 5 == 0:
                    saver.save(sess,
                               params.CHECKPOINT_FOLDER + "/checkpoint.ckpt",
                               epoch)
                    print("Checkpoint saved")
        finally:
            # Release the event file even when training is interrupted.
            summary_writer.close()

        print("Training has finished!")

    return prediction_op
Exemple #7
0
def main(_):
    """Build VGGish plus a small classifier head and run a training loop.

    The graph consists of the VGGish model, a 100-unit fully connected
    layer, a ``_NUM_CLASSES``-unit logits layer, and training ops under the
    ``mymodel/train`` scope. After initialising variables and loading the
    VGGish checkpoint from ``FLAGS.checkpoint``, ``FLAGS.num_batches``
    training steps are run on batches from ``_get_examples_batch`` and the
    loss of each step is printed.

    Args:
        _: Unused.
    """
    with tf.Graph().as_default(), tf.Session() as sess:
        # VGGish itself; its output feeds the classifier head below.
        vggish_embeddings = vggish_slim.define_vggish_slim(FLAGS.train_vggish)

        # Classifier head and training ops, all scoped under 'mymodel'.
        with tf.variable_scope('mymodel'):
            # Hidden fully connected layer with 100 units.
            hidden = slim.fully_connected(vggish_embeddings, 100)

            # Linear output layer, one unit per class.
            logits = slim.fully_connected(
                hidden, _NUM_CLASSES, activation_fn=None, scope='logits')
            tf.sigmoid(logits, name='prediction')

            # NOTE(review): the prediction op is a sigmoid while the loss
            # below is a softmax cross-entropy -- confirm whether this is a
            # multi-label or a single-label task.
            with tf.variable_scope('train'):
                step_collections = [
                    tf.GraphKeys.GLOBAL_VARIABLES,
                    tf.GraphKeys.GLOBAL_STEP,
                ]
                global_step = tf.Variable(0,
                                          name='global_step',
                                          trainable=False,
                                          collections=step_collections)

                # Float label batch with one column per class.
                labels = tf.placeholder(
                    tf.float32, shape=(None, _NUM_CLASSES), name='labels')

                # Mean softmax cross-entropy over the batch.
                loss = tf.reduce_mean(
                    tf.nn.softmax_cross_entropy_with_logits_v2(
                        logits=logits, labels=labels, name='xent'),
                    name='loss_op')
                tf.summary.scalar('loss', loss)

                # Adam, configured from the VGGish parameter module.
                adam = tf.train.AdamOptimizer(
                    learning_rate=vggish_params.LEARNING_RATE,
                    epsilon=vggish_params.ADAM_EPSILON)
                adam.minimize(loss, global_step=global_step, name='train_op')

        # Initialise every variable first, then overwrite the VGGish ones
        # with the pre-trained checkpoint.
        sess.run(tf.global_variables_initializer())
        vggish_slim.load_vggish_slim_checkpoint(sess, FLAGS.checkpoint)

        # Fetch the training-loop tensors and op by their graph names.
        graph = sess.graph
        features_tensor = graph.get_tensor_by_name(
            vggish_params.INPUT_TENSOR_NAME)
        labels_tensor = graph.get_tensor_by_name('mymodel/train/labels:0')
        global_step_tensor = graph.get_tensor_by_name(
            'mymodel/train/global_step:0')
        loss_tensor = graph.get_tensor_by_name('mymodel/train/loss_op:0')
        train_op = graph.get_operation_by_name('mymodel/train/train_op')

        fetches = [global_step_tensor, loss_tensor, train_op]

        # The training loop.
        for _ in range(FLAGS.num_batches):
            batch_features, batch_labels = _get_examples_batch()
            step, batch_loss, _ = sess.run(
                fetches,
                feed_dict={
                    features_tensor: batch_features,
                    labels_tensor: batch_labels,
                })
            print('Step %d: loss %g' % (step, batch_loss))
def main(_):
    """Run one audio file through VGGish and print/store its embeddings.

    The input is ``FLAGS.wav_file`` when set; otherwise a 5 second, 1 kHz
    sine wave sampled at 44.1 kHz is synthesised into an in-memory WAV. The
    raw and postprocessed embeddings are printed, and when
    ``FLAGS.tfrecord_file`` is set the postprocessed embeddings are also
    written there as a single serialized ``SequenceExample``.

    Args:
        _: Unused.
    """
    # In this simple example, we run the examples from a single audio file
    # through the model. If none is provided, we generate a synthetic input.
    if FLAGS.wav_file:
        wav_file = FLAGS.wav_file
    else:
        # Write a WAV of a sine wave into an in-memory file object.
        num_secs = 5
        freq = 1000
        sr = 44100
        t = np.linspace(0, num_secs, int(num_secs * sr))
        x = np.sin(2 * np.pi * freq * t)
        # Convert to signed 16-bit samples; the clip keeps +1.0 * 32768
        # inside the int16 range.
        samples = np.clip(x * 32768, -32768, 32767).astype(np.int16)
        wav_file = six.BytesIO()
        wavfile.write(wav_file, sr, samples)
        wav_file.seek(0)
    examples_batch = vggish_input.wavfile_to_examples(wav_file)
    print(examples_batch)

    # Prepare a postprocessor to munge the model embeddings.
    pproc = vggish_postprocess.Postprocessor(FLAGS.pca_params)

    # If needed, prepare a record writer to store the postprocessed embeddings.
    writer = tf.python_io.TFRecordWriter(
        FLAGS.tfrecord_file) if FLAGS.tfrecord_file else None

    try:
        with tf.Graph().as_default(), tf.Session() as sess:
            # Define the model in inference mode, load the checkpoint, and
            # locate input and output tensors.
            vggish_slim.define_vggish_slim(training=False)
            vggish_slim.load_vggish_slim_checkpoint(sess, FLAGS.checkpoint)
            features_tensor = sess.graph.get_tensor_by_name(
                vggish_params.INPUT_TENSOR_NAME)
            embedding_tensor = sess.graph.get_tensor_by_name(
                vggish_params.OUTPUT_TENSOR_NAME)

            # Run inference and postprocessing.
            [embedding_batch] = sess.run(
                [embedding_tensor],
                feed_dict={features_tensor: examples_batch})
            print(embedding_batch)
            postprocessed_batch = pproc.postprocess(embedding_batch)
            print(postprocessed_batch)

            # Write the postprocessed embeddings as a SequenceExample, in a
            # similar format as the features released in AudioSet. Each row
            # of the batch becomes one bytes-valued feature in the feature
            # list keyed by AUDIO_EMBEDDING_FEATURE_NAME.
            seq_example = tf.train.SequenceExample(
                feature_lists=tf.train.FeatureLists(
                    feature_list={
                        vggish_params.AUDIO_EMBEDDING_FEATURE_NAME:
                        tf.train.FeatureList(feature=[
                            tf.train.Feature(bytes_list=tf.train.BytesList(
                                value=[embedding.tobytes()]))
                            for embedding in postprocessed_batch
                        ])
                    }))
            print(seq_example)
            if writer is not None:
                writer.write(seq_example.SerializeToString())
    finally:
        # Close the record file even when model loading or inference fails,
        # so the handle is not leaked.
        if writer is not None:
            writer.close()