def load_data(filenames_batch, labels_batch):
    minibatch_X = None
    minibatch_Y = None
    for filename, label in zip(filenames_batch, labels_batch):
        examples_of_wav_file = np.array(
            vggish_input.wavfile_to_examples(filename))
        # Convert the label to a one-hot vector, then repeat it for each
        # sub-example created by vggish_input.wavfile_to_examples.
        label = np.repeat(convert_scalar_to_one_hot(label),
                          examples_of_wav_file.shape[0], axis=0)
        if minibatch_X is not None:
            minibatch_X = np.append(minibatch_X, examples_of_wav_file, axis=0)
            minibatch_Y = np.append(minibatch_Y, label, axis=0)
        else:
            minibatch_X = np.array(examples_of_wav_file)
            minibatch_Y = np.array(label)
    return minibatch_X, minibatch_Y
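# A minimal usage sketch for load_data, assuming the WAV paths exist and that
# convert_scalar_to_one_hot is defined elsewhere in this module; the file
# names and label values below are illustrative.
filenames_batch = ["WaveFiles/xtrain_0.wav", "WaveFiles/xtrain_1.wav"]
labels_batch = [0, 1]
minibatch_X, minibatch_Y = load_data(filenames_batch, labels_batch)
# minibatch_X stacks every (96, 64) log-mel patch produced from the files;
# minibatch_Y repeats each file's one-hot label once per patch, so the two
# arrays stay aligned along axis 0.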
def make_prediction(wav_file, checkpoint):
    graph, prediction, softmax_prediction = model(False)
    class_map, _, _ = utils.read_csv()
    with graph.as_default(), tf.Session(graph=graph) as sess:
        model_var_names = [v.name for v in tf.global_variables()]
        model_vars = [
            v for v in tf.global_variables() if v.name in model_var_names
        ]
        # Use a Saver to restore just the variables selected above.
        saver = tf.train.Saver(model_vars,
                               name='model_load_pretrained',
                               write_version=1)
        checkpoint_path = params.CHECKPOINT_FOLDER + checkpoint
        saver.restore(sess, checkpoint_path)

        input_data = vggish_input.wavfile_to_examples(wav_file)
        features_tensor = sess.graph.get_tensor_by_name(
            vggish_params.INPUT_TENSOR_NAME)
        argmax_predict = sess.run(prediction,
                                  feed_dict={features_tensor: input_data})
        top_5_predict = sess.run(softmax_prediction,
                                 feed_dict={features_tensor: input_data})
        print(argmax_predict)
        print("\n".join(format_top5(class_map, top_5_predict)))
    return argmax_predict, top_5_predict
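# A minimal sketch of calling make_prediction, assuming a trained checkpoint
# exists under params.CHECKPOINT_FOLDER; the WAV path and checkpoint name are
# illustrative.
argmax_predict, top_5_predict = make_prediction("WaveFiles/example.wav",
                                                "model.ckpt")
# argmax_predict is the argmax prediction for each input patch, and
# top_5_predict is the softmax output that format_top5 renders above.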
write("WaveFiles/xtrain_"+str(i)+".wav", SAMPLE_RATE, (32768*X_train[i]).astype(np.int16)) print("Done!") ''' print("Processing .wav files...") wav_file_direc = "WaveFiles/" wav_files = listdir(wav_file_direc) #Initialize array of batches and read each wav_file in wav_files array batches = [] count = 0 for wav_file in wav_files: if "wav" in wav_file: examples_batch = vggish_input.wavfile_to_examples( join(wav_file_direc, wav_file)) batches.append(examples_batch) count += 1 if count % 100 == 0: print("At File ", count, "/", N) print("Done!") print("Computing Tensorflow Embeddings...") # Prepare a postprocessor to munge the model embeddings. pproc = vggish_postprocess.Postprocessor(pca_params) output_sequences = [] with tf.Graph().as_default(), tf.Session() as sess: # Define the model in inference mode, load the checkpoint, and
def main(_):
    # In this simple example, we run the examples from a single audio file
    # through the model. If none is provided, we generate a synthetic input.
    if FLAGS.wav_file:
        wav_file = FLAGS.wav_file
    else:
        # Write a WAV of a sine wave into an in-memory file object.
        num_secs = 5
        freq = 1000
        sr = 44100
        t = np.linspace(0, num_secs, int(num_secs * sr))
        x = np.sin(2 * np.pi * freq * t)
        # Convert to signed 16-bit samples.
        samples = np.clip(x * 32768, -32768, 32767).astype(np.int16)
        wav_file = six.BytesIO()
        wavfile.write(wav_file, sr, samples)
        wav_file.seek(0)
    examples_batch = vggish_input.wavfile_to_examples(wav_file)
    print(examples_batch)

    # Prepare a postprocessor to munge the model embeddings.
    pproc = vggish_postprocess.Postprocessor(FLAGS.pca_params)

    # If needed, prepare a record writer to store the postprocessed embeddings.
    writer = tf.python_io.TFRecordWriter(
        FLAGS.tfrecord_file) if FLAGS.tfrecord_file else None

    with tf.Graph().as_default(), tf.Session() as sess:
        # Define the model in inference mode, load the checkpoint, and
        # locate input and output tensors.
        vggish_slim.define_vggish_slim(training=False)
        vggish_slim.load_vggish_slim_checkpoint(sess, FLAGS.checkpoint)
        features_tensor = sess.graph.get_tensor_by_name(
            vggish_params.INPUT_TENSOR_NAME)
        embedding_tensor = sess.graph.get_tensor_by_name(
            vggish_params.OUTPUT_TENSOR_NAME)

        # Run inference and postprocessing.
        [embedding_batch] = sess.run(
            [embedding_tensor],
            feed_dict={features_tensor: examples_batch})
        print(embedding_batch)
        postprocessed_batch = pproc.postprocess(embedding_batch)
        print(postprocessed_batch)

        # Write the postprocessed embeddings as a SequenceExample, in a similar
        # format as the features released in AudioSet. Each row of the batch of
        # embeddings corresponds to roughly a second of audio (96 10ms frames),
        # and the rows are written as a sequence of bytes-valued features,
        # where each feature value contains the 128 bytes of the whitened
        # quantized embedding.
        seq_example = tf.train.SequenceExample(
            feature_lists=tf.train.FeatureLists(
                feature_list={
                    vggish_params.AUDIO_EMBEDDING_FEATURE_NAME:
                        tf.train.FeatureList(feature=[
                            tf.train.Feature(bytes_list=tf.train.BytesList(
                                value=[embedding.tobytes()]))
                            for embedding in postprocessed_batch
                        ])
                }))
        print(seq_example)
        if writer:
            writer.write(seq_example.SerializeToString())

    if writer:
        writer.close()
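# A sketch of the flag definitions and entry point that main(_) relies on,
# roughly following the VGGish inference demo; in the actual file these flag
# definitions would normally sit above main, and the default paths below are
# illustrative.
flags = tf.app.flags
flags.DEFINE_string('wav_file', None,
                    'Path to a wav file; if omitted, a synthetic sine wave '
                    'is used instead.')
flags.DEFINE_string('checkpoint', 'vggish_model.ckpt',
                    'Path to the VGGish checkpoint file.')
flags.DEFINE_string('pca_params', 'vggish_pca_params.npz',
                    'Path to the VGGish PCA parameters file.')
flags.DEFINE_string('tfrecord_file', None,
                    'Optional path to which the SequenceExample is written.')
FLAGS = flags.FLAGS

if __name__ == '__main__':
    tf.app.run()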
def make_extract_vggish_embedding(frame_duration, hop_duration,
                                  input_op_name='vggish/input_features',
                                  output_op_name='vggish/embedding',
                                  embedding_size=128, resources_dir=None):
    """
    Creates a coroutine generator for extracting and saving VGGish embeddings.

    Parameters
    ----------
    frame_duration
        Duration of each analysis frame, in seconds.
    hop_duration
        Hop between successive frames, in seconds.
    input_op_name
        Name of the graph's input features op.
    output_op_name
        Name of the graph's embedding output op.
    embedding_size
        Dimensionality of the output embedding.
    resources_dir
        Directory containing the VGGish checkpoint and PCA parameters.

    Returns
    -------
    coroutine
    """
    params = {
        'frame_win_sec': frame_duration,
        'frame_hop_sec': hop_duration,
        'embedding_size': embedding_size
    }

    if not resources_dir:
        resources_dir = os.path.join(os.path.dirname(__file__),
                                     'vggish/resources')
    pca_params_path = os.path.join(resources_dir, 'vggish_pca_params.npz')
    model_path = os.path.join(resources_dir, 'vggish_model.ckpt')

    try:
        with tf.Graph().as_default(), tf.Session() as sess:
            # Define the model in inference mode, load the checkpoint, and
            # locate input and output tensors.
            vggish_slim.define_vggish_slim(training=False, **params)
            vggish_slim.load_vggish_slim_checkpoint(sess, model_path, **params)

            while True:
                # We use a coroutine to more easily keep the Tensorflow
                # contexts open without having to constantly reload the model.
                audio_path, output_path = (yield)

                if os.path.exists(output_path):
                    continue

                try:
                    examples_batch = vggish_input.wavfile_to_examples(
                        audio_path, **params)
                except ValueError:
                    print("Error opening {}. Skipping...".format(audio_path))
                    continue

                # Prepare a postprocessor to munge the model embeddings.
                pproc = vggish_postprocess.Postprocessor(
                    pca_params_path, **params)

                input_tensor_name = input_op_name + ':0'
                output_tensor_name = output_op_name + ':0'
                features_tensor = sess.graph.get_tensor_by_name(
                    input_tensor_name)
                embedding_tensor = sess.graph.get_tensor_by_name(
                    output_tensor_name)

                # Run inference and postprocessing.
                [embedding_batch] = sess.run(
                    [embedding_tensor],
                    feed_dict={features_tensor: examples_batch})
                emb = pproc.postprocess(
                    embedding_batch, **params).astype(np.float32)

                with gzip.open(output_path, 'wb') as f:
                    emb.dump(f)
    except GeneratorExit:
        pass
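# A minimal sketch of driving the embedding coroutine: prime it with next(),
# then send (audio_path, output_path) pairs; the frame/hop values and file
# paths below are illustrative.
extract = make_extract_vggish_embedding(frame_duration=0.96, hop_duration=0.96)
next(extract)  # run up to the first yield so the model is loaded only once
extract.send(("audio/clip_0001.wav", "embeddings/clip_0001.npy.gz"))
extract.send(("audio/clip_0002.wav", "embeddings/clip_0002.npy.gz"))
extract.close()  # raises GeneratorExit inside, tearing down the TF session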