def main():
    """Print the value range of the melgram computed for every audio sample.

    Iterates over the (audio_path, label) pairs returned by
    ``load_audio_path_label_pairs``, computes each melgram with
    ``compute_melgram`` and prints its maximum and minimum. The label of
    each pair is not used.
    """
    pairs = load_audio_path_label_pairs()
    total = len(pairs)
    for position, (audio_path, _label) in enumerate(pairs, start=1):
        # Progress indicator: 1-based position out of the total pair count.
        print('{} / {} ...'.format(position, total))
        melgram = compute_melgram(audio_path)
        print('max: ', np.max(melgram))
        print('min: ', np.min(melgram))
def main():
    """Run the serialized cifar10 graph on a few shuffled samples.

    Loads the (audio_path, label_id) pairs, shuffles them, imports the
    GraphDef stored at ``./models/tensorflow_models/cifar10/cifar10.pb``
    into the default graph, and prints the predicted versus actual label
    (looked up in ``gtzan_labels``) for at most the first 20 samples.
    """
    audio_path_label_pairs = load_audio_path_label_pairs()
    shuffle(audio_path_label_pairs)
    print('loaded: ', len(audio_path_label_pairs))

    with tf.gfile.FastGFile('./models/tensorflow_models/cifar10/cifar10.pb', 'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
        # name='' imports the nodes without a prefix so the tensor names
        # used below ('output_node0:0', 'conv2d_1_input:0') resolve as-is.
        tf.import_graph_def(graph_def, name='')

    with tf.Session() as sess:
        # Dump every node name of the imported graph.
        for node in sess.graph.as_graph_def().node:
            print(node.name)
        predict_op = sess.graph.get_tensor_by_name('output_node0:0')

        # Slicing (rather than indexing 0..19) keeps this from raising
        # IndexError when fewer than 20 samples were loaded.
        for audio_path, actual_label_id in audio_path_label_pairs[:20]:
            mg = compute_melgram(audio_path)
            # Add a leading axis so the single melgram is fed as a batch of one.
            mg = np.expand_dims(mg, axis=0)
            predicted = sess.run(predict_op, feed_dict={"conv2d_1_input:0": mg})
            predicted_label_idx = np.argmax(predicted, axis=1)[0]
            predicted_label = gtzan_labels[predicted_label_idx]
            actual_label = gtzan_labels[actual_label_id]
            print('predicted: ', predicted_label, 'actual: ', actual_label)
def compute_melgram(self, audio_path):
    """Return the melgram for ``audio_path``, memoised in ``self.cache``.

    On a cache miss the module-level ``compute_melgram`` is called and its
    result is stored under ``audio_path`` before being returned; on a hit
    the stored value is returned without recomputation.
    """
    if audio_path not in self.cache:
        # NOTE: the value is cached as computed; a "(mg + 100) / 200"
        # rescaling step existed here previously but is disabled.
        self.cache[audio_path] = compute_melgram(audio_path)
    return self.cache[audio_path]
def generate_batch(self, audio_paths, labels, batch_size):
    """Yield ``(X, labels_slice)`` batches forever, cycling over the data.

    Each ``X`` is a float32 array of shape
    ``(batch_size, input_shape[0], input_shape[1], input_shape[2])`` whose
    rows are the melgrams of the corresponding ``audio_paths`` entries, and
    ``labels_slice`` is ``labels[start:end]`` for the same index range.
    Only full batches are produced: trailing samples that do not fill a
    complete batch are never yielded.

    Raises:
        ValueError: on the first ``next()`` call, if ``audio_paths`` holds
            fewer than ``batch_size`` items (or ``batch_size`` is negative).
            Without this check the endless loop below would spin without
            ever yielding and the consumer would hang.
    """
    num_batches = len(audio_paths) // batch_size
    if num_batches <= 0:
        raise ValueError(
            'cannot build a batch of size {} from {} audio paths'.format(
                batch_size, len(audio_paths)))

    # The batch shape never changes, so build it once outside the loops.
    batch_shape = (batch_size, self.input_shape[0], self.input_shape[1], self.input_shape[2])

    while True:
        for batch_idx in range(num_batches):
            start = batch_idx * batch_size
            end = start + batch_size
            # A fresh array per batch, so previously yielded batches are
            # never overwritten.
            X = np.zeros(shape=batch_shape, dtype=np.float32)
            for row, audio_path in enumerate(audio_paths[start:end]):
                X[row, :, :, :] = compute_melgram(audio_path)
            yield X, labels[start:end]
def main():
    """Compute the melgram of the example MP3 clip and print its shape."""
    # NOTE: calls to melgram_v1 (writing '../data/output/example_mp3.png')
    # and melgram_v2 on the same file were here previously but are disabled.
    sample_path = '../data/audio_samples/example.mp3'
    melgram = compute_melgram(sample_path)
    print('melgram: ', melgram.shape)
def predict(self, audio_path):
    """Return ``self.model``'s prediction for a single audio file.

    The melgram of ``audio_path`` is computed, given a leading axis so it
    is passed to ``self.model.predict`` as a batch of one, and the first
    (only) entry of the returned predictions is handed back.
    """
    features = compute_melgram(audio_path)
    single_item_batch = np.expand_dims(features, axis=0)
    outputs = self.model.predict(single_item_batch)
    return outputs[0]