def main():
    """Print the value range of the mel-spectrogram of every listed audio file.

    Walks the (audio_path, label) pairs returned by
    ``load_audio_path_label_pairs``; the label is ignored. For each file a
    1-based progress line is printed, followed by the maximum and minimum
    of the array returned by ``compute_melgram``.
    """
    path_label_pairs = load_audio_path_label_pairs()
    total = len(path_label_pairs)
    for position, (audio_path, _label) in enumerate(path_label_pairs, start=1):
        print('{} / {} ...'.format(position, total))
        melgram = compute_melgram(audio_path)
        print('max: ', np.max(melgram))
        print('min: ', np.min(melgram))
def compute_melgram(self, audio_path):
    """Return the melgram for ``audio_path``, reusing ``self.cache`` when possible.

    On a cache hit the stored value is returned as-is. On a miss the
    module-level ``compute_melgram`` function (not this method) is called,
    its result is stored in ``self.cache`` under ``audio_path`` and then
    returned.

    :param audio_path: key identifying the audio file; also used as cache key.
    :return: the cached or freshly computed melgram.
    """
    # Guard clause: serve previously computed results straight from the cache.
    if audio_path in self.cache:
        return self.cache[audio_path]

    melgram = compute_melgram(audio_path)
    # NOTE: a rescale of the values, (mg + 100) / 200, was tried at this
    # point and is currently disabled; the raw melgram is what gets cached.
    self.cache[audio_path] = melgram
    return melgram
def main():
    """Compute the melgram of the bundled example clip and print its shape.

    The path produced by ``patch_path('..')`` is appended to ``sys.path``
    before ``compute_melgram`` is imported, so the import is deliberately
    deferred until after that call.
    """
    # Must run before the import below so the package can be resolved.
    sys.path.append(patch_path('..'))
    example_path = patch_path('data/audio_samples/example.mp3')

    from mxnet_audio.library.utility.audio_utils import compute_melgram

    melgram = compute_melgram(example_path)
    print('melgram: ', melgram.shape)
def main():
    """Print the value range of the mel-spectrogram of every listed audio file.

    ``'..'`` is appended to ``sys.path`` first, and ``compute_melgram`` is
    imported only afterwards. Each (audio_path, label) pair from
    ``load_audio_path_label_pairs`` then gets a 1-based progress line plus
    the maximum and minimum of its melgram; the label is ignored.
    """
    sys.path.append('..')
    path_label_pairs = load_audio_path_label_pairs()

    # Deferred import: placed after the sys.path change above.
    from mxnet_audio.library.utility.audio_utils import compute_melgram

    total = len(path_label_pairs)
    for position, (audio_path, _label) in enumerate(path_label_pairs, start=1):
        print('{} / {} ...'.format(position, total))
        melgram = compute_melgram(audio_path)
        print('max: ', np.max(melgram))
        print('min: ', np.min(melgram))
def generate_batch(self, audio_paths, labels, batch_size, shuffled):
    """Endlessly yield (features, labels) mini-batches as MXNet NDArrays.

    The data is cut into ``len(audio_paths) // batch_size`` full batches;
    trailing samples that do not fill a whole batch are never yielded.
    Each pass over the batches optionally visits them in a freshly
    shuffled order (samples inside a batch keep their original order).

    :param audio_paths: sequence of audio file paths, aligned with ``labels``.
    :param labels: sequence of labels; ``labels[start:end]`` is yielded per batch.
    :param batch_size: number of samples per batch.
    :param shuffled: when truthy, the batch order is shuffled on every pass.
    :return: generator of ``(X, y)`` tuples, both created with
        ``nd.array(..., ctx=self.data_ctx)``. ``X`` has shape
        ``(batch_size,) + self.input_shape[:3]`` and dtype float32.
    :raises ValueError: on first iteration, if not even one full batch can
        be formed (previously this case looped forever without yielding).
    """
    num_batches = len(audio_paths) // batch_size
    if num_batches < 1:
        # Without this guard the ``while True`` below would spin forever,
        # because the inner loop would have nothing to iterate over.
        raise ValueError(
            'cannot form a single batch: {} sample(s) with batch_size={}'.format(
                len(audio_paths), batch_size))

    # Loop-invariant: every batch buffer has the same shape.
    batch_shape = (batch_size,
                   self.input_shape[0],
                   self.input_shape[1],
                   self.input_shape[2])

    while True:
        batch_order = list(range(num_batches))
        if shuffled:
            shuffle(batch_order)

        for batch_index in batch_order:
            start = batch_index * batch_size
            end = start + batch_size

            features = np.zeros(shape=batch_shape, dtype=np.float32)
            for row, audio_path in enumerate(audio_paths[start:end]):
                # Presumably compute_melgram returns an array matching
                # self.input_shape -- confirm against its implementation.
                features[row, :, :, :] = compute_melgram(audio_path)

            yield (nd.array(features, ctx=self.data_ctx),
                   nd.array(labels[start:end], ctx=self.data_ctx))
def encode_audio(self, audio_path):
    """Run the model on one audio file and return its output as a NumPy array.

    The melgram of ``audio_path`` gets a leading axis of length 1, is
    converted to an NDArray on ``self.model_ctx`` and passed to
    ``self.model``. The first entry of the model output (converted with
    ``asnumpy``) is returned.

    :param audio_path: path of the audio file to encode.
    :return: ``self.model(...).asnumpy()[0]`` for the single-item input.
    """
    melgram = compute_melgram(audio_path)
    # Add the leading axis so the single sample forms a batch of one.
    single_item_batch = np.expand_dims(melgram, axis=0)
    model_input = nd.array(single_item_batch, ctx=self.model_ctx)
    model_output = self.model(model_input)
    return model_output.asnumpy()[0]
def main():
    """Compute the melgram of the example clip and print its shape."""
    example_path = '../data/audio_samples/example.mp3'

    # Alternative demos, currently disabled:
    #   melgram_v1(audio_file_path, '../data/output/example_mp3.png')
    #   melgram_v2(audio_file_path)
    melgram = compute_melgram(example_path)
    print('melgram: ', melgram.shape)