예제 #1
0
    def generate_batch(self, audio_paths, labels, batch_size, shuffled):
        """Yield ``(features, labels)`` mini-batches in an endless loop.

        Every pass visits each *full* batch exactly once. Trailing items that
        do not fill a whole batch (``len(audio_paths) % batch_size`` of them)
        are never yielded.

        Args:
            audio_paths: Sequence supporting ``len()`` and integer indexing;
                each element is handed to the module-level
                ``compute_melgram`` function.
            labels: Sequence sliced with the same ``[start:end]`` range as
                the batch it accompanies.
            batch_size: Number of items per batch.
            shuffled: If truthy, the order in which batches are visited is
                passed through ``shuffle`` at the start of every pass
                (assumed to shuffle in place, e.g. ``random.shuffle`` --
                confirm against the file's imports). Items are not moved
                between batches.

        Yields:
            A 2-tuple of ``nd.array`` objects created with
            ``ctx=self.data_ctx``: a float32 array of shape
            ``(batch_size, *self.input_shape[:3])`` and the matching slice
            of ``labels``.

        Raises:
            ValueError: On first iteration, if the inputs do not contain at
                least one full batch (including a negative ``batch_size``).
            ZeroDivisionError: On first iteration, if ``batch_size`` is 0.
        """
        num_batches = len(audio_paths) // batch_size
        if num_batches <= 0:
            # Without this guard the inner ``for`` loop would be empty and
            # ``while True`` would spin forever without yielding anything.
            raise ValueError(
                "generate_batch needs at least one full batch: got {} items "
                "for batch_size={}".format(len(audio_paths), batch_size)
            )

        # The batch shape is the same for every batch, so build it once.
        batch_shape = (
            batch_size,
            self.input_shape[0],
            self.input_shape[1],
            self.input_shape[2],
        )

        while True:
            # A fresh ordered list per pass, so each pass is shuffled
            # independently of the previous one.
            batch_order = list(range(num_batches))
            if shuffled:
                shuffle(batch_order)

            for batch_idx in batch_order:
                start = batch_idx * batch_size
                end = start + batch_size

                X = np.zeros(shape=batch_shape, dtype=np.float32)
                for row in range(batch_size):
                    # Assumes each melgram is broadcast-compatible with
                    # self.input_shape[:3] -- TODO confirm.
                    X[row] = compute_melgram(audio_paths[start + row])

                yield (
                    nd.array(X, ctx=self.data_ctx),
                    nd.array(labels[start:end], ctx=self.data_ctx),
                )
예제 #2
0
 def compute_melgram(self, audio_path):
     """Return the melgram for ``audio_path``, memoised in ``self.cache``.

     On a cache hit the stored value is returned as-is. On a miss the
     ``compute_melgram`` function is called, its result is stored under
     ``audio_path`` with no rescaling applied, and that same result is
     returned.
     """
     store = self.cache
     if audio_path not in store:
         # Assuming this def sits in a class body, the bare name resolves to
         # the module-level function, not to this method (no recursion).
         melgram = compute_melgram(audio_path)
         store[audio_path] = melgram
         return melgram
     return store[audio_path]
예제 #3
0
 def encode_audio(self, audio_path):
     """Run ``self.model`` on the melgram of one audio file.

     The melgram from ``compute_melgram(audio_path)`` gets a new leading
     axis, is wrapped in an ``nd.array`` on ``self.model_ctx`` and passed to
     ``self.model``. The output is converted with ``asnumpy()`` and its
     first element along axis 0 is returned.
     """
     # A new axis 0 turns the single melgram into a one-item batch.
     single_batch = np.expand_dims(compute_melgram(audio_path), axis=0)
     model_input = nd.array(single_batch, ctx=self.model_ctx)
     model_output = self.model(model_input)
     # Drop the leading axis again to return just this file's result.
     return model_output.asnumpy()[0]