def spec_from_midi(midi_file):
    """Return the spectrogram belonging to a MIDI file, using on-disk caches.

    Two companion paths are derived from ``midi_file`` by replacing ``'.mid'``
    with ``'.wav'`` (audio) and with ``'.spec.npy'`` (cached spectrogram).

    * If the audio file does not exist, ``render_audio`` is called with
      ``SOUND_FONT_PATH``.
    * If the cached spectrogram exists it is loaded and returned.
    * Otherwise the audio file is run through the processing pipeline, the
      result is transposed, saved to the cache path and returned.

    The processing pipeline is only constructed when the spectrogram actually
    has to be computed, so a cache hit does no processor setup.

    :param midi_file: path of the MIDI file
    :return: the spectrogram array (freshly computed or loaded from the cache)
    """
    # NOTE(review): str.replace substitutes every '.mid' in the path, not only
    # the extension. Kept as-is because render_audio presumably derives its
    # output name the same way -- confirm before switching to os.path.splitext.
    audio_path = midi_file.replace('.mid', '.wav')
    spec_path = midi_file.replace('.mid', '.spec.npy')

    # render audio file from midi if it is not there yet
    if not os.path.isfile(audio_path):
        render_audio(midi_file, sound_font=SOUND_FONT_PATH)

    # cache hit: nothing to compute
    if os.path.isfile(spec_path):
        return np.load(spec_path)

    # cache miss: build the pipeline and compute the spectrogram
    sig_proc = SignalProcessor(num_channels=1,
                               sample_rate=spec_params["sample_rate"])
    fsig_proc = FramedSignalProcessor(frame_size=spec_params["frame_size"],
                                      fps=spec_params["fps"])
    spec_proc = FilteredSpectrogramProcessor(filterbank=LogarithmicFilterbank,
                                             num_bands=12, fmin=60, fmax=6000,
                                             norm_filters=True,
                                             unique_filters=False)
    log_proc = LogarithmicSpectrogramProcessor()
    processor = SequentialProcessor([sig_proc, fsig_proc, spec_proc, log_proc])

    spec = processor.process(audio_path).T

    # spec_path already ends in '.npy', so np.save writes exactly this file
    np.save(spec_path, spec)
    return spec
class MadmomFeatureIteratorV2(FreeSoundDataIteratorBase):
    """
    Data iterator whose features are produced by a Madmom processing pipeline.

    Reference: https://github.com/CPJKU/dcase_task2/blob/master/dcase_task2/prepare_spectrograms.py
    """

    def __init__(self, hparams, dataset: FreeSoundAudioDataset):
        """
        Initialise the base iterator and assemble the Madmom pipeline.

        :param hparams: forwarded unchanged to the base class
        :param dataset: must be a `FreeSoundAudioDataset` instance
        :raises AssertionError: if `dataset` is of a different type
        """
        super().__init__(hparams, dataset)

        if not isinstance(dataset, FreeSoundAudioDataset):
            raise AssertionError("dataset should be FreeSoundAudioDataset")

        # Stages run in list order: signal -> frames -> spectrogram -> log filterbank
        self.processor_version2 = SequentialProcessor([
            SignalProcessor(num_channels=1, sample_rate=32000, norm=True),
            FramedSignalProcessor(frame_size=1024, hop_size=128, origin='future'),
            SpectrogramProcessor(frame_size=1024),
            LogarithmicFilteredSpectrogramProcessor(
                filterbank=LogFilterbank, num_bands=26, fmin=20, fmax=14000),
        ])

    @overrides
    def _user_map_func(self, file_path, label):
        """
        Map an audio file to its features and its label to a one-hot vector.

        :param file_path: audio file handed to the Madmom pipeline
        :param label: label handed to the dataset's `get_one_hot_encoded`
        :return: tuple `(features, one_hot_label)`
        """
        features = self.processor_version2.process(file_path)
        one_hot = self._dataset.get_one_hot_encoded(label)
        return features, one_hot

    @overrides
    def _user_resize_func(self, data, label):
        """
        Fix the tensor shapes after the `tf.py_func` call.

        :param data: feature tensor, reshaped to `[128, 33]`
        :param label: label tensor, reshaped to `[42]`
        :return: tuple `(data, label)` with the shapes above
        """
        return (
            tf.reshape(data, shape=[128, 33]),
            tf.reshape(label, shape=[42]),
        )
class Preprocessor():
    """Compute spectrograms from audio signals using one of two pipelines.

    Version 1 uses librosa (STFT, weighting, mel filterbank); version 2 uses
    the madmom processor chain built in ``__init__``. Optionally the signal is
    normalized and silence-trimmed first, the result is plotted (``test``)
    and/or saved to ``spectrogram_path`` (``dump``).
    """

    def __init__(self, spectrogram_path=None, version=1, test=False,
                 dump=False, preprocessing=True, sample_rate=32000,
                 silence_threshold=40):
        """Store the configuration and build the version-2 pipeline.

        :param spectrogram_path: directory used when ``dump`` is enabled
        :param version: 1 (librosa) or 2 (madmom)
        :param test: if true, print the shape and plot every spectrogram
        :param dump: if true, save every spectrogram with ``np.save``
        :param preprocessing: if true, normalize and trim silence first
        :param sample_rate: sample rate passed to librosa and madmom
        :param silence_threshold: ``top_db`` passed to ``librosa.effects.trim``
        :raises NameError: if ``version`` is neither 1 nor 2
        """
        self._check_version(version)

        self.version = version
        self.spectrogram_path = spectrogram_path
        self.sample_rate = sample_rate
        self.preprocessing = preprocessing
        self.test = test
        self.dump = dump
        self.silence_threshold = silence_threshold

        sig_proc = SignalProcessor(num_channels=1,
                                   sample_rate=self.sample_rate, norm=True)
        fsig_proc = FramedSignalProcessor(frame_size=1024, hop_size=128,
                                          origin='future')
        spec_proc = SpectrogramProcessor(frame_size=1024)
        filt_proc = LogarithmicFilteredSpectrogramProcessor(
            filterbank=LogFilterbank, num_bands=26, fmin=20, fmax=14000)
        processor_pipeline = [sig_proc, fsig_proc, spec_proc, filt_proc]
        self.processor_version2 = SequentialProcessor(processor_pipeline)

    @staticmethod
    def _check_version(version):
        """Raise ``NameError`` unless ``version`` is 1 or 2."""
        if version not in (1, 2):
            raise NameError("version must be 1 or 2")

    def __spectrogram_V1(self, signal, fft_window_size, hop_length,
                         log_spectrogram, n_mels, fmax):
        """Compute a mel spectrogram with librosa.

        :param signal: audio signal passed to ``librosa.stft``
        :param fft_window_size: ``n_fft`` of the STFT
        :param hop_length: hop length of the STFT
        :param log_spectrogram: if true use ``log10(|stft| + 1)``, otherwise
            apply ``librosa.perceptual_weighting`` to the power spectrogram
        :param n_mels: number of mel bands
        :param fmax: upper frequency passed to the mel filterbank
        :return: the mel spectrogram as a numpy array
        """
        # compute stft
        stft = librosa.stft(signal, n_fft=fft_window_size,
                            hop_length=hop_length, win_length=None,
                            window='hann', center=True, pad_mode='reflect')

        # keep only magnitude
        stft = np.abs(stft)

        # spectrogram weighting
        if log_spectrogram:
            stft = np.log10(stft + 1)
        else:
            freqs = librosa.core.fft_frequencies(sr=self.sample_rate,
                                                 n_fft=fft_window_size)
            stft = librosa.perceptual_weighting(stft**2, freqs, ref=1.0,
                                                amin=1e-10, top_db=99.0)

        # apply mel filterbank
        spectrogram = librosa.feature.melspectrogram(S=stft,
                                                     sr=self.sample_rate,
                                                     n_mels=n_mels, fmax=fmax)
        return np.asarray(spectrogram)

    def __spectrogram_V2(self, signal):
        """Run ``signal`` through the madmom pipeline built in ``__init__``."""
        return self.processor_version2.process(signal)

    def normalize_and_trim_silence(self, signal):
        """Normalize ``signal`` and trim silence at its beginning and end.

        :param signal: audio signal
        :return: the normalized, trimmed signal
        """
        # NOTE(review): the original comment said "normalize to -0.1", but the
        # code passes norm=100 to librosa.util.normalize -- confirm the intent.
        signal_normalized = librosa.util.normalize(signal, norm=100)
        # librosa.effects.trim also returns the kept interval; it is not needed
        signal_normalized, _ = librosa.effects.trim(
            signal_normalized, top_db=self.silence_threshold)
        return signal_normalized

    def compute_spectrogram(self, signal, file_name=None):
        """Compute (and optionally plot / save) the spectrogram of ``signal``.

        :param signal: audio signal
        :param file_name: file name used below ``spectrogram_path`` when
            ``dump`` is enabled
        :return: the spectrogram array
        :raises NameError: if ``dump`` is enabled and ``file_name`` is ``None``
        """
        if self.dump and file_name is None:
            raise NameError("A file_name must be specified")

        if self.preprocessing:
            signal = self.normalize_and_trim_silence(signal)

        if self.version == 1:
            spectrogram = self.__spectrogram_V1(signal, fft_window_size=1024,
                                                hop_length=192,
                                                log_spectrogram=False,
                                                n_mels=128, fmax=None)
        else:
            spectrogram = self.__spectrogram_V2(signal)
            # NOTE(review): axes are swapped for the version-2 output only;
            # the plot below labels axis 0 as bins and axis 1 as frames --
            # confirm this matches the intended layout.
            spectrogram = np.swapaxes(spectrogram, 0, 1)

        # plot spectrogram
        if self.test:
            print("Spectrogram Shape:", spectrogram.shape)
            plt.figure("General-Purpose ")
            plt.clf()
            plt.subplots_adjust(right=0.98, left=0.1, bottom=0.1, top=0.99)
            plt.imshow(spectrogram, origin="lower", interpolation="nearest",
                       cmap="viridis")
            plt.xlabel("%d frames" % spectrogram.shape[1])
            plt.ylabel("%d bins" % spectrogram.shape[0])
            plt.colorbar()
            # a single blocking show replaces the former duplicate show calls
            plt.show(block=True)

        if self.dump:
            # save spectrograms; exist_ok avoids the check-then-create race
            os.makedirs(self.spectrogram_path, exist_ok=True)
            spec_file = os.path.join(self.spectrogram_path, file_name)
            np.save(spec_file, spectrogram)

        return spectrogram

    def set_version(self, version):
        """Select the pipeline version.

        :param version: 1 (librosa) or 2 (madmom)
        :raises NameError: if ``version`` is neither 1 nor 2; the current
            version is left unchanged in that case
        """
        self._check_version(version)
        self.version = version

    def set_spectrogram_path(self, spectrogram_path):
        """Set the directory used for saving spectrograms."""
        self.spectrogram_path = spectrogram_path