def __init__(self, sr=SR, n_fft=NFFT, hop_length=HOP, n_mels=NMEL, n_bins=NBIN, mode='mel'):
    """Configure the time-frequency transform this object will apply.

    Parameters
    ----------
    sr : int
        Audio sample rate in Hz.
    n_fft : int
        FFT window size (used in 'mel' mode only).
    hop_length : int
        Hop size in samples between successive frames.
    n_mels : int
        Number of mel bands (used in 'mel' mode only).
    n_bins : int
        Number of CQT bins (used in 'cqt' mode only).
    mode : str
        Either 'mel' or 'cqt'; selects which nnAudio transform is built.

    Raises
    ------
    ValueError
        If ``mode`` is neither 'mel' nor 'cqt'.  The previous code left
        ``self.spectrogram`` silently unset in that case, which only
        surfaced later as a confusing ``AttributeError``.
    """
    self.sr = sr
    self.n_fft = n_fft
    self.hop_length = hop_length
    self.n_mels = n_mels
    self.n_bins = n_bins
    if mode == 'mel':
        # fmin/fmax bracket the band covered by the mel filterbank.
        self.spectrogram = Spectrogram.MelSpectrogram(
            sr=sr, n_fft=n_fft, n_mels=n_mels, hop_length=hop_length,
            fmin=20, fmax=11000)
    elif mode == 'cqt':
        # 24 bins per octave -> quarter-tone resolution from 22.5 Hz up.
        self.spectrogram = Spectrogram.CQT(
            sr=sr, hop_length=hop_length, fmin=22.5, n_bins=n_bins,
            bins_per_octave=24, pad_mode='constant')
    else:
        # Fail fast instead of leaving self.spectrogram undefined.
        raise ValueError("mode must be 'mel' or 'cqt', got %r" % (mode,))
# NOTE(review): this chunk arrived with all line breaks/indentation collapsed;
# the block structure below (what sits under `if USE_CQT:`) is reconstructed
# and should be confirmed against the original file.  The final `else:` branch
# of cqt() is TRUNCATED in the visible source — its body is missing here.
import nnAudio.Spectrogram as Spec

from plots import plot_cqt
from parameters import *

if USE_CQT:
    # Build the CQT layer once at import time and move it to the configured
    # device; all parameters come from the project-wide `parameters` module.
    cqt_layer = Spec.CQT(sr=FS, hop_length=HOP_LENGTH, fmin=F_MIN,
                         n_bins=N_BINS, bins_per_octave=BINS_PER_OCTAVE,
                         norm=NORM, pad_mode='constant', window=WINDOW)
    cqt_layer.to(DEVICE)

    def cqt(signal, numpy=True, db=True):
        # Frame-center times in seconds: one entry per hop over the signal.
        time_array = np.arange(np.ceil(
            signal.size / HOP_LENGTH).astype(int)) / (FS / HOP_LENGTH)
        signal_tensor = torch.tensor(signal, device=DEVICE, dtype=torch.float)
        # 'wrap' normalization — presumably an nnAudio CQT option; confirm
        # against the installed nnAudio version's API.
        cqt_tensor = cqt_layer(signal_tensor, normalization_type='wrap')
        if db:
            # Amplitude -> dB; EPS guards log10(0).
            cqt_tensor = 20 * torch.log10(cqt_tensor + EPS)
        if numpy:
            # Drop the batch dimension and return a host-side array.
            cqt_array = cqt_tensor.cpu().numpy()[0, :, :]
            torch.cuda.empty_cache()
            return cqt_array, time_array
        else:
# NOTE(review): this chunk arrived with all line breaks/indentation collapsed;
# formatting below is reconstructed.  The leading print/device pair looks like
# the `else:` branch of a CUDA-availability check whose `if` is outside this
# view — confirm.  `get_data()` is TRUNCATED mid-dictionary at the end.
print("Using CPU")
device = "cpu"
aug = Augment(Chords())
# NOTE(review): yaml.load without an explicit Loader is deprecated and unsafe
# on untrusted input — prefer yaml.safe_load; also the file handle from open()
# is never closed (use a `with` block).  Left unchanged here.
config = yaml.load(open("./config/config.yaml"))
sr = config['preprocess']['sample_rate']
hop_size = config['preprocess']['hop_size']
window_size = config['preprocess']['window_size']
song_hz = config['preprocess']['song_hz']
save_dir = config['preprocess']['save_dir']
# Shared CQT front-end: 108 bins at 24 bins/octave starting at 220 Hz,
# i.e. 4.5 octaves — presumably chosen to cover the chord-pitch range.
cqt_layer = Spectrogram.CQT(device=device, sr=sr, hop_length=hop_size,
                            fmin=220, fmax=None, n_bins=108,
                            bins_per_octave=24, norm=1, window='hann',
                            center=True, pad_mode='reflect')
p = Preprocess(sr, hop_size, song_hz, window_size, save_dir, aug, cqt_layer)
num_epochs = config['model'].get('num_epochs')

def get_data():
    # Maps dataset name -> locations of audio and labels.
    # NOTE(review): definition is cut off in the visible source after the
    # "labels" key — the remainder lives outside this chunk.
    datasets = {
        "isophonics-beetles": {
            "mp3": config['preprocess']['data_path'] + "/beetles_albums",
            "labels":