class ConstantQ:
    """Callable feature extractor computing a constant-Q transform via yaafe.

    On construction a yaafe ``FeaturePlan`` containing a single ``CQT``
    feature is built and loaded into an ``Engine``. Calling the instance
    with an audio file loads it as a mono signal and returns the engine's
    ``'cqt'`` output, optionally log-compressed.
    """

    def __init__(self, num_bands, fmin, num_octaves, fps, align, log_div,
                 sample_rate=44100, fold=None):
        """Store the configuration and set up the yaafe engine.

        :param num_bands:   passed to yaafe as ``CQTBinsPerOctave``
        :param fmin:        passed to yaafe as ``CQTMinFreq``
        :param num_octaves: passed to yaafe as ``CQTNbOctaves``
        :param fps:         frames per second; the yaafe ``stepSize`` is
                            derived as ``sample_rate // fps``
        :param align:       passed to yaafe as ``CQTAlign``
        :param log_div:     divisor used for logarithmic compression in
                            ``__call__``; a falsy value disables compression
        :param sample_rate: sample rate used for the feature plan and for
                            loading audio
        :param fold:        accepted for interface compatibility; not used
                            by the code in this class
        """
        self.fps = fps
        self.num_bands = num_bands
        self.align = align
        self.fmin = fmin
        self.num_octaves = num_octaves
        self.log_div = log_div
        self.sample_rate = sample_rate

        # imported here so that yaafelib is only needed when an instance is
        # actually created
        from yaafelib import FeaturePlan, Engine

        fp = FeaturePlan(sample_rate=sample_rate)
        fp.addFeature(self._cqt_feature_definition())
        df = fp.getDataFlow()
        self.engine = Engine()
        self.engine.load(df)

    def _cqt_feature_definition(self):
        """Return the yaafe feature definition string for the CQT."""
        # The step size is a number of samples, so it has to be rendered as
        # an integer. Plain `/` yields a float under true division (e.g.
        # 'stepSize=441.0'); floor division keeps the integer-division
        # result regardless of the division semantics in effect.
        step_size = int(self.sample_rate // self.fps)
        return " ".join([
            'cqt: CQT',
            'CQTAlign={}'.format(self.align),
            'CQTBinsPerOctave={}'.format(self.num_bands),
            'CQTMinFreq={}'.format(self.fmin),
            'CQTNbOctaves={}'.format(self.num_octaves),
            'stepSize={}'.format(step_size)
        ])

    @property
    def name(self):
        """Identifier string encoding the configuration of this feature."""
        return 'cqt_fps={}_num-bands={}_align={}_fmin={}_num_oct={}'\
               '_logdiv={}'.format(self.fps, self.num_bands, self.align,
                                   self.fmin, self.num_octaves, self.log_div)

    def __call__(self, audio_file):
        """Compute the (optionally log-compressed) CQT of an audio file.

        :param audio_file: audio file handed to ``mm.audio.signal.Signal``
        :return:           float32 array holding the engine's ``'cqt'``
                           output with one extra all-zero row appended;
                           if ``log_div`` is truthy, the values are
                           ``log(cqt / log_div + 1)``
        """
        audio = mm.audio.signal.Signal(audio_file,
                                       sample_rate=self.sample_rate,
                                       num_channels=1).astype(np.float64)

        # the signal is handed to the engine as a single row
        cqt = self.engine.processAudio(audio.reshape((1, -1)))['cqt']

        # compensate for different padding in madmom vs. yaafe and convert
        # to float32
        cqt = np.vstack((cqt, np.zeros(cqt.shape[1:]))).astype(np.float32)

        if self.log_div:
            return np.log(cqt / self.log_div + 1)
        else:
            return cqt