Esempio n. 1
0
class ConstantQ:
    """Constant-Q spectrogram extractor backed by a yaafe ``CQT`` feature.

    On construction a yaafe feature plan containing a single ``CQT``
    feature is built and loaded into an engine. Calling the instance with
    an audio file runs that engine on the (mono) signal and returns the
    resulting array, optionally log-compressed.

    Parameters
    ----------
    num_bands :
        Passed to yaafe as ``CQTBinsPerOctave``.
    fmin :
        Passed to yaafe as ``CQTMinFreq``.
    num_octaves :
        Passed to yaafe as ``CQTNbOctaves``.
    fps :
        Frames per second; the yaafe ``stepSize`` is derived from it as
        ``sample_rate / fps`` truncated to a whole number of samples.
    align :
        Passed to yaafe as ``CQTAlign``.
    log_div :
        Divisor applied before logarithmic compression, i.e. the output is
        ``log(cqt / log_div + 1)``. A falsy value (``0``, ``None``, ...)
        disables the compression.
    sample_rate :
        Sample rate used for the feature plan and for loading the audio.
    fold :
        Accepted for interface compatibility; not used by this class.
    """

    def __init__(self,
                 num_bands,
                 fmin,
                 num_octaves,
                 fps,
                 align,
                 log_div,
                 sample_rate=44100,
                 fold=None):

        self.fps = fps
        self.num_bands = num_bands
        self.align = align
        self.fmin = fmin
        self.num_octaves = num_octaves
        self.log_div = log_div

        self.sample_rate = sample_rate

        # Imported here rather than at module level, so yaafelib is only
        # needed once an extractor is actually instantiated.
        from yaafelib import FeaturePlan, Engine

        fp = FeaturePlan(sample_rate=sample_rate)
        fp.addFeature(self._cqt_config(num_bands, fmin, num_octaves, fps,
                                       align, sample_rate))

        df = fp.getDataFlow()
        self.engine = Engine()
        self.engine.load(df)

    @staticmethod
    def _cqt_config(num_bands, fmin, num_octaves, fps, align, sample_rate):
        """Return the yaafe feature definition string for the CQT feature."""
        # The step size is a number of samples, so it is written as an
        # integer. Formatting the raw quotient would yield e.g. ``441.0``
        # (or a fractional value) whenever true division is in effect.
        step_size = int(sample_rate / fps)

        return " ".join([
            'cqt: CQT', 'CQTAlign={}'.format(align),
            'CQTBinsPerOctave={}'.format(num_bands),
            'CQTMinFreq={}'.format(fmin),
            'CQTNbOctaves={}'.format(num_octaves),
            'stepSize={}'.format(step_size)
        ])

    @property
    def name(self):
        """Identifier string encoding the extractor's parameters."""
        return 'cqt_fps={}_num-bands={}_align={}_fmin={}_num_oct={}'\
               '_logdiv={}'.format(self.fps, self.num_bands, self.align,
                                   self.fmin, self.num_octaves, self.log_div)

    def __call__(self, audio_file):
        """Compute the CQT of ``audio_file``.

        Parameters
        ----------
        audio_file :
            Audio source handed to ``mm.audio.signal.Signal`` (``mm`` is
            presumably madmom -- confirm against the file's imports).

        Returns
        -------
        numpy array
            The engine's ``'cqt'`` output with one all-zero row appended,
            as float32. If ``log_div`` is truthy the array is compressed
            as ``log(cqt / log_div + 1)``.
        """
        # Load as a single-channel signal at the configured sample rate;
        # the signal is converted to float64 before being passed to yaafe.
        audio = mm.audio.signal.Signal(audio_file,
                                       sample_rate=self.sample_rate,
                                       num_channels=1).astype(np.float64)

        # The engine is fed a (1, num_samples) array.
        cqt = self.engine.processAudio(audio.reshape((1, -1)))['cqt']
        # compensate for different padding in madmom vs. yaafe and convert
        # to float32
        cqt = np.vstack((cqt, np.zeros(cqt.shape[1:]))).astype(np.float32)

        if self.log_div:
            return np.log(cqt / self.log_div + 1)
        else:
            return cqt