def feature_allframes(input_features, frame_indexer=None):
    """Compute scaled mel-band difference features for beat-aligned frames.

    Frame ``i`` is the audio between ``beats[i]`` and ``beats[i + 1]``; its
    ``NUMBER_BANDS`` mel band energies are computed from the windowed
    magnitude spectrum.  For every index ``i`` in ``frame_indexer`` the
    feature row is the concatenation of four difference vectors, each of
    length ``NUMBER_BANDS``:

    * ``bands[i + 1] - bands[i]``
    * ``bands[i + 2] - bands[i]``
    * ``bands[i + 3] - bands[i]``
    * ``bands[i] - bands[i - 1]``

    Args:
        input_features: Mapping with an ``'audio'`` entry (sample sequence)
            and a ``'beats'`` entry (beat positions; they are multiplied by
            44100 to obtain sample indices, so presumably seconds at a
            44.1 kHz sample rate -- confirm with the caller).
        frame_indexer: Indices of the frames to return features for.  It is
            iterated more than once and used to index a numpy array, so it
            must be a re-iterable sequence of ints (e.g. ``range`` or list).
            Defaults to ``range(4, len(beats) - 4)``.

    Returns:
        The result of ``preprocessing.scale`` applied to an array of shape
        ``(len(frame_indexer), 4 * NUMBER_BANDS)``.

    Raises:
        IndexError: If an explicit ``frame_indexer`` contains an index
            larger than ``len(beats) - 5``, because a required neighbouring
            frame or beat does not exist.
    """
    audio = input_features['audio']
    beats = input_features['beats']
    num_beats = len(beats)
    sample_rate = 44100

    # Initialise the algorithms.
    w = Windowing(type='hann')
    spectrum = Spectrum()  # Magnitude spectrum; the complex FFT is not needed.
    melbands = MelBands(numberBands=NUMBER_BANDS)

    if frame_indexer is None:
        # A feature row for frame i needs the band energies of frames
        # i - 1 .. i + 3, and frame j needs beats[j + 1] as its end boundary,
        # so the last usable index is len(beats) - 5.  The previous default,
        # range(4, len(beats) - 1), always ran past the end of `beats`.
        frame_indexer = range(4, num_beats - 4)

    # One row of mel band energies per beat-delimited frame.
    mel_bands = np.zeros((num_beats, NUMBER_BANDS))
    # Four difference vectors per frame, stored side by side.
    band_diffs = np.zeros((num_beats, NUMBER_BANDS * 4))

    # Step 1: compute the band energies of every frame that is referenced by
    # at least one requested index (the frame itself, its predecessor and its
    # three successors).
    needed_frames = {
        j
        for i in frame_indexer
        for j in range(i - 1, i + 4)
        if 0 <= j < num_beats
    }
    for i in sorted(needed_frames):
        start_sample = int(beats[i] * sample_rate)
        end_sample = int(beats[i + 1] * sample_rate)
        # Drop one sample when needed so the frame length is even.
        if (end_sample - start_sample) % 2 != 0:
            end_sample -= 1
        frame = audio[start_sample:end_sample]
        mel_bands[i] = melbands(spectrum(w(frame)))

    # Step 2: differences between each requested frame and its neighbours.
    nb = NUMBER_BANDS
    for i in frame_indexer:
        current = mel_bands[i]
        row = band_diffs[i]
        row[0 * nb:1 * nb] = mel_bands[i + 1] - current
        row[1 * nb:2 * nb] = mel_bands[i + 2] - current
        row[2 * nb:3 * nb] = mel_bands[i + 3] - current
        row[3 * nb:4 * nb] = current - mel_bands[i - 1]

    result = band_diffs[frame_indexer]
    return preprocessing.scale(result)
def create_analyzers(fs=44100.0, nhop=512, nffts=(1024, 2048, 4096), mel_nband=80, mel_freqlo=27.5, mel_freqhi=16000.0):
    """Build one (window, spectrum, mel) analyzer triple per FFT size.

    Args:
        fs: Sample rate passed to ``MelBands`` as ``sampleRate``.
        nhop: Accepted for interface compatibility; not used by this function.
        nffts: Iterable of FFT sizes.  The default is an immutable tuple so
            the shared default value can never be modified between calls.
        mel_nband: Number of mel bands (``numberBands``).
        mel_freqlo: Lower frequency bound of the mel filterbank.
        mel_freqhi: Upper frequency bound of the mel filterbank.

    Returns:
        A list of ``(Windowing, Spectrum, MelBands)`` tuples, in the same
        order as ``nffts``.
    """
    analyzers = []
    for nfft in nffts:
        window = Windowing(size=nfft, type='blackmanharris62')
        spectrum = Spectrum(size=nfft)
        mel = MelBands(
            # The mel filterbank input has nfft // 2 + 1 bins.
            inputSize=(nfft // 2) + 1,
            numberBands=mel_nband,
            lowFrequencyBound=mel_freqlo,
            highFrequencyBound=mel_freqhi,
            sampleRate=fs,
        )
        analyzers.append((window, spectrum, mel))
    return analyzers
def mel40_analyzer():
    """Return a callable that extracts log mel band energies from samples.

    The returned ``analyzer(samples)`` cuts ``samples`` into frames of 256
    samples with a hop of 160, applies a 'blackmanharris62' window, takes the
    spectrum, computes 40 mel bands (27.5 Hz to 8000 Hz, configured for a
    16000 Hz sample rate), and returns the natural log of the band energies
    (offset by 1e-16 to avoid log(0)) as an array with one row per frame.
    """
    frame_size = 256
    hop_size = 160

    windowing = Windowing(size=frame_size, type='blackmanharris62')
    to_spectrum = Spectrum(size=frame_size)
    to_mel = MelBands(
        inputSize=129,
        numberBands=40,
        lowFrequencyBound=27.5,
        highFrequencyBound=8000.0,
        sampleRate=16000.0,
    )

    def analyzer(samples):
        # One log-mel vector per generated frame, stacked row-wise.
        log_mels = [
            np.log(to_mel(to_spectrum(windowing(chunk))) + 1e-16)
            for chunk in FrameGenerator(samples, frame_size, hop_size)
        ]
        return np.array(log_mels)

    return analyzer