def _is_energy_active(self, frame):
    """Tell whether the short-term energy of ``frame`` exceeds the threshold.

    The frame energy is computed once with ``stEnergy`` and reused for both
    the debug output and the decision.

    Args:
        frame: The frame passed to ``stEnergy``.

    Returns:
        The result of ``energy > self.energy_k * self.energy_thresh``
        (strictly greater than).
    """
    frame_energy = stEnergy(frame)
    # Lazy %-formatting: the message is only rendered if DEBUG is enabled.
    self.logger.debug("Frame energy: %s", frame_energy)
    self.logger.debug("Energy threshold %s", self.energy_thresh)
    return frame_energy > self.energy_k * self.energy_thresh
def _inactive_st_energies(self):
    """Collect the short-term energy of each frame in ``self.noise_frames``.

    Returns:
        A ``np.uint32`` array of length ``self.noise_buf_len`` whose i-th
        entry is ``stEnergy(self.noise_frames[i])``.
    """
    frame_count = self.noise_buf_len
    # NOTE(review): the uint32 buffer drops any fractional part of the
    # energy values on assignment -- confirm this is intended.
    energies = np.zeros(frame_count, dtype=np.uint32)
    for idx in range(frame_count):
        energies[idx] = stEnergy(self.noise_frames[idx])
    return energies
def _inactive_spectral_energy_bands(self):
    """Compute per-band energies for each frame in ``self.noise_frames``.

    Every frame is split with ``self._get_spectral_bands`` and the energy of
    each resulting band is measured with ``stEnergy``.

    Returns:
        A ``np.float64`` array of shape
        ``(self.noise_buf_len, self.spectral_bands)``; entry ``[i, j]`` is the
        energy of band ``j`` of frame ``i``.
    """
    n_frames = self.noise_buf_len
    n_bands = self.spectral_bands
    energies = np.zeros((n_frames, n_bands), dtype=np.float64)
    for row in range(n_frames):
        frame_bands = self._get_spectral_bands(self.noise_frames[row])
        for col in range(n_bands):
            energies[row, col] = stEnergy(frame_bands[col])
    return energies
def _function(self, recording):
    """Compute a 14-column feature matrix, one row per frame of ``recording``.

    Column layout of the result:
        0      zero-crossing rate       (``aF.stZCR``)
        1      short-term energy        (``aF.stEnergy``)
        2      energy entropy           (``aF.stEnergyEntropy``)
        3-4    spectral centroid/spread (``aF.stSpectralCentroidAndSpread``)
        5      spectral entropy         (``aF.stSpectralEntropy``)
        6      spectral roll-off, 0.85  (``aF.stSpectralRollOff``)
        7      spectral flux            (``aF.stSpectralFlux``)
        8-13   ``HandCrafted.formants(frame) / 1000``

    Args:
        recording: 2-D array indexed as ``recording[frame_index, :]``.

    Returns:
        ``np.float32`` array of shape ``(recording.shape[0], 14)``.
    """
    time_frames = recording.shape[0]
    features = np.zeros([time_frames, 14], np.float32)

    X_prev = None
    for t in range(time_frames):
        frame = recording[t, :]
        X = np.abs(np.fft.rfft(frame))
        if X_prev is None:
            # First frame: there is no earlier spectrum, so the flux is taken
            # against an all-zero spectrum with the same shape as X.
            X_prev = np.zeros_like(X)

        features[t, 0] = aF.stZCR(frame)
        features[t, 1] = aF.stEnergy(frame)
        features[t, 2] = aF.stEnergyEntropy(frame)
        features[t, 3:5] = aF.stSpectralCentroidAndSpread(X + 2e-12, self.sr)
        features[t, 5] = aF.stSpectralEntropy(X)
        features[t, 6] = aF.stSpectralRollOff(X, 0.85, self.sr)
        features[t, 7] = aF.stSpectralFlux(X, X_prev)
        # division for normalization (results in kHz)
        features[t, 8:14] = HandCrafted.formants(frame) / 1000

        X_prev = X
    return features
def _is_spectral_energy_active(self, frame):
    """Decide whether ``frame`` is active based on its spectral band energies.

    The frame is split with ``self._get_spectral_bands`` and each band's
    energy is measured with ``stEnergy``. A band counts as active when its
    energy is strictly greater than
    ``self.spectral_energy_bands_thresh[i] * self.spectral_energy_bands_k``.

    Args:
        frame: The frame passed to ``self._get_spectral_bands``.

    Returns:
        True only if band 0 is active and at least two of the remaining
        bands are active; False otherwise.
    """
    bands = self._get_spectral_bands(frame)
    n_bands = self.spectral_bands
    # Sized by the configured band count (not a fixed 4) so that any number
    # of bands can be stored without running past the end of the buffer.
    bands_mean = np.zeros((n_bands,), np.float64)
    for i in range(n_bands):
        bands_mean[i] = stEnergy(bands[i])

    thresholds = self.spectral_energy_bands_thresh
    scale = self.spectral_energy_bands_k
    self.logger.debug("Spectral bands energies: %s", bands_mean)
    self.logger.debug("Spectral bands thresholds %s", thresholds)

    # Band 0 acts as a gate. ``not (a > b)`` rather than ``a <= b`` so that a
    # NaN energy is treated as inactive.
    if n_bands == 0 or not (bands_mean[0] > thresholds[0] * scale):
        return False

    active_bands = 0
    for i in range(1, n_bands):
        if bands_mean[i] > thresholds[i] * scale:
            active_bands += 1
            if active_bands >= 2:
                return True
    return False
def get_Energy(y):
    """
    Energy: the sum of squares of the signal values, normalized by the
    respective frame length.

    Thin wrapper that forwards ``y`` to ``af.stEnergy`` and returns its
    result unchanged.
    """
    energy = af.stEnergy(y)
    return energy
def stFeatureExtraction(signal, fs, win, step, feats):
    """
    This function implements the short-term windowing process. For each
    short-term window a set of features is extracted.
    This results in a sequence of feature vectors, stored in a numpy matrix.

    ARGUMENTS
        signal:       the input signal samples
        fs:           the sampling freq (in Hz)
        win:          the short-term window size (in samples)
        step:         the short-term window step (in samples)
        feats:        collection of feature groups to compute; the groups
                      recognised here are "spectral", "mfcc", "gfcc" and
                      "chroma"
    RETURNS
        st_features:   a numpy array (n_feats x numOfShortTermWindows); it has
                       zero columns when the signal is shorter than one window
        feature_names: list with one name per feature row, in row order

    RAISES
        ValueError:    if ``step`` is not positive (the window position would
                       never advance)
    """
    if "gfcc" in feats:
        ngfcc = 22
        gfcc = getGfcc.GFCCFeature(fs)
    else:
        ngfcc = 0

    n_mfcc_feats = 13 if "mfcc" in feats else 0

    win = int(win)
    step = int(step)
    if step <= 0:
        raise ValueError("step must be a positive number of samples")

    # Signal normalization: scale by 2**15, remove the DC offset and divide
    # by the peak magnitude (the small constant avoids division by zero).
    signal = numpy.double(signal)
    signal = signal / (2.0**15)
    DC = signal.mean()
    MAX = (numpy.abs(signal)).max()
    signal = (signal - DC) / (MAX + 0.0000000001)

    N = len(signal)  # total number of samples
    cur_p = 0
    nFFT = int(win / 2)

    # compute the triangular filter banks used in the mfcc calculation
    [fbank, freqs] = mfccInitFilterBanks(fs, nFFT)

    n_harmonic_feats = 0

    feature_names = []
    if "spectral" in feats:
        n_time_spectral_feats = 8
        feature_names += [
            "zcr",
            "energy",
            "energy_entropy",
            "spectral_centroid",
            "spectral_spread",
            "spectral_entropy",
            "spectral_flux",
            "spectral_rolloff",
        ]
    else:
        n_time_spectral_feats = 0
    if "mfcc" in feats:
        feature_names += [
            "mfcc_{0:d}".format(mfcc_i)
            for mfcc_i in range(1, n_mfcc_feats + 1)
        ]
    if "gfcc" in feats:
        feature_names += [
            "gfcc_{0:d}".format(gfcc_i) for gfcc_i in range(1, ngfcc + 1)
        ]
    if "chroma" in feats:
        nChroma, nFreqsPerChroma = stChromaFeaturesInit(nFFT, fs)
        n_chroma_feats = 13
        feature_names += [
            "chroma_{0:d}".format(chroma_i)
            for chroma_i in range(1, n_chroma_feats)
        ]
        feature_names.append("chroma_std")
    else:
        n_chroma_feats = 0

    n_total_feats = (n_time_spectral_feats + n_mfcc_feats +
                     n_harmonic_feats + n_chroma_feats + ngfcc)

    # Row offsets of each feature group inside a feature vector.
    mfcc_start = n_time_spectral_feats
    gfcc_start = mfcc_start + n_mfcc_feats
    chroma_start = gfcc_start + ngfcc
    chroma_std_row = chroma_start + n_chroma_feats - 1

    st_features = []
    X_prev = None
    # for each short-term window until the end of signal
    while cur_p + win - 1 < N:
        x = signal[cur_p:cur_p + win]  # get current window
        cur_p = cur_p + step  # update window position
        X = abs(fft(x))  # get fft magnitude
        X = X[0:nFFT]  # keep the first nFFT bins
        X = X / len(X)  # normalize fft
        if X_prev is None:
            # First window: no earlier spectrum exists, so the flux is
            # computed against the window's own spectrum.
            X_prev = X.copy()
        curFV = numpy.zeros((n_total_feats, 1))
        if "spectral" in feats:
            curFV[0] = stZCR(x)  # zero crossing rate
            curFV[1] = stEnergy(x)  # short-term energy
            curFV[2] = stEnergyEntropy(x)  # short-term entropy of energy
            # spectral centroid and spread
            [curFV[3], curFV[4]] = stSpectralCentroidAndSpread(X, fs)
            curFV[5] = stSpectralEntropy(X)  # spectral entropy
            curFV[6] = stSpectralFlux(X, X_prev)  # spectral flux
            curFV[7] = stSpectralRollOff(X, 0.90, fs)  # spectral rolloff
        if "mfcc" in feats:
            curFV[mfcc_start:gfcc_start, 0] = \
                stMFCC(X, fbank, n_mfcc_feats).copy()  # MFCCs
        if "gfcc" in feats:
            curFV[gfcc_start:chroma_start, 0] = gfcc.get_gfcc(x)
        if "chroma" in feats:
            chromaNames, chromaF = stChromaFeatures(X, fs, nChroma,
                                                    nFreqsPerChroma)
            curFV[chroma_start:chroma_std_row] = chromaF
            curFV[chroma_std_row] = chromaF.std()
        st_features.append(curFV)
        X_prev = X.copy()  # keep previous fft mag (used in spectral flux)

    if not st_features:
        # Signal shorter than one window: numpy.concatenate would raise on an
        # empty list, so return an empty matrix with the right row count.
        return numpy.zeros((n_total_feats, 0)), feature_names

    st_features = numpy.concatenate(st_features, 1)
    return st_features, feature_names
def feature_engineer(self, audio_data):
    """
    Extract per-batch audio features and average them over all batches.

    The signal is cut into batches; for each batch the zero-crossing rate,
    energy, spectral centroid/spread and spectral roll-off are computed via
    the ``af`` helpers and MFCCs via ``psf.mfcc``. The per-batch values are
    stacked into one matrix and averaged (arithmetic mean) along the batch
    axis.

    Row order before averaging: zcr, energy, spectral spread (bandwidth),
    spectral centroid, spectral roll-off, followed by the MFCC rows (the
    MFCC accumulator starts with 13 rows).

    :param audio_data: the input signal samples (the original documentation
        states a 44.1 kHz sampling rate -- confirm against the caller)
    :return: a tuple ``(features, self.label)`` where ``features`` is the
        mean feature vector as a single-row numpy array
        (1 x numOfFeatures)
    """
    # Floor division keeps the batch count an int (plain "/" yields a float
    # on Python 3, which range() rejects).
    loop_length = len(audio_data) // self.FRAME

    zcr_feat = []
    rmse_feat = []
    spectral_bandwidth_feat = []
    spectral_centroid_feat = []
    spectral_rolloff_feat = []
    # Seeded with an empty 13-row block so the result keeps 13 MFCC rows
    # even when no batch is processed.
    mfcc_chunks = [np.empty(shape=[13, 0])]

    for i in range(loop_length):
        # NOTE(review): both the number of batches and the batch size are
        # ``loop_length``, so the batches only tile the whole signal when
        # len(audio_data) == loop_length ** 2 -- confirm this is intended.
        start = i * loop_length
        audio_data_batch = audio_data[start:start + loop_length]

        zcr_feat.append(af.stZCR(audio_data_batch))
        rmse_feat.append(af.stEnergy(audio_data_batch))

        mfcc_batch = psf.mfcc(audio_data_batch, self.RATE, nfft=1103)
        mfcc_chunks.append(np.transpose(mfcc_batch))

        # NOTE(review): the raw batch (not a spectrum) is handed to the
        # spectral helpers, as in the original code -- verify.
        centroid_and_spread = af.stSpectralCentroidAndSpread(
            audio_data_batch, self.RATE)
        spectral_centroid_feat.append(centroid_and_spread[0])
        spectral_bandwidth_feat.append(centroid_and_spread[1])

        spectral_rolloff_feat.append(
            af.stSpectralRollOff(audio_data_batch, 0.90, self.RATE))

    # One concatenate instead of growing the MFCC matrix on every iteration.
    mfcc_feat = np.concatenate(mfcc_chunks, axis=1)

    concat_feat = np.array([
        zcr_feat,
        rmse_feat,
        spectral_bandwidth_feat,
        spectral_centroid_feat,
        spectral_rolloff_feat,
    ])
    concat_feat = np.concatenate((concat_feat, mfcc_feat), axis=0)

    return np.mean(concat_feat, axis=1, keepdims=True).transpose(), self.label
def test_power(self):
    """Check ``temporal.power`` against ``extractor.stEnergy``.

    For every entry of the test database, the two implementations must
    agree according to ``assertAlmostEqual``.
    """
    for _, samples in self.__database:
        actual = temporal.power(samples)
        expected = extractor.stEnergy(samples)
        self.assertAlmostEqual(actual, expected)
def percentile(data, p):
    """Return the ``p``-th percentile of ``data`` via ``np.percentile``."""
    return np.percentile(np.array(data), p)


def spectral_energy(data):
    """Return the element-wise square of the FFT of ``data``.

    NOTE(review): this squares the complex FFT values themselves, not their
    magnitudes, so the result is complex -- confirm this is intended.
    """
    spectrum = fft(data)
    return np.square(spectrum)


def fft(data):
    """Return the discrete Fourier transform of ``data`` (``np.fft.fft``)."""
    return np.fft.fft(np.array(data))


if __name__ == '__main__':
    # Quick manual check: print each statistic for the integers -50..99.
    b = range(-50, 100)
    print(median(b))
    print(mean(b))
    print(percentile(b, 25))
    print(percentile(b, 75))
    print(standard_deviation(b))

    bb = np.array(b)
    print(audioFeatureExtraction.stEnergy(bb))
    print(audioFeatureExtraction.stZCR(bb))