def compute_librosa_features(self, audio_data, feat_name):
    """
    Compute one frame-level feature of a signal using librosa methods.

    :param audio_data: signal
    :param feat_name: feature to compute; one of 'zero_crossing_rate',
        'rmse', 'mfcc', 'spectral_centroid', 'spectral_rolloff' or
        'spectral_bandwidth'
    :return: np array, or None when ``feat_name`` is not one of the names above
    """
    # NOTE: the RMS extractor is called `rmse` in librosa 0.4.3 and 0.5.0 and
    # `rms` in librosa 0.7.0; the feature key stays 'rmse' either way.
    # NOTE(review): an older revision padded a (1, 427) rmse result with four
    # zero columns under librosa 0.5.0 -- presumably no longer needed; confirm.

    # Each entry is a thunk so that only the requested extractor (and the
    # instance attributes it needs) is evaluated.
    extractors = {
        'zero_crossing_rate': lambda: zero_crossing_rate(
            y=audio_data, hop_length=self.FRAME),
        'rmse': lambda: rms(y=audio_data, hop_length=self.FRAME),
        'mfcc': lambda: mfcc(y=audio_data, sr=self.RATE, n_mfcc=13),
        'spectral_centroid': lambda: spectral_centroid(
            y=audio_data, sr=self.RATE, hop_length=self.FRAME),
        'spectral_rolloff': lambda: spectral_rolloff(
            y=audio_data, sr=self.RATE, hop_length=self.FRAME,
            roll_percent=0.90),
        'spectral_bandwidth': lambda: spectral_bandwidth(
            y=audio_data, sr=self.RATE, hop_length=self.FRAME),
    }
    extractor = extractors.get(feat_name)
    if extractor is None:
        return None
    return extractor()
def compute_librosa_features(self, audio_data, feat_name):
    """
    Log the requested feature name, then compute it using librosa methods.

    :param audio_data: signal
    :param feat_name: feature to compute; one of 'zero_crossing_rate',
        'rmse', 'mfcc', 'spectral_centroid', 'spectral_rolloff' or
        'spectral_bandwidth'
    :return: np array, or None when ``feat_name`` is not one of the names above
    """
    # NOTE(review): chroma_cens was disabled in an earlier revision, see
    # http://stackoverflow.com/questions/41896123/librosa-feature-tonnetz-ends-up-in-typeerror
    logging.info('=> Computing {}'.format(feat_name))

    # Thunks keep evaluation lazy: only the selected extractor runs.
    dispatch = {
        'zero_crossing_rate': lambda: zero_crossing_rate(
            y=audio_data, hop_length=self.FRAME),
        'rmse': lambda: rms(y=audio_data, hop_length=self.FRAME),
        'mfcc': lambda: mfcc(y=audio_data, sr=self.RATE, n_mfcc=13),
        'spectral_centroid': lambda: spectral_centroid(
            y=audio_data, sr=self.RATE, hop_length=self.FRAME),
        'spectral_rolloff': lambda: spectral_rolloff(
            y=audio_data, sr=self.RATE, hop_length=self.FRAME,
            roll_percent=0.90),
        'spectral_bandwidth': lambda: spectral_bandwidth(
            y=audio_data, sr=self.RATE, hop_length=self.FRAME),
    }
    compute = dispatch.get(feat_name)
    return compute() if compute is not None else None
def rms_audio(filename, sampling_rate, frame_length, display=True, save=False, savename='XXX'):
    """Load an audio file and compute its frame-wise RMS.

    :param filename: path of the audio file handed to ``load``
    :param sampling_rate: sampling rate requested from ``load``
    :param frame_length: analysis frame length passed to ``rms``
    :param display: when truthy, plot the waveform and the RMS curve on one
        figure and show it
    :param save: when truthy, store the RMS curve with ``np.savez`` under
        the key ``x``
    :param savename: target name handed to ``np.savez``
    :return: first row of the ``rms`` output (one value per frame)
    """
    # loads signal
    signal, sr = load(filename, sr=sampling_rate)
    print('sr', sr)

    # performs the rms; [0] drops the leading axis of the result
    rms_signal = rms(y=signal, frame_length=frame_length)[0]

    # display waveform and rms on a same plot
    if display:
        duration = len(signal) / sr
        # Both curves are spread over the same [0, duration] time axis so
        # they overlay despite having different numbers of points.
        t_wf = np.linspace(0, duration, len(signal))
        t_rms = np.linspace(0, duration, rms_signal.shape[0])
        plt.figure('plot')
        plt.plot(t_wf, signal, label='Waveform')
        plt.plot(t_rms, rms_signal, label='RMS')
        plt.title(f'Waveform and RMS of {filename}')
        plt.legend()
        plt.show()

    if save:
        np.savez(savename, x=rms_signal)
        print('RMS saved')

    return rms_signal
def percussive_ratio(y=None, y_frames=None, n_fft=2048, hop_size=512, margin=1.0):
    """
    Compute ratio of percussive power to total power

    Exactly one input form is needed; when both are given, ``y`` wins.

    :param y: signal, transformed with ``stft``
    :param y_frames: pre-framed signal, transformed with ``frames_stft``
    :param n_fft: FFT size passed to the STFT helper
    :param hop_size: hop length passed to the STFT helper
    :param margin: margin passed to ``hpss``
    :return: tuple ``(ratio_db, P_rms, S_rms)`` where ``P_rms`` is the RMS of
        the percussive magnitude, ``S_rms`` the RMS of the full magnitude and
        ``ratio_db`` is ``amplitude_to_db(P_rms / S_rms)``
    :raises ValueError: if neither ``y`` nor ``y_frames`` is provided
    """
    # NOTE(review): the n_fft / hop_size defaults were annotated
    # "default for 22050" -- presumably tuned for 22050 Hz audio; confirm.
    if y is not None:
        D = stft(y, n_fft=n_fft, hop_length=hop_size)
    elif y_frames is not None:
        D = frames_stft(y_frames, n_fft=n_fft, hop_length=hop_size)
    else:
        # Previously fell through and failed later with UnboundLocalError.
        raise ValueError('percussive_ratio requires either y or y_frames')

    # Only the percussive component and the magnitudes are used below.
    _, P = hpss(D, margin=margin)
    Pm, _ = magphase(P)
    S, _ = magphase(D)

    P_rms = rms(S=Pm)
    S_rms = rms(S=S)
    return amplitude_to_db(P_rms / S_rms), P_rms, S_rms
def _truncate_audio(self, audio):
    """Truncates audio to desired length using a triggering threshold.

    audio -> mono audio signal

    The excerpt starts at the first analysis frame whose RMS level (in dB)
    is not below ``thr`` and is exactly ``T`` samples long. ``T``, ``thr``
    and ``eps`` are names defined outside this method.

    Returns the ``T``-sample slice of ``audio``.
    Raises ValueError if ``audio`` is shorter than ``T`` samples.
    """
    if len(audio) < T:
        raise ValueError('audio too short')

    frame_len = 2048
    # The hop is spelled out (512 is librosa's default) so that the
    # frame-index -> sample conversion below uses the same value as rms().
    hop_len = 512
    level = 20 * np.log10(
        rms(y=audio, frame_length=frame_len, hop_length=hop_len) + eps)[0]

    # Find onset: first frame that is not below the threshold.
    triggered = np.flatnonzero(~(level < thr))
    # If nothing triggers, fall back to the start of the signal instead of
    # indexing past the end of `level`.
    start_idx = int(triggered[0]) * hop_len if triggered.size else 0  # Convert to sample

    # Ensure sufficient length: never start so late that fewer than T
    # samples remain (len(audio) >= T is guaranteed above, so this is >= 0).
    start_idx = min(start_idx, len(audio) - T)

    # Slice from the detected onset; the previous code ignored start_idx
    # and always returned audio[frame_len:frame_len + T].
    return audio[start_idx:start_idx + T]
def find_some_background_noise(rate, y):
    """Return the longest stretch of ``y`` whose frame energy is below average.

    Frame-wise RMS is computed with a 2048-sample frame and a 256-sample hop.
    A frame counts as noise when its energy is below
    ``mean - thresh * std`` of all frame energies (``thresh`` is 0.0, i.e.
    simply below the mean). The longest run of consecutive noise frames is
    located, including a run that lasts until the final frame, and the
    corresponding samples of ``y`` are returned.

    :param rate: sample rate, used only to express positions in seconds in
        the progress messages
    :param y: signal
    :return: slice of ``y`` covering the longest noise run; empty when no
        frame qualifies as noise
    """
    thresh = 0.0
    hop_length = 256
    frame_length = 2048

    energy = rms(y=y, frame_length=frame_length, hop_length=hop_length)
    mean_energy = np.mean(energy)
    std_energy = np.std(energy)
    is_noise = (energy < mean_energy - thresh * std_energy)[0]

    hop_length_secs = hop_length / rate

    # find longest sequence of background noise
    longest_noise_n_frames = 0
    longest_noise_start_frame = -1
    current_noise_n_frames = 0
    current_noise_start_frame = -1

    # A trailing non-noise sentinel closes a run that reaches the last frame;
    # without it such a run was never compared against the longest one.
    for i, current_frame_is_noise in enumerate(np.append(is_noise, False)):
        if current_frame_is_noise:
            if current_noise_n_frames == 0:
                current_noise_start_frame = i
            current_noise_n_frames += 1
        elif current_noise_n_frames > 0:
            # end of noise frames: keep the run if it is the longest so far
            if current_noise_n_frames > longest_noise_n_frames:
                print(
                    f'Found noise of length {current_noise_n_frames} {current_noise_n_frames * hop_length_secs: 3.3} at {hop_length_secs * current_noise_start_frame: 3.3}s'
                )
                longest_noise_n_frames = current_noise_n_frames
                longest_noise_start_frame = current_noise_start_frame
            current_noise_n_frames = 0

    # Number of frames trimmed from each end of the run (currently none).
    pad_frames = 0
    first_sample = (longest_noise_start_frame + pad_frames) * hop_length
    last_sample = (longest_noise_start_frame + longest_noise_n_frames
                   - pad_frames) * hop_length
    return y[first_sample:last_sample]
def feature_extractor(y, sr):
    """Stack several librosa features into one frame-major matrix.

    The rows stacked, in order, are: RMS, spectral centroid, spectral
    bandwidth, spectral rolloff, zero-crossing rate, chroma (STFT) and MFCC.
    The stacked matrix is transposed before being returned, so each row of
    the result corresponds to one column of the stacked features.

    :param y: signal
    :param sr: sampling rate passed to the sr-dependent extractors
    :return: transposed ``np.vstack`` of the features listed above
    """
    print('вошли в процедyрy feature_extractor')
    # librosa is imported lazily, inside the function.
    from librosa import feature as f
    print('либрозy как f загрyзили')

    rows = [
        f.rms(y=y)[0],
        f.spectral_centroid(y=y, sr=sr),
        f.spectral_bandwidth(y=y, sr=sr),
        f.spectral_rolloff(y=y, sr=sr),
        f.zero_crossing_rate(y),
        f.chroma_stft(y=y, sr=sr),
        f.mfcc(y=y, sr=sr),  # mel cepstral coefficients
    ]
    stacked = np.vstack(rows)

    print('feature_extractor закончил работy')
    return stacked.T
def feature_low_energy(wave):
    """Return the fraction of analysis windows with below-average RMS.

    The reference level is the mean, over the first 30 texture windows, of
    each texture window's summed RMS divided by 43 (43 analysis windows form
    one texture window). Every analysis window of the signal is then
    compared against that level.

    :param wave: signal passed to ``feature.rms`` (hop length is the
        ``hop_size`` name defined outside this function)
    :return: share of analysis windows whose RMS is below the reference level
    """
    frame_rms = feature.rms(y=wave, hop_length=hop_size)[0]

    n_texture = 30
    frames_per_texture = 43  # every 43 analysis windows form a texture window

    # float64 storage matches the zero-initialised buffer this replaces.
    texture_levels = np.array(
        [
            np.sum(frame_rms[k * frames_per_texture:(k + 1) * frames_per_texture])
            / frames_per_texture
            for k in range(n_texture)
        ],
        dtype=float,
    )
    reference_level = np.sum(texture_levels) / n_texture

    # analysis windows with rms energy < average of texture windows
    n_below = sum(value < reference_level for value in frame_rms)
    return n_below / len(frame_rms)
def preCompute_music():
    '''
    Compute the CQT features of a music dataset

    For every ``*hits*.wav`` file under ``DATA_DIR/*/audio/dry_mix`` the
    squared-magnitude CQT is computed and the column at the frame of maximum
    RMS is kept. All vectors are written to ``MAIN_DIR/<dataset>.h5`` (one
    dataset per wav file name) and also returned.

    Returns
    -----
    features_dict: {wav_name : CQT_feature_vector}
    '''
    wav_paths = sorted(
        glob.glob(os.path.join(DATA_DIR, "*/audio/dry_mix", "*hits*.wav")))

    HOP_SIZE = 512
    Q = 24
    CQT_OCTAVES = 7

    features_dict = {}
    for wav_path in tqdm(wav_paths):
        # Read audio files (sr=None is forwarded to the loader as-is)
        wav, sr = load(path=wav_path, sr=None)

        # Compute CQTs
        cqt_complex = cqt(y=wav,
                          sr=sr,
                          hop_length=HOP_SIZE,
                          n_bins=Q * CQT_OCTAVES,
                          bins_per_octave=Q,
                          sparsity=1e-6)
        scalogram = np.abs(cqt_complex)**2

        # Find frame of maximum RMS value; both analyses use HOP_SIZE so the
        # frame index is reused directly as a scalogram column.
        wav_rms = rms(y=wav, hop_length=HOP_SIZE)
        rms_argmax = np.argmax(wav_rms)
        frame = scalogram[:, rms_argmax]

        # Store in features_dict, keyed by file name only.
        # NOTE(review): files sharing a base name in different folders would
        # overwrite each other -- confirm names are unique in the dataset.
        wav_key = os.path.basename(wav_path)
        features_dict[wav_key] = frame

    dataset = os.path.basename(DATA_DIR)
    h5py_path = os.path.join(MAIN_DIR, "{}.h5".format(dataset))
    with h5py.File(h5py_path, "w") as f:
        for key, frame in features_dict.items():
            f[key] = frame

    # The docstring has always promised this return value; previously the
    # function returned None.
    return features_dict
def get_signal_stats(signal: np.ndarray, signal_features_config: dict):
    """
    Extracts various statistics from the raw signal

    Parameters:
        signal: np.ndarray - input 1D signal
        signal_features_config: dict - stat. features that should be
            extracted, as ``{signal_view: {stat_key: included}}`` where
            ``signal_view`` is one of 'signal', 'abs', 'diff', 'zero_cross',
            'rms' and the part of ``stat_key`` before the first underscore
            selects the function in ``stat_features``
    Returns:
        features: list - extracted stat. features
        feature_names: list - names of extracted features, formatted as
            ``<signal_view>_<stat_key>``
    Raises:
        KeyError: for an included entry whose signal view or statistic name
            is unknown
    """
    features, feature_names = [], []

    # Derived views are built on first use and cached, so views that the
    # config never requests (e.g. the librosa-based ones) cost nothing.
    builders = {
        'signal': lambda: signal,
        'abs': lambda: np.abs(signal),
        'diff': lambda: np.diff(signal),
        'zero_cross': lambda: zero_crossing_rate(signal)[0],
        # keyword form: recent librosa releases reject a positional signal
        'rms': lambda: rms(y=signal)[0],
    }
    file_types = {}

    for signal_feature_name, config in signal_features_config.items():
        for stat_feature_key, included in config.items():
            if not included:
                continue

            stat_feature_name = stat_feature_key.split('_')[0]
            stat_function = stat_features[stat_feature_name]
            if signal_feature_name not in file_types:
                file_types[signal_feature_name] = builders[signal_feature_name]()
            feature = stat_function(file_types[signal_feature_name])

            # The mode statistic returns a result object with `mode` and
            # `count` fields. np.ravel accepts both array and scalar fields.
            # NOTE(review): presumably scipy.stats.mode, whose fields became
            # scalars for 1D input in newer SciPy -- confirm.
            if stat_feature_key == 'mode_val':
                feature = np.ravel(feature.mode)[0]
            elif stat_feature_key == 'mode_cnt':
                feature = np.ravel(feature.count)[0]

            features.append(feature)
            feature_names.append(f'{signal_feature_name}_{stat_feature_key}')

    return features, feature_names
def feature_extraction_all(signal, sr, n_mfcc, buffer_len, normalization_values):
    """
    Feature extraction interface

    :param signal: audio samples (converted with ``np.array``)
    :param sr: sampling rate passed to the extractors
    :param n_mfcc: number of MFCCs to compute
    :param buffer_len: STFT size; a quarter of it is used as the hop length
    :param normalization_values: normalization values of the dataset, indexed
        by feature name; normalization is applied only when it has more than
        one entry
    :output features: Array of features

    Features are extracted from the incoming audio signal when an onset is
    detected. The output holds the signal length followed by the per-feature
    mean and standard deviation (over frames) of MFCC, RMS, spectral
    centroid, rolloff, bandwidth, contrast and flatness, in that order. An
    empty signal yields an empty array.
    """
    signal = np.array(signal)
    if signal.size == 0:
        return np.array([])

    hop = int(buffer_len / 4)
    S, _ = librosa.magphase(
        librosa.stft(y=signal, n_fft=buffer_len, hop_length=hop))

    # (name stem, frame-wise values, number of normalization columns).
    # A column count of None means the statistic is normalized with the
    # single key '<stem>_<stat>'; otherwise the keys '<stem>_<stat>_1' ..
    # '<stem>_<stat>_<n>' are used.
    descriptors = [
        # Mel Frequency cepstral coefficients
        ('mfcc',
         feature.mfcc(y=signal, sr=sr, n_mfcc=n_mfcc, n_fft=1024,
                      hop_length=256),
         10),
        ('rms',
         feature.rms(S=S, frame_length=buffer_len, hop_length=hop),
         None),
        ('spectral_centroid', feature.spectral_centroid(S=S, sr=sr), None),
        ('spectral_rolloff', feature.spectral_rolloff(S=S, sr=sr), None),
        ('spectral_bandwidth', feature.spectral_bandwidth(S=S, sr=sr), None),
        ('spectral_contrast', feature.spectral_contrast(S=S, sr=sr), 7),
        ('spectral_flatness', feature.spectral_flatness(S=S), None),
    ]

    # Mean then standard deviation over the frame axis, per descriptor.
    statistics = []
    for stem, frames, n_columns in descriptors:
        statistics.append((stem, 'mean', np.mean(frames, axis=1), n_columns))
        statistics.append((stem, 'std', np.std(frames, axis=1), n_columns))

    apply_normalization = len(normalization_values) > 1

    # Duration
    if apply_normalization:
        features = [normalize(len(signal), normalization_values['duration'])]
    else:
        features = [len(signal)]

    for stem, stat, values, n_columns in statistics:
        if apply_normalization:
            key = f'{stem}_{stat}'
            if n_columns is None:
                columns = key
            else:
                columns = [f'{key}_{i}' for i in range(1, n_columns + 1)]
            values = normalize(values, normalization_values[columns])
        features.extend(values)

    return np.array(features)
def get_rms(self):
    """Return the result of ``rms`` applied to the stored magnitude spectrum."""
    return rms(S=self._magnitude_spectrum)
print(crop_feat.shape) print(crop_feat) crop_feat = np.pad(crop_feat, (0, maxlen - len(crop_feat)), mode='constant') print(crop_feat) return crop_feat features = [] feat = mfcc(y, sr, nfilt=10, winstep=0.02) for i in range(0, feat.shape[0] - 10, 5): print(i) x = crop_feature(feat, i, nb_step=10) print(x.shape) features.append(x) print("shape {}".format(librosa.feature.rms(y).shape)) centroid = feature.spectral_centroid(y, sr) print(centroid.shape) bandwidth = feature.spectral_bandwidth(y, sr) print(bandwidth.shape) print(feature.spectral_rolloff(y, sr).shape) print(feature.rms(y).shape) name = np.full(bandwidth.shape, "haha") max = np.concatenate((name.T, centroid.T, bandwidth.T, data.T), axis=1) # # test = np.zeros((41, 10)) # print(test.shape) # test =test[0 : 11] # print(test.shape)
def rms(wave_form, sample_rate, hop_length):
    """Return the transposed frame-wise RMS of a waveform.

    :param wave_form: signal passed to ``feature.rms`` as ``y``
    :param sample_rate: accepted but not used by this function
    :param hop_length: hop length passed to ``feature.rms``
    :return: transpose of the ``feature.rms`` output
    """
    frame_rms = feature.rms(y=wave_form, hop_length=hop_length)
    return frame_rms.T