def extract(args):
    audio_directory, output_directory, af, overwrite = args
    subdir, output_file = os.path.split(af.split(audio_directory)[1])
    output_file = os.path.splitext(output_file)[0]
    output_file = os.path.join(output_directory, output_file)

    if os.path.exists(output_file) and not overwrite:
        print('Skipping {}. Already exists.'.format(output_file))
        return

    output = dict()

    try:
        y, _sr = soundfile.read(af)
        y = to_mono(y)
        sr = 22050
        y = resample(y, _sr, sr)
    except Exception as e:
        y, sr = load(af)

    output['linspec_mag'], output['linspec_phase'] = linspec(y)
    output['melspec'] = melspec(y, sr=sr)
    output['logspec'] = logspec(y, sr=sr)
    output['hcqt_mag'], output['hcqt_phase'] = hcqt(y, sr=sr)
    output['vggish_melspec'] = vggish_melspec(y, sr=sr)

    # high-level
    output['percussive_ratio'], output['percussive_rms'], output['total_rms'] = \
        percussive_ratio(y, margin=3.0)
    output['onset_strength'] = onset_strength(y, detrend=True)
    output['tempogram'] = tempogram(y)
    output['onset_patterns'] = onset_patterns(y, sr=sr)

    np.savez_compressed(output_file, **output)
def findTimbral(wave):  # 29 dimensions with n_mfcc=10 (mean/variance pairs plus low-energy)
    timbral_feature = {}

    centroid = feature.spectral_centroid(wave)
    timbral_feature['mu_centroid'] = np.mean(centroid)
    timbral_feature['var_centroid'] = np.var(centroid, ddof=1)

    rolloff = feature.spectral_rolloff(wave)
    timbral_feature['mu_rolloff'] = np.mean(rolloff)
    timbral_feature['var_rolloff'] = np.var(rolloff, ddof=1)

    flux = onset_strength(wave, lag=1)  # spectral flux
    timbral_feature['mu_flux'] = np.mean(flux)
    timbral_feature['var_flux'] = np.var(flux, ddof=1)

    zero_crossing = feature.zero_crossing_rate(wave)
    timbral_feature['mu_zcr'] = np.mean(zero_crossing)
    timbral_feature['var_zcr'] = np.var(zero_crossing)

    mfccs = feature.mfcc(wave, n_mfcc=10)  # 10 MFCCs -> 20 dims (mean + variance each)
    for i, coef in enumerate(mfccs, start=1):
        timbral_feature['mu_mfcc' + str(i)] = np.mean(coef)
        timbral_feature['var_mfcc' + str(i)] = np.var(coef, ddof=1)

    percent = feature_low_energy(wave)  # 1 dim
    timbral_feature['low_energy'] = percent

    return timbral_feature
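# Usage sketch only (not part of the original snippet). It assumes findTimbral
# and its project-specific feature_low_energy helper are importable, and that
# `feature` / `onset_strength` come from an older librosa release that still
# accepts the audio buffer positionally; 'clip.wav' is a placeholder path.
import librosa

wave, sr = librosa.load('clip.wav', sr=22050, mono=True)  # placeholder path
timbral = findTimbral(wave)
for name in sorted(timbral):
    print('{:>12s}: {:.4f}'.format(name, timbral[name]))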
def findRhythmic(wave):  # rhythmic feature: summed tempogram energy (1 dimension)
    rhythm_feature = {}
    env = onset_strength(wave)
    tempogram = feature.tempogram(onset_envelope=env, hop_length=hop_size)
    rhythm_feature['tempo_sum'] = np.sum(tempogram)
    return rhythm_feature
def initialize_bpf(filename, filepath, only_show=False, rewrite=False):
    wav_filename = audioconvert.convert_to_monowav(filename, filepath)
    timestart = time.time()
    y, sr = load(wav_filename, dtype="float32", res_type=TYPE)
    print("{LOAD TIME}:%f" % (time.time() - timestart))

    # Track the main beats (high tightness snaps beats firmly to the tempo).
    tempo, beats = beat_track(y=y, tightness=100)
    # Track beats again with low tightness; tightness controls how strongly
    # beats lock onto the tempo, so a low value yields looser, noisier beats.
    tempo1, beats1 = beat_track(y=y, tightness=1)

    onset_envelope = onset_strength(y=y)
    rms_envelope = rmse(y=y)  # ----------- RMS ENVELOPE
    tempo = normalize_tempo(tempo)

    MAX_RMS = np.max(rms_envelope)
    AVERAGE_RMS = np.mean(rms_envelope)

    onset_all_beat = []
    frame_all_beat = []
    for beat in beats1:
        onset_all_beat.append(onset_envelope[beat])
        frame_all_beat.append(beat)
    AVERAGE_ONSET = np.mean(onset_all_beat)

    new_frames_list = []
    if not os.path.exists("dat/plt/%s.plt" % filename) or rewrite:
        print("No plt found, initializing...")
        plt_file = open("dat/plt/%s.plt" % filename, mode="w")
        plt_file.write(
            repr((filename, rms_envelope.T.tolist(), onset_all_beat,
                  frame_all_beat, MAX_RMS, AVERAGE_RMS, AVERAGE_ONSET)))
        plt_file.close()

    plt_file = open("dat/plt/%s.plt" % filename, mode="r")
    plt_file_content = eval(plt_file.read())
    plt_process = Process(target=plt_show, args=plt_file_content)
    plt_process.start()

    if not only_show:
        for beat in beats1:
            if onset_envelope[beat] > AVERAGE_ONSET / ONSET_DETECT_RATIO \
                    or rms_envelope.T[beat] > MAX_RMS / RMS_RATIO:
                new_frames_list.append(beat)
        print("{MAX_ONSET}:%f" % onset_envelope.max())

        new_beats_frame = np.array(new_frames_list)
        mainbeatlocation = frames_to_time(beats)
        beatlocation = frames_to_time(new_beats_frame).tolist()

        beatmain = []
        for beat in beatlocation:
            # Distance from this beat to the nearest main beat,
            # i.e. how prominent ("main") this beat is.
            p = abs(mainbeatlocation - beat)
            # print("%f: %f" % (beat, p.min()))
            beatmain.append(p.min())

        file = open("dat/bpf/%s.bpf" % filename, mode="w")
        file.write(
            repr([tempo, beatlocation, beatmain, mainbeatlocation.tolist()]))
        file.close()

        if os.path.exists("dat/%s.wav" % filename):
            os.remove("dat/%s.wav" % filename)

        return "dat/bpf/%s.bpf" % filename
def get_bpm(self):
    """
    Get BPM (Beats Per Minute)
    :return: Integer
    """
    onset_env = onset_strength(self.y, sr=self.sr)
    return int(tempo(onset_envelope=onset_env, sr=self.sr)[0])
def plt_show_solo(filename, filepath):
    wav_filename = audioconvert.convert_to_monowav(filename, filepath)
    timestart = time.time()
    y, sr = load(wav_filename, dtype="float32", res_type=TYPE)
    print("{LOAD TIME}:%f" % (time.time() - timestart))

    # Track beats with low tightness; tightness controls how strongly beats
    # lock onto the tempo, so a low value yields looser, noisier beats.
    tempo1, beats1 = beat_track(y=y, tightness=1)
    onset_envelope = onset_strength(y=y)
    rms_envelope = rmse(y=y)  # ----------- RMS ENVELOPE

    MAX_RMS = np.max(rms_envelope)
    AVERAGE_RMS = np.mean(rms_envelope)

    onset_all_beat = []
    frame_all_beat = []
    for beat in beats1:
        onset_all_beat.append(onset_envelope[beat])
        frame_all_beat.append(beat)
    AVERAGE_ONSET = np.mean(onset_all_beat)

    plt_show(filename, rms_envelope.T, onset_all_beat, frame_all_beat,
             MAX_RMS, AVERAGE_RMS, AVERAGE_ONSET)
def onset_detect(
    y=None,
    sr=22050,
    onset_envelope=None,
    hop_length=512,
    backtrack=False,
    energy=None,
    units="frames",
    normalize=True,
    **kwargs
):
    """Basic onset detector. Locate note onset events by picking peaks in an
    onset strength envelope.

    Modified from `librosa.onset.onset_detect` to add a `normalize` flag.
    The `peak_pick` parameters were chosen by large-scale hyper-parameter
    optimization over the dataset provided by [1]_.

    .. [1] https://github.com/CPJKU/onset_db

    Parameters
    ----------
    y : np.ndarray [shape=(n,)]
        audio time series

    sr : number > 0 [scalar]
        sampling rate of `y`

    onset_envelope : np.ndarray [shape=(m,)] (optional)
        pre-computed onset strength envelope

    hop_length : int > 0 [scalar]
        hop length (in samples)

    units : {'frames', 'samples', 'time'}
        The units to encode detected onset events in.
        By default, 'frames' are used.

    backtrack : bool
        If `True`, detected onset events are backtracked to the nearest
        preceding minimum of `energy`.
        This is primarily useful when using onsets as slice points for
        segmentation.

    energy : np.ndarray [shape=(m,)] (optional)
        An energy function to use for backtracking detected onset events.
        If none is provided, then `onset_envelope` is used.

    normalize : bool (optional)
        If `True`, normalize the onset envelope before peak picking.
        By default this parameter is `True`.

    kwargs : additional keyword arguments
        Additional parameters for peak picking.
        See `librosa.util.peak_pick` for details.

    Returns
    -------
    onsets : np.ndarray [shape=(n_onsets,)]
        estimated positions of detected onsets, in whichever units
        are specified. By default, frame indices.

        .. note::
            If no onset strength could be detected, onset_detect returns
            an empty list.

    Raises
    ------
    ParameterError
        if neither `y` nor `onset_envelope` are provided,
        or if `units` is not one of 'frames', 'samples', or 'time'
    """
    # First, get the frame->beat strength profile if we don't already have one
    if onset_envelope is None:
        if y is None:
            raise ParameterError("y or onset_envelope must be provided")
        onset_envelope = onset_strength(y=y, sr=sr, hop_length=hop_length)

    # Shift onset envelope up to be non-negative
    # (a common normalization step to make the threshold more consistent)
    onset_envelope -= onset_envelope.min()

    # Do we have any onsets to grab?
    if not onset_envelope.any():
        return np.array([], dtype=int)

    if normalize:
        # Normalize onset strength function to [0, 1] range
        onset_envelope /= onset_envelope.max()

    # These parameter settings found by large-scale search
    kwargs.setdefault("pre_max", 0.03 * sr // hop_length)       # 30ms
    kwargs.setdefault("post_max", 0.00 * sr // hop_length + 1)  # 0ms
    kwargs.setdefault("pre_avg", 0.10 * sr // hop_length)       # 100ms
    kwargs.setdefault("post_avg", 0.10 * sr // hop_length + 1)  # 100ms
    kwargs.setdefault("wait", 0.03 * sr // hop_length)          # 30ms
    kwargs.setdefault("delta", 0.07)

    # Peak pick the onset envelope
    onsets = util.peak_pick(onset_envelope, **kwargs)

    # Optionally backtrack the events
    if backtrack:
        if energy is None:
            energy = onset_envelope
        onsets = onset_backtrack(onsets, energy)

    if units == "frames":
        pass
    elif units == "samples":
        onsets = core.frames_to_samples(onsets, hop_length=hop_length)
    elif units == "time":
        onsets = core.frames_to_time(onsets, hop_length=hop_length, sr=sr)
    else:
        raise ParameterError("Invalid unit type: {}".format(units))

    return onsets
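# Usage sketch (not part of the original module): exercises the modified
# onset_detect above. It assumes the same imports the function relies on
# (librosa's core, util, onset_strength, onset_backtrack, ParameterError)
# are in scope, and 'example.wav' is a placeholder path.
import librosa

y, sr = librosa.load('example.wav', sr=22050, mono=True)  # placeholder path

# Onsets in seconds, backtracked to the preceding energy minimum.
onset_times = onset_detect(y=y, sr=sr, units='time', backtrack=True)
print('Detected {} onsets; first few: {}'.format(len(onset_times), onset_times[:5]))

# Disable the added normalize flag to peak-pick the raw (shifted) envelope.
onset_frames = onset_detect(y=y, sr=sr, normalize=False)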
def compute_tempo(y, sr):
    return float(tempo(onset_envelope=onset_strength(y, sr=sr), sr=sr))
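# Usage sketch: 'track.wav' is a placeholder path, and tempo / onset_strength
# are assumed to be librosa.beat.tempo and librosa.onset.onset_strength,
# as used in the snippet above.
import librosa

y, sr = librosa.load('track.wav', sr=22050)
print('Estimated tempo: {:.1f} BPM'.format(compute_tempo(y, sr)))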