def get_transients(audio, metadata, frame_size=256, hop_size=128, num_partials=10):
    '''
    Locate transient regions in a given audio signal.

    A transient starts at each onset. Its end is found by:
    * taking the largest stability peak following the onset,
    * absorbing neighbouring peaks into the same cluster (each within
      10 hops of the previous one and at least 25% of the largest
      peak's value),
    * stopping at the next minima after the last peak in the cluster,
      capped at 200 ms past the onset.

    Returns a list of dictionaries of the form:

    {'start': <int>, 'end': <int>}
    '''
    fs = int(metadata.get('sampling_rate', 44100))
    stability = get_stability(audio, metadata, frame_size, hop_size,
                              num_partials)
    peaks = util.find_peaks(stability, np.mean(stability))
    ms_200 = (fs / 1000) * 200  # 200 ms expressed in samples

    transients = []
    for onset in metadata['onsets']:
        region = {'start': onset, 'end': onset}

        # only consider stability peaks within 200 ms after this onset
        nearby = [
            p for p in peaks
            if p >= (onset / hop_size) and (p * hop_size) - onset <= ms_200
        ]
        if not nearby:
            continue

        # largest stability peak after the onset (first one on ties)
        best = max(range(len(nearby)), key=lambda i: stability[nearby[i]])
        main_peak = nearby[best]
        main_value = stability[main_peak]

        # cluster together peaks that sit close to the running last peak
        # and are at least 25% of the main peak's value
        cluster = []
        last = main_peak
        for p in nearby:
            if np.abs(p - last) <= 10 and stability[p] >= 0.25 * main_value:
                cluster.append(p)
                last = p
        last = cluster[-1] if cluster else main_peak

        region['end'] = min(
            onset + ms_200,
            util.next_minima(stability, last) * hop_size)
        transients.append(region)
    return transients
def get_transients(audio, metadata, frame_size=256, hop_size=128, num_partials=10):
    """
    Return a list of transient regions in a given audio signal.

    Each transient starts at an onset. The end point is chosen by
    locating the largest stability peak after the onset, pulling in any
    neighbouring peaks (within 10 hops of the previous one and at least
    25% of the largest peak's value), and stopping at the minima that
    follows the last of them -- never more than 200 ms past the onset.

    Returns a list of dictionaries of the form:

    {'start': <int>, 'end': <int>}
    """
    sr = int(metadata.get("sampling_rate", 44100))
    stability = get_stability(audio, metadata, frame_size, hop_size,
                              num_partials)
    all_peaks = util.find_peaks(stability, np.mean(stability))
    window = (sr / 1000) * 200  # 200 ms in samples

    results = []
    for onset in metadata["onsets"]:
        # restrict to peaks within 200 ms after the onset
        candidates = [p for p in all_peaks
                      if p >= onset / hop_size
                      and p * hop_size - onset <= window]
        if not candidates:
            continue

        region = {"start": onset, "end": onset}

        # largest stability peak following the onset (first on ties)
        biggest = candidates[0]
        for c in candidates[1:]:
            if stability[c] > stability[biggest]:
                biggest = c
        threshold = 0.25 * stability[biggest]

        # walk through the candidates, chaining together any peak that
        # lies within 10 hops of the previous cluster member and clears
        # the 25% threshold; `tail` ends on the last cluster member (or
        # on the biggest peak if nothing qualified)
        tail = biggest
        for c in candidates:
            if np.abs(c - tail) <= 10 and stability[c] >= threshold:
                tail = c

        region["end"] = min(onset + window,
                            util.next_minima(stability, tail) * hop_size)
        results.append(region)
    return results
def cbr(audio, metadata, verbose=False): """ Implementation of the automatic note segmentation given in: Caetano, M., Burred, J. J., Rodet, X. Automatic Segmentation of the Temporal Evolution of Isolated Acoustic Musical Instrument Sounds Using Spectro-Temporal Cues. In Proc. DAFx 2010 Returns a list of dictionaries (one for each note) with the following keys: * onset * end_attack * sustain * release * offset """ if verbose: print 'Segmenting %s with the Caetano, Burred, Rodet method' % \ metadata.get('name', 'unknown audio file') notes = [] onsets = odf(audio, metadata) if not onsets: raise NoOnsetsFound() onsets = [onsets[0]] env = ae.tae(np.abs(audio)) sc = spectral_centroid(audio, metadata) for onset_number, onset in enumerate(onsets): # if time to prev onset is < 200 ms, ignore this onset note_duration = onset - onsets[onset_number - 1] max_note_duration = (metadata['sampling_rate'] * 200) / 1000 if onset_number > 0 and note_duration < max_note_duration: if verbose: print 'Warning: Detected note duration is too short', print '(%d to %d, %d samples). Skipping.' 
% ( onsets[onset_number - 1], onset, note_duration) continue boundaries = { 'onset': onset, 'end_attack': onset, 'sustain': onset, 'release': onset, 'offset': len(audio) } # offset: last point TAE has same energy as onset n = 512 onset_energy = np.sum(env[np.max(onset - n, 0):onset]**2) audio_pos = onset + (metadata['sampling_rate'] * 200 / 1000) while audio_pos <= len(audio) - n: frame = env[audio_pos:audio_pos + n] energy = np.sum(frame**2) if energy <= onset_energy: break audio_pos += n boundaries['offset'] = audio_pos # beginning of sustain / end of attack # using a modified version of Peeters' efforts method max_transient = min(onset + (metadata['sampling_rate'] * 500) / 1000, len(env) - onset) if onset == max_transient: if verbose: print 'Warning: onset detected at end of signal, ignoring' continue max_env = np.max(env[onset:max_transient]) max_env_loc = np.argmax(env[onset:max_transient]) + onset effort_thresholds = np.linspace(0, max_env, 10) effort_times = \ _effort_times(env[onset:max_env_loc], effort_thresholds) + onset efforts = np.diff(effort_times) w = np.mean(efforts) M = len(efforts[efforts > w]) # no. 
efforts greater than mean # start of sustain: first point at which effort > M * w sustain = max_env_loc for i, effort in enumerate(efforts): if effort > M * w: sustain = effort_times[i] boundaries['sustain'] = sustain # end of attack: first local minima (in spectral centroid) # between onset and sustain sc_minima = util.next_minima(sc, onset / 512) * 512 boundaries['end_attack'] = min(sustain, sc_minima) # start of release: using a modified version of Peeters' # efforts method (in reverse) max_env = np.max(env[onset:boundaries['offset']]) max_env_loc = np.argmax(env[onset:boundaries['offset']]) + onset effort_thresholds = np.linspace(0, max_env, 10) effort_times = _effort_times(env[boundaries['offset']:onset:-1], effort_thresholds) efforts = np.diff(effort_times) w = np.mean(efforts) M = len(efforts[efforts > w]) # first point at which effort > M * w release = max_env_loc for i, effort in enumerate(efforts): if effort > M * w: release = boundaries['offset'] - effort_times[i] boundaries['release'] = release notes.append(boundaries) return notes
def cbr(audio, metadata, verbose=False): """ Implementation of the automatic note segmentation given in: Caetano, M., Burred, J. J., Rodet, X. Automatic Segmentation of the Temporal Evolution of Isolated Acoustic Musical Instrument Sounds Using Spectro-Temporal Cues. In Proc. DAFx 2010 Returns a list of dictionaries (one for each note) with the following keys: * onset * end_attack * sustain * release * offset """ if verbose: print 'Segmenting %s with the Caetano, Burred, Rodet method' % \ metadata.get('name', 'unknown audio file') notes = [] onsets = odf(audio, metadata) if not onsets: raise NoOnsetsFound() onsets = [onsets[0]] env = ae.tae(np.abs(audio)) sc = spectral_centroid(audio, metadata) for onset_number, onset in enumerate(onsets): # if time to prev onset is < 200 ms, ignore this onset note_duration = onset - onsets[onset_number - 1] max_note_duration = (metadata['sampling_rate'] * 200) / 1000 if onset_number > 0 and note_duration < max_note_duration: if verbose: print 'Warning: Detected note duration is too short', print '(%d to %d, %d samples). Skipping.' 
% ( onsets[onset_number - 1], onset, note_duration ) continue boundaries = { 'onset': onset, 'end_attack': onset, 'sustain': onset, 'release': onset, 'offset': len(audio) } # offset: last point TAE has same energy as onset n = 512 onset_energy = np.sum(env[np.max(onset - n, 0):onset] ** 2) audio_pos = onset + (metadata['sampling_rate'] * 200 / 1000) while audio_pos <= len(audio) - n: frame = env[audio_pos:audio_pos + n] energy = np.sum(frame ** 2) if energy <= onset_energy: break audio_pos += n boundaries['offset'] = audio_pos # beginning of sustain / end of attack # using a modified version of Peeters' efforts method max_transient = min(onset + (metadata['sampling_rate'] * 500) / 1000, len(env) - onset) if onset == max_transient: if verbose: print 'Warning: onset detected at end of signal, ignoring' continue max_env = np.max(env[onset:max_transient]) max_env_loc = np.argmax(env[onset:max_transient]) + onset effort_thresholds = np.linspace(0, max_env, 10) effort_times = \ _effort_times(env[onset:max_env_loc], effort_thresholds) + onset efforts = np.diff(effort_times) w = np.mean(efforts) M = len(efforts[efforts > w]) # no. 
efforts greater than mean # start of sustain: first point at which effort > M * w sustain = max_env_loc for i, effort in enumerate(efforts): if effort > M * w: sustain = effort_times[i] boundaries['sustain'] = sustain # end of attack: first local minima (in spectral centroid) # between onset and sustain sc_minima = util.next_minima(sc, onset / 512) * 512 boundaries['end_attack'] = min(sustain, sc_minima) # start of release: using a modified version of Peeters' # efforts method (in reverse) max_env = np.max(env[onset:boundaries['offset']]) max_env_loc = np.argmax(env[onset:boundaries['offset']]) + onset effort_thresholds = np.linspace(0, max_env, 10) effort_times = _effort_times(env[boundaries['offset']:onset:-1], effort_thresholds) efforts = np.diff(effort_times) w = np.mean(efforts) M = len(efforts[efforts > w]) # first point at which effort > M * w release = max_env_loc for i, effort in enumerate(efforts): if effort > M * w: release = boundaries['offset'] - effort_times[i] boundaries['release'] = release notes.append(boundaries) return notes