def get_transients(audio,
                   metadata,
                   frame_size=256,
                   hop_size=128,
                   num_partials=10):
    '''
    Return a list of transient regions in a given audio signal.

    One region is produced for every entry of ``metadata['onsets']`` that
    has at least one stability peak no more than 200 ms after it; onsets
    without such a peak are skipped.

    Transient start: onset location
    Transient end:
        * find the largest stability peak among the candidates
          (the first one wins on ties)
        * candidate peaks whose value is >= 25% of the largest peak and
          that lie within 10 frames of the previously accepted peak
          extend the region
        * the end is the position returned by ``util.next_minima`` for the
          last accepted peak, converted to samples with ``hop_size``
        * the end is never later than 200 ms after the onset

    Parameters:
        audio, frame_size, num_partials: passed through to
            ``get_stability`` unchanged.
        metadata: mapping that must contain ``'onsets'`` (iterable of
            onset positions in samples) and may contain
            ``'sampling_rate'`` (defaults to 44100).
        hop_size: number of samples per stability frame.

    Returns a list of dictionaries of the form:
        {'start': <int>, 'end': <int>}
    '''
    sampling_rate = int(metadata.get('sampling_rate', 44100))
    # 200 ms in samples.  Multiply before dividing so that integer division
    # does not truncate the rate first (44100 -> 8820, not 44 * 200 = 8800),
    # and use // so the result is an int on every interpreter.
    max_length = (sampling_rate * 200) // 1000

    transients = []
    stability = get_stability(audio, metadata, frame_size, hop_size,
                              num_partials)
    peaks = util.find_peaks(stability, np.mean(stability))

    for onset in metadata['onsets']:
        # keep only peaks that are not before the onset and not more than
        # 200 ms after it
        # NOTE(review): ``onset / hop_size`` floors for ints on Python 2 but
        # is a true division on Python 3, so the first admitted frame can
        # differ by one between interpreters - confirm which is intended.
        candidate_peaks = [
            p for p in peaks
            if (p >= (onset / hop_size)) and
            ((p * hop_size) - onset <= max_length)
        ]
        if not candidate_peaks:
            continue

        # largest peak after the onset; max() returns the first maximum,
        # matching a strict ">" scan from the left
        transient_peak = max(candidate_peaks, key=lambda p: stability[p])
        peak_value = stability[transient_peak]

        # walk the candidates, chaining peaks that are both close to the
        # previously accepted peak and large enough; if none qualifies,
        # last_peak simply stays at the largest peak
        last_peak = transient_peak
        for p in candidate_peaks:
            if (np.abs(p - last_peak) <= 10) and \
                    stability[p] >= (0.25 * peak_value):
                last_peak = p

        transients.append({
            'start': onset,
            'end': min(onset + max_length,
                       util.next_minima(stability, last_peak) * hop_size),
        })

    return transients
def get_transients(audio, metadata, frame_size=256, hop_size=128, num_partials=10):
    """
    Return a list of transient regions in a given audio signal.

    Every entry of ``metadata["onsets"]`` that is followed by at least one
    stability peak within 200 ms yields one region; other onsets are
    skipped.

    Transient start: onset location
    Transient end:
        * find the largest stability peak among the candidates
          (ties resolve to the earliest candidate)
        * candidate peaks with a value >= 25% of the largest peak that lie
          within 10 frames of the previously accepted peak extend the
          region
        * the end is the position returned by ``util.next_minima`` for the
          last accepted peak, multiplied by ``hop_size``
        * the end is capped at 200 ms after the onset

    Parameters:
        audio, frame_size, num_partials: forwarded to ``get_stability``.
        metadata: mapping with an ``"onsets"`` entry (onset positions in
            samples) and an optional ``"sampling_rate"`` (default 44100).
        hop_size: number of samples per stability frame.

    Returns a list of dictionaries of the form:
        {'start': <int>, 'end': <int>}
    """
    sampling_rate = int(metadata.get("sampling_rate", 44100))
    # 200 ms expressed in samples.  The multiplication comes first so that
    # integer division cannot truncate the rate (44100 Hz -> 8820 samples
    # rather than 44 * 200 = 8800); // keeps the value an int everywhere.
    window = (sampling_rate * 200) // 1000

    stability = get_stability(audio, metadata, frame_size, hop_size, num_partials)
    peaks = util.find_peaks(stability, np.mean(stability))

    transients = []
    for onset in metadata["onsets"]:
        # peaks not before the onset and at most 200 ms after it
        # NOTE(review): ``onset / hop_size`` is floor division for ints on
        # Python 2 and true division on Python 3, so the lower bound may
        # differ by one frame between interpreters - confirm the intent.
        candidate_peaks = [
            p
            for p in peaks
            if (p >= (onset / hop_size)) and ((p * hop_size) - onset <= window)
        ]
        if not candidate_peaks:
            continue

        # largest peak after the onset (first maximum wins, as with a
        # left-to-right scan using a strict ">")
        transient_peak = max(candidate_peaks, key=lambda p: stability[p])
        peak_value = stability[transient_peak]

        # extend through the cluster of close, sufficiently large peaks;
        # when no candidate qualifies last_peak remains the largest peak
        last_peak = transient_peak
        for p in candidate_peaks:
            if (np.abs(p - last_peak) <= 10) and stability[p] >= (0.25 * peak_value):
                last_peak = p

        end = min(onset + window, util.next_minima(stability, last_peak) * hop_size)
        transients.append({"start": onset, "end": end})

    return transients
# Example #3
# 0
def cbr(audio, metadata, verbose=False):
    """
    Implementation of the automatic note segmentation given in:

        Caetano, M., Burred, J. J., Rodet, X.
        Automatic Segmentation of the Temporal Evolution of Isolated Acoustic
        Musical Instrument Sounds Using Spectro-Temporal Cues.
        In Proc. DAFx 2010

    Only the first onset reported by ``odf`` is segmented; any further
    onsets are discarded before the main loop.

    Parameters:
        audio: the signal to segment (its absolute value is passed to
            ``ae.tae`` to obtain the envelope).
        metadata: mapping that must contain ``'sampling_rate'`` and may
            contain ``'name'`` (used only in the verbose message).
        verbose: when true, print progress and warning messages.

    Returns a list of dictionaries (one for each note) with the following keys:
    * onset
    * end_attack
    * sustain
    * release
    * offset

    Raises NoOnsetsFound if ``odf`` returns no onsets.
    """
    if verbose:
        print('Segmenting %s with the Caetano, Burred, Rodet method' %
              metadata.get('name', 'unknown audio file'))

    notes = []

    onsets = odf(audio, metadata)
    if not onsets:
        raise NoOnsetsFound()
    # isolated-sound method: keep just the first onset
    onsets = [onsets[0]]

    env = ae.tae(np.abs(audio))
    sc = spectral_centroid(audio, metadata)

    for onset_number, onset in enumerate(onsets):
        # Sample counts for 200 ms and 500 ms.  Floor division keeps them
        # usable as slice indices regardless of the division semantics in
        # effect.
        sampling_rate = int(metadata['sampling_rate'])
        samples_200ms = (sampling_rate * 200) // 1000
        samples_500ms = (sampling_rate * 500) // 1000

        # if time to prev onset is < 200 ms, ignore this onset
        # (for onset_number == 0 the index -1 wraps around, but the value
        # is not used because of the onset_number > 0 test)
        note_duration = onset - onsets[onset_number - 1]
        if onset_number > 0 and note_duration < samples_200ms:
            if verbose:
                print('Warning: Detected note duration is too short '
                      '(%d to %d, %d samples). Skipping.' % (
                          onsets[onset_number - 1], onset, note_duration))
            continue

        boundaries = {
            'onset': onset,
            'end_attack': onset,
            'sustain': onset,
            'release': onset,
            'offset': len(audio)
        }

        # offset: first frame, starting 200 ms after the onset, whose
        # envelope energy has dropped to the energy just before the onset
        n = 512
        # builtin max clamps the start at 0; np.max(x, 0) would treat the 0
        # as an axis and let a negative start wrap around the envelope
        onset_energy = np.sum(env[max(onset - n, 0):onset] ** 2)
        audio_pos = onset + samples_200ms
        while audio_pos <= len(audio) - n:
            frame = env[audio_pos:audio_pos + n]
            energy = np.sum(frame ** 2)
            if energy <= onset_energy:
                break
            audio_pos += n
        boundaries['offset'] = audio_pos

        # beginning of sustain / end of attack
        # using a modified version of Peeters' efforts method
        # max_transient is an absolute index into env, so it is bounded by
        # len(env) (not by the remaining length len(env) - onset)
        max_transient = min(onset + samples_500ms, len(env))
        if onset >= max_transient:
            if verbose:
                print('Warning: onset detected at end of signal, ignoring')
            continue
        max_env = np.max(env[onset:max_transient])
        max_env_loc = np.argmax(env[onset:max_transient]) + onset
        effort_thresholds = np.linspace(0, max_env, 10)
        effort_times = \
            _effort_times(env[onset:max_env_loc], effort_thresholds) + onset
        efforts = np.diff(effort_times)
        w = np.mean(efforts)
        M = len(efforts[efforts > w])  # no. efforts greater than mean

        # start of sustain: point at which effort > M * w, falling back to
        # the envelope maximum when no effort qualifies
        # NOTE(review): there is no break, so the last qualifying effort
        # wins - confirm whether the first one was intended.
        sustain = max_env_loc
        for i, effort in enumerate(efforts):
            if effort > M * w:
                sustain = effort_times[i]
        boundaries['sustain'] = sustain

        # end of attack: first local minima (in spectral centroid)
        # between onset and sustain
        # presumably 512 is the hop size used by spectral_centroid - TODO
        # confirm
        sc_minima = util.next_minima(sc, onset // 512) * 512
        boundaries['end_attack'] = min(sustain, sc_minima)

        # start of release:  using a modified version of Peeters'
        # efforts method (in reverse)
        max_env = np.max(env[onset:boundaries['offset']])
        max_env_loc = np.argmax(env[onset:boundaries['offset']]) + onset
        effort_thresholds = np.linspace(0, max_env, 10)
        effort_times = _effort_times(env[boundaries['offset']:onset:-1],
                                     effort_thresholds)
        efforts = np.diff(effort_times)
        w = np.mean(efforts)
        M = len(efforts[efforts > w])

        # release: point at which effort > M * w, measured back from the
        # offset; falls back to the envelope maximum
        # NOTE(review): as above, the last qualifying effort wins.
        release = max_env_loc
        for i, effort in enumerate(efforts):
            if effort > M * w:
                release = boundaries['offset'] - effort_times[i]
        boundaries['release'] = release

        notes.append(boundaries)
    return notes
def cbr(audio, metadata, verbose=False):
    """
    Implementation of the automatic note segmentation given in:

        Caetano, M., Burred, J. J., Rodet, X.
        Automatic Segmentation of the Temporal Evolution of Isolated Acoustic
        Musical Instrument Sounds Using Spectro-Temporal Cues.
        In Proc. DAFx 2010

    Segmentation is performed for the first onset returned by ``odf`` only;
    later onsets are dropped before the loop runs.

    Parameters:
        audio: the signal to segment (``ae.tae`` is applied to its
            absolute value to obtain the envelope).
        metadata: mapping providing ``'sampling_rate'`` and, optionally,
            ``'name'`` (only used in the verbose message).
        verbose: print progress and warning messages when true.

    Returns a list of dictionaries (one for each note) with the following keys:
    * onset
    * end_attack
    * sustain
    * release
    * offset

    Raises NoOnsetsFound if ``odf`` returns no onsets.
    """
    if verbose:
        print('Segmenting %s with the Caetano, Burred, Rodet method' %
              metadata.get('name', 'unknown audio file'))

    notes = []

    onsets = odf(audio, metadata)
    if not onsets:
        raise NoOnsetsFound()
    # only the first onset is processed
    onsets = [onsets[0]]

    env = ae.tae(np.abs(audio))
    sc = spectral_centroid(audio, metadata)

    for onset_number, onset in enumerate(onsets):
        # Durations converted to whole samples; // keeps them valid slice
        # indices whichever division semantics are active.
        rate = int(metadata['sampling_rate'])
        min_note_duration = (rate * 200) // 1000
        max_attack_span = (rate * 500) // 1000

        # if time to prev onset is < 200 ms, ignore this onset
        # (index -1 wraps for the first onset; harmless because the
        # onset_number > 0 test short-circuits)
        note_duration = onset - onsets[onset_number - 1]
        if onset_number > 0 and note_duration < min_note_duration:
            if verbose:
                print('Warning: Detected note duration is too short '
                      '(%d to %d, %d samples). Skipping.' % (
                          onsets[onset_number - 1], onset, note_duration))
            continue

        boundaries = {
            'onset': onset,
            'end_attack': onset,
            'sustain': onset,
            'release': onset,
            'offset': len(audio)
        }

        # offset: first frame, from 200 ms after the onset onwards, whose
        # envelope energy is no greater than the energy just before the
        # onset
        n = 512
        # clamp with the builtin max: np.max(onset - n, 0) interprets the
        # 0 as an axis, so a negative start would wrap around instead
        pre_onset_start = max(onset - n, 0)
        onset_energy = np.sum(env[pre_onset_start:onset] ** 2)
        audio_pos = onset + min_note_duration
        while audio_pos <= len(audio) - n:
            frame = env[audio_pos:audio_pos + n]
            energy = np.sum(frame ** 2)
            if energy <= onset_energy:
                break
            audio_pos += n
        boundaries['offset'] = audio_pos

        # beginning of sustain / end of attack
        # using a modified version of Peeters' efforts method
        # The search window ends at an absolute index, hence the bound is
        # len(env) rather than the remaining length len(env) - onset.
        max_transient = min(onset + max_attack_span, len(env))
        if onset >= max_transient:
            if verbose:
                print('Warning: onset detected at end of signal, ignoring')
            continue
        max_env = np.max(env[onset:max_transient])
        max_env_loc = np.argmax(env[onset:max_transient]) + onset
        effort_thresholds = np.linspace(0, max_env, 10)
        effort_times = \
            _effort_times(env[onset:max_env_loc], effort_thresholds) + onset
        efforts = np.diff(effort_times)
        w = np.mean(efforts)
        M = len(efforts[efforts > w])  # no. efforts greater than mean

        # start of sustain: point at which effort > M * w; defaults to the
        # location of the envelope maximum
        # NOTE(review): without a break the last qualifying effort is
        # kept - confirm whether the first one was intended.
        sustain = max_env_loc
        for i, effort in enumerate(efforts):
            if effort > M * w:
                sustain = effort_times[i]
        boundaries['sustain'] = sustain

        # end of attack: first local minima (in spectral centroid)
        # between onset and sustain
        # presumably 512 matches the hop size of spectral_centroid - TODO
        # confirm
        sc_minima = util.next_minima(sc, onset // 512) * 512
        boundaries['end_attack'] = min(sustain, sc_minima)

        # start of release:  using a modified version of Peeters'
        # efforts method (in reverse)
        max_env = np.max(env[onset:boundaries['offset']])
        max_env_loc = np.argmax(env[onset:boundaries['offset']]) + onset
        effort_thresholds = np.linspace(0, max_env, 10)
        effort_times = _effort_times(env[boundaries['offset']:onset:-1],
                                     effort_thresholds)
        efforts = np.diff(effort_times)
        w = np.mean(efforts)
        M = len(efforts[efforts > w])

        # release: point at which effort > M * w, counted back from the
        # offset; defaults to the location of the envelope maximum
        # NOTE(review): the last qualifying effort is kept here as well.
        release = max_env_loc
        for i, effort in enumerate(efforts):
            if effort > M * w:
                release = boundaries['offset'] - effort_times[i]
        boundaries['release'] = release

        notes.append(boundaries)
    return notes