Ejemplo n.º 1
0
def is_single_event(audiofile, max_duration=7):
    '''
    Tell whether the audio file holds exactly one sound event.

    The answer is memoised in the module-level '_is_single_event_cache' variable: the
    first call computes it and every later call returns that stored value as-is
    (regardless of the arguments passed), so the analysis runs at most once.

    A file whose duration in seconds exceeds 'max_duration' is never treated as a single
    event; otherwise the decision is delegated to 'estimate_number_of_events'.
    '''
    global _is_single_event_cache

    # Cached answer available: nothing to load or analyse.
    if _is_single_event_cache is not None:
        return _is_single_event_cache

    sample_rate = 44100
    samples = MonoLoader(filename=audiofile, sampleRate=sample_rate).compute()
    exceeds_limit = len(samples) / sample_rate > max_duration

    # Over-long files are rejected outright without counting events.
    _is_single_event_cache = (
        False if exceeds_limit
        else estimate_number_of_events(audiofile, samples, sample_rate=sample_rate) == 1
    )
    return _is_single_event_cache
Ejemplo n.º 2
0
def is_single_event(audiofile, max_duration=7):
    '''
    Estimate if the audio signal contains one single event using the 'estimate_number_of_events'
    function. The result is stored in the global variable '_is_single_event_cache' so it can be
    reused in later calls of 'is_single_event' (once set, the cached value is returned
    regardless of the arguments passed).

    If loading fails with a RuntimeError whose message contains
    MORE_THAN_2_CHANNELS_EXCEPTION_MATCH_TEXT, the file is converted with 'convert_to_wav'
    and loading is retried on the converted file. Any other RuntimeError is re-raised
    unchanged (and nothing is cached) instead of being swallowed.

    Files whose duration in seconds exceeds 'max_duration' are never considered single event.
    '''
    global _is_single_event_cache
    if _is_single_event_cache is None:
        sample_rate = 44100
        try:
            audio_file = MonoLoader(filename=audiofile, sampleRate=sample_rate)
        except RuntimeError as e:
            if MORE_THAN_2_CHANNELS_EXCEPTION_MATCH_TEXT not in str(e):
                # Unrelated loader failure: propagate it rather than continuing with an
                # unbound 'audio_file' (which would surface as a confusing UnboundLocalError).
                raise
            converted_audiofile = convert_to_wav(audiofile)
            audio_file = MonoLoader(filename=converted_audiofile,
                                    sampleRate=sample_rate)
        audio = audio_file.compute()
        if len(audio) / sample_rate > max_duration:
            # If file is longer than max duration, we don't consider it to be single event
            _is_single_event_cache = False
        else:
            _is_single_event_cache = estimate_number_of_events(
                audiofile, audio, sample_rate=sample_rate) == 1
    return _is_single_event_cache
Ejemplo n.º 3
0
def estimate_number_of_events(audiofile,
                              region_energy_thr=2,
                              silence_thr_scale=4,
                              group_regions_ms=100):
    """
    Estimate the number of sound events in an audio file from its energy envelope.
    This is more like "activity detection" than "onset detection".

    The envelope is thresholded at 'silence_thr_scale' times the average signal energy,
    contiguous runs above the threshold become candidate regions, regions whose summed
    squared envelope is not above 'region_energy_thr' are discarded, and the remaining
    regions that are closer together than 'group_regions_ms' milliseconds are merged.

    Args:
        audiofile: Path of the audio file, passed to MonoLoader.
        region_energy_thr: Minimum energy (sum of squared envelope samples) a region
            must exceed to be kept.
        silence_thr_scale: Multiplier applied to the average signal energy to obtain
            the envelope threshold.
        group_regions_ms: Regions separated by a gap shorter than this many
            milliseconds are counted as a single event.

    Returns:
        The number of detected sound events (0 for an empty signal).
    """
    logger.debug('%s: estimating number of sound events', audiofile)

    def merge_close_regions(regions, max_gap_s):
        """
        Merge consecutive (start_time, end_time, energy) regions whose gap (start of a
        region minus end of the previous one) is below 'max_gap_s' seconds. A merged
        region spans from the first start to the last end and sums the energies.
        """
        merged = []
        for start_time, end_time, energy in regions:
            if merged and start_time - merged[-1][1] < max_gap_s:
                group_start, _, group_energy = merged[-1]
                merged[-1] = (group_start, end_time, group_energy + energy)
            else:
                merged.append((start_time, end_time, energy))
        return merged

    # Load audio file
    sample_rate = 44100
    audio = MonoLoader(filename=audiofile, sampleRate=sample_rate).compute()
    if len(audio) == 0:
        # Nothing to analyse; also avoids dividing by zero below.
        return 0

    # Compute envelope and average signal energy
    env_algo = essentia.standard.Envelope(
        attackTime=15,
        releaseTime=50,
    )
    envelope = np.asarray(env_algo(audio))
    average_signal_energy = np.sum(envelope**2) / len(envelope)
    silence_thr = average_signal_energy * silence_thr_scale

    # Time stamp of every envelope sample (float division so this also holds under
    # integer-division semantics).
    t = np.linspace(0, len(envelope) / float(sample_rate), num=len(envelope))

    # Get energy regions above threshold
    # Implementation based on https://stackoverflow.com/questions/43258896/extract-subarrays-of-numpy-array-whose-values-are-above-a-threshold
    # Padding with False on both sides guarantees an even number of edges:
    # edges[0::2] are the first samples above threshold, edges[1::2] are one past the
    # last sample above threshold, i.e. each region is envelope[start:end].
    mask = np.concatenate(([False], envelope > silence_thr, [False]))
    edges = np.flatnonzero(mask[1:] != mask[:-1])

    regions = []  # List of tuples like (start_time, end_time, energy)
    for start, end in zip(edges[0::2], edges[1::2]):
        energy = np.sum(envelope[start:end]**2)
        if energy > region_energy_thr:  # Discard those below region_energy_thr
            # 'end' is exclusive, so the last active sample is end - 1. Indexing this
            # way (instead of shifting all edges by -1) keeps a region that starts at
            # sample 0 from wrapping around to index -1.
            regions.append((t[start], t[end - 1], energy))

    # Group detected regions that happen close together
    regions = merge_close_regions(regions, group_regions_ms / 1000.0)

    return len(regions)  # Return number of sound events detected