Example #1
def process_syllable(syl, hparams, mel_basis, debug):
    # Skip silences
    syl_len = len(syl)
    if syl_len == 0 or np.max(syl) == 0:
        return None, None, None
    # If too long, skip; otherwise pad to chunk length
    if syl_len > hparams.chunk_len_samples:
        return None, None, None
    else:
        syl_pad = np.zeros(hparams.chunk_len_samples)
        syl_pad[:syl_len] = syl
    # Normalise
    sn = syl_pad / np.max(syl_pad)
    # convert to float
    if np.issubdtype(type(sn[0]), np.integer):
        sn = int16_to_float32(sn)
    # create spec
    mS, debug_info = spectrogram_sp(y=sn,
                                    sr=hparams.sr,
                                    n_fft=hparams.n_fft,
                                    win_length=hparams.win_length_samples,
                                    hop_length=hparams.hop_length_samples,
                                    ref_level_db=hparams.ref_level_db,
                                    _mel_basis=mel_basis,
                                    pre_emphasis=hparams.preemphasis,
                                    power=hparams.power,
                                    debug=debug)

    return sn, mS, debug_info
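A minimal usage sketch for process_syllable. The hparams stand-in below is a SimpleNamespace whose field names are inferred from how the function reads them, the values are placeholders rather than defaults from the source, and the mel filterbank orientation expected by spectrogram_sp is an assumption.

import numpy as np
import librosa
from types import SimpleNamespace

hparams = SimpleNamespace(sr=22050, n_fft=1024, win_length_samples=512,
                          hop_length_samples=128, chunk_len_samples=22050,
                          ref_level_db=20, preemphasis=0.97, power=1.5)
# assumed: spectrogram_sp accepts a (n_mels, 1 + n_fft // 2) mel filterbank
mel_basis = librosa.filters.mel(sr=hparams.sr, n_fft=hparams.n_fft, n_mels=64)
syl = np.random.randn(11025).astype("float32")  # stand-in half-second syllable
sn, mS, debug_info = process_syllable(syl, hparams, mel_basis, debug=False)
if sn is None:
    print("skipped: empty, silent, or longer than chunk_len_samples")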
def prepare_wav(wav_loc, hparams=None):
    """ load wav and convert to correct format
    """

    # get rate and data
    rate, data = load_wav(wav_loc)

    # convert data if needed
    if np.issubdtype(type(data[0]), np.integer):
        data = int16_to_float32(data)
    # bandpass filter
    if hparams is not None:
        data = butter_bandpass_filter(data,
                                      hparams.butter_lowcut,
                                      hparams.butter_highcut,
                                      rate,
                                      order=5)

        # reduce noise
        if hparams.reduce_noise:
            data = nr.reduce_noise(audio_clip=data,
                                   noise_clip=data,
                                   **hparams.noise_reduce_kwargs)

    return rate, data
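A usage sketch for this prepare_wav: with hparams=None it only loads and float-converts; with an attribute-style hparams (a SimpleNamespace stand-in here, fields inferred from the function body) it also band-passes and optionally denoises. The path is a placeholder.

from types import SimpleNamespace

rate, data = prepare_wav("recording.wav", hparams=None)

hparams = SimpleNamespace(butter_lowcut=500, butter_highcut=8000,
                          reduce_noise=False, noise_reduce_kwargs={})
rate, data = prepare_wav("recording.wav", hparams=hparams)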
def make_spec(
    syll_wav,
    fs,
    hparams,
    mel_matrix=None,
    use_tensorflow=False,
    use_mel=True,
    return_tensor=False,
    norm_uint8=False,
):
    """
    """
    if use_tensorflow:
        import tensorflow as tf
        from avgn.signalprocessing.spectrogramming_tf import spectrogram_tensorflow
    # convert to float
    if np.issubdtype(type(syll_wav[0]), np.integer):
        syll_wav = int16_to_float32(syll_wav)

    # create spec

    if use_tensorflow:
        spec = spectrogram_tensorflow(syll_wav, fs, hparams)
        if use_mel:
            spec = tf.transpose(tf.tensordot(spec, mel_matrix, 1))
            if not return_tensor:
                spec = spec.numpy()
    else:
        spec = spectrogram(syll_wav, fs, hparams)
        if use_mel:
            spec = np.dot(spec.T, mel_matrix).T
    if norm_uint8:
        spec = (norm(spec) * 255).astype("uint8")

    return spec
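A sketch of the NumPy path through make_spec. librosa.filters.mel returns a (n_mels, 1 + n_fft // 2) filterbank, so it is transposed to (freq, mel) to match the np.dot(spec.T, mel_matrix).T projection above; syll_wav and hparams are assumed to come from the surrounding pipeline (e.g. subset_syllables).

import librosa

fs = 22050
mel_matrix = librosa.filters.mel(sr=fs, n_fft=1024, n_mels=64).T  # (freq, mel)
# syll_wav, hparams: produced by the segmentation steps elsewhere in this file
spec = make_spec(syll_wav, fs, hparams, mel_matrix=mel_matrix,
                 use_tensorflow=False, use_mel=True, norm_uint8=True)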
def get_element(datafile,
                indv=None,
                element_number=1,
                element="syllable",
                hparams=None):

    # if an individual isn't specified, grab the first one
    if indv is None:
        indv = datafile.indvs[0]

    # get the element
    element = datafile.data["indvs"][indv][element]

    # get the part of the wav we want to load
    st = element["start_times"][element_number]
    et = element["end_times"][element_number]

    # load the data
    rate, element = load_wav(datafile.data["wav_loc"],
                             offset=st,
                             duration=et - st,
                             sr=None)

    if np.issubdtype(type(element[0]), np.integer):
        element = int16_to_float32(element)

    if hparams is not None:
        element = butter_bandpass_filter(element,
                                         hparams.butter_lowcut,
                                         hparams.butter_highcut,
                                         rate,
                                         order=5)

    return rate, element
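A usage sketch for get_element. datafile is assumed to be the project's DataFile-style object exposing .indvs and .data; element_number indexes into the unit's start/end time lists.

rate, wav_slice = get_element(datafile,
                              indv=None,          # None -> first individual
                              element_number=0,
                              element="syllable",
                              hparams=None)       # None skips the bandpass filter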
def prepare_wav(wav_loc, hparams, debug):
    """ load wav and convert to correct format
    """
    if debug:
        debug_data = {}
    else:
        debug_data = None

    # load the audio data, resampled to hparams.sr
    data, _ = librosa.load(wav_loc, sr=hparams.sr)

    # convert data if needed
    if np.issubdtype(type(data[0]), np.integer):
        data = int16_to_float32(data)

    # split the audio into chunks to avoid memory issues
    len_chunk_minutes = 10
    len_chunk_sample = hparams.sr * 60 * len_chunk_minutes
    data_chunks = []
    for t in range(0, len(data), len_chunk_sample):
        start = t
        end = min(len(data), t + len_chunk_sample)
        data_chunks.append(data[start:end])
        # only keep one chunk for debug
        if debug:
            break

    # bandpass filter
    data_cleaned = []
    if hparams is not None:
        for data in data_chunks:

            if debug:
                debug_data['x'] = data

            data = butter_bandpass_filter(data,
                                          hparams.butter_lowcut,
                                          hparams.butter_highcut,
                                          hparams.sr,
                                          order=5)
            if debug:
                debug_data['x_filtered'] = data

            # reduce noise
            if hparams.reduce_noise:
                data = nr.reduce_noise(audio_clip=data,
                                       noise_clip=data,
                                       **hparams.noise_reduce_kwargs)
            if debug:
                debug_data['x_rn'] = data
            data_cleaned.append(data)
    else:
        data_cleaned = data_chunks

    # concatenate chunks
    data = np.concatenate(data_cleaned)
    return data, debug_data
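A usage sketch for the debug-aware prepare_wav: with debug=True only the first 10-minute chunk is processed, and the raw, filtered, and noise-reduced signals are captured for inspection. The path is a placeholder and the hparams fields are inferred from the function body.

from types import SimpleNamespace

hparams = SimpleNamespace(sr=22050, butter_lowcut=500, butter_highcut=8000,
                          reduce_noise=False, noise_reduce_kwargs={})
data, debug_data = prepare_wav("recording.wav", hparams, debug=True)
if debug_data is not None:
    raw = debug_data["x"]
    filtered = debug_data["x_filtered"]
    denoised = debug_data["x_rn"]  # equals `filtered` when reduce_noise is off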
def subset_syllables(json_dict,
                     indv,
                     unit="syllables",
                     hparams=None,
                     include_labels=True):
    """ Grab syllables from wav data
    """
    if isinstance(indv, list):
        indv = indv[0]
    if not isinstance(json_dict, OrderedDict):
        json_dict = read_json(json_dict)
    # get unit info
    start_times = json_dict["indvs"][indv][unit]["start_times"]
    # handling both "end_times" and "stop_times" is a quick fix; this should be unified on the parsing side
    if "end_times" in json_dict["indvs"][indv][unit]:
        end_times = json_dict["indvs"][indv][unit]["end_times"]
    else:
        end_times = json_dict["indvs"][indv][unit]["stop_times"]
    if include_labels:
        labels = json_dict["indvs"][indv][unit]["labels"]
    else:
        labels = None
    # get rate and data
    rate, data = load_wav(json_dict["wav_loc"])

    # convert data if needed
    if np.issubdtype(type(data[0]), np.integer):
        data = int16_to_float32(data)
    # bandpass filter
    if hparams is not None:
        data = butter_bandpass_filter(data,
                                      hparams.butter_lowcut,
                                      hparams.butter_highcut,
                                      rate,
                                      order=5)

        # reduce noise
        if hparams.reduce_noise:
            data = nr.reduce_noise(audio_clip=data,
                                   noise_clip=data,
                                   **hparams.noise_reduce_kwargs)
    syllables = [
        data[int(st * rate):int(et * rate)]
        for st, et in zip(start_times, end_times)
    ]
    return syllables, rate, labels
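A usage sketch for subset_syllables; the JSON path and individual name are placeholders, and passing hparams=None skips the filtering and noise reduction.

syllables, rate, labels = subset_syllables("bird.json",
                                           indv="Bird0",
                                           unit="syllables",
                                           hparams=None,
                                           include_labels=True)
durations = [len(s) / rate for s in syllables]  # duration of each syllable (s)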
Example #7
def process_bird_wav(
    bird,
    wav_info,
    wav_time,
    params,
    save_to_folder,
    visualize=False,
    skip_created=False,
    seconds_timeout=300,
    save_spectrograms=True,
    verbose=False,
):
    """splits a wav file into periods of silence and periods of sound based on params
    """
    # Load up the WAV
    rate, data = load_wav(wav_info)
    if rate is None or data is None:
        return
    params["sample_rate"] = rate

    # bandpass filter
    data = butter_bandpass_filter(data.astype("float32"),
                                  params["lowcut"],
                                  params["highcut"],
                                  rate,
                                  order=2)
    data = float32_to_int16(data)

    # we only want one channel
    if len(np.shape(data)) == 2:
        data = data[:, 0]

    # threshold the (root mean squared of the) audio
    rms_data, sound_threshed = RMS(
        data,
        rate,
        params["rms_stride"],
        params["rms_window"],
        params["rms_padding"],
        params["noise_thresh"],
    )
    # Find the onsets/offsets of sound
    onset_sounds, offset_sounds = detect_onsets_offsets(
        np.repeat(sound_threshed, int(params["rms_stride"] * rate)),
        threshold=0,
        min_distance=0,
    )
    # make sure all onset sounds are at least zero (due to downsampling in RMS)
    onset_sounds[onset_sounds < 0] = 0

    # threshold clips of sound
    for onset_sound, offset_sound in zip(onset_sounds, offset_sounds):

        # segment the clip
        clip = data[onset_sound:offset_sound]
        # if the clip is thresholded as noise, do not save it into the dataset
        # bin width in Hz of spectrogram
        freq_step_size_Hz = (rate / 2) / params["num_freq"]
        bout_spec = threshold_clip(clip,
                                   rate,
                                   freq_step_size_Hz,
                                   params,
                                   visualize=visualize,
                                   verbose=verbose)
        if bout_spec is None:
            # visualize spectrogram if desired
            if visualize:
                # compute spectrogram of clip
                wav_spectrogram = spectrogram(int16_to_float32(clip), params)
                visualize_spec(wav_spectrogram, show=True)
            continue

        # determine the datetime of this clip
        start_time = wav_time + timedelta(seconds=onset_sound / float(rate))
        time_string = start_time.strftime("%Y-%m-%d_%H-%M-%S-%f")

        # create a subfolder for the individual bird if it doesn't already exist
        bird_folder = Path(save_to_folder).resolve() / bird
        ensure_dir(bird_folder)

        # save the segmented bout audio
        save_bout_wav(clip, rate, bird_folder, bird, wav_info, time_string,
                      skip_created)

        # save the spectrogram of the data
        if save_spectrograms:
            save_bout_spec(bird_folder, bout_spec, time_string, skip_created)
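A hedged invocation sketch for process_bird_wav. Every key in params mirrors a lookup made inside process_bird_wav or threshold_clip; the values are illustrative placeholders, not tuned defaults from the source.

from datetime import datetime

params = {
    "lowcut": 500, "highcut": 15000,                  # bandpass edges (Hz)
    "rms_stride": 0.01, "rms_window": 0.05, "rms_padding": 0.1,
    "noise_thresh": 0.01, "num_freq": 512,
    "min_segment_length_s": 0.5, "max_segment_length_s": 10.0,
    "vocal_range_Hz": (1000, 8000), "min_silence_pct": 0.05,
}
process_bird_wav("Bird0", "recording.wav", datetime.now(), params,
                 save_to_folder="bouts/", verbose=True)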
Example #8
def threshold_clip(clip,
                   rate,
                   freq_step_size_Hz,
                   params,
                   visualize=False,
                   verbose=False):
    """ determines if a clip is a bout, or noise based on threshold parameters
    """
    # get the length of the segment
    segment_length = len(clip) / float(rate)

    # return if the clip is the wrong length
    if segment_length <= params["min_segment_length_s"]:
        if verbose:
            print("Segment length {} less than minimum of {}".format(
                segment_length, params["min_segment_length_s"]))
        return
    if segment_length >= params["max_segment_length_s"]:
        if verbose:
            print("Segment length {} greather than maximum of {}".format(
                segment_length, params["max_segment_length_s"]))
        return

    # compute spectrogram of clip
    wav_spectrogram = spectrogram(int16_to_float32(clip), params)
    # determine the power of the spectral envelope
    norm_power = np.mean(wav_spectrogram, axis=0)
    norm_power = (norm_power - np.min(norm_power)) / (np.max(norm_power) -
                                                      np.min(norm_power))

    # get the maximum power region of the frequency envelope
    peak_power_Hz = np.argmax(norm_power) * freq_step_size_Hz

    # threshold for the location of peak power
    if peak_power_Hz < params["vocal_range_Hz"][0]:
        if verbose:
            print("Peak power {} Hz less than minimum of {}".format(
                peak_power_Hz, params["vocal_range_Hz"][0]))
        return

    # threshold based on silence
    vocal_range_bins = (int(params["vocal_range_Hz"][0] / freq_step_size_Hz),
                        int(params["vocal_range_Hz"][1] / freq_step_size_Hz))
    vocal_power = zero_one_norm(
        np.sum(wav_spectrogram[:, vocal_range_bins[0]:vocal_range_bins[1]],
               axis=1))
    # the percent of the spectrogram below the noise threshold
    pct_silent = np.sum(vocal_power <= params["noise_thresh"]) / float(
        len(vocal_power))
    if pct_silent < params["min_silence_pct"]:
        if verbose:
            print("Percent silent {} /% less than maximum of {}".format(
                pct_silent, params["min_silence_pct"]))
        return

    if visualize:
        visualize_spec(wav_spectrogram, show=True)

    # the clip passed all thresholds; return its spectrogram
    return wav_spectrogram
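A standalone sketch of threshold_clip. freq_step_size_Hz is derived exactly as in process_bird_wav, and clip, rate, and params are assumed to come from that pipeline.

freq_step_size_Hz = (rate / 2) / params["num_freq"]
bout_spec = threshold_clip(clip, rate, freq_step_size_Hz, params, verbose=True)
if bout_spec is not None:
    pass  # clip passed the length, peak-power, and silence thresholds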