def get_element(datafile,
                indv=None,
                element_number=1,
                element="syllable",
                hparams=None):
    """Load the audio of one annotated element from a datafile's WAV.

    Args:
        datafile: object exposing ``indvs`` (its first entry is the default
            individual) and ``data`` (a mapping with ``"indvs"`` and
            ``"wav_loc"`` entries).
        indv: key of the individual inside ``datafile.data["indvs"]``.
            Defaults to ``datafile.indvs[0]`` when ``None``.
        element_number: index into the element's ``"start_times"`` and
            ``"end_times"`` sequences.
            NOTE(review): the default of 1 is not the first entry of a
            0-indexed list - confirm this is intended.
        element: key of the element type under the chosen individual.
        hparams: optional; when given, ``hparams.butter_lowcut`` and
            ``hparams.butter_highcut`` are passed to
            ``butter_bandpass_filter``.

    Returns:
        ``(rate, audio)``: the rate returned by ``load_wav`` and the loaded
        samples, converted with ``int16_to_float32`` when the first sample is
        an integer and band-pass filtered when ``hparams`` is given.
    """
    # if an individual isn't specified, grab the first one
    if indv is None:
        indv = datafile.indvs[0]

    # timing information for the requested element type
    element_info = datafile.data["indvs"][indv][element]

    # the part of the wav we want to load
    st = element_info["start_times"][element_number]
    et = element_info["end_times"][element_number]

    # load only that excerpt
    rate, audio = load_wav(datafile.data["wav_loc"],
                           offset=st,
                           duration=et - st,
                           sr=None)

    # convert the samples that were just loaded (the previous code referenced
    # an undefined name here and raised NameError for integer audio)
    if np.issubdtype(type(audio[0]), np.integer):
        audio = int16_to_float32(audio)

    if hparams is not None:
        audio = butter_bandpass_filter(audio,
                                       hparams.butter_lowcut,
                                       hparams.butter_highcut,
                                       rate,
                                       order=5)

    return rate, audio
def prepare_wav(wav_loc, hparams=None):
    """Load a WAV file and optionally band-pass filter and denoise it.

    Args:
        wav_loc: location handed to ``load_wav``.
        hparams: optional settings object. When given, its ``butter_lowcut``
            and ``butter_highcut`` drive ``butter_bandpass_filter``; when its
            ``reduce_noise`` is truthy, ``nr.reduce_noise`` is applied with
            ``noise_reduce_kwargs``.

    Returns:
        ``(rate, data)`` as returned by ``load_wav``, with ``data`` processed
        as described above.
    """
    rate, data = load_wav(wav_loc)

    # integer samples go through int16_to_float32 before any processing
    first_sample = data[0]
    if np.issubdtype(type(first_sample), np.integer):
        data = int16_to_float32(data)

    # no hparams: no filtering and no noise reduction
    if hparams is None:
        return rate, data

    data = butter_bandpass_filter(
        data,
        hparams.butter_lowcut,
        hparams.butter_highcut,
        rate,
        order=5,
    )

    if hparams.reduce_noise:
        # the filtered recording is passed as both the clip and the noise clip
        data = nr.reduce_noise(
            audio_clip=data,
            noise_clip=data,
            **hparams.noise_reduce_kwargs
        )

    return rate, data
def subset_syllables(json_dict,
                     indv,
                     unit="syllables",
                     hparams=None,
                     include_labels=True):
    """Cut the annotated units of one individual out of its WAV recording.

    Args:
        json_dict: an ``OrderedDict`` describing the recording; any other
            type is handed to ``read_json`` first.
        indv: key of the individual; when a list is given its first entry is
            used.
        unit: key of the unit type under the individual.
        hparams: optional settings object; when given the audio is band-pass
            filtered and, if ``hparams.reduce_noise`` is truthy, denoised.
        include_labels: when true, the unit's ``"labels"`` entry is returned.

    Returns:
        ``(syllables, rate, labels)``: the list of audio slices, the rate
        from ``load_wav`` and the labels (``None`` when not requested).
    """
    if type(indv) is list:
        indv = indv[0]
    if type(json_dict) is not OrderedDict:
        json_dict = read_json(json_dict)

    # timing (and optional label) information for the requested unit
    unit_info = json_dict["indvs"][indv][unit]
    start_times = unit_info["start_times"]
    # "stop_times" is accepted as a fallback name for "end_times"
    end_key = "end_times" if "end_times" in unit_info else "stop_times"
    end_times = unit_info[end_key]
    labels = unit_info["labels"] if include_labels else None

    # load the whole recording
    rate, data = load_wav(json_dict["wav_loc"])

    # integer samples go through int16_to_float32 before any processing
    if np.issubdtype(type(data[0]), np.integer):
        data = int16_to_float32(data)

    if hparams is not None:
        data = butter_bandpass_filter(
            data,
            hparams.butter_lowcut,
            hparams.butter_highcut,
            rate,
            order=5,
        )
        if hparams.reduce_noise:
            # the filtered recording is passed as both clip and noise clip
            data = nr.reduce_noise(
                audio_clip=data,
                noise_clip=data,
                **hparams.noise_reduce_kwargs
            )

    # slice one clip per (start, end) pair; times are scaled by the rate
    syllables = []
    for st, et in zip(start_times, end_times):
        first = int(st * rate)
        last = int(et * rate)
        syllables.append(data[first:last])

    return syllables, rate, labels
Esempio n. 4
0
def segment_spec_custom(key, df, DT_ID, save=False, plot=False):
    """Segment one recording and optionally plot and/or save the result.

    Reads these module-level names: ``butter_min``, ``butter_max``,
    ``hparams``, ``min_syllable_length_s``, ``spectral_range``,
    ``hop_length_ms`` (plotting only), ``DATA_DIR`` and ``DATASET_ID``
    (saving only).

    Args:
        key: file stem used for the output ``<key>.JSON``.
        df: datafile object whose ``data`` mapping holds ``"wav_loc"`` and
            ``"indvs"``. It is not modified.
        DT_ID: name of the sub-directory under
            ``DATA_DIR / "processed" / (DATASET_ID + "_segmented")``.
        save: when true, write the JSON with the detected syllable times.
        plot: when true, show the segmentation with ``plot_segmentations``.

    Returns:
        The value returned by ``dynamic_threshold_segmentation``, or ``None``
        when that call returned ``None``.
    """
    import copy

    # load wav
    rate, data = load_wav(df.data["wav_loc"])
    # filter data
    data = butter_bandpass_filter(data, butter_min, butter_max, rate)

    # segment
    results = dynamic_threshold_segmentation(
        data,
        hparams,
        verbose=True,
        min_syllable_length_s=min_syllable_length_s,
        spectral_range=spectral_range,
    )
    if results is None:
        return None

    if plot:
        plot_segmentations(
            results["spec"],
            results["vocal_envelope"],
            results["onsets"],
            results["offsets"],
            hop_length_ms,
            rate,
            figsize=(15, 3)
        )
        plt.show()

    if save:
        json_out = DATA_DIR / "processed" / (DATASET_ID + "_segmented") / DT_ID / "JSON" / (
            key + ".JSON"
        )

        # Copy each level that is modified: a shallow df.data.copy() alone
        # shares the nested dicts, so adding "syllables" would also change
        # the caller's df.data.
        json_dict = df.data.copy()
        indvs = copy.copy(df.data["indvs"])
        first_indv = list(indvs.keys())[0]
        indv_entry = copy.copy(indvs[first_indv])
        indv_entry["syllables"] = {
            "start_times": list(results["onsets"]),
            "end_times": list(results["offsets"]),
        }
        indvs[first_indv] = indv_entry
        json_dict["indvs"] = indvs

        ensure_dir(json_out.as_posix())
        # the with-block closes the file; no explicit close() is needed
        with open(json_out.as_posix(), "w") as json_file:
            json.dump(json_dict, json_file, cls=NoIndentEncoder, indent=2)

    return results
Esempio n. 5
0
#     'ebr_min': 0.05, # expected syllabic rate (/s) low 
#     'ebr_max':  0.2, # expected syllabic rate (/s) high 
#     'max_thresh':  0.02, # maximum pct of syllabic envelope to threshold at in second pass
#     'thresh_delta':  0.005, # delta change in threshold to match second pass syllabification
#     'slow_threshold': 0.005, # starting threshold for second pass syllabification
    
#     'pad_length' : syll_size, # length to pad spectrograms to 

####
DATASET_ID = 'bengalese_finch_sakata'
dataset = DataSet(DATASET_ID, hparams=hparams)

# Bare expression: has no effect when run as a script.
dataset.sample_json
rate, data = load_wav(dataset.sample_json["wav_loc"])

# Hard-coded location of the WAV files inspected below.
mypath = r'I:\avgn_paper-vizmerge\data\processed\bengalese_finch_sakata\2020-04-29_21-12-51\WAV'
# Other files tried here: 'br81bl41_0016.wav', 'br82bl42_0016.wav'
file_current = 'tutor_bl5w5_0017.WAV'

rate, data_loaded = load_wav(mypath + '\\' + file_current)
data = data_loaded
# Sample i occurs at i / rate seconds. endpoint=False keeps the spacing at
# exactly 1 / rate; including the endpoint stretches the axis slightly.
times = np.linspace(0, len(data) / rate, len(data), endpoint=False)

# filter data
data = butter_bandpass_filter(data, butter_min, butter_max, rate)
plt.plot(times, data)
#     'ebr_min': 0.05, # expected syllabic rate (/s) low
#     'ebr_max':  0.2, # expected syllabic rate (/s) high
#     'max_thresh':  0.02, # maximum pct of syllabic envelope to threshold at in second pass
#     'thresh_delta':  0.005, # delta change in threshold to match second pass syllabification
#     'slow_threshold': 0.005, # starting threshold for second pass syllabification

#     'pad_length' : syll_size, # length to pad spectrograms to

####
import numpy as np

# Dataset under inspection.
DATASET_ID = 'bengalese_finch_sakata'
dataset = DataSet(DATASET_ID, hparams=hparams)

# Bare expression: has no effect when run as a script.
dataset.sample_json

# Load the sample recording and record its rate on hparams.
rate, data = load_wav(dataset.sample_json["wav_loc"])
hparams.sample_rate = rate

### Individual data file segmentation

# Band-pass filter the recording, then plot the filtered samples.
data = butter_bandpass_filter(data, butter_min, butter_max, rate)
plt.plot(data)
Esempio n. 7
0
# Copy the segmentation settings defined earlier onto hparams.
hparams.db_delta = db_delta
hparams.min_silence_for_spec = min_silence_for_spec
hparams.max_vocal_for_spec = max_vocal_for_spec
hparams.silence_threshold = silence_threshold


syll_size = 128

# NOTE(review): "mel_fiter" may be a typo for "mel_filter" - confirm which
# attribute name the consumers of hparams actually read.
hparams.mel_fiter = True  # should a mel filter be used?
# These must be plain scalars: a trailing comma after the value would store
# a 1-tuple such as (128,) instead.
hparams.num_mels = syll_size  # how many channels to use in the mel-spectrogram (eg 128)
hparams.fmin = 300  # low frequency cutoff for mel filter
hparams.fmax = None  # high frequency cutoff for mel filter

hparams.power = 1.5

# Only one recording is used below. 'br82bl42_0001.WAV' from the same folder
# was previously loaded first and then immediately overwritten.
rate, data = load_wav('I:/avgn_paper-vizmerge/data/processed/bengalese_finch_sakata/2020-04-29_21-12-51/WAV/tutor_bl5w5_0000.WAV')
hparams.sample_rate = rate

data = butter_bandpass_filter(data, butter_min, butter_max, rate)
plt.plot(data)

hparams.ref_level_db = 90

spec_orig = spectrogram(data, rate, hparams)
plot_spec(
    norm(spec_orig),
    fig=None,
    ax=None,
Esempio n. 8
0
# Copy the segmentation settings defined earlier onto hparams.
hparams.db_delta = db_delta
hparams.min_silence_for_spec = min_silence_for_spec
hparams.max_vocal_for_spec = max_vocal_for_spec
hparams.silence_threshold = silence_threshold

syll_size = 128

# NOTE(review): "mel_fiter" may be a typo for "mel_filter" - confirm which
# attribute name the consumers of hparams actually read.
hparams.mel_fiter = True  # should a mel filter be used?
# These must be plain scalars: a trailing comma after the value would store
# a 1-tuple such as (128,) instead.
hparams.num_mels = syll_size  # how many channels to use in the mel-spectrogram (eg 128)
hparams.fmin = 300  # low frequency cutoff for mel filter
hparams.fmax = None  # high frequency cutoff for mel filter

hparams.power = 1.5

rate, data = load_wav(
    'I:/avgn_paper-vizmerge/data/processed/bengalese_finch_sakata/2020-04-29_21-12-51/WAV/br81bl41_0000.WAV'
)
hparams.sample_rate = rate

data = butter_bandpass_filter(data, butter_min, butter_max, rate)
plt.plot(data)

# hparams.ref_level_db is left at its current value here.

spec_orig = spectrogram_nn(data, hparams)

plot_spec(
    spec_orig,
    fig=None,
    ax=None,
    rate=None,
Esempio n. 9
0
# Candidate recordings. Each pair overrides the previous one, so only the
# last mypath / file_current assignment is actually loaded.
# Raw strings keep the Windows backslashes literal; sequences such as "\d"
# and "\o" in a normal string are invalid escapes.
mypath = r'F:\data_for_avishek\Dir_Undir\done\o122p123'
file_current = 'motif.1614.898o22p23_dir5.wav'

mypath = r'F:\data_for_avishek\Dir_Undir\done\o121p122'
file_current = 'motif.2210.9524o21p22_dir7_new.wav'

mypath = r'F:\data_for_avishek\motif_all_dataset'
file_current = 'motif.2636.5533blublu50-58_dir4_new.wav'
file_current = 'motif.53.8549blublu50-58_dir5_new.wav'

mypath = r'F:\data_for_avishek\Stimuli\blu19org61\hctsadata'
file_current = 'directed_b19o61.3-041103.385.wav'

rate, data = load_wav(mypath + '\\' + file_current)

# filter data
data = butter_bandpass_filter(data, butter_min, butter_max, rate)
plt.plot(data)

hparams.ref_level_db = 70
spec_orig = spectrogram(data, rate, hparams)

figsize = (5, 3)

# Colormap for the spectrogram plot; 'RdPu', 'Reds' and 'Oranges' were the
# other options tried here.
# NOTE(review): cm.get_cmap is deprecated in recent Matplotlib releases -
# confirm the installed version still provides it.
current_map = cm.get_cmap('Blues', 512)
Esempio n. 10
0
def process_bird_wav(
    bird,
    wav_info,
    wav_time,
    params,
    save_to_folder,
    visualize=False,
    skip_created=False,
    seconds_timeout=300,
    save_spectrograms=True,
    verbose=False,
):
    """Split a WAV file into stretches of sound and save each accepted bout.

    Args:
        bird: name of the bird; used as the sub-folder name and passed to
            ``save_bout_wav``.
        wav_info: location handed to ``load_wav`` (and to ``save_bout_wav``).
        wav_time: datetime of the start of the recording; clip onsets are
            added to it to name the saved bouts.
        params: dict of settings. Reads ``"lowcut"``, ``"highcut"``,
            ``"rms_stride"``, ``"rms_window"``, ``"rms_padding"``,
            ``"noise_thresh"`` and ``"num_freq"``; ``"sample_rate"`` is
            written into it.
        save_to_folder: root folder; bouts go to ``<save_to_folder>/<bird>``.
        visualize: forwarded to ``threshold_clip``; also shows the
            spectrogram of clips that were rejected.
        skip_created: forwarded to the save helpers.
        seconds_timeout: not used by this function.
        save_spectrograms: when true, also save each bout's spectrogram.
        verbose: forwarded to ``threshold_clip``.

    Returns:
        None.
    """
    rate, data = load_wav(wav_info)
    # the rate is stored on params before the load result is checked
    params["sample_rate"] = rate
    if rate is None or data is None:
        return

    # band-pass filter in float32, then convert back with float32_to_int16
    filtered = butter_bandpass_filter(
        data.astype("float32"),
        params["lowcut"],
        params["highcut"],
        rate,
        order=2,
    )
    data = float32_to_int16(filtered)

    # keep only the first channel of two-dimensional data
    if np.ndim(data) == 2:
        data = data[:, 0]

    # threshold the (root mean squared of the) audio
    rms_data, sound_threshed = RMS(
        data,
        rate,
        params["rms_stride"],
        params["rms_window"],
        params["rms_padding"],
        params["noise_thresh"],
    )

    # stretch the thresholded signal by the stride (in samples) before
    # detecting onsets/offsets
    stride_samples = int(params["rms_stride"] * rate)
    onset_sounds, offset_sounds = detect_onsets_offsets(
        np.repeat(sound_threshed, stride_samples),
        threshold=0,
        min_distance=0,
    )
    # clamp negative onsets to zero (due to downsampling in RMS)
    onset_sounds[onset_sounds < 0] = 0

    for onset, offset in zip(onset_sounds, offset_sounds):
        clip = data[onset:offset]

        # bin width in Hz of spectrogram
        freq_step_size_Hz = (rate / 2) / params["num_freq"]
        bout_spec = threshold_clip(
            clip,
            rate,
            freq_step_size_Hz,
            params,
            visualize=visualize,
            verbose=verbose,
        )

        # a clip with no spectrogram is not saved; optionally show it instead
        if bout_spec is None:
            if visualize:
                wav_spectrogram = spectrogram(int16_to_float32(clip), params)
                visualize_spec(wav_spectrogram, show=True)
            continue

        # timestamp of the clip: recording start plus onset in seconds
        clip_start = wav_time + timedelta(seconds=onset / float(rate))
        time_string = clip_start.strftime("%Y-%m-%d_%H-%M-%S-%f")

        # per-bird output folder, created on demand
        bird_folder = Path(save_to_folder).resolve() / bird
        ensure_dir(bird_folder)

        # NOTE(review): the whole filtered recording `data`, not `clip`, is
        # passed here - confirm that is intended.
        save_bout_wav(
            data, rate, bird_folder, bird, wav_info, time_string, skip_created
        )

        if save_spectrograms:
            save_bout_spec(bird_folder, bout_spec, time_string, skip_created)