Python ensure_dir Examples, avgn.utils.paths.ensure_dir Python Examples

Example #1

0

Show file

File: download.py Project: robinmeier/birds_latent_generation

def download_tqdm(url, output_location, block_size=1024):
    """ Download a file using requests and tqdm
    https://stackoverflow.com/questions/37573483/progress-bar-while-download-file-over-http-with-requests

    Arguments:
        url {str} -- address of the file to download
        output_location {str or Path} -- destination file; if it is an
            existing directory, the last component of ``url`` is used as the
            file name inside that directory

    Keyword Arguments:
        block_size {int} -- number of bytes requested per chunk
            (default: {1024})

    Returns:
        None. If the destination already exists a warning is issued and
        nothing is downloaded. A size mismatch against the ``content-length``
        header is only reported with a printed message.
    """

    # Path() accepts strings as well as path objects, so no exact-type check
    # is needed to normalise the argument.
    output_location = Path(output_location)
    if output_location.is_dir():
        output_location = output_location / Path(url).name

    if output_location.exists():
        warnings.warn("File {} already exists".format(output_location))
        return
    # make directory if needed
    if not output_location.parent.exists():
        ensure_dir(output_location.parent)

    # Streaming, so we can iterate over the response. The context manager
    # releases the connection even if writing the file fails part-way.
    with requests.get(url, stream=True) as r:
        # Total size in bytes (0 when the server does not report it).
        total_size = int(r.headers.get("content-length", 0))

        wrote = 0
        with open(output_location, "wb") as f:
            for data in tqdm(
                    r.iter_content(block_size),
                    # true division: a trailing partial block still counts
                    # as one iteration of the progress bar
                    total=math.ceil(total_size / block_size),
                    unit="KB",
                    unit_scale=True,
            ):
                wrote += len(data)
                f.write(data)
    if total_size != 0 and wrote != total_size:
        print("ERROR, something went wrong")

Example #2

0

Show file

def generate_json(row, DT_ID):
    """ Write the JSON metadata file for one European starling recording

    Arguments:
        row -- record exposing ``wavdate`` (string formatted as
            "%Y-%m-%d_%H-%M-%S-%f"), ``wavloc`` (path object of the WAV file)
            and ``indv`` (individual identifier)
        DT_ID -- identifier used as a sub-folder of the processed dataset
    """
    wav_loc = row.wavloc.as_posix()

    datet = datetime.strptime(row.wavdate, "%Y-%m-%d_%H-%M-%S-%f")
    datestr = datet.strftime("%Y-%m-%d_%H-%M-%S")
    sr = get_samplerate(wav_loc)
    wav_duration = librosa.get_duration(filename=wav_loc)

    # make json dictionary (insertion order is the order of keys on disk)
    json_dict = {}
    # "species" holds the scientific name and "common_name" the vernacular
    # one; the two values were previously stored the wrong way round.
    json_dict["species"] = "Sturnus vulgaris"
    json_dict["common_name"] = "European starling"
    json_dict["indvs"] = {row.indv: {}}
    json_dict["datetime"] = datestr
    # rate and length
    json_dict["samplerate_hz"] = sr
    json_dict["length_s"] = wav_duration
    json_dict["wav_loc"] = wav_loc

    # generate json
    json_txt = json.dumps(json_dict, cls=NoIndentEncoder, indent=2)

    json_out = (DATA_DIR / "processed" / DATASET_ID / DT_ID / "JSON" /
                (row.wavloc.stem + ".JSON"))

    # save json; the context manager closes the handle deterministically
    ensure_dir(json_out.as_posix())
    with open(json_out.as_posix(), "w") as json_file:
        print(json_txt, file=json_file)

Example #3

0

Show file

File: zebra_finch_gardner.py Project: AvisP/AVGN_Avishek

def generate_json_wav_noise(indv, wav_num, song, nonsong, sr, DT_ID):
    """ Save the song WAV, the noise WAV and the JSON metadata of a recording

    Arguments:
        indv {str} -- individual identifier, used as prefix of the file stem
        wav_num {int} -- recording number, zero-padded to 4 digits in the stem
        song -- samples of the song; ``len(song) / sr`` is stored as duration
        nonsong -- samples written to the noise WAV
        sr -- sample rate in Hz (cast to int when writing the WAV files)
        DT_ID -- identifier used as a sub-folder of the processed dataset
    """
    wav_duration = len(song) / sr

    wav_stem = indv + "_" + str(wav_num).zfill(4)

    # output locations
    out_dir = DATA_DIR / "processed" / DATASET_ID / DT_ID
    json_out = out_dir / "JSON" / (wav_stem + ".JSON")
    noise_out = out_dir / "NOISE" / (wav_stem + ".WAV")
    wav_out = out_dir / "WAV" / (wav_stem + ".WAV")
    print('Wave Out directory')

    # make json dictionary
    json_dict = {}
    # add species
    json_dict["species"] = "Taeniopygia guttata"
    json_dict["common_name"] = "Zebra finch"
    json_dict["wav_loc"] = wav_out.as_posix()
    json_dict["noise_loc"] = noise_out.as_posix()

    # rate and length
    json_dict["samplerate_hz"] = sr
    json_dict["length_s"] = wav_duration
    json_dict["wav_num"] = wav_num

    # add syllable information: the whole recording is a single motif
    json_dict["indvs"] = {
        indv: {"motifs": {"start_times": [0.0], "end_times": [wav_duration]}}
    }

    json_txt = json.dumps(json_dict, cls=NoIndentEncoder, indent=2)

    # save wav file
    print(wav_out)
    ensure_dir(wav_out)
    librosa.output.write_wav(wav_out, y=song, sr=int(sr), norm=True)

    # save json; the context manager closes the handle deterministically
    ensure_dir(json_out.as_posix())
    with open(json_out.as_posix(), "w") as json_file:
        print(json_txt, file=json_file)

    # save noise
    ensure_dir(noise_out)
    librosa.output.write_wav(noise_out, y=nonsong, sr=int(sr), norm=True)

Example #4

0

Show file

File: general.py Project: robinmeier/birds_latent_generation

def unzip_file(zip_path, directory_to_extract_to):
    """ Extract every member of a zip archive, with a tqdm progress bar

    Arguments:
        zip_path -- location of the zip archive
        directory_to_extract_to -- destination folder (created if needed)

    A member that raises BadZipFile is reported with print and skipped.
    """
    ensure_dir(directory_to_extract_to)
    with zipfile.ZipFile(file=zip_path) as archive:
        members = archive.namelist()
        # one progress step per archive member
        for member in tqdm(iterable=members, total=len(members)):
            try:
                archive.extract(member=member, path=directory_to_extract_to)
            except BadZipFile as err:
                print(err)
Example #5

0

Show file

def save_bout_spec(bird_folder,
                   wav_spectrogram,
                   time_string,
                   skip_created=False,
                   figsize=(20, 5)):
    """ Save the spectrogram of a bout as a jpg in the bird's "specs" folder

    Arguments:
        bird_folder -- path object of the individual's output folder
        wav_spectrogram -- spectrogram of the bout; its transpose is plotted
        time_string {str} -- file stem of the image

    Keyword Arguments:
        skip_created {bool} -- do not overwrite an existing image
            (default: {False})
        figsize {tuple} -- figure size forwarded to visualize_spec. It used
            to be ignored in favour of a hard-coded (20, 5); that value is
            now the default so callers relying on the default see no change
            (default: {(20, 5)})
    """
    # save the spec file
    spec_folder = bird_folder / "specs"
    ensure_dir(spec_folder)
    spec_loc = spec_folder / (time_string + ".jpg")
    if skip_created and os.path.isfile(spec_loc):
        return
    # plot
    visualize_spec(wav_spectrogram.T,
                   save_loc=spec_loc,
                   show=False,
                   figsize=figsize)

Example #6

0

Show file

File: bird_db.py Project: robinmeier/birds_latent_generation

def generate_json_custom(wavfile, DT_ID):
    """ Write the JSON metadata file for a WAV file of a custom dataset

    The individual and the dataset name are read from the folder layout:
    the grandparent folder of ``wavfile`` names the individual and the
    folder above that names the dataset.

    Arguments:
        wavfile -- path object of the WAV file
        DT_ID -- identifier used as a sub-folder of the processed dataset
    """
    indv = wavfile.parent.parent.stem
    dataset_id = wavfile.parent.parent.parent.stem
    wav_loc = wavfile.as_posix()
    datestring = datetime.now().strftime("%Y-%m-%d")

    # local name on purpose: the output folder is specific to this individual
    DATASET_ID = f'{dataset_id}_{indv}'
    sr = get_samplerate(wav_loc)
    wav_duration = librosa.get_duration(filename=wav_loc)

    # make json dictionary
    json_dict = {
        "sample_rate": sr,
        "species": indv,
        "datetime": datestring,
        "wav_loc": wav_loc,
        "samplerate_hz": sr,
        "length_s": wav_duration,
    }

    # no manual segmentation
    json_dict["indvs"] = {
        indv: {
            "syllables": {
                "start_times": [],
                "end_times": [],
                "labels": [],
            }
        }
    }

    # generate json
    json_txt = json.dumps(json_dict, cls=NoIndentEncoder, indent=2)

    json_out = (DATA_DIR / "processed" / DATASET_ID / DT_ID / "JSON" /
                (wavfile.stem + ".JSON"))

    # save json; the context manager closes the handle deterministically
    ensure_dir(json_out.as_posix())
    with open(json_out.as_posix(), "w") as json_file:
        print(json_txt, file=json_file)
    return

Example #7

0

Show file

def save_bout_wav(data,
                  rate,
                  bird_folder,
                  bird,
                  orig_wav,
                  time_string,
                  skip_created=False):
    """ Save the wav and a csv of the extracted bout

    Arguments:
        data -- samples handed to write_wav
        rate -- sample rate handed to write_wav
        bird_folder -- path object of the individual's output folder; the
            files go to its "wavs" and "csv" sub-folders
        bird -- bird identifier, first column of the csv row
        orig_wav -- original wav location, second column of the csv row
        time_string {str} -- file stem of both outputs, third csv column

    Keyword Arguments:
        skip_created {bool} -- if the wav already exists, write neither the
            wav nor the csv (default: {False})
    """

    # save the wav file
    wav_folder = bird_folder / "wavs"
    ensure_dir(wav_folder)
    wav_loc = wav_folder / (time_string + ".wav")
    # if the file already exists and skip created flag is true, dont overwrite
    if skip_created and os.path.isfile(wav_loc):
        return
    write_wav(wav_loc, rate, data)

    # write to a csv with bird, original wav location, datetime of bout
    csv_folder = bird_folder / "csv"
    csv_loc = csv_folder / (time_string + ".csv")
    ensure_dir(csv_folder)
    # newline="" lets the csv module control line endings; without it every
    # row ends in "\r\r\n" on Windows
    with open(csv_loc, "w", newline="") as csv_file:
        wr = csv.writer(csv_file, quoting=csv.QUOTE_ALL)
        wr.writerow([bird, orig_wav, time_string])
    return

Example #8

0

Show file

def segment_spec_custom(key, df, DT_ID, save=False, plot=False):
    """ Segment one recording into syllables and optionally save the result

    Arguments:
        key {str} -- file stem of the JSON written when ``save`` is true
        df -- object whose ``data`` dict holds the recording metadata
            ("wav_loc" and "indvs" are read here)
        DT_ID -- identifier used as a sub-folder of the processed dataset

    Keyword Arguments:
        save {bool} -- write the updated metadata as JSON (default: {False})
        plot {bool} -- show a plot of the segmentation (default: {False})

    Returns:
        the result of dynamic_threshold_segmentation, or None when the
        segmentation returned None
    """
    # load wav
    rate, data = load_wav(df.data["wav_loc"])
    # filter data
    data = butter_bandpass_filter(data, butter_min, butter_max, rate)

    # segment
    results = dynamic_threshold_segmentation(
        data,
        hparams,
        verbose=True,
        min_syllable_length_s=min_syllable_length_s,
        spectral_range=spectral_range,
    )

    if results is None:
        return

    if plot:
        plot_segmentations(
            results["spec"],
            results["vocal_envelope"],
            results["onsets"],
            results["offsets"],
            hop_length_ms,
            rate,
            figsize=(15, 3)
        )
        plt.show()

    # NOTE(review): copy() is shallow, so the nested "indvs" dict is shared
    # with df.data and the assignment below also updates df.data -- confirm
    # whether callers rely on that before changing it.
    json_dict = df.data.copy()

    first_indv = list(df.data["indvs"].keys())[0]
    json_dict["indvs"][first_indv]["syllables"] = {
        "start_times": list(results["onsets"]),
        "end_times": list(results["offsets"]),
    }

    # save the results
    if save:
        json_out = (DATA_DIR / "processed" / (DATASET_ID + "_segmented") /
                    DT_ID / "JSON" / (key + ".JSON"))
        ensure_dir(json_out.as_posix())
        # the with-block closes the file; no explicit close() is needed
        with open(json_out.as_posix(), "w") as json_file:
            json.dump(json_dict, json_file, cls=NoIndentEncoder, indent=2)

    return results

Example #9

0

Show file

import seaborn as sns
# Pad every spectrogram with pad_spectrogram(spec, pad_length), fanned out
# over n_jobs workers. n_jobs, verbosity, pad_length and syllables_spec are
# defined outside this fragment.
with Parallel(n_jobs=n_jobs, verbose=verbosity) as parallel:

    syllables_spec = parallel(
        delayed(pad_spectrogram)(spec, pad_length)
        for spec in tqdm.tqdm(
            syllables_spec, desc="padding spectrograms", leave=False
        )
    )
# Check what the spectrograms look like after padding
draw_spec_set(syllables_spec, zoom=1, maxrows=10, colsize=25)

# Bare expression: the value is discarded when run as a script
# (NOTE(review): looks like leftover notebook cell output -- confirm).
np.shape(syllables_spec)
syllable_df['spectrogram'] = syllables_spec
# Bare expression as well; it has no effect outside an interactive session.
syllable_df[:3]

# View syllables per individual

for indv in np.sort(syllable_df.indv.unique()):
    # individual label and its number of syllables
    print(indv, np.sum(syllable_df.indv == indv))
    # scale each spectrogram by its own maximum, then clip negatives to zero
    specs = np.array([i/np.max(i) for i in syllable_df[syllable_df.indv == indv].spectrogram.values])
    specs[specs<0] = 0
    draw_spec_set(specs, zoom=2,
                  maxrows=16, 
                  colsize=25,
                  fig_title=indv,
                  num_indv=str(np.sum(syllable_df.indv == indv)))

# Persist the dataframe, including the padded spectrogram column, as a pickle.
save_loc = DATA_DIR / 'syllable_dfs' / DATASET_ID / 'bf_sakata_Bluebrown.pickle'
ensure_dir(save_loc)
syllable_df.to_pickle(save_loc)

Example #10

0

Show file

def gen_wav_json(wf, wav_df, DT_ID, save_wav=False):
    """ generates a JSON of segmental information from the wav_df rows

    if the flag save_wav is set to true, also generates a WAV file

    Arguments:
        wf -- path object of the source WAV; its stem names the outputs
        wav_df -- dataframe of the phrases of this WAV; the first row supplies
            indv, rendition, datetime and wavloc, and the phrase_start,
            phrase_end and phrase_label columns supply the segmentation
        DT_ID -- identifier used as a sub-folder of the processed dataset

    Keyword Arguments:
        save_wav {bool} -- re-save the audio under the processed dataset and
            point "wav_loc" at that copy (default: {False})
    """

    wav_stem = wf.stem
    first_row = wav_df.iloc[0]
    out_dir = DATA_DIR / "processed" / DATASET_ID / DT_ID

    # output locations
    if save_wav:
        # load wav file
        bout_wav, sr = librosa.load(wf, mono=True, sr=None)

        wav_out = out_dir / "WAV" / (wav_stem + ".WAV")
        bout_duration = len(bout_wav) / sr
        # save wav file
        ensure_dir(wav_out)
        librosa.output.write_wav(wav_out, y=bout_wav, sr=sr, norm=True)
    else:
        # reference the original recording instead of copying it
        wav_out = first_row.wavloc
        sr = get_samplerate(wav_out.as_posix())
        bout_duration = librosa.get_duration(filename=wav_out.as_posix())

    json_out = out_dir / "JSON" / (wav_stem + ".JSON")

    # create json dictionary (insertion order is the order of keys on disk)
    indv = first_row.indv
    json_dict = {}
    json_dict["indvs"] = {indv: {"phrases": {}}}
    json_dict["rendition"] = first_row.rendition
    json_dict["datetime"] = first_row.datetime.strftime("%Y-%m-%d_%H-%M-%S")
    json_dict["original_wav"] = first_row.wavloc.as_posix()
    json_dict["samplerate_hz"] = sr
    phrases = json_dict["indvs"][indv]["phrases"]
    phrases["start_times"] = NoIndent(list(wav_df.phrase_start.values))
    phrases["end_times"] = NoIndent(list(wav_df.phrase_end.values))
    phrases["labels"] = NoIndent(list(wav_df.phrase_label.values))
    json_dict["wav_loc"] = wav_out.as_posix()
    json_dict["length_s"] = bout_duration
    json_dict["species"] = "Serinus canaria forma domestica"
    json_dict["common_name"] = "Domestic canary"

    # generate json
    json_txt = json.dumps(json_dict, cls=NoIndentEncoder, indent=2)

    # save json; the context manager closes the handle deterministically
    ensure_dir(json_out.as_posix())
    with open(json_out.as_posix(), "w") as json_file:
        print(json_txt, file=json_file)

Example #11

0

Show file

File: bird_db.py Project: robinmeier/birds_latent_generation

def generate_json(wavfile, DT_ID, song_db):
    """ Write the JSON metadata file for one recording listed in song_db

    The individual is the name of the grandparent folder of ``wavfile``. The
    matching song_db row is selected on SubjectName, recording_date and
    recording_time; syllable boundaries come from the TextGrid of the same
    stem in the sibling "TextGrids" folder.

    Arguments:
        wavfile -- path object of the WAV file; its stem is parsed as
            "%Y-%m-%d_%H-%M-%S-%f" (the current time is used if that fails)
        DT_ID -- identifier used as a sub-folder of the processed dataset
        song_db -- dataframe with the recording metadata

    Returns:
        None. Nothing is written when the TextGrid file does not exist.

    Raises:
        IndexError -- when no song_db row matches the recording
    """
    indv = wavfile.parent.parent.stem
    try:
        dt = datetime.strptime(wavfile.stem, "%Y-%m-%d_%H-%M-%S-%f")
    except ValueError:
        dt = datetime.now()
    datestring = dt.strftime("%Y-%m-%d")

    row = song_db[(song_db.SubjectName == indv)
                  & (song_db.recording_date == datestring)
                  & (song_db.recording_time == dt.time())].iloc[0]

    # make json dictionary
    nat_type = type(pd.NaT)
    json_dict = {}
    for key, value in dict(row).items():
        # missing timestamps are dropped; test them first so they can never
        # reach a strftime branch
        if isinstance(value, nat_type):
            continue
        if isinstance(value, pd.Timestamp):
            json_dict[key] = value.strftime("%Y-%m-%d_%H-%M-%S")
        elif isinstance(value, dtt):
            json_dict[key] = value.strftime("%H:%M:%S")
        else:
            json_dict[key] = value

    species_name = row.Species_short_name.replace(" ", "_")
    common_name = row.Subject_species.replace(" ", "_")
    # local name on purpose: the output folder depends on the species
    DATASET_ID = "BIRD_DB_" + species_name

    json_dict["species"] = species_name
    json_dict["common_name"] = common_name
    json_dict["datetime"] = datestring

    wav_loc = wavfile.as_posix()
    sr = get_samplerate(wav_loc)
    wav_duration = librosa.get_duration(filename=wav_loc)

    json_dict["wav_loc"] = wav_loc
    # rate and length
    json_dict["samplerate_hz"] = sr
    json_dict["length_s"] = wav_duration

    tg = wavfile.parent.parent / "TextGrids" / (wavfile.stem + ".TextGrid")

    if not tg.exists():
        print(tg.as_posix(), 'File does not exist')
        return
    textgrid = tgio.openTextgrid(fnFullPath=tg)

    # only the first tier of the TextGrid is used
    tierlist = textgrid.tierDict[textgrid.tierNameList[0]].entryList
    start_times = [i.start for i in tierlist]
    end_times = [i.end for i in tierlist]
    labels = [i.label for i in tierlist]

    json_dict["indvs"] = {
        indv: {
            "syllables": {
                "start_times": NoIndent(start_times),
                "end_times": NoIndent(end_times),
                "labels": NoIndent(labels),
            }
        }
    }

    # generate json
    json_txt = json.dumps(json_dict, cls=NoIndentEncoder, indent=2)

    json_out = (DATA_DIR / "processed" / DATASET_ID / DT_ID / "JSON" /
                (wavfile.stem + ".JSON"))

    # save json; the context manager closes the handle deterministically
    ensure_dir(json_out.as_posix())
    with open(json_out.as_posix(), "w") as json_file:
        print(json_txt, file=json_file)

Example #12

0

Show file

File: main_segment.py Project: robinmeier/birds_latent_generation

    def segment_spec_custom(key, df, save=False, plot=False):
        """ Segment one recording into syllables and optionally save the JSON

        Appends ``key`` to processed_files on every call and to
        segmented_files when the segmentation produced a result; both lists,
        like the segmentation settings, come from the enclosing scope.

        Arguments:
            key {str} -- file stem of the JSON written when ``save`` is true
            df -- object whose ``data`` dict holds the recording metadata
                ("wav_loc" and "indvs" are read here)

        Keyword Arguments:
            save {bool} -- write the updated metadata as JSON
                (default: {False})
            plot {bool} -- not used by this function (default: {False})

        Returns:
            the result of dynamic_threshold_segmentation, or None when the
            segmentation returned None
        """

        processed_files.append(key)

        # load wav
        data, _ = librosa.load(df.data["wav_loc"], sr=sr)

        # filter data
        data = butter_bandpass_filter(data, butter_lowcut, butter_highcut, sr)

        # segment
        results = dynamic_threshold_segmentation(
            vocalization=data,
            rate=sr,
            n_fft=n_fft,
            hop_length=ms_to_sample(hop_length_ms, sr),
            win_length=ms_to_sample(win_length_ms, sr),
            min_level_db_floor=min_level_db_floor,
            db_delta=db_delta,
            ref_level_db=ref_level_db,
            pre=pre,
            min_silence_for_spec=min_silence_for_spec,
            max_vocal_for_spec=max_vocal_for_spec,
            silence_threshold=silence_threshold,
            min_syllable_length_s=min_syllable_length_s,
            spectral_range=spectral_range,
            verbose=True,
        )
        if results is None:
            print('skipping')
            return

        segmented_files.append(key)

        # save the results
        json_out = DATA_DIR / "processed" / (
            DATASET_ID + "_segmented") / DT_ID / "JSON" / (key + ".JSON")

        # NOTE(review): copy() is shallow, so the nested "indvs" dict is
        # shared with df.data and the assignment below also updates df.data
        # -- confirm whether callers rely on that before changing it.
        json_dict = df.data.copy()

        first_indv = list(df.data["indvs"].keys())[0]
        json_dict["indvs"][first_indv]["syllables"] = {
            "start_times": NoIndent(list(results["onsets"])),
            "end_times": NoIndent(list(results["offsets"])),
        }

        json_txt = json.dumps(json_dict, cls=NoIndentEncoder, indent=2)
        # save json; the context manager closes the handle deterministically
        if save:
            ensure_dir(json_out.as_posix())
            with open(json_out.as_posix(), "w") as json_file:
                print(json_txt, file=json_file)

        return results

Example #13

0

Show file

File: 2_Bengalese_Finch_Sakata_Plotting_UMAP_from_syllables.py Project: AvisP/AVGN_Avishek

    return (x-np.min(x)) / (np.max(x) - np.min(x))

# Normalise every syllable spectrogram with norm() and flatten the result.
# NOTE(review): norm is presumably the function whose return statement
# precedes this fragment -- confirm.
specs = list(syllable_df.spectrogram.values)
specs = [norm(i) for i in specs]
specs_flattened = flatten_spectrograms(specs)
# Bare expression: the shape is computed but discarded when run as a script.
np.shape(specs_flattened)

# Variation across individuals ( Not complete)
# syllable_df.indv.unique()
# from cuml.manifold.umap import UMAP as cumlUMAP
# NOTE(review): these imports sit in the middle of the script; consider
# moving them to the top of the file.
import umap
from avgn.visualization.projections import scatter_spec
from avgn.utils.general import save_fig
from avgn.utils.paths import FIGURE_DIR, ensure_dir
from avgn.visualization.quickplots import draw_projection_plots
# Make sure the figure output location for this dataset exists.
ensure_dir(FIGURE_DIR / 'bf_sakata_Bluebrown')

    
indv_dfs = {}    
for indvi, indv in enumerate(tqdm.tqdm(syllable_df.indv.unique())):
    indv_dfs[indv] = syllable_df[syllable_df.indv == indv]
    indv_dfs[indv] = indv_dfs[indv].sort_values(by=["key", "start_time"])
    print(indv, len(indv_dfs[indv]))
    specs = [norm(i) for i in indv_dfs[indv].spectrogram.values]
    
    # sequencing
    indv_dfs[indv]["syllables_sequence_id"] = None
    indv_dfs[indv]["syllables_sequence_pos"] = None
    for ki, key in enumerate(indv_dfs[indv].key.unique()):
        indv_dfs[indv].loc[indv_dfs[indv].key == key, "syllables_sequence_id"] = ki
        indv_dfs[indv].loc[indv_dfs[indv].key == key, "syllables_sequence_pos"] = np.arange(

Example #14

0

Show file

    return (x-np.min(x)) / (np.max(x) - np.min(x))

# Normalise every syllable spectrogram with norm() and flatten the result.
# NOTE(review): norm is presumably the function whose return statement
# precedes this fragment -- confirm.
specs = list(syllable_df.spectrogram.values)
specs = [norm(i) for i in specs]
specs_flattened = flatten_spectrograms(specs)
# Bare expression: the shape is computed but discarded when run as a script.
np.shape(specs_flattened)

# Variation across individuals ( Not complete)
# syllable_df.indv.unique()
# from cuml.manifold.umap import UMAP as cumlUMAP
# NOTE(review): these imports sit in the middle of the script; consider
# moving them to the top of the file.
import umap
from avgn.visualization.projections import scatter_spec
from avgn.utils.general import save_fig
from avgn.utils.paths import FIGURE_DIR, ensure_dir
from avgn.visualization.quickplots import draw_projection_plots
# Make sure the figure output location for this dataset exists.
ensure_dir(FIGURE_DIR / 'zf')

    
indv_dfs = {}    
for indvi, indv in enumerate(tqdm.tqdm(syllable_df.indv.unique())):
    indv_dfs[indv] = syllable_df[syllable_df.indv == indv]
    indv_dfs[indv] = indv_dfs[indv].sort_values(by=["key", "start_time"])
    print(indv, len(indv_dfs[indv]))
    specs = [norm(i) for i in indv_dfs[indv].spectrogram.values]
    
    # sequencing
    indv_dfs[indv]["syllables_sequence_id"] = None
    indv_dfs[indv]["syllables_sequence_pos"] = None
    for ki, key in enumerate(indv_dfs[indv].key.unique()):
        indv_dfs[indv].loc[indv_dfs[indv].key == key, "syllables_sequence_id"] = ki
        indv_dfs[indv].loc[indv_dfs[indv].key == key, "syllables_sequence_pos"] = np.arange(

Example #15

0

Show file

def generate_json(row, DT_ID):
    """ Write the JSON metadata file for one rodent recording

    The name of the folder holding the WAV is split on "_" into a common
    name and, when there are exactly two parts, a condition. Syllable start
    and end times are the first two columns of the header-less csv that sits
    next to the WAV with the same stem.

    Arguments:
        row -- record exposing ``wavloc``, the path object of the WAV file
        DT_ID -- identifier used as a sub-folder of the processed dataset

    Raises:
        KeyError -- when the common name is missing from species_dict
        ValueError -- when no individual id rule exists for the common name /
            condition pair (this used to surface as an UnboundLocalError)
    """

    wav = row.wavloc

    cond = wav.parent.stem.split("_")
    if len(cond) == 2:
        common_name, condition = cond
    else:
        common_name = cond[0]
        condition = None

    # looked up first so an unknown common name still raises KeyError
    species = species_dict[common_name]

    if common_name == "mouse" and condition == "C57BL":
        data_id = wav.stem.split("_")[0]
        indv_id = mouse_id_dict[data_id]
    elif common_name == "mouse" and condition == "BALBc":
        indv_id = wav.stem.split("-")[0]
    elif common_name == "rat":
        indv_id = wav.stem.split("_")[-2]
    elif common_name == "gerbil":
        indv_id = wav.stem
    else:
        raise ValueError(
            "cannot determine the individual for common name {!r} with "
            "condition {!r}".format(common_name, condition))

    # wav info
    sr = get_samplerate(row.wavloc.as_posix())
    wav_duration = librosa.get_duration(filename=row.wavloc)

    # make json dictionary
    json_dict = {}
    # add species
    json_dict["condition"] = condition
    json_dict["species"] = species
    json_dict["common_name"] = common_name
    json_dict["wav_loc"] = row.wavloc.as_posix()

    # rate and length
    json_dict["samplerate_hz"] = sr
    json_dict["length_s"] = wav_duration

    # get syllable start and end times
    csv_loc = row.wavloc.parent / (row.wavloc.stem + ".csv")
    voc_df = pd.read_csv(csv_loc, header=None)[[0, 1]]
    voc_df.columns = ["start_time", "end_time"]

    # add syllable information
    json_dict["indvs"] = {
        indv_id: {
            "syllables": {
                "start_times": NoIndent(list(voc_df.start_time.values)),
                "end_times": NoIndent(list(voc_df.end_time.values)),
            }
        }
    }

    # local name on purpose: the output folder depends on the common name
    DATASET_ID = "tachibana_" + common_name

    # dump
    json_txt = json.dumps(json_dict, cls=NoIndentEncoder, indent=2)
    wav_stem = row.wavloc.stem

    json_out = (DATA_DIR / "processed" / DATASET_ID / DT_ID / "JSON" /
                (wav_stem + ".JSON"))
    print(json_out)
    # save json; the context manager closes the handle deterministically
    ensure_dir(json_out.as_posix())
    with open(json_out.as_posix(), "w") as json_file:
        print(json_txt, file=json_file)

Example #16

0

Show file

def process_bird_wav(
    bird,
    wav_info,
    wav_time,
    params,
    save_to_folder,
    visualize=False,
    skip_created=False,
    seconds_timeout=300,
    save_spectrograms=True,
    verbose=False,
):
    """splits a wav file into periods of silence and periods of sound based on params

    Every period of sound that passes threshold_clip is saved as a bout
    (wav + csv, and optionally a spectrogram image) under
    ``save_to_folder / bird``.

    Arguments:
        bird -- bird identifier, also the name of the output sub-folder
        wav_info -- handed to load_wav and stored as the original wav
            location in the csv of each bout
        wav_time -- datetime of the start of the recording
        params {dict} -- processing settings; the keys read here are lowcut,
            highcut, rms_stride, rms_window, rms_padding, noise_thresh and
            num_freq. "sample_rate" is written into it once the wav loaded.
        save_to_folder -- root output folder

    Keyword Arguments:
        visualize {bool} -- show the spectrogram of rejected clips and
            forward the flag to threshold_clip (default: {False})
        skip_created {bool} -- do not overwrite existing outputs
            (default: {False})
        seconds_timeout {int} -- not used by this function (default: {300})
        save_spectrograms {bool} -- also save a spectrogram image per bout
            (default: {True})
        verbose {bool} -- forwarded to threshold_clip (default: {False})
    """
    # Load up the WAV
    rate, data = load_wav(wav_info)
    if rate is None or data is None:
        return
    # only record the rate once the load is known to have succeeded
    params["sample_rate"] = rate

    # bandpass filter
    data = butter_bandpass_filter(data.astype("float32"),
                                  params["lowcut"],
                                  params["highcut"],
                                  rate,
                                  order=2)
    data = float32_to_int16(data)

    # we only want one channel
    if len(np.shape(data)) == 2:
        data = data[:, 0]

    # threshold the (root mean squared of the) audio
    rms_data, sound_threshed = RMS(
        data,
        rate,
        params["rms_stride"],
        params["rms_window"],
        params["rms_padding"],
        params["noise_thresh"],
    )
    # Find the onsets/offsets of sound
    onset_sounds, offset_sounds = detect_onsets_offsets(
        np.repeat(sound_threshed, int(params["rms_stride"] * rate)),
        threshold=0,
        min_distance=0,
    )
    # make sure all onset sounds are at least zero (due to downsampling in RMS)
    onset_sounds[onset_sounds < 0] = 0

    # loop invariants: spectrogram bin width in Hz and the bird's folder
    freq_step_size_Hz = (rate / 2) / params["num_freq"]
    bird_folder = Path(save_to_folder).resolve() / bird

    # threshold clips of sound
    for onset_sound, offset_sound in zip(onset_sounds, offset_sounds):

        # segment the clip
        clip = data[onset_sound:offset_sound]
        # if the clip is thresholded as noise, do not save it into dataset
        bout_spec = threshold_clip(clip,
                                   rate,
                                   freq_step_size_Hz,
                                   params,
                                   visualize=visualize,
                                   verbose=verbose)
        if bout_spec is None:
            # visualize spectrogram if desired
            if visualize:
                # compute spectrogram of clip
                wav_spectrogram = spectrogram(int16_to_float32(clip), params)
                visualize_spec(wav_spectrogram, show=True)
            continue

        # determine the datetime of this clip
        start_time = wav_time + timedelta(seconds=onset_sound / float(rate))
        time_string = start_time.strftime("%Y-%m-%d_%H-%M-%S-%f")

        # create a subfolder for the individual bird if it doesn't already
        # exist (only once a bout is actually going to be saved)
        ensure_dir(bird_folder)

        # save the bout itself: the file is named after the clip's start
        # time, so it must hold the clip, not the whole recording
        save_bout_wav(clip, rate, bird_folder, bird, wav_info, time_string,
                      skip_created)

        # save the spectrogram of the data
        if save_spectrograms:
            save_bout_spec(bird_folder, bout_spec, time_string, skip_created)