def generate_json(row, DT_ID):
    # get sr and duration
    sr = get_samplerate(row.wavloc.as_posix())
    wav_duration = librosa.get_duration(filename=row.wavloc)

    # create json
    json_dict = {}
    json_dict["common_name"] = "Macaque"
    json_dict["species"] = "Macaque mulatta"
    json_dict["samplerate_hz"] = sr
    json_dict["length_s"] = wav_duration
    json_dict["wav_loc"] = row.wavloc.as_posix()
    json_dict["idnum"] = row.idnum
    json_dict["samplerate_hz"] = sr
    json_dict["indvs"] = {
        row.indv: {
            "coos": {
                "start_times": NoIndent([0.0]),
                "end_times": NoIndent([wav_duration]),
            }
        }
    }
    json_out = (DATA_DIR / "processed" / DATASET_ID / DT_ID / "JSON" /
                (row.wavloc.stem + ".JSON"))
    json_txt = json.dumps(json_dict, cls=NoIndentEncoder, indent=2)

    # save json
    avgn.utils.paths.ensure_dir(json_out.as_posix())
    with open(json_out.as_posix(), "w") as json_file:
        json_file.write(json_txt)
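# Module-level names used throughout this file (DATA_DIR: pathlib.Path to the data
# root, DATASET_ID: dataset identifier string, DT_ID: datetime string naming this
# processing run) are assumed to be defined elsewhere, as are get_samplerate and the
# NoIndent / NoIndentEncoder helpers. The classes below are only a minimal sketch of
# the behavior these scripts appear to rely on (lists wrapped in NoIndent are emitted
# on a single line inside an otherwise indent=2 dump); they are not the project's
# implementation.
import json
import uuid


class NoIndentSketch(object):
    """Wrapper marking a value that should be serialized on a single line."""

    def __init__(self, value):
        self.value = value


class NoIndentEncoderSketch(json.JSONEncoder):
    """JSONEncoder that collapses wrapped values onto one line via placeholders."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self._replacements = {}

    def default(self, o):
        if isinstance(o, NoIndentSketch):
            # stash a compact rendering and emit a unique placeholder string
            # (assumes the wrapped values are JSON-serializable; the real helper
            # may additionally handle numpy scalar types)
            key = uuid.uuid4().hex
            self._replacements[key] = json.dumps(o.value)
            return "@@{}@@".format(key)
        return super().default(o)

    def encode(self, o):
        # swap each quoted placeholder for its compact, single-line JSON
        out = super().encode(o)
        for key, compact in self._replacements.items():
            out = out.replace('"@@{}@@"'.format(key), compact)
        return out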
def generate_json_wav(row, CBINLIST, cbin_names, DT_ID):
    """ generates a json and WAV for bengalese finch data in MAT and CBIN format
    """
    cbin_file = np.array(CBINLIST)[cbin_names == row.wavname][0]
    bout_wav, rate = load_cbin(cbin_file.as_posix())

    # general json info
    # make json dictionary
    json_dict = {}
    json_dict["species"] = "Lonchura striata domestica"
    json_dict["common_name"] = "Bengalese finch"
    json_dict["indvs"] = {
        row.bird: {
            "syllables": {
                "start_times": NoIndent(list(row.start_times)),
                "end_times": NoIndent(list(row.end_times)),
                "labels": NoIndent(list(row.syllables)),
            }
        }
    }
    wav_date = row.stime.strftime("%Y-%m-%d_%H-%M-%S")
    json_dict["datetime"] = wav_date
    # rate and length
    json_dict["samplerate_hz"] = rate
    json_dict["length_s"] = len(bout_wav) / rate

    wav_stem = row.wavname[:-5]

    # output locations
    wav_out = DATA_DIR / "processed" / DATASET_ID / DT_ID / "WAV" / (wav_stem + ".WAV")
    json_dict["wav_loc"] = wav_out.as_posix()
    json_out = (
        DATA_DIR / "processed" / DATASET_ID / DT_ID / "JSON" / (wav_stem + ".JSON")
    )

    # encode json
    json_txt = json.dumps(json_dict, cls=NoIndentEncoder, indent=2)

    # save wav file
    avgn.utils.paths.ensure_dir(wav_out)
    librosa.output.write_wav(
        wav_out, y=bout_wav.astype("float32"), sr=int(rate), norm=True
    )

    # save json
    avgn.utils.paths.ensure_dir(json_out.as_posix())
    with open(json_out.as_posix(), "w") as json_file:
        json_file.write(json_txt)
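# Hypothetical driver (an assumption, not from the source): generate_json_wav is
# applied row-by-row to an annotation DataFrame; song_df and its columns here are
# illustrative, and cbin_names is taken to be the file names parallel to CBINLIST.
def _example_generate_all_json_wavs(song_df, CBINLIST, DT_ID):
    """Apply generate_json_wav to every row of a bout annotation DataFrame."""
    cbin_names = np.array([i.name for i in CBINLIST])
    for _, row in song_df.iterrows():
        generate_json_wav(row, CBINLIST, cbin_names, DT_ID)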
def annotate_bouts(DT_ID, bout_number, wav_df, bout_df, hparams):
    """ segments parsed bouts and annotates as json
    """

    bout_start = bout_df.pulse_start.values[0]
    bout_end = bout_df.pulse_end.values[-1]
    # Ensure padding does not start before WAV starts
    bout_pad_start = hparams.bout_pad_s
    if bout_start - hparams.bout_pad_s < 0:
        bout_pad_start = bout_start

    # load the wav at the relevant times + padding if possible
    clip_duration = (bout_end + hparams.bout_pad_s) - (bout_start - bout_pad_start)
    bout_wav, sr = librosa.load(
        bout_df.iloc[0].wav_loc,
        mono=True,
        sr=None,
        offset=bout_start - bout_pad_start,
        duration=clip_duration,
    )
    # extract a noise clip
    if hparams.get_noise_clip:
        bout_noise, noise_sr = avgn.custom_parsing.general.extract_noise_clip(
            bout_df.iloc[0].wav_loc,
            bout_start,
            bout_end,
            wav_df.pulse_start.values,
            wav_df.pulse_end.values,
            hparams.min_noise_clip_size_s,
            hparams.max_noise_clip_size_s,
        )
    else:
        bout_noise = None
        noise_sr = None

    # get time of bout relative to wav
    time_in_wav = bout_start - bout_pad_start
    bout_start_string = avgn.utils.general.seconds_to_str(time_in_wav)
    wav_stem = bout_df.iloc[0].wav_loc.stem

    # output locations
    wav_out = (
        DATA_DIR
        / "processed"
        / DATASET_ID
        / DT_ID
        / "WAV"
        / (wav_stem + "__" + bout_start_string + ".WAV")
    )
    json_out = (
        DATA_DIR
        / "processed"
        / DATASET_ID
        / DT_ID
        / "JSON"
        / (wav_stem + "__" + bout_start_string + ".JSON")
    )

    noise_out = (
        DATA_DIR
        / "processed"
        / DATASET_ID
        / DT_ID
        / "NOISE"
        / (wav_stem + "__" + bout_start_string + ".WAV")
    )

    bout_duration = len(bout_wav) / sr
    # generate the json for the bout
    wavdate = datetime.strptime(bout_df.date.values[0], "%d%m%y")
    wav_date = wavdate.strftime("%Y-%m-%d_%H-%M-%S")

    # wav general information
    json_dict = {}
    json_dict["species"] = "Callithrix jacchus"
    json_dict["common_name"] = "Common marmoset"
    json_dict["bout_number"] = bout_number
    json_dict["datetime"] = wav_date
    json_dict["samplerate_hz"] = sr
    json_dict["original_wav"] = bout_df.wav_loc.values[0].as_posix()
    json_dict["length_s"] = bout_duration
    json_dict["time_relative_to_original_wav"] = bout_start - bout_pad_start
    json_dict["wav_loc"] = wav_out.as_posix()
    json_dict["noise_loc"] = noise_out.as_posix()
    json_dict["indvs"] = {}

    # individual specific information
    for indv in bout_df.indv.unique():
        json_dict["indvs"][indv] = {}
        indv_df = bout_df[bout_df.indv == indv].sort_values(by="pulse_start")
        json_dict["indvs"][indv]["partner"] = indv_df.partner.values[0]
        json_dict["indvs"][indv]["calls"] = {
            "start_times": NoIndent(
                list(indv_df.pulse_start.values - bout_start + bout_pad_start)
            ),
            "end_times": NoIndent(
                list(indv_df.pulse_end.values - bout_start + bout_pad_start)
            ),
            "labels": NoIndent(list(indv_df.call_type.values)),
            "call_num": NoIndent(list(indv_df.call_num.values)),
            "pulse_num": NoIndent(list(indv_df.pulse_n.values)),
        }

    json_txt = json.dumps(json_dict, cls=NoIndentEncoder, indent=2)

    # save wav file
    avgn.utils.paths.ensure_dir(wav_out)
    librosa.output.write_wav(wav_out, y=bout_wav, sr=sr, norm=True)

    # save json
    avgn.utils.paths.ensure_dir(json_out.as_posix())
    with open(json_out.as_posix(), "w") as json_file:
        json_file.write(json_txt)

    # save noise file
    if hparams.get_noise_clip:
        avgn.utils.paths.ensure_dir(noise_out)
        if bout_noise is not None:
            librosa.output.write_wav(noise_out, y=bout_noise, sr=noise_sr, norm=True)
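# Sketch of the hparams object annotate_bouts expects, inferred only from the
# attribute accesses above; field names are from the code, values are illustrative.
def _example_marmoset_hparams():
    """Build a hypothetical hparams container for annotate_bouts."""
    from types import SimpleNamespace

    return SimpleNamespace(
        bout_pad_s=0.5,  # seconds of padding around each bout
        get_noise_clip=True,  # also extract a background-noise clip
        min_noise_clip_size_s=1.0,
        max_noise_clip_size_s=10.0,
    )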
def gen_wav_json(wf, wav_df, DT_ID, save_wav=False):
    """ generates a JSON of segmental iformation from the wav_df row
    
    if the flag save_wav is set to true, also generates a WAV file

    Arguments:
        wf {[type]} -- [description]
        wav_df {[type]} -- [description]
        DT_ID {[type]} -- [description]
    
    Keyword Arguments:
        save_wav {bool} -- [description] (default: {False})
    """

    wav_stem = wf.stem

    # output locations
    if save_wav:
        # load wav file
        bout_wav, sr = librosa.load(wf, mono=True, sr=None)

        wav_out = (DATA_DIR / "processed" / DATASET_ID / DT_ID / "WAV" /
                   (wav_stem + ".WAV"))
        bout_duration = len(bout_wav) / sr
        # save wav file
        ensure_dir(wav_out)
        librosa.output.write_wav(wav_out, y=bout_wav, sr=sr, norm=True)
    else:
        sr = get_samplerate(wav_df.iloc[0].wavloc.as_posix())
        wav_out = wav_df.iloc[0].wavloc
        bout_duration = librosa.get_duration(
            filename=wav_df.iloc[0].wavloc.as_posix())

    json_out = (DATA_DIR / "processed" / DATASET_ID / DT_ID / "JSON" /
                (wav_stem + ".JSON"))

    # create json dictionary
    indv = wav_df.iloc[0].indv
    json_dict = {}
    json_dict["indvs"] = {indv: {"phrases": {}}}
    json_dict["rendition"] = wav_df.iloc[0].rendition
    json_dict["datetime"] = wav_df.iloc[0].datetime.strftime(
        "%Y-%m-%d_%H-%M-%S")
    json_dict["original_wav"] = wav_df.iloc[0].wavloc.as_posix()
    json_dict["samplerate_hz"] = sr
    json_dict["indvs"][indv]["phrases"]["start_times"] = NoIndent(
        list(wav_df.phrase_start.values))
    json_dict["indvs"][indv]["phrases"]["end_times"] = NoIndent(
        list(wav_df.phrase_end.values))
    json_dict["indvs"][indv]["phrases"]["labels"] = NoIndent(
        list(wav_df.phrase_label.values))
    json_dict["wav_loc"] = wav_out.as_posix()
    json_dict["length_s"] = bout_duration
    json_dict["species"] = "Serinus canaria forma domestica"
    json_dict["common_name"] = "Domestic canary"

    # generate json
    json_txt = json.dumps(json_dict, cls=NoIndentEncoder, indent=2)

    # save json
    ensure_dir(json_out.as_posix())
    with open(json_out.as_posix(), "w") as json_file:
        json_file.write(json_txt)
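# Hypothetical usage (an assumption): phrase annotations for all wavs live in one
# DataFrame with a `wavloc` column of Paths, so one JSON is emitted per wav file.
def _example_gen_all_wav_jsons(phrase_df, DT_ID, save_wav=False):
    """Group a phrase annotation DataFrame by wav file and emit one JSON per file."""
    for wf, wav_df in phrase_df.groupby("wavloc"):
        gen_wav_json(wf, wav_df, DT_ID, save_wav=save_wav)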
def annotate_bouts(
    row, songdata_row, individual_row, wav_elements, wav_syllables, DT_ID
):
    """Grabs annotation information for swampsparrow and creates JSON labels and saves wav
    
    [description]
    
    Arguments:
        row {[type]} -- [description]
        songdata_row {[type]} -- [description]
        individual_row {[type]} -- [description]
        wav_elements {[type]} -- [description]
        wav_syllables {[type]} -- [description]
        DT_ID {[type]} -- [description]
    """

    # skip rows with no WAV data
    if isinstance(row.WAV, float) and np.isnan(row.WAV):
        return

    # recording time
    recording_time = datetime.fromtimestamp(row.TIME / 1000.0).strftime(
        "%Y-%m-%d_%H-%M-%S"
    )

    # output locations
    wav_stem = songdata_row.NAME.split(".")[0]
    wav_out = DATA_DIR / "processed" / DATASET_ID / DT_ID / "WAV" / (wav_stem + ".WAV")
    json_out = (
        DATA_DIR / "processed" / DATASET_ID / DT_ID / "JSON" / (wav_stem + ".JSON")
    )

    # wav general information
    json_dict = {}

    json_dict["datetime"] = recording_time
    json_dict["samplerate_hz"] = row.SAMPLERATE
    json_dict["indvs"] = {individual_row.NAME: {}}
    json_dict["MAXFREQ"] = float(songdata_row.MAXFREQ)
    json_dict["RECORDER"] = songdata_row.RECORDER
    json_dict["species"] = "Melospiza georgiana"
    json_dict["common_name"] = individual_row.SPECID
    json_dict["POPID"] = individual_row.POPID
    json_dict["LOCDESC"] = individual_row.LOCDESC
    json_dict["GRIDTYPE"] = individual_row.GRIDTYPE
    json_dict["GRIDX"] = individual_row.GRIDX
    json_dict["GRIDY"] = individual_row.GRIDY
    json_dict["SEX"] = individual_row.SEX
    json_dict["AGE"] = individual_row.AGE
    json_dict["RANK"] = individual_row.RANK
    json_dict["wav_loc"] = wav_out.as_posix()

    # load the wav
    wavdata = string2int16(row.WAV)
    sr = int(row.SAMPLERATE)

    # populate with syllable information

    json_dict["indvs"][individual_row.NAME]["syllables"] = {}

    syllable_start_times = []
    syllable_end_times = []
    for idx, syllable_row in wav_syllables[1:].iterrows():
        syllable_start_times.append(
            (syllable_row.STARTTIME) / 1000
        )  # * row.SAMPLERATE)
        syllable_end_times.append((syllable_row.ENDTIME) / 1000)  # * row.SAMPLERATE)

    json_dict["indvs"][individual_row.NAME]["syllables"]["start_time"] = NoIndent(
        syllable_start_times
    )
    json_dict["indvs"][individual_row.NAME]["syllables"]["end_time"] = NoIndent(
        syllable_end_times
    )

    # populate with element information
    json_dict["indvs"][individual_row.NAME]["elements"] = {}
    element_start_times = []
    element_end_times = []
    element_syllable_num = []
    element_pos_in_syllable = []
    element_pf_start = []
    element_pf_end = []
    element_pf_mean = []
    element_trill_amp_mean = []

    for idx, element_row in wav_elements.iterrows():

        # Peak frequency
        freq_list = np.array(element_row.PEAKFREQ.split(" "))[:-1].astype("float")
        trillamp_list = np.array(element_row.TRILLAMP.split(" "))[:-1].astype("float")
        element_pf_start.append(freq_list[5])
        element_pf_end.append(freq_list[-1])
        element_pf_mean.append(np.mean(freq_list[5:]))
        element_trill_amp_mean.append(np.mean(trillamp_list[6:]))

        # timings of element
        element_start = (
            element_row.STARTTIME * element_row.TIMESTEP
        ) / 1000  # * row.SAMPLERATE
        element_end = element_start + (element_row.TIMELENGTH / 1000)
        element_start_times.append(element_start)
        element_end_times.append(element_end)
        element_middle = (element_start + element_end) / 2
        # which syllable does this element belong to
        syllable_num = np.where(
            (np.array(syllable_start_times) <= element_middle)
            & (np.array(syllable_end_times) >= element_middle)
        )[0]
        if len(syllable_num) > 0:
            syllable_num = syllable_num[0]
        else:
            syllable_num = -1

        element_syllable_num.append(syllable_num)

        # what number element within the syllable is this
        if len(element_pos_in_syllable) > 0:
            if syllable_num == element_syllable_num[-2]:
                # this syllable is the same
                element_pos_in_syllable.append(element_pos_in_syllable[-1] + 1)
            else:
                element_pos_in_syllable.append(0)
        else:
            element_pos_in_syllable.append(0)

    # add information about elements
    json_dict["indvs"][individual_row.NAME]["elements"]["pos_in_syllable"] = NoIndent(
        element_pos_in_syllable
    )
    json_dict["indvs"][individual_row.NAME]["elements"]["peakfreq_start"] = NoIndent(
        element_pf_start
    )
    json_dict["indvs"][individual_row.NAME]["elements"]["peakfreq_end"] = NoIndent(
        element_pf_end
    )
    json_dict["indvs"][individual_row.NAME]["elements"]["peakfreq_mean"] = NoIndent(
        element_pf_mean
    )
    json_dict["indvs"][individual_row.NAME]["elements"]["trill_amp_mean"] = NoIndent(
        element_trill_amp_mean
    )
    json_dict["indvs"][individual_row.NAME]["elements"]["syllable"] = NoIndent(
        [int(i) for i in element_syllable_num]
    )
    json_dict["indvs"][individual_row.NAME]["elements"]["start_times"] = NoIndent(
        element_start_times
    )
    json_dict["indvs"][individual_row.NAME]["elements"]["end_times"] = NoIndent(
        element_end_times
    )

    # dump
    json_txt = json.dumps(json_dict, cls=NoIndentEncoder, indent=2)

    # save wav file
    avgn.utils.paths.ensure_dir(wav_out)
    librosa.output.write_wav(
        wav_out, y=np.array(wavdata).astype("float32"), sr=sr, norm=True
    )

    # save json
    avgn.utils.paths.ensure_dir(json_out.as_posix())
    with open(json_out.as_posix(), "w") as json_file:
        json_file.write(json_txt)
def generate_json(wavfile, DT_ID, song_db):
    indv = wavfile.parent.parent.stem
    dt = datetime.strptime(wavfile.stem, "%Y-%m-%d_%H-%M-%S-%f")
    datestring = dt.strftime("%Y-%m-%d")

    row = song_db[
        (song_db.SubjectName == indv)
        & (song_db.recording_date == datestring)
        & (song_db.recording_time == dt.time())
    ].iloc[0]

    # make json dictionary
    json_dict = {}
    for key in dict(row).keys():
        if type(row[key]) == pd._libs.tslibs.timestamps.Timestamp:
            json_dict[key] = row[key].strftime("%Y-%m-%d_%H-%M-%S")
        elif type(row[key]) == dtt:
            json_dict[key] = row[key].strftime("%H:%M:%S")
        elif type(row[key]) == pd._libs.tslibs.nattype.NaTType:
            continue
        else:
            json_dict[key] = row[key]


    json_dict["species"] = "Toxostoma redivivum"
    json_dict["common_name"] = "California thrasher"
    json_dict["datetime"] = datestring

    sr = get_samplerate(wavfile.as_posix())
    wav_duration = librosa.get_duration(filename=wavfile.as_posix())

    # rate and length
    json_dict["samplerate_hz"] = sr
    json_dict["length_s"] = wav_duration
    json_dict["wav_loc"] = wavfile.as_posix()

    tg = wavfile.parent.parent / "TextGrids" / (wavfile.stem + ".TextGrid")

    textgrid = tgio.openTextgrid(fnFullPath=tg)

    tierlist = textgrid.tierDict[textgrid.tierNameList[0]].entryList
    start_times = [i.start for i in tierlist]
    end_times = [i.end for i in tierlist]
    labels = [i.label for i in tierlist]

    json_dict["indvs"] = {
        indv: {
            "syllables": {
                "start_times": NoIndent(start_times),
                "end_times": NoIndent(end_times),
                "labels": NoIndent(labels),
            }
        }
    }

    # generate json
    json_txt = json.dumps(json_dict, cls=NoIndentEncoder, indent=2)


    json_out = (
        DATA_DIR / "processed" / DATASET_ID / DT_ID / "JSON" / (wavfile.stem + ".JSON")
    )

    # save json
    ensure_dir(json_out.as_posix())
    with open(json_out.as_posix(), "w") as json_file:
        json_file.write(json_txt)
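# Hypothetical driver (an assumption about the raw layout): each wav sits under an
# individual's directory with a sibling TextGrids folder, and song_db is the song
# database DataFrame queried above.
def _example_generate_thrasher_jsons(raw_wavs, DT_ID, song_db):
    """Apply generate_json to an iterable of raw wav Paths."""
    for wavfile in raw_wavs:
        generate_json(wavfile, DT_ID, song_db)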
def generate_noise_and_json(bout_number, fn, DT_ID, wavloc, file_df):
    # location of wav
    #wavloc = np.array(wavs)[np.array([i.stem for i in wavs]) == fn][0]
    # wav time
    wavdate = datetime.strptime(fn, "%y%m%d-%H%M")
    wav_date = wavdate.strftime("%Y-%m-%d_%H-%M-%S")
    # wav samplerate and duration
    sr = get_samplerate(wavloc.as_posix())
    wav_duration = librosa.get_duration(filename=wavloc)
    # df of syllables in file
    #file_df = label_df[label_df.file == fn].sort_values(by="start_time")

    ## find the longest stretch of non-vocal behavior in this wav
    noise_start, noise_end = find_longest_nonvocal_stretch(
        file_df, wav_duration)
    bout_start_string = avgn.utils.general.seconds_to_str(noise_start)

    # determine save locations
    noise_out = (DATA_DIR / "processed" / DATASET_ID / DT_ID / "NOISE" /
                 (fn + "__" + bout_start_string + ".WAV"))

    json_out = DATA_DIR / "processed" / DATASET_ID / DT_ID / "JSON" / (fn +
                                                                       ".JSON")

    # wav general information
    json_dict = {}
    json_dict["bout_number"] = bout_number
    json_dict["species"] = "Megaptera novaengliae"
    json_dict["common_name"] = "Humpback whale"
    json_dict["datetime"] = wav_date
    json_dict["samplerate_hz"] = sr
    json_dict["length_s"] = wav_duration
    json_dict["wav_loc"] = wavloc.as_posix()
    json_dict["noise_loc"] = noise_out.as_posix()
    json_dict["indvs"] = {
        "UNK": {
            "syllables": {
                "start_times":
                NoIndent(list(file_df.start_time.values.astype("float"))),
                "end_times":
                NoIndent(list(file_df.end_time.astype("float"))),
                "high_freq":
                NoIndent(list(file_df.high_freq.astype("float"))),
                "low_freq":
                NoIndent(list(file_df.low_freq.astype("float"))),
                "SNR":
                NoIndent(list(file_df.SNR.astype("float"))),
            }
        }
    }

    json_txt = json.dumps(json_dict, cls=NoIndentEncoder, indent=2)

    # save wav file
    noise_wav, sr = librosa.load(wavloc,
                                 sr=None,
                                 mono=True,
                                 offset=noise_start,
                                 duration=noise_end - noise_start)
    avgn.utils.paths.ensure_dir(noise_out)
    librosa.output.write_wav(noise_out, y=noise_wav, sr=sr, norm=True)

    # save json
    avgn.utils.paths.ensure_dir(json_out.as_posix())
    with open(json_out.as_posix(), "w") as json_file:
        json_file.write(json_txt)
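# Sketch of what find_longest_nonvocal_stretch is assumed to do (the real helper is
# defined elsewhere in avgn and may differ): scan the gaps between annotated
# syllables, plus the leading and trailing silence, and return the widest one.
def _example_longest_nonvocal_stretch(file_df, wav_duration):
    """Return (start, end) of the longest gap with no annotated vocalization.

    Assumes file_df is sorted by start_time and syllables do not overlap.
    """
    gap_starts = np.concatenate([[0.0], file_df.end_time.values])
    gap_ends = np.concatenate([file_df.start_time.values, [wav_duration]])
    longest = int(np.argmax(gap_ends - gap_starts))
    return float(gap_starts[longest]), float(gap_ends[longest])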
def generate_json(DSLOC, DT_ID, bird, wfn, wfn_df):

    # wav location
    wav_loc = DSLOC / bird / "Wave" / wfn

    # wav info
    sr = get_samplerate(wav_loc.as_posix())
    wav_duration = librosa.get_duration(filename=wav_loc)

    # make json dictionary
    json_dict = {}
    # add species
    json_dict["species"] = "Lonchura striata domestica"
    json_dict["common_name"] = "Bengalese finch"
    json_dict["wav_loc"] = wav_loc.as_posix()
    # rate and length
    json_dict["samplerate_hz"] = sr
    json_dict["length_s"] = wav_duration

    # make a dataframe of wav info
    # wfn_df = bird_df[bird_df.WaveFileName == wfn]
    seq_df = pd.DataFrame(
        ([[
            list(np.repeat(sequence_num, len(row.NotePositions))),
            list(row.NoteLabels),
            np.array(
                (np.array(row.NotePositions).astype("int") + int(row.Position))
                / sr).astype("float64"),
            np.array(
                (np.array(row.NotePositions).astype("int") +
                 np.array(row.NoteLengths).astype("int") + int(row.Position)) /
                sr).astype("float64"),
        ] for sequence_num, (idx, row) in enumerate(wfn_df.iterrows())]),
        columns=["sequence_num", "labels", "start_times", "end_times"],
    )
    # add syllable information
    json_dict["indvs"] = {
        bird: {
            "notes": {
                "start_times":
                NoIndent(list(np.concatenate(seq_df.start_times.values))),
                "end_times":
                NoIndent(list(np.concatenate(seq_df.end_times.values))),
                "labels":
                NoIndent(list(np.concatenate(seq_df.labels.values))),
                "sequence_num":
                NoIndent([
                    int(i) for i in np.concatenate(seq_df.sequence_num.values)
                ]),
            }
        }
    }

    # dump json
    json_txt = json.dumps(json_dict, cls=NoIndentEncoder, indent=2)

    wav_stem = bird + "_" + wfn.split(".")[0]
    json_out = (DATA_DIR / "processed" / DATASET_ID / DT_ID / "JSON" /
                (wav_stem + ".JSON"))

    # save json
    avgn.utils.paths.ensure_dir(json_out.as_posix())
    with open(json_out.as_posix(), "w") as json_file:
        json_file.write(json_txt)
def generate_json(row, DT_ID):

    wav = row.wavloc

    cond = wav.parent.stem.split("_")
    if len(cond) == 2:
        common_name, condition = cond
    else:
        common_name = cond[0]
        condition = None

    if common_name == "mouse":
        if condition == "C57BL":
            data_id = wav.stem.split("_")[0]
            indv_id = mouse_id_dict[data_id]
        elif condition == "BALBc":
            indv_id = wav.stem.split("-")[0]
    elif common_name == "rat":
        indv_id = wav.stem.split("_")[-2]
    elif common_name == "gerbil":
        indv_id = wav.stem
    else:
        raise ValueError("unrecognized common_name: {}".format(common_name))

    # wav info
    sr = get_samplerate(row.wavloc.as_posix())
    wav_duration = librosa.get_duration(filename=row.wavloc)
    species = species_dict[common_name]

    # make json dictionary
    json_dict = {}
    # add species
    json_dict["condition"] = condition
    json_dict["species"] = species
    json_dict["common_name"] = common_name
    json_dict["wav_loc"] = row.wavloc.as_posix()

    # rate and length
    json_dict["samplerate_hz"] = sr
    json_dict["length_s"] = wav_duration

    # get syllable start and end times
    csv = row.wavloc.parent / (row.wavloc.stem + ".csv")
    voc_df = pd.read_csv(csv, header=None)[[0, 1]]
    voc_df.columns = ["start_time", "end_time"]

    # add syllable information
    json_dict["indvs"] = {
        indv_id: {
            "syllables": {
                "start_times": NoIndent(list(voc_df.start_time.values)),
                "end_times": NoIndent(list(voc_df.end_time.values)),
            }
        }
    }

    DATASET_ID = "tachibana_" + common_name

    # dump
    json_txt = json.dumps(json_dict, cls=NoIndentEncoder, indent=2)
    wav_stem = row.wavloc.stem

    json_out = (DATA_DIR / "processed" / DATASET_ID / DT_ID / "JSON" /
                (wav_stem + ".JSON"))
    wav_out = DATA_DIR / "processed" / DATASET_ID / DT_ID / "WAV" / (wav_stem +
                                                                     ".WAV")
    print(json_out)
    # save json
    ensure_dir(json_out.as_posix())
    with open(json_out.as_posix(), "w") as json_file:
        json_file.write(json_txt)
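# Lookup tables assumed by the function above (illustrative only; the real mappings
# are dataset-specific and defined elsewhere):
#
#   species_dict = {
#       "mouse": "Mus musculus",
#       "rat": "Rattus norvegicus",
#       "gerbil": "Meriones unguiculatus",
#   }
#   mouse_id_dict = {...}  # maps C57BL recording-file prefixes to individual IDs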