コード例 #1
0
def generate_json(row, DT_ID):
    """Write the JSON metadata file describing one woodpecker recording.

    Arguments:
        row -- record exposing ``species`` (key into ``common_names``),
            ``wavloc`` (path object), ``call_type`` and ``origin``
        DT_ID -- identifier used as a directory level in the output path

    The file is written to
    ``DATA_DIR/processed/woodpecker_<species>/<DT_ID>/JSON/<wav stem>.JSON``.
    """
    common_name = common_names[row.species]
    # split the key on capital letters, e.g. "AbcDef" -> "Abc def"
    species = ' '.join(re.findall('[A-Z][^A-Z]*', row.species)).capitalize()
    # wav info
    sr = get_samplerate(row.wavloc.as_posix())
    wav_duration = librosa.get_duration(filename=row.wavloc)
    fn = row.wavloc.stem

    DATASET_ID = 'woodpecker_' + species.lower().replace(' ', '_')
    json_out = (DATA_DIR / "processed" / DATASET_ID / DT_ID / "JSON" /
                (fn + ".JSON"))

    # make json dictionary; no individual is identified, so use "UNK"
    json_dict = {}
    json_dict["indvs"] = {"UNK": {}}
    # add species
    json_dict["species"] = species
    json_dict["common_name"] = common_name
    json_dict["wav_loc"] = row.wavloc.as_posix()
    json_dict["sound_type"] = row.call_type
    json_dict["origin"] = row.origin
    # rate and length
    json_dict["samplerate_hz"] = sr
    json_dict["length_s"] = wav_duration

    # dump json
    json_txt = json.dumps(json_dict, cls=NoIndentEncoder, indent=2)

    # save json; the context manager closes the handle deterministically
    avgn.utils.paths.ensure_dir(json_out.as_posix())
    with open(json_out.as_posix(), "w") as json_file:
        print(json_txt, file=json_file)
コード例 #2
0
def generate_json(row, DT_ID):
    """Write the JSON metadata file describing one macaque recording.

    Arguments:
        row -- record exposing ``wavloc`` (path object), ``idnum`` and
            ``indv``
        DT_ID -- identifier used as a directory level in the output path

    The whole file is recorded as a single "coos" segment for ``row.indv``,
    running from 0.0 to the wav duration.
    """
    # get sr and duration
    sr = get_samplerate(row.wavloc.as_posix())
    wav_duration = librosa.get_duration(filename=row.wavloc)

    # create json
    json_dict = {}
    json_dict["common_name"] = "Macaque"
    json_dict["species"] = "Macaque mulatta"
    json_dict["samplerate_hz"] = sr
    json_dict["length_s"] = wav_duration
    json_dict["wav_loc"] = row.wavloc.as_posix()
    json_dict["idnum"] = row.idnum
    json_dict["indvs"] = {
        row.indv: {
            "coos": {
                "start_times": NoIndent([0.0]),
                "end_times": NoIndent([wav_duration]),
            }
        }
    }
    json_out = (DATA_DIR / "processed" / DATASET_ID / DT_ID / "JSON" /
                (row.wavloc.stem + ".JSON"))
    json_txt = json.dumps(json_dict, cls=NoIndentEncoder, indent=2)

    # save json; the context manager closes the handle deterministically
    avgn.utils.paths.ensure_dir(json_out.as_posix())
    with open(json_out.as_posix(), "w") as json_file:
        print(json_txt, file=json_file)
コード例 #3
0
def generate_json(row, DT_ID):
    """Write the JSON metadata file describing one white-rumped munia wav.

    Arguments:
        row -- record exposing ``wavloc`` (path object), ``wav_num`` and
            ``indv``
        DT_ID -- identifier used as a directory level in the output path

    Raises:
        Exception -- whatever ``get_samplerate`` raises; the offending path
            is printed before the error propagates
    """
    # wav info
    try:
        sr = get_samplerate(row.wavloc.as_posix())
    except Exception as e:
        # Report which file failed, then propagate: without a sample rate
        # the JSON cannot be built, and continuing would only fail later
        # with an unrelated unbound-variable error.
        print(row.wavloc.as_posix(), e)
        raise

    wav_duration = librosa.get_duration(filename=row.wavloc)

    # make json dictionary
    json_dict = {}
    # add species
    json_dict["species"] = "Lonchura striata"
    json_dict["common_name"] = "White rumped munia"
    json_dict["wav_loc"] = row.wavloc.as_posix()
    # rate and length
    json_dict["samplerate_hz"] = sr
    json_dict["length_s"] = wav_duration
    json_dict["wav_num"] = row.wav_num

    # add the individual, with no segmentation information
    json_dict["indvs"] = {row.indv: {}}

    # dump json
    json_txt = json.dumps(json_dict, cls=NoIndentEncoder, indent=2)

    # output name is built from individual + wav number, not the wav stem
    wav_stem = row.indv + "_" + str(row.wav_num)
    json_out = (DATA_DIR / "processed" / DATASET_ID / DT_ID / "JSON" /
                (wav_stem + ".JSON"))

    # save json; the context manager closes the handle deterministically
    avgn.utils.paths.ensure_dir(json_out.as_posix())
    with open(json_out.as_posix(), "w") as json_file:
        print(json_txt, file=json_file)
コード例 #4
0
def generate_json(row, DT_ID):
    """Write the JSON metadata file describing one European starling wav.

    Arguments:
        row -- record exposing ``wavdate`` (string formatted as
            ``%Y-%m-%d_%H-%M-%S-%f``), ``wavloc`` (path object) and ``indv``
        DT_ID -- identifier used as a directory level in the output path

    Raises:
        ValueError -- if ``row.wavdate`` does not match the expected format
    """
    # re-format the timestamp, dropping the microseconds
    datet = datetime.strptime(row.wavdate, "%Y-%m-%d_%H-%M-%S-%f")
    datestr = datet.strftime("%Y-%m-%d_%H-%M-%S")
    sr = get_samplerate(row.wavloc.as_posix())
    wav_duration = librosa.get_duration(filename=row.wavloc.as_posix())

    # make json dictionary
    json_dict = {}
    # "species" holds the scientific name and "common_name" the vernacular
    # one (the two values were previously swapped)
    json_dict["species"] = "Sturnus vulgaris"
    json_dict["common_name"] = "European starling"
    json_dict["indvs"] = {row.indv: {}}
    json_dict["datetime"] = datestr
    # rate and length
    json_dict["samplerate_hz"] = sr
    json_dict["length_s"] = wav_duration
    json_dict["wav_loc"] = row.wavloc.as_posix()

    # generate json
    json_txt = json.dumps(json_dict, cls=NoIndentEncoder, indent=2)

    json_out = (DATA_DIR / "processed" / DATASET_ID / DT_ID / "JSON" /
                (row.wavloc.stem + ".JSON"))

    # save json; the context manager closes the handle deterministically
    ensure_dir(json_out.as_posix())
    with open(json_out.as_posix(), "w") as json_file:
        print(json_txt, file=json_file)
コード例 #5
0
def generate_json(row, DT_ID):
    """Write the JSON metadata file describing one insect recording.

    Arguments:
        row -- record exposing ``wavloc`` (path object), ``species`` and
            ``species_group`` (key into ``species_dict`` and
            ``species_dict_common``)
        DT_ID -- identifier used as a directory level in the output path

    The dataset directory is ``insect_dataset_<common name>``.
    """
    # wav info
    sr = get_samplerate(row.wavloc.as_posix())
    wav_duration = librosa.get_duration(filename=row.wavloc)

    # looked up once; used for both the JSON field and the dataset id
    common_name = species_dict_common[row.species_group]

    # make json dictionary
    json_dict = {}
    # add species
    json_dict["species_id"] = row.species

    json_dict["species"] = species_dict[row.species_group]
    json_dict["common_name"] = common_name
    json_dict["wav_loc"] = row.wavloc.as_posix()

    # rate and length
    json_dict["samplerate_hz"] = sr
    json_dict["length_s"] = wav_duration

    # the species id doubles as the individual key; no segmentation info
    json_dict["indvs"] = {row.species: {}}

    DATASET_ID = 'insect_dataset_' + common_name

    json_txt = json.dumps(json_dict, cls=NoIndentEncoder, indent=2)

    json_out = (DATA_DIR / "processed" / DATASET_ID / DT_ID / "JSON" /
                (row.wavloc.stem + ".JSON"))

    # save json; the context manager closes the handle deterministically
    avgn.utils.paths.ensure_dir(json_out.as_posix())
    with open(json_out.as_posix(), "w") as json_file:
        print(json_txt, file=json_file)
コード例 #6
0
def generate_json(row, DT_ID):
    """Write the JSON metadata file describing one house mouse recording.

    Arguments:
        row -- record exposing ``year``, ``month``, ``day``, ``wav_loc``
            (path object), ``AGE``, ``FemaleMouse``, ``SONG``, ``Weight``
            and ``indv``
        DT_ID -- identifier used as a directory level in the output path
    """
    # only the date is known, so the time part is formatted as 00-00-00
    wavdate = datetime(year=int(row.year),
                       day=int(row.day),
                       month=int(row.month))
    wav_date = wavdate.strftime("%Y-%m-%d_%H-%M-%S")

    # wav samplerate and duration
    sr = get_samplerate(row.wav_loc.as_posix())
    wav_duration = librosa.get_duration(filename=row.wav_loc)

    # wav general information
    json_dict = {}
    json_dict["datetime"] = wav_date
    json_dict["samplerate_hz"] = sr
    json_dict["length_s"] = wav_duration
    json_dict["species"] = "Mus musculus"
    json_dict["common_name"] = "House mouse"
    json_dict["wav_loc"] = row.wav_loc.as_posix()
    json_dict["age"] = row.AGE
    json_dict["FemaleMouse"] = row.FemaleMouse
    json_dict['call_type'] = row.SONG
    json_dict["weight"] = row.Weight
    json_dict["indvs"] = {row.indv: {}}

    json_txt = json.dumps(json_dict, cls=NoIndentEncoder, indent=2)

    json_out = DATA_DIR / "processed" / DATASET_ID / DT_ID / "JSON" / (
        row.wav_loc.stem + ".JSON")

    # save json; the context manager closes the handle deterministically
    avgn.utils.paths.ensure_dir(json_out.as_posix())
    with open(json_out.as_posix(), "w") as json_file:
        print(json_txt, file=json_file)
コード例 #7
0
def generate_json(row, DT_ID):
    """Write the JSON metadata file describing one zebra finch vocalization.

    Arguments:
        row -- record exposing ``wav_loc`` (path object), ``voc_num``,
            ``vocalization_type`` (key into ``call_dict``),
            ``voc_type_full``, ``age``, ``recordingdate`` (object with
            ``strftime``) and ``indv``
        DT_ID -- identifier used as a directory level in the output path

    The whole file is recorded as a single "elements" segment for
    ``row.indv``, running from 0.0 to the wav duration.
    """
    # wav info
    sr = get_samplerate(row.wav_loc.as_posix())
    wav_duration = librosa.get_duration(filename=row.wav_loc)

    # make json dictionary
    json_dict = {}
    # add species
    json_dict["species"] = "Taeniopygia guttata"
    json_dict["common_name"] = "Zebra finch"
    json_dict["wav_loc"] = row.wav_loc.as_posix()

    # rate and length
    json_dict["samplerate_hz"] = sr
    json_dict["length_s"] = wav_duration
    json_dict["wav_num"] = row.voc_num

    # vocalization annotations
    json_dict["vocalization_type"] = row.vocalization_type
    json_dict["voc_type_full"] = row.voc_type_full
    json_dict["voc_type_def"] = call_dict[row.vocalization_type]
    json_dict["age"] = row.age
    json_dict["datetime"] = row.recordingdate.strftime("%Y-%m-%d_%H-%M-%S")

    # add segment information
    json_dict["indvs"] = {
        row.indv: {
            "elements": {
                "start_times": [0.0],
                "end_times": [wav_duration]
            }
        }
    }

    json_txt = json.dumps(json_dict, cls=NoIndentEncoder, indent=2)

    json_out = (DATA_DIR / "processed" / DATASET_ID / DT_ID / "JSON" /
                (row.wav_loc.stem + ".JSON"))

    # save json; the context manager closes the handle deterministically
    avgn.utils.paths.ensure_dir(json_out.as_posix())
    with open(json_out.as_posix(), "w") as json_file:
        print(json_txt, file=json_file)
コード例 #8
0
def generate_json_custom(wavfile, DT_ID):
    """Write a JSON metadata file for a wav with no manual segmentation.

    The individual and dataset names are taken from the directory layout:
    the individual is the stem of the wav's grandparent directory and the
    dataset name is the stem of the directory above that.

    Arguments:
        wavfile -- path object pointing at the wav file
        DT_ID -- identifier used as a directory level in the output path

    The "datetime" field holds the current date (``%Y-%m-%d``), not a
    recording date.
    """
    indv = wavfile.parent.parent.stem
    dataset_id = wavfile.parent.parent.parent.stem
    wav_loc = wavfile.as_posix()
    datestring = datetime.now().strftime("%Y-%m-%d")

    DATASET_ID = f'{dataset_id}_{indv}'
    sr = get_samplerate(wav_loc)
    wav_duration = librosa.get_duration(filename=wav_loc)

    # make json dictionary; the sample rate is stored under both
    # "sample_rate" and "samplerate_hz", as in the original output
    json_dict = {
        "sample_rate": sr,
        "species": indv,
        "datetime": datestring,
        "wav_loc": wav_loc,
        "samplerate_hz": sr,
        "length_s": wav_duration,
    }

    # no manual segmentation
    json_dict["indvs"] = {
        indv: {
            "syllables": {
                "start_times": [],
                "end_times": [],
                "labels": [],
            }
        }
    }

    # generate json
    json_txt = json.dumps(json_dict, cls=NoIndentEncoder, indent=2)

    json_out = (DATA_DIR / "processed" / DATASET_ID / DT_ID / "JSON" /
                (wavfile.stem + ".JSON"))

    # save json; the context manager closes the handle deterministically
    ensure_dir(json_out.as_posix())
    with open(json_out.as_posix(), "w") as json_file:
        print(json_txt, file=json_file)
コード例 #9
0
def generate_json(row, DT_ID):
    """Write the JSON metadata file describing one NA_BIRDS recording.

    Arguments:
        row -- record exposing ``species`` (string; after stripping leading
            whitespace and capitalizing it is a key into ``common_names``),
            ``wavloc`` (path object) and ``wavnum``
        DT_ID -- identifier used as a directory level in the output path

    The whole file is recorded as a single "syllables" segment for the
    individual "UNK", and the output file is named
    ``<species_with_underscores>_<wavnum zero-padded to 4>.JSON``.
    """
    species = row.species.lstrip().capitalize()
    species_slug = species.lower().replace(" ", "_")
    DATASET_ID = "NA_BIRDS_" + species_slug

    # sample rate and duration
    sr = get_samplerate(row.wavloc.as_posix())
    wav_duration = librosa.get_duration(filename=row.wavloc)

    # make json dictionary
    json_dict = {}
    json_dict["indvs"] = {
        "UNK": {
            "syllables": {
                "start_times": [0],
                "end_times": [wav_duration]
            }
        }
    }
    # add species
    json_dict["species"] = species
    json_dict["common_name"] = common_names[species]

    # add wav number
    json_dict["wav_num"] = int(row.wavnum)
    # add wav location
    json_dict["wav_loc"] = row.wavloc.as_posix()

    # rate and length
    json_dict["samplerate_hz"] = sr
    json_dict["length_s"] = wav_duration

    # dump json
    json_txt = json.dumps(json_dict, cls=NoIndentEncoder, indent=2)

    # save information
    json_name = species_slug + '_' + str(row.wavnum).zfill(4)
    json_out = (DATA_DIR / "processed" / DATASET_ID / DT_ID / "JSON" /
                (json_name + ".JSON"))

    # save json; the context manager closes the handle deterministically
    avgn.utils.paths.ensure_dir(json_out.as_posix())
    with open(json_out.as_posix(), "w") as json_file:
        print(json_txt, file=json_file)
コード例 #10
0
def gen_wav_json(wf, wav_df, DT_ID, save_wav=False):
    """ generates a JSON of segmental information from the wav_df rows

    if the flag save_wav is set to true, also loads ``wf`` as mono audio and
    writes a normalized copy under the processed "WAV" directory; otherwise
    the JSON points at the original wav of the first row of wav_df

    Arguments:
        wf -- path object of the wav file; its stem names the outputs
        wav_df -- dataframe of phrases; the first row supplies ``wavloc``,
            ``indv``, ``rendition`` and ``datetime``, and the columns
            ``phrase_start``, ``phrase_end`` and ``phrase_label`` supply
            the segmentation
        DT_ID -- identifier used as a directory level in the output paths

    Keyword Arguments:
        save_wav {bool} -- also write a WAV copy (default: {False})
    """

    wav_stem = wf.stem
    first_row = wav_df.iloc[0]

    # output locations
    if save_wav:
        # load wav file
        bout_wav, sr = librosa.load(wf, mono=True, sr=None)

        wav_out = (DATA_DIR / "processed" / DATASET_ID / DT_ID / "WAV" /
                   (wav_stem + ".WAV"))
        bout_duration = len(bout_wav) / sr
        # save wav file
        ensure_dir(wav_out)
        # NOTE(review): librosa.output was removed in librosa 0.8 -- confirm
        # the pinned librosa version still provides write_wav
        librosa.output.write_wav(wav_out, y=bout_wav, sr=sr, norm=True)
    else:
        sr = get_samplerate(first_row.wavloc.as_posix())
        wav_out = first_row.wavloc
        bout_duration = librosa.get_duration(
            filename=first_row.wavloc.as_posix())

    json_out = (DATA_DIR / "processed" / DATASET_ID / DT_ID / "JSON" /
                (wav_stem + ".JSON"))

    # create json dictionary
    indv = first_row.indv
    json_dict = {}
    json_dict["indvs"] = {indv: {"phrases": {}}}
    json_dict["rendition"] = first_row.rendition
    json_dict["datetime"] = first_row.datetime.strftime("%Y-%m-%d_%H-%M-%S")
    json_dict["original_wav"] = first_row.wavloc.as_posix()
    json_dict["samplerate_hz"] = sr
    phrases = json_dict["indvs"][indv]["phrases"]
    phrases["start_times"] = NoIndent(list(wav_df.phrase_start.values))
    phrases["end_times"] = NoIndent(list(wav_df.phrase_end.values))
    phrases["labels"] = NoIndent(list(wav_df.phrase_label.values))
    json_dict["wav_loc"] = wav_out.as_posix()
    json_dict["length_s"] = bout_duration
    json_dict["species"] = "Serinus canaria forma domestica"
    json_dict["common_name"] = "Domestic canary"

    # generate json
    json_txt = json.dumps(json_dict, cls=NoIndentEncoder, indent=2)

    # save json; the context manager closes the handle deterministically
    ensure_dir(json_out.as_posix())
    with open(json_out.as_posix(), "w") as json_file:
        print(json_txt, file=json_file)
コード例 #11
0
def generate_json(wavfile, DT_ID, song_db):
    """Write the JSON metadata file for one BIRD_DB wav and its TextGrid.

    Arguments:
        wavfile -- path object of the wav; the individual is the stem of
            its grandparent directory and the stem is parsed as a timestamp
            (``%Y-%m-%d_%H-%M-%S-%f``)
        DT_ID -- identifier used as a directory level in the output path
        song_db -- dataframe with at least ``SubjectName``,
            ``recording_date``, ``recording_time``, ``Species_short_name``
            and ``Subject_species`` columns

    Returns early, after printing a message, when the matching TextGrid
    file does not exist.

    Raises:
        IndexError -- if no row of ``song_db`` matches the individual,
            date and time
    """
    indv = wavfile.parent.parent.stem
    try:
        dt = datetime.strptime(wavfile.stem, "%Y-%m-%d_%H-%M-%S-%f")
    except ValueError:
        # stem is not a timestamp: fall back to the current time
        dt = datetime.now()
    datestring = dt.strftime("%Y-%m-%d")

    row = song_db[(song_db.SubjectName == indv)
                  & (song_db.recording_date == datestring)
                  & (song_db.recording_time == dt.time())].iloc[0]

    # make json dictionary: copy every database field, formatting dates and
    # times as strings and skipping missing timestamps.
    # Exact-type comparison is kept on purpose: NaT is (to our knowledge) a
    # datetime subclass, so isinstance checks could route it to the wrong
    # branch depending on what ``dtt`` is bound to.
    json_dict = {}
    for key, value in dict(row).items():
        if type(value) == pd._libs.tslibs.timestamps.Timestamp:
            json_dict[key] = value.strftime("%Y-%m-%d_%H-%M-%S")
        elif type(value) == dtt:
            json_dict[key] = value.strftime("%H:%M:%S")
        elif type(value) == pd._libs.tslibs.nattype.NaTType:
            continue
        else:
            json_dict[key] = value

    species_name = row.Species_short_name.replace(" ", "_")
    common_name = row.Subject_species.replace(" ", "_")
    DATASET_ID = "BIRD_DB_" + species_name

    json_dict["species"] = species_name
    json_dict["common_name"] = common_name
    json_dict["datetime"] = datestring

    sr = get_samplerate(wavfile.as_posix())
    wav_duration = librosa.get_duration(filename=wavfile.as_posix())

    json_dict["wav_loc"] = wavfile.as_posix()
    # rate and length
    json_dict["samplerate_hz"] = sr
    json_dict["length_s"] = wav_duration

    # the annotation lives in a sibling "TextGrids" directory
    tg = wavfile.parent.parent / "TextGrids" / (wavfile.stem + ".TextGrid")

    if not tg.exists():
        print(tg.as_posix(), 'File does not exist')
        return
    textgrid = tgio.openTextgrid(fnFullPath=tg)

    # only the first tier is used
    tierlist = textgrid.tierDict[textgrid.tierNameList[0]].entryList
    start_times = [i.start for i in tierlist]
    end_times = [i.end for i in tierlist]
    labels = [i.label for i in tierlist]

    json_dict["indvs"] = {
        indv: {
            "syllables": {
                "start_times": NoIndent(start_times),
                "end_times": NoIndent(end_times),
                "labels": NoIndent(labels),
            }
        }
    }

    # generate json
    json_txt = json.dumps(json_dict, cls=NoIndentEncoder, indent=2)

    json_out = (DATA_DIR / "processed" / DATASET_ID / DT_ID / "JSON" /
                (wavfile.stem + ".JSON"))

    # save json; the context manager closes the handle deterministically
    ensure_dir(json_out.as_posix())
    with open(json_out.as_posix(), "w") as json_file:
        print(json_txt, file=json_file)
コード例 #12
0
def generate_noise_and_json(bout_number, fn, DT_ID, wavloc, file_df):
    """Write a noise clip and the JSON metadata for one humpback whale wav.

    The noise clip is the longest non-vocal stretch of the wav as reported
    by ``find_longest_nonvocal_stretch``; it is written under the processed
    "NOISE" directory and referenced from the JSON as "noise_loc".

    Arguments:
        bout_number -- value stored under "bout_number" in the JSON
        fn -- wav file name stem, formatted as ``%y%m%d-%H%M``
        DT_ID -- identifier used as a directory level in the output paths
        wavloc -- path object of the wav file (callers previously selected
            it from a list of wavs by matching the stem against ``fn``)
        file_df -- dataframe of the syllables in this file, with
            ``start_time``, ``end_time``, ``high_freq``, ``low_freq`` and
            ``SNR`` columns (previously the rows of a label dataframe for
            ``fn``, sorted by start time)

    Raises:
        ValueError -- if ``fn`` does not match ``%y%m%d-%H%M``
    """
    # wav time
    wavdate = datetime.strptime(fn, "%y%m%d-%H%M")
    wav_date = wavdate.strftime("%Y-%m-%d_%H-%M-%S")
    # wav samplerate and duration
    sr = get_samplerate(wavloc.as_posix())
    wav_duration = librosa.get_duration(filename=wavloc)

    ## find the longest stretch of non-vocal behavior in this wav
    noise_start, noise_end = find_longest_nonvocal_stretch(
        file_df, wav_duration)
    bout_start_string = avgn.utils.general.seconds_to_str(noise_start)

    # determine save locations
    noise_out = (DATA_DIR / "processed" / DATASET_ID / DT_ID / "NOISE" /
                 (fn + "__" + bout_start_string + ".WAV"))

    json_out = (DATA_DIR / "processed" / DATASET_ID / DT_ID / "JSON" /
                (fn + ".JSON"))

    # wav general information
    json_dict = {}
    json_dict["bout_number"] = bout_number
    json_dict["species"] = "Megaptera novaengliae"
    json_dict["common_name"] = "Humpback whale"
    json_dict["datetime"] = wav_date
    json_dict["samplerate_hz"] = sr
    json_dict["length_s"] = wav_duration
    json_dict["wav_loc"] = wavloc.as_posix()
    json_dict["noise_loc"] = noise_out.as_posix()
    json_dict["indvs"] = {
        "UNK": {
            "syllables": {
                "start_times":
                NoIndent(list(file_df.start_time.values.astype("float"))),
                "end_times":
                NoIndent(list(file_df.end_time.astype("float"))),
                "high_freq":
                NoIndent(list(file_df.high_freq.astype("float"))),
                "low_freq":
                NoIndent(list(file_df.low_freq.astype("float"))),
                "SNR":
                NoIndent(list(file_df.SNR.astype("float"))),
            }
        }
    }

    json_txt = json.dumps(json_dict, cls=NoIndentEncoder, indent=2)

    # save wav file (only the noise stretch is loaded)
    noise_wav, sr = librosa.load(wavloc,
                                 sr=None,
                                 mono=True,
                                 offset=noise_start,
                                 duration=noise_end - noise_start)
    avgn.utils.paths.ensure_dir(noise_out)
    # NOTE(review): librosa.output was removed in librosa 0.8 -- confirm the
    # pinned librosa version still provides write_wav
    librosa.output.write_wav(noise_out, y=noise_wav, sr=sr, norm=True)

    # save json; the context manager closes the handle deterministically
    avgn.utils.paths.ensure_dir(json_out.as_posix())
    with open(json_out.as_posix(), "w") as json_file:
        print(json_txt, file=json_file)
コード例 #13
0
def generate_json(row, DT_ID, noise_indv_df):
    """ generate a json from available wav information for stowell dataset

    Arguments:
        row -- record exposing ``species`` ("chiffchaff", "littleowl" or
            "pipit"), ``indv``, ``year``, ``trntst``, ``wavnum``,
            ``wavloc`` (path object) and ``groundx``
        DT_ID -- identifier used as a directory level in the output path
        noise_indv_df -- dataframe of candidate recordings with
            ``species``, ``year``, ``groundx``, ``fgbg``, ``wavnum`` and
            ``wavloc`` columns; rows with ``fgbg == 'bg'`` are used as noise

    Raises:
        KeyError -- if ``row.species`` is not one of the known species
    """
    DATASET_ID = "stowell_" + row.species

    sr = get_samplerate(row.wavloc.as_posix())
    wav_duration = librosa.get_duration(filename=row.wavloc)

    # make json dictionary
    json_dict = {}
    json_dict["indvs"] = {row.indv: {}}

    # the dataset labels species by common name; map to the scientific one
    species = {
        "chiffchaff": "Phylloscopus collybita",
        "littleowl": "Athene noctua",
        "pipit": "Anthus trivialis",
    }
    json_dict["species"] = species[row.species]
    json_dict["common_name"] = row.species

    # add year information
    json_dict["year"] = row.year
    # add train/test split
    json_dict["train"] = row.trntst
    # add wav number
    json_dict["wav_num"] = int(row.wavnum)
    # add wav location
    json_dict["wav_loc"] = row.wavloc.as_posix()

    # rate and length
    json_dict["samplerate_hz"] = sr
    json_dict["length_s"] = wav_duration

    # get noise loc: background recordings of the same species, year and
    # groundx value
    bg_df = noise_indv_df[(noise_indv_df.species == row.species)]
    bg_df = bg_df[(bg_df.year == row.year)]
    bg_df = bg_df[(bg_df.groundx == row.groundx)]
    bg_df = bg_df[(bg_df.fgbg == 'bg')]

    # prefer the background clip with the same wav number, then any
    # remaining background clip, else record no noise file
    same_wav_df = bg_df[bg_df.wavnum == row.wavnum]
    if len(same_wav_df) > 0:
        noise_loc = same_wav_df.iloc[0].wavloc.as_posix()
    elif len(bg_df) > 0:
        noise_loc = bg_df.iloc[0].wavloc.as_posix()
    else:
        noise_loc = ''

    # (a stray bare ``return`` used to sit here, which made everything
    # below unreachable so no JSON was ever written)
    json_dict["noise_loc"] = noise_loc

    # dump json
    json_txt = json.dumps(json_dict, cls=NoIndentEncoder, indent=2)

    # save information
    json_out = (DATA_DIR / "processed" / DATASET_ID / DT_ID / "JSON" /
                (row.wavloc.stem + ".JSON"))

    # save json; the context manager closes the handle deterministically
    avgn.utils.paths.ensure_dir(json_out.as_posix())
    with open(json_out.as_posix(), "w") as json_file:
        print(json_txt, file=json_file)
コード例 #14
0
def generate_json(DSLOC, DT_ID, bird, wfn, wfn_df):
    """Write the JSON metadata file for one Bengalese finch wav.

    Arguments:
        DSLOC -- path object of the dataset root; the wav is read from
            ``DSLOC / bird / "Wave" / wfn``
        DT_ID -- identifier used as a directory level in the output path
        bird -- individual name (string); also the key under "indvs"
        wfn -- wav file name (string); the text before the first "." is
            used in the output file name
        wfn_df -- dataframe with one row per sequence in this wav, with
            ``NotePositions``, ``NoteLengths``, ``NoteLabels`` and
            ``Position`` columns (presumably positions/lengths in samples,
            since they are divided by the sample rate -- TODO confirm)
    """
    # wav location
    wav_loc = DSLOC / bird / "Wave" / wfn

    # wav info
    sr = get_samplerate(wav_loc.as_posix())
    wav_duration = librosa.get_duration(filename=wav_loc)

    # make json dictionary
    json_dict = {}
    # add species
    json_dict["species"] = "Lonchura striata domestica"
    json_dict["common_name"] = "Bengalese finch"
    json_dict["wav_loc"] = wav_loc.as_posix()
    # rate and length
    json_dict["samplerate_hz"] = sr
    json_dict["length_s"] = wav_duration

    # make a dataframe of wav info: one row per sequence, holding per-note
    # lists; note offsets are shifted by the sequence's Position and divided
    # by the sample rate to give start/end times
    seq_df = pd.DataFrame(
        ([[
            list(np.repeat(sequence_num, len(row.NotePositions))),
            list(row.NoteLabels),
            np.array(
                (np.array(row.NotePositions).astype("int") + int(row.Position))
                / sr).astype("float64"),
            np.array(
                (np.array(row.NotePositions).astype("int") +
                 np.array(row.NoteLengths).astype("int") + int(row.Position)) /
                sr).astype("float64"),
        ] for sequence_num, (idx, row) in enumerate(wfn_df.iterrows())]),
        columns=["sequence_num", "labels", "start_times", "end_times"],
    )
    # add syllable information, flattening the per-sequence lists
    json_dict["indvs"] = {
        bird: {
            "notes": {
                "start_times":
                NoIndent(list(np.concatenate(seq_df.start_times.values))),
                "end_times":
                NoIndent(list(np.concatenate(seq_df.end_times.values))),
                "labels":
                NoIndent(list(np.concatenate(seq_df.labels.values))),
                "sequence_num":
                NoIndent([
                    int(i) for i in np.concatenate(seq_df.sequence_num.values)
                ]),
            }
        }
    }

    # dump json
    json_txt = json.dumps(json_dict, cls=NoIndentEncoder, indent=2)

    wav_stem = bird + "_" + wfn.split(".")[0]
    json_out = (DATA_DIR / "processed" / DATASET_ID / DT_ID / "JSON" /
                (wav_stem + ".JSON"))

    # save json; the context manager closes the handle deterministically
    avgn.utils.paths.ensure_dir(json_out.as_posix())
    with open(json_out.as_posix(), "w") as json_file:
        print(json_txt, file=json_file)
コード例 #15
0
def generate_json(row, DT_ID):
    """Write the JSON metadata file for one tachibana rodent recording.

    The wav's parent directory name is split on "_" into a common name and
    an optional condition; the individual id is then parsed from the wav
    stem with a rule that depends on them. Syllable start/end times are
    read from the first two columns of the header-less ``.csv`` that sits
    next to the wav and shares its stem.

    Arguments:
        row -- record exposing ``wavloc`` (path object)
        DT_ID -- identifier used as a directory level in the output path

    Raises:
        KeyError -- if the common name is not in ``species_dict`` (or a
            C57BL mouse id is not in ``mouse_id_dict``)
        ValueError -- if no individual-id rule exists for the common name /
            condition combination
    """
    wav = row.wavloc

    cond = wav.parent.stem.split("_")
    if len(cond) == 2:
        common_name, condition = cond
    else:
        common_name = cond[0]
        condition = None

    # parse the individual id; stays None when no rule applies
    indv_id = None
    if common_name == "mouse":
        if condition == "C57BL":
            data_id = wav.stem.split("_")[0]
            indv_id = mouse_id_dict[data_id]
        elif condition == "BALBc":
            indv_id = wav.stem.split("-")[0]
    elif common_name == "rat":
        indv_id = wav.stem.split("_")[-2]
    elif common_name == "gerbil":
        indv_id = wav.stem

    # wav info
    sr = get_samplerate(row.wavloc.as_posix())
    wav_duration = librosa.get_duration(filename=row.wavloc)
    species = species_dict[common_name]

    # fail with a clear message rather than an unbound-variable error later
    if indv_id is None:
        raise ValueError(
            "cannot determine individual id for common name %r with "
            "condition %r (%s)" % (common_name, condition, wav.as_posix()))

    # make json dictionary
    json_dict = {}
    # add species
    json_dict["condition"] = condition
    json_dict["species"] = species
    json_dict["common_name"] = common_name
    json_dict["wav_loc"] = row.wavloc.as_posix()

    # rate and length
    json_dict["samplerate_hz"] = sr
    json_dict["length_s"] = wav_duration

    # get syllable start and end times
    csv = row.wavloc.parent / (row.wavloc.stem + ".csv")
    voc_df = pd.read_csv(csv, header=None)[[0, 1]]
    voc_df.columns = ["start_time", "end_time"]

    # add syllable information
    json_dict["indvs"] = {
        indv_id: {
            "syllables": {
                "start_times": NoIndent(list(voc_df.start_time.values)),
                "end_times": NoIndent(list(voc_df.end_time.values)),
            }
        }
    }

    DATASET_ID = "tachibana_" + common_name

    # dump
    json_txt = json.dumps(json_dict, cls=NoIndentEncoder, indent=2)
    wav_stem = row.wavloc.stem

    json_out = (DATA_DIR / "processed" / DATASET_ID / DT_ID / "JSON" /
                (wav_stem + ".JSON"))
    # progress output: show where the JSON is being written
    print(json_out)
    # save json; the context manager closes the handle deterministically
    ensure_dir(json_out.as_posix())
    with open(json_out.as_posix(), "w") as json_file:
        print(json_txt, file=json_file)