def prepare_libritts(
    corpus_dir: Pathlike,
    dataset_parts: Union[str, Sequence[str]] = "auto",
    output_dir: Optional[Pathlike] = None,
    num_jobs: int = 1,
    link_previous_utt: bool = False,
) -> Dict[str, Dict[str, Union[RecordingSet, SupervisionSet]]]:
    """
    Returns the manifests which consist of the Recordings and Supervisions.
    When all the manifests are available in the ``output_dir``, it will simply read and return them.

    :param corpus_dir: Pathlike, the path of the data dir.
    :param dataset_parts: string or sequence of strings representing dataset part names, e.g. 'train-clean-100', 'train-clean-5', 'dev-clean'.
        By default we will infer which parts are available in ``corpus_dir``.
    :param output_dir: Pathlike, the path where to write the manifests.
    :param num_jobs: the number of parallel workers parsing the data.
    :param link_previous_utt: If true adds previous utterance id to supervisions.
        Useful for reconstructing chains of utterances as they were read.
        If previous utterance was skipped from LibriTTS datasets previous_utt label is None.
    :return: a Dict whose key is the dataset part, and the value is Dicts with the keys 'audio' and 'supervisions'.
    """
    corpus_dir = Path(corpus_dir)
    assert corpus_dir.is_dir(), f"No such directory: {corpus_dir}"
    # Normalize `dataset_parts` to a list of part names.
    if dataset_parts == "auto":
        dataset_parts = LIBRITTS
    elif isinstance(dataset_parts, str):
        assert dataset_parts in LIBRITTS
        dataset_parts = [dataset_parts]

    manifests = {}

    if output_dir is not None:
        output_dir = Path(output_dir)
        output_dir.mkdir(parents=True, exist_ok=True)
        # Maybe the manifests already exist: we can read them and save a bit of preparation time.
        manifests = read_manifests_if_cached(dataset_parts=dataset_parts,
                                             output_dir=output_dir,
                                             prefix="libritts")

    # Build speaker-id -> gender map from the corpus-level SPEAKERS.txt.
    # Contents of the file (';'-prefixed lines are headers and are skipped):
    # ;ID |SEX| SUBSET |MINUTES| NAME
    # 14 | F | train-clean-360 | 25.03 | ...
    # 16 | F | train-clean-360 | 25.11 | ...
    # 17 | M | train-clean-360 | 25.04 | ...
    spk2gender = {
        spk_id.strip(): gender.strip()
        for spk_id, gender, *_ in (
            line.split("|")
            for line in (corpus_dir / "SPEAKERS.txt").read_text().splitlines()
            if not line.startswith(";"))
    }

    for part in tqdm(dataset_parts, desc="Preparing LibriTTS parts"):
        if manifests_exist(part=part, output_dir=output_dir, prefix="libritts"):
            logging.info(
                f"LibriTTS subset: {part} already prepared - skipping.")
            continue
        part_path = corpus_dir / part
        # Scan every .wav under the subset directory in parallel.
        recordings = RecordingSet.from_dir(part_path, "*.wav", num_jobs=num_jobs)

        supervisions = []
        for trans_path in tqdm(
                part_path.rglob("*.trans.tsv"),
                desc="Scanning transcript files (progbar per speaker)",
                leave=False,
        ):
            # The trans.tsv files contain only the recordings that were kept for LibriTTS.
            # Example path to a file:
            #   /export/corpora5/LibriTTS/dev-clean/84/121123/84_121123.trans.tsv
            #
            # Example content:
            #   84_121123_000007_000001 Maximilian. Maximilian.
            #   84_121123_000008_000000 Villefort rose, half ashamed of being surprised in such a paroxysm of grief. Villefort rose, half ashamed of being surprised in such a paroxysm of grief.

            # book.tsv contains additional metadata; the SNR value is the last
            # whitespace-separated field of each line.
            utt2snr = [(rec_id, float(snr)) for rec_id, *_, snr in map(
                str.split,
                (trans_path.parent /
                 trans_path.name.replace(".trans.tsv", ".book.tsv")
                 ).read_text().splitlines(),
            )]
            # keeps the order of uttids as they appear in book.tsv
            uttids = [r for r, _ in utt2snr]
            utt2snr = dict(utt2snr)

            if link_previous_utt:
                # Using the property of sorted keys to find previous utterance
                # The keys has structure speaker_book_x_y e.g. 1089_134691_000004_000001
                # Maps each uttid to the one preceding it in book.tsv order;
                # the first utterance maps to None.
                utt2prevutt = dict(zip(uttids + [None], [None] + uttids))

            prev_rec_id = None
            for line in trans_path.read_text().splitlines():
                rec_id, orig_text, norm_text = line.split("\t")
                spk_id = rec_id.split("_")[0]
                customd = {"orig_text": orig_text, "snr": utt2snr[rec_id]}
                if link_previous_utt:
                    # all recordings ids should be in the book.csv
                    # but they are some missing e.g. 446_123502_000030_000003
                    prev_utt = utt2prevutt.get(rec_id, None)
                    # previous utterance has to be present in trans.csv - otherwise it was skipped
                    prev_utt = prev_utt if prev_utt == prev_rec_id else None
                    customd["prev_utt"] = prev_utt
                    prev_rec_id = rec_id
                supervisions.append(
                    SupervisionSegment(
                        id=rec_id,
                        recording_id=rec_id,
                        start=0.0,
                        duration=recordings[rec_id].duration,
                        channel=0,
                        text=norm_text,
                        language="English",
                        speaker=spk_id,
                        gender=spk2gender[spk_id],
                        custom=customd,
                    ))

        supervisions = SupervisionSet.from_segments(supervisions)
        validate_recordings_and_supervisions(recordings, supervisions)

        if output_dir is not None:
            supervisions.to_json(output_dir /
                                 f"libritts_supervisions_{part}.json")
            recordings.to_json(output_dir /
                               f"libritts_recordings_{part}.json")

        manifests[part] = {
            "recordings": recordings,
            "supervisions": supervisions
        }

    return manifests
def prepare_icsi(
    audio_dir: Pathlike,
    transcripts_dir: Pathlike,
    output_dir: Optional[Pathlike] = None,
    mic: Optional[str] = "ihm",
    normalize_text: str = "kaldi",
) -> Dict[str, Dict[str, Union[RecordingSet, SupervisionSet]]]:
    """
    Returns the manifests which consist of the Recordings and Supervisions

    :param audio_dir: Pathlike, the path which holds the audio data
    :param transcripts_dir: Pathlike, the path which holds the transcripts data
    :param output_dir: Pathlike, the path where to write the manifests - `None` means manifests aren't stored on disk.
    :param mic: str {'ihm','ihm-mix','sdm','mdm'}, type of mic to use.
    :param normalize_text: str {'none', 'upper', 'kaldi'} normalization of text
    :return: a Dict whose key is ('train', 'dev', 'test'), and the values are dicts of manifests under keys 'recordings' and 'supervisions'.
    """
    audio_dir = Path(audio_dir)
    transcripts_dir = Path(transcripts_dir)

    assert audio_dir.is_dir(), f"No such directory: {audio_dir}"
    assert transcripts_dir.is_dir(), f"No such directory: {transcripts_dir}"
    assert mic in MIC_TO_CHANNELS.keys(), f"Mic {mic} not supported"

    if output_dir is not None:
        output_dir = Path(output_dir)
        output_dir.mkdir(parents=True, exist_ok=True)

    logging.info("Parsing ICSI transcripts")
    annotations, channel_to_idx_map = parse_icsi_annotations(
        transcripts_dir, normalize=normalize_text)

    # Audio
    logging.info("Preparing recording manifests")
    channels = "".join(MIC_TO_CHANNELS[mic])
    if mic == "ihm" or mic == "mdm":
        # Multi-channel mics: group the per-channel .sph files per meeting.
        audio_paths = audio_dir.rglob(f"chan[{channels}].sph")
        audio = prepare_audio_grouped(
            list(audio_paths), channel_to_idx_map if mic == "ihm" else None)
    elif mic == "sdm" or mic == "ihm-mix":
        # Single-channel variants; ihm-mix has no .sph channels, so fall back to .wav.
        audio_paths = (audio_dir.rglob(f"chan[{channels}].sph")
                       if len(channels) else audio_dir.rglob("*.wav"))
        audio = prepare_audio_single(list(audio_paths))

    # Supervisions
    logging.info("Preparing supervision manifests")
    supervision = (prepare_supervision_ihm(
        audio, annotations, channel_to_idx_map) if mic == "ihm" else
                   prepare_supervision_other(audio, annotations))

    manifests = defaultdict(dict)

    for part in ["train", "dev", "test"]:
        # Get recordings for current data split.
        # `part=part` binds the loop variable eagerly so the predicate stays
        # correct even if `filter` evaluates lazily (after the loop advances).
        audio_part = audio.filter(lambda x, part=part: x.id in PARTITIONS[part])
        supervision_part = supervision.filter(
            lambda x, part=part: x.recording_id in PARTITIONS[part])

        # Fix and validate BEFORE writing, so the manifests stored on disk are
        # identical to the ones returned to the caller. (Previously the raw,
        # unfixed manifests were written and only then fixed/validated.)
        audio_part, supervision_part = fix_manifests(audio_part,
                                                     supervision_part)
        validate_recordings_and_supervisions(audio_part, supervision_part)

        # Write to output directory if a path is provided
        if output_dir is not None:
            audio_part.to_file(output_dir / f"recordings_{part}.jsonl")
            supervision_part.to_file(output_dir / f"supervisions_{part}.jsonl")

        # Combine all manifests into one dictionary
        manifests[part] = {
            "recordings": audio_part,
            "supervisions": supervision_part
        }

    return dict(manifests)
def prepare_mobvoihotwords(
    corpus_dir: Pathlike,
    output_dir: Optional[Pathlike] = None
) -> Dict[str, Dict[str, Union[RecordingSet, SupervisionSet]]]:
    """
    Returns the manifests which consist of the Recordings and Supervisions
    :param corpus_dir: Pathlike, the path of the data dir.
    :param output_dir: Pathlike, the path where to write the manifests.
    :return: a Dict whose key is the dataset part, and the value is Dicts with the keys 'audio' and 'supervisions'.
    """
    # NOTE(review): a second `prepare_mobvoihotwords` is defined later in this
    # file with the same name; at import time that later definition shadows
    # this one, so this version is effectively dead code — confirm which of
    # the two should be kept.
    corpus_dir = Path(corpus_dir)
    assert corpus_dir.is_dir(), f'No such directory: {corpus_dir}'
    if output_dir is not None:
        output_dir = Path(output_dir)
        output_dir.mkdir(parents=True, exist_ok=True)
    manifests = defaultdict(dict)
    dataset_parts = ['train', 'dev', 'test']
    for part in dataset_parts:
        # Generate a mapping: utt_id -> (audio_path, audio_info, speaker, text)
        recordings = []
        supervisions = []
        # Each split is stored as two JSON lists: positive ('p_') and
        # negative ('n_') examples.
        for prefix in ['p_', 'n_']:
            prefixed_part = prefix + part
            json_path = corpus_dir / 'mobvoi_hotword_dataset_resources' / f'{prefixed_part}.json'
            with open(json_path, 'r', encoding='utf-8') as f:
                json_data = json.load(f)
            for entry in json_data:
                idx = entry['utt_id']
                # Fall back to the utterance id when no speaker id is given.
                speaker = idx if entry['speaker_id'] is None else entry[
                    'speaker_id']
                audio_path = corpus_dir / 'mobvoi_hotword_dataset' / f'{idx}.wav'
                # keyword_id 0/1 are the two wake words; -1 marks free text.
                text = 'FREETEXT'
                if entry['keyword_id'] == 0:
                    text = 'HiXiaowen'
                elif entry['keyword_id'] == 1:
                    text = 'NihaoWenwen'
                else:
                    assert entry['keyword_id'] == -1
                # Best-effort: skip (and log) entries whose audio is missing.
                if not audio_path.is_file():
                    logging.warning(f'No such file: {audio_path}')
                    continue
                recording = Recording.from_file(audio_path)
                recordings.append(recording)
                segment = SupervisionSegment(id=idx,
                                             recording_id=idx,
                                             start=0.0,
                                             duration=recording.duration,
                                             channel=0,
                                             language='Chinese',
                                             speaker=speaker,
                                             text=text.strip())
                supervisions.append(segment)

        recording_set = RecordingSet.from_recordings(recordings)
        supervision_set = SupervisionSet.from_segments(supervisions)

        if output_dir is not None:
            supervision_set.to_json(output_dir / f'supervisions_{part}.json')
            recording_set.to_json(output_dir / f'recordings_{part}.json')

        manifests[part] = {
            'recordings': recording_set,
            'supervisions': supervision_set
        }

    return manifests
def prepare_mobvoihotwords(
    corpus_dir: Pathlike,
    output_dir: Optional[Pathlike] = None
) -> Dict[str, Dict[str, Union[RecordingSet, SupervisionSet]]]:
    """
    Returns the manifests which consist of the Recordings and Supervisions

    :param corpus_dir: Pathlike, the path of the data dir.
    :param output_dir: Pathlike, the path where to write the manifests.
    :return: a Dict whose key is the dataset part, and the value is Dicts with the keys 'audio' and 'supervisions'.
    """
    corpus_dir = Path(corpus_dir)
    assert corpus_dir.is_dir(), f"No such directory: {corpus_dir}"
    dataset_parts = ["train", "dev", "test"]
    manifests = {}

    if output_dir is not None:
        output_dir = Path(output_dir)
        output_dir.mkdir(parents=True, exist_ok=True)
        # Maybe the manifests already exist: we can read them and save a bit of preparation time.
        # FIX: pass prefix="mobvoi" — the manifests below are saved as
        # "mobvoi_*_{part}.jsonl.gz", so without the prefix this cache lookup
        # (and the manifests_exist check below) could never find the files
        # this very function writes. The libritts recipe in this file passes
        # its prefix consistently in the same way.
        # NOTE(review): the files are written with a ".jsonl.gz" suffix; if
        # these helpers default to a different suffix, a suffix argument may
        # be needed as well — confirm against their signatures.
        manifests = read_manifests_if_cached(dataset_parts=dataset_parts,
                                             output_dir=output_dir,
                                             prefix="mobvoi")

    for part in dataset_parts:
        logging.info(f"Preparing MobvoiHotwords subset: {part}")
        if manifests_exist(part=part, output_dir=output_dir, prefix="mobvoi"):
            logging.info(
                f"MobvoiHotwords subset: {part} already prepared - skipping.")
            continue
        # Generate a mapping: utt_id -> (audio_path, audio_info, speaker, text)
        recordings = []
        supervisions = []
        # Each split is stored as two JSON lists: positive ("p_") and
        # negative ("n_") examples.
        for prefix in ["p_", "n_"]:
            prefixed_part = prefix + part
            json_path = (corpus_dir / "mobvoi_hotword_dataset_resources" /
                         f"{prefixed_part}.json")
            with open(json_path, "r", encoding="utf-8") as f:
                json_data = json.load(f)
            for entry in json_data:
                idx = entry["utt_id"]
                # Fall back to the utterance id when no speaker id is given.
                speaker = (idx if entry["speaker_id"] is None else
                           entry["speaker_id"])
                audio_path = corpus_dir / "mobvoi_hotword_dataset" / f"{idx}.wav"
                # keyword_id 0/1 are the two wake words; -1 marks free text.
                text = "FREETEXT"
                if entry["keyword_id"] == 0:
                    text = "HiXiaowen"
                elif entry["keyword_id"] == 1:
                    text = "NihaoWenwen"
                else:
                    assert entry["keyword_id"] == -1
                # Best-effort: skip (and log) entries whose audio is missing.
                if not audio_path.is_file():
                    logging.warning(f"No such file: {audio_path}")
                    continue
                recording = Recording.from_file(audio_path)
                recordings.append(recording)
                segment = SupervisionSegment(
                    id=idx,
                    recording_id=idx,
                    start=0.0,
                    duration=recording.duration,
                    channel=0,
                    language="Chinese",
                    speaker=speaker,
                    text=text.strip(),
                )
                supervisions.append(segment)

        recording_set = RecordingSet.from_recordings(recordings)
        supervision_set = SupervisionSet.from_segments(supervisions)
        validate_recordings_and_supervisions(recording_set, supervision_set)

        if output_dir is not None:
            supervision_set.to_file(output_dir /
                                    f"mobvoi_supervisions_{part}.jsonl.gz")
            recording_set.to_file(output_dir /
                                  f"mobvoi_recordings_{part}.jsonl.gz")

        manifests[part] = {
            "recordings": recording_set,
            "supervisions": supervision_set
        }

    return manifests
def prepare_heroico(
    speech_dir: Pathlike,
    transcript_dir: Pathlike,
    output_dir: Optional[Pathlike] = None
) -> Dict[str, Dict[str, Union[RecordingSet, SupervisionSet]]]:
    """
    Returns the manifests which consist of the Recordings and Supervisions

    :param speech_dir: Pathlike, the path of the speech data dir.
    :param transcript_dir: Pathlike, the path of the transcript data dir.
    :param output_dir: Pathlike, the path where to write the manifests.
    :return: a Dict whose key is the fold, and the value is Dicts with the keys 'audio' and 'supervisions'.
    """
    speech_dir = Path(speech_dir)
    transcript_dir = Path(transcript_dir)
    assert speech_dir.is_dir(), f'No such directory: {speech_dir}'
    assert transcript_dir.is_dir(), f'No such directory: {transcript_dir}'
    if output_dir is not None:
        output_dir = Path(output_dir)
        output_dir.mkdir(parents=True, exist_ok=True)
    manifests = defaultdict(dict)

    # set some patterns to match fields in transcript files and filenames.
    # FIX: raw strings — the originals were plain strings whose "\d", "\-",
    # "\w", "\S" are invalid escape sequences (SyntaxWarning on Python >= 3.12).
    answers_line_pattern = re.compile(r"\d+/\d+\t.+")
    answers_path_pattern = re.compile(r'Answers_Spanish')
    heroico_recitations_line_pattern = re.compile(r"\d+\t.+")
    # NOTE(review): the two patterns below are defined but never used in this
    # function — kept for now, confirm whether they can be dropped.
    heroico_recitations_devtest_path_pattern = re.compile(r'Recordings_Spanish')
    heroico_recitations_train_path_pattern = re.compile(r'Recordings_Spanish')
    usma_line_pattern = re.compile(r"s\d+\t.+")
    usma_native_demo_pattern = re.compile(
        r"usma/native\-[fm]\-\w+\-\S+\-\S+\-\S+\-\S+\-\w+\d+")
    usma_native_path_pattern = re.compile(r'usma/native')
    usma_native_prompt_id_pattern = re.compile(r's\d+')
    usma_nonnative_demo_pattern = re.compile(
        r"nonnative\-[fm]\-[a-zA-Z]+\d*\-[a-zA-Z]+\-[a-zA-Z]+\-[a-zA-Z]+\-[a-zA-Z]+\-[a-zA-Z]+\d+")
    usma_nonnative_path_pattern = re.compile(r'nonnative.+\.wav')

    # Generate a mapping: utt_id -> (audio_path, audio_info, text)
    transcripts = defaultdict(dict)
    # store answers transcripts
    answers_trans_path = Path(transcript_dir, heroico_dataset_answers)
    with open(answers_trans_path, encoding='iso-8859-1') as f:
        for line in f:
            line = line.rstrip()
            # some recordings do not have a transcript, skip them here
            if not answers_line_pattern.match(line):
                continue
            # IDs have the form speaker/prompt_id
            spk_utt, text = line.split(maxsplit=1)
            spk_id, prompt_id = spk_utt.split('/')
            utt_id = '-'.join(['answers', spk_id, prompt_id])
            transcripts[utt_id] = text

    # store heroico recitations transcripts
    heroico_recitations_trans_path = Path(transcript_dir,
                                          heroico_dataset_recordings)
    with open(heroico_recitations_trans_path, encoding='iso-8859-1') as f:
        for line in f:
            line = line.rstrip()
            if not heroico_recitations_line_pattern.match(line):
                continue
            idx, text = line.split(maxsplit=1)
            utt_id = '-'.join(['heroico-recitations', idx])
            transcripts[utt_id] = text

    # store usma transcripts
    usma_trans_path = Path(transcript_dir, usma_dataset)
    with open(usma_trans_path, encoding='iso-8859-1') as f:
        for line in f:
            line = line.rstrip()
            if not usma_line_pattern.match(line):
                continue
            idx, text = line.split(maxsplit=1)
            utt_id = '-'.join(['usma', idx])
            transcripts[utt_id] = text

    # store utterance info
    audio_paths = speech_dir.rglob('*.wav')
    uttdata = {}
    for wav_file in audio_paths:
        wav_path = Path(wav_file)
        pid = wav_path.stem
        if re.findall(answers_path_pattern, str(wav_file)):
            # store utterance info for Heroico Answers
            spk = wav_path.parts[-2]
            utt_id = '-'.join(['answers', spk, pid])
            # Answers without a transcript are marked None and skipped later.
            if utt_id not in transcripts:
                uttdata[str(wav_file)] = None
                continue
            uttdata[str(wav_file)] = UttInfo(fold='train',
                                             speaker=spk,
                                             prompt_id=pid,
                                             subcorpus='answers',
                                             utterance_id=utt_id,
                                             transcript=transcripts[utt_id])
        elif re.findall(usma_native_path_pattern, str(wav_file)):
            # store utterance info for usma native data
            spk = wav_path.parts[-2]
            utt_id = '-'.join(['usma', spk, pid])
            trans_id = '-'.join(['usma', pid])
            # NOTE(review): no `continue` after the demo-pattern check below,
            # so a demo mismatch alone still falls through and overwrites the
            # None entry — looks unintended, but preserved here; confirm.
            if not usma_native_demo_pattern.match(spk):
                uttdata[str(wav_file)] = None
            if not usma_native_prompt_id_pattern.match(pid):
                uttdata[str(wav_file)] = None
                continue
            uttdata[str(wav_file)] = UttInfo(fold='test',
                                             speaker=spk,
                                             prompt_id=pid,
                                             subcorpus='usma',
                                             utterance_id=utt_id,
                                             transcript=transcripts[trans_id])
        elif re.findall(usma_nonnative_path_pattern, str(wav_file)):
            # store utterance data for usma nonnative data
            spk = wav_path.parts[-2]
            utt_id = '-'.join(['usma', spk, pid])
            trans_id = '-'.join(['usma', pid])
            if not usma_nonnative_demo_pattern.match(spk):
                uttdata[str(wav_file)] = None
                continue
            uttdata[str(wav_file)] = UttInfo(fold='test',
                                             speaker=spk,
                                             prompt_id=pid,
                                             subcorpus='usma',
                                             utterance_id=utt_id,
                                             transcript=transcripts[trans_id])
        elif int(pid) <= 354 or int(pid) >= 562:
            # store utterance info for heroico recitations for train dataset
            spk = wav_path.parts[-2]
            utt_id = '-'.join(['heroico-recitations', spk, pid])
            trans_id = '-'.join(['heroico-recitations', pid])
            uttdata[str(wav_file)] = UttInfo(fold='train',
                                             speaker=spk,
                                             prompt_id=pid,
                                             subcorpus='heroico-recitations',
                                             utterance_id=utt_id,
                                             transcript=transcripts[trans_id])
        elif int(pid) > 354 and int(pid) < 562:
            # heroico recitation repeats (devtest fold).
            spk = wav_path.parts[-2]
            utt_id = '-'.join(['heroico-recitations-repeats', spk, pid])
            # FIX: look transcripts up under the 'heroico-recitations' prefix.
            # The transcripts dict only ever stores keys prefixed 'answers',
            # 'heroico-recitations' or 'usma' (see above), so the previous
            # 'heroico-recitations-repeats-<pid>' key was never populated and
            # the defaultdict silently returned {} as the "transcript".
            trans_id = '-'.join(['heroico-recitations', pid])
            uttdata[str(wav_file)] = UttInfo(
                fold='devtest',
                speaker=spk,
                prompt_id=pid,
                subcorpus='heroico-recitations-repeats',
                utterance_id=utt_id,
                transcript=transcripts[trans_id])
        else:
            logging.warning(f'No such file: {wav_file}')

    audio_paths = speech_dir.rglob('*.wav')
    audio_files = [w for w in audio_paths]

    for fld in folds:
        metadata = {}
        for wav_file in audio_files:
            wav_path = Path(wav_file)
            # skip files with no record
            if not uttdata[str(wav_file)]:
                continue
            # only process the current fold
            if uttdata[str(wav_file)].fold != fld:
                continue
            prompt_id = wav_path.stem
            # info[0]: info of the raw audio (e.g. channel number, sample rate, duration ... )
            # info[1]: info about the encoding (e.g. FLAC/ALAW/ULAW ...)
            # NOTE(review): this is the legacy torchaudio tuple-returning API;
            # newer torchaudio returns a single metadata object — confirm the
            # pinned torchaudio version.
            info = torchaudio.info(str(wav_file))
            spk = wav_path.parts[-2]
            utt_id = '-'.join(
                [uttdata[str(wav_file)].subcorpus, spk, prompt_id])
            metadata[utt_id] = HeroicoMetaData(
                audio_path=wav_file,
                audio_info=info[0],
                text=uttdata[str(wav_file)].transcript)

        # Audio
        audio = RecordingSet.from_recordings(
            Recording(
                id=idx,
                sources=[
                    AudioSource(
                        type='file',
                        channels=[0],
                        source=str(metadata[idx].audio_path)
                    )
                ],
                sampling_rate=int(metadata[idx].audio_info.rate),
                num_samples=metadata[idx].audio_info.length,
                duration=(metadata[idx].audio_info.length /
                          metadata[idx].audio_info.rate)
            ) for idx in metadata
        )

        # Supervision
        supervision = SupervisionSet.from_segments(
            SupervisionSegment(
                id=idx,
                recording_id=idx,
                start=0.0,
                duration=audio.recordings[idx].duration,
                channel=0,
                language='Spanish',
                speaker=idx.split('-')[-2],
                text=metadata[idx].text
            ) for idx in audio.recordings
        )

        if output_dir is not None:
            supervision.to_json(output_dir / f'supervisions_{fld}.json')
            audio.to_json(output_dir / f'recordings_{fld}.json')

        manifests[fld] = {
            'recordings': audio,
            'supervisions': supervision
        }

    return manifests
def prepare_adept(
    corpus_dir: Pathlike,
    output_dir: Optional[Pathlike] = None,
):
    """
    Returns the manifests which consist of the Recordings and Supervisions.
    When all the manifests are available in the ``output_dir``, it will simply read and return them.

    :param corpus_dir: Pathlike, the path of the data dir.
    :param output_dir: Pathlike, the path where to write the manifests.
    :return: a Dict with the keys 'recordings' and 'supervisions'.
    """
    corpus_dir = Path(corpus_dir)
    assert corpus_dir.is_dir(), f"No such directory: {corpus_dir}"

    recordings = RecordingSet.from_recordings(
        Recording.from_file(
            path=path,
            # converts:
            #   path/to/ADEPT/wav_44khz/propositional_attitude/surprise/ad01_0204.wav
            # to:
            #   propositional_attitude_surprise_ad01_0204
            # FIX: join Path.parts instead of str.replace("/", "_") so the id
            # is OS-separator independent (str() yields "\\" on Windows).
            recording_id="_".join(
                path.relative_to(
                    path.parent.parent.parent).with_suffix("").parts),
        ) for path in (corpus_dir / "wav_44khz").rglob("*.wav"))

    supervisions = []
    with open(corpus_dir / "adept_prompts.json") as f:
        interpretation_map = json.load(f)

    for path in (corpus_dir / "txt").rglob("*.txt"):
        # Same three trailing components: <annotation_type>/<label>/<prompt_id>.txt
        # (FIX: Path.parts instead of split("/") — see recording_id above.)
        annotation_type, label, prompt_id = path.relative_to(
            path.parent.parent.parent).with_suffix("").parts
        speaker_id = "ADEPT_" + prompt_id.split("_")[0]
        recording_id = "_".join((annotation_type, label, prompt_id))
        interpretation_group = interpretation_map.get(annotation_type)
        interpretation = (interpretation_group[prompt_id][label]
                          if interpretation_group else None)
        recording = recordings[recording_id]
        custom = {
            "type": annotation_type,
            "label": label,
            "prompt_id": prompt_id
        }

        if interpretation:
            # label is "interpretation_1", "interpretation_2", ..., "middle", "end", etc
            # Interpretations' labels meaning is defined by their textual realisation:
            # {..., "middle": "Galleries are WHAT on Thursdays?", "end": "Galleries are free WHEN?"}
            custom["text"] = interpretation

        supervisions.append(
            SupervisionSegment(
                id=recording_id,
                recording_id=recording_id,
                start=0,
                duration=recording.duration,
                channel=0,
                text=path.read_text(),
                language="English",
                speaker=speaker_id,
                custom=custom,
            ))

    supervisions = SupervisionSet.from_segments(supervisions)
    validate_recordings_and_supervisions(recordings, supervisions)

    if output_dir is not None:
        output_dir = Path(output_dir)
        # FIX: create the output directory first — every other recipe in this
        # file does so, and to_file fails on a nonexistent directory.
        output_dir.mkdir(parents=True, exist_ok=True)
        supervisions.to_file(output_dir / "adept_supervisions.json")
        recordings.to_file(output_dir / "adept_recordings.json")

    return {"recordings": recordings, "supervisions": supervisions}