def convert_chapter(path: pathlib.Path, sink: pathlib.Path, prefix: str, transformer: sox.Transformer):
    """Convert one chapter directory of .flac utterances into prefixed wav/lab pairs.

    Reads the chapter's transcription file (one ``<utterance-id> <text>`` line
    per utterance, as located by :func:`locate_transcriptions`), converts each
    referenced ``<utterance-id>.flac`` to a wav in *sink* via the given sox
    transformer, and writes the transcription text to a matching ``.lab`` file.

    Args:
        path: Chapter directory containing the ``.flac`` audio files.
        sink: Output directory; created (with parents) if missing.
        prefix: String prepended to every output file name.
        transformer: Pre-configured sox transformer used for the conversion.
    """
    transcription_file = locate_transcriptions(path)
    # mkdir(exist_ok=True) is race-safe, unlike the check-then-makedirs pattern.
    sink.mkdir(parents=True, exist_ok=True)
    if not transcription_file:
        return
    with open(transcription_file, "r", encoding="utf-8") as handle:
        # Iterate the file lazily instead of materializing it via readlines().
        for line in handle:
            # Split into "<utterance-id> <label>"; skip blank or malformed
            # lines, which previously produced bogus names via a -1 slice.
            parts = line.strip().split(maxsplit=1)
            if len(parts) != 2:
                continue
            file_name, label = parts
            audio_input_file = path / f"{file_name}.flac"
            audio_output_file = sink / f"{prefix}-{file_name}.wav"
            label_output_file = sink / f"{prefix}-{file_name}.lab"
            transformer.build_file(
                input_filepath=str(audio_input_file),
                output_filepath=str(audio_output_file),
            )
            with open(label_output_file, "w", encoding="utf-8") as label_file:
                label_file.write(label)
def convert_cv(transformer: sox.Transformer, max_per_speaker: int, tsv: pathlib.Path, clips: pathlib.Path, sink: pathlib.Path, prefix: str):
    """Convert Common Voice clips into per-speaker prefixed wav/lab pairs.

    Reads the Common Voice metadata TSV, groups rows by ``client_id``, and for
    each speaker (capped at *max_per_speaker* utterances) converts the clip to
    a wav under ``sink/<client_id>`` and writes the normalized sentence to a
    matching ``.lab`` file. Failures on individual clips are logged and
    skipped so one bad file does not abort the whole run.

    Args:
        transformer: Pre-configured sox transformer used for the conversion.
        max_per_speaker: Maximum number of utterances kept per speaker.
        tsv: Path to the Common Voice metadata TSV (has ``client_id``,
            ``path`` and ``sentence`` columns).
        clips: Directory containing the source audio clips.
        sink: Root output directory; per-speaker subdirectories are created.
        prefix: String prepended to every output file name.
    """
    meta = pd.read_csv(tsv, delimiter="\t")
    for client, df in tqdm(meta.groupby(by="client_id")):
        # Cap how many utterances any single speaker contributes.
        df = df.tail(max_per_speaker)
        speaker_sink = sink / client
        # mkdir(exist_ok=True) is race-safe, unlike check-then-makedirs.
        speaker_sink.mkdir(parents=True, exist_ok=True)
        for audio, transcription in zip(df["path"], df["sentence"]):
            try:
                # Clip name without its extension (text before the first dot).
                audio_without_stem = audio.split(".")[0]
                input_audio_file = clips / audio
                output_audio_file = speaker_sink / f"{prefix}-{audio_without_stem}.wav"
                output_transcription_file = speaker_sink / f"{prefix}-{audio_without_stem}.lab"
                # Normalize BEFORE opening the output file, so a normalization
                # failure cannot leave an empty .lab file behind.
                transcription = normalize_transcription(transcription)
                with open(output_transcription_file, "w", encoding="utf-8") as o:
                    o.write(transcription)
                transformer.build_file(
                    input_filepath=str(input_audio_file),
                    output_filepath=str(output_audio_file),
                )
            except Exception as e:
                # Best-effort conversion: report the failure and continue.
                print(
                    f"Failed to convert audio {audio} with sentence {transcription} reason: {str(e)}"
                )