コード例 #1
0
def _get_all_speakers(l: PreDataList) -> Tuple[SpeakersDict, SpeakersLogDict]:
    """Derive the speaker lookup and the speaker occurrence log from ``l``.

    Args:
        l: Parsed entries; the ``speaker_name`` of every item is read.

    Returns:
        A pair ``(speakers_dict, speakers_log)``: the first is built from the
        de-duplicated speaker names, the second from a ``Counter`` over all
        speaker names (duplicates included).
    """
    names_with_repeats: List[str] = [item.speaker_name for item in l.items()]

    # The log needs the raw frequencies, so count before dropping duplicates.
    speakers_log = SpeakersLogDict.fromcounter(Counter(names_with_repeats))

    unique_names = remove_duplicates_list_orderpreserving(names_with_repeats)
    return SpeakersDict.fromlist(unique_names), speakers_log
コード例 #2
0
def parse(dir_path: str, logger: Logger = getLogger()) -> PreDataList:
    """Parse a speaker-per-folder corpus rooted at ``dir_path``.

    Speaker metadata is read from a fixed line range of ``README.md``; every
    subfolder whose basename matches a speaker listed there contributes one
    entry per (wav, textgrid, transcript) triple. A ``"."`` is appended to
    each transcript before it is converted to symbols.

    Args:
        dir_path: Root directory of the corpus.
        logger: Logger used for progress and error messages.

    Returns:
        All parsed entries, sorted with ``sort_arctic``.

    Raises:
        Exception: If ``dir_path`` does not exist.
        AssertionError: If a speaker folder holds differing numbers of wav,
            textgrid and transcript files.
    """
    if not os.path.exists(dir_path):
        message = f"Directory not found: {dir_path}!"
        # logger.exception() is meant for use inside an ``except`` handler;
        # here no exception is active, so a plain error record is emitted.
        logger.error(message)
        raise Exception(message)

    readme_lines = read_lines(os.path.join(dir_path, "README.md"))
    speakers_dict = {}
    # NOTE(review): lines 34..57 are assumed to be the speaker table with rows
    # shaped like "|name|gender|accent|..|..|" - confirm against the README.
    for speaker_details in readme_lines[34:58]:
        name, gender, accent, _, _ = speaker_details[1:-1].split("|")
        speakers_dict[name] = gender, accent

    lang = Language.ENG
    entries = PreDataList()

    logger.info("Parsing files...")
    for speaker_folder in tqdm(get_subfolders(dir_path)):
        speaker_name = get_basename(speaker_folder)
        if speaker_name not in speakers_dict:
            logger.info(f"Skipping {speaker_name}")
            continue

        wavs = get_filepaths(os.path.join(speaker_folder, "wav"))
        # The "annotation" folder is deliberately not read: only 150 files
        # exist per speaker and they do not contain good IPA.
        textgrids = get_filepaths(os.path.join(speaker_folder, "textgrid"))
        transcripts = get_filepaths(os.path.join(speaker_folder, "transcript"))

        # zip() below would silently truncate on a mismatch, so fail loudly.
        assert len(wavs) == len(textgrids) == len(transcripts), (
            f"File count mismatch in {speaker_folder}: {len(wavs)} wavs, "
            f"{len(textgrids)} textgrids, {len(transcripts)} transcripts")

        speaker_gender, speaker_accent = speakers_dict[speaker_name]
        accent_name = f"{speaker_accent}-{speaker_name}"
        # Any gender marker other than "M" is treated as female.
        gender = Gender.MALE if speaker_gender == "M" else Gender.FEMALE

        for wav, _textgrid, transcript in zip(wavs, textgrids, transcripts):
            text_en = f"{read_text(transcript)}."
            symbols = text_to_symbols(text_en, lang)

            entries.append(PreData(name=get_basename(wav),
                                   speaker_name=speaker_name,
                                   text=text_en,
                                   wav_path=wav,
                                   symbols=symbols,
                                   # one accent label per symbol
                                   accents=[accent_name] * len(symbols),
                                   gender=gender,
                                   lang=lang))

    entries.sort(key=sort_arctic, reverse=False)
    logger.info(
        f"Parsed {len(entries)} entries from {len(speakers_dict)} speakers.")

    return entries
コード例 #3
0
def parse(dir_path: str) -> PreDataList:
  """Parse every subfolder of ``dir_path`` into one sorted ``PreDataList``.

  Each subfolder is expected to contain the CSV named by ``OATA_CSV_NAME``
  and an ``AUDIO_FOLDER_NAME`` directory holding the referenced wav files.

  Args:
    dir_path: Root directory whose subfolders are parsed.

  Returns:
    All entries, ordered by (speaker, subfolder, entry id).

  Raises:
    Exception: If ``dir_path`` does not exist.
  """
  if not os.path.exists(dir_path):
    message = f"Directory not found: {dir_path}"
    print(message)
    raise Exception(message)

  lang = Language.ENG
  # (sorting key, parsed entry) pairs; sorted once after everything is read.
  keyed_entries: List[Tuple[Tuple, PreData]] = []

  for subfolder in tqdm(get_subfolders(dir_path)):
    data_path = os.path.join(subfolder, OATA_CSV_NAME)
    entries = cast_as(Entries.load(Entry, data_path), Entries)
    for entry in entries.items():
      # Any gender marker other than "m" is treated as female.
      gender = Gender.MALE if entry.gender == "m" else Gender.FEMALE

      symbols = text_to_symbols(entry.text, lang)
      wav_path = os.path.join(subfolder, AUDIO_FOLDER_NAME, entry.wav)
      data = PreData(
        name=entry.entry_id,
        speaker_name=entry.speaker,
        lang=lang,
        wav_path=wav_path,
        gender=gender,
        text=entry.text,
        symbols=symbols,
        # one accent label per symbol
        accents=[entry.accent] * len(symbols),
      )
      sorting_keys = entry.speaker, subfolder, entry.entry_id
      keyed_entries.append((sorting_keys, data))

  keyed_entries.sort(key=lambda pair: pair[0])

  return PreDataList([data for _, data in keyed_entries])
コード例 #4
0
def _get_ds_data(l: PreDataList, speakers_dict: SpeakersDict,
                 accents: AccentsDict, symbols: SymbolIdDict) -> DsDataList:
    """Convert parsed entries into ``DsData`` rows.

    Args:
        l: Parsed entries to convert.
        speakers_dict: Lookup indexed by speaker name to obtain ``speaker_id``.
        accents: Provides ``get_serialized_ids`` for each entry's accents.
        symbols: Provides ``get_serialized_ids`` for each entry's symbols.

    Returns:
        A ``DsDataList`` with one row per item of ``l``; ``entry_id`` is the
        item's zero-based position in ``l.items()``.
    """
    rows = []
    for position, item in enumerate(l.items()):
        row = DsData(
            entry_id=position,
            basename=item.name,
            speaker_name=item.speaker_name,
            speaker_id=speakers_dict[item.speaker_name],
            text=item.text,
            serialized_symbols=symbols.get_serialized_ids(item.symbols),
            serialized_accents=accents.get_serialized_ids(item.accents),
            wav_path=item.wav_path,
            lang=item.lang,
            gender=item.gender,
        )
        rows.append(row)
    return DsDataList(rows)
コード例 #5
0
def parse(dir_path: str, logger: Logger = getLogger()) -> PreDataList:
    """Parse a dataset/speaker/chapter folder tree rooted at ``dir_path``.

    Speaker ids, names and genders come from ``SPEAKERS.txt`` (first 12 lines
    skipped). Inside every chapter folder each ``.wav`` file is paired with
    its ``.normalized.txt`` transcript.

    Args:
        dir_path: Root directory containing ``SPEAKERS.txt`` and the dataset
            subfolders.
        logger: Logger used for progress and error messages.

    Returns:
        All parsed entries, sorted with ``sort_libri``.

    Raises:
        Exception: If ``dir_path`` does not exist.
        AssertionError: If a chapter folder's wav and transcript files do not
            pair up one-to-one by basename.
        KeyError: If a speaker folder's basename is not listed in
            ``SPEAKERS.txt``.
    """
    if not os.path.exists(dir_path):
        message = f"Directory not found: {dir_path}"
        # Report through the supplied logger, like the rest of this function.
        logger.error(message)
        raise Exception(message)

    speaker_lines = read_lines(os.path.join(dir_path, "SPEAKERS.txt"))
    speakers_dict = {}
    # NOTE(review): the first 12 lines are assumed to be a header - confirm.
    for speaker_details in speaker_lines[12:]:
        s_id, gender, _, _, name = speaker_details.split(" | ")
        speakers_dict[s_id.strip()] = name.strip(), gender.strip()

    lang = Language.ENG
    entries = PreDataList()

    logger.info("Parsing files...")
    for dataset_folder in tqdm(get_subfolders(dir_path)):
        logger.info(f"Parsing {get_basename(dataset_folder)}...")

        for speaker_folder in tqdm(get_subfolders(dataset_folder)):
            speaker_id = get_basename(speaker_folder)
            speaker_name, speaker_gender = speakers_dict[speaker_id]
            # The speaker's name doubles as the accent label.
            accent_name = speaker_name
            # Any gender marker other than "M" is treated as female.
            gender = Gender.MALE if speaker_gender == "M" else Gender.FEMALE

            for chapter_folder in get_subfolders(speaker_folder):
                files = get_filepaths(chapter_folder)
                wavs = [x for x in files if x.endswith(".wav")]
                texts = [x for x in files if x.endswith(".normalized.txt")]
                # zip() below would silently truncate on a mismatch.
                assert len(wavs) == len(texts), (
                    f"{chapter_folder}: {len(wavs)} wav files but "
                    f"{len(texts)} normalized transcripts")

                for wav_file, text_file in zip(wavs, texts):
                    # Pairing relies on both lists being in the same order;
                    # verify it via the basenames.
                    assert get_basename(wav_file) == get_basename(
                        text_file)[:-len(".normalized")], (
                        f"Mismatched pair: {wav_file} / {text_file}")
                    text_en = read_text(text_file)
                    symbols = text_to_symbols(text_en, lang)

                    entries.append(PreData(name=get_basename(wav_file),
                                           speaker_name=speaker_name,
                                           text=text_en,
                                           wav_path=wav_file,
                                           symbols=symbols,
                                           # one accent label per symbol
                                           accents=[accent_name] * len(symbols),
                                           gender=gender,
                                           lang=lang))

    entries.sort(key=sort_libri, reverse=False)
    logger.info(
        f"Parsed {len(entries)} entries from {len(speakers_dict)} speakers.")

    return entries
コード例 #6
0
ファイル: ljs.py プロジェクト: stefantaubert/tacotron2
def parse(path: str) -> PreDataList:
    """Parse a single-speaker corpus described by ``metadata.csv``.

    Every line of ``<path>/metadata.csv`` is split on ``"|"``: field 0 is the
    basename of the wav file under ``<path>/wavs`` and field 2 is the text
    that is used. All entries share the hard-coded speaker ``'1'``, accent
    ``"north_america"``, English language and female gender.

    Args:
        path: Root directory holding ``metadata.csv`` and ``wavs``.

    Returns:
        All parsed entries, sorted with ``sort_ljs``.

    Raises:
        Exception: If ``path``, the metadata file or the wav directory does
            not exist.
    """
    metadata_filepath = os.path.join(path, 'metadata.csv')
    wav_dirpath = os.path.join(path, 'wavs')

    # Checked in order; the first missing location aborts parsing.
    required_locations = (
        ("Directory not found:", path),
        ("Metadatafile not found:", metadata_filepath),
        ("WAVs not found:", wav_dirpath),
    )
    for label, location in required_locations:
        if not os.path.exists(location):
            print(label, location)
            raise Exception(f"{label} {location}")

    result = PreDataList()
    speaker_name = '1'
    accent_name = "north_america"
    lang = Language.ENG
    gender = Gender.FEMALE

    lines = read_lines(metadata_filepath)
    print("Parsing files...")
    for line in tqdm(lines):
        parts = line.split('|')
        basename = parts[0]
        # parts[1] contains years, in parts[2] the years are written out
        # ex. ['LJ001-0045', '1469, 1470;', 'fourteen sixty-nine, fourteen seventy;']
        text = parts[2]
        wav_path = os.path.join(wav_dirpath, f'{basename}.wav')
        symbols = text_to_symbols(text, lang)

        result.append(PreData(name=basename,
                              speaker_name=speaker_name,
                              text=text,
                              wav_path=wav_path,
                              symbols=symbols,
                              # one accent label per symbol
                              accents=[accent_name] * len(symbols),
                              gender=gender,
                              lang=lang))

    result.sort(key=sort_ljs, reverse=False)
    print("Done.")

    return result
コード例 #7
0
def parse(dir_path: str) -> PreDataList:
    """Parse ``<dir_path>/data`` where each ``X.wav`` has a ``X.wav.trn`` file.

    Wav files without a matching ``.trn`` file are skipped (only their count
    is printed). The first line of each ``.trn`` file is used as the Chinese
    text. The part of the wav basename before ``"_"`` is used as both the
    speaker name and the accent label.

    Args:
        dir_path: Root directory containing the ``data`` folder.

    Returns:
        All parsed entries, sorted by entry name.

    Raises:
        Exception: If ``dir_path`` does not exist.
        ValueError: If a wav basename is not of the form
            ``<speaker>_<integer>``.
    """
    if not os.path.exists(dir_path):
        message = f"Directory not found: {dir_path}"
        print(message)
        raise Exception(message)

    data_dir = os.path.join(dir_path, "data")
    # A set makes the per-wav membership test O(1) instead of scanning a list.
    existing_transcripts = set(glob.glob(os.path.join(data_dir, "*.trn")))
    wav_files = glob.glob(os.path.join(data_dir, "*.wav"))

    candidates = sorted((wav, "{}.trn".format(wav)) for wav in wav_files)
    wavs_sents = [
        pair for pair in candidates if pair[1] in existing_transcripts
    ]

    print("Skipped:", len(candidates) - len(wavs_sents), "of", len(candidates))

    res = PreDataList()
    print("Parsing files...")
    for wav, sent_file in tqdm(wavs_sents):
        content = read_lines(sent_file)
        chn = content[0].strip()
        # remove "=" from chinese transcription because it is not correct
        # occurs only in sentences with nr. 374, e.g. B22_374
        chn = chn.replace("= ", '')
        basename = os.path.basename(wav)[:-4]  # drop the ".wav" suffix
        speaker, nr = basename.split("_")
        # The number itself is unused; the conversion is kept so that a
        # malformed file name still fails loudly, as before.
        int(nr)

        symbols = text_to_symbols(chn, Language.CHN)
        accents = [speaker] * len(symbols)
        tmp = PreData(basename, speaker, chn, wav, symbols, accents,
                      Gender.FEMALE, Language.CHN)  # TODO Gender
        res.append(tmp)
    print("Done.")

    res.sort(key=lambda entry: entry.name)

    return res
コード例 #8
0
ファイル: thchs.py プロジェクト: stefantaubert/tacotron2
def parse(dir_path: str) -> PreDataList:
  """Parse the train and test word transcriptions below ``dir_path``.

  Reads ``doc/trans/train.word.txt`` and ``doc/trans/test.word.txt``; each
  line is ``<speaker>_<nr> <chinese text>``. The wav file is looked up under
  ``wav/train/<speaker>/`` or ``wav/test/<speaker>/`` with a ``.wav`` and
  then a ``.WAV`` extension; lines without a wav file are reported and
  skipped. A ``"?"`` is appended to texts ending in one of the question
  particles, ``"。"`` to all others.

  Args:
    dir_path: Root directory of the corpus.

  Returns:
    All parsed entries, ordered by (speaker letter, speaker number,
    sentence number).

  Raises:
    Exception: If ``dir_path`` does not exist.
  """
  if not os.path.exists(dir_path):
    message = f"Directory not found: {dir_path}"
    print(message)
    raise Exception(message)

  train_words = os.path.join(dir_path, 'doc/trans/train.word.txt')
  test_words = os.path.join(dir_path, 'doc/trans/test.word.txt')
  train_wavs = os.path.join(dir_path, 'wav/train/')
  test_wavs = os.path.join(dir_path, 'wav/test/')

  parse_paths = [
    (train_words, train_wavs),
    (test_words, test_wavs)
  ]

  # (entry, sorting key) pairs; the key is (letter, speaker number, nr).
  files: List[Tuple[PreData, Tuple[str, int, int]]] = []
  lang = Language.CHN

  print("Parsing files...")
  for words_path, wavs_dir in parse_paths:
    lines = read_lines(words_path)

    for line in tqdm(lines):
      # Everything up to the first space is the name, the rest is the text.
      pos = line.find(' ')
      name, chinese = line[:pos], line[pos + 1:]

      speaker_name, nr = name.split("_")
      speaker_gender = Gender.MALE if speaker_name in MALE_SPEAKERS else Gender.FEMALE
      nr = int(nr)
      speaker_name_letter = speaker_name[0]
      speaker_name_number = int(speaker_name[1:])

      # Try the lower-case extension first, then the upper-case one.
      wav_path = os.path.join(wavs_dir, speaker_name, name + '.wav')
      if not os.path.exists(wav_path):
        wav_path = os.path.join(wavs_dir, speaker_name, name + '.WAV')
        if not os.path.exists(wav_path):
          print("Not found wav file:", wav_path)
          continue

      # remove "=" from chinese transcription because it is not correct
      # occurs only in sentences with nr. 374, e.g. B22_374
      chinese = chinese.replace("= ", '')
      is_question = chinese.endswith(QUESTION_PARTICLE_1) or chinese.endswith(
        QUESTION_PARTICLE_2)
      chinese += "?" if is_question else "。"

      symbols = text_to_symbols(chinese, lang)

      # Speakers without an ACCENTS entry use their own name as accent.
      accent_name = ACCENTS.get(speaker_name, speaker_name)
      entry = PreData(
        name=name,
        speaker_name=speaker_name,
        text=chinese,
        wav_path=wav_path,
        symbols=symbols,
        # one accent label per symbol
        accents=[accent_name] * len(symbols),
        gender=speaker_gender,
        lang=lang
      )

      files.append((entry, (speaker_name_letter, speaker_name_number, nr)))

  files.sort(key=lambda tup: tup[1], reverse=False)
  return PreDataList([entry for entry, _ in files])
コード例 #9
0
def _get_symbols_id_dict(l: PreDataList) -> SymbolIdDict:
    """Build a ``SymbolIdDict`` from every distinct symbol occurring in ``l``.

    Args:
        l: Parsed entries; the ``symbols`` of every item are collected.

    Returns:
        The result of ``SymbolIdDict.init_from_symbols`` on the set of all
        symbols (an empty set when ``l`` has no items).
    """
    symbols = set()
    for x in l.items():
        # Update in place: union() would copy the accumulated set on every
        # iteration.
        symbols.update(x.symbols)
    return SymbolIdDict.init_from_symbols(symbols)
コード例 #10
0
def _get_all_accents(l: PreDataList) -> AccentsDict:
    """Build an ``AccentsDict`` from every distinct accent occurring in ``l``.

    Args:
        l: Parsed entries; the ``accents`` of every item are collected.

    Returns:
        The result of ``AccentsDict.init_from_accents`` on the set of all
        accents (an empty set when ``l`` has no items).
    """
    accents = set()
    for x in l.items():
        # Update in place: union() would copy the accumulated set on every
        # iteration.
        accents.update(x.accents)
    return AccentsDict.init_from_accents(accents)