def _get_all_speakers(l: PreDataList) -> Tuple[SpeakersDict, SpeakersLogDict]:
    """Collect the speakers referenced by the entries of ``l``.

    Args:
        l: Source entries; every item yielded by ``l.items()`` must expose
            a ``speaker_name`` attribute.

    Returns:
        A ``(speakers_dict, speakers_log)`` pair. ``speakers_log`` is built
        by ``SpeakersLogDict.fromcounter`` from a ``Counter`` over all
        speaker names (one per entry, duplicates included).
        ``speakers_dict`` is built by ``SpeakersDict.fromlist`` from the
        same names after de-duplication.
    """
    names_per_entry: List[str] = [entry.speaker_name for entry in l.items()]

    # Count on the full list, i.e. before duplicates are dropped.
    speakers_log = SpeakersLogDict.fromcounter(Counter(names_per_entry))

    # Presumably keeps first-occurrence order (per the helper's name).
    unique_names = remove_duplicates_list_orderpreserving(names_per_entry)
    speakers_dict = SpeakersDict.fromlist(unique_names)

    return speakers_dict, speakers_log
def _get_ds_data(
    l: PreDataList,
    speakers_dict: SpeakersDict,
    accents: AccentsDict,
    symbols: SymbolIdDict,
) -> DsDataList:
    """Convert every entry of ``l`` into a ``DsData`` record.

    Args:
        l: Source entries, iterated via ``l.items()``.
        speakers_dict: Looked up with each entry's ``speaker_name`` to get
            the ``speaker_id``.
        accents: Its ``get_serialized_ids`` is applied to each entry's
            ``accents``.
        symbols: Its ``get_serialized_ids`` is applied to each entry's
            ``symbols``.

    Returns:
        A ``DsDataList`` wrapping one ``DsData`` per entry, in iteration
        order. ``entry_id`` is the zero-based position of the entry.
    """
    return DsDataList([
        DsData(
            entry_id=position,
            basename=entry.name,
            speaker_name=entry.speaker_name,
            speaker_id=speakers_dict[entry.speaker_name],
            text=entry.text,
            serialized_symbols=symbols.get_serialized_ids(entry.symbols),
            serialized_accents=accents.get_serialized_ids(entry.accents),
            wav_path=entry.wav_path,
            lang=entry.lang,
            gender=entry.gender,
        )
        for position, entry in enumerate(l.items())
    ])
def _get_symbols_id_dict(l: PreDataList) -> SymbolIdDict:
    """Build a ``SymbolIdDict`` from every distinct symbol used in ``l``.

    Args:
        l: Source entries; every item yielded by ``l.items()`` must expose
            an iterable ``symbols`` attribute with hashable elements.

    Returns:
        The result of ``SymbolIdDict.init_from_symbols`` called with the
        set of all symbols found (an empty set when ``l`` has no entries).
    """
    unique_symbols = set()
    for entry in l.items():
        # In-place update: avoids building a temporary set and copying the
        # whole accumulator for every entry, as repeated ``union`` would.
        unique_symbols.update(entry.symbols)
    return SymbolIdDict.init_from_symbols(unique_symbols)
def _get_all_accents(l: PreDataList) -> AccentsDict:
    """Build an ``AccentsDict`` from every distinct accent used in ``l``.

    Args:
        l: Source entries; every item yielded by ``l.items()`` must expose
            an iterable ``accents`` attribute with hashable elements.

    Returns:
        The result of ``AccentsDict.init_from_accents`` called with the
        set of all accents found (an empty set when ``l`` has no entries).
    """
    unique_accents = set()
    for entry in l.items():
        # In-place update: avoids building a temporary set and copying the
        # whole accumulator for every entry, as repeated ``union`` would.
        unique_accents.update(entry.accents)
    return AccentsDict.init_from_accents(unique_accents)