def text_to_sequence(text, speaker_name='baker', inference=False):
    """Convert input text to a sequence of symbol ids.

    For the 'baker' speaker, inference-mode text is first transliterated
    to pinyin and then to phonemes before the symbol lookup; in train mode
    the text is assumed to be space-separated symbols already. Any other
    speaker goes through the generic g2p helpers.

    :param text: raw text (inference) or space-separated phoneme symbols
                 (train mode)
    :param speaker_name: dataset/speaker identifier; 'baker' selects the
                         Chinese pinyin pipeline
    :param inference: True when called at inference time
    :return: list of symbol ids terminated by the eos marker
    """
    sequence = []
    tmp = ""
    if "baker" == speaker_name:
        if inference:
            my_pinyin = Pinyin(MyConverter())
            pinyin = my_pinyin.pinyin(text, style=Style.TONE3,
                                      errors=alpha_handler)
            print("text_to_sequence pinyin=", pinyin)
            # Drop tokens containing '#' (placeholders emitted by the
            # error handler) before the phoneme lookup.
            new_pinyin = []
            for x in pinyin:
                x = "".join(x)
                if "#" not in x:
                    new_pinyin.append(x)
            print("text_to_sequence new_pinyin=", new_pinyin)
            phonemes = get_phoneme_from_char_and_pinyin(text, new_pinyin)
            text = " ".join(phonemes)
            print(f"phoneme seq: {text}")
        try:
            for symbol in text.split():
                tmp = symbol
                idx = symbol_to_id[symbol]
                sequence.append(idx)
        except Exception as e:
            # BUG FIX: the exception object was previously discarded, so
            # the cause of the failure was invisible; report it alongside
            # the offending symbol. As before, a failed lookup aborts the
            # remaining symbols of this text.
            print("text_to_sequence error", tmp, e)
    else:
        if not inference:  # in train mode text should be already transformed to phonemes
            sequence = symbols_to_ids(clean_g2p(text.strip().split(" ")))
        else:
            sequence = inference_text_to_seq(text)
    # add eos tokens
    # NOTE(review): this appends the literal string 'eos_id', not a numeric
    # id, into a list of ids — confirm downstream really expects the string.
    sequence += ['eos_id']
    return sequence
    def get_audio_dict(self) -> (dict, dict, dict):
        """Load the raw dataset annotations.

        Reads '<use_type>.txt' under the dataset path for every configured
        use type and builds three lookups keyed by sample id.

        :return: tuple of (id -> audio path,
                           id -> space-joined hanzi characters,
                           id -> space-separated pinyin string)
        """
        use_types = self._use_type
        dataset_path = self._dataset_path
        participle = self._participle

        id_path_dict = {}
        id_hanzi_dict = {}
        id_pinyin_dict = {}

        # Loop-invariant helpers: build them once up front instead of
        # re-creating both for every line of every file (the original
        # constructed them inside the innermost loop).
        pinyin_dict = DataUtils.get_pinyin_dict()
        my_pinyin = Pinyin(MyConverter())

        for use_type in use_types:
            with open(file=dataset_path.joinpath(use_type + '.txt'),
                      mode='r',
                      encoding='utf-8') as txt_file:
                # Iterate the file lazily; no need to materialize all lines.
                for line in txt_file:
                    # Each line is "<id>\t<hanzi text>"; split once.
                    fields = line.split('\t')
                    sample_id = fields[0]

                    # Audio path: <dataset>/<use_type>/<id>
                    path = dataset_path.joinpath(use_type, sample_id)

                    # Optional word segmentation of the hanzi text.
                    hanzi = fields[1].strip('\n')
                    if participle:
                        hanzi = list(jieba.cut(hanzi, cut_all=False))
                    else:
                        hanzi = hanzi.split(' ')

                    # Transliterate to tone3 pinyin; syllables missing from
                    # the pinyin table are replaced by '_'.
                    pinyin = ''
                    for token in hanzi:
                        for char in my_pinyin.pinyin(token,
                                                     style=Style.TONE3,
                                                     heteronym=False):
                            if char[0] not in pinyin_dict:
                                pinyin += ('_' + ' ')
                            else:
                                pinyin += (char[0] + ' ')

                    id_path_dict[sample_id] = path
                    id_hanzi_dict[sample_id] = ' '.join(list(''.join(hanzi)))
                    id_pinyin_dict[sample_id] = pinyin

        return id_path_dict, id_hanzi_dict, id_pinyin_dict