# created_file = True
# if created_file:
#     create_sil_file()
# Switches for this run. `training_data` picks which set of paths is used
# below; `state` is only assigned here (NOTE(review): its consumer is not
# visible in this part of the file).
training_data = True
state = True

# (id-list output file, raw text input file, label output directory)
file_id_path, text_file, label_phone_align = (
    ('./file_id_list_canto.scp', './raw_text.txt', './label_state_align/')
    if training_data
    else ('./test_id_list.scp', './test_raw_text.txt', './prompt-lab/')
)

# Opened for writing here (truncating any previous content); the per-sentence
# loop further down writes one sentence index per line into it.
file_id_canto = open(file_id_path, 'w')

# Load the word -> phone lexicon through the project's Linguistic_DICT helper.
mld = Linguistic_DICT()
word_dict = mld.get_phone_dict(dict_file='./word2jyut.lex')

# Read every line of the raw text file into memory, line endings included.
with open(text_file, 'r') as fid:
    textlines = list(fid)

# Alignment files under ./cup_ali/ (per the original note, these are used to
# create time durations for training data).
ali_files = glob.glob('./cup_ali/*.txt')
all_files_num = len(ali_files)

# Create the label output directory unless something already exists there.
if not os.path.exists(label_phone_align):
    os.mkdir(label_phone_align)
for text_line in textlines:
    sent_index, sent_content = pre_process(text_line)
    word_list, pos_list = get_word_pos_list(sent_content)
    phone_list, tone_list, syl_map, word_map = get_word_phone_list(word_dict, word_list)
    file_id_canto.write(sent_index+'\n')
    sil_phone_list = []