def handle1sutra(xml, folder_out):
    """Convert one sutra XML file into a text file inside *folder_out*.

    The output file name is the base name of *xml* with ``.xml`` replaced by
    ``.txt``.  The sutra number (everything after the first ``n`` in the base
    name) is stored in ``globals['sutra_no']`` and, with leading zeros
    stripped, in ``globals['sutra_no_0']``.

    Args:
        xml: Path of the source XML file.
        folder_out: Directory that receives the generated ``.txt`` file.

    Side effects:
        Prints progress to stdout, updates the module-level ``globals`` dict
        (``sutra_no``, ``sutra_no_0``, ``no_nor``), writes the output file and,
        when ``options.splitByJuan`` is set, calls ``splitByJuan`` on it.
    """
    print(xml)
    fn_in = os.path.basename(xml)
    stem = fn_in.replace('.xml', '')

    # NOTE(review): `globals` is used as a dict here, so it is presumably a
    # module-level dict shadowing the builtin -- defined outside this block.
    globals['sutra_no'] = stem[stem.find('n') + 1:]
    if globals['collection'] == 'T':
        # Drop the a-z suffix that follows the Mahaprajnaparamita sutra number.
        globals['sutra_no'] = re.sub("0220[a-z]", "0220", globals['sutra_no'])
    # sutra_no_0 is the sutra number without leading zeros.
    globals['sutra_no_0'] = re.sub("^0*", "", globals['sutra_no'])
    print('sutra_no:', globals['sutra_no'])

    fn_out = fn_in.replace('.xml', '.txt')
    path_out = os.path.join(folder_out, fn_out)

    # The context manager guarantees the output file is closed even when
    # parsing or conversion raises.
    with open(path_out, 'w', encoding='utf8') as fo:
        tree = etree.parse(xml)
        tree = zbx_xml.stripNamespaces(tree)  # remove namespaces
        #zbx_xml.stripComments(tree)  # remove XML comments
        readCharInfo(tree)

        if options.fileHeader:  # whether to print the header information
            if re.search(r'T07n0220[d-z]', xml) is not None:
                # From T07n0220d onward the detailed header is not printed,
                # but fileHeader() must still run first to collect the data
                # that is needed afterwards.
                fileHeader(tree)
                short_header = shortFileHeader()
                # Remove the final newline; only needed here for T07.
                fo.write(short_header[:-1])
            else:
                fo.write(fileHeader(tree))

        # Handle the <text rend='no_nor'> case.
        globals['no_nor'] = 0
        text_tag = tree.find('.//text')
        # find() returns None when the document has no <text> element.
        if text_tag is not None and text_tag.get('rend') == 'no_nor':
            globals['no_nor'] += 1

        body = tree.find('.//body')
        outtxt = handleNode(body)

        # Collapse runs of consecutive Siddham placeholder characters here.
        # NOTE(review): the pattern is reproduced verbatim; its bare `(`, `[`
        # and `?` alternatives look unescaped -- confirm against the original
        # file encoding before relying on it.
        outtxt = re.sub(r"((()|([)|(?)|(…)|(.)|(‧))*◇((◇)|( )|( )|(.)|(‧)|(()|())|([)|(])|(?)|(…))*◇(())|(])|(?)|(…)|(.)|(‧))*", "【◇】", outtxt)
        fo.write(outtxt)

    # Splitting reads the finished file, so it runs after the file is closed.
    if options.splitByJuan:
        splitByJuan(path_out, folder_out)
def handle1sutra(xml, folder_out):
    """Convert one sutra XML file into a text file inside *folder_out*.

    The output file name is the base name of *xml* with ``.xml`` replaced by
    ``.txt``.  The sutra number (everything after the first ``n`` in the base
    name) is stored in ``globals['sutra_no']`` and, with leading zeros
    stripped, in ``globals['sutra_no_0']``.

    Args:
        xml: Path of the source XML file.
        folder_out: Directory that receives the generated ``.txt`` file.

    Side effects:
        Prints progress to stdout, updates the module-level ``globals`` dict
        (``sutra_no``, ``sutra_no_0``), writes the output file and, when
        ``options.splitByJuan`` is set, calls ``splitByJuan`` on it.
    """
    print(xml)
    fn_in = os.path.basename(xml)
    stem = fn_in.replace('.xml', '')

    # NOTE(review): `globals` is used as a dict here, so it is presumably a
    # module-level dict shadowing the builtin -- defined outside this block.
    globals['sutra_no'] = stem[stem.find('n') + 1:]
    # sutra_no_0 is the sutra number without leading zeros.
    globals['sutra_no_0'] = re.sub("^0*", "", globals['sutra_no'])
    print('sutra_no:', globals['sutra_no'])

    fn_out = fn_in.replace('.xml', '.txt')
    path_out = os.path.join(folder_out, fn_out)

    # The context manager guarantees the output file is closed even when
    # parsing or conversion raises.
    with open(path_out, 'w', encoding='utf8') as fo:
        tree = etree.parse(xml)
        tree = zbx_xml.stripNamespaces(tree)  # remove namespaces
        #zbx_xml.stripComments(tree)  # remove XML comments

        if options.fileHeader:
            fo.write(fileHeader(tree))

        readCharInfo(tree)
        body = tree.find('.//body')
        fo.write(handleNode(body))

    # Splitting reads the finished file, so it runs after the file is closed.
    if options.splitByJuan:
        splitByJuan(path_out, folder_out)