コード例 #1
0
ファイル: test.py プロジェクト: virginiadawson/contents
def main():
#  for line in open('lahu_writing.txt','rU'):
#    for word in line.split():
#      print word,
#      print transduce(word,decompose),
#      print transduce(transduce(word,decompose),chinese),
#      print transduce(transduce(word,decompose),baptist)

  f = open('4testLahuTexts.xml','rt')
  tree = ET.parse(f)
  f.close()
  for node in tree.findall('.//word/words/word/item'):
    if node.attrib['type'] == 'txt':
      form = codecs.encode(node.text,'utf8')
      print form,
      print [form],
      print transduce(form,decompose),
      print transduce(transduce(form,decompose),chinese),
      print transduce(transduce(form,decompose),baptist)
コード例 #2
0
 phrases = text.findall('.//phrases')
 print >> OutLaTeX, '\\begin{examples}'
 sentences = []
 for p in phrases:
     for s in p.findall('.//word/words'):
         # sentencenumber = s.find(".//item[@type='segnum']")
         print >> OutLaTeX, "\\item\n"
         print >> OutLaTeX, "\glll ",
         BaptistSentence = ''
         ChineseSentence = ''
         for w in s.findall('.//word'):
             for i in w.findall('.//item'):
                 if i.attrib['type'] in ['txt','punct']:
                     #form = codecs.encode(i.text,'utf8')
                     form = i.text
                     form = transduce(form,decompose)
                     form = escape(form)
                     BaptistSentence += ' %s' % transduce(form,baptist)
                     ChineseSentence += ' %s' % transduce(form,chinese)
         for level in ['txt', 'msa', 'gls']:
             for w in s.findall('.//word'):
                 itemToOutput = ' {}'
                 for i in w.findall('.//item'):
                     if i.attrib['type'] == 'punct':
                         if level in ['msa']:
                             form = ''
                         elif level in ['gls']:
                             form = ''
                         else:
                             form = str(i.text)
                     elif i.attrib['type'] == level: