# Code example #1
0
import sys
import xml.etree.ElementTree as ET

from pystardict import Dictionary

def collect_lemmas(tree):
    """Return the set of distinct lemmas found in *tree*.

    Every ``Paradigm`` element at any depth contributes its ``Lemma``
    attribute, with each ``´`` (U+00B4) character removed.

    Raises:
        KeyError: if a ``Paradigm`` element has no ``Lemma`` attribute.
    """
    return {
        paradigm.attrib['Lemma'].replace('´', '')
        for paradigm in tree.findall('.//Paradigm')
    }


def first_translation(entry):
    """Return the second line of a dictionary *entry*.

    Entries consisting of a single line are returned as-is instead of
    raising ``IndexError``.
    """
    # NOTE(review): presumably the first line of an entry is a header and the
    # second one holds the translation -- confirm against the dictionary used.
    lines = entry.split("\n")
    return lines[1] if len(lines) > 1 else lines[0]


def write_translations(words, dictionary, words_f, unknown_words_f):
    """Write a line for each word to one of the two output streams.

    Words present in *dictionary* go to *words_f* as ``word | translation``;
    all the others go to *unknown_words_f*, one per line.

    Args:
        words: iterable of strings to look up.
        dictionary: mapping supporting ``in`` and ``.get(key)``.
        words_f: writable text stream for the translated words.
        unknown_words_f: writable text stream for the words not found.

    Returns:
        The number of words that were found in *dictionary*.
    """
    translated = 0
    # Sorted so that the output files are identical from run to run; plain
    # set iteration order depends on string hash randomisation.
    for word in sorted(words):
        if word in dictionary:
            translated += 1
            rus_eq = first_translation(dictionary.get(word))
            words_f.write("{} | {}\n".format(word, rus_eq))
        else:
            unknown_words_f.write(word + "\n")
    return translated


def main(argv=None):
    """Look up every lemma of an XML file in a StarDict dictionary.

    ``argv[1]`` is the XML file to parse and ``argv[2]`` is passed to
    ``pystardict.Dictionary``. Found words are written to ``pwords.txt`` and
    the remaining ones to ``uwords.txt``; two summary lines are printed.

    Args:
        argv: argument list; defaults to ``sys.argv``.
    """
    args = sys.argv if argv is None else argv
    if len(args) < 3:
        prog = args[0] if args else "script"
        sys.exit("usage: {} PARADIGMS_XML STARDICT_PATH".format(prog))

    tree = ET.parse(args[1])
    # Presumably a Belarusian -> Russian dictionary, going by the name.
    dict_bel_rus = Dictionary(args[2])

    words = collect_lemmas(tree)
    print("Parsed: {} words".format(len(words)))

    # The outputs are opened only after both inputs loaded successfully, so
    # a failed run does not truncate the results of a previous one. UTF-8 is
    # explicit because the locale default may not be able to encode the text.
    with open('pwords.txt', 'w', encoding='utf-8') as words_f, \
            open('uwords.txt', 'w', encoding='utf-8') as unknown_words_f:
        translated = write_translations(
            words, dict_bel_rus, words_f, unknown_words_f)

    print("{} were translated".format(translated))


if __name__ == "__main__":
    main()