print "Parsing the tab ... ",
 list = open("../resources/vocab_jlpt_n3_cum.xml")
 #list = open("../resources/vocab_jlpt_n3.xml")    
 parser = make_parser()
 vocab_list = HtmlTabHandler()
 parser.setContentHandler(vocab_list)
 parser.parse(list)
 print "done."
 
 print "Creating Vocab List ... ",
 kanjiList = []
 for entry in vocab_list.list[1:]:
     if len(entry) == 4:
         kanjiList.append(Kanji(literal=entry[0], reading = entry[1], meaning = entry[2].split(','), jlpt = 3))
     elif len(entry) == 3:
         kanjiList.append(Kanji(literal=entry[0], meaning = entry[1].split(','), jlpt = 3))
     else:
         pass
 print "done."
 
 for entry in kanjiList:
     entry.show()
 print "Number of entries: ", len(kanjiList)
 
 print "Copy list list to XML File ...", 
 kl2xml = KanjiListToXmlFile("../resources/JLPT_N3_VOC_CUM.xml")
 #kl2xml = KanjiListToXmlFile("../resources/JLPT_N3_VOC.xml")
 kl2xml.kanjiListToXml(kanjiList)
 print "done."
     
Esempio n. 2
0
'''
@author: zermelozf
'''
from xml.sax import make_parser
from Kanjidic2SaxHandlers import ListKanji
from KanjiListSaxWriters import KanjiListToXmlFile

if __name__ == '__main__':
    '''
    This script uses the ListKanji class to create a customized list of Kanji from Kanjidic2.
    The list is then saved to an XML file.
    You need to put the Kanjidic2 file available at http://www.csse.monash.edu.au/~jwb/kanjidic2/ 
    in the resources directory.
    '''
    #Create a customized list of JLPT3 Kanji
    dict = open("../resources/kanjidic2")  
    print "Creating the list ...", 
    parser = make_parser()
    jlpt = ListKanji('jlpt', 3)
    parser.setContentHandler(jlpt)
    parser.parse(dict)
    sortedKanjiList = jlpt.organizeListBy('freq')
    print "done."
    for kanji in sortedKanjiList:
        kanji.show()
    print "JLPT 3 contains", len(jlpt.kanjiList), "Kanji."
    
    #Save the list to an XML file
    print "Copy JLPT3 list to XML File ...", 
    kl2xml = KanjiListToXmlFile("../resources/JLPT3.xml")
    kl2xml.kanjiListToXml(sortedKanjiList)
    print "done."