def __init__(self):
    """Set up the parser state used while walking a WNXML document."""
    xml.sax.ContentHandler.__init__(self)

    # Bookkeeping for the position inside the input.
    self.m_lcnt = 0    # input line number
    self.m_ppath = []  # XML path to the current node (element names)
    # Synset progress flag:
    #   -1: no synset started yet, 0: inside a synset, 1: synset finished
    self.m_done = -1

    # Output: the synset currently being filled in, and the list that
    # collects results.
    self.m_syns = synset.Synset()
    self.m_syns_list = []

    # Scratch buffers for the two halves of each (first, second) tuple
    # (std::pair in the C++ original).  Strings are immutable, so the
    # chained assignment is equivalent to two separate assignments.
    self.m_ilrs0_temp = self.m_ilrs1_temp = ""
    self.m_sumolinks0_temp = self.m_sumolinks1_temp = ""
    self.m_elrs0_temp = self.m_elrs1_temp = ""
    self.m_elrs30_temp = self.m_elrs31_temp = ""
    self.m_ekszlinks0_temp = self.m_ekszlinks1_temp = ""
    self.m_vframelinks0_temp = self.m_vframelinks1_temp = ""

    # Root element tracking.
    self.m_startroot = False  # was there a starting root tag?
    self.m_endroot = False    # was there an end root tag?
def endElement(self, name):
    """Handle an element end tag (SAX ``ContentHandler`` callback).

    Depending on the element being closed this either marks the root as
    finished, stores the completed synset in ``m_syns_list``, or appends
    the pair collected in the temporary buffers to the matching list of
    the current synset.  The closed element is then removed from
    ``m_ppath``.

    Args:
        name: Name of the element whose end tag was encountered.

    Raises:
        WNXMLParserException: If a ``SYNSET`` end tag arrives while
            ``m_done`` is not 0 (i.e. no synset is open).
    """
    if DEBUG:
        print("({0}, {1}): /{2}/END: {3}".format(
            self._locator.getLineNumber(),
            self._locator.getColumnNumber(),
            "/".join(self.m_ppath), name))

    # The last entry of m_ppath is the element being closed (it is popped
    # below), so the parent is the second-to-last entry, if there is one.
    if len(self.m_ppath) >= 2:
        parent = self.m_ppath[-2]
    else:
        parent = ""

    if name == "WNXML":  # WNXML
        self.m_endroot = True

    elif name == "SYNSET":  # SYNSET
        if self.m_done != 0:
            raise WNXMLParserException(
                "This is impossible!\nThe parser should've caught this error: 'SYNSET' end tag without previous begin tag"
            )
        self.m_done = 1
        self.m_syns_list.append((self.m_syns, self.m_lcnt))
        # Start with a fresh synset so the stored one is not modified later.
        self.m_syns = synset.Synset()

    elif name == "ILR" and parent == "SYNSET":
        self.m_syns.ilrs.append((self.m_ilrs0_temp, self.m_ilrs1_temp))
        self.m_ilrs0_temp = ""
        self.m_ilrs1_temp = ""

    elif name == "SUMO" and parent == "SYNSET":
        self.m_syns.sumolinks.append(
            (self.m_sumolinks0_temp, self.m_sumolinks1_temp))
        self.m_sumolinks0_temp = ""
        self.m_sumolinks1_temp = ""

    elif name == "ELR" and parent == "SYNSET":
        self.m_syns.elrs.append((self.m_elrs0_temp, self.m_elrs1_temp))
        # Clear BOTH halves; otherwise the second component would leak
        # into the next ELR entry.
        self.m_elrs0_temp = ""
        self.m_elrs1_temp = ""

    elif name == "ELR3" and parent == "SYNSET":
        self.m_syns.elrs3.append((self.m_elrs30_temp, self.m_elrs31_temp))
        # Clear BOTH halves; otherwise the second component would leak
        # into the next ELR3 entry.
        self.m_elrs30_temp = ""
        self.m_elrs31_temp = ""

    elif name == "EKSZ" and parent == "SYNSET":
        self.m_syns.ekszlinks.append(
            (self.m_ekszlinks0_temp, self.m_ekszlinks1_temp))
        self.m_ekszlinks0_temp = ""
        self.m_ekszlinks1_temp = ""

    elif name == "VFRAME" and parent == "SYNSET":
        self.m_syns.vframelinks.append(
            (self.m_vframelinks0_temp, self.m_vframelinks1_temp))
        self.m_vframelinks0_temp = ""
        self.m_vframelinks1_temp = ""

    # Leave the element that was just closed.
    self.m_ppath.pop()
def createSynset(self, wnid, pos):
    """Look up ``wnid`` in the mapping returned by ``self.dat(pos)``.

    Returns the stored entry when ``wnid`` is present; otherwise returns a
    new, empty ``synset.Synset()``.
    """
    if wnid in self.dat(pos):
        return self.dat(pos)[wnid]
    # Unknown id: hand back a fresh, empty synset instead.
    return synset.Synset()