def loadFrameFe(self, feNode):
    """
    Build the dictionary describing one frame element from ``feNode``.

    The attributes of ``feNode`` are loaded with ``loadXMLAttributes`` and the
    children returned by ``getNoneTextChildNodes`` are handled by tag name:

    * ``definition`` - the ``nodeValue`` of its first child is stored under
      ``'definition'`` (``None`` when the element has no children).
    * ``semTypes`` - the attribute dictionary of every child is stored in
      ``fe['semtypes']`` under that child's ``'ID'`` value.

    Returns the dictionary, or ``None`` (after a message on stderr) when a
    second definition or a child with any other tag name is encountered.
    """
    goodNodes = getNoneTextChildNodes(feNode)
    fe = {}
    fe = loadXMLAttributes(fe, feNode.attributes)
    fe['semtypes'] = {}

    for gn in goodNodes:
        if gn.nodeName == 'definition':
            # The key may already exist either from an earlier <definition>
            # child or from the attributes loaded above.
            if 'definition' in fe:
                print >> sys.stderr, 'Error , fe already have a definition:', fe['definition']
                return None
            try:
                fe['definition'] = gn.childNodes[0].nodeValue
            except IndexError:
                # The definition element has no children at all.
                fe['definition'] = None
        elif gn.nodeName == 'semTypes':
            for gsn in getNoneTextChildNodes(gn):
                semType = {}
                loadXMLAttributes(semType, gsn.attributes)
                fe['semtypes'][semType['ID']] = semType
        else:
            print >> sys.stderr, 'In loadFrameFe, found this node:', gn.nodeName
            return None

    return fe
def loadXML(self, fileName):
    """
    Parse the XML file ``fileName`` and fill ``self['relation-types']``.

    Every relation-type node is turned into a dictionary of its attributes
    plus a ``'frame-relations'`` entry, which maps the ``'ID'`` of each
    relation (as returned by ``self.loadSingleRelation``) to that relation.
    The dictionary is stored in ``self['relation-types']`` under the
    relation type's ``'ID'``.

    Returns ``True`` on success. Returns ``False`` (after a message on
    stderr) as soon as a relation type does not contain exactly one child
    node or a relation cannot be loaded; relation types stored before the
    failure remain in ``self``.
    """
    doc = xml.dom.minidom.parse(fileName)

    # NOTE(review): index 1 assumes exactly one node (e.g. a doctype or a
    # processing instruction) precedes the root element - confirm against
    # the data files.
    relationTypeNodes = getNoneTextChildNodes(doc.childNodes[1])  # the actual frame-relation-type nodes

    for rtn in relationTypeNodes:
        rt = {}  # dictionary for frame-relation-type
        loadXMLAttributes(rt, rtn.attributes)

        relationNodes = getNoneTextChildNodes(rtn)  # the actual frame-relationS nodes
        if len(relationNodes) != 1:
            # Also reached when there is no frame-relations node at all.
            print >> sys.stderr, 'Got more than one frame-relations node in type:', rt['name']
            return False

        singleRelationNodes = getNoneTextChildNodes(relationNodes[0])  # the actual frame-relation nodes
        singleRelations = {}

        # enumerate() supplies the position for the error message; the
        # previous hand-kept counter was never incremented and always read 0.
        for i, srn in enumerate(singleRelationNodes):
            tmp = self.loadSingleRelation(srn)
            if tmp is None:
                print >> sys.stderr, 'Unable to load relation No.' + str(i), 'for type', rt['name']
                return False
            singleRelations[tmp['ID']] = tmp

        rt['frame-relations'] = singleRelations
        self['relation-types'][rt['ID']] = rt

    return True
def loadXML(self, fileName):
    """
    Parse the XML file ``fileName`` and fill ``self['relation-types']``.

    Every relation-type node is turned into a dictionary of its attributes
    plus a ``'frame-relations'`` entry, which maps the ``'ID'`` of each
    relation (as returned by ``self.loadSingleRelation``) to that relation.
    The dictionary is stored in ``self['relation-types']`` under the
    relation type's ``'ID'``.

    Returns ``True`` on success. Returns ``False`` (after a message on
    stderr) as soon as a relation type does not contain exactly one child
    node or a relation cannot be loaded; relation types stored before the
    failure remain in ``self``.
    """
    doc = xml.dom.minidom.parse(fileName)

    # NOTE(review): index 1 assumes exactly one node (e.g. a doctype or a
    # processing instruction) precedes the root element - confirm against
    # the data files.
    relationTypeNodes = getNoneTextChildNodes(doc.childNodes[1])  # the actual frame-relation-type nodes

    for rtn in relationTypeNodes:
        rt = {}  # dictionary for frame-relation-type
        loadXMLAttributes(rt, rtn.attributes)

        relationNodes = getNoneTextChildNodes(rtn)  # the actual frame-relationS nodes
        if len(relationNodes) != 1:
            # Also reached when there is no frame-relations node at all.
            print >> sys.stderr, 'Got more than one frame-relations node in type:', rt['name']
            return False

        singleRelationNodes = getNoneTextChildNodes(relationNodes[0])  # the actual frame-relation nodes
        singleRelations = {}

        # enumerate() supplies the position for the error message; the
        # previous hand-kept counter was never incremented and always read 0.
        for i, srn in enumerate(singleRelationNodes):
            tmp = self.loadSingleRelation(srn)
            if tmp is None:
                print >> sys.stderr, 'Unable to load relation No.' + str(i), 'for type', rt['name']
                return False
            singleRelations[tmp['ID']] = tmp

        rt['frame-relations'] = singleRelations
        self['relation-types'][rt['ID']] = rt

    return True
def loadFrameFe(self, feNode):
    """
    Build the dictionary describing one frame element from ``feNode``.

    The attributes of ``feNode`` are loaded with ``loadXMLAttributes`` and the
    children returned by ``getNoneTextChildNodes`` are handled by tag name:

    * ``definition`` - the ``nodeValue`` of its first child is stored under
      ``'definition'`` (``None`` when the element has no children).
    * ``semTypes`` - the attribute dictionary of every child is stored in
      ``fe['semtypes']`` under that child's ``'ID'`` value.

    Returns the dictionary, or ``None`` (after a message on stderr) when a
    second definition or a child with any other tag name is encountered.
    """
    goodNodes = getNoneTextChildNodes(feNode)
    fe = {}
    fe = loadXMLAttributes(fe, feNode.attributes)
    fe['semtypes'] = {}

    for gn in goodNodes:
        if gn.nodeName == 'definition':
            # The key may already exist either from an earlier <definition>
            # child or from the attributes loaded above.
            if 'definition' in fe:
                print >> sys.stderr, 'Error , fe already have a definition:', fe['definition']
                return None
            try:
                fe['definition'] = gn.childNodes[0].nodeValue
            except IndexError:
                # The definition element has no children at all.
                fe['definition'] = None
        elif gn.nodeName == 'semTypes':
            for gsn in getNoneTextChildNodes(gn):
                semType = {}
                loadXMLAttributes(semType, gsn.attributes)
                fe['semtypes'][semType['ID']] = semType
        else:
            print >> sys.stderr, 'In loadFrameFe, found this node:', gn.nodeName
            return None

    return fe
def loadXMLNode(self, frameNode):
    """
    Fill this object from the frame node ``frameNode``.

    The node's attributes are loaded into ``self`` with ``loadXMLAttributes``;
    each child returned by ``getNoneTextChildNodes`` is then handed to the
    loader method that matches its tag name. A loader returning a false value
    is reported on stderr and processing continues with the next child.

    Returns ``False`` (after a message on stderr) at the first child whose tag
    name has no loader, ``True`` once every child has been visited.
    """
    loadXMLAttributes(self, frameNode.attributes)

    # tag name -> (loader method name, message printed when the loader fails).
    # Methods are looked up by name only when their tag is actually seen.
    dispatch = {
        'definition': ('loadFrameDefinition', 'Unable to read definition node for frame:'),
        'fes': ('loadFrameFes', 'Unable to read a fes node for frame:'),
        'lexunits': ('loadFrameLexunits', 'Unable to read a lexunits node for frame:'),
        'semTypes': ('loadFrameSemtypes', 'Unable to read a semtypes node for frame:'),
    }

    for child in getNoneTextChildNodes(frameNode):
        entry = dispatch.get(child.nodeName)
        if entry is None:
            print >> sys.stderr, 'Have no idea how to deal with node type:', child.nodeName
            return False

        loaderName, complaint = entry
        if not getattr(self, loaderName)(child):
            print >> sys.stderr, complaint, self['ID']

    return True
def loadSingleRelation(self, relationNode):
    """
    Build the dictionary for a single relation node.

    The attributes of ``relationNode`` are loaded into the result first. Each
    child returned by ``getNoneTextChildNodes`` then contributes a nested
    dictionary of its own attributes, stored under that child's ``'ID'``
    value in the same result dictionary.

    Returns the resulting dictionary.
    """
    relation = {}
    loadXMLAttributes(relation, relationNode.attributes)

    for child in getNoneTextChildNodes(relationNode):
        childAttrs = {}
        loadXMLAttributes(childAttrs, child.attributes)
        relation[childAttrs['ID']] = childAttrs

    return relation
def loadFrameSemtypes(self, node):
    """
    Store the semantic types found under ``node`` in ``self['semtypes']``.

    Every child returned by ``getNoneTextChildNodes`` is converted to a
    dictionary of its attributes and indexed by its ``'ID'`` value. Any
    previous ``self['semtypes']`` value is replaced.

    Always returns ``True``.
    """
    collected = {}
    for child in getNoneTextChildNodes(node):
        attrs = {}
        loadXMLAttributes(attrs, child.attributes)
        collected[attrs['ID']] = attrs

    self['semtypes'] = collected
    return True
def loadSingleLayer(self, layerNode):
    """
    Loads a single layer, <layer>

    The layer's ``ID`` attribute (converted to ``int``) and ``name``
    attribute (converted to ``str``) are read from ``layerNode``. If the
    layer has a non-text child, every non-text child of that child is loaded
    with ``loadXMLAttributes`` and stored in ``layer['labels']`` under its
    ``'ID'`` value.

    The layer is registered in ``self['layers']`` under its ID and returned.
    Returns ``None`` (after a message on stderr) when the ID or name cannot
    be read, or when the layer has more than one non-text child.
    """
    layer = {'labels': {}}
    try:
        layer['ID'] = int(layerNode.attributes['ID'].value)
        layer['name'] = str(layerNode.attributes['name'].value)
    except (KeyError, TypeError, ValueError):
        # KeyError: attribute missing; ValueError: ID is not an integer or
        # the name cannot be converted by str(); TypeError: ``attributes``
        # is None.
        print >> sys.stderr, '>> Unable to load layer id or name'
        return None

    # A list comprehension always yields a list, so len() and indexing below
    # do not depend on what filter() returns.
    labelsNode = [x for x in layerNode.childNodes if x.nodeType != x.TEXT_NODE]
    if len(labelsNode) > 1:
        print >> sys.stderr, '>> Got more than one labels node for a layer'
        print >> sys.stderr, labelsNode
        return None

    if len(labelsNode) > 0:
        labels = [x for x in labelsNode[0].childNodes if x.nodeType != x.TEXT_NODE]
        for l in labels:
            ll = {}
            ll = loadXMLAttributes(ll, l.attributes)
            layer['labels'][ll['ID']] = ll

    self['layers'][layer['ID']] = layer
    return layer
def loadSentence(self, sentNode, annotation):
    """
    Loads the sentence

    ``sentNode`` must have exactly one non-text child, whose first child
    carries the sentence text. The attributes of ``sentNode`` are loaded with
    ``loadXMLAttributes`` and the text is added under ``'text'`` (converted
    to ``str`` when possible, otherwise left as ``unicode``). The result is
    stored in ``annotation['sentence']``.

    Returns ``True`` on success and ``False`` (after a message on stderr)
    when the node does not have the expected shape, its attributes cannot be
    loaded, or the text is missing or is not a ``unicode`` value.
    """
    goodNodes = [x for x in sentNode.childNodes if x.nodeType != x.TEXT_NODE]
    if len(goodNodes) != 1:
        print >> sys.stderr, '>> Sent node has:', len(goodNodes), 'good nodes'
        return False

    sent = {}
    try:
        sent = loadXMLAttributes(sent, sentNode.attributes)
    except Exception:
        # The helper's failure modes are not visible here, so any ordinary
        # error is reported, but KeyboardInterrupt/SystemExit now propagate.
        print >> sys.stderr, '>> Unable to get one of ID or aPos'
        return False

    textNodes = goodNodes[0].childNodes
    if len(textNodes) == 0:
        # An empty text element has nothing to read; report it the same way
        # as an unusable text value instead of raising IndexError.
        print >> sys.stderr, '>> Unable to get the sentence text from', goodNodes[0]
        return False

    sent['text'] = textNodes[0].nodeValue
    if not isinstance(sent['text'], unicode):
        print >> sys.stderr, '>> Unable to get the sentence text from', goodNodes[0].childNodes[0]
        return False

    try:
        sent['text'] = str(sent['text'])
    except UnicodeEncodeError:
        # Text that str() cannot encode stays a unicode object.
        pass

    annotation['sentence'] = sent
    return True
def loadSingleLayer(self, layerNode):
    """
    Loads a single layer, <layer>

    The layer's ``ID`` attribute (converted to ``int``) and ``name``
    attribute (converted to ``str``) are read from ``layerNode``. If the
    layer has a non-text child, every non-text child of that child is loaded
    with ``loadXMLAttributes`` and stored in ``layer["labels"]`` under its
    ``"ID"`` value.

    The layer is registered in ``self["layers"]`` under its ID and returned.
    Returns ``None`` (after a message on stderr) when the ID or name cannot
    be read, or when the layer has more than one non-text child.
    """
    layer = {"labels": {}}
    try:
        layer["ID"] = int(layerNode.attributes["ID"].value)
        layer["name"] = str(layerNode.attributes["name"].value)
    except (KeyError, TypeError, ValueError):
        # KeyError: attribute missing; ValueError: ID is not an integer or
        # the name cannot be converted by str(); TypeError: ``attributes``
        # is None.
        print >>sys.stderr, ">> Unable to load layer id or name"
        return None

    # A list comprehension always yields a list, so len() and indexing below
    # do not depend on what filter() returns.
    labelsNode = [x for x in layerNode.childNodes if x.nodeType != x.TEXT_NODE]
    if len(labelsNode) > 1:
        print >>sys.stderr, ">> Got more than one labels node for a layer"
        print >>sys.stderr, labelsNode
        return None

    if len(labelsNode) > 0:
        labels = [x for x in labelsNode[0].childNodes if x.nodeType != x.TEXT_NODE]
        for l in labels:
            ll = {}
            ll = loadXMLAttributes(ll, l.attributes)
            layer["labels"][ll["ID"]] = ll

    self["layers"][layer["ID"]] = layer
    return layer
def loadSentence(self, sentNode, annotation):
    """
    Loads the sentence

    ``sentNode`` must have exactly one non-text child, whose first child
    carries the sentence text. The attributes of ``sentNode`` are loaded with
    ``loadXMLAttributes`` and the text is added under ``"text"`` (converted
    to ``str`` when possible, otherwise left as ``unicode``). The result is
    stored in ``annotation["sentence"]``.

    Returns ``True`` on success and ``False`` (after a message on stderr)
    when the node does not have the expected shape, its attributes cannot be
    loaded, or the text is missing or is not a ``unicode`` value.
    """
    goodNodes = [x for x in sentNode.childNodes if x.nodeType != x.TEXT_NODE]
    if len(goodNodes) != 1:
        print >>sys.stderr, ">> Sent node has:", len(goodNodes), "good nodes"
        return False

    sent = {}
    try:
        sent = loadXMLAttributes(sent, sentNode.attributes)
    except Exception:
        # The helper's failure modes are not visible here, so any ordinary
        # error is reported, but KeyboardInterrupt/SystemExit now propagate.
        print >>sys.stderr, ">> Unable to get one of ID or aPos"
        return False

    textNodes = goodNodes[0].childNodes
    if len(textNodes) == 0:
        # An empty text element has nothing to read; report it the same way
        # as an unusable text value instead of raising IndexError.
        print >>sys.stderr, ">> Unable to get the sentence text from", goodNodes[0]
        return False

    sent["text"] = textNodes[0].nodeValue
    if not isinstance(sent["text"], unicode):
        print >>sys.stderr, ">> Unable to get the sentence text from", goodNodes[0].childNodes[0]
        return False

    try:
        sent["text"] = str(sent["text"])
    except UnicodeEncodeError:
        # Text that str() cannot encode stays a unicode object.
        pass

    annotation["sentence"] = sent
    return True
def loadXMLNode(self, frameNode):
    """
    Fill this object from the frame node ``frameNode``.

    The node's attributes are loaded into ``self`` with ``loadXMLAttributes``;
    each child returned by ``getNoneTextChildNodes`` is then handed to the
    loader method that matches its tag name. A loader returning a false value
    is reported on stderr and processing continues with the next child.

    Returns ``False`` (after a message on stderr) at the first child whose tag
    name has no loader, ``True`` once every child has been visited.
    """
    loadXMLAttributes(self, frameNode.attributes)

    # tag name -> (loader method name, message printed when the loader fails).
    # Methods are looked up by name only when their tag is actually seen.
    dispatch = {
        'definition': ('loadFrameDefinition', 'Unable to read definition node for frame:'),
        'fes': ('loadFrameFes', 'Unable to read a fes node for frame:'),
        'lexunits': ('loadFrameLexunits', 'Unable to read a lexunits node for frame:'),
        'semTypes': ('loadFrameSemtypes', 'Unable to read a semtypes node for frame:'),
    }

    for child in getNoneTextChildNodes(frameNode):
        entry = dispatch.get(child.nodeName)
        if entry is None:
            print >> sys.stderr, 'Have no idea how to deal with node type:', child.nodeName
            return False

        loaderName, complaint = entry
        if not getattr(self, loaderName)(child):
            print >> sys.stderr, complaint, self['ID']

    return True
def loadFrameLexicalUnit(self, lexunitNode):
    """
    Build the dictionary describing one lexical unit from ``lexunitNode``.

    The node's attributes are loaded with ``loadXMLAttributes`` and the
    children returned by ``getNoneTextChildNodes`` are handled by tag name:

    * ``definition`` - the ``nodeValue`` of its first child is stored under
      ``'definition'`` (``None`` when the element has no children).
    * ``annotation`` - each child becomes an entry of
      ``lexunit['annotation']`` keyed by its tag name; the value is the first
      child's ``nodeValue``, converted to ``int`` when possible, or ``None``
      (with a warning on stderr) when the child is empty.
    * ``lexemes`` - each child's attributes plus its text (under
      ``'lexeme'``) are stored in ``lexunit['lexeme']`` by ``'ID'``.
    * ``semTypes`` - each child's attributes are stored in
      ``lexunit['semtypes']`` by ``'ID'``.

    Returns the dictionary, or ``None`` (after a message on stderr) when a
    child with any other tag name is encountered.
    """
    lexunit = {}
    loadXMLAttributes(lexunit, lexunitNode.attributes)

    for gn in getNoneTextChildNodes(lexunitNode):
        if gn.nodeName == 'definition':
            try:
                lexunit['definition'] = gn.childNodes[0].nodeValue
            except IndexError:
                # The definition element has no children at all.
                lexunit['definition'] = None

        elif gn.nodeName == 'annotation':
            anno = {}
            for an in getNoneTextChildNodes(gn):
                key = str(an.nodeName)
                try:
                    value = an.childNodes[0].nodeValue
                except IndexError:
                    # Empty annotation entry: keep the key, warn, move on.
                    anno[key] = None
                    print >> sys.stderr, 'Warning!! unable to retrieve', an.nodeName, 'for annotation'
                    continue
                try:
                    # Numeric entries are stored as integers.
                    value = int(value)
                except (TypeError, ValueError):
                    # Not an integer: keep the raw node value.
                    pass
                anno[key] = value
            lexunit['annotation'] = anno

        elif gn.nodeName == 'lexemes':
            lexemes = {}
            for gsn in getNoneTextChildNodes(gn):
                lexeme = {}
                loadXMLAttributes(lexeme, gsn.attributes)
                # store the actual word (raises IndexError if the lexeme
                # element has no children)
                lexeme['lexeme'] = gsn.childNodes[0].nodeValue
                lexemes[lexeme['ID']] = lexeme
            lexunit['lexeme'] = lexemes

        elif gn.nodeName == 'semTypes':
            semTypes = {}
            for gsn in getNoneTextChildNodes(gn):
                semType = {}
                loadXMLAttributes(semType, gsn.attributes)
                semTypes[semType['ID']] = semType
            lexunit['semtypes'] = semTypes

        else:
            print >> sys.stderr, 'Error, encounted the node:', gn.nodeName, 'in', lexunitNode.nodeName, 'lexunit'
            return None

    return lexunit