def buildParseGraphFromBioInfer(self, tokenElements, dependencyElements, reverseDependencies=False):
    """Build the parse graph nodes from BioInfer ElementTree elements.

    NOTE(review): a method with this same name is defined again further
    down in this file; if both live in the same class, the later
    definition is the one that takes effect.

    Args:
        tokenElements: iterable of token elements. Each needs the
            attributes "id" and "charOffset" (parsed with int()), may
            carry "POS" (defaults to "N/A"), and must have a "subtoken"
            child with a "text" attribute. Elements must be ordered by
            strictly increasing "charOffset".
        dependencyElements: iterable of dependency elements with the
            attributes "token1", "token2" (original token ids) and "type".
            A type starting with "<" points from token2 to token1; a type
            ending with ">" points from token1 to token2. The marker is
            stripped from the stored dependency type. Elements whose two
            tokens are the same are skipped.
        reverseDependencies: when true, the direction of every dependency
            is flipped (fro and to are swapped).

    Returns:
        A tuple (tokensById, dependenciesById). Token ids run from 1 in
        input order; dependency ids start at len(tokensById) + 99.

    Raises:
        SystemExit: if a dependency type has neither a leading "<" nor a
            trailing ">" (this includes an empty type string).
        KeyError: if a required attribute is missing or a dependency
            refers to an unknown token id.
    """
    tokensById = {}
    tokensByOrigId = {}
    dependenciesById = {}

    prevOffset = -1000
    for tokenElement in tokenElements:
        attributes = tokenElement.attrib
        node = ParseGraphNode()
        node.origid = attributes["id"]
        node.id = len(tokensById) + 1
        # dict.get works on Python 2 and 3 alike; has_key is Python 2 only.
        node.pos = attributes.get("POS", "N/A")
        text = tokenElement.find("subtoken").attrib["text"]
        node.text = text
        charFrom = int(attributes["charOffset"])
        assert prevOffset < charFrom, "token charOffsets must be strictly increasing"
        prevOffset = charFrom
        # The end offset is inclusive, hence the -1.
        node.charOffset = (charFrom, charFrom + len(text) - 1)
        tokensById[node.id] = node
        tokensByOrigId[node.origid] = node

    # Dependency ids start well above the token ids (1..len(tokensById)),
    # so the two id ranges never overlap.
    dependencyIndex = len(tokensById) + 99
    for dependencyElement in dependencyElements:
        attributes = dependencyElement.attrib
        if attributes["token1"] == attributes["token2"]:
            continue  # self-loops are not part of the graph

        # Resolve the direction marker before building anything, so a bad
        # (or empty) type always ends in the explicit exit below rather
        # than an IndexError from indexing an empty string.
        rawType = attributes["type"]
        if rawType.startswith("<"):
            pointsToFirst = True
            dependencyType = rawType[1:]
        elif rawType.endswith(">"):
            pointsToFirst = False
            dependencyType = rawType[:-1]
        else:
            sys.exit("Couldn't solve dependency type")

        firstToken = tokensByOrigId[attributes["token1"]]
        secondToken = tokensByOrigId[attributes["token2"]]
        if reverseDependencies:
            pointsToFirst = not pointsToFirst

        dependency = ParseGraphNode(True)
        dependency.dependencyType = dependencyType
        if pointsToFirst:
            dependency.fro, dependency.to = secondToken, firstToken
        else:
            dependency.fro, dependency.to = firstToken, secondToken

        # Register the dependency on both of its end points.
        tokensById[dependency.fro.id].dependencies.append(dependency)
        tokensById[dependency.to.id].dependencies.append(dependency)

        dependency.id = dependencyIndex
        assert dependency.id not in dependenciesById
        dependenciesById[dependency.id] = dependency
        dependencyIndex += 1

    return tokensById, dependenciesById
def addAnnotationDependency(self, token1, token2, type, directionality="UNIDIRECTIONAL"):
    """Record an annotation dependency between two tokens of the sentence.

    Args:
        token1: index of the source token; it is shifted by one before
            being used as a key into self.tokensById.
        token2: index of the target token, shifted the same way.
        type: value stored as the dependency type of the new node.
        directionality: value stored as the directionality of the new node.

    Returns:
        None when self.sentence.annotationDependencies already holds a
        node with equal end points, type and directionality (nothing is
        added in that case). Otherwise the node is appended and the result
        is True if at least one entry of self.dependenciesById connects
        the same two tokens in either direction, else False.

    Side effects:
        self.sentence.annotationDependenciesWithParseDependency grows by
        one for every parse dependency that connects the two tokens.
    """
    token1 = token1 + 1
    token2 = token2 + 1
    # POSITION 1 BEGIN
    # POSITION 1 END
    candidate = ParseGraphNode(True)
    source = self.tokensById[token1]
    target = self.tokensById[token2]
    candidate.fro = source
    candidate.to = target
    candidate.dependencyType = type
    candidate.directionality = directionality

    # An identical annotation dependency must not be stored twice.
    for existing in self.sentence.annotationDependencies:
        isDuplicate = (existing.fro == candidate.fro
                       and existing.to == candidate.to
                       and existing.dependencyType == candidate.dependencyType
                       and existing.directionality == candidate.directionality)
        if isDuplicate:
            return None
    self.sentence.annotationDependencies.append(candidate)

    # POSITION 2 BEGIN
    # Count the parse dependencies that link the same token pair,
    # regardless of their direction.
    hasParseDependency = False
    for parseDependency in self.dependenciesById.values():
        sameDirection = parseDependency.fro == source and parseDependency.to == target
        if not (sameDirection or (parseDependency.fro == target and parseDependency.to == source)):
            continue
        self.sentence.annotationDependenciesWithParseDependency += 1
        hasParseDependency = True
    # POSITION 2 END
    return hasParseDependency
def buildParseGraphFromBioInfer(self, tokenElements, dependencyElements, reverseDependencies=False):
    """Build the parse graph nodes from BioInfer ElementTree elements.

    NOTE(review): a method with this same name is also defined earlier in
    this file; if both live in the same class, this later definition is
    the one that takes effect.

    Args:
        tokenElements: iterable of token elements. Each needs the
            attributes "id" and "charOffset" (parsed with int()), may
            carry "POS" (defaults to "N/A"), and must have a "subtoken"
            child with a "text" attribute. Elements must be ordered by
            strictly increasing "charOffset".
        dependencyElements: iterable of dependency elements with the
            attributes "token1", "token2" (original token ids) and "type".
            A type starting with "<" points from token2 to token1; a type
            ending with ">" points from token1 to token2. The marker is
            stripped from the stored dependency type. Elements whose two
            tokens are the same are skipped.
        reverseDependencies: when true, the direction of every dependency
            is flipped (fro and to are swapped).

    Returns:
        A tuple (tokensById, dependenciesById). Token ids run from 1 in
        input order; dependency ids start at len(tokensById) + 99.

    Raises:
        SystemExit: if a dependency type has neither a leading "<" nor a
            trailing ">" (this includes an empty type string).
        KeyError: if a required attribute is missing or a dependency
            refers to an unknown token id.
    """
    tokensById = {}
    tokensByOrigId = {}
    dependenciesById = {}

    prevOffset = -1000
    for tokenElement in tokenElements:
        attributes = tokenElement.attrib
        node = ParseGraphNode()
        node.origid = attributes["id"]
        node.id = len(tokensById) + 1
        # dict.get works on Python 2 and 3 alike; has_key is Python 2 only.
        node.pos = attributes.get("POS", "N/A")
        text = tokenElement.find("subtoken").attrib["text"]
        node.text = text
        charFrom = int(attributes["charOffset"])
        assert prevOffset < charFrom, "token charOffsets must be strictly increasing"
        prevOffset = charFrom
        # The end offset is inclusive, hence the -1.
        node.charOffset = (charFrom, charFrom + len(text) - 1)
        tokensById[node.id] = node
        tokensByOrigId[node.origid] = node

    # Dependency ids start well above the token ids (1..len(tokensById)),
    # so the two id ranges never overlap.
    dependencyIndex = len(tokensById) + 99
    for dependencyElement in dependencyElements:
        attributes = dependencyElement.attrib
        if attributes["token1"] == attributes["token2"]:
            continue  # self-loops are not part of the graph

        # Resolve the direction marker before building anything, so a bad
        # (or empty) type always ends in the explicit exit below rather
        # than an IndexError from indexing an empty string.
        rawType = attributes["type"]
        if rawType.startswith("<"):
            pointsToFirst = True
            dependencyType = rawType[1:]
        elif rawType.endswith(">"):
            pointsToFirst = False
            dependencyType = rawType[:-1]
        else:
            sys.exit("Couldn't solve dependency type")

        firstToken = tokensByOrigId[attributes["token1"]]
        secondToken = tokensByOrigId[attributes["token2"]]
        if reverseDependencies:
            pointsToFirst = not pointsToFirst

        dependency = ParseGraphNode(True)
        dependency.dependencyType = dependencyType
        if pointsToFirst:
            dependency.fro, dependency.to = secondToken, firstToken
        else:
            dependency.fro, dependency.to = firstToken, secondToken

        # Register the dependency on both of its end points.
        tokensById[dependency.fro.id].dependencies.append(dependency)
        tokensById[dependency.to.id].dependencies.append(dependency)

        dependency.id = dependencyIndex
        assert dependency.id not in dependenciesById
        dependenciesById[dependency.id] = dependency
        dependencyIndex += 1

    return tokensById, dependenciesById