예제 #1
0
    def addAnnotationDependency(self,
                                token1,
                                token2,
                                type,
                                directionality="UNIDIRECTIONAL"):
        """Add an annotation dependency between two tokens of the sentence.

        Both token indices are incremented by one before they are used as
        keys into ``self.tokensById``.

        Args:
            token1: index of the token the dependency starts from.
            token2: index of the token the dependency points to.
            type: dependency type stored on the new node (the name shadows
                the builtin but is kept because it is part of the signature).
            directionality: directionality label stored on the new node.

        Returns:
            None if ``self.sentence.annotationDependencies`` already holds a
            dependency with equal endpoints, type and directionality (nothing
            is added in that case). Otherwise the new dependency is appended
            and the result is True when at least one entry of
            ``self.dependenciesById`` connects the same two tokens in either
            direction, else False.
        """
        firstKey = token1 + 1
        secondKey = token2 + 1
        # POSITION 1 BEGIN
        # POSITION 1 END

        candidate = ParseGraphNode(True)
        source = self.tokensById[firstKey]
        target = self.tokensById[secondKey]
        candidate.fro = source
        candidate.to = target
        candidate.dependencyType = type
        candidate.directionality = directionality

        # Refuse to store the same annotation dependency twice.
        alreadyStored = any(
            existing.fro == candidate.fro
            and existing.to == candidate.to
            and existing.dependencyType == candidate.dependencyType
            and existing.directionality == candidate.directionality
            for existing in self.sentence.annotationDependencies)
        if alreadyStored:
            return None

        self.sentence.annotationDependencies.append(candidate)
        # POSITION 2 BEGIN
        # Every parse dependency joining the two tokens (in either direction)
        # bumps the sentence-level counter once.
        hasParseDependency = False
        for parseDependency in self.dependenciesById.values():
            sameDirection = (parseDependency.fro == source
                             and parseDependency.to == target)
            if sameDirection or (parseDependency.fro == target
                                 and parseDependency.to == source):
                self.sentence.annotationDependenciesWithParseDependency += 1
                hasParseDependency = True
        # POSITION 2 END
        return hasParseDependency
예제 #2
0
    def buildParseGraphFromBioInfer(self,
                                    tokenElements,
                                    dependencyElements,
                                    reverseDependencies=False):
        """Build token and dependency nodes from BioInfer ElementTree elements.

        Args:
            tokenElements: iterable of token elements. Each needs the
                attributes "id" and "charOffset" and a "subtoken" child with
                a "text" attribute; "POS" is optional ("N/A" is used when it
                is missing). Elements must come in strictly increasing
                "charOffset" order (checked with an assert).
            dependencyElements: iterable of dependency elements with the
                attributes "token1", "token2" and "type". The type must start
                with "<" (token2 -> token1) or end with ">" (token1 -> token2);
                the marker is stripped from the stored type. Elements whose
                two tokens are identical are skipped.
            reverseDependencies: when true, the fro/to ends of every
                dependency are swapped.

        Returns:
            A tuple ``(tokensById, dependenciesById)``. Token ids are assigned
            sequentially starting at 1; dependency ids are assigned
            sequentially starting at ``len(tokensById) + 99``.

        Raises:
            SystemExit: if a dependency type carries no direction marker
                (including an empty type string).
        """
        tokensById = {}
        tokensByOrigId = {}
        dependenciesById = {}
        prevOffset = -1000
        for tokenElement in tokenElements:
            attrib = tokenElement.attrib
            node = ParseGraphNode()

            node.origid = attrib["id"]
            node.id = len(tokensById) + 1
            # dict.get replaces has_key, which no longer exists in Python 3.
            node.pos = attrib.get("POS", "N/A")
            text = tokenElement.find("subtoken").attrib["text"]
            node.text = text
            charFrom = int(attrib["charOffset"])
            assert prevOffset < charFrom
            prevOffset = charFrom
            # The end offset is inclusive.
            node.charOffset = (charFrom, charFrom + len(text) - 1)
            tokensById[node.id] = node
            tokensByOrigId[node.origid] = node

        dependencyIndex = len(tokensById) + 99
        for dependencyElement in dependencyElements:
            attrib = dependencyElement.attrib
            if attrib["token1"] == attrib["token2"]:
                continue

            # Resolve direction first; startswith/endswith also cope with an
            # empty type string instead of failing with IndexError.
            rawType = attrib["type"]
            if rawType.startswith("<"):
                froKey, toKey = "token2", "token1"
                dependencyType = rawType[1:]
            elif rawType.endswith(">"):
                froKey, toKey = "token1", "token2"
                dependencyType = rawType[:-1]
            else:
                sys.exit("Couldn't solve dependency type")
            if reverseDependencies:
                froKey, toKey = toKey, froKey

            froNode = tokensByOrigId[attrib[froKey]]
            toNode = tokensByOrigId[attrib[toKey]]

            dependency = ParseGraphNode(True)
            dependency.dependencyType = dependencyType
            dependency.fro = froNode
            dependency.to = toNode

            # Both end tokens keep a reference to the dependency.
            froNode.dependencies.append(dependency)
            toNode.dependencies.append(dependency)

            dependency.id = dependencyIndex
            assert dependency.id not in dependenciesById
            dependenciesById[dependency.id] = dependency
            dependencyIndex += 1

        return tokensById, dependenciesById
예제 #3
0
 def buildParseGraphFromBioInfer(self, tokenElements, dependencyElements, reverseDependencies=False):
     """Build token and dependency nodes from BioInfer ElementTree elements.

     Args:
         tokenElements: iterable of token elements. Each needs the
             attributes "id" and "charOffset" and a "subtoken" child with
             a "text" attribute; "POS" is optional ("N/A" is used when it
             is missing). Elements must come in strictly increasing
             "charOffset" order (checked with an assert).
         dependencyElements: iterable of dependency elements with the
             attributes "token1", "token2" and "type". The type must start
             with "<" (token2 -> token1) or end with ">" (token1 -> token2);
             the marker is stripped from the stored type. Elements whose
             two tokens are identical are skipped.
         reverseDependencies: when true, the fro/to ends of every
             dependency are swapped.

     Returns:
         A tuple ``(tokensById, dependenciesById)``. Token ids are assigned
         sequentially starting at 1; dependency ids are assigned
         sequentially starting at ``len(tokensById) + 99``.

     Raises:
         SystemExit: if a dependency type carries no direction marker
             (including an empty type string).
     """
     tokensById = {}
     tokensByOrigId = {}
     dependenciesById = {}
     prevOffset = -1000
     for tokenElement in tokenElements:
         attrib = tokenElement.attrib
         node = ParseGraphNode()

         node.origid = attrib["id"]
         node.id = len(tokensById) + 1
         # dict.get replaces has_key, which no longer exists in Python 3.
         node.pos = attrib.get("POS", "N/A")
         text = tokenElement.find("subtoken").attrib["text"]
         node.text = text
         charFrom = int(attrib["charOffset"])
         assert prevOffset < charFrom
         prevOffset = charFrom
         # The end offset is inclusive.
         node.charOffset = (charFrom, charFrom + len(text) - 1)
         tokensById[node.id] = node
         tokensByOrigId[node.origid] = node

     dependencyIndex = len(tokensById) + 99
     for dependencyElement in dependencyElements:
         attrib = dependencyElement.attrib
         if attrib["token1"] == attrib["token2"]:
             continue

         # Resolve direction first; startswith/endswith also cope with an
         # empty type string instead of failing with IndexError.
         rawType = attrib["type"]
         if rawType.startswith("<"):
             froKey, toKey = "token2", "token1"
             dependencyType = rawType[1:]
         elif rawType.endswith(">"):
             froKey, toKey = "token1", "token2"
             dependencyType = rawType[:-1]
         else:
             sys.exit("Couldn't solve dependency type")
         if reverseDependencies:
             froKey, toKey = toKey, froKey

         froNode = tokensByOrigId[attrib[froKey]]
         toNode = tokensByOrigId[attrib[toKey]]

         dependency = ParseGraphNode(True)
         dependency.dependencyType = dependencyType
         dependency.fro = froNode
         dependency.to = toNode

         # Both end tokens keep a reference to the dependency.
         froNode.dependencies.append(dependency)
         toNode.dependencies.append(dependency)

         dependency.id = dependencyIndex
         assert dependency.id not in dependenciesById
         dependenciesById[dependency.id] = dependency
         dependencyIndex += 1

     return tokensById, dependenciesById