def writeXMLSentence(self, examples, predictionsByExample, sentenceObject, classSet, classIds, goldSentence=None, exampleStyle=None, structureAnalyzer=None):
    """Rebuild one sentence element from classified examples.

    Detaches the parse-analyses element, strips the old interactions and
    non-name entities, then delegates the actual reconstruction to the
    mapping/insertion helper methods before re-attaching everything.

    :param examples: examples built for this sentence
    :param predictionsByExample: example id -> prediction
    :param sentenceObject: sentence wrapper (has .sentence, .entities, .entitiesById)
    :param classSet/classIds: class name <-> id mappings
    :param goldSentence, exampleStyle, structureAnalyzer: unused in this variant,
        kept for signature compatibility with the other writers
    """
    sentenceElement = sentenceObject.sentence
    self.sentenceId = sentenceElement.get("id")
    self.assertSameSentence(examples, self.sentenceId)
    # detach analyses-element ("sentenceanalyses" is the legacy tag name)
    sentenceAnalysesElement = sentenceElement.find("sentenceanalyses")
    if sentenceAnalysesElement is None:
        sentenceAnalysesElement = sentenceElement.find("analyses")
    if sentenceAnalysesElement is not None:
        sentenceElement.remove(sentenceAnalysesElement)
    # remove pairs and interactions
    interactions = self.removeChildren(sentenceElement, ["interaction"])
    arguments, relations = self.getInteractionsAndRelations(interactions)
    # remove entities (predicted ones only; named entities are kept)
    self.removeNonNameEntities(sentenceElement)
    _, interactionsById = self.mapInteractions(arguments + relations, sentenceObject.entities)
    # id counters continue from the highest id still present in the sentence
    self.entityCount = IDUtils.getNextFreeId(sentenceElement.findall("entity"))
    self.interactionCount = IDUtils.getNextFreeId(sentenceElement.findall("interaction"))
    self.newEntities = []
    self.newInteractions = []
    self.mapEntities(sentenceObject.entities)
    self.mapExamples(examples, sentenceObject)
    argumentsByExample = self.connectArgumentsToExamples(examples, predictionsByExample, interactionsById, sentenceObject.entitiesById)
    self.mapEntityDuplicates(sentenceObject.entities)
    self.insertExamples(examples, predictionsByExample, argumentsByExample, sentenceObject, classSet, classIds)
    self.insertRelations(relations, sentenceObject.entitiesById)
    # Attach the new elements
    for element in self.newEntities + self.newInteractions:
        sentenceElement.append(element)
    # re-attach the analyses-element
    if sentenceAnalysesElement is not None:
        sentenceElement.append(sentenceAnalysesElement)
def writeXMLSentence(self, examples, predictionsByExample, sentenceObject, classSet, classIds, goldSentence=None, exampleStyle=None, structureAnalyzer=None):
    """Rebuild one sentence element, adding a predicted entity per example.

    Each example carries its own character offset and phrase type; the entity
    text is sliced directly from the sentence text using that offset.

    :param examples: examples built for this sentence
    :param predictionsByExample: example id -> prediction
    :param sentenceObject: sentence wrapper (has .sentence, .tokens)
    :param classSet/classIds: class name <-> id mappings
    :param goldSentence, exampleStyle, structureAnalyzer: unused in this variant,
        kept for signature compatibility with the other writers
    """
    self.assertSameSentence(examples)
    sentenceElement = sentenceObject.sentence
    sentenceId = sentenceElement.get("id")
    sentenceText = sentenceElement.get("text")
    # detach analyses-element ("sentenceanalyses" is the legacy tag name)
    sentenceAnalysesElement = sentenceElement.find("sentenceanalyses")
    if sentenceAnalysesElement is None:
        sentenceAnalysesElement = sentenceElement.find("analyses")
    if sentenceAnalysesElement is not None:
        sentenceElement.remove(sentenceAnalysesElement)
    # remove pairs and interactions
    self.removeChildren(sentenceElement, ["pair", "interaction"])
    # remove entities; new ids continue from the highest remaining one
    newEntityIdCount = IDUtils.getNextFreeId(sentenceElement.findall("entity"))
    self.removeNonNameEntities(sentenceElement)
    # add new pairs
    for example in examples:
        prediction = predictionsByExample[example[0]]
        entityElement = ET.Element("entity")
        #entityElement.attrib["given"] = "False"
        # resolve the head token id to the token element; fail loudly if missing
        headTokenId = example[3]["t"]
        headToken = None
        for token in sentenceObject.tokens:
            if token.get("id") == headTokenId:
                headToken = token
                break
        assert headToken is not None, example[3]
        entityElement.set("charOffset", example[3]["charOffset"])
        entityElement.set("headOffset", headToken.get("charOffset"))
        entityElement.set("phraseType", example[3]["ptype"])
        entOffset = Range.charOffsetToSingleTuple(example[3]["charOffset"])
        entityElement.set("text", sentenceText[entOffset[0]:entOffset[1]])
        entityElement.set("id", sentenceId + ".e" + str(newEntityIdCount))
        self.setElementType(entityElement, prediction, classSet, classIds)
        newEntityIdCount += 1
        sentenceElement.append(entityElement)
    # re-attach the analyses-element
    if sentenceAnalysesElement is not None:
        sentenceElement.append(sentenceAnalysesElement)
def writeXMLSentence(self, examples, predictionsByExample, sentenceObject, classSet, classIds, goldSentence=None, exampleStyle=None, structureAnalyzer=None):
    """Rebuild one sentence element from classified examples.

    Detaches the parse-analyses element, strips the old interactions and
    non-name entities, then delegates the actual reconstruction to the
    mapping/insertion helper methods before re-attaching everything.

    :param examples: examples built for this sentence
    :param predictionsByExample: example id -> prediction
    :param sentenceObject: sentence wrapper (has .sentence, .entities, .entitiesById)
    :param classSet/classIds: class name <-> id mappings
    :param goldSentence, exampleStyle, structureAnalyzer: unused in this variant,
        kept for signature compatibility with the other writers
    """
    sentenceElement = sentenceObject.sentence
    self.sentenceId = sentenceElement.get("id")
    self.assertSameSentence(examples, self.sentenceId)
    # detach analyses-element ("sentenceanalyses" is the legacy tag name)
    sentenceAnalysesElement = sentenceElement.find("sentenceanalyses")
    if sentenceAnalysesElement is None:
        sentenceAnalysesElement = sentenceElement.find("analyses")
    if sentenceAnalysesElement is not None:
        sentenceElement.remove(sentenceAnalysesElement)
    # remove pairs and interactions
    interactions = self.removeChildren(sentenceElement, ["interaction"])
    arguments, relations = self.getInteractionsAndRelations(interactions)
    # remove entities (predicted ones only; named entities are kept)
    self.removeNonNameEntities(sentenceElement)
    _, interactionsById = self.mapInteractions(arguments + relations, sentenceObject.entities)
    # id counters continue from the highest id still present in the sentence
    self.entityCount = IDUtils.getNextFreeId(sentenceElement.findall("entity"))
    self.interactionCount = IDUtils.getNextFreeId(sentenceElement.findall("interaction"))
    self.newEntities = []
    self.newInteractions = []
    self.mapEntities(sentenceObject.entities)
    self.mapExamples(examples, sentenceObject)
    argumentsByExample = self.connectArgumentsToExamples(examples, predictionsByExample, interactionsById, sentenceObject.entitiesById)
    self.mapEntityDuplicates(sentenceObject.entities)
    self.insertExamples(examples, predictionsByExample, argumentsByExample, sentenceObject, classSet, classIds)
    self.insertRelations(relations, sentenceObject.entitiesById)
    # Attach the new elements
    for element in self.newEntities + self.newInteractions:
        sentenceElement.append(element)
    # re-attach the analyses-element
    if sentenceAnalysesElement is not None:
        sentenceElement.append(sentenceAnalysesElement)
def writeXMLSentence(self, examples, predictionsByExample, sentenceObject, classSet, classIds, goldSentence=None, exampleStyle=None, structureAnalyzer=None):
    """Rebuild one sentence element, adding a predicted entity per example.

    Each example carries its own character offset and phrase type; the entity
    text is sliced directly from the sentence text using that offset.

    :param examples: examples built for this sentence
    :param predictionsByExample: example id -> prediction
    :param sentenceObject: sentence wrapper (has .sentence, .tokens)
    :param classSet/classIds: class name <-> id mappings
    :param goldSentence, exampleStyle, structureAnalyzer: unused in this variant,
        kept for signature compatibility with the other writers
    """
    self.assertSameSentence(examples)
    sentenceElement = sentenceObject.sentence
    sentenceId = sentenceElement.get("id")
    sentenceText = sentenceElement.get("text")
    # detach analyses-element ("sentenceanalyses" is the legacy tag name)
    sentenceAnalysesElement = sentenceElement.find("sentenceanalyses")
    if sentenceAnalysesElement is None:
        sentenceAnalysesElement = sentenceElement.find("analyses")
    if sentenceAnalysesElement is not None:
        sentenceElement.remove(sentenceAnalysesElement)
    # remove pairs and interactions
    self.removeChildren(sentenceElement, ["pair", "interaction"])
    # remove entities; new ids continue from the highest remaining one
    newEntityIdCount = IDUtils.getNextFreeId(sentenceElement.findall("entity"))
    self.removeNonNameEntities(sentenceElement)
    # add new pairs
    for example in examples:
        prediction = predictionsByExample[example[0]]
        entityElement = ET.Element("entity")
        #entityElement.attrib["given"] = "False"
        # resolve the head token id to the token element; fail loudly if missing
        headTokenId = example[3]["t"]
        headToken = None
        for token in sentenceObject.tokens:
            if token.get("id") == headTokenId:
                headToken = token
                break
        assert headToken is not None, example[3]
        entityElement.set("charOffset", example[3]["charOffset"])
        entityElement.set("headOffset", headToken.get("charOffset"))
        entityElement.set("phraseType", example[3]["ptype"])
        entOffset = Range.charOffsetToSingleTuple(example[3]["charOffset"])
        entityElement.set("text", sentenceText[entOffset[0]:entOffset[1]])
        entityElement.set("id", sentenceId + ".e" + str(newEntityIdCount))
        self.setElementType(entityElement, prediction, classSet, classIds)
        newEntityIdCount += 1
        sentenceElement.append(entityElement)
    # re-attach the analyses-element
    if sentenceAnalysesElement is not None:
        sentenceElement.append(sentenceAnalysesElement)
def writeXMLSentence(self, examples, predictionsByExample, sentenceObject, classSet, classIds, goldSentence=None, exampleStyle=None, structureAnalyzer=None):
    """Rebuild one sentence element with trigger entities for predictions.

    One entity is created per predicted type per example (merged classes are
    split via getElementTypes). When self.insertWeights is set, gold entities
    receive the predicted confidence instead, and gold type information is
    recorded on the new elements.

    :param examples: examples built for this sentence
    :param predictionsByExample: example id -> prediction
    :param sentenceObject: sentence wrapper (has .sentence, .tokens)
    :param classSet/classIds: class name <-> id mappings
    :param goldSentence: optional gold sentence used for goldType/conf transfer
    :param exampleStyle: style dict; "names" triggers removal of all entities
    :param structureAnalyzer: used to flag event-type entities
    """
    self.assertSameSentence(examples)
    extensionRequested = False
    sentenceElement = sentenceObject.sentence
    sentenceId = sentenceElement.get("id")
    # detach analyses-element ("sentenceanalyses" is the legacy tag name)
    sentenceAnalysesElement = sentenceElement.find("sentenceanalyses")
    if sentenceAnalysesElement is None:
        sentenceAnalysesElement = sentenceElement.find("analyses")
    if sentenceAnalysesElement is not None:
        sentenceElement.remove(sentenceAnalysesElement)
    # remove pairs and interactions
    interactions = self.removeChildren(sentenceElement, ["pair", "interaction"])
    # remove entities; new ids continue from the highest remaining one
    newEntityIdCount = IDUtils.getNextFreeId(sentenceElement.findall("entity"))
    nonNameEntities = self.removeNonNameEntities(sentenceElement)
    # remove named entities if needed
    if exampleStyle is not None and "names" in exampleStyle and exampleStyle["names"]:
        # remove all entities, including names
        self.removeChildren(sentenceElement, ["entity"])
    # gold sentence elements, grouped by head offset
    goldEntityTypeByHeadOffset = {}
    goldEntityByHeadOffset = {}
    if goldSentence is not None:
        for entity in goldSentence.entities:
            headOffset = entity.get("headOffset")
            if headOffset not in goldEntityTypeByHeadOffset:
                goldEntityTypeByHeadOffset[headOffset] = []
                goldEntityByHeadOffset[headOffset] = []
            goldEntityTypeByHeadOffset[headOffset].append(entity)
            goldEntityByHeadOffset[headOffset].append(entity)
        # collapse each offset's entity list into a single merged type string
        for key in goldEntityTypeByHeadOffset:
            goldEntityTypeByHeadOffset[key] = self.getMergedEntityType(goldEntityTypeByHeadOffset[key])
        # tokens without a gold entity count as negatives
        for token in sentenceObject.tokens:
            if token.get("charOffset") not in goldEntityTypeByHeadOffset:
                goldEntityTypeByHeadOffset[token.get("charOffset")] = "neg"
    # add new pairs
    for example in examples:
        # Entity examples always refer to a single head token
        headTokenId = example[3]["t"]
        headToken = None
        for token in sentenceObject.tokens:
            if token.get("id") == headTokenId:
                headToken = token
                break
        assert headToken is not None, example[3]
        # Determine if additional processing is requested
        unmergeEPINeg = None
        if "unmergeneg" in example[3] and example[3]["unmergeneg"] == "epi":
            unmergeEPINeg = headToken.get("text")
        if "trigex" in example[3] and example[3]["trigex"] == "bb":
            extensionRequested = True
        # Make entities for positive predictions
        prediction = predictionsByExample[example[0]]
        predictionString = self.getPredictionStrengthString(prediction, classSet, classIds)
        for eType in self.getElementTypes(prediction, classSet, classIds, unmergeEPINegText=unmergeEPINeg):  # split merged classes
            entityElement = ET.Element("entity")
            #entityElement.set("given", "False")
            entityElement.set("charOffset", headToken.get("charOffset"))
            entityElement.set("headOffset", headToken.get("charOffset"))
            entityElement.set("text", headToken.get("text"))
            entityElement.set("id", sentenceId + ".e" + str(newEntityIdCount))
            entityElement.set("type", eType)
            entityElement.set("conf", predictionString)
            if structureAnalyzer.isEvent(eType):
                entityElement.set("event", "True")
            #self.setElementType(entityElement, prediction, classSet, classIds, unmergeEPINeg=unmergeEPINeg)
            if self.insertWeights:  # in other words, use gold types
                headOffset = headToken.get("charOffset")
                if headOffset in goldEntityByHeadOffset:
                    for entity in goldEntityByHeadOffset[headOffset]:
                        entity.set("conf", entityElement.get("conf"))
                if headToken.get("charOffset") in goldEntityTypeByHeadOffset:
                    entityElement.set("goldType", goldEntityTypeByHeadOffset[headToken.get("charOffset")])
            if "goldIds" in example[3]:  # The entities for which this example was built
                entityElement.set("goldIds", example[3]["goldIds"])
            # Append non-negatives without a gold counterpart, or everything
            # when not inserting weights
            if (entityElement.get("type") != "neg" and entityElement.get("headOffset") not in goldEntityByHeadOffset) or not self.insertWeights:
                newEntityIdCount += 1
                sentenceElement.append(entityElement)
            elif entityElement.get("type") == "neg":
                pass
                #newEntityIdCount += 1
                #sentenceElement.append(entityElement)
    # if only adding weights, re-attach interactions and gold entities
    if self.insertWeights:
        for entity in nonNameEntities:
            sentenceElement.append(entity)
        for interaction in interactions:
            sentenceElement.append(interaction)
    # re-attach the analyses-element
    if sentenceAnalysesElement is not None:
        sentenceElement.append(sentenceAnalysesElement)
    # Extend bacteria triggers
    if extensionRequested:
        Utils.InteractionXML.ExtendTriggers.extend(sentenceElement, entityTypes=["Bacterium"])
def writeXMLSentence(self, examples, predictionsByExample, sentenceObject, classSet, classIds, goldSentence=None, exampleStyle=None, structureAnalyzer=None):
    """Rebuild one sentence element with trigger entities for predictions.

    Variant that honors an optional per-example "define_offset" override for
    the entity's charOffset. When self.insertWeights is set, gold entities
    receive the predicted confidence and gold type info is recorded.

    :param examples: examples built for this sentence
    :param predictionsByExample: example id -> prediction
    :param sentenceObject: sentence wrapper (has .sentence, .tokens)
    :param classSet/classIds: class name <-> id mappings
    :param goldSentence: optional gold sentence used for goldType/conf transfer
    :param exampleStyle: style dict; "names" triggers removal of all entities
    :param structureAnalyzer: used to flag event-type entities
    """
    self.assertSameSentence(examples)
    extensionRequested = False
    sentenceElement = sentenceObject.sentence
    sentenceId = sentenceElement.get("id")
    # detach analyses-element ("sentenceanalyses" is the legacy tag name)
    sentenceAnalysesElement = sentenceElement.find("sentenceanalyses")
    if sentenceAnalysesElement is None:
        sentenceAnalysesElement = sentenceElement.find("analyses")
    if sentenceAnalysesElement is not None:
        sentenceElement.remove(sentenceAnalysesElement)
    # remove pairs and interactions
    interactions = self.removeChildren(sentenceElement, ["pair", "interaction"])
    # remove entities; new ids continue from the highest remaining one
    newEntityIdCount = IDUtils.getNextFreeId(sentenceElement.findall("entity"))
    nonNameEntities = self.removeNonNameEntities(sentenceElement)
    # remove named entities if needed
    if exampleStyle is not None and "names" in exampleStyle and exampleStyle["names"]:
        # remove all entities, including names
        self.removeChildren(sentenceElement, ["entity"])
    # gold sentence elements, grouped by head offset
    goldEntityTypeByHeadOffset = {}
    goldEntityByHeadOffset = {}
    if goldSentence is not None:
        for entity in goldSentence.entities:
            headOffset = entity.get("headOffset")
            if headOffset not in goldEntityTypeByHeadOffset:
                goldEntityTypeByHeadOffset[headOffset] = []
                goldEntityByHeadOffset[headOffset] = []
            goldEntityTypeByHeadOffset[headOffset].append(entity)
            goldEntityByHeadOffset[headOffset].append(entity)
        # collapse each offset's entity list into a single merged type string
        for key in goldEntityTypeByHeadOffset:
            goldEntityTypeByHeadOffset[key] = self.getMergedEntityType(goldEntityTypeByHeadOffset[key])
        # tokens without a gold entity count as negatives
        for token in sentenceObject.tokens:
            if token.get("charOffset") not in goldEntityTypeByHeadOffset:
                goldEntityTypeByHeadOffset[token.get("charOffset")] = "neg"
    # add new pairs
    for example in examples:
        # Entity examples always refer to a single head token
        headTokenId = example[3]["t"]
        headToken = None
        for token in sentenceObject.tokens:
            if token.get("id") == headTokenId:
                headToken = token
                break
        assert headToken is not None, example[3]
        # Determine if additional processing is requested
        unmergeEPINeg = None
        if "unmergeneg" in example[3] and example[3]["unmergeneg"] == "epi":
            unmergeEPINeg = headToken.get("text")
        if "trigex" in example[3] and example[3]["trigex"] == "bb":
            extensionRequested = True
        # Make entities for positive predictions
        prediction = predictionsByExample[example[0]]
        predictionString = self.getPredictionStrengthString(prediction, classSet, classIds)
        for eType in self.getElementTypes(prediction, classSet, classIds, unmergeEPINegText=unmergeEPINeg):  # split merged classes
            entityElement = ET.Element("entity")
            #entityElement.set("given", "False")
            entityElement.set("charOffset", headToken.get("charOffset"))
            # an explicit offset in the example overrides the head token's
            if "define_offset" in example[3]:
                entityElement.set("charOffset", example[3]["define_offset"])
            entityElement.set("headOffset", headToken.get("charOffset"))
            entityElement.set("text", headToken.get("text"))
            entityElement.set("id", sentenceId + ".e" + str(newEntityIdCount))
            entityElement.set("type", eType)
            entityElement.set("conf", predictionString)
            if structureAnalyzer.isEvent(eType):
                entityElement.set("event", "True")
            #self.setElementType(entityElement, prediction, classSet, classIds, unmergeEPINeg=unmergeEPINeg)
            if self.insertWeights:  # in other words, use gold types
                headOffset = headToken.get("charOffset")
                if headOffset in goldEntityByHeadOffset:
                    for entity in goldEntityByHeadOffset[headOffset]:
                        entity.set("conf", entityElement.get("conf"))
                if headToken.get("charOffset") in goldEntityTypeByHeadOffset:
                    entityElement.set("goldType", goldEntityTypeByHeadOffset[headToken.get("charOffset")])
            if "goldIds" in example[3]:  # The entities for which this example was built
                entityElement.set("goldIds", example[3]["goldIds"])
            # NOTE(review): a sibling variant of this method uses "or not
            # self.insertWeights" here; with "and", nothing is appended while
            # inserting weights — confirm this is intentional.
            if (entityElement.get("type") != "neg" and entityElement.get("headOffset") not in goldEntityByHeadOffset) and not self.insertWeights:
                newEntityIdCount += 1
                sentenceElement.append(entityElement)
            elif entityElement.get("type") == "neg":
                pass
                #newEntityIdCount += 1
                #sentenceElement.append(entityElement)
    # if only adding weights, re-attach interactions and gold entities
    if self.insertWeights:
        for entity in nonNameEntities:
            sentenceElement.append(entity)
        for interaction in interactions:
            sentenceElement.append(interaction)
    # re-attach the analyses-element
    if sentenceAnalysesElement is not None:
        sentenceElement.append(sentenceAnalysesElement)
    # Extend bacteria triggers
    if extensionRequested:
        Utils.InteractionXML.ExtendTriggers.extend(sentenceElement, entityTypes=["Bacterium"])
def run(input, output, eventDir, parse="split-mccc-preparsed", verbose=False): print >> sys.stderr, "Loading corpus", input corpusTree = ETUtils.ETFromObj(input) print >> sys.stderr, "Corpus file loaded" corpusRoot = corpusTree.getroot() counts = defaultdict(int) for document in corpusRoot.findall("document"): sentDict = None pmid = document.get("pmid") isPMC = False for sentence in document.findall("sentence"): counts["sentences"] += 1 sentenceId = str(sentence.get("id")) + "/" + str(sentence.get("origId")) if verbose: print "Processing", sentenceId if sentDict == None: if sentence.get("origId") != None: assert pmid == None sentDict = loadEventXML( eventDir + "/" + sentence.get("origId").split(".")[0] + ".xml" , verbose=verbose) else: #pmid = sentence.get("pmid") assert pmid != None if pmid.startswith("PMC"): isPMC = True sentDict = {} else: assert pmid.startswith("PMID") sentDict = loadEventXML( eventDir + "/" + pmid.split("-", 1)[-1] + ".xml" , verbose=verbose) interactionXMLText = sentence.get("text") if not sentDict.has_key(interactionXMLText): counts["missing-sentences"] += 1 if isPMC: counts["missing-sentences-PMC"] += 1 if verbose: print "Missing sentence:", pmid, (sentenceId, sentDict, sentence.get("text")) else: sentenceAnalyses = sentence.find("sentenceanalyses") if sentenceAnalyses != None: sentence.remove(sentenceAnalyses) entityIdCount = IDUtils.getNextFreeId(sentence.findall("entity")) events = sentDict[interactionXMLText] events.sort() for event in events: if not keepEvent(event[2]): counts["filtered-triggers"] += 1 continue trigger = ET.Element("entity") trigger.set("isName", "False") trigger.set("charOffset", str(event[0]) + "-" + str(event[1])) trigger.set("type", str(event[2])) trigger.set("text", str(event[3])) trigger.set("source", "GENIA_event_annotation_0.9") trigger.set("id", sentence.get("id") + ".e" + str(entityIdCount)) entityIdCount += 1 counts["added-triggers"] += 1 sentence.append(trigger) if sentenceAnalyses != None: 
sentence.append(sentenceAnalyses) FindHeads.findHeads(corpusTree, parse, removeExisting=False) removeDuplicates(corpusRoot) print counts if output != None: print >> sys.stderr, "Writing output to", output ETUtils.write(corpusRoot, output) return corpusTree
def run(input, output, eventDir, parse="split-mccc-preparsed", verbose=False): print >> sys.stderr, "Loading corpus", input corpusTree = ETUtils.ETFromObj(input) print >> sys.stderr, "Corpus file loaded" corpusRoot = corpusTree.getroot() counts = defaultdict(int) for document in corpusRoot.findall("document"): sentDict = None pmid = document.get("pmid") isPMC = False for sentence in document.findall("sentence"): counts["sentences"] += 1 sentenceId = str(sentence.get("id")) + "/" + str(sentence.get("origId")) if verbose: print "Processing", sentenceId if sentDict == None: if sentence.get("origId") != None: assert pmid == None sentDict = loadEventXML( eventDir + "/" + sentence.get("origId").split(".")[0] + ".xml" , verbose=verbose) else: #pmid = sentence.get("pmid") assert pmid != None if pmid.startswith("PMC"): isPMC = True sentDict = {} else: assert pmid.startswith("PMID") sentDict = loadEventXML( eventDir + "/" + pmid.split("-", 1)[-1] + ".xml" , verbose=verbose) interactionXMLText = sentence.get("text") if not sentDict.has_key(interactionXMLText): counts["missing-sentences"] += 1 if isPMC: counts["missing-sentences-PMC"] += 1 if verbose: print "Missing sentence:", pmid, (sentenceId, sentDict, sentence.get("text")) else: sentenceAnalyses = sentence.find("sentenceanalyses") if sentenceAnalyses != None: sentence.remove(sentenceAnalyses) entityIdCount = IDUtils.getNextFreeId(sentence.findall("entity")) events = sentDict[interactionXMLText] events.sort() for event in events: if not keepEvent(event[2]): counts["filtered-triggers"] += 1 continue trigger = ET.Element("entity") #trigger.set("given", "False") trigger.set("charOffset", str(event[0]) + "-" + str(event[1])) trigger.set("type", str(event[2])) trigger.set("text", str(event[3])) trigger.set("source", "GENIA_event_annotation_0.9") trigger.set("id", sentence.get("id") + ".e" + str(entityIdCount)) entityIdCount += 1 counts["added-triggers"] += 1 sentence.append(trigger) if sentenceAnalyses != None: 
sentence.append(sentenceAnalyses) FindHeads.findHeads(corpusTree, parse, removeExisting=False) removeDuplicates(corpusRoot) print counts if output != None: print >> sys.stderr, "Writing output to", output ETUtils.write(corpusRoot, output) return corpusTree
def writeXMLSentence(self, examples, predictionsByExample, sentenceObject, classSet, classIds, goldSentence=None, exampleStyle=None): sentenceElement = sentenceObject.sentence self.sentenceId = sentenceElement.get("id") self.assertSameSentence(examples, self.sentenceId) # detach analyses-element sentenceAnalysesElement = None sentenceAnalysesElement = sentenceElement.find("sentenceanalyses") if sentenceAnalysesElement == None: sentenceAnalysesElement = sentenceElement.find("analyses") if sentenceAnalysesElement != None: sentenceElement.remove(sentenceAnalysesElement) # remove pairs and interactions interactions = self.removeChildren(sentenceElement, ["pair", "interaction"]) # remove entities entities = self.removeNonNameEntities(sentenceElement) # filter interactions interactionsToKeep = [] for interaction in interactions: if interaction.get("type") != "neg": interactionsToKeep.append(interaction) interactions = interactionsToKeep # early out cutoff = 100 #if len(interactions) == 0 or len(interactions) > cutoff: if len(interactions) > cutoff: # re-attach the analyses-element if sentenceAnalysesElement != None: sentenceElement.append(sentenceAnalysesElement) #if len(interactions) > cutoff: print >> sys.stderr, "Warning, sentence", sentenceObject.sentence.get("id"), "has more than", cutoff, "interactions, removing all." return interactionsByEntity = {} interactionsById = {} for entity in entities: interactionsByEntity[entity.get("id")] = [] for interaction in interactions: e1Id = interaction.get("e1") if not interactionsByEntity.has_key(e1Id): interactionsByEntity[e1Id] = [] interactionsByEntity[e1Id].append(interaction) interactionsById[interaction.get("id")] = interaction # NOTE! 
Following won't work for pairs self.entityCount = IDUtils.getNextFreeId(sentenceElement.findall("entity")) self.interactionCount = IDUtils.getNextFreeId(sentenceElement.findall("interaction")) self.newEntities = [] self.newInteractions = [] # Mapping for connecting the events self.entitiesByHeadByType = {} #self.tokenByOffset = {} #for token in sentenceObject.tokens: # self.tokenByOffset[token.get("charOffset")] = token # self.entityByHeadByType[token.get("charOffset")] = {} for entity in sentenceObject.entities: # by offset offset = entity.get("headOffset") if not self.entitiesByHeadByType.has_key(offset): self.entitiesByHeadByType[offset] = {} # by type eType = entity.get("type") if entity.get("isName") != "True": self.entitiesByHeadByType[offset][eType] = [] else: # add names to structure if not self.entitiesByHeadByType[offset].has_key(eType): self.entitiesByHeadByType[offset][eType] = [] self.entitiesByHeadByType[offset][eType].append(entity) entityKeys = sentenceObject.entitiesById.keys() exampleByEntityId = {} for example in examples: #if predictionsByExample[example[0]][0] == 1: # negative # continue eId = example[3]["e"] assert eId in entityKeys if not exampleByEntityId.has_key(eId): exampleByEntityId[eId] = [] exampleByEntityId[eId].append(example) # This doesn't work, it was an attempt to include # only the positive example with the highest prediction strength # for key in sorted(exampleByEntityId.keys()): # eType = sentenceObject.entitiesById[key].get("type") # eExamples = exampleByEntityId[key] # if eType == "Binding" and len(eExamples) > 1: # maxArgs = -1 # maxStr = -999999999 # for example in eExamples: # if predictionsByExample[example[0]][0] == 1: # continue # numArgs = example[3]["i"].count(",") + 1 # if numArgs > maxArgs: # maxArgs = numArgs # predClass = predictionsByExample[example[0]][0] # predictionStrength = predictionsByExample[example[0]][predClass] # if predictionStrength > maxStr: # maxStr = predictionStrength # #print maxArgs, 
len(eExamples) # for example in eExamples: # if predictionsByExample[example[0]][0] == 1: # continue # predClass = predictionsByExample[example[0]][0] # predictionStrength = predictionsByExample[example[0]][predClass] # if predictionStrength != maxStr: # examples.remove(example) # #if example[3]["i"].count(",") + 1 < maxArgs: # # examples.remove(example) #self.newEntitiesById = {} #self.outEdgesByEntity = {} # Gather arguments for the simple, one-argument events argumentsByExample = {} positiveExamples = [] exampleIdCount = 0 for entity in entities: # If no example, case is unambiguous if entity.get("id") not in exampleByEntityId: simpleEventInteractions = interactionsByEntity[entity.get("id")] numCauses = 0 numThemes = 0 for interaction in simpleEventInteractions[:]: if self.isIntersentence(interaction): print "Warning, intersentence interaction for", entity.get("id"), entity.get("type") simpleEventInteractions.remove(interaction) continue if interaction.get("type") == "neg": simpleEventInteractions.remove(interaction) continue iType = interaction.get("type") if iType == "Cause": numCauses += 1 elif iType == "Theme": numThemes += 1 eType = entity.get("type") assert numThemes == 0 or (numThemes != 0 and numCauses == 0) or (numThemes > 1 and eType != "Binding"), (numThemes,numCauses,eType,entity.get("id"), [x[0] for x in examples], entityKeys) #assert numThemes == 0 or (numThemes != 0 and numCauses == 0) or (numThemes > 1 and eType == "Binding"), (numThemes,numCauses,eType,entity.get("id")) for interaction in simpleEventInteractions: self.counts["simple-" + eType + "-" + interaction.get("type")] += 1 exampleId = "simple." 
+ str(exampleIdCount) exampleIdCount += 1 positiveExamples.append([exampleId,None,None,None]) argumentsByExample[exampleId] = [interaction] #self.addEvent([interaction], sentenceObject, "simple") # Gather arguments for predicted, unmerged events for example in examples: #print predictionsByExample[example[0]] if predictionsByExample[example[0]][0] == 1: # negative continue positiveExamples.append(example) arguments = [] for iId in example[3]["i"].split(","): if iId == "": # processes can have 0 arguments assert "etype" in example[3], example[3] assert example[3]["etype"] == "Process", example[3] break arg = interactionsById[iId] if self.isIntersentence(arg): continue assert arg.get("type") != "neg" arguments.append(arg) argumentsByExample[example[0]] = arguments # Loop until all positive examples are added. This process # assumes that the events (mostly) form a directed acyclic # graph, which can written by "growing" the structure from # the "leaf" events, and consecutively adding levels of # nesting events. examplesLeft = len(positiveExamples) exampleAdded = {} for example in positiveExamples: exampleAdded[example[0]] = False forceAdd = False forcedCount = 0 while examplesLeft > 0: if len(self.newEntities) > 100: print >> sys.stderr, "Warning, sentence", sentenceObject.sentence.get("id"), "has generated more than", cutoff, "events, skipping the rest." break examplesAddedThisRound = 0 # For each round, loop through the potentially remaining examples for example in positiveExamples: if len(self.newEntities) > 100: break if exampleAdded[example[0]]: # This event has already been inserted continue arguments = argumentsByExample[example[0]] # An event can be added if all of its argument events have already # been added. Addition is forced if lack of argument events blocks # the process. 
if forceAdd or self.argumentEntitiesExist(arguments, sentenceObject): umType = "complex" # mark the root entity in the output xml predictionStrength = None if example[0].find("simple") != -1: umType = "simple" else: # Prediction strength is only available for classified argument groups predictionStrength = self.getPredictionStrength(example, predictionsByExample, classSet, classIds) #print example if umType != "simple" and "etype" in example[3] and example[3]["etype"] == "Process" and len(arguments) == 0: origProcess = sentenceObject.entitiesById[example[3]["e"]] # Put back the original entity newProcess = self.addEntity(origProcess) newProcess.set("umType", umType) if predictionStrength != None: newProcess.set("umStrength", str(predictionStrength)) else: # example has arguments self.addEvent(arguments, sentenceObject, umType, forceAdd, predictionStrength, exampleNotes=example[3]) exampleAdded[example[0]] = True examplesLeft -= 1 examplesAddedThisRound += 1 forceAdd = False if examplesLeft > 0 and examplesAddedThisRound == 0: # If there are examples left, but nothing was added, this # means that some nested events are missing. Theoretically # this could also be because two events are referring to # each other, preventing each other's insertion. In any # case this is solved by simply forcing the addition of # the first non-inserted event, by creating 0-argument # entities for its argument events. forcedCount += 1 #print "Warning, forcing event addition" forceAdd = True # Attach the new elements for element in self.newEntities + self.newInteractions: sentenceElement.append(element) # re-attach the analyses-element if sentenceAnalysesElement != None: sentenceElement.append(sentenceAnalysesElement)