def writeXMLSentence(
        self,
        examples,
        predictionsByExample,
        sentenceObject,
        classSet,
        classIds,
        goldSentence=None,
        exampleStyle=None,
        structureAnalyzer=None,
    ):
        sentenceElement = sentenceObject.sentence
        self.sentenceId = sentenceElement.get("id")
        self.assertSameSentence(examples, self.sentenceId)
        # detach analyses-element
        sentenceAnalysesElement = None
        sentenceAnalysesElement = sentenceElement.find("sentenceanalyses")
        if sentenceAnalysesElement == None:
            sentenceAnalysesElement = sentenceElement.find("analyses")
        if sentenceAnalysesElement != None:
            sentenceElement.remove(sentenceAnalysesElement)

        # remove pairs and interactions
        interactions = self.removeChildren(sentenceElement, ["interaction"])
        arguments, relations = self.getInteractionsAndRelations(interactions)
        # remove entities
        entities = self.removeNonNameEntities(sentenceElement)
        interactionsByEntity, interactionsById = self.mapInteractions(arguments + relations, sentenceObject.entities)

        self.entityCount = IDUtils.getNextFreeId(sentenceElement.findall("entity"))
        self.interactionCount = IDUtils.getNextFreeId(sentenceElement.findall("interaction"))
        self.newEntities = []
        self.newInteractions = []

        self.mapEntities(sentenceObject.entities)
        exampleByEntityId = self.mapExamples(examples, sentenceObject)
        argumentsByExample = self.connectArgumentsToExamples(
            examples, predictionsByExample, interactionsById, sentenceObject.entitiesById
        )
        self.mapEntityDuplicates(sentenceObject.entities)

        self.insertExamples(examples, predictionsByExample, argumentsByExample, sentenceObject, classSet, classIds)
        self.insertRelations(relations, sentenceObject.entitiesById)

        # Attach the new elements
        for element in self.newEntities + self.newInteractions:
            sentenceElement.append(element)

        # re-attach the analyses-element
        if sentenceAnalysesElement != None:
            sentenceElement.append(sentenceAnalysesElement)
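
All of these writeXMLSentence variants share the same bookkeeping around the sentence element: the analyses child is detached first, entities and interactions are rewritten, and the analyses element is re-attached at the end so it remains the final child. Below is a minimal standalone sketch of that pattern using only xml.etree.ElementTree; nothing in it is TEES-specific.

import xml.etree.ElementTree as ET

def rewrite_sentence(sentence):
    # Detach the analyses element (either spelling) so the rewritten
    # entities/interactions can be appended before it.
    analyses = sentence.find("sentenceanalyses")
    if analyses is None:
        analyses = sentence.find("analyses")
    if analyses is not None:
        sentence.remove(analyses)

    # ... remove old entities/interactions and append the new ones here ...

    # Re-attach the analyses element as the final child.
    if analyses is not None:
        sentence.append(analyses)

sentence = ET.fromstring('<sentence id="d0.s0"><entity id="d0.s0.e0"/><analyses/></sentence>')
rewrite_sentence(sentence)
print([child.tag for child in sentence])  # ['entity', 'analyses']
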
Example #2
    def writeXMLSentence(self,
                         examples,
                         predictionsByExample,
                         sentenceObject,
                         classSet,
                         classIds,
                         goldSentence=None,
                         exampleStyle=None,
                         structureAnalyzer=None):
        self.assertSameSentence(examples)

        sentenceElement = sentenceObject.sentence
        sentenceId = sentenceElement.get("id")
        sentenceText = sentenceElement.get("text")
        # detach analyses-element
        sentenceAnalysesElement = None
        sentenceAnalysesElement = sentenceElement.find("sentenceanalyses")
        if sentenceAnalysesElement == None:
            sentenceAnalysesElement = sentenceElement.find("analyses")
        if sentenceAnalysesElement != None:
            sentenceElement.remove(sentenceAnalysesElement)
        # remove pairs and interactions
        interactions = self.removeChildren(sentenceElement,
                                           ["pair", "interaction"])
        # remove entities
        newEntityIdCount = IDUtils.getNextFreeId(
            sentenceElement.findall("entity"))
        nonNameEntities = self.removeNonNameEntities(sentenceElement)

        # add new pairs
        for example in examples:
            prediction = predictionsByExample[example[0]]
            entityElement = ET.Element("entity")
            #entityElement.attrib["given"] = "False"
            headToken = example[3]["t"]
            for token in sentenceObject.tokens:
                if token.get("id") == headToken:
                    headToken = token
                    break
            entityElement.set("charOffset", example[3]["charOffset"])
            entityElement.set("headOffset", headToken.get("charOffset"))
            entityElement.set("phraseType", example[3]["ptype"])
            entOffset = Range.charOffsetToSingleTuple(example[3]["charOffset"])
            entityElement.set("text", sentenceText[entOffset[0]:entOffset[1]])
            entityElement.set("id", sentenceId + ".e" + str(newEntityIdCount))
            self.setElementType(entityElement, prediction, classSet, classIds)
            newEntityIdCount += 1
            sentenceElement.append(entityElement)

        # re-attach the analyses-element
        if sentenceAnalysesElement != None:
            sentenceElement.append(sentenceAnalysesElement)
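
The example above slices the sentence text with the tuple returned by Range.charOffsetToSingleTuple. Range is a TEES utility not shown in this listing; the stand-in below only illustrates the assumed "begin-end" format of the charOffset attribute, and it assumes a single span with an exclusive end index so that the direct slice works.

def char_offset_to_tuple(offset):
    # Hypothetical stand-in for Range.charOffsetToSingleTuple: parse a single
    # "begin-end" span into integers. The real helper may support multiple
    # spans and a different end convention.
    begin, end = offset.split("-")
    return int(begin), int(end)

sentence_text = "ABC binds DEF"
begin, end = char_offset_to_tuple("4-9")
print(sentence_text[begin:end])  # "binds", assuming an exclusive end index
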
    def writeXMLSentence(self,
                         examples,
                         predictionsByExample,
                         sentenceObject,
                         classSet,
                         classIds,
                         goldSentence=None,
                         exampleStyle=None,
                         structureAnalyzer=None):
        self.assertSameSentence(examples)

        extensionRequested = False

        sentenceElement = sentenceObject.sentence
        sentenceId = sentenceElement.get("id")
        # detach analyses-element
        sentenceAnalysesElement = None
        sentenceAnalysesElement = sentenceElement.find("sentenceanalyses")
        if sentenceAnalysesElement == None:
            sentenceAnalysesElement = sentenceElement.find("analyses")
        if sentenceAnalysesElement != None:
            sentenceElement.remove(sentenceAnalysesElement)
        # remove pairs and interactions
        interactions = self.removeChildren(sentenceElement,
                                           ["pair", "interaction"])
        # remove entities
        newEntityIdCount = IDUtils.getNextFreeId(
            sentenceElement.findall("entity"))
        nonNameEntities = self.removeNonNameEntities(sentenceElement)
        # remove named entities if needed
        if exampleStyle != None and "names" in exampleStyle and exampleStyle[
                "names"]:  # remove all entities, including names
            self.removeChildren(sentenceElement, ["entity"])

        # gold sentence elements
        goldEntityTypeByHeadOffset = {}
        goldEntityByHeadOffset = {}
        if goldSentence != None:
            for entity in goldSentence.entities:
                headOffset = entity.get("headOffset")
                if not goldEntityTypeByHeadOffset.has_key(headOffset):
                    goldEntityTypeByHeadOffset[headOffset] = []
                    goldEntityByHeadOffset[headOffset] = []
                goldEntityTypeByHeadOffset[headOffset].append(entity)
                goldEntityByHeadOffset[headOffset].append(entity)
            for key in goldEntityTypeByHeadOffset:
                goldEntityTypeByHeadOffset[key] = self.getMergedEntityType(
                    goldEntityTypeByHeadOffset[key])
            for token in sentenceObject.tokens:
                if not goldEntityTypeByHeadOffset.has_key(
                        token.get("charOffset")):
                    goldEntityTypeByHeadOffset[token.get("charOffset")] = "neg"

        # add new pairs
        for example in examples:
            # Entity examples always refer to a single head token
            headTokenId = example[3]["t"]
            headToken = None
            for token in sentenceObject.tokens:
                if token.get("id") == headTokenId:
                    headToken = token
                    break
            assert headToken != None, example[3]
            # Determine if additional processing is requested
            unmergeEPINeg = None
            if "unmergeneg" in example[3] and example[3]["unmergeneg"] == "epi":
                unmergeEPINeg = headToken.get("text")
            if "trigex" in example[3] and example[3]["trigex"] == "bb":
                extensionRequested = True
            # Make entities for positive predictions
            prediction = predictionsByExample[example[0]]
            predictionString = self.getPredictionStrengthString(
                prediction, classSet, classIds)
            for eType in self.getElementTypes(
                    prediction,
                    classSet,
                    classIds,
                    unmergeEPINegText=unmergeEPINeg):  # split merged classes
                entityElement = ET.Element("entity")
                #entityElement.set("given", "False")
                entityElement.set("charOffset", headToken.get("charOffset"))
                entityElement.set("headOffset", headToken.get("charOffset"))
                entityElement.set("text", headToken.get("text"))
                entityElement.set("id",
                                  sentenceId + ".e" + str(newEntityIdCount))
                entityElement.set("type", eType)
                entityElement.set("conf", predictionString)
                if structureAnalyzer.isEvent(eType):
                    entityElement.set("event", "True")
                #self.setElementType(entityElement, prediction, classSet, classIds, unmergeEPINeg=unmergeEPINeg)
                if self.insertWeights:  # in other words, use gold types
                    headOffset = headToken.get("charOffset")
                    if goldEntityByHeadOffset.has_key(headOffset):
                        for entity in goldEntityByHeadOffset[headOffset]:
                            entity.set("conf", entityElement.get("conf"))
                if goldEntityTypeByHeadOffset.has_key(
                        headToken.get("charOffset")):
                    entityElement.set(
                        "goldType", goldEntityTypeByHeadOffset[headToken.get(
                            "charOffset")])
                if "goldIds" in example[
                        3]:  # The entities for which this example was built
                    entityElement.set("goldIds", example[3]["goldIds"])
                if (entityElement.get("type") != "neg"
                        and not goldEntityByHeadOffset.has_key(
                            entityElement.get("headOffset"))
                    ) or not self.insertWeights:
                    newEntityIdCount += 1
                    sentenceElement.append(entityElement)
                elif entityElement.get("type") == "neg":
                    pass
                    #newEntityIdCount += 1
                    #sentenceElement.append(entityElement)

        # if only adding weights, re-attach interactions and gold entities
        if self.insertWeights:
            for entity in nonNameEntities:
                sentenceElement.append(entity)
            for interaction in interactions:
                sentenceElement.append(interaction)

        # re-attach the analyses-element
        if sentenceAnalysesElement != None:
            sentenceElement.append(sentenceAnalysesElement)

        # Extend bacteria triggers
        if extensionRequested:
            Utils.InteractionXML.ExtendTriggers.extend(
                sentenceElement, entityTypes=["Bacterium"])
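
The gold-alignment step above collects gold entities into dictionaries keyed on headOffset before copying a goldType onto the predicted entities. The sketch below shows that grouping compactly with collections.defaultdict; the "---" join is only a placeholder for getMergedEntityType, whose real behaviour is not shown in this listing.

import xml.etree.ElementTree as ET
from collections import defaultdict

def group_gold_by_head_offset(gold_entities):
    # Group gold entity elements by their headOffset attribute.
    by_offset = defaultdict(list)
    for entity in gold_entities:
        by_offset[entity.get("headOffset")].append(entity)
    # Placeholder for getMergedEntityType: join the distinct type names.
    merged_type = {offset: "---".join(sorted({e.get("type") for e in entities}))
                   for offset, entities in by_offset.items()}
    return by_offset, merged_type

gold = [ET.Element("entity", {"headOffset": "0-3", "type": "Protein"}),
        ET.Element("entity", {"headOffset": "0-3", "type": "Gene_expression"})]
print(group_gold_by_head_offset(gold)[1])  # {'0-3': 'Gene_expression---Protein'}
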
Example #6
    def writeXMLSentence(self, examples, predictionsByExample, sentenceObject, classSet, classIds, goldSentence=None, exampleStyle=None, structureAnalyzer=None):        
        self.assertSameSentence(examples)
        
        extensionRequested = False
        
        sentenceElement = sentenceObject.sentence
        sentenceId = sentenceElement.get("id")
        # detach analyses-element
        sentenceAnalysesElement = None
        sentenceAnalysesElement = sentenceElement.find("sentenceanalyses")
        if sentenceAnalysesElement == None:
            sentenceAnalysesElement = sentenceElement.find("analyses")
        if sentenceAnalysesElement != None:
            sentenceElement.remove(sentenceAnalysesElement)
        # remove pairs and interactions
        interactions = self.removeChildren(sentenceElement, ["pair", "interaction"])
        # remove entities
        newEntityIdCount = IDUtils.getNextFreeId(sentenceElement.findall("entity"))
        nonNameEntities = self.removeNonNameEntities(sentenceElement)
        # remove named entities if needed
        if exampleStyle != None and "names" in exampleStyle and exampleStyle["names"]: # remove all entities, including names
            self.removeChildren(sentenceElement, ["entity"])
        
        # gold sentence elements
        goldEntityTypeByHeadOffset = {}
        goldEntityByHeadOffset = {}
        if goldSentence != None:
            for entity in goldSentence.entities:
                headOffset = entity.get("headOffset")
                if not goldEntityTypeByHeadOffset.has_key(headOffset):
                    goldEntityTypeByHeadOffset[headOffset] = []
                    goldEntityByHeadOffset[headOffset] = []
                goldEntityTypeByHeadOffset[headOffset].append(entity)
                goldEntityByHeadOffset[headOffset].append(entity)
            for key in goldEntityTypeByHeadOffset:
                goldEntityTypeByHeadOffset[key] =  self.getMergedEntityType(goldEntityTypeByHeadOffset[key])
            for token in sentenceObject.tokens:
                if not goldEntityTypeByHeadOffset.has_key(token.get("charOffset")):
                    goldEntityTypeByHeadOffset[token.get("charOffset")] = "neg"
            
        # add new pairs
        for example in examples:
            # Entity examples always refer to a single head token
            headTokenId = example[3]["t"]
            headToken = None
            for token in sentenceObject.tokens:
                if token.get("id") == headTokenId:
                    headToken = token
                    break
            assert headToken != None, example[3]
            # Determine if additional processing is requested
            unmergeEPINeg = None
            if "unmergeneg" in example[3] and example[3]["unmergeneg"] == "epi":
                unmergeEPINeg = headToken.get("text")
            if "trigex" in example[3] and example[3]["trigex"] == "bb":
                extensionRequested = True
            # Make entities for positive predictions
            prediction = predictionsByExample[example[0]]
            predictionString = self.getPredictionStrengthString(prediction, classSet, classIds)
            for eType in self.getElementTypes(prediction, classSet, classIds, unmergeEPINegText=unmergeEPINeg): # split merged classes
                entityElement = ET.Element("entity")
                #entityElement.set("given", "False")
                entityElement.set("charOffset", headToken.get("charOffset"))
                if "define_offset" in example[3]:
                    entityElement.set("charOffset", example[3]["define_offset"])
                entityElement.set("headOffset", headToken.get("charOffset"))
                entityElement.set("text", headToken.get("text"))
                entityElement.set("id", sentenceId + ".e" + str(newEntityIdCount))
                entityElement.set("type", eType)
                entityElement.set("conf", predictionString)
                if structureAnalyzer.isEvent(eType):
                    entityElement.set("event", "True")
                #self.setElementType(entityElement, prediction, classSet, classIds, unmergeEPINeg=unmergeEPINeg)
                if self.insertWeights: # in other words, use gold types
                    headOffset = headToken.get("charOffset")
                    if goldEntityByHeadOffset.has_key(headOffset):
                        for entity in goldEntityByHeadOffset[headOffset]:
                            entity.set("conf", entityElement.get("conf") )
                if goldEntityTypeByHeadOffset.has_key(headToken.get("charOffset")):
                    entityElement.set("goldType", goldEntityTypeByHeadOffset[headToken.get("charOffset")])
                if "goldIds" in example[3]: # The entities for which this example was built
                    entityElement.set("goldIds", example[3]["goldIds"])
                if (entityElement.get("type") != "neg" and not goldEntityByHeadOffset.has_key(entityElement.get("headOffset"))) and not self.insertWeights:
                    newEntityIdCount += 1
                    sentenceElement.append(entityElement)
                elif entityElement.get("type") == "neg":
                    pass
                    #newEntityIdCount += 1
                    #sentenceElement.append(entityElement)
        
        # if only adding weights, re-attach interactions and gold entities
        if self.insertWeights:
            for entity in nonNameEntities:
                sentenceElement.append(entity)
            for interaction in interactions:
                sentenceElement.append(interaction)

        # re-attach the analyses-element
        if sentenceAnalysesElement != None:
            sentenceElement.append(sentenceAnalysesElement)
        
        # Extend bacteria triggers
        if extensionRequested:
            Utils.InteractionXML.ExtendTriggers.extend(sentenceElement, entityTypes=["Bacterium"])
def run(input, output, eventDir, parse="split-mccc-preparsed", verbose=False):
    print >> sys.stderr, "Loading corpus", input
    corpusTree = ETUtils.ETFromObj(input)
    print >> sys.stderr, "Corpus file loaded"
    corpusRoot = corpusTree.getroot()
    
    counts = defaultdict(int)
    for document in corpusRoot.findall("document"):
        sentDict = None
        pmid = document.get("pmid")
        isPMC = False
        for sentence in document.findall("sentence"):
            counts["sentences"] += 1
            sentenceId = str(sentence.get("id")) + "/" + str(sentence.get("origId"))
            if verbose: print "Processing", sentenceId
            if sentDict == None:
                if sentence.get("origId") != None:
                    assert pmid == None
                    sentDict = loadEventXML( eventDir + "/" + sentence.get("origId").split(".")[0] + ".xml" , verbose=verbose)
                else:
                    #pmid = sentence.get("pmid")
                    assert pmid != None
                    if pmid.startswith("PMC"):
                        isPMC = True
                        sentDict = {}
                    else:
                        assert pmid.startswith("PMID")
                        sentDict = loadEventXML( eventDir + "/" + pmid.split("-", 1)[-1] + ".xml" , verbose=verbose)
            interactionXMLText = sentence.get("text")
            if not sentDict.has_key(interactionXMLText):
                counts["missing-sentences"] += 1
                if isPMC: counts["missing-sentences-PMC"] += 1
                if verbose: print "Missing sentence:", pmid, (sentenceId, sentDict, sentence.get("text"))
            else:
                sentenceAnalyses = sentence.find("sentenceanalyses")
                if sentenceAnalyses != None:
                    sentence.remove(sentenceAnalyses)
                entityIdCount = IDUtils.getNextFreeId(sentence.findall("entity"))
                events = sentDict[interactionXMLText]
                events.sort()
                for event in events:
                    if not keepEvent(event[2]):
                        counts["filtered-triggers"] += 1
                        continue
                    trigger = ET.Element("entity")
                    trigger.set("isName", "False")
                    trigger.set("charOffset", str(event[0]) + "-" + str(event[1]))
                    trigger.set("type", str(event[2]))
                    trigger.set("text", str(event[3]))
                    trigger.set("source", "GENIA_event_annotation_0.9")
                    trigger.set("id", sentence.get("id") + ".e" + str(entityIdCount))
                    entityIdCount += 1
                    counts["added-triggers"] += 1
                    sentence.append(trigger)
                if sentenceAnalyses != None:
                    sentence.append(sentenceAnalyses)
    
    FindHeads.findHeads(corpusTree, parse, removeExisting=False)
    removeDuplicates(corpusRoot)
    print counts
    
    if output != None:
        print >> sys.stderr, "Writing output to", output
        ETUtils.write(corpusRoot, output)
    return corpusTree
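
Both run() and the sentence writers above continue element numbering from IDUtils.getNextFreeId so that newly created ids do not collide with existing ones. IDUtils is not part of this listing; the helper below is a hypothetical re-implementation of what it is assumed to do, shown for illustration only.

import re
import xml.etree.ElementTree as ET

def get_next_free_id(elements):
    # Assumed behaviour: ids end in a letter prefix plus a number
    # (e.g. "d0.s0.e3" or "d0.s0.i1"); return the next unused number.
    used = [-1]
    for element in elements:
        match = re.search(r"\.[a-z]+(\d+)$", element.get("id", ""))
        if match:
            used.append(int(match.group(1)))
    return max(used) + 1

entities = [ET.Element("entity", {"id": "d0.s0.e0"}),
            ET.Element("entity", {"id": "d0.s0.e4"})]
print(get_next_free_id(entities))  # 5
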
Example #8
def run(input, output, eventDir, parse="split-mccc-preparsed", verbose=False):
    print >> sys.stderr, "Loading corpus", input
    corpusTree = ETUtils.ETFromObj(input)
    print >> sys.stderr, "Corpus file loaded"
    corpusRoot = corpusTree.getroot()
    
    counts = defaultdict(int)
    for document in corpusRoot.findall("document"):
        sentDict = None
        pmid = document.get("pmid")
        isPMC = False
        for sentence in document.findall("sentence"):
            counts["sentences"] += 1
            sentenceId = str(sentence.get("id")) + "/" + str(sentence.get("origId"))
            if verbose: print "Processing", sentenceId
            if sentDict == None:
                if sentence.get("origId") != None:
                    assert pmid == None
                    sentDict = loadEventXML( eventDir + "/" + sentence.get("origId").split(".")[0] + ".xml" , verbose=verbose)
                else:
                    #pmid = sentence.get("pmid")
                    assert pmid != None
                    if pmid.startswith("PMC"):
                        isPMC = True
                        sentDict = {}
                    else:
                        assert pmid.startswith("PMID")
                        sentDict = loadEventXML( eventDir + "/" + pmid.split("-", 1)[-1] + ".xml" , verbose=verbose)
            interactionXMLText = sentence.get("text")
            if not sentDict.has_key(interactionXMLText):
                counts["missing-sentences"] += 1
                if isPMC: counts["missing-sentences-PMC"] += 1
                if verbose: print "Missing sentence:", pmid, (sentenceId, sentDict, sentence.get("text"))
            else:
                sentenceAnalyses = sentence.find("sentenceanalyses")
                if sentenceAnalyses != None:
                    sentence.remove(sentenceAnalyses)
                entityIdCount = IDUtils.getNextFreeId(sentence.findall("entity"))
                events = sentDict[interactionXMLText]
                events.sort()
                for event in events:
                    if not keepEvent(event[2]):
                        counts["filtered-triggers"] += 1
                        continue
                    trigger = ET.Element("entity")
                    #trigger.set("given", "False")
                    trigger.set("charOffset", str(event[0]) + "-" + str(event[1]))
                    trigger.set("type", str(event[2]))
                    trigger.set("text", str(event[3]))
                    trigger.set("source", "GENIA_event_annotation_0.9")
                    trigger.set("id", sentence.get("id") + ".e" + str(entityIdCount))
                    entityIdCount += 1
                    counts["added-triggers"] += 1
                    sentence.append(trigger)
                if sentenceAnalyses != None:
                    sentence.append(sentenceAnalyses)
    
    FindHeads.findHeads(corpusTree, parse, removeExisting=False)
    removeDuplicates(corpusRoot)
    print counts
    
    if output != None:
        print >> sys.stderr, "Writing output to", output
        ETUtils.write(corpusRoot, output)
    return corpusTree
    def writeXMLSentence(self, examples, predictionsByExample, sentenceObject, classSet, classIds, goldSentence=None, exampleStyle=None):        
        sentenceElement = sentenceObject.sentence
        self.sentenceId = sentenceElement.get("id")
        self.assertSameSentence(examples, self.sentenceId)
        # detach analyses-element
        sentenceAnalysesElement = None
        sentenceAnalysesElement = sentenceElement.find("sentenceanalyses")
        if sentenceAnalysesElement == None:
            sentenceAnalysesElement = sentenceElement.find("analyses")
        if sentenceAnalysesElement != None:
            sentenceElement.remove(sentenceAnalysesElement)
                
        # remove pairs and interactions
        interactions = self.removeChildren(sentenceElement, ["pair", "interaction"])
        # remove entities
        entities = self.removeNonNameEntities(sentenceElement)
        
        # filter interactions
        interactionsToKeep = []
        for interaction in interactions:
            if interaction.get("type") != "neg":
                interactionsToKeep.append(interaction)
        interactions = interactionsToKeep
        
        # early out
        cutoff = 100
        #if len(interactions) == 0 or len(interactions) > cutoff:
        if len(interactions) > cutoff:
            # re-attach the analyses-element
            if sentenceAnalysesElement != None:
                sentenceElement.append(sentenceAnalysesElement)
            #if len(interactions) > cutoff:
            print >> sys.stderr, "Warning, sentence", sentenceObject.sentence.get("id"), "has more than", cutoff, "interactions, removing all."
            return
        
        interactionsByEntity = {}
        interactionsById = {}
        for entity in entities:
            interactionsByEntity[entity.get("id")] = []
        for interaction in interactions:
            e1Id = interaction.get("e1")
            if not interactionsByEntity.has_key(e1Id):
                interactionsByEntity[e1Id] = []
            interactionsByEntity[e1Id].append(interaction)
            interactionsById[interaction.get("id")] = interaction

        # NOTE! Following won't work for pairs
        self.entityCount = IDUtils.getNextFreeId(sentenceElement.findall("entity"))
        self.interactionCount = IDUtils.getNextFreeId(sentenceElement.findall("interaction"))
        self.newEntities = []
        self.newInteractions = []
        
        # Mapping for connecting the events
        self.entitiesByHeadByType = {}
        #self.tokenByOffset = {}
        #for token in sentenceObject.tokens:
        #    self.tokenByOffset[token.get("charOffset")] = token
        #    self.entityByHeadByType[token.get("charOffset")] = {}
        for entity in sentenceObject.entities:
            # by offset
            offset = entity.get("headOffset")
            if not self.entitiesByHeadByType.has_key(offset):
                self.entitiesByHeadByType[offset] = {}
            # by type
            eType = entity.get("type")
            if entity.get("isName") != "True":
                self.entitiesByHeadByType[offset][eType] = []
            else: # add names to structure
                if not self.entitiesByHeadByType[offset].has_key(eType):
                    self.entitiesByHeadByType[offset][eType] = []
                self.entitiesByHeadByType[offset][eType].append(entity)
        
        entityKeys = sentenceObject.entitiesById.keys()
        exampleByEntityId = {}
        for example in examples:
            #if predictionsByExample[example[0]][0] == 1: # negative
            #    continue
            eId = example[3]["e"]
            assert eId in entityKeys
            if not exampleByEntityId.has_key(eId):
                exampleByEntityId[eId] = []
            exampleByEntityId[eId].append(example)
        
        # This doesn't work, it was an attempt to include
        # only the positive example with the highest prediction strength
#        for key in sorted(exampleByEntityId.keys()):
#            eType = sentenceObject.entitiesById[key].get("type")
#            eExamples = exampleByEntityId[key]
#            if eType == "Binding" and len(eExamples) > 1:
#                maxArgs = -1
#                maxStr = -999999999
#                for example in eExamples:
#                    if predictionsByExample[example[0]][0] == 1:
#                        continue
#                    numArgs = example[3]["i"].count(",") + 1
#                    if numArgs > maxArgs:
#                        maxArgs = numArgs
#                    predClass = predictionsByExample[example[0]][0]
#                    predictionStrength = predictionsByExample[example[0]][predClass]
#                    if predictionStrength > maxStr:
#                        maxStr = predictionStrength
#                #print maxArgs, len(eExamples)
#                for example in eExamples:
#                    if predictionsByExample[example[0]][0] == 1:
#                        continue
#                    predClass = predictionsByExample[example[0]][0]
#                    predictionStrength = predictionsByExample[example[0]][predClass]
#                    if predictionStrength != maxStr:
#                        examples.remove(example)
#                    #if example[3]["i"].count(",") + 1 < maxArgs:
#                    #    examples.remove(example)
        
        #self.newEntitiesById = {}
        #self.outEdgesByEntity = {}
        
        # Gather arguments for the simple, one-argument events
        argumentsByExample = {}
        positiveExamples = []
        exampleIdCount = 0
        for entity in entities:
            # If no example, case is unambiguous
            if entity.get("id") not in exampleByEntityId:
                simpleEventInteractions = interactionsByEntity[entity.get("id")]
                numCauses = 0
                numThemes = 0
                for interaction in simpleEventInteractions[:]:
                    if self.isIntersentence(interaction):
                        print "Warning, intersentence interaction for", entity.get("id"), entity.get("type")
                        simpleEventInteractions.remove(interaction)
                        continue
                    if interaction.get("type") == "neg":
                        simpleEventInteractions.remove(interaction)
                        continue
                    iType = interaction.get("type")
                    if iType == "Cause":
                        numCauses += 1
                    elif iType == "Theme":
                        numThemes += 1
                eType = entity.get("type")
                assert numThemes == 0 or (numThemes != 0 and numCauses == 0) or (numThemes > 1 and eType != "Binding"), (numThemes,numCauses,eType,entity.get("id"), [x[0] for x in examples], entityKeys)
                #assert numThemes == 0 or (numThemes != 0 and numCauses == 0) or (numThemes > 1 and eType == "Binding"), (numThemes,numCauses,eType,entity.get("id"))
                for interaction in simpleEventInteractions:
                    self.counts["simple-" + eType + "-" + interaction.get("type")] += 1
                    exampleId = "simple." + str(exampleIdCount)
                    exampleIdCount += 1
                    positiveExamples.append([exampleId,None,None,None])
                    argumentsByExample[exampleId] = [interaction]
                    #self.addEvent([interaction], sentenceObject, "simple")
            
        # Gather arguments for predicted, unmerged events
        for example in examples:
            #print predictionsByExample[example[0]]
            if predictionsByExample[example[0]][0] == 1: # negative
                continue
            positiveExamples.append(example)
            arguments = []
            for iId in example[3]["i"].split(","):
                if iId == "": # processes can have 0 arguments
                    assert "etype" in example[3], example[3]
                    assert example[3]["etype"] == "Process", example[3]
                    break
                arg = interactionsById[iId]
                if self.isIntersentence(arg):
                    continue
                assert arg.get("type") != "neg"
                arguments.append(arg)
            argumentsByExample[example[0]] = arguments
        
        # Loop until all positive examples are added. This process
        # assumes that the events (mostly) form a directed acyclic
        # graph, which can be written by "growing" the structure from
        # the "leaf" events, and consecutively adding levels of
        # nesting events.
        examplesLeft = len(positiveExamples)
        exampleAdded = {}
        for example in positiveExamples:
            exampleAdded[example[0]] = False
        forceAdd = False
        forcedCount = 0
        while examplesLeft > 0:
            if len(self.newEntities) > 100:
                print >> sys.stderr, "Warning, sentence", sentenceObject.sentence.get("id"), "has generated more than", cutoff, "events, skipping the rest."
                break
            examplesAddedThisRound = 0
            # For each round, loop through the potentially remaining examples
            for example in positiveExamples:
                if len(self.newEntities) > 100:
                    break
                if exampleAdded[example[0]]: # This event has already been inserted
                    continue
                arguments = argumentsByExample[example[0]]
                # An event can be added if all of its argument events have already
                # been added. Addition is forced if lack of argument events blocks
                # the process.
                if forceAdd or self.argumentEntitiesExist(arguments, sentenceObject):
                    umType = "complex" # mark the root entity in the output xml
                    predictionStrength = None
                    if example[0].find("simple") != -1:
                        umType = "simple"
                    else:
                        # Prediction strength is only available for classified argument groups
                        predictionStrength = self.getPredictionStrength(example, predictionsByExample, classSet, classIds)
                    #print example 
                    if umType != "simple" and "etype" in example[3] and example[3]["etype"] == "Process" and len(arguments) == 0:
                        origProcess = sentenceObject.entitiesById[example[3]["e"]]
                        # Put back the original entity
                        newProcess = self.addEntity(origProcess)
                        newProcess.set("umType", umType)
                        if predictionStrength != None:
                            newProcess.set("umStrength", str(predictionStrength))
                    else: # example has arguments
                        self.addEvent(arguments, sentenceObject, umType, forceAdd, predictionStrength, exampleNotes=example[3])
                    exampleAdded[example[0]] = True
                    examplesLeft -= 1
                    examplesAddedThisRound += 1
                    forceAdd = False
            if examplesLeft > 0 and examplesAddedThisRound == 0:
                # If there are examples left, but nothing was added, this
                # means that some nested events are missing. Theoretically
                # this could also be because two events are referring to
                # each other, preventing each other's insertion. In any
                # case this is solved by simply forcing the addition of 
                # the first non-inserted event, by creating 0-argument
                # entities for its argument events.
                forcedCount += 1
                #print "Warning, forcing event addition"
                forceAdd = True                  
        
        # Attach the new elements
        for element in self.newEntities + self.newInteractions:
            sentenceElement.append(element)

        # re-attach the analyses-element
        if sentenceAnalysesElement != None:
            sentenceElement.append(sentenceAnalysesElement)
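
The while loop near the end of the last example grows the event structure leaf-first: an event is inserted only once all of its argument events exist, and when nothing can be added the first blocked event is forced in. The sketch below mirrors only that scheduling logic with plain dictionaries and made-up names; it does not create the placeholder argument entities that the real forceAdd path builds.

def insert_events(events):
    # events maps an event id to the ids of the events it takes as arguments
    # (hypothetical structure). Returns the order in which events get inserted.
    inserted, order = set(), []
    force = False
    while len(inserted) < len(events):
        added_this_round = 0
        for event_id, argument_ids in events.items():
            if event_id in inserted:
                continue
            if force or all(arg in inserted for arg in argument_ids):
                order.append(event_id)
                inserted.add(event_id)
                added_this_round += 1
                force = False
        if added_this_round == 0:
            force = True  # deadlock: force in the next blocked event, like forceAdd
    return order

print(insert_events({"E1": [], "E2": ["E1"], "E3": ["E2", "E4"], "E4": ["E3"]}))
# ['E1', 'E2', 'E3', 'E4']
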