Esempio n. 1
0
    def buildExamplesFromGraph(self, sentenceGraph, outfile, goldGraph = None):
        """
        Build examples for a single sentence and write them to outfile.

        Candidate pairs are taken from the merged entities of the sentence
        (style "entities") or from its tokens. With the "directed" style two
        examples (forward and reverse) are considered per pair and each can be
        rejected by the active *_limits / pos_only filters; otherwise a single
        example combining the features of both directions is written.

        @param sentenceGraph: the sentence to build examples for
        @param outfile: target passed to ExampleUtils.appendExamples
        @param goldGraph: optional gold graph; when given, directed entity
            examples take their category from it
        @return: the number of examples written (NOT a list of examples)
        See Core/ExampleUtils for example format.
        """
        exampleIndex = 0

        if self.styles["trigger_features"]:
            self.triggerFeatureBuilder.initSentence(sentenceGraph)
        if self.styles["evex"]:
            self.evexFeatureBuilder.initSentence(sentenceGraph)

        # Filter entities: work on the merged entities and record how many
        # duplicates were dropped by the merge.
        sentenceGraph.mergeInteractionGraph(True)
        entities = sentenceGraph.mergedEntities
        self.exampleStats.addValue("Duplicate entities skipped", len(sentenceGraph.entities) - len(entities))

        # Connect to optional gold graph. Always bind the name so it can be
        # passed on safely even when no gold graph is given.
        entityToGold = None
        if goldGraph is not None:
            entityToGold = EvaluateInteractionXML.mapEntities(entities, goldGraph.entities)

        # The undirected dependency graph is handed to buildExample as "paths"
        paths = None
        if not self.styles["no_path"]:
            paths = sentenceGraph.dependencyGraph.toUndirected()

        # Generate examples based on interactions between entities or interactions between tokens
        useEntities = self.styles["entities"]
        if useEntities:
            loopRange = len(entities)
        else:
            loopRange = len(sentenceGraph.tokens)
        for i in range(loopRange-1):
            for j in range(i+1,loopRange):
                eI = None
                eJ = None
                if useEntities:
                    eI = entities[i]
                    eJ = entities[j]
                    tI = sentenceGraph.entityHeadTokenByEntity[eI]
                    tJ = sentenceGraph.entityHeadTokenByEntity[eJ]
                    if eI.get("type") == "neg" or eJ.get("type") == "neg":
                        continue
                    if self.styles["skip_extra_triggers"]:
                        if eI.get("source") is not None or eJ.get("source") is not None:
                            continue
                else:
                    tI = sentenceGraph.tokens[i]
                    tJ = sentenceGraph.tokens[j]
                # only consider paths between entities (NOTE! entities, not only named entities)
                if self.styles["headsOnly"]:
                    if (len(sentenceGraph.tokenIsEntityHead[tI]) == 0) or (len(sentenceGraph.tokenIsEntityHead[tJ]) == 0):
                        continue

                if self.styles["directed"]:
                    # forward (I -> J) first, then reverse (J -> I)
                    for t1, t2, e1, e2 in ((tI, tJ, eI, eJ), (tJ, tI, eJ, eI)):
                        if self._buildDirectedExample(sentenceGraph, outfile, goldGraph, entityToGold, paths, t1, t2, e1, e2, exampleIndex):
                            exampleIndex += 1
                else:
                    if useEntities:
                        categoryName = self.getCategoryName(sentenceGraph, eI, eJ, False)
                    else:
                        categoryName = self.getCategoryNameFromTokens(sentenceGraph, tI, tJ, False)
                    self.exampleStats.beginExample(categoryName)
                    forwardExample = self.buildExample(tI, tJ, paths, sentenceGraph, categoryName, exampleIndex, eI, eJ)
                    if not self.styles["graph_kernel"]:
                        # merge the features of the reverse direction into the single undirected example
                        reverseExample = self.buildExample(tJ, tI, paths, sentenceGraph, categoryName, exampleIndex, eJ, eI)
                        forwardExample[2].update(reverseExample[2])
                    ExampleUtils.appendExamples([forwardExample], outfile)
                    exampleIndex += 1
                    self.exampleStats.endExample()

        return exampleIndex

    def _buildDirectedExample(self, sentenceGraph, outfile, goldGraph, entityToGold, paths, t1, t2, e1, e2, exampleIndex):
        """
        Build the directed example t1 -> t2 (e1 -> e2) and write it to outfile
        unless one of the active style filters rejects it.

        Every active filter is evaluated (no short-circuiting) so that each
        rejection reason is recorded in self.exampleStats.

        @return: True if an example was written, False if it was filtered out
        """
        # define the category
        if self.styles["entities"]:
            categoryName = self.getCategoryName(sentenceGraph, e1, e2, True)
            if goldGraph is not None:
                categoryName = self.getGoldCategoryName(goldGraph, entityToGold, e1, e2, True)
        else:
            categoryName = self.getCategoryNameFromTokens(sentenceGraph, t1, t2, True)

        self.exampleStats.beginExample(categoryName)
        makeExample = True
        if self.styles["genia_limits"] and not self.isPotentialGeniaInteraction(e1, e2):
            makeExample = False
            self.exampleStats.filter("genia_limits")
        if self.styles["genia_task1"] and (e1.get("type") == "Entity" or e2.get("type") == "Entity"):
            makeExample = False
            self.exampleStats.filter("genia_task1")
        if self.styles["rel_limits"] and not self.isPotentialRELInteraction(e1, e2):
            makeExample = False
            self.exampleStats.filter("rel_limits")
        if self.styles["co_limits"] and not self.isPotentialCOInteraction(e1, e2, sentenceGraph):
            makeExample = False
            self.exampleStats.filter("co_limits")
        if self.styles["bb_limits"] and not self.isPotentialBBInteraction(e1, e2, sentenceGraph):
            makeExample = False
            self.exampleStats.filter("bb_limits")
            if categoryName != "neg":
                self.exampleStats.filter("bb_limits(" + categoryName + ":" + e1.get("type") + "/" + e2.get("type") + ")")
        # isPotentialBIInteraction receives exampleStats itself, so no filter() call is made here
        if self.styles["bi_limits"] and not self.isPotentialBIInteraction(e1, e2, sentenceGraph, self.exampleStats):
            makeExample = False
        if self.styles["epi_limits"] and not self.isPotentialEPIInteraction(e1, e2, sentenceGraph):
            makeExample = False
            self.exampleStats.filter("epi_limits")
        if self.styles["id_limits"] and not self.isPotentialIDInteraction(e1, e2, sentenceGraph):
            makeExample = False
            self.exampleStats.filter("id_limits")
        if self.styles["pos_only"] and categoryName == "neg":
            makeExample = False
            self.exampleStats.filter("pos_only")

        if makeExample:
            ExampleUtils.appendExamples([self.buildExample(t1, t2, paths, sentenceGraph, categoryName, exampleIndex, e1, e2)], outfile)
        self.exampleStats.endExample()
        return makeExample
Esempio n. 2
0
    def buildExamplesFromGraph(self, sentenceGraph, examples, goldGraph=None):
        """
        Build examples for all candidate pairs of a single sentence.

        Pairs are formed from the merged entities of the sentence, or from the
        tokens returned by self.getTokenFeatures when the "token_nodes" style
        is set. Each pair is handed to self.buildExample together with the
        'examples' argument: twice (both directions) for directed examples,
        once (ordered by token index) for undirected ones.

        @param sentenceGraph: the sentence to build examples for
        @param examples: passed through unchanged to self.buildExample
        @param goldGraph: optional gold graph used to map entities to gold entities
        @return: None
        """
        # example directionality
        directedStyle = self.styles.get("directed")
        undirectedStyle = self.styles.get("undirected")
        if directedStyle:
            assert undirectedStyle in [None, False]
            examplesAreDirected = True
        elif undirectedStyle:
            assert directedStyle in [None, False]
            examplesAreDirected = False
        else:
            # Neither style is enabled: determine directedness from corpus.
            # This branch also covers values such as directed=False with
            # undirected unset, which previously left examplesAreDirected
            # unbound and failed with UnboundLocalError at the first pair.
            examplesAreDirected = self.structureAnalyzer.hasDirectedTargets() if self.structureAnalyzer is not None else True

        # Filter entities, if needed
        sentenceGraph.mergeInteractionGraph(True)
        entities = sentenceGraph.mergedEntities
        self.exampleStats.addValue("Duplicate entities skipped", len(sentenceGraph.entities) - len(entities))

        # Connect to optional gold graph
        entityToGold = None
        if goldGraph is not None:
            entityToGold = EvaluateInteractionXML.mapEntities(entities, goldGraph.entities)

        dg = sentenceGraph.dependencyGraph
        undirected = dg.toUndirected()
        # number of dependency edges (incoming + outgoing) per token
        edgeCounts = {x:len(dg.getInEdges(x) + dg.getOutEdges(x)) for x in sentenceGraph.tokens}

        tokens, tokenMap = self.getTokenFeatures(sentenceGraph)

        # Generate examples based on interactions between entities or interactions between tokens
        useTokenNodes = self.styles.get("token_nodes")
        if useTokenNodes:
            loopRange = len(tokens)
        else:
            loopRange = len(entities)
        for i in range(loopRange-1):
            for j in range(i+1,loopRange):
                eI = None
                eJ = None
                if useTokenNodes:
                    tI = tokens[i]["element"]
                    tJ = tokens[j]["element"]
                else:
                    eI = entities[i]
                    eJ = entities[j]
                    tI = sentenceGraph.entityHeadTokenByEntity[eI]
                    tJ = sentenceGraph.entityHeadTokenByEntity[eJ]
                    if eI.get("type") == "neg" or eJ.get("type") == "neg":
                        continue
                    if self.styles.get("skip_extra_triggers"):
                        if eI.get("source") is not None or eJ.get("source") is not None:
                            continue
                # only consider paths between entities (NOTE! entities, not only named entities)
                if self.styles.get("headsOnly"):
                    if (len(sentenceGraph.tokenIsEntityHead[tI]) == 0) or (len(sentenceGraph.tokenIsEntityHead[tJ]) == 0):
                        continue

                if examplesAreDirected:
                    # one example per direction
                    self.buildExample(examples, tI, tJ, eI, eJ, tokens, tokenMap, sentenceGraph, goldGraph, entityToGold, undirected, edgeCounts)
                    self.buildExample(examples, tJ, tI, eJ, eI, tokens, tokenMap, sentenceGraph, goldGraph, entityToGold, undirected, edgeCounts)
                else:
                    # a single example, with the pair ordered by token index
                    if tokenMap[tJ]["index"] < tokenMap[tI]["index"]:
                        tI, tJ = tJ, tI
                        eI, eJ = eJ, eI
                    self.buildExample(examples, tI, tJ, eI, eJ, tokens, tokenMap, sentenceGraph, goldGraph, entityToGold, undirected, edgeCounts, False)
Esempio n. 3
0
    def buildExamplesFromGraph(self, sentenceGraph, outfile, goldGraph = None, structureAnalyzer=None):
        """
        Build examples for a single sentence and write them to outfile.

        Candidate pairs are formed from the merged entities of the sentence,
        or from its tokens when the "token_nodes" style is set. The examples
        for each pair come from self.buildExamplesForPair and are written
        with ExampleUtils.appendExamples.

        @param sentenceGraph: the sentence to build examples for
        @param outfile: target passed to ExampleUtils.appendExamples
        @param goldGraph: optional gold graph used to map entities to gold entities
        @param structureAnalyzer: optional analyzer; consulted for example
            directedness when neither the "directed" nor the "undirected"
            style is enabled
        @return: the number of examples written (NOT a list of examples)
        See Core/ExampleUtils for example format.
        """
        exampleIndex = 0
        # example directionality
        directedStyle = self.styles["directed"]
        undirectedStyle = self.styles["undirected"]
        if directedStyle:
            assert undirectedStyle in [None, False]
            examplesAreDirected = True
        elif undirectedStyle:
            assert directedStyle in [None, False]
            examplesAreDirected = False
        else:
            # Neither style is enabled: determine directedness from corpus.
            # This branch also covers values such as directed=False with
            # undirected=None, which previously left examplesAreDirected
            # unbound and failed with UnboundLocalError at the first pair.
            examplesAreDirected = structureAnalyzer.hasDirectedTargets() if structureAnalyzer is not None else True

        if not self.styles["no_trigger_features"]:
            self.triggerFeatureBuilder.initSentence(sentenceGraph)
        if self.styles["evex"]:
            self.evexFeatureBuilder.initSentence(sentenceGraph)

        # Filter entities, if needed
        sentenceGraph.mergeInteractionGraph(True)
        entities = sentenceGraph.mergedEntities
        self.exampleStats.addValue("Duplicate entities skipped", len(sentenceGraph.entities) - len(entities))

        # Connect to optional gold graph
        entityToGold = None
        if goldGraph is not None:
            entityToGold = EvaluateInteractionXML.mapEntities(entities, goldGraph.entities)

        # The undirected dependency graph is handed on as "paths"
        paths = None
        if not self.styles["no_path"]:
            paths = sentenceGraph.dependencyGraph.toUndirected()
            if self.styles["filter_shortest_path"] is not None: # For DDI use filter_shortest_path=conj_and
                paths.resetAnalyses() # just in case
                paths.FloydWarshall(self.filterEdge, {"edgeTypes":self.styles["filter_shortest_path"]})

        # Generate examples based on interactions between entities or interactions between tokens
        useTokenNodes = self.styles["token_nodes"]
        if useTokenNodes:
            loopRange = len(sentenceGraph.tokens)
        else:
            loopRange = len(entities)
        for i in range(loopRange-1):
            for j in range(i+1,loopRange):
                eI = None
                eJ = None
                if useTokenNodes:
                    tI = sentenceGraph.tokens[i]
                    tJ = sentenceGraph.tokens[j]
                else:
                    eI = entities[i]
                    eJ = entities[j]
                    tI = sentenceGraph.entityHeadTokenByEntity[eI]
                    tJ = sentenceGraph.entityHeadTokenByEntity[eJ]
                    if eI.get("type") == "neg" or eJ.get("type") == "neg":
                        continue
                    if self.styles["skip_extra_triggers"]:
                        if eI.get("source") is not None or eJ.get("source") is not None:
                            continue
                # only consider paths between entities (NOTE! entities, not only named entities)
                if self.styles["headsOnly"]:
                    if (len(sentenceGraph.tokenIsEntityHead[tI]) == 0) or (len(sentenceGraph.tokenIsEntityHead[tJ]) == 0):
                        continue

                examples = self.buildExamplesForPair(tI, tJ, paths, sentenceGraph, goldGraph, entityToGold, eI, eJ, structureAnalyzer, examplesAreDirected)
                for categoryName, features, extra in examples:
                    # make example
                    if self.styles["binary"]:
                        # binary classification: every non-"neg" category is the positive class
                        if categoryName != "neg":
                            category = 1
                        else:
                            category = -1
                        extra["categoryName"] = "i"
                    else:
                        category = self.classSet.getId(categoryName)
                    example = [sentenceGraph.getSentenceId()+".x"+str(exampleIndex), category, features, extra]
                    ExampleUtils.appendExamples([example], outfile)
                    exampleIndex += 1

        return exampleIndex
Esempio n. 4
0
    def buildExamplesFromGraph(self, sentenceGraph, outfile, goldGraph=None, structureAnalyzer=None):
        """
        Build one example for each token of the sentence
        """
        examples = []
        exampleIndex = 0
        
        self.tokenFeatures = {}

        if goldGraph != None:
            entityToGold = EvaluateInteractionXML.mapEntities(sentenceGraph.entities, goldGraph.entities)
        
        namedEntityCount = 0
        entityCount = 0
        for entity in sentenceGraph.entities:
            if entity.get("given") == "True": # known data which can be used for features
                namedEntityCount += 1
            else: # known data which can be used for features
                entityCount += 1
        namedEntityCountFeature = "nameCount_" + str(namedEntityCount)
        entityCountFeature = "entityCount_" + str(entityCount)
        
        bagOfWords = {}
        for token in sentenceGraph.tokens:
            text = "bow_" + token.get("text")
            if not bagOfWords.has_key(text):
                bagOfWords[text] = 0
            bagOfWords[text] += 1
            if sentenceGraph.tokenIsName[token]:
                text = "ne_" + text
                if not bagOfWords.has_key(text):
                    bagOfWords[text] = 0
                bagOfWords[text] += 1
            if len(sentenceGraph.tokenIsEntityHead) > 0:
                text = "ge_" + text
                if not bagOfWords.has_key(text):
                    bagOfWords[text] = 0
                bagOfWords[text] += 1
            
            text = token.get("text")
            if self.styles["speculation_words"] and text in self.specWords:
                if not bagOfWords.has_key("spec_bow_"+text):
                    bagOfWords["spec_bow_"+text] = 0
                bagOfWords["spec_bow_"+text] += 1
                bagOfWords["spec_sentence"] = 1
        
        bowFeatures = {}
        for k,v in bagOfWords.iteritems():
            bowFeatures[self.featureSet.getId(k)] = v
        
        self.inEdgesByToken = {}
        self.outEdgesByToken = {}
        self.edgeSetByToken = {}
        for token in sentenceGraph.tokens:
            inEdges = sentenceGraph.dependencyGraph.getInEdges(token)
            self.inEdgesByToken[token] = inEdges
            outEdges = sentenceGraph.dependencyGraph.getOutEdges(token)
            self.outEdgesByToken[token] = outEdges
            self.edgeSetByToken[token] = set(inEdges + outEdges)
        
        for entity in sentenceGraph.entities:
            #token = sentenceGraph.tokens[i]
            token = sentenceGraph.entityHeadTokenByEntity[entity]
            # Recognize only non-named entities (i.e. interaction words)
            if entity.get("given") == "True":
                continue
            
            # CLASS
            if self.styles["classification"] == "multiclass":
                task3Type = "multiclass"
                categoryName = ""
                if entity.get("negation") == "True":
                    categoryName += "negation"
                if entity.get("speculation") == "True":
                    if categoryName != "":
                        categoryName += "---"
                    categoryName += "speculation"
                if categoryName == "":
                    categoryName = "neg"
                category = self.classSet.getId(categoryName)  
            elif self.styles["classification"] == "speculation":
                task3Type = "speculation"
                if entity.get("speculation") == "True":
                    category = self.classSet.getId("speculation")
                else:
                    category = 1
                if goldGraph != None:
                    if len(entityToGold[entity]) > 0 and entityToGold[entity][0].get("speculation") == "True":
                        category = self.classSet.getId("speculation")
                    else:
                        category = 1
                categoryName = self.classSet.getName(category)
            elif self.styles["classification"] == "negation":
                task3Type = "negation"
                if entity.get("negation") == "True":
                    category = self.classSet.getId("negation")
                else:
                    category = 1
                if goldGraph != None:
                    if len(entityToGold[entity]) > 0 and entityToGold[entity][0].get("negation") == "True":
                        category = self.classSet.getId("negation")
                    else:
                        category = 1
                categoryName = self.classSet.getName(category)
            self.exampleStats.beginExample(categoryName)

            # FEATURES
            features = {}

            # ENTITY TYPE
            #entityType = self.classSet.getId(self.getMergedEntityType(entity))
            #del self.classSet.Ids[self.getMergedEntityType(entity)]
#IF LOCAL
            # There's a mistake here. The entityType should be the string, not
            # the id of the type. But there's also another issue. getMergedEntityType
            # expects a list, not an item. Therefore the type is always empty ->
            # types don't get used in classification. But this is the code used in
            # the publication, so it will now be published as is, and fixed in a later
            # release.
            #
            # Besides, using the classSet here generates an unneeded
            # additional class, that shows up in evaluations etc. However, to be 
            # able to publish the exact models used for the publication experiments,
            # this can't be fixed so it breaks feature id consistency. Therefore I'll
            # now just remove the redundant class id from the classSet.
#ENDIF            
            #features[self.featureSet.getId(entityType)] = 1
            
            features[self.featureSet.getId(namedEntityCountFeature)] = 1
            features[self.featureSet.getId(entityCountFeature)] = 1
            #for k,v in bagOfWords.iteritems():
            #    features[self.featureSet.getId(k)] = v
            # pre-calculate bow _features_
            features.update(bowFeatures)
            
#            for j in range(len(sentenceGraph.tokens)):
#                text = "bow_" + sentenceGraph.tokens[j].get("text")
#                if j < i:
#                    features[self.featureSet.getId("bf_" + text)] = 1
#                elif j > i:
#                    features[self.featureSet.getId("af_" + text)] = 1
        
            # Main features
            text = token.get("text")
            features[self.featureSet.getId("txt_"+text)] = 1
            features[self.featureSet.getId("POS_"+token.get("POS"))] = 1
            stem = PorterStemmer.stem(text)
            features[self.featureSet.getId("stem_"+stem)] = 1
            features[self.featureSet.getId("nonstem_"+text[len(stem):])] = 1
            
            if self.styles["speculation_words"]:
                if text in self.specWords:
                    features[self.featureSet.getId("ent_spec")] = 1
                if stem in self.specWordStems:
                    features[self.featureSet.getId("ent_spec_stem")] = 1
            
            # Linear order features
            for i in range(len(sentenceGraph.tokens)):
                if token == sentenceGraph.tokens[i]:
                    break
            for index in [-3,-2,-1,1,2,3]:
                if i + index > 0 and i + index < len(sentenceGraph.tokens):
                    self.buildLinearOrderFeatures(sentenceGraph, i + index, str(index), features)
            
            # Content
            if i > 0 and text[0].isalpha() and text[0].isupper():
                features[self.featureSet.getId("upper_case_start")] = 1
            for j in range(len(text)):
                if j > 0 and text[j].isalpha() and text[j].isupper():
                    features[self.featureSet.getId("upper_case_middle")] = 1
                # numbers and special characters
                if text[j].isdigit():
                    features[self.featureSet.getId("has_digits")] = 1
                    if j > 0 and text[j-1] == "-":
                        features[self.featureSet.getId("has_hyphenated_digit")] = 1
                elif text[j] == "-":
                    features[self.featureSet.getId("has_hyphen")] = 1
                elif text[j] == "/":
                    features[self.featureSet.getId("has_fslash")] = 1
                elif text[j] == "\\":
                    features[self.featureSet.getId("has_bslash")] = 1
                # duplets
                if j > 0:
                    features[self.featureSet.getId("dt_"+text[j-1:j+1].lower())] = 1
                # triplets
                if j > 1:
                    features[self.featureSet.getId("tt_"+text[j-2:j+1].lower())] = 1
            
            # Attached edges (Hanging in and out edges)
            t1InEdges = self.inEdgesByToken[token]
            for edge in t1InEdges:
                edgeType = edge[2].get("type")
                features[self.featureSet.getId("t1HIn_"+edgeType)] = 1
                features[self.featureSet.getId("t1HIn_"+edge[0].get("POS"))] = 1
                features[self.featureSet.getId("t1HIn_"+edgeType+"_"+edge[0].get("POS"))] = 1
                tokenText = sentenceGraph.getTokenText(edge[0])
                features[self.featureSet.getId("t1HIn_"+tokenText)] = 1
                features[self.featureSet.getId("t1HIn_"+edgeType+"_"+tokenText)] = 1
            t1OutEdges = self.outEdgesByToken[token]
            for edge in t1OutEdges:
                edgeType = edge[2].get("type")
                features[self.featureSet.getId("t1HOut_"+edgeType)] = 1
                features[self.featureSet.getId("t1HOut_"+edge[1].get("POS"))] = 1
                features[self.featureSet.getId("t1HOut_"+edgeType+"_"+edge[1].get("POS"))] = 1
                tokenText = sentenceGraph.getTokenText(edge[1])
                features[self.featureSet.getId("t1HOut_"+tokenText)] = 1
                features[self.featureSet.getId("t1HOut_"+edgeType+"_"+tokenText)] = 1

            self.buildChains(token, sentenceGraph, features)
             
            extra = {"xtype":"task3","t3type":task3Type,"t":token.get("id"),"entity":entity.get("id")}
            #examples.append( (sentenceGraph.getSentenceId()+".x"+str(exampleIndex),category,features,extra) )
            example = (sentenceGraph.getSentenceId()+".x"+str(exampleIndex),category,features,extra)
            ExampleUtils.appendExamples([example], outfile)
            exampleIndex += 1            
            self.exampleStats.endExample()
        #return examples
        return exampleIndex
Esempio n. 5
0
    def buildExamplesFromGraph(self, sentenceGraph, outfile, goldGraph=None):
        """
        Build interaction (edge) examples for a single sentence and append
        them to outfile with ExampleUtils.appendExamples.

        Candidate pairs (i, j), i < j, are taken from the merged entities of
        the sentence when the "entities" style is set, otherwise from the
        sentence tokens. With the "directed" style each pair is processed in
        both orientations (i -> j first, then j -> i) and each orientation is
        subject to the *_limits / genia_task1 / pos_only filters. Without it,
        a single example per pair is written whose features are the union of
        the features of both orientations (only the forward features when the
        "graph_kernel" style is set).

        sentenceGraph -- the sentence to process; mergeInteractionGraph(True)
                         is called on it
        outfile       -- passed through to ExampleUtils.appendExamples
        goldGraph     -- optional gold graph; when given, the class name of a
                         directed entity pair is taken from
                         getGoldCategoryName instead of getCategoryName

        Returns the number of examples written (not the examples themselves).
        See Core/ExampleUtils for example format.
        """
        exampleIndex = 0
        styles = self.styles
        stats = self.exampleStats

        if styles["trigger_features"]:
            self.triggerFeatureBuilder.initSentence(sentenceGraph)
        if styles["evex"]:
            self.evexFeatureBuilder.initSentence(sentenceGraph)

        # Work on the merged entities; report how many were dropped by merging
        sentenceGraph.mergeInteractionGraph(True)
        entities = sentenceGraph.mergedEntities
        stats.addValue("Duplicate entities skipped",
                       len(sentenceGraph.entities) - len(entities))

        # Connect to optional gold graph
        entityToGold = None
        if goldGraph is not None:
            entityToGold = EvaluateInteractionXML.mapEntities(
                entities, goldGraph.entities)

        # The undirected dependency graph is handed to buildExample as "paths"
        paths = None
        if not styles["no_path"]:
            paths = sentenceGraph.dependencyGraph.toUndirected()

        # Generate examples based on interactions between entities or
        # interactions between tokens
        if styles["entities"]:
            loopRange = len(entities)
        else:
            loopRange = len(sentenceGraph.tokens)
        for i in range(loopRange - 1):
            for j in range(i + 1, loopRange):
                eI = eJ = None
                if styles["entities"]:
                    eI = entities[i]
                    eJ = entities[j]
                    tI = sentenceGraph.entityHeadTokenByEntity[eI]
                    tJ = sentenceGraph.entityHeadTokenByEntity[eJ]
                    if eI.get("type") == "neg" or eJ.get("type") == "neg":
                        continue
                    if styles["skip_extra_triggers"]:
                        if (eI.get("source") is not None
                                or eJ.get("source") is not None):
                            continue
                else:
                    tI = sentenceGraph.tokens[i]
                    tJ = sentenceGraph.tokens[j]
                # only consider paths between entities (NOTE! entities, not
                # only named entities)
                if styles["headsOnly"]:
                    if (len(sentenceGraph.tokenIsEntityHead[tI]) == 0
                            or len(sentenceGraph.tokenIsEntityHead[tJ]) == 0):
                        continue

                if styles["directed"]:
                    # Forward (i -> j) first, then reverse (j -> i). Both
                    # orientations go through exactly the same steps, only
                    # with the token/entity arguments swapped.
                    # NOTE(review): genia_task1 and the bb_limits statistics
                    # call .get() on the entities, which are None when the
                    # "entities" style is off -- confirm these styles are
                    # only combined with "entities".
                    for t1, t2, e1, e2 in ((tI, tJ, eI, eJ),
                                           (tJ, tI, eJ, eI)):
                        if styles["entities"]:
                            categoryName = self.getCategoryName(
                                sentenceGraph, e1, e2, True)
                            if goldGraph is not None:
                                categoryName = self.getGoldCategoryName(
                                    goldGraph, entityToGold, e1, e2, True)
                        else:
                            categoryName = self.getCategoryNameFromTokens(
                                sentenceGraph, t1, t2, True)

                        stats.beginExample(categoryName)
                        # Every filter is evaluated even after an earlier one
                        # has already rejected the pair, so that each filter
                        # is counted in the statistics.
                        makeExample = True
                        if (styles["genia_limits"]
                                and not self.isPotentialGeniaInteraction(
                                    e1, e2)):
                            makeExample = False
                            stats.filter("genia_limits")
                        if styles["genia_task1"] and (
                                e1.get("type") == "Entity"
                                or e2.get("type") == "Entity"):
                            makeExample = False
                            stats.filter("genia_task1")
                        if (styles["rel_limits"]
                                and not self.isPotentialRELInteraction(
                                    e1, e2)):
                            makeExample = False
                            stats.filter("rel_limits")
                        if (styles["co_limits"]
                                and not self.isPotentialCOInteraction(
                                    e1, e2, sentenceGraph)):
                            makeExample = False
                            stats.filter("co_limits")
                        if (styles["bb_limits"]
                                and not self.isPotentialBBInteraction(
                                    e1, e2, sentenceGraph)):
                            makeExample = False
                            stats.filter("bb_limits")
                            if categoryName != "neg":
                                stats.filter("bb_limits(" + categoryName +
                                             ":" + e1.get("type") + "/" +
                                             e2.get("type") + ")")
                        # isPotentialBIInteraction receives the statistics
                        # object itself; no filter is recorded here.
                        if (styles["bi_limits"]
                                and not self.isPotentialBIInteraction(
                                    e1, e2, sentenceGraph, stats)):
                            makeExample = False
                        if (styles["epi_limits"]
                                and not self.isPotentialEPIInteraction(
                                    e1, e2, sentenceGraph)):
                            makeExample = False
                            stats.filter("epi_limits")
                        if (styles["id_limits"]
                                and not self.isPotentialIDInteraction(
                                    e1, e2, sentenceGraph)):
                            makeExample = False
                            stats.filter("id_limits")
                        if styles["pos_only"] and categoryName == "neg":
                            makeExample = False
                            stats.filter("pos_only")

                        if makeExample:
                            example = self.buildExample(
                                t1, t2, paths, sentenceGraph, categoryName,
                                exampleIndex, e1, e2)
                            ExampleUtils.appendExamples([example], outfile)
                            exampleIndex += 1
                        stats.endExample()
                else:
                    # Undirected: one example per pair, no limit filters.
                    # NOTE(review): goldGraph is not consulted in this
                    # branch -- confirm that is intended.
                    if styles["entities"]:
                        categoryName = self.getCategoryName(
                            sentenceGraph, eI, eJ, False)
                    else:
                        categoryName = self.getCategoryNameFromTokens(
                            sentenceGraph, tI, tJ, False)
                    stats.beginExample(categoryName)
                    forwardExample = self.buildExample(
                        tI, tJ, paths, sentenceGraph, categoryName,
                        exampleIndex, eI, eJ)
                    if not styles["graph_kernel"]:
                        # Merge the reverse-orientation features (element 2
                        # of the example tuple) into the forward example
                        reverseExample = self.buildExample(
                            tJ, tI, paths, sentenceGraph, categoryName,
                            exampleIndex, eJ, eI)
                        forwardExample[2].update(reverseExample[2])
                    ExampleUtils.appendExamples([forwardExample], outfile)
                    exampleIndex += 1
                    stats.endExample()

        return exampleIndex
Esempio n. 6
0
    def buildExamplesFromGraph(self, sentenceGraph, outfile, goldGraph=None):
        """
        Build one task 3 (speculation / negation) example for each entity of
        the sentence that is not a named entity, and append it to outfile
        with ExampleUtils.appendExamples.

        The class of an example is chosen by self.styles["classification"]
        ("multiclass", "speculation" or "negation"). For "speculation" and
        "negation", a given goldGraph overrides the entity's own attribute
        with that of the first gold entity mapped to it.

        sentenceGraph -- the sentence to process
        outfile       -- passed through to ExampleUtils.appendExamples
        goldGraph     -- optional gold graph used for class labels

        Returns the number of examples written.
        Raises ValueError for an unrecognised "classification" style.
        """
        exampleIndex = 0

        self.tokenFeatures = {}

        entityToGold = None
        if goldGraph is not None:
            entityToGold = EvaluateInteractionXML.mapEntities(
                sentenceGraph.entities, goldGraph.entities)

        # Sentence-level counts of named and other entities (known data
        # which can be used for features)
        namedEntityCount = 0
        entityCount = 0
        for entity in sentenceGraph.entities:
            if entity.get("isName") == "True":
                namedEntityCount += 1
            else:
                entityCount += 1
        namedEntityCountFeature = "nameCount_" + str(namedEntityCount)
        entityCountFeature = "entityCount_" + str(entityCount)

        # Sentence-level bag of words, shared by all examples of the sentence
        bagOfWords = {}
        for token in sentenceGraph.tokens:
            text = "bow_" + token.get("text")
            bagOfWords[text] = bagOfWords.get(text, 0) + 1
            if sentenceGraph.tokenIsName[token]:
                text = "ne_" + text
                bagOfWords[text] = bagOfWords.get(text, 0) + 1
            # NOTE(review): this tests the size of the whole
            # tokenIsEntityHead mapping, not the entry for this token, so the
            # "ge_" feature is added for every token as soon as the mapping
            # is non-empty. Possibly tokenIsEntityHead[token] was meant; left
            # unchanged because altering it would change the feature set.
            if len(sentenceGraph.tokenIsEntityHead) > 0:
                text = "ge_" + text
                bagOfWords[text] = bagOfWords.get(text, 0) + 1

            text = token.get("text")
            if self.styles["speculation_words"] and text in self.specWords:
                specKey = "spec_bow_" + text
                bagOfWords[specKey] = bagOfWords.get(specKey, 0) + 1
                bagOfWords["spec_sentence"] = 1

        # Pre-calculate the bag-of-words feature ids once per sentence
        bowFeatures = {}
        for name, count in bagOfWords.items():
            bowFeatures[self.featureSet.getId(name)] = count

        # Cache the dependency edges attached to each token
        self.inEdgesByToken = {}
        self.outEdgesByToken = {}
        self.edgeSetByToken = {}
        for token in sentenceGraph.tokens:
            inEdges = sentenceGraph.dependencyGraph.getInEdges(token)
            self.inEdgesByToken[token] = inEdges
            outEdges = sentenceGraph.dependencyGraph.getOutEdges(token)
            self.outEdgesByToken[token] = outEdges
            self.edgeSetByToken[token] = set(inEdges + outEdges)

        for entity in sentenceGraph.entities:
            token = sentenceGraph.entityHeadTokenByEntity[entity]
            # Recognize only non-named entities (i.e. interaction words)
            if entity.get("isName") == "True":
                continue

            # CLASS
            classification = self.styles["classification"]
            if classification == "multiclass":
                task3Type = "multiclass"
                categoryName = ""
                if entity.get("negation") == "True":
                    categoryName += "negation"
                if entity.get("speculation") == "True":
                    if categoryName != "":
                        categoryName += "---"
                    categoryName += "speculation"
                if categoryName == "":
                    categoryName = "neg"
                category = self.classSet.getId(categoryName)
            elif classification == "speculation":
                task3Type = "speculation"
                if entity.get("speculation") == "True":
                    category = self.classSet.getId("speculation")
                else:
                    category = 1
                if goldGraph is not None:
                    goldEntities = entityToGold[entity]
                    if (len(goldEntities) > 0
                            and goldEntities[0].get("speculation") == "True"):
                        category = self.classSet.getId("speculation")
                    else:
                        category = 1
                categoryName = self.classSet.getName(category)
            elif classification == "negation":
                task3Type = "negation"
                if entity.get("negation") == "True":
                    category = self.classSet.getId("negation")
                else:
                    category = 1
                if goldGraph is not None:
                    goldEntities = entityToGold[entity]
                    if (len(goldEntities) > 0
                            and goldEntities[0].get("negation") == "True"):
                        category = self.classSet.getId("negation")
                    else:
                        category = 1
                categoryName = self.classSet.getName(category)
            else:
                # Previously this fell through to an UnboundLocalError (or
                # silently reused the previous entity's class).
                raise ValueError("Unknown classification style: " +
                                 str(classification))
            self.exampleStats.beginExample(categoryName)

            # FEATURES
            features = {}

            # ENTITY TYPE
            # The entity type is deliberately not used as a feature: the
            # original type feature was faulty and stays disabled so that
            # feature ids remain consistent with the published models.

            features[self.featureSet.getId(namedEntityCountFeature)] = 1
            features[self.featureSet.getId(entityCountFeature)] = 1
            features.update(bowFeatures)

            # Main features
            text = token.get("text")
            features[self.featureSet.getId("txt_" + text)] = 1
            features[self.featureSet.getId("POS_" + token.get("POS"))] = 1
            stem = PorterStemmer.stem(text)
            features[self.featureSet.getId("stem_" + stem)] = 1
            features[self.featureSet.getId("nonstem_" + text[len(stem):])] = 1

            if self.styles["speculation_words"]:
                if text in self.specWords:
                    features[self.featureSet.getId("ent_spec")] = 1
                if stem in self.specWordStems:
                    features[self.featureSet.getId("ent_spec_stem")] = 1

            # Linear order features
            # Find the position of the head token; i keeps its value after
            # the loop (the last index if the token is not found).
            for i, candidate in enumerate(sentenceGraph.tokens):
                if token == candidate:
                    break
            # NOTE(review): the "> 0" lower bound skips the first token of
            # the sentence (index 0) as a neighbour; ">= 0" may have been
            # intended. Left unchanged to keep the feature set stable.
            for index in [-3, -2, -1, 1, 2, 3]:
                if i + index > 0 and i + index < len(sentenceGraph.tokens):
                    self.buildLinearOrderFeatures(sentenceGraph, i + index,
                                                  str(index), features)

            # Content
            # An initial capital is only counted when the token is not the
            # first one of the sentence.
            if i > 0 and text[0].isalpha() and text[0].isupper():
                features[self.featureSet.getId("upper_case_start")] = 1
            for j, char in enumerate(text):
                if j > 0 and char.isalpha() and char.isupper():
                    features[self.featureSet.getId("upper_case_middle")] = 1
                # numbers and special characters
                if char.isdigit():
                    features[self.featureSet.getId("has_digits")] = 1
                    if j > 0 and text[j - 1] == "-":
                        features[self.featureSet.getId(
                            "has_hyphenated_digit")] = 1
                elif char == "-":
                    features[self.featureSet.getId("has_hyphen")] = 1
                elif char == "/":
                    features[self.featureSet.getId("has_fslash")] = 1
                elif char == "\\":
                    features[self.featureSet.getId("has_bslash")] = 1
                # duplets
                if j > 0:
                    duplet = text[j - 1:j + 1].lower()
                    features[self.featureSet.getId("dt_" + duplet)] = 1
                # triplets
                if j > 1:
                    triplet = text[j - 2:j + 1].lower()
                    features[self.featureSet.getId("tt_" + triplet)] = 1

            # Attached edges (Hanging in and out edges)
            # Edges are indexed as (edge[0], edge[1], edge[2]); the features
            # use the token at the far end and the type of edge[2].
            for edge in self.inEdgesByToken[token]:
                edgeType = edge[2].get("type")
                otherPOS = edge[0].get("POS")
                features[self.featureSet.getId("t1HIn_" + edgeType)] = 1
                features[self.featureSet.getId("t1HIn_" + otherPOS)] = 1
                features[self.featureSet.getId("t1HIn_" + edgeType + "_" +
                                               otherPOS)] = 1
                tokenText = sentenceGraph.getTokenText(edge[0])
                features[self.featureSet.getId("t1HIn_" + tokenText)] = 1
                features[self.featureSet.getId("t1HIn_" + edgeType + "_" +
                                               tokenText)] = 1
            for edge in self.outEdgesByToken[token]:
                edgeType = edge[2].get("type")
                otherPOS = edge[1].get("POS")
                features[self.featureSet.getId("t1HOut_" + edgeType)] = 1
                features[self.featureSet.getId("t1HOut_" + otherPOS)] = 1
                features[self.featureSet.getId("t1HOut_" + edgeType + "_" +
                                               otherPOS)] = 1
                tokenText = sentenceGraph.getTokenText(edge[1])
                features[self.featureSet.getId("t1HOut_" + tokenText)] = 1
                features[self.featureSet.getId("t1HOut_" + edgeType + "_" +
                                               tokenText)] = 1

            self.buildChains(token, sentenceGraph, features)

            extra = {
                "xtype": "task3",
                "t3type": task3Type,
                "t": token.get("id"),
                "entity": entity.get("id")
            }
            example = (sentenceGraph.getSentenceId() + ".x" +
                       str(exampleIndex), category, features, extra)
            ExampleUtils.appendExamples([example], outfile)
            exampleIndex += 1
            self.exampleStats.endExample()
        return exampleIndex
Esempio n. 7
0
 def makeExampleGraphWithGold(self, builder, sentenceGraph, goldGraph, sentenceIndex):
     """
     Draw the predicted graph of a sentence against its gold graph and
     count how the predictions compare.

     Tokens of the gold graph become the nodes of a new multigraph. Entities
     and interactions of sentenceGraph (the predictions) are compared with
     those of goldGraph and coloured accordingly:
       green   - prediction whose type equals the type of the mapped gold item
       brown   - matching gold entity whose "isName" attribute is "True"
       red     - prediction without a matching gold item (or, for edges,
                 with a different type than the gold interaction)
       #79BAEC - gold item that no prediction was mapped to

     The result is written to
     self.outDir + "/svg/<sentenceId>-<sentenceIndex>_learned.svg" and
     referenced from the report through builder.

     Parameters:
       builder       - report builder; header(), svg() and lineBreak() are called on it
       sentenceGraph - graph holding the predicted entities and interactions
       goldGraph     - graph holding the gold entities and interactions
       sentenceIndex - index used in the name of the SVG file

     Returns a dictionary with the counters "entities", "edges", "tp", "fp",
     "tn" and "fn". Entities and edges are counted into the same tp/fp/fn
     counters; "tn" is never incremented here.
     """
     exampleGraph = NX10.MultiDiGraph()
     for token in goldGraph.tokens:
         exampleGraph.add_node(token)
     # Maps a gold token to its [label, style attributes] pair (or None)
     extraByToken = {}
     stats = {"entities":0,"edges":0,"tp":0,"fp":0,"tn":0,"fn":0}
     
     entityMap = EvaluateInteractionXML.mapEntities(sentenceGraph.entities, goldGraph.entities, goldGraph.tokens)
     tokenMap = self.getTokenMap(sentenceGraph, goldGraph)
     toEntitiesWithPredictions = set()
     # items() instead of iteritems() so that the loop works on Python 2 and 3
     for entityFrom, entitiesTo in entityMap.items():
         stats["entities"] += 1
         entityFromHeadToken = sentenceGraph.entityHeadTokenByEntity[entityFrom]
         for entityTo in entitiesTo:
             toEntitiesWithPredictions.add(entityTo)
             entityToHeadToken = goldGraph.entityHeadTokenByEntity[entityTo]
             # Style already assigned to this gold token, None if there is none
             style = extraByToken.get(entityToHeadToken)
             eFromType = entityFrom.get("type")
             eToType = entityTo.get("type")
             if eFromType == eToType and eToType != "neg":
                 # A match replaces a missing style or a "missed gold" style
                 if style is None or style[1]["fill"] == "#79BAEC":
                     style = [eToType,{"fill":"green"}]
                 if entityTo.get("isName") == "True":
                     style = [eToType,{"fill":"brown"}]
                 else:
                     stats["tp"] += 1
             # NOTE: for a type mismatch the previous style (possibly None)
             # is stored unchanged, as in the original implementation.
             extraByToken[entityToHeadToken] = style
         if not entitiesTo: # false positive prediction
             stats["fp"] += 1
             goldToken = tokenMap[entityFromHeadToken]
             style = extraByToken.get(goldToken)
             # The stored style can be None (see the note above), so it must
             # be checked before it is indexed. A green style is kept.
             if style is None or style[1]["fill"] != "green":
                 style = [entityFrom.get("type"),{"fill":"red"}]
             extraByToken[goldToken] = style
     for entity in goldGraph.entities:
         if entity not in toEntitiesWithPredictions: # false negative gold
             stats["fn"] += 1
             extraByToken[goldGraph.entityHeadTokenByEntity[entity]] = [entity.get("type"),{"fill":"#79BAEC"}]
     
     toInteractionsWithPredictions = set()
     for interactionFrom in sentenceGraph.interactions:
         iFromType = interactionFrom.get("type")
         if iFromType == "neg":
             continue
         stats["edges"] += 1
         
         # Predicted end point entities and the ids of their gold counterparts
         entity1 = sentenceGraph.entitiesById[interactionFrom.get("e1")]
         entity2 = sentenceGraph.entitiesById[interactionFrom.get("e2")]
         e1Ids = [goldEntity.get("id") for goldEntity in entityMap[entity1]]
         e2Ids = [goldEntity.get("id") for goldEntity in entityMap[entity2]]
         
         # Gold tokens corresponding to the head tokens of the end points
         t1 = tokenMap[sentenceGraph.entityHeadTokenByEntity[entity1]]
         t2 = tokenMap[sentenceGraph.entityHeadTokenByEntity[entity2]]
         
         found = False
         for interactionTo in goldGraph.interactions:
             if interactionTo.get("e1") in e1Ids and interactionTo.get("e2") in e2Ids:
                 toInteractionsWithPredictions.add(interactionTo)
                 
                 iToType = interactionTo.get("type")
                 exampleGraph.add_edge(t1, t2, element=interactionFrom)
                 edge = self.getNXEdge(exampleGraph, t1, t2, interactionFrom)
                 
                 # Self-loops are added but neither styled nor counted
                 if t1 != t2:
                     if iToType == iFromType:
                         edge[2]["arcStyles"] = {"stroke":"green"}
                         edge[2]["labelStyles"] = {"fill":"green"}
                         stats["tp"] += 1
                     else:
                         edge[2]["arcStyles"] = {"stroke":"red"}
                         edge[2]["labelStyles"] = {"fill":"red"}
                         stats["fp"] += 1
                 found = True
         if not found and t1 != t2: # false positive prediction
             exampleGraph.add_edge(t1, t2, element=interactionFrom)
             edge = self.getNXEdge(exampleGraph, t1, t2, interactionFrom)
             edge[2]["arcStyles"] = {"stroke":"red"}
             edge[2]["labelStyles"] = {"fill":"red"}
             stats["fp"] += 1
     for interactionTo in goldGraph.interactions:
         if interactionTo not in toInteractionsWithPredictions: # false negative gold
             t1 = goldGraph.entityHeadTokenByEntity[goldGraph.entitiesById[interactionTo.get("e1")]]
             t2 = goldGraph.entityHeadTokenByEntity[goldGraph.entitiesById[interactionTo.get("e2")]]
             if t1 != t2:
                 exampleGraph.add_edge(t1, t2, element=interactionTo)
                 edge = self.getNXEdge(exampleGraph, t1, t2, interactionTo)
                 edge[2]["arcStyles"] = {"stroke":"#79BAEC"}
                 edge[2]["labelStyles"] = {"fill":"#79BAEC"}
                 stats["fn"] += 1
     
     builder.header("Classification",4)
     svgTokens = GraphToSVG.tokensToSVG(goldGraph.tokens,False,None,extraByToken)
     svgEdges = GraphToSVG.edgesToSVG(svgTokens, exampleGraph, "type", None)
     # The same file name is used for writing the SVG and for linking to it
     svgFileName = sentenceGraph.getSentenceId() + "-" + str(sentenceIndex) + "_learned.svg"
     svgElement = GraphToSVG.writeSVG(svgTokens, svgEdges, self.outDir + "/svg/" + svgFileName)
     builder.svg("../svg/" + svgFileName,svgElement.attrib["width"],svgElement.attrib["height"],id="learned_graph")
     builder.lineBreak()
     return stats