# Stand-alone demo: loads one sentence from the BioInfer corpus, builds its
# sentence graph and writes the dependency and interaction graphs as SVG files.
import sys
import cElementTree as ET
from InteractionXML.CorpusElements import CorpusElements
from SentenceGraph import *
import GraphToSVG

if __name__ == "__main__":
    corpusPath = "Data/BioInferForComplexPPI.xml"

    print >> sys.stderr, "Loading corpus file", corpusPath
    root = ET.parse(corpusPath).getroot()
    corpus = CorpusElements(root, "split_gs", "split_gs")
    demoSentence = corpus.sentencesById["BioInfer.d6.s6"]

    print >> sys.stderr, "Building graph"
    sentenceGraph = SentenceGraph(demoSentence.tokens, demoSentence.dependencies)

    print >> sys.stderr, "Mapping interactions"
    sentenceGraph.mapInteractions(demoSentence.entities, demoSentence.interactions)

    print >> sys.stderr, "Drawing"
    tokenBoxes = GraphToSVG.tokensToSVG(sentenceGraph.tokens)
    # Both edge sets are laid out before anything is written, in the same
    # order as always: dependencies first, then interactions.
    dependencyArcs = GraphToSVG.edgesToSVG(tokenBoxes, sentenceGraph.dependencyGraph)
    interactionArcs = GraphToSVG.edgesToSVG(tokenBoxes, sentenceGraph.interactionGraph)
    GraphToSVG.writeSVG(tokenBoxes, dependencyArcs, "Data/depGraph.svg")
    GraphToSVG.writeSVG(tokenBoxes, interactionArcs, "Data/intGraph.svg")
# Stand-alone demo: loads one sentence from the BioInfer corpus, builds its
# sentence graph and writes the dependency and interaction graphs as SVG files.
import sys
import cElementTree as ET
from InteractionXML.CorpusElements import CorpusElements
from SentenceGraph import *
import GraphToSVG

if __name__ == "__main__":
    corpusPath = "Data/BioInferForComplexPPI.xml"

    print >> sys.stderr, "Loading corpus file", corpusPath
    root = ET.parse(corpusPath).getroot()
    corpus = CorpusElements(root, "split_gs", "split_gs")
    demoSentence = corpus.sentencesById["BioInfer.d6.s6"]

    print >> sys.stderr, "Building graph"
    sentenceGraph = SentenceGraph(demoSentence.tokens, demoSentence.dependencies)

    print >> sys.stderr, "Mapping interactions"
    sentenceGraph.mapInteractions(demoSentence.entities, demoSentence.interactions)

    print >> sys.stderr, "Drawing"
    tokenBoxes = GraphToSVG.tokensToSVG(sentenceGraph.tokens)
    # Both edge sets are laid out before anything is written, in the same
    # order as always: dependencies first, then interactions.
    dependencyArcs = GraphToSVG.edgesToSVG(tokenBoxes, sentenceGraph.dependencyGraph)
    interactionArcs = GraphToSVG.edgesToSVG(tokenBoxes, sentenceGraph.interactionGraph)
    GraphToSVG.writeSVG(tokenBoxes, dependencyArcs, "Data/depGraph.svg")
    GraphToSVG.writeSVG(tokenBoxes, interactionArcs, "Data/intGraph.svg")
def makeSentencePage(self, sentenceGraph, examples, classificationsByExample, prevAndNextId=None, goldGraph=None):
    """Write the HTML page and the SVG figures for one sentence.

    Side effects: appends a ``[sentenceGraph, examples, classified, tp, fp]``
    counter row to ``self.sentences``, sets ``sentenceGraph.stats`` and
    ``sentenceGraph.visualizationSet``, writes SVG files under
    ``self.outDir + "/svg/"`` and the page under ``self.outDir + "/sentences/"``,
    then runs ``repairApostrophes`` on the written page.

    Args:
        sentenceGraph: graph of the (predicted) sentence to visualize.
        examples: iterable of examples or None. Each example is indexed as
            ``example[0]`` (id used as key of ``classificationsByExample``),
            ``example[2]`` (feature dict) and ``example[3]`` (dict holding
            ``"visualizationSet"``).
        classificationsByExample: mapping from example id to a classification
            whose item ``[1]`` is a label such as ``"tp"`` or ``"fp"``; may be
            None, in which case no per-example classification is reported.
        prevAndNextId: optional pair ``(previousSentenceId, nextSentenceId)``;
            a None entry is rendered as plain text instead of a link.
        goldGraph: optional gold-standard graph. When given, the annotation
            figure is drawn from it and, if ``classificationsByExample`` is
            None, it is also used for the classification figure.

    Returns:
        None.
    """
    # Store info for the sentence list. The row is kept in a local so the
    # counters below update the very list object stored in self.sentences.
    sentenceId = sentenceGraph.getSentenceId()
    sentenceCounts = [sentenceGraph, 0, 0, 0, 0]
    self.sentences.append(sentenceCounts)
    sentenceIndex = len(self.sentences)
    sentenceGraph.stats = {"entities": 0, "edges": 0, "tp": 0, "fp": 0, "tn": 0, "fn": 0}

    # The function itself accepts classificationsByExample=None further down,
    # so every lookup into it has to be guarded as well.
    hasClassifications = classificationsByExample is not None

    visualizationSet = None
    if examples is not None:
        for example in examples:
            # All examples of one sentence must belong to the same set.
            if visualizationSet is None:
                visualizationSet = example[3]["visualizationSet"]
            else:
                assert(visualizationSet == example[3]["visualizationSet"])
            sentenceCounts[1] += 1
            if hasClassifications and example[0] in classificationsByExample:
                classification = classificationsByExample[example[0]]
                sentenceCounts[2] += 1
                if classification[1] == "tp":
                    sentenceCounts[3] += 1
                elif classification[1] == "fp":
                    sentenceCounts[4] += 1
    sentenceGraph.visualizationSet = visualizationSet

    # Every output file of this sentence shares the "<id>-<index>" stem.
    pageStem = sentenceId + "-" + str(sentenceIndex)
    svgDir = self.outDir + "/svg/"

    # Make the page
    entityTextById = {}
    for entityElement in sentenceGraph.entities:
        entityTextById[entityElement.get("id")] = entityElement.get("text")

    builder = HtmlBuilder()
    builder.newPage("Sentence " + sentenceId, "../")
    builder.addScript("../js/highlight_svg.js")
    builder.body.set("onload", "for(i in document.forms){document.forms[i].reset();}")

    builder.div()
    builder.header("Sentence " + sentenceId, 1)
    if prevAndNextId is not None:
        if prevAndNextId[0] is not None:
            builder.link(prevAndNextId[0] + "-" + str(sentenceIndex - 1) + ".html", "previous")
        else:
            builder.span("previous", "color:#0000FF;")
        if prevAndNextId[1] is not None:
            builder.link(prevAndNextId[1] + "-" + str(sentenceIndex + 1) + ".html", "next")
        else:
            builder.span("next", "color:#0000FF;")
    builder.span("Original ID: " + self.__getOrigId(sentenceGraph.sentenceElement))
    builder.span("Index: " + str(sentenceIndex))
    builder.closeElement()  # div
    builder.lineBreak()

    # Parse SVG
    builder.header("Parse", 4)
    svgTokens = GraphToSVG.tokensToSVG(sentenceGraph.tokens, True)
    nxDepGraph = NX10.MultiDiGraph()
    for edge in sentenceGraph.dependencyGraph.edges:
        nxDepGraph.add_edge(edge[0], edge[1], element=edge[2])
    svgDependencies = GraphToSVG.edgesToSVG(svgTokens, nxDepGraph)
    svgElement = GraphToSVG.writeSVG(svgTokens, svgDependencies, svgDir + pageStem + ".svg")
    builder.svg("../svg/" + pageStem + ".svg", svgElement.attrib["width"], svgElement.attrib["height"], id="dep_graph")
    builder.lineBreak()

    # Annotation SVG: drawn from the gold graph when there is one, otherwise
    # from the sentence graph's own interactions. (The fallback used to read
    # goldGraph.interactionGraph, which is always None on that path.)
    builder.header("Annotation", 4)
    if goldGraph is not None:
        annotationGraph = goldGraph
    elif sentenceGraph.interactionGraph is not None:
        annotationGraph = sentenceGraph
    else:
        annotationGraph = None
    if annotationGraph is not None:
        # Check for named entities
        isNameByToken = {}
        for token in annotationGraph.tokens:
            isNameByToken[token] = annotationGraph.getTokenText(token) == "NAMED_ENT"
        svgTokens = GraphToSVG.tokensToSVG(annotationGraph.tokens, False, annotationGraph.entitiesByToken, None, isNameByToken)
        nxGraph = NX10.MultiDiGraph()
        for edge in annotationGraph.interactionGraph.edges:
            nxGraph.add_edge(edge[0], edge[1], element=edge[2])
        svgInteractionEdges = GraphToSVG.edgesToSVG(svgTokens, nxGraph)
        svgElement = GraphToSVG.writeSVG(svgTokens, svgInteractionEdges, svgDir + pageStem + "_ann.svg")
        # Only embed the figure when it was actually written; otherwise the
        # page would reference a missing file with the parse figure's size.
        builder.svg("../svg/" + pageStem + "_ann.svg", svgElement.attrib["width"], svgElement.attrib["height"], id="ann_graph")
    builder.lineBreak()

    # Classification svg
    if hasClassifications:
        self.makeExampleGraph(builder, sentenceGraph, examples, classificationsByExample, sentenceIndex)
    elif goldGraph is not None:
        sentenceGraph.stats = self.makeExampleGraphWithGold(builder, sentenceGraph, goldGraph, sentenceIndex)

    # Outer layout table: one row holding the interaction and entity tables.
    builder.table(0, align="center", width="100%")
    builder.tableRow()

    # interactions
    builder.tableData(valign="top")
    builder.header("Interactions", 4)
    builder.table(1, True)
    builder.tableHead()
    builder.tableRow()
    for columnTitle in ("id", "type", "e1", "e2", "e1 word", "e2 word"):
        builder.tableHeader(columnTitle, True)
    builder.closeElement()  # close tableRow
    builder.closeElement()  # close tableHead
    builder.tableBody()
    for pairElement in sentenceGraph.interactions:
        builder.tableRow()
        # Ids are shown in short form: last dotted component minus its prefix letter.
        builder.tableData(pairElement.get("id").split(".")[-1][1:], True)
        builder.tableData(pairElement.get("type"), True)
        builder.tableData(pairElement.get("e1").split(".")[-1][1:], True)
        builder.tableData(pairElement.get("e2").split(".")[-1][1:], True)
        builder.tableData(entityTextById[pairElement.get("e1")], True)
        builder.tableData(entityTextById[pairElement.get("e2")], True)
        builder.closeElement()  # close tableRow
    builder.closeElement()  # close tableBody
    builder.closeElement()  # close table

    # entities
    builder.tableData(valign="top")
    builder.header("Entities", 4)
    builder.table(1, True)
    builder.tableHead()
    builder.tableRow()
    for columnTitle in ("id", "text", "type", "charOffset"):
        builder.tableHeader(columnTitle, True)
    builder.closeElement()  # close tableRow
    builder.closeElement()  # close tableHead
    builder.tableBody()
    for entityElement in sentenceGraph.entities:
        builder.tableRow()
        builder.tableData(entityElement.get("id").split(".")[-1][1:], True)
        builder.tableData(entityElement.get("text"), True)
        # .get() tolerates entities without an isName attribute.
        if entityElement.get("isName") == "True":
            builder.tableData("[" + entityElement.get("type") + "]", True)
        else:
            builder.tableData(entityElement.get("type"), True)
        # Re-assemble the offset list with the head offset underlined.
        headOffset = entityElement.get("headOffset")
        charOffset = ""
        headFound = False
        for charOffsetSplit in entityElement.get("charOffset").split(","):
            if charOffset != "":
                charOffset += ","
            if charOffsetSplit == headOffset:
                charOffset += "<u>" + charOffsetSplit + "</u>"
                headFound = True
            else:
                charOffset += charOffsetSplit
        # A head offset outside the listed spans is appended in parentheses;
        # an entity without any head offset gets nothing appended.
        if not headFound and headOffset is not None:
            charOffset += " (<u>" + headOffset + "</u>)"
        builder.tableData(charOffset, True)
        builder.closeElement()  # close tableRow
    # Six closes unwind, in order, everything still open from the two tables
    # and the outer layout table (same number and order as before).
    builder.closeElement()  # close tableBody
    builder.closeElement()  # close table
    builder.closeElement()
    builder.closeElement()
    builder.closeElement()
    builder.closeElement()

    # Examples: one line per example, "<id> (<label>): <feature>:<value> ..."
    if examples is not None:
        builder.header("Examples", 4)
        for example in examples:
            parts = [example[0]]
            if hasClassifications and example[0] in classificationsByExample:
                parts.append(" (" + classificationsByExample[example[0]][1] + ")")
            parts.append(":")
            features = example[2]
            if self.featureSet is not None:
                # Features are listed alphabetically by their readable name.
                featureNames = sorted(self.featureSet.getName(key) for key in features.keys())
                for featureName in featureNames:
                    parts.append(" " + featureName + ":" + str(features[self.featureSet.getId(featureName)]))
            else:
                for key in sorted(features.keys()):
                    parts.append(" " + str(key) + ":" + str(features[key]))
            builder.span("".join(parts))
            builder.lineBreak()
            builder.lineBreak()

    pagePath = self.outDir + "/sentences/" + pageStem + ".html"
    builder.write(pagePath)
    repairApostrophes(pagePath)
def makeExampleGraphWithGold(self, builder, sentenceGraph, goldGraph, sentenceIndex):
    """Draw the "Classification" figure by comparing predictions with gold.

    Entities and interactions of ``sentenceGraph`` (predicted) are compared
    with those of ``goldGraph``; tokens and edges are coloured by outcome,
    the figure is written to ``<outDir>/svg/<sentenceId>-<index>_learned.svg``
    and embedded through ``builder``.

    Colours used below: "green" for a prediction whose type equals the gold
    type, "brown" for gold entities with isName == "True", "red" for
    predictions without a matching gold item (or with a different interaction
    type), "#79BAEC" for gold items that have no prediction.

    Returns:
        dict with the counters "entities", "edges", "tp", "fp", "tn", "fn"
        ("tn" is never incremented here).

    NOTE(review): the source this block was recovered from had lost its
    indentation; the nesting below is the most plausible reading and should
    be confirmed against the repository version.
    """
    exampleGraph = NX10.MultiDiGraph()
    for token in goldGraph.tokens:
        exampleGraph.add_node(token)
    # arcStyles, labelStyles and edgeTypes are not read again in this method.
    arcStyles = {}
    labelStyles = {}
    extraByToken = {}  # gold token -> [type text, style dict] passed to tokensToSVG
    edgeTypes = {}
    stats = {"entities":0,"edges":0,"tp":0,"fp":0,"tn":0,"fn":0}

    # entityMap: predicted entity -> collection of gold entities (iterated and
    # len()'d below); tokenMap: predicted token -> gold token.
    entityMap = EvaluateInteractionXML.mapEntities(sentenceGraph.entities, goldGraph.entities, goldGraph.tokens)
    tokenMap = self.getTokenMap(sentenceGraph, goldGraph)
    toEntitiesWithPredictions = set()
    for entityFrom, entitiesTo in entityMap.iteritems():
        stats["entities"] += 1
        entityFromHeadToken = sentenceGraph.entityHeadTokenByEntity[entityFrom]
        for entityTo in entitiesTo:
            toEntitiesWithPredictions.add(entityTo)
            entityToHeadToken = goldGraph.entityHeadTokenByEntity[entityTo]
            style = None
            eFromType = entityFrom.get("type")
            eToType = entityTo.get("type")
            # Start from whatever style an earlier entity left on this token.
            if extraByToken.has_key(entityToHeadToken):
                style = extraByToken[entityToHeadToken]
            if eFromType == eToType:
                if eToType != "neg":
                    if style == None:
                        style = [entityTo.get("type"),{"fill":"green"}]
                    elif style[1]["fill"] == "#79BAEC":
                        # upgrade an earlier "#79BAEC" mark to a match
                        style = [entityTo.get("type"),{"fill":"green"}]
                    if entityTo.get("isName") == "True":
                        style = [entityTo.get("type"),{"fill":"brown"}]
                    else:
                        stats["tp"] += 1
            else:
                if eToType == "neg":
                    pass
            # NOTE(review): placed at loop-body level, so `style` may still be
            # None here when the types differ and the token had no earlier
            # style - confirm the intended nesting.
            extraByToken[entityToHeadToken] = style
        if len(entitiesTo) == 0:
            # predicted entity with no gold counterpart
            stats["fp"] += 1
            if extraByToken.has_key(tokenMap[entityFromHeadToken]):
                style = extraByToken[tokenMap[entityFromHeadToken]]
                # keep an existing green mark, otherwise paint the token red
                if style[1]["fill"] != "green":
                    style = [entityFrom.get("type"),{"fill":"red"}]
                extraByToken[tokenMap[entityFromHeadToken]] = style
            else:
                extraByToken[tokenMap[entityFromHeadToken]] = [entityFrom.get("type"),{"fill":"red"}]
    for entity in goldGraph.entities:
        if entity not in toEntitiesWithPredictions:
            # gold entity that no predicted entity was mapped to
            stats["fn"] += 1
            extraByToken[goldGraph.entityHeadTokenByEntity[entity]] = [entity.get("type"),{"fill":"#79BAEC"}]

    toInteractionsWithPredictions = set()
    for interactionFrom in sentenceGraph.interactions:
        if interactionFrom.get("type") == "neg":
            continue
        stats["edges"] += 1

        # Ids of the gold entities that the predicted e1 / e2 map to.
        e1s = entityMap[sentenceGraph.entitiesById[interactionFrom.get("e1")]]
        e1Ids = []
        for e1 in e1s:
            e1Ids.append(e1.get("id"))
        e2s = entityMap[sentenceGraph.entitiesById[interactionFrom.get("e2")]]
        e2Ids = []
        for e2 in e2s:
            e2Ids.append(e2.get("id"))
        # Gold-side head tokens of the predicted interaction's endpoints.
        t1 = tokenMap[sentenceGraph.entityHeadTokenByEntity[sentenceGraph.entitiesById[interactionFrom.get("e1")]]]
        t2 = tokenMap[sentenceGraph.entityHeadTokenByEntity[sentenceGraph.entitiesById[interactionFrom.get("e2")]]]
        iFromType = interactionFrom.get("type")

        found = False
        for interactionTo in goldGraph.interactions:
            if interactionTo.get("e1") in e1Ids and interactionTo.get("e2") in e2Ids:
                toInteractionsWithPredictions.add(interactionTo)
                iToType = interactionTo.get("type")
                # An edge is added for every matching gold interaction, also
                # when t1 == t2; only edges with t1 != t2 are styled/counted.
                exampleGraph.add_edge(t1, t2, element=interactionFrom)
                #edge = exampleGraph.get_edge(t1, t2, data=True)
                edge = self.getNXEdge(exampleGraph, t1, t2, interactionFrom)
                if t1 != t2:
                    if iToType == iFromType:
                        edge[2]["arcStyles"] = {"stroke":"green"}
                        edge[2]["labelStyles"] = {"fill":"green"}
                        stats["tp"] += 1
                    else:
                        edge[2]["arcStyles"] = {"stroke":"red"}
                        edge[2]["labelStyles"] = {"fill":"red"}
                        stats["fp"] += 1
                # NOTE(review): assumed to sit at the level of the matching
                # `if`, not inside `if t1 != t2` - confirm.
                found = True
        if not found: # false positive prediction
            if t1 != t2:
                exampleGraph.add_edge(t1, t2, element=interactionFrom)
                edge = self.getNXEdge(exampleGraph, t1, t2, interactionFrom)
                edge[2]["arcStyles"] = {"stroke":"red"}
                edge[2]["labelStyles"] = {"fill":"red"}
                stats["fp"] += 1
    for interactionTo in goldGraph.interactions:
        if interactionTo not in toInteractionsWithPredictions: # false negative gold
            t1 = goldGraph.entityHeadTokenByEntity[goldGraph.entitiesById[interactionTo.get("e1")]]
            t2 = goldGraph.entityHeadTokenByEntity[goldGraph.entitiesById[interactionTo.get("e2")]]
            if t1 != t2:
                exampleGraph.add_edge(t1, t2, element=interactionTo)
                edge = self.getNXEdge(exampleGraph, t1, t2, interactionTo)
                edge[2]["arcStyles"] = {"stroke":"#79BAEC"}
                edge[2]["labelStyles"] = {"fill":"#79BAEC"}
                stats["fn"] += 1

    builder.header("Classification",4)
    svgTokens = GraphToSVG.tokensToSVG(goldGraph.tokens,False,None,extraByToken)
    #arcStyles, labelStyles = self.getMatchingEdgeStyles(exampleGraph, sentenceGraph.interactionGraph, "green", "red" )
    svgEdges = GraphToSVG.edgesToSVG(svgTokens, exampleGraph, "type", None)
    sentenceId = sentenceGraph.getSentenceId()
    svgElement = GraphToSVG.writeSVG(svgTokens, svgEdges, self.outDir+"/svg/"+sentenceId+"-"+str(sentenceIndex)+"_learned.svg")
    builder.svg("../svg/" + sentenceId + "-"+str(sentenceIndex)+"_learned.svg",svgElement.attrib["width"],svgElement.attrib["height"],id="learned_graph")
    builder.lineBreak()
    return stats
def makeExampleGraph(self, builder, sentenceGraph, examples, classificationsByExample, sentenceIndex):
    """Draw the "Classification" figure from classified examples.

    Each example found in ``classificationsByExample`` whose label
    (``classification[1]``) is not "tn" is drawn either as an edge
    (``example[3]["xtype"] == "edge"``, between ``example[3]["t1"]`` and
    ``example[3]["t2"]``) or as a token mark (``"xtype" == "token"``, on
    ``example[3]["t"]``). Colours: "tp" green, "fp" red, "fn" "#79BAEC".
    The figure is written to ``<outDir>/svg/<sentenceId>-<index>_learned.svg``
    and embedded through ``builder``. Returns None.

    NOTE(review): the source this block was recovered from had lost its
    indentation; the nesting below is the most plausible reading and should
    be confirmed against the repository version.
    """
    # NOTE(review): this uses NX.XDiGraph with the edge data passed as the
    # third positional argument of add_edge, unlike the NX10.MultiDiGraph
    # calls elsewhere in this file - presumably an older networkx API.
    exampleGraph = NX.XDiGraph()#multiedges = True)
    for token in sentenceGraph.tokens:
        exampleGraph.add_node(token)
    arcStyles = {}     # edge -> arc style dict
    labelStyles = {}   # edge -> label style dict
    extraByToken = {}  # token -> (label or class name, style dict)
    edgeTypes = {}     # edge -> label text drawn on the edge
    if examples != None:
        for example in examples:
            if classificationsByExample.has_key(example[0]):
                classification = classificationsByExample[example[0]]
                if example[3]["xtype"] == "edge" and classification[1] != "tn": #and a[1] != "fn":
                    # Both branches currently add the same edge; the edge data
                    # is the example id, used as a lookup key further down.
                    if classification[2] != "multiclass":
                        exampleGraph.add_edge(example[3]["t1"], example[3]["t2"], example[0])
                    else:
                        exampleGraph.add_edge(example[3]["t1"], example[3]["t2"], example[0]) # self.classSet.getName(classification[3]))
                elif example[3]["xtype"] == "token" and classification[1] != "tn":
                    # NOTE(review): `style` is only assigned for "tp", "fp" and
                    # "fn"; any other label would reuse the previous value or
                    # raise NameError - presumably those are the only labels.
                    if classification[1] == "tp":
                        style = {"fill":"green"}
                    if classification[1] == "fp":
                        style = {"fill":"red"}
                    if classification[1] == "fn":
                        style = {"fill":"#79BAEC"}
                    if classification[2] != "multiclass":
                        extraByToken[example[3]["t"]] = (classification[1],style)
                    else:
                        extraByToken[example[3]["t"]] = (self.classSet.getName(classification[3]),style)
    for edge in exampleGraph.edges():
        addType = False
        # edge[2] is the example id stored as edge data above.
        classification = classificationsByExample[edge[2]][1]
        if classification == "tp":
            arcStyles[edge] = {"stroke":"green"}
            labelStyles[edge] = {"fill":"green"}
            addType = True
        elif classification == "fp":
            arcStyles[edge] = {"stroke":"red"}
            labelStyles[edge] = {"fill":"red"}
            addType = True
        elif classification == "fn":
            arcStyles[edge] = {"stroke":"#79BAEC"}
            labelStyles[edge] = {"fill":"#79BAEC"}
            addType = True
        if addType:
            if classificationsByExample[edge[2]][2] != "multiclass":
                edgeTypes[edge] = classificationsByExample[edge[2]][0][3]["type"]
            else:
                edgeTypes[edge] = self.classSet.getName(classificationsByExample[edge[2]][3])
                # NOTE(review): the "_rev"/"deprev" arrow decoration is assumed
                # to apply to the multiclass branch only - confirm nesting.
                # A "_rev" suffix is stripped, then ">" is appended or "<" is
                # prefixed depending on the example's "deprev" flag.
                if len(edgeTypes[edge]) > 3 and edgeTypes[edge][-4:] == "_rev":
                    edgeTypes[edge] = edgeTypes[edge][:-4]
                    if classificationsByExample[edge[2]][0][3]["deprev"]:
                        edgeTypes[edge] += ">"
                    else:
                        edgeTypes[edge] = "<" + edgeTypes[edge]
                else:
                    if classificationsByExample[edge[2]][0][3]["deprev"]:
                        edgeTypes[edge] = "<" + edgeTypes[edge]
                    else:
                        edgeTypes[edge] += ">"

    builder.header("Classification",4)
    svgTokens = GraphToSVG.tokensToSVG(sentenceGraph.tokens,False,None,extraByToken)
    #arcStyles, labelStyles = self.getMatchingEdgeStyles(exampleGraph, sentenceGraph.interactionGraph, "green", "red" )
    svgEdges = GraphToSVG.edgesToSVG(svgTokens, exampleGraph, arcStyles, labelStyles, None, edgeTypes)
    sentenceId = sentenceGraph.getSentenceId()
    svgElement = GraphToSVG.writeSVG(svgTokens, svgEdges, self.outDir+"/svg/"+sentenceId+"-"+str(sentenceIndex)+"_learned.svg")
    builder.svg("../svg/" + sentenceId + "-"+str(sentenceIndex)+"_learned.svg",svgElement.attrib["width"],svgElement.attrib["height"],id="learned_graph")
    builder.lineBreak()