def buildAdjacencyMatrix(tokenElements, dependencyElements, entityElements, metamapElements, pairElement, matrixSettings):
    """Build the adjacency matrix, labels and class value for one entity pair.

    A ParseGraph is created from the token and dependency elements, its
    dependency weights, dependency prefixes and token texts are configured
    from matrixSettings, and the graph is then converted into an adjacency
    matrix.

    Args:
        tokenElements: token elements handed to ParseGraph.ParseGraph.
        dependencyElements: dependency elements; they are first filtered
            through removeDependencies() with matrixSettings.removeDependencies.
        entityElements: entity elements handed to markNamedEntities().
        metamapElements: optional iterable of elements whose get("tokenid")
            and get("basecodes") are read ("basecodes" is split on ",").
            None disables metamap codes.
        pairElement: element whose get() is read for "e1", "e2" and
            "interaction".
        matrixSettings: settings object whose attributes drive every
            configuration step below.

    Returns:
        A tuple (adjMatrix, labels, output). output is 1.0 when the pair's
        "interaction" attribute equals the string "True" and -1.0 otherwise.

    Note:
        Writes a message to stderr and calls sys.exit(1) when
        matrixSettings.tokenPPIText is neither MatrixSettings.ppiTexts.full
        nor MatrixSettings.ppiTexts.stem.
    """
    m = matrixSettings
    # Punctuation dependencies are mostly junk
    dependencyElements = removeDependencies(dependencyElements, m.removeDependencies)
    parseGraph = ParseGraph.ParseGraph(tokenElements, dependencyElements, m.mergeDependencies)
    parseGraph.shortestPathMethod = "dijkstra"
    parseGraph.markNamedEntities(entityElements)
    e1Id = pairElement.get("e1")
    e2Id = pairElement.get("e2")
    entity1TokenIds = parseGraph.getNamedEntityTokenIds([e1Id])
    entity2TokenIds = parseGraph.getNamedEntityTokenIds([e2Id])
    interactionWordTokenIds = parseGraph.getTokenIdsByText(m.interactionWords, False)

    # Give dependencies base weights
    parseGraph.setAllDependencyWeights(m.depBaseWeight)

    # Set dependencies' weights based on paths
    for style in ParseGraph.splitPathStyles(m.paths):
        paths = []
        if style["type"] == "binary":
            paths = parseGraph.buildBinaryPaths(
                entity1TokenIds, entity2TokenIds, style["length"],
                style["direction"] == "directed", m.pathTimeout)
        elif "tertiary" in style["type"]:
            paths = parseGraph.buildTertiaryPaths(
                entity1TokenIds, interactionWordTokenIds, entity2TokenIds,
                style["type"] == "closest_tertiary", style["length"],
                style["direction"] == "directed", m.pathTimeout)
            # The path builder's result is checked against None further down,
            # so it must be checked here too before it is measured or indexed.
            if paths is not None:
                if m.markInteractionWords == MatrixSettings.markedInteractionWords.fromTertiaryPaths:
                    parseGraph.setPPIInteractionWords(paths)
                # Keep only element [0] of every tertiary result; the list is
                # updated in place, exactly as before.
                for index, entry in enumerate(paths):
                    paths[index] = entry[0]
        if paths is not None:
            parseGraph.setDependencyWeightsByPath(paths, style["weight"])

    # Reduce dependencies' weights by distance from threshold
    if m.weightByDistance:
        parseGraph.reduceWeightByDistance(m.depWeightReductionThreshold, m.depWeightReductionFactor)

    # Set dependency prefixes (e.g. a shortest path prefix)
    if m.depPrefixThreshold > 0.0:
        parseGraph.setPPIPrefixForDependencies(m.depPrefix, m.depPrefixThreshold)

    # Set token texts
    if m.tokenPPIText == MatrixSettings.ppiTexts.full:
        parseGraph.ppiTextFromOriginalText()
    elif m.tokenPPIText == MatrixSettings.ppiTexts.stem:
        parseGraph.ppiTextFromStems()
    else:
        # sys.stderr.write emits the same text as the former print statement
        # and works on both Python 2 and Python 3.
        sys.stderr.write("Illegal ppiText setting %s\n" % (m.tokenPPIText,))
        sys.exit(1)

    # Add metamap codes
    if metamapElements is not None:
        metamapDict = {}
        for metamapElement in metamapElements:
            metamapDict[metamapElement.get("tokenid")] = metamapElement.get("basecodes").split(",")
        parseGraph.addMetamapCodes(metamapDict)

    if m.maskPPIText:
        parseGraph.maskNames(e1Id, e2Id)
    if m.tokenPositionTags:
        parseGraph.addPositionTags(entity1TokenIds, entity2TokenIds)

    # Class value: +1 for an interacting pair, -1 otherwise
    if pairElement.get("interaction") == "True":
        output = 1.
    else:
        output = -1.

    adjMatrix, labels = parseGraph.buildAdjacencyMatrix(floattype, m.directed, m.linearOrderWeight)
    return adjMatrix, labels, output
def buildAdjacencyMatrix(tokenElements, dependencyElements, entityElements, metamapElements, pairElement, matrixSettings):
    """Build the adjacency matrix, labels and class value for one entity pair.

    A ParseGraph is created from the token and dependency elements, its
    dependency weights, dependency prefixes and token texts are configured
    from matrixSettings, and the graph is then converted into an adjacency
    matrix.

    Args:
        tokenElements: token elements handed to ParseGraph.ParseGraph.
        dependencyElements: dependency elements; they are first filtered
            through removeDependencies() with matrixSettings.removeDependencies.
        entityElements: entity elements handed to markNamedEntities().
        metamapElements: optional iterable of elements whose get("tokenid")
            and get("basecodes") are read ("basecodes" is split on ",").
            None disables metamap codes.
        pairElement: element whose get() is read for "e1", "e2" and
            "interaction".
        matrixSettings: settings object whose attributes drive every
            configuration step below.

    Returns:
        A tuple (adjMatrix, labels, output). output is 1.0 when the pair's
        "interaction" attribute equals the string "True" and -1.0 otherwise.

    Note:
        Writes a message to stderr and calls sys.exit(1) when
        matrixSettings.tokenPPIText is neither MatrixSettings.ppiTexts.full
        nor MatrixSettings.ppiTexts.stem.
    """
    m = matrixSettings
    # Punctuation dependencies are mostly junk
    dependencyElements = removeDependencies(dependencyElements, m.removeDependencies)
    parseGraph = ParseGraph.ParseGraph(tokenElements, dependencyElements, m.mergeDependencies)
    parseGraph.shortestPathMethod = "dijkstra"
    parseGraph.markNamedEntities(entityElements)
    e1Id = pairElement.get("e1")
    e2Id = pairElement.get("e2")
    entity1TokenIds = parseGraph.getNamedEntityTokenIds([e1Id])
    entity2TokenIds = parseGraph.getNamedEntityTokenIds([e2Id])
    interactionWordTokenIds = parseGraph.getTokenIdsByText(m.interactionWords, False)

    # Give dependencies base weights
    parseGraph.setAllDependencyWeights(m.depBaseWeight)

    # Set dependencies' weights based on paths
    for style in ParseGraph.splitPathStyles(m.paths):
        paths = []
        if style["type"] == "binary":
            paths = parseGraph.buildBinaryPaths(
                entity1TokenIds, entity2TokenIds, style["length"],
                style["direction"] == "directed", m.pathTimeout)
        elif "tertiary" in style["type"]:
            paths = parseGraph.buildTertiaryPaths(
                entity1TokenIds, interactionWordTokenIds, entity2TokenIds,
                style["type"] == "closest_tertiary", style["length"],
                style["direction"] == "directed", m.pathTimeout)
            # The path builder's result is checked against None further down,
            # so it must be checked here too before it is measured or indexed.
            if paths is not None:
                if m.markInteractionWords == MatrixSettings.markedInteractionWords.fromTertiaryPaths:
                    parseGraph.setPPIInteractionWords(paths)
                # Keep only element [0] of every tertiary result; the list is
                # updated in place, exactly as before.
                for index, entry in enumerate(paths):
                    paths[index] = entry[0]
        if paths is not None:
            parseGraph.setDependencyWeightsByPath(paths, style["weight"])

    # Reduce dependencies' weights by distance from threshold
    if m.weightByDistance:
        parseGraph.reduceWeightByDistance(m.depWeightReductionThreshold, m.depWeightReductionFactor)

    # Set dependency prefixes (e.g. a shortest path prefix)
    if m.depPrefixThreshold > 0.0:
        parseGraph.setPPIPrefixForDependencies(m.depPrefix, m.depPrefixThreshold)

    # Set token texts
    if m.tokenPPIText == MatrixSettings.ppiTexts.full:
        parseGraph.ppiTextFromOriginalText()
    elif m.tokenPPIText == MatrixSettings.ppiTexts.stem:
        parseGraph.ppiTextFromStems()
    else:
        # sys.stderr.write emits the same text as the former print statement
        # and works on both Python 2 and Python 3.
        sys.stderr.write("Illegal ppiText setting %s\n" % (m.tokenPPIText,))
        sys.exit(1)

    # Add metamap codes
    if metamapElements is not None:
        metamapDict = {}
        for metamapElement in metamapElements:
            metamapDict[metamapElement.get("tokenid")] = metamapElement.get("basecodes").split(",")
        parseGraph.addMetamapCodes(metamapDict)

    if m.maskPPIText:
        parseGraph.maskNames(e1Id, e2Id)
    if m.tokenPositionTags:
        parseGraph.addPositionTags(entity1TokenIds, entity2TokenIds)

    # Class value: +1 for an interacting pair, -1 otherwise
    if pairElement.get("interaction") == "True":
        output = 1.
    else:
        output = -1.

    adjMatrix, labels = parseGraph.buildAdjacencyMatrix(floattype, m.directed, m.linearOrderWeight)
    return adjMatrix, labels, output