def matchTrees(tmData1, tmData2, columnList, threshold, resIds=False):
    """Pair the nodes of two trees through a cost-matrix assignment.

    A cost matrix is built with one row per node of whichever input has
    more entries in its ``tree`` mapping and one column per node of the
    other input; it is then passed to munkreskuhn.assignAndReturnMatches.

    Args:
        tmData1: first input; the keys of its ``tree`` mapping are the nodes.
        tmData2: second input, used the same way.
        columnList: columns forwarded to the pair-scoring function.
        threshold: accepted for interface compatibility; not read here.
        resIds: when true, pairs are scored by compareColumnsResidues;
            otherwise by compareColumns, using the mean/stddev tables from
            tm3.calcColumnsMeanStddev computed over both inputs.

    Returns:
        A list of ``(node of tmData1, node of tmData2, third match field)``
        tuples. The tuple order follows the caller's argument order even
        when the inputs were swapped internally.
    """
    # Choose the scoring function once; the statistics are only needed
    # (and only computed) for the column-based comparison.
    if resIds:

        def scorePair(rowNode, colNode):
            return compareColumnsResidues(rowNode, colNode, columnList)

    else:
        means, stddevs = tm3.calcColumnsMeanStddev(columnList, [tmData1, tmData2])

        def scorePair(rowNode, colNode):
            return compareColumns(rowNode, colNode, columnList, means, stddevs)

    # Rows always come from the input with more nodes; remember whether the
    # inputs were exchanged so the result can be reported in caller order.
    swapped = len(tmData1.tree.keys()) < len(tmData2.tree.keys())
    if swapped:
        tmData1, tmData2 = tmData2, tmData1

    colNodes = list(tmData2.tree.keys())
    rowNodes = list(tmData1.tree.keys())
    costMatrix = [
        [scorePair(rowNode, colNode) for colNode in colNodes]
        for rowNode in rowNodes
    ]

    assignments = munkreskuhn.assignAndReturnMatches(costMatrix)

    # Each assignment is indexed as (row index, column index, value).
    if swapped:
        return [(colNodes[hit[1]], rowNodes[hit[0]], hit[2]) for hit in assignments]
    return [(rowNodes[hit[0]], colNodes[hit[1]], hit[2]) for hit in assignments]
def findSimilarTrees(
    tmDataList,
    columnListNames,
    sizeColName,
    sizeMin=-1.0,
    sizeMax=10000000000,
    outputEachPair=False,
    justKeepBest=False,
):
    """Score every unordered pair of trees and return the scores as a table.

    For each pair ``(tmDataList[i], tmDataList[j])`` with ``i < j`` a match
    matrix is obtained from ``dot.dot([a, b]).computeSearchConnections``.
    The pair score is then either the mean of the third field of the
    matches returned by munkreskuhn.assignAndReturnMatches (default), or the
    smallest entry of the match matrix (``justKeepBest``).

    Args:
        tmDataList: indexable collection of tree data objects. They are used
            as dictionary keys, so they must be hashable. The first element
            resolves the column titles.
        columnListNames: titles resolved via ``titlesToColumns``.
        sizeColName: title resolved via ``titleToColumn``.
        sizeMin: forwarded to computeSearchConnections as ``sizeMin``.
        sizeMax: forwarded to computeSearchConnections as ``sizeMax``.
        outputEachPair: when true (and ``justKeepBest`` is false), write a
            GDL file named ``<inputFileName1>_<inputFileName2>.gdl`` for
            each pair.
        justKeepBest: when true, skip the assignment and keep only the
            minimum matrix entry per pair.

    Returns:
        A dict with one key per element of ``tmDataList``; each value is a
        dict mapping every later element to the pair score. A pair with no
        matches (or an empty matrix in ``justKeepBest`` mode) scores
        ``float("inf")``.
    """
    # Floating-point infinity; passed where the callee takes a leading
    # numeric argument (presumably a cutoff, so nothing is filtered -- TODO
    # confirm against dot.computeSearchConnections / addSearchConnections).
    infinity = float("inf")

    columnList = tmDataList[0].titlesToColumns(columnListNames)
    sizeCol = tmDataList[0].titleToColumn(sizeColName)
    colToMean, colToStddev = calcColumnsMeanStddev(columnList, tmDataList)

    totalMatrix = {}
    for tmDataCount1, tmData1 in enumerate(tmDataList):
        totalMatrix[tmData1] = {}
        for tmDataCount2, tmData2 in enumerate(tmDataList):
            # Only visit each unordered pair once, and never a tree with itself.
            if tmDataCount2 <= tmDataCount1:
                continue

            dotData = dot.dot([tmData1, tmData2])
            rowNames, colNames, matchMatrix, tooBig = dotData.computeSearchConnections(
                infinity,
                columnList,
                colToMean,
                colToStddev,
                False,
                sizeCol,
                False,
                False,
                sizeMin=sizeMin,
                sizeMax=sizeMax,
                doSelfScore=False,
                returnMatrix=True,
            )

            if justKeepBest:
                # Just find the best (smallest) entry; stays at infinity
                # when the matrix has no entries.
                bestEntry = infinity
                for row in matchMatrix:
                    for entry in row:
                        if entry < bestEntry:
                            bestEntry = entry
                totalMatrix[tmData1][tmData2] = bestEntry
                continue

            matches = munkreskuhn.assignAndReturnMatches(matchMatrix)
            matchCount = len(matches)
            if matchCount:
                pairScore = sum(match[2] for match in matches) / float(matchCount)
            else:
                # No matches: avoid dividing by zero and record the same
                # "no score" value the justKeepBest branch produces.
                pairScore = infinity
            totalMatrix[tmData1][tmData2] = pairScore

            if outputEachPair:
                # Output a GDL for each pair of the munkres match.
                newMatches = []
                # NOTE(review): justNodes aliases tooBig, so the appends
                # below also grow the list returned by
                # computeSearchConnections. Kept as-is; confirm whether a
                # copy would be safe.
                justNodes = tooBig
                for match in matches:
                    node1 = rowNames[match[0]]
                    node2 = colNames[match[1]]
                    justNodes.append(node1)
                    justNodes.append(node2)
                    newMatches.append([tmData1, tmData2, node1, node2, match[2]])
                dotData.matchList = newMatches
                dotData.addSearchConnections(infinity, remove=True)
                dotData.writeGdl(
                    tmData1.inputFileName + "_" + tmData2.inputFileName + ".gdl",
                    justNodes=justNodes,
                    edges=True,
                    force=True,
                )
    return totalMatrix