예제 #1
0
def matchTrees(tmData1, tmData2, columnList, threshold, resIds=False):
  """Match the nodes of two trees through an assignment over pairwise scores.

  A score is computed for every pair of node names (the keys of each
  ``.tree`` mapping) and the resulting matrix is handed to
  ``munkreskuhn.assignAndReturnMatches``.

  Args:
    tmData1: first tree data object; must expose a ``tree`` mapping.
    tmData2: second tree data object; must expose a ``tree`` mapping.
    columnList: columns forwarded to the comparison function.
    threshold: accepted for interface compatibility; not read by this function.
    resIds: when true, pairs are scored with ``compareColumnsResidues``;
      otherwise with ``compareColumns`` using the per-column mean/stddev
      computed over both inputs.

  Returns:
    A list of ``(nameFromTmData1, nameFromTmData2, match[2])`` tuples, always
    in the caller's argument order. ``match[2]`` is the third element of each
    match returned by the assignment (presumably its cost -- verify against
    ``munkreskuhn``).
  """
  useColumnStats = not resIds
  if useColumnStats:
    # Statistics are taken over the inputs in the caller's original order.
    columnsToMean, columnsToStddev = tm3.calcColumnsMeanStddev(
        columnList, [tmData1, tmData2])

  # The tree with more nodes always supplies the rows of the score matrix.
  swapped = len(tmData1.tree.keys()) < len(tmData2.tree.keys())
  if swapped:
    tmData1, tmData2 = tmData2, tmData1

  colNames = list(tmData2.tree.keys())
  rowNames = list(tmData1.tree.keys())

  def scorePair(rowNode, colNode):
    """Score one (row node, column node) pair with the selected comparison."""
    if useColumnStats:
      return compareColumns(
          rowNode, colNode, columnList, columnsToMean, columnsToStddev)
    return compareColumnsResidues(rowNode, colNode, columnList)

  scoreMatrix = [
      [scorePair(rowNode, colNode) for colNode in colNames]
      for rowNode in rowNames
  ]
  assignments = munkreskuhn.assignAndReturnMatches(scoreMatrix)

  if swapped:
    # Undo the swap so each tuple lists the caller's first tree first.
    return [
        (colNames[found[1]], rowNames[found[0]], found[2])
        for found in assignments
    ]
  return [
      (rowNames[found[0]], colNames[found[1]], found[2])
      for found in assignments
  ]
예제 #2
0
def findSimilarTrees(
    tmDataList,
    columnListNames,
    sizeColName,
    sizeMin=-1.0,
    sizeMax=10000000000,
    outputEachPair=False,
    justKeepBest=False,
):
    """Score every unordered pair of trees and return the scores as a table.

    For each pair ``(tmData1, tmData2)`` with ``tmData2`` later in
    ``tmDataList`` than ``tmData1``, a match matrix is obtained from
    ``dot.dot([tmData1, tmData2]).computeSearchConnections(...)``. The pair's
    score is then either the mean of ``match[2]`` over the assignment returned
    by ``munkreskuhn.assignAndReturnMatches`` (default) or the smallest entry
    of the match matrix (``justKeepBest=True``).

    Args:
        tmDataList: sequence of tree data objects. The first one is used to
            translate column titles; every object is used as a dict key, so
            they must be hashable.
        columnListNames: column titles, translated via ``titlesToColumns``.
        sizeColName: title of the size column, translated via
            ``titleToColumn``.
        sizeMin: forwarded to ``computeSearchConnections``.
        sizeMax: forwarded to ``computeSearchConnections``.
        outputEachPair: when true (and ``justKeepBest`` is false), write a
            ``<file1>_<file2>.gdl`` file for each pair.
        justKeepBest: when true, score each pair by the minimum matrix entry
            instead of the mean over the assignment.

    Returns:
        A dict of dicts: ``result[tmData1][tmData2]`` holds the pair score.
        Every element of ``tmDataList`` gets an inner dict, which is empty for
        the last element. An empty ``tmDataList`` yields ``{}``. A pair with
        no matches (or an empty matrix) scores ``float("inf")``.
    """
    totalMatrix = {}
    if not tmDataList:
        # Nothing to pair up; avoids indexing tmDataList[0] below.
        return totalMatrix

    # The original literals 1e10000000 / 1e1000000 overflow to infinity;
    # spell that value out explicitly.
    unbounded = float("inf")

    columnList = tmDataList[0].titlesToColumns(columnListNames)
    sizeCol = tmDataList[0].titleToColumn(sizeColName)
    colToMean, colToStddev = calcColumnsMeanStddev(columnList, tmDataList)

    for tmDataCount1, tmData1 in enumerate(tmDataList):
        totalMatrix[tmData1] = {}
        for tmDataCount2, tmData2 in enumerate(tmDataList):
            if tmDataCount2 <= tmDataCount1:
                continue  # each unordered pair is handled exactly once
            dotData = dot.dot([tmData1, tmData2])
            rowNames, colNames, matchMatrix, tooBig = dotData.computeSearchConnections(
                unbounded,
                columnList,
                colToMean,
                colToStddev,
                False,
                sizeCol,
                False,
                False,
                sizeMin=sizeMin,
                sizeMax=sizeMax,
                doSelfScore=False,
                returnMatrix=True,
            )

            if justKeepBest:
                # Seeding with infinity keeps the strict "<" scan of the
                # original loop: an empty matrix yields infinity.
                totalMatrix[tmData1][tmData2] = min(
                    [unbounded] + [entry for row in matchMatrix for entry in row]
                )
                continue

            matches = munkreskuhn.assignAndReturnMatches(matchMatrix)
            matchCount = len(matches)
            if matchCount:
                sumScore = sum(match[2] for match in matches)
                totalMatrix[tmData1][tmData2] = sumScore / float(matchCount)
            else:
                # No assignment at all: use the same sentinel the
                # justKeepBest branch gives for an empty matrix instead of
                # dividing by zero.
                totalMatrix[tmData1][tmData2] = unbounded

            if outputEachPair:  # output a gdl for each pair of the munkres match
                newMatches = []
                # NOTE(review): justNodes is the same list object as tooBig,
                # so the appends below extend the list returned by
                # computeSearchConnections -- confirm that is intended.
                justNodes = tooBig
                for match in matches:
                    node1 = rowNames[match[0]]
                    node2 = colNames[match[1]]
                    justNodes.append(node1)
                    justNodes.append(node2)
                    newMatches.append([tmData1, tmData2, node1, node2, match[2]])
                dotData.matchList = newMatches
                dotData.addSearchConnections(unbounded, remove=True)
                dotData.writeGdl(
                    tmData1.inputFileName + "_" + tmData2.inputFileName + ".gdl",
                    justNodes=justNodes,
                    edges=True,
                    force=True,
                )
    return totalMatrix