コード例 #1
0
def calculateCorrelation(cogDist, taxDist):
    """
    Run calculateWeightedKendall once per outer key and summarize the results.

    For every outer key of cogDist, the values of taxDist[key] are looked up
    in the key order of cogDist[key] and paired with the values of
    cogDist[key]; both lists are passed to calculateWeightedKendall.

    :param cogDist: dict mapping an outer key to an inner dict of values
    :param taxDist: mapping indexed by the same outer keys; each inner
        mapping is indexed by every key of the matching cogDist inner dict
    :return: tuple (mean, std) of the per-key results, where std is computed
        with ddof=1
    """
    corrList = []
    # .items() instead of the Python 2-only .iteritems(): works unchanged on
    # both Python 2 and Python 3.
    for dirName, cogDirDist in cogDist.items():
        taxDirDist = taxDist[dirName]
        # Iterating a dict and calling .values() on it visit the entries in
        # the same order, so the two lists stay aligned element by element.
        # list() guarantees a real list on Python 3, where .values() is a view.
        corrList.append(calculateWeightedKendall(
            [taxDirDist[x] for x in cogDirDist],
            list(cogDirDist.values())))

    mean = np.mean(corrList)
    std = np.std(corrList, ddof=1)
    print("Result: mean %f std %f" % (mean, std))
    return mean, std
コード例 #2
0
# Now find optimal reclassified TaxaTypes, and dump them into a file
print("Build reclassification...")
reclassObjList = []
dumpDirNodeCostDict = {}
# NOTE(review): the loop variable `dir` shadows the builtin; the name is kept
# because code outside this section may read it -- confirm before renaming.
for dir in dirTaxaTypeDictDict:
    nodeCostDict = taxaTypeTree.bldCostDict(dirTaxaTypeDictDict[dir])
    # Keep the utilJsonDump() form of the cost dict for the UtilStore call
    # that follows the loop.
    dumpDirNodeCostDict[dir] = taxaTypeTree.utilJsonDump(
        nodeAttribDict=nodeCostDict)
    taxaType, cost = taxaTypeTree.optimal(nodeCostDict)
    # Distance between the current type (taxaDict[dir].type) and the type
    # returned by optimal().
    dist = taxaDict[dir].type.distance(taxaType)
    reclassObjList.append(UtilObject(
        dir=dir, cogCorr=dirCorrDict[dir],
        oldClassif=taxaDict[dir].type, newClassif=taxaType,
        taxaDist=dist, cogDist=cost, taxaDistCnts=taxaDistCntDict[dir]))

UtilStore(dumpDirNodeCostDict, DIR_NODE_COST_DICT())

# Order the records by ascending cogCorr.
reclassObjList.sort(key=lambda x: x.cogCorr)

# Only records whose type actually changed (taxaDist > 0) are passed on.
UtilStore([x for x in reclassObjList if x.taxaDist > 0],
          HIER_RECLASSIFIED_LIST())

# Histogram of distances: distList[d] counts the records with taxaDist == d.
distList = [0] * (TaxaType.maxDistance() + 1)
for obj in reclassObjList:
    distList[obj.taxaDist] += 1
print("Out of %d genomes, reclassification dist distribution %s" %
      (len(dirTaxaTypeDictDict), repr(distList)))

# Calculate Kendall correlation between taxaDist and cogCorr
corr = calculateWeightedKendall([x.taxaDist for x in reclassObjList],
    [x.cogCorr for x in reclassObjList])
# Call form with %-formatting: prints the same "label value" line as the old
# Python 2 print statement did, and also parses on Python 3.
print("taxaDist / cogCorr correlation %s" % (corr,))
コード例 #3
0
def calculateOrderCorrelation(l):
    """
    Split a list of pairs into its columns and correlate the columns.

    :param l: list of pairs; it is transposed with zip so that all first
        items form one sequence and all second items form another
    :return: the result of calculateWeightedKendall for those sequences
        (described by the original author as the Kendall tau correlation)
    """
    columns = zip(*l)
    return calculateWeightedKendall(*columns)