def calculateCorrelation(cogDist, taxDist):
    """
    Summarize the per-entry correlation between two nested distance mappings.

    For every outer key of ``cogDist`` the matching inner mapping is looked
    up in ``taxDist``; the values of both inner mappings are aligned on the
    inner keys of ``cogDist`` and handed to ``calculateWeightedKendall``.
    The mean and the sample standard deviation (``ddof=1``) of the per-key
    results are printed and returned.

    NOTE(review): judging by the names, ``cogDist`` holds COG-based and
    ``taxDist`` taxonomy-based distances keyed by genome directory -- confirm
    against the callers.

    :param cogDist: mapping outer key -> mapping inner key -> value
    :param taxDist: mapping with (at least) the same outer and inner keys as
                    ``cogDist``; a missing key raises ``KeyError``
    :return: tuple ``(mean, std)`` as computed by ``np.mean`` / ``np.std``
    """
    corrList = []
    # items() instead of the Python 2-only iteritems(): runs on Python 2 and 3.
    for dirName, cogDirDist in cogDist.items():
        taxDirDist = taxDist[dirName]
        # Freeze one key order so both value lists line up element by element
        # and the callee receives real lists (not dict views) on Python 3.
        innerKeys = list(cogDirDist)
        corrList.append(calculateWeightedKendall(
            [taxDirDist[key] for key in innerKeys],
            [cogDirDist[key] for key in innerKeys]))
    mean = np.mean(corrList)
    # ddof=1 -> sample standard deviation; NumPy yields NaN (with a warning)
    # when fewer than two correlations were collected.
    std = np.std(corrList, ddof=1)
    print("Result: mean %f std %f" % (mean, std))
    return mean, std
# Now find the optimal reclassified TaxaTypes and dump them into a file.
print("Build reclassification...")
reclassObjList = []
dumpDirNodeCostDict = {}
for dir in dirTaxaTypeDictDict:
    # Build the per-node cost dictionary for this entry and keep the dump
    # produced by utilJsonDump so it can be stored after the loop.
    nodeCostDict = taxaTypeTree.bldCostDict(dirTaxaTypeDictDict[dir])
    dumpDirNodeCostDict[dir] = taxaTypeTree.utilJsonDump(
        nodeAttribDict=nodeCostDict)
    # optimal() returns the chosen TaxaType together with its cost.
    taxaType, cost = taxaTypeTree.optimal(nodeCostDict)
    oldTaxaType = taxaDict[dir].type
    # Distance between the current classification and the proposed one.
    dist = oldTaxaType.distance(taxaType)
    reclassObjList.append(UtilObject(
        dir=dir,
        cogCorr=dirCorrDict[dir],
        oldClassif=oldTaxaType,
        newClassif=taxaType,
        taxaDist=dist,
        cogDist=cost,
        taxaDistCnts=taxaDistCntDict[dir]))
UtilStore(dumpDirNodeCostDict, DIR_NODE_COST_DICT())
# Ascending by cogCorr.
reclassObjList = sorted(reclassObjList, key=lambda x: x.cogCorr)
# Only entries whose new classification is at a non-zero distance from the
# old one are stored in the reclassified list.
UtilStore([x for x in reclassObjList if x.taxaDist > 0],
          HIER_RECLASSIFIED_LIST())
# Histogram of reclassification distances: distList[d] counts the entries
# with taxaDist == d.
# NOTE(review): assumes taxaDist is an integer in
# [0, TaxaType.maxDistance()] -- confirm.
distList = [0] * (TaxaType.maxDistance() + 1)
for obj in reclassObjList:
    distList[obj.taxaDist] += 1
print("Out of %d genomes, reclassification dist distribution %s" %
      (len(dirTaxaTypeDictDict), repr(distList)))
# Calculate the Kendall correlation between taxaDist and cogCorr.
corr = calculateWeightedKendall([x.taxaDist for x in reclassObjList],
                                [x.cogCorr for x in reclassObjList])
# Single-argument print(): emits the same text on Python 2 and Python 3,
# whereas the former print statement is a syntax error on Python 3.
print("taxaDist / cogCorr correlation %s" % (corr,))
def calculateOrderCorrelation(l):
    """
    Correlate the columns of a list of pairs.

    The list is transposed into one sequence per position, and those
    sequences are forwarded as positional arguments to
    calculateWeightedKendall.

    :param l: list of pairs
    :return: the result of calculateWeightedKendall for the two columns
             (the Kendall tau correlation)
    """
    columns = tuple(zip(*l))
    return calculateWeightedKendall(*columns)