# Builds the full table of pairwise COG distances between the entries of
# cogDict, using the distance function whose name is given on the command
# line.
#
# Usage: <script> <cogDistFuncName>
#   cogDistFuncName - name of a callable attribute of common_cogs_method;
#                     it is called as f(cs1, cs2) on two values of cogDict.
#
# NOTE: the import order is kept exactly as it was, because the two
# wildcard imports may shadow each other's names.
import sys
from taxonomy import *
from collections import defaultdict as DefDict
import common_cogs_method as ccm
from shared.algorithms.kendall import calculateWeightedKendall
from shared.pyutils.utils import *
import operator
import math

if len(sys.argv) != 2:
    # Diagnostics go to stderr so they do not end up in redirected output.
    sys.stderr.write("Missing COG distance function name\n")
    sys.exit(-1)

# Resolve the distance function by name. An unknown or non-callable name is
# reported explicitly instead of surfacing as an AttributeError traceback.
cogDistFunc = getattr(ccm, sys.argv[1], None)
if not callable(cogDistFunc):
    sys.stderr.write(
        "Unknown COG distance function name: %s\n" % sys.argv[1])
    sys.exit(-1)

cogDict, cogFreq, cogWeightDict, taxaDict = ccm.buildCogTaxaDict()
print("cogDict len %d, taxaDict len %d" % (len(cogDict), len(taxaDict)))

# UtilLoad / GENOME_CORR_DICT presumably come from one of the wildcard
# imports above - TODO confirm which module provides them.
dirCorrDict = UtilLoad(GENOME_CORR_DICT())
print("dirCorrDict len %d" % len(dirCorrDict))

print("Building COG distances...")
# cogDist[dir1][dir2] -> cogDistFunc(cogDict[dir1], cogDict[dir2]).
# Every ordered pair is evaluated (including dir1 == dir2), so nothing is
# assumed about the symmetry of cogDistFunc.
cogDist = DefDict(dict)
for ordinal, (dir1, cs1) in enumerate(cogDict.iteritems(), start=1):
    # Single-line progress indicator: "\r" returns to the start of the line
    # and the explicit flush makes it visible even though no newline is
    # written.
    sys.stdout.write("\r%d. %s" % (ordinal, dir1))
    sys.stdout.flush()
    distRow = cogDist[dir1]
    for dir2, cs2 in cogDict.iteritems():
        distRow[dir2] = cogDistFunc(cs1, cs2)

print("\nBuilding average distances for TaxaTypes...")
# Genome dir -> dict of {taxaTypes -> avg COG distance to dir}
# NOTE(review): the inner default value is a list - presumably distances are
# collected there before being averaged; confirm against the code below.
dirTaxaTypeDictDict = DefDict(lambda: DefDict(list))
# NOTE: the import order is kept exactly as it was, because the wildcard
# import may shadow names brought in before it.
from collections import defaultdict as DefDict
import common_cogs_method as commonCogsMethod
from shared.algorithms.kendall import calculateWeightedKendall
from shared.pyutils.utils import *
#from shared.pyutils.UtilNormDistrib import *
import config
import operator
import math
import itertools

# Cut-off thresholds (how they are applied is not visible in this part of
# the file).
CutOffDiff = 0.0
CutOffBestFit = 0.00001  # To account for rounding errors

# Only taxaDict is needed here; the other three returned values are
# discarded.
# NOTE(review): taxaDict is taken from the third position of the returned
# 4-tuple - confirm this matches buildCogTaxaDict's return order when
# noWeights is set.
(_, _, taxaDict, _) = commonCogsMethod.buildCogTaxaDict(noWeights=True)
print("taxaDict len %d" % len(taxaDict))

print("Reading COG distances...")
# Load the stored COG distance table.
cogDist = UtilLoad(COG_DIST_DICT())

# Build a tree of TaxaTypes
taxaTypeTree = TaxaTypeTree(taxaDict)

# Set of all Taxa types on all levels
allTaxaTypes = taxaTypeTree.getAllTypesSet()
print("Length of allTaxaTypes %d" % len(allTaxaTypes))

# Build a dictionary: [dir][taxaType] -> UtilObject(mean, std,
#     isAncest, distList), where
# mean - mean distance between this dir and all [other] dirs in this taxaType