import sys
from taxonomy import *
from collections import defaultdict as DefDict
import common_cogs_method as ccm
from shared.algorithms.kendall import calculateWeightedKendall
from shared.pyutils.utils import *
import operator
import math

# Exactly one command-line argument is expected: the name of an attribute of
# common_cogs_method (imported as ccm) to use as the COG distance function.
if not len(sys.argv) == 2:
    print("Missing COG distance function name")
    sys.exit(-1)

# Resolve the function by name; getattr raises AttributeError when ccm has
# no attribute with the requested name.
cogDistFunc = getattr(ccm, sys.argv[1])

# buildCogTaxaDict() is unpacked into exactly four values; cogDict and
# taxaDict are used below, the other two are only bound here.
(cogDict, cogFreq, cogWeightDict, taxaDict) = ccm.buildCogTaxaDict()
print("cogDict len %d, taxaDict len %d" % (
    len(cogDict),
    len(taxaDict),
))

# Load the previously stored correspondence dictionary.
# NOTE(review): GENOME_CORR_DICT() presumably returns the location of the
# stored dictionary -- confirm against its definition.
dirCorrDict = UtilLoad(GENOME_CORR_DICT())
print("dirCorrDict len %d" % (len(dirCorrDict),))

# Fill cogDist[dir1][dir2] with cogDistFunc(cs1, cs2) for every ordered pair
# of cogDict entries, including each entry paired with itself.
print("Building COG distances...")
cogDist = DefDict(dict)
for ordinal, (dir1, cs1) in enumerate(cogDict.iteritems(), start=1):
    # Progress output: the trailing comma (Python 2 print statement)
    # suppresses the newline so that "\r" rewrites the same console line.
    print("\r%d. %s" % (ordinal, dir1)),
    # Fetch (and thereby create) the row for dir1 once, then fill it.
    _distRow = cogDist[dir1]
    for dir2, cs2 in cogDict.iteritems():
        _distRow[dir2] = cogDistFunc(cs1, cs2)

print("\nBuilding average distances for TaxaTypes...")


def _newTaxaTypeDict():
    # Factory for the inner mapping: a missing taxaType key yields an
    # empty list.
    return DefDict(list)


# Genome dir -> dict of {taxaType -> list}.
# NOTE(review): the original comment described the inner values as the
# average COG distance to dir, but the default value is a list; presumably
# distances are collected here and averaged later -- confirm.
dirTaxaTypeDictDict = DefDict(_newTaxaTypeDict)
# Example #2
# 0
from collections import defaultdict as DefDict
import common_cogs_method as commonCogsMethod
from shared.algorithms.kendall import calculateWeightedKendall
from shared.pyutils.utils import *
#from shared.pyutils.UtilNormDistrib import *
import config
import operator
import math
import itertools

# Cut-off thresholds (both floats).
# NOTE(review): where and how these are applied is not visible in this part
# of the file -- confirm against their uses.
CutOffDiff = 0.0
# Small non-zero tolerance to account for rounding errors.
CutOffBestFit = 1e-5


# Only the third of the four values returned by buildCogTaxaDict is kept.
# NOTE(review): assumes taxaDict is the third element when noWeights=True --
# confirm against buildCogTaxaDict.
(_, _, taxaDict, _) = commonCogsMethod.buildCogTaxaDict(noWeights=True)
print("taxaDict len %d" % (len(taxaDict),))

# Load the stored COG distances instead of recomputing them.
print("Reading COG distances...")
cogDist = UtilLoad(COG_DIST_DICT())

# Construct the TaxaTypes tree from taxaDict.
taxaTypeTree = TaxaTypeTree(taxaDict)

# Collect the set of all Taxa types across every level of the tree and
# report how many there are.
allTaxaTypes = taxaTypeTree.getAllTypesSet()
print("Length of allTaxaTypes %d" % (len(allTaxaTypes),))

# Build a dictionary: [dir][taxaType] -> UtilObject(mean, std,
# isAncest, distList), where
# mean - mean distance between this dir and all [other] dirs in this taxaType