def updatePi(path, db, motifChrom="chr17"):
    """Store SNP and indel diversity values on motif documents.

    For every file in ``path`` matching ``*v5.20130502.sites.vcf.bw`` the
    file name minus its last extension is used as a stem, and the sibling
    files ``<stem>_snp.bw`` and ``<stem>_indel.bw`` are loaded through
    ``countVcf.getBinVarCoord``.  Every document of the collection
    ``"hg19" + motifChrom`` whose ``tf_name`` is one of IRF3, MAFK, NFYA,
    SIN3A or ZNF384 is then updated: ``cons.piSnp`` and ``cons.piIndel`` are
    set to the first element returned by ``countVcf.queryHist`` for the
    document's ``genomic_region`` start/end.

    Args:
        path: Directory that holds the ``.bw`` files.
        db: Object indexed by collection name (presumably a pymongo
            ``Database`` -- confirm against callers).
        motifChrom: Chromosome name; selects both the collection and the
            histogram built by ``countVcf.buildVarHist``.

    Returns:
        Always 0.
    """
    mcollection = db["hg19" + motifChrom]
    # Single-argument call form prints identically on Python 2 and Python 3.
    print('updating pi')

    query = {"tf_name": {"$in": ["IRF3", "MAFK", "NFYA", "SIN3A", "ZNF384"]}}
    expName = '1000genomes'

    for infile in glob.glob(os.path.join(path, "*v5.20130502.sites.vcf.bw")):
        stem = os.path.splitext(os.path.basename(infile))[0]
        bwFileSnp = os.path.join(path, stem + '_snp.bw')
        bwFileIndel = os.path.join(path, stem + '_indel.bw')

        coordDictSnp, valuesDictSnp = countVcf.getBinVarCoord(bwFileSnp, expName)
        coordDictIndel, valuesDictIndel = countVcf.getBinVarCoord(bwFileIndel, expName)

        # Histograms depend only on motifChrom, so they are built at most once
        # per file, and only if the query actually yields a document.
        histSnp = None
        histIndel = None

        # The query is issued once per file: a single cursor shared by all
        # files is exhausted after the first one, which would silently skip
        # the updates for every remaining file.
        for test in mcollection.find(query):
            motifStart = test["genomic_region"]["start"]
            motifEnd = test["genomic_region"]["end"]

            if histSnp is None:
                histSnp = countVcf.buildVarHist(
                    motifChrom, coordDictSnp, valuesDictSnp, expName)
            if histIndel is None:
                histIndel = countVcf.buildVarHist(
                    motifChrom, coordDictIndel, valuesDictIndel, expName)

            xs, xvals, sums = histSnp
            avgSnp = countVcf.queryHist(xs, xvals, sums, motifStart, motifEnd)[0]
            xs, xvals, sums = histIndel
            avgIndel = countVcf.queryHist(xs, xvals, sums, motifStart, motifEnd)[0]

            mcollection.update(
                {"_id": test["_id"]},
                {"$set": {"cons.piSnp": avgSnp, "cons.piIndel": avgIndel}},
                upsert=True)
    return 0
def updatePi(path, tfName, motifChrom):
    """Store the indel diversity value on motif documents of one factor.

    For every file in ``path`` matching ``*v5.20130502.sites.vcf.gz`` the
    file name minus its last extension is used as a stem, and
    ``<stem>_indel.bw`` is loaded through ``countVcf.getBinVarCoord``.  Every
    document matching ``tf_name == tfName`` and
    ``motif_genomic_regions_info.chr == motifChrom`` is then updated:
    ``motif_mapability_info.piIndel`` is set to the first element returned by
    ``countVcf.queryHist`` for the document's ``motif_genomic_regions_info``
    start/end, and that value is printed.

    NOTE(review): ``mcollection`` is not a parameter; it has to be bound at
    module level before this function processes any file.
    NOTE(review): this chunk holds another ``updatePi`` with a different
    signature; if both live in the same module, the later definition
    replaces the earlier one -- confirm which is intended to be callable.

    Args:
        path: Directory that holds the input files.
        tfName: Value matched against the ``tf_name`` field.
        motifChrom: Chromosome name used in the query and for the histogram
            built by ``countVcf.buildVarHist``.

    Returns:
        Always 0.
    """
    query = {"tf_name": tfName, "motif_genomic_regions_info.chr": motifChrom}
    expName = '1000genomes'

    for infile in glob.glob(os.path.join(path, "*v5.20130502.sites.vcf.gz")):
        stem = os.path.splitext(os.path.basename(infile))[0]
        bwFileIndel = os.path.join(path, stem + '_indel.bw')

        coordDictIndel, valuesDictIndel = countVcf.getBinVarCoord(bwFileIndel, expName)

        # The histogram depends only on motifChrom, so it is built at most
        # once per file, and only if the query actually yields a document.
        histIndel = None

        # The query is issued once per file: a single cursor shared by all
        # files is exhausted after the first one, which would silently skip
        # the updates for every remaining file.
        for test in mcollection.find(query):
            motifStart = test["motif_genomic_regions_info"]["start"]
            motifEnd = test["motif_genomic_regions_info"]["end"]

            if histIndel is None:
                histIndel = countVcf.buildVarHist(
                    motifChrom, coordDictIndel, valuesDictIndel, expName)

            xs, xvals, sums = histIndel
            avgIndel = countVcf.queryHist(xs, xvals, sums, motifStart, motifEnd)[0]

            mcollection.update(
                {"_id": test["_id"]},
                {"$set": {"motif_mapability_info.piIndel": avgIndel}},
                upsert=True)
            # Same output as the Python 2 statement `print "pi=", avgIndel`,
            # written so that it also runs on Python 3.
            print("pi= %s" % (avgIndel,))
    return 0