Beispiel #1
0
def updatePi(path, db, motifChrom="chr17"):
    """Store SNP and indel diversity values on motif documents.

    For every file in ``path`` matching ``*v5.20130502.sites.vcf.bw`` the
    sibling ``<stem>_snp.bw`` and ``<stem>_indel.bw`` files are loaded through
    ``countVcf.getBinVarCoord``.  Each motif document of the selected
    transcription factors is then queried over its
    ``genomic_region.start``/``genomic_region.end`` interval and the first
    element of each ``countVcf.queryHist`` result is written to
    ``cons.piSnp`` and ``cons.piIndel``.

    Args:
        path: Directory that is searched for the input files.
        db: Database handle; the collection ``"hg19" + motifChrom`` is used.
        motifChrom: Chromosome name, used both for the collection name and
            as the chromosome passed to ``countVcf.buildVarHist``.

    Returns:
        Always ``0``.
    """
    mcollection = db["hg19" + motifChrom]
    print('updating pi')
    query = {"tf_name": {"$in": ["IRF3", "MAFK", "NFYA", "SIN3A", "ZNF384"]}}
    expName = '1000genomes'

    for infile in glob.glob(os.path.join(path, "*v5.20130502.sites.vcf.bw")):
        vcffilename = os.path.splitext(os.path.basename(infile))[0]
        bwFileSnp = os.path.join(path, vcffilename + '_snp.bw')
        bwFileIndel = os.path.join(path, vcffilename + '_indel.bw')

        coordDictSnp, valuesDictSnp = countVcf.getBinVarCoord(bwFileSnp, expName)
        coordDictIndel, valuesDictIndel = countVcf.getBinVarCoord(bwFileIndel, expName)

        # Histograms depend only on the file and motifChrom, so they are
        # built at most once per file -- and only if a motif actually needs
        # them, which avoids the work when the query matches nothing.
        histSnp = None
        histIndel = None

        # A cursor can be consumed only once.  Sharing one cursor across the
        # file loop meant every file after the first was silently skipped,
        # so a fresh query is issued for each file.
        for motif in mcollection.find(query):
            motifStart = motif["genomic_region"]["start"]
            motifEnd = motif["genomic_region"]["end"]

            if histSnp is None:
                histSnp = countVcf.buildVarHist(
                    motifChrom, coordDictSnp, valuesDictSnp, expName)
            if histIndel is None:
                histIndel = countVcf.buildVarHist(
                    motifChrom, coordDictIndel, valuesDictIndel, expName)

            xs, xvals, sums = histSnp
            avgSnp = countVcf.queryHist(xs, xvals, sums, motifStart, motifEnd)[0]
            xs, xvals, sums = histIndel
            avgIndel = countVcf.queryHist(xs, xvals, sums, motifStart, motifEnd)[0]

            # NOTE(review): Collection.update is the legacy PyMongo call;
            # confirm the installed driver version before switching to
            # update_one.
            mcollection.update(
                {"_id": motif["_id"]},
                {"$set": {"cons.piSnp": avgSnp, "cons.piIndel": avgIndel}},
                upsert=True)
    return 0
Beispiel #2
0
def updatePi(path, tfName, motifChrom):
    """Store an indel diversity value on the motif documents of one factor.

    For every file in ``path`` matching ``*v5.20130502.sites.vcf.gz`` the
    sibling ``<stem>_indel.bw`` file (stem = file name without its last
    extension) is loaded through ``countVcf.getBinVarCoord``.  Each document
    of the module-level ``mcollection`` matching ``tfName`` and ``motifChrom``
    is queried over its ``motif_genomic_regions_info.start``/``.end``
    interval; the first element of the ``countVcf.queryHist`` result is
    written to ``motif_mapability_info.piIndel`` and printed.

    Args:
        path: Directory that is searched for the input files.
        tfName: Value matched against the documents' ``tf_name`` field.
        motifChrom: Chromosome name, matched against
            ``motif_genomic_regions_info.chr`` and passed to
            ``countVcf.buildVarHist``.

    Returns:
        Always ``0``.
    """
    query = {"tf_name": tfName, "motif_genomic_regions_info.chr": motifChrom}
    expName = '1000genomes'

    for infile in glob.glob(os.path.join(path, "*v5.20130502.sites.vcf.gz")):
        vcffilename = os.path.splitext(os.path.basename(infile))[0]
        bwFileIndel = os.path.join(path, vcffilename + '_indel.bw')

        coordDictIndel, valuesDictIndel = countVcf.getBinVarCoord(bwFileIndel, expName)

        # Built lazily, at most once per file: it depends only on the file
        # and motifChrom, and is not needed when the query matches nothing.
        histIndel = None

        # A cursor can be consumed only once.  Sharing one cursor across the
        # file loop meant every file after the first was silently skipped,
        # so a fresh query is issued for each file.
        for motif in mcollection.find(query):
            motifStart = motif["motif_genomic_regions_info"]["start"]
            motifEnd = motif["motif_genomic_regions_info"]["end"]

            if histIndel is None:
                histIndel = countVcf.buildVarHist(
                    motifChrom, coordDictIndel, valuesDictIndel, expName)

            xs, xvals, sums = histIndel
            avgIndel = countVcf.queryHist(xs, xvals, sums, motifStart, motifEnd)[0]

            # NOTE(review): Collection.update is the legacy PyMongo call;
            # confirm the installed driver version before switching to
            # update_one.
            mcollection.update(
                {"_id": motif["_id"]},
                {"$set": {"motif_mapability_info.piIndel": avgIndel}},
                upsert=True)
            # Same text as the former `print "pi=", avgIndel`, but valid on
            # both Python 2 and Python 3.
            print("pi= %s" % (avgIndel,))
    return 0