Ejemplo n.º 1
0
def updateChip(path,db,motifChrom="chr17",window=0,
               tfNames=("IRF3","MAFK","NFYA","SIN3A","ZNF384")):
    """Store per-experiment BED annotation values on motif documents.

    Every ``*.bed.gz`` file directly inside ``path`` is parsed with
    ``countBed.getBed4Anno``.  Each document of the ``"hg19" + motifChrom``
    collection whose ``tf_name`` is in ``tfNames`` is then passed to
    ``countBed.getMotifAnno``; when that call yields a non-empty value
    list, element ``[0][1]`` of it is written to the document under the
    ``"chip." + expName`` key.

    Args:
        path: directory searched (non-recursively) for ``*.bed.gz`` files.
        db: object indexable by collection name (e.g. a PyMongo database).
        motifChrom: chromosome label; selects the collection and is the
            separator used to derive ``expName`` from each file name.
        window: forwarded unchanged to ``countBed.getMotifAnno``.
        tfNames: iterable of ``tf_name`` values whose motifs are updated.
            Defaults to the set that used to be hard-coded here.

    Returns:
        Always 0.
    """
    mcollection = db["hg19"+motifChrom]
    # Built once: the same filter is reused for every BED file.
    tfQuery = {"tf_name": {"$in": list(tfNames)}}
    for infile in glob.glob(os.path.join(path,"*.bed.gz")):
        # Experiment name = file name text before the first motifChrom.
        expName = os.path.basename(infile).split(motifChrom)[0]
        chipField = "chip."+expName
        # The file stays open for the whole pass, as before: it is not
        # visible from here whether countBed consumes the reader eagerly.
        with gzip.open(infile,'rt') as bedFile:
            bed = csv.reader(bedFile, delimiter='\t')
            annoIntvlDict = countBed.getBed4Anno(bed,expName)
            intervalStartDict = countBed.sortStart(annoIntvlDict)
            intervalEndDict = countBed.sortEnd(annoIntvlDict)
            for motif in mcollection.find(tfQuery):
                region = motif["genomic_region"]
                _, valueList = countBed.getMotifAnno(
                    annoIntvlDict, intervalStartDict, intervalEndDict,
                    motifChrom, region["start"], region["end"], window)
                if not valueList:
                    # No annotation for this motif; leave it untouched.
                    continue
                # NOTE: Collection.update is the legacy PyMongo call
                # (removed in PyMongo 4); kept so the block still runs
                # against the driver version this file was written for.
                mcollection.update(
                    {"_id": motif["_id"]},
                    {"$set": {chipField: valueList[0][1]}},
                    upsert=True)
    return 0
Ejemplo n.º 2
0
def updateChip(path,db,motifChrom="chr17",window=0):
    """Store per-experiment BED annotation values on CTCF motif documents.

    Looks in ``<path>/CTCF/`` for files matching
    ``wgEncodeBroadHistoneGm12878CtcfStdPkForma*<motifChrom>.bed.gz`` and
    parses each one with ``countBed.getBed4Anno``.  Each document of the
    ``"hg19" + motifChrom`` collection whose ``tf_name`` is ``"CTCF"`` is
    then passed to ``countBed.getMotifAnno``; when that call yields a
    non-empty value list, element ``[0][1]`` of it is written to the
    document under the ``"chip." + expName`` key.

    Args:
        path: directory containing one sub-directory per transcription
            factor.
        db: object indexable by collection name (e.g. a PyMongo database).
        motifChrom: chromosome label; selects the collection, is part of
            the file pattern, and is the separator used to derive
            ``expName`` from each file name.
        window: forwarded unchanged to ``countBed.getMotifAnno``.

    Returns:
        Always 0.
    """
    mcollection = db["hg19"+motifChrom]
    # Only CTCF is processed: the file pattern below is CTCF-specific.
    for tf in ("CTCF",):
        # os.path.join keeps an empty ``path`` relative instead of turning
        # it into a lookup under the filesystem root.
        pattern = os.path.join(
            path, tf,
            "wgEncodeBroadHistoneGm12878CtcfStdPkForma*"+motifChrom+".bed.gz")
        for infile in glob.glob(pattern):
            # Single %-formatted argument prints the same text under both
            # Python 2 and Python 3.
            print("update %s" % infile)
            # Experiment name = file name text before the first motifChrom.
            expName = os.path.basename(infile).split(motifChrom)[0]
            chipField = "chip."+expName
            # The file stays open for the whole pass, as before: it is not
            # visible from here whether countBed consumes the reader eagerly.
            with gzip.open(infile,'rt') as bedFile:
                bed = csv.reader(bedFile, delimiter='\t')
                annoIntvlDict = countBed.getBed4Anno(bed,expName)
                intervalStartDict = countBed.sortStart(annoIntvlDict)
                intervalEndDict = countBed.sortEnd(annoIntvlDict)
                for motif in mcollection.find({"tf_name": tf}):
                    region = motif["genomic_region"]
                    _, valueList = countBed.getMotifAnno(
                        annoIntvlDict, intervalStartDict, intervalEndDict,
                        motifChrom, region["start"], region["end"], window)
                    if not valueList:
                        # No annotation for this motif; leave it untouched.
                        continue
                    # NOTE: Collection.update is the legacy PyMongo call
                    # (removed in PyMongo 4); kept so the block still runs
                    # against the driver version this file was written for.
                    mcollection.update(
                        {"_id": motif["_id"]},
                        {"$set": {chipField: valueList[0][1]}},
                        upsert=True)
    return 0