def updateChip(path, db, motifChrom="chr17", window=0,
               tfNames=("IRF3", "MAFK", "NFYA", "SIN3A", "ZNF384")):
    """Store per-experiment BED annotation values on matching motif documents.

    For every ``*.bed.gz`` file directly under ``path`` the file is parsed
    through ``countBed``, and every motif document in the collection
    ``db["hg19" + motifChrom]`` whose ``tf_name`` is in ``tfNames`` is tested
    against it with ``countBed.getMotifAnno``.  When that call reports at
    least one value, the document's ``chip.<expName>`` field is set to
    ``valueList[0][1]``.

    Args:
        path: directory searched (non-recursively) for ``*.bed.gz`` files.
        db: mapping-like database handle; indexed with the collection name.
        motifChrom: chromosome name.  It selects the collection and is also
            the separator used to derive the experiment name from each file
            name (the text before its first occurrence).
        window: passed through unchanged to ``countBed.getMotifAnno``.
        tfNames: ``tf_name`` values of the motif documents to annotate.
            Defaults to the list that used to be hard-coded in the query.

    Returns:
        Always 0.

    NOTE(review): a second ``updateChip`` is defined right after this one in
    the file and replaces this definition when the module is loaded.
    """
    mcollection = db["hg19" + motifChrom]
    tfQuery = {"tf_name": {"$in": list(tfNames)}}

    for infile in glob.glob(os.path.join(path, "*.bed.gz")):
        # Experiment name = file name up to the chromosome token.
        expName = os.path.basename(infile).split(motifChrom)[0]
        chipField = "chip." + expName

        with gzip.open(infile, 'rt') as bedFile:
            bed = csv.reader(bedFile, delimiter='\t')
            annoIntvlDict = countBed.getBed4Anno(bed, expName)
            intervalStartDict = countBed.sortStart(annoIntvlDict)
            intervalEndDict = countBed.sortEnd(annoIntvlDict)

            # A fresh cursor is needed per file: cursors are single-pass.
            for motif in mcollection.find(tfQuery):
                region = motif["genomic_region"]
                # Only the value list is used; the region list is discarded.
                _, valueList = countBed.getMotifAnno(
                    annoIntvlDict, intervalStartDict, intervalEndDict,
                    motifChrom, region["start"], region["end"], window)
                if not valueList:
                    continue
                # Collection.update is the legacy pymongo call (deprecated in
                # pymongo 3, removed in 4); kept so the driver version this
                # file was written against keeps working.
                mcollection.update({"_id": motif["_id"]},
                                   {"$set": {chipField: valueList[0][1]}},
                                   upsert=True)
    return 0
def updateChip(path, db, motifChrom="chr17", window=0, tfNames=("CTCF",),
               filePrefix="wgEncodeBroadHistoneGm12878CtcfStdPkForma"):
    """Store per-experiment BED annotation values on motif documents, per TF.

    For each transcription factor name ``tf`` in ``tfNames`` the files
    matching ``<path>/<tf>/<filePrefix>*<motifChrom>.bed.gz`` are parsed
    through ``countBed``.  Every motif document in the collection
    ``db["hg19" + motifChrom]`` whose ``tf_name`` equals ``tf`` is tested
    against each file with ``countBed.getMotifAnno``; when that call reports
    at least one value, the document's ``chip.<expName>`` field is set to
    ``valueList[0][1]``.

    Args:
        path: directory containing one sub-directory per transcription factor.
        db: mapping-like database handle; indexed with the collection name.
        motifChrom: chromosome name.  It selects the collection, ends the
            file-name pattern, and is the separator used to derive the
            experiment name from each file name (the text before its first
            occurrence).
        window: passed through unchanged to ``countBed.getMotifAnno``.
        tfNames: transcription factor names to process; each is both the
            sub-directory name and the ``tf_name`` queried.  Defaults to the
            single name that used to be hard-coded.
        filePrefix: leading part of the BED file names to match.  The
            default is the previously hard-coded, CTCF-specific prefix, so
            pass a different one when processing other factors.

    Returns:
        Always 0.

    NOTE(review): this redefines the ``updateChip`` declared immediately
    before it in the file; only this version is reachable after import.
    """
    mcollection = db["hg19" + motifChrom]

    for tf in tfNames:
        pattern = os.path.join(path, tf,
                               filePrefix + "*" + motifChrom + ".bed.gz")
        for infile in glob.glob(pattern):
            # %-formatting prints the same text under Python 2 and 3.
            print("update %s" % infile)
            # Experiment name = file name up to the chromosome token.
            expName = os.path.basename(infile).split(motifChrom)[0]
            chipField = "chip." + expName

            with gzip.open(infile, 'rt') as bedFile:
                bed = csv.reader(bedFile, delimiter='\t')
                annoIntvlDict = countBed.getBed4Anno(bed, expName)
                intervalStartDict = countBed.sortStart(annoIntvlDict)
                intervalEndDict = countBed.sortEnd(annoIntvlDict)

                # A fresh cursor is needed per file: cursors are single-pass.
                for motif in mcollection.find({"tf_name": tf}):
                    region = motif["genomic_region"]
                    # Only the value list is used; the region list is
                    # discarded.
                    _, valueList = countBed.getMotifAnno(
                        annoIntvlDict, intervalStartDict, intervalEndDict,
                        motifChrom, region["start"], region["end"], window)
                    if not valueList:
                        continue
                    # Collection.update is the legacy pymongo call
                    # (deprecated in pymongo 3, removed in 4); kept so the
                    # driver version this file was written against keeps
                    # working.
                    mcollection.update({"_id": motif["_id"]},
                                       {"$set": {chipField: valueList[0][1]}},
                                       upsert=True)
    return 0