def _fragmentStartsNearMotif(start, motifStart, motifEnd):
    """Return the fragment starts whose gap to the next start holds a motif endpoint.

    A start ``start[i]`` is selected when ``motifStart`` or ``motifEnd`` lies
    strictly between ``start[i]`` and ``start[i + 1]``.  The last element of
    ``start`` has no successor and is therefore never selected here.
    """
    selected = []
    bounds = iter(start)
    lower = next(bounds, None)
    for upper in bounds:
        if lower < motifStart < upper or lower < motifEnd < upper:
            selected.append(lower)
        lower = upper
    return selected


def _averageOverFragments(arrayDict, fragmentStarts, motifStart, motifEnd):
    """Combine the per-fragment ``countWig.queryHist`` results for one motif.

    The first non-zero query result is taken as is.  Every later fragment that
    begins before ``motifEnd`` is folded in as a length-weighted mean; the
    first fragment starting at or after ``motifEnd`` ends the scan.
    """
    avg = 0
    # float() keeps the division exact even if every operand is an integer
    # (the file is Python 2, where int / int truncates).
    motifLength = float(motifEnd - motifStart + 1)
    for fragStart in fragmentStarts:
        if avg != 0 and motifEnd <= fragStart:
            break
        xs, xvals, sums = arrayDict[fragStart]
        fragAvg = countWig.queryHist(xs, xvals, sums, motifStart, motifEnd)[0]
        if avg == 0:
            avg = fragAvg
            continue
        # Partial overlap: renormalise over both pieces.  The two weights add
        # up to motifLength, so the *whole* sum is divided by it (the previous
        # expression divided only the second term).
        # Presumably queryHist returns the mean over the part of the motif
        # covered by this fragment -- TODO confirm against countWig.
        headLength = fragStart - motifStart
        tailLength = motifEnd - fragStart + 1
        avg = (avg * headLength + fragAvg * tailLength) / motifLength
    return avg


def getCons(path, tfName):
    """Store a conservation average for each motif of ``tfName``.

    Every ``*.wigFix`` file directly inside ``path`` is processed.  The first
    dot-separated component of the file name is used as the chromosome and the
    second and third components, joined by ``_``, as the conservation track
    name.  For each document in ``mcollection`` whose ``tf_name`` equals
    ``tfName`` and whose ``motif_genomic_regions_info.chr`` equals that
    chromosome, an average is computed from the parsed file and written to the
    document's ``motif_cons_info`` field.

    Args:
        path: Directory searched for ``*.wigFix`` files.
        tfName: Value matched against the ``tf_name`` field of the documents.

    Returns:
        Always ``0``.

    Side effects:
        Prints progress to stdout and updates documents in ``mcollection``.
    """
    # NOTE(review): the original indentation of this function was lost; the
    # nesting below (motifs without any selected fragment are skipped and not
    # written) is a reconstruction -- confirm against version control.
    for infile in glob.glob(os.path.join(path, "*.wigFix")):
        wigfilename = os.path.split(infile)[1]
        nameParts = wigfilename.split('.')
        chrom = nameParts[0]
        consName = '_'.join(nameParts[1:3])

        # Only parsing needs the file; it is closed before the database work.
        with open(infile, 'rt') as wigFile:
            wig = csv.reader(wigFile, delimiter='\t')
            stepDict, startDict, valuesDict = countWig.getFixStart(wig, consName)
            start = startDict[consName][chrom]
            arrayDict = countWig.buildFixHist(
                chrom, stepDict, startDict, valuesDict, consName)

        cursor = mcollection.find(
            {"tf_name": tfName, "motif_genomic_regions_info.chr": chrom})
        for test in cursor:
            region = test["motif_genomic_regions_info"]
            motifStart, motifEnd = region["start"], region["end"]

            startlist = _fragmentStartsNearMotif(start, motifStart, motifEnd)
            if not startlist:
                continue
            print(startlist)
            # The final fragment can never be selected above (it has no
            # successor to compare with), so it is always offered as a
            # candidate for the tail of the motif.
            startlist.append(start[-1])

            avg = _averageOverFragments(arrayDict, startlist, motifStart, motifEnd)
            if avg > 0:
                print("%s %s %s" % (avg, motifStart, motifEnd))
            # NOTE(review): $set replaces the whole "motif_cons_info"
            # subdocument, so a value stored earlier under another track name
            # is overwritten -- confirm this is intended.
            mcollection.update(
                {"_id": test["_id"]},
                {"$set": {"motif_cons_info": {consName: avg}}},
                upsert=True)
    return 0