Esempio n. 1
0
def updateCount(path, tfName):
    """Update count data in discontinuous variableStep wiggle format.

    For every ``*.wig`` file directly inside *path*, the file is parsed with
    ``countWig.getCoord`` and, for each document in ``mcollection`` whose
    ``tf_name`` equals *tfName*, a count is computed for the document's motif
    region (``motif_genomic_regions_info`` -> ``chr``/``start``/``end``) and
    stored at ``ct_info.accessibility_score["Dnase"]`` before the document is
    saved back to the collection.

    Args:
        path: Directory that is searched (non-recursively) for ``*.wig`` files.
        tfName: Value matched against the ``tf_name`` field of the documents.

    Returns:
        Always ``0``.
    """
    ##depends on the data type and source
    expName = "Dnase"  ##or add an expName parser line
    for infile in glob.glob(os.path.join(path, "*.wig")):
        wigfilename = os.path.basename(infile)
        # The cell-type name is the part of the file name between
        # 'EncodeUwDnase' and 'Aln'.
        ctName = wigfilename.split('EncodeUwDnase')[-1].split('Aln')[0]
        # Close the wig file as soon as it has been parsed instead of leaking
        # one open handle per input file.
        # NOTE(review): assumes getCoord consumes the reader eagerly -- confirm.
        with open(infile, 'rt') as wigFile:
            wig = csv.reader(wigFile, delimiter='\t')
            coordDict, valuesDict = countWig.getCoord(wig, ctName)
        # Per-file cache: chromosome -> (xs, xvals, sums) from buildHist, so
        # the histogram is built once per chromosome rather than once per
        # document.
        arrayDict = {}
        cursor = mcollection.find({"tf_name": tfName})
        for test in cursor:
            region = test["motif_genomic_regions_info"]
            motifChrom = region["chr"]
            motifStart = region["start"]
            motifEnd = region["end"]
            if motifChrom not in arrayDict:
                arrayDict[motifChrom] = countWig.buildHist(
                    motifChrom, coordDict, valuesDict, ctName)
            xs, xvals, sums = arrayDict[motifChrom]
            count = countWig.queryHist(xs, xvals, sums, motifStart, motifEnd)[0]
            # NOTE(review): the key is always expName, not ctName, so every
            # wig file overwrites the score written for the previous one --
            # confirm this is intended.
            test["ct_info"]["accessibility_score"][expName] = count
            mcollection.save(test)
    return 0
Esempio n. 2
0
def getCount(path, tfName):
    """Compute and store a count for every motif document of *tfName*.

    For every ``*.wig`` file directly inside *path*, the file is parsed with
    ``countWig.getCoord`` and, for each document in ``mcollection`` whose
    ``tf_name`` equals *tfName*, a count is computed for the document's motif
    region (``motif_genomic_regions_info`` -> ``chr``/``start``/``end``) and
    stored at ``ct_info.accessibility_score["Dnase"]`` before the document is
    saved back to the collection.

    Despite its name, this function does not return the counts; it writes
    them to the database.

    Args:
        path: Directory that is searched (non-recursively) for ``*.wig`` files.
        tfName: Value matched against the ``tf_name`` field of the documents.

    Returns:
        Always ``0``.
    """
    ##depends on the data type and source
    methodName = "Dnase"
    for infile in glob.glob(os.path.join(path, "*.wig")):
        wigfilename = os.path.basename(infile)
        # The cell-type name is the part of the file name between
        # 'EncodeUwDnase' and 'Aln'.
        ctName = wigfilename.split('EncodeUwDnase')[-1].split('Aln')[0]
        # Close the wig file as soon as it has been parsed instead of leaking
        # one open handle per input file.
        # NOTE(review): assumes getCoord consumes the reader eagerly -- confirm.
        with open(infile, 'rt') as wigFile:
            wig = csv.reader(wigFile, delimiter='\t')
            coordDict, valuesDict = countWig.getCoord(wig, ctName)
        # Per-file cache: chromosome -> (xs, xvals, sums) from buildHist, so
        # the histogram is built once per chromosome rather than once per
        # document.
        arrayDict = {}
        cursor = mcollection.find({"tf_name": tfName})
        for test in cursor:
            region = test["motif_genomic_regions_info"]
            motifChrom = region["chr"]
            motifStart = region["start"]
            motifEnd = region["end"]
            if motifChrom not in arrayDict:
                arrayDict[motifChrom] = countWig.buildHist(
                    motifChrom, coordDict, valuesDict, ctName)
            xs, xvals, sums = arrayDict[motifChrom]
            count = countWig.queryHist(xs, xvals, sums, motifStart, motifEnd)[0]
            # NOTE(review): the key is always methodName, not ctName, so every
            # wig file overwrites the score written for the previous one --
            # confirm this is intended.
            test["ct_info"]["accessibility_score"][methodName] = count
            mcollection.save(test)
    return 0