def bedGraphToBigWig(chromSizes, bedGraphPath, bigWigPath, sort=True):
    """
    Sort a bedGraph file and convert it to a bigWig file using pyBigWig.

    Parameters
    ----------
    chromSizes : iterable of (chrom, size) pairs
        Chromosome names and lengths used to build the bigWig header.
    bedGraphPath : str
        Path of the input bedGraph. Every line is split on whitespace and
        read as ``chrom start end value``.
    bigWigPath : str
        Path of the bigWig file to create (opened with mode ``"w"``).
    sort : bool
        When true, the bedGraph is first sorted with the external ``sort``
        tool configured under ``external_tools`` into a temporary file.

    Raises
    ------
    RuntimeError
        If the external sort command exits with a non-zero status.
    SystemExit
        With status 1 if the bedGraph file is smaller than 10 bytes.
    """
    import os
    import sys
    from tempfile import NamedTemporaryFile

    sort_cmd = cfg.config.get('external_tools', 'sort')

    def run_sort(src, dst):
        # A failed sort would otherwise go unnoticed and produce an empty
        # or truncated bigWig, so the exit status is checked explicitly.
        status = os.system("LC_ALL=C {} -k1,1 -k2,2n {} > {}".format(
            sort_cmd, src, dst))
        if status != 0:
            raise RuntimeError(
                "'{}' failed with status {} while sorting {}".format(
                    sort_cmd, status, src))

    # Make a list of tuples for the bigWig header, this MUST be sorted
    # identically to the bedGraph file
    _file = NamedTemporaryFile(delete=False)
    sizes_path = _file.name
    sorted_sizes_path = "{}.sorted".format(sizes_path)
    cl = []
    try:
        try:
            for chrom, size in chromSizes:
                _file.write(toBytes("{}\t{}\n".format(chrom, size)))
        finally:
            _file.close()
        run_sort(sizes_path, sorted_sizes_path)
        with open(sorted_sizes_path) as f:
            for line in f:
                chrom, chromLen = line.split()
                cl.append((chrom, int(chromLen)))
    finally:
        # Clean up both scratch files even when sorting or parsing fails.
        for path in (sizes_path, sorted_sizes_path):
            if os.path.exists(path):
                os.remove(path)

    # check if the file is empty
    if os.stat(bedGraphPath).st_size < 10:
        sys.stderr.write(
            "Error: The generated bedGraphFile was empty. Please adjust\n"
            "your deepTools settings and check your input files.\n")
        sys.exit(1)

    tempfilename1 = None
    try:
        if sort:
            # temporary file to store sorted bedgraph file; only its name is
            # needed because the shell redirection writes to it, so the
            # handle is closed right away instead of being leaked.
            _file = NamedTemporaryFile(delete=False)
            tempfilename1 = _file.name
            _file.close()
            run_sort(bedGraphPath, tempfilename1)
            bedGraphPath = tempfilename1

        bw = pyBigWig.open(bigWigPath, "w")
        assert bw is not None
        try:
            # The lack of maxZooms will change the results a bit, perhaps
            # the defaults are better
            bw.addHeader(cl, maxZooms=10)
            with open(bedGraphPath) as f:
                for line in f:
                    interval = line.split()
                    bw.addEntries([interval[0]], [int(interval[1])],
                                  ends=[int(interval[2])],
                                  values=[float(interval[3])])
        finally:
            bw.close()
    finally:
        if tempfilename1 is not None and os.path.exists(tempfilename1):
            os.remove(tempfilename1)
def bedGraphToBigWig(chromSizes, bedGraphPath, bigWigPath, sort=True):
    """
    Sort a bedGraph file and convert it to a bigWig file using pyBigWig.

    Parameters
    ----------
    chromSizes : iterable of (chrom, size) pairs
        Chromosome names and lengths used to build the bigWig header.
    bedGraphPath : str
        Path of the input bedGraph. Every line is split on whitespace and
        read as ``chrom start end value``.
    bigWigPath : str
        Path of the bigWig file to create (opened with mode ``"w"``).
    sort : bool
        When true, the bedGraph is first sorted with the external ``sort``
        tool configured under ``external_tools`` into a temporary file.

    Raises
    ------
    RuntimeError
        If the external sort command exits with a non-zero status.
    SystemExit
        With status 1 if the bedGraph file is smaller than 10 bytes.
    """
    import os
    import sys
    from tempfile import NamedTemporaryFile

    sort_cmd = cfg.config.get('external_tools', 'sort')

    def run_sort(src, dst):
        # A failed sort would otherwise go unnoticed and produce an empty
        # or truncated bigWig, so the exit status is checked explicitly.
        status = os.system("LC_ALL=C {} -k1,1 -k2,2n {} > {}".format(
            sort_cmd, src, dst))
        if status != 0:
            raise RuntimeError(
                "'{}' failed with status {} while sorting {}".format(
                    sort_cmd, status, src))

    # Make a list of tuples for the bigWig header, this MUST be sorted
    # identically to the bedGraph file
    _file = NamedTemporaryFile(delete=False)
    sizes_path = _file.name
    sorted_sizes_path = "{}.sorted".format(sizes_path)
    cl = []
    try:
        try:
            for chrom, size in chromSizes:
                _file.write(toBytes("{}\t{}\n".format(chrom, size)))
        finally:
            _file.close()
        run_sort(sizes_path, sorted_sizes_path)
        with open(sorted_sizes_path) as f:
            for line in f:
                chrom, chromLen = line.split()
                cl.append((chrom, int(chromLen)))
    finally:
        # Clean up both scratch files even when sorting or parsing fails.
        for path in (sizes_path, sorted_sizes_path):
            if os.path.exists(path):
                os.remove(path)

    # check if the file is empty
    if os.stat(bedGraphPath).st_size < 10:
        sys.stderr.write(
            "Error: The generated bedGraphFile was empty. Please adjust\n"
            "your deepTools settings and check your input files.\n")
        sys.exit(1)

    tempfilename1 = None
    try:
        if sort:
            # temporary file to store sorted bedgraph file; only its name is
            # needed because the shell redirection writes to it, so the
            # handle is closed right away instead of being leaked.
            _file = NamedTemporaryFile(delete=False)
            tempfilename1 = _file.name
            _file.close()
            run_sort(bedGraphPath, tempfilename1)
            bedGraphPath = tempfilename1

        bw = pyBigWig.open(bigWigPath, "w")
        assert bw is not None
        try:
            # The lack of maxZooms will change the results a bit, perhaps
            # the defaults are better
            bw.addHeader(cl, maxZooms=10)
            with open(bedGraphPath) as f:
                for line in f:
                    interval = line.split()
                    bw.addEntries([interval[0]], [int(interval[1])],
                                  ends=[int(interval[2])],
                                  values=[float(interval[3])])
        finally:
            bw.close()
    finally:
        if tempfilename1 is not None and os.path.exists(tempfilename1):
            os.remove(tempfilename1)
def writeBedGraph_worker(chrom, start, end, tileSize, defaultFragmentLength,
                         bamOrBwFileList, func, funcArgs,
                         extendPairedEnds=True, smoothLength=0,
                         missingDataAsZero=False, fixed_step=False):
    r"""
    Writes a bedgraph having as base a number of bam or bigwig files.

    The per-tile coverage of every input file is collected for the region
    ``chrom:start-end`` and ``func(tileCoverage, funcArgs)`` is called once
    per tile to compute the bedgraph value.

    Parameters
    ----------
    chrom, start, end :
        Region to process; ``start`` must not be greater than ``end``.
    tileSize : int
        Length of each tile; tile ``i`` starts at ``start + i * tileSize``.
    defaultFragmentLength, extendPairedEnds :
        Forwarded unchanged to ``getCoverageFromBam``.
    bamOrBwFileList : list of (path, format) pairs
        ``format`` is ``'bam'`` or ``'bigwig'``; other formats are ignored.
    func, funcArgs :
        Callable and its extra argument used to reduce the per-file
        coverage of one tile to a single value.
    smoothLength : int
        When greater than zero, each tile value is the mean of the coverage
        over the range returned by ``getSmoothRange``.
    missingDataAsZero : bool
        Forwarded unchanged to ``getCoverageFromBigwig``.
    fixed_step : bool
        When true, one line is written per tile. Otherwise consecutive
        tiles with equal values are merged into one interval and runs whose
        value is NaN are not written.

    Returns
    -------
    tuple
        ``(chrom, start, end, tempFileName)`` where ``tempFileName`` is the
        temporary file (not deleted here) holding the bedgraph lines.

    Raises
    ------
    NameError
        If ``start`` is greater than ``end``.
    """
    if start > end:
        raise NameError("start position ({0}) bigger than "
                        "end position ({1})".format(start, end))

    coverage = []
    for indexFile, fileFormat in bamOrBwFileList:
        if fileFormat == 'bam':
            bamHandle = bamHandler.openBam(indexFile)
            try:
                coverage.append(
                    getCoverageFromBam(bamHandle, chrom, start, end,
                                       tileSize, defaultFragmentLength,
                                       extendPairedEnds, True))
            finally:
                bamHandle.close()
        elif fileFormat == 'bigwig':
            bigwigHandle = pyBigWig.open(indexFile)
            try:
                coverage.append(
                    getCoverageFromBigwig(bigwigHandle, chrom, start, end,
                                          tileSize, missingDataAsZero))
            finally:
                bigwigHandle.close()

    # is /dev/shm available?
    # working in this directory speeds the process
    try:
        _file = tempfile.NamedTemporaryFile(dir="/dev/shm", delete=False)
    except OSError:
        _file = tempfile.NamedTemporaryFile(delete=False)

    try:
        previousValue = None
        lengthCoverage = len(coverage[0])
        for tileIndex in range(lengthCoverage):
            tileCoverage = []
            for index in range(len(bamOrBwFileList)):
                if smoothLength > 0:
                    vectorStart, vectorEnd = getSmoothRange(
                        tileIndex, tileSize, smoothLength, lengthCoverage)
                    tileCoverage.append(
                        np.mean(coverage[index][vectorStart:vectorEnd]))
                else:
                    try:
                        tileCoverage.append(coverage[index][tileIndex])
                    except IndexError:
                        sys.exit(
                            "Chromosome {} probably not in one of the bigwig "
                            "files. Remove this chromosome from the bigwig file "
                            "to continue".format(chrom))

            value = func(tileCoverage, funcArgs)

            if fixed_step:
                writeStart = start + tileIndex * tileSize
                writeEnd = min(writeStart + tileSize, end)
                try:
                    _file.write(
                        toBytes("%s\t%d\t%d\t%.2f\n" %
                                (chrom, writeStart, writeEnd, value)))
                except TypeError:
                    # value cannot be formatted as a float; write it as is
                    _file.write(
                        toBytes("{}\t{}\t{}\t{}\n".format(
                            chrom, writeStart, writeEnd, value)))
            else:
                if previousValue is None:
                    writeStart = start + tileIndex * tileSize
                    writeEnd = min(writeStart + tileSize, end)
                    previousValue = value
                elif previousValue == value:
                    # same value as the open run: just extend it
                    writeEnd = min(writeEnd + tileSize, end)
                elif previousValue != value:
                    if not np.isnan(previousValue):
                        _file.write(
                            toBytes("{0}\t{1}\t{2}\t{3:g}\n".format(
                                chrom, writeStart, writeEnd, previousValue)))
                    previousValue = value
                    writeStart = writeEnd
                    writeEnd = min(writeStart + tileSize, end)

        if not fixed_step:
            # write remaining value if not a nan. The check is against None
            # (not truthiness) so that a trailing run whose value is 0 is
            # written just like zero-valued runs inside the region.
            if previousValue is not None and writeStart != end and \
                    not np.isnan(previousValue):
                _file.write(
                    toBytes("{0}\t{1}\t{2}\t{3:g}\n".format(
                        chrom, writeStart, end, previousValue)))
    finally:
        # Close the handle on every exit path, including sys.exit above.
        _file.close()

    tempFileName = _file.name
    return chrom, start, end, tempFileName
def writeBedGraph_worker(
        chrom, start, end, tileSize, defaultFragmentLength,
        bamOrBwFileList, func, funcArgs, extendPairedEnds=True, smoothLength=0,
        skipZeroOverZero=False, missingDataAsZero=False, fixed_step=False):
    r"""
    Writes a bedgraph having as base a number of bam or bigwig files.

    The per-tile coverage of every input file is collected for the region
    ``chrom:start-end`` and ``func(tileCoverage, funcArgs)`` is called once
    per tile to compute the bedgraph value.

    Parameters
    ----------
    chrom, start, end :
        Region to process; ``start`` must not be greater than ``end``.
    tileSize : int
        Length of each tile; tile ``i`` starts at ``start + i * tileSize``.
    defaultFragmentLength, extendPairedEnds :
        Forwarded unchanged to ``getCoverageFromBam``.
    bamOrBwFileList : list of (path, format) pairs
        ``format`` is ``'bam'`` or ``'bigwig'``; other formats are ignored.
    func, funcArgs :
        Callable and its extra argument used to reduce the per-file
        coverage of one tile to a single value.
    smoothLength : int
        When greater than zero, each tile value is the mean of the coverage
        over the range returned by ``getSmoothRange``.
    skipZeroOverZero : bool
        When true, tiles whose coverages sum to zero are not passed to
        ``func`` and produce no output.
    missingDataAsZero : bool
        Forwarded unchanged to ``getCoverageFromBigwig``.
    fixed_step : bool
        When true, one line is written per tile. Otherwise consecutive
        tiles with equal values are merged into one interval and runs whose
        value is NaN are not written.

    Returns
    -------
    tuple
        ``(chrom, start, end, tempFileName)`` where ``tempFileName`` is the
        temporary file (not deleted here) holding the bedgraph lines.

    Raises
    ------
    NameError
        If ``start`` is greater than ``end``.
    """
    if start > end:
        raise NameError("start position ({0}) bigger than "
                        "end position ({1})".format(start, end))

    coverage = []
    for indexFile, fileFormat in bamOrBwFileList:
        if fileFormat == 'bam':
            bamHandle = bamHandler.openBam(indexFile)
            try:
                coverage.append(getCoverageFromBam(
                    bamHandle, chrom, start, end, tileSize,
                    defaultFragmentLength, extendPairedEnds, True))
            finally:
                bamHandle.close()
        elif fileFormat == 'bigwig':
            bigwigHandle = pyBigWig.open(indexFile)
            try:
                coverage.append(
                    getCoverageFromBigwig(
                        bigwigHandle, chrom, start, end,
                        tileSize, missingDataAsZero))
            finally:
                bigwigHandle.close()

    # is /dev/shm available?
    # working in this directory speeds the process
    try:
        _file = tempfile.NamedTemporaryFile(dir="/dev/shm", delete=False)
    except OSError:
        _file = tempfile.NamedTemporaryFile(delete=False)

    def write_run(run_start, run_end, run_value):
        # Emit one merged interval in the variable-step output format.
        _file.write(
            toBytes("{0}\t{1}\t{2}\t{3:g}\n".format(
                chrom, run_start, run_end, run_value)))

    try:
        previousValue = None
        lengthCoverage = len(coverage[0])
        for tileIndex in range(lengthCoverage):
            tileCoverage = []
            for index in range(len(bamOrBwFileList)):
                if smoothLength > 0:
                    vectorStart, vectorEnd = getSmoothRange(
                        tileIndex, tileSize, smoothLength, lengthCoverage)
                    tileCoverage.append(
                        np.mean(coverage[index][vectorStart:vectorEnd]))
                else:
                    try:
                        tileCoverage.append(coverage[index][tileIndex])
                    except IndexError:
                        sys.exit(
                            "Chromosome {} probably not in one of the bigwig "
                            "files. Remove this chromosome from the bigwig file "
                            "to continue".format(chrom))

            if skipZeroOverZero and np.sum(tileCoverage) == 0:
                # Flush the open run before dropping it; resetting
                # previousValue alone would silently lose the interval
                # that precedes every skipped tile.
                if previousValue is not None and \
                        not np.isnan(previousValue):
                    write_run(writeStart, writeEnd, previousValue)
                previousValue = None
                continue

            value = func(tileCoverage, funcArgs)

            if fixed_step:
                writeStart = start + tileIndex * tileSize
                writeEnd = min(writeStart + tileSize, end)
                try:
                    _file.write(
                        toBytes("%s\t%d\t%d\t%.2f\n" %
                                (chrom, writeStart, writeEnd, value)))
                except TypeError:
                    # value cannot be formatted as a float; write it as is
                    _file.write(
                        toBytes("{}\t{}\t{}\t{}\n".format(
                            chrom, writeStart, writeEnd, value)))
            else:
                if previousValue is None:
                    writeStart = start + tileIndex * tileSize
                    writeEnd = min(writeStart + tileSize, end)
                    previousValue = value
                elif previousValue == value:
                    # same value as the open run: just extend it
                    writeEnd = min(writeEnd + tileSize, end)
                elif previousValue != value:
                    if not np.isnan(previousValue):
                        write_run(writeStart, writeEnd, previousValue)
                    previousValue = value
                    writeStart = writeEnd
                    writeEnd = min(writeStart + tileSize, end)

        if not fixed_step:
            # write remaining value if not a nan. The check is against None
            # (not truthiness) so that a trailing run whose value is 0 is
            # written just like zero-valued runs inside the region.
            if previousValue is not None and writeStart != end and \
                    not np.isnan(previousValue):
                write_run(writeStart, end, previousValue)
    finally:
        # Close the handle on every exit path, including sys.exit above.
        _file.close()

    tempFileName = _file.name
    return chrom, start, end, tempFileName