예제 #1
0
def bedGraphToBigWig(chromSizes, bedGraphPath, bigWigPath, sort=True):
    """
    Convert a bedGraph file into a bigWig file using pyBigWig.

    :param chromSizes: iterable of (chromosome name, size) pairs. They are
        sorted with the configured external sort tool and used as the
        bigWig header.
    :param bedGraphPath: path of the bedGraph file to convert. Every line
        is split on whitespace and read as chrom, start, end, value.
    :param bigWigPath: path of the bigWig file to create.
    :param sort: if True, the bedGraph is first sorted (by chromosome, then
        numerically by start) into a temporary file that is removed
        afterwards.

    Exits with status 1 when the bedGraph file is smaller than 10 bytes.
    Raises AssertionError when pyBigWig does not return a file handle.
    """

    # Imported locally so the function does not rely on module-level
    # imports of os / sys being present.
    import os
    import sys
    from tempfile import NamedTemporaryFile

    sort_cmd = cfg.config.get('external_tools', 'sort')

    # NOTE(review): the paths below are interpolated unquoted into a shell
    # command, so paths containing spaces or shell metacharacters break it.

    # Make a list of tuples for the bigWig header, this MUST be sorted
    # identically to the bedGraph file
    sizesFile = NamedTemporaryFile(delete=False)
    sizesPath = sizesFile.name
    sortedSizesPath = "{}.sorted".format(sizesPath)
    try:
        with sizesFile:
            for chrom, size in chromSizes:
                sizesFile.write(toBytes("{}\t{}\n".format(chrom, size)))
        os.system("LC_ALL=C {} -k1,1 -k2,2n {} > {}".format(
            sort_cmd, sizesPath, sortedSizesPath))
        cl = []
        with open(sortedSizesPath) as f:
            for line in f:
                chrom, chromLen = line.split()
                cl.append((chrom, int(chromLen)))
    finally:
        # remove both helper files even if sorting or parsing failed
        for path in (sizesPath, sortedSizesPath):
            if os.path.exists(path):
                os.remove(path)

    # check if the file is empty
    if os.stat(bedGraphPath).st_size < 10:
        sys.stderr.write(
            "Error: The generated bedGraphFile was empty. Please adjust\n"
            "your deepTools settings and check your input files.\n")
        # sys.exit is always available; the bare exit() helper is only
        # injected by the site module
        sys.exit(1)

    tempfilename1 = None
    if sort:
        # temporary file to store sorted bedgraph file
        _file = NamedTemporaryFile(delete=False)
        tempfilename1 = _file.name
        # only the name is needed: the shell redirection writes the file
        _file.close()
        os.system("LC_ALL=C {} -k1,1 -k2,2n {} > {}".format(
            sort_cmd, bedGraphPath, tempfilename1))
        bedGraphPath = tempfilename1

    try:
        bw = pyBigWig.open(bigWigPath, "w")
        # explicit raise instead of an assert statement so that the check
        # is not stripped when running with -O
        if bw is None:
            raise AssertionError(
                "could not open {} for writing".format(bigWigPath))
        # The lack of maxZooms will change the results a bit, perhaps the
        # defaults are better
        bw.addHeader(cl, maxZooms=10)
        with open(bedGraphPath) as f:
            for line in f:
                interval = line.split()
                bw.addEntries([interval[0]], [int(interval[1])],
                              ends=[int(interval[2])],
                              values=[float(interval[3])])
        bw.close()
    finally:
        # the sorted copy is ours; drop it even when the conversion failed
        if tempfilename1 is not None:
            os.remove(tempfilename1)
예제 #2
0
def bedGraphToBigWig(chromSizes, bedGraphPath, bigWigPath, sort=True):
    """
    Convert a bedGraph file into a bigWig file using pyBigWig.

    :param chromSizes: iterable of (chromosome name, size) pairs. They are
        sorted with the configured external sort tool and used as the
        bigWig header.
    :param bedGraphPath: path of the bedGraph file to convert. Every line
        is split on whitespace and read as chrom, start, end, value.
    :param bigWigPath: path of the bigWig file to create.
    :param sort: if True, the bedGraph is first sorted (by chromosome, then
        numerically by start) into a temporary file that is removed
        afterwards.

    Exits with status 1 when the bedGraph file is smaller than 10 bytes.
    Raises AssertionError when pyBigWig does not return a file handle.
    """

    # Imported locally so the function does not rely on module-level
    # imports of os / sys being present.
    import os
    import sys
    from tempfile import NamedTemporaryFile

    sort_cmd = cfg.config.get('external_tools', 'sort')
    sort_template = "LC_ALL=C {} -k1,1 -k2,2n {} > {}"

    # NOTE(review): the paths below are interpolated unquoted into a shell
    # command, so paths containing spaces or shell metacharacters break it.

    # Make a list of tuples for the bigWig header, this MUST be sorted identically to the bedGraph file
    sizesFile = NamedTemporaryFile(delete=False)
    sizesPath = sizesFile.name
    sortedSizesPath = "{}.sorted".format(sizesPath)
    try:
        with sizesFile:
            for chrom, size in chromSizes:
                sizesFile.write(toBytes("{}\t{}\n".format(chrom, size)))
        os.system(sort_template.format(sort_cmd, sizesPath, sortedSizesPath))
        cl = []
        with open(sortedSizesPath) as f:
            for line in f:
                chrom, chromLen = line.split()
                cl.append((chrom, int(chromLen)))
    finally:
        # remove both helper files even if sorting or parsing failed
        for path in (sizesPath, sortedSizesPath):
            if os.path.exists(path):
                os.remove(path)

    # check if the file is empty
    if os.stat(bedGraphPath).st_size < 10:
        sys.stderr.write(
            "Error: The generated bedGraphFile was empty. Please adjust\n"
            "your deepTools settings and check your input files.\n")
        # sys.exit is always available; the bare exit() helper is only injected by the site module
        sys.exit(1)

    tempfilename1 = None
    if sort:
        # temporary file to store sorted bedgraph file
        _file = NamedTemporaryFile(delete=False)
        tempfilename1 = _file.name
        # only the name is needed: the shell redirection writes the file
        _file.close()
        os.system(sort_template.format(sort_cmd, bedGraphPath, tempfilename1))
        bedGraphPath = tempfilename1

    try:
        bw = pyBigWig.open(bigWigPath, "w")
        # explicit raise instead of an assert statement so that the check is not stripped when running with -O
        if bw is None:
            raise AssertionError("could not open {} for writing".format(bigWigPath))
        # The lack of maxZooms will change the results a bit, perhaps the defaults are better
        bw.addHeader(cl, maxZooms=10)
        with open(bedGraphPath) as f:
            for line in f:
                interval = line.split()
                bw.addEntries([interval[0]], [int(interval[1])], ends=[int(interval[2])], values=[float(interval[3])])
        bw.close()
    finally:
        # the sorted copy is ours; drop it even when the conversion failed
        if tempfilename1 is not None:
            os.remove(tempfilename1)
def writeBedGraph_worker(chrom,
                         start,
                         end,
                         tileSize,
                         defaultFragmentLength,
                         bamOrBwFileList,
                         func,
                         funcArgs,
                         extendPairedEnds=True,
                         smoothLength=0,
                         missingDataAsZero=False,
                         fixed_step=False):
    r"""
    Write a temporary bedGraph file for one region, computed from the
    per-tile coverage of several bam and/or bigwig files.

    For every tile the coverage of each input file is collected into a
    list and ``func(tileCoverage, funcArgs)`` is called to compute the
    value that is written.

    :param chrom: chromosome name of the region.
    :param start: start position of the region.
    :param end: end position of the region; written intervals never extend
        past it.
    :param tileSize: width of each tile; tile ``i`` starts at
        ``start + i * tileSize``.
    :param defaultFragmentLength: forwarded to getCoverageFromBam.
    :param bamOrBwFileList: iterable of (file path, format) pairs where the
        format is 'bam' or 'bigwig'; entries with any other format are
        ignored.
    :param func: callable invoked as ``func(tileCoverage, funcArgs)``.
    :param funcArgs: second argument passed to ``func``.
    :param extendPairedEnds: forwarded to getCoverageFromBam.
    :param smoothLength: if greater than 0, each file contributes the mean
        of its coverage over the range returned by getSmoothRange instead
        of the value of the single tile.
    :param missingDataAsZero: forwarded to getCoverageFromBigwig.
    :param fixed_step: if True, one line is written per tile. Otherwise
        consecutive tiles with equal values are merged into one interval
        and intervals whose value is nan are not written.
    :return: tuple (chrom, start, end, name of the temporary bedGraph file).
    :raises NameError: if start is bigger than end.
    """
    if start > end:
        raise NameError("start position ({0}) bigger than "
                        "end position ({1})".format(start, end))

    coverage = []

    for indexFile, fileFormat in bamOrBwFileList:
        if fileFormat == 'bam':
            bamHandle = bamHandler.openBam(indexFile)
            coverage.append(
                getCoverageFromBam(bamHandle, chrom, start, end, tileSize,
                                   defaultFragmentLength, extendPairedEnds,
                                   True))
            bamHandle.close()
        elif fileFormat == 'bigwig':
            bigwigHandle = pyBigWig.open(indexFile)
            coverage.append(
                getCoverageFromBigwig(bigwigHandle, chrom, start, end,
                                      tileSize, missingDataAsZero))
            bigwigHandle.close()

    # Computed before the temporary file is created so that an empty
    # coverage list fails without leaving a stray file behind.
    lengthCoverage = len(coverage[0])

    # is /dev/shm available?
    # working in this directory speeds the process
    try:
        _file = tempfile.NamedTemporaryFile(dir="/dev/shm", delete=False)
    except OSError:
        _file = tempfile.NamedTemporaryFile(delete=False)

    previousValue = None
    # delete=False keeps the file on disk; the with block only guarantees
    # that the handle is closed, also when func raises or sys.exit is hit.
    with _file:
        for tileIndex in range(lengthCoverage):

            tileCoverage = []
            for index in range(len(bamOrBwFileList)):
                if smoothLength > 0:
                    vectorStart, vectorEnd = getSmoothRange(
                        tileIndex, tileSize, smoothLength, lengthCoverage)
                    tileCoverage.append(
                        np.mean(coverage[index][vectorStart:vectorEnd]))
                else:
                    try:
                        tileCoverage.append(coverage[index][tileIndex])
                    except IndexError:
                        sys.exit(
                            "Chromosome {} probably not in one of the "
                            "bigwig files. Remove this chromosome from "
                            "the bigwig file to continue".format(chrom))

            value = func(tileCoverage, funcArgs)

            if fixed_step:
                writeStart = start + tileIndex * tileSize
                writeEnd = min(writeStart + tileSize, end)
                try:
                    _file.write(
                        toBytes("%s\t%d\t%d\t%.2f\n" %
                                (chrom, writeStart, writeEnd, value)))
                except TypeError:
                    # value cannot be formatted as a float; write it as is
                    _file.write(
                        toBytes("{}\t{}\t{}\t{}\n".format(
                            chrom, writeStart, writeEnd, value)))
            else:
                if previousValue is None:
                    # first tile of a new run
                    writeStart = start + tileIndex * tileSize
                    writeEnd = min(writeStart + tileSize, end)
                    previousValue = value

                elif previousValue == value:
                    # same value as the running interval: extend it
                    writeEnd = min(writeEnd + tileSize, end)

                elif previousValue != value:
                    # value changed: flush the finished run, start a new one
                    if not np.isnan(previousValue):
                        _file.write(
                            toBytes("{0}\t{1}\t{2}\t{3:g}\n".format(
                                chrom, writeStart, writeEnd, previousValue)))
                    previousValue = value
                    writeStart = writeEnd
                    writeEnd = min(writeStart + tileSize, end)

        if not fixed_step:
            # write remaining value if not a nan. The test is against None
            # (no tile processed) and not truthiness, otherwise a trailing
            # run whose value is 0 would be dropped although zero-valued
            # runs in the middle of the region are written.
            if previousValue is not None and writeStart != end and \
                    not np.isnan(previousValue):
                _file.write(
                    toBytes("{0}\t{1}\t{2}\t{3:g}\n".format(
                        chrom, writeStart, end, previousValue)))

    tempFileName = _file.name
    return chrom, start, end, tempFileName
def writeBedGraph_worker(
        chrom, start, end, tileSize, defaultFragmentLength,
        bamOrBwFileList, func, funcArgs, extendPairedEnds=True, smoothLength=0,
        skipZeroOverZero=False, missingDataAsZero=False, fixed_step=False):
    r"""
    Write a temporary bedGraph file for one region, computed from the
    per-tile coverage of several bam and/or bigwig files.

    For every tile the coverage of each input file is collected into a
    list and ``func(tileCoverage, funcArgs)`` is called to compute the
    value that is written.

    :param chrom: chromosome name of the region.
    :param start: start position of the region.
    :param end: end position of the region; written intervals never extend
        past it.
    :param tileSize: width of each tile; tile ``i`` starts at
        ``start + i * tileSize``.
    :param defaultFragmentLength: forwarded to getCoverageFromBam.
    :param bamOrBwFileList: iterable of (file path, format) pairs where the
        format is 'bam' or 'bigwig'; entries with any other format are
        ignored.
    :param func: callable invoked as ``func(tileCoverage, funcArgs)``.
    :param funcArgs: second argument passed to ``func``.
    :param extendPairedEnds: forwarded to getCoverageFromBam.
    :param smoothLength: if greater than 0, each file contributes the mean
        of its coverage over the range returned by getSmoothRange instead
        of the value of the single tile.
    :param skipZeroOverZero: if True, tiles whose coverages sum to 0 are
        skipped: ``func`` is not called and nothing is written for them.
    :param missingDataAsZero: forwarded to getCoverageFromBigwig.
    :param fixed_step: if True, one line is written per tile. Otherwise
        consecutive tiles with equal values are merged into one interval
        and intervals whose value is nan are not written.
    :return: tuple (chrom, start, end, name of the temporary bedGraph file).
    :raises NameError: if start is bigger than end.
    """
    if start > end:
        raise NameError("start position ({0}) bigger than "
                        "end position ({1})".format(start, end))

    coverage = []

    for indexFile, fileFormat in bamOrBwFileList:
        if fileFormat == 'bam':
            bamHandle = bamHandler.openBam(indexFile)
            coverage.append(getCoverageFromBam(
                bamHandle, chrom, start, end, tileSize,
                defaultFragmentLength, extendPairedEnds,
                True))
            bamHandle.close()
        elif fileFormat == 'bigwig':
            bigwigHandle = pyBigWig.open(indexFile)
            coverage.append(
                getCoverageFromBigwig(
                    bigwigHandle, chrom, start, end,
                    tileSize, missingDataAsZero))
            bigwigHandle.close()

    # Computed before the temporary file is created so that an empty
    # coverage list fails without leaving a stray file behind.
    lengthCoverage = len(coverage[0])

    # is /dev/shm available?
    # working in this directory speeds the process
    try:
        _file = tempfile.NamedTemporaryFile(dir="/dev/shm", delete=False)
    except OSError:
        _file = tempfile.NamedTemporaryFile(delete=False)

    run_line = "{0}\t{1}\t{2}\t{3:g}\n"
    previousValue = None
    # delete=False keeps the file on disk; the with block only guarantees
    # that the handle is closed, also when func raises or sys.exit is hit.
    with _file:
        for tileIndex in range(lengthCoverage):

            tileCoverage = []
            for index in range(len(bamOrBwFileList)):
                if smoothLength > 0:
                    vectorStart, vectorEnd = getSmoothRange(
                        tileIndex, tileSize, smoothLength, lengthCoverage)
                    tileCoverage.append(
                        np.mean(coverage[index][vectorStart:vectorEnd]))
                else:
                    try:
                        tileCoverage.append(coverage[index][tileIndex])
                    except IndexError:
                        sys.exit("Chromosome {} probably not in one of the bigwig "
                                 "files. Remove this chromosome from the bigwig file "
                                 "to continue".format(chrom))

            if skipZeroOverZero and np.sum(tileCoverage) == 0:
                # Flush the run collected so far before opening the gap:
                # resetting previousValue without writing would silently
                # drop the interval that precedes the skipped tile.
                # (previousValue is always None in fixed_step mode.)
                if previousValue is not None and not np.isnan(previousValue):
                    _file.write(toBytes(run_line.format(chrom, writeStart,
                                                        writeEnd, previousValue)))
                previousValue = None
                continue

            value = func(tileCoverage, funcArgs)

            if fixed_step:
                writeStart = start + tileIndex * tileSize
                writeEnd = min(writeStart + tileSize, end)
                try:
                    _file.write(toBytes("%s\t%d\t%d\t%.2f\n" % (chrom, writeStart,
                                                                writeEnd, value)))
                except TypeError:
                    # value cannot be formatted as a float; write it as is
                    _file.write(toBytes("{}\t{}\t{}\t{}\n".format(chrom, writeStart,
                                                                  writeEnd, value)))
            else:
                if previousValue is None:
                    # first tile of a new run
                    writeStart = start + tileIndex * tileSize
                    writeEnd = min(writeStart + tileSize, end)
                    previousValue = value

                elif previousValue == value:
                    # same value as the running interval: extend it
                    writeEnd = min(writeEnd + tileSize, end)

                elif previousValue != value:
                    # value changed: flush the finished run, start a new one
                    if not np.isnan(previousValue):
                        _file.write(toBytes(run_line.format(chrom, writeStart,
                                                            writeEnd, previousValue)))
                    previousValue = value
                    writeStart = writeEnd
                    writeEnd = min(writeStart + tileSize, end)

        if not fixed_step:
            # write remaining value if not a nan. The test is against None
            # (no pending run) and not truthiness, otherwise a trailing run
            # whose value is 0 would be dropped although zero-valued runs
            # in the middle of the region are written.
            if previousValue is not None and writeStart != end and \
                    not np.isnan(previousValue):
                _file.write(toBytes(run_line.format(chrom, writeStart,
                                                    end, previousValue)))

    tempFileName = _file.name
    return chrom, start, end, tempFileName