Example #1
0
def addtags(bam,
            tagfile,
            output,
            sam=False,
            trim_suffix=True,
            mode="tag",
            nproc=1):
    """Add tags to reads from individual cells

    Copies BAM entries to a new file, adding a read tag to cells matching an input table.
    The BAM file is split into intervals, each interval is handled by a worker process
    that returns the path of a temporary file, and the temporary files are then combined
    with ``samtools merge``.

    Parameters
    ----------
    bam : str
        Path to BAM file.
    tagfile : str
        Tab-delimited file containing cell barcode, read tag to be added, tag information
    output : str
        Name for output BAM file.
    sam : bool, optional
        Output SAM format. Default is BAM format.
    trim_suffix: bool, optional
        Remove trailing 2 characters from cell barcode in bam file (sometimes needed to match 10x barcodes).
    mode : str
        Either tag (default) or readname. Some BAM file store the cell barcode in the readname rather than under
        a read tag.
    nproc : int, optional
        Number of processors to use. Default is 1.

    Raises
    ------
    Exception
        If samtools merge of temporary BAM files fails (non-zero exit status or
        missing output file). Temporary files are left in place in that case.
    """
    nproc = int(nproc)
    tags = _readtags(tagfile)
    inputBam = pysam.AlignmentFile(bam, "rb")
    try:
        intervals = utils.chunk_bam(inputBam, nproc)
    finally:
        # Release the file handle even if chunking fails.
        inputBam.close()

    # The context manager tears the worker processes down once the results
    # are collected (or an error occurs), so no workers are leaked.
    with Pool(nproc) as pool:
        tempfiles = pool.map_async(
            functools.partial(
                _add_read_tags,
                bam=bam,
                sam=sam,
                output=output,
                cb=tags,
                trim_suffix=trim_suffix,
                mode=mode,
            ),
            intervals.values(),
        ).get(9999999)  # wait with a (very large) timeout rather than blocking indefinitely

    # Pass the command as an argument list (no shell) so that paths containing
    # spaces or shell metacharacters are handled safely.
    merge_command = ["samtools", "merge", "-@", str(nproc), output]
    merge_command.extend(tempfiles)
    returncode = call(merge_command)

    # Checking the exit status as well as the output path avoids mistaking a
    # stale, pre-existing output file for a successful merge.
    if returncode != 0 or not os.path.exists(output):
        raise Exception("samtools merge failed, temp files not deleted")
    for tempfile_path in tempfiles:
        os.remove(tempfile_path)
Example #2
0
def filterbarcodes(cells,
                   bam,
                   output,
                   sam=False,
                   trim_suffix=True,
                   nproc=1,
                   mode="tag"):
    """Filter reads based on input list of cell barcodes

    Copy BAM entries matching a list of cell barcodes to a new BAM file.
    The BAM file is split into intervals, each interval is handled by a worker process
    that returns the path of a temporary file, and the temporary files are then combined
    with ``samtools merge``.

    Parameters
    ----------
    cells : str
        Path to file containing cell barcodes, or comma-separated list of cell barcodes. File can be gzip compressed.
    bam : str
        Path to BAM file.
    output : str
        Name for output BAM file.
    sam : bool, optional
        Output SAM format. Default is BAM format.
    trim_suffix: bool, optional
        Remove trailing 2 characters from cell barcode in bam file (sometimes needed to match 10x barcodes).
    nproc : int, optional
        Number of processors to use. Default is 1.
    mode : str
        Either tag (default) or readname. Some BAM file store the cell barcode in the readname rather than under
        a read tag.

    Raises
    ------
    Exception
        If samtools merge of temporary BAM files fails (non-zero exit status or
        missing output file). Temporary files are left in place in that case.
    """
    nproc = int(nproc)
    cb = utils.read_cells(cells)
    inputBam = pysam.AlignmentFile(bam, "rb")
    try:
        intervals = utils.chunk_bam(inputBam, nproc)
    finally:
        # Release the file handle even if chunking fails.
        inputBam.close()

    # The context manager tears the worker processes down once the results
    # are collected (or an error occurs), so no workers are leaked.
    with Pool(nproc) as pool:
        tempfiles = pool.map_async(
            functools.partial(
                _iterate_reads,
                bam=bam,
                sam=sam,
                output=output,
                cb=cb,
                trim_suffix=trim_suffix,
                mode=mode,
            ),
            intervals.values(),
        ).get(9999999)  # wait with a (very large) timeout rather than blocking indefinitely

    # Pass the command as an argument list (no shell) so that paths containing
    # spaces or shell metacharacters are handled safely.
    merge_command = ["samtools", "merge", "-@", str(nproc), output]
    merge_command.extend(tempfiles)
    returncode = call(merge_command)

    # Checking the exit status as well as the output path avoids mistaking a
    # stale, pre-existing output file for a successful merge.
    if returncode != 0 or not os.path.exists(output):
        raise Exception("samtools merge failed, temp files not deleted")
    for tempfile_path in tempfiles:
        os.remove(tempfile_path)
Example #3
0
def filterbarcodes(
    cells, bam, readname_barcode=None, cellbarcode="CB", sam=False, trim_suffix=True, nproc=1
):
    """Filter reads based on input list of cell barcodes

    Copy BAM entries matching a list of cell barcodes to a new BAM file.
    Output BAM files will be named according to the group name in the
    file provided.

    Parameters
    ----------
    cells : str
        Path to file containing cell barcodes and the group associated with each barcode.
        File can be gzip compressed. A separate BAM file will be created for each
        group of cells.
    bam : str
        Path to BAM file.
    readname_barcode : str or None, optional
        A regular expression for matching cell barcode in read name. If None (default),
        use the read tags. A non-None value is compiled with ``re.compile`` before
        being handed to the workers.
    cellbarcode : str, optional
        Tag used for cell barcode. Default is CB (used by cellranger)
    sam : bool, optional
        Accepted for interface compatibility; not used by this function.
    trim_suffix: bool, optional
        Remove trailing 2 characters from cell barcode in bam file (sometimes needed to match 10x barcodes).
    nproc : int, optional
        Number of processors to use. Default is 1.

    Raises
    ------
    Exception
        If samtools merge of temporary BAM files fails (presumably raised from
        ``mergeAll`` -- confirm against its implementation).
    """
    nproc = int(nproc)
    cb = utils.read_cell_barcode_file(cells)
    # Flatten the per-barcode group collections into the distinct group names.
    unique_classes = list(set(chain.from_iterable(cb.values())))
    inputBam = pysam.AlignmentFile(bam, "rb")
    try:
        intervals = utils.chunk_bam(inputBam, nproc)
    finally:
        # Release the file handle even if chunking fails.
        inputBam.close()
    if readname_barcode is not None:
        readname_barcode = re.compile(readname_barcode)

    # The context manager tears the worker processes down once the results
    # are collected (or an error occurs), so no workers are leaked.
    with Pool(nproc) as pool:
        idents = pool.map_async(
            functools.partial(
                _iterate_reads,
                bam=bam,
                cb=cb,
                classes=unique_classes,
                trim_suffix=trim_suffix,
                cellbarcode=cellbarcode,
                readname_barcode=readname_barcode,
            ),
            intervals.values(),
        ).get(9999999)  # wait with a (very large) timeout rather than blocking indefinitely

    mergeAll(idents=idents, classes=unique_classes, nproc=nproc, remove=True)