def coverage(data):
    bed_file = dd.get_coverage_experimental(data)
    if not bed_file:
        return data

    work_dir = os.path.join(dd.get_work_dir(data), "report", "coverage")
    with chdir(work_dir):
        in_bam = data['work_bam']
        sample = os.path.splitext(os.path.basename(in_bam))[0]
        logger.debug("doing coverage for %s" % sample)
        region_bed = pybedtools.BedTool(bed_file)
        parse_file = os.path.join(sample + "_coverage.bed")
        parse_total_file = os.path.join(sample + "_cov_total.tsv")
        if not file_exists(parse_file):
            total_cov = cov_class(0, None, sample)
            bam_api = pysam.AlignmentFile(in_bam)
            with file_transaction(parse_file) as out_tx:
                with open(out_tx, 'w') as out_handle:
                    print >>out_handle, "#chrom\tstart\tend\tregion\treads\tstrand\tsize\tsample\tmean\tsdt\tq10\tq20\tq4\tq50"
                with tmpfile() as tx_tmp_file:
                    # tx_tmp_file = "tmpintersect"
                    for line in region_bed:
                        chrom = line.chrom
                        start = max(line.start, 0)
                        end = line.end
                        region_file = pybedtools.BedTool(str(line), from_string=True).saveas().fn
                        coords = "%s:%s-%s" % (chrom, start, end)
                        cmd = ("samtools view -b {in_bam} {coords} | "
                               "bedtools coverage -a {region_file} -b - -hist > {tx_tmp_file}")
                        _silence_run(cmd.format(**locals()))
                        total_cov = _get_exome_coverage_stats(os.path.abspath(tx_tmp_file), sample, out_tx, total_cov)
            total_cov.write_coverage(parse_total_file)
        data['coverage'] = os.path.abspath(parse_file)
        return data
def coverage(data):
    AVERAGE_REGION_STRING_LENGTH = 100
    bed_file = dd.get_coverage_experimental(data)
    if not bed_file:
        return data

    work_dir = os.path.join(dd.get_work_dir(data), "report", "coverage")
    batch_size = max_command_length() / AVERAGE_REGION_STRING_LENGTH

    with chdir(work_dir):
        in_bam = data['work_bam']
        sample = dd.get_sample_name(data)
        logger.debug("doing coverage for %s" % sample)
        region_bed = pybedtools.BedTool(bed_file)
        parse_file = os.path.join(sample + "_coverage.bed")
        parse_total_file = os.path.join(sample + "_cov_total.tsv")
        if not file_exists(parse_file):
            total_cov = cov_class(0, None, sample)
            with file_transaction(parse_file) as out_tx:
                with open(out_tx, 'w') as out_handle:
                    HEADER = ["#chrom", "start", "end", "region", "reads",
                              "strand", "size", "sample", "mean", "sd", "cutoff10",
                              "cutoff20", "cutoff4", "cutoff50"]
                    out_handle.write("\t".join(HEADER) + "\n")
                with tmpfile() as tx_tmp_file:
                    lcount = 0
                    for chunk in robust_partition_all(batch_size, region_bed):
                        coord_batch = []
                        line_batch = ""
                        for line in chunk:
                            lcount += 1
                            chrom = line.chrom
                            start = max(line.start, 0)
                            end = line.end
                            coords = "%s:%s-%s" % (chrom, start, end)
                            coord_batch.append(coords)
                            line_batch += str(line)
                        if not coord_batch:
                            continue
                        region_file = pybedtools.BedTool(line_batch,
                                                        from_string=True).saveas().fn
                        coord_string = " ".join(coord_batch)
                        cmd = ("samtools view -b {in_bam} {coord_string} | "
                                "bedtools coverage -a {region_file} -b - "
                                "-hist > {tx_tmp_file}")
                        _silence_run(cmd.format(**locals()))
                        total_cov = _get_exome_coverage_stats(os.path.abspath(tx_tmp_file), sample, out_tx, total_cov)
                        logger.debug("Processed %d regions." % lcount)
            total_cov.write_coverage(parse_total_file)
        data['coverage'] = os.path.abspath(parse_file)
        return data
Example #3
0
def coverage(data):
    bed_file = dd.get_coverage_experimental(data)
    if not bed_file:
        return data

    work_dir = os.path.join(dd.get_work_dir(data), "report", "coverage")
    with chdir(work_dir):
        in_bam = data['work_bam']
        sample = os.path.splitext(os.path.basename(in_bam))[0]
        logger.debug("doing coverage for %s" % sample)
        region_bed = pybedtools.BedTool(bed_file)
        parse_file = os.path.join(sample + "_coverage.bed")
        parse_total_file = os.path.join(sample + "_cov_total.tsv")
        if not file_exists(parse_file):
            total_cov = cov_class(0, None, sample)
            bam_api = pysam.AlignmentFile(in_bam)
            with file_transaction(parse_file) as out_tx:
                with open(out_tx, 'w') as out_handle:
                    print >> out_handle, "#chrom\tstart\tend\tregion\treads\tstrand\tsize\tsample\tmean\tsdt\tq10\tq20\tq4\tq50"
                with tmpfile() as tx_tmp_file:
                    # tx_tmp_file = "tmpintersect"
                    for line in region_bed:
                        chrom = line.chrom
                        start = max(line.start, 0)
                        end = line.end
                        region_file = pybedtools.BedTool(
                            str(line), from_string=True).saveas().fn
                        coords = "%s:%s-%s" % (chrom, start, end)
                        cmd = (
                            "samtools view -b {in_bam} {coords} | "
                            "bedtools coverage -a {region_file} -b - -hist > {tx_tmp_file}"
                        )
                        _silence_run(cmd.format(**locals()))
                        total_cov = _get_exome_coverage_stats(
                            os.path.abspath(tx_tmp_file), sample, out_tx,
                            total_cov)
            total_cov.write_coverage(parse_total_file)
        data['coverage'] = os.path.abspath(parse_file)
        return data