Example #1
0
def count_covariates(picard, dup_align_bam, ref_file, platform,
        snp_file):
    """Run GATK CountCovariates, step 1 of the recalibration process.

    The recalibration table is written beside ``dup_align_bam`` using a
    ``.recal`` extension. GATK is invoked through ``picard.run_gatk`` only
    when that file is not already on disk.

    Args:
        picard: runner object exposing ``run_gatk(params, tmp_dir)``.
        dup_align_bam: path of the input BAM file (passed as ``-I``).
        ref_file: path of the reference file (passed as ``-R``).
        platform: value handed to ``--default_platform``.
        snp_file: optional file passed as a ``dbsnp,VCF`` binding; skipped
            when falsy.

    Returns:
        Path of the ``.recal`` output file.
    """
    out_file = "%s.recal" % os.path.splitext(dup_align_bam)[0]
    covariates = ("ReadGroupCovariate", "QualityScoreCovariate",
                  "CycleCovariate", "DinucCovariate", "TileCovariate")
    params = ["-T", "CountCovariates"]
    for covariate in covariates:
        params.extend(["-cov", covariate])
    params.extend(["-recalFile", out_file,
                   "-I", dup_align_bam,
                   "-R", ref_file,
                   "-l", "INFO",
                   "-U",
                   "-OQ",
                   "--default_platform", platform])
    if snp_file:
        params.extend(["-B", "dbsnp,VCF,%s" % snp_file])
    # Existing output is reused rather than recomputed.
    if os.path.exists(out_file):
        return out_file
    with curdir_tmpdir() as tmp_dir:
        picard.run_gatk(params, tmp_dir)
    return out_file
Example #2
0
def picard_sort(picard, align_bam):
    """Coordinate-sort ``align_bam`` with Picard's SortSam.

    The output path is the input path with ``-sort`` inserted before the
    extension. Picard is only run when that file does not exist yet.

    Args:
        picard: runner object exposing ``run(command, options)``.
        align_bam: path of the file to sort.

    Returns:
        Path of the sorted output file.
    """
    # splitext yields (root, extension), which fills both placeholders.
    out_file = "%s-sort%s" % os.path.splitext(align_bam)
    if os.path.exists(out_file):
        return out_file
    with curdir_tmpdir() as tmp_dir:
        sort_opts = [
            ("INPUT", align_bam),
            ("OUTPUT", out_file),
            ("TMP_DIR", tmp_dir),
            ("SORT_ORDER", "coordinate"),
        ]
        picard.run("SortSam", sort_opts)
    return out_file
Example #3
0
def picard_fixmate(picard, align_bam):
    """Run Picard's FixMateInformation, writing a coordinate-sorted file.

    The output path is the input path with ``-sort`` inserted before the
    extension. Picard is only run when that file does not exist yet.

    Args:
        picard: runner object exposing ``run(command, options)``.
        align_bam: path of the aligned input file.

    Returns:
        Path of the output file.
    """
    # splitext yields (root, extension), which fills both placeholders.
    out_file = "%s-sort%s" % os.path.splitext(align_bam)
    if os.path.exists(out_file):
        return out_file
    with curdir_tmpdir() as tmp_dir:
        fixmate_opts = [
            ("INPUT", align_bam),
            ("OUTPUT", out_file),
            ("TMP_DIR", tmp_dir),
            ("SORT_ORDER", "coordinate"),
        ]
        picard.run("FixMateInformation", fixmate_opts)
    return out_file
Example #4
0
def mark_duplicates(picard, align_bam):
    """Run Picard's MarkDuplicates on ``align_bam``.

    Output names are derived from the input path: every ``.`` in the part
    before the extension is replaced with ``-``, then ``-dup`` plus the
    original extension is appended for the BAM and ``-dup.dup_metrics`` for
    the metrics file. Picard is only run when the output BAM is missing.

    Args:
        picard: runner object exposing ``run(command, options)``.
        align_bam: path of the aligned input file.

    Returns:
        Path of the duplicate-marked output file.
    """
    base, ext = os.path.splitext(align_bam)
    base = base.replace(".", "-")
    dup_bam = "%s-dup%s" % (base, ext)
    dup_metrics = "%s-dup.dup_metrics" % base
    if not os.path.exists(dup_bam):
        with curdir_tmpdir() as tmp_dir:
            opts = [("INPUT", align_bam),
                    ("OUTPUT", dup_bam),
                    ("TMP_DIR", tmp_dir),
                    ("METRICS_FILE", dup_metrics)]
            # Picard must run while the temporary directory context is
            # still active, since TMP_DIR points into it.
            picard.run("MarkDuplicates", opts)
    return dup_bam
def merge_bam_files(bam_files, work_dir, config):
    """Merge multiple BAM files from a sample into a single BAM.

    The merged file is placed in ``work_dir`` and named after the first
    entry of ``bam_files``. Picard's MergeSamFiles is only run when that
    file does not exist yet.

    Args:
        bam_files: sequence of input BAM paths; the first one names the
            output.
        work_dir: directory that receives the merged file.
        config: mapping read at ``config["program"]["picard"]`` to build
            the Picard runner.

    Returns:
        Path of the merged BAM file.
    """
    merged_name = os.path.basename(bam_files[0])
    out_file = os.path.join(work_dir, merged_name)
    if os.path.exists(out_file):
        return out_file
    picard = PicardRunner(config["program"]["picard"])
    with utils.curdir_tmpdir() as tmp_dir:
        merge_opts = [
            ("OUTPUT", out_file),
            ("SORT_ORDER", "coordinate"),
            ("TMP_DIR", tmp_dir),
        ]
        # One INPUT option per file to merge.
        merge_opts.extend(("INPUT", bam) for bam in bam_files)
        picard.run("MergeSamFiles", merge_opts)
    return out_file
def fastq_to_bam(picard, sample_name, quality_format, read1, read2):
    """Convert fastq input to a BAM file with Picard's FastqToSam.

    The output is named after the basename of ``read1`` with its extension
    replaced by ``.bam`` and carries no directory component. Picard is only
    run when that file does not exist yet.

    Args:
        picard: runner object exposing ``run(command, options)``.
        sample_name: value passed as ``SAMPLE_NAME``.
        quality_format: value passed as ``QUALITY_FORMAT``.
        read1: path of the first fastq file.
        read2: optional second fastq file; passed as ``FASTQ2`` when truthy.

    Returns:
        Name of the output BAM file.
    """
    read_name = os.path.basename(read1)
    out_file = "%s.bam" % os.path.splitext(read_name)[0]
    if os.path.exists(out_file):
        return out_file
    with curdir_tmpdir() as tmp_dir:
        convert_opts = [
            ("FASTQ", read1),
            ("TMP_DIR", tmp_dir),
            ("QUALITY_FORMAT", quality_format),
            ("SAMPLE_NAME", sample_name),
            ("OUTPUT", out_file),
        ]
        if read2:
            convert_opts += [("FASTQ2", read2)]
        picard.run("FastqToSam", convert_opts)
    return out_file
Example #7
0
def main(config_file, align_sam, ref_file, fastq_one, fastq_pair=None,
        sample_name=""):
    """Convert fastq input to BAM, merge it with an alignment and sort it.

    Args:
        config_file: path of a YAML configuration file; read at
            ``["program"]["picard"]`` and ``["algorithm"]["quality_format"]``.
        align_sam: path of the alignment file; its directory is used as the
            base directory for the fastq conversion.
        ref_file: path of the reference file.
        fastq_one: path of the first fastq file.
        fastq_pair: optional second fastq file; its presence is passed on
            to the merge step as a boolean.
        sample_name: sample name handed to the fastq conversion.
    """
    with open(config_file) as in_handle:
        # safe_load replaces yaml.load here: without an explicit Loader,
        # yaml.load can construct arbitrary objects on older PyYAML and is
        # rejected outright by PyYAML >= 6.
        config = yaml.safe_load(in_handle)
    picard = PicardRunner(config["program"]["picard"])
    index_ref_file(picard, ref_file)
    base_dir = os.path.split(align_sam)[0]
    # All three steps share one temporary directory.
    with curdir_tmpdir() as tmp_dir:
        out_fastq_bam = picard_fastq_to_bam(picard, fastq_one, fastq_pair,
                base_dir, config["algorithm"]["quality_format"],
                sample_name, tmp_dir)
        out_bam = picard_merge_bam(picard, align_sam, out_fastq_bam,
                ref_file, tmp_dir, fastq_pair is not None)
        picard_sort(picard, out_bam, tmp_dir)
Example #8
0
def indel_realignment(picard, align_bam, ref_file, intervals):
    """Perform realignment of a BAM file in the specified regions.

    Runs GATK's IndelRealigner through ``picard.run_gatk``. The output is
    the input path with its extension replaced by ``-realign.bam``. GATK is
    skipped only when that file already exists and is non-empty.

    Args:
        picard: runner object exposing ``run_gatk(params, tmp_dir)``.
        align_bam: path of the input BAM file (passed as ``-I``).
        ref_file: path of the reference file (passed as ``-R``).
        intervals: value passed as ``-targetIntervals``.

    Returns:
        Path of the realigned BAM file.
    """
    bam_base = os.path.splitext(align_bam)[0]
    out_file = "%s-realign.bam" % bam_base
    params = [
        "-T", "IndelRealigner",
        "-I", align_bam,
        "-R", ref_file,
        "-targetIntervals", intervals,
        "-o", out_file,
        "-l", "INFO",
    ]
    # A missing or empty output file means the work still has to be done.
    if not os.path.exists(out_file) or os.path.getsize(out_file) <= 0:
        with curdir_tmpdir() as tmp_dir:
            picard.run_gatk(params, tmp_dir)
    return out_file
Example #9
0
def gatk_recalibrate(picard, dup_align_bam, ref_file, recal_file, platform):
    """Run GATK TableRecalibration, step 2 of the recalibration process.

    Uses the covariates in ``recal_file`` to write a new BAM file. The
    output is the input path with its extension replaced by
    ``-gatkrecal.bam``. GATK is only run when that file does not exist yet.

    Args:
        picard: runner object exposing ``run_gatk(params, tmp_dir)``.
        dup_align_bam: path of the input BAM file (passed as ``-I``).
        ref_file: path of the reference file (passed as ``-R``).
        recal_file: path passed as ``-recalFile``.
        platform: value handed to ``--default_platform``.

    Returns:
        Path of the recalibrated BAM file.
    """
    bam_base = os.path.splitext(dup_align_bam)[0]
    out_file = "%s-gatkrecal.bam" % bam_base
    params = [
        "-T", "TableRecalibration",
        "-recalFile", recal_file,
        "-R", ref_file,
        "-I", dup_align_bam,
        "-outputBam", out_file,
        "-l", "INFO",
        "-U",
        "-OQ",
        "--default_platform", platform,
    ]
    # Existing output is reused rather than recomputed.
    if os.path.exists(out_file):
        return out_file
    with curdir_tmpdir() as tmp_dir:
        picard.run_gatk(params, tmp_dir)
    return out_file
Example #10
0
def main(config_file, align_sam, ref_file, fastq_one, fastq_pair=None,
        sample_name="", rg_name="", pu_name=""):
    """Convert fastq input to BAM, merge it with an alignment and sort it.

    Args:
        config_file: path of a YAML configuration file; read at
            ``["program"]["picard"]`` and ``["algorithm"]["platform"]``.
        align_sam: path of the alignment file; its directory is used as the
            base directory for the fastq conversion.
        ref_file: path of the reference file.
        fastq_one: path of the first fastq file.
        fastq_pair: optional second fastq file; its presence is passed on
            to the merge step as a boolean.
        sample_name: sample name handed to the fastq conversion.
        rg_name: handed to the fastq conversion unchanged.
        pu_name: handed to the fastq conversion unchanged.

    Raises:
        ValueError: if the configured platform is not ``illumina``
            (compared case-insensitively).
    """
    with open(config_file) as in_handle:
        # safe_load replaces yaml.load here: without an explicit Loader,
        # yaml.load can construct arbitrary objects on older PyYAML and is
        # rejected outright by PyYAML >= 6.
        config = yaml.safe_load(in_handle)
    picard = PicardRunner(config["program"]["picard"])
    platform = config["algorithm"]["platform"]
    # Only Illumina has a known quality format mapping here.
    if platform.lower() == "illumina":
        qual_format = "Illumina"
    else:
        raise ValueError("Need to specify quality format for %s" % platform)
    index_ref_file(picard, ref_file)
    base_dir = os.path.split(align_sam)[0]
    # All three steps share one temporary directory.
    with curdir_tmpdir() as tmp_dir:
        out_fastq_bam = picard_fastq_to_bam(picard, fastq_one, fastq_pair,
                base_dir, platform, qual_format, sample_name, rg_name, pu_name,
                tmp_dir)
        out_bam = picard_merge_bam(picard, align_sam, out_fastq_bam,
                ref_file, tmp_dir, fastq_pair is not None)
        picard_sort(picard, out_bam, tmp_dir)
Example #11
0
def picard_run_maq(picard, maq_cmd, input_bam, ref_file, barcode, lane,
        out_base, stringency, is_paired=False, limit=None, ext=""):
    """Align reads with Maq in three resumable stages, producing a BAM file.

    Works in the analysis directory ``<out_base>-maq<ext>``, which is
    created if missing. Each stage is skipped when its output is already
    present:

    1. Picard RunMaq with ``PREPARE`` -- skipped if files matching
       ``<barcode>.<lane>*bfq`` exist in the analysis directory.
    2. ``run_maq`` -- skipped if files matching
       ``<barcode>.<lane>*out*.map`` exist in the analysis directory.
    3. Picard RunMaq with ``BAM_OUTPUT`` -- skipped if the output BAM
       exists.

    Args:
        picard: runner object exposing ``run(command, options)``.
        maq_cmd: passed through to ``run_maq``.
        input_bam: path passed as ``INPUT``.
        ref_file: path of the reference file.
        barcode: value passed as ``FLOWCELL_BARCODE``.
        lane: value passed as ``LANE``.
        out_base: prefix used to build the analysis directory name.
        stringency: passed through to ``run_maq``.
        is_paired: selects ``"true"`` or ``"false"`` for ``PAIRED_RUN``.
        limit: if truthy, converted to int and passed as ``READS_TO_ALIGN``
            in the prepare stage.
        ext: suffix appended to the analysis directory name.

    Returns:
        Path of the output BAM file (the analysis directory name plus
        ``.bam``).
    """
    out_dir = "%s-maq%s" % (out_base, ext)
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
    bam_out_file = "%s.bam" % (out_dir)

    def _stage_output_exists(pattern):
        # True when the analysis directory holds files for this lane
        # matching the given glob pattern.
        found = glob.glob(os.path.join(out_dir, pattern % (barcode, lane)))
        return len(found) != 0

    if is_paired:
        paired_flag = "true"
    else:
        paired_flag = "false"

    with curdir_tmpdir() as tmp_dir:
        std_opts = [
            ("INPUT", input_bam),
            ("ANALYSIS_DIR", out_dir),
            ("FLOWCELL_BARCODE", barcode),
            ("LANE", lane),
            ("REFERENCE_SEQUENCE", ref_file),
            ("TMP_DIR", tmp_dir),
            ("PAIRED_RUN", paired_flag),
        ]

        # Stage 1: convert fastq to Maq ready files.
        if not _stage_output_exists("%s.%s*bfq"):
            prepare_opts = std_opts + [("PREPARE", "true")]
            if limit:
                prepare_opts.append(("READS_TO_ALIGN", int(limit)))
            picard.run("RunMaq", prepare_opts)

        # Stage 2: actually run Maq. This goes through run_maq rather than
        # Picard's RunMaq, as Picard is failing with the same parameters.
        if not _stage_output_exists("%s.%s*out*.map"):
            run_maq(maq_cmd, stringency, out_dir, ref_file, barcode, lane)

        # Stage 3: convert the Maq output to an aligned BAM file.
        if not os.path.exists(bam_out_file):
            bam_opts = std_opts + [
                ("OUTPUT", bam_out_file),
                ("BAM_OUTPUT", "true"),
            ]
            # Called for its effect only; the return value is not used.
            index_ref_file(picard, ref_file)
            picard.run("RunMaq", bam_opts)
    return bam_out_file