コード例 #1
0
ファイル: genotype.py プロジェクト: kwoklab-user/bcbb
def _unified_genotyper(picard, align_bam, ref_file, dbsnp=None):
    """Call SNPs on an alignment file with GATK's UnifiedGenotyper.

    The VCF is written next to the input as
    ``<align_bam without extension>-snp.vcf``.  GATK is only invoked when
    that file is missing or empty.

    Args:
        picard: runner object; only ``run_gatk(params)`` is called on it.
        align_bam: path of the alignment file, passed as ``-I``.
        ref_file: path of the reference, passed as ``-R``.
        dbsnp: optional VCF attached through ``-B:dbsnp,VCF``.

    Returns:
        Path of the output VCF.
    """
    out_file = "%s-snp.vcf" % os.path.splitext(align_bam)[0]
    if os.path.exists(out_file) and os.path.getsize(out_file) > 0:
        return out_file
    # Every entry is a string (the numeric options used to be ints) so the
    # list is a valid command-line argument vector without relying on the
    # runner to convert values.
    params = ["-T", "UnifiedGenotyper",
              "-I", align_bam,
              "-R", ref_file,
              "-o", out_file,
              "-A", "DepthOfCoverage",
              "-A", "AlleleBalance",
              "-A", "HomopolymerRun",
              "-A", "QualByDepth",
              "--genotype_likelihoods_model", "SNP",
              "-baq", "CALCULATE_AS_NECESSARY",
              "--standard_min_confidence_threshold_for_calling", "10.0",
              "--standard_min_confidence_threshold_for_emitting", "10.0",
              #"--trigger_min_confidence_threshold_for_calling", "10.0",
              #"--trigger_min_confidence_threshold_for_emitting", "10.0",
              "--downsample_to_coverage", "10000",
              "--min_base_quality_score", "20",
              "-l", "INFO",
              ]
    if dbsnp:
        params += ["-B:dbsnp,VCF", dbsnp]
    # file_transaction is a project helper; presumably it cleans up partial
    # output on failure -- verify against its definition.
    with file_transaction(out_file):
        picard.run_gatk(params)
    return out_file
コード例 #2
0
ファイル: picardrun.py プロジェクト: kwoklab-user/bcbb
def picard_fastq_to_bam(picard, fastq_one, fastq_two, out_dir,
                        platform, sample_name="", rg_name="", pu_name=""):
    """Build a BAM file from one or two fastq files with Picard FastqToSam.

    The BAM is named after ``fastq_one`` (basename without extension plus
    ``.bam``) and placed in ``out_dir``.  Picard is skipped when that file
    already exists and is not empty.

    Args:
        picard: runner object; only ``run(command, options)`` is called.
        fastq_one: first fastq file.
        fastq_two: optional second fastq file, added as ``FASTQ2`` if truthy.
        out_dir: directory receiving the BAM.
        platform: sequencing platform; looked up case-insensitively to find
            the quality format and also passed on as ``PLATFORM``.
        sample_name, rg_name, pu_name: values for ``SAMPLE_NAME``,
            ``READ_GROUP_NAME`` and ``PLATFORM_UNIT``.

    Returns:
        Path of the BAM file.

    Raises:
        ValueError: if no quality format is known for ``platform``.
    """
    format_by_platform = {"illumina" : "Illumina"}
    try:
        qual_format = format_by_platform[platform.lower()]
    except KeyError:
        raise ValueError("Need to specify quality format for %s" % platform)
    fastq_stem = os.path.splitext(os.path.basename(fastq_one))[0]
    out_bam = os.path.join(out_dir, "%s.bam" % fastq_stem)
    if os.path.exists(out_bam) and os.path.getsize(out_bam) > 0:
        return out_bam
    with curdir_tmpdir() as tmp_dir:
        opts = [
            ("FASTQ", fastq_one),
            ("QUALITY_FORMAT", qual_format),
            ("READ_GROUP_NAME", rg_name),
            ("SAMPLE_NAME", sample_name),
            ("PLATFORM_UNIT", pu_name),
            ("PLATFORM", platform),
            ("TMP_DIR", tmp_dir),
            ("OUTPUT", out_bam),
        ]
        if fastq_two:
            opts += [("FASTQ2", fastq_two)]
        with file_transaction(out_bam):
            picard.run("FastqToSam", opts)
    return out_bam
コード例 #3
0
ファイル: genotype.py プロジェクト: bh0085/compbio
def variant_eval(vcf_in, ref_file, dbsnp, target_intervals, picard):
    """Run GATK VariantEval on a VCF, comparing against a dbSNP file.

    Results go to ``<vcf_in without extension>.eval``; GATK is skipped when
    that file already exists and is not empty.

    Args:
        vcf_in: VCF to evaluate (``--eval``).
        ref_file: reference file (``-R``).
        dbsnp: dbSNP file (``--dbsnp``).
        target_intervals: optional value for ``-L``; omitted when falsy.
        picard: runner object; only ``run_gatk(params)`` is called.

    Returns:
        Path of the evaluation output file.
    """
    eval_out = os.path.splitext(vcf_in)[0] + ".eval"
    if os.path.exists(eval_out) and os.path.getsize(eval_out) > 0:
        return eval_out
    gatk_args = ["-T", "VariantEval",
                 "-R", ref_file,
                 "--eval", vcf_in,
                 "--dbsnp", dbsnp,
                 "-ST", "Filter",
                 "-o", eval_out,
                 "-l", "INFO"]
    if target_intervals:
        gatk_args += ["-L", target_intervals]
    with file_transaction(eval_out):
        picard.run_gatk(gatk_args)
    return eval_out
コード例 #4
0
ファイル: genotype.py プロジェクト: kwoklab-user/bcbb
def _variant_filtration(picard, snp_file, ref_file):
    """Flag problematic SNP calls with GATK VariantFiltration hard filters.

    Recommended Broad hard filtering for deep coverage exomes:
        QUAL < 30.0 || AB > 0.75 && DP > 40 || QD < 5.0 || HRun > 5 || SB > -0.10

    The expressions applied here use ``QUAL <= 50.0`` and ``HRun > 3.0``
    instead of the QUAL and HRun cut-offs quoted above; the other three
    expressions match.

    Output is written to ``<base>-filter<ext>`` derived from ``snp_file`` and
    GATK is skipped when that file already exists and is not empty.

    Args:
        picard: runner object; only ``run_gatk(params)`` is called.
        snp_file: input VCF, bound as ``-B:variant,VCF``.
        ref_file: reference file (``-R``).

    Returns:
        Path of the filtered VCF.
    """
    stem, suffix = os.path.splitext(snp_file)
    out_file = stem + "-filter" + suffix
    if os.path.exists(out_file) and os.path.getsize(out_file) > 0:
        return out_file
    named_filters = [("QUALFilter", "QUAL <= 50.0"),
                     ("QDFilter", "QD < 5.0"),
                     ("ABFilter", "AB > 0.75 && DP > 40"),
                     ("HRunFilter", "HRun > 3.0"),
                     ("SBFilter", "SB > -0.10")]
    params = ["-T", "VariantFiltration",
              "-R", ref_file,
              "-o", out_file,
              "-B:variant,VCF", snp_file]
    for filter_name, expression in named_filters:
        params += ["--filterName", filter_name,
                   "--filterExpression", expression]
    params += ["-l", "INFO"]
    with file_transaction(out_file):
        picard.run_gatk(params)
    return out_file
コード例 #5
0
ファイル: genotype.py プロジェクト: bh0085/compbio
def _apply_variant_recal(broad_runner, snp_file, ref_file, recal_file, tranch_file, filter_type):
    """Run GATK ApplyRecalibration on a VCF and return the filtered VCF path.

    The output is named ``<base>-<filter_type>filter<ext>`` after
    ``snp_file``.  GATK is skipped when that file already exists and is not
    empty.

    Args:
        broad_runner: runner object; only ``run_gatk(params)`` is called.
        snp_file: input VCF (``--input``).
        ref_file: reference file (``-R``).
        recal_file: recalibration file (``--recal_file``).
        tranch_file: tranches file (``--tranches_file``).
        filter_type: value for ``--mode``; also embedded in the output name.

    Returns:
        Path of the output VCF.
    """
    stem, suffix = os.path.splitext(snp_file)
    out_file = "{base}-{filter}filter{ext}".format(base=stem, ext=suffix, filter=filter_type)
    if os.path.exists(out_file) and os.path.getsize(out_file) > 0:
        return out_file
    params = ["-T", "ApplyRecalibration",
              "-R", ref_file,
              "--input", snp_file,
              "--out", out_file,
              "--tranches_file", tranch_file,
              "--recal_file", recal_file,
              "--mode", filter_type]
    with file_transaction(out_file):
        broad_runner.run_gatk(params)
    return out_file
コード例 #6
0
ファイル: demultiplex.py プロジェクト: cdoherty/bcbb
def split_by_barcode(fastq1, fastq2, multiplex, base_name, dirs, config):
    """Split a fastq file into multiplex pieces using barcode details.

    Args:
        fastq1: first fastq file.
        fastq2: optional second fastq file (falsy for single end).
        multiplex: iterable of dicts with ``barcode_id`` and ``name`` keys;
            when falsy the inputs are returned untouched.
        base_name: prefix for the barcode directory and all output names.
        dirs: mapping with a ``work`` entry, the parent of the barcode dir.
        config: mapping providing ``program.barcode`` plus the
            ``algorithm`` keys ``bc_mismatch``, ``bc_read``, ``bc_position``
            and optionally ``bc_allow_indels``.

    Returns:
        List of ``(barcode_id, name, fastq1, fastq2)`` tuples.  Without
        multiplexing this is a single tuple with empty id and name;
        otherwise only barcodes whose first output file exists are kept.
    """
    if not multiplex:
        return [("", "", fastq1, fastq2)]
    bc_dir = os.path.join(dirs["work"], "%s_barcode" % base_name)
    nomatch_file = "%s_unmatched_1_fastq.txt" % base_name
    metrics_file = "%s_bc.metrics" % base_name

    def fq_fname(barcode_id, read):
        return os.path.join(bc_dir, "%s_%s_%s_fastq.txt" %
                            (base_name, barcode_id, read))

    out_files = []
    for info in multiplex:
        bc_file1 = fq_fname(info["barcode_id"], "1")
        bc_file2 = fq_fname(info["barcode_id"], "2") if fastq2 else None
        out_files.append((info["barcode_id"], info["name"], bc_file1, bc_file2))
    with utils.chdir(bc_dir):
        # The split only runs when neither marker file is present.
        if not os.path.exists(nomatch_file) and not os.path.exists(metrics_file):
            tag_file = _make_tag_file(multiplex)
            cl = [config["program"]["barcode"], tag_file,
                  "%s_--b--_--r--_fastq.txt" % base_name,
                  fastq1]
            if fastq2:
                cl.append(fastq2)
            cl.append("--mismatch=%s" % config["algorithm"]["bc_mismatch"])
            cl.append("--metrics=%s" % metrics_file)
            if int(config["algorithm"]["bc_read"]) == 2:
                cl.append("--second")
            if int(config["algorithm"]["bc_position"]) == 5:
                cl.append("--five")
            if config["algorithm"].get("bc_allow_indels", True) is False:
                cl.append("--noindel")
            # out_files holds (barcode, name, file1, file2) tuples; hand the
            # transaction only the actual file paths, not ids, names or None.
            tx_files = [fname for (_, _, f1, f2) in out_files
                        for fname in (f1, f2) if fname]
            with utils.file_transaction(tx_files + [nomatch_file, metrics_file]):
                subprocess.check_call(cl)
    return [(b, n, f1, f2) for (b, n, f1, f2) in out_files if os.path.exists(f1)]
コード例 #7
0
ファイル: realign.py プロジェクト: bh0085/compbio
def gatk_realigner_targets(runner, align_bam, ref_file, dbsnp=None,
                           region=None, out_file=None, deep_coverage=False):
    """Create the interval list for indel realignment (RealignerTargetCreator).

    The intervals file is ``<out_file without extension>.intervals`` when
    ``out_file`` is given, otherwise
    ``<align_bam without extension>-realign.intervals``.  GATK is skipped
    when that file already exists and is not empty.

    Args:
        runner: runner object; only ``run_gatk(params)`` is called.
        align_bam: alignment file (``-I``).
        ref_file: reference file (``-R``).
        dbsnp: optional file passed as ``--known``.
        region: optional value for ``-L``.
        out_file: optional path used only to derive the intervals name.
        deep_coverage: when true, adds ``--mismatchFraction 0.30`` and
            ``--maxIntervalSize 650``.

    Returns:
        Path of the intervals file.
    """
    if out_file:
        intervals = os.path.splitext(out_file)[0] + ".intervals"
    else:
        intervals = os.path.splitext(align_bam)[0] + "-realign.intervals"
    if os.path.exists(intervals) and os.path.getsize(intervals) > 0:
        return intervals
    gatk_args = ["-T", "RealignerTargetCreator",
                 "-I", align_bam,
                 "-R", ref_file,
                 "-o", intervals,
                 "-l", "INFO"]
    if region:
        gatk_args.extend(["-L", region])
    if dbsnp:
        gatk_args.extend(["--known", dbsnp])
    if deep_coverage:
        gatk_args.extend(["--mismatchFraction", "0.30",
                          "--maxIntervalSize", "650"])
    with file_transaction(intervals):
        runner.run_gatk(gatk_args)
    return intervals
コード例 #8
0
ファイル: recalibrate.py プロジェクト: kwoklab-user/bcbb
def _gatk_count_covariates(picard, dup_align_bam, ref_file, platform,
        snp_file):
    """Run GATK CountCovariates, the first step of recalibration.

    The recalibration table is written to
    ``<dup_align_bam without extension>.recal``; GATK is skipped when that
    file already exists (its size is not checked).

    Args:
        picard: runner object; ``run_gatk(params, tmp_dir)`` is called.
        dup_align_bam: alignment file (``-I``).
        ref_file: reference file (``-R``).
        platform: value for ``--default_platform``.
        snp_file: optional VCF attached through ``-B:dbsnp,VCF``.

    Returns:
        Path of the ``.recal`` file.
    """
    recal_out = os.path.splitext(dup_align_bam)[0] + ".recal"
    if os.path.exists(recal_out):
        return recal_out
    covariates = ["ReadGroupCovariate", "QualityScoreCovariate",
                  "CycleCovariate", "DinucCovariate", "TileCovariate"]
    gatk_args = ["-T", "CountCovariates"]
    for covariate in covariates:
        gatk_args += ["-cov", covariate]
    gatk_args += ["-recalFile", recal_out,
                  "-I", dup_align_bam,
                  "-R", ref_file,
                  "-l", "INFO",
                  "-U",
                  "-OQ",
                  "--default_platform", platform]
    if snp_file:
        gatk_args += ["-B:dbsnp,VCF", snp_file]
    with curdir_tmpdir() as tmp_dir:
        with file_transaction(recal_out):
            picard.run_gatk(gatk_args, tmp_dir)
    return recal_out
コード例 #9
0
ファイル: tophat.py プロジェクト: kdaily/bcbb
def align(fastq_file, pair_file, ref_file, out_base, align_dir, config):
    """Align reads with TopHat and expose the result as ``<out_base>.sam``.

    TopHat writes into ``<align_dir>/<out_base>_tophat`` and only runs when
    its first expected output (``_out_fnames[0]``) is missing.  The returned
    path is a symlink in that directory pointing at that output.

    Args:
        fastq_file: first fastq file.
        pair_file: optional second fastq file; when truthy the mate inner
            distance and its deviation are estimated and passed to TopHat.
        ref_file: reference passed to TopHat before the fastq files.
        out_base: base name for the output directory and the final link.
        align_dir: parent directory of the TopHat output directory.
        config: mapping with ``algorithm`` (``quality_format``,
            ``max_errors``) and ``program`` (``tophat``) sections.

    Returns:
        Path of the ``<out_base>.sam`` symlink.
    """
    qual_format = config["algorithm"].get("quality_format", None)
    if qual_format is None or qual_format.lower() == "illumina":
        qual_flags = ["--solexa1.3-quals"]
    else:
        qual_flags = []
    out_dir = os.path.join(align_dir, "%s_tophat" % out_base)
    safe_makedir(out_dir)
    out_file = os.path.join(out_dir, _out_fnames[0])
    if not os.path.exists(out_file):
        files = [ref_file, fastq_file]
        cl = [config["program"].get("tophat", "tophat")]
        cl += qual_flags
        cl += ["-m", str(config["algorithm"].get("max_errors", 0)),
               "--output-dir", out_dir,
               "--no-convert-bam"]
        if pair_file:
            d, d_stdev = _estimate_paired_innerdist(fastq_file, pair_file, ref_file,
                                                    out_base, out_dir, config)
            cl += ["--mate-inner-dist", str(d),
                   "--mate-std-dev", str(d_stdev)]
            files.append(pair_file)
        cl += files
        with file_transaction([os.path.join(out_dir, f) for f in _out_fnames]):
            subprocess.check_call(cl)
    out_file_final = os.path.join(out_dir, "%s.sam" % out_base)
    # A re-run finds the link left by the previous run; os.symlink raises
    # OSError when the link name already exists, so only create it once.
    if not os.path.lexists(out_file_final):
        os.symlink(out_file, out_file_final)
    return out_file_final
コード例 #10
0
ファイル: genotype.py プロジェクト: bh0085/compbio
def _variant_filtration_indel(broad_runner, snp_file, ref_file, vrn_files, config):
    """Filter indel calls, by hard filters or by recalibration.

    ``config["algorithm"]["coverage_interval"]`` (default ``exome``, compared
    lower-cased) selects the strategy: ``exome`` and ``regional`` go through
    ``_variant_filtration_no_recal`` with three fixed expressions; anything
    else runs GATK with the shared recalibration parameters plus an indel
    training resource and then ``_apply_variant_recal``.  The recalibration
    run is skipped when the recal file already exists and is not empty.

    Args:
        broad_runner: runner object; ``run_gatk(params)`` is called.
        snp_file: VCF to filter.
        ref_file: reference file.
        vrn_files: object whose ``train_indels`` attribute names the
            training VCF; it must be truthy on the recalibration path.
        config: mapping with an ``algorithm`` section.

    Returns:
        Path of the filtered VCF as returned by the delegated helper.
    """
    filter_type = "INDEL"
    cov_interval = config["algorithm"].get("coverage_interval", "exome").lower()
    params, recal_file, tranches_file = _shared_variant_filtration(filter_type, snp_file, ref_file)
    if cov_interval in ("exome", "regional"):
        hard_filters = ["QD < 2.0", "ReadPosRankSum < -20.0", "FS > 200.0"]
        return _variant_filtration_no_recal(broad_runner, snp_file, ref_file,
                                            filter_type, hard_filters)
    assert vrn_files.train_indels, "Need indel training file specified"
    params.extend(["-resource:mills,VCF,known=true,training=true,truth=true,prior=12.0",
                   vrn_files.train_indels])
    for annotation in ("QD", "FS", "HaplotypeScore", "ReadPosRankSum"):
        params.extend(["-an", annotation])
    have_recal = os.path.exists(recal_file) and os.path.getsize(recal_file) > 0
    if not have_recal:
        with file_transaction(recal_file, tranches_file):
            broad_runner.run_gatk(params)
    return _apply_variant_recal(broad_runner, snp_file, ref_file, recal_file, tranches_file, filter_type)
コード例 #11
0
ファイル: realign.py プロジェクト: hussius/bcbb
def gatk_indel_realignment(runner, align_bam, ref_file, intervals, deep_coverage=False):
    """Realign a BAM file over the given intervals with GATK IndelRealigner.

    Output goes to ``<align_bam without extension>-realign.bam``; GATK is
    skipped when that file already exists and is not empty.

    Args:
        runner: runner object; ``run_gatk(params, tmp_dir)`` is called.
        align_bam: alignment file (``-I``).
        ref_file: reference file (``-R``).
        intervals: value for ``-targetIntervals``.
        deep_coverage: when true, adds explicit read and consensus limits.

    Returns:
        Path of the realigned BAM.
    """
    realigned = os.path.splitext(align_bam)[0] + "-realign.bam"
    if os.path.exists(realigned) and os.path.getsize(realigned) > 0:
        return realigned
    gatk_args = ["-T", "IndelRealigner",
                 "-I", align_bam,
                 "-R", ref_file,
                 "-targetIntervals", intervals,
                 "-o", realigned,
                 "-l", "INFO"]
    if deep_coverage:
        gatk_args.extend(["--maxReadsInMemory", "300000",
                          "--maxReadsForRealignment", "500000",
                          "--maxReadsForConsensuses", "500",
                          "--maxConsensuses", "100"])
    with curdir_tmpdir() as tmp_dir:
        with file_transaction(realigned):
            runner.run_gatk(gatk_args, tmp_dir)
    return realigned
コード例 #12
0
ファイル: picardrun.py プロジェクト: kwoklab-user/bcbb
def picard_index(picard, in_bam):
    """Create ``<in_bam>.bai`` with Picard BuildBamIndex unless it exists.

    Args:
        picard: runner object; only ``run(command, options)`` is called.
        in_bam: BAM file to index.

    Returns:
        Path of the index file.
    """
    index_file = "%s.bai" % in_bam
    if os.path.exists(index_file):
        return index_file
    with file_transaction(index_file):
        picard.run("BuildBamIndex", [("INPUT", in_bam), ("OUTPUT", index_file)])
    return index_file
コード例 #13
0
ファイル: metrics.py プロジェクト: bh0085/compbio
 def _collect_align_metrics(self, dup_bam, ref_file):
     """Run Picard CollectAlignmentSummaryMetrics unless already done.

     Output is ``<dup_bam without extension>.align_metrics``; Picard is
     skipped when that file exists.

     Args:
         dup_bam: BAM file passed as ``INPUT``.
         ref_file: reference passed as ``R``.

     Returns:
         Path of the metrics file.
     """
     align_metrics = os.path.splitext(dup_bam)[0] + ".align_metrics"
     if os.path.exists(align_metrics):
         return align_metrics
     opts = [("INPUT", dup_bam),
             ("OUTPUT", align_metrics),
             ("R", ref_file)]
     with file_transaction(align_metrics):
         self._picard.run("CollectAlignmentSummaryMetrics", opts)
     return align_metrics
コード例 #14
0
ファイル: sample.py プロジェクト: kwoklab-user/bcbb
def bam_to_wig(bam_file, config, config_file):
    """Produce ``<bam_file without extension>.bigwig`` via an external script.

    The script named by ``config["analysis"]["towig_script"]`` is called with
    the BAM file and the config file as arguments.  Nothing runs when the
    bigwig file already exists and is not empty.

    Args:
        bam_file: input BAM file.
        config: mapping with an ``analysis`` section.
        config_file: path handed to the script as its second argument.

    Returns:
        Path of the bigwig file.
    """
    wig_file = os.path.splitext(bam_file)[0] + ".bigwig"
    if os.path.exists(wig_file) and os.path.getsize(wig_file) > 0:
        return wig_file
    command = [config["analysis"]["towig_script"], bam_file, config_file]
    with file_transaction(wig_file):
        subprocess.check_call(command)
    return wig_file
コード例 #15
0
ファイル: metrics.py プロジェクト: bh0085/compbio
 def _insert_sizes(self, dup_bam):
     """Run Picard CollectInsertSizeMetrics unless the metrics file exists.

     Both outputs are named after ``dup_bam`` without its extension:
     ``<base>.insert_metrics`` and ``<base>-insert.pdf``.  Only the metrics
     file is checked to decide whether Picard runs.

     Args:
         dup_bam: BAM file passed as ``INPUT``.

     Returns:
         Tuple ``(graph_path, metrics_path)``.
     """
     stem = os.path.splitext(dup_bam)[0]
     insert_metrics = stem + ".insert_metrics"
     insert_graph = stem + "-insert.pdf"
     if os.path.exists(insert_metrics):
         return insert_graph, insert_metrics
     opts = [("INPUT", dup_bam),
             ("OUTPUT", insert_metrics),
             ("H", insert_graph)]
     with file_transaction(insert_graph, insert_metrics):
         self._picard.run("CollectInsertSizeMetrics", opts)
     return insert_graph, insert_metrics
コード例 #16
0
ファイル: metrics.py プロジェクト: bh0085/compbio
 def _gc_bias(self, dup_bam, ref_file):
     """Run Picard CollectGcBiasMetrics unless the metrics file exists.

     Both outputs are named after ``dup_bam`` without its extension:
     ``<base>.gc_metrics`` and ``<base>-gc.pdf``.  Only the metrics file is
     checked to decide whether Picard runs.

     Args:
         dup_bam: BAM file passed as ``INPUT``.
         ref_file: reference passed as ``R``.

     Returns:
         Tuple ``(graph_path, metrics_path)``.
     """
     stem = os.path.splitext(dup_bam)[0]
     gc_metrics = stem + ".gc_metrics"
     gc_graph = stem + "-gc.pdf"
     if os.path.exists(gc_metrics):
         return gc_graph, gc_metrics
     opts = [("INPUT", dup_bam),
             ("OUTPUT", gc_metrics),
             ("CHART", gc_graph),
             ("R", ref_file)]
     with file_transaction(gc_graph, gc_metrics):
         self._picard.run("CollectGcBiasMetrics", opts)
     return gc_graph, gc_metrics
コード例 #17
0
ファイル: picardrun.py プロジェクト: kwoklab-user/bcbb
def picard_index_ref(picard, ref_file):
    """Create the ``.dict`` file for a reference with Picard if missing.

    The dictionary is ``<ref_file without extension>.dict`` and is built with
    CreateSequenceDictionary only when that file does not exist.

    Args:
        picard: runner object; only ``run(command, options)`` is called.
        ref_file: reference file passed as ``REFERENCE``.

    Returns:
        Path of the dictionary file.
    """
    dict_file = os.path.splitext(ref_file)[0] + ".dict"
    if os.path.exists(dict_file):
        return dict_file
    with file_transaction(dict_file):
        picard.run("CreateSequenceDictionary",
                   [("REFERENCE", ref_file), ("OUTPUT", dict_file)])
    return dict_file
コード例 #18
0
ファイル: genotype.py プロジェクト: bh0085/compbio
def unified_genotyper(align_bam, ref_file, config, dbsnp=None, region=None, out_file=None):
    """Call SNPs and indels on an alignment file with GATK UnifiedGenotyper.

    A runner is built from ``config`` and first asked to run
    ``picard_index_ref`` on the reference and ``picard_index`` on the BAM.
    Both confidence thresholds are ``4.0`` when
    ``config["algorithm"]["coverage_depth"]`` (default ``high``, compared
    lower-cased) is ``low`` and ``30.0`` otherwise.  GATK is skipped when the
    output already exists and is not empty.

    Args:
        align_bam: alignment file (``-I``).
        ref_file: reference file (``-R``).
        config: configuration mapping with an ``algorithm`` section.
        dbsnp: optional file passed as ``--dbsnp``.
        region: optional value for ``-L``.
        out_file: optional output path; defaults to
            ``<align_bam without extension>-variants.vcf``.

    Returns:
        Path of the output VCF.
    """
    broad_runner = broad.runner_from_config(config)
    broad_runner.run_fn("picard_index_ref", ref_file)
    broad_runner.run_fn("picard_index", align_bam)
    coverage_depth = config["algorithm"].get("coverage_depth", "high").lower()
    confidence = "4.0" if coverage_depth == "low" else "30.0"
    if out_file is None:
        out_file = os.path.splitext(align_bam)[0] + "-variants.vcf"
    if os.path.exists(out_file) and os.path.getsize(out_file) > 0:
        return out_file
    annotations = ["QualByDepth", "HaplotypeScore", "MappingQualityRankSumTest",
                   "ReadPosRankSumTest", "FisherStrand", "RMSMappingQuality",
                   "DepthOfCoverage"]
    params = ["-T", "UnifiedGenotyper",
              "-I", align_bam,
              "-R", ref_file,
              "-o", out_file]
    for annotation in annotations:
        params.extend(["--annotation", annotation])
    params.extend(["--genotype_likelihoods_model", "BOTH",
                   "--standard_min_confidence_threshold_for_calling", confidence,
                   "--standard_min_confidence_threshold_for_emitting", confidence,
                   "--min_mapping_quality_score", "20",
                   "-l", "INFO"])
    if dbsnp:
        params.extend(["--dbsnp", dbsnp])
    if region:
        params.extend(["-L", region])
    with file_transaction(out_file):
        broad_runner.run_gatk(params)
    return out_file
コード例 #19
0
ファイル: picardrun.py プロジェクト: hussius/bcbb
def picard_mark_duplicates(picard, align_bam):
    """Mark duplicate reads in a BAM file with Picard MarkDuplicates.

    Outputs are named after ``align_bam``: ``<base>-dup<ext>`` for the BAM
    and ``<base>-dup.dup_metrics`` for the metrics.  Picard is skipped when
    the duplicate-marked BAM already exists.

    Args:
        picard: runner object; only ``run(command, options)`` is called.
        align_bam: BAM file passed as ``INPUT``.

    Returns:
        Tuple ``(dup_bam, dup_metrics)``.
    """
    base, ext = os.path.splitext(align_bam)
    dup_bam = "%s-dup%s" % (base, ext)
    dup_metrics = "%s-dup.dup_metrics" % base
    if not os.path.exists(dup_bam):
        with curdir_tmpdir() as tmp_dir:
            opts = [("INPUT", align_bam),
                    ("OUTPUT", dup_bam),
                    ("TMP_DIR", tmp_dir),
                    ("METRICS_FILE", dup_metrics)]
            # Picard has to run while the temporary directory context is
            # still open, since TMP_DIR points into it (same nesting as the
            # other Picard wrappers).
            with file_transaction(dup_bam, dup_metrics):
                picard.run("MarkDuplicates", opts)
    return dup_bam, dup_metrics
コード例 #20
0
ファイル: genotype.py プロジェクト: bh0085/compbio
def split_snps_indels(broad_runner, orig_file, ref_file):
    """Separate a VCF into SNP and INDEL files with GATK SelectVariants.

    Outputs are ``<base>-snp<ext>`` and ``<base>-indel<ext>`` derived from
    ``orig_file``.  Each one is produced by its own GATK run, SNPs first,
    and a run is skipped when its output already exists and is not empty.

    Args:
        broad_runner: runner object; only ``run_gatk(params)`` is called.
        orig_file: VCF to split (``--variant``).
        ref_file: reference file (``-R``).

    Returns:
        Tuple ``(snp_file, indel_file)``.
    """
    stem, suffix = os.path.splitext(orig_file)
    snp_file = "{base}-snp{ext}".format(base=stem, ext=suffix)
    indel_file = "{base}-indel{ext}".format(base=stem, ext=suffix)
    shared_args = ["-T", "SelectVariants", "-R", ref_file, "--variant", orig_file]
    targets = ((snp_file, "SNP"), (indel_file, "INDEL"))
    for target_file, variant_type in targets:
        if os.path.exists(target_file) and os.path.getsize(target_file) > 0:
            continue
        run_args = shared_args + ["--out", target_file,
                                  "--selectTypeToInclude", variant_type]
        with file_transaction(target_file):
            broad_runner.run_gatk(run_args)
    return snp_file, indel_file
コード例 #21
0
ファイル: novoalign.py プロジェクト: bh0085/compbio
def align(out_dir, ref_index, fastq1, fastq2=None, qual_format=None):
    """Align fastq reads with novoalign, capturing its SAM output in a file.

    The SAM file is ``<out_dir>/<base>.sam`` where the base name comes from
    ``_get_base_filename(fastq1)``.  novoalign only runs when that file does
    not exist; its standard output is redirected into the file.

    Args:
        out_dir: directory receiving the SAM file.
        ref_index: novoalign index passed as ``-d``.
        fastq1: first fastq file (``-f``).
        fastq2: optional second fastq file, appended after ``fastq1``.
        qual_format: optional value for ``-F``.

    Returns:
        Path of the SAM file.
    """
    out_file = os.path.join(out_dir, "%s.sam" % _get_base_filename(fastq1))
    if not os.path.exists(out_file):
        cl = ["novoalign", "-o", "SAM", "-r", "None", "-d", ref_index, "-f", fastq1]
        if fastq2:
            cl.append(fastq2)
        if qual_format:
            cl += ["-F", qual_format]
        # Parenthesised form prints the same text under Python 2 and is also
        # valid Python 3, unlike the bare print statement.
        print(" ".join(cl))
        with file_transaction(out_file):
            with open(out_file, "w") as out_handle:
                subprocess.check_call(cl, stdout=out_handle)
    return out_file
コード例 #22
0
ファイル: effects.py プロジェクト: kwoklab-user/bcbb
def _run_snpeff(snp_in, genome, snpeff_jar, se_interval):
    """Run snpEff on a VCF, capturing its output as a TSV file.

    The config file is expected next to the jar as
    ``<snpeff_jar without extension>.config``.  Output goes to
    ``<snp_in without extension>-effects.tsv`` and snpEff only runs when that
    file does not exist; its standard output is redirected into the file.

    Args:
        snp_in: input VCF.
        genome: genome name passed to snpEff.
        snpeff_jar: path of the snpEff jar.
        se_interval: optional value for ``-filterInterval``.

    Returns:
        Path of the effects file.
    """
    snpeff_config = "%s.config" % os.path.splitext(snpeff_jar)[0]
    out_file = "%s-effects.tsv" % (os.path.splitext(snp_in)[0])
    if not os.path.exists(out_file):
        cl = ["java", "-jar", snpeff_jar, "-1", "-vcf4", "-pass", "-c", snpeff_config,
              genome, snp_in]
        if se_interval:
            cl.extend(["-filterInterval", se_interval])
        # Parenthesised form prints the same text under Python 2 and is also
        # valid Python 3, unlike the bare print statement.
        print(" ".join(cl))
        with file_transaction(out_file):
            with open(out_file, "w") as out_handle:
                subprocess.check_call(cl, stdout=out_handle)
    return out_file
コード例 #23
0
ファイル: picardrun.py プロジェクト: kwoklab-user/bcbb
def picard_fixmate(picard, align_bam):
    """Run Picard FixMateInformation, writing a coordinate-sorted BAM.

    Output is ``<base>-sort<ext>`` derived from ``align_bam``; Picard is
    skipped when that file already exists.

    Args:
        picard: runner object; only ``run(command, options)`` is called.
        align_bam: BAM file passed as ``INPUT``.

    Returns:
        Path of the output BAM.
    """
    stem, suffix = os.path.splitext(align_bam)
    out_file = stem + "-sort" + suffix
    if os.path.exists(out_file):
        return out_file
    with curdir_tmpdir() as tmp_dir:
        opts = [
            ("INPUT", align_bam),
            ("OUTPUT", out_file),
            ("TMP_DIR", tmp_dir),
            ("SORT_ORDER", "coordinate"),
        ]
        with file_transaction(out_file):
            picard.run("FixMateInformation", opts)
    return out_file
コード例 #24
0
ファイル: picardrun.py プロジェクト: kwoklab-user/bcbb
def picard_sort(picard, align_bam):
    """Coordinate-sort a BAM file with Picard SortSam.

    Output is ``<base>-sort<ext>`` derived from ``align_bam``; Picard is
    skipped when that file already exists.

    Args:
        picard: runner object; only ``run(command, options)`` is called.
        align_bam: BAM file passed as ``INPUT``.

    Returns:
        Path of the sorted BAM.
    """
    stem, suffix = os.path.splitext(align_bam)
    sorted_bam = stem + "-sort" + suffix
    if os.path.exists(sorted_bam):
        return sorted_bam
    with curdir_tmpdir() as tmp_dir:
        opts = [
            ("INPUT", align_bam),
            ("OUTPUT", sorted_bam),
            ("TMP_DIR", tmp_dir),
            ("SORT_ORDER", "coordinate"),
        ]
        with file_transaction(sorted_bam):
            picard.run("SortSam", opts)
    return sorted_bam
コード例 #25
0
ファイル: annotation.py プロジェクト: bh0085/compbio
def annotate_effects(orig_file, snpeff_file, genome_file, config):
    """Add snpEff effect annotations to a VCF with GATK VariantAnnotator.

    Output is ``<base>-annotated<ext>`` derived from ``orig_file``.  A runner
    is always built from ``config``; GATK itself is skipped when the output
    already exists and is not empty.

    Args:
        orig_file: VCF to annotate (``--variant``).
        snpeff_file: snpEff output passed as ``--snpEffFile``.
        genome_file: reference file (``-R``).
        config: configuration handed to ``broad.runner_from_config``.

    Returns:
        Path of the annotated VCF.
    """
    broad_runner = broad.runner_from_config(config)
    stem, suffix = os.path.splitext(orig_file)
    out_file = stem + "-annotated" + suffix
    if os.path.exists(out_file) and os.path.getsize(out_file) > 0:
        return out_file
    params = [
        "-T", "VariantAnnotator",
        "-R", genome_file,
        "-A", "SnpEff",
        "--variant", orig_file,
        "--snpEffFile", snpeff_file,
        "--out", out_file,
    ]
    with file_transaction(out_file):
        broad_runner.run_gatk(params)
    return out_file
コード例 #26
0
ファイル: genotype.py プロジェクト: bh0085/compbio
def combine_variant_files(orig_files, out_file, ref_file, config):
    """Merge several VCF files into one with GATK CombineVariants.

    Every input is bound as ``--variant:<name>`` where the name is its
    basename without extension; the same names, in input order, form the
    ``--rod_priority_list``.  GATK is skipped when ``out_file`` already
    exists and is not empty.

    Args:
        orig_files: VCF files to merge.
        out_file: path of the merged VCF (``--out``).
        ref_file: reference file (``-R``).
        config: configuration handed to ``broad.runner_from_config``.

    Returns:
        ``out_file``.
    """
    broad_runner = broad.runner_from_config(config)
    params = ["-T", "CombineVariants", "-R", ref_file, "--out", out_file]
    named_inputs = [(os.path.splitext(os.path.basename(path))[0], path)
                    for path in orig_files]
    for name, path in named_inputs:
        params += ["--variant:" + name, path]
    params += ["--rod_priority_list", ",".join(name for name, _ in named_inputs)]
    already_done = os.path.exists(out_file) and os.path.getsize(out_file) > 0
    if not already_done:
        with file_transaction(out_file):
            broad_runner.run_gatk(params)
    return out_file
コード例 #27
0
ファイル: genotype.py プロジェクト: bh0085/compbio
def _variant_filtration_no_recal(broad_runner, snp_file, ref_file, filter_type, expressions):
    """Apply hard filters with GATK VariantFiltration.

    Used in place of variant quality score recalibration when coverage is
    restricted to limited regions, which do not offer enough positions to
    train the recalibration model.

    Each expression becomes a filter named ``GATKStandard<first token>``,
    e.g. ``QD < 2.0`` gives ``GATKStandardQD``.  Output is
    ``<base>-filter<filter_type><ext>`` derived from ``snp_file``; GATK is
    skipped when that file already exists and is not empty.

    Args:
        broad_runner: runner object; only ``run_gatk(params)`` is called.
        snp_file: VCF to filter (``--variant``).
        ref_file: reference file (``-R``).
        filter_type: label embedded in the output file name.
        expressions: filter expressions, whitespace separated.

    Returns:
        Path of the filtered VCF.
    """
    stem, suffix = os.path.splitext(snp_file)
    out_file = "{base}-filter{ftype}{ext}".format(base=stem, ext=suffix, ftype=filter_type)
    params = ["-T", "VariantFiltration",
              "-R", ref_file,
              "--out", out_file,
              "--variant", snp_file]
    for expression in expressions:
        first_token = expression.split()[0]
        params += ["--filterName", "GATKStandard" + first_token,
                   "--filterExpression", expression]
    if os.path.exists(out_file) and os.path.getsize(out_file) > 0:
        return out_file
    with file_transaction(out_file):
        broad_runner.run_gatk(params)
    return out_file
コード例 #28
0
ファイル: picardrun.py プロジェクト: kwoklab-user/bcbb
def picard_merge(picard, in_files, out_file=None):
    """Combine several BAM files into one with Picard MergeSamFiles.

    Without ``out_file`` the output name is the common prefix of the input
    paths followed by ``merge.bam``.  Picard is skipped when the output
    already exists.

    Args:
        picard: runner object; only ``run(command, options)`` is called.
        in_files: BAM files, each passed as an ``INPUT`` option.
        out_file: optional path of the merged BAM.

    Returns:
        Path of the merged BAM.
    """
    if out_file is None:
        out_file = "%smerge.bam" % os.path.commonprefix(in_files)
    if os.path.exists(out_file):
        return out_file
    with curdir_tmpdir() as tmp_dir:
        opts = [("OUTPUT", out_file),
                ("SORT_ORDER", "coordinate"),
                ("TMP_DIR", tmp_dir)]
        opts.extend(("INPUT", bam) for bam in in_files)
        with file_transaction(out_file):
            picard.run("MergeSamFiles", opts)
    return out_file
コード例 #29
0
ファイル: bwa.py プロジェクト: hussius/bcbb
def align(fastq_file, pair_file, ref_file, out_base, align_dir, config):
    """Align reads with BWA and return the path of the resulting SAM file.

    Intermediate ``.sai`` files (``<out_base>_1.sai`` and, for paired input,
    ``<out_base>_2.sai``) are created through ``_run_bwa_align`` when
    missing.  The final step runs ``bwa sampe`` for paired input or
    ``bwa samse`` otherwise, redirecting standard output into
    ``<align_dir>/<out_base>.sam``.  Nothing runs when that SAM file exists.

    Args:
        fastq_file: first fastq file.
        pair_file: optional second fastq file; selects paired mode.
        ref_file: reference passed to bwa.
        out_base: base name of all output files.
        align_dir: directory receiving the outputs.
        config: mapping whose ``program.bwa`` entry is the bwa executable.

    Returns:
        Path of the SAM file.
    """
    sam_file = os.path.join(align_dir, out_base + ".sam")
    sai1_file = os.path.join(align_dir, out_base + "_1.sai")
    sai2_file = None
    if pair_file:
        sai2_file = os.path.join(align_dir, out_base + "_2.sai")
    if os.path.exists(sam_file):
        return sam_file
    if not os.path.exists(sai1_file):
        with file_transaction(sai1_file):
            _run_bwa_align(fastq_file, ref_file, sai1_file, config)
    if sai2_file and not os.path.exists(sai2_file):
        with file_transaction(sai2_file):
            _run_bwa_align(pair_file, ref_file, sai2_file, config)
    bwa_cmd = config["program"]["bwa"]
    if sai2_file:
        sam_cl = [bwa_cmd, "sampe", ref_file, sai1_file, sai2_file,
                  fastq_file, pair_file]
    else:
        sam_cl = [bwa_cmd, "samse", ref_file, sai1_file, fastq_file]
    with file_transaction(sam_file):
        with open(sam_file, "w") as out_handle:
            subprocess.check_call(sam_cl, stdout=out_handle)
    return sam_file
コード例 #30
0
ファイル: metrics.py プロジェクト: cdoherty/bcbb
 def _hybrid_select_metrics(self, dup_bam, bait_file, target_file):
     """Run Picard CalculateHsMetrics unless the metrics file exists.

     Output is ``<dup_bam without extension>.hs_metrics``.  The bait and
     target files are each passed through ``bed_to_interval`` together with
     the BAM, and the values it yields are given to Picard as
     ``BAIT_INTERVALS`` and ``TARGET_INTERVALS``.

     Args:
         dup_bam: BAM file passed as ``INPUT``.
         bait_file: bait regions handed to ``bed_to_interval``.
         target_file: target regions handed to ``bed_to_interval``.

     Returns:
         Path of the metrics file.
     """
     metrics = os.path.splitext(dup_bam)[0] + ".hs_metrics"
     if os.path.exists(metrics):
         return metrics
     with bed_to_interval(bait_file, dup_bam) as ready_bait:
         with bed_to_interval(target_file, dup_bam) as ready_target:
             opts = [
                 ("BAIT_INTERVALS", ready_bait),
                 ("TARGET_INTERVALS", ready_target),
                 ("INPUT", dup_bam),
                 ("OUTPUT", metrics),
             ]
             with file_transaction(metrics):
                 self._picard.run("CalculateHsMetrics", opts)
     return metrics