Esempio n. 1
0
 def command(self):
     """Return the shell pipeline that filters a gzipped VCF and indexes it.

     ``self.input`` is decompressed with ``zcat``, filtered by ``vcffilter``
     using the expression in ``self.filter``, bgzipped into ``self.output``
     and finally indexed with ``tabix -p vcf``.
     """
     pieces = [
         "zcat",
         required(" ", self.input),
         "| vcffilter ",
         required("-f ", self.filter),
         "| bgzip ",
         required(" > ", self.output),
         " && tabix -p vcf {output}".format(output=self.output),
     ]
     return "".join(pieces)
Esempio n. 2
0
 def command(self):
     """Return the pipeline that downloads, normalises and indexes a VCF.

     ``self.remote`` is fetched with ``curl -L``, gunzipped, piped through the
     command produced by ``vt_split_and_leftaln`` (called with
     ``allow_ref_mismatches=True``), bgzipped into ``self.output`` and indexed
     with ``tabix -p vcf``.
     """
     # The .fai index never appears on the command line; it is only run
     # through `required` (presumably to check that it is set -- confirm
     # against the helper's definition).
     required("", self.input_reference_sequence_fai)
     segments = [
         "curl -L ",
         required(" ", self.remote),
         "| gzip -d |",
         vt_split_and_leftaln(self.input_reference_sequence,
                              allow_ref_mismatches=True),
         "| bgzip ",
         required(" > ", self.output),
         " && tabix -p vcf {output}".format(output=self.output),
     ]
     return "".join(segments)
Esempio n. 3
0
 def command(self):
     """Return a ``cp`` command, followed by BAM indexing when applicable.

     ``self.input`` is copied to ``self.output``. When the input file name
     ends in ``.bam`` a ``samtools index`` step for the copy is chained on
     with ``&&``.
     """
     steps = ["cp " + required(" ", self.input) + required(" ", self.output)]
     if self.input.endswith(".bam"):
         # A copied BAM gets its own index next to the new file.
         steps.append(" && samtools index {}".format(self.output))
     return "".join(steps)
Esempio n. 4
0
 def command(self):
     """Return the Picard MergeSamFiles command plus a samtools index step.

     Every entry of ``self.input_bams`` becomes an ``INPUT=`` argument; the
     boolean-like ``self.assume_sorted`` and ``self.merge_dicts`` values are
     rendered as lower-case strings. The merged ``self.output_bam`` is then
     indexed with ``samtools index``.
     """
     merge_cmd = "".join([
         "picard -XX:ParallelGCThreads=1 MergeSamFiles ",
         repeat("INPUT=", self.input_bams),
         required("ASSUME_SORTED=", str(self.assume_sorted).lower()),
         required("MERGE_SEQUENCE_DICTIONARIES=", str(self.merge_dicts).lower()),
         required("OUTPUT=", self.output_bam),
     ])
     index_cmd = " && samtools index " + required("", self.output_bam)
     return merge_cmd + index_cmd
Esempio n. 5
0
 def command(self):
     """Return the Picard CollectGcBiasMetrics command line.

     The chart output is discarded (``CHART=/dev/null``); metrics and summary
     go to ``self.output_metrics`` and ``self.output_summary``.
     ``STOP_AFTER=`` is passed through the ``optional`` helper.
     """
     # The leading literal has no trailing space; the separator before "I="
     # has to come from `required` itself.
     arguments = [
         "picard -XX:ParallelGCThreads=1 -Xmx5g CollectGcBiasMetrics CHART=/dev/null",
         required("I=", self.input),
         required("O=", self.output_metrics),
         required("S=", self.output_summary),
         required("R=", self.reference_sequence),
         optional("STOP_AFTER=", self.stop_after),
     ]
     return "".join(arguments)
Esempio n. 6
0
 def command(self):
     """Return the Picard CollectWgsMetrics command line.

     Input BAM, reference and metrics output go through ``required``; the
     mapping-quality, base-quality and coverage-cap settings go through
     ``optional``.
     """
     arguments = [
         "picard -XX:ParallelGCThreads=1 CollectWgsMetrics ",
         required("I=", self.input),
         required("R=", self.reference_sequence),
         required("O=", self.output_metrics),
         optional("MINIMUM_MAPPING_QUALITY=", self.minimum_mapping_quality),
         optional("MINIMUM_BASE_QUALITY=", self.minimum_base_quality),
         optional("COVERAGE_CAP=", self.coverage_cap),
     ]
     return "".join(arguments)
Esempio n. 7
0
 def command(self):
     """Return the command that bgzips a file and indexes it with tabix.

     ``self.input`` is streamed through ``bgzip`` into ``self.output``; the
     tabix preset (``-p``) is taken from ``self.filetype`` via ``optional``.
     """
     compress_cmd = "cat " + required(" ", self.input) + \
         " | bgzip " + required(" > ", self.output)
     index_cmd = " && tabix " + optional("-p ", self.filetype) + \
         " {} ".format(self.output)
     return compress_cmd + index_cmd
Esempio n. 8
0
 def command(self):
     """Return the pipeline that turns QDNAseq segments into a per-gene BED.

     Stages, in order: ``qdnaseq2bed.py`` on ``self.input_segments``, a
     coordinate sort, ``bedtools median`` against ``self.genes_gtf``, and
     ``cnvgtf2bed.py`` writing to ``self.output_bed``.
     """
     stages = [
         "qdnaseq2bed.py -n segments ",
         required("-i ", self.input_segments),
         "| sort -k1,1 -k2,2n ",
         "| bedtools median -c 5 -o mean ",
         required("-a ", self.genes_gtf),
         " -b - ",
         "| cnvgtf2bed.py -i /dev/stdin -n gene_id ",
         required("> ", self.output_bed),
     ]
     return "".join(stages)
Esempio n. 9
0
 def command(self):
     """Return the Picard MarkDuplicates command plus a samtools index step.

     ``self.scratch`` is used as the JVM temp directory. When
     ``self.remove_duplicates`` is truthy, ``REMOVE_DUPLICATES=true`` is
     emitted via ``conditional``. The resulting ``self.output_bam`` is indexed
     afterwards.
     """
     markdup_cmd = "".join([
         "picard -Xmx5g -XX:ParallelGCThreads=1 ",
         required("-Djava.io.tmpdir=", self.scratch),
         " MarkDuplicates ",
         required("INPUT=", self.input_bam),
         required("METRICS_FILE=", self.output_metrics),
         required("OUTPUT=", self.output_bam),
         conditional(self.remove_duplicates, "REMOVE_DUPLICATES=true"),
     ])
     index_cmd = " && samtools index " + required("", self.output_bam)
     return markdup_cmd + index_cmd
Esempio n. 10
0
 def command(self):
     """Return the Picard CollectHsMetrics command line.

     Target and bait intervals come from ``self.target_regions`` and
     ``self.bait_regions``; each entry of ``self.accumulation_level`` is
     emitted as a ``METRIC_ACCUMULATION_LEVEL=`` argument via ``repeat``.
     """
     arguments = [
         "picard -XX:ParallelGCThreads=1 CollectHsMetrics ",
         required("I=", self.input),
         required("R=", self.reference_sequence),
         required("O=", self.output_metrics),
         required("TI=", self.target_regions),
         required("BI=", self.bait_regions),
         optional("BAIT_SET_NAME=", self.bait_name),
         repeat('METRIC_ACCUMULATION_LEVEL=', self.accumulation_level),
     ]
     return "".join(arguments)
Esempio n. 11
0
    def command(self):
        """Return the GATK HaplotypeCaller command line.

        ``self.java_options`` is interpolated right after ``gatk``. The
        interval list and dbSNP file are concatenated directly (not via a
        helper), so they must already be strings.
        """
        cmd = "gatk {} HaplotypeCaller ".format(self.java_options)
        cmd += required(" -R ", self.reference_sequence)
        cmd += required(" -I ", self.input_bam)
        # Plain concatenation: a non-string value here raises TypeError.
        cmd += " -L " + self.interval_list
        cmd += " --dbsnp " + self.dbSNP
        cmd += required(" -O ", self.output)
        return cmd
Esempio n. 12
0
 def command(self):
     """Return the ``compileMetadata`` command line.

     The blood and tumor barcodes are positional arguments, followed by the
     referral DB config, the address table file and the JSON output path.
     """
     # Example of a resulting invocation:
     #   compileMetadata 3098121 3098849 --db_config referral-db-config.json \
     #     --output /dev/stdout --address_table_file addresses.csv
     arguments = [
         "compileMetadata",
         required('', self.blood_barcode),
         required('', self.tumor_barcode),
         required('--db_config ', self.referral_db_conf),
         required('--address_table_file ', self.addresses),
         required('--output ', self.output_json),
     ]
     return "".join(arguments)
Esempio n. 13
0
 def command(self):
     """Return the GATK HeterozygoteConcordance command line.

     ``-L`` is emitted through ``optional`` and
     ``--filter_reads_with_N_cigar`` only when
     ``self.filter_reads_with_N_cigar`` is truthy.
     """
     arguments = [
         "gatk-klevebring -T HeterozygoteConcordance ",
         required("-R ", self.reference_sequence),
         required("-V ", self.input_vcf),
         required("-I ", self.input_bam),
         required("-sid ", self.normalid),
         optional("-L ", self.target_regions),
         conditional(self.filter_reads_with_N_cigar, "--filter_reads_with_N_cigar"),
         required("-o ", self.output),
     ]
     return "".join(arguments)
Esempio n. 14
0
 def command(self):
     """Return the shell script that writes a bedtools coverage histogram.

     The script has up to three newline-separated commands:

     1. write a ``# bedtools-coverage-hist: <input_bam>`` header line to
        ``self.output`` (``>`` truncates the file),
     2. when ``self.tag`` is truthy, append a ``# <tag>`` line,
     3. append the lines starting with ``all`` from
        ``bedtools coverage -hist -a <input_bed> -b <input_bam>``.

     :return: the commands as a single string.
     """
     # The redirect prefix used to be "d>", which the shell parses as an
     # extra word "d" followed by a redirect, so the header line ended in
     # " d". A plain "> " writes the header as intended.
     header_cmd = "echo \"# bedtools-coverage-hist: {}\"".format(self.input_bam) + \
         required("> ", self.output) + "\n"

     tag_cmd = ""
     if self.tag:
         tag_cmd = "echo \"# {}\" >> {} \n".format(self.tag, self.output)

     # Only the summary ("all") rows of the histogram are kept.
     histogram_cmd = "bedtools coverage -hist " + \
         required("-a ", self.input_bed) + \
         required("-b ", self.input_bam) + \
         "|grep \"^all\" " + required(">> ", self.output)

     return header_cmd + tag_cmd + histogram_cmd
Esempio n. 15
0
    def command(self):
        """Return the paired tumor/normal VarDict somatic-calling pipeline.

        Pipeline stages, in order: ``vardict-java`` -> ``testsomatic.R`` ->
        ``var2vcf_paired.pl`` -> depth/frequency filter -> somatic filter ->
        ``fix_ambiguous_cl()`` -> ``remove_dup_cl()`` -> ``vcfstreamsort`` ->
        ``vt_split_and_leftaln`` -> ``bcftools view`` (PASS only) ->
        ``vcfsorter.pl`` -> optional blacklist step -> ``bgzip`` into
        ``self.output``, followed by ``tabix -p vcf``.

        The two embedded ``-c`` snippets run under ``sys.executable`` and use
        the ``print`` statement form, so they assume a Python 2 interpreter.

        :return: the full shell command as one string.
        """
        # The BAMs are interpolated with str.format below rather than through
        # a helper, so they are passed to `required` up front.
        required("", self.input_tumor)
        required("", self.input_normal)

        freq_filter = (
            " bcftools filter -e 'STATUS !~ \".*Somatic\"' 2> /dev/null "
            "| %s -c 'from autoseq.util.bcbio import depth_freq_filter_input_stream; import sys; print depth_freq_filter_input_stream(sys.stdin, %s, \"%s\")' "
            % (sys.executable, 0, 'bwa'))

        somatic_filter = (
            " sed 's/\\.*Somatic\\\"/Somatic/' "  # changes \".*Somatic\" to Somatic
            "| sed 's/REJECT,Description=\".*\">/REJECT,Description=\"Not Somatic via VarDict\">/' "
            "| %s -c 'from autoseq.util.bcbio import call_somatic; import sys; print call_somatic(sys.stdin.read())' "
            % sys.executable)

        # The step carries its own leading pipe and no trailing one, because
        # the bgzip segment that follows already starts with " | ".
        # NOTE(review): "-a ." is not a stdin specifier for bedtools, and a
        # plain intersect keeps overlapping records rather than removing them
        # -- confirm the intended invocation (stdin input, header retention,
        # exclusion semantics) before relying on blacklist filtering.
        blacklist_filter = " | intersectBed -a . -b {} ".format(
            self.blacklist_bed)

        cmd = "vardict-java " + required("-G ", self.reference_sequence) + \
              optional("-f ", self.min_alt_frac) + \
              required("-N ", self.tumorid) + \
              optional("-r ", self.min_num_reads) + \
              " -b \"{}|{}\" ".format(self.input_tumor, self.input_normal) + \
              " -c 1 -S 2 -E 3 -g 4 -Q 10 " + required("", self.target_bed) + \
              " | testsomatic.R " + \
              " | var2vcf_paired.pl -P 0.9 -m 4.25 -M " + required("-f ", self.min_alt_frac) + \
              " -N \"{}|{}\" ".format(self.tumorid, self.normalid) + \
              " | " + freq_filter + " | " + somatic_filter + " | " + fix_ambiguous_cl() + " | " + remove_dup_cl() + \
              " | vcfstreamsort -w 1000 " + \
              " | " + vt_split_and_leftaln(self.reference_sequence) + \
              " | bcftools view --apply-filters .,PASS " + \
              " | vcfsorter.pl {} /dev/stdin ".format(self.reference_dict) + \
              conditional(self.blacklist_bed, blacklist_filter) + \
              " | bgzip > {output} && tabix -p vcf {output}".format(output=self.output)
        # `conditional` takes (flag, text) everywhere else in this code base;
        # the arguments above used to be swapped, which emitted the BED path
        # itself instead of the filter step.
        return cmd
Esempio n. 16
0
    def command(self):
        """Return the VarDict calling pipeline followed by dbSNP annotation.

        Three ``&&``-chained steps are produced:

        1. paired VarDict calling into a temporary bgzipped, tabix-indexed
           VCF under ``self.scratch``; non-somatic variants are kept and
           somatic ones get a ``SOMATIC`` INFO flag,
        2. ``bcftools annotate`` adding IDs from ``self.dbsnp`` and writing
           the indexed ``self.output``,
        3. removal of the temporary VCF and its index.
        """
        required("", self.input_tumor)
        required("", self.input_normal)

        scratch_vcf = "{}/{}.vcf.gz".format(self.scratch, uuid.uuid4())

        # Step 1: call variants, tag somatic records, sort, keep PASS.
        calling_cmd = "".join([
            "vardict-java ",
            required("-G ", self.reference_sequence),
            optional("-f ", self.min_alt_frac),
            required("-N ", self.tumorid),
            optional("-r ", self.min_num_reads),
            " -b \"{}|{}\" ".format(self.input_tumor, self.input_normal),
            " -c 1 -S 2 -E 3 -g 4 -Q 10 ",
            required("", self.target_bed),
            " | testsomatic.R ",
            " | var2vcf_paired.pl -P 0.9 -m 4.25 ",
            required("-f ", self.min_alt_frac),
            " -N \"{}|{}\" ".format(self.tumorid, self.normalid),
            " | ",
            fix_ambiguous_cl(),
            " | ",
            remove_dup_cl(),
            " | sed 's/Somatic;/Somatic;SOMATIC;/g' ",
            " | sed '/^#CHROM/i ##INFO=<ID=SOMATIC,Number=0,Type=Flag,Description=\"Somatic event\">' ",
            " | vcfstreamsort -w 1000 ",
            " | bcftools view --apply-filters .,PASS ",
            " | vcfsorter.pl {} /dev/stdin ".format(self.reference_dict),
            " | bgzip > ",
            scratch_vcf,
            " && tabix -p vcf ",
            scratch_vcf,
        ])

        # Step 2: add dbSNP identifiers and index the final VCF.
        annotation_cmd = "".join([
            "bcftools annotate --annotation {} --columns ID ".format(self.dbsnp),
            " --output-type z --output {} ".format(self.output),
            scratch_vcf,
            " && tabix -p vcf {}".format(self.output),
        ])

        # Step 3: the glob also matches the .tbi created in step 1.
        cleanup_cmd = "rm " + scratch_vcf + "*"

        return " && ".join([calling_cmd, annotation_cmd, cleanup_cmd])
0
    def command(self):
        """Return the QDNAseq invocation wrapped in conda env activation.

        The ``qdnaseqenv`` environment is activated, ``qdnaseq.R`` is run on
        ``self.input`` writing ``self.output`` (with ``--background`` passed
        through ``optional``), and the environment is deactivated again.
        """
        qdnaseq_cmd = "".join([
            "qdnaseq.R ",
            required("--bam ", self.input),
            required("--output ", self.output),
            optional("--background ", self.background),
        ])
        steps = ["source activate qdnaseqenv", qdnaseq_cmd, "source deactivate"]
        # The returned string ends with a single trailing space.
        return " && ".join(steps) + " "
Esempio n. 18
0
    def command(self):
        """Return the freebayes-parallel calling pipeline.

        Three ``&&``-chained steps are produced: convert ``self.target_bed``
        into a temporary regions file under ``self.scratch``, run
        freebayes-parallel with post-filtering into the bgzipped and indexed
        ``self.output``, and delete the regions file.

        When ``self.somatic_only`` is truthy an extra ``call_somatic`` stage
        (run under ``sys.executable``) is inserted into the pipeline.
        """
        regions_path = "{}/{}.regions".format(self.scratch, uuid.uuid4())

        make_regions_cmd = "cat {} | bed_to_regions.py > {}".format(
            self.target_bed, regions_path)

        somatic_stage = " | {} -c 'from autoseq.util.bcbio import call_somatic; import sys; print call_somatic(sys.stdin.read())' ".format(
            sys.executable)

        calling_cmd = "".join([
            "freebayes-parallel {} {} ".format(regions_path, self.threads),
            required("-f ", self.reference_sequence),
            " --use-mapping-quality ",
            optional("--min-alternate-fraction ", self.min_alt_frac),
            optional("--min-coverage ", self.min_coverage),
            conditional(self.use_harmonic_indel_quals, "--harmonic-indel-quality"),
            optional("", self.params),
            repeat(" ", self.input_bams),
            """| bcftools filter -i 'ALT="<*>" || QUAL > 5' """,
            "| filter_erroneus_alt.py -V /dev/stdin ",
            conditional(self.somatic_only, somatic_stage),
            " | ",
            vt_split_and_leftaln(self.reference_sequence),
            # vcfuniq: freebayes sometimes reports duplicate variants.
            " | vcfuniq | bcftools view --apply-filters .,PASS ",
            " | bgzip > {output} && tabix -p vcf {output}".format(output=self.output),
        ])

        cleanup_cmd = "rm {}".format(regions_path)
        return " && ".join([make_regions_cmd, calling_cmd, cleanup_cmd])
Esempio n. 19
0
    def command(self):
        """Return the commands that render the ALASCCA report PDF.

        A unique working directory is created under ``self.scratch`` and used
        as both ``--tmp_dir`` and ``--output_dir`` for ``writeAlasccaReport``.
        ``Report.pdf`` is then copied from it to ``self.output_pdf`` and the
        directory is removed. All steps are chained with ``&&``.
        """
        workdir = "{}/write-alascca-report-{}".format(self.scratch,
                                                      uuid.uuid4())
        report_cmd = "".join([
            'writeAlasccaReport ',
            required(' --tmp_dir ', workdir),
            required(' --output_dir ', workdir),
            conditional(self.alascca_only, " --alascca_only "),
            required('', self.input_genomic_json),
            required('', self.input_metadata_json),
        ])
        rendered_pdf = os.path.join(workdir, 'Report.pdf')

        steps = [
            "mkdir -p {}".format(workdir),
            report_cmd,
            "cp {} {}".format(rendered_pdf, self.output_pdf),
            "rm -r {}".format(workdir),
        ]
        return " && ".join(steps)
Esempio n. 20
0
    def command(self):
        """Return the msisensor command plus copy and cleanup steps.

        msisensor writes to a unique prefix under ``self.scratch``. The file
        at that prefix is copied to ``self.output``; afterwards it and the
        ``_dis``, ``_germline`` and ``_somatic`` companion files are removed.
        """
        prefix = "{}/msisensor-{}".format(self.scratch, uuid.uuid4())
        # The main table lives at the bare prefix; the rest are suffixed.
        leftovers = [prefix] + [
            prefix + suffix for suffix in ("_dis", "_germline", "_somatic")
        ]

        run_cmd = "".join([
            "msisensor msi ",
            required("-d ", self.msi_sites),
            required("-n ", self.input_normal_bam),
            required("-t ", self.input_tumor_bam),
            required("-o ", prefix),
            required("-b ", self.threads),
        ])
        copy_cmd = " && cp {} {}".format(prefix, self.output)
        cleanup_cmd = " && rm " + " ".join(leftovers)
        return run_cmd + copy_cmd + cleanup_cmd
Esempio n. 21
0
    def command(self):
        """Return the Strelka germline workflow commands.

        Three ``&&``-chained steps are produced: configure the workflow in
        ``self.output_dir`` (targeted mode, restricted to
        ``self.target_bed``), run it locally with 20 jobs, and write the
        header lines plus PASS records of the resulting VCF to the bgzipped,
        tabix-indexed ``self.output_filtered_vcf``.
        """
        required("", self.input_bam)
        required("", self.reference_sequence)

        # Paths are concatenated directly, so each must already be a string.
        configure_cmd = "configureStrelkaGermlineWorkflow.py "
        configure_cmd += " --bam " + self.input_bam
        configure_cmd += " --ref " + self.reference_sequence
        configure_cmd += " --targeted --callRegions " + self.target_bed
        configure_cmd += " --runDir " + self.output_dir

        run_cmd = self.output_dir + "/runWorkflow.py -m local -j 20"

        # awk prints header lines and records whose 7th column is PASS.
        filter_cmd = "zcat " + self.output_dir + "/results/variants/variants.vcf.gz"
        filter_cmd += " | awk 'BEGIN { OFS = \"\\t\"} /^#/ { print $0 } {if($7==\"PASS\") print $0 }' "
        filter_cmd += " | bgzip > {output} && tabix -p vcf {output}".format(
            output=self.output_filtered_vcf)

        return " && ".join([configure_cmd, run_cmd, filter_cmd])
Esempio n. 22
0
    def command(self):
        """Return the ``vep`` annotation command line.

        VEP writes VCF to STDOUT, which is redirected to ``self.output_vcf``.
        ``self.brca_exchange_vcf`` is attached as a ``--custom`` annotation
        source. The output is not compressed or tabix-indexed here.
        """
        # vep does not accept "--fork 1", so the flag is only added for >1.
        fork = " --fork {} ".format(self.threads) if self.threads > 1 else ""

        # additional_options is concatenated directly and must be a string.
        vep_cmd = "vep --vcf --output_file STDOUT " + self.additional_options
        vep_cmd += required("--dir ", self.vep_dir)
        vep_cmd += required("--fasta ", self.reference_sequence)
        vep_cmd += required("-i ", self.input_vcf)
        vep_cmd += " --check_existing  --total_length --allele_number "
        vep_cmd += " --no_escape --no_stats --everything --offline "
        vep_cmd += " --custom {},,vcf,exact,0,ClinicalSignificance ".format(
            self.brca_exchange_vcf)
        vep_cmd += fork + " > " + required("", self.output_vcf)
        return vep_cmd
Esempio n. 23
0
 def command(self):
     """Return the ``alasccaCNA.R`` command line.

     Inputs are the CNR/CNS files, the germline and somatic VCFs and the
     chromosome sizes; outputs are a PNG plus the CNA and purity JSON files.
     """
     arguments = [
         "alasccaCNA.R ",
         required("--cnr ", self.input_cnr),
         required("--cns ", self.input_cns),
         required("--germlinevcf ", self.input_germline_vcf),
         required("--somaticvcf ", self.input_somatic_vcf),
         required("--chrsizes ", self.chrsizes),
         required("--png ", self.output_png),
         required("--json.cna ", self.output_cna),
         required("--json.purity ", self.output_purity),
     ]
     return "".join(arguments)
Esempio n. 24
0
 def command(self):
     """Return the ``compileAlasccaGenomicReport`` command line.

     The somatic VCF, copy-number calls and msisensor result are positional
     arguments; the four QC JSON inputs and the output JSON are named
     options.
     """
     arguments = [
         'compileAlasccaGenomicReport ',
         required('', self.input_somatic_vcf),
         required('', self.input_cn_calls),
         required('', self.input_msisensor),
         required('--tumorCovJSON ', self.input_tcov_qc),
         required('--normalCovJSON ', self.input_ncov_qc),
         required('--purityJSON ', self.input_purity_qc),
         required('--contaminationJSON ', self.input_contam_qc),
         required('--output ', self.output_json),
     ]
     return "".join(arguments)
Esempio n. 25
0
    def command(self):
        """Return the ``variant_effect_predictor.pl`` annotation command.

        VEP writes VCF to STDOUT, which is redirected to
        ``self.output_vcf``. When that path ends in ``gz`` the stream is
        bgzipped first and the result is indexed with ``tabix -p vcf``;
        otherwise the VCF is written as plain text and no index step is
        added, because tabix only works on bgzip-compressed files.

        :return: the full shell command as one string.
        """
        fork = ""
        if self.threads > 1:  # vep does not accept "--fork 1", so need to check.
            fork = " --fork {} ".format(self.threads)

        compress_output = self.output_vcf.endswith('gz')
        bgzip = " | bgzip " if compress_output else ""

        cmdstr = "variant_effect_predictor.pl --vcf --output_file STDOUT " + \
                 self.additional_options + required("--dir ", self.vep_dir) + \
                 required("--fasta ", self.reference_sequence) + \
                 required("-i ", self.input_vcf) + \
                 " --check_alleles --check_existing  --total_length --allele_number " + \
                 " --no_escape --no_stats --everything --offline " + \
                 fork + bgzip + " > " + required("", self.output_vcf)

        # Indexing an uncompressed VCF would make the whole command fail, so
        # tabix is chained on only when the output was bgzipped above.
        if compress_output:
            cmdstr += " && tabix -p vcf {}".format(self.output_vcf)

        return cmdstr
Esempio n. 26
0
    def command(self):
        """Return the ``cnvkit.py batch`` command plus copy and cleanup steps.

        Exactly one of ``self.reference`` and ``self.targets_bed`` must be
        set. With ``targets_bed``, cnvkit is additionally given
        ``--fasta <self.fasta> --split`` and ``-n``. Results are written to a
        unique directory under ``self.scratch``; the ``.cns`` and ``.cnr``
        files named after the input BAM are copied to ``self.output_cns`` and
        ``self.output_cnr`` before the directory is removed.

        :raises ValueError: if neither or both of ``reference`` and
            ``targets_bed`` are supplied.
        """
        if not (self.reference or self.targets_bed):
            raise ValueError("Either reference or targets_bed must be supplied")
        if self.reference and self.targets_bed:
            raise ValueError("Supply either reference OR targets_bed")

        workdir = "{}/cnvkit-{}".format(self.scratch, uuid.uuid4())
        # cnvkit names its outputs after the BAM file without its suffix.
        sample = stripsuffix(os.path.basename(self.input_bam), ".bam")

        batch_cmd = "".join([
            "cnvkit.py batch ",
            required("", self.input_bam),
            optional("-r ", self.reference),
            conditional(self.targets_bed, "--fasta " + str(self.fasta) + " --split "),
            conditional(self.targets_bed, "-n"),
            optional("-t ", self.targets_bed),
            required("-d ", workdir),
        ])
        steps = [
            batch_cmd,
            "cp {}/{}.cns ".format(workdir, sample) + required(" ", self.output_cns),
            "cp {}/{}.cnr ".format(workdir, sample) + required(" ", self.output_cnr),
            "rm -r {}".format(workdir),
        ]
        return " && ".join(steps)
Esempio n. 27
0
    def command(self):
        """Return the commands that filter a VCF and add a sample column.

        Three ``&&``-chained steps are produced: ``vcf_filter.py`` writes a
        site-quality-filtered, bgzipped temporary VCF under ``self.scratch``;
        ``vcf_add_sample.py`` combines it with ``self.input_bam`` into
        ``self.output``; the temporary VCF is removed. When ``self.output``
        ends in ``gz`` the result is bgzipped and indexed with tabix.
        """
        scratch_vcf = "{}/{}.vcf.gz".format(self.scratch, uuid.uuid4())

        gzipped = self.output.endswith('gz')
        bgzip = "| bgzip" if gzipped else ""
        tabix = " && tabix -p vcf {}".format(self.output) if gzipped else ""

        filter_cmd = "".join([
            "vcf_filter.py --no-filtered ",
            required("", self.input_vcf),
            " sq --site-quality 5 ",
            "|bgzip > ",
            scratch_vcf,
        ])

        add_sample_cmd = "vcf_add_sample.py "
        add_sample_cmd += conditional(self.filter_hom, "--filter_hom")
        add_sample_cmd += required("--samplename ", self.samplename)
        add_sample_cmd += scratch_vcf + " "
        add_sample_cmd += required("", self.input_bam)
        add_sample_cmd += bgzip + " > " + self.output + tabix

        cleanup_cmd = "rm " + scratch_vcf
        return " && ".join([filter_cmd, add_sample_cmd, cleanup_cmd])
Esempio n. 28
0
    def command(self):
        """Return the PureCN invocation wrapped in conda env handling.

        Four ``&&``-chained steps are produced: activate ``purecn-env``, run
        ``PureCN.R``, deactivate the environment, and ``touch`` the four
        expected output files (``out_csv``, ``out_genes``, ``out_variants``,
        ``output``).
        """
        purecn_cmd = "".join([
            "PureCN.R ",
            required("--out ", self.outdir),
            required("--sampleid ", self.tumorid),
            required("--segfile ", self.input_seg),
            required("--tumor ", self.input_cnr),
            required("--vcf ", self.input_vcf),
            required("--genome ", self.genome),
            optional("--funsegmentation ", self.funseg),
            optional("--minpurity ", self.minpurity),
            optional("--hzdev ", self.hzdev),
            optional("--maxnonclonal ", self.maxnonclonal),
            optional("--minaf ", self.minaf),
            optional("--error ", self.error),
            conditional(self.postopt, "--postoptimize"),
        ])

        # Touching guarantees the declared outputs exist after the run.
        touch_cmd = "touch {} {} {} {}".format(self.out_csv, self.out_genes,
                                               self.out_variants, self.output)

        steps = [
            "source activate purecn-env",
            purecn_cmd,
            "conda deactivate",
            touch_cmd,
        ]
        return " && ".join(steps)
Esempio n. 29
0
    def command(self):
        """Return the ``multiqc`` command line.

        ``self.output`` is split into a directory (``-o``) and a file name
        (``-n``); ``self.search_dir`` is the directory MultiQC scans. Data
        format and report title are passed through ``optional``.
        """
        # These calls discard their result; they only run the values through
        # `required` (presumably as validation -- confirm against the helper).
        required("input_files", self.input_files)
        required("output_base", self.output)
        required("dir_to_search", self.search_dir)

        out_dir, out_name = os.path.split(self.output)
        arguments = [
            "multiqc ",
            required("", self.search_dir),
            required("-o ", out_dir),
            optional("-n ", out_name),
            optional("-k ", self.data_format),
            optional("-i ", self.report_title),
            " --data-dir --zip-data-dir -v -f",
        ]
        return "".join(arguments)
Esempio n. 30
0
 def command(self):
     """Return the ``extract_coverage_caveat.py`` command line.

     The input histogram is positional; the four high/low threshold values
     are named options and the result is redirected to ``self.output``.
     """
     arguments = [
         "extract_coverage_caveat.py ",
         required(" ", self.input_histogram),
         required("--high-thresh-fraction ", self.high_thresh_fraction),
         required("--high-thresh-fold-cov ", self.high_thresh_fold_cov),
         required("--low-thresh-fraction ", self.low_thresh_fraction),
         required("--low-thresh-fold-cov ", self.low_thresh_fold_cov),
         required("> ", self.output),
     ]
     return "".join(arguments)