コード例 #1
0
    def gatk_merge_vcfs(self, exe, reference, sample):
        """Combine the per-sample g.VCFs and genotype the merged result.

        When ``exe`` equals 1, every file in ``output/GATK/VCFs/`` (relative
        to ``self.home``) whose name ends in ``-named_variants.g.vcf`` and
        whose prefix before the first ``-`` is listed in ``self.ori_samples``
        is passed to GATK ``CombineGVCFs``. The merged g.VCF is then fed to
        GATK ``GenotypeGVCFs``. The outcome of each step is handed to
        ``interpret_result``. For any other ``exe`` value nothing is run.

        Args:
            exe: Execution switch; the tools only run when it equals 1.
            reference: Value passed to both tools via ``-R``.
            sample: Label used to build the output file names.

        Returns:
            Path of the genotyped VCF (returned even when nothing was run).
        """
        vcf_root = f"{self.local_output}/GATK/VCFs"
        out = f"{vcf_root}/{sample}-genotype_variants.vcf.gz"
        if exe != 1:
            return out

        out_merged = f"{vcf_root}/{sample}-merged_variants.g.vcf.gz"

        os.chdir(self.home)
        gvcf_dir = "output/GATK/VCFs/"
        gvcf_suffix = '-named_variants.g.vcf'

        # Build one "-V <path>" pair per g.VCF that belongs to a known sample.
        variants = []
        for entry in os.listdir(gvcf_dir):
            if not entry.endswith(gvcf_suffix):
                continue
            if entry.split("-")[0] in self.ori_samples:
                variants += ['-V', gvcf_dir + entry]

        # ``variants`` is deliberately passed as a nested list, exactly as the
        # command builder received it before.
        combine_args = [
            self.gatk, 'CombineGVCFs', variants,
            '-R', reference,
            '-O', out_merged,
        ]
        combine_report = Cmd['java']['-jar'][combine_args] & TEE(retcode=None)
        interpret_result(
            log=self.logger,
            so_logger=self.st_logger,
            report=combine_report,
            logger_mes='[GATK CombineGVCFs] combining g.VCFs was successful.',
            logger_error_mes=('[GATK CombineGVCFs] For some reason the '
                              'combining of g.VCFs was not successful.'),
            runtime_error_mes='GATK CombineGVCFs failed.')

        genotype_args = [
            self.gatk, 'GenotypeGVCFs',
            '-V', out_merged,
            '-R', reference,
            '-O', out,
        ]
        genotype_report = Cmd['java']['-jar'][genotype_args] & TEE(retcode=None)
        interpret_result(
            log=self.logger,
            so_logger=self.st_logger,
            report=genotype_report,
            logger_mes='[GATK GenotypeGVCFs] Genotyping was successful.',
            logger_error_mes=('[GATK GenotypeGVCFs] For some reason '
                              'genotyping was not successful.'),
            runtime_error_mes='GATK GenotypeGVCFs failed.')

        return out
コード例 #2
0
 def main(self):
     """Build, tag and push the container image.

     Nothing happens when ``self.containername`` is falsy. Otherwise the
     image is built from ``self.buildpath`` as ``kube/<containername>``,
     tagged as ``eu.gcr.io/<project>/<containername>`` (the tag is also
     stored in ``self.gcrTag``) and pushed with ``gcloud docker ... push``.
     """
     if not self.containername:
         return

     self.gcrTag = "eu.gcr.io/" + self.project + "/" + self.containername
     local_tag = "kube/" + self.containername

     print(info | "Creation of tagged containter image")
     print(info | "Building docker image from path " + self.buildpath )
     build_cmd = docker["build", "-t", local_tag, self.buildpath]
     build_cmd & TEE()

     print(info | "Tagging the image")
     tag_cmd = docker["tag", local_tag, self.gcrTag]
     tag_cmd & TEE()

     print(info | "Pushing the image to " + self.gcrTag)
     push_cmd = gcloud["docker", "--project", self.project, "--", "push", self.gcrTag]
     push_cmd & TEE()

     print(info | "Done pushing image to gcr")
コード例 #3
0
    def gatk_variant_selection(self, sample, in1, out, reference, parameters,
                               exe):
        """Run GATK ``SelectVariants`` to extract a subset of variants.

        Args:
            sample: Sample label, used only in log and error messages.
            in1: Input VCF, passed via ``-V``.
            out: Output file, passed via ``-O``.
            reference: Reference passed via ``-R``.
            parameters: Value passed to ``-select-type``.
            exe: Execution switch; the tool only runs when it equals 1.

        Returns:
            ``out`` unchanged, whether or not the tool was run.
        """
        if exe != 1:
            return out

        os.chdir(self.home)

        select_cmd = Cmd['java']['-jar'][[
            self.gatk, 'SelectVariants',
            '-R', reference,
            '-V', in1,
            '-O', out,
            '-select-type', parameters,
        ]]
        report = select_cmd & TEE(retcode=None)

        success = (f'[GATK SelectVariants] Sample {sample} Selecting a '
                   f'subset of variants was successful.')
        failure = (f'[GATK SelectVariants] For some reason Selecting a '
                   f'subset of variants of {sample} was not successful.')
        interpret_result(
            log=self.logger,
            so_logger=self.st_logger,
            report=report,
            logger_mes=success,
            logger_error_mes=failure,
            runtime_error_mes=f'GATK SelectVariants failed ({sample}).')

        return out
コード例 #4
0
ファイル: hisat2_index.py プロジェクト: romunov/resolwe-bio
    def run(self, inputs, outputs):
        """Build a HISAT2 index for the reference FASTA and publish it.

        The FASTA, its gzipped copy and its ``.fai`` file are copied into the
        current working directory. ``hisat2-build`` is then run with the index
        prefix placed inside a newly created ``hisat2_index`` directory, and
        the file names plus the species/build of the reference are stored on
        ``outputs``.
        """
        reference = inputs.ref_seq.output

        fasta_name = Path(reference.fasta.path).name
        assert fasta_name.endswith(".fasta")
        name = fasta_name[: -len(".fasta")]

        index_dir = Path("hisat2_index")
        index_dir.mkdir()

        # Copy the reference files so the outputs can refer to them by plain
        # file name.
        for field in ("fasta", "fastagz", "fai"):
            shutil.copy(Path(getattr(reference, field).path), Path.cwd())

        build_cmd = Cmd["hisat2-build"][
            reference.fasta.path,
            index_dir / f"{name}_index",
            "-p",
            self.requirements.resources.cores,
        ]
        exit_status, _, _ = build_cmd & TEE(retcode=None)
        if exit_status:
            self.error("Error occurred while preparing the HISAT2 index.")

        outputs.index = index_dir.name
        outputs.fasta = f"{name}.fasta"
        outputs.fastagz = f"{name}.fasta.gz"
        outputs.fai = f"{name}.fasta.fai"
        outputs.species = reference.species
        outputs.build = reference.build
コード例 #5
0
    def run(self, inputs, outputs):
        """Run ``alleyoop snpeval`` on a slamdunk BAM and publish its report.

        The variants file is symlinked as ``<name>_snp.vcf`` in the working
        directory, ``alleyoop snpeval`` writes into the ``snpeval`` directory,
        and the produced ``.csv`` report is renamed to ``.txt`` before being
        stored on ``outputs`` together with the plot, species and build.
        """
        bam_name = os.path.basename(inputs.slamdunk.bam.path)
        assert bam_name.endswith(".bam")
        name = bam_name[: -len(".bam")]

        snpeval_options = [
            "-o", "snpeval",
            "-r", inputs.ref_seq.fasta.path,
            "-b", inputs.regions.bed.path,
            "-s", ".",
            "-l", inputs.read_length,
        ]

        # Link the variants file under a name derived from the BAM name.
        link_variants = Cmd["ln"]["-s", inputs.slamdunk.variants.path, f"{name}_snp.vcf"]
        link_variants()

        snpeval = Cmd["alleyoop"]["snpeval"][snpeval_options][inputs.slamdunk.bam.path]
        exit_status, _, _ = snpeval & TEE(retcode=None)
        if exit_status:
            self.error("Alleyoop snpeval analysis failed.")

        report_prefix = os.path.join("snpeval", f"{name}_SNPeval")
        report_txt = report_prefix + ".txt"
        os.rename(report_prefix + ".csv", report_txt)

        outputs.report = report_txt
        outputs.plot = report_prefix + ".pdf"
        outputs.species = inputs.slamdunk.species
        outputs.build = inputs.slamdunk.build
コード例 #6
0
    def picard_rg(self, sample, in1, ref_organism, exe):
        """Add or replace read groups with Picard ``AddOrReplaceReadGroups``.

        Args:
            sample: Sample label; used as ``RGSM`` and in the output name.
            in1: Input BAM, passed as ``INPUT``.
            ref_organism: Value used for ``RGLB``.
            exe: Execution switch; Picard only runs when it equals 1.

        Returns:
            Path of the read-group BAM (returned even when nothing was run).
        """
        out = f'{self.local_output}/GATK/{sample}.RG.bam'
        if exe != 1:
            return out

        os.chdir(self.home)

        picard_args = [self.picard, 'AddOrReplaceReadGroups']
        picard_args += [
            f'INPUT={in1}',
            f'OUTPUT={out}',
            f'RGSM={sample}',
            'RGPU=none',
            f'RGLB={ref_organism}',
            'RGPL=ILLUMINA',
        ]
        report = Cmd['java']['-jar'][picard_args] & TEE(retcode=None)

        interpret_result(
            log=self.logger,
            so_logger=self.st_logger,
            report=report,
            logger_mes=f'[RG] Sample {sample} RG was successfully.',
            logger_error_mes=(
                f'[RG] For some reason the RG of {sample} was not successful.'),
            runtime_error_mes=f'RG failed ({sample}).')

        return out
コード例 #7
0
    def bbduk_single(self, sample, r1_name, r1, exe, n_cores, ktrim, qtrim,
                     trimq, k, mink, hdist, ftm, chastityfilter, minlen,
                     adapters):
        """Trim single-end reads with ``bbduk.sh``.

        Every tuning parameter is forwarded to BBduk as ``<name>=<value>``;
        ``overwrite=true``, ``tpe`` and ``tbo`` are always added. The adapter
        reference is looked up as ``<self.local_adapters>/<adapters>``.

        Args:
            sample: Sample label, used only in log and error messages.
            r1_name: Name used to build the trimmed output file name.
            r1: Input reads file.
            exe: Execution switch; BBduk only runs when it equals 1.
            n_cores: Value passed as ``threads``.

        Returns:
            Path of the trimmed FASTQ (returned even when nothing was run).
        """
        trimmed = f'{self.local_output}/trimmed/BBduk_{r1_name}.fastq'
        if exe != 1:
            return trimmed

        os.chdir(self.home)

        leading = [
            ('in', r1),
            ('out', trimmed),
            ('ref', f'{self.local_adapters}/{adapters}'),
            ('ktrim', ktrim),
            ('qtrim', qtrim),
            ('trimq', trimq),
            ('overwrite', 'true'),
            ('k', k),
            ('mink', mink),
            ('hdist', hdist),
        ]
        trailing = [
            ('ftm', ftm),
            ('chastityfilter', chastityfilter),
            ('minlen', minlen),
            ('threads', n_cores),
        ]
        # The bare ``tpe``/``tbo`` flags sit between the two key=value groups.
        bbduk_args = [f'{key}={value}' for key, value in leading]
        bbduk_args += ['tpe', 'tbo']
        bbduk_args += [f'{key}={value}' for key, value in trailing]

        report = Cmd['bbduk.sh'][bbduk_args] & TEE(retcode=None)

        interpret_result(
            log=self.logger,
            so_logger=self.st_logger,
            report=report,
            logger_mes=f'[BBduk] Sample {sample} trimmed successfully.',
            logger_error_mes=(f'[BBduk] For some reason the trimming of '
                              f'{sample} was not successful.'),
            runtime_error_mes=f'BBduk failed ({sample}).')

        return trimmed
コード例 #8
0
    def bwa(self, sample, reference, in1, in2, exe, core):
        """Align paired reads with ``bwa mem`` and write a sorted BAM.

        The ``bwa mem`` output is piped through ``samtools fixmate -m`` into
        ``samtools sort``, which writes the final BAM.

        Args:
            sample: Sample label; used in the output name and in messages.
            reference: Reference passed to ``bwa mem``.
            in1: First reads file.
            in2: Second reads file.
            exe: Execution switch; the pipeline only runs when it equals 1.
            core: Value passed to ``bwa mem -t``.

        Returns:
            Path of the sorted BAM (returned even when nothing was run).
        """
        out_bam = f'{self.local_output}/aligned/{sample}.bam'
        if exe != 1:
            return out_bam

        os.chdir(self.home)

        samtools = Cmd['samtools']
        align = Cmd['bwa']["mem", '-t', core, '-M', reference, in1, in2]
        fixmate = samtools['fixmate', '-m', '-', '-']
        sort = samtools['sort', '-o', out_bam]

        # Run the whole alignment pipeline as one chained command.
        report = (align | fixmate | sort) & TEE(retcode=None)

        interpret_result(
            log=self.logger,
            so_logger=self.st_logger,
            report=report,
            logger_mes=f'[BWA] Sample {sample} aligned successfully.',
            logger_error_mes=(f'[BWA] For some reason the alignment of '
                              f'{sample} was not successful.'),
            runtime_error_mes=f'BWA failed ({sample}).')

        return out_bam
コード例 #9
0
    def run(self, inputs, outputs):
        """Build a Cell Ranger reference index with ``cellranger mkref``.

        The build and the species of the genome and the annotation are
        compared first; a mismatch is reported through ``self.error``. The
        directory that ``cellranger mkref`` names after the genome build is
        then renamed to ``cellranger_index`` and published on ``outputs``.
        """
        genome_build = inputs.genome.build
        annotation_build = inputs.annotation.build
        if genome_build != annotation_build:
            build_mismatch = (
                "Builds of the genome {} and annotation {} do not match. "
                "Please provide genome and annotation with the same build.")
            self.error(build_mismatch.format(genome_build, annotation_build))

        genome_species = inputs.genome.species
        annotation_species = inputs.annotation.species
        if genome_species != annotation_species:
            species_mismatch = (
                "Species of genome {} and annotation {} do not match. "
                "Please provide genome and annotation with the same species.")
            self.error(
                species_mismatch.format(genome_species, annotation_species))

        # Only 90 % of the available memory is handed to cellranger; the
        # division by 1024 presumably converts MB to GB (confirm the unit of
        # ``resources.memory``).
        memory_gb = int(self.requirements.resources.memory * 0.9 / 1024)
        options = [
            "--genome={}".format(genome_build),
            "--genes={}".format(inputs.annotation.annot_sorted.path),
            "--fasta={}".format(inputs.genome.fasta.path),
            "--nthreads={}".format(self.requirements.resources.cores),
            "--memgb={}".format(memory_gb),
        ]
        mkref = Cmd["cellranger"]["mkref"][options]

        exit_status, _, _ = mkref & TEE(retcode=None)
        if exit_status:
            self.error("Error while running cellranger mkref.")

        index_name = "cellranger_index"
        os.rename(genome_build, index_name)

        outputs.genome_index = index_name
        outputs.source = inputs.annotation.source
        outputs.species = genome_species
        outputs.build = genome_build
コード例 #10
0
    def run(self, inputs, outputs):
        """Run ``alleyoop rates`` on a slamdunk BAM and publish its report.

        The tool writes into the ``rates`` directory; the produced
        ``<name>_overallrates.csv`` is renamed to ``.txt`` and stored on
        ``outputs`` together with the plot, species and build.
        """
        bam_name = os.path.basename(inputs.slamdunk.bam.path)
        assert bam_name.endswith(".bam")
        name = bam_name[: -len(".bam")]

        rates_options = ["-o", "rates", "-r", inputs.ref_seq.fasta.path]
        rates_cmd = Cmd["alleyoop"]["rates"][rates_options][inputs.slamdunk.bam.path]

        exit_status, _, _ = rates_cmd & TEE(retcode=None)
        if exit_status:
            self.error("Alleyoop rates analysis failed.")

        report_prefix = os.path.join("rates", f"{name}_overallrates")
        report_txt = report_prefix + ".txt"
        os.rename(report_prefix + ".csv", report_txt)

        outputs.report = report_txt
        outputs.plot = report_prefix + ".pdf"
        outputs.species = inputs.slamdunk.species
        outputs.build = inputs.slamdunk.build
コード例 #11
0
    def run(self, inputs, outputs):
        """Run the ``sesame.R`` script on a pair of IDAT channel files.

        The red and green channel files are copied into ``./data`` (created
        if missing) and ``sesame.R`` is called with the platform and the
        matching ``<platform>.hg38.manifest``. The fixed result file names
        ``QC_data.txt`` and ``beta_values_annotated.txt.gz`` are then
        published on ``outputs``.
        """
        idat = inputs.idat_file.output

        data_dir = Path("./data")
        if not data_dir.exists():
            data_dir.mkdir()

        channel_paths = (idat.red_channel.path, idat.green_channel.path)
        for channel_path in channel_paths:
            copy2(src=channel_path, dst=data_dir.name)

        platform = idat.platform
        sesame_cmd = Cmd["sesame.R"][
            f"--platform={platform}",
            f"--manifest={platform}.hg38.manifest",
        ]
        # The script returns QC_data.txt and beta_values_annotated.txt.gz.
        exit_status, _, _ = sesame_cmd & TEE(retcode=None)
        if exit_status:
            self.error(
                "An error was encountered during the running of SeSAMe pipeline."
            )

        outputs.qc_data = "QC_data.txt"
        outputs.methylation_data = "beta_values_annotated.txt.gz"
        outputs.species = idat.species
        outputs.platform = platform
コード例 #12
0
    def run(self, inputs, outputs):
        """Call variants with GATK ``HaplotypeCaller`` in GVCF mode.

        The g.VCF is named after the stem of the input BAM. After the caller
        finishes, the file is compressed with ``bgzip`` and indexed with
        ``tabix``; the compressed file and its ``.tbi`` index are published on
        ``outputs`` together with the species and build of the BAM.
        """
        bam = inputs.bam.output

        variants = "{}.g.vcf".format(Path(bam.bam.path).stem)
        variants_gz = "{}.gz".format(variants)
        variants_index = "{}.tbi".format(variants_gz)

        args = [
            "-R", inputs.ref_seq.output.fasta.path,
            "-I", bam.bam.path,
            "-O", variants,
            "-contamination", inputs.options.contamination,
        ]
        for annotation_group in (
            "StandardAnnotation",
            "StandardHCAnnotation",
            "AS_StandardAnnotation",
        ):
            args.extend(["-G", annotation_group])
        # One -GQB entry for every multiple of ten from 10 up to 90.
        for band in range(10, 100, 10):
            args.extend(["-GQB", band])
        args.extend(["-ERC", "GVCF"])

        if inputs.options.intervals:
            args.extend(["-L", inputs.options.intervals.output.bed.path])

        caller = Cmd["gatk"]["HaplotypeCaller"][args]
        exit_status, _, _ = caller & TEE(retcode=None)
        if exit_status:
            self.error("GATK HaplotypeCaller tool failed.")

        # Compress and index the output variants file.
        compress = Cmd["bgzip"]["-c", variants] > variants_gz
        compress()
        index = Cmd["tabix"]["-p", "vcf", variants_gz]
        index()

        outputs.vcf = variants_gz
        outputs.tbi = variants_index
        outputs.species = bam.species
        outputs.build = bam.build
コード例 #13
0
    def picard_md(self, sample, in1, exe):
        """Mark duplicates with Picard ``MarkDuplicates``.

        Args:
            sample: Sample label; used in file names and in messages.
            in1: Input BAM, passed as ``INPUT``.
            exe: Execution switch; Picard only runs when it equals 1.

        Returns:
            Path of the de-duplicated BAM (returned even when nothing was
            run).
        """
        gatk_dir = f'{self.local_output}/GATK'
        out = f'{gatk_dir}/{sample}.Sorted.dedup.bam'
        if exe != 1:
            return out

        os.chdir(self.home)
        metrics_file = f'{gatk_dir}/{sample}.sorted.dedup.metrics.txt'

        mark_cmd = Cmd['java']['-jar'][[
            self.picard,
            'MarkDuplicates',
            f'INPUT={in1}',
            f'OUTPUT={out}',
            f'METRICS_FILE={metrics_file}',
        ]]
        report = mark_cmd & TEE(retcode=None)

        interpret_result(
            log=self.logger,
            so_logger=self.st_logger,
            report=report,
            logger_mes=(
                f'[MD] Sample {sample} Marking duplicates was successfully.'),
            logger_error_mes=(f'[MD] For some reason the Marking duplicates '
                              f'of {sample} was not successful.'),
            runtime_error_mes=f'Marking duplicates failed ({sample}).')

        return out
コード例 #14
0
ファイル: run.py プロジェクト: tobiH94/benchbuild
def run(command, retcode=0):
    """Execute a plumbum command through the ``TEE`` modifier.

    Args:
        command: The plumbum command to execute.
        retcode: Value forwarded unchanged to ``TEE`` as its ``retcode``
            argument (defaults to 0).

    Returns:
        Whatever ``command & TEE(retcode=retcode)`` evaluates to.
    """
    tee = TEE(retcode=retcode)
    return command & tee
コード例 #15
0
    def run(self, inputs, outputs):
        """Build a BWA index for the reference FASTA and publish it.

        The FASTA, its gzipped copy and its ``.fai`` file are copied into the
        current working directory. ``bwa index`` is then run with its ``-p``
        prefix placed inside a newly created ``BWA_index`` directory, and the
        file names plus the species/build of the reference are stored on
        ``outputs``.
        """
        reference = inputs.ref_seq

        fasta_name = Path(reference.fasta.path).name
        assert fasta_name.endswith(".fasta")
        name = fasta_name[: -len(".fasta")]

        index_dir = Path("BWA_index")
        index_dir.mkdir()

        # Copy the reference files so the outputs can refer to them by plain
        # file name.
        for field in ("fasta", "fastagz", "fai"):
            shutil.copy(Path(getattr(reference, field).path), Path.cwd())

        index_cmd = Cmd["bwa"]["index"][
            "-p",
            index_dir / f"{name}.fasta",
            reference.fasta.path,
        ]
        exit_status, _, _ = index_cmd & TEE(retcode=None)
        if exit_status:
            self.error("Error occurred while preparing the BWA index.")

        outputs.index = index_dir.name
        outputs.fasta = f"{name}.fasta"
        outputs.fastagz = f"{name}.fasta.gz"
        outputs.fai = f"{name}.fasta.fai"
        outputs.species = reference.species
        outputs.build = reference.build
コード例 #16
0
    def gatk_BaseRecalibrator(self, sample, bam, ref_fasta, exe, ref_vcf):
        """Build a recalibration table, then apply it to the BAM.

        GATK ``BaseRecalibrator`` writes ``<sample>_recal_data.table``; GATK
        ``ApplyBQSR`` then uses that table to write ``<sample>.bqsrCal.bam``.
        The outcome of each step is handed to ``interpret_result``.

        Args:
            sample: Sample label; used in file names and in messages.
            bam: Input BAM, passed via ``-I`` to both tools.
            ref_fasta: Reference passed via ``-R`` to both tools.
            exe: Execution switch; the tools only run when it equals 1.
            ref_vcf: Value passed to ``-known-sites``.

        Returns:
            Name of the recalibrated BAM (returned even when nothing was
            run).
        """
        out = f"{sample}.bqsrCal.bam"
        if exe != 1:
            return out

        recal_table = f"{sample}_recal_data.table"
        os.chdir(self.home)
        java_jar = Cmd['java']['-jar']

        # Step 1: build the recalibration table.
        recalibrate_args = [
            self.gatk, 'BaseRecalibrator',
            '-R', ref_fasta,
            '-I', bam,
            '--use-original-qualities',
            '-known-sites', ref_vcf,
            '-O', recal_table,
        ]
        report = java_jar[recalibrate_args] & TEE(retcode=None)
        interpret_result(
            log=self.logger,
            so_logger=self.st_logger,
            report=report,
            logger_mes=(f'[GATK] Sample {sample} Base model calibration '
                        f'was successful.'),
            logger_error_mes=(f'[GATK] For some reason base model '
                              f'calibration for {sample} was not successful.'),
            runtime_error_mes=f'Base model calibration failed ({sample}).')

        # Step 2: apply the table (ApplyBQSR).
        apply_args = [
            self.gatk, 'ApplyBQSR',
            '--add-output-sam-program-record',
            '-R', ref_fasta,
            '-I', bam,
            '--use-original-qualities',
            '--bqsr-recal-file', recal_table,
            '-O', out,
        ]
        report = java_jar[apply_args] & TEE(retcode=None)
        interpret_result(
            log=self.logger,
            so_logger=self.st_logger,
            report=report,
            logger_mes=(f'[GATK] Sample {sample} quality score correction '
                        f'was successful.'),
            logger_error_mes=(f'[GATK] For some quality score correction of '
                              f'{sample} was not successful.'),
            runtime_error_mes=f'Quality score correction failed ({sample}).')

        return out
コード例 #17
0
 def kube_refresh(self, files):
     """Force-replace the given Kubernetes files with ``kubectl replace``.

     Each entry of ``files`` is appended directly to the current working
     directory; no path separator is inserted, and the expected form is
     documented as ``-f/path-to-file``.

     Note: this is not currently working reliably. Run the delete and
     deploy commands instead to refresh Kubernetes.
     """
     for entry in files:
         target = os.getcwd() + entry
         print(info | "Deploying file" + target)
         replace_cmd = kubectl["replace", "--force", "-f", target, "--validate=false"]
         replace_cmd & TEE()
コード例 #18
0
def fg(a, *cmds):
    """Run ``local[a]`` with ``cmds`` through ``TEE`` and report success.

    Args:
        a: Name of the program, looked up in ``local``.
        *cmds: Arguments bound to the program.

    Returns:
        True when the first element of the ``TEE`` result (the return code)
        equals 0; otherwise a failure message is printed and False is
        returned.
    """
    outcome = local[a][cmds] & TEE(retcode=None)
    exit_code = outcome[0]
    if exit_code == 0:
        return True
    print(f"Failed to execute in foreground, error code: {exit_code}")
    return False
コード例 #19
0
    def run(self, inputs, outputs):
        """Trim reads with three chained Cutadapt passes and run FastQC.

        All lanes are concatenated into ``input_reads.fastq.gz`` and piped
        through three Cutadapt invocations; the combined Cutadapt output is
        written to ``cutadapt_report.txt``. The trimmed reads are named after
        the first lane file, and ``fastqc.sh`` is run on them.

        The sample name is derived by removing the ``.fastq.gz`` suffix from
        the first lane's file name. ``str.strip('.fastq.gz')`` must not be
        used for this: it removes any of those *characters* from both ends,
        so ``sample.fastq.gz`` would become ``mple``.
        """
        # Get input reads file name (for the first of the possible multiple lanes)
        name = os.path.basename(inputs.reads.fastq[0].path)
        suffix = '.fastq.gz'
        if name.endswith(suffix):
            name = name[:-len(suffix)]
        trimmed_fastq = '{}_trimmed.fastq.gz'.format(name)

        # Concatenate multi-lane read files
        (Cmd['cat'][[reads.path for reads in inputs.reads.fastq]] > 'input_reads.fastq.gz')()

        # Quality trimming uses either the generic cutoff or the NextSeq mode.
        if inputs.options.quality_cutoff is not None:
            read_trim_cutoff = '--quality-cutoff={}'.format(inputs.options.quality_cutoff)
        else:
            read_trim_cutoff = '--nextseq-trim={}'.format(inputs.options.nextseq_trim)

        first_pass_input = [
            '-m', inputs.options.min_len,
            '-O', inputs.options.min_overlap,
            '-n', inputs.options.times,
            '-a', 'polyA=A{20}',
            '-a', 'QUALITY=G{20}',
            '-j', self.requirements.resources.cores,
            'input_reads.fastq.gz',
        ]

        # The second and third pass read from stdin ('-').
        second_pass_input = [
            '-m', inputs.options.min_len,
            read_trim_cutoff,
            '-a', 'truseq=A{18}AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC',
            '-j', self.requirements.resources.cores,
            '-',
        ]

        third_pass_input = [
            '-m', inputs.options.min_len,
            '-O', inputs.options.min_overlap,
            '-g', 'truseq=A{18}AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC',
            '--discard-trimmed',
            '-j', self.requirements.resources.cores,
            '-o', trimmed_fastq,
            '-',
        ]

        # Run Cutadapt, write analysis reports into a report file
        (
            Cmd['cutadapt'][first_pass_input]
            | Cmd['cutadapt'][second_pass_input]
            | Cmd['cutadapt'][third_pass_input] > 'cutadapt_report.txt'
        )()

        # Prepare final FASTQC report
        fastqc_args = [trimmed_fastq, 'fastqc', 'fastqc_archive', 'fastqc_url', '--nogroup']
        return_code, _, _ = Cmd['fastqc.sh'][fastqc_args] & TEE(retcode=None)
        if return_code:
            self.error("Error while preparing FASTQC report.")

        # Save the outputs
        outputs.fastq = [trimmed_fastq]
        outputs.report = 'cutadapt_report.txt'
コード例 #20
0
    def run(self, inputs, outputs):
        """Turn a tcount file into expression files and an expression set.

        ``prepare_expressions`` writes the read-count and normalized
        expression files. The normalized file is stored as JSON through
        ``expression2storage.py`` and both files are fed to
        ``create_expression_set.py``. All resulting file names and the
        metadata of the input are published on ``outputs``.
        """
        tcount_name = os.path.basename(inputs.tcount.tcount.path)
        assert tcount_name.endswith(".txt")
        name = tcount_name[: -len(".txt")]

        rc_file = "{}_rc.txt.gz".format(name)
        tpm_file = "{}_tmp.txt.gz".format(name)
        expression_set = "{}_expressions".format(name)

        prepare_expressions(inputs.tcount.tcount.path, rc_file, tpm_file)

        # Both files must exist before anything downstream is run.
        for exp_file in (rc_file, tpm_file):
            if os.path.isfile(exp_file):
                continue
            self.error(
                "Failed to parse tcout file. {} file was not created".format(
                    exp_file
                )
            )

        # Save the abundance estimates to JSON storage
        to_storage = Cmd["expression2storage.py"]
        to_storage("--output", "json.txt", tpm_file)

        # Prepare expression set file with feature_id -> gene_id mappings
        create_set = Cmd["create_expression_set.py"][
            "--expressions", rc_file,
            "--source_db", inputs.source,
            "--species", inputs.tcount.species,
            "--output_name", expression_set,
            "--norm_expressions", tpm_file,
            "--norm_expressions_type", "TPM",
        ]
        exit_status, _, _ = create_set & TEE(retcode=None)
        if exit_status:
            self.error("Error while preparing the expression set file.")

        outputs.exp = tpm_file
        outputs.exp_json = "json.txt"
        outputs.exp_type = "TPM"
        outputs.rc = rc_file
        outputs.exp_set = expression_set + ".txt.gz"
        outputs.exp_set_json = expression_set + ".json"
        outputs.species = inputs.tcount.species
        outputs.build = inputs.tcount.build
        outputs.source = inputs.source
        outputs.feature_type = "gene"
コード例 #21
0
def test(filename, *args):
    """Run ``LOCAL_DIR/<filename>/<filename>`` and collect its result.

    The program is started with ``--nosplash`` followed by ``args``, with
    the working directory set to ``LOCAL_DIR/<filename>`` and the run timed
    by ``Timer``. A success or failure line is printed depending on the
    return code.

    Returns:
        A dict with the keys ``name``, ``code``, ``time``, ``stdout`` and
        ``stderr``.
    """
    workdir = LOCAL_DIR / filename
    command = local[workdir / filename]['--nosplash']
    for extra in args:
        command = command[extra]

    colors.info.print('Running', command)
    with local.cwd(workdir):
        with Timer() as t:
            code, stdout, stderr = command & TEE(retcode=None)
        if code != 0:
            colors.fatal.print(filename, 'Failed with status code:', code)
        else:
            colors.success.print(filename, 'Successful')

    # The label keeps the separating space even when no args were given.
    label = filename + ' ' + ' '.join(str(extra) for extra in args)
    return {
        'name': label,
        'code': code,
        'time': t.interval,
        'stdout': stdout,
        'stderr': stderr,
    }
コード例 #22
0
    def fastqc_single(self, status, sample, in1):
        """Run FastQC on one reads file.

        The working directory is switched to ``self.local_input`` and the
        report is written to ``<self.local_output>/fastqc/<status>/``.

        Args:
            status: Sub-directory name for the report.
            sample: Sample label, used only in log and error messages.
            in1: Reads file handed to ``fastqc``.
        """
        os.chdir(self.local_input)

        report_dir = f'{self.local_output}/fastqc/{status}/'
        fastqc_cmd = Cmd['fastqc'][in1, '-o', report_dir]
        report = fastqc_cmd & TEE(retcode=None)

        interpret_result(
            log=self.logger,
            so_logger=self.st_logger,
            report=report,
            logger_mes=f'[Fastqc] Sample {sample} analysed successfully.',
            logger_error_mes=(f'[FastQC] For some reason the analysis of '
                              f'{sample} was not successful.'),
            runtime_error_mes=f'FastQC failed ({sample}).')
コード例 #23
0
    def run(self, inputs, outputs):
        """Collect RRBS metrics with GATK ``CollectRrbsMetrics``.

        The metrics prefix is the BAM file name without ``.bam``. The three
        files produced by the tool are renamed to friendlier names and
        published on ``outputs`` together with the species and build.
        """
        bam_name = os.path.basename(inputs.bam.bam.path)
        assert bam_name.endswith(".bam")
        name = bam_name[: -len(".bam")]

        options = [
            ("--INPUT", inputs.bam.bam.path),
            ("--REFERENCE", inputs.genome.fasta.path),
            ("--METRICS_FILE_PREFIX", name),
            ("--C_QUALITY_THRESHOLD", inputs.min_quality),
            ("--NEXT_BASE_QUALITY_THRESHOLD", inputs.next_base_quality),
            ("--MINIMUM_READ_LENGTH", inputs.min_lenght),
            ("--VALIDATION_STRINGENCY", inputs.validation_stringency),
            ("--ASSUME_SORTED", inputs.assume_sorted),
        ]
        # The mismatch rate is only forwarded when it lies within [0, 1].
        if 0 <= inputs.mismatch_rate <= 1:
            options.append(("--MAX_MISMATCH_RATE", inputs.mismatch_rate))

        args = []
        for flag, value in options:
            args.extend([flag, value])

        metrics_cmd = Cmd["gatk"]["CollectRrbsMetrics"][args]
        exit_status, _, _ = metrics_cmd & TEE(retcode=None)
        if exit_status:
            self.error("CollectRrbsMetrics tool failed.")

        report_file = f"{name}_rrbs_summary_metrics.txt"
        detailed_file = f"{name}_rrbs_detail_metrics.txt"
        out_plot = f"{name}_rrbs_qc.pdf"

        # Rename each produced file to its published name.
        renames = (
            (f"{name}.rrbs_summary_metrics", report_file),
            (f"{name}.rrbs_detail_metrics", detailed_file),
            (f"{name}.rrbs_qc.pdf", out_plot),
        )
        for produced, published in renames:
            os.rename(produced, published)

        outputs.report = report_file
        outputs.detailed_report = detailed_file
        outputs.plot = out_plot
        outputs.species = inputs.bam.species
        outputs.build = inputs.bam.build
コード例 #24
0
    def gatk_variants_to_table(self, in1, sample, exe):
        """Convert a VCF into a table with GATK ``VariantsToTable``.

        The table contains the fields CHROM, POS, REF and ALT (``-F``) and
        the genotype fields DP, AD, GQ and PL (``-GF``).

        Args:
            in1: Input VCF, passed via ``-V``.
            sample: Sample label; used in the output name and in messages.
            exe: Execution switch; the tool only runs when it equals 1.

        Returns:
            Path of the table (returned even when nothing was run).
        """
        out = f"{self.local_output}/GATK/tables/{sample}-raw.snps.table"
        if exe != 1:
            return out

        os.chdir(self.home)

        table_args = [self.gatk, 'VariantsToTable', '-V', in1]
        for site_field in ('CHROM', 'POS', 'REF', 'ALT'):
            table_args += ['-F', site_field]
        for genotype_field in ('DP', 'AD', 'GQ', 'PL'):
            table_args += ['-GF', genotype_field]
        table_args += ['-O', out]

        report = Cmd['java']['-jar'][table_args] & TEE(retcode=None)

        interpret_result(
            log=self.logger,
            so_logger=self.st_logger,
            report=report,
            logger_mes=(f'[GATK VariantsToTable] Sample {sample} table '
                        f'creation was successful.'),
            logger_error_mes=(f'[GATK VariantsToTable] For some reason the '
                              f'table creation of {sample} was not successful.'),
            runtime_error_mes=f'GATK VariantsToTable failed ({sample}).')

        return out
コード例 #25
0
    def run(self, inputs, outputs):
        """Collect insert size metrics with GATK ``CollectInsertSizeMetrics``.

        The metrics file and the histogram are named after the BAM file name
        without ``.bam``. A minimum fraction outside ``[0, 0.5]`` triggers a
        warning and is replaced by 0.
        """
        bam_name = os.path.basename(inputs.bam.bam.path)
        assert bam_name.endswith(".bam")
        name = bam_name[: -len(".bam")]

        metrics_file = f"{name}_insert_size_metrics.txt"
        histogram_file = f"{name}_insert_size.pdf"

        minimum_pct = inputs.minimum_fraction
        if not 0 <= minimum_pct <= 0.5:
            self.warning(
                "Minimum fraction of reads should be between 0 and 0.5. "
                "Setting minimum fraction of reads to 0."
            )
            minimum_pct = 0

        metrics_cmd = Cmd["gatk"]["CollectInsertSizeMetrics"][
            "--INPUT", inputs.bam.bam.path,
            "--OUTPUT", metrics_file,
            "--Histogram_FILE", histogram_file,
            "--REFERENCE_SEQUENCE", inputs.genome.fasta.path,
            "--DEVIATIONS", inputs.deviations,
            "--INCLUDE_DUPLICATES", inputs.include_duplicates,
            "--VALIDATION_STRINGENCY", inputs.validation_stringency,
            "--ASSUME_SORTED", inputs.assume_sorted,
            "--MINIMUM_PCT", minimum_pct,
        ]
        exit_status, _, _ = metrics_cmd & TEE(retcode=None)
        if exit_status:
            self.error("CollectInsertSizeMetrics tool failed.")

        outputs.report = metrics_file
        outputs.plot = histogram_file
        outputs.species = inputs.bam.species
        outputs.build = inputs.bam.build
コード例 #26
0
    def run(self, inputs, outputs):
        """Import a mapped expression file and build its expression set.

        The file is imported in compressed form and its stem must end in one
        of ``.tab``, ``.tsv`` or ``.txt``. The expressions are written to
        JSON, ``create_expression_set.py`` builds the expression set, and the
        results are published on ``outputs`` together with metadata taken
        from ``inputs`` and from the unmapped expression.
        """
        unmapped = inputs.exp_unmapped.output

        exp = inputs.exp.import_file(imported_format="compressed")
        exp_stem = Path(exp).stem

        supported_extensions = (".tab", ".tsv", ".txt")
        if not exp_stem.endswith(supported_extensions):
            self.error(
                "The imported file has unsupported file name extension. "
                f"The supported extensions are {supported_extensions}.")

        # Every supported extension is four characters long.
        name = exp_stem[:-4]
        expression_set = f"{name}_expressions"

        expression_to_json(exp, "json.txt")

        create_set = Cmd["create_expression_set.py"][
            "--expressions", exp,
            "--source_db", inputs.source,
            "--species", unmapped.species,
            "--output_name", expression_set,
            "--expressions_type", unmapped.exp_type,
        ]
        exit_status, _, _ = create_set & TEE(retcode=None)
        if exit_status:
            self.error("Error while preparing the expression set file.")

        if unmapped.platform_id:
            outputs.platform_id = unmapped.platform_id

        outputs.exp = exp
        outputs.exp_json = "json.txt"
        outputs.exp_type = unmapped.exp_type
        outputs.platform = unmapped.platform
        outputs.exp_set = f"{expression_set}.txt.gz"
        outputs.exp_set_json = f"{expression_set}.json"
        outputs.source = inputs.source
        outputs.species = unmapped.species
        outputs.build = inputs.build
        outputs.feature_type = "gene"
        outputs.probe_mapping = inputs.probe_mapping
コード例 #27
0
    def run(self, inputs, outputs):
        """Estimate the bisulfite conversion rate with ``bsrate``.

        The analysis is skipped when ``inputs.skip`` is set, when the sample
        has no spike-in alignment file, or when no control sequence is given.
        In that case the report only contains a note that the process was
        skipped. Otherwise the spike-in alignment is decompressed with
        ``pigz`` and ``bsrate`` writes the report.
        """
        mr_name = os.path.basename(inputs.mr.mr.path)
        assert mr_name.endswith(".mr.gz")
        name = mr_name[: -len(".mr.gz")]
        report_file = f"{name}_spikein_bsrate.txt"

        skip_process = inputs.skip

        # Probe both optional inputs; a missing one forces a skip.
        try:
            inputs.mr.spikein_mr.path
        except AttributeError:
            skip_process = True
            self.warning(
                "Selected sample lacks the alignment file for unmethylated control reads."
            )
        try:
            inputs.sequence.fasta.path
        except AttributeError:
            skip_process = True
            self.warning("Unmethylated control sequence was not provided.")

        if skip_process:
            with open(report_file, "w") as handle:
                handle.write("Bisulfite conversion rate process skipped.")
            outputs.report = report_file
            return

        decompressed = f"{name}.mr"
        decompress = Cmd["pigz"]["-cd", inputs.mr.spikein_mr.path] > decompressed
        decompress()

        args = ["-chrom", inputs.sequence.fasta.path, "-output", report_file]
        if inputs.count_all:
            args.append("-all")
        if inputs.max_mismatch:
            args.extend(["-max", inputs.max_mismatch])
        if inputs.a_rich:
            args.append("-a-rich")

        exit_status, _, _ = Cmd["bsrate"][args][decompressed] & TEE(retcode=None)
        if exit_status:
            self.error("Bsrate analysis failed.")

        outputs.report = report_file
コード例 #28
0
    def samtools_index(self, sample, in1, exe):
        """Index the file ``in1`` with ``samtools index``.

        Nothing is done unless ``exe`` equals 1. Otherwise the working
        directory is changed to ``self.home``, the index command is run and
        its result is handed to ``interpret_result`` together with log
        messages that name ``sample``.
        """
        if exe != 1:
            return

        os.chdir(self.home)

        # retcode=None: do not raise here, interpret_result gets the raw result.
        outcome = Cmd['samtools']['index', in1] & TEE(retcode=None)

        interpret_result(
            log=self.logger,
            so_logger=self.st_logger,
            report=outcome,
            logger_mes=f'[Samtools index] Sample {sample} indexing successfully.',
            logger_error_mes=f'[Samtools index] For some reasonthe indexing of {sample} was not successful.',
            runtime_error_mes=f'Samtools index failed ({sample}).',
        )
コード例 #29
0
ファイル: wgs_metrics.py プロジェクト: romunov/resolwe-bio
    def run(self, inputs, outputs):
        """Collect whole-genome sequencing metrics with GATK.

        Runs ``gatk CollectWgsMetrics`` on the input BAM, writes the metrics to
        ``<name>_wgs_metrics.txt``, passes that file through
        ``replace_metrics_class`` and stores it in ``outputs.report``. Species
        and build are copied from the BAM input to the outputs.
        """
        bam_path = inputs.bam.output.bam.path
        bam_filename = os.path.basename(bam_path)
        assert bam_filename.endswith(".bam")
        name = bam_filename[: -len(".bam")]
        metrics_file = f"{name}_wgs_metrics.txt"

        options = inputs.options
        flag_values = (
            ("--INPUT", bam_path),
            ("--OUTPUT", metrics_file),
            ("--REFERENCE_SEQUENCE", inputs.genome.output.fasta.path),
            ("--READ_LENGTH", inputs.read_length),
            ("--INCLUDE_BQ_HISTOGRAM", inputs.create_histogram),
            ("--MINIMUM_MAPPING_QUALITY", options.min_map_quality),
            ("--MINIMUM_BASE_QUALITY", options.min_quality),
            ("--COVERAGE_CAP", options.coverage_cap),
            ("--LOCUS_ACCUMULATION_CAP", options.accumulation_cap),
            ("--COUNT_UNPAIRED", options.count_unpaired),
            ("--SAMPLE_SIZE", options.sample_size),
            ("--VALIDATION_STRINGENCY", options.validation_stringency),
        )
        # Flatten the pairs into the alternating flag/value argument list.
        args = [token for pair in flag_values for token in pair]

        collect_metrics = Cmd["gatk"]["CollectWgsMetrics"][args]
        return_code, _, _ = collect_metrics & TEE(retcode=None)
        if return_code:
            self.error("CollectWgsMetrics tool failed.")

        replace_metrics_class(metrics_file)

        outputs.report = metrics_file
        outputs.species = inputs.bam.output.species
        outputs.build = inputs.bam.output.build
コード例 #30
0
    def firewallrule(self, rulename):
        """Create a firewall rule allowing TCP port 8089 unless it already exists.

        Args:
            rulename: Name of the rule to create. A falsy value selects the
                default name ``locustweb``.

        The existing rules are listed with
        ``gcloud compute firewall-rules list``. If one of them is named exactly
        ``rulename`` the method returns without creating anything; otherwise
        the rule is created with ``--allow=tcp:8089``.
        """
        if not rulename:
            rulename = "locustweb"

        print(info | "existinng firewall rules")
        _, sout, _ = gcloud["compute", "firewall-rules", "list"] & TEE()

        # Match the rule name exactly against the first column. A substring
        # test over the whole row would treat e.g. "locustweb-old", or another
        # column that merely contains the name, as the rule already existing.
        # NOTE(review): assumes the default tabular output with NAME as the
        # first column - confirm no custom --format is configured.
        existing_names = {
            fields[0] for fields in map(str.split, sout.splitlines()) if fields
        }
        if rulename in existing_names:
            print("Firewall rule already exists")
            return

        print(info | "creating firewall rule")
        gcloud["compute", "firewall-rules", "create", rulename, "--allow=tcp:8089"]()
        print(info | "firewall rule created")