Esempio n. 1
0
 def run(self):
     """Configure and execute the Manta SV-calling workflow for this case.

     Step 1 runs configManta.py (cheap, always local) to generate the run
     directory; step 2 executes the generated runWorkflow.py, on the
     cluster when cfg['cluster_exec'] is set, locally otherwise.
     """
     cmd = [
         '$MANTA/bin/configManta.py', '--exome', '--referenceFasta',
         self.cfg['fasta_file'], '--normalBam',
         self.input()['N']['preprocess']['bam'].path, '--tumorBam',
         self.input()['T']['preprocess']['bam'].path, '--rundir',
         os.path.join(self.cfg['output_dir'], self.case, 'variant_prep',
                      'manta')
     ]  # TODO add logic for exome to handle multiple sequencing preps
     pipeline_utils.command_call(cmd)
     cmd = [
         os.path.join(self.cfg['output_dir'], self.case, 'variant_prep',
                      'manta', 'runWorkflow.py'), '-j',
         self.cfg['max_threads']
     ]  # FIX: was self.max_threads; use cfg['max_threads'] for consistency
     # with the thread count requested from the scheduler below and with
     # the parallel strelka task.
     if self.cfg['cluster_exec']:
         pipeline_utils.cluster_command_call(
             self,
             cmd,
             threads=self.cfg['max_threads'],
             ram=12,
             cfg=self.cfg,
             err_log=self.output()['err_log'].path)
     else:
         pipeline_utils.command_call(cmd,
                                     err_log=self.output()['err_log'].path)
Esempio n. 2
0
 def run(self):
     """Annotate the filtered germline VCF with vcf2maf/VEP, writing a MAF.

     vcf2maf needs a plain-text VCF, so a gzipped input is first
     decompressed to a sibling file with the '.gz' suffix stripped.
     """
     if self.input()['filter_germline']['filter_germline'].path.endswith(
             '.gz'):
         input_vcf = self.input(
         )['filter_germline']['filter_germline'].path.split('.gz')[0]
         with gzip.open(
                 self.input()['filter_germline']['filter_germline'].path,
                 'rb') as vcf_in, open(input_vcf, 'wb') as vcf_out:
             shutil.copyfileobj(vcf_in, vcf_out)
     else:
         # BUG FIX: previously assigned the boolean result of
         # endswith('.gz') (always False in this branch) instead of the
         # VCF path itself.
         input_vcf = self.input()['filter_germline']['filter_germline'].path
     cmd = [
         'perl', '/root/pipeline/code/source/vcf2maf/vcf2maf.pl',
         '--ref-fasta', self.cfg['fasta_file'], '--vep-forks',
         self.cfg['max_threads'], '--input-vcf', input_vcf, '--output-maf',
         self.output()['vcf2maf'].path, '--tumor-id',
         '%s_N' % self.case
     ]
     if self.cfg['cluster_exec']:
         pipeline_utils.cluster_command_call(
             self,
             cmd,
             threads=self.cfg['max_threads'],
             ram=16,
             cfg=self.cfg,
             err_log=self.output()['err_log'].path)
     else:
         pipeline_utils.command_call(cmd,
                                     err_log=self.output()['err_log'].path)
Esempio n. 3
0
 def run(self):
     """Run fpfilter false-positive filtering on the filtered Mutect2 VCF.

     fpfilter needs a plain-text VCF, so a gzipped input is first
     decompressed to a sibling file with the '.gz' suffix stripped.
     """
     if self.input()['filter_mutect2']['filter_mutect2'].path.endswith(
             '.gz'):
         input_vcf = self.input(
         )['filter_mutect2']['filter_mutect2'].path.split('.gz')[0]
         with gzip.open(
                 self.input()['filter_mutect2']['filter_mutect2'].path,
                 'rb') as vcf_in, open(input_vcf, 'wb') as vcf_out:
             shutil.copyfileobj(vcf_in, vcf_out)
     else:
         # BUG FIX: previously assigned the boolean result of
         # endswith('.gz') (always False in this branch) instead of the
         # VCF path itself.
         input_vcf = self.input()['filter_mutect2']['filter_mutect2'].path
     cmd = [
         'fpfilter', '--vcf-file', input_vcf, '--bam-file',
         self.input()['preprocess']['bam'].path, '--reference',
         self.cfg['fasta_file'], '--sample', self.case + '_T', '--output',
         self.output()['fpfilter'].path
     ]
     if self.cfg['cluster_exec']:
         pipeline_utils.cluster_command_call(
             self,
             cmd,
             threads=self.cfg['max_threads'],
             ram=16,
             cfg=self.cfg,
             err_log=self.output()['err_log'].path)
     else:
         pipeline_utils.command_call(cmd,
                                     err_log=self.output()['err_log'].path)
Esempio n. 4
0
	def run(self):
		# Build the Picard sequence dictionary (.dict) for the configured
		# reference FASTA, then copy it next to the FASTA itself.
		cmd = ['java', '-jar', '$PICARD', 'CreateSequenceDictionary', 'R=%s' % self.cfg['fasta_file'], 'O=%s' % self.output().path]
		if self.cfg['cluster_exec']:
			pipeline_utils.cluster_command_call(self, cmd, threads=1, ram=5, cfg=self.cfg) #, err_log=self.output()['err_log'].path)
		else:
			pipeline_utils.command_call(cmd) #, err_log=self.output()['err_log'].path)
		# NOTE(review): 'O=' above reads self.output().path (single target) but the
		# copy below indexes self.output()['picard_index'] (dict of targets) — one
		# of the two access styles is presumably stale; confirm against this
		# task's output() definition.
		shutil.copyfile(self.output()['picard_index'].path, self.cfg['fasta_file'].split('.fa')[0] + '.dict')
Esempio n. 5
0
 def run(self):
     """Build a somatic panel of normals from every case's normal Mutect2 VCF."""
     pon_cmd = [
         'java', '-Dsamjdk.use_async_io_read_samtools=false',
         '-Dsamjdk.use_async_io_write_samtools=true',
         '-Dsamjdk.use_async_io_write_tribble=false',
         '-Dsamjdk.compression_level=2',
         '-Djava.io.tmpdir=%s' % self.cfg['tmp_dir'], '-jar',
         '/root/pipeline/resources/broad/gatk-4.0.12.0/gatk-package-4.0.12.0-local.jar',
         'CreateSomaticPanelOfNormals', '-O',
         self.output()['mutect2_pon'].path, '--tmp-dir', self.cfg['tmp_dir']
     ]
     # One '-vcfs' argument per case's normal-sample VCF.
     inputs = self.input()
     for case_id in inputs:
         pon_cmd.append('-vcfs')
         pon_cmd.append(inputs[case_id]['mutect2_normal']['mutect2_normal'].path)
     err_log_path = self.output()['err_log'].path
     if self.cfg['cluster_exec']:
         pipeline_utils.cluster_command_call(
             self, pon_cmd, threads=1, ram=12, cfg=self.cfg,
             err_log=err_log_path)
     else:
         pipeline_utils.command_call(pon_cmd, err_log=err_log_path)
Esempio n. 6
0
 def run(self):
     """Configure and execute the Strelka somatic workflow for this case.

     configureStrelkaSomaticWorkflow.py (always local) generates the run
     directory; the generated runWorkflow.py then does the actual calling.
     """
     rundir = os.path.join(self.cfg['output_dir'], self.case, 'variant_prep',
                           'strelka')
     config_cmd = [
         '$STRELKA/bin/configureStrelkaSomaticWorkflow.py', '--exome',
         '--referenceFasta', self.cfg['fasta_file'], '--normalBam',
         self.input()['N']['preprocess']['bam'].path, '--tumorBam',
         self.input()['T']['preprocess']['bam'].path, '--indelCandidates',
         self.input()['manta']['manta'].path, '--rundir', rundir
     ]
     pipeline_utils.command_call(config_cmd)
     workflow_cmd = [
         os.path.join(rundir, 'runWorkflow.py'), '-m', 'local', '-j',
         self.cfg['max_threads']
     ]
     if self.cfg['cluster_exec']:
         pipeline_utils.cluster_command_call(
             self, workflow_cmd,
             threads=self.cfg['max_threads'], ram=12, cfg=self.cfg,
             err_log=self.output()['err_log'].path)
     else:
         pipeline_utils.command_call(
             workflow_cmd, err_log=self.output()['err_log'].path)
Esempio n. 7
0
	def run(self):
		"""Run Trim Galore (plus FastQC) on this lane's paired-end FASTQ files."""
		lane_cfg = self.cfg['cases'][self.case][self.sample][self.lane]
		cmd = [
			'trim_galore',
			'--fastqc',
			'--fastqc_args "--outdir %s"' % os.path.dirname(self.output()['fastqc'][0].path),
			'--paired',
			'-o', os.path.dirname(self.output()['trimgalore'][0].path),
			'--basename', '%s_%s_%s' % (self.case, self.sample, self.lane),
			'--gzip',
			lane_cfg['fastq1'],
			lane_cfg['fastq2'],
		]
		# Make sure the destination directories exist before launching.
		pipeline_utils.confirm_path(self.output()['trimgalore'][0].path)
		pipeline_utils.confirm_path(self.output()['fastqc'][0].path)
		if self.cfg['cluster_exec']:
			pipeline_utils.cluster_command_call(self, cmd, threads=1, ram=4, cfg=self.cfg, err_log=self.output()['err_log'].path)
		else:
			pipeline_utils.command_call(cmd, err_log=self.output()['err_log'].path)
Esempio n. 8
0
	def run(self):
		"""Align trimmed reads with bwa mem, piping through samtools view/sort
		to produce a sorted BAM.

		The piped command is identical for cluster and local execution (the
		branches previously duplicated it byte-for-byte); only the dispatch
		mechanism differs.
		"""
		read_group = pipeline_utils.assign_rg(self.input()['trim']['trimgalore'][0].path, self.input()['trim']['trimgalore'][1].path, self.case, self.sample, self.cfg)
		cmd = ['bwa', 'mem', '-M', '-t', self.cfg['max_threads'], '-R', "'%s'" % read_group, self.cfg['fasta_file'], self.input()['trim']['trimgalore'][0].path, self.input()['trim']['trimgalore'][1].path, '|', 'samtools', 'view', '-bh', '|', 'samtools', 'sort', '-o', self.output()['bwa_mem'].path]
		if self.cfg['cluster_exec']:
			pipeline_utils.cluster_command_call(self, cmd, threads=self.cfg['max_threads'], ram=8, cfg=self.cfg, err_log=self.output()['err_log'].path)
		else:
			pipeline_utils.command_call(cmd, err_log=self.output()['err_log'].path)
Esempio n. 9
0
	def run(self):
		"""Merge per-lane BAMs with Picard MergeSamFiles, or just copy the BAM
		through when only a single lane exists."""
		lane_inputs = self.input()
		if len(lane_inputs) > 1:
			cmd = ['java', '-Djava.io.tmpdir=%s' % self.cfg['tmp_dir'], '-jar', '$PICARD', 'MergeSamFiles', 'O=%s' % self.output()['merge_bams'].path]
			cmd += ['I=%s' % lane_inputs[lane]['align']['bwa_mem'].path for lane in lane_inputs]
			if self.cfg['cluster_exec']:
				pipeline_utils.cluster_command_call(self, cmd, threads=1, ram=5, cfg=self.cfg, err_log=self.output()['err_log'].path)
			else:
				pipeline_utils.command_call(cmd, err_log=self.output()['err_log'].path)
		else:
			# Single lane: nothing to merge, copy the BAM into place.
			for lane in lane_inputs:
				shutil.copyfile(lane_inputs[lane]['align']['bwa_mem'].path, self.output()['merge_bams'].path)
			# Touch an empty error log so the task's outputs all exist.
			with open(self.output()['err_log'].path, 'w') as log_file:
				log_file.write('')
Esempio n. 10
0
 def run(self):
     """Run MSIsensor microsatellite-instability scoring on the tumor BAM."""
     msi_cmd = ['msisensor', 'msi']
     msi_cmd += ['-d', '/root/pipeline/resources/misc/hg19_microsatellites.list']
     msi_cmd += ['-t', self.input()['T']['preprocess']['bam'].path]
     msi_cmd += ['-o', self.output()['msisensor'].path]
     msi_cmd += ['-l', '1', '-q', '1', '-c', '20']
     # , '-e', self.cfg['library_bed'], '-b', self.cfg['max_threads']
     err_log_path = self.output()['err_log'].path
     if self.cfg['cluster_exec']:
         pipeline_utils.cluster_command_call(
             self, msi_cmd,
             threads=self.cfg['max_threads'], ram=16, cfg=self.cfg,
             err_log=err_log_path)
     else:
         pipeline_utils.command_call(msi_cmd, err_log=err_log_path)
Esempio n. 11
0
 def run(self):
     """Generate the FACETS SNP pileup from the normal and tumor BAMs."""
     pileup_cmd = [
         'snp-pileup',
         '-g',
         '-q15',
         '-Q20',
         '-P100',
         '-r25,0',
         self.cfg['germline_all'],
         self.output()['facets_snp_pileup'].path,
         self.input()['N']['preprocess']['bam'].path,
         self.input()['T']['preprocess']['bam'].path,
     ]
     err_log_path = self.output()['err_log'].path
     if self.cfg['cluster_exec']:
         pipeline_utils.cluster_command_call(
             self, pileup_cmd,
             threads=self.cfg['max_threads'], ram=2, cfg=self.cfg,
             err_log=err_log_path)
     else:
         pipeline_utils.command_call(pileup_cmd, err_log=err_log_path)
Esempio n. 12
0
 def run(self):
     """Call variants on the normal sample's BAM with GATK4 Mutect2."""
     mutect_cmd = [
         'gatk4',
         '--java-options',
         '"-Djava.io.tmpdir=%s"' % self.cfg['tmp_dir'],
         'Mutect2',
         '-R', self.cfg['fasta_file'],
         '--native-pair-hmm-threads', self.cfg['max_threads'],
         '-I', self.input()['preprocess']['bam'].path,
         '-O', self.output()['mutect2_normal'].path,
     ]
     if not self.cfg['cluster_exec']:
         pipeline_utils.command_call(mutect_cmd,
                                     err_log=self.output()['err_log'].path)
     else:
         pipeline_utils.cluster_command_call(
             self, mutect_cmd,
             threads=self.cfg['max_threads'], ram=16, cfg=self.cfg,
             err_log=self.output()['err_log'].path)
Esempio n. 13
0
    def run(self):
        """Filter this case's somatic Mutect2 calls.

        Pipeline: (1) GATK4 FilterMutectCalls on the raw VCF, (2) split the
        result into SNPs and indels with SelectVariants, (3) hard-filter the
        SNPs with VariantFiltration.

        NOTE(review): this block appears truncated — raw_indels /
        filtered_indels are derived but never filtered or merged in the
        visible code, and the final SNP-filter command is built but not
        executed; confirm against the full source.
        """
        # Derived filenames for each intermediate stage, all sharing the
        # output's basename.
        file_base = self.output()['filter_mutect2'].path.split('.vcf.gz')[0]
        filtermutect = file_base + '.FilterMutectCalls.vcf'
        raw_snps = file_base + '.snps.raw.vcf'
        raw_indels = file_base + '.indels.raw.vcf'
        filtered_snps = file_base + '.snps.filtered.vcf'
        filtered_indels = file_base + '.indels.filtered.vcf'

        # use default FilterMutectCalls first
        cmd = [
            'gatk4', '--java-options',
            '"-Djava.io.tmpdir=%s"' % self.cfg['tmp_dir'], 'FilterMutectCalls',
            '-R', self.cfg['fasta_file'], '-V',
            self.input()['mutect2']['mutect2'].path, '-O', filtermutect
        ]
        if self.cfg['cluster_exec']:
            pipeline_utils.cluster_command_call(
                self,
                cmd,
                threads=self.cfg['max_threads'],
                ram=16,
                cfg=self.cfg,
                err_log=self.output()['err_log'].path)
        else:
            pipeline_utils.command_call(cmd,
                                        err_log=self.output()['err_log'].path)
        # select snps
        cmd = [
            'gatk4', '--java-options',
            '"-Djava.io.tmpdir=%s"' % self.cfg['tmp_dir'], 'SelectVariants',
            '-R', self.cfg['fasta_file'], '-V', filtermutect,
            '--select-type-to-include', 'SNP', '-O', raw_snps
        ]
        if self.cfg['cluster_exec']:
            pipeline_utils.cluster_command_call(
                self,
                cmd,
                threads=self.cfg['max_threads'],
                ram=16,
                cfg=self.cfg)
        else:
            pipeline_utils.command_call(cmd)
        # select indels
        cmd = [
            'gatk4', '--java-options',
            '"-Djava.io.tmpdir=%s"' % self.cfg['tmp_dir'], 'SelectVariants',
            '-R', self.cfg['fasta_file'], '-V', filtermutect,
            '--select-type-to-include', 'INDEL', '-O', raw_indels
        ]
        if self.cfg['cluster_exec']:
            pipeline_utils.cluster_command_call(
                self,
                cmd,
                threads=self.cfg['max_threads'],
                ram=16,
                cfg=self.cfg)
        else:
            pipeline_utils.command_call(cmd)
        # filter snps
        cmd = [
            'gatk4', '--java-options',
            '"-Djava.io.tmpdir=%s"' % self.cfg['tmp_dir'], 'VariantFiltration',
            '-R', self.cfg['fasta_file'], '-V', raw_snps, '-O', filtered_snps
        ]
        # NOTE(review): ('"FS60"', '"FS > 60.0"') appears twice in this list —
        # presumably an unintentional duplicate; confirm before removing.
        for filter_name, filter_expression in [
            ('"FS60"', '"FS > 60.0"'), ('"MQ40"', '"MQ < 40.0"'),
            ('"MQRS-12.5"', '"MQRankSum < -12.5"'),
            ('"RPRS-8"', '"ReadPosRankSum < -8.0"'), ('"SOR3"', '"SOR > 3.0"'),
            ('"FS60"', '"FS > 60.0"')
        ]:  # , ('"Q30"', '"QUAL < 30.0"')
            cmd += [
                '--filter-name', filter_name, '--filter-expression',
                filter_expression
            ]
        # Tumor-only cases (no matched normal) get extra TLOD/QD filters.
        if not 'N' in self.cfg['cases'][self.case]:
            cmd += [
                '--filter-name', '"TLOD10"', '--filter-expression',
                '"TLOD < 10.0"', '--filter-name', '"QD5"',
                '--filter-expression', '"QD < 5.0"'
            ]
Esempio n. 14
0
    def run(self):
        """Hard-filter the HaplotypeCaller germline VCF.

        Pipeline: split the raw VCF into SNPs and indels (SelectVariants),
        apply GATK hard filters to each class (VariantFiltration), merge the
        two filtered VCFs back together (Picard MergeVcfs), then delete the
        intermediates.
        """
        # Derived filenames for each intermediate stage.
        file_base = self.output()['filter_germline'].path.split('.vcf.gz')[0]
        raw_snps = file_base + '.snps.raw.vcf'
        raw_indels = file_base + '.indels.raw.vcf'
        filtered_snps = file_base + '.snps.filtered.vcf'
        filtered_indels = file_base + '.indels.filtered.vcf'
        # select snps
        cmd = [
            'gatk4', '--java-options',
            '"-Djava.io.tmpdir=%s"' % self.cfg['tmp_dir'], 'SelectVariants',
            '-R', self.cfg['fasta_file'], '-V',
            self.input()['haplotype_caller']['haplotype_caller'].path,
            '--select-type-to-include', 'SNP', '-O', raw_snps
        ]
        if self.cfg['cluster_exec']:
            pipeline_utils.cluster_command_call(
                self,
                cmd,
                threads=self.cfg['max_threads'],
                ram=16,
                cfg=self.cfg)
        else:
            pipeline_utils.command_call(cmd)
        # select indels
        cmd = [
            'gatk4', '--java-options',
            '"-Djava.io.tmpdir=%s"' % self.cfg['tmp_dir'], 'SelectVariants',
            '-R', self.cfg['fasta_file'], '-V',
            self.input()['haplotype_caller']['haplotype_caller'].path,
            '--select-type-to-include', 'INDEL', '-O', raw_indels
        ]
        if self.cfg['cluster_exec']:
            pipeline_utils.cluster_command_call(
                self,
                cmd,
                threads=self.cfg['max_threads'],
                ram=16,
                cfg=self.cfg)
        else:
            pipeline_utils.command_call(cmd)
        # filter snps
        cmd = [
            'gatk4', '--java-options',
            '"-Djava.io.tmpdir=%s"' % self.cfg['tmp_dir'], 'VariantFiltration',
            '-R', self.cfg['fasta_file'], '-V', raw_snps, '-O', filtered_snps
        ]
        for filter_name, filter_expression in [
            ('"filter_1"', '"QD < 2.0"'), ('"filter_2"', '"FS > 60.0"'),
            ('"filter_3"', '"MQ < 40.0"'),
            ('"filter_4"', '"MQRankSum < -12.5"'),
            ('"filter_5"', '"ReadPosRankSum < -8.0"')
        ]:
            cmd += [
                '--filter-name', filter_name, '--filter-expression',
                filter_expression
            ]
        if self.cfg['cluster_exec']:
            pipeline_utils.cluster_command_call(
                self,
                cmd,
                threads=self.cfg['max_threads'],
                ram=16,
                cfg=self.cfg)
        else:
            pipeline_utils.command_call(cmd)
        # filter indels
        cmd = [
            'gatk4', '--java-options',
            '"-Djava.io.tmpdir=%s"' % self.cfg['tmp_dir'], 'VariantFiltration',
            '-R', self.cfg['fasta_file'], '-V', raw_indels, '-O',
            filtered_indels
        ]
        for filter_name, filter_expression in [('"filter_1"', '"QD < 2.0"'),
                                               ('"filter_2"', '"FS > 200.0"'),
                                               ('"filter_3"',
                                                '"ReadPosRankSum < -20.0"')]:
            cmd += [
                '--filter-name', filter_name, '--filter-expression',
                filter_expression
            ]
        if self.cfg['cluster_exec']:
            pipeline_utils.cluster_command_call(
                self,
                cmd,
                threads=self.cfg['max_threads'],
                ram=16,
                cfg=self.cfg)
        else:
            pipeline_utils.command_call(cmd)
        # combine snps and indels
        cmd = [
            'java',
            '-Djava.io.tmpdir=%s' % self.cfg['tmp_dir'], '-jar', '$PICARD',
            'MergeVcfs',
            'I=%s' % filtered_snps,
            'I=%s' % filtered_indels,
            'O=%s' % self.output()['filter_germline'].path
        ]
        if self.cfg['cluster_exec']:
            pipeline_utils.cluster_command_call(
                self,
                cmd,
                threads=self.cfg['max_threads'],
                ram=16,
                cfg=self.cfg,
                err_log=self.output()['err_log'].path)
        else:
            pipeline_utils.command_call(cmd,
                                        err_log=self.output()['err_log'].path)

        # Best-effort cleanup of the intermediates and their .idx companions.
        for file in [raw_snps, raw_indels, filtered_snps, filtered_indels]:
            try:
                os.remove(file)
                os.remove(file + '.idx')
            except OSError:
                # FIX: was a bare `except:` which also swallowed
                # KeyboardInterrupt/SystemExit; only ignore missing files.
                pass
Esempio n. 15
0
	def run(self):
		"""Mark duplicate reads in the merged BAM with Picard MarkDuplicates."""
		dedup_cmd = [
			'java',
			'-Djava.io.tmpdir=%s' % self.cfg['tmp_dir'],
			'-jar', '$PICARD', 'MarkDuplicates',
			'I=%s' % self.input()['merge_bams']['merge_bams'].path,
			'O=%s' % self.output()['mark_duplicates']['bam'].path,
			'M=%s' % self.output()['mark_duplicates']['metrics'].path,
			'TAGGING_POLICY=All']
		if not self.cfg['cluster_exec']:
			pipeline_utils.command_call(dedup_cmd, err_log=self.output()['err_log'].path)
		else:
			pipeline_utils.cluster_command_call(self, dedup_cmd, threads=self.cfg['max_threads'], ram=5, cfg=self.cfg, err_log=self.output()['err_log'].path)
Esempio n. 16
0
	def run(self):
		"""Index the duplicate-marked BAM with samtools index."""
		index_cmd = ['samtools', 'index', self.input()['mark_duplicates']['mark_duplicates']['bam'].path]
		err_log_path = self.output()['err_log'].path
		if self.cfg['cluster_exec']:
			pipeline_utils.cluster_command_call(self, index_cmd, threads=1, ram=5, cfg=self.cfg, err_log=err_log_path)
		else:
			pipeline_utils.command_call(index_cmd, err_log=err_log_path)
Esempio n. 17
0
	def run(self):
		"""Apply base-quality recalibration via GATK3 PrintReads with -BQSR."""
		bqsr_cmd = [
			'java', '-Djava.io.tmpdir=%s' % self.cfg['tmp_dir'], '-jar', '$GATK3',
			'-T', 'PrintReads',
			'-I', self.input()['mark_duplicates']['mark_duplicates']['bam'].path,
			'-R', self.cfg['fasta_file'],
			'-BQSR', self.input()['base_recalibrator']['base_recalibrator'].path,
			'-o', self.output()['apply_bqsr'].path]
		err_log_path = self.output()['err_log'].path
		if self.cfg['cluster_exec']:
			pipeline_utils.cluster_command_call(self, bqsr_cmd, threads=1, ram=5, cfg=self.cfg, err_log=err_log_path)
		else:
			pipeline_utils.command_call(bqsr_cmd, err_log=err_log_path)
Esempio n. 18
0
class vcf2maf_tumor(luigi.Task):
    """Annotate the fpfilter-passed tumor VCF with vcf2maf/VEP into a MAF."""
    priority = 85
    cfg = luigi.DictParameter()

    case = luigi.Parameter()

    @property  # This is necessary to assign a dynamic value to the 'threads' resource within a task
    def resources(self):
        return {'threads': self.cfg['max_threads']}

    def requires(self):
        return {'fpfilter': fpfilter(case=self.case, cfg=self.cfg)}

    def output(self):
        """Declare the MAF and error-log targets, creating parent dirs."""
        outputs = {
            'vcf2maf':
            luigi.LocalTarget(
                os.path.join(self.cfg['output_dir'], self.case, 'variants',
                             '%s.maf' % self.case)),
            'err_log':
            luigi.LocalTarget(
                os.path.join(self.cfg['output_dir'], self.case, 'log',
                             '%s_vcf2maf_err.txt' % self.case))
        }
        for task in outputs:
            if isinstance(outputs[task], luigi.LocalTarget):
                pipeline_utils.confirm_path(outputs[task].path)
        return outputs

    def run(self):
        """Run vcf2maf on the fpfilter output, then drop the VEP temp VCF."""
        cmd = [
            'perl', '/root/pipeline/code/source/vcf2maf/vcf2maf.pl',
            '--ref-fasta', self.cfg['fasta_file'], '--vep-forks',
            self.cfg['max_threads'], '--input-vcf',
            self.input()['fpfilter']['fpfilter'].path, '--output-maf',
            self.output()['vcf2maf'].path, '--tumor-id',
            '%s_T' % self.case
        ]
        # Matched-normal cases also pass a normal sample ID.
        if 'N' in self.cfg['cases'][self.case]:
            cmd += ['--normal-id', '%s_N' % self.case]
        if self.cfg['cluster_exec']:
            pipeline_utils.cluster_command_call(
                self,
                cmd,
                threads=self.cfg['max_threads'],
                ram=16,
                cfg=self.cfg,
                err_log=self.output()['err_log'].path)
        else:
            pipeline_utils.command_call(cmd,
                                        err_log=self.output()['err_log'].path)
        # Best-effort removal of the intermediate '.vep.vcf' vcf2maf leaves
        # behind.
        try:
            os.remove(
                '%s.vep.vcf' %
                self.input()['fpfilter']['fpfilter'].path.split('.vcf')[0])
        except OSError:
            # FIX: was a bare `except:` which also swallowed
            # KeyboardInterrupt/SystemExit; only ignore a missing file.
            pass
Esempio n. 19
0
	def run(self):
		"""Index the reference FASTA with bwa (bwtsw algorithm)."""
		index_cmd = ['bwa', 'index', '-a', 'bwtsw', self.cfg['fasta_file']]
		if not self.cfg['cluster_exec']:
			pipeline_utils.command_call(index_cmd)
		else:
			pipeline_utils.cluster_command_call(self, index_cmd, threads=1, ram=5, cfg=self.cfg)
Esempio n. 20
0
            self.cfg['library_bed'], '--af-of-alleles-not-in-resource',
            '0.0000025', '-I',
            self.input()['T']['preprocess']['bam'].path, '-O',
            self.output()['mutect2'].path
        ]
        if 'N' in self.cfg['cases'][self.case]:
            cmd += [
                '-I',
                self.input()['N']['preprocess']['bam'].path, '-normal',
                '%s_N' % self.case
            ]
        if self.cfg['cluster_exec']:
            pipeline_utils.cluster_command_call(
                self,
                cmd,
                threads=self.cfg['max_threads'],
                ram=16,
                cfg=self.cfg,
                err_log=self.output()['err_log'].path)
        else:
            pipeline_utils.command_call(cmd,
                                        err_log=self.output()['err_log'].path)


class filter_mutect2(luigi.Task):
    priority = 86
    resources = {'threads': 1}
    cfg = luigi.DictParameter()

    case = luigi.Parameter()
Esempio n. 21
0
		return requirements

	def output(self):
		# Cohort-wide outputs: one realignment-intervals file and one
		# original->realigned BAM filename map shared by every sample, plus the
		# step's error log.
		# return {'realigner_target': luigi.LocalTarget(os.path.join(self.cfg['output_dir'], self.case, 'preprocess', '%s_%s_realigner_targets.intervals' % (self.case, self.sample))), 'err_log': luigi.LocalTarget(os.path.join(self.cfg['output_dir'], self.case, 'log', '%s_%s_realigner_target_err.txt' % (self.case, self.sample)))}
		return {'realigner_target': luigi.LocalTarget(os.path.join(self.cfg['output_dir'], 'all_samples', 'preprocess', 'all_samples_realigner_targets.intervals')), 'file_map': luigi.LocalTarget(os.path.join(self.cfg['output_dir'], 'all_samples', 'preprocess', 'all_samples_realigner.map')), 'err_log': luigi.LocalTarget(os.path.join(self.cfg['output_dir'], 'all_samples', 'log', 'realigner_target_err.txt'))}
	def run(self):
		"""Create cohort-wide GATK3 realignment target intervals and write the
		original->realigned BAM filename map."""
		target_cmd = [
			'java', '-Djava.io.tmpdir=%s' % self.cfg['tmp_dir'], '-jar', '$GATK3',
			'-T', 'RealignerTargetCreator', '-R', self.cfg['fasta_file'],
			'--known', self.cfg['germline_indels'],
			'-nt', self.cfg['global_max_threads'],
			'-o', self.output()['realigner_target'].path]
		map_lines = []
		# Every sample's duplicate-marked BAM feeds the target creator; record
		# the realigned name each BAM will map to.
		for case_id in self.input():
			for sample_id in self.input()[case_id]:
				bam_path = self.input()[case_id][sample_id]['mark_duplicates']['mark_duplicates']['bam'].path
				realigned_path = bam_path.split('marked_duplicates.bam')[0] + 'realigned.bam'
				map_lines.append('%s\t%s' % (os.path.basename(bam_path), realigned_path))
				target_cmd += ['-I', bam_path]
		if self.cfg['cluster_exec']:
			pipeline_utils.cluster_command_call(self, target_cmd, threads=self.cfg['global_max_threads'], ram=48, cfg=self.cfg, err_log=self.output()['err_log'].path)
		else:
			pipeline_utils.command_call(target_cmd, err_log=self.output()['err_log'].path)
		pipeline_utils.confirm_path(self.output()['file_map'].path)
		with open(self.output()['file_map'].path, 'w') as map_file:
			map_file.write('\n'.join(map_lines))

class indel_realigner(luigi.Task):
	priority = 94
	resources = {'threads': 1}
	cfg = luigi.DictParameter()

	# @property # This is necessary to assign a dynamic value to the 'threads' resource within a task
	# def resources(self):
	# 	return {'threads': self.cfg['global_max_threads']}