def run(self):
    """Align the paired reads and write the result as BAM.

    Builds one token list that runs the aligner configured under
    ``bowtie_location`` in ``--very-sensitive-local`` mode, pipes it into
    ``samtools view -bh`` and redirects that to ``self.output().path``.
    The list contains the shell operators ``|`` and ``>`` as separate
    tokens and is handed to ``pipeline_utils.command_call``.
    """
    pipeline_utils.confirm_path(self.output().path)

    # os.path.dirname keeps the leading '/' of an absolute FASTA path and
    # copes with a bare file name; joining the split pieces did neither.
    fasta_dir = os.path.dirname(self.cfg['fasta_file'])
    reads = self.input()[-1]

    cmd = [
        self.cfg['bowtie_location'],
        '-x', os.path.join(fasta_dir, 'index', self.cfg['base_name']),
        '-1', reads[0][0].path,
        '-2', reads[1][0].path,
        '-p', self.max_threads,
        # A missing comma used to fuse this flag with the pipe into the
        # single token '--very-sensitive-local|'.
        '--very-sensitive-local',
        '|',
        self.cfg['samtools_location'], 'view', '-bh', '-',
        '>', self.output().path,
    ]
    pipeline_utils.command_call(
        cmd,
        [self.output()],
        cwd=os.getcwd(),
        threads_needed=self.max_threads,
        sleep_time=0.2,
    )
def run(self):
    """Convert the input VCF to MAF with the bundled ``vcf2maf.pl``.

    The normal-sample identifiers are only passed when ``self.matched_n``
    is not the empty string.
    """
    has_normal = self.matched_n != ''
    tumor_id = self.case + '_T'

    cmd = [
        './packages/misc/mskcc-vcf2maf-decbf60/vcf2maf.pl',
        '--input-vcf', self.input().path,
        '--output-maf', self.output().path,
        '--tumor-id', tumor_id,
        '--vcf-tumor-id', tumor_id,
    ]
    if has_normal:
        normal_id = self.case + '_N'
        cmd.extend(['--normal-id', normal_id, '--vcf-normal-id', normal_id])
    # Annotation settings shared by both variants of the command.
    cmd.extend([
        '--vep-path', './packages/ensembl-vep',
        '--vep-data', './packages/ensembl-vep/cache',
        '--ref-fasta', self.cfg['fasta_file'],
        '--species', 'homo_sapiens',
        '--ncbi-build', 'GRCh38',
        '--cache-version', '94',
        '--filter-vcf', '0',
    ])
    pipeline_utils.command_call(cmd, [self.output()])
def run(self):
    """Call variants with VarDict and convert its output to VCF.

    With a matched normal (``self.matched_n`` truthy) the paired pipeline
    ``VarDict | testsomatic.R | var2vcf_paired.pl`` is built; otherwise
    the single-sample pipeline
    ``VarDict | teststrandbias.R | var2vcf_valid.pl`` is used. Either way
    stdout is redirected to ``<self.vcf_path>/vardict``.
    """
    pipeline_utils.confirm_path(self.output().path)

    tumor_id = self.case + '_T'
    # NOTE(review): the redirect target is derived from self.vcf_path, not
    # from self.output().path -- confirm both name the same file.
    redirect = '> %s' % os.path.join(self.vcf_path, 'vardict')

    if self.matched_n:
        cmd = [
            './packages/VarDictJava/build/install/VarDict/bin/VarDict',
            '-G', self.cfg['fasta_file'],
            '-f', '0.01',
            '-N', tumor_id,
            '-b', '"%s|%s"' % (self.input()[0][0].path,
                               self.input()[1][0].path),
            '-z', '-F',
            '-c', '1', '-S', '2', '-E', '3', '-g', '4',
            self.cfg['library_bed'],
            '|', './packages/VarDictJava/VarDict/testsomatic.R',
            '|', './packages/VarDictJava/VarDict/var2vcf_paired.pl',
            '-N', '"%s|%s"' % (tumor_id, self.case + '_N'),
            '-f', '0.01',
            redirect,
        ]
    else:
        cmd = [
            './packages/VarDictJava/build/install/VarDict/bin/VarDict',
            '-G', self.cfg['fasta_file'],
            '-f', '0.01',
            '-N', tumor_id,
            '-b', self.input()[0][0].path,
            '-z',
            '-c', '1', '-S', '2', '-E', '3', '-g', '4',
            self.cfg['library_bed'],
            '|', './packages/VarDictJava/VarDict/teststrandbias.R',
            '|', './packages/VarDictJava/VarDict/var2vcf_valid.pl',
            '-N', tumor_id,
            # Was the bare token 'E', which var2vcf_valid.pl would see as a
            # stray positional argument rather than its -E switch.
            '-E',
            '-f', '0.01',
            redirect,
        ]
    pipeline_utils.command_call(cmd, [self.output()])
def run(self):
    """Run the microsatellite-instability tools for this case.

    MANTIS is launched only when ``self.matched_n`` is not the empty
    string. The mSINGS single-sample script is then run on the tumor BAM
    and its ``MSI_Analysis.txt`` report is moved into ``self.vcf_path``.
    """
    for target in self.output():
        pipeline_utils.confirm_path(target.path)

    tumor_bam = self.input()[0][0].path

    if self.matched_n != '':
        mantis_cmd = [
            'python3', './packages/MANTIS/mantis.py',
            '-b', './packages/msings/doc/mSINGS_TCGA_MANTIS.bed',
            '--genome', self.cfg['fasta_file'],
            '-t', tumor_bam,
            '-n', self.input()[1][0].path,
            '-mrq', '20.0',
            '-mlq', '25.0',
            '-mlc', '20',
            '-mrr', '1',
            '-o', self.output()[0].path,
        ]
        pipeline_utils.command_call(mantis_cmd, self.output())

    # NOTE(review): the former `else:` is disabled, so mSINGS is taken to
    # run for every case, matched or not -- confirm against the original
    # indentation.
    msings_dir = os.path.join(self.project_dir, 'output', 'msings', 'tumor')
    msings_cmd = [
        './packages/msings/scripts/run_msings_single_sample.sh',
        tumor_bam,
        './packages/msings/doc/mSINGS_TCGA.msi_intervals',
        './packages/msings/doc/mSINGS_TCGA.bed',
        self.cfg['fasta_file'],
        './packages/msings/doc/mSINGS_TCGA.baseline',
        msings_dir,
    ]
    pipeline_utils.command_call(msings_cmd, self.output())

    sample_tag = self.case + '_T_recalibrated'
    report = os.path.join(msings_dir, sample_tag,
                          sample_tag + '.MSI_Analysis.txt')
    os.rename(report, os.path.join(self.vcf_path, self.case + '_msings.txt'))
def run(self):
    """Convert the filtered germline VCF to MAF with ``vcf2maf.pl``.

    A gzip-compressed input is first decompressed next to the original
    (same path without the ``.gz`` suffix). The command is dispatched
    through the cluster helper when ``cfg['cluster_exec']`` is truthy,
    otherwise it is run through ``pipeline_utils.command_call``.
    """
    vcf_path = self.input()['filter_germline']['filter_germline'].path

    if vcf_path.endswith('.gz'):
        # Strip only the trailing suffix; splitting on '.gz' would cut the
        # path at the first '.gz' occurring anywhere in it.
        input_vcf = vcf_path[:-len('.gz')]
        with gzip.open(vcf_path, 'rb') as vcf_in, \
                open(input_vcf, 'wb') as vcf_out:
            shutil.copyfileobj(vcf_in, vcf_out)
    else:
        # Previously this branch stored the boolean result of
        # endswith('.gz') instead of the path itself.
        input_vcf = vcf_path

    cmd = [
        'perl', '/root/pipeline/code/source/vcf2maf/vcf2maf.pl',
        '--ref-fasta', self.cfg['fasta_file'],
        '--vep-forks', self.cfg['max_threads'],
        '--input-vcf', input_vcf,
        '--output-maf', self.output()['vcf2maf'].path,
        '--tumor-id', '%s_N' % self.case,
    ]
    err_log = self.output()['err_log'].path
    if self.cfg['cluster_exec']:
        pipeline_utils.cluster_command_call(
            self,
            cmd,
            threads=self.cfg['max_threads'],
            ram=16,
            cfg=self.cfg,
            err_log=err_log)
    else:
        pipeline_utils.command_call(cmd, err_log=err_log)
def run(self):
    """Run ``fpfilter`` on the filtered Mutect2 calls.

    A gzip-compressed input VCF is first decompressed next to the
    original (same path without the ``.gz`` suffix). The command is
    dispatched through the cluster helper when ``cfg['cluster_exec']`` is
    truthy, otherwise through ``pipeline_utils.command_call``.
    """
    vcf_path = self.input()['filter_mutect2']['filter_mutect2'].path

    if vcf_path.endswith('.gz'):
        # Strip only the trailing suffix; splitting on '.gz' would cut the
        # path at the first '.gz' occurring anywhere in it.
        input_vcf = vcf_path[:-len('.gz')]
        with gzip.open(vcf_path, 'rb') as vcf_in, \
                open(input_vcf, 'wb') as vcf_out:
            shutil.copyfileobj(vcf_in, vcf_out)
    else:
        # Previously this branch stored the boolean result of
        # endswith('.gz') instead of the path itself.
        input_vcf = vcf_path

    cmd = [
        'fpfilter',
        '--vcf-file', input_vcf,
        '--bam-file', self.input()['preprocess']['bam'].path,
        '--reference', self.cfg['fasta_file'],
        '--sample', self.case + '_T',
        '--output', self.output()['fpfilter'].path,
    ]
    err_log = self.output()['err_log'].path
    if self.cfg['cluster_exec']:
        pipeline_utils.cluster_command_call(
            self,
            cmd,
            threads=self.cfg['max_threads'],
            ram=16,
            cfg=self.cfg,
            err_log=err_log)
    else:
        pipeline_utils.command_call(cmd, err_log=err_log)
def run(self):
    """Build and launch the GATK4 ``Mutect2`` command for this case.

    The normal BAM and ``-normal`` sample name are added only when
    ``self.matched_n`` is truthy; every other argument is shared.
    """
    for target in self.output():
        pipeline_utils.confirm_path(target.path)

    java_opts = ('"-Xmx8g -Xms8g -XX:+UseSerialGC -Djava.io.tmpdir=%s"'
                 % self.cfg['tmp_dir'])
    cmd = [
        self.cfg['gatk4_location'],
        '--java-options', java_opts,
        'Mutect2',
        '-R', self.cfg['fasta_file'],
        '-I', self.input()[0][0].path,
        '-tumor', self.case + '_T',
    ]
    if self.matched_n:
        cmd += ['-I', self.input()[1][0].path, '-normal', self.case + '_N']
    cmd += [
        '--germline-resource', self.cfg['germline_resource'],
        '--af-of-alleles-not-in-resource', '0.0000025',
        '-L', self.cfg['library_bed'],
        '-pon', self.input()[-1].path,
        '--native-pair-hmm-threads', self.max_threads,
        '-O', self.output()[0].path,
    ]
    pipeline_utils.command_call(cmd, self.output(),
                                threads_needed=self.max_threads)
def run(self):
    """Launch GATK ``CreateSomaticPanelOfNormals`` over all normal VCFs.

    One ``-vcfs`` argument is appended per entry of ``self.input()``. The
    command is dispatched through the cluster helper when
    ``cfg['cluster_exec']`` is truthy, otherwise it is run through
    ``pipeline_utils.command_call``.
    """
    tmp_dir = self.cfg['tmp_dir']
    cmd = [
        'java',
        '-Dsamjdk.use_async_io_read_samtools=false',
        '-Dsamjdk.use_async_io_write_samtools=true',
        '-Dsamjdk.use_async_io_write_tribble=false',
        '-Dsamjdk.compression_level=2',
        '-Djava.io.tmpdir=%s' % tmp_dir,
        '-jar',
        '/root/pipeline/resources/broad/gatk-4.0.12.0/gatk-package-4.0.12.0-local.jar',
        'CreateSomaticPanelOfNormals',
        '-O', self.output()['mutect2_pon'].path,
        '--tmp-dir', tmp_dir,
    ]

    per_case = self.input()
    for case in per_case:
        normal_vcf = per_case[case]['mutect2_normal']['mutect2_normal']
        cmd.extend(['-vcfs', normal_vcf.path])

    err_log = self.output()['err_log'].path
    if not self.cfg['cluster_exec']:
        pipeline_utils.command_call(cmd, err_log=err_log)
        return
    pipeline_utils.cluster_command_call(
        self, cmd, threads=1, ram=12, cfg=self.cfg, err_log=err_log)
def run(self):
    """Configure and then execute the Manta workflow for this case.

    Step one runs ``configManta.py`` to set up the run directory under
    ``<output_dir>/<case>/variant_prep/manta``. Step two runs the
    ``runWorkflow.py`` found in that directory, through the cluster helper
    when ``cfg['cluster_exec']`` is truthy.
    """
    run_dir = os.path.join(self.cfg['output_dir'], self.case,
                           'variant_prep', 'manta')
    bams = self.input()

    # TODO add logic for exome to handle multiple sequencing preps
    configure_cmd = [
        '$MANTA/bin/configManta.py',
        '--exome',
        '--referenceFasta', self.cfg['fasta_file'],
        '--normalBam', bams['N']['preprocess']['bam'].path,
        '--tumorBam', bams['T']['preprocess']['bam'].path,
        '--rundir', run_dir,
    ]
    # The configuration step is always run through command_call.
    pipeline_utils.command_call(configure_cmd)

    workflow_cmd = [
        os.path.join(run_dir, 'runWorkflow.py'),
        '-j', self.max_threads,
    ]
    err_log = self.output()['err_log'].path
    if not self.cfg['cluster_exec']:
        pipeline_utils.command_call(workflow_cmd, err_log=err_log)
        return
    pipeline_utils.cluster_command_call(
        self,
        workflow_cmd,
        threads=self.cfg['max_threads'],
        ram=12,
        cfg=self.cfg,
        err_log=err_log)
def run(self):
    """Create the Picard sequence dictionary for the reference FASTA.

    Runs ``CreateSequenceDictionary`` writing to the ``picard_index``
    output, then copies the result next to the FASTA as ``<prefix>.dict``,
    where the prefix is the FASTA path up to its first ``.fa``.
    """
    # self.output() is indexed as a mapping further down, so the target
    # path must be read from the 'picard_index' entry as well; the former
    # self.output().path cannot work on the same object.
    dict_path = self.output()['picard_index'].path

    cmd = [
        'java', '-jar', '$PICARD', 'CreateSequenceDictionary',
        'R=%s' % self.cfg['fasta_file'],
        'O=%s' % dict_path,
    ]
    if self.cfg['cluster_exec']:
        pipeline_utils.cluster_command_call(
            self, cmd, threads=1, ram=5, cfg=self.cfg)
    else:
        pipeline_utils.command_call(cmd)

    shutil.copyfile(dict_path,
                    self.cfg['fasta_file'].split('.fa')[0] + '.dict')
def run(self):
    """Configure and then execute the Strelka somatic workflow.

    Step one runs ``configureStrelkaSomaticWorkflow.py`` with the Manta
    indel candidates to set up
    ``<output_dir>/<case>/variant_prep/strelka``. Step two runs the
    ``runWorkflow.py`` found in that directory in ``local`` mode, through
    the cluster helper when ``cfg['cluster_exec']`` is truthy.
    """
    run_dir = os.path.join(self.cfg['output_dir'], self.case,
                           'variant_prep', 'strelka')
    deps = self.input()

    configure_cmd = [
        '$STRELKA/bin/configureStrelkaSomaticWorkflow.py',
        '--exome',
        '--referenceFasta', self.cfg['fasta_file'],
        '--normalBam', deps['N']['preprocess']['bam'].path,
        '--tumorBam', deps['T']['preprocess']['bam'].path,
        '--indelCandidates', deps['manta']['manta'].path,
        '--rundir', run_dir,
    ]
    # The configuration step is always run through command_call.
    pipeline_utils.command_call(configure_cmd)

    threads = self.cfg['max_threads']
    workflow_cmd = [
        os.path.join(run_dir, 'runWorkflow.py'),
        '-m', 'local',
        '-j', threads,
    ]
    err_log = self.output()['err_log'].path
    if not self.cfg['cluster_exec']:
        pipeline_utils.command_call(workflow_cmd, err_log=err_log)
        return
    pipeline_utils.cluster_command_call(
        self, workflow_cmd, threads=threads, ram=12, cfg=self.cfg,
        err_log=err_log)
def run(self):
    """Trim one lane's paired FASTQs with ``trim_galore`` and run FastQC.

    The FASTQ paths come from ``cfg['cases'][case][sample][lane]``; output
    directories are taken from the first ``fastqc`` and ``trimgalore``
    targets.
    """
    targets = self.output()
    lane_cfg = self.cfg['cases'][self.case][self.sample][self.lane]
    fastqc_target = targets['fastqc'][0].path
    trimmed_target = targets['trimgalore'][0].path

    cmd = [
        'trim_galore',
        '--fastqc',
        # Deliberately one token: the nested quotes carry the FastQC
        # arguments through as a single value.
        '--fastqc_args "--outdir %s"' % os.path.dirname(fastqc_target),
        '--paired',
        '-o', os.path.dirname(trimmed_target),
        '--basename', '%s_%s_%s' % (self.case, self.sample, self.lane),
        '--gzip',
        lane_cfg['fastq1'],
        lane_cfg['fastq2'],
    ]

    for path in (trimmed_target, fastqc_target):
        pipeline_utils.confirm_path(path)

    err_log = targets['err_log'].path
    if not self.cfg['cluster_exec']:
        pipeline_utils.command_call(cmd, err_log=err_log)
        return
    pipeline_utils.cluster_command_call(
        self, cmd, threads=1, ram=4, cfg=self.cfg, err_log=err_log)
def run(self):
    """Index the first input BAM with ``samtools index -b``."""
    index_target = self.output()[1]
    pipeline_utils.confirm_path(index_target.path)

    bam_path = self.input()[0].path
    samtools = self.cfg['samtools_location']
    pipeline_utils.command_call(
        [samtools, 'index', '-b', bam_path],
        self.output(),
        sleep_time=0.5,
    )
def run(self):
    """Convert pindel output to VCF with ``pindel2vcf``.

    The ``-P`` prefix is the first input's path with everything from its
    last underscore onwards removed (empty if there is no underscore).
    """
    first_input = self.input()[0].path
    # rpartition yields '' as the head when no '_' is present, matching
    # '_'.join(path.split('_')[:-1]).
    pindel_input = first_input.rpartition('_')[0]

    vcf_target = self.output()
    pipeline_utils.confirm_path(vcf_target.path)

    cmd = ['./packages/pindel/pindel2vcf']
    cmd += ['-r', self.cfg['fasta_file']]
    cmd += ['-G']
    cmd += ['-R', self.cfg['base_name']]
    cmd += ['-d', 'idk']
    cmd += ['-P', pindel_input]
    cmd += ['-v', self.output().path]
    pipeline_utils.command_call(cmd, [self.output()])
def run(self):
    """Build the CNVkit ``reference`` from the normal coverage files.

    The coverage inputs are given as the glob
    ``<project_dir>/output/cnvkit/coverage/*N.*targetcoverage.cnn``, passed
    through as a single unexpanded token.
    """
    pipeline_utils.confirm_path(self.output().path)

    coverage_glob = os.path.join(self.project_dir, 'output', 'cnvkit',
                                 'coverage', '*N.*targetcoverage.cnn')
    # Build the argument list directly: formatting one string and splitting
    # it on spaces tore apart any path that itself contained a space.
    cmd = [
        'python3', self.cfg['cnvkit_location'], 'reference',
        coverage_glob,
        '--fasta', self.cfg['fasta_file'],
        '-o', self.output().path,
    ]
    # NOTE(review): most sibling tasks wrap a single target as
    # [self.output()]; the bare target is kept here as in the original.
    pipeline_utils.command_call(cmd, self.output())
def run(self):
    """Filter the Mutect2 calls with GATK4 ``FilterMutectCalls``."""
    filtered = self.output()
    pipeline_utils.confirm_path(filtered.path)

    java_opts = ('"-Xmx8g -Xms8g -XX:+UseSerialGC -Djava.io.tmpdir=%s"'
                 % self.cfg['tmp_dir'])
    cmd = [self.cfg['gatk4_location'], '--java-options', java_opts]
    cmd.append('FilterMutectCalls')
    cmd.extend(['-V', self.input()[0].path])
    cmd.extend(['-O', self.output().path])

    pipeline_utils.command_call(cmd, [self.output()], sleep_time=1.1)
def run(self):
    """Sort a VCF with Picard ``SortVcf``.

    The first input is the VCF to sort; the second is passed as
    ``SEQUENCE_DICTIONARY``.
    """
    pipeline_utils.confirm_path(self.output().path)

    sources = self.input()
    picard_args = [
        'SortVcf',
        'I=%s' % sources[0].path,
        'O=%s' % self.output().path,
        'SEQUENCE_DICTIONARY=%s' % sources[1].path,
    ]
    cmd = ['java', '-jar', self.cfg['picard_location']] + picard_args

    pipeline_utils.command_call(cmd, [self.output()],
                                threads_needed=self.max_threads)
def run(self):
    """Create the Picard sequence dictionary for the reference FASTA.

    After ``CreateSequenceDictionary`` has run, the dictionary is also
    copied next to the FASTA as ``<prefix>.dict``, where the prefix is the
    FASTA path up to its first ``.fa``.
    """
    fasta = self.cfg['fasta_file']
    cmd = ['java', '-jar', self.cfg['picard_location'],
           'CreateSequenceDictionary']
    cmd.append('R=%s' % fasta)
    cmd.append('O=%s' % self.output().path)
    pipeline_utils.command_call(cmd, [self.output()], sleep_time=0.1)

    # partition() gives the text before the first '.fa', or the whole
    # string if there is none -- the same as split('.fa')[0].
    fasta_prefix = self.cfg['fasta_file'].partition('.fa')[0]
    shutil.copyfile(self.output().path, fasta_prefix + '.dict')
def run(self):
    """Align trimmed paired reads with ``bwa mem`` and sort into a BAM.

    The command pipes ``bwa mem`` through ``samtools view -bh`` into
    ``samtools sort``. The same token list is used for both execution
    modes; only the dispatch helper differs.
    """
    trimmed = self.input()['trim']['trimgalore']
    fastq1, fastq2 = trimmed[0].path, trimmed[1].path
    read_group = pipeline_utils.assign_rg(fastq1, fastq2, self.case,
                                          self.sample, self.cfg)

    on_cluster = self.cfg['cluster_exec']
    cmd = [
        'bwa', 'mem', '-M',
        '-t', self.cfg['max_threads'],
        '-R', "'%s'" % read_group,
        self.cfg['fasta_file'],
        fastq1,
        fastq2,
        '|', 'samtools', 'view', '-bh',
        '|', 'samtools', 'sort', '-o', self.output()['bwa_mem'].path,
    ]
    err_log = self.output()['err_log'].path

    if on_cluster:
        pipeline_utils.cluster_command_call(
            self, cmd, threads=self.cfg['max_threads'], ram=8,
            cfg=self.cfg, err_log=err_log)
    else:
        pipeline_utils.command_call(cmd, err_log=err_log)
def run(self):
    """Trim a FASTQ pair with the tool at ``cfg['trim_location']``.

    ``self.fastq_file`` holds both mate paths separated by a tab. Results
    go to ``<project_dir>/output/<sample without its last two chars>``.
    """
    for target in self.output():
        pipeline_utils.confirm_path(target.path)

    mates = self.fastq_file.split('\t')
    out_dir = os.path.join(self.project_dir, 'output', self.sample[:-2])

    cmd = [self.cfg['trim_location'], '--paired']
    cmd += [mates[0], mates[1]]
    cmd += ['-o', out_dir]
    pipeline_utils.command_call(cmd, self.output(), sleep_time=0.05)
def run(self):
    """Score microsatellite instability with ``msisensor msi``.

    Uses the bundled microsatellite list, the first input as the tumor BAM
    (``-t``) and ``cfg['library_bed']`` as the region filter (``-e``).
    """
    result = self.output()
    pipeline_utils.confirm_path(result.path)

    options = (
        ('-d', './packages/msisensor/microsatellites.list'),
        ('-t', self.input()[0].path),
        ('-e', self.cfg['library_bed']),
        ('-o', self.output().path),
    )
    cmd = ['./packages/msisensor/binary/msisensor.linux', 'msi']
    for flag, value in options:
        cmd += [flag, value]

    pipeline_utils.command_call(cmd, [self.output()])
def run(self):
    """Apply the bundled ``fpfilter.pl`` to the called variants.

    The VCF is the first target of the first input, the BAM the first
    target of the second input; the sample name is ``<case>_T``.
    """
    pipeline_utils.confirm_path(self.output().path)

    sources = self.input()
    vcf_path = sources[0][0].path
    bam_path = sources[1][0].path

    cmd = ['./packages/fpfilter/fpfilter.pl']
    cmd.extend(['--vcf-file', vcf_path])
    cmd.extend(['--bam-file', bam_path])
    cmd.extend(['--reference', self.cfg['fasta_file']])
    cmd.extend(['--sample', self.case + '_T'])
    cmd.extend(['--output', self.output().path])

    pipeline_utils.command_call(cmd, [self.output()])
def run(self):
    """Run CNVkit ``fix`` on this case's coverage files.

    Positional arguments are the two targets of the first input followed
    by the second input, in that order.
    """
    pipeline_utils.confirm_path(self.output().path)

    sources = self.input()
    coverage_pair = sources[0]
    positional = [
        coverage_pair[0].path,
        coverage_pair[1].path,
        sources[1].path,
    ]
    cmd = (['python3', self.cfg['cnvkit_location'], 'fix']
           + positional
           + ['-o', self.output().path])

    # The bare target (not a list) is passed on, as in the original.
    pipeline_utils.command_call(cmd, self.output())
def run(self):
    """Run FastQC on one FASTQ file of the sample.

    Input and report both live under
    ``<project_dir>/output/<sample without its last two chars>``; the
    report goes into its ``fastqc`` subdirectory.
    """
    for target in self.output():
        pipeline_utils.confirm_path(target.path)
    # NOTE(review): looks redundant with the loop above -- kept to preserve
    # the original call sequence.
    pipeline_utils.confirm_path(self.output()[1].path)

    sample_dir = (self.project_dir, 'output', self.sample[:-2])
    report_dir = os.path.join(*(sample_dir + ('fastqc',)))
    fastq_path = os.path.join(*(sample_dir + (self.fastq_file,)))

    cmd = [self.cfg['fastqc_location'], '--outdir=%s' % report_dir,
           fastq_path]
    pipeline_utils.command_call(cmd, self.output(), sleep_time=0.1)
def run(self):
    """Build a panel of normals with GATK4 ``CreateSomaticPanelOfNormals``.

    One ``--vcfs`` argument is added for the first target of every input.
    """
    pipeline_utils.confirm_path(self.output().path)

    cmd = [self.cfg['gatk4_location'], 'CreateSomaticPanelOfNormals']
    for normal_vcf in self.input():
        cmd += ['--vcfs', normal_vcf[0].path]
    cmd += ['--output', self.output().path]

    pipeline_utils.command_call(cmd, [self.output()])
def run(self):
    """Run GATK4 ``BaseRecalibrator`` on the first input BAM.

    Known sites come from the ``known_vcf``, ``mills`` and ``kg`` config
    entries, in that order; the table is written to the third output.
    """
    table_path = self.output()[2].path
    pipeline_utils.confirm_path(table_path)

    java_opts = ('"-Xmx8g -Xms8g -XX:+UseSerialGC -Djava.io.tmpdir=%s"'
                 % self.cfg['tmp_dir'])
    cmd = [
        self.cfg['gatk4_location'],
        '--java-options', java_opts,
        'BaseRecalibrator',
        '-R', self.cfg['fasta_file'],
        '-I', self.input()[0].path,
    ]
    for resource in ('known_vcf', 'mills', 'kg'):
        cmd += ['--known-sites', self.cfg[resource]]
    cmd += ['-O', self.output()[2].path]

    pipeline_utils.command_call(cmd, self.output(), sleep_time=0.8)
def run(self):
    """Realign indels with GATK3 ``IndelRealigner``.

    Reads the first input BAM and the target intervals from the third
    input; the realigned BAM is written to the first output.
    """
    for index in (0, 1):
        pipeline_utils.confirm_path(self.output()[index].path)

    jvm = ['java', '-Xmx8g', '-Xms8g', '-XX:+UseSerialGC',
           '-Djava.io.tmpdir=%s' % self.cfg['tmp_dir']]
    tool = ['-jar', self.cfg['gatk3_location'], '-T', 'IndelRealigner']
    arguments = [
        '-R', self.cfg['fasta_file'],
        '-I', self.input()[0].path,
        '-known', self.cfg['mills'],
        '-known', self.cfg['kg'],
        '-targetIntervals', self.input()[2].path,
        '-o', self.output()[0].path,
    ]

    pipeline_utils.command_call(jvm + tool + arguments, self.output(),
                                sleep_time=0.7)
def run(self):
    """Merge the per-lane BAMs into one, or copy when there is one lane.

    With more than one input lane Picard ``MergeSamFiles`` is run. With
    one lane or none, each lane's BAM is copied to the ``merge_bams``
    output and an empty error log is written.
    """
    lanes = self.input()

    if len(lanes) <= 1:
        # Nothing to merge: copy the single lane through unchanged.
        for lane in lanes:
            shutil.copyfile(lanes[lane]['align']['bwa_mem'].path,
                            self.output()['merge_bams'].path)
        with open(self.output()['err_log'].path, 'w') as log_file:
            log_file.write('')
        return

    cmd = [
        'java', '-Djava.io.tmpdir=%s' % self.cfg['tmp_dir'],
        '-jar', '$PICARD', 'MergeSamFiles',
        'O=%s' % self.output()['merge_bams'].path,
    ]
    cmd.extend('I=%s' % lanes[lane]['align']['bwa_mem'].path
               for lane in lanes)

    err_log = self.output()['err_log'].path
    if self.cfg['cluster_exec']:
        pipeline_utils.cluster_command_call(
            self, cmd, threads=1, ram=5, cfg=self.cfg, err_log=err_log)
    else:
        pipeline_utils.command_call(cmd, err_log=err_log)
def run(self):
    """Remove duplicates from the first input BAM with Picard.

    ``MarkDuplicates`` writes the BAM to the first output and the metrics
    to the second, with ``REMOVE_DUPLICATES=true`` and index creation on.
    """
    for index in (0, 1):
        pipeline_utils.confirm_path(self.output()[index].path)

    jvm = ['java', '-Xmx8g', '-Xms8g', '-XX:+UseSerialGC',
           '-Djava.io.tmpdir=%s' % self.cfg['tmp_dir']]
    picard = ['-jar', self.cfg['picard_location'], 'MarkDuplicates']
    settings = [
        'I=%s' % self.input()[0].path,
        'O=%s' % self.output()[0].path,
        'M=%s' % self.output()[1].path,
        'CREATE_INDEX=true',
        'ASSUME_SORT_ORDER=coordinate',
        'TAGGING_POLICY=All',
        'REMOVE_DUPLICATES=true',
    ]

    pipeline_utils.command_call(jvm + picard + settings, self.output(),
                                sleep_time=0.4)
def run(self):
    """Create realignment targets with GATK3 ``RealignerTargetCreator``.

    Reads the first input BAM, uses the ``mills`` and ``kg`` config
    entries as known sites, and writes the intervals to the third output.
    """
    intervals_path = self.output()[2].path
    pipeline_utils.confirm_path(intervals_path)

    thread_count = self.max_threads
    cmd = [
        'java', '-Xmx8g', '-Xms8g', '-XX:+UseSerialGC',
        '-Djava.io.tmpdir=%s' % self.cfg['tmp_dir'],
        '-jar', self.cfg['gatk3_location'],
        '-T', 'RealignerTargetCreator',
        '-nt', str(thread_count),
        '-R', self.cfg['fasta_file'],
        '-I', self.input()[0].path,
    ]
    for resource in ('mills', 'kg'):
        cmd.extend(['--known', self.cfg[resource]])
    cmd.extend(['-o', self.output()[2].path])

    pipeline_utils.command_call(cmd, self.output(),
                                threads_needed=thread_count,
                                sleep_time=0.6)