def run(self):
    """Filter each pindel input down to records with enough supporting reads.

    For every input file, lines containing "ChrID" are selected with grep
    and piped through awk, which keeps rows whose 17th field is at least
    ``cfg['pindel_min_reads']``. The result is written to the output target
    at the same index as the input.

    One slot is reserved in the shared thread counter before any work is
    done and is released in a ``finally`` block, so a failure in the
    middle of the loop cannot leave the counter permanently incremented.
    """
    for output in self.output():
        pipeline_utils.confirm_path(output.path)

    # Sleep a random 0-3 s before reserving a slot (presumably to stagger
    # tasks that start at the same moment).
    time.sleep(random.uniform(0, 3))
    sys.stdout.flush()

    # Retry every 1.2 s until the counter update reports success.
    while not pipeline_utils.add_thread_count(global_vars.thread_file, 1):
        time.sleep(1.2)
    try:
        for i, input_file in enumerate(self.input()):
            grep_cmd = 'grep "ChrID" %s' % input_file.path
            awk_cmd = "awk '$17>=%s'" % self.cfg['pindel_min_reads']

            grep_proc = subprocess.Popen(grep_cmd,
                                         stdout=subprocess.PIPE,
                                         shell=True)
            awk_proc = subprocess.Popen(awk_cmd,
                                        stdout=subprocess.PIPE,
                                        stdin=grep_proc.stdout,
                                        shell=True)
            # Drop the parent's copy of the pipe so grep receives SIGPIPE
            # if awk exits first, and so the descriptor is not leaked.
            grep_proc.stdout.close()

            outs, _ = awk_proc.communicate()
            # Reap grep so it does not linger as a zombie process.
            grep_proc.wait()

            with open(self.output()[i].path, 'wb') as f:
                f.write(outs)
    finally:
        # Always hand the slot back, even when filtering failed.
        while not pipeline_utils.sub_thread_count(global_vars.thread_file, 1):
            time.sleep(1.2)
def run(self):
    """Call variants with VarDict and convert the result to VCF.

    When ``self.matched_n`` is truthy the paired pipeline is built
    (VarDict | testsomatic.R | var2vcf_paired.pl) using the tumor and
    normal inputs; otherwise the single-sample pipeline is built
    (VarDict | teststrandbias.R | var2vcf_valid.pl). In both cases the
    command ends with a redirect to ``<vcf_path>/vardict``.

    The command list contains '|' and '>' tokens, so command_call is
    presumably expected to run it through a shell -- confirm in
    pipeline_utils.
    """
    pipeline_utils.confirm_path(self.output().path)
    if self.matched_n:
        cmd = [
            './packages/VarDictJava/build/install/VarDict/bin/VarDict',
            '-G', self.cfg['fasta_file'],
            '-f', '0.01',
            '-N', self.case + '_T',
            '-b', '"%s|%s"' % (self.input()[0][0].path,
                               self.input()[1][0].path),
            '-z', '-F',
            '-c', '1', '-S', '2', '-E', '3', '-g', '4',
            self.cfg['library_bed'],
            '|', './packages/VarDictJava/VarDict/testsomatic.R',
            '|', './packages/VarDictJava/VarDict/var2vcf_paired.pl',
            '-N', '"%s|%s"' % (self.case + '_T', self.case + '_N'),
            '-f', '0.01',
            '> %s' % os.path.join(self.vcf_path, 'vardict')
        ]
    else:
        cmd = [
            './packages/VarDictJava/build/install/VarDict/bin/VarDict',
            '-G', self.cfg['fasta_file'],
            '-f', '0.01',
            '-N', self.case + '_T',
            '-b', self.input()[0][0].path,
            '-z',
            '-c', '1', '-S', '2', '-E', '3', '-g', '4',
            self.cfg['library_bed'],
            '|', './packages/VarDictJava/VarDict/teststrandbias.R',
            '|', './packages/VarDictJava/VarDict/var2vcf_valid.pl',
            '-N', self.case + '_T',
            # Was the bare token 'E', which var2vcf_valid.pl would see as a
            # stray positional argument rather than its -E switch.
            '-E',
            '-f', '0.01',
            '> %s' % os.path.join(self.vcf_path, 'vardict')
        ]
    pipeline_utils.command_call(cmd, [self.output()])
def run(self):
    """Align paired-end reads with bowtie2 and pipe the result to a BAM.

    The bowtie2 index prefix is ``<dir of cfg['fasta_file']>/index/
    <cfg['base_name']>``. The two mates are taken from the last entry of
    ``self.input()``. bowtie2 output is piped into ``samtools view -bh -``
    and redirected to the task's output path.
    """
    pipeline_utils.confirm_path(self.output().path)

    # os.path.dirname keeps the leading '/' of an absolute FASTA path and
    # also copes with a bare filename; the previous split('/')/join
    # round-trip dropped the root and raised on a path without a slash.
    fasta_dir = os.path.dirname(self.cfg['fasta_file'])

    cmd = [
        self.cfg['bowtie_location'],
        '-x', os.path.join(fasta_dir, 'index', self.cfg['base_name']),
        '-1', self.input()[-1][0][0].path,
        '-2', self.input()[-1][1][0].path,
        '-p', self.max_threads,
        # The comma after this flag was missing, so Python concatenated it
        # with the following '|' into the single token
        # '--very-sensitive-local|'.
        '--very-sensitive-local',
        '|',
        self.cfg['samtools_location'], 'view', '-bh', '-',
        '>', self.output().path
    ]
    pipeline_utils.command_call(cmd, [self.output()],
                                cwd=os.getcwd(),
                                threads_needed=self.max_threads,
                                sleep_time=0.2)
def run(self):
    """Run GATK4 Mutect2 on the tumor sample, optionally with a normal.

    The tumor BAM is ``self.input()[0][0]``; when ``self.matched_n`` is
    truthy the normal BAM ``self.input()[1][0]`` is added with ``-normal``.
    The last input is passed as the panel of normals and the first output
    target receives the calls.
    """
    for target in self.output():
        pipeline_utils.confirm_path(target.path)

    has_normal = self.matched_n
    java_opts = ('"-Xmx8g -Xms8g -XX:+UseSerialGC -Djava.io.tmpdir=%s"'
                 % self.cfg['tmp_dir'])

    # Arguments shared by the paired and tumor-only invocations.
    cmd = [
        self.cfg['gatk4_location'], '--java-options', java_opts,
        'Mutect2',
        '-R', self.cfg['fasta_file'],
        '-I', self.input()[0][0].path,
        '-tumor', self.case + '_T',
    ]
    if has_normal:
        cmd += ['-I', self.input()[1][0].path, '-normal', self.case + '_N']
    cmd += [
        '--germline-resource', self.cfg['germline_resource'],
        '--af-of-alleles-not-in-resource', '0.0000025',
        '-L', self.cfg['library_bed'],
        '-pon', self.input()[-1].path,
        '--native-pair-hmm-threads', self.max_threads,
        '-O', self.output()[0].path,
    ]
    pipeline_utils.command_call(cmd, self.output(),
                                threads_needed=self.max_threads)
def run(self):
    """Run microsatellite-instability callers for this case.

    MANTIS is run only when ``self.matched_n`` is not the empty string,
    using the tumor (``input()[0][0]``) and normal (``input()[1][0]``)
    files. The mSINGS single-sample script is then run on the tumor file
    and its ``MSI_Analysis.txt`` result is moved to
    ``<vcf_path>/<case>_msings.txt``.
    """
    for output in self.output():
        pipeline_utils.confirm_path(output.path)
    if self.matched_n != '':
        cmd = [
            'python3', './packages/MANTIS/mantis.py', '-b',
            './packages/msings/doc/mSINGS_TCGA_MANTIS.bed', '--genome',
            self.cfg['fasta_file'], '-t', self.input()[0][0].path, '-n',
            self.input()[1][0].path, '-mrq', '20.0', '-mlq', '25.0', '-mlc',
            '20', '-mrr', '1', '-o', self.output()[0].path
        ]
        pipeline_utils.command_call(cmd, self.output())
    # Disabled branch that wrote a list of unmatched tumor BAMs for the
    # mSINGS baseline:
    # else:
    #     tumor_bams_file = os.path.join(self.project_dir, 'output', 'msings', 'baseline', 'tumor_bams.txt')
    #     with open(tumor_bams_file, 'w') as f:
    #         tumor_bams_list = [os.path.join(self.project_dir, 'output', self.case, 'alignment', case_name + '_T_recalibrated.bam') for case_name in self.case_dict if self.case_dict[case_name]['N'] == '']
    #         f.write('\n'.join(tumor_bams_list))

    # NOTE(review): with the `else:` above commented out, the mSINGS section
    # below is laid out at function level, i.e. it runs for every case.
    # Confirm this matches the intended nesting.
    cmd = [
        './packages/msings/scripts/run_msings_single_sample.sh',
        self.input()[0][0].path,
        './packages/msings/doc/mSINGS_TCGA.msi_intervals',
        './packages/msings/doc/mSINGS_TCGA.bed', self.cfg['fasta_file'],
        './packages/msings/doc/mSINGS_TCGA.baseline',
        os.path.join(self.project_dir, 'output', 'msings', 'tumor')
    ]
    pipeline_utils.command_call(cmd, self.output())
    # Move the analysis file out of the mSINGS working tree into the
    # case's VCF directory under the pipeline's own naming scheme.
    os.rename(
        os.path.join(self.project_dir, 'output', 'msings', 'tumor',
                     self.case + '_T_recalibrated',
                     self.case + '_T_recalibrated.MSI_Analysis.txt'),
        os.path.join(self.vcf_path, self.case + '_msings.txt'))
def run(self):
    """Filter pindel calls for all samples via misc_utils.filter_pindel.

    Every output except the last is treated as a per-sample file; its
    sample name is the file name with everything from '.pindel.bed'
    onwards removed. The last output is passed as the combined
    all-samples file. Thresholds come from the ``pindel_*`` config keys.

    One slot is reserved in the shared thread counter before filtering and
    is released in a ``finally`` block, so an exception in the filter
    cannot leave the counter permanently incremented.
    """
    for output in self.output():
        pipeline_utils.confirm_path(output.path)

    # Sleep a random 0-3 s before reserving a slot (presumably to stagger
    # tasks that start at the same moment).
    time.sleep(random.uniform(0, 3))
    sys.stdout.flush()

    # Retry every 1.2 s until the counter update reports success.
    while not pipeline_utils.add_thread_count(global_vars.thread_file, 1):
        time.sleep(1.2)
    try:
        sample_dict = {
            output.path.split('/')[-1].split('.pindel.bed')[0]: output.path
            for output in self.output()[:-1]
        }
        misc_utils.filter_pindel(
            pindel_files=[input_file.path for input_file in self.input()],
            sample_dict=sample_dict,
            project_dir=self.project_dir,
            all_samples_output=self.output()[-1].path,
            min_reads=self.cfg['pindel_min_reads'],
            min_qual=self.cfg['pindel_min_qual'],
            max_inv_length=self.cfg['pindel_max_inv_length'])
    finally:
        # Always hand the slot back, even when filtering failed.
        while not pipeline_utils.sub_thread_count(global_vars.thread_file, 1):
            time.sleep(1.2)
def run(self):
    """Build the CNVkit reference BED files under output/cnvkit/ref.

    Runs three CNVkit sub-commands in order:

    1. ``target``     -- annotate/split the library BED into targets.bed
    2. ``access``     -- compute accessible regions into access.bed
    3. ``antitarget`` -- derive antitargets.bed from the two files above

    All three commands are built as argument lists. The last two were
    previously formatted into a string and split on spaces, which broke
    any configured path containing a space.
    """
    for output in self.output():
        pipeline_utils.confirm_path(output.path)

    ref_dir = os.path.join(self.project_dir, 'output', 'cnvkit', 'ref')
    targets_bed = os.path.join(ref_dir, 'targets.bed')
    access_bed = os.path.join(ref_dir, 'access.bed')
    antitargets_bed = os.path.join(ref_dir, 'antitargets.bed')

    cmd = [
        'python3', self.cfg['cnvkit_location'], 'target',
        '--annotate', self.cfg['refFlat'],
        '--split',
        '-o', targets_bed,
        self.cfg['library_bed']
    ]
    pipeline_utils.command_call(cmd, self.output())

    cmd = [
        'python3', self.cfg['cnvkit_location'], 'access',
        self.cfg['fasta_file'],
        '-o', access_bed
    ]
    pipeline_utils.command_call(cmd, self.output())

    cmd = [
        'python3', self.cfg['cnvkit_location'], 'antitarget',
        targets_bed,
        '-g', access_bed,
        '-o', antitargets_bed
    ]
    pipeline_utils.command_call(cmd, self.output())
def run(self):
    """Trim one lane of paired-end FASTQ files with trim_galore + FastQC.

    The FASTQ pair is looked up in ``cfg['cases'][case][sample][lane]``.
    Trimmed reads go to the directory of the first 'trimgalore' output and
    FastQC reports to the directory of the first 'fastqc' output. The
    command is dispatched through the cluster helper when
    ``cfg['cluster_exec']`` is truthy, and run locally otherwise.
    """
    fastqc_dir = os.path.dirname(self.output()['fastqc'][0].path)
    trimmed_dir = os.path.dirname(self.output()['trimgalore'][0].path)
    lane_cfg = self.cfg['cases'][self.case][self.sample][self.lane]

    cmd = [
        'trim_galore',
        '--fastqc',
        '--fastqc_args "--outdir %s"' % fastqc_dir,
        '--paired',
        '-o', trimmed_dir,
        '--basename', '%s_%s_%s' % (self.case, self.sample, self.lane),
        '--gzip',
        lane_cfg['fastq1'],
        lane_cfg['fastq2'],
    ]

    pipeline_utils.confirm_path(self.output()['trimgalore'][0].path)
    pipeline_utils.confirm_path(self.output()['fastqc'][0].path)

    if not self.cfg['cluster_exec']:
        pipeline_utils.command_call(
            cmd, err_log=self.output()['err_log'].path)
    else:
        pipeline_utils.cluster_command_call(
            self, cmd, threads=1, ram=4, cfg=self.cfg,
            err_log=self.output()['err_log'].path)
def run(self):
    """Create a BAI index for the first input BAM with ``samtools index``."""
    pipeline_utils.confirm_path(self.output()[1].path)

    bam_path = self.input()[0].path
    index_cmd = [self.cfg['samtools_location'], 'index', '-b', bam_path]

    pipeline_utils.command_call(index_cmd, self.output(), sleep_time=0.5)
def run(self):
    """Convert pindel output to VCF with pindel2vcf.

    The pindel prefix passed to ``-P`` is the first input's path with its
    final '_'-separated component removed.
    """
    # Everything before the last underscore; empty when there is none.
    first_input = self.input()[0].path
    pindel_prefix = first_input.rpartition('_')[0]

    pipeline_utils.confirm_path(self.output().path)

    vcf_cmd = ['./packages/pindel/pindel2vcf']
    vcf_cmd += ['-r', self.cfg['fasta_file']]
    vcf_cmd += ['-G']
    vcf_cmd += ['-R', self.cfg['base_name']]
    vcf_cmd += ['-d', 'idk']
    vcf_cmd += ['-P', pindel_prefix]
    vcf_cmd += ['-v', self.output().path]

    pipeline_utils.command_call(vcf_cmd, [self.output()])
def run(self):
    """Build the pooled CNVkit reference from the normal coverage files.

    Runs ``cnvkit reference`` over the glob
    ``output/cnvkit/coverage/*N.*targetcoverage.cnn`` with the configured
    FASTA and writes to the task's output path. The glob is passed through
    as-is, so it relies on command_call expanding it (presumably via a
    shell -- confirm in pipeline_utils).

    The command is built as an argument list rather than formatted into a
    string and split on spaces, so configured paths containing spaces stay
    intact. The single output target is wrapped in a list to match the
    other single-output tasks in this file.
    """
    pipeline_utils.confirm_path(self.output().path)

    normal_coverage_glob = os.path.join(
        self.project_dir, 'output', 'cnvkit', 'coverage',
        '*N.*targetcoverage.cnn')

    cmd = [
        'python3', self.cfg['cnvkit_location'], 'reference',
        normal_coverage_glob,
        '--fasta', self.cfg['fasta_file'],
        '-o', self.output().path
    ]
    pipeline_utils.command_call(cmd, [self.output()])
def run(self):
    """Sort a VCF with Picard SortVcf.

    The first input is the VCF to sort and the second input is passed as
    the sequence dictionary.
    """
    sorted_vcf = self.output().path
    pipeline_utils.confirm_path(sorted_vcf)

    picard = ['java', '-jar', self.cfg['picard_location'], 'SortVcf']
    picard_args = [
        'I=%s' % self.input()[0].path,
        'O=%s' % self.output().path,
        'SEQUENCE_DICTIONARY=%s' % self.input()[1].path,
    ]

    pipeline_utils.command_call(picard + picard_args, [self.output()],
                                threads_needed=self.max_threads)
def run(self):
    """Apply GATK4 FilterMutectCalls to the first input VCF."""
    pipeline_utils.confirm_path(self.output().path)

    jvm_flags = ('"-Xmx8g -Xms8g -XX:+UseSerialGC -Djava.io.tmpdir=%s"'
                 % self.cfg['tmp_dir'])
    gatk = [self.cfg['gatk4_location'], '--java-options', jvm_flags]
    tool_args = [
        'FilterMutectCalls',
        '-V', self.input()[0].path,
        '-O', self.output().path,
    ]

    pipeline_utils.command_call(gatk + tool_args, [self.output()],
                                sleep_time=1.1)
def run(self):
    """Run the configured trimmer on a tab-separated pair of FASTQ paths.

    ``self.fastq_file`` holds both mates separated by a tab. Output goes
    to ``<project_dir>/output/<sample without its last two characters>``.
    """
    for target in self.output():
        pipeline_utils.confirm_path(target.path)

    mates = self.fastq_file.split('\t')
    sample_dir = os.path.join(self.project_dir, 'output', self.sample[:-2])

    trim_cmd = [self.cfg['trim_location'], '--paired']
    trim_cmd.append(mates[0])
    trim_cmd.append(mates[1])
    trim_cmd.extend(['-o', sample_dir])

    pipeline_utils.command_call(trim_cmd, self.output(), sleep_time=0.05)
def run(self):
    """Run fpfilter.pl on the tumor VCF against the tumor BAM.

    The VCF is ``self.input()[0][0]`` and the BAM is
    ``self.input()[1][0]``; the sample name is ``<case>_T``.
    """
    pipeline_utils.confirm_path(self.output().path)

    # (flag, value) pairs in the order they appear on the command line.
    options = (
        ('--vcf-file', self.input()[0][0].path),
        ('--bam-file', self.input()[1][0].path),
        ('--reference', self.cfg['fasta_file']),
        ('--sample', self.case + '_T'),
        ('--output', self.output().path),
    )
    filter_cmd = ['./packages/fpfilter/fpfilter.pl']
    for flag, value in options:
        filter_cmd.extend((flag, value))

    pipeline_utils.command_call(filter_cmd, [self.output()])
def run(self):
    """Run ``cnvkit fix`` to produce the bias-corrected copy ratios.

    The positional arguments are ``self.input()[0][0]``,
    ``self.input()[0][1]`` (presumably the tumor target and antitarget
    coverage files -- confirm against requires()) and ``self.input()[1]``
    as the reference; the result is written to the task's output path.

    The single output target is wrapped in a list when handed to
    command_call, matching the other single-output tasks in this file.
    """
    pipeline_utils.confirm_path(self.output().path)
    cmd = [
        'python3', self.cfg['cnvkit_location'], 'fix',
        self.input()[0][0].path,
        self.input()[0][1].path,
        self.input()[1].path,
        '-o', self.output().path
    ]
    pipeline_utils.command_call(cmd, [self.output()])
def run(self):
    """Run msisensor in tumor-only mode over the library BED regions.

    Threading (msisensor's ``-b`` flag and command_call's
    ``threads_needed``) is intentionally not passed; it was disabled in the
    original code.
    """
    result_path = self.output().path
    pipeline_utils.confirm_path(result_path)

    msi_cmd = ['./packages/msisensor/binary/msisensor.linux', 'msi']
    msi_cmd += ['-d', './packages/msisensor/microsatellites.list']
    msi_cmd += ['-t', self.input()[0].path]
    msi_cmd += ['-e', self.cfg['library_bed']]
    msi_cmd += ['-o', self.output().path]

    pipeline_utils.command_call(msi_cmd, [self.output()])
def run(self):
    """Run FastQC on this sample's FASTQ file.

    Both the FASTQ location and the report directory live under
    ``<project_dir>/output/<sample without its last two characters>``.
    """
    for target in self.output():
        pipeline_utils.confirm_path(target.path)
    # The second output is confirmed once more, as in the original code.
    pipeline_utils.confirm_path(self.output()[1].path)

    report_dir = os.path.join(self.project_dir, 'output', self.sample[:-2],
                              'fastqc')
    fastq_path = os.path.join(self.project_dir, 'output', self.sample[:-2],
                              self.fastq_file)

    qc_cmd = [self.cfg['fastqc_location']]
    qc_cmd.append('--outdir=%s' % report_dir)
    qc_cmd.append(fastq_path)

    pipeline_utils.command_call(qc_cmd, self.output(), sleep_time=0.1)
def run(self):
    """Compute CNVkit coverage files for the tumor and, if present, normal.

    Two ``cnvkit coverage`` runs are made for ``self.input()[1][0]``, one
    per file in ``self.input()[0]`` (presumably the target and antitarget
    BEDs -- confirm against requires()), filling outputs 0 and 1. When the
    case has a non-empty 'N' entry in ``case_dict`` the same two runs are
    repeated for ``self.input()[2][0]``, filling outputs 2 and 3.
    """
    for target in self.output():
        pipeline_utils.confirm_path(target.path)

    def run_coverage(out_idx, bam_idx, bed_idx):
        # One `cnvkit coverage` invocation: BAM first, then the BED file.
        coverage_cmd = [
            'python3', self.cfg['cnvkit_location'], 'coverage',
            '-o', self.output()[out_idx].path,
            '-p', self.max_threads,
            self.input()[bam_idx][0].path,
            self.input()[0][bed_idx].path,
        ]
        pipeline_utils.command_call(coverage_cmd, self.output(),
                                    threads_needed=self.max_threads)

    run_coverage(0, 1, 0)
    run_coverage(1, 1, 1)

    if self.case_dict[self.case]['N'] != '':
        run_coverage(2, 2, 0)
        run_coverage(3, 2, 1)
def run(self):
    """Build a Mutect2 panel of normals with CreateSomaticPanelOfNormals.

    Each entry of ``self.input()`` contributes its first target as one
    ``--vcfs`` argument.
    """
    pipeline_utils.confirm_path(self.output().path)

    pon_cmd = [self.cfg['gatk4_location'], 'CreateSomaticPanelOfNormals']
    for normal_vcf in self.input():
        pon_cmd.extend(['--vcfs', normal_vcf[0].path])
    pon_cmd.extend(['--output', self.output().path])

    pipeline_utils.command_call(pon_cmd, [self.output()])
def run(self):
    """Assemble mutation / CNV matrices via misc_utils.create_mut_mats.

    MAF paths come from ``self.input()[0]`` (first entry skipped), CNV
    paths from the last target of each entry in ``self.input()[1]`` (first
    entry skipped), and pindel paths from every target in
    ``self.input()[2]``. The three outputs receive, in order, the mutation
    matrix, the CNV matrix and the mutation counts.
    """
    for target in self.output():
        pipeline_utils.confirm_path(target.path)

    maf_paths = []
    for maf_target in self.input()[0][1:]:
        maf_paths.append(maf_target.path)

    cnv_paths = []
    for cnv_targets in self.input()[1][1:]:
        cnv_paths.append(cnv_targets[-1].path)

    pindel_paths = []
    for pindel_target in self.input()[2]:
        pindel_paths.append(pindel_target.path)

    misc_utils.create_mut_mats(
        mafs=maf_paths,
        cnvs=cnv_paths,
        pindel=pindel_paths,
        mut_mat_file=self.output()[0].path,
        cnv_mat_file=self.output()[1].path,
        mut_counts_file=self.output()[2].path)
def run(self):
    """Run GATK4 BaseRecalibrator on the first input BAM.

    Known sites are taken from the 'known_vcf', 'mills' and 'kg' config
    entries; the recalibration table is written to the third output.
    """
    pipeline_utils.confirm_path(self.output()[2].path)

    jvm_flags = ('"-Xmx8g -Xms8g -XX:+UseSerialGC -Djava.io.tmpdir=%s"'
                 % self.cfg['tmp_dir'])
    recal_cmd = [
        self.cfg['gatk4_location'], '--java-options', jvm_flags,
        'BaseRecalibrator',
        '-R', self.cfg['fasta_file'],
        '-I', self.input()[0].path,
    ]
    for site_key in ('known_vcf', 'mills', 'kg'):
        recal_cmd += ['--known-sites', self.cfg[site_key]]
    recal_cmd += ['-O', self.output()[2].path]

    pipeline_utils.command_call(recal_cmd, self.output(), sleep_time=0.8)
def run(self):
    """Run GATK3 IndelRealigner on the first input BAM.

    The realignment intervals come from the third input and the realigned
    BAM is written to the first output.
    """
    for idx in (0, 1):
        pipeline_utils.confirm_path(self.output()[idx].path)

    jvm = [
        'java', '-Xmx8g', '-Xms8g', '-XX:+UseSerialGC',
        '-Djava.io.tmpdir=%s' % self.cfg['tmp_dir'],
        '-jar', self.cfg['gatk3_location'],
    ]
    realign_args = [
        '-T', 'IndelRealigner',
        '-R', self.cfg['fasta_file'],
        '-I', self.input()[0].path,
        '-known', self.cfg['mills'],
        '-known', self.cfg['kg'],
        '-targetIntervals', self.input()[2].path,
        '-o', self.output()[0].path,
    ]

    pipeline_utils.command_call(jvm + realign_args, self.output(),
                                sleep_time=0.7)
def run(self):
    """Remove duplicate reads with Picard MarkDuplicates.

    The de-duplicated BAM goes to the first output and the metrics file to
    the second.
    """
    for idx in (0, 1):
        pipeline_utils.confirm_path(self.output()[idx].path)

    jvm = [
        'java', '-Xmx8g', '-Xms8g', '-XX:+UseSerialGC',
        '-Djava.io.tmpdir=%s' % self.cfg['tmp_dir'],
        '-jar', self.cfg['picard_location'],
    ]
    picard_args = [
        'MarkDuplicates',
        'I=%s' % self.input()[0].path,
        'O=%s' % self.output()[0].path,
        'M=%s' % self.output()[1].path,
        'CREATE_INDEX=true',
        'ASSUME_SORT_ORDER=coordinate',
        'TAGGING_POLICY=All',
        'REMOVE_DUPLICATES=true',
    ]

    pipeline_utils.command_call(jvm + picard_args, self.output(),
                                sleep_time=0.4)
def run(self):
    """Write the BAM list file ``___pindel_bams___.txt``.

    One line is written per input: ``<bam path> <insert size> <label>``.
    The label is the part of the BAM file name before its first underscore
    followed by ``_N`` when the path contains '_N', and ``_T`` otherwise.
    The file is created in the current working directory.
    """
    for target in self.output():
        pipeline_utils.confirm_path(target.path)

    with open('___pindel_bams___.txt', 'w') as bam_list:
        for input_bam in self.input():
            bam_path = input_bam[0].path
            file_name = bam_path.split('/')[-1]
            case = file_name.split('_')[0]
            suffix = '_N' if '_N' in bam_path else '_T'
            bam_list.write('%s %s %s\n' % (bam_path,
                                           self.cfg['insert_size'],
                                           case + suffix))
def output(self):
    """Return this task's targets: the FACETS SNP pileup and its error log.

    Both live under ``<output_dir>/<case>/``. Each LocalTarget's path is
    passed to pipeline_utils.confirm_path before the dict is returned.
    """
    case_dir = os.path.join(self.cfg['output_dir'], self.case)
    pileup_path = os.path.join(
        case_dir, 'variant_prep',
        '%s_facets_snp_pileup.csv.gz' % self.case)
    err_log_path = os.path.join(
        case_dir, 'log', '%s_facets_snp_pileup_err.txt' % self.case)

    outputs = {
        'facets_snp_pileup': luigi.LocalTarget(pileup_path),
        'err_log': luigi.LocalTarget(err_log_path),
    }
    for target in outputs.values():
        if isinstance(target, luigi.LocalTarget):
            pipeline_utils.confirm_path(target.path)
    return outputs
def run(self):
    """Run GATK3 RealignerTargetCreator on the first input BAM.

    Known indel sites come from the 'mills' and 'kg' config entries; the
    interval list is written to the third output.
    """
    pipeline_utils.confirm_path(self.output()[2].path)

    jvm = [
        'java', '-Xmx8g', '-Xms8g', '-XX:+UseSerialGC',
        '-Djava.io.tmpdir=%s' % self.cfg['tmp_dir'],
        '-jar', self.cfg['gatk3_location'],
    ]
    creator_args = [
        '-T', 'RealignerTargetCreator',
        '-nt', str(self.max_threads),
        '-R', self.cfg['fasta_file'],
        '-I', self.input()[0].path,
    ]
    for site_key in ('mills', 'kg'):
        creator_args += ['--known', self.cfg[site_key]]
    creator_args += ['-o', self.output()[2].path]

    pipeline_utils.command_call(jvm + creator_args, self.output(),
                                threads_needed=self.max_threads,
                                sleep_time=0.6)
def output(self):
    """Return this task's targets: the filtered Mutect2 VCF and error log.

    Both live under ``<output_dir>/<case>/``. Each LocalTarget's path is
    passed to pipeline_utils.confirm_path before the dict is returned.
    """
    case_dir = os.path.join(self.cfg['output_dir'], self.case)

    outputs = {}
    outputs['filter_mutect2'] = luigi.LocalTarget(
        os.path.join(case_dir, 'variant_prep',
                     '%s_mutect2_filtered.vcf.gz' % self.case))
    outputs['err_log'] = luigi.LocalTarget(
        os.path.join(case_dir, 'log',
                     '%s_filter_mutect2_err.txt' % self.case))

    for target in outputs.values():
        if isinstance(target, luigi.LocalTarget):
            pipeline_utils.confirm_path(target.path)
    return outputs
def output(self):
    """Return this task's targets: the Mutect2 panel of normals and its log.

    Both live under ``<output_dir>/all_samples/``. Each LocalTarget's path
    is passed to pipeline_utils.confirm_path before the dict is returned.
    """
    shared_dir = os.path.join(self.cfg['output_dir'], 'all_samples')
    pon_path = os.path.join(shared_dir, 'variant_prep', 'mutect2_pon.vcf.gz')
    err_log_path = os.path.join(shared_dir, 'log', 'mutect2_pon_err.txt')

    outputs = {
        'mutect2_pon': luigi.LocalTarget(pon_path),
        'err_log': luigi.LocalTarget(err_log_path),
    }
    for target in outputs.values():
        if isinstance(target, luigi.LocalTarget):
            pipeline_utils.confirm_path(target.path)
    return outputs
def output(self):
    """Return this task's targets: the case MAF file and its error log.

    Both live under ``<output_dir>/<case>/``. Each LocalTarget's path is
    passed to pipeline_utils.confirm_path before the dict is returned.
    """
    case_dir = os.path.join(self.cfg['output_dir'], self.case)
    maf_path = os.path.join(case_dir, 'variants', '%s.maf' % self.case)
    err_log_path = os.path.join(case_dir, 'log',
                                '%s_vcf2maf_err.txt' % self.case)

    outputs = {
        'vcf2maf': luigi.LocalTarget(maf_path),
        'err_log': luigi.LocalTarget(err_log_path),
    }
    for target in outputs.values():
        if isinstance(target, luigi.LocalTarget):
            pipeline_utils.confirm_path(target.path)
    return outputs