def run_prepare_check(self):
    with open(self._status_log_file, 'w') as f:
        f.write('[STATUS] MD5 checksum for reference and software\n[PROGRESS] 5')
    log_progress(__modname__, 'MD5 checksum for reference and software', f=self._log_file)
    exec_point = join(self._script_home, 'pipelines', 'prepare_check.sh')
    exec_cmd = [exec_point, '-w', settings.SCRIPT_HOME]
    run_command(__modname__, exec_cmd, self._log_file)
def copy_standard_output(self):
    log_progress(__modname__, 'Copy the standard output files to output directory', f=self._log_file)
    ### 901: DNA, 902: RNA
    if self._pipeline == '901':
        dna_output_dir = join(self._tst170_dir, 'DNA_{0}'.format(self._sample_name))
        cnv_vcf = '{0}_CopyNumberVariants.vcf'.format(self._sample_name)
        self.copy_files(join(dna_output_dir, cnv_vcf), join(self._variant_dir, cnv_vcf))
        small_variant_vcf = '{0}_SmallVariants.genome.vcf'.format(self._sample_name)
        self.copy_files(join(dna_output_dir, small_variant_vcf), join(self._variant_dir, small_variant_vcf))
        dna_bed = join(self._tst170_dir, 'DNA_IntermediateFiles', 'Alignment', 'DNA_PicardTarget.bed')
        self.copy_files(dna_bed, join(self._assay_dir, '{0}.bed'.format(self._sample_name)))
    elif self._pipeline == '902':
        rna_output_dir = join(self._tst170_dir, 'RNA_{0}'.format(self._sample_name))
        splice_variant_vcf = '{0}_SpliceVariants.vcf'.format(self._sample_name)
        self.copy_files(join(rna_output_dir, splice_variant_vcf), join(self._variant_dir, splice_variant_vcf))
        fusion_csv = '{0}_Fusions.csv'.format(self._sample_name)
        self.copy_files(join(rna_output_dir, fusion_csv), join(self._variant_dir, fusion_csv))
        high_conf_variant_csv = '{0}_HighConfidenceVariants.csv'.format(self._sample_name)
        self.copy_files(join(rna_output_dir, high_conf_variant_csv), join(self._variant_dir, high_conf_variant_csv))
        published_fusion_csv = '{0}_PublishedFusions.csv'.format(self._sample_name)
        self.copy_files(join(rna_output_dir, published_fusion_csv), join(self._variant_dir, published_fusion_csv))
    else:
        log_error(__modname__, 'Unknown pipeline code {0} for TST170 pipeline'.format(self._pipeline), f=self._log_file)
        sys.exit(1)
    log_progress(__modname__, 'Copy the standard output files finished', f=self._log_file)
def run_germline_stat(self):
    with open(self._status_log_file, 'w') as f:
        f.write('[STATUS] statistics generation\n[PROGRESS] 85')
    log_progress(__modname__, 'Run the analysis stat', f=self._log_file)
    summary_file = join(self._output_dir, 'data', 'stat', '{0}.panel.txt'.format(self._sample_name))
    stat_json_file = join(self._output_dir, 'data', 'stat', '{0}.stat.json'.format(self._sample_name))
    stat_json_gen = GermlineStatParser(self._log_file)
    stat_json_gen.run(summary_file, stat_json_file)
def run(self):
    self._md5_file = '{0}.md5'.format(self._json_file)
    if md5sum_check(self._json_file, self._md5_file):
        log_progress(__modname__, 'VCF to JSON already finished!!!', f=self._log_file)
    else:
        self.workflow()
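# The run()/workflow() wrappers in this module skip steps whose output already
# exists by comparing a file against its '.md5' sidecar. The helper below is a
# minimal sketch of that idempotency check, assuming md5sum_check is a
# project-internal utility; it is a hypothetical reconstruction, not the actual
# implementation.
import hashlib
import os


def md5sum_check(target_file, md5_file):
    """Return True if target_file exists and its MD5 matches the stored sidecar."""
    if not (os.path.exists(target_file) and os.path.exists(md5_file)):
        return False
    digest = hashlib.md5()
    with open(target_file, 'rb') as fh:
        for chunk in iter(lambda: fh.read(1 << 20), b''):
            digest.update(chunk)
    with open(md5_file, 'r') as fh:
        fields = fh.read().split()     # typical 'md5sum' output: '<hash>  <filename>'
    return bool(fields) and digest.hexdigest() == fields[0]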
def run(self):
    # remove stale log and completed files
    if os.path.exists(self._log_file):
        os.remove(self._log_file)
    if os.path.exists(self._completed_file):
        os.remove(self._completed_file)
    log_progress(__modname__, 'Entering workflow for job_id: {0}'.format(self._job_id), f=self._log_file)
    log_progress(__modname__, 'SNV Analysis start', f=self._log_file)
    exec_cmd = [
        'docker', 'run', '-t', '--rm', '--net=host',
        '-u', 'ngenebio:ngenebio',
        '--name', 'snv_{0}_{1}'.format(self._sample_id, self._job_id),
        '-h', 'ngeneanalysys',
        '-v', '/etc/localtime:/etc/localtime',
        '-v', '{0}/{1}:/NGENEBIO/workflow_out/{1}'.format(settings.ANALYSIS_HOME, self._job_id),
        '-v', '{0}:/NGENEBIO/workflow'.format(settings.SCRIPT_HOME),
        '-v', '{0}:/NGENEBIO/workflow-dependencies'.format(settings.DEPENDENCIES),
        '-v', '{0}:/NGENEBIO/workflow-app'.format(settings.APP_HOME),
        '-e', 'HGVS_SEQREPO_DIR=/NGENEBIO/workflow-dependencies/HGVS/seq_repo/latest',
        '{0}'.format(settings.DOCKER_IMAGE),
        '/NGENEBIO/workflow/run.sh', self._job_id, self._sample_id
    ]
    run_command(__modname__, exec_cmd, self._log_file)
    log_progress(__modname__, 'SNV Processing Complete.', f=self._log_file)
    exec_cmd = [
        '{0}/pcgr/pcgr.py'.format(settings.APP_HOME),
        '--input_vcf', '{0}/{1}/data/variant/{2}_final.vcf'.format(settings.ANALYSIS_HOME, self._job_id, self._sample_name),
        '{0}/pcgr/'.format(settings.APP_HOME),
        '{0}/{1}/data/variant/'.format(settings.ANALYSIS_HOME, self._job_id),
        'grch37',
        '{0}/pcgr/pcgr.toml'.format(settings.APP_HOME),
        '{0}'.format(self._sample_name),
        '--force_overwrite'
    ]
    if self._pipeline_code.startswith('2') or self._pipeline_code.startswith('3'):
        log_progress(__modname__, 'PCGR Processing Start.', f=self._log_file)
        run_command(__modname__, exec_cmd, self._log_file)
        log_progress(__modname__, 'PCGR Processing Complete.', f=self._log_file)
    with open(self._completed_file, 'w') as completed_file:
        completed_file.write('Job with SGE_TASKID {0} completed.'.format(self._sample_id))
def brca_qc_workflow(self):
    self.run_brca_qc()
    with open(self._status_log_file, 'w') as f:
        f.write('[STATUS] CNV Processing Complete\n[PROGRESS] 100')
    log_progress(__modname__, 'BRCA QC Data Generation Completed.', f=self._log_file)
    completed_file_path = join(self._output_dir, 'logs/completed.{0}'.format(self._sample_id))
    with open(completed_file_path, 'w') as completed_file:
        completed_file.write('Job with SGE_TASKID {0} completed.'.format(self._sample_id))
def run_summarize_final_result(self):
    with open(self._status_log_file, 'w') as f:
        f.write('[STATUS] summarize final result\n[PROGRESS] 90')
    log_progress(__modname__, 'Run the final result summarization', f=self._log_file)
    exec_point = join(self._script_home, 'pipelines', 'summarize_final_result.sh')
    exec_cmd = [exec_point, '-w', settings.SCRIPT_HOME, '-o', self._output_dir,
                '-n', self._sample_name, '-i', self._fastq_r1, '-j', self._fastq_r2]
    run_command(__modname__, exec_cmd, self._log_file)
def run_dna_hered_cnv(self):
    with open(self._status_log_file, 'w') as f:
        f.write('[STATUS] Germline HERED CNV pipeline\n[PROGRESS] 100')
    log_progress(__modname__, 'Run the hered cnv pipeline', f=self._log_file)
    exec_point = join(self._script_home, 'pipelines', 'ngb_heredaccutest_cnv_v1.sh')
    exec_cmd = [exec_point, '-w', settings.SCRIPT_HOME, '-n', self._sample_name, '-o', self._output_dir]
    #run_command(__modname__, exec_cmd, self._log_file)
    retcode = logging_subprocess_call(exec_cmd, self._log_file)
    if retcode != 0:
        self.handle_workflow_failure('DNA Germline HERED CNV pipeline fail', retcode)
def run_tst170_vcf_to_json(self):
    with open(self._status_log_file, 'w') as f:
        f.write('[STATUS] TruSightTumor170 pipeline\n[PROGRESS] 87')
    log_progress(__modname__, 'VCF to JSON start', f=self._log_file)
    exec_point = join(self._script_home, 'run_vcf_to_json.py')
    exec_cmd = [
        'python', exec_point,
        'vcf_file={0}'.format(settings.get_final_vcf(self._output_dir, self._sample_name)),
        'json_file={0}'.format(settings.get_final_json(self._output_dir, self._sample_name)),
        'log_file={0}'.format(self._log_file)
    ]
    run_command(__modname__, exec_cmd, self._log_file)
    log_progress(__modname__, 'VCF to JSON finished', f=self._log_file)
def run_brca_qc(self):
    with open(self._status_log_file, 'w') as f:
        f.write('[STATUS] BRCA QC Data Generation\n[PROGRESS] 99')
    log_progress(__modname__, 'BRCA QC Data Generation start', f=self._log_file)
    exec_point = join(self._script_home, 'pipelines', 'summarize_brca_result.sh')
    exec_cmd = [exec_point, '-w', settings.SCRIPT_HOME, '-n', self._run_name,
                '-a', self._amplicon_bed, '-o', self._output_dir]
    retcode = logging_subprocess_call(exec_cmd, self._log_file)
    #run_command(__modname__, exec_cmd, self._log_file)
    log_progress(__modname__, 'BRCA QC Data Generation finished', f=self._log_file)
def run_read_depth_normalization(self):
    with open(self._status_log_file, 'w') as f:
        f.write('[STATUS] Read depth normalization\n[PROGRESS] 15')
    log_progress(__modname__, 'Run the read depth normalization', f=self._log_file)
    exec_point = join(self._script_home, 'pipelines', 'read_depth_normalization.sh')
    exec_cmd = [exec_point, '-w', settings.SCRIPT_HOME, '-i', self._fastq_r1,
                '-j', self._fastq_r2, '-o', self._output_dir, '-n', self._sample_name]
    run_command(__modname__, exec_cmd, self._log_file)
    self._fastq_r1 = '{0}_normalize.1.fastq'.format(self._sample_name)
    self._fastq_r2 = '{0}_normalize.2.fastq'.format(self._sample_name)
def run_analysis_stat(self):
    with open(self._status_log_file, 'w') as f:
        f.write('[STATUS] statistics generation\n[PROGRESS] 85')
    log_progress(__modname__, 'Run the analysis stat', f=self._log_file)
    stat_file = join(self._output_dir, 'data', 'stat', '{0}_summary.txt'.format(self._sample_name))
    exec_point = join(self._script_home, 'pipelines', 'analysis_stat.sh')
    exec_cmd = [exec_point, '-w', settings.SCRIPT_HOME, '-i', stat_file, '-n', self._sample_name,
                '-o', self._output_dir, '-u', join(self._script_home, 'utils'), '-p', self._pipeline]
    run_command(__modname__, exec_cmd, self._log_file)
def cnv_workflow(self):
    if self._pipeline.startswith('4'):    ### brca
        self.run_dna_brca_cnv()
    elif self._pipeline.startswith('2'):  ### solid
        self.run_dna_somatic_cnv()
    elif self._pipeline.startswith('3'):  ### blood
        self.run_dna_somatic_cnv()
    elif self._pipeline.startswith('6'):  ### hered
        self.run_dna_hered_cnv()
    with open(self._status_log_file, 'w') as f:
        f.write('[STATUS] CNV Processing Complete\n[PROGRESS] 100')
    log_progress(__modname__, 'CNV Processing Completed.', f=self._log_file)
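# For reference, the pipeline-code prefixes dispatched above, plus the 901/902
# TST170 codes used elsewhere in this module, map roughly as follows. This is a
# hypothetical summary derived only from the branches and comments in this file,
# not an authoritative registry of pipeline codes.
PIPELINE_CODE_HINTS = {
    '2': 'DNA somatic (solid tumor)',
    '3': 'DNA somatic (blood)',
    '4': 'BRCA',
    '6': 'Germline HERED',
    '901': 'TruSightTumor170 DNA',
    '902': 'TruSightTumor170 RNA',
}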
def check_tst170_result(self, tst170_dir):
    log_progress(__modname__, 'Check the TruSightTumor170 analysis result status start', f=self._log_file)
    try:
        with open(join(tst170_dir, 'Summary.tsv'), 'r') as f:
            lines = f.readlines()
        sample_name_sp = self._config_opts['SAMPLE_NAMES'].split('|')
        for sample_name in sample_name_sp:
            if '{0}\tAnalysis Completed.\n'.format(sample_name) not in lines:
                log_error(__modname__, 'TruSightTumor170 analysis result fail', f=self._log_file)
                sys.exit(1)
    except Exception as ex:
        log_error(__modname__, str(ex), f=self._log_file)
        sys.exit(1)
    log_progress(__modname__, 'All the TruSightTumor170 analysis result OK', f=self._log_file)
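# The membership test above implies Summary.tsv lines of the form (tab-separated;
# illustrative reconstruction only, not taken from an actual TST170 run):
#
#   SAMPLE_A\tAnalysis Completed.
#   SAMPLE_B\tAnalysis Completed.
#
# Any sample without an "Analysis Completed." line is treated as a failed
# TruSightTumor170 analysis and aborts the workflow.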
def run_amplicon_check(self):
    with open(self._status_log_file, 'w') as f:
        f.write('[STATUS] Amplicon(adapter) analysis\n[PROGRESS] 50')
    log_progress(__modname__, 'Run the amplicon stat', f=self._log_file)
    input_sam = join(self._output_dir, 'data', 'basecall', 'alignment',
                     '{0}.primer.stat.sam'.format(self._sample_name))
    if self._pipeline == '444':
        input_sam = join(self._output_dir, 'data', 'basecall', 'alignment',
                         '{0}_final.sam'.format(self._sample_name))
    exec_point = join(self._script_home, 'pipelines', 'amplicon_check.sh')
    exec_cmd = [exec_point, '-w', settings.SCRIPT_HOME, '-i', input_sam, '-n', self._sample_name,
                '-o', self._output_dir, '-u', join(self._script_home, 'utils')]
    run_command(__modname__, exec_cmd, self._log_file)
def run_rna_somatic_pipeline(self):
    with open(self._status_log_file, 'w') as f:
        f.write('[STATUS] RNA Somatic pipeline\n[PROGRESS] 30')
    log_progress(__modname__, 'Run the RNA somatic pipeline', f=self._log_file)
    exec_point = join(self._script_home, 'pipelines', 'ngb_rna_pipeline.sh')
    exec_cmd = [exec_point, '-w', settings.SCRIPT_HOME, '-n', self._sample_name, '-i', self._fastq_r1,
                '-j', self._fastq_r2, '-o', self._output_dir, '-t', self._pe_core]
    retcode = logging_subprocess_call(exec_cmd, self._log_file)
    if retcode != 0:
        self.handle_workflow_failure('RNA somatic pipeline fail', retcode)
def run_fastqc(self):
    with open(self._status_log_file, 'w') as f:
        f.write('[STATUS] TruSightTumor170 pipeline\n[PROGRESS] 75')
    log_progress(__modname__, 'Raw data FASTQC start', f=self._log_file)
    exec_point = join(self._script_home, 'run_fastqc.py')
    exec_cmd = [
        'python', exec_point,
        'output_dir={0}'.format(self._output_dir),
        'fastq_dir={0}'.format(join(self._output_dir, settings.FASTQ_HOME)),
        'sample_name={0}'.format(self._sample_name),
        'log_file={0}'.format(self._log_file),
        'fastqc_dir={0}'.format(settings.FASTQC_HOME)
    ]
    run_command(__modname__, exec_cmd, self._log_file)
    log_progress(__modname__, 'Raw data FASTQC finished', f=self._log_file)
def run_tst170_stat_parser(self, tst170_dir):
    with open(self._status_log_file, 'w') as f:
        f.write('[STATUS] TruSightTumor170 pipeline\n[PROGRESS] 93')
    log_progress(__modname__, 'Analysis statistics start', f=self._log_file)
    exec_point = join(self._script_home, 'run_stat_parser.py')
    exec_cmd = [
        'python', exec_point,
        'pipeline={0}'.format(self._pipeline),
        'sample_name={0}'.format(self._sample_name),
        'stat_json={0}'.format(settings.get_stat_json(self._output_dir, self._sample_name)),
        'tst170_dir={0}'.format(tst170_dir),
        'summary_file={0}'.format(settings.get_summary_file(self._output_dir, self._sample_name))
    ]
    run_command(__modname__, exec_cmd, self._log_file)
    log_progress(__modname__, 'Analysis statistics finished', f=self._log_file)
def run_dna_hered_pipeline(self):
    with open(self._status_log_file, 'w') as f:
        f.write('[STATUS] Germline HERED pipeline\n[PROGRESS] 30')
    log_progress(__modname__, 'Run the hered pipeline', f=self._log_file)
    exec_point = join(self._script_home, 'pipelines', 'ngb_heredaccutest_pipeline_v1.sh')
    exec_cmd = [exec_point, '-w', settings.SCRIPT_HOME, '-n', self._sample_name, '-i', self._fastq_r1,
                '-j', self._fastq_r2, '-o', self._output_dir, '-c', self._canonical_transcript_file,
                '-t', self._pe_core]
    retcode = logging_subprocess_call(exec_cmd, self._log_file)
    if retcode != 0:
        self.handle_workflow_failure('DNA Germline HERED pipeline fail', retcode)
def run_fastq_validation(self):
    with open(self._status_log_file, 'w') as f:
        f.write('[STATUS] FASTQ file validation\n[PROGRESS] 10')
    log_progress(__modname__, 'Run the fastq validation', f=self._log_file)
    exec_point = join(self._script_home, 'pipelines', 'fastq_validation.sh')
    exec_cmd_1 = [exec_point, '-w', settings.SCRIPT_HOME, '-i', self._fastq_r1,
                  '-n', self._sample_name, '-o', self._output_dir]
    run_command(__modname__, exec_cmd_1, self._log_file)
    exec_cmd_2 = [exec_point, '-w', settings.SCRIPT_HOME, '-i', self._fastq_r2,
                  '-n', self._sample_name, '-o', self._output_dir]
    run_command(__modname__, exec_cmd_2, self._log_file)
def run_tst170_cnv_parser(self):
    with open(self._status_log_file, 'w') as f:
        f.write('[STATUS] TruSightTumor170 pipeline\n[PROGRESS] 90')
    log_progress(__modname__, 'CNV JSON and plot generation start', f=self._log_file)
    exec_point = join(self._script_home, 'run_cnv_parser.py')
    exec_cmd = [
        'python', exec_point,
        'cnv_vcf={0}'.format(settings.get_cnv_vcf(self._output_dir, self._sample_name)),
        'cnv_tsv={0}'.format(settings.get_cnv_tsv(self._output_dir, self._sample_name)),
        'cnv_plot={0}'.format(settings.get_cnv_plot(self._output_dir, self._sample_name)),
        'cnv_fc_stat={0}'.format(settings.get_cnv_fc(self._output_dir, self._sample_name)),
        'log_file={0}'.format(self._log_file),
        'sample_name={0}'.format(self._sample_name),
        'cnv_plot_script={0}'.format(join(self._script_home, 'cnv_plot.R'))
    ]
    run_command(__modname__, exec_cmd, self._log_file)
    log_progress(__modname__, 'CNV JSON and plot generation finished', f=self._log_file)
def run_vcf_to_json(self, flag):
    with open(self._status_log_file, 'w') as f:
        f.write('[STATUS] convert vcf to json\n[PROGRESS] 75')
    log_progress(__modname__, 'Run the vcf to json', f=self._log_file)
    exec_point = join(self._script_home, 'pipelines', 'vcf_to_json.sh')
    if flag == 'HEREDaccuTest':
        final_vcf = join(self._output_dir, 'data', 'variant', '{0}_final.snv.vcf'.format(self._sample_name))
    else:
        final_vcf = join(self._output_dir, 'data', 'variant', '{0}_final.vcf'.format(self._sample_name))
    exec_cmd = [exec_point, '-w', settings.SCRIPT_HOME, '-i', final_vcf, '-a', self._pipeline_name,
                '-o', self._output_dir, '-n', self._sample_name, '-u', join(self._script_home, 'utils'),
                '-p', self._pipeline,
                # escape parentheses in the sample source (presumably so the value
                # survives shell re-interpretation inside vcf_to_json.sh)
                '-s', self._sample_source.replace('(', '\\(').replace(')', '\\)')]
    run_command(__modname__, exec_cmd, self._log_file)
def run_tst170_summarize_result(self):
    with open(self._status_log_file, 'w') as f:
        f.write('[STATUS] Summarize Final Result\n[PROGRESS] 99')
    log_progress(__modname__, 'Summarize Final Result start', f=self._log_file)
    exec_point = join(self._script_home, 'summarize_final_result.py')
    exec_cmd = [
        'python', exec_point,
        'output_dir={0}'.format(self._output_dir),
        'report_dir={0}'.format(settings.REPORT_HOME),
        'fastqc_dir={0}'.format(settings.FASTQC_HOME),
        'sample_name={0}'.format(self._sample_name),
        'log_file={0}'.format(self._log_file),
        'cnv_plot={0}'.format(settings.get_cnv_plot(self._output_dir, self._sample_name)),
        'statistics_file={0}'.format(settings.get_statistics_file(self._output_dir, self._sample_name)),
        'pipeline={0}'.format(self._pipeline)
    ]
    run_command(__modname__, exec_cmd, self._log_file)
    log_progress(__modname__, 'Summarize Final Result finished', f=self._log_file)
def run_tst170_annotation(self):
    with open(self._status_log_file, 'w') as f:
        f.write('[STATUS] TruSightTumor170 pipeline\n[PROGRESS] 80')
    log_progress(__modname__, 'TST170 annotation start', f=self._log_file)
    exec_point = join(self._script_home, 'run_annotation.py')
    exec_cmd = [
        'python', exec_point,
        'output_dir={0}'.format(self._output_dir),
        'variant_dir={0}'.format(settings.VARIANT_HOME),
        'tmp_dir={0}'.format(settings.TEMP_HOME),
        'log_file={0}'.format(self._log_file),
        'final_bam={0}'.format(settings.get_final_bam(self._output_dir, self._sample_name)),
        'raw_vcf={0}'.format(settings.get_tst170_raw_vcf(self._output_dir, self._sample_name)),
        'final_vcf={0}'.format(settings.get_final_vcf(self._output_dir, self._sample_name)),
        'sample_name={0}'.format(self._sample_name),
        'pe_core={0}'.format(self._pe_core)
    ]
    run_command(__modname__, exec_cmd, self._log_file)
    log_progress(__modname__, 'TST170 annotation finished', f=self._log_file)
def run_summarize_intermediate_result(self, tst170_dir):
    with open(self._status_log_file, 'w') as f:
        f.write('[STATUS] Copy intermediate results\n[PROGRESS] 70')
    log_progress(__modname__, 'Copy the TruSightTumor170 pipeline result start', f=self._log_file)
    exec_point = join(self._script_home, 'summarize_intermediate_result.py')
    exec_cmd = [
        'python', exec_point,
        'output_dir={0}'.format(self._output_dir),
        'assay_dir={0}'.format(settings.ASSAY_HOME),
        'fastq_dir={0}'.format(settings.FASTQ_HOME),
        'align_dir={0}'.format(settings.ALIGNMENT_HOME),
        'variant_dir={0}'.format(settings.VARIANT_HOME),
        'log_file={0}'.format(self._log_file),
        'tst170_dir={0}'.format(tst170_dir),
        'pipeline={0}'.format(self._pipeline),
        'sample_name={0}'.format(self._sample_name),
        'final_bam={0}'.format(settings.get_final_bam(self._output_dir, self._sample_name))
    ]
    run_command(__modname__, exec_cmd, self._log_file)
    log_progress(__modname__, 'Copy the TST170 pipeline result finished', f=self._log_file)
def run_tst170_analysis_stat(self):
    with open(self._status_log_file, 'w') as f:
        f.write('[STATUS] TruSightTumor170 pipeline\n[PROGRESS] 93')
    log_progress(__modname__, 'Analysis statistics start', f=self._log_file)
    exec_point = join(self._script_home, 'run_analysis_statistics.py')
    exec_cmd = [
        'python', exec_point,
        'output_dir={0}'.format(self._output_dir),
        'log_file={0}'.format(self._log_file),
        'fastq_dir={0}'.format(settings.FASTQ_HOME),
        'assay_dir={0}'.format(settings.ASSAY_HOME),
        'align_dir={0}'.format(settings.ALIGNMENT_HOME),
        'target_bed={0}'.format(settings.get_tst170_DNA_bed(self._output_dir, self._pipeline)),
        'sample_name={0}'.format(self._sample_name),
        'final_bam={0}'.format(settings.get_final_bam(self._output_dir, self._sample_name)),
        'summary_file={0}'.format(settings.get_summary_file(self._output_dir, self._sample_name)),
        'script={0}'.format(join(self._script_home, 'run_analysis_statistics.sh')),
        'pipeline={0}'.format(self._pipeline)
    ]
    run_command(__modname__, exec_cmd, self._log_file)
    log_progress(__modname__, 'Analysis statistics finished', f=self._log_file)
def pileup_depth(self, pileup_depth):
    md5_file = '{0}.md5'.format(pileup_depth)
    if md5sum_check(pileup_depth, md5_file):
        log_progress(__modname__, 'Get Pileup Depth already finished!!!', f=self._log_file)
        log_version(__modname__, self._sw['samtools_ver'], f=self._log_file)
    else:
        log_progress(__modname__, 'Get Pileup Depth start', f=self._log_file)
        log_version(__modname__, self._sw['samtools_ver'], f=self._log_file)
        exec_cmd = [
            self._sw['samtools'], 'depth',
            '-a',                    # report all positions, including zero coverage
            '-q', '0',               # minimum base quality
            '-Q', '1',               # minimum mapping quality
            '-d', '1000000',         # per-position depth cap
            '-b', self._target_bed,  # restrict to target regions
            '--reference', self._sw['hg19'],
            self._final_bam
        ]
        run_command_file_handle(__modname__, exec_cmd, self._log_file, 'w', pileup_depth)
        run_command_md5sum(__modname__, self._log_file, pileup_depth, md5_file)
        log_progress(__modname__, 'Get Pileup Depth finished', f=self._log_file)
def run(self):
    # remove stale completed file
    if os.path.exists(self._completed_file):
        os.remove(self._completed_file)
    log_progress(__modname__, 'Entering workflow for job_id: {0}'.format(self._job_id), f=self._log_file)
    log_progress(__modname__, 'CNV Analysis start', f=self._log_file)
    exec_cmd = [
        'docker', 'run', '-t', '--rm', '--net=host',
        '-u', 'ngenebio:ngenebio',
        '--name', 'cnv_{0}_{1}'.format(self._sample_id, self._job_id),
        '-h', 'ngeneanalysys',
        '-v', '/etc/localtime:/etc/localtime',
        '-v', '{0}/{1}:/NGENEBIO/workflow_out/{1}'.format(settings.ANALYSIS_HOME, self._job_id),
        '-v', '{0}:/NGENEBIO/workflow'.format(settings.SCRIPT_HOME),
        '-v', '{0}:/NGENEBIO/workflow-dependencies'.format(settings.DEPENDENCIES),
        '-v', '{0}:/NGENEBIO/workflow-app'.format(settings.APP_HOME),
        '{0}'.format(settings.DOCKER_IMAGE),
        '/NGENEBIO/workflow/run_cnv.sh', self._job_id, self._sample_id
    ]
    run_command(__modname__, exec_cmd, self._log_file)
    log_progress(__modname__, 'CNV Processing Complete.', f=self._log_file)
    with open(self._completed_file, 'w') as completed_file:
        completed_file.write('Job with SGE_TASKID {0} completed.'.format(self._sample_id))
def vcf_post_processing(self, input_file, refined_vcf):
    refined_vcf_md5 = '{0}.md5'.format(refined_vcf)
    if md5sum_check(refined_vcf, refined_vcf_md5):
        log_progress(__modname__, 'VCF post processing already finished!!!', f=self._log_file)
        log_version(__modname__, self._sw['vt_ver'], f=self._log_file)
    else:
        log_progress(__modname__, 'VCF post processing start', f=self._log_file)
        log_version(__modname__, self._sw['vt_ver'], f=self._log_file)
        if os.path.exists(refined_vcf):
            os.remove(refined_vcf)
        # vt normalize left-aligns and normalizes variants against the reference,
        # then vt decompose -s splits multi-allelic records; the two commands are piped.
        exec_cmd = [
            '{0} normalize -r {1} {2}'.format(self._sw['vt'], self._sw['hg19'], input_file),
            '{0} decompose -s -'.format(self._sw['vt']),
        ]
        run_command_pipe_file_handle(__modname__, exec_cmd, self._log_file, 'w', refined_vcf)
        run_command_md5sum(__modname__, self._log_file, refined_vcf, refined_vcf_md5)
        log_progress(__modname__, 'VCF post processing finished', f=self._log_file)
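# vcf_post_processing above hands its two command strings to the project-internal
# run_command_pipe_file_handle helper. A minimal sketch of what such a pipe runner
# might look like (hypothetical, assuming it chains the commands like a shell pipe
# and writes the final stdout to the given file):
import shlex
import subprocess


def run_pipe_to_file(commands, out_path, mode='w'):
    """Chain shell command strings with pipes and write the last command's stdout to out_path."""
    procs = []
    prev_stdout = None
    with open(out_path, mode) as out_fh:
        for idx, cmd in enumerate(commands):
            is_last = idx == len(commands) - 1
            proc = subprocess.Popen(
                shlex.split(cmd),
                stdin=prev_stdout,
                stdout=out_fh if is_last else subprocess.PIPE,
            )
            if prev_stdout is not None:
                prev_stdout.close()  # let the upstream process receive SIGPIPE if the reader exits
            prev_stdout = proc.stdout
            procs.append(proc)
        for proc in procs:
            proc.wait()
    return procs[-1].returncode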
def copy_bam_files(self):
    self._final_bam = join(self._align_dir, '{0}_final.bam'.format(self._sample_name))
    bam_dst_md5 = '{0}.md5'.format(self._final_bam)
    if md5sum_check(self._final_bam, bam_dst_md5):
        log_progress(__modname__, 'Copy the BAM file to output directory already finished', f=self._log_file)
    else:
        log_progress(__modname__, 'Copy the BAM file to output directory', f=self._log_file)
        ### 901: DNA, 902: RNA
        if self._pipeline == '901':
            bam_dir = join(self._tst170_dir, 'DNA_IntermediateFiles', 'Alignment')
            bam_src = join(bam_dir, '{0}_realigned.bam'.format(self._sample_name))
            bed_name = 'DNA_PicardTarget.bed'
        elif self._pipeline == '902':
            bam_dir = join(self._tst170_dir, 'RNA_IntermediateFiles', 'Alignment')
            bam_src = join(bam_dir, '{0}.bam'.format(self._sample_name))
            bed_name = 'RNA_PicardTarget.bed'
        else:
            log_error(__modname__, 'Unknown pipeline code {0} for TST170 pipeline'.format(self._pipeline), f=self._log_file)
            sys.exit(1)
        self.copy_files(bam_src, self._final_bam)
        bai_src = '{0}.bai'.format(bam_src)
        bai_dst = '{0}.bai'.format(self._final_bam)
        self.copy_files(bai_src, bai_dst)
        self.copy_files(join(bam_dir, bed_name), join(self._align_dir, bed_name))
        self.generate_tdf_file(self._final_bam)
        run_command_md5sum(__modname__, self._log_file, self._final_bam, bam_dst_md5)
        log_progress(__modname__, 'Copy the BAM file finished', f=self._log_file)