def run_prepare_check(self):
    """Verify MD5 checksums of the reference data and the software bundle.

    Writes the status/progress marker, logs the step, then delegates the
    actual check to the prepare_check.sh pipeline script.
    """
    with open(self._status_log_file, 'w') as f:
        f.write('[STATUS] MD5 checksum for reference and software\n[PROGRESS] 5')
    log_progress(__modname__, 'MD5 checksum for reference and software',
                 f=self._log_file)
    script = join(self._script_home, 'pipelines', 'prepare_check.sh')
    cmd = [script, '-w', settings.SCRIPT_HOME]
    run_command(__modname__, cmd, self._log_file)
def run(self):
    """Run the CNV analysis for this job inside the analysis docker image.

    Removes any stale completion marker, launches run_cnv.sh in the
    pipeline container, and writes a fresh completion marker when done.
    """
    # completed files — drop a leftover marker so completion state is fresh
    if os.path.exists(self._completed_file):
        os.remove(self._completed_file)
    log_progress(__modname__,
                 'Entering workflow for job_id: {0}'.format(self._job_id),
                 f=self._log_file)
    # BUG FIX: message had a typo ('Anlaysis') and a no-op .format() call
    # on a string that contained no placeholder.
    log_progress(__modname__, 'CNV Analysis start', f=self._log_file)
    exec_cmd = [
        'docker', 'run', '-t', '--rm', '--net=host',
        '-u', 'ngenebio:ngenebio',
        '--name', 'cnv_{0}_{1}'.format(self._sample_id, self._job_id),
        '-h', 'ngeneanalysys',
        '-v', '/etc/localtime:/etc/localtime',
        '-v', '{0}/{1}:/NGENEBIO/workflow_out/{1}'.format(
            settings.ANALYSIS_HOME, self._job_id),
        '-v', '{0}:/NGENEBIO/workflow'.format(settings.SCRIPT_HOME),
        '-v', '{0}:/NGENEBIO/workflow-dependencies'.format(
            settings.DEPENDENCIES),
        '-v', '{0}:/NGENEBIO/workflow-app'.format(settings.APP_HOME),
        '{0}'.format(settings.DOCKER_IMAGE),
        '/NGENEBIO/workflow/run_cnv.sh',
        self._job_id, self._sample_id
    ]
    run_command(__modname__, exec_cmd, self._log_file)
    log_progress(__modname__, 'CNV Processing Complete.', f=self._log_file)
    with open(self._completed_file, 'w') as completed_file:
        completed_file.write('Job with SGE_TASKID {0} completed.'.format(
            self._sample_id))
def low_confidence_annotation(self, input_file, lowconf_vcf):
    """Annotate low-confidence variants (homopolymer run + repeat count).

    Skips the whole step when lowconf_vcf already passes its md5 check;
    otherwise rebuilds both the intermediate .homopolyx file and the
    final low-confidence VCF, then records a new md5.
    """
    low_conf_homopolyx = join(
        self._variant_dir,
        '{0}_lowconf.homopolyx'.format(self._sample_name))
    lowconf_vcf_md5 = '{0}.md5'.format(lowconf_vcf)
    if md5sum_check(lowconf_vcf, lowconf_vcf_md5):
        log_progress(__modname__,
                     'Low confidence annotation already finished!!!',
                     f=self._log_file)
        return
    log_progress(__modname__, 'Low confidence annotation start',
                 f=self._log_file)
    # Remove stale outputs so partial results never survive a rerun.
    for stale in (low_conf_homopolyx, lowconf_vcf):
        if os.path.exists(stale):
            os.remove(stale)
    homopolyx_cmd = [
        'python', self._sw['ngb_lowconf_homopolyx'],
        '-p', '5',
        '-r', self._sw['hg19'],
        '-o', low_conf_homopolyx,
        input_file
    ]
    run_command(__modname__, homopolyx_cmd, self._log_file)
    repeatcnt_cmd = [
        'python', self._sw['ngb_lowconf_repeatcnt'],
        '-r', self._sw['hg19'],
        '-o', lowconf_vcf,
        low_conf_homopolyx
    ]
    run_command(__modname__, repeatcnt_cmd, self._log_file)
    run_command_md5sum(__modname__, self._log_file, lowconf_vcf,
                       lowconf_vcf_md5)
    log_progress(__modname__, 'Low confidence annotation finished',
                 f=self._log_file)
def run(self):
    """Run the SNV analysis for this job inside the analysis docker image.

    Clears old log/completion markers, runs run.sh in the container,
    optionally runs PCGR for pipeline codes starting with '2' or '3',
    and writes a completion marker at the end.
    """
    # remove log and completed files from any previous attempt
    if os.path.exists(self._log_file):
        os.remove(self._log_file)
    if os.path.exists(self._completed_file):
        os.remove(self._completed_file)
    log_progress(__modname__,
                 'Entering workflow for job_id: {0}'.format(self._job_id),
                 f=self._log_file)
    # BUG FIX: message had a typo ('Anlaysis') and a no-op .format() call
    # on a string that contained no placeholder.
    log_progress(__modname__, 'SNV Analysis start', f=self._log_file)
    exec_cmd = [
        'docker', 'run', '-t', '--rm', '--net=host',
        '-u', 'ngenebio:ngenebio',
        '--name', 'snv_{0}_{1}'.format(self._sample_id, self._job_id),
        '-h', 'ngeneanalysys',
        '-v', '/etc/localtime:/etc/localtime',
        '-v', '{0}/{1}:/NGENEBIO/workflow_out/{1}'.format(
            settings.ANALYSIS_HOME, self._job_id),
        '-v', '{0}:/NGENEBIO/workflow'.format(settings.SCRIPT_HOME),
        '-v', '{0}:/NGENEBIO/workflow-dependencies'.format(
            settings.DEPENDENCIES),
        '-v', '{0}:/NGENEBIO/workflow-app'.format(settings.APP_HOME),
        '-e', 'HGVS_SEQREPO_DIR=/NGENEBIO/workflow-dependencies/HGVS/seq_repo/latest',
        '{0}'.format(settings.DOCKER_IMAGE),
        '/NGENEBIO/workflow/run.sh',
        self._job_id, self._sample_id
    ]
    run_command(__modname__, exec_cmd, self._log_file)
    log_progress(__modname__, 'SNV Processing Complete.', f=self._log_file)
    exec_cmd = [
        '{0}/pcgr/pcgr.py'.format(settings.APP_HOME),
        '--input_vcf', '{0}/{1}/data/variant/{2}_final.vcf'.format(
            settings.ANALYSIS_HOME, self._job_id, self._sample_name),
        '{0}/pcgr/'.format(settings.APP_HOME),
        '{0}/{1}/data/variant/'.format(settings.ANALYSIS_HOME, self._job_id),
        'grch37',
        '{0}/pcgr/pcgr.toml'.format(settings.APP_HOME),
        '{0}'.format(self._sample_name),
        '--force_overwrite'
    ]
    # PCGR only applies to pipeline families '2*' and '3*'.
    if self._pipeline_code.startswith(
            '2') or self._pipeline_code.startswith('3'):
        log_progress(__modname__, 'PCGR Processing Start.', f=self._log_file)
        run_command(__modname__, exec_cmd, self._log_file)
        log_progress(__modname__, 'PCGR Processing Complete.',
                     f=self._log_file)
    with open(self._completed_file, 'w') as completed_file:
        completed_file.write('Job with SGE_TASKID {0} completed.'.format(
            self._sample_id))
def run_summarize_final_result(self):
    """Summarize the final analysis result via summarize_final_result.sh."""
    with open(self._status_log_file, 'w') as f:
        f.write('[STATUS] summarize final result\n[PROGRESS] 90')
    log_progress(__modname__, 'Run the final result summarization',
                 f=self._log_file)
    script = join(self._script_home, 'pipelines',
                  'summarize_final_result.sh')
    cmd = [
        script,
        '-w', settings.SCRIPT_HOME,
        '-o', self._output_dir,
        '-n', self._sample_name,
        '-i', self._fastq_r1,
        '-j', self._fastq_r2,
    ]
    run_command(__modname__, cmd, self._log_file)
def run_tst170_vcf_to_json(self):
    """Convert the TST170 final VCF into the final JSON report file."""
    with open(self._status_log_file, 'w') as f:
        f.write('[STATUS] TruSightTumor170 pipeline\n[PROGRESS] 87')
    log_progress(__modname__, 'VCF to JSON start', f=self._log_file)
    converter = join(self._script_home, 'run_vcf_to_json.py')
    # Arguments are passed as key=value tokens, matching the script's CLI.
    cmd = [
        'python', converter,
        'vcf_file={0}'.format(
            settings.get_final_vcf(self._output_dir, self._sample_name)),
        'json_file={0}'.format(
            settings.get_final_json(self._output_dir, self._sample_name)),
        'log_file={0}'.format(self._log_file)
    ]
    run_command(__modname__, cmd, self._log_file)
    log_progress(__modname__, 'VCF to JSON finished', f=self._log_file)
def copy_version_file(self, panel, ver):
    """Copy the panel's version/DB/SW manifest files into the version dir.

    For each of the three manifest kinds, copies
    docs/<type>/<panel>_<ver>.<ext> to
    version/<sample>_<acronym>.<ext> under the output directory.
    """
    version_dir = join(self._output_dir, 'version')
    run_command(__modname__, ['mkdir', '-p', version_dir], self._log_file)
    manifests = zip(['version', 'database', 'software'],
                    ['version', 'DB', 'SW'],
                    ['txt', 'tsv', 'tsv'])
    for file_type, acronym, ext in manifests:
        src = join(settings.SCRIPT_HOME, 'docs', file_type,
                   '{0}_{1}.{2}'.format(panel, ver, ext))
        dst = join(version_dir,
                   '{0}_{1}.{2}'.format(self._sample_name, acronym, ext))
        run_command(__modname__, ['cp', src, dst], self._log_file)
def run_analysis_stat(self):
    """Generate per-sample analysis statistics via analysis_stat.sh."""
    with open(self._status_log_file, 'w') as f:
        f.write('[STATUS] statistics generation\n[PROGRESS] 85')
    log_progress(__modname__, 'Run the analysis stat', f=self._log_file)
    stat_file = join(self._output_dir, 'data', 'stat',
                     '%s_summary.txt' % (self._sample_name))
    script = join(self._script_home, 'pipelines', 'analysis_stat.sh')
    cmd = [
        script,
        '-w', settings.SCRIPT_HOME,
        '-i', stat_file,
        '-n', self._sample_name,
        '-o', self._output_dir,
        '-u', join(self._script_home, 'utils'),
        '-p', self._pipeline,
    ]
    run_command(__modname__, cmd, self._log_file)
def run_read_depth_normalization(self):
    """Normalize read depth, then point the FASTQ attributes at the
    normalized output files for all downstream steps."""
    with open(self._status_log_file, 'w') as f:
        f.write('[STATUS] Read depth normalization\n[PROGRESS] 15')
    log_progress(__modname__, 'Run the read depth normalization',
                 f=self._log_file)
    script = join(self._script_home, 'pipelines',
                  'read_depth_normalization.sh')
    cmd = [
        script,
        '-w', settings.SCRIPT_HOME,
        '-i', self._fastq_r1,
        "-j", self._fastq_r2,
        '-o', self._output_dir,
        '-n', self._sample_name,
    ]
    run_command(__modname__, cmd, self._log_file)
    # Downstream stages must consume the normalized FASTQs from here on.
    self._fastq_r1 = "%s_normalize.1.fastq" % (self._sample_name)
    self._fastq_r2 = "%s_normalize.2.fastq" % (self._sample_name)
def run_fastqc(self):
    """Run FastQC on the raw data via the run_fastqc.py wrapper."""
    with open(self._status_log_file, 'w') as f:
        f.write('[STATUS] TruSightTumor170 pipeline\n[PROGRESS] 75')
    log_progress(__modname__, 'Raw data FASTQC start', f=self._log_file)
    wrapper = join(self._script_home, 'run_fastqc.py')
    cmd = [
        'python', wrapper,
        'output_dir={0}'.format(self._output_dir),
        'fastq_dir={0}'.format(join(self._output_dir, settings.FASTQ_HOME)),
        'sample_name={0}'.format(self._sample_name),
        'log_file={0}'.format(self._log_file),
        'fastqc_dir={0}'.format(settings.FASTQC_HOME)
    ]
    run_command(__modname__, cmd, self._log_file)
    log_progress(__modname__, 'RAW data FASTQC finished', f=self._log_file)
def run_amplicon_check(self):
    """Run amplicon (adapter) statistics via amplicon_check.sh.

    Pipeline "444" reads its input from the final SAM instead of the
    primer-stat SAM.
    """
    with open(self._status_log_file, 'w') as f:
        f.write('[STATUS] Amplicon(adapter) analysis\n[PROGRESS] 50')
    log_progress(__modname__, 'Run the amplicon stat', f=self._log_file)
    alignment_dir = join(self._output_dir, "data", "basecall", "alignment")
    if self._pipeline == "444":
        input_sam = join(alignment_dir,
                         "%s_final.sam" % (self._sample_name))
    else:
        input_sam = join(alignment_dir,
                         "%s.primer.stat.sam" % (self._sample_name))
    script = join(self._script_home, 'pipelines', 'amplicon_check.sh')
    cmd = [
        script,
        '-w', settings.SCRIPT_HOME,
        '-i', input_sam,
        '-n', self._sample_name,
        '-o', self._output_dir,
        '-u', join(self._script_home, 'utils'),
    ]
    run_command(__modname__, cmd, self._log_file)
def run_tst170_stat_parser(self, tst170_dir):
    """Parse TST170 run statistics into the stat JSON / summary file."""
    with open(self._status_log_file, 'w') as f:
        f.write('[STATUS] TruSightTumor170 pipeline\n[PROGRESS] 93')
    log_progress(__modname__, 'Analysis statistics start', f=self._log_file)
    parser = join(self._script_home, 'run_stat_parser.py')
    cmd = [
        'python', parser,
        'pipeline={0}'.format(self._pipeline),
        'sample_name={0}'.format(self._sample_name),
        'stat_json={0}'.format(
            settings.get_stat_json(self._output_dir, self._sample_name)),
        'tst170_dir={0}'.format(tst170_dir),
        'summary_file={0}'.format(
            settings.get_summary_file(self._output_dir, self._sample_name))
    ]
    run_command(__modname__, cmd, self._log_file)
    log_progress(__modname__, 'Analysis statistics finished',
                 f=self._log_file)
def run_tst170_cnv_parser(self):
    """Generate the CNV JSON and plot from the TST170 CNV VCF."""
    with open(self._status_log_file, 'w') as f:
        f.write('[STATUS] TruSightTumor170 pipeline\n[PROGRESS] 90')
    log_progress(__modname__, 'CNV JSON and plot generation start',
                 f=self._log_file)
    parser = join(self._script_home, 'run_cnv_parser.py')
    cmd = [
        'python', parser,
        'cnv_vcf={0}'.format(
            settings.get_cnv_vcf(self._output_dir, self._sample_name)),
        'cnv_tsv={0}'.format(
            settings.get_cnv_tsv(self._output_dir, self._sample_name)),
        'cnv_plot={0}'.format(
            settings.get_cnv_plot(self._output_dir, self._sample_name)),
        'cnv_fc_stat={0}'.format(
            settings.get_cnv_fc(self._output_dir, self._sample_name)),
        'log_file={0}'.format(self._log_file),
        'sample_name={0}'.format(self._sample_name),
        'cnv_plot_script={0}'.format(join(self._script_home, 'cnv_plot.R'))
    ]
    run_command(__modname__, cmd, self._log_file)
    log_progress(__modname__, 'CNV JSON and plot generation finished',
                 f=self._log_file)
def run_fastq_validation(self):
    """Validate both FASTQ files (R1 then R2) via fastq_validation.sh."""
    with open(self._status_log_file, 'w') as f:
        f.write('[STATUS] FASTQ file validation\n[PROGRESS] 10')
    log_progress(__modname__, 'Run the fastq validation', f=self._log_file)
    script = join(self._script_home, 'pipelines', 'fastq_validation.sh')
    # Same invocation for each read file; only '-i' differs.
    for fastq in (self._fastq_r1, self._fastq_r2):
        cmd = [
            script,
            '-w', settings.SCRIPT_HOME,
            '-i', fastq,
            '-n', self._sample_name,
            '-o', self._output_dir,
        ]
        run_command(__modname__, cmd, self._log_file)
def run_vcf_to_json(self, flag):
    """Convert the final VCF to JSON via vcf_to_json.sh.

    The HEREDaccuTest flag selects the SNV-only final VCF; all other
    flags use the combined final VCF. The sample source has its
    parentheses backslash-escaped for safe shell-side consumption.
    """
    with open(self._status_log_file, 'w') as f:
        f.write('[STATUS] convert vcf to json\n[PROGRESS] 75')
    log_progress(__modname__, 'Run the vcf to json', f=self._log_file)
    exec_point = join(self._script_home, 'pipelines', 'vcf_to_json.sh')
    if flag == "HEREDaccuTest":
        final_vcf = join(self._output_dir, 'data', 'variant',
                         '%s_final.snv.vcf' % (self._sample_name))
    else:
        final_vcf = join(self._output_dir, 'data', 'variant',
                         '%s_final.vcf' % (self._sample_name))
    # BUG FIX: "\(" / "\)" are invalid escape sequences (SyntaxWarning on
    # modern Python). "\\(" / "\\)" produce the identical runtime bytes.
    exec_cmd = [
        exec_point,
        '-w', settings.SCRIPT_HOME,
        '-i', final_vcf,
        '-a', self._pipeline_name,
        '-o', self._output_dir,
        '-n', self._sample_name,
        '-u', join(self._script_home, 'utils'),
        '-p', self._pipeline,
        '-s', self._sample_source.replace("(", "\\(").replace(")", "\\)")
    ]
    run_command(__modname__, exec_cmd, self._log_file)
def run_tst170_summarize_result(self):
    """Summarize the final TST170 result via summarize_final_result.py."""
    with open(self._status_log_file, 'w') as f:
        f.write('[STATUS] Summarize Final Result\n[PROGRESS] 99')
    log_progress(__modname__, 'Summarize Final Result start',
                 f=self._log_file)
    summarizer = join(self._script_home, 'summarize_final_result.py')
    cmd = [
        'python', summarizer,
        'output_dir={0}'.format(self._output_dir),
        'report_dir={0}'.format(settings.REPORT_HOME),
        'fastqc_dir={0}'.format(settings.FASTQC_HOME),
        'sample_name={0}'.format(self._sample_name),
        'log_file={0}'.format(self._log_file),
        'cnv_plot={0}'.format(
            settings.get_cnv_plot(self._output_dir, self._sample_name)),
        'statistics_file={0}'.format(
            settings.get_statistics_file(self._output_dir,
                                         self._sample_name)),
        'pipeline={0}'.format(self._pipeline)
    ]
    run_command(__modname__, cmd, self._log_file)
    log_progress(__modname__, 'Summarize Final Result finished',
                 f=self._log_file)
def run_dbnsfp_annotation(self, input_file, output_file):
    """Annotate variants with dbNSFP via SnpSift, md5-skippable.

    SnpSift writes to a temp VCF, ngb_transcript_dbNSFP post-processes it
    into output_file, then the md5 is recorded and the tmp dir is purged.
    """
    dbnsfp_tmp_vcf = join(self._variant_dir,
                          '{0}_dbnsfp_tmp.vcf'.format(self._sample_name))
    output_md5 = '{0}.md5'.format(output_file)
    if md5sum_check(output_file, output_md5):
        log_progress(__modname__, 'dbNSFP annotation already finished!!!',
                     f=self._log_file)
        log_version(__modname__, self._sw['dbnsfp_db_ver'],
                    f=self._log_file)
        return
    log_progress(__modname__, 'dbNSFP annotation start', f=self._log_file)
    log_version(__modname__, self._sw['dbnsfp_db_ver'], f=self._log_file)
    snpsift_cmd = [
        self._sw['java'], '-Xmx4g',
        '-XX:ParallelGCThreads={0}'.format(self._pe_core),
        '-Djava.io.tmpdir={0}'.format(self._sample_tmp_dir),
        '-jar', self._sw['snpsift'], 'dbnsfp',
        '-f', 'aapos,aapos_SIFT,aapos_FATHMM,Uniprot_acc,Interpro_domain,SIFT_pred,SIFT_score,LRT_pred,MutationTaster_pred,MutationTaster_score,GERP++_NR,GERP++_RS,phastCons100way_vertebrate,MutationAssessor_pred,FATHMM_pred,PROVEAN_pred,MetaSVM_pred,Polyphen2_HDIV_pred,Polyphen2_HDIV_score,Polyphen2_HVAR_pred,Polyphen2_HVAR_score,CADD_phred',
        '-db', self._sw['dbnsfp_db'],
        input_file
    ]
    # SnpSift emits the annotated VCF on stdout; capture it in the tmp file.
    run_command_file_handle(__modname__, snpsift_cmd, self._log_file, 'w',
                            dbnsfp_tmp_vcf)
    transcript_cmd = [
        'python', self._sw['ngb_transcript_dbNSFP'],
        '-o', output_file,
        dbnsfp_tmp_vcf
    ]
    run_command(__modname__, transcript_cmd, self._log_file)
    run_command_md5sum(__modname__, self._log_file, output_file, output_md5)
    # NOTE(review): 'dir/*' only globs if run_command uses a shell —
    # confirm, otherwise this rm is a no-op on the directory contents.
    run_command(__modname__,
                ['rm', '-rf', '{0}/*'.format(self._sample_tmp_dir)],
                self._log_file)
    log_progress(__modname__, 'dbNSFP annotation finished',
                 f=self._log_file)
def link_files(self):
    """Symlink FastQC outputs (and, for pipeline '901', the CNV plot)
    into the sample report directory, replacing any stale links."""
    def relink(src, dst):
        # 'ln -s' fails if dst exists, so clear it first.
        if os.path.exists(dst):
            os.remove(dst)
        run_command(__modname__, ['ln', '-s', src, dst], self._log_file)

    # fastqc
    for name in self.get_fastqc_names():
        relink(join(self._fastqc_dir, name),
               join(self._sample_report_dir, name))
    if self._pipeline == '901':
        # cnv plot
        relink(self._cnv_plot,
               join(self._sample_report_dir,
                    os.path.basename(self._cnv_plot)))
def run_tst170_annotation(self):
    """Annotate TST170 variants via the run_annotation.py wrapper."""
    with open(self._status_log_file, 'w') as f:
        f.write('[STATUS] TruSightTumor170 pipeline\n[PROGRESS] 80')
    log_progress(__modname__, 'TST170 annotation start', f=self._log_file)
    annotator = join(self._script_home, 'run_annotation.py')
    cmd = [
        'python', annotator,
        'output_dir={0}'.format(self._output_dir),
        'variant_dir={0}'.format(settings.VARIANT_HOME),
        'tmp_dir={0}'.format(settings.TEMP_HOME),
        'log_file={0}'.format(self._log_file),
        'final_bam={0}'.format(
            settings.get_final_bam(self._output_dir, self._sample_name)),
        'raw_vcf={0}'.format(
            settings.get_tst170_raw_vcf(self._output_dir,
                                        self._sample_name)),
        'final_vcf={0}'.format(
            settings.get_final_vcf(self._output_dir, self._sample_name)),
        'sample_name={0}'.format(self._sample_name),
        'pe_core={0}'.format(self._pe_core)
    ]
    run_command(__modname__, cmd, self._log_file)
    log_progress(__modname__, 'TST170 annotation finished',
                 f=self._log_file)
def add_type_to_vcf(self, input_file, output_file):
    """Add the TYPE INFO field to a VCF via ngb_add_vcfinfo, md5-skippable."""
    output_md5 = '{0}.md5'.format(output_file)
    if md5sum_check(output_file, output_md5):
        log_progress(__modname__, 'Add TYPE info already finished!!!',
                     f=self._log_file)
        return
    log_progress(__modname__, 'Add TYPE info start', f=self._log_file)
    # Never let a stale output survive a rerun.
    if os.path.exists(output_file):
        os.remove(output_file)
    cmd = [
        'python', self._sw['ngb_add_vcfinfo'],
        '-o', output_file,
        input_file
    ]
    run_command(__modname__, cmd, self._log_file)
    run_command_md5sum(__modname__, self._log_file, output_file, output_md5)
    log_progress(__modname__, 'Add TYPE info finished', f=self._log_file)
def run_summarize_intermediate_result(self, tst170_dir):
    """Copy intermediate TST170 results via summarize_intermediate_result.py."""
    with open(self._status_log_file, 'w') as f:
        f.write('[STATUS] Copy intermediate results\n[PROGRESS] 70')
    log_progress(__modname__,
                 'Copy the TruSightTumor170 pipeline result start',
                 f=self._log_file)
    copier = join(self._script_home, 'summarize_intermediate_result.py')
    cmd = [
        'python', copier,
        'output_dir={0}'.format(self._output_dir),
        'assay_dir={0}'.format(settings.ASSAY_HOME),
        'fastq_dir={0}'.format(settings.FASTQ_HOME),
        'align_dir={0}'.format(settings.ALIGNMENT_HOME),
        'variant_dir={0}'.format(settings.VARIANT_HOME),
        'log_file={0}'.format(self._log_file),
        'tst170_dir={0}'.format(tst170_dir),
        'pipeline={0}'.format(self._pipeline),
        'sample_name={0}'.format(self._sample_name),
        'final_bam={0}'.format(
            settings.get_final_bam(self._output_dir, self._sample_name))
    ]
    run_command(__modname__, cmd, self._log_file)
    log_progress(__modname__, 'Copy the TST170 pipeline result finished',
                 f=self._log_file)
def generate_tdf_file(self, final_bam):
    """Generate an IGV .tdf coverage file from final_bam, md5-skippable."""
    tdf_file = '{0}.tdf'.format(final_bam)
    tdf_file_md5 = '{0}.md5'.format(tdf_file)
    if md5sum_check(tdf_file, tdf_file_md5):
        log_progress(__modname__,
                     'TDF file generation already finished!!!',
                     f=self._log_file)
        log_version(__modname__, self._sw['igvtools_ver'],
                    f=self._log_file)
        return
    log_progress(__modname__, 'TDF file generation start',
                 f=self._log_file)
    log_version(__modname__, self._sw['igvtools_ver'], f=self._log_file)
    # Remove a stale TDF so the igvtools run starts clean.
    if os.path.exists(tdf_file):
        os.remove(tdf_file)
    cmd = [self._sw['igvtools'], 'count', final_bam, tdf_file, 'hg19']
    run_command(__modname__, cmd, self._log_file)
    run_command_md5sum(__modname__, self._log_file, tdf_file, tdf_file_md5)
    log_progress(__modname__, 'TDF file generation finished',
                 f=self._log_file)
def statistics(self, pileup_depth):
    """Run the analysis-statistics script, skipped when the summary file
    already passes its md5 check."""
    fastq_files = self.get_fastq_names()
    md5_file = '%s.md5' % (self._summary_file)
    if md5sum_check(self._summary_file, md5_file):
        log_progress(__modname__,
                     'Analysis Statistics already finished!!!',
                     f=self._log_file)
        return
    log_progress(__modname__, 'Analysis Statistics start',
                 f=self._log_file)
    cmd = [self._script]
    # The script expects exactly eight FASTQ-derived inputs, positionally.
    cmd += [fastq_files[i] for i in range(8)]
    cmd += [self._final_bam, self._target_bed, pileup_depth,
            self._summary_file]
    run_command(__modname__, cmd, self._log_file)
    log_progress(__modname__, 'Analysis Statistics finished',
                 f=self._log_file)
def run_tst170_analysis_stat(self):
    """Compute TST170 analysis statistics via run_analysis_statistics.py."""
    with open(self._status_log_file, 'w') as f:
        f.write('[STATUS] TruSightTumor170 pipeline\n[PROGRESS] 93')
    log_progress(__modname__, 'Analysis statistics start', f=self._log_file)
    wrapper = join(self._script_home, 'run_analysis_statistics.py')
    cmd = [
        'python', wrapper,
        'output_dir={0}'.format(self._output_dir),
        'log_file={0}'.format(self._log_file),
        'fastq_dir={0}'.format(settings.FASTQ_HOME),
        'assay_dir={0}'.format(settings.ASSAY_HOME),
        'align_dir={0}'.format(settings.ALIGNMENT_HOME),
        'target_bed={0}'.format(
            settings.get_tst170_DNA_bed(self._output_dir, self._pipeline)),
        'sample_name={0}'.format(self._sample_name),
        'final_bam={0}'.format(
            settings.get_final_bam(self._output_dir, self._sample_name)),
        'summary_file={0}'.format(
            settings.get_summary_file(self._output_dir, self._sample_name)),
        'script={0}'.format(
            join(self._script_home, 'run_analysis_statistics.sh')),
        'pipeline={0}'.format(self._pipeline)
    ]
    run_command(__modname__, cmd, self._log_file)
    log_progress(__modname__, 'Analysis statistics finished',
                 f=self._log_file)
def run_tst170_qc_report(self):
    """Generate the QC report via run_qc_report_generation.py."""
    with open(self._status_log_file, 'w') as f:
        f.write('[STATUS] TruSightTumor170 pipeline\n[PROGRESS] 98')
    log_progress(__modname__, 'QC report generation start',
                 f=self._log_file)
    generator = join(self._script_home, 'run_qc_report_generation.py')
    cmd = [
        'python', generator,
        'sample_name={0}'.format(self._sample_name),
        'pipeline={0}'.format(self._pipeline),
        'pipeline_name={0}'.format(self._pipeline_name),
        'instrument={0}'.format(self._platform),
        'specimen={0}'.format(self._sample_source),
        'run_name={0}'.format(self._run_name),
        'fastqc_dir={0}'.format(settings.FASTQC_HOME),
        'stat_json={0}'.format(
            settings.get_stat_json(self._output_dir, self._sample_name)),
        'log_file={0}'.format(self._log_file),
        'output_dir={0}'.format(self._output_dir),
        'cnv_tsv={0}'.format(
            settings.get_cnv_tsv(self._output_dir, self._sample_name)),
        'cnv_plot={0}'.format(
            settings.get_cnv_plot(self._output_dir, self._sample_name))
    ]
    run_command(__modname__, cmd, self._log_file)
    log_progress(__modname__, 'QC report generation finished',
                 f=self._log_file)
def plot_generation(self):
    """Render the CNV plot with Rscript, md5-skippable."""
    png_md5 = '{0}.md5'.format(self._cnv_plot)
    if md5sum_check(self._cnv_plot, png_md5):
        log_progress(__modname__,
                     'CNV plot generation already finished!!!',
                     f=self._log_file)
        return
    log_progress(__modname__, 'CNV plot generation start',
                 f=self._log_file)
    # Start from a clean slate so a stale plot never masks a failure.
    if os.path.exists(self._cnv_plot):
        os.remove(self._cnv_plot)
    cmd = [
        'Rscript', self._cnv_plot_script,
        self._sample_name,
        self._cnv_fc_stat,
        self._cnv_plot
    ]
    run_command(__modname__, cmd, self._log_file)
    run_command_md5sum(__modname__, self._log_file, self._cnv_plot, png_md5)
    log_progress(__modname__, 'CNV plot generation finished',
                 f=self._log_file)
def add_hgvs(self, input_file, output_file):
    """Add HGVS info and variant type via ngb_add_HGVS, md5-skippable."""
    output_md5 = '{0}.md5'.format(output_file)
    if md5sum_check(output_file, output_md5):
        log_progress(__modname__,
                     'Add HGVS info and variant type already finished!!!',
                     f=self._log_file)
        return
    log_progress(__modname__, 'Add HGVS info and variant type start',
                 f=self._log_file)
    if os.path.exists(output_file):
        os.remove(output_file)
    cmd = [
        'python', self._sw['ngb_add_HGVS'],
        '-d', self._sw['mutect2_bed'],
        '-o', output_file,
        input_file
    ]
    run_command(__modname__, cmd, self._log_file)
    run_command_md5sum(__modname__, self._log_file, output_file, output_md5)
    log_progress(__modname__, 'Add HGVS info and variant type finished',
                 f=self._log_file)
def run_clinvar_annotation(self, input_file, output_file):
    """Annotate variants against ClinVar (two-stage), md5-skippable.

    Stage 1 (ngb_anno_clinvar) writes a temp VCF; stage 2
    (ngb_clinvar_variation) refines it into output_file.
    """
    clinvar_tmp_vcf = join(self._variant_dir,
                           '{0}_clinvar_tmp.vcf'.format(self._sample_name))
    output_md5 = '{0}.md5'.format(output_file)
    if md5sum_check(output_file, output_md5):
        log_progress(__modname__, 'ClinVar annotation already finished!!!',
                     f=self._log_file)
        log_version(__modname__, self._sw['ngb_clinvar_ver'],
                    f=self._log_file)
        return
    log_progress(__modname__, 'ClinVar annotation start', f=self._log_file)
    log_version(__modname__, self._sw['ngb_clinvar_ver'], f=self._log_file)
    anno_cmd = [
        'python', self._sw['ngb_anno_clinvar'],
        '--dbfile', self._sw['ngb_clinvar_db'],
        '--infoVCF', self._sw['clinvar_compact_header'],
        '--inVCF', input_file,
        '--outVCF', clinvar_tmp_vcf
    ]
    run_command(__modname__, anno_cmd, self._log_file)
    variation_cmd = [
        'python', self._sw['ngb_clinvar_variation'],
        '-d', self._sw['ngb_clinvar_ref'],
        '-o', output_file,
        clinvar_tmp_vcf
    ]
    run_command(__modname__, variation_cmd, self._log_file)
    run_command_md5sum(__modname__, self._log_file, output_file, output_md5)
    log_progress(__modname__, 'ClinVar annotation finished',
                 f=self._log_file)
def run_annotation(self, db_name, target_vcf, target_vcf_ver, _info,
                   _name, input_file, output_file):
    """Annotate input_file against target_vcf with SnpSift, md5-skippable.

    Optional '-info' / '-name' flags are added only when the
    corresponding argument is non-empty. SnpSift output (stdout) is
    captured into output_file, then the java tmp dir is purged.
    """
    output_md5 = '{0}.md5'.format(output_file)
    if md5sum_check(output_file, output_md5):
        log_progress(__modname__,
                     '{0} annotation already finished!!!'.format(db_name),
                     f=self._log_file)
        log_version(__modname__, target_vcf_ver, f=self._log_file)
        return
    log_progress(__modname__, '{0} annotation start'.format(db_name),
                 f=self._log_file)
    log_version(__modname__, target_vcf_ver, f=self._log_file)
    cmd = [
        self._sw['java'], '-Xmx4g',
        '-XX:ParallelGCThreads={0}'.format(self._pe_core),
        '-Djava.io.tmpdir={0}'.format(self._sample_tmp_dir),
        '-jar', self._sw['snpsift'], 'annotate'
    ]
    if _info != '':
        cmd.extend(['-info', _info])
    if _name != '':
        cmd.extend(['-name', _name])
    cmd.extend([target_vcf, input_file])
    run_command_file_handle(__modname__, cmd, self._log_file, 'w',
                            output_file)
    run_command_md5sum(__modname__, self._log_file, output_file, output_md5)
    run_command(__modname__,
                ['rm', '-rf', '{0}/*'.format(self._sample_tmp_dir)],
                self._log_file)
    log_progress(__modname__, '{0} annotation finished'.format(db_name),
                 f=self._log_file)
def run_TST170_pipeline(self, TST170_completed_file_path):
    """Run the TruSightTumor170 Local App container and locate its output.

    Skips the docker run when the completion marker already exists.
    Returns the TruSightTumor170_Analysis_* directory path, or None if
    no analysis directory was found.
    """
    with open(self._status_log_file, 'w') as f:
        f.write('[STATUS] TruSightTumor170 pipeline\n[PROGRESS] 5')
    log_progress(__modname__, 'Run the TruSightTumor170 pipeline',
                 f=self._log_file)
    if os.path.exists(TST170_completed_file_path):
        log_progress(__modname__,
                     'TruSightTumor170 pipeline already finished',
                     f=self._log_file)
    else:
        log_progress(__modname__, 'TruSightTumor170 pipeline start',
                     f=self._log_file)
        cmd = [
            'docker', 'run', '-t', '--rm',
            '--name', self._run_id,
            '-v', '/etc/localtime:/etc/localtime',
            '-v', '{0}:/data'.format(self._run_dir),
            '-v', '{0}:/genomes'.format(self._TST170_Resources),
            '-v', '{0}:/analysis'.format(self._output_dir),
            'ngb_tst170:1.0.0.0'
        ]
        run_command(__modname__, cmd, self._log_file)
        # Give the container's filesystem output a moment to settle.
        sleep(10)
    # check TST170 output directory
    log_version(__modname__, 'software,TST170 Local App,v1.0,Oct.2017,http://sapac.support.illumina.com/downloads/trusight-tumor-170-local-app-documentation.html?langsel=/kr/,None', f=self._log_file)
    tst170_dir = None
    for entry in os.listdir(self._output_dir):
        # NOTE(review): if several analysis dirs exist, the last one
        # listed wins — confirm that is acceptable.
        if entry.startswith("TruSightTumor170_Analysis_"):
            tst170_dir = os.path.join(self._output_dir, entry)
    # Only mark completion once Summary.tsv proves the run finished.
    if tst170_dir is not None and os.path.exists(
            join(tst170_dir, 'Summary.tsv')):
        with open(TST170_completed_file_path, 'w') as completed_file:
            completed_file.write('TruSightTumor170 pipeline finished')
        log_progress(__modname__, 'TruSightTumor170 pipeline finished',
                     f=self._log_file)
    return tst170_dir