Ejemplo n.º 1
0
 def run_prepare_check(self):
     """Run the MD5 checksum check for the reference data and software.

     Writes the status/progress marker file, logs progress, then invokes
     the prepare_check.sh pipeline script.
     """
     with open(self._status_log_file, 'w') as status_fh:
         status_fh.write('[STATUS] MD5 checksum for reference and software\n[PROGRESS] 5')
     log_progress(__modname__, 'MD5 checksum for reference and software', f=self._log_file)
     script = join(self._script_home, 'pipelines', 'prepare_check.sh')
     run_command(__modname__, [script, '-w', settings.SCRIPT_HOME], self._log_file)
Ejemplo n.º 2
0
    def copy_standard_output(self):
        """Copy the pipeline's standard output files into the output directories.

        Pipeline code '901' is the DNA run, '902' the RNA run; any other
        code is treated as a fatal configuration error.
        """
        log_progress(__modname__, 'Copy the standard output files to output directory', f=self._log_file)
        sample = self._sample_name
        if self._pipeline == '901':
            src_dir = join(self._tst170_dir, 'DNA_{0}'.format(sample))
            for name in ('{0}_CopyNumberVariants.vcf'.format(sample),
                         '{0}_SmallVariants.genome.vcf'.format(sample)):
                self.copy_files(join(src_dir, name), join(self._variant_dir, name))
            # The Picard target BED is renamed after the sample on copy.
            src_bed = join(self._tst170_dir, 'DNA_IntermediateFiles', 'Alignment', 'DNA_PicardTarget.bed')
            self.copy_files(src_bed, join(self._assay_dir, '{0}.bed'.format(sample)))

        elif self._pipeline == '902':
            src_dir = join(self._tst170_dir, 'RNA_{0}'.format(sample))
            for name in ('{0}_SpliceVariants.vcf'.format(sample),
                         '{0}_Fusions.csv'.format(sample),
                         '{0}_HighConfidenceVariants.csv'.format(sample),
                         '{0}_PublishedFusions.csv'.format(sample)):
                self.copy_files(join(src_dir, name), join(self._variant_dir, name))
        else:
            log_error(__modname__, 'Unknown pipeline code {0} for TST170 pipeline'.format(self._pipeline), f=self._log_file)
            sys.exit(1)
        log_progress(__modname__, 'Copy the standard output files finished', f=self._log_file)
Ejemplo n.º 3
0
 def run_germline_stat(self):
     """Generate the germline statistics JSON from the panel summary file."""
     with open(self._status_log_file, 'w') as fh:
         fh.write('[STATUS] statistics generation\n[PROGRESS] 85')
     log_progress(__modname__, 'Run the analysis stat', f=self._log_file)
     stat_dir = join(self._output_dir, "data", "stat")
     summary_file = join(stat_dir, "{0}.panel.txt".format(self._sample_name))
     stat_json_file = join(stat_dir, "{0}.stat.json".format(self._sample_name))
     GermlineStatParser(self._log_file).run(summary_file, stat_json_file)
Ejemplo n.º 4
0
 def run(self):
     """Run the VCF-to-JSON workflow unless a valid MD5 marks it already done."""
     self._md5_file = '{0}.md5'.format(self._json_file)
     if not md5sum_check(self._json_file, self._md5_file):
         self.workflow()
     else:
         log_progress(__modname__,
                      'VCF to JSON already finished!!!',
                      f=self._log_file)
Ejemplo n.º 5
0
    def run(self):
        """Run the dockerized SNV analysis workflow for one sample/job.

        Steps: clear stale log/marker files, run the SNV pipeline container,
        run PCGR annotation for somatic pipeline codes ('2*'/'3*'), then
        write the completed marker file consumed by the scheduler.
        """
        # remove log and completed files left over from a previous attempt
        if os.path.exists(self._log_file):
            os.remove(self._log_file)
        if os.path.exists(self._completed_file):
            os.remove(self._completed_file)

        log_progress(__modname__,
                     'Entering workflow for job_id: {0}'.format(self._job_id),
                     f=self._log_file)
        # BUGFIX: message said 'SNV Anlaysis start' and carried a no-op
        # .format(self._job_id) call (the string has no placeholder).
        log_progress(__modname__, 'SNV Analysis start', f=self._log_file)

        exec_cmd = [
            'docker', 'run', '-t', '--rm', '--net=host', '-u',
            'ngenebio:ngenebio', '--name',
            'snv_{0}_{1}'.format(self._sample_id, self._job_id), '-h',
            'ngeneanalysys', '-v', '/etc/localtime:/etc/localtime', '-v',
            '{0}/{1}:/NGENEBIO/workflow_out/{1}'.format(
                settings.ANALYSIS_HOME, self._job_id), '-v',
            '{0}:/NGENEBIO/workflow'.format(settings.SCRIPT_HOME), '-v',
            '{0}:/NGENEBIO/workflow-dependencies'.format(
                settings.DEPENDENCIES), '-v',
            '{0}:/NGENEBIO/workflow-app'.format(settings.APP_HOME), '-e',
            'HGVS_SEQREPO_DIR=/NGENEBIO/workflow-dependencies/HGVS/seq_repo/latest',
            '{0}'.format(settings.DOCKER_IMAGE), '/NGENEBIO/workflow/run.sh',
            self._job_id, self._sample_id
        ]

        run_command(__modname__, exec_cmd, self._log_file)
        log_progress(__modname__, 'SNV Processing Complete.', f=self._log_file)

        # PCGR annotation only applies to somatic pipelines (codes 2*/3*),
        # so its command is built inside the branch that uses it.
        if self._pipeline_code.startswith(('2', '3')):
            exec_cmd = [
                '{0}/pcgr/pcgr.py'.format(settings.APP_HOME), '--input_vcf',
                '{0}/{1}/data/variant/{2}_final.vcf'.format(
                    settings.ANALYSIS_HOME, self._job_id,
                    self._sample_name), '{0}/pcgr/'.format(settings.APP_HOME),
                '{0}/{1}/data/variant/'.format(settings.ANALYSIS_HOME,
                                               self._job_id), 'grch37',
                '{0}/pcgr/pcgr.toml'.format(settings.APP_HOME),
                '{0}'.format(self._sample_name), '--force_overwrite'
            ]
            log_progress(__modname__,
                         'PCGR Processing Start.',
                         f=self._log_file)
            run_command(__modname__, exec_cmd, self._log_file)
            log_progress(__modname__,
                         'PCGR Processing Complete.',
                         f=self._log_file)

        with open(self._completed_file, 'w') as completed_file:
            completed_file.write('Job with SGE_TASKID {0} completed.'.format(
                self._sample_id))
Ejemplo n.º 6
0
    def brca_qc_workflow(self):
        """Run BRCA QC generation, then write the final status and completed markers."""
        self.run_brca_qc()

        with open(self._status_log_file, 'w') as f:
            # BUGFIX: status text previously misspelled 'Complete' as 'Compleate'.
            # NOTE(review): the text says 'CNV Processing' inside the BRCA QC
            # workflow — looks copy-pasted; confirm what the status reader expects.
            f.write('[STATUS] CNV Processing Complete\n[PROGRESS] 100')
        log_progress(__modname__, 'BRCA QC Data Generation Completed.', f=self._log_file)

        completed_file_path = join(self._output_dir, 'logs/completed.{0}'.format(self._sample_id))
        with open(completed_file_path, 'w') as completed_file:
            completed_file.write('Job with SGE_TASKID {0} completed.'.format(self._sample_id))
Ejemplo n.º 7
0
 def run_summarize_final_result(self):
     """Summarize the final analysis result via summarize_final_result.sh."""
     with open(self._status_log_file, 'w') as fh:
         fh.write('[STATUS] summarize final result\n[PROGRESS] 90')
     log_progress(__modname__, 'Run the final result summarization', f=self._log_file)
     script = join(self._script_home, 'pipelines', 'summarize_final_result.sh')
     cmd = [script]
     cmd += ['-w', settings.SCRIPT_HOME]
     cmd += ['-o', self._output_dir]
     cmd += ['-n', self._sample_name]
     cmd += ['-i', self._fastq_r1]
     cmd += ['-j', self._fastq_r2]
     run_command(__modname__, cmd, self._log_file)
Ejemplo n.º 8
0
 def run_dna_hered_cnv(self):
     """Run the germline HERED CNV pipeline; abort the workflow on failure."""
     with open(self._status_log_file, 'w') as fh:
         fh.write('[STATUS] Germline HERED CNV pipeline\n[PROGRESS] 100')
     log_progress(__modname__, 'Run the hered cnv pipeline', f=self._log_file)
     script = join(self._script_home, 'pipelines', 'ngb_heredaccutest_cnv_v1.sh')
     cmd = [script,
            '-w', settings.SCRIPT_HOME,
            '-n', self._sample_name,
            '-o', self._output_dir]
     retcode = logging_subprocess_call(cmd, self._log_file)
     if retcode:
         self.handle_workflow_failure('DNA Germline HERED CNV pipeline fail', retcode)
Ejemplo n.º 9
0
 def run_tst170_vcf_to_json(self):
     """Convert the final TST170 VCF into JSON via run_vcf_to_json.py."""
     with open(self._status_log_file, 'w') as fh:
         fh.write('[STATUS] TruSightTumor170 pipeline\n[PROGRESS] 87')
     log_progress(__modname__, 'VCF to JSON start', f=self._log_file)
     script = join(self._script_home, 'run_vcf_to_json.py')
     params = [
         ('vcf_file', settings.get_final_vcf(self._output_dir, self._sample_name)),
         ('json_file', settings.get_final_json(self._output_dir, self._sample_name)),
         ('log_file', self._log_file),
     ]
     cmd = ['python', script] + ['{0}={1}'.format(k, v) for k, v in params]
     run_command(__modname__, cmd, self._log_file)
     log_progress(__modname__, 'VCF to JSON finished', f=self._log_file)
Ejemplo n.º 10
0
 def run_brca_qc(self):
     """Generate BRCA QC data via summarize_brca_result.sh.

     Invokes the workflow failure handler on a non-zero script exit code.
     """
     with open(self._status_log_file, 'w') as f:
         f.write('[STATUS] BRCA QC Data Generation \n[PROGRESS] 99')
     log_progress(__modname__, 'BRCA QC Data Generation start', f=self._log_file)
     exec_point = join(self._script_home, 'pipelines', 'summarize_brca_result.sh')
     exec_cmd = [exec_point,
                 '-w', settings.SCRIPT_HOME,
                 '-n', self._run_name,
                 '-a', self._amplicon_bed,
                 '-o', self._output_dir
     ]
     retcode = logging_subprocess_call(exec_cmd, self._log_file)
     # BUGFIX: retcode was captured but never checked, so script failures
     # were silently ignored; handle them like the sibling pipeline methods.
     if retcode != 0:
         self.handle_workflow_failure('BRCA QC Data Generation fail', retcode)
     log_progress(__modname__, 'BRCA QC Data Generation finished', f=self._log_file)
Ejemplo n.º 11
0
 def run_read_depth_normalization(self):
     """Normalize read depth; updates self._fastq_r1/_r2 to the normalized files."""
     with open(self._status_log_file, 'w') as fh:
         fh.write('[STATUS] Read depth normalization\n[PROGRESS] 15')
     log_progress(__modname__, 'Run the read depth normalization', f=self._log_file)
     script = join(self._script_home, 'pipelines', 'read_depth_normalization.sh')
     cmd = [script,
            '-w', settings.SCRIPT_HOME,
            '-i', self._fastq_r1,
            '-j', self._fastq_r2,
            '-o', self._output_dir,
            '-n', self._sample_name]
     run_command(__modname__, cmd, self._log_file)
     # NOTE(review): the new FASTQ names are bare filenames (no directory) —
     # presumably downstream steps resolve them relative to the working
     # directory; confirm against the callers.
     self._fastq_r1 = '{0}_normalize.1.fastq'.format(self._sample_name)
     self._fastq_r2 = '{0}_normalize.2.fastq'.format(self._sample_name)
Ejemplo n.º 12
0
 def run_analysis_stat(self):
     """Generate analysis statistics from the sample summary file."""
     with open(self._status_log_file, 'w') as fh:
         fh.write('[STATUS] statistics generation\n[PROGRESS] 85')
     log_progress(__modname__, 'Run the analysis stat', f=self._log_file)
     stat_file = join(self._output_dir, 'data', 'stat',
                      '{0}_summary.txt'.format(self._sample_name))
     script = join(self._script_home, 'pipelines', 'analysis_stat.sh')
     cmd = [script,
            '-w', settings.SCRIPT_HOME,
            '-i', stat_file,
            '-n', self._sample_name,
            '-o', self._output_dir,
            '-u', join(self._script_home, 'utils'),
            '-p', self._pipeline]
     run_command(__modname__, cmd, self._log_file)
Ejemplo n.º 13
0
    def cnv_workflow(self):
        """Dispatch the CNV pipeline matching the pipeline code prefix.

        4xx: BRCA, 2xx: solid tumor, 3xx: blood, 6xx: hereditary.  An
        unknown code falls through and only the status file is written.
        """
        if self._pipeline.startswith('4'):           ### brca
            self.run_dna_brca_cnv()
        elif self._pipeline.startswith(('2', '3')):  ### solid / blood (same pipeline)
            self.run_dna_somatic_cnv()
        elif self._pipeline.startswith('6'):         ### hered
            self.run_dna_hered_cnv()

        with open(self._status_log_file, 'w') as f:
            # BUGFIX: status text previously misspelled 'Complete' as 'Compleate'.
            f.write('[STATUS] CNV Processing Complete\n[PROGRESS] 100')
        log_progress(__modname__, 'CNV Processing Completed.', f=self._log_file)
Ejemplo n.º 14
0
 def check_tst170_result(self, tst170_dir):
     """Verify every sample reports 'Analysis Completed.' in Summary.tsv.

     Exits the process with status 1 on any missing sample or read error.
     """
     log_progress(__modname__, 'Check the TruSightTumor170 analysis result status start', f=self._log_file)
     try:
         with open(join(tst170_dir, 'Summary.tsv'), 'r') as fh:
             lines = fh.readlines()
         for sample_name in self._config_opts['SAMPLE_NAMES'].split("|"):
             expected = '{0}\tAnalysis Completed.\n'.format(sample_name)
             if expected not in lines:
                 log_error(__modname__, 'TruSightTumor170 analysis result fail', f=self._log_file)
                 sys.exit(1)
     except Exception as ex:
         # SystemExit from the loop above is not an Exception, so the
         # deliberate exits are not swallowed here.
         log_error(__modname__, str(ex), f=self._log_file)
         sys.exit(1)
     log_progress(__modname__, 'All the TruSightTumor170 analysis result OK', f=self._log_file)
Ejemplo n.º 15
0
 def run_amplicon_check(self):
     """Run the amplicon (adapter) statistics check on the alignment SAM."""
     with open(self._status_log_file, 'w') as fh:
         fh.write('[STATUS] Amplicon(adapter) analysis\n[PROGRESS] 50')
     log_progress(__modname__, 'Run the amplicon stat', f=self._log_file)
     align_dir = join(self._output_dir, "data", "basecall", "alignment")
     # Pipeline 444 reads the final SAM; all others use the primer-stat SAM.
     if self._pipeline == "444":
         input_sam = join(align_dir, '{0}_final.sam'.format(self._sample_name))
     else:
         input_sam = join(align_dir, '{0}.primer.stat.sam'.format(self._sample_name))
     script = join(self._script_home, 'pipelines', 'amplicon_check.sh')
     cmd = [script,
            '-w', settings.SCRIPT_HOME,
            '-i', input_sam,
            '-n', self._sample_name,
            '-o', self._output_dir,
            '-u', join(self._script_home, 'utils')]
     run_command(__modname__, cmd, self._log_file)
Ejemplo n.º 16
0
 def run_rna_somatic_pipeline(self):
     """Run the RNA somatic pipeline; abort the workflow on failure."""
     with open(self._status_log_file, 'w') as fh:
         fh.write('[STATUS] RNA Somatic pipeline\n[PROGRESS] 30')
     log_progress(__modname__, 'Run the RNA somatic pipeline', f=self._log_file)
     script = join(self._script_home, 'pipelines', 'ngb_rna_pipeline.sh')
     cmd = [script,
            '-w', settings.SCRIPT_HOME,
            '-n', self._sample_name,
            '-i', self._fastq_r1,
            '-j', self._fastq_r2,
            '-o', self._output_dir,
            '-t', self._pe_core]
     retcode = logging_subprocess_call(cmd, self._log_file)
     if retcode:
         self.handle_workflow_failure('RNA somatic pipeline fail', retcode)
Ejemplo n.º 17
0
 def run_fastqc(self):
     """Run FASTQC on the raw data via run_fastqc.py."""
     with open(self._status_log_file, 'w') as fh:
         fh.write('[STATUS] TruSightTumor170 pipeline\n[PROGRESS] 75')
     log_progress(__modname__, 'Raw data FASTQC start', f=self._log_file)
     script = join(self._script_home, 'run_fastqc.py')
     params = [
         ('output_dir', self._output_dir),
         ('fastq_dir', join(self._output_dir, settings.FASTQ_HOME)),
         ('sample_name', self._sample_name),
         ('log_file', self._log_file),
         ('fastqc_dir', settings.FASTQC_HOME),
     ]
     cmd = ['python', script] + ['{0}={1}'.format(k, v) for k, v in params]
     run_command(__modname__, cmd, self._log_file)
     log_progress(__modname__, 'RAW data FASTQC finished', f=self._log_file)
Ejemplo n.º 18
0
 def run_tst170_stat_parser(self, tst170_dir):
     """Parse the TST170 statistics into the summary/JSON files via run_stat_parser.py."""
     with open(self._status_log_file, 'w') as fh:
         fh.write('[STATUS] TruSightTumor170 pipeline\n[PROGRESS] 93')
     log_progress(__modname__, 'Analysis statistics start', f=self._log_file)
     script = join(self._script_home, 'run_stat_parser.py')
     params = [
         ('pipeline', self._pipeline),
         ('sample_name', self._sample_name),
         ('stat_json', settings.get_stat_json(self._output_dir, self._sample_name)),
         ('tst170_dir', tst170_dir),
         ('summary_file', settings.get_summary_file(self._output_dir, self._sample_name)),
     ]
     cmd = ['python', script] + ['{0}={1}'.format(k, v) for k, v in params]
     run_command(__modname__, cmd, self._log_file)
     log_progress(__modname__, 'Analysis statistics finished', f=self._log_file)
Ejemplo n.º 19
0
 def run_dna_hered_pipeline(self):
     """Run the germline HERED pipeline; abort the workflow on failure."""
     with open(self._status_log_file, 'w') as fh:
         fh.write('[STATUS] Germline HERED pipeline\n[PROGRESS] 30')
     log_progress(__modname__, 'Run the hered pipeline', f=self._log_file)
     script = join(self._script_home, 'pipelines', 'ngb_heredaccutest_pipeline_v1.sh')
     cmd = [script,
            '-w', settings.SCRIPT_HOME,
            '-n', self._sample_name,
            '-i', self._fastq_r1,
            '-j', self._fastq_r2,
            '-o', self._output_dir,
            '-c', self._canonical_transcript_file,
            '-t', self._pe_core]
     retcode = logging_subprocess_call(cmd, self._log_file)
     if retcode:
         self.handle_workflow_failure('DNA Germline HERED pipeline fail', retcode)
Ejemplo n.º 20
0
 def run_fastq_validation(self):
     """Validate both FASTQ files (R1 then R2) with fastq_validation.sh."""
     with open(self._status_log_file, 'w') as f:
         f.write('[STATUS] FASTQ file validation\n[PROGRESS] 10')
     log_progress(__modname__, 'Run the fastq validation', f=self._log_file)
     exec_point = join(self._script_home, 'pipelines', 'fastq_validation.sh')
     # The same script runs once per read file; the duplicated command
     # construction is folded into a loop (R1 first, then R2, as before).
     for fastq in (self._fastq_r1, self._fastq_r2):
         exec_cmd = [exec_point,
                     '-w', settings.SCRIPT_HOME,
                     '-i', fastq,
                     '-n', self._sample_name,
                     '-o', self._output_dir]
         run_command(__modname__, exec_cmd, self._log_file)
Ejemplo n.º 21
0
 def run_tst170_cnv_parser(self):
     """Generate the CNV JSON and plot via run_cnv_parser.py."""
     with open(self._status_log_file, 'w') as fh:
         fh.write('[STATUS] TruSightTumor170 pipeline\n[PROGRESS] 90')
     log_progress(__modname__, 'CNV JSON and plot generation start', f=self._log_file)
     script = join(self._script_home, 'run_cnv_parser.py')
     params = [
         ('cnv_vcf', settings.get_cnv_vcf(self._output_dir, self._sample_name)),
         ('cnv_tsv', settings.get_cnv_tsv(self._output_dir, self._sample_name)),
         ('cnv_plot', settings.get_cnv_plot(self._output_dir, self._sample_name)),
         ('cnv_fc_stat', settings.get_cnv_fc(self._output_dir, self._sample_name)),
         ('log_file', self._log_file),
         ('sample_name', self._sample_name),
         ('cnv_plot_script', join(self._script_home, 'cnv_plot.R')),
     ]
     cmd = ['python', script] + ['{0}={1}'.format(k, v) for k, v in params]
     run_command(__modname__, cmd, self._log_file)
     log_progress(__modname__, 'CNV JSON and plot generation finished', f=self._log_file)
Ejemplo n.º 22
0
 def run_vcf_to_json(self, flag):
     """Convert the final VCF into JSON via vcf_to_json.sh.

     flag == 'HEREDaccuTest' selects the SNV-only final VCF; any other
     value uses the standard final VCF.
     """
     with open(self._status_log_file, 'w') as f:
         f.write('[STATUS] convert vcf to json\n[PROGRESS] 75')
     log_progress(__modname__, 'Run the vcf to json', f=self._log_file)
     exec_point = join(self._script_home, 'pipelines', 'vcf_to_json.sh')
     if flag == "HEREDaccuTest":
         final_vcf = join(self._output_dir, 'data', 'variant', '{0}_final.snv.vcf'.format(self._sample_name))
     else:
         final_vcf = join(self._output_dir, 'data', 'variant', '{0}_final.vcf'.format(self._sample_name))
     # BUGFIX: '\(' is an invalid escape sequence (SyntaxWarning on modern
     # Python); '\\(' produces the same '\(' value explicitly.  The
     # parentheses are presumably escaped because the value is re-evaluated
     # by the shell script — confirm against vcf_to_json.sh.
     sample_source = self._sample_source.replace("(", "\\(").replace(")", "\\)")
     exec_cmd = [exec_point,
                 '-w', settings.SCRIPT_HOME,
                 '-i', final_vcf,
                 '-a', self._pipeline_name,
                 '-o', self._output_dir,
                 '-n', self._sample_name,
                 '-u', join(self._script_home, 'utils'),
                 '-p', self._pipeline,
                 '-s', sample_source
     ]
     run_command(__modname__, exec_cmd, self._log_file)
Ejemplo n.º 23
0
 def run_tst170_summarize_result(self):
     """Summarize the final TST170 result via summarize_final_result.py."""
     with open(self._status_log_file, 'w') as fh:
         fh.write('[STATUS] Summarize Final Result\n[PROGRESS] 99')
     log_progress(__modname__, 'Summarize Final Result start', f=self._log_file)
     script = join(self._script_home, 'summarize_final_result.py')
     params = [
         ('output_dir', self._output_dir),
         ('report_dir', settings.REPORT_HOME),
         ('fastqc_dir', settings.FASTQC_HOME),
         ('sample_name', self._sample_name),
         ('log_file', self._log_file),
         ('cnv_plot', settings.get_cnv_plot(self._output_dir, self._sample_name)),
         ('statistics_file', settings.get_statistics_file(self._output_dir, self._sample_name)),
         ('pipeline', self._pipeline),
     ]
     cmd = ['python', script] + ['{0}={1}'.format(k, v) for k, v in params]
     run_command(__modname__, cmd, self._log_file)
     log_progress(__modname__, 'Summarize Final Result finished', f=self._log_file)
Ejemplo n.º 24
0
 def run_tst170_annotation(self):
     """Annotate the TST170 raw VCF into the final VCF via run_annotation.py."""
     with open(self._status_log_file, 'w') as fh:
         fh.write('[STATUS] TruSightTumor170 pipeline\n[PROGRESS] 80')
     log_progress(__modname__, 'TST170 annotation start', f=self._log_file)
     script = join(self._script_home, 'run_annotation.py')
     params = [
         ('output_dir', self._output_dir),
         ('variant_dir', settings.VARIANT_HOME),
         ('tmp_dir', settings.TEMP_HOME),
         ('log_file', self._log_file),
         ('final_bam', settings.get_final_bam(self._output_dir, self._sample_name)),
         ('raw_vcf', settings.get_tst170_raw_vcf(self._output_dir, self._sample_name)),
         ('final_vcf', settings.get_final_vcf(self._output_dir, self._sample_name)),
         ('sample_name', self._sample_name),
         ('pe_core', self._pe_core),
     ]
     cmd = ['python', script] + ['{0}={1}'.format(k, v) for k, v in params]
     run_command(__modname__, cmd, self._log_file)
     log_progress(__modname__, 'TST170 annotation finished', f=self._log_file)
Ejemplo n.º 25
0
 def run_summarize_intermediate_result(self, tst170_dir):
     """Copy the TST170 intermediate results via summarize_intermediate_result.py."""
     with open(self._status_log_file, 'w') as fh:
         fh.write('[STATUS] Copy intermediate results\n[PROGRESS] 70')
     log_progress(__modname__, 'Copy the TruSightTumor170 pipeline result start', f=self._log_file)
     script = join(self._script_home, 'summarize_intermediate_result.py')
     params = [
         ('output_dir', self._output_dir),
         ('assay_dir', settings.ASSAY_HOME),
         ('fastq_dir', settings.FASTQ_HOME),
         ('align_dir', settings.ALIGNMENT_HOME),
         ('variant_dir', settings.VARIANT_HOME),
         ('log_file', self._log_file),
         ('tst170_dir', tst170_dir),
         ('pipeline', self._pipeline),
         ('sample_name', self._sample_name),
         ('final_bam', settings.get_final_bam(self._output_dir, self._sample_name)),
     ]
     cmd = ['python', script] + ['{0}={1}'.format(k, v) for k, v in params]
     run_command(__modname__, cmd, self._log_file)
     log_progress(__modname__, 'Copy the TST170 pipeline result finished', f=self._log_file)
Ejemplo n.º 26
0
 def run_tst170_analysis_stat(self):
     """Compute TST170 analysis statistics via run_analysis_statistics.py."""
     with open(self._status_log_file, 'w') as fh:
         fh.write('[STATUS] TruSightTumor170 pipeline\n[PROGRESS] 93')
     log_progress(__modname__, 'Analysis statistics start', f=self._log_file)
     script = join(self._script_home, 'run_analysis_statistics.py')
     params = [
         ('output_dir', self._output_dir),
         ('log_file', self._log_file),
         ('fastq_dir', settings.FASTQ_HOME),
         ('assay_dir', settings.ASSAY_HOME),
         ('align_dir', settings.ALIGNMENT_HOME),
         ('target_bed', settings.get_tst170_DNA_bed(self._output_dir, self._pipeline)),
         ('sample_name', self._sample_name),
         ('final_bam', settings.get_final_bam(self._output_dir, self._sample_name)),
         ('summary_file', settings.get_summary_file(self._output_dir, self._sample_name)),
         ('script', join(self._script_home, 'run_analysis_statistics.sh')),
         ('pipeline', self._pipeline),
     ]
     cmd = ['python', script] + ['{0}={1}'.format(k, v) for k, v in params]
     run_command(__modname__, cmd, self._log_file)
     log_progress(__modname__, 'Analysis statistics finished', f=self._log_file)
Ejemplo n.º 27
0
 def pileup_depth(self, pileup_depth):
     """Generate the samtools per-base pileup depth file, skipping when its MD5 is valid.

     Writes `samtools depth` output to pileup_depth and records its MD5.
     """
     md5_file = '{0}.md5'.format(pileup_depth)
     already_done = md5sum_check(pileup_depth, md5_file)
     if already_done:
         log_progress(__modname__,
                      'Get Pileup Depth already finished!!!',
                      f=self._log_file)
     else:
         log_progress(__modname__,
                      'Get Pileup Depth start',
                      f=self._log_file)
     log_version(__modname__,
                 self._sw['samtools_ver'],
                 f=self._log_file)
     if already_done:
         return
     depth_cmd = [
         self._sw['samtools'], 'depth', '-a', '-q', '0', '-Q', '1',
         '-d', '1000000', '-b', self._target_bed, '--reference',
         self._sw['hg19'], self._final_bam
     ]
     run_command_file_handle(__modname__, depth_cmd, self._log_file, 'w',
                             pileup_depth)
     run_command_md5sum(__modname__, self._log_file, pileup_depth, md5_file)
     log_progress(__modname__,
                  'Get Pileup Depth finished',
                  f=self._log_file)
Ejemplo n.º 28
0
    def run(self):
        """Run the dockerized CNV analysis for one sample/job.

        Removes the stale completed marker, runs the CNV pipeline container,
        then writes the completed marker file consumed by the scheduler.
        """
        # remove the completed marker left over from a previous attempt
        if os.path.exists(self._completed_file):
            os.remove(self._completed_file)

        log_progress(__modname__,
                     'Entering workflow for job_id: {0}'.format(self._job_id),
                     f=self._log_file)
        # BUGFIX: message said 'CNV Anlaysis start' and carried a no-op
        # .format(self._job_id) call (the string has no placeholder).
        log_progress(__modname__, 'CNV Analysis start', f=self._log_file)

        exec_cmd = [
            'docker', 'run', '-t', '--rm', '--net=host', '-u',
            'ngenebio:ngenebio', '--name',
            'cnv_{0}_{1}'.format(self._sample_id, self._job_id), '-h',
            'ngeneanalysys', '-v', '/etc/localtime:/etc/localtime', '-v',
            '{0}/{1}:/NGENEBIO/workflow_out/{1}'.format(
                settings.ANALYSIS_HOME, self._job_id), '-v',
            '{0}:/NGENEBIO/workflow'.format(settings.SCRIPT_HOME), '-v',
            '{0}:/NGENEBIO/workflow-dependencies'.format(
                settings.DEPENDENCIES), '-v',
            '{0}:/NGENEBIO/workflow-app'.format(settings.APP_HOME),
            '{0}'.format(settings.DOCKER_IMAGE),
            '/NGENEBIO/workflow/run_cnv.sh', self._job_id, self._sample_id
        ]

        run_command(__modname__, exec_cmd, self._log_file)
        log_progress(__modname__, 'CNV Processing Complete.', f=self._log_file)

        with open(self._completed_file, 'w') as completed_file:
            completed_file.write('Job with SGE_TASKID {0} completed.'.format(
                self._sample_id))
Ejemplo n.º 29
0
 def vcf_post_processing(self, input_file, refined_vcf):
     """Normalize and decompose the VCF with vt, skipping when the MD5 is valid.

     Pipes `vt normalize | vt decompose` into refined_vcf and records its MD5.
     """
     refined_vcf_md5 = '{0}.md5'.format(refined_vcf)
     if md5sum_check(refined_vcf, refined_vcf_md5):
         log_progress(__modname__,
                      'VCF post processing already finished!!!',
                      f=self._log_file)
         log_version(__modname__, self._sw['vt_ver'], f=self._log_file)
         return
     log_progress(__modname__,
                  'VCF post processing start',
                  f=self._log_file)
     log_version(__modname__, self._sw['vt_ver'], f=self._log_file)
     # start from a clean output file before the pipe writes into it
     if os.path.exists(refined_vcf):
         os.remove(refined_vcf)
     pipe_cmds = [
         '{0} normalize -r {1} {2}'.format(self._sw['vt'],
                                           self._sw['hg19'],
                                           input_file),
         '{0} decompose -s -'.format(self._sw['vt']),
     ]
     run_command_pipe_file_handle(__modname__, pipe_cmds, self._log_file,
                                  'w', refined_vcf)
     run_command_md5sum(__modname__, self._log_file, refined_vcf,
                        refined_vcf_md5)
     log_progress(__modname__,
                  'VCF post processing finished',
                  f=self._log_file)
Ejemplo n.º 30
0
 def copy_bam_files(self):
     """Copy the final BAM/BAI and target BED into the align dir, skipping when the MD5 is valid.

     Also generates the TDF coverage file and records the BAM's MD5.
     Exits with status 1 on an unknown pipeline code.
     """
     self._final_bam = join(self._align_dir, '{0}_final.bam'.format(self._sample_name))
     bam_dst_md5 = '{0}.md5'.format(self._final_bam)
     if md5sum_check(self._final_bam, bam_dst_md5):
         log_progress(__modname__, 'Copy the BAM file to output directory already finished', f=self._log_file)
         return
     log_progress(__modname__, 'Copy the BAM file to output directory', f=self._log_file)
     ### 901: DNA, 902: RNA
     if self._pipeline == '901':
         bam_dir = join(self._tst170_dir, 'DNA_IntermediateFiles', 'Alignment')
         bam_src = join(bam_dir, '{0}_realigned.bam'.format(self._sample_name))
         bed_name = 'DNA_PicardTarget.bed'
     elif self._pipeline == '902':
         bam_dir = join(self._tst170_dir, 'RNA_IntermediateFiles', 'Alignment')
         bam_src = join(bam_dir, '{0}.bam'.format(self._sample_name))
         bed_name = 'RNA_PicardTarget.bed'
     else:
         log_error(__modname__, 'Unknown pipeline code {0} for TST170 pipeline'.format(self._pipeline), f=self._log_file)
         sys.exit(1)
     self.copy_files(bam_src, self._final_bam)
     self.copy_files('{0}.bai'.format(bam_src), '{0}.bai'.format(self._final_bam))
     self.copy_files(join(bam_dir, bed_name), join(self._align_dir, bed_name))
     self.generate_tdf_file(self._final_bam)
     run_command_md5sum(__modname__, self._log_file, self._final_bam, bam_dst_md5)
     log_progress(__modname__, 'Copy the BAM file finished', f=self._log_file)