Ejemplo n.º 1
0
 def run_prepare_check(self):
     """Write the 5% progress status and run the prepare-check pipeline script."""
     with open(self._status_log_file, 'w') as status_log:
         status_log.write('[STATUS] MD5 checksum for reference and software\n[PROGRESS] 5')
     log_progress(__modname__, 'MD5 checksum for reference and software', f=self._log_file)
     script = join(self._script_home, 'pipelines', 'prepare_check.sh')
     run_command(__modname__, [script, '-w', settings.SCRIPT_HOME], self._log_file)
Ejemplo n.º 2
0
    def run(self):
        """Run the CNV analysis docker container for this job.

        Removes any stale completed-file, launches run_cnv.sh inside the
        analysis docker image, and writes a completed-file on success so
        a re-run can detect a finished job.
        """
        # clear a stale completed marker from a previous run
        if os.path.exists(self._completed_file):
            os.remove(self._completed_file)

        log_progress(__modname__,
                     'Entering workflow for job_id: {0}'.format(self._job_id),
                     f=self._log_file)
        # BUGFIX: message had a typo ('Anlaysis') and a no-op .format() call
        log_progress(__modname__, 'CNV Analysis start', f=self._log_file)

        exec_cmd = [
            'docker', 'run', '-t', '--rm', '--net=host', '-u',
            'ngenebio:ngenebio', '--name',
            'cnv_{0}_{1}'.format(self._sample_id, self._job_id), '-h',
            'ngeneanalysys', '-v', '/etc/localtime:/etc/localtime', '-v',
            '{0}/{1}:/NGENEBIO/workflow_out/{1}'.format(
                settings.ANALYSIS_HOME, self._job_id), '-v',
            '{0}:/NGENEBIO/workflow'.format(settings.SCRIPT_HOME), '-v',
            '{0}:/NGENEBIO/workflow-dependencies'.format(
                settings.DEPENDENCIES), '-v',
            '{0}:/NGENEBIO/workflow-app'.format(settings.APP_HOME),
            '{0}'.format(settings.DOCKER_IMAGE),
            '/NGENEBIO/workflow/run_cnv.sh', self._job_id, self._sample_id
        ]

        run_command(__modname__, exec_cmd, self._log_file)
        log_progress(__modname__, 'CNV Processing Complete.', f=self._log_file)

        # mark the job as done so a subsequent run can skip it
        with open(self._completed_file, 'w') as completed_file:
            completed_file.write('Job with SGE_TASKID {0} completed.'.format(
                self._sample_id))
Ejemplo n.º 3
0
    def low_confidence_annotation(self, input_file, lowconf_vcf):
        """Annotate low-confidence variants (homopolymer/repeat context).

        Skips the work entirely when the output VCF's md5 checksum is
        already valid; otherwise runs the homopolyx and repeat-count
        scripts in sequence and records a fresh checksum.
        """
        homopolyx_out = join(
            self._variant_dir,
            '{0}_lowconf.homopolyx'.format(self._sample_name))
        vcf_md5 = '{0}.md5'.format(lowconf_vcf)

        if md5sum_check(lowconf_vcf, vcf_md5):
            log_progress(__modname__,
                         'Low confidence annotation already finished!!!',
                         f=self._log_file)
            return

        log_progress(__modname__,
                     'Low confidence annotation start',
                     f=self._log_file)

        # drop stale outputs before regenerating them
        for stale in (homopolyx_out, lowconf_vcf):
            if os.path.exists(stale):
                os.remove(stale)

        homopolyx_cmd = [
            'python', self._sw['ngb_lowconf_homopolyx'], '-p', '5', '-r',
            self._sw['hg19'], '-o', homopolyx_out, input_file
        ]
        run_command(__modname__, homopolyx_cmd, self._log_file)

        repeatcnt_cmd = [
            'python', self._sw['ngb_lowconf_repeatcnt'], '-r',
            self._sw['hg19'], '-o', lowconf_vcf, homopolyx_out
        ]
        run_command(__modname__, repeatcnt_cmd, self._log_file)
        run_command_md5sum(__modname__, self._log_file, lowconf_vcf, vcf_md5)
        log_progress(__modname__,
                     'Low confidence annotation finished',
                     f=self._log_file)
Ejemplo n.º 4
0
    def run(self):
        """Run the SNV analysis docker container, then PCGR when applicable.

        Clears stale log/completed files, runs run.sh inside the analysis
        image, runs PCGR for pipeline codes starting with '2' or '3'
        (somatic pipelines), and writes a completed-file so a re-run can
        detect a finished job.
        """
        # remove log and completed files from a previous run
        if os.path.exists(self._log_file):
            os.remove(self._log_file)
        if os.path.exists(self._completed_file):
            os.remove(self._completed_file)

        log_progress(__modname__,
                     'Entering workflow for job_id: {0}'.format(self._job_id),
                     f=self._log_file)
        # BUGFIX: message had a typo ('Anlaysis') and a no-op .format() call
        log_progress(__modname__, 'SNV Analysis start', f=self._log_file)

        exec_cmd = [
            'docker', 'run', '-t', '--rm', '--net=host', '-u',
            'ngenebio:ngenebio', '--name',
            'snv_{0}_{1}'.format(self._sample_id, self._job_id), '-h',
            'ngeneanalysys', '-v', '/etc/localtime:/etc/localtime', '-v',
            '{0}/{1}:/NGENEBIO/workflow_out/{1}'.format(
                settings.ANALYSIS_HOME, self._job_id), '-v',
            '{0}:/NGENEBIO/workflow'.format(settings.SCRIPT_HOME), '-v',
            '{0}:/NGENEBIO/workflow-dependencies'.format(
                settings.DEPENDENCIES), '-v',
            '{0}:/NGENEBIO/workflow-app'.format(settings.APP_HOME), '-e',
            'HGVS_SEQREPO_DIR=/NGENEBIO/workflow-dependencies/HGVS/seq_repo/latest',
            '{0}'.format(settings.DOCKER_IMAGE), '/NGENEBIO/workflow/run.sh',
            self._job_id, self._sample_id
        ]

        run_command(__modname__, exec_cmd, self._log_file)
        log_progress(__modname__, 'SNV Processing Complete.', f=self._log_file)

        exec_cmd = [
            '{0}/pcgr/pcgr.py'.format(settings.APP_HOME), '--input_vcf',
            '{0}/{1}/data/variant/{2}_final.vcf'.format(
                settings.ANALYSIS_HOME, self._job_id,
                self._sample_name), '{0}/pcgr/'.format(settings.APP_HOME),
            '{0}/{1}/data/variant/'.format(settings.ANALYSIS_HOME,
                                           self._job_id), 'grch37',
            '{0}/pcgr/pcgr.toml'.format(settings.APP_HOME),
            '{0}'.format(self._sample_name), '--force_overwrite'
        ]

        # startswith accepts a tuple -- replaces the two-clause 'or' chain
        if self._pipeline_code.startswith(('2', '3')):
            log_progress(__modname__,
                         'PCGR Processing Start.',
                         f=self._log_file)
            run_command(__modname__, exec_cmd, self._log_file)
            log_progress(__modname__,
                         'PCGR Processing Complete.',
                         f=self._log_file)

        # mark the job as done so a subsequent run can skip it
        with open(self._completed_file, 'w') as completed_file:
            completed_file.write('Job with SGE_TASKID {0} completed.'.format(
                self._sample_id))
Ejemplo n.º 5
0
 def run_summarize_final_result(self):
     """Write the 90% progress status and run the final-result summarizer script."""
     with open(self._status_log_file, 'w') as status_log:
         status_log.write('[STATUS] summarize final result\n[PROGRESS] 90')
     log_progress(__modname__, 'Run the final result summarization', f=self._log_file)
     script = join(self._script_home, 'pipelines', 'summarize_final_result.sh')
     cmd = [script,
            '-w', settings.SCRIPT_HOME,
            '-o', self._output_dir,
            '-n', self._sample_name,
            '-i', self._fastq_r1,
            '-j', self._fastq_r2]
     run_command(__modname__, cmd, self._log_file)
Ejemplo n.º 6
0
 def run_tst170_vcf_to_json(self):
     """Convert the final TST170 VCF into the report JSON (87% progress)."""
     with open(self._status_log_file, 'w') as status_log:
         status_log.write('[STATUS] TruSightTumor170 pipeline\n[PROGRESS] 87')
     log_progress(__modname__, 'VCF to JSON start', f=self._log_file)
     converter = join(self._script_home, 'run_vcf_to_json.py')
     final_vcf = settings.get_final_vcf(self._output_dir, self._sample_name)
     final_json = settings.get_final_json(self._output_dir, self._sample_name)
     cmd = [
         'python', converter,
         'vcf_file={0}'.format(final_vcf),
         'json_file={0}'.format(final_json),
         'log_file={0}'.format(self._log_file)
     ]
     run_command(__modname__, cmd, self._log_file)
     log_progress(__modname__, 'VCF to JSON finished', f=self._log_file)
Ejemplo n.º 7
0
    def copy_version_file(self, panel, ver):
        """Copy the version/database/software manifests for *panel*/*ver*
        into the sample's version directory, renamed per sample."""
        version_dir = join(self._output_dir, 'version')
        run_command(__modname__, ['mkdir', '-p', version_dir], self._log_file)

        # (source doc type, per-sample acronym, file extension)
        manifests = [('version', 'version', 'txt'),
                     ('database', 'DB', 'tsv'),
                     ('software', 'SW', 'tsv')]
        for file_type, file_acronym, file_format in manifests:
            src = join(settings.SCRIPT_HOME, 'docs', file_type,
                       '{0}_{1}.{2}'.format(panel, ver, file_format))
            dst = join(version_dir,
                       '{0}_{1}.{2}'.format(self._sample_name, file_acronym,
                                            file_format))
            run_command(__modname__, ['cp', src, dst], self._log_file)
Ejemplo n.º 8
0
 def run_analysis_stat(self):
     """Generate per-sample analysis statistics (85% progress)."""
     with open(self._status_log_file, 'w') as status_log:
         status_log.write('[STATUS] statistics generation\n[PROGRESS] 85')
     log_progress(__modname__, 'Run the analysis stat', f=self._log_file)
     summary_file = join(self._output_dir, 'data', 'stat',
                         '%s_summary.txt' % (self._sample_name))
     script = join(self._script_home, 'pipelines', 'analysis_stat.sh')
     cmd = [script,
            '-w', settings.SCRIPT_HOME,
            '-i', summary_file,
            '-n', self._sample_name,
            '-o', self._output_dir,
            '-u', join(self._script_home, 'utils'),
            '-p', self._pipeline]
     run_command(__modname__, cmd, self._log_file)
Ejemplo n.º 9
0
 def run_read_depth_normalization(self):
     """Normalize read depth of the paired FASTQs (15% progress).

     Side effect: repoints self._fastq_r1/_fastq_r2 at the normalized
     FASTQ names produced by the pipeline script.
     """
     with open(self._status_log_file, 'w') as status_log:
         status_log.write('[STATUS] Read depth normalization\n[PROGRESS] 15')
     log_progress(__modname__, 'Run the read depth normalization', f=self._log_file)
     script = join(self._script_home, 'pipelines', 'read_depth_normalization.sh')
     cmd = [script,
            '-w', settings.SCRIPT_HOME,
            '-i', self._fastq_r1,
            '-j', self._fastq_r2,
            '-o', self._output_dir,
            '-n', self._sample_name]
     run_command(__modname__, cmd, self._log_file)
     # downstream steps consume the normalized FASTQs from here on
     self._fastq_r1 = "%s_normalize.1.fastq" % (self._sample_name)
     self._fastq_r2 = "%s_normalize.2.fastq" % (self._sample_name)
Ejemplo n.º 10
0
 def run_fastqc(self):
     """Run FASTQC on the raw FASTQ files (75% progress)."""
     with open(self._status_log_file, 'w') as status_log:
         status_log.write('[STATUS] TruSightTumor170 pipeline\n[PROGRESS] 75')
     log_progress(__modname__, 'Raw data FASTQC start', f=self._log_file)
     runner = join(self._script_home, 'run_fastqc.py')
     cmd = [
         'python', runner,
         'output_dir={0}'.format(self._output_dir),
         'fastq_dir={0}'.format(join(self._output_dir, settings.FASTQ_HOME)),
         'sample_name={0}'.format(self._sample_name),
         'log_file={0}'.format(self._log_file),
         'fastqc_dir={0}'.format(settings.FASTQC_HOME)
     ]
     run_command(__modname__, cmd, self._log_file)
     log_progress(__modname__, 'RAW data FASTQC finished', f=self._log_file)
Ejemplo n.º 11
0
 def run_amplicon_check(self):
     """Run amplicon (adapter) statistics on the alignment SAM (50% progress)."""
     with open(self._status_log_file, 'w') as status_log:
         status_log.write('[STATUS] Amplicon(adapter) analysis\n[PROGRESS] 50')
     log_progress(__modname__, 'Run the amplicon stat', f=self._log_file)
     alignment_dir = join(self._output_dir, "data", "basecall", "alignment")
     # pipeline 444 uses the final alignment instead of the primer-stat SAM
     if self._pipeline == "444":
         input_sam = join(alignment_dir, "%s_final.sam" % (self._sample_name))
     else:
         input_sam = join(alignment_dir, "%s.primer.stat.sam" % (self._sample_name))
     script = join(self._script_home, 'pipelines', 'amplicon_check.sh')
     cmd = [script,
            '-w', settings.SCRIPT_HOME,
            '-i', input_sam,
            '-n', self._sample_name,
            '-o', self._output_dir,
            '-u', join(self._script_home, 'utils')]
     run_command(__modname__, cmd, self._log_file)
Ejemplo n.º 12
0
 def run_tst170_stat_parser(self, tst170_dir):
     """Parse TST170 statistics into the stat JSON (93% progress)."""
     with open(self._status_log_file, 'w') as status_log:
         status_log.write('[STATUS] TruSightTumor170 pipeline\n[PROGRESS] 93')
     log_progress(__modname__, 'Analysis statistics start', f=self._log_file)
     parser = join(self._script_home, 'run_stat_parser.py')
     stat_json = settings.get_stat_json(self._output_dir, self._sample_name)
     summary = settings.get_summary_file(self._output_dir, self._sample_name)
     cmd = [
         'python', parser,
         'pipeline={0}'.format(self._pipeline),
         'sample_name={0}'.format(self._sample_name),
         'stat_json={0}'.format(stat_json),
         'tst170_dir={0}'.format(tst170_dir),
         'summary_file={0}'.format(summary)
     ]
     run_command(__modname__, cmd, self._log_file)
     log_progress(__modname__, 'Analysis statistics finished', f=self._log_file)
Ejemplo n.º 13
0
 def run_tst170_cnv_parser(self):
     """Generate the CNV JSON and plot from the CNV VCF (90% progress)."""
     with open(self._status_log_file, 'w') as status_log:
         status_log.write('[STATUS] TruSightTumor170 pipeline\n[PROGRESS] 90')
     log_progress(__modname__, 'CNV JSON and plot generation start', f=self._log_file)
     parser = join(self._script_home, 'run_cnv_parser.py')
     out_dir, name = self._output_dir, self._sample_name
     cmd = [
         'python', parser,
         'cnv_vcf={0}'.format(settings.get_cnv_vcf(out_dir, name)),
         'cnv_tsv={0}'.format(settings.get_cnv_tsv(out_dir, name)),
         'cnv_plot={0}'.format(settings.get_cnv_plot(out_dir, name)),
         'cnv_fc_stat={0}'.format(settings.get_cnv_fc(out_dir, name)),
         'log_file={0}'.format(self._log_file),
         'sample_name={0}'.format(name),
         'cnv_plot_script={0}'.format(join(self._script_home, 'cnv_plot.R'))
     ]
     run_command(__modname__, cmd, self._log_file)
     log_progress(__modname__, 'CNV JSON and plot generation finished', f=self._log_file)
Ejemplo n.º 14
0
 def run_fastq_validation(self):
     """Validate both FASTQ files with the validation pipeline script (10% progress)."""
     with open(self._status_log_file, 'w') as status_log:
         status_log.write('[STATUS] FASTQ file validation\n[PROGRESS] 10')
     log_progress(__modname__, 'Run the fastq validation', f=self._log_file)
     script = join(self._script_home, 'pipelines', 'fastq_validation.sh')
     # R1 then R2, with identical options apart from the input file
     for fastq in (self._fastq_r1, self._fastq_r2):
         cmd = [script,
                '-w', settings.SCRIPT_HOME,
                '-i', fastq,
                '-n', self._sample_name,
                '-o', self._output_dir]
         run_command(__modname__, cmd, self._log_file)
Ejemplo n.º 15
0
 def run_vcf_to_json(self, flag):
     """Convert the final VCF to JSON via the vcf_to_json pipeline (75% progress).

     flag -- when 'HEREDaccuTest', the SNV-only final VCF is used as input;
             any other value selects the regular final VCF.
     """
     with open(self._status_log_file, 'w') as f:
         f.write('[STATUS] convert vcf to json\n[PROGRESS] 75')
     log_progress(__modname__, 'Run the vcf to json', f=self._log_file)
     exec_point = join(self._script_home, 'pipelines', 'vcf_to_json.sh')
     if flag == "HEREDaccuTest":
         final_vcf = join(self._output_dir, 'data', 'variant',
                          '%s_final.snv.vcf' % (self._sample_name))
     else:
         final_vcf = join(self._output_dir, 'data', 'variant',
                          '%s_final.vcf' % (self._sample_name))
     # BUGFIX: "\(" / "\)" are invalid escape sequences (SyntaxWarning on
     # modern CPython); raw strings yield the identical runtime value.
     sample_source = self._sample_source.replace("(", r"\(").replace(")", r"\)")
     exec_cmd = [exec_point,
                 '-w', settings.SCRIPT_HOME,
                 '-i', final_vcf,
                 '-a', self._pipeline_name,
                 '-o', self._output_dir,
                 '-n', self._sample_name,
                 '-u', join(self._script_home, 'utils'),
                 '-p', self._pipeline,
                 '-s', sample_source
     ]
     run_command(__modname__, exec_cmd, self._log_file)
Ejemplo n.º 16
0
 def run_tst170_summarize_result(self):
     """Summarize final results into the report directory (99% progress)."""
     with open(self._status_log_file, 'w') as status_log:
         status_log.write('[STATUS] Summarize Final Result\n[PROGRESS] 99')
     log_progress(__modname__, 'Summarize Final Result start', f=self._log_file)
     summarizer = join(self._script_home, 'summarize_final_result.py')
     out_dir, name = self._output_dir, self._sample_name
     cmd = [
         'python', summarizer,
         'output_dir={0}'.format(out_dir),
         'report_dir={0}'.format(settings.REPORT_HOME),
         'fastqc_dir={0}'.format(settings.FASTQC_HOME),
         'sample_name={0}'.format(name),
         'log_file={0}'.format(self._log_file),
         'cnv_plot={0}'.format(settings.get_cnv_plot(out_dir, name)),
         'statistics_file={0}'.format(settings.get_statistics_file(out_dir, name)),
         'pipeline={0}'.format(self._pipeline)
     ]
     run_command(__modname__, cmd, self._log_file)
     log_progress(__modname__, 'Summarize Final Result finished', f=self._log_file)
Ejemplo n.º 17
0
    def run_dbnsfp_annotation(self, input_file, output_file):
        """Annotate *input_file* with dbNSFP predictions, writing *output_file*.

        An md5 checksum of the output is used to skip a re-run of an
        already-finished annotation. Otherwise: SnpSift dbnsfp writes a
        temporary VCF, a transcript post-processing script produces the
        final output, and a fresh checksum is recorded.
        """
        # intermediate SnpSift output, consumed by the transcript script below
        dbnsfp_tmp_vcf = join(self._variant_dir,
                              '{0}_dbnsfp_tmp.vcf'.format(self._sample_name))
        output_md5 = '{0}.md5'.format(output_file)
        if md5sum_check(output_file, output_md5):
            # output already up to date: just log progress and the DB version
            log_progress(__modname__,
                         'dbNSFP annotation already finished!!!',
                         f=self._log_file)
            log_version(__modname__,
                        self._sw['dbnsfp_db_ver'],
                        f=self._log_file)
        else:
            log_progress(__modname__,
                         'dbNSFP annotation start',
                         f=self._log_file)
            log_version(__modname__,
                        self._sw['dbnsfp_db_ver'],
                        f=self._log_file)

            # SnpSift dbnsfp with the selected prediction/score fields; its
            # stdout is captured into dbnsfp_tmp_vcf by the file-handle runner
            exec_cmd1 = [
                self._sw['java'], '-Xmx4g',
                '-XX:ParallelGCThreads={0}'.format(self._pe_core),
                '-Djava.io.tmpdir={0}'.format(self._sample_tmp_dir), '-jar',
                self._sw['snpsift'], 'dbnsfp', '-f',
                'aapos,aapos_SIFT,aapos_FATHMM,Uniprot_acc,Interpro_domain,SIFT_pred,SIFT_score,LRT_pred,MutationTaster_pred,MutationTaster_score,GERP++_NR,GERP++_RS,phastCons100way_vertebrate,MutationAssessor_pred,FATHMM_pred,PROVEAN_pred,MetaSVM_pred,Polyphen2_HDIV_pred,Polyphen2_HDIV_score,Polyphen2_HVAR_pred,Polyphen2_HVAR_score,CADD_phred',
                '-db', self._sw['dbnsfp_db'], input_file
            ]
            run_command_file_handle(__modname__, exec_cmd1, self._log_file,
                                    'w', dbnsfp_tmp_vcf)

            # post-process transcripts from the temp VCF into the final output
            exec_cmd2 = [
                'python', self._sw['ngb_transcript_dbNSFP'], '-o', output_file,
                dbnsfp_tmp_vcf
            ]
            run_command(__modname__, exec_cmd2, self._log_file)
            run_command_md5sum(__modname__, self._log_file, output_file,
                               output_md5)
            # NOTE(review): '{0}/*' relies on shell globbing; if run_command
            # does not use a shell, the literal '*' will not expand -- confirm
            # against run_command's implementation.
            run_command(__modname__,
                        ['rm', '-rf', '{0}/*'.format(self._sample_tmp_dir)],
                        self._log_file)
            log_progress(__modname__,
                         'dbNSFP annotation finished',
                         f=self._log_file)
Ejemplo n.º 18
0
    def link_files(self):
        """Symlink the FASTQC outputs (and, for pipeline 901, the CNV plot)
        into the sample report directory, replacing any stale links."""
        def _relink(src, dst):
            # replace a pre-existing file/link before re-creating the symlink
            if os.path.exists(dst):
                os.remove(dst)
            run_command(__modname__, ['ln', '-s', src, dst], self._log_file)

        for fastqc_file in self.get_fastqc_names():
            _relink(join(self._fastqc_dir, fastqc_file),
                    join(self._sample_report_dir, fastqc_file))

        if self._pipeline == '901':
            _relink(self._cnv_plot,
                    join(self._sample_report_dir,
                         os.path.basename(self._cnv_plot)))
Ejemplo n.º 19
0
 def run_tst170_annotation(self):
     """Annotate the TST170 raw VCF into the final VCF (80% progress)."""
     with open(self._status_log_file, 'w') as status_log:
         status_log.write('[STATUS] TruSightTumor170 pipeline\n[PROGRESS] 80')
     log_progress(__modname__, 'TST170 annotation start', f=self._log_file)
     annotator = join(self._script_home, 'run_annotation.py')
     out_dir, name = self._output_dir, self._sample_name
     cmd = [
         'python', annotator,
         'output_dir={0}'.format(out_dir),
         'variant_dir={0}'.format(settings.VARIANT_HOME),
         'tmp_dir={0}'.format(settings.TEMP_HOME),
         'log_file={0}'.format(self._log_file),
         'final_bam={0}'.format(settings.get_final_bam(out_dir, name)),
         'raw_vcf={0}'.format(settings.get_tst170_raw_vcf(out_dir, name)),
         'final_vcf={0}'.format(settings.get_final_vcf(out_dir, name)),
         'sample_name={0}'.format(name),
         'pe_core={0}'.format(self._pe_core)
     ]
     run_command(__modname__, cmd, self._log_file)
     log_progress(__modname__, 'TST170 annotation finished', f=self._log_file)
Ejemplo n.º 20
0
 def add_type_to_vcf(self, input_file, output_file):
     """Add the TYPE INFO field to a VCF, skipping work when the output's
     md5 checksum is already valid."""
     output_md5 = '{0}.md5'.format(output_file)
     if md5sum_check(output_file, output_md5):
         log_progress(__modname__,
                      'Add TYPE info already finished!!!',
                      f=self._log_file)
         return
     log_progress(__modname__, 'Add TYPE info start', f=self._log_file)
     # drop a stale output before regenerating it
     if os.path.exists(output_file):
         os.remove(output_file)
     cmd = ['python', self._sw['ngb_add_vcfinfo'], '-o', output_file,
            input_file]
     run_command(__modname__, cmd, self._log_file)
     run_command_md5sum(__modname__, self._log_file, output_file, output_md5)
     log_progress(__modname__,
                  'Add TYPE info finished',
                  f=self._log_file)
Ejemplo n.º 21
0
 def run_summarize_intermediate_result(self, tst170_dir):
     """Copy TST170 intermediate results into the output tree (70% progress)."""
     with open(self._status_log_file, 'w') as status_log:
         status_log.write('[STATUS] Copy intermediate results\n[PROGRESS] 70')
     log_progress(__modname__, 'Copy the TruSightTumor170 pipeline result start', f=self._log_file)
     summarizer = join(self._script_home, 'summarize_intermediate_result.py')
     final_bam = settings.get_final_bam(self._output_dir, self._sample_name)
     cmd = [
         'python', summarizer,
         'output_dir={0}'.format(self._output_dir),
         'assay_dir={0}'.format(settings.ASSAY_HOME),
         'fastq_dir={0}'.format(settings.FASTQ_HOME),
         'align_dir={0}'.format(settings.ALIGNMENT_HOME),
         'variant_dir={0}'.format(settings.VARIANT_HOME),
         'log_file={0}'.format(self._log_file),
         'tst170_dir={0}'.format(tst170_dir),
         'pipeline={0}'.format(self._pipeline),
         'sample_name={0}'.format(self._sample_name),
         'final_bam={0}'.format(final_bam)
     ]
     run_command(__modname__, cmd, self._log_file)
     log_progress(__modname__, 'Copy the TST170 pipeline result finished', f=self._log_file)
Ejemplo n.º 22
0
 def generate_tdf_file(self, final_bam):
     """Build an IGV .tdf coverage track for *final_bam*, skipping work
     when its md5 checksum is already valid."""
     tdf_file = '{0}.tdf'.format(final_bam)
     tdf_file_md5 = '{0}.md5'.format(tdf_file)
     if md5sum_check(tdf_file, tdf_file_md5):
         log_progress(__modname__, 'TDF file generation already finished!!!', f=self._log_file)
         log_version(__modname__, self._sw['igvtools_ver'], f=self._log_file)
         return
     log_progress(__modname__, 'TDF file generation start', f=self._log_file)
     log_version(__modname__, self._sw['igvtools_ver'], f=self._log_file)
     # drop a stale output before regenerating it
     if os.path.exists(tdf_file):
         os.remove(tdf_file)
     cmd = [self._sw['igvtools'], 'count', final_bam, tdf_file, 'hg19']
     run_command(__modname__, cmd, self._log_file)
     run_command_md5sum(__modname__, self._log_file, tdf_file, tdf_file_md5)
     log_progress(__modname__, 'TDF file generation finished', f=self._log_file)
Ejemplo n.º 23
0
 def statistics(self, pileup_depth):
     """Compute analysis statistics into the summary file, skipping work
     when its md5 checksum is already valid."""
     fastq_files = self.get_fastq_names()
     md5_file = '%s.md5' % (self._summary_file)
     if md5sum_check(self._summary_file, md5_file):
         log_progress(__modname__,
                      'Analysis Statistics already finished!!!',
                      f=self._log_file)
         return
     log_progress(__modname__,
                  'Analysis Statistics start',
                  f=self._log_file)
     # the script expects the eight FASTQ names, then bam/bed/depth/summary
     cmd = [self._script] + [fastq_files[i] for i in range(8)] + [
         self._final_bam, self._target_bed, pileup_depth, self._summary_file
     ]
     run_command(__modname__, cmd, self._log_file)
     log_progress(__modname__,
                  'Analysis Statistics finished',
                  f=self._log_file)
Ejemplo n.º 24
0
 def run_tst170_analysis_stat(self):
     """Run analysis statistics for the TST170 pipeline (93% progress)."""
     with open(self._status_log_file, 'w') as status_log:
         status_log.write('[STATUS] TruSightTumor170 pipeline\n[PROGRESS] 93')
     log_progress(__modname__, 'Analysis statistics start', f=self._log_file)
     runner = join(self._script_home, 'run_analysis_statistics.py')
     out_dir, name = self._output_dir, self._sample_name
     cmd = [
         'python', runner,
         'output_dir={0}'.format(out_dir),
         'log_file={0}'.format(self._log_file),
         'fastq_dir={0}'.format(settings.FASTQ_HOME),
         'assay_dir={0}'.format(settings.ASSAY_HOME),
         'align_dir={0}'.format(settings.ALIGNMENT_HOME),
         'target_bed={0}'.format(settings.get_tst170_DNA_bed(out_dir, self._pipeline)),
         'sample_name={0}'.format(name),
         'final_bam={0}'.format(settings.get_final_bam(out_dir, name)),
         'summary_file={0}'.format(settings.get_summary_file(out_dir, name)),
         'script={0}'.format(join(self._script_home, 'run_analysis_statistics.sh')),
         'pipeline={0}'.format(self._pipeline)
     ]
     run_command(__modname__, cmd, self._log_file)
     log_progress(__modname__, 'Analysis statistics finished', f=self._log_file)
Ejemplo n.º 25
0
 def run_tst170_qc_report(self):
     """Generate the QC report for the TST170 pipeline (98% progress)."""
     with open(self._status_log_file, 'w') as status_log:
         status_log.write('[STATUS] TruSightTumor170 pipeline\n[PROGRESS] 98')
     log_progress(__modname__, 'QC report generation start', f=self._log_file)
     generator = join(self._script_home, 'run_qc_report_generation.py')
     out_dir, name = self._output_dir, self._sample_name
     cmd = [
         'python', generator,
         'sample_name={0}'.format(name),
         'pipeline={0}'.format(self._pipeline),
         'pipeline_name={0}'.format(self._pipeline_name),
         'instrument={0}'.format(self._platform),
         'specimen={0}'.format(self._sample_source),
         'run_name={0}'.format(self._run_name),
         'fastqc_dir={0}'.format(settings.FASTQC_HOME),
         'stat_json={0}'.format(settings.get_stat_json(out_dir, name)),
         'log_file={0}'.format(self._log_file),
         'output_dir={0}'.format(out_dir),
         'cnv_tsv={0}'.format(settings.get_cnv_tsv(out_dir, name)),
         'cnv_plot={0}'.format(settings.get_cnv_plot(out_dir, name))
     ]
     run_command(__modname__, cmd, self._log_file)
     log_progress(__modname__, 'QC report generation finished', f=self._log_file)
Ejemplo n.º 26
0
 def plot_generation(self):
     """Render the CNV plot with Rscript, skipping work when the plot's
     md5 checksum is already valid."""
     png_md5 = '{0}.md5'.format(self._cnv_plot)
     if md5sum_check(self._cnv_plot, png_md5):
         log_progress(__modname__,
                      'CNV plot generation already finished!!!',
                      f=self._log_file)
         return
     log_progress(__modname__,
                  'CNV plot generation start',
                  f=self._log_file)
     # drop a stale plot before regenerating it
     if os.path.exists(self._cnv_plot):
         os.remove(self._cnv_plot)
     cmd = ['Rscript', self._cnv_plot_script, self._sample_name,
            self._cnv_fc_stat, self._cnv_plot]
     run_command(__modname__, cmd, self._log_file)
     run_command_md5sum(__modname__, self._log_file, self._cnv_plot, png_md5)
     log_progress(__modname__,
                  'CNV plot generation finished',
                  f=self._log_file)
Ejemplo n.º 27
0
 def add_hgvs(self, input_file, output_file):
     """Add HGVS nomenclature and variant type to a VCF, skipping work
     when the output's md5 checksum is already valid."""
     output_md5 = '{0}.md5'.format(output_file)
     if md5sum_check(output_file, output_md5):
         log_progress(__modname__,
                      'Add HGVS info and variant type already finished!!!',
                      f=self._log_file)
         return
     log_progress(__modname__,
                  'Add HGVS info and variant type start',
                  f=self._log_file)
     # drop a stale output before regenerating it
     if os.path.exists(output_file):
         os.remove(output_file)
     cmd = ['python', self._sw['ngb_add_HGVS'], '-d',
            self._sw['mutect2_bed'], '-o', output_file, input_file]
     run_command(__modname__, cmd, self._log_file)
     run_command_md5sum(__modname__, self._log_file, output_file, output_md5)
     log_progress(__modname__,
                  'Add HGVS info and variant type finished',
                  f=self._log_file)
Ejemplo n.º 28
0
    def run_clinvar_annotation(self, input_file, output_file):
        """Annotate *input_file* with ClinVar data, producing *output_file*.

        Skips the work when the output's md5 checksum is already valid;
        otherwise runs the ClinVar annotator into a temporary VCF, adds
        variation IDs into the final output, and records a new checksum.
        """
        clinvar_tmp_vcf = join(self._variant_dir,
                               '{0}_clinvar_tmp.vcf'.format(self._sample_name))
        output_md5 = '{0}.md5'.format(output_file)

        if md5sum_check(output_file, output_md5):
            log_progress(__modname__,
                         'ClinVar annotation already finished!!!',
                         f=self._log_file)
            log_version(__modname__,
                        self._sw['ngb_clinvar_ver'],
                        f=self._log_file)
            return

        log_progress(__modname__,
                     'ClinVar annotation start',
                     f=self._log_file)
        log_version(__modname__,
                    self._sw['ngb_clinvar_ver'],
                    f=self._log_file)

        annotate_cmd = [
            'python', self._sw['ngb_anno_clinvar'], '--dbfile',
            self._sw['ngb_clinvar_db'], '--infoVCF',
            self._sw['clinvar_compact_header'], '--inVCF', input_file,
            '--outVCF', clinvar_tmp_vcf
        ]
        run_command(__modname__, annotate_cmd, self._log_file)

        variation_cmd = [
            'python', self._sw['ngb_clinvar_variation'], '-d',
            self._sw['ngb_clinvar_ref'], '-o', output_file, clinvar_tmp_vcf
        ]
        run_command(__modname__, variation_cmd, self._log_file)
        run_command_md5sum(__modname__, self._log_file, output_file,
                           output_md5)
        log_progress(__modname__,
                     'ClinVar annotation finished',
                     f=self._log_file)
Ejemplo n.º 29
0
 def run_annotation(self, db_name, target_vcf, target_vcf_ver, _info, _name,
                    input_file, output_file):
     """Generic SnpSift 'annotate' step against a target VCF database.

     db_name        -- human-readable database name (log messages only)
     target_vcf     -- annotation source VCF passed to SnpSift
     target_vcf_ver -- version string logged via log_version
     _info          -- optional '-info' field list ('' to omit)
     _name          -- optional '-name' prefix ('' to omit)
     input_file     -- VCF to annotate
     output_file    -- annotated VCF (SnpSift stdout is captured here)

     Skips the work when output_file's md5 checksum is already valid.
     """
     output_md5 = '{0}.md5'.format(output_file)
     if md5sum_check(output_file, output_md5):
         # output already up to date: just log progress and the DB version
         log_progress(__modname__,
                      '{0} annotation already finished!!!'.format(db_name),
                      f=self._log_file)
         log_version(__modname__, target_vcf_ver, f=self._log_file)
     else:
         log_progress(__modname__,
                      '{0} annotation start'.format(db_name),
                      f=self._log_file)
         log_version(__modname__, target_vcf_ver, f=self._log_file)
         # base SnpSift invocation; optional flags are appended below
         exec_cmd = [
             self._sw['java'], '-Xmx4g',
             '-XX:ParallelGCThreads={0}'.format(self._pe_core),
             '-Djava.io.tmpdir={0}'.format(self._sample_tmp_dir), '-jar',
             self._sw['snpsift'], 'annotate'
         ]
         if _info != '':
             exec_cmd.append('-info')
             exec_cmd.append(_info)
         if _name != '':
             exec_cmd.append('-name')
             exec_cmd.append(_name)
         exec_cmd.append(target_vcf)
         exec_cmd.append(input_file)
         # SnpSift writes to stdout; the file-handle runner captures it
         run_command_file_handle(__modname__, exec_cmd, self._log_file, 'w',
                                 output_file)
         run_command_md5sum(__modname__, self._log_file, output_file,
                            output_md5)
         # NOTE(review): '{0}/*' relies on shell globbing; if run_command
         # does not use a shell, the literal '*' will not expand -- confirm
         # against run_command's implementation.
         run_command(__modname__,
                     ['rm', '-rf', '{0}/*'.format(self._sample_tmp_dir)],
                     self._log_file)
         log_progress(__modname__,
                      '{0} annotation finished'.format(db_name),
                      f=self._log_file)
Ejemplo n.º 30
0
    def run_TST170_pipeline(self, TST170_completed_file_path):
        """Run the TruSightTumor170 docker pipeline unless already completed.

        Returns the TruSightTumor170_Analysis_* output directory (or None
        when it cannot be located). Writes the completed-file once the
        directory's Summary.tsv exists.
        """
        with open(self._status_log_file, 'w') as status_log:
            status_log.write('[STATUS] TruSightTumor170 pipeline\n[PROGRESS] 5')
        log_progress(__modname__, 'Run the TruSightTumor170 pipeline', f=self._log_file)

        if os.path.exists(TST170_completed_file_path):
            log_progress(__modname__, 'TruSightTumor170 pipeline already finished', f=self._log_file)
        else:
            log_progress(__modname__, 'TruSightTumor170 pipeline start', f=self._log_file)
            docker_cmd = [
                'docker', 'run',
                '-t',
                '--rm',
                '--name', self._run_id,
                '-v', '/etc/localtime:/etc/localtime',
                '-v', '{0}:/data'.format(self._run_dir),
                '-v', '{0}:/genomes'.format(self._TST170_Resources),
                '-v', '{0}:/analysis'.format(self._output_dir),
                'ngb_tst170:1.0.0.0'
            ]
            run_command(__modname__, docker_cmd, self._log_file)
            # give the container a moment to flush its outputs
            sleep(10)

        log_version(__modname__, 'software,TST170 Local App,v1.0,Oct.2017,http://sapac.support.illumina.com/downloads/trusight-tumor-170-local-app-documentation.html?langsel=/kr/,None', f=self._log_file)

        # locate the analysis directory produced by the local app
        tst170_dir = None
        for entry in os.listdir(self._output_dir):
            if entry.startswith("TruSightTumor170_Analysis_"):
                tst170_dir = os.path.join(self._output_dir, entry)

        if tst170_dir is not None and os.path.exists(join(tst170_dir, 'Summary.tsv')):
            with open(TST170_completed_file_path, 'w') as completed_file:
                completed_file.write('TruSightTumor170 pipeline finished')
            log_progress(__modname__, 'TruSightTumor170 pipeline finished', f=self._log_file)
        return tst170_dir