def copy_bam_files(self):
    """Copy the BAM file, its index and the target BED into the alignment directory.

    Sets ``self._final_bam`` to ``<align_dir>/<sample_name>_final.bam``.
    Nothing is copied when ``md5sum_check`` accepts the existing final BAM
    against its ``.md5`` file. Otherwise the BAM, the ``.bai`` index and the
    Picard target BED are copied with ``self.copy_files``, a TDF file is
    generated with ``self.generate_tdf_file`` and ``run_command_md5sum`` is
    called for the final BAM.

    Logs an error and exits with status 1 when ``self._pipeline`` is neither
    '901' nor '902'.
    """
    self._final_bam = join(self._align_dir,
                           '{0}_final.bam'.format(self._sample_name))
    bam_dst_md5 = '{0}.md5'.format(self._final_bam)

    if md5sum_check(self._final_bam, bam_dst_md5):
        log_progress(__modname__,
                     'Copy the BAM file to output directory already finished',
                     f=self._log_file)
        return

    log_progress(__modname__, 'Copy the BAM file to output directory',
                 f=self._log_file)

    # pipeline code -> (intermediate directory, BAM name template, BED name)
    # 901: DNA, 902: RNA
    layouts = {
        '901': ('DNA_IntermediateFiles', '{0}_realigned.bam',
                'DNA_PicardTarget.bed'),
        '902': ('RNA_IntermediateFiles', '{0}.bam',
                'RNA_PicardTarget.bed'),
    }
    if self._pipeline not in layouts:
        log_error(__modname__,
                  'Unknown pipeline code {0} for TST170 pipeline'.format(
                      self._pipeline),
                  f=self._log_file)
        sys.exit(1)

    intermediate_dir, bam_template, bed_name = layouts[self._pipeline]
    bam_dir = join(self._tst170_dir, intermediate_dir, 'Alignment')
    bam_src = join(bam_dir, bam_template.format(self._sample_name))

    self.copy_files(bam_src, self._final_bam)
    self.copy_files('{0}.bai'.format(bam_src),
                    '{0}.bai'.format(self._final_bam))
    self.copy_files(join(bam_dir, bed_name), join(self._align_dir, bed_name))
    self.generate_tdf_file(self._final_bam)
    run_command_md5sum(__modname__, self._log_file, self._final_bam,
                       bam_dst_md5)
    log_progress(__modname__, 'Copy the BAM file finished', f=self._log_file)
def copy_standard_output(self):
    """Copy the per-sample TST170 result files into the output directories.

    For pipeline '901' (DNA) the two VCF files go to ``self._variant_dir``
    and the Picard target BED is copied to ``self._assay_dir`` as
    ``<sample_name>.bed``. For pipeline '902' (RNA) the splice-variant VCF
    and the three CSV files go to ``self._variant_dir``. Any other pipeline
    code is logged as an error and the process exits with status 1.
    """
    log_progress(__modname__,
                 'Copy the standard output files to output directory',
                 f=self._log_file)

    sample = self._sample_name

    # 901: DNA, 902: RNA
    if self._pipeline == '901':
        source_dir = join(self._tst170_dir, 'DNA_{0}'.format(sample))
        for template in ('{0}_CopyNumberVariants.vcf',
                         '{0}_SmallVariants.genome.vcf'):
            file_name = template.format(sample)
            self.copy_files(join(source_dir, file_name),
                            join(self._variant_dir, file_name))

        # The target BED is renamed after the sample in the assay directory.
        bed_src = join(self._tst170_dir, 'DNA_IntermediateFiles',
                       'Alignment', 'DNA_PicardTarget.bed')
        self.copy_files(bed_src,
                        join(self._assay_dir, '{0}.bed'.format(sample)))
    elif self._pipeline == '902':
        source_dir = join(self._tst170_dir, 'RNA_{0}'.format(sample))
        for template in ('{0}_SpliceVariants.vcf',
                         '{0}_Fusions.csv',
                         '{0}_HighConfidenceVariants.csv',
                         '{0}_PublishedFusions.csv'):
            file_name = template.format(sample)
            self.copy_files(join(source_dir, file_name),
                            join(self._variant_dir, file_name))
    else:
        log_error(__modname__,
                  'Unknown pipeline code {0} for TST170 pipeline'.format(
                      self._pipeline),
                  f=self._log_file)
        sys.exit(1)

    log_progress(__modname__, 'Copy the standard output files finished',
                 f=self._log_file)
def handle_workflow_failure(self, workflow_name, retcode):
    '''Log a workflow failure, write the completion marker file and exit.

    :param workflow_name(string): text passed to ``log_error`` as the message
    :param retcode(int): return code of the failed workflow; it is not used
        here, the process always exits with status 1
    '''
    log_error(__modname__, workflow_name, f=self._log_file)

    # The marker file is written even though the workflow failed.
    marker_name = 'logs/completed.{0}'.format(self._sample_id)
    marker_text = 'Job with SGE_TASKID {0} completed.'.format(self._sample_id)
    with open(join(self._output_dir, marker_name), 'w') as marker:
        marker.write(marker_text)

    sys.exit(1)
def check_tst170_result(self, tst170_dir):
    """Verify that every configured sample completed in ``Summary.tsv``.

    Reads ``<tst170_dir>/Summary.tsv`` and requires, for every name in the
    '|'-separated ``self._config_opts['SAMPLE_NAMES']``, a line equal to
    ``<sample_name>\\tAnalysis Completed.``. Line terminators are stripped
    before comparing, so a final line without a trailing newline or a file
    with CRLF line endings is still recognised.

    :param tst170_dir(string): directory that contains ``Summary.tsv``

    Logs an error and exits with status 1 when a sample is not reported as
    completed, or when the summary file or the configuration entry cannot
    be read.
    """
    log_progress(__modname__,
                 'Check the TruSightTumor170 analysis result status start',
                 f=self._log_file)
    try:
        with open(join(tst170_dir, 'Summary.tsv'), 'r') as f:
            # Compare on the line content only, never on the line terminator.
            completed_lines = set(line.rstrip('\r\n') for line in f)

        for sample_name in self._config_opts['SAMPLE_NAMES'].split("|"):
            expected = '{0}\tAnalysis Completed.'.format(sample_name)
            if expected not in completed_lines:
                log_error(__modname__,
                          'TruSightTumor170 analysis result fail',
                          f=self._log_file)
                # SystemExit is not an Exception subclass, so the handler
                # below does not intercept this exit.
                sys.exit(1)
    except Exception as ex:
        log_error(__modname__, str(ex), f=self._log_file)
        sys.exit(1)

    log_progress(__modname__, 'All the TruSightTumor170 analysis result OK',
                 f=self._log_file)
def copy_files(self, src, dst):
    """Copy ``src`` to ``dst`` unless ``dst`` already has the same hash.

    The source is hashed once with ``self.getHash``. When ``dst`` exists and
    its hash equals the source hash nothing is copied; otherwise the file is
    copied with ``shutil.copyfile`` and the hash of the copy is compared with
    the source hash.

    :param src(string): path of the file to copy
    :param dst(string): destination path

    Logs an error and exits with status 1 when the hashes differ after the
    copy or when any step raises an exception.
    """
    # NOTE(review): assumes self.getHash returns a digest of the file
    # content (the original variables were named *_md5) - confirm.
    try:
        src_md5 = self.getHash(src)

        # An existing destination with a matching hash needs no further work.
        if os.path.exists(dst) and self.getHash(dst) == src_md5:
            return

        shutil.copyfile(src, dst)

        if self.getHash(dst) != src_md5:
            log_error(__modname__, 'MD5sum check fail. {0}'.format(dst),
                      f=self._log_file)
            sys.exit(1)
    except Exception as ex:
        log_error(__modname__, str(ex), f=self._log_file)
        sys.exit(1)
def run(self, summary_file, stat_json_file):
    """Convert the statistics summary into the QC JSON file.

    The step is skipped when ``md5sum_check`` accepts ``stat_json_file``
    against its ``.md5`` file. Otherwise every tab-separated line of
    ``summary_file`` with exactly three fields contributes one entry (second
    field with spaces replaced by underscores -> third field), the entries
    are evaluated by ``self.workflow`` and the result is written as
    ``{"qc_data": [...]}``.

    :param summary_file(string): path of the tab-separated summary file
    :param stat_json_file(string): path of the JSON file to write

    Logs an error and exits with status 1 when parsing or writing fails.
    """
    stat_json_file_md5 = "{0}.md5".format(stat_json_file)

    if md5sum_check(stat_json_file, stat_json_file_md5):
        log_progress(__modname__, "Analysis Statistics already finished",
                     f=self._log_file)
        return

    log_progress(__modname__, "Analysis Statistics start", f=self._log_file)

    stat_data = {}
    try:
        with open(summary_file, "r") as f:
            for raw_line in f:
                fields = raw_line.replace("\n", "").split("\t")
                if len(fields) != 3:
                    continue
                stat_data[fields[1].replace(" ", "_")] = fields[2]
    except Exception as ex:
        log_error(__modname__,
                  "Parsing stat summary file error: {0}".format(ex),
                  f=self._log_file)
        sys.exit(1)

    json_data = {"qc_data": self.workflow(stat_data)}

    try:
        with open(stat_json_file, "w") as make_json_file:
            json.dump(json_data, make_json_file, ensure_ascii=False,
                      sort_keys=True, indent=2)
        # The checksum is requested once the JSON file has been closed.
        run_command_md5sum(__modname__, self._log_file, stat_json_file,
                           stat_json_file_md5)
    except Exception as ex:
        log_error(__modname__, "{0}".format(ex), f=self._log_file)
        sys.exit(1)

    log_progress(__modname__, "Analysis Statistics finished",
                 f=self._log_file)
def run(self):
    """Build the QC report XML and render it to PDF with the fop executable.

    The report sections are appended to a ``qc_report`` root element in a
    fixed order. The software/database section is only built when both
    ``self._software_list`` and ``self._reference_list`` exist; otherwise an
    error is logged and the process exits with status 1. The tree is
    indented with ``self.indent``, written to ``self._xml_file`` and
    converted to ``self._pdf_file``. The converter's stderr is printed.
    """
    log_progress(__modname__, "QC report generation start",
                 f=self._log_file)

    # root element
    qc_report = Element("qc_report")

    # Section builders in report order: main page, header contents, then
    # the numbered content steps 1-8.
    section_builders = (
        self.main_page,
        self.header_contents,
        self.analysis_information,    # Step 1. analysis information
        self.sequencing_information,  # Step 2. sequencing information
        self.raw_fastq_format,        # Step 3. raw fastq format
        self.data_summary,            # Step 4. data summary
        self.raw_read_quality,        # Step 5. raw read quality
        self.mapping_statistics,      # Step 6. mapping statistics
        self.coverage_statistics,     # Step 7. coverage statistics
        self.warning,                 # Step 8. warning
    )
    for build_section in section_builders:
        qc_report.append(build_section())

    # Step 9. software and databases
    lists_available = (os.path.exists(self._software_list)
                       and os.path.exists(self._reference_list))
    if not lists_available:
        log_error(__modname__,
                  "Software and Reference information is not available",
                  f=self._log_file)
        sys.exit(1)
    qc_report.append(self.tools_and_databases())

    # generate xml file
    self.indent(qc_report)
    ElementTree(qc_report).write(self._xml_file)

    # generate pdf file
    exec_cmd = [
        "/NGENEBIO/workflow-app/fop-2.2/fop/fop",
        "-c", "/NGENEBIO/workflow/utils/fop/fop_config.xml",
        "-xml", self._xml_file,
        "-xsl", "/NGENEBIO/workflow/utils/fop/qc_report_template_v4.xsl",
        "-pdf", self._pdf_file,
    ]
    proc = subprocess.Popen(exec_cmd, stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)
    stdout, stderr = proc.communicate()
    print(stderr)

    log_progress(__modname__, "QC report generation finished",
                 f=self._log_file)
def workflow(self, stat_data):
    """Evaluate the amplicon QC metrics found in ``stat_data``.

    Three metrics are checked in order: ROI coverage (the 20x entry when
    present, otherwise the 100x entry), coverage uniformity and amplicon
    mean throughput. Each one yields a dict with the keys ``qc_type``,
    ``qc_threshold``, ``qc_threshold_value``, ``qc_value``, ``qc_unit``,
    ``qc_description``, ``qc_message`` ("pass"/"fail") and
    ``qc_warning_message`` ("None" on pass).

    :param stat_data(dict): metric name -> raw string value
    :return: list with one dict per metric, in evaluation order

    A missing metric is logged as an error and the process exits with
    status 1.
    """
    json_list = []

    def abort(reason):
        log_error(__modname__, reason, f=self._log_file)
        sys.exit(1)

    def record(qc_type, threshold, threshold_value, value, unit,
               description, warning):
        passed = value >= threshold_value
        json_list.append({
            "qc_type": qc_type,
            "qc_threshold": threshold,
            "qc_threshold_value": threshold_value,
            "qc_value": value,
            "qc_unit": unit,
            "qc_description": description,
            "qc_message": "pass" if passed else "fail",
            "qc_warning_message": "None" if passed else warning,
        })

    # ROI coverage: the first key that is present wins.
    roi_variants = (
        ("Percentage_of_ROI_region_with_coveage_of_least_20x",
         "Percentage of ROI region with coverage at least 20x is less than 100%"),
        ("Percentage_of_ROI_region_with_coveage_of_least_100x",
         "Percentage of ROI region with coverage at least 100x is less than 100%"),
    )
    for roi_key, roi_warning in roi_variants:
        if roi_key not in stat_data:
            continue
        # The percentage is the text after the first "(" with "%" and ")"
        # removed.
        percent_text = stat_data[roi_key].split("(")[1]
        percent_text = percent_text.replace("%", "").replace(")", "")
        record("roi_coverage", ">= 100%", 100, float(percent_text), "%",
               "ROI coverage", roi_warning)
        break
    else:
        abort("ROI coverage value is not available")

    # coverage uniformity
    uniformity_key = (
        "Uniformity(Percentage_of_bases_covered_at_>=20%_of_the_mean_coverage)")
    if uniformity_key not in stat_data:
        abort("Uniformity value is not available")
    record("coverage_uniformity", ">= 95%", 95,
           float(stat_data[uniformity_key].replace("%", "")), "%",
           "Percentage of bases covered at 0.2 times of the mean coverage",
           "Percentage of Uniformity 0.2 is less than 95%")

    # amplicon mean throughput
    throughput_key = "Amplicon_Mean_Throughput"
    if throughput_key not in stat_data:
        abort("Mean throughput is not available")
    record("mean_throughput", ">= 200x", 200,
           float(stat_data[throughput_key].replace("x", "")), "x",
           "Amplicon mean throughput is more than 200x",
           "Amplicon mean throughput is less than 200x")

    return json_list
def workflow(self, stat_data):
    """Evaluate the QC metrics found in ``stat_data`` against fixed thresholds.

    Ten metrics are checked in a fixed order. Each one yields a dict with
    the keys ``qc_type``, ``qc_threshold``, ``qc_threshold_value``,
    ``qc_value``, ``qc_unit``, ``qc_description``, ``qc_message``
    ("pass"/"fail") and ``qc_warning_message`` ("None" on pass).

    :param stat_data(dict): metric name -> raw string value
    :return: list with one dict per metric, in evaluation order

    A missing metric is logged as an error and the process exits with
    status 1. The same applies when ``raw_read``, the denominator of the
    read-percentage metrics, is missing or zero.
    """
    json_list = []

    def abort(reason):
        log_error(__modname__, reason, f=self._log_file)
        sys.exit(1)

    def percent_of_raw_reads(read_count):
        # Share of raw reads in percent, rounded to two decimals.
        if "raw_read" not in stat_data:
            abort("Raw read value is not available")
        raw_reads = float(stat_data["raw_read"])
        if raw_reads == 0:
            abort("Raw read value is zero")
        return float(format(float(read_count) / raw_reads * 100, ".2f"))

    def check(key, missing_message, qc_type, threshold, threshold_value,
              parse, unit, description, warning, upper_bound=False):
        # ``upper_bound`` metrics pass when strictly below the threshold;
        # all others pass when greater than or equal to it.
        if key not in stat_data:
            abort(missing_message)
        value = parse(stat_data[key])
        if upper_bound:
            passed = value < threshold_value
        else:
            passed = value >= threshold_value
        json_list.append({
            "qc_type": qc_type,
            "qc_threshold": threshold,
            "qc_threshold_value": threshold_value,
            "qc_value": value,
            "qc_unit": unit,
            "qc_description": description,
            "qc_message": "pass" if passed else "fail",
            "qc_warning_message": "None" if passed else warning,
        })

    # total_base
    check("raw_base", "Total base value is not available",
          "total_base", ">= 200Mb", 200000000, int, "bp",
          "Total base pair",
          "Raw fastq throughput is less than 200Mb")

    # q30 trimmed base per
    check("trimmed_ratio", "Q30 trimmed ratio is not available",
          "q30_trimmed_base", ">= 80%", 80, float, "%",
          "Percentage of q30 trimmed base pair",
          "Percentage of Q30 trimmed base pair is less than 80%")

    # mapped base per
    check("mapped_ratio", "Mapped ratio is not available",
          "mapped_base", ">= 80%", 80, float, "%",
          "Percentage of Mapped base pair",
          "Percentage of mapped base pair is less than 80%")

    # on-target ratio (the warning now states the 30% threshold in use)
    check("on-target_ratio", "On-target ratio is not available",
          "on_target", ">= 30%", 30, float, "%",
          "Percentage of on-target base pair",
          "Percentage of on-target base pair is less than 30%")

    # on-target coverage (the warning now states the 300x threshold in use)
    check("on-target_coverage", "On-target coverage is not available",
          "on_target_coverage", ">= 300x", 300, float, "x",
          "On-target coverage",
          "On-target coverage is less than 300x")

    # on-target read
    check("on_target_read", "On-target read is not available",
          "on_target_read", ">= 70%", 70, percent_of_raw_reads, "%",
          "On-target read",
          "On-target read is less than 70%")

    # duplicate reads percentage
    check("duplicate_reads_percentage",
          "Duplicated reads value is not available",
          "duplicated_reads", "< 20%", 20, float, "%",
          "Percentage of duplicated reads",
          "Percentage of duplicated reads is greater than 20%",
          upper_bound=True)

    # uniformity 0.2 ratio
    check("uniformity_0.2_ratio", "Uniformity value is not available",
          "uniformity_0.2", ">= 80%", 80, float, "%",
          "Percentage of bases covered at 0.2 times of the mean coverage",
          "Percentage of Uniformity 0.2 is less than 80%")

    # mapping quality 60
    check("MAPQ==60_reads", "MAPQ==60 value is not available",
          "mapping_quality_60", ">= 80%", 80, percent_of_raw_reads, "%",
          "Percentage of reads with mapping quality 60",
          "Percentage of reads with mapping quality 60 is less than 80%")

    # Target coverage at 30x
    check("coverage_>=_30x_ratio", "Target coverage at 30x is not available",
          "target_coverage_at_30x", ">= 99%", 99, float, "%",
          "Percentage of target covertage at 30x",
          "Percentage of target covertage at 30x is less than 99%")

    return json_list
def workflow(self, stat_data):
    """Evaluate the QC metrics found in ``stat_data`` against their thresholds.

    Ten metrics are checked in a fixed order. Each one yields a dict with
    the keys ``qc_type``, ``qc_threshold``, ``qc_threshold_value``,
    ``qc_value``, ``qc_unit``, ``qc_description``, ``qc_message``
    ("pass"/"fail") and ``qc_warning_message`` ("None" on pass). The last
    metric counts the comma-separated entries of ``uniformity_0.5_gene`` and
    compares the count with ``self._cutoff_uniformity05``.

    :param stat_data(dict): metric name -> raw string value
    :return: list with one dict per metric, in evaluation order

    A missing metric is logged as an error and the process exits with
    status 1. The same applies when ``raw_read``, the denominator of the
    MAPQ==60 percentage, is missing or zero.
    """
    json_list = []

    def abort(reason):
        log_error(__modname__, reason, f=self._log_file)
        sys.exit(1)

    def percent_of_raw_reads(read_count):
        # Share of raw reads in percent, rounded to two decimals.
        if "raw_read" not in stat_data:
            abort("Raw read value is not available")
        raw_reads = float(stat_data["raw_read"])
        if raw_reads == 0:
            abort("Raw read value is zero")
        return float(format(float(read_count) / raw_reads * 100, ".2f"))

    def check(key, missing_message, qc_type, threshold, threshold_value,
              parse, unit, description, warning, upper_bound=False):
        # ``upper_bound`` metrics pass when strictly below the threshold;
        # all others pass when greater than or equal to it.
        if key not in stat_data:
            abort(missing_message)
        value = parse(stat_data[key])
        if upper_bound:
            passed = value < threshold_value
        else:
            passed = value >= threshold_value
        json_list.append({
            "qc_type": qc_type,
            "qc_threshold": threshold,
            "qc_threshold_value": threshold_value,
            "qc_value": value,
            "qc_unit": unit,
            "qc_description": description,
            "qc_message": "pass" if passed else "fail",
            "qc_warning_message": "None" if passed else warning,
        })

    # total_base
    check("raw_base", "Total base value is not available",
          "total_base", ">= 200Mb", 200000000, int, "bp",
          "Total base pair",
          "Raw fastq throughput is less than 200Mb")

    # q30 trimmed base per
    check("trimmed_ratio", "Q30 trimmed ratio is not available",
          "q30_trimmed_base", ">= 80%", 80, float, "%",
          "Percentage of q30 trimmed base pair",
          "Percentage of Q30 trimmed base pair is less than 80%")

    # mapped base per
    check("mapped_ratio", "Mapped ratio is not available",
          "mapped_base", ">= 80%", 80, float, "%",
          "Percentage of Mapped base pair",
          "Percentage of mapped base pair is less than 80%")

    # on-target ratio
    check("on-target_ratio", "On-target ratio is not available",
          "on_target", ">= 20%", 20, float, "%",
          "Percentage of on-target base pair",
          "Percentage of on-target base pair is less than 20%")

    # on-target coverage
    check("on-target_coverage", "On-target coverage is not available",
          "on_target_coverage", ">= 400x", 400, float, "x",
          "On-target coverage",
          "On-target coverage is less than 400x")

    # coverage >= 20x ratio
    check("coverage_>=_20x_ratio", "ROI coverage value is not available",
          "roi_coverage", ">= 90%", 90, float, "%",
          "Percentage of ROI region with coveage of least 20x",
          "Percentage of ROI region with coverage at least 20x is less than 90%")

    # duplicate reads percentage
    check("duplicate_reads_percentage",
          "Duplicated reads value is not available",
          "duplicated_reads", "< 20%", 20, float, "%",
          "Percentage of duplicated reads",
          "Percentage of duplicated reads is greater than 20%",
          upper_bound=True)

    # uniformity 0.2 ratio
    check("uniformity_0.2_ratio", "Uniformity value is not available",
          "uniformity_0.2", ">= 80%", 80, float, "%",
          "Percentage of bases covered at 0.2 times of the mean coverage",
          "Percentage of Uniformity 0.2 is less than 80%")

    # mapping quality 60
    check("MAPQ==60_reads", "MAPQ==60 value is not available",
          "mapping_quality_60", ">= 80%", 80, percent_of_raw_reads, "%",
          "Percentage of reads with mapping quality 60",
          "Percentage of reads with mapping quality 60 is less than 80%")

    # Uniformity 0.5 < 90%: passes when the number of listed genes does not
    # exceed the configured cutoff.
    if "uniformity_0.5_gene" not in stat_data:
        abort("Uniformity 0.5 value is not available")
    gene_names = stat_data["uniformity_0.5_gene"]
    cutoff = self._cutoff_uniformity05
    gene_count = len(gene_names.split(",")) if gene_names != "" else 0
    within_cutoff = gene_count <= cutoff
    json_list.append({
        "qc_type": "uniformity_0.5",
        "qc_threshold": "<= {0}".format(cutoff),
        "qc_threshold_value": cutoff,
        "qc_value": gene_count,
        "qc_unit": gene_names,  # Gene Name List
        "qc_description":
            "The number of genes less than 90% of 0.5 Uniformity",
        "qc_message": "pass" if within_cutoff else "fail",
        "qc_warning_message": "None" if within_cutoff else
            "The number of genes less than 90% of 0.5 Uniformity is more than {0}".format(cutoff),
    })

    return json_list