def latex_bowtie(input, output, param):
    """Build the "bowtie" LaTeX section from a previously dumped stats JSON.

    Reads the JSON at ``input["json"]``, turns every entry under its
    ``"stat"`` key into one table row, and hands a template command
    (template ``input["template"]``, dump target ``output["latex"]``) to
    ``template_dump``.  ``param`` is accepted but not read here.
    """
    stats_json = json_load(input["json"])

    # One row per sample: display name, total reads, mappable reads, rate.
    rows = [
        [
            underline_to_space(sample),
            sample_stat["total_reads"],
            sample_stat["mappable_reads"],
            sample_stat["mappable_rate"],
        ]
        for sample, sample_stat in stats_json["stat"].items()
    ]

    render_param = {
        "section_name": "bowtie",
        "basic_map_table": rows,
        "mappable_ratio_graph": stats_json["output"]["pdf"],
        "render_dump": output["latex"],
    }
    template_dump(
        JinjaTemplateCommand(
            name="mapping quality",
            template=input["template"],
            param=render_param,
        )
    )
def stat_macs2(input={"macs2_peaks_xls": "", "db": "", "template": ""},
               output={"R": "", "json": "", "pdf": ""},
               param={"id": ""}):
    """Judge one MACS2 peak file and plot it against historic peak counts.

    Parses ``input["macs2_peaks_xls"]`` with ``_peaks_parse``, attaches the
    cutoffs and Pass/Fail verdicts, reads the ``peak_fc_10`` column of the
    ``peak_calling`` table in the sqlite database ``input["db"]``, renders
    and executes the R template, and finally passes the collected dict to
    ``json_dump``.

    The default arguments only spell out the keys that are read; this
    function itself does not mutate them.
    """
    stat = _peaks_parse(input["macs2_peaks_xls"])
    json_dict = {"stat": stat, "input": input, "output": output, "param": param}

    cutoff = {"uni_loc": 5000000, "high_conf_peaks": 1000}
    stat["cutoff"] = cutoff
    # NOTE(review): "uni_loc" is judged with a strict ">" while
    # stat_macs2_on_treats uses ">=" for the same threshold -- kept as-is,
    # confirm which one is intended.
    stat["judge"] = {
        "uni_loc": "Pass" if stat["treat_unic"] > cutoff["uni_loc"] else "Fail",
        "high_conf_peaks": "Pass" if stat["peaksge10"] >= cutoff["high_conf_peaks"] else "Fail",
    }

    name = [param["id"]]

    # Close the connection explicitly; the original leaked it.
    connection = sqlite3.connect(input["db"])
    try:
        cursor = connection.cursor()
        cursor.execute("select peak_fc_10 from peak_calling")
        rows = cursor.fetchall()
    finally:
        connection.close()

    # Skip NULL and non-positive counts; NULL would raise on "> 0" in Python 3.
    historyData = [math.log(row[0] + 0.001, 10)
                   for row in rows
                   if row[0] is not None and row[0] > 0]

    # NOTE(review): the historic data uses a 0.001 pseudocount, the current
    # value 0.01 -- preserved from the original.
    # NOTE(review): 'ids' is treated as a live key (the original line carried
    # a stray "#" right before it); confirm against the R template.
    high_confident_peaks_r = JinjaTemplateCommand(
        name="highpeaksQC",
        template=input["template"],
        param={'historic_data': historyData,
               'current_data': [math.log(stat["peaksge10"] + 0.01, 10)],
               'ids': name,
               'cutoff': 3,
               'main': 'High confidence peaks distribution',
               'xlab': 'log(Number of Peaks fold greater than 10)',
               'ylab': 'fn(log(Number of Peaks fold greater than 10))',
               "pdf": output["pdf"],
               "render_dump": output["R"]})
    template_dump(high_confident_peaks_r)
    r_exec(high_confident_peaks_r)

    json_dump(json_dict)
def latex_venn(input, output, param):
    """Build the "venn" LaTeX section.

    Loads the JSON at ``input["json"]`` and forwards its
    ``["input"]["venn"]`` entry as the graph, with ``output["latex"]`` as the
    dump target, to ``template_dump``.  ``param`` is accepted but not read.
    """
    loaded = json_load(input["json"])
    render_param = {
        "section_name": "venn",
        "venn_graph": loaded["input"]["venn"],
        "render_dump": output["latex"],
    }
    venn_command = JinjaTemplateCommand(
        name="venn diagram latex",
        template=input["template"],
        param=render_param,
    )
    template_dump(venn_command)
def latex_cor(input, output, param):
    """Build the "correlation" LaTeX section.

    Loads the JSON at ``input["json"]`` and forwards its
    ``["input"]["cor_pdf"]`` entry as the graph, with ``output["latex"]`` as
    the dump target, to ``template_dump``.  ``param`` is accepted but not
    read.
    """
    loaded = json_load(input["json"])
    render_param = {
        "section_name": "correlation",
        "correlation_graph": loaded["input"]["cor_pdf"],
        "render_dump": output["latex"],
    }
    cor_command = JinjaTemplateCommand(
        name="correlation",
        template=input["template"],
        param=render_param,
    )
    template_dump(cor_command)
def latex_summary_table(input, output, param):
    """Build the summary-QC LaTeX table.

    The table content comes from ``_cons_summary_table(param["conf"])``; it
    is passed to the template ``input["template"]`` together with the
    ``SummaryQC`` flag and the dump target ``output["latex"]``.
    """
    render_param = {
        "SummaryQC": True,
        "summary_table": _cons_summary_table(param["conf"]),
        "render_dump": output["latex"],
    }
    summary_command = JinjaTemplateCommand(
        template=input["template"],
        param=render_param,
    )
    template_dump(summary_command)
def latex_macs2_on_sample(input, output, param):
    """Build the "redundant" LaTeX section.

    Loads the JSON at ``input["json"]`` and forwards its
    ``["output"]["pdf"]`` entry as the redundant-ratio graph, with
    ``output["latex"]`` as the dump target, to ``template_dump``.  ``param``
    is accepted but not read.
    """
    loaded = json_load(input["json"])
    render_param = {
        "section_name": "redundant",
        "redundant_ratio_graph": loaded["output"]["pdf"],
        "render_dump": output["latex"],
    }
    redundant_command = JinjaTemplateCommand(
        name="redunRateQC",
        template=input["template"],
        param=render_param,
    )
    template_dump(redundant_command)
def latex_ceas(input, output, param):
    """Build the "ceas" LaTeX section.

    Loads the JSON at ``input["json"]`` and forwards two entries of its
    ``"output"`` dict (``metagene_dist_pdf`` and ``peakheight_and_pie_pdf``)
    as graphs, with ``output["latex"]`` as the dump target, to
    ``template_dump``.  ``param`` is accepted but not read.
    """
    loaded = json_load(input["json"])
    render_param = {
        "gene_distribution_graph": loaded["output"]["metagene_dist_pdf"],
        "section_name": "ceas",
        "meta_gene_graph": loaded["output"]["peakheight_and_pie_pdf"],
        "render_dump": output["latex"],
    }
    template_dump(
        JinjaTemplateCommand(
            name="ceas redraw",
            template=input["template"],
            param=render_param,
        )
    )
def latex_seqpos(input, output, param):
    """Build the "motif" LaTeX section.

    Loads the JSON at ``input["json"]`` and forwards its
    ``["stat"]["satisfied_motifs"]`` entry as the motif table, with
    ``output["latex"]`` as the dump target, to ``template_dump``.  ``param``
    is accepted but not read.
    """
    loaded = json_load(input["json"])
    render_param = {
        "motif_table": loaded["stat"]["satisfied_motifs"],
        "section_name": "motif",
        "render_dump": output["latex"],
    }
    motif_command = JinjaTemplateCommand(
        name="motif finding",
        template=input["template"],
        param=render_param,
    )
    template_dump(motif_command)
def latex_contamination(input, output, param):
    """Build the "library_contamination" LaTeX section.

    Loads the JSON at ``input["json"]`` and forwards its whole ``"stat"``
    entry plus ``["param"]["id"]`` to the template, with ``output["latex"]``
    as the dump target.  The ``param`` argument itself is not read.
    """
    loaded = json_load(input["json"])
    render_param = {
        "section_name": "library_contamination",
        "library_contamination": loaded["stat"],
        'prefix_dataset_id': loaded["param"]['id'],
        "render_dump": output["latex"],
    }
    contamination_command = JinjaTemplateCommand(
        name="library contamination",
        template=input["template"],
        param=render_param,
    )
    template_dump(contamination_command)
def stat_macs2_on_treats(input={"all_peak_xls": [], "db": "", "template": ""},
                         output={"R": "", "json": "", "pdf": ""},
                         param={"ids": []}):
    """
    Show redundant ratio of the dataset in all historic data

    Every file in ``input["all_peak_xls"]`` is parsed with ``_peaks_parse``
    and paired, in order, with an entry of ``param["ids"]``.  Each pair gets
    its non-redundant rate / unique-location count, the cutoffs and the
    Pass/Fail verdicts recorded under ``json_dict["stat"][id]``.  The
    ``redundant_rate`` column of the ``peak_calling`` table in the sqlite
    database ``input["db"]`` supplies the historic data for the R plot, and
    the collected dict is written as JSON to ``output["json"]``.

    The default arguments only spell out the keys that are read; this
    function itself does not mutate them.
    """
    json_dict = {"stat": {}, "input": input, "output": output, "param": param}

    treat_unic_loc_rates = []
    treat_unic_locs = []
    for peaks_xls in input["all_peak_xls"]:
        parsed = _peaks_parse(peaks_xls)
        treat_unic_loc_rates.append(parsed["treat_unic_ratio"])
        treat_unic_locs.append(parsed["treat_unic"])

    for sample_id, unic_loc_rate, unic_loc in zip(param["ids"], treat_unic_loc_rates, treat_unic_locs):
        json_dict["stat"][sample_id] = {
            "unic_loc_rate": unic_loc_rate,
            "unic_loc": unic_loc,
            # non redundant rate cutoff
            "cutoff": {"unic_loc_rate": 0.8, "unic_loc": 5000000},
            "judge": {"unic_loc_rate": "Pass" if unic_loc_rate >= 0.8 else "Fail",
                      "unic_loc": "Pass" if unic_loc >= 5000000 else "Fail"},
        }

    # Close the connection explicitly; the original leaked it.
    connection = sqlite3.connect(input["db"])
    try:
        cursor = connection.cursor()
        # TODO: column name redundant_rate => non-redundant rate
        cursor.execute("select redundant_rate from peak_calling")
        redundant_history = cursor.fetchall()
    finally:
        connection.close()

    # Skip both the literal "null" marker and real SQL NULLs (None); the
    # latter used to reach float() and raise TypeError.
    historyData = [1 - float(row[0])
                   for row in redundant_history
                   if row[0] is not None and row[0] != "null"]

    redundant_rate_R = JinjaTemplateCommand(
        name="redunRateQC",
        template=input["template"],
        param={'historic_data': historyData,
               'current_data': treat_unic_loc_rates,
               'ids': param["ids"],
               'cutoff': 0.8,
               'main': 'Non-Redundant rate',
               'xlab': 'Non-Redundant rate',
               'ylab': 'fn(Non-Redundant rate)',
               "pdf": output["pdf"],
               "render_dump": output["R"],
               "need_smooth_curve": True})
    template_dump(redundant_rate_R)
    r_exec(redundant_rate_R)

    with open(output["json"], "w") as f:
        json.dump(json_dict, f, indent=4)
def stat_fastqc(input={"db": "", "fastqc_summaries": [], "template": ""},
                output={"R": "", "json": "", "pdf": ""},
                param={"ids": [], "id": ""}):
    """Judge per-sample FastQC medians and plot them against historic data.

    Every file in ``input["fastqc_summaries"]`` is parsed with
    ``_python_fastqc_parse`` and paired, in order, with an entry of
    ``param["ids"]``.  For each pair the median, the cutoff (25), the
    Pass/Fail verdict and the sequence length are recorded under
    ``json_dict["stat"][id]``.  The ``median_quality`` column of the
    ``fastqc_info`` table in the sqlite database ``input["db"]`` supplies the
    historic data for the R plot; the collected dict is then passed to
    ``json_dump``.

    The default arguments only spell out the keys that are read; this
    function itself does not mutate them.
    """
    json_dict = {"stat": {}, "input": input, "output": output, "param": param}
    stat = json_dict["stat"]

    # Per-sample fields recorded here: sequence length and the median of the
    # sequence quality, keyed by sample ID.
    quality_medians = []
    for a_summary, a_id in zip(input["fastqc_summaries"], param["ids"]):
        parsed = _python_fastqc_parse(input=a_summary)
        stat[a_id] = {
            "median": parsed["median"],
            "cutoff": 25,
            'judge': "Pass" if parsed["median"] > 25 else "Fail",
            "sequence_length": parsed["sequence_length"],
        }
        quality_medians.append(parsed["median"])

    # Close the connection explicitly; the original leaked it.
    connection = sqlite3.connect(input["db"])
    try:
        cursor = connection.cursor()
        cursor.execute("SELECT median_quality FROM fastqc_info")
        rows = cursor.fetchall()
    finally:
        connection.close()

    # SQL NULLs come back as None and would make float() raise; skip them.
    history_data = [float(row[0]) for row in rows if row[0] is not None]

    fastqc_dist_r = JinjaTemplateCommand(
        template=input["template"],
        param={'historic_data': history_data,
               'current_data': quality_medians,
               'ids': [underline_to_space(i) for i in param["ids"]],
               'cutoff': 25,
               'main': 'Sequence Quality Score Cumulative Percentage',
               'xlab': 'sequence quality score',
               'ylab': 'fn(sequence quality score)',
               "need_smooth_curve": True,
               "pdf": output["pdf"],
               "render_dump": output["R"]})
    template_dump(fastqc_dist_r)
    r_exec(fastqc_dist_r)

    json_dump(json_dict)
def latex_fastqc(input, output, param):
    """Build the "sequence_quality" LaTeX section.

    Loads the JSON at ``input["json"]`` and turns each entry under its
    ``"stat"`` key into a row of display name, sequence length and median.
    The same ``["output"]["pdf"]`` value is passed as both ``path`` and
    ``fastqc_graph``; ``output["latex"]`` is the dump target.  ``param`` is
    accepted but not read.
    """
    loaded = json_load(input["json"])
    per_sample = loaded["stat"]

    table_rows = [
        [underline_to_space(sample), sample_stat["sequence_length"], sample_stat["median"]]
        for sample, sample_stat in per_sample.items()
    ]

    render_param = {
        "section_name": "sequence_quality",
        "path": loaded["output"]["pdf"],
        "fastqc_table": table_rows,
        "fastqc_graph": loaded["output"]["pdf"],
        'prefix_dataset_id': [underline_to_space(sample) for sample in per_sample],
        "render_dump": output["latex"],
    }
    template_dump(
        JinjaTemplateCommand(
            template=input["template"],
            param=render_param,
        )
    )
def stat_bowtie(
    input={"bowtie_summaries": [], "db": "", "template": ""},
    output={"json": "", "R": "", "pdf": ""},
    param={"sams": []},
):
    """Judge per-sample bowtie summaries and plot mappable rates.

    Every file in ``input["bowtie_summaries"]`` is parsed with
    ``_bowtie_summary_parse`` and paired, in order, with an entry of
    ``param["sams"]``; each entry is used as a key of ``json_dict["stat"]``
    and as a plot id.  The ``map_ratio`` column of the ``mapping`` table in
    the sqlite database ``input["db"]`` supplies the historic data for the R
    plot, and the collected dict is written as JSON to ``output["json"]``.

    The default arguments only spell out the keys that are read; this
    function itself does not mutate them.
    """
    # unique location is in text_macs2_summary part
    json_dict = {"stat": {}, "input": input, "output": output, "param": param}

    # Close the connection explicitly; the original leaked it.
    connection = sqlite3.connect(input["db"])
    try:
        cursor = connection.cursor()
        cursor.execute("select map_ratio from mapping")
        historyData = [str(row[0]) for row in cursor.fetchall()]
    finally:
        connection.close()

    # Collect the rates in the same order as the (summary, sam) pairs so that
    # "current_data" lines up with the "ids" handed to the template; the
    # original re-derived them from dict iteration order instead.
    mappable_rates = []
    for summary, sam in zip(input["bowtie_summaries"], param["sams"]):
        sam_stat = _bowtie_summary_parse(summary)
        sam_stat["cutoff"] = 5000000  # mappable reads
        sam_stat["judge"] = "Pass" if sam_stat["mappable_reads"] >= 5000000 else "Fail"
        json_dict["stat"][sam] = sam_stat
        mappable_rates.append(sam_stat["mappable_rate"])

    mappable_rate_R = JinjaTemplateCommand(
        template=input["template"],
        param={
            "historic_data": historyData,
            "current_data": mappable_rates,
            "ids": param["sams"],
            "cutoff": 0.5,
            "main": "Unique mapped rate",
            "xlab": "Unique mapped rate",
            "ylab": "fn(Unique mapped rate)",
            "need_smooth_curve": True,
            "render_dump": output["R"],
            "pdf": output["pdf"],
        },
    )
    template_dump(mappable_rate_R)
    r_exec(mappable_rate_R)

    with open(output["json"], "w") as f:
        json.dump(json_dict, f, indent=4)
def latex_macs2(input, output, param):
    """Build the "high_confident_peaks" LaTeX section.

    Loads the JSON at ``input["json"]`` and assembles one summary row from
    its ``["param"]["id"]`` and four ``"stat"`` fields (``qvalue``,
    ``totalpeak``, ``peaksge10``, ``shiftsize``).  The ``["output"]["pdf"]``
    entry is passed as the graph and ``output["latex"]`` as the dump target.
    The ``param`` argument itself is not read.
    """
    # TODO: qian work out the peaks_summary_result part
    loaded = json_load(input["json"])
    peak_stat = loaded["stat"]

    summary_row = [underline_to_space(loaded["param"]["id"])]
    summary_row += [
        peak_stat["qvalue"],
        peak_stat["totalpeak"],
        peak_stat["peaksge10"],
        peak_stat["shiftsize"],
    ]

    render_param = {
        "section_name": "high_confident_peaks",
        "peak_summary_table": summary_row,
        "high_confident_peak_graph": loaded["output"]["pdf"],
        "render_dump": output["latex"],
    }
    template_dump(
        JinjaTemplateCommand(
            name="high confident latex",
            template=input["template"],
            param=render_param,
        )
    )