def stat_macs2(input={"macs2_peaks_xls": "", "db": "", "template": ""},
               output={"R": "", "json": "", "pdf": ""},
               param={"id": ""}):
    """
    Collect MACS2 peak statistics for one sample and plot its number of
    high-confidence peaks against the historic distribution.

    input["macs2_peaks_xls"]: peaks file handed to ``_peaks_parse``; the
        parsed result must provide the keys ``treat_unic`` and ``peaksge10``.
    input["db"]: path of the SQLite database holding ``peak_calling``.
    input["template"]: template passed to ``JinjaTemplateCommand``.
    output["R"], output["pdf"]: passed to the template as ``render_dump``
        and ``pdf``.
    param["id"]: identifier of the current sample.

    The default dictionaries only document the expected keys; they are
    never modified here.
    """
    stat = _peaks_parse(input["macs2_peaks_xls"])
    stat["cutoff"] = {"uni_loc": 5000000, "high_conf_peaks": 1000}
    stat["judge"] = {
        # NOTE(review): strict ">" here, while stat_macs2_on_treats uses ">="
        # for the same 5000000 cutoff -- confirm which one is intended.
        "uni_loc": "Pass" if stat["treat_unic"] > 5000000 else "Fail",
        "high_conf_peaks": "Pass" if stat["peaksge10"] >= 1000 else "Fail",
    }
    json_dict = {"stat": stat, "input": input, "output": output, "param": param}

    name = [param["id"]]

    # Close the connection explicitly: sqlite3 does not release the database
    # file until the connection is closed or garbage collected.
    connection = sqlite3.connect(input["db"])
    try:
        cursor = connection.cursor()
        cursor.execute("select peak_fc_10 from peak_calling")
        rows = cursor.fetchall()
    finally:
        connection.close()

    # Only positive counts are kept; values are plotted on a log10 scale.
    historyData = [math.log(row[0] + 0.001, 10) for row in rows if row[0] > 0]

    high_confident_peaks_r = JinjaTemplateCommand(
        name="highpeaksQC",
        template=input["template"],
        param={
            'historic_data': historyData,
            'current_data': [math.log(stat["peaksge10"] + 0.01, 10)],
            # NOTE(review): a stray "#" precedes this entry in the original
            # source; it is kept active because `name` is otherwise unused
            # and the sibling stat functions all pass 'ids' -- confirm.
            'ids': name,
            'cutoff': 3,
            'main': 'High confidence peaks distribution',
            'xlab': 'log(Number of Peaks fold greater than 10)',
            'ylab': 'fn(log(Number of Peaks fold greater than 10))',
            "pdf": output["pdf"],
            "render_dump": output["R"],
        })
    template_dump(high_confident_peaks_r)
    r_exec(high_confident_peaks_r)

    json_dump(json_dict)
def stat_macs2_on_treats(input={"all_peak_xls": [], "db": "", "template": ""},
                         output={"R": "", "json": "", "pdf": ""},
                         param={"ids": []}):
    """
    Show the non-redundant rate of every treatment against all historic data.

    input["all_peak_xls"]: peaks files, each handed to ``_peaks_parse``; the
        parsed result must provide ``treat_unic_ratio`` and ``treat_unic``.
    input["db"]: path of the SQLite database holding ``peak_calling``.
    input["template"]: template passed to ``JinjaTemplateCommand``.
    output["R"], output["pdf"]: passed to the template as ``render_dump``
        and ``pdf``.
    output["json"]: path the collected statistics are written to.
    param["ids"]: sample identifiers, paired positionally with
        ``input["all_peak_xls"]``.

    The default dictionaries only document the expected keys; they are
    never modified here.
    """
    json_dict = {"stat": {}, "input": input, "output": output, "param": param}

    treat_unic_loc_rates = []
    treat_unic_locs = []
    for peaks_xls in input["all_peak_xls"]:
        parsed = _peaks_parse(peaks_xls)
        treat_unic_loc_rates.append(parsed["treat_unic_ratio"])
        treat_unic_locs.append(parsed["treat_unic"])

    for sample_id, unic_loc_rate, unic_loc in zip(
            param["ids"], treat_unic_loc_rates, treat_unic_locs):
        json_dict["stat"][sample_id] = {
            "unic_loc_rate": unic_loc_rate,
            "unic_loc": unic_loc,
            # non redundant rate cutoff
            "cutoff": {"unic_loc_rate": 0.8, "unic_loc": 5000000},
            "judge": {
                "unic_loc_rate": "Pass" if unic_loc_rate >= 0.8 else "Fail",
                "unic_loc": "Pass" if unic_loc >= 5000000 else "Fail",
            },
        }

    # Close the connection explicitly: sqlite3 does not release the database
    # file until the connection is closed or garbage collected.
    connection = sqlite3.connect(input["db"])
    try:
        cursor = connection.cursor()
        # TODO: column name redundant_rate => non-redundant rate
        cursor.execute("select redundant_rate from peak_calling")
        redundant_history = cursor.fetchall()
    finally:
        connection.close()

    # Skip missing values: both the literal string "null" and a real SQL NULL
    # (returned as None, which float() cannot convert).
    historyData = [1 - float(row[0]) for row in redundant_history
                   if row[0] is not None and row[0] != "null"]

    redundant_rate_R = JinjaTemplateCommand(
        name="redunRateQC",
        template=input["template"],
        param={
            'historic_data': historyData,
            'current_data': treat_unic_loc_rates,
            'ids': param["ids"],
            'cutoff': 0.8,
            'main': 'Non-Redundant rate',
            'xlab': 'Non-Redundant rate',
            'ylab': 'fn(Non-Redundant rate)',
            "pdf": output["pdf"],
            "render_dump": output["R"],
            "need_smooth_curve": True,
        })
    template_dump(redundant_rate_R)
    r_exec(redundant_rate_R)

    with open(output["json"], "w") as f:
        json.dump(json_dict, f, indent=4)
def stat_fastqc(input={"db": "", "fastqc_summaries": [], "template": ""},
                output={"R": "", "json": "", "pdf": ""},
                param={"ids": [], "id": ""}):
    """
    Collect the median sequence quality of every sample and plot it against
    the historic distribution.

    input["fastqc_summaries"]: summaries handed to ``_python_fastqc_parse``;
        the parsed result must provide ``median`` and ``sequence_length``.
    input["db"]: path of the SQLite database holding ``fastqc_info``.
    input["template"]: template passed to ``JinjaTemplateCommand``.
    output["R"], output["pdf"]: passed to the template as ``render_dump``
        and ``pdf``.
    param["ids"]: sample identifiers, paired positionally with
        ``input["fastqc_summaries"]``.

    The default dictionaries only document the expected keys; they are
    never modified here.
    """
    json_dict = {"stat": {}, "input": input, "output": output, "param": param}
    stat = json_dict["stat"]

    quality_medians = []
    for a_summary, a_id in zip(input["fastqc_summaries"], param["ids"]):
        parsed = _python_fastqc_parse(input=a_summary)
        stat[a_id] = {
            "median": parsed["median"],
            "cutoff": 25,
            "judge": "Pass" if parsed["median"] > 25 else "Fail",
            "sequence_length": parsed["sequence_length"],
        }
        quality_medians.append(parsed["median"])

    # Close the connection explicitly: sqlite3 does not release the database
    # file until the connection is closed or garbage collected.
    connection = sqlite3.connect(input["db"])
    try:
        qc_db = connection.cursor()
        qc_db.execute("SELECT median_quality FROM fastqc_info")
        history_rows = qc_db.fetchall()
    finally:
        connection.close()
    history_data = [float(row[0]) for row in history_rows]

    fastqc_dist_r = JinjaTemplateCommand(
        template=input["template"],
        param={
            'historic_data': history_data,
            'current_data': quality_medians,
            'ids': [underline_to_space(i) for i in param["ids"]],
            'cutoff': 25,
            'main': 'Sequence Quality Score Cumulative Percentage',
            'xlab': 'sequence quality score',
            'ylab': 'fn(sequence quality score)',
            "need_smooth_curve": True,
            "pdf": output["pdf"],
            "render_dump": output["R"],
        })
    template_dump(fastqc_dist_r)
    r_exec(fastqc_dist_r)

    json_dump(json_dict)
def stat_bowtie(
    input={"bowtie_summaries": [], "db": "", "template": ""},
    output={"json": "", "R": "", "pdf": ""},
    param={"sams": []},
):
    """
    Collect the bowtie mapping statistics of every sample and plot the
    mappable rates against the historic distribution.

    input["bowtie_summaries"]: summaries handed to ``_bowtie_summary_parse``;
        the parsed result must provide ``mappable_reads`` and
        ``mappable_rate``.
    input["db"]: path of the SQLite database holding ``mapping``.
    input["template"]: template passed to ``JinjaTemplateCommand``.
    output["R"], output["pdf"]: passed to the template as ``render_dump``
        and ``pdf``.
    output["json"]: path the collected statistics are written to.
    param["sams"]: hashable sample identifiers (used as keys of the "stat"
        dictionary), paired positionally with ``input["bowtie_summaries"]``.

    The default dictionaries only document the expected keys; they are
    never modified here.
    """
    # unique location is in text_macs2_summary part
    json_dict = {"stat": {}, "input": input, "output": output, "param": param}

    # Close the connection explicitly: sqlite3 does not release the database
    # file until the connection is closed or garbage collected.
    connection = sqlite3.connect(input["db"])
    try:
        cursor = connection.cursor()
        cursor.execute("select map_ratio from mapping")
        history_rows = cursor.fetchall()
    finally:
        connection.close()
    historyData = [str(row[0]) for row in history_rows]

    # Rates are collected in the same pass that walks param["sams"], so the
    # i-th rate always belongs to the i-th id handed to the template; this
    # does not depend on dictionary iteration order or on ids being unique.
    mappable_rates = []
    for summary, sam in zip(input["bowtie_summaries"], param["sams"]):
        sam_stat = _bowtie_summary_parse(summary)
        sam_stat["cutoff"] = 5000000  # mappable reads
        sam_stat["judge"] = "Pass" if sam_stat["mappable_reads"] >= 5000000 else "Fail"
        json_dict["stat"][sam] = sam_stat
        mappable_rates.append(sam_stat["mappable_rate"])

    mappable_rate_R = JinjaTemplateCommand(
        template=input["template"],
        param={
            "historic_data": historyData,
            "current_data": mappable_rates,
            "ids": param["sams"],
            "cutoff": 0.5,
            "main": "Unique mapped rate",
            "xlab": "Unique mapped rate",
            "ylab": "fn(Unique mapped rate)",
            "need_smooth_curve": True,
            "render_dump": output["R"],
            "pdf": output["pdf"],
        },
    )
    template_dump(mappable_rate_R)
    r_exec(mappable_rate_R)

    with open(output["json"], "w") as f:
        json.dump(json_dict, f, indent=4)