예제 #1
0
def stat_cor(input={"correlation_R":"", "cor_pdf": "", "venn": "", },
             output={"json": ""}, param=None):
    """Record the replicate-correlation QC entry and hand it to ``json_dump``.

    ReplicateQC aims to describe the similarity of replicate experiments
    (Venn diagram and correlation plot).  The per-pair correlation
    computation and the Pass/Fail judge are currently disabled, so the
    emitted ``"stat"`` section only carries the fixed cutoff.

    Args:
        input: Mapping whose ``"correlation_R"`` entry is the path of the
            R script that is read here; the other keys are passed through
            untouched.
        output: Mapping copied verbatim into the result record.
        param: Copied verbatim into the result record.

    Raises:
        OSError: If the ``"correlation_R"`` file cannot be opened.
    """
    # TODO: merge this into stat_venn

    # Use a context manager so the file handle is closed deterministically
    # instead of being left to the garbage collector.
    with open(input["correlation_R"]) as r_script:
        correlation_result_r_code = r_script.read()

    # Right-hand sides of the ``pN <- ...`` / ``cN <- ...`` assignments found
    # in the R script.
    # NOTE(review): currently unused because the min-correlation judge is
    # disabled; kept so re-enabling it only needs the R invocation restored.
    signal_list = re.findall(r"[pc]\d+ <- (.*)$", correlation_result_r_code, re.MULTILINE)

    result_dict = {"stat": {}, "input": input, "output": output, "param": param}
    result_dict["stat"]["cutoff"] = 0.6

    json_dump(result_dict)
예제 #2
0
def stat_macs2(input={"macs2_peaks_xls": "", "db": "", "template": ""},
               output={"R": "", "json": "", "pdf": ""},
               param={"id": ""}):
    """Judge MACS2 peak-calling statistics and render the peak distribution plot.

    The statistics returned by ``_peaks_parse`` are extended with the cutoffs
    and a Pass/Fail judge per metric.  Historic ``peak_fc_10`` values are read
    from the sqlite database and passed, together with the current value, to
    the R template.  Finally the record is handed to ``json_dump``.

    Args:
        input: Mapping with ``"macs2_peaks_xls"`` (passed to ``_peaks_parse``),
            ``"db"`` (sqlite database path) and ``"template"`` (template passed
            to ``JinjaTemplateCommand``).
        output: Mapping with ``"R"`` (render dump target), ``"pdf"`` and
            ``"json"``.
        param: Mapping whose ``"id"`` labels the current sample in the plot.
    """
    json_dict = {"stat": {}, "input": input, "output": output, "param": param}
    stat = _peaks_parse(input["macs2_peaks_xls"])
    json_dict["stat"] = stat

    # Single source of truth for the thresholds, so the reported cutoffs and
    # the judge cannot drift apart.
    cutoff = {"uni_loc": 5000000, "high_conf_peaks": 1000}
    stat["cutoff"] = cutoff
    # NOTE(review): "uni_loc" uses a strict ">" while "high_conf_peaks" uses
    # ">="; preserved as-is - confirm which boundary behaviour is intended.
    stat["judge"] = {
        "uni_loc": "Pass" if stat["treat_unic"] > cutoff["uni_loc"] else "Fail",
        "high_conf_peaks": "Pass" if stat["peaksge10"] >= cutoff["high_conf_peaks"] else "Fail",
    }

    name = [param["id"]]

    # Close the connection explicitly; the original leaked it.
    connection = sqlite3.connect(input["db"])
    try:
        cursor = connection.cursor()
        cursor.execute("select peak_fc_10 from peak_calling")
        # Non-positive counts are dropped before taking log10.
        history_data = [math.log(row[0] + 0.001, 10)
                        for row in cursor.fetchall() if row[0] > 0]
    finally:
        connection.close()

    high_confident_peaks_r = JinjaTemplateCommand(name="highpeaksQC",
        template=input["template"],
        param={'historic_data': history_data,
               # NOTE(review): offset here is 0.01 while the historic data
               # uses 0.001; kept unchanged to preserve the rendered output.
               'current_data': [math.log(stat["peaksge10"] + 0.01, 10)],
               'ids': name,
               # 3 == log10(1000), i.e. the "high_conf_peaks" cutoff on the
               # log scale used by the plot.
               'cutoff': 3,
               'main': 'High confidence peaks distribution',
               'xlab': 'log(Number of Peaks fold greater than 10)',
               'ylab': 'fn(log(Number of Peaks fold greater than 10))',

               "pdf": output["pdf"],
               "render_dump": output["R"]})

    template_dump(high_confident_peaks_r)
    r_exec(high_confident_peaks_r)

    json_dump(json_dict)
예제 #3
0
def stat_fastqc(input={"db": "", "fastqc_summaries": [], "template": ""},
                output={"R": "", "json": "", "pdf": ""},
                param={"ids": [], "id": ""}):
    """Judge per-sample FastQC median quality and render the distribution plot.

    Each summary in ``input["fastqc_summaries"]`` is paired with the id at the
    same position in ``param["ids"]``, parsed with ``_python_fastqc_parse`` and
    recorded under that id with its median, sequence length, cutoff and a
    Pass/Fail judge.  Historic medians are read from the sqlite database and
    passed, with the current medians, to the R template.  Finally the record
    is handed to ``json_dump``.

    Args:
        input: Mapping with ``"db"`` (sqlite database path),
            ``"fastqc_summaries"`` (one entry per sample) and ``"template"``.
        output: Mapping with ``"R"`` (render dump target), ``"pdf"`` and
            ``"json"``.
        param: Mapping whose ``"ids"`` lists the sample ids, in the same order
            as the summaries.
    """
    # One definition of the threshold for the record, the judge and the plot.
    cutoff = 25

    json_dict = {"stat": {}, "input": input, "output": output, "param": param}
    stat = json_dict["stat"]

    quality_medians = []

    # zip() stops at the shorter sequence, so unmatched summaries or ids are
    # silently skipped (behaviour preserved from the original).
    for a_summary, a_id in zip(input["fastqc_summaries"], param["ids"]):
        parsed = _python_fastqc_parse(input=a_summary)

        stat[a_id] = {
            "median": parsed["median"],
            "cutoff": cutoff,
            # A median exactly equal to the cutoff fails (strict comparison).
            "judge": "Pass" if parsed["median"] > cutoff else "Fail",
            "sequence_length": parsed["sequence_length"],
        }

        quality_medians.append(parsed["median"])

    # Close the connection explicitly; the original leaked it.
    connection = sqlite3.connect(input["db"])
    try:
        cursor = connection.cursor()
        cursor.execute("SELECT median_quality FROM fastqc_info")
        history_data = [float(row[0]) for row in cursor.fetchall()]
    finally:
        connection.close()

    fastqc_dist_r = JinjaTemplateCommand(
        template=input["template"],
        param={'historic_data': history_data,
               'current_data': quality_medians,
               'ids': [underline_to_space(i) for i in param["ids"]],
               'cutoff': cutoff,
               'main': 'Sequence Quality Score Cumulative Percentage',
               'xlab': 'sequence quality score',
               'ylab': 'fn(sequence quality score)',
               "need_smooth_curve": True,

               "pdf": output["pdf"],
               "render_dump": output["R"]})

    template_dump(fastqc_dist_r)
    r_exec(fastqc_dist_r)

    json_dump(json_dict)
예제 #4
0
def stat_venn(input={"venn": ""}, output={"json": ""}, param=None):
    """Hand an empty-stat record for the Venn step to ``json_dump``.

    Args:
        input: Mapping copied verbatim into the record (``"venn"`` key).
        output: Mapping copied verbatim into the record (``"json"`` key).
            The default used to be the set literal ``{"json", ""}`` (comma
            instead of colon), which cannot be indexed by key; it is now a
            dict like the defaults of the sibling ``stat_*`` functions.
        param: Copied verbatim into the record.
    """
    result_dict = {"stat": {}, "input": input, "output": output, "param": param}
    json_dump(result_dict)