コード例 #1
0
ファイル: qc_macs2.py プロジェクト: hanfeisun/ChiLin2
def stat_macs2(input={"macs2_peaks_xls": "", "db": "", "template": ""},
               output={"R": "", "json": "", "pdf": ""},
               param={"id": ""}):
    """Judge one MACS2 peaks file and plot it against historic peak counts.

    input:
        "macs2_peaks_xls": path handed to ``_peaks_parse``; the parsed result
            must provide the keys "treat_unic" and "peaksge10".
        "db": path of the sqlite database holding the ``peak_calling`` table.
        "template": template passed to ``JinjaTemplateCommand``.
    output:
        "R": passed to the template as "render_dump".
        "pdf": passed to the template as "pdf".
        "json": not read here; carried inside the dict given to ``json_dump``
            (presumably the destination it writes to -- confirm in json_dump).
    param:
        "id": label of the current sample.

    The default dicts only document the expected keys; they are never mutated.
    """
    from contextlib import closing

    # Keep each threshold in one place so the recorded cutoff and the
    # judgement can never drift apart.
    uni_loc_cutoff = 5000000
    high_conf_peaks_cutoff = 1000

    json_dict = {"stat": {}, "input": input, "output": output, "param": param}
    stat = _peaks_parse(input["macs2_peaks_xls"])
    json_dict["stat"] = stat
    stat["cutoff"] = {"uni_loc": uni_loc_cutoff,
                      "high_conf_peaks": high_conf_peaks_cutoff}
    # NOTE(review): "uni_loc" is judged with a strict ">" while
    # "high_conf_peaks" uses ">="; kept exactly as it was.
    stat["judge"] = {
        "uni_loc": "Pass" if stat["treat_unic"] > uni_loc_cutoff else "Fail",
        "high_conf_peaks": "Pass" if stat["peaksge10"] >= high_conf_peaks_cutoff else "Fail",
    }

    name = [param["id"]]

    # Close the connection explicitly instead of relying on garbage collection.
    with closing(sqlite3.connect(input["db"])) as connection:
        with closing(connection.cursor()) as cursor:
            cursor.execute("select peak_fc_10 from peak_calling")
            rows = cursor.fetchall()

    # SQL NULL arrives as None, which cannot be compared with 0 on Python 3;
    # skip it together with the non-positive counts.
    historyData = [math.log(row[0] + 0.001, 10)
                   for row in rows
                   if row[0] is not None and row[0] > 0]

    # NOTE(review): the historic values use a 0.001 offset but the current
    # value uses 0.01 -- kept as found, confirm which one is intended.
    current_data = [math.log(stat["peaksge10"] + 0.01, 10)]

    high_confident_peaks_r = JinjaTemplateCommand(
        name="highpeaksQC",
        template=input["template"],
        param={'historic_data': historyData,
               'current_data': current_data,
               'ids': name,
               # presumably log10 of the 1000-peak cutoff, since the data
               # are plotted on a log10 scale
               'cutoff': 3,
               'main': 'High confidence peaks distribution',
               'xlab': 'log(Number of Peaks fold greater than 10)',
               'ylab': 'fn(log(Number of Peaks fold greater than 10))',
               "pdf": output["pdf"],
               "render_dump": output["R"]})

    template_dump(high_confident_peaks_r)
    r_exec(high_confident_peaks_r)

    json_dump(json_dict)
コード例 #2
0
ファイル: qc_macs2.py プロジェクト: hanfeisun/ChiLin2
def stat_macs2_on_treats(input={"all_peak_xls": [], "db": "", "template": ""},
                         output={"R": "", "json": "", "pdf": ""},
                         param={"ids": []}):
    """Show the non-redundant rate of each treatment against historic data.

    input:
        "all_peak_xls": paths handed one by one to ``_peaks_parse``; each
            parsed result must provide "treat_unic_ratio" and "treat_unic".
        "db": path of the sqlite database holding the ``peak_calling`` table.
        "template": template passed to ``JinjaTemplateCommand``.
    output:
        "R": passed to the template as "render_dump".
        "pdf": passed to the template as "pdf".
        "json": file the collected statistics are written to.
    param:
        "ids": sample labels, paired positionally with "all_peak_xls".

    The default dicts only document the expected keys; they are never mutated.
    """
    from contextlib import closing

    # Thresholds are named once so the recorded cutoff and the judgement agree.
    unic_loc_rate_cutoff = 0.8  # non redundant rate cutoff
    unic_loc_cutoff = 5000000

    json_dict = {"stat": {}, "input": input, "output": output, "param": param}

    treat_unic_loc_rates = []
    treat_unic_locs = []
    for peaks_xls in input["all_peak_xls"]:
        parsed = _peaks_parse(peaks_xls)
        treat_unic_loc_rates.append(parsed["treat_unic_ratio"])
        treat_unic_locs.append(parsed["treat_unic"])

    for sample_id, unic_loc_rate, unic_loc in zip(param["ids"], treat_unic_loc_rates, treat_unic_locs):
        json_dict["stat"][sample_id] = {
            "unic_loc_rate": unic_loc_rate,
            "unic_loc": unic_loc,
            "cutoff": {"unic_loc_rate": unic_loc_rate_cutoff,
                       "unic_loc": unic_loc_cutoff},
            "judge": {"unic_loc_rate": "Pass" if unic_loc_rate >= unic_loc_rate_cutoff else "Fail",
                      "unic_loc": "Pass" if unic_loc >= unic_loc_cutoff else "Fail"},
        }

    # Close the connection explicitly instead of relying on garbage collection.
    with closing(sqlite3.connect(input["db"])) as connection:
        with closing(connection.cursor()) as cursor:
            # TODO: column name redundant_rate => non-redundant rate
            cursor.execute("select redundant_rate from peak_calling")
            redundant_history = cursor.fetchall()

    # Skip missing values: both the literal "null" marker and a real SQL NULL
    # (None), which float() cannot convert.
    historyData = [1 - float(row[0])
                   for row in redundant_history
                   if row[0] is not None and row[0] != "null"]

    redundant_rate_R = JinjaTemplateCommand(
        name="redunRateQC",
        template=input["template"],
        param={'historic_data': historyData,
               'current_data': treat_unic_loc_rates,
               'ids': param["ids"],
               'cutoff': unic_loc_rate_cutoff,
               'main': 'Non-Redundant rate',
               'xlab': 'Non-Redundant rate',
               'ylab': 'fn(Non-Redundant rate)',
               "pdf": output["pdf"],
               "render_dump": output["R"],
               "need_smooth_curve": True})

    template_dump(redundant_rate_R)
    r_exec(redundant_rate_R)

    with open(output["json"], "w") as f:
        json.dump(json_dict, f, indent=4)
コード例 #3
0
ファイル: qc_fastqc.py プロジェクト: hanfeisun/ChiLin2
def stat_fastqc(input={"db": "", "fastqc_summaries": [], "template": ""},
                output={"R": "", "json": "", "pdf": ""},
                param={"ids": [], "id": ""}):
    """Judge the median sequence quality of each sample and plot it.

    input:
        "fastqc_summaries": items handed one by one to
            ``_python_fastqc_parse``; each parsed result must provide
            "median" and "sequence_length".
        "db": path of the sqlite database holding the ``fastqc_info`` table.
        "template": template passed to ``JinjaTemplateCommand``.
    output:
        "R": passed to the template as "render_dump".
        "pdf": passed to the template as "pdf".
        "json": not read here; carried inside the dict given to ``json_dump``
            (presumably the destination it writes to -- confirm in json_dump).
    param:
        "ids": sample labels, paired positionally with "fastqc_summaries".
        "id": not read by this function.

    The default dicts only document the expected keys; they are never mutated.
    """
    from contextlib import closing

    # One name for the threshold used for the record, the judgement and the plot.
    quality_cutoff = 25

    json_dict = {"stat": {}, "input": input, "output": output, "param": param}
    stat = json_dict["stat"]

    quality_medians = []

    for a_summary, a_id in zip(input["fastqc_summaries"], param["ids"]):
        parsed = _python_fastqc_parse(input=a_summary)
        median = parsed["median"]

        stat[a_id] = {
            "median": median,
            "cutoff": quality_cutoff,
            "judge": "Pass" if median > quality_cutoff else "Fail",
            "sequence_length": parsed["sequence_length"],
        }

        quality_medians.append(median)

    # Close the connection explicitly instead of relying on garbage collection.
    with closing(sqlite3.connect(input["db"])) as connection:
        with closing(connection.cursor()) as cursor:
            cursor.execute("SELECT median_quality FROM fastqc_info")
            rows = cursor.fetchall()
    history_data = [float(row[0]) for row in rows]

    fastqc_dist_r = JinjaTemplateCommand(
        template=input["template"],
        param={'historic_data': history_data,
               'current_data': quality_medians,
               'ids': [underline_to_space(i) for i in param["ids"]],
               'cutoff': quality_cutoff,
               'main': 'Sequence Quality Score Cumulative Percentage',
               'xlab': 'sequence quality score',
               'ylab': 'fn(sequence quality score)',
               "need_smooth_curve": True,
               "pdf": output["pdf"],
               "render_dump": output["R"]})

    template_dump(fastqc_dist_r)
    r_exec(fastqc_dist_r)

    json_dump(json_dict)
コード例 #4
0
ファイル: qc_bowtie.py プロジェクト: hanfeisun/ChiLin2
def stat_bowtie(
    input={"bowtie_summaries": [], "db": "", "template": ""},
    output={"json": "", "R": "", "pdf": ""},
    param={"sams": []},
):
    """Judge the mappable reads of each sample and plot the mapped rates.

    input:
        "bowtie_summaries": items handed one by one to
            ``_bowtie_summary_parse``; each parsed result must provide
            "mappable_reads" and "mappable_rate".
        "db": path of the sqlite database holding the ``mapping`` table.
        "template": template passed to ``JinjaTemplateCommand``.
    output:
        "R": passed to the template as "render_dump".
        "pdf": passed to the template as "pdf".
        "json": file the collected statistics are written to.
    param:
        "sams": sample labels, paired positionally with "bowtie_summaries".
            Each entry is used as a key of the "stat" dict, so it must be
            hashable.

    The default dicts only document the expected keys; they are never mutated.
    """
    from contextlib import closing

    mappable_reads_cutoff = 5000000  # mappable reads

    # unique location is in text_macs2_summary part
    json_dict = {"stat": {}, "input": input, "output": output, "param": param}

    # Close the connection explicitly instead of relying on garbage collection.
    with closing(sqlite3.connect(input["db"])) as connection:
        with closing(connection.cursor()) as cursor:
            cursor.execute("select map_ratio from mapping")
            rows = cursor.fetchall()
    # NOTE(review): the historic values are passed on as strings, unlike the
    # current values below -- kept as found, confirm the template expects it.
    historyData = [str(row[0]) for row in rows]

    # Collect the rates while walking the samples so that "current_data" is
    # in the same order as the "ids" given to the template. Rebuilding the
    # list from the "stat" dict would follow dict order and collapse
    # duplicate sample keys, detaching rates from their labels.
    mappable_rates = []
    for summary, sam in zip(input["bowtie_summaries"], param["sams"]):
        sam_stat = _bowtie_summary_parse(summary)
        sam_stat["cutoff"] = mappable_reads_cutoff
        sam_stat["judge"] = "Pass" if sam_stat["mappable_reads"] >= mappable_reads_cutoff else "Fail"
        json_dict["stat"][sam] = sam_stat
        mappable_rates.append(sam_stat["mappable_rate"])

    mappable_rate_R = JinjaTemplateCommand(
        template=input["template"],
        param={
            "historic_data": historyData,
            "current_data": mappable_rates,
            "ids": param["sams"],
            "cutoff": 0.5,
            "main": "Unique mapped rate",
            "xlab": "Unique mapped rate",
            "ylab": "fn(Unique mapped rate)",
            "need_smooth_curve": True,
            "render_dump": output["R"],
            "pdf": output["pdf"],
        },
    )
    template_dump(mappable_rate_R)
    r_exec(mappable_rate_R)

    with open(output["json"], "w") as f:
        json.dump(json_dict, f, indent=4)