예제 #1
0
파일: qc.py 프로젝트: cfce/chilin
def json_dhs(input={"top_peaks": "", "dhs_peaks": ""}, output={"json": ""}, param={}):
    """Collect the DHS overlap counts into a QC record and dump it.

    The file at ``input["dhs"]`` is parsed as comma-separated integers:
    the first field is stored as ``stat["number"]`` and the second as
    ``stat["overlap"]``.  The assembled record is passed to ``json_dump``.

    NOTE(review): the default ``input`` advertises ``top_peaks`` and
    ``dhs_peaks`` while the body only reads ``input["dhs"]`` -- confirm
    against the caller.
    """
    # Close the handle deterministically instead of relying on the
    # garbage collector.
    with open(input["dhs"]) as dhs_file:
        content = dhs_file.read().strip().split(",")
    result_dict = {"stat": {}, "input": input, "output": output, "param": param}
    result_dict["stat"]["overlap"] = int(content[1])
    result_dict["stat"]["number"] = int(content[0])
    json_dump(result_dict)
예제 #2
0
파일: qc.py 프로젝트: yangluom/chilin
def json_contamination(input={"summaries": [[]]},
                       output={"json": ""},
                       param={
                           "samples": "",
                           "species": "",
                           "id": ""
                       }):
    """Compute per-sample, per-species mapping ratios and dump them.

    ``input["summaries"]`` is walked in lockstep with ``param["samples"]``
    (each sample name passed through ``underline_to_space``); every
    summary is in turn walked in lockstep with ``param["species"]``.
    Each summary item supplies two file paths: the mapped count is the
    first whitespace-separated token on the third line of item ``[0]``,
    and the total is the integer content of item ``[1]``.  The stored
    value is ``mapped / total`` as a float.
    """
    library_contamination = {}
    library_contamination["meta"] = {
        "sample": param["id"],
        "species": param["species"]
    }
    library_contamination["value"] = {}
    for a_summary, s in zip(input["summaries"],
                            map(underline_to_space, param["samples"])):
        # each summary carries one entry per species
        ratios = library_contamination["value"][s] = {}
        for files, species in zip(a_summary, param["species"]):
            # ``with`` closes both handles even if parsing fails.
            with open(files[0]) as mapped_file:
                mapped = int(mapped_file.readlines()[2].strip().split()[0])
            with open(files[1]) as total_file:
                total = int(total_file.read().strip())
            ratios[species] = float(mapped) / total

    json_dict = {"stat": {}, "input": input, "output": output, "param": param}
    json_dict["stat"] = library_contamination
    json_dump(json_dict)
예제 #3
0
def stat_frag_std(input={
    "r": "",
    "insert": ""
},
                  output={
                      "json": "",
                      "r": ""
                  },
                  param={
                      "samples": "",
                      "frag_tool": ""
                  }):
    """Rewrite each predictd R script, run it and record a value per sample.

    For every ``(input["r"], output["r"], param["samples"])`` triple the
    pieces returned by ``get_size`` are written to the output R script
    together with extra statements that print two tab-separated numbers.
    The script is executed with ``Rscript`` and the second printed token
    (``xcorr.max`` in the script) is stored, truncated to an integer and
    formatted as a string, under ``stat[sample]``.  The record is then
    passed to ``json_dump``.
    """
    json_dict = {"input": input, "output": output, "param": param, "stat": {}}
    for rin, rout, s in zip(input["r"], output["r"], param["samples"]):
        values = get_size(rin)
        # The ``with`` block closes the script before Rscript reads it, so
        # no explicit close() is needed afterwards.
        with open(rout, 'w') as f:
            f.write(values['positive'])
            f.write(values['minus'])
            f.write(values['xcorr'])
            f.write(values['ycorr'])
            f.write("xcorr.max = xcorr[which(ycorr==max(ycorr))]\n")
            f.write(values['x'])
            f.write("p.expect = sum(x * p/100) \n")
            f.write("m.expect = sum(x * m/100) \n")
            f.write("p.sd = sqrt(sum(((x-p.expect)^2)*p/100)) \n")
            f.write("m.sd = sqrt(sum(((x-m.expect)^2)*m/100)) \n")
            f.write("cat(paste((p.sd + m.sd)/2, '\t', xcorr.max)) \n")
        # Close the pipe explicitly so the child process is reaped.
        with os.popen("Rscript %s" % rout) as pipe:
            std_frag = pipe.read().strip().split()
        json_dict["stat"][s] = "%s" % (int(float(std_frag[1])))
    json_dump(json_dict)
예제 #4
0
파일: dc.py 프로젝트: cfce/chilin
def enrich_in_meta(input = {'meta':'', 'mapped':''}, output = {"json": ""}, param = {'dhs': '', 'down': '', 'has_dhs':'', 'id':"", 'samples':""}):
    """Compute exon / promoter / DHS enrichment ratios per sample.

    For sample number ``n`` in ``param['samples']``:

    * the mapped-read total is the first token on the third line of
      ``input['mapped'][n]``;
    * ``input['meta'][n]`` holds comma-separated numbers; exon and
      promoter are fields 0 and 1, divided by the mapped total when
      ``param['down']`` is falsy and by field 2 otherwise;
    * when ``param['has_dhs']`` is truthy, ``param['dhs'][n]`` holds
      comma-separated numbers; dhs is field 0 divided by the mapped total
      (``down`` falsy) or by field 1.

    The record is passed to ``json_dump``.
    """
    json_dict = {"stat": {}, "input": input, "output": output, "param":param}
    for n, s in enumerate(param['samples']):
        ## total mapped reads
        with open(input["mapped"][n]) as mapped_file:
            mapped = float(mapped_file.readlines()[2].split()[0])
        sample_stat = json_dict['stat'][s] = {}

        # Build real lists: on Python 3 ``map`` returns an iterator that
        # cannot be indexed.
        with open(input['meta'][n]) as meta_file:
            meta = [float(v) for v in meta_file.read().strip().split(",")]
        if not param["down"]:
            sample_stat['exon'] = meta[0]/mapped
            sample_stat['promoter'] = meta[1]/mapped ## use all mapped reads
        else:
            sample_stat['exon'] = meta[0]/meta[2]
            sample_stat['promoter'] = meta[1]/meta[2] ## use 4M reads

        if param['has_dhs']:
            with open(param["dhs"][n]) as dhs_file:
                dhs = [float(v) for v in dhs_file.read().strip().split(",")]
            if not param["down"]:
                sample_stat['dhs'] = dhs[0]/mapped
            else:
                sample_stat['dhs'] = dhs[0]/dhs[1]

    json_dump(json_dict)
예제 #5
0
파일: qc.py 프로젝트: asntech/chilin
def json_conservation(input={"score": ""}, output={"json": ""}, param={}):
    """
    Collect the conservation_plot Phastcon scores into a QC record.

    The whitespace-separated values in ``input['score']`` are rounded to
    three decimals and stored as a list of strings under ``stat``; the
    record is passed to ``json_dump``.

    NOTE(review): the record stores an empty string under ``"param"``
    rather than the ``param`` argument -- kept as in the original.
    """
    with open(input['score']) as score_file:
        raw_scores = score_file.read().strip().split()
    json_dict = {"stat": [], "input": input, "output": output, "param": ""}
    # A concrete list is required: a Python 3 ``map`` object is lazy and
    # is not a list.
    json_dict['stat'] = [str(round(float(x), 3)) for x in raw_scores]
    json_dump(json_dict)
예제 #6
0
파일: qc.py 프로젝트: yangluom/chilin
def json_conservation(input={"score": ""}, output={"json": ""}, param={}):
    """
    Collect the conservation_plot Phastcon scores into a QC record.

    Every whitespace-separated value read from ``input['score']`` is
    rounded to three decimals and converted to a string; the resulting
    list becomes ``stat`` and the record is handed to ``json_dump``.

    NOTE(review): ``"param"`` is stored as an empty string, not as the
    ``param`` argument -- preserved from the original.
    """
    with open(input['score']) as handle:
        tokens = handle.read().strip().split()
    json_dict = {"stat": [], "input": input, "output": output, "param": ""}
    # Materialise the values; on Python 3 ``map`` would yield a lazy
    # iterator instead of a list.
    json_dict['stat'] = [str(round(float(token), 3)) for token in tokens]
    json_dump(json_dict)
예제 #7
0
파일: qc.py 프로젝트: yangluom/chilin
def json_reps(input, output, param):
    """Collect replicate correlation and overlap values and dump them.

    ``stat['cor']`` is the third whitespace-separated field of every
    line of ``input['cor']`` as a float; ``stat['overlap']`` is the float
    content of each file listed in ``input['overlap']``.
    """
    json_dict = {"stat": {}, "input": input, "output": output, "param": param}
    # ``with`` guarantees every handle is closed, even on a parse error.
    with open(input['cor']) as cor_file:
        json_dict['stat']['cor'] = [
            float(line.strip().split()[2]) for line in cor_file
        ]
    overlaps = []
    for path in input['overlap']:
        with open(path) as overlap_file:
            overlaps.append(float(overlap_file.read().strip()))
    json_dict["stat"]['overlap'] = overlaps
    json_dump(json_dict)
예제 #8
0
파일: qc.py 프로젝트: cfce/chilin
def json_macs2(input={"macs2_peaks_xls": ""}, output={"json": ""}, param={"id": ""}):
    """
    Dump the statistics parsed from a macs2 ``_peaks.xls`` file.

    input: macs2 _peaks.xls
    output: conf.json_prefix + "_macs2.json"

    Nothing is parsed or dumped when the peaks file does not exist (the
    original author notes that broad-peak runs sometimes, and narrowPeak
    runs very seldom, produce no peaks).
    """
    peaks_xls = input['macs2_peaks_xls']
    if not os.path.exists(peaks_xls):
        return
    json_dump({
        "stat": _peaks_parse(peaks_xls),
        "input": input,
        "output": output,
        "param": param,
    })
예제 #9
0
파일: qc.py 프로젝트: yangluom/chilin
def json_velcro(input={}, output={}, param={}):
    """Store ``1 - x`` as the stat, where ``x`` is the float in ``input["velcro"]``.

    The resulting record is passed to ``json_dump``.
    """
    # Read through a context manager so the handle is always closed.
    with open(input["velcro"]) as velcro_file:
        ratio = float(velcro_file.read().strip())
    result_dict = {
        "stat": {},
        "input": input,
        "output": output,
        "param": param
    }
    result_dict["stat"] = 1 - ratio
    json_dump(result_dict)
예제 #10
0
파일: qc.py 프로젝트: yangluom/chilin
def stat_seqpos(input={
    "template": "",
    "seqpos": ""
},
                output={"latex_section": ""},
                param={
                    "prefix": "",
                    "z_score_cutoff": -15
                }):
    """Select the best-scoring motifs from an MDSeqPos result file.

    Each non-blank line of ``input['seqpos']`` is decoded as one JSON
    motif record.  A z-score given as the string ``'None'`` is replaced
    by 65535 and missing factors by ``['denovo']``.  Motifs are sorted by
    ascending z-score; those whose id contains ``'observed'`` past the
    first character are skipped, and at most ten motifs scoring below
    ``param["z_score_cutoff"]`` are kept.  Each kept motif gets a
    ``logoImg`` path built from ``param["prefix"]`` and its id, and the
    list is dumped under ``stat["satisfied_motifs"]``.
    """
    z_score_cutoff = param["z_score_cutoff"]

    ## parse motif list json file
    with open(input['seqpos']) as seqpos_file:
        # Blank lines (e.g. a trailing newline) are skipped instead of
        # making json.loads fail.
        motifs = [
            json.loads(line.strip()) for line in seqpos_file if line.strip()
        ]

    for a_motif in motifs:
        if a_motif['seqpos_results']['zscore'] == 'None':
            # sentinel that sorts unscored motifs last
            a_motif['seqpos_results']['zscore'] = 65535
        if a_motif['factors'] is None:
            a_motif['factors'] = ['denovo']

    motifs.sort(key=lambda x: x['seqpos_results']['zscore'])

    top_motifs = []
    for a_motif in motifs:
        if a_motif['id'].find('observed') > 0:
            continue
        if len(top_motifs) == 10:
            break

        # z_score is a negative score, the smaller, the better
        if a_motif['seqpos_results']['zscore'] < z_score_cutoff:
            top_motifs.append(a_motif)

    ## attach the logo image path to every kept motif (at most 10)
    for a_motif in top_motifs:
        a_motif["logoImg"] = param["prefix"] + a_motif['id'] + ".png"

    result_dict = {
        "stat": {},
        "input": input,
        "output": output,
        "param": param
    }
    result_dict["stat"]["satisfied_motifs"] = top_motifs
    json_dump(result_dict)
예제 #11
0
파일: qc.py 프로젝트: asntech/chilin
def json_fastqc(input={"fastqc_summaries": []},
                output={"R": "", "json": "", "pdf": ""},
                param={"ids": [], "id": ""}):
    """Gather the FastQC median and sequence length per id and dump them.

    ``input["fastqc_summaries"]`` and ``param["ids"]`` are walked in
    lockstep; each summary is parsed with ``_fastqc_parse`` and its
    ``median`` and ``sequence_length`` entries are stored under the id.
    """
    per_id = {}
    record = {"stat": per_id, "input": input, "output": output, "param": param}
    for summary_path, sample_id in zip(input["fastqc_summaries"], param["ids"]):
        parsed = _fastqc_parse(input=summary_path)
        per_id[sample_id] = {
            "median": parsed["median"],
            "sequence_length": parsed["sequence_length"],
        }
    json_dump(record)
예제 #12
0
파일: qc.py 프로젝트: yangluom/chilin
def json_macs2(input={"macs2_peaks_xls": ""},
               output={"json": ""},
               param={"id": ""}):
    """
    Parse a macs2 ``_peaks.xls`` file and dump its statistics.

    input: macs2 _peaks.xls
    output: conf.json_prefix + "_macs2.json"

    When the peaks file is missing the function returns without dumping
    anything (per the original note: broad-peak runs sometimes, and
    narrowPeak runs very seldom, yield no peaks).
    """
    xls_path = input['macs2_peaks_xls']
    if os.path.exists(xls_path):
        record = {
            "stat": _peaks_parse(xls_path),
            "input": input,
            "output": output,
            "param": param,
        }
        json_dump(record)
예제 #13
0
파일: qc.py 프로젝트: asntech/chilin
def json_frip(input={}, output={}, param={}):    # convert to json
    """
    Convert ``*.frip`` files into the FRiP QC record.

    input is *.frip
    output is conf.json_prefix + "_frip.json"
    param for matching samples

    Each file in ``input["frip"]`` holds two comma-separated integers,
    stored as ``info_tag`` and ``total_tag``; ``frip`` is their ratio.
    Files are matched to ``param["samples"]`` by position.
    """
    json_dict = {"stat": {}, "input": input, "output": output, "param": param}
    for path, s in zip(input["frip"], param["samples"]):
        # Context manager closes the handle even if parsing fails.
        with open(path) as frip_file:
            inf = frip_file.read().strip().split(",")
        info_tag = int(inf[0])
        total_tag = int(inf[1])
        json_dict["stat"][s] = {
            "info_tag": info_tag,
            "total_tag": total_tag,
            "frip": float(info_tag) / total_tag,
        }
    json_dump(json_dict)
예제 #14
0
파일: qc.py 프로젝트: yangluom/chilin
def json_frip(input={}, output={}, param={}):  # convert to json
    """
    Build the FRiP QC record from ``*.frip`` files.

    input is *.frip
    output is conf.json_prefix + "_frip.json"
    param for matching samples

    Every file listed in ``input["frip"]`` is read as two comma-separated
    integers (``info_tag``, ``total_tag``); ``frip`` is the first divided
    by the second.  Files pair with ``param["samples"]`` positionally.
    """
    json_dict = {"stat": {}, "input": input, "output": output, "param": param}
    for frip_path, s in zip(input["frip"], param["samples"]):
        # ``with`` releases the handle deterministically.
        with open(frip_path) as handle:
            fields = handle.read().strip().split(",")
        info_tag, total_tag = int(fields[0]), int(fields[1])
        sample_stat = json_dict["stat"][s] = {}
        sample_stat["info_tag"] = info_tag
        sample_stat["total_tag"] = total_tag
        sample_stat["frip"] = float(info_tag) / total_tag
    json_dump(json_dict)
예제 #15
0
파일: qc.py 프로젝트: cfce/chilin
def json_macs2_on_reps(input={"all_peak_xls": []}, output={"json": ""}, param={"samples": []}):
    """
    Collect the macs2 statistics of every replicate into one record.

    compared to merged one, collect redundant ratio with --keep-dup 1 option

    Existing peak files are parsed with ``_peaks_parse``; the record is
    only dumped when every file in ``input["all_peak_xls"]`` exists, in
    which case the parsed results pair with ``param["samples"]`` by
    position.
    """
    # Missing files are tolerated here (broad-peak runs may produce none).
    parsed = [_peaks_parse(xls) for xls in input["all_peak_xls"] if os.path.exists(xls)]

    if not all(map(os.path.exists, input['all_peak_xls'])):
        return

    per_sample = {}
    for sample, stat in zip(param["samples"], parsed):
        per_sample[sample] = stat
    json_dump({"stat": per_sample, "input": input, "output": output, "param": param})
예제 #16
0
파일: qc.py 프로젝트: asntech/chilin
def json_pbc(input={}, output={}, param={}): # convert to json format
    """
    Convert ``.pbc`` files into the PBC QC record.

    input is the target + ".pbc"
    output is the json files conf.json_prefix + "_pbc.json"
    param for matching samples order

    The first line of each file in ``input["pbc"]`` supplies three
    whitespace-separated fields stored as ``N1`` (int), ``Nd`` (int) and
    ``PBC`` (float rounded to three decimals).
    """
    json_dict = {"stat": {}, "input": input, "output": output, "param": param}

    for path, s in zip(input["pbc"], param["samples"]):
        # Only the first line is needed; ``with`` closes the handle.
        with open(path) as pbc_file:
            inl = pbc_file.readline().strip().split()
        json_dict["stat"][s] = {}
        json_dict["stat"][s]["N1"] = int(inl[0])
        json_dict["stat"][s]["Nd"] = int(inl[1])
        json_dict["stat"][s]["PBC"] = round(float(inl[2]), 3)

    json_dump(json_dict)
예제 #17
0
def json_pbc(input={}, output={}, param={}):  # convert to json format
    """
    Build the PBC QC record from ``.pbc`` files.

    input is the target + ".pbc"
    output is the json files conf.json_prefix + "_pbc.json"
    param for matching samples order

    For each file in ``input["pbc"]`` the first line is split on
    whitespace; the fields are stored as ``N1`` (int), ``Nd`` (int) and
    ``PBC`` (float rounded to three decimals).
    """
    json_dict = {"stat": {}, "input": input, "output": output, "param": param}

    for pbc_path, s in zip(input["pbc"], param["samples"]):
        # Read just the first line and let ``with`` close the handle.
        with open(pbc_path) as handle:
            fields = handle.readline().strip().split()
        sample_stat = json_dict["stat"][s] = {}
        sample_stat["N1"] = int(fields[0])
        sample_stat["Nd"] = int(fields[1])
        sample_stat["PBC"] = round(float(fields[2]), 3)

    json_dump(json_dict)
예제 #18
0
파일: qc.py 프로젝트: cfce/chilin
def json_contamination(input = {"summaries": [[]]}, output = {"json": ""}, param = {"samples": "", "species": "", "id": ""}):
    """Compute the mapped/total ratio per sample and species and dump it.

    ``input["summaries"]`` pairs positionally with ``param["samples"]``
    (names passed through ``underline_to_space``); the items of each
    summary pair positionally with ``param["species"]``.  Item ``[0]`` is
    a file whose third line starts with the mapped count, item ``[1]`` a
    file containing the total count.
    """
    library_contamination = {}
    library_contamination["meta"] = {"sample": param["id"], "species": param["species"]}
    library_contamination["value"] = {}
    for a_summary, s in zip(input["summaries"], map(underline_to_space, param["samples"])):
        ## each summary has several species information
        per_species = library_contamination["value"][s] = {}
        for paths, species in zip(a_summary, param["species"]):
            # Context managers make sure both handles are closed.
            with open(paths[0]) as mapped_file:
                mapped = int(mapped_file.readlines()[2].strip().split()[0])
            with open(paths[1]) as total_file:
                total = int(total_file.read().strip())
            per_species[species] = float(mapped)/total

    json_dict = {"stat": {}, "input": input, "output": output, "param": param}
    json_dict["stat"] = library_contamination
    json_dump(json_dict)
예제 #19
0
파일: dc.py 프로젝트: asntech/chilin
def enrich_in_meta(input = {'exon':'','dhs':'','promoter':'', "mapped": ""}, output = {"json": ""}, param = {'id':"", 'samples':""}):
    """ enrichment in meta regions

    For sample number ``n`` in ``param['samples']`` the mapped-read total
    is the first token on the third line of ``input['mapped'][n]``.  The
    single numbers stored in the exon, promoter and (when
    ``param['has_dhs']`` is truthy) dhs files are divided by that total;
    without DHS data the ``dhs`` value is 0.  The record is passed to
    ``json_dump``.
    """
    def read_number(path):
        # Read one float from a file and close the handle right away.
        with open(path) as handle:
            return float(handle.read().strip())

    json_dict = {"stat": {}, "input": input, "output": output, "param":param}

    for n, s in enumerate(param['samples']):
        with open(input["mapped"][n]) as mapped_file:
            mapped = float(mapped_file.readlines()[2].split()[0])
        sample_stat = json_dict['stat'][s] = {}
        sample_stat['exon'] = read_number(input['exon'][n])/mapped
        sample_stat['promoter'] = read_number(input['promoter'][n])/mapped
        if param['has_dhs']:
            # NOTE(review): the original read the DHS files from ``param``
            # although the signature lists 'dhs' under ``input``.  ``param``
            # still wins when it has the key; ``input`` is only a fallback
            # that replaces a KeyError -- confirm which one callers use.
            dhs_files = param['dhs'] if 'dhs' in param else input['dhs']
            sample_stat['dhs'] = read_number(dhs_files[n])/mapped
        else:
            sample_stat['dhs'] = 0
    json_dump(json_dict)
예제 #20
0
파일: qc.py 프로젝트: yangluom/chilin
def json_fastqc(input={"fastqc_summaries": []},
                output={
                    "R": "",
                    "json": "",
                    "pdf": ""
                },
                param={
                    "ids": [],
                    "id": ""
                }):
    """Collect the FastQC median and sequence length for every id.

    Summaries in ``input["fastqc_summaries"]`` pair positionally with
    ``param["ids"]``.  Each is parsed by ``_fastqc_parse`` and only its
    ``median`` and ``sequence_length`` values are kept; the record is
    then passed to ``json_dump``.
    """
    wanted = ("median", "sequence_length")
    stat = {}
    for a_summary, a_id in zip(input["fastqc_summaries"], param["ids"]):
        parsed = _fastqc_parse(input=a_summary)
        entry = stat[a_id] = {}
        for key in wanted:
            entry[key] = parsed[key]
    json_dump({"stat": stat, "input": input, "output": output, "param": param})
예제 #21
0
파일: qc.py 프로젝트: yangluom/chilin
def json_dhs(
        input={
            "top_peaks": "",
            "dhs_peaks": ""
        },
        output={"json": ""},
        param={}):
    """Store the DHS overlap counts in a QC record and dump it.

    ``input["dhs"]`` is read as comma-separated integers; field 0 becomes
    ``stat["number"]`` and field 1 becomes ``stat["overlap"]``.

    NOTE(review): the default ``input`` lists ``top_peaks`` and
    ``dhs_peaks`` but only ``input["dhs"]`` is read -- confirm against
    the caller.
    """
    # ``with`` closes the handle instead of leaving it to the garbage
    # collector.
    with open(input["dhs"]) as handle:
        content = handle.read().strip().split(",")
    result_dict = {
        "stat": {},
        "input": input,
        "output": output,
        "param": param
    }
    result_dict["stat"]["overlap"] = int(content[1])
    result_dict["stat"]["number"] = int(content[0])
    json_dump(result_dict)
예제 #22
0
파일: qc.py 프로젝트: yangluom/chilin
def json_macs2_on_reps(input={"all_peak_xls": []},
                       output={"json": ""},
                       param={"samples": []}):
    """
    Gather the per-replicate macs2 statistics into a single record.

    compared to merged one, collect redundant ratio with --keep-dup 1 option

    Every existing file in ``input["all_peak_xls"]`` is parsed with
    ``_peaks_parse``.  The record is dumped only if all listed files
    exist; parsed results then pair positionally with
    ``param["samples"]``.
    """
    xls_files = input["all_peak_xls"]
    parsed = []
    for xls in xls_files:
        # a run may legitimately have produced no peaks file
        if not os.path.exists(xls):
            continue
        parsed.append(_peaks_parse(xls))

    every_file_present = all(map(os.path.exists, input['all_peak_xls']))
    if every_file_present:
        record = {"stat": {}, "input": input, "output": output, "param": param}
        record["stat"].update(zip(param["samples"], parsed))
        json_dump(record)
예제 #23
0
파일: qc.py 프로젝트: cfce/chilin
def json_meta2(input={}, output={}, param={}):
    """
    generate json of genomic distribution (given by the bedAnnotate output)
    ***THE key difference between json_meta and this fn is that bedAnnotate
    conveniently outputs the distribution as a dictionary of peak counts

    The file at ``input["meta"]`` holds a dict literal such as
    ``{'Intron': 68017, 'Exon': 7659, 'Intergenic': 73090, 'Promoter': 11229}``.
    Each of the four categories is stored as its share of the sum of all
    values, and the record is passed to ``json_dump``.
    """
    import ast

    with open(input["meta"]) as f:
        # NOTE(review): this used to be ``eval``, which executes arbitrary
        # code found in the file.  ``literal_eval`` accepts only Python
        # literals, which covers the dict shown above -- confirm that
        # bedAnnotate never writes anything else.
        content = ast.literal_eval(f.read().strip())
    total = float(sum(content.values()))
    json_dict = {"input": input, "stat": {}, "output": output, "param": param}
    json_dict["stat"]["exon"] = content['Exon']/total
    json_dict["stat"]["intron"] = content['Intron']/total
    json_dict["stat"]["promoter"] = content['Promoter']/total
    json_dict["stat"]["inter"] = content['Intergenic']/total
    json_dump(json_dict)
예제 #24
0
파일: qc.py 프로젝트: yangluom/chilin
def json_meta2(input={}, output={}, param={}):
    """
    generate json of genomic distribution (given by the bedAnnotate output)
    ***THE key difference between json_meta and this fn is that bedAnnotate
    conveniently outputs the distribution as a dictionary of peak counts

    ``input["meta"]`` contains a dict literal, e.g.
    ``{'Intron': 68017, 'Exon': 7659, 'Intergenic': 73090, 'Promoter': 11229}``.
    The exon, intron, promoter and intergenic counts are each divided by
    the sum of all values before the record goes to ``json_dump``.
    """
    import ast

    with open(input["meta"]) as meta_file:
        raw = meta_file.read().strip()
    # NOTE(review): replaces a bare ``eval`` so that text in the file can
    # no longer execute code; only Python literals are accepted, which is
    # enough for the dict shown above -- confirm against bedAnnotate.
    content = ast.literal_eval(raw)
    total = float(sum(content.values()))
    json_dict = {"input": input, "stat": {}, "output": output, "param": param}
    for stat_key, category in (("exon", 'Exon'), ("intron", 'Intron'),
                               ("promoter", 'Promoter'),
                               ("inter", 'Intergenic')):
        json_dict["stat"][stat_key] = content[category] / total
    json_dump(json_dict)
예제 #25
0
파일: qc.py 프로젝트: asntech/chilin
def json_phan(input = {"spp": ""}, output = {"json": ""}, param = {"sample": ""}):
    """ fragment size keep the maximus positive one

    Each file in ``input["spp"]`` (paired positionally with
    ``param["sample"]``) is split on whitespace.  Fields 8, 9 and 10 are
    stored verbatim as ``NSC``, ``RSC`` and ``Qtag``.  Field 2 is a
    comma-separated list of fragment-size candidates; the first
    non-negative one is stored as ``frag`` (as text), or ``0`` when there
    is none.
    """
    json_dict = {"stat": {}, "input": input, "output": output, "param": param}
    for path, s in zip(input["spp"], param["sample"]):
        with open(path) as spp_file:
            content = spp_file.read().strip().split()
        sample_stat = json_dict["stat"][s] = {}
        sample_stat["NSC"] = content[8]
        sample_stat["RSC"] = content[9]

        # Reset per sample so a value never leaks from the previous one.
        frag = 0
        for candidate in content[2].split(","):
            # Compare numerically: the original compared the text itself
            # with 0, which is always true on Python 2 and a TypeError on
            # Python 3.
            if float(candidate) >= 0:
                frag = candidate
                break

        sample_stat["frag"] = frag ## pick the most correlated ones
        sample_stat["Qtag"] = content[10]
    json_dump(json_dict)
예제 #26
0
파일: qc.py 프로젝트: yangluom/chilin
def json_phan(input={"spp": ""}, output={"json": ""}, param={"sample": ""}):
    """ fragment size keep the maximus positive one

    Files in ``input["spp"]`` pair positionally with ``param["sample"]``.
    After splitting a file on whitespace, fields 8, 9 and 10 are stored
    unchanged as ``NSC``, ``RSC`` and ``Qtag``; field 2 is a
    comma-separated candidate list from which the first non-negative
    entry is stored (as text) under ``frag``, defaulting to ``0``.
    """
    json_dict = {"stat": {}, "input": input, "output": output, "param": param}
    for spp_path, s in zip(input["spp"], param["sample"]):
        with open(spp_path) as handle:
            content = handle.read().strip().split()

        # Start from the default for every sample, and compare candidates
        # as numbers: text-vs-int comparison is always true on Python 2
        # and raises TypeError on Python 3.
        frag = 0
        for estimate in content[2].split(","):
            if float(estimate) >= 0:
                frag = estimate
                break

        json_dict["stat"][s] = {}
        json_dict["stat"][s]["NSC"] = content[8]
        json_dict["stat"][s]["RSC"] = content[9]
        json_dict["stat"][s]["frag"] = frag  ## pick the most correlated ones
        json_dict["stat"][s]["Qtag"] = content[10]
    json_dump(json_dict)
예제 #27
0
파일: qc.py 프로젝트: asntech/chilin
def stat_seqpos(input = {"template": "", "seqpos": ""}, output={"latex_section": ""}, param = {"prefix": "", "z_score_cutoff":-15}):
    """Pick the best-scoring motifs out of an MDSeqPos result file.

    Every non-blank line of ``input['seqpos']`` is one JSON motif record.
    A z-score equal to the string ``'None'`` becomes 65535 and missing
    factors become ``['denovo']``.  After sorting by ascending z-score,
    motifs whose id contains ``'observed'`` past the first character are
    skipped and at most ten motifs scoring below
    ``param["z_score_cutoff"]`` are kept.  Each kept motif receives a
    ``logoImg`` path (``param["prefix"]`` + id + ``".png"``) and the list
    is dumped under ``stat["satisfied_motifs"]``.
    """
    z_score_cutoff = param["z_score_cutoff"]

    ## parse motif list json file
    mdseqpos_result = []
    with open(input['seqpos']) as seqpos_file:
        for line in seqpos_file:
            line = line.strip()
            # tolerate blank lines instead of failing inside json.loads
            if line:
                mdseqpos_result.append(json.loads(line))

    for a_motif in mdseqpos_result:
        if a_motif['seqpos_results']['zscore'] == 'None':
            # sentinel that pushes unscored motifs to the end of the sort
            a_motif['seqpos_results']['zscore'] = 65535
        if a_motif['factors'] is None:
            a_motif['factors'] = ['denovo']

    mdseqpos_result.sort(key=lambda x:x['seqpos_results']['zscore'])

    top_motifs = []
    for a_motif in mdseqpos_result:
        if a_motif['id'].find('observed')>0:
            continue
        if len(top_motifs) == 10:
            break

        # z_score is a negative score, the smaller, the better
        if a_motif['seqpos_results']['zscore'] < z_score_cutoff:
            top_motifs.append(a_motif)

    ## attach the logo image path to each kept motif (at most 10)
    for a_motif in top_motifs:
        a_motif["logoImg"] = param["prefix"] + a_motif['id'] + ".png"

    result_dict = {"stat": {}, "input": input, "output": output, "param": param}
    result_dict["stat"]["satisfied_motifs"] = top_motifs
    json_dump(result_dict)
예제 #28
0
def enrich_in_meta(input={
    'meta': '',
    'mapped': ''
},
                   output={"json": ""},
                   param={
                       'dhs': '',
                       'down': '',
                       'has_dhs': '',
                       'id': "",
                       'samples': ""
                   }):
    """ enrichment in meta regions

    For sample number ``n`` in ``param['samples']``:

    * the mapped-read total is the first token on the third line of
      ``input['mapped'][n]``;
    * ``input['meta'][n]`` holds comma-separated numbers; exon and
      promoter are fields 0 and 1 divided by the mapped total when
      ``param['down']`` is falsy, otherwise by field 2;
    * when ``param['has_dhs']`` is truthy, ``param['dhs'][n]`` holds
      comma-separated numbers and dhs is field 0 divided by the mapped
      total (``down`` falsy) or by field 1.

    The record is passed to ``json_dump``.
    """
    def read_floats(path):
        # Return a real list (a Python 3 ``map`` object cannot be
        # indexed) and close the handle right away.
        with open(path) as handle:
            return [float(v) for v in handle.read().strip().split(",")]

    json_dict = {"stat": {}, "input": input, "output": output, "param": param}
    for n, s in enumerate(param['samples']):
        ## total mapped reads
        with open(input["mapped"][n]) as mapped_file:
            mapped = float(mapped_file.readlines()[2].split()[0])
        json_dict['stat'][s] = {}

        meta = read_floats(input['meta'][n])
        if not param["down"]:
            json_dict['stat'][s]['exon'] = meta[0] / mapped
            ## use all mapped reads
            json_dict['stat'][s]['promoter'] = meta[1] / mapped
        else:
            json_dict['stat'][s]['exon'] = meta[0] / meta[2]
            ## use 4M reads
            json_dict['stat'][s]['promoter'] = meta[1] / meta[2]

        if param['has_dhs']:
            dhs = read_floats(param["dhs"][n])
            if not param["down"]:
                json_dict['stat'][s]['dhs'] = dhs[0] / mapped
            else:
                json_dict['stat'][s]['dhs'] = dhs[0] / dhs[1]

    json_dump(json_dict)
예제 #29
0
파일: qc.py 프로젝트: cfce/chilin
def json_meta(input={}, output={}, param={}):
    """
    ###########################################################################
    DEPRECATED!!! see json_meta2
    ###########################################################################
    generate json of promoter, intergenic and exon
    only one output either from merged one or the best one
    overlap percentage info

    ``input["meta"]`` holds comma-separated numbers read in the order
    exon, intron, intergenic, promoter; each is stored as a float.
    """
    # ``with`` closes the file even when one of the conversions fails.
    with open(input["meta"]) as f:
        content = f.read().strip().split(",")
    exon = content[0]
    intron = content[1]
    inter = content[2]
    promoter = content[3]
    json_dict = {"input": input, "stat": {}, "output": output, "param": param}
    json_dict["stat"]["exon"] = float(exon)
    json_dict["stat"]["intron"] = float(intron)
    json_dict["stat"]["promoter"] = float(promoter)
    json_dict["stat"]["inter"] = float(inter)
    json_dump(json_dict)
예제 #30
0
파일: qc.py 프로젝트: yangluom/chilin
def json_meta(input={}, output={}, param={}):
    """
    ###########################################################################
    DEPRECATED!!! see json_meta2
    ###########################################################################
    generate json of promoter, intergenic and exon
    only one output either from merged one or the best one
    overlap percentage info

    The comma-separated fields of ``input["meta"]`` are taken in the
    order exon, intron, intergenic, promoter and stored as floats.
    """
    # The context manager closes the handle even if parsing raises.
    with open(input["meta"]) as meta_file:
        fields = meta_file.read().strip().split(",")
    exon, intron, inter, promoter = fields[0], fields[1], fields[2], fields[3]
    json_dict = {"input": input, "stat": {}, "output": output, "param": param}
    json_dict["stat"]["exon"] = float(exon)
    json_dict["stat"]["intron"] = float(intron)
    json_dict["stat"]["promoter"] = float(promoter)
    json_dict["stat"]["inter"] = float(inter)
    json_dump(json_dict)
예제 #31
0
파일: qc.py 프로젝트: asntech/chilin
def json_bwa(input={}, output={}, param={}): ## convert values to json files
    """
    input samtools flagstat standard output
    output json files
    kwargs for matching replicates order
    keep one value for each json for easier loading to html/pdf template
    example:
    3815725 + 0 in total (QC-passed reads + QC-failed reads)
    0 + 0 duplicates
    3815723 + 0 mapped (100.00%:-nan%)

    For each sample the ``mapped`` count is the first token on the third
    line of the ``input["bwa_mapped"]`` file and ``total`` is the integer
    on the first line of the ``input["bwa_total"]`` file.
    """
    json_dict = {"stat": {}, "input": input, "output": output, "param": param}

    for mapped, total, sam in zip(input["bwa_mapped"], input["bwa_total"], param["sample"]):
        # Plain text mode: the 'U' flag was removed in Python 3.11, and
        # the values are parsed with split()/strip() anyway.  ``with``
        # closes both handles even when parsing fails.
        with open(total) as inft, open(mapped) as infm:
            json_dict["stat"][sam] = {}
            json_dict["stat"][sam]["mapped"] = int(infm.readlines()[2].split()[0])
            json_dict["stat"][sam]["total"] = int(inft.readlines()[0].strip())
    json_dump(json_dict)
예제 #32
0
파일: qc.py 프로젝트: asntech/chilin
def stat_frag_std(input = {"r": "", "insert": ""}, output = {"json": "", "r": ""}, param = {"samples": "", "frag_tool": ""}):
    """ parse macs2 predictd r file into json file

    For each ``(input["r"], output["r"], param["samples"])`` triple the
    pieces returned by ``get_size`` are written to the output R script,
    followed by statements that print two tab-separated numbers.  The
    script is run with ``Rscript``; the second printed token
    (``xcorr.max`` in the script) is truncated to an integer and stored
    as a string under ``stat[sample]``.
    """
    # Statements appended after the ``x`` definition of every script.
    r_tail = (
        "p.expect = sum(x * p/100) \n",
        "m.expect = sum(x * m/100) \n",
        "p.sd = sqrt(sum(((x-p.expect)^2)*p/100)) \n",
        "m.sd = sqrt(sum(((x-m.expect)^2)*m/100)) \n",
        "cat(paste((p.sd + m.sd)/2, '\t', xcorr.max)) \n",
    )
    json_dict = {"input": input, "output": output, "param": param, "stat": {}}
    for rin, rout, s in zip(input["r"], output["r"], param["samples"]):
        values = get_size(rin)
        # ``with`` closes the script before Rscript reads it; the extra
        # close() the original issued afterwards was redundant.
        with open(rout, 'w') as f:
            f.write(values['positive'])
            f.write(values['minus'])
            f.write(values['xcorr'])
            f.write(values['ycorr'])
            f.write("xcorr.max = xcorr[which(ycorr==max(ycorr))]\n")
            f.write(values['x'])
            for statement in r_tail:
                f.write(statement)
        # Close the pipe so the child process is reaped.
        with os.popen("Rscript %s" % rout) as pipe:
            std_frag = pipe.read().strip().split()
        json_dict["stat"][s] = "%s" % (int(float(std_frag[1])))
    json_dump(json_dict)
예제 #33
0
파일: qc.py 프로젝트: yangluom/chilin
def json_bwa(input={}, output={}, param={}): ## convert values to json files
    """
    input samtools flagstat standard output
    output json files
    kwargs for matching replicates order
    keep one value for each json for easier loading to html/pdf template
    example:
    3815725 + 0 in total (QC-passed reads + QC-failed reads)
    0 + 0 duplicates
    3815723 + 0 mapped (100.00%:-nan%)

    Per sample, ``mapped`` is the first token on the third line of the
    matching ``input["bwa_mapped"]`` file and ``total`` is the integer on
    the first line of the matching ``input["bwa_total"]`` file.
    """
    json_dict = {"stat": {}, "input": input, "output": output, "param": param}

    for mapped, total, sam in zip(input["bwa_mapped"], input["bwa_total"], param["sample"]):
        # The removed-in-3.11 'U' flag is dropped; newline style does not
        # matter because the fields are extracted with split()/strip().
        with open(total) as inft, open(mapped) as infm:
            mapped_reads = int(infm.readlines()[2].split()[0])
            total_reads = int(inft.readlines()[0].strip())
        json_dict["stat"][sam] = {}
        json_dict["stat"][sam]["mapped"] = mapped_reads
        json_dict["stat"][sam]["total"] = total_reads
    json_dump(json_dict)
예제 #34
0
파일: qc.py 프로젝트: cfce/chilin
def json_reps(input, output, param):
    """Gather replicate correlation and overlap numbers and dump them.

    ``stat['cor']`` holds the third whitespace-separated field of each
    line in ``input['cor']`` as a float; ``stat['overlap']`` holds the
    float stored in each file of ``input['overlap']``.
    """
    def read_float(path):
        # Read a single float and close the handle immediately.
        with open(path) as handle:
            return float(handle.read().strip())

    json_dict = {"stat": {}, "input": input, "output": output, "param": param}
    with open(input['cor']) as cor_file:
        json_dict['stat']['cor'] = [ float(i.strip().split()[2]) for i in cor_file ]
    json_dict["stat"]['overlap'] = [ read_float(i) for i in input['overlap'] ]
    json_dump(json_dict)
예제 #35
0
파일: qc.py 프로젝트: cfce/chilin
def json_velcro(input={}, output={}, param={}):
    """Dump ``1 - x`` as the stat, ``x`` being the float in ``input["velcro"]``."""
    # ``with`` closes the handle instead of leaving it to the garbage
    # collector.
    with open(input["velcro"]) as handle:
        value = float(handle.read().strip())
    result_dict = {"stat": {}, "input": input, "output": output, "param": param}
    result_dict["stat"] = 1-value
    json_dump(result_dict)