Ejemplo n.º 1
0
def latex_environ(workflow, conf):
    """
    Attach the steps producing the opening and closing LaTeX fragments.

    Two commands are appended to ``workflow``:

    * ``latex_start`` with ``begin.tex`` as template (begin document,
      including packages) and ``<latex_prefix>_begin.tex`` as output;
    * ``latex_end`` with ``end.tex`` as template and
      ``<latex_prefix>_end.tex`` as output.

    :param workflow: workflow the commands are appended to
    :param conf: configuration object; ``id``, ``latex_prefix`` and the
        ``basics`` options ``version`` and ``user`` are read from it
    """
    import os  # function-scope import keeps this block self-contained

    # Path of the bundled document class without its ".cls" extension.
    # splitext removes exactly the extension, whereas str.rstrip('.cls')
    # strips any trailing run of the characters '.', 'c', 'l' and 's' and
    # only produced the right answer for "bmcart" by coincidence.
    class_file = resource_filename("chilin2.modules.summary", "bmcart.cls")
    class_stem = os.path.splitext(class_file)[0]

    begin_cmd = PythonCommand(
        latex_start,
        input={
            "template": resource_filename("chilin2.modules.summary",
                                          "begin.tex")
        },
        output={"latex": conf.latex_prefix + "_begin.tex"},
        param={
            "id": conf.id,
            "version": conf.get("basics", "version"),
            "user": conf.get('basics', 'user'),
            "bmcard": class_stem
        })
    attach_back(workflow, begin_cmd)

    end_cmd = PythonCommand(
        latex_end,
        input={
            "template": resource_filename("chilin2.modules.summary",
                                          "end.tex")
        },
        output={"latex": conf.latex_prefix + "_end.tex"})
    attach_back(workflow, end_cmd)
Ejemplo n.º 2
0
def stat_fastqc(workflow, conf):  # collect raw reads quality and GC contents
    """
    Queue the FastQC summary step and, for long reports, the figure step.

    One ``<target>_100k_fastqc/fastqc_data.txt`` path is built per entry of
    ``conf.sample_pairs`` (with ``conf.pe`` set, the first element of the
    target is used). These paths are the input of ``json_fastqc``, whose
    declared output is ``<json_prefix>_fastqc.json``.

    long: when ``conf.long`` is set, ``fastqc_detailed_figure`` is queued too,
    with an R script and a PDF as declared outputs.

    Both steps are flagged ``allow_fail`` and ``allow_dangling``.
    """
    fastqc_reports = [
        (target[0] if conf.pe else target) + "_100k_fastqc/fastqc_data.txt"
        for _raw, target in conf.sample_pairs
    ]
    summary_json = conf.json_prefix + "_fastqc.json"

    summary_cmd = PythonCommand(
        json_fastqc,
        input={"fastqc_summaries": fastqc_reports},
        output={"json": summary_json},
        param={"ids": conf.sample_bases, "id": conf.id},
        name="collect fastqc results")
    summary_step = attach_back(workflow, summary_cmd)
    summary_step.allow_fail = True
    summary_step.allow_dangling = True

    if not conf.long:
        return

    # Long document only: images and tex for the detailed quality page.
    figure_inputs = {
        "dbaccessor": resource_filename("chilin2.modules.dbaccessor",
                                        "ChiLinQC.db"),
        "template": resource_filename("chilin2.modules.summary",
                                      "R_culmulative_plot.R"),
        "json": conf.json_prefix + "_fastqc.json",
    }
    figure_outputs = {
        "R": conf.prefix + "_raw_sequence_qc.R",
        "pdf": conf.prefix + "_raw_sequence_qc.pdf",
    }
    figure_step = attach_back(
        workflow,
        PythonCommand(fastqc_detailed_figure,
                      name='fastqc',
                      input=figure_inputs,
                      output=figure_outputs,
                      param={"ids": conf.sample_bases}))
    figure_step.allow_fail = True
    figure_step.allow_dangling = True
Ejemplo n.º 3
0
def stat_pbc(workflow, conf):  # collect pbc value
    """
    Queue ``json_pbc`` over the per-sample ``*.pbc`` files.

    Input is one ``<target>.pbc`` per entry of ``conf.sample_targets``; the
    declared output is ``<json_prefix>_pbc.json``.
    """
    pbc_files = []
    for target in conf.sample_targets:
        pbc_files.append(target + ".pbc")

    pbc_cmd = PythonCommand(
        json_pbc,
        input={"pbc": pbc_files},
        output={"json": conf.json_prefix + "_pbc.json"},
        param={"samples": conf.sample_bases})
    attach_back(workflow, pbc_cmd)
Ejemplo n.º 4
0
def tex_fastqc(workflow, conf):
    """
    Queue the two LaTeX steps of the FastQC section.

    ``load_latex`` takes the FastQC JSON, the ``fastqc.tex`` template and the
    raw sequence QC PDF, with ``<latex_prefix>_fastqc.tex`` as output.
    ``load_gc_latex`` takes ``fastqc_gc.tex`` plus one (label, PNG path) pair
    per sample, with ``<latex_prefix>_fastqc_gc.tex`` as output.
    Both steps are flagged ``allow_fail`` and ``allow_dangling``.
    """
    quality_inputs = {
        "json": conf.json_prefix + "_fastqc.json",
        "template": resource_filename("chilin2.modules.fastqc", "fastqc.tex"),
        "pdf": conf.prefix + "_raw_sequence_qc.pdf",
    }
    quality_step = attach_back(
        workflow,
        PythonCommand(load_latex,
                      input=quality_inputs,
                      output={"latex": conf.latex_prefix + "_fastqc.tex"}))
    quality_step.allow_dangling = True
    quality_step.allow_fail = True

    # Name/PNG pairings; paired-end runs use the "pair1" FastQC directory.
    fastqc_dir = "%spair1_100k_fastqc" if conf.pe else "%s_100k_fastqc"
    gccontent_graphs = []
    for nm in conf.sample_bases:
        png = os.path.join(conf.target_dir, fastqc_dir % nm,
                           "Images", "per_sequence_gc_content.png")
        gccontent_graphs.append((nm.replace("_", " "), png))

    gc_inputs = {
        "template": resource_filename("chilin2.modules.fastqc",
                                      "fastqc_gc.tex"),
        "gccontent_graphs": gccontent_graphs,
    }
    gc_step = attach_back(
        workflow,
        PythonCommand(load_gc_latex,
                      input=gc_inputs,
                      output={"latex": conf.latex_prefix + "_fastqc_gc.tex"}))
    gc_step.allow_dangling = True
    gc_step.allow_fail = True
Ejemplo n.º 5
0
def stat_phan(workflow, conf):
    """
    Queue ``json_phan`` to collect NSC/RSC/Qtag from the ``*.spp`` files.

    Input is one ``<target>.spp`` per entry of ``conf.sample_targets``; the
    declared output is ``<json_prefix>_phan.json``.
    """
    spp_files = []
    for target in conf.sample_targets:
        spp_files.append(target + ".spp")

    phan_cmd = PythonCommand(
        json_phan,
        input={"spp": spp_files},
        output={"json": conf.json_prefix + "_phan.json"},
        param={"sample": conf.sample_bases})
    attach_back(workflow, phan_cmd)
Ejemplo n.º 6
0
def tex_conserv(workflow, conf):
    """
    Queue ``latex_conservation`` for the conservation section.

    The template is ``conservation.tex``, the declared output is
    ``<latex_prefix>_conserv.tex`` and ``conf.prefix`` is passed as the
    ``prefix`` parameter. The step is flagged ``allow_fail`` and
    ``allow_dangling``.
    """
    template = resource_filename("chilin2.modules.conservation",
                                 "conservation.tex")
    conserv_cmd = PythonCommand(
        latex_conservation,
        input={"template": template},
        output={"latex": conf.latex_prefix + "_conserv.tex"},
        param={"prefix": conf.prefix})

    step = attach_back(workflow, conserv_cmd)
    step.allow_fail = True
    step.allow_dangling = True
Ejemplo n.º 7
0
def tex_bwa(workflow, conf):
    """
    Queue ``long_tex`` for the mapping section.

    Inputs are the ``bwa.tex`` template and ``<prefix>_bwa_compare.pdf``;
    the declared output is ``<latex_prefix>_map.tex``. The step is flagged
    ``allow_fail`` and ``allow_dangling``.
    """
    map_inputs = {
        "template": resource_filename("chilin2.modules.bwa", "bwa.tex"),
        "figure": conf.prefix + "_bwa_compare.pdf",
    }
    map_cmd = PythonCommand(long_tex,
                            input=map_inputs,
                            output={"latex": conf.latex_prefix + "_map.tex"})

    step = attach_back(workflow, map_cmd)
    step.allow_dangling = True
    step.allow_fail = True
Ejemplo n.º 8
0
def stat_frip(workflow, conf):  # collect frip score
    """
    Queue ``json_frip`` to collect FRiP informative tag number and effective
    peaks number.

    Input is one ``<target>.frip`` per entry of ``conf.sample_targets``; the
    declared output is ``<json_prefix>_frip.json``. The step is flagged
    ``allow_fail`` and ``allow_dangling``.
    """
    frip_files = []
    for target in conf.sample_targets:
        frip_files.append(target + ".frip")

    frip_cmd = PythonCommand(
        json_frip,
        input={"frip": frip_files},
        output={"json": conf.json_prefix + "_frip.json"},
        param={"samples": conf.sample_bases})

    step = attach_back(workflow, frip_cmd)
    step.allow_dangling = True
    step.allow_fail = True
Ejemplo n.º 9
0
def stat_bedAnnotate(workflow, conf, has_dhs, has_velcro):
    """ Describe peaks' distribution

    Queues ``json_meta2`` on ``<prefix>.meta`` (meta gene distribution info).
    ``json_dhs`` on ``<prefix>.dhs`` is added when ``has_dhs`` is truthy, and
    ``json_velcro`` on ``<prefix>.velcro`` when ``has_velcro`` is truthy.
    Every queued step is flagged ``allow_fail`` and ``allow_dangling``.
    """
    meta_cmd = PythonCommand(
        json_meta2,
        input={"meta": conf.prefix + ".meta"},
        output={"json": conf.json_prefix + "_meta.json"},
        param={"id": conf.id},
        name="bedAnnotate summary")
    meta_step = attach_back(workflow, meta_cmd)
    meta_step.allow_dangling = True
    meta_step.allow_fail = True

    if has_dhs:
        dhs_cmd = PythonCommand(
            json_dhs,
            input={"dhs": conf.prefix + ".dhs", "top_peaks": 5000},
            output={"json": conf.json_prefix + "_dhs.json"},
            name="DHS summary")
        dhs_step = attach_back(workflow, dhs_cmd)
        dhs_step.allow_fail = True
        dhs_step.allow_dangling = True

    if has_velcro:
        velcro_cmd = PythonCommand(
            json_velcro,
            input={"velcro": conf.prefix + ".velcro", "top_peaks": 5000},
            output={"json": conf.json_prefix + "_velcro.json"},
            name="Velcro summary")
        velcro_step = attach_back(workflow, velcro_cmd)
        velcro_step.allow_dangling = True
        velcro_step.allow_fail = True
Ejemplo n.º 10
0
def stat_ceas(workflow, conf, has_dhs,
              has_velcro):  # collect meta gene distribution info
    """ Describe peaks' distribution
    ###########################################################################
    DEPRECATED!!!!--use stat_bedAnnotate instead
    ###########################################################################

    Queues ``json_meta`` on ``<prefix>.meta``. ``json_dhs`` on
    ``<prefix>.dhs`` is added when ``has_dhs`` is truthy, and ``json_velcro``
    on ``<prefix>.velcro`` when ``has_velcro`` is truthy.
    """
    attach_back(
        workflow,
        PythonCommand(json_meta,
                      input={
                          "meta": conf.prefix + ".meta",
                          "top_peaks": 5000
                      },
                      output={"json": conf.json_prefix + "_meta.json"},
                      param={"id": conf.id},
                      # Was "DHS summary", a copy-paste of the DHS step's
                      # label that gave two different steps the same name.
                      name="meta summary"))

    if has_dhs:
        attach_back(
            workflow,
            PythonCommand(json_dhs,
                          input={
                              "dhs": conf.prefix + ".dhs",
                              "top_peaks": 5000
                          },
                          output={"json": conf.json_prefix + "_dhs.json"},
                          name="DHS summary"))

    if has_velcro:
        attach_back(
            workflow,
            PythonCommand(json_velcro,
                          input={
                              "velcro": conf.prefix + ".velcro",
                              "top_peaks": 5000
                          },
                          output={"json": conf.json_prefix + "_velcro.json"},
                          name="Velcro summary"))
Ejemplo n.º 11
0
def stat_conservation(workflow, conf):
    """
    Queue the conservation score summary and, for long reports, the figures.

    ``json_conservation`` takes ``<prefix>_conserv.txt`` with
    ``<json_prefix>_conserv.json`` as declared output. With ``conf.long`` set,
    ``conservation_figures`` is queued as well. Both steps are flagged
    ``allow_fail`` and ``allow_dangling``.
    """
    score_params = {
        # NOTE(review): the third positional argument looks like a default
        # for a missing "factor" option -- confirm against conf.get.
        "atype": conf.get("basics", "factor", "TF"),
        "id": conf.id,
    }
    score_cmd = PythonCommand(
        json_conservation,
        input={"score": conf.prefix + "_conserv.txt"},
        output={"json": conf.json_prefix + "_conserv.json"},
        param=score_params,
        name="conservation score")
    score_step = attach_back(workflow, score_cmd)
    score_step.allow_fail = True
    score_step.allow_dangling = True

    if not conf.long:
        return

    # Cluster figures: obsolete, kept for compatibility.
    figure_inputs = {
        "conservationR": conf.prefix + "_conserv.R",
        "historical_conservation_cluster_text": resource_filename(
            "chilin2.modules.dbaccessor", "Histone_centers.txt"),
    }
    figure_outputs = {
        "R": conf.prefix + "_conserv_cluster.R",
        "compare_pdf": conf.prefix + "_conserv_compare.pdf",
    }
    figure_step = attach_back(
        workflow,
        PythonCommand(conservation_figures,
                      input=figure_inputs,
                      output=figure_outputs,
                      param={"id": conf.id}))
    figure_step.allow_dangling = True
    figure_step.allow_fail = True
Ejemplo n.º 12
0
def stat_motif(workflow, conf):
    """
    Queue ``stat_seqpos`` to collect motif information.

    Input is ``<prefix>_seqpos/motif_list.json``; the declared output is
    ``<json_prefix>_seqpos.json``. The seqLogo directory and a z-score cutoff
    of -1 are passed as parameters. The step is flagged ``allow_fail`` and
    ``allow_dangling``.
    """
    seqpos_dir = conf.prefix + "_seqpos/"
    motif_cmd = PythonCommand(
        stat_seqpos,
        input={"seqpos": seqpos_dir + "motif_list.json"},
        output={"json": conf.json_prefix + "_seqpos.json"},
        param={"prefix": seqpos_dir + "seqLogo/", "z_score_cutoff": -1},
        name="collect motif info")

    step = attach_back(workflow, motif_cmd)
    step.allow_dangling = True
    step.allow_fail = True
Ejemplo n.º 13
0
def tex_frip(workflow, conf):
    """
    Queue ``load_latex`` for the FRiP section.

    Inputs are ``<json_prefix>_frip.json`` and the ``frip.tex`` template; the
    declared output is ``<latex_prefix>_frip.tex``. The step is flagged
    ``allow_fail`` and ``allow_dangling``.
    """
    frip_inputs = {
        "json": conf.json_prefix + "_frip.json",
        "template": resource_filename("chilin2.modules.frip", "frip.tex"),
    }
    frip_cmd = PythonCommand(load_latex,
                             input=frip_inputs,
                             output={"latex": conf.latex_prefix + "_frip.tex"})

    step = attach_back(workflow, frip_cmd)
    step.allow_fail = True
    step.allow_dangling = True
Ejemplo n.º 14
0
def stat_bwa(workflow, conf): ## use samtools to parse mappable reads from bwa
    """
    Queue samtools read counting per sample and the BWA mapping summary.

    bam files are filtered by samtools -q 1, so mapped reads are considered
    to be unique.

    For each entry of ``conf.sample_targets`` a shell step redirects
    ``samtools view -Sc`` of the SAM file to ``<target>_total.bwa`` and
    ``samtools flagstat`` of the BAM file to ``<target>_mapped.bwa``.
    ``json_bwa`` then takes those files, with ``<json_prefix>_map.json`` as
    declared output. With ``conf.long`` set, ``bwa_figures`` is queued with
    the comparison PDF and R script as declared outputs.

    Every step is flagged ``allow_fail`` and ``allow_dangling``.
    """
    for t in conf.sample_targets:
        stat = attach_back(workflow, ShellCommand(
        """
        {tool} view -Sc {input[sam]} > {output[total]}
        {tool} flagstat {input[bam]} > {output[stat]}
        """,
        tool = "samtools",
        input = {"bam": t + ".bam",
                 "sam": t + ".sam"},
        output = {"stat": t + "_mapped.bwa",
                  "total": t + "_total.bwa"}))
        stat.allow_fail = True
        stat.allow_dangling = True

    collect = attach_back(workflow, PythonCommand(json_bwa,
        input={"bwa_mapped": [ t + "_mapped.bwa" for t in conf.sample_targets ],
               "bwa_total": [ t + "_total.bwa" for t in conf.sample_targets ]},
        output={"json": conf.json_prefix+"_map.json"},
        param={"sample":conf.sample_bases},
        name="bwa qc"))
    collect.allow_dangling = True
    collect.allow_fail = True

    if conf.long:
        long_collect = attach_back(workflow, PythonCommand(bwa_figures,
            input = {"dbaccessor": resource_filename("chilin2.modules.dbaccessor", "ChiLinQC.db"),
                     "json": conf.json_prefix + "_map.json",
                     "template": resource_filename("chilin2.modules.summary", "R_culmulative_plot.R")},
            output = {"pdf": conf.prefix + "_bwa_compare.pdf", "R": conf.prefix+"_bwa_compare.R"},
            param = {"sample": conf.sample_bases}))
        long_collect.allow_fail = True
        # Previously allow_fail was assigned twice and allow_dangling never,
        # leaving this the only step without the dangling flag.
        long_collect.allow_dangling = True
Ejemplo n.º 15
0
def stat_macs2_on_rep(workflow, conf):
    """
    Queue ``json_macs2_on_reps`` when there is more than one treatment target.

    The per-replicate peak tables are ``<target>_peaks.xls`` when the macs2
    ``type`` option is "both" or "narrow", otherwise
    ``<target>_b_peaks.xls``. The declared output is
    ``<json_prefix>_macs2_rep.json``; the step is flagged ``allow_fail`` and
    ``allow_dangling``.
    """
    narrow_like = conf.get("macs2", "type") in ["both", "narrow"]
    suffix = "_peaks.xls" if narrow_like else "_b_peaks.xls"
    peak_tables = [target + suffix for target in conf.treatment_targets]

    # A single treatment has no replicates to summarise.
    if len(conf.treatment_targets) <= 1:
        return

    rep_cmd = PythonCommand(
        json_macs2_on_reps,
        input={"all_peak_xls": peak_tables},
        output={"json": conf.json_prefix + "_macs2_rep.json"},
        param={"samples": conf.treatment_bases})
    step = attach_back(workflow, rep_cmd)
    step.allow_dangling = True
    step.allow_fail = True
Ejemplo n.º 16
0
def tex_motif(workflow, conf):
    """
    Queue ``latex_seqpos`` for the motif section.

    Inputs are the ``mdseqpos/motif.tex`` template and
    ``<json_prefix>_seqpos.json``; the declared output, under the key
    ``tex``, is ``<latex_prefix>_motif.tex``. The step is flagged
    ``allow_fail`` and ``allow_dangling``.
    """
    motif_inputs = {
        "template": resource_filename("chilin2.modules",
                                      "mdseqpos/motif.tex"),
        "json": conf.json_prefix + "_seqpos.json",
    }
    motif_cmd = PythonCommand(
        latex_seqpos,
        input=motif_inputs,
        output={"tex": conf.latex_prefix + "_motif.tex"},
        param={"id": conf.id},
        name="generating latex of motif info")

    step = attach_back(workflow, motif_cmd)
    step.allow_dangling = True
    step.allow_fail = True
Ejemplo n.º 17
0
def fragment(workflow, conf):
    """
    Queue ``macs2 predictd`` per treatment and the model R script parser.

    This is done after FRiP. For every entry of ``conf.treatment_targets`` a
    shell step runs ``predictd`` on ``<target>.bam`` with
    ``<target>_model.R`` as declared output; its parameters are then updated
    from the ``macs2`` configuration items. ``stat_frag_std`` is queued
    afterwards over all model scripts, with ``<json_prefix>_frag.json`` and
    one ``<target>_frag_sd.R`` per treatment as declared outputs.

    Every step is flagged ``allow_fail`` and ``allow_dangling``.
    """
    # Use the configured macs2 executable, falling back to the bare name.
    macs2_bin = conf.get("tool", "macs2") or "macs2"

    for target in conf.treatment_targets:
        model_script = target + "_model.R"
        predict_cmd = ShellCommand(
            "{tool} predictd -i {input[bam]} --rfile {param[prefix]} -g {param[species]}",
            tool=macs2_bin,
            input={"bam": target + ".bam"},
            output={"R": model_script},
            param={"prefix": model_script, "species": 'hs'})
        predict_step = attach_back(workflow, predict_cmd)
        predict_step.update(param=conf.items("macs2"))
        # Modelling may fail when there are too few peaks.
        predict_step.allow_fail = True
        predict_step.allow_dangling = True

    # Extract standard deviation from the MACS2 model.R: m, p and pileup
    # values are used for the standard deviation; mean fragment size is
    # provided (choose the one with highest correlation).
    model_scripts = [t + "_model.R" for t in conf.treatment_targets]
    sd_scripts = [t + "_frag_sd.R" for t in conf.treatment_targets]
    parser_cmd = PythonCommand(
        stat_frag_std,
        input={"r": model_scripts},
        output={"json": conf.json_prefix + "_frag.json", "r": sd_scripts},
        param={"samples": conf.treatment_bases, "frag_tool": "BAMSE"},
        name="macs2 model R script parser")
    parser_step = attach_back(workflow, parser_cmd)
    parser_step.allow_dangling = True
    parser_step.allow_fail = True
Ejemplo n.º 18
0
def tex_phan(workflow, conf):
    """
    Queue ``long_tex`` for the phantompeak section.

    One figure per entry of ``conf.sample_targets`` is passed:
    ``<target>_4000000.pdf`` when ``conf.down`` is set, otherwise
    ``<target>.pdf``. The template is ``phan.tex`` and the declared output is
    ``<latex_prefix>_phan.tex``.
    """
    figures = [
        t + ("_4000000.pdf" if conf.down else ".pdf")
        for t in conf.sample_targets
    ]
    phan_inputs = {
        "template": resource_filename("chilin2.modules.phantompeak",
                                      "phan.tex"),
        "figure": figures,
    }
    phan_cmd = PythonCommand(long_tex,
                             input=phan_inputs,
                             output={"latex": conf.latex_prefix + "_phan.tex"})
    attach_back(workflow, phan_cmd)
Ejemplo n.º 19
0
def stat_macs2(workflow, conf):  # collect peaks
    """
    Queue ``json_macs2`` on the merged peak table.

    merged peaks and replicates peaks
    high confident peaks
    duplicates level

    The table is ``<prefix>_peaks.xls`` when the macs2 ``type`` option is
    "both" or "narrow", otherwise ``<prefix>_b_peaks.xls``. The declared
    output is ``<json_prefix>_macs2.json``; the step is flagged
    ``allow_fail`` and ``allow_dangling``.
    """
    if conf.get("macs2", "type") in ["both", "narrow"]:
        peak_table = conf.prefix + "_peaks.xls"
    else:
        peak_table = conf.prefix + "_b_peaks.xls"

    peaks_cmd = PythonCommand(
        json_macs2,
        input={"macs2_peaks_xls": peak_table},
        output={"json": conf.json_prefix + "_macs2.json"},
        param={"id": conf.id})
    step = attach_back(workflow, peaks_cmd)
    step.allow_dangling = True
    step.allow_fail = True
Ejemplo n.º 20
0
def tex_contamination(workflow, conf):
    """
    Queue ``latex_contamination`` for the contamination section.

    Inputs are the ``contamination/contamination.tex`` template and
    ``<json_prefix>_contam.json``; the declared output is
    ``<latex_prefix>_contam.tex``. The ``layout`` parameter is a run of
    ``c`` characters, one more than the number of items in the
    ``contamination`` configuration section. The step is flagged
    ``allow_fail`` and ``allow_dangling``.
    """
    species_names = [name for name, _ in conf.items("contamination")]
    column_spec = 'c' * (len(species_names) + 1)

    contam_inputs = {
        "template": resource_filename("chilin2.modules",
                                      "contamination/contamination.tex"),
        "json": conf.json_prefix + "_contam.json",
    }
    contam_cmd = PythonCommand(
        latex_contamination,
        input=contam_inputs,
        output={"latex": conf.latex_prefix + "_contam.tex"},
        param={'id': conf.id, 'layout': column_spec})

    step = attach_back(workflow, contam_cmd)
    step.allow_fail = True
    step.allow_dangling = True
Ejemplo n.º 21
0
def stat_contamination(workflow, conf):
    """
    Queue ``json_contamination`` over the per-species mapping counts.

    For every entry of ``conf.sample_targets`` a list is built holding one
    (``<target><species>_mapped.<mapper>``,
    ``<target><species>_total.<mapper>``) pair per species named in the
    ``contamination`` configuration section. The declared output is
    ``<json_prefix>_contam.json``; the step is flagged ``allow_fail`` and
    ``allow_dangling``.
    """
    species_names = [name for name, _ in conf.items("contamination")]

    per_sample = []
    for target in conf.sample_targets:
        pairs = []
        for species in species_names:
            stem = target + species
            pairs.append((stem + "_mapped." + conf.mapper,
                          stem + "_total." + conf.mapper))
        per_sample.append(pairs)

    contam_cmd = PythonCommand(
        json_contamination,
        input={"summaries": per_sample},
        output={"json": conf.json_prefix + "_contam.json"},
        param={
            "samples": conf.sample_bases,
            "id": conf.id,
            "species": species_names,
        },
        name="stat contamination")
    step = attach_back(workflow, contam_cmd)
    step.allow_fail = True
    step.allow_dangling = True
Ejemplo n.º 22
0
def summary_table_latex(workflow, conf):
    """
    Queue ``latex_summary_table`` with a column layout sized to the samples.

    The layout has one leading ``m{...}`` column followed by one centred
    ``m{...}`` column per entry of ``conf.sample_bases``; every column gets
    the width ``1 / (n + 1) - 0.05`` as a fraction of ``\\linewidth``. The
    template is ``summary_table.tex`` and the declared output is
    ``<latex_prefix>_summary_table.tex``. The step is flagged ``allow_fail``
    and ``allow_dangling``.
    """
    sample_count = len(conf.sample_bases)
    width = 1 / float(sample_count + 1) - 0.05

    label_column = "m{%s\\linewidth}" % width
    sample_column = ">{\\centering\\arraybackslash}m{%s\\linewidth}" % width
    layout = label_column + sample_column * sample_count

    template = resource_filename("chilin2.modules.summary",
                                 "summary_table.tex")
    table_cmd = PythonCommand(
        latex_summary_table,
        input={"template": template},
        output={"latex": conf.latex_prefix + "_summary_table.tex"},
        param={"conf": conf, "layout": layout})

    step = attach_back(workflow, table_cmd)
    step.allow_dangling = True
    step.allow_fail = True
Ejemplo n.º 23
0
def stat_replicates(workflow, conf):  ## replicates peaks and bigwiggle
    """
    Queue ``json_reps`` over the replicate correlation and overlap files.

    input: ``<prefix>.cor`` (wigCorrelate of multiple replicates results)
           and one ``<prefix>_<i>_<j>.overlap`` per pair ``i < j`` of indices
           into ``conf.treatment_targets``
           (replicates peaks overlap number, percentage: 0.3)
    output: ``<json_prefix>_rep.json``

    The step is flagged ``allow_fail`` and ``allow_dangling``.
    """
    replicate_count = len(conf.treatment_targets)
    overlap_files = []
    for i in range(replicate_count):
        for j in range(i + 1, replicate_count):
            overlap_files.append(conf.prefix + "_%s_%s.overlap" % (i, j))

    reps_cmd = PythonCommand(
        json_reps,
        input={"cor": conf.prefix + ".cor", "overlap": overlap_files},
        output={"json": conf.json_prefix + "_rep.json"},
        param={"param": conf.id})
    step = attach_back(workflow, reps_cmd)
    step.allow_dangling = True
    step.allow_fail = True
Ejemplo n.º 24
0
def write_conf(workflow, conf):
    """
    Queue ``WriteConf`` to save the configuration.

    The output is ``<prefix>.conf`` and ``conf`` itself is passed as the
    ``conf`` parameter.
    """
    conf_path = conf.prefix + ".conf"
    save_cmd = PythonCommand(WriteConf,
                             output=conf_path,
                             param={"conf": conf})
    attach_back(workflow, save_cmd)
Ejemplo n.º 25
0
def read_enrichment_on_meta(workflow, conf):
    """ total reads enrichment in exon, promoter and union DHS regions

    For every entry of ``conf.sample_targets`` a shell step counts, with
    ``bedtools intersect``, the reads of the BAM file overlapping the exon
    and promoter BED files in ``conf.target_dir``, and writes
    ``exon,promoter,total`` to ``<target>.enrich.meta``. The BAM file is
    ``<target>_4000000.bam`` when ``conf.down`` is set, otherwise
    ``<target>.bam``. When the species section has a ``dhs`` option, a second
    step writes ``dhs,total`` to ``<target>.enrich.dhs``. ``enrich_in_meta``
    is queued last, with ``<json_prefix>_enrich_meta.json`` as declared
    output.

    Every step is flagged ``allow_fail`` and ``allow_dangling``.
    """
    import os

    try:
        has_dhs = conf.get(conf.get("basics", "species"), "dhs")
    except Exception:
        # Best effort: any configuration lookup failure means "no DHS
        # annotation". Exception rather than a bare except, so that
        # KeyboardInterrupt and SystemExit still propagate.
        has_dhs = ""

    # Shell templates are loop-invariant; their text is unchanged.
    meta_script = """
            exon=$(bedtools intersect -f {param[p]} -wa -u -abam {input[bam]} -b {param[exon]} -bed | wc -l)
            promoter=$(bedtools intersect -f {param[p]} -wa -u -abam {input[bam]} -b {param[promoter]} -bed | wc -l)
            total=$(samtools flagstat {input[bam]} | head -1 | cut -d" " -f1)
            echo $exon,$promoter,$total > {output[meta]}
            """
    dhs_script = """
            dhs=$(bedtools intersect -f {param[p]} -wa -u -abam {input[bam]} -b {param[dhs]} -bed | wc -l)
            total=$(samtools flagstat {input[bam]} | head -1 | cut -d" " -f1)
            echo $dhs,$total > {output[dhs]}
            """

    for t in conf.sample_targets:
        bam = t + "_4000000.bam" if conf.down else t + ".bam"

        # NOTE: neither template references {tool}; the value is kept as in
        # the original call.
        enrich = attach_back(
            workflow,
            ShellCommand(
                meta_script,
                tool="coverageBed",
                input={"bam": bam},
                output={"meta": t + ".enrich.meta"},
                param={
                    "promoter": os.path.join(conf.target_dir,
                                             "gene.bed_promoter"),
                    "p": "1E-9",
                    "exon": os.path.join(conf.target_dir, "gene.bed_exon")
                }))
        enrich.allow_dangling = True
        enrich.allow_fail = True

        if has_dhs:
            dhs_path = conf.get_path(conf.get("basics", "species"), "dhs")
            dhs = attach_back(
                workflow,
                ShellCommand(
                    dhs_script,
                    tool="coverageBed",
                    input={"bam": bam, "dhs": dhs_path},
                    output={"dhs": t + ".enrich.dhs"},
                    param={"p": "1E-9", "dhs": dhs_path},
                ))
            dhs.allow_fail = True
            dhs.allow_dangling = True

    em = attach_back(
        workflow,
        PythonCommand(
            enrich_in_meta,
            input={
                "meta": [t + ".enrich.meta" for t in conf.sample_targets],
                "mapped": [t + "_mapped.bwa" for t in conf.sample_targets]
            },  ## use 4M reads for down sampling ones, and all reads instead
            output={"json": conf.json_prefix + "_enrich_meta.json"},
            param={
                "samples": conf.sample_bases,
                "id": conf.id,
                "has_dhs": has_dhs,
                "down": conf.down,
                "dhs": [t + ".enrich.dhs" for t in conf.sample_targets]
            }))
    em.allow_fail = True
    em.allow_dangling = True