Ejemplo n.º 1
0
def read_quality(workflow, conf, tex):
    """Attach one read-quality shell step per treatment sample.

    When ``conf.pe`` is truthy, ``conf.treatment_pairs_pe`` is iterated and
    ``dac_pe_read_quality`` receives both FASTQ files plus one ``.qc`` output
    per mate.  Otherwise ``conf.treatment_pairs`` is iterated and
    ``dac_se_read_quality`` receives a single input and a single ``.qc``
    output.

    Only the shell steps are attached here; no JSON or LaTeX collection
    command is added.  ``tex`` is accepted but never read.
    """
    if not conf.pe:
        for fastq, prefix in conf.treatment_pairs:
            single_end = ShellCommand(
                "{tool} {input} {output[stat]}",
                tool="dac_se_read_quality",
                input=fastq,
                output={"stat": prefix + "_read_quality.qc"},
                name="100k read sequence quality and sequence length")
            attach_back(workflow, single_end)
        return

    for mates, prefixes in conf.treatment_pairs_pe:
        # One QC file per mate, in the same order as the input FASTQ files.
        qc_files = [p + "_read_quality.qc" for p in prefixes]
        paired_end = ShellCommand(
            "{tool} {input[fastq][0]} {input[fastq][1]} {output[stat][0]} {output[stat][1]}",
            tool="dac_pe_read_quality",
            input={"fastq": mates},
            output={"stat": qc_files})
        attach_back(workflow, paired_end)
Ejemplo n.º 2
0
Archivo: dc.py Proyecto: asntech/chilin
def Phan(workflow, conf): # NSC, RSC, Qtag
    """
    Attach one SPP step per sample target, then the QC collectors.

    http://code.google.com/p/phantompeakqualtools/
        Rscript run_spp.R -c=<tagAlign/BAMfile> -savp -out=<outFile>

    For every entry of ``conf.sample_targets`` an ``Rscript`` command is
    attached that runs the script configured under ``[tool] spp``.  With
    ``conf.down`` truthy the input is ``<target>_4000000.bam`` and the
    declared PDF is ``<target>_4000000.pdf``; otherwise ``<target>.bam`` and
    ``<target>.pdf`` are used.  Afterwards ``stat_phan`` is called, and
    ``tex_phan`` as well when ``conf.long`` is truthy.
    """
    # Every sample target gets its own, independent SPP run.
    for t in conf.sample_targets:
        # Shared stem of the BAM input and the PDF output.
        stem = t + "_4000000" if conf.down else t
        spp_out = t + ".spp"
        spp_step = ShellCommand(
            "{tool} {param[script]} -c={input[chip]} -rf -savp -out={output[spp]} -odir={param[dir]}",
            tool="Rscript",
            input={"chip": stem + ".bam"},
            output={"spp": spp_out, "pdf": stem + ".pdf"},
            param={"script": conf.get("tool", "spp"),
                   "dir": os.path.dirname(spp_out)},
            name="SPP")
        attach_back(workflow, spp_step)

    stat_phan(workflow, conf)
    if conf.long:
        tex_phan(workflow, conf)
Ejemplo n.º 3
0
def tex_conserv(workflow, conf):
    """Attach the ``latex_conservation`` step writing ``<latex_prefix>_conserv.tex``."""
    template = resource_filename("chilin2.modules.conservation",
                                 "conservation.tex")
    render = PythonCommand(
        latex_conservation,
        input={"template": template},
        output={"latex": conf.latex_prefix + "_conserv.tex"},
        param={"prefix": conf.prefix})
    attach_back(workflow, render)
Ejemplo n.º 4
0
Archivo: tex.py Proyecto: cfce/chilin
def tex_fastqc(workflow, conf):
    """Attach the two LaTeX rendering steps for the FastQC section.

    The first step runs ``load_latex`` on ``<json_prefix>_fastqc.json`` and
    the raw-sequence QC PDF; the second runs ``load_gc_latex`` on a list of
    (display name, GC-content PNG path) pairs, one per ``conf.sample_bases``
    entry.  Both steps are flagged ``allow_fail`` and ``allow_dangling``.
    """
    fastqc_template = resource_filename("chilin2.modules.fastqc", "fastqc.tex")
    quality = attach_back(
        workflow,
        PythonCommand(
            load_latex,
            input={"json": conf.json_prefix + "_fastqc.json",
                   "template": fastqc_template,
                   "pdf": conf.prefix + "_raw_sequence_qc.pdf"},
            output={"latex": conf.latex_prefix + "_fastqc.tex"}))
    quality.allow_fail = True
    quality.allow_dangling = True

    # Paired-end runs point at the "pair1" FastQC directory of each sample.
    dir_pattern = "%spair1_100k_fastqc" if conf.pe else "%s_100k_fastqc"

    # (name, png) pairings; underscores in the name are shown as spaces.
    gccontent_graphs = []
    for nm in conf.sample_bases:
        png = os.path.join(conf.target_dir, dir_pattern % nm,
                           "Images", "per_sequence_gc_content.png")
        gccontent_graphs.append((nm.replace("_", " "), png))

    gc_template = resource_filename("chilin2.modules.fastqc", "fastqc_gc.tex")
    gc = attach_back(
        workflow,
        PythonCommand(
            load_gc_latex,
            input={"template": gc_template,
                   "gccontent_graphs": gccontent_graphs},
            output={"latex": conf.latex_prefix + "_fastqc_gc.tex"}))
    gc.allow_fail = True
    gc.allow_dangling = True
Ejemplo n.º 5
0
def latex_environ(workflow, conf):
    """
    Attach the steps that write the LaTeX document preamble and ending.

    Two ``PythonCommand`` steps are appended to ``workflow``:

    * ``latex_start`` renders ``begin.tex`` into ``<latex_prefix>_begin.tex``
      with the run id, the ``[basics]`` ``version`` and ``user`` settings and
      the path of the ``bmcart`` document class (without its extension).
    * ``latex_end`` renders ``end.tex`` into ``<latex_prefix>_end.tex``.
    """
    # Drop only the ".cls" suffix.  ``str.rstrip('.cls')`` strips any trailing
    # run of the characters '.', 'c', 'l', 's', which merely happens to give
    # the right answer for "bmcart.cls".
    cls_suffix = ".cls"
    bmcart_class = resource_filename("chilin2.modules.summary", "bmcart.cls")
    if bmcart_class.endswith(cls_suffix):
        bmcart_class = bmcart_class[:-len(cls_suffix)]

    attach_back(
        workflow,
        PythonCommand(latex_start,
                      input={
                          "template":
                          resource_filename("chilin2.modules.summary",
                                            "begin.tex")
                      },
                      output={"latex": conf.latex_prefix + "_begin.tex"},
                      param={
                          "id": conf.id,
                          "version": conf.get("basics", "version"),
                          "user": conf.get('basics', 'user'),
                          "bmcard": bmcart_class
                      }))

    attach_back(
        workflow,
        PythonCommand(latex_end,
                      input={
                          "template":
                          resource_filename("chilin2.modules.summary",
                                            "end.tex")
                      },
                      output={"latex": conf.latex_prefix + "_end.tex"}))
Ejemplo n.º 6
0
Archivo: dc.py Proyecto: cfce/chilin
def contamination_check(workflow, conf):
    """
    Map every sample target against each configured contamination index.

    Nothing is attached when the ``[contamination]`` section has no items.
    Otherwise, for each (target, species) pair the mapper selected by
    ``conf.mapper`` ("bwa", "bowtie" or "star") is invoked, followed by a
    samtools filter/sort step and a samtools counting step; both samtools
    steps are flagged ``allow_fail`` and ``allow_dangling``.  Finally
    ``stat_contamination`` is called, plus ``tex_contamination`` when
    ``conf.long`` is truthy.
    """
    if not conf.items("contamination"):
        return

    for target in conf.sample_targets:
        for species in dict(conf.items("contamination")):
            index = conf.get("contamination", species)
            stem = target + species  # prefix shared by all per-species files

            if conf.mapper == "bwa":
                sam_file = stem + ".sam"
                if conf.pe:
                    outsai = [stem + "pair1.sai", stem + "pair2.sai"]
                    targets = [target + "pair1", target + "pair2"]
                else:
                    outsai = stem + ".sai"
                    targets = target
                bwa(workflow, conf, targets, sam_file, outsai, index)
            elif conf.mapper == "bowtie":
                sam_file = stem + ".sam"
                bowtie(workflow, conf, target, sam_file, index)
            elif conf.mapper == "star":
                sam_file = stem + "Aligned.out.sam"
                star(workflow, conf, target, sam_file, index)

            # Mapping quality cutoff 1, as defined by the samtools official FAQ.
            # The default max_mem handed to `samtools sort -m` is 4000000000.
            filter_step = ShellCommand(
                                """
                                {tool} view -bS -t {param[genome]} -q {param[mapq]} {input[sam]} > {param[tmp_bam]} && {tool} sort -m {param[max_mem]} {param[tmp_bam]} {param[output_prefix]}
                                """,
                tool="samtools",
                input={"sam": sam_file},
                output={"bam": stem + ".bam"},
                param={"tmp_bam": stem + ".tmp.bam",
                       "output_prefix": stem,
                       "mapq": 1,
                       "genome": conf.get(conf.get("basics", "species"), "chrom_len"),
                       "max_mem": 4000000000},
                name="filtering mapping and convert")
            sam2bam = attach_back(workflow, filter_step)
            # Apply the [sam2bam] config items on top of the defaults above.
            sam2bam.update(param=conf.items("sam2bam"))
            sam2bam.allow_dangling = True
            sam2bam.allow_fail = True

            count_step = ShellCommand(
                """
                {tool} view -Sc {input[sam]} > {output[total]}
                {tool} flagstat {input[bam]} > {output[stat]}
                """,
                tool="samtools",
                input={"bam": stem + ".bam",
                       "sam": sam_file},
                output={"stat": stem + "_mapped." + conf.mapper,
                        "total": stem + "_total." + conf.mapper},
                name="contamination calculation")
            rem = attach_back(workflow, count_step)
            rem.allow_fail = True
            rem.allow_dangling = True

    ## QC part
    stat_contamination(workflow, conf)
    if conf.long:
        tex_contamination(workflow, conf)
Ejemplo n.º 7
0
def _begin_latex(workflow, conf):
    """Attach the ``latex_start`` step writing ``<latex_prefix>_start.latex``."""
    start_step = PythonCommand(
        latex_start,
        input={"template": latex_template},
        output={"latex": conf.latex_prefix + "_start.latex"},
        param={"id": conf.id})
    attach_back(workflow, start_step)
Ejemplo n.º 8
0
def sampling_bam(workflow, conf):  ## sampling to 4M
    """
    Attach two down-sampling steps per sample target.

    Each step is built by the ``sampling`` helper (defined elsewhere) and is
    flagged ``allow_fail`` and ``allow_dangling``:

    * ``<target>_u.sam`` -> ``<target>_4000000.bam`` with 4000000 reads;
    * ``<target>_nochrM.sam`` -> ``<target>_5000000_nochrM.bam`` with
      5000000 reads when ``conf.frip`` is truthy, otherwise
      ``<target>_4000000_nochrM.bam`` with 4000000 reads (the default).
    """
    for target in conf.sample_targets:
        ## uniquely mapped reads
        sampling_u = attach_back(
            workflow,
            sampling(target + "_u.sam", target + "_4000000.bam", 4000000,
                     "sam", conf))
        sampling_u.allow_dangling = True
        sampling_u.allow_fail = True

        ## reads outside chrM; depth depends on the FRiP flavour requested
        if conf.frip:
            depth = 5000000
            nochrm_bam = target + "_5000000_nochrM.bam"
        else:
            depth = 4000000
            nochrm_bam = target + "_4000000_nochrM.bam"

        samp = attach_back(
            workflow,
            sampling(target + "_nochrM.sam", nochrm_bam, depth, "sam", conf))
        samp.allow_fail = True
        samp.allow_dangling = True
Ejemplo n.º 9
0
def prepare_clean_up(workflow, conf):
    """
    Attach steps that move result files into ``<target_dir>/dataset_<id>``.

    A first shell step creates the destination directory.  Then, for every
    preserved-file pattern, the files currently matching it under
    ``conf.target_dir`` are listed and one ``mv`` step is attached for them
    (flagged ``allow_fail``).  Patterns without any match are printed and
    skipped.

    Note that the patterns are expanded while the workflow is being built,
    i.e. when this function runs, not when the attached ``mv`` step executes.
    """
    p_list = ['*.bam', '*.xls', '*_summits.bed', '*_peaks.bed', '*.bw',
              '*.png', '*.pdf', '*.R', '*.zip', '*cor*', 'json', "*summary*",
              "*seqpos","*fastqc", '*latex', "*.conf"]

    final_dir = conf.target_dir + '/dataset_' + conf.id
    attach_back(workflow,
        ShellCommand("if [ ! -d '{output}' ]; then mkdir -p {output}; fi",
            output=final_dir))

    for name_pattern in p_list:
        pf = os.path.join(conf.target_dir, name_pattern)
        # Expand the pattern exactly once so that the existence check and the
        # list of files handed to `mv` cannot disagree.
        matches = glob(pf)
        if not matches:
            print(pf)
            continue
        move = attach_back(workflow,
            ShellCommand('mv {param[preserve_files]} {output[dir]} \n# Pattern: {param[p_pattern]}',
                output={"dir": final_dir},
                param={"preserve_files": " ".join(matches),
                       "p_pattern": pf}, ))
        move.allow_fail = True
Ejemplo n.º 10
0
def _summary_table_latex(workflow, conf):
    """Attach the ``latex_summary_table`` step writing ``<latex_prefix>_summary_table.latex``."""
    table_step = PythonCommand(
        latex_summary_table,
        input={"template": latex_template},
        output={"latex": conf.latex_prefix + "_summary_table.latex"},
        param={"conf": conf})
    attach_back(workflow, table_step)
Ejemplo n.º 11
0
def read_quality(workflow, conf, tex):
    """Attach the read-quality command for every treatment sample.

    Paired-end mode (``conf.pe`` truthy) walks ``conf.treatment_pairs_pe``
    and runs ``dac_pe_read_quality`` on both mates, producing one
    ``_read_quality.qc`` file per mate.  Single-end mode walks
    ``conf.treatment_pairs`` and runs ``dac_se_read_quality`` with one input
    and one ``_read_quality.qc`` output.

    No JSON or LaTeX collection step is attached by this function, and the
    ``tex`` argument is not used.
    """
    qc_suffix = "_read_quality.qc"

    if conf.pe:
        pe_template = ("{tool} {input[fastq][0]} {input[fastq][1]} "
                       "{output[stat][0]} {output[stat][1]}")
        for raw, target in conf.treatment_pairs_pe:
            per_mate_qc = [mate + qc_suffix for mate in target]
            attach_back(workflow,
                        ShellCommand(pe_template,
                                     tool="dac_pe_read_quality",
                                     input={"fastq": raw},
                                     output={"stat": per_mate_qc}))
    else:
        for raw, target in conf.treatment_pairs:
            attach_back(workflow,
                        ShellCommand(
                            "{tool} {input} {output[stat]}",
                            tool="dac_se_read_quality",
                            input=raw,
                            output={"stat": target + qc_suffix},
                            name="100k read sequence quality and sequence length"))
Ejemplo n.º 12
0
def _bowtie_latex(workflow, conf):
    """Attach the ``latex_bowtie`` step turning ``<json_prefix>_bowtie.json`` into LaTeX."""
    sources = {"json": conf.json_prefix + "_bowtie.json",
               "template": latex_template}
    targets = {"latex": conf.latex_prefix + "_bowtie.latex"}
    attach_back(workflow,
                PythonCommand(latex_bowtie, input=sources, output=targets))
Ejemplo n.º 13
0
Archivo: qc.py Proyecto: asntech/chilin
def stat_fastqc(workflow, conf):  # collect raw reads quality and GC contents
    """
    Attach the step that gathers the per-sample FastQC summaries into JSON.

    One ``fastqc_data.txt`` path is collected per entry of
    ``conf.sample_pairs`` (for paired-end data only the first element of the
    target is used) and passed to ``json_fastqc``.  When ``conf.long`` is
    truthy an additional ``fastqc_detailed_figure`` step producing an R
    script and a PDF is attached.
    """
    summary_name = "_100k_fastqc/fastqc_data.txt"
    sums = [(target[0] if conf.pe else target) + summary_name
            for _, target in conf.sample_pairs]

    fastqc_json = conf.json_prefix + "_fastqc.json"
    attach_back(workflow,
                PythonCommand(
                    json_fastqc,
                    input={"fastqc_summaries": sums},
                    output={"json": fastqc_json},
                    param={"ids": conf.sample_bases,
                           "id": conf.id},
                    name="collect fastqc results"))

    if not conf.long:
        return

    ## long report: prepare the figure script and its PDF
    figure_inputs = {
        "dbaccessor": resource_filename("chilin2.modules.dbaccessor", "ChiLinQC.db"),
        "template": resource_filename("chilin2.modules.summary", "R_culmulative_plot.R"),
        "json": conf.json_prefix + "_fastqc.json",
    }
    figure_outputs = {
        "R": conf.prefix + "_raw_sequence_qc.R",
        "pdf": conf.prefix + "_raw_sequence_qc.pdf",
    }
    attach_back(workflow,
                PythonCommand(fastqc_detailed_figure,
                              input=figure_inputs,
                              output=figure_outputs,
                              param={"ids": conf.sample_bases}))
Ejemplo n.º 14
0
def Phan(workflow, conf):  # NSC, RSC, Qtag
    """
    Run the SPP script on each sample target and collect its QC output.

    http://code.google.com/p/phantompeakqualtools/
        Rscript run_spp.R -c=<tagAlign/BAMfile> -savp -out=<outFile>

    The script path comes from the ``[tool] spp`` setting.  ``conf.down``
    selects between the ``_4000000`` files (truthy, the default option) and
    the plain ``<target>.bam`` / ``<target>.pdf`` files (``--total``).
    ``stat_phan`` always runs afterwards; ``tex_phan`` only when
    ``conf.long`` is truthy.
    """
    command_line = ("{tool} {param[script]} -c={input[chip]} -rf -savp "
                    "-out={output[spp]} -odir={param[dir]}")

    # Every sample target is evaluated on its own.
    for t in conf.sample_targets:
        if conf.down:  ## default, this option
            ibam = t + "_4000000.bam"
            pdf = t + "_4000000.pdf"
        else:  ## --total
            ibam = t + ".bam"
            pdf = t + ".pdf"

        spp_file = t + ".spp"
        attach_back(
            workflow,
            ShellCommand(command_line,
                         tool="Rscript",
                         input={"chip": ibam},
                         output={"spp": spp_file, "pdf": pdf},
                         param={"script": conf.get("tool", "spp"),
                                "dir": os.path.dirname(spp_file)},
                         name="SPP"))

    stat_phan(workflow, conf)
    if conf.long:
        tex_phan(workflow, conf)
Ejemplo n.º 15
0
Archivo: dc.py Proyecto: asntech/chilin
def fragment(workflow, conf):
    """Attach ``macs2 predictd`` per treatment target plus the model parser.

    The macs2 executable is the ``[tool] macs2`` setting, falling back to
    ``"macs2"`` when that setting is empty.  Each ``predictd`` step has its
    parameters updated from the ``[macs2]`` section.  A final
    ``stat_frag_std`` step reads every ``<target>_model.R`` and declares
    ``<json_prefix>_frag.json`` and one ``<target>_frag_sd.R`` per target as
    outputs.  All steps are flagged ``allow_fail`` and ``allow_dangling``.
    """
    ## this is done after FRiP
    macs2_bin = conf.get("tool", "macs2") or "macs2"

    for target in conf.treatment_targets:
        model_script = target + "_model.R"
        predictd = ShellCommand(
            "{tool} predictd -i {input[bam]} --rfile {param[prefix]} -g {param[species]}",
            tool=macs2_bin,
            input={"bam": target + ".bam"},
            output={"R": model_script},
            param={"prefix": model_script,
                   "species": 'hs'})
        fragment_size = attach_back(workflow, predictd)
        fragment_size.update(param=conf.items("macs2"))
        ## tolerate failures, e.g. too few peaks for modeling
        fragment_size.allow_fail = True
        fragment_size.allow_dangling = True

    ## parse the MACS2 model.R scripts into the fragment JSON
    model_scripts = [t + "_model.R" for t in conf.treatment_targets]
    sd_scripts = [t + "_frag_sd.R" for t in conf.treatment_targets]
    frag_qc = attach_back(workflow, PythonCommand(
        stat_frag_std,
        input={"r": model_scripts},
        output={"json": conf.json_prefix + "_frag.json", "r": sd_scripts},
        param={"samples": conf.treatment_bases,
               "frag_tool": "BAMSE"},
        name="macs2 model R script parser"))
    frag_qc.allow_fail = True
    frag_qc.allow_dangling = True
Ejemplo n.º 16
0
def _seqpos_latex(workflow, conf):
    """Attach the ``latex_seqpos`` step turning ``<json_prefix>_seqpos.json`` into LaTeX."""
    seqpos_step = PythonCommand(
        latex_seqpos,
        input={"json": conf.json_prefix + "_seqpos.json",
               "template": latex_template},
        output={"latex": conf.latex_prefix + "_seqpos.latex"})
    attach_back(workflow, seqpos_step)
Ejemplo n.º 17
0
def tex_bwa(workflow, conf):
    """Attach the ``long_tex`` step writing ``<latex_prefix>_map.tex`` from the bwa template."""
    bwa_template = resource_filename("chilin2.modules.bwa", "bwa.tex")
    map_step = PythonCommand(
        long_tex,
        input={"template": bwa_template,
               "figure": conf.prefix + "_bwa_compare.pdf"},
        output={"latex": conf.latex_prefix + "_map.tex"})
    attach_back(workflow, map_step)
Ejemplo n.º 18
0
Archivo: qc.py Proyecto: cfce/chilin
def stat_bedAnnotate(workflow, conf, has_dhs, has_velcro):
    """Attach the JSON summary steps for the peak annotation results.

    ``json_meta2`` is always attached for ``<prefix>.meta``.  ``json_dhs``
    and ``json_velcro`` are attached only when ``has_dhs`` / ``has_velcro``
    are truthy.  Every attached step is flagged ``allow_fail`` and
    ``allow_dangling``.
    """
    def _attach_tolerant(command):
        # Attach a step and mark it as allowed to fail or dangle.
        step = attach_back(workflow, command)
        step.allow_fail = True
        step.allow_dangling = True
        return step

    _attach_tolerant(PythonCommand(
        json_meta2,
        input={"meta": conf.prefix + ".meta"},
        output={"json": conf.json_prefix + "_meta.json"},
        param={"id": conf.id},
        name="bedAnnotate summary"))

    if has_dhs:
        _attach_tolerant(PythonCommand(
            json_dhs,
            input={"dhs": conf.prefix + ".dhs",
                   "top_peaks": 5000},
            output={"json": conf.json_prefix + "_dhs.json"},
            name="DHS summary"))

    if has_velcro:
        _attach_tolerant(PythonCommand(
            json_velcro,
            input={"velcro": conf.prefix + ".velcro",
                   "top_peaks": 5000},
            output={"json": conf.json_prefix + "_velcro.json"},
            name="Velcro summary"))
Ejemplo n.º 19
0
def _bowtie(workflow, conf):
    """Attach a bowtie mapping step per sample target and the summary parser.

    Each mapping step defaults to 4 threads and ``-m 1`` and then has its
    parameters updated from the ``[bowtie]`` section.  bowtie's standard
    error is redirected to ``<target>_bowtie_summary.txt``.  After the
    mapping steps ``__sam2bam`` is called, and a ``stat_bowtie`` step is
    attached that reads all summary files.
    """
    for target in conf.sample_targets:
        genome_index = conf.get_path("lib", "genome_index")
        # NOTE: the template below spans two source lines through a
        # backslash continuation, so the second line's indentation is part
        # of the command string.
        mapping = attach_back(workflow,
            ShellCommand(
                "{tool} -p {param[threads]} -S -m {param[max_align]} \
                {param[genome_index]} {input[fastq]} {output[sam]} 2> {output[bowtie_summary]}",
                tool="bowtie",
                input={"genome_dir": os.path.dirname(genome_index),
                       "fastq": target + ".fastq"},
                output={"sam": target + ".sam",
                        "bowtie_summary": target + "_bowtie_summary.txt"},
                param={"threads": 4,
                       "max_align": 1,
                       "genome_index": genome_index}))
        mapping.update(param=conf.items("bowtie"))

    __sam2bam(workflow, conf)

    ## using bowtie standard error output
    summaries = [t + "_bowtie_summary.txt" for t in conf.sample_targets]
    sam_files = [t + ".sam" for t in conf.sample_targets]
    attach_back(workflow,
        PythonCommand(stat_bowtie,
            input={"bowtie_summaries": summaries,
                   "db": ChiLinQC_db,
                   "template": rlang_template},
            output={"json": conf.json_prefix + "_bowtie.json",
                    "R": conf.prefix + "_bowtie.R",
                    "pdf": conf.prefix + "_bowtie.pdf"},
            param={"sams": sam_files}))
Ejemplo n.º 20
0
def _raw_QC_latex(workflow, conf):
    """Attach the ``latex_fastqc`` step turning ``<json_prefix>_fastqc.json`` into LaTeX."""
    fastqc_step = PythonCommand(
        latex_fastqc,
        input={"json": conf.json_prefix + "_fastqc.json",
               "template": latex_template},
        output={"latex": conf.latex_prefix + "_fastqc.latex"})
    attach_back(workflow, fastqc_step)
Ejemplo n.º 21
0
def sample_bam_stat(workflow, conf, tex):
    """Attach the BAM statistics step (and, for non-MACS runs, the SPOT step).

    For every treatment target ``eap_dnase_stats`` is run on the
    name-sorted BAM (paired-end) or on ``<target>_final_nochrm.bam``
    (single-end).  When the ``[tool] peak_calling`` setting does not contain
    ``"macs"``, a ``dac_spot`` step is attached as well.  Judging by the
    output file names the statistics are computed on 15M and 5M read
    samples -- presumably done inside the tools; not visible here.

    ``tex`` is accepted but not used.
    """
    for i, target in enumerate(conf.treatment_targets):
        ## for PE, use name sorted in order to calculate PBC
        if conf.pe:
            input_bam = target + "_name_sorted.bam"
        else:
            input_bam = target + "_final_nochrm.bam"

        stats_step = ShellCommand(
            "{tool} {input[namesorted]} {param[run_spp]} {output[bamstat]} {output[sppstat]}  {param[pe]} {output[pbc]}",
            tool="eap_dnase_stats",
            input={"namesorted": input_bam},
            output={"bamstat": target + "_bam_stat.qc",
                    "sppstat": target + "_spp.qc",
                    "pbc": target + "_final_nochrm_15M_pbc.qc"},
            param={"pe": "pe" if conf.pe else "se",
                   "run_spp": conf.get("tool", "spp")})
        attach_back(workflow, stats_step)

        peak_caller = conf.get("tool", "peak_calling")
        if "macs" in peak_caller:
            continue

        # Non-MACS peak caller: its setting doubles as the hotspot directory.
        spot_step = ShellCommand(
            "{tool} {input[bamwithoutchrm]} {param[genome]} {param[readsize]} {output[spot]} {param[hotspot_dir]} {param[hotspot_output]} {param[hotspot_tmp]} {param[spot_tmp]}",
            tool="dac_spot",
            input={"bamwithoutchrm": target + "_final_nochrm.bam"},
            output={"spot": target + "_spot_nochrm_5M.qc"},
            param={"genome": conf.species,
                   "spot_tmp": conf.hotspot_reps_tmp_prefix[i] + "_final_nochrm.bam.5000000.spot.out",
                   "readsize": conf.readsize,
                   "hotspot_dir": peak_caller,
                   "hotspot_output": target + "_hotspot",
                   "hotspot_tmp": target + "_hotspot_tmp"})
        attach_back(workflow, spot_step)
Ejemplo n.º 22
0
def _star_sam2bam(workflow, conf):  # SAM -> BAM
    """
    Convert the STAR SAM output to BAM and derive the chrM-free files.

    Two samtools steps are attached per sample target:

    1. link ``<target>Aligned.out.sam`` to ``<target>.sam`` and write
       ``<target>.bam`` keeping only alignments with mapping quality 255;
    2. build a BED of all chromosomes except chrM from the ``chrom_len``
       file and use it to write ``<target>_nochrM.bam``,
       ``<target>_nochrM.sam`` and ``<target>_u.sam``.

    Both steps have their parameters updated from the ``[sam2bam]`` section.

    :param workflow: samflow defined class
    :param conf: parsed config file
    :return: void
    """
    import os
    for target in conf.sample_targets:
        genome = conf.get(conf.get("basics", "species"), "chrom_len")

        sam2bam = attach_back(
            workflow,
            ShellCommand("""
                        ln -s {input[sam]} {output[sam]}
                        {tool} view -q 255 -bt {param[genome]} {input[sam]} -o {output[bam]}
                        """,
                         tool="samtools",
                         input={"sam": target + "Aligned.out.sam"},
                         output={
                             "bam": target + ".bam",
                             "sam": target + ".sam"
                         },
                         param={"genome": genome},
                         name="star sam2dam"))
        # Apply the [sam2bam] settings to the command just attached; the
        # update was previously issued on the workflow object instead.
        sam2bam.update(param=conf.items("sam2bam"))

        #From bwa/dc.py
        sam2bamnochrm = attach_back(
            workflow,
            ShellCommand(
                """
                        awk \'BEGIN{{OFS="\\t"}} {{print $1,0,$2}}\' {param[genome]} > {param[chrom_bed]}
                        grep -v chrM {param[chrom_bed]} > {output[nochrmbed]}
                        {tool} view -h -b -L {output[nochrmbed]} {input[bam]} > {output[nochrmbam]}
                        {tool} view -h {output[nochrmbam]}  > {output[nochrmsam]}
                        {tool} view -h {input[bam]}  > {output[usam]}
                        """,
                tool="samtools",
                input={"bam": target + ".bam"},
                output={
                    "nochrmbed": target + ".nochrM",
                    "nochrmbam": target + "_nochrM.bam",
                    "usam":
                    target + "_u.sam",  ## uniquely mapping sam for sampling
                    "nochrmsam": target + "_nochrM.sam"
                },
                param={
                    "tmp_bam": target + ".tmp.bam",
                    "output_prefix": target,
                    "chrom_bed": os.path.join(conf.target_dir, "chrom.bed"),
                    "mapq": 1,
                    "genome": genome
                },
                name="filtering mapping and convert")
        )
        sam2bamnochrm.update(param=conf.items("sam2bam"))
Ejemplo n.º 23
0
def _macs2_cor_latex(workflow, conf):
    """Attach the ``latex_cor`` step turning ``<json_prefix>_cor.json`` into LaTeX."""
    cor_step = PythonCommand(
        latex_cor,
        input={"json": conf.json_prefix + "_cor.json",
               "template": latex_template},
        output={"latex": conf.latex_prefix + "_cor.latex"})
    attach_back(workflow, cor_step)
Ejemplo n.º 24
0
def _conservation_latex(workflow, conf):
    """Attach the ``latex_conservation`` step turning ``<json_prefix>_conserv.json`` into LaTeX."""
    conserv_step = PythonCommand(
        latex_conservation,
        input={"json": conf.json_prefix + "_conserv.json",
               "template": latex_template},
        output={"latex": conf.latex_prefix + "_conserv.latex"})
    attach_back(workflow, conserv_step)
Ejemplo n.º 25
0
def _bwa(workflow, conf):
    """
    Attach one bwa mapping step per treatment sample.

    ``eap_run_bwa_pe`` is used with both FASTQ files when ``conf.pe`` is
    truthy, ``eap_run_bwa_se`` with a single FASTQ otherwise.  Both write
    ``<target>_raw_sorted.bam`` and ``<target>_rawbam.qc``, and each step has
    its parameters updated from the ``[bwa]`` section.
    """
    for raw, target in conf.treatment_pairs:
        param = {"threads": conf.threads,
                 "index": conf.get(conf.species, "genome_index"),
                 "prefix": target + "_raw_sorted",
                 "qc2": target + "_rawbam_stats.qc"}

        # The two modes differ only in wrapper tool, FASTQ arguments and name.
        if conf.pe:
            tool = "eap_run_bwa_pe"
            step_name = "pair end mapping"
            template = "{tool} {param[threads]} {param[index]} {input[fastq][0]} {input[fastq][1]} {output[bam]} {output[qc]} {param[prefix]} {param[qc2]}"
        else:
            tool = "eap_run_bwa_se"
            step_name = "single end mapping"
            template = "{tool} {param[threads]} {param[index]} {input[fastq]} {output[bam]} {output[qc]} {param[prefix]} {param[qc2]}"

        mapping = attach_back(workflow, ShellCommand(
            template,
            tool=tool,
            input={"fastq": raw},
            output={"bam": target + "_raw_sorted.bam",
                    "qc": target + "_rawbam.qc"},
            param=param,
            name=step_name))
        mapping.update(param=conf.items("bwa"))
Ejemplo n.º 26
0
Archivo: dc.py Proyecto: asntech/chilin
def reg_potential(workflow, conf):
    """
    Attach the top-peak extraction and the RegPotential.py scoring step.

    The first step takes the leading lines (10000 by default) of the sorted
    narrowPeak file -- or of the sorted broadPeak file when the ``[macs2]``
    ``type`` setting is neither "both" nor "narrow" -- and keeps columns
    1, 2, 3, 4 and 9.  The second step runs ``RegPotential.py`` on that
    file.  Both steps have their parameters updated from the
    ``[reg_potential]`` section.
    """
    if conf.get("macs2", "type") in ["both", "narrow"]:
        sorted_peaks = conf.prefix + "_sort_peaks.narrowPeak"
    else:
        sorted_peaks = conf.prefix + "_b_sort_peaks.broadPeak"
    top_peaks_bed = conf.prefix + "_peaks_top_reg.bed"

    get_top_peaks = attach_back(
        workflow,
        ShellCommand(
            "{tool} -n {param[peaks]} {input} | cut -f 1,2,3,4,9> {output}",
            tool="head",
            input=sorted_peaks,
            output=top_peaks_bed,
            param={"peaks": 10000},
            name="top summits for regpotential"))
    get_top_peaks.update(param=conf.items("reg_potential"))

    gene_table = conf.get_path(conf.get("basics", "species"), "geneTable")
    reg = attach_back(
        workflow,
        ShellCommand(
            "{tool} -t {input[peaks]} -g {param[geneTable]} -n {param[prefix]} -d {param[dist]}",
            tool="RegPotential.py",
            input={"peaks": conf.prefix + "_peaks_top_reg.bed"},
            output={"potential": conf.prefix + "_gene_score.txt"},
            param={"geneTable": gene_table,
                   "tool": resource_filename("chilin2.modules", "regulatory/RegPotential.py"),
                   "prefix": conf.prefix,
                   "dist": 100000},
            name="Regulatory Potential"))
    reg.update(param=conf.items("reg_potential"))
Ejemplo n.º 27
0
def _macs2_cor(workflow, conf):
    """Attach the bigwig correlation step and the step parsing its result.

    ``bigwig_correlation.py`` is run over the ``<target>_treat.bw`` file of
    every treatment target, with one ``-l replicate_<n>`` label per entry of
    ``conf.treatment_pairs``.  The step's parameters are updated from the
    ``[correlation]`` section and it is flagged ``allow_fail``.  A
    ``stat_cor`` step then turns the R script and PDF into
    ``<json_prefix>_cor.json``.
    """
    bigwigs = [target + "_treat.bw" for target in conf.treatment_targets]
    # NOTE: the template relies on backslash continuations inside the
    # literal, so the indentation of its lines is part of the command.
    cor_on_bw = attach_back(workflow,
        ShellCommand(
            template=
            """{tool} \
            -s {param[wig_correlation_step]}  \
            --min-score {param[wig_correlation_min]} --max-score {param[wig_correlation_max]} \
            -r {output[R]} {param[bw]}  {param[rep]} && \
            mv {output[R]}.pdf {output[pdf]}""",
            tool="bigwig_correlation.py",
            input=bigwigs,
            output={"R": conf.prefix + "_cor.R", "pdf": conf.prefix + "_cor.pdf"},
            param={"wig_correlation_method": "mean",
                   "wig_correlation_min": 2,
                   "wig_correlation_max": 50,
                   "wig_correlation_step": 10},
            name="cor_on_bw"))

    replicate_count = len(conf.treatment_pairs)
    labels = [" -l replicate_%s" % number
              for number in range(1, replicate_count + 1)]
    cor_on_bw.param["bw"] = " ".join(cor_on_bw.input)
    cor_on_bw.param["rep"] = " ".join(labels)
    cor_on_bw.update(param=conf.items("correlation"))
    cor_on_bw.allow_fail = True

    attach_back(workflow,
        PythonCommand(
            stat_cor,
            input={"correlation_R": conf.prefix + "_cor.R",
                   "cor_pdf": conf.prefix + "_cor.pdf"},
            output={"json": conf.json_prefix + "_cor.json"}))
Ejemplo n.º 28
0
Archivo: qc.py Proyecto: asntech/chilin
def stat_motif(workflow, conf):
    """Queue the Python step that collects motif results into JSON.

    :param workflow: workflow the command is appended to
    :param conf: parsed config object supplying ``prefix`` and ``json_prefix``
    """
    seqpos_dir = conf.prefix + "_seqpos/"
    collector = PythonCommand(
        stat_seqpos,
        input={"seqpos": seqpos_dir + "motif_list.json"},
        output={"json": conf.json_prefix + "_seqpos.json"},
        param={"prefix": conf.prefix + "_seqpos/seqLogo/",
               "z_score_cutoff": -1},
        name="collect motif info")
    attach_back(workflow, collector)
Ejemplo n.º 29
0
def summary_table_latex(workflow, conf):
    """Queue the Python step that renders the summary table LaTeX fragment.

    The ``layout`` parameter is one ``l`` column followed by one ``c`` column
    per entry of ``conf.sample_bases`` plus one extra.

    :param workflow: workflow the command is appended to
    :param conf: parsed config object, also handed to the command as a param
    """
    template_path = resource_filename("chilin2.modules.summary",
                                      "summary_table.tex")
    latex_path = conf.latex_prefix + "_summary_table.tex"
    column_layout = "l" + "c" * (1 + len(conf.sample_bases))

    render = PythonCommand(
        latex_summary_table,
        input={"template": template_path},
        output={"latex": latex_path},
        param={"conf": conf, "layout": column_layout})
    attach_back(workflow, render)
Ejemplo n.º 30
0
def tex_frip(workflow, conf):
    """Queue the Python step that renders the FRiP JSON into LaTeX.

    :param workflow: workflow the command is appended to
    :param conf: parsed config object supplying ``json_prefix`` and ``latex_prefix``
    """
    sources = {
        "json": conf.json_prefix + "_frip.json",
        "template": resource_filename("chilin2.modules.frip", "frip.tex"),
    }
    render = PythonCommand(
        load_latex,
        input=sources,
        output={"latex": conf.latex_prefix + "_frip.tex"})
    attach_back(workflow, render)
Ejemplo n.º 31
0
    def test_workflow_attach_later_invoke_success(self):
        """Commands attached around an existing tree must still invoke cleanly."""
        wf = self.create_tree()
        rule = "=" * 10

        # Wrap the tree: create a file up front and remove it at the end.
        attach_front(wf, ShellCommand("touch {output}", output="outer_f1"))
        attach_back(wf, ShellCommand("rm {input}", input="outer_f1"))

        # Outermost layer: banner commands before and after everything else.
        start_banner = 'echo "{0} decorator started {0}"'.format(rule)
        attach_front(wf, ShellCommand(start_banner))
        end_banner = 'echo "{0} decorator ended {0}"'.format(rule)
        attach_back(wf, JinShCommand(end_banner))

        self.assertTrue(wf.invoke())
Ejemplo n.º 32
0
def tex_contamination(workflow, conf):
    """Queue the Python step that renders the contamination JSON into LaTeX.

    The table layout gets one ``c`` column per entry of the
    ``contamination`` config section plus one extra.

    :param workflow: workflow the command is appended to
    :param conf: parsed config object
    """
    species_names = [name for name, _value in conf.items("contamination")]
    sources = {
        "template": resource_filename("chilin2.modules",
                                      "contamination/contamination.tex"),
        "json": conf.json_prefix + "_contam.json",
    }
    render = PythonCommand(
        latex_contamination,
        input=sources,
        output={"latex": conf.latex_prefix + "_contam.tex"},
        param={'id': conf.id, 'layout': 'c' * (len(species_names) + 1)})
    attach_back(workflow, render)
Ejemplo n.º 33
0
 def create_tree(self):
     """Build a "main" workflow holding one "sub" workflow of four commands.

     The sub workflow echoes a start marker, touches f1 and f2, removes
     them again and echoes an end marker.

     :return: the outer "main" workflow
     """
     outer = Workflow("main")
     inner = Workflow("sub")

     attach_back(inner, ShellCommand('echo "subtree started"'))
     create_files = JinShCommand('touch {{ output|join(" ") }}',
                                 output=["f1", "f2"])
     attach_back(inner, create_files)
     remove_files = JinShCommand('rm {{ input|join(" ") }}',
                                 input=["f1", "f2"])
     attach_back(inner, remove_files)
     attach_back(inner, ShellCommand('echo "subtree ended"'))

     attach_back(outer, inner)
     return outer
Ejemplo n.º 34
0
Archivo: qc.py Proyecto: asntech/chilin
def stat_phan(workflow, conf):
    """
    Queue the step that gathers the per-sample ``.spp`` files into one JSON.

    :param workflow: workflow the command is appended to
    :param conf: parsed config object
    """
    spp_files = [target + ".spp" for target in conf.sample_targets]
    collector = PythonCommand(
        json_phan,
        input={"spp": spp_files},
        output={"json": conf.json_prefix + "_phan.json"},
        param={"sample": conf.sample_bases})
    attach_back(workflow, collector)
Ejemplo n.º 35
0
Archivo: qc.py Proyecto: asntech/chilin
def stat_pbc(workflow, conf): # collect pbc value
    """
    Queue the step that gathers the per-sample ``.pbc`` files into one JSON.

    :param workflow: workflow the command is appended to
    :param conf: parsed config object
    """
    pbc_files = [target + ".pbc" for target in conf.sample_targets]
    collector = PythonCommand(
        json_pbc,
        input={"pbc": pbc_files},
        output={"json": conf.json_prefix + "_pbc.json"},
        param={"samples": conf.sample_bases})
    attach_back(workflow, collector)
Ejemplo n.º 36
0
def stat_phan(workflow, conf):
    """
    Queue the step that gathers the per-sample ``.spp`` files into one JSON.

    :param workflow: workflow the command is appended to
    :param conf: parsed config object
    """
    sources = {"spp": [name + ".spp" for name in conf.sample_targets]}
    destination = {"json": conf.json_prefix + "_phan.json"}
    settings = {"sample": conf.sample_bases}
    attach_back(
        workflow,
        PythonCommand(json_phan,
                      input=sources,
                      output=destination,
                      param=settings))
Ejemplo n.º 37
0
def stat_pbc(workflow, conf):  # collect pbc value
    """
    Queue the step that gathers the per-sample ``.pbc`` files into one JSON.

    :param workflow: workflow the command is appended to
    :param conf: parsed config object
    """
    sources = {"pbc": [name + ".pbc" for name in conf.sample_targets]}
    destination = {"json": conf.json_prefix + "_pbc.json"}
    settings = {"samples": conf.sample_bases}
    attach_back(
        workflow,
        PythonCommand(json_pbc,
                      input=sources,
                      output=destination,
                      param=settings))
Ejemplo n.º 38
0
def filter_bam(workflow, conf, tex):
    """Queue one BAM post-filtering command per treatment target.

    Paired-end runs use ``dac_bam_pe_post_filter`` (which also produces a
    name-sorted BAM); single-end runs use ``dac_bam_se_post_filter``.

    :param workflow: workflow the commands are appended to
    :param conf: parsed config object; ``conf.pe`` selects the branch
    :param tex: unused here
    """
    for target in conf.treatment_targets:
        raw_bam = {"raw": target + "_raw_sorted.bam"}

        if conf.pe:
            label = "pair"
            filter_tool = "dac_bam_pe_post_filter"
            template = "{tool} {input[raw]} {param[namesortedbamprefix]} {output[namesortedbam]} {param[finalprefix]} {output[finalbam]} {param[mapq]} {output[bamwithoutchrm]} {output[qc]} {param[qc2]}"
            settings = {
                "mapq": 3,
                "namesortedbamprefix": target + "_name_sorted",
                "finalprefix": target + "_final",
                "qc2": target + "_filter_bam_stats.qc"
            }
            products = {
                "finalbam": target + "_final.bam",
                "namesortedbam": target + "_name_sorted.bam",
                "bamwithoutchrm": target + "_final_nochrm.bam",
                "qc": target + "_filter_bam.qc"
            }
        else:
            label = "single"
            filter_tool = "dac_bam_se_post_filter"
            template = "{tool} {input[raw]} {output[finalbam]} {param[mapq]} {output[qc]} {output[bamwithoutchrm]} {param[finalprefix]} {param[qc2]}"
            settings = {
                "mapq": 3,
                "finalprefix": target + "_final",
                "qc2": target + "_filter_bam_stats.qc"
            }
            products = {
                "finalbam": target + "_final.bam",
                "bamwithoutchrm": target + "_final_nochrm.bam",
                "qc": target + "_filter_bam.qc"
            }

        # Both branches differ only in the values chosen above.
        attach_back(
            workflow,
            ShellCommand(template,
                         tool=filter_tool,
                         input=raw_bam,
                         output=products,
                         param=settings,
                         name="%s end filtering" % label))
Ejemplo n.º 39
0
def star(workflow, conf):  # Mapping
    """
    Queue one STAR alignment per sample target, then the SAM-to-BAM
    conversion (``_star_sam2bam``) and the mapping QC steps.

    :param workflow: samflow defined class
    :param conf: parsed config files
    :return: void
    """
    command_line = "{tool} --genomeDir {param[index]} --runThreadN {param[NUM_THREADS]} --readFilesIn {input[fastq]} --outFileNamePrefix {param[prefix]}"

    for target in conf.sample_targets:
        settings = {
            "NUM_THREADS": conf.threads,
            "prefix": target,
            ## judge chosen species from basics section
            "index": conf.get_path(conf.get("basics", "species"),
                                   "genome_index")
        }
        aligner = attach_back(
            workflow,
            ShellCommand(command_line,
                         tool="STAR",
                         input={"fastq": target + ".fastq"},
                         output={"sam": target + "Aligned.out.sam"},
                         param=settings,
                         name="star aln"))
        # NOTE(review): overrides come from the "bowtie" config section, as in
        # the original code -- confirm this is intended for STAR.
        aligner.update(param=conf.items("bowtie"))

    _star_sam2bam(workflow, conf)

    ## QC part--NOTE keeping the bwa legacy code!
    stat_bwa(workflow, conf)
    if conf.long:
        tex_bwa(workflow, conf)
Ejemplo n.º 40
0
def PBC(workflow, conf):  # PBC1
    """
    Queue the library-complexity (PBC) computation for every sample target.

    The shell pipeline writes a histogram of reads per distinct location to
    ``<target>.hist`` and then ``N1 Nd N1/Nd`` to ``<target>.pbc``, where N1
    is the count on the first histogram line and Nd the sum of all counts.
    When ``conf.down`` is set the ``_4000000.bam`` file is used as input.

    :param workflow: samflow class
    :param conf: parsed config
    :return: void
    """
    for target in conf.sample_targets:
        if conf.down:
            bam_path = target + "_4000000.bam"
        else:
            bam_path = target + ".bam"

        complexity = ShellCommand(
            """
                               bamToBed -i {input[bam]} | {tool} \'{{l[$1"\\t"$2"\\t"$3"\\t"$6]+=1}} END {{for(i in l) print l[i]}}\' \\
                                 | awk \'{{n[$1]+=1}} END {{for (i in n) print i"\\t"n[i]}}\'  \\
                                 | sort -k1n -  > {output[hist]}
                               awk '{{
                               if (NR==1) {{N1=$2}}
                               Nd+=$2
                               }} END {{print N1,Nd,N1/Nd}}' {output[hist]} > {output[pbc]}
                               """,
            tool="awk",
            input={"bam": bam_path},
            output={
                "pbc": target + ".pbc",
                "hist": target + ".hist"
            },
            name="PBC")
        queued = attach_back(workflow, complexity)
        queued.allow_fail = True
        queued.allow_dangling = True

    ## QC part
    stat_pbc(workflow, conf)
Ejemplo n.º 41
0
def replicates_peaks_overlap(workflow, conf):  # peaks bed from each replicate
    """
    Queue one ``intersectBed`` overlap count for every pair of replicates.

    For each pair (i, j) with i < j of ``conf.treatment_targets`` the sorted
    peak files are intersected and the number of overlapping lines is written
    to ``<prefix>_<i>_<j>.overlap``.  Narrow peaks are used when the macs2
    ``type`` option is "both" or "narrow" (case-insensitive), broad peaks
    otherwise.  The ``replicates`` config section is applied to every queued
    command.

    With fewer than two treatment targets there is nothing to compare and the
    workflow is returned untouched.

    :param workflow: class from samflow
    :param conf: external parsed config file
    :return: workflow through attach_back
    """
    targets = conf.treatment_targets
    if len(targets) < 2:
        # No replicate pair exists; previously this path raised
        # UnboundLocalError on the trailing update() call.
        return workflow

    if conf.get("macs2", "type").lower() in ["both", "narrow"]:
        peak_suffix = "_sort_peaks.narrowPeak"
    else:
        peak_suffix = "_b_sort_peaks.broadPeak"

    for i in range(len(targets)):
        for j in range(i + 1, len(targets)):
            replicates_overlap = attach_back(
                workflow,
                ShellCommand(
                    "{tool} -f {param[p]} -a {input[0]} -b {input[1]} | wc -l > {output}",
                    tool="intersectBed",
                    input=[targets[i] + peak_suffix,
                           targets[j] + peak_suffix],
                    output=conf.prefix + "_%s_%s.overlap" % (i, j),
                    param={"p": 0.3},
                    name="Replicates peaks overlap QC"))
            # Apply config overrides to every pair, not only the last one.
            replicates_overlap.update(param=conf.items("replicates"))
            replicates_overlap.allow_fail = True  # in case 0 peak in macs2
            replicates_overlap.allow_dangling = True

    return workflow
Ejemplo n.º 42
0
def merge_latex(workflow, conf):
    """Queue a ``cat`` command that concatenates the LaTeX fragments.

    The report always starts with the begin and summary-table fragments and
    ends with the end fragment; the detailed sections are inserted only when
    ``conf.long`` is set.  The result is written to ``<prefix>_report.tex``.

    :param workflow: workflow the command is appended to
    :param conf: parsed config object
    """
    ## begin and end of the docs
    sections = ["_begin.tex", "_summary_table.tex"]
    if conf.long:
        sections.extend([
            "_fastqc.tex",
            "_fastqc_gc.tex",
            "_map.tex",
            "_conserv.tex",
            # "_macs2.latex", "_macs2_on_sample.latex",
            # "_phan.tex",
            "_motif.tex",
            "_contam.tex",
            "_frip.tex",
        ])
    sections.append("_end.tex")

    fragment_paths = []
    for suffix in sections:
        fragment_paths.append(conf.latex_prefix + suffix)

    concatenate = ShellCommand("cat {param[tex]} > {output}",
                               output=conf.prefix + "_report.tex")
    merge_cmd = attach_back(workflow, concatenate)
    merge_cmd.allow_fail = True
    # The fragment list is passed through param, so it replaces any default param.
    merge_cmd.param = {"tex": " ".join(fragment_paths)}
Ejemplo n.º 43
0
def bowtie(workflow, conf):   # Mapping
    """
    Queue one bowtie alignment per sample target, then the SAM-to-BAM
    conversion (``_bowtie_sam2bam``) and the mapping QC steps.

    Every alignment command takes overrides from the ``bowtie`` config
    section and is allowed to fail or dangle.

    :param workflow: samflow defined class
    :param conf: parsed config files
    :return: void
    """
    command_line = "{tool} -p {param[NUM_THREADS]} -S -m 1 {param[index]} {input[fastq]} {output[sam]}"

    for target in conf.sample_targets:
        settings = {
            "NUM_THREADS": conf.threads,
            ## judge chosen species from basics section
            "index": conf.get_path(conf.get("basics", "species"), "genome_index"),
        }
        aligner = attach_back(
            workflow,
            ShellCommand(command_line,
                         tool="bowtie",
                         input={"fastq": target + ".fastq"},
                         output={"sam": target + ".sam"},
                         param=settings,
                         name="bowtie aln"))
        aligner.update(param=conf.items("bowtie"))
        aligner.allow_dangling = True
        aligner.allow_fail = True

    _bowtie_sam2bam(workflow, conf)

    ## QC part--NOTE keeping the bwa legacy code!
    stat_bwa(workflow, conf)
    if conf.long:
        tex_bwa(workflow, conf)
Ejemplo n.º 44
0
def DHS(workflow, conf):  # DHS overlap percentage
    """
    Queue the command that counts top peaks overlapping the union DHS set.

    The shell snippet takes the first ``p`` (5000) lines of the peak file,
    counts them, counts how many intersect the DHS bed file, and writes
    ``n,dhs`` to ``<prefix>.dhs``.  Narrow peaks are used when the macs2
    ``type`` option is "both" or "narrow", broad peaks otherwise.

    :param workflow: uniform pipeline workflow from samflow
    :param conf: parsed config files
    :return: None (the original docstring said "workflow", but nothing is returned)
    """
    if conf.get("macs2", "type") in ["both", "narrow"]:
        peaks = conf.prefix + "_sort_peaks.narrowPeak"
    else:
        peaks = conf.prefix + "_b_sort_peaks.broadPeak"

    sources = {
        "MACS2_bed": peaks,
        "DHS_peaks_bed": conf.get(conf.get("basics", "species"), "dhs")
    }
    overlap = ShellCommand("""
                                   n=$(head -n {param[p]} {input[MACS2_bed]} | wc -l)
                                   dhs=$(head -n {param[p]} {input[MACS2_bed]} | {tool} -wa -u -a - -b {input[DHS_peaks_bed]}|wc -l)
                                   ##dhs=$(echo \"scale=5;$dhs/$n\" | bc)
                                   echo $n,$dhs > {output}
                                   """,
                           tool="intersectBed",
                           input=sources,
                           output=conf.prefix + ".dhs",
                           param={"p": 5000},
                           name="intersect DHS")
    queued = attach_back(workflow, overlap)
    queued.allow_dangling = True
    queued.allow_fail = True
Ejemplo n.º 45
0
def fastqc(workflow, conf):
    """
    Queue one FastQC run per sample on its ``_100k.fastq`` file, then the
    FastQC QC collection steps.

    For paired-end data only the first mate of each pair is checked.  Every
    queued command is marked ``allow_fail`` / ``allow_dangling``; the
    original code set those flags on this function object instead of on the
    command, so they never took effect.

    :param workflow: workflow the commands are appended to
    :param conf: parsed config object (``pe``, ``sample_pairs``,
        ``target_dir``, ``threads``, ``long`` are read)
    :return: None
    """
    for _raw, target in conf.sample_pairs:
        ## only check one pair
        sample = target[0] if conf.pe else target

        fastqc_run = attach_back(
            workflow,
            ShellCommand(
                "{tool} {input} --extract -t {param[threads]} -o {output[target_dir]}",
                input=sample + "_100k.fastq",
                output={
                    "target_dir": conf.target_dir,
                    "fastqc_summary": sample + "_100k_fastqc/fastqc_data.txt"
                },
                tool="fastqc",
                param={"threads": conf.threads},
                name="fastqc"))
        if not conf.pe:
            # NOTE(review): config overrides were only ever applied to
            # single-end runs; kept as-is -- confirm whether paired-end
            # should receive them too.
            fastqc_run.update(param=conf.items("fastqc"))

        # Flag the command itself (not the enclosing function) as tolerant.
        fastqc_run.allow_fail = True
        fastqc_run.allow_dangling = True

    ## QC part of chilin
    ## use conf property conf.long = True
    stat_fastqc(workflow, conf)
    if conf.long:
        tex_fastqc(workflow, conf)
Ejemplo n.º 46
0
def stat_fastqc(workflow, conf):  # collect raw reads quality and GC contents
    """
    Queue the step that collects the FastQC summaries into JSON and, when
    ``conf.long`` is set, the step that draws the detailed figure.

    For paired-end data the summary of the first mate is used.

    :param workflow: workflow the commands are appended to
    :param conf: parsed config object
    """
    summaries = [
        (target[0] if conf.pe else target) + "_100k_fastqc/fastqc_data.txt"
        for _raw, target in conf.sample_pairs
    ]

    json_step = PythonCommand(json_fastqc,
                              input={"fastqc_summaries": summaries},
                              output={"json": conf.json_prefix + "_fastqc.json"},
                              param={
                                  "ids": conf.sample_bases,
                                  "id": conf.id
                              },
                              name="collect fastqc results")
    collect = attach_back(workflow, json_step)
    collect.allow_fail = True
    collect.allow_dangling = True

    if not conf.long:
        return

    ## prepare long document images and tex
    figure_inputs = {
        "dbaccessor": resource_filename("chilin2.modules.dbaccessor",
                                        "ChiLinQC.db"),
        "template": resource_filename("chilin2.modules.summary",
                                      "R_culmulative_plot.R"),
        "json": conf.json_prefix + "_fastqc.json"
    }
    figure_outputs = {
        "R": conf.prefix + "_raw_sequence_qc.R",
        "pdf": conf.prefix + "_raw_sequence_qc.pdf"
    }
    long_collect = attach_back(
        workflow,
        PythonCommand(fastqc_detailed_figure,
                      name='fastqc',
                      input=figure_inputs,
                      output=figure_outputs,
                      param={"ids": conf.sample_bases}))
    long_collect.allow_fail = True
    long_collect.allow_dangling = True
Ejemplo n.º 47
0
def fragment(workflow, conf):
    """Queue ``macs2 predictd`` per treatment target plus the model parser.

    The macs2 executable comes from the ``tool`` config section and falls
    back to ``"macs2"`` when that value is empty.  Each command takes
    overrides from the ``macs2`` section.  A final Python step
    (``stat_frag_std``) reads every ``_model.R`` file and writes
    ``_frag.json`` and per-target ``_frag_sd.R`` files.

    :param workflow: workflow the commands are appended to
    :param conf: parsed config object
    """
    ## this is done after FRiP
    macs2_bin = conf.get("tool", "macs2") or "macs2"

    for target in conf.treatment_targets:
        model_script = target + "_model.R"
        predict = ShellCommand(
            "{tool} predictd -i {input[bam]} --rfile {param[prefix]} -g {param[species]}",
            tool=macs2_bin,
            input={"bam": target + ".bam"},
            output={"R": model_script},
            param={
                "prefix": model_script,
                "species": 'hs'
            })
        fragment_size = attach_back(workflow, predict)
        fragment_size.update(param=conf.items("macs2"))
        ## except too few peaks for modeling
        fragment_size.allow_fail = True
        fragment_size.allow_dangling = True

    ## extract standard deviation from MACS2 model.R,
    ## use m, p, and pileup value for standard deviation; mean fragment size is provided (choose the one with highest correlation)
    model_scripts = [target + "_model.R" for target in conf.treatment_targets]
    sd_scripts = [target + "_frag_sd.R" for target in conf.treatment_targets]
    parser = PythonCommand(
        stat_frag_std,
        input={"r": model_scripts},
        output={
            "json": conf.json_prefix + "_frag.json",
            "r": sd_scripts
        },
        param={
            "samples": conf.treatment_bases,
            "frag_tool": "BAMSE"
        },
        name="macs2 model R script parser")
    frag_qc = attach_back(workflow, parser)
    frag_qc.allow_fail = True
    frag_qc.allow_dangling = True
Ejemplo n.º 48
0
def tex_phan(workflow, conf):
    """Queue the Python step that renders the phantompeak LaTeX fragment.

    One PDF per sample target is listed as a figure: ``<target>_4000000.pdf``
    when ``conf.down`` is set, ``<target>.pdf`` otherwise.

    :param workflow: workflow the command is appended to
    :param conf: parsed config object
    """
    figures = [
        target + ("_4000000.pdf" if conf.down else ".pdf")
        for target in conf.sample_targets
    ]
    sources = {
        "template": resource_filename("chilin2.modules.phantompeak",
                                      "phan.tex"),
        "figure": figures
    }
    render = PythonCommand(long_tex,
                           input=sources,
                           output={"latex": conf.latex_prefix + "_phan.tex"})
    attach_back(workflow, render)
Ejemplo n.º 49
0
def hotspotv4(workflow, conf, tex):
    """Queue hotspot peak calling per treatment target and, with replicates, on the pooled BAM.

    For every entry of ``conf.treatment_targets`` an ``eap_run_hotspot``
    command is attached.  When ``conf.treatment_pairs`` has at least two
    entries, replicate evaluation (``eval_reps``) is queued, the per-target
    ``_final.bam`` files are concatenated with ``samtools cat`` into
    ``<prefix>_pool.bam``, and a clone of the last per-target hotspot command
    is re-pointed at the pooled BAM and attached.

    :param workflow: workflow the commands are appended to
    :param conf: parsed config object
    :param tex: only forwarded to ``eval_reps``
    """
    for target in conf.treatment_targets:
        hotspot=attach_back(workflow,
                    ShellCommand(
                        "{tool} {param[hotspot_dir]} {param[genome]} {input[bam]} {param[readsize]} {output[narrowbb]} {output[broadbb]} {output[bigwig]} {param[tmp]} {output[hotspot_output]} {input[narrowas]} {input[broadas]} {param[chromsize]} {output[narrow]} {output[broad]}",
                        tool = "eap_run_hotspot",
                        # NOTE(review): `narrow` and `broad` are not defined in this
                        # function; presumably module-level names -- confirm.
                        input = {"bam": target + "_final_nochrm.bam",
                                 "narrowas": narrow,
                                 "broadas": broad},
                        output = {"narrowbb": target + ".narrowPeak.bigBed",
                                  "broadbb": target + ".broadPeak.bigBed",
                                  "narrow": target + ".narrowPeak",
                                  # "qc1": target + ".narrowPeak.qc",
                                  # "qc2": target + ".broadPeak.qc",
                                  "broad": target + ".broadPeak",
                                  "bigwig": target + ".bigWig",
                                  "hotspot_output": target + "_hotspot"},
                        param = {"hotspot_dir": conf.get("tool", "peak_calling"),
                                 "genome": conf.species,
                                 "chromsize": conf.get(conf.species, "chrom_len"),
                                 "tmp": target + "_hotspot_peak_call_tmp",
                                 "readsize": 36}))
    have_treat_reps = len(conf.treatment_pairs) >= 2 ## replicates

    if have_treat_reps:
        eval_reps(workflow, conf, tex)
        catsam = attach_back(workflow, ShellCommand(
            "{tool} cat {param[bams]} > {output[bam]}",
            tool = "samtools",
            input ={"bams": [ target + "_final.bam" for target in conf.treatment_targets]},
            output = {"bam": conf.prefix + "_pool.bam"}))
        # The template reads the BAM list from param, so mirror the inputs there.
        catsam.param.update(bams=' '.join(catsam.input["bams"]))
        # `hotspot` is whatever the loop above bound last.
        # NOTE(review): `clone` is accessed without a call; presumably a
        # property returning a copy of the command -- confirm.
        hotspot_merge = hotspot.clone
        hotspot_merge.param.update(tmp=conf.prefix+"_hotspot_peak_call_tmp")
        hotspot_merge.input.update(bam = conf.prefix + "_pool.bam")
        hotspot_merge.output ={"narrowbb": conf.prefix + ".narrowPeak.bigBed",
                               "broadbb": conf.prefix + ".broadPeak.bigBed",
                               "narrow": conf.prefix + ".narrowPeak",
                               # "qc1": conf.prefix + ".narrowPeak.qc",
                               # "qc2": conf.prefix + ".broadPeak.qc",
                               "broad": conf.prefix + ".broadPeak",
                               "bigwig": conf.prefix + ".bigWig",
                               "hotspot_output": conf.prefix + "_hotspot"}
        attach_back(workflow, hotspot_merge)
Ejemplo n.º 50
0
def tex_fastqc(workflow, conf):
    """Queue the two Python steps that render the FastQC LaTeX fragments.

    The first renders the quality section from the FastQC JSON and the raw
    sequence QC PDF.  The second renders the GC-content section from one
    ``per_sequence_gc_content.png`` per sample.  Both are allowed to fail
    or dangle.

    :param workflow: workflow the commands are appended to
    :param conf: parsed config object
    """
    quality_inputs = {
        "json": conf.json_prefix + "_fastqc.json",
        "template": resource_filename("chilin2.modules.fastqc", "fastqc.tex"),
        "pdf": conf.prefix + "_raw_sequence_qc.pdf"
    }
    quality = attach_back(
        workflow,
        PythonCommand(load_latex,
                      input=quality_inputs,
                      output={"latex": conf.latex_prefix + "_fastqc.tex"}))
    quality.allow_fail = True
    quality.allow_dangling = True

    #these are name, png pairings
    # Paired-end FastQC directories carry a "pair1" infix.
    if conf.pe:
        fastqc_dir_pattern = "%spair1_100k_fastqc"
    else:
        fastqc_dir_pattern = "%s_100k_fastqc"

    gccontent_graphs = []
    for sample in conf.sample_bases:
        png_path = os.path.join(conf.target_dir, fastqc_dir_pattern % sample,
                                "Images", "per_sequence_gc_content.png")
        gccontent_graphs.append((sample.replace("_", " "), png_path))

    gc_inputs = {
        "template": resource_filename("chilin2.modules.fastqc",
                                      "fastqc_gc.tex"),
        "gccontent_graphs": gccontent_graphs
    }
    gc_section = attach_back(
        workflow,
        PythonCommand(load_gc_latex,
                      input=gc_inputs,
                      output={"latex": conf.latex_prefix + "_fastqc_gc.tex"}))
    gc_section.allow_fail = True
    gc_section.allow_dangling = True
Ejemplo n.º 51
0
def eval_reps(workflow, conf, tex):
    """Queue the replicate-consistency evaluation command.

    The shell snippet merges all replicate narrowPeak files, converts the
    merged set to bigBed, correlates the replicate bigWigs restricted to the
    merged regions (``_cor.qc``) and compares the replicate peak bigBeds with
    ``edwComparePeaks`` (``_overlap.qc``).

    :param workflow: workflow the command is appended to
    :param conf: parsed config object
    :param tex: unused here
    """
    targets = conf.treatment_targets
    narrow_peaks = [target + ".narrowPeak" for target in targets]
    bigwigs = [target + ".bigWig" for target in targets]
    narrow_bigbeds = [target + ".narrowPeak.bigBed" for target in targets]

    sources = {"narrowPeaks": narrow_peaks,
               "bigwigs": bigwigs,
               "narrowPeakbbs": narrow_bigbeds}
    products = {"mergedPeak": conf.prefix + "_merge.bed",
                "mergedPeakbb": conf.prefix + "_merged.bigBed",
                "qc1": conf.prefix + "_cor.qc",
                "qc2": conf.prefix + "_overlap.qc"}
    # The template reads space-joined file lists from param rather than input.
    settings = {"narrowPeaksbb": " ".join(narrow_bigbeds),
                "narrowPeaks": " ".join(narrow_peaks),
                "bigwigs": " ".join(bigwigs),
                "chromsize": conf.get(conf.species, "chrom_len")}

    attach_back(workflow, ShellCommand(
        """
        cat {param[narrowPeaks]} | sort -k1,1 -k2,2n - | bedtools merge -i - > {output[mergedPeak]}
        bedToBigBed {output[mergedPeak]} {param[chromsize]} {output[mergedPeakbb]}
        bigWigCorrelate -restrict={output[mergedPeakbb]} {param[bigwigs]} 1>{output[qc1]}
        {tool} {param[narrowPeaksbb]} {output[qc2]}
        """,
        tool = "edwComparePeaks",
        input = sources,
        output = products,
        param = settings))
Ejemplo n.º 52
0
def tex_bwa(workflow, conf):
    """Queue the Python step that renders the mapping LaTeX fragment.

    The command is allowed to fail or dangle.

    :param workflow: workflow the command is appended to
    :param conf: parsed config object
    """
    sources = {
        "template": resource_filename("chilin2.modules.bwa", "bwa.tex"),
        "figure": conf.prefix + "_bwa_compare.pdf"
    }
    render = PythonCommand(long_tex,
                           input=sources,
                           output={"latex": conf.latex_prefix + "_map.tex"})
    queued = attach_back(workflow, render)
    queued.allow_fail = True
    queued.allow_dangling = True
Ejemplo n.º 53
0
def stat_frip(workflow, conf):  # collect frip score
    """
    Queue the step that gathers the per-sample ``.frip`` files into one JSON.

    The command is allowed to fail or dangle.

    :param workflow: workflow the command is appended to
    :param conf: parsed config object
    """
    frip_files = [target + ".frip" for target in conf.sample_targets]
    collector = PythonCommand(
        json_frip,
        input={"frip": frip_files},
        output={"json": conf.json_prefix + "_frip.json"},
        param={"samples": conf.sample_bases})
    queued = attach_back(workflow, collector)
    queued.allow_fail = True
    queued.allow_dangling = True
Ejemplo n.º 54
0
def tex_conserv(workflow, conf):
    """Queue the Python step that renders the conservation LaTeX fragment.

    The command is allowed to fail or dangle.

    :param workflow: workflow the command is appended to
    :param conf: parsed config object
    """
    template_path = resource_filename("chilin2.modules.conservation",
                                      "conservation.tex")
    render = PythonCommand(latex_conservation,
                           input={"template": template_path},
                           output={"latex": conf.latex_prefix + "_conserv.tex"},
                           param={"prefix": conf.prefix})
    queued = attach_back(workflow, render)
    queued.allow_dangling = True
    queued.allow_fail = True
Ejemplo n.º 55
0
def stat_bedAnnotate(workflow, conf, has_dhs, has_velcro):
    """Queue the JSON collectors for peak annotation results.

    The meta-gene summary is always queued; the DHS and velcro summaries are
    queued only when the matching flag is truthy.  All commands are allowed
    to fail or dangle.

    :param workflow: workflow the commands are appended to
    :param conf: parsed config object
    :param has_dhs: queue the DHS summary when truthy
    :param has_velcro: queue the velcro summary when truthy
    """
    meta_step = PythonCommand(json_meta2,
                              input={"meta": conf.prefix + ".meta"},
                              output={"json": conf.json_prefix + "_meta.json"},
                              param={"id": conf.id},
                              name="bedAnnotate summary")
    collect_meta2 = attach_back(workflow, meta_step)
    collect_meta2.allow_fail = True
    collect_meta2.allow_dangling = True

    if has_dhs:
        dhs_step = PythonCommand(json_dhs,
                                 input={
                                     "dhs": conf.prefix + ".dhs",
                                     "top_peaks": 5000
                                 },
                                 output={"json": conf.json_prefix + "_dhs.json"},
                                 name="DHS summary")
        collect_dhs = attach_back(workflow, dhs_step)
        collect_dhs.allow_dangling = True
        collect_dhs.allow_fail = True

    if has_velcro:
        velcro_step = PythonCommand(json_velcro,
                                    input={
                                        "velcro": conf.prefix + ".velcro",
                                        "top_peaks": 5000
                                    },
                                    output={"json": conf.json_prefix + "_velcro.json"},
                                    name="Velcro summary")
        collect_velcro = attach_back(workflow, velcro_step)
        collect_velcro.allow_fail = True
        collect_velcro.allow_dangling = True
Ejemplo n.º 56
0
def stat_conservation(workflow, conf):
    """Queue the conservation-score JSON collector and, for long reports, the figure step.

    Both commands are allowed to fail or dangle.

    :param workflow: workflow the commands are appended to
    :param conf: parsed config object
    """
    score_step = PythonCommand(
        json_conservation,
        input={"score": conf.prefix + "_conserv.txt"},
        output={"json": conf.json_prefix + "_conserv.json"},
        param={"atype": conf.get("basics", "factor", "TF"), "id": conf.id},
        name = "conservation score")
    collect = attach_back(workflow, score_step)
    collect.allow_dangling = True
    collect.allow_fail = True

    if not conf.long:
        return

    ## cluster figures, obsolete, keep for compatible
    figure_inputs = {
        "conservationR": conf.prefix + "_conserv.R",
        "historical_conservation_cluster_text": resource_filename(
            "chilin2.modules.dbaccessor", "Histone_centers.txt"),
    }
    figure_outputs = {
        "R": conf.prefix+"_conserv_cluster.R",
        "compare_pdf": conf.prefix + "_conserv_compare.pdf",
    }
    fig = attach_back(workflow,
                      PythonCommand(conservation_figures,
                                    input = figure_inputs,
                                    output = figure_outputs,
                                    param = {"id": conf.id}))
    fig.allow_fail = True
    fig.allow_dangling = True
Ejemplo n.º 57
0
def tex_frip(workflow, conf):
    """Queue the Python step that renders the FRiP JSON into LaTeX.

    The command is allowed to fail or dangle.

    :param workflow: workflow the command is appended to
    :param conf: parsed config object
    """
    sources = {
        "json": conf.json_prefix + "_frip.json",
        "template": resource_filename("chilin2.modules.frip", "frip.tex"),
    }
    render = PythonCommand(load_latex,
                           input=sources,
                           output={"latex": conf.latex_prefix + "_frip.tex"})
    queued = attach_back(workflow, render)
    queued.allow_dangling = True
    queued.allow_fail = True
Ejemplo n.º 58
0
def stat_motif(workflow, conf):
    """Queue the Python step that collects motif results into JSON.

    The command is allowed to fail or dangle.

    :param workflow: workflow the command is appended to
    :param conf: parsed config object
    """
    seqpos_dir = conf.prefix + "_seqpos/"
    settings = {
        "prefix": conf.prefix + "_seqpos/seqLogo/",
        "z_score_cutoff": -1
    }
    collector = PythonCommand(
        stat_seqpos,
        input={"seqpos": seqpos_dir + "motif_list.json"},
        output={"json": conf.json_prefix + "_seqpos.json"},
        param=settings,
        name="collect motif info")
    queued = attach_back(workflow, collector)
    queued.allow_fail = True
    queued.allow_dangling = True