# Example: sort every *.txt file found in a directory given on the command line.
from pyppl import PyPPL, Proc, Channel, params

# Set the 'prefix' option of the parameter object to '--param-'
# (presumably the command-line prefix for parameters; confirm against the
# PyPPL version in use).
params('prefix', '--param-')

# `datadir` is a required parameter with a help description.
(
    params.datadir
    .setRequired()
    .setDesc('The data directory containing the data files.')
)
# Attribute-style alternative to the chained calls above:
#   params.datadir.required = True
#   params.datadir.desc = 'The data directory containing the data files.'

# NOTE: from here on `params` is rebound to the result of parsing.
params = params.parse()

pSort = Proc(desc='Sort files.')

# One input entry per *.txt file inside the requested data directory.
txt_pattern = params.datadir + '/*.txt'
pSort.input = {"infile:file": Channel.fromPattern(txt_pattern)}
pSort.output = "outfile:file:{{in.infile | fn}}.sorted"
pSort.forks = 5
pSort.exdir = './export'
# Sort each input in reverse order on the key starting at field 1.
pSort.script = """ sort -k1r {{in.infile}} > {{out.outfile}} """

PyPPL().start(pSort).run()
# Example: re-align BAM files with a chain of predefined processes.
from pyppl import PyPPL, Channel
# Predefined processes used by this pipeline.
from TCGAprocs import pBamToFastq, pAlignment, pBamSort, pBamMerge, pMarkDups

# The pipeline starts from the bam files under ./data.
pBamToFastq.input = Channel.fromPattern('./data/*.bam')

# Linear dependency chain:
#   pBamToFastq -> pAlignment -> pBamSort -> pBamMerge -> pMarkDups
pAlignment.depends = pBamToFastq  # align the reads to the reference genome
pBamSort.depends = pAlignment     # sort bam files
pBamMerge.depends = pBamSort      # merge bam files
pMarkDups.depends = pBamMerge     # mark duplicates

# Only the last process exports its results.
pMarkDups.exdir = './export/realigned_Bams'

# Configuration passed to run().
run_config = {
    'forks': 2,
    '_log': {
        'shorten': 40,
    },
}

# Specify the start process, draw the flowchart and run the pipeline.
PyPPL().start(pBamToFastq).flowchart().run(run_config)
# Example: run the sort process with the SGE runner, using mock SGE commands.
import shlex
from pathlib import Path

from pyppl import PyPPL, Proc, Channel

# The mock commands live in tests/mocks, three directory levels above this file.
mockdir = Path(__file__).resolve().parent.parent.parent / 'tests' / 'mocks'

pSort = Proc(desc='Sort files.')
pSort.input = {"infile:file": Channel.fromPattern("./data/*.txt")}
pSort.output = "outfile:file:{{i.infile | fn}}.sorted"

# Specify the runner.
pSort.runner = 'sge'
# Specify the runner options: point them at the mock sge commands.
pSort.sgeRunner = {
    "qsub": mockdir / 'qsub',
    "qstat": mockdir / 'qstat',
    "qdel": mockdir / 'qdel',
}

# Clear the mock queue log before running.
# The path is shell-quoted because it is interpolated into a command string:
# an unquoted checkout path containing spaces or shell metacharacters would
# make `rm -f` act on the wrong paths. Ordinary paths are left unchanged by
# shlex.quote, so the resulting command is the same as before for them.
pSort.preCmd = 'rm -f %s' % shlex.quote(str(mockdir / 'sge.jobs.log'))

pSort.forks = 5
pSort.exdir = './export'
# Sort each input in reverse order on the key starting at field 1.
pSort.script = """ sort -k1r {{i.infile}} > {{o.outfile}} """

PyPPL().start(pSort).run()
# Example: sort files using a user-defined template filter passed through `envs`.
from os import path

from pyppl import PyPPL, Proc, Channel
from pyparam import params


def fn(fpath):
    """Return the base name of ``fpath`` with its last extension removed.

    Only the final extension is stripped, so names that differ in an inner
    dotted part stay distinct (``sample.1.txt`` -> ``sample.1``). Truncating
    at the first dot would map ``sample.1.txt`` and ``sample.2.txt`` to the
    same output name, and one result would overwrite the other.

    Args:
        fpath: Path of the file, as a string.

    Returns:
        The file name without its directory part and without its last
        extension.
    """
    return path.splitext(path.basename(fpath))[0]


# `datadir` is a required parameter with a help description.
params.datadir.required = True
params.datadir.desc = 'The data directory containing the data files.'
# NOTE: from here on `params` is rebound to the result of parsing.
params = params._parse()

pSort = Proc(desc='Sort files.')
pSort.input = {
    # path.join avoids a doubled separator when datadir ends with one.
    "infile:file": Channel.fromPattern(path.join(params['datadir'], '*.txt'))
}
pSort.output = "outfile:file:{{i.infile | fn}}.sorted"
pSort.forks = 5
pSort.exdir = './export'
# Register the helper under the name `fn`, which the output template above
# references as a filter.
pSort.envs.fn = fn
# Sort each input in reverse order on the key starting at field 1.
pSort.script = """ sort -k1r {{i.infile}} > {{o.outfile}} """

PyPPL().start(pSort).run()