def dataProvider_testBuildInput(self):
    pBuildInputDep = Proc()
    pBuildInputDep.props['channel'] = []
    pBuildInput = Proc()
    pBuildInput.depends = pBuildInputDep
    yield pBuildInput, {}, {}
    yield pBuildInput, 'a,b', {
        'a': {'data': [], 'type': 'var'},
        'b': {'data': [], 'type': 'var'}
    }
    yield pBuildInput, 'a:unknowntype', {}, ProcInputError, 'Unknown input type'

    pBuildInputDep1 = Proc()
    pBuildInputDep1.props['channel'] = Channel.create([1, 2])
    pBuildInputDep2 = Proc()
    pBuildInputDep2.props['channel'] = Channel.create([3, 4])
    pBuildInput1 = Proc()
    pBuildInput1.depends = pBuildInputDep1, pBuildInputDep2
    yield pBuildInput1, 'a,b', {
        'a': {'data': [1, 2], 'type': 'var'},
        'b': {'data': [3, 4], 'type': 'var'}
    }

    pBuildInput2 = Proc()
    pBuildInput2.depends = pBuildInputDep1, pBuildInputDep2
    yield pBuildInput2, 'a', {
        'a': {'data': [1, 2], 'type': 'var'}
    }, None, None, ['Not all data are used as input, 1 column(s) wasted.']

    pBuildInput3 = Proc()
    pBuildInput3.depends = pBuildInputDep1, pBuildInputDep2
    yield pBuildInput3, {'a,b,c': lambda ch1, ch2: ch1.cbind(ch2)}, {
        'a': {'data': [1, 2], 'type': 'var'},
        'b': {'data': [3, 4], 'type': 'var'},
        'c': {'data': ['', ''], 'type': 'var'}
    }, None, None, ['No data found for input key "c", use empty strings/lists instead.']

    pBuildInput4 = Proc()
    yield pBuildInput4, {
        'a': [1],
        'b': 2,
        'c': [1, 2],
        'd:files': [[self.testdir, self.testdir]]
    }, {
        'a': {'data': [1, 1], 'type': 'var'},
        'b': {'data': [2, 2], 'type': 'var'},
        'c': {'data': [1, 2], 'type': 'var'},
        'd': {'data': [[self.testdir, self.testdir], [self.testdir, self.testdir]], 'type': 'files'},
    }

    pBuildInput5 = Proc()
    pBuildInput5.ppldir = self.testdir
    pBuildInput5.input = {
        'a': ['h"i\'nihao'],
        'b': 2,
        'c': [1, 2],
        'd:files': [[self.testdir, self.testdir]]
    }
    with helpers.log2str():
        pBuildInput5._buildInput()
        pBuildInput5._buildProps()
        pBuildInput5._saveSettings()
    pBuildInput5.props['resume'] = 'skip+'
    yield pBuildInput5, {}, {
        'a': {'data': ['h"i\'nihao', 'h"i\'nihao'], 'type': 'var'},
        'b': {'data': [2, 2], 'type': 'var'},
        'c': {'data': [1, 2], 'type': 'var'},
        'd': {'data': [[self.testdir, self.testdir], [self.testdir, self.testdir]], 'type': 'files'},
    }

    pBuildInput6 = Proc()
    pBuildInput6.ppldir = self.testdir
    pBuildInput6.props['resume'] = 'skip+'
    yield pBuildInput6, {}, {}, ProcInputError, r'Cannot parse input for skip\+/resume process, no such file:'
def testBuildJobs(self, p, size, channel, chkeys, errs = []):
    with helpers.log2str(levels = 'all') as (out, err):
        p._buildJobs()
    stderr = err.getvalue()
    self.assertEqual(len(p.jobs), size)
    channel = Channel.create(channel)
    self.assertListEqual(p.channel, channel)
    for i, key in enumerate(chkeys):
        self.assertListEqual(getattr(p.channel, key), channel.colAt(i))
    for err in errs:
        self.assertIn(err, stderr)
        stderr = stderr[(stderr.find(err) + len(err)):]
params = params.parse()
starts = []
saminfo = SampleInfo(params.saminfo)

aPrepareBam.pFastq2Sam.args.tool = 'bowtie2'
aPrepareBam.off('qc')

pBamDir = pFiles2Dir
pBamDir.runner = 'local'

if params.intype == 'ebam':
    #aPrepareBam.input = [Channel.fromPattern(path.join(params.indir, '*.bam'))]
    aPrepareBam.on('ebam')
    aPrepareBam.off('fastq')
    aPrepareBam.input = [Channel.create(saminfo.toChannel(params.indir)).unique()]
    if params.compress:
        aPrepareBam.args.gz = True
        aPrepareBam.pFastq2Sam.args.outfmt = 'bam'
    pBamDir.depends = aPrepareBam
    pBamDir.input = lambda ch: [ch.flatten()]
    starts.append(aPrepareBam)
elif params.intype == 'fq' or params.intype == 'fastq':
    # paired-end fastq files
    # *.fq, *.fq.gz, *.fastq, *.fastq.gz
    # sample info should be:
    # +--------------+----------+---------+
    # | Sample       | Patient  | Group   |
from pyppl import PyPPL, Proc, Channel, params

params('prefix', '--param-')
params.datadir \
    .setRequired() \
    .setDesc('The data directory containing the data files.')
# or
# params.datadir.required = True
# params.datadir.desc = 'The data directory containing the data files.'
params = params.parse()

pSort = Proc(desc='Sort files.')
pSort.input = {"infile:file": Channel.fromPattern(params.datadir + '/*.txt')}
pSort.output = "outfile:file:{{in.infile | fn}}.sorted"
pSort.forks = 5
pSort.exdir = './export'
pSort.script = """
sort -k1r {{in.infile}} > {{out.outfile}}
"""

PyPPL().start(pSort).run()
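# Hypothetical invocation (script name 'sort.py' assumed, not from the original):
# with the '--param-' prefix set above, the 'datadir' option is expected to be
# passed on the command line as, for example:
#   python sort.py --param-datadir /path/to/data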
from pathlib import Path
from pyppl import PyPPL, Proc, Channel

mockdir = Path(__file__).resolve().parent.parent.parent / 'tests' / 'mocks'

pSort = Proc(desc='Sort files.')
pSort.input = {"infile:file": Channel.fromPattern("./data/*.txt")}
pSort.output = "outfile:file:{{i.infile | fn}}.sorted"
# specify the runner
pSort.runner = 'sge'
# specify the runner options
# using mock sge commands
pSort.sgeRunner = {
    "qsub": mockdir / 'qsub',
    "qstat": mockdir / 'qstat',
    "qdel": mockdir / 'qdel',
}
pSort.preCmd = 'rm -f %s' % (mockdir / 'sge.jobs.log')  # clear the queue
pSort.forks = 5
pSort.exdir = './export'
pSort.script = """
sort -k1r {{i.infile}} > {{o.outfile}}
"""

PyPPL().start(pSort).run()
)
aBam2SCNV.delegate('args.nthread', 'pCNVkitCov, pCNVkitSeg')

# depends
aBam2SCNV.starts = aBam2SCNV.pBamDir, aBam2SCNV.pSampleInfo
aBam2SCNV.ends = aBam2SCNV.pCNVkit2Vcf
aBam2SCNV.pCNVkitPrepare.depends = aBam2SCNV.pBamDir, aBam2SCNV.pSampleInfo
aBam2SCNV.pCNVkitCov.depends = aBam2SCNV.pBamDir, aBam2SCNV.pSampleInfo, aBam2SCNV.pCNVkitPrepare
aBam2SCNV.pCNNDir.depends = aBam2SCNV.pCNVkitCov
aBam2SCNV.pCNVkitRef.depends = aBam2SCNV.pCNNDir, aBam2SCNV.pSampleInfo
aBam2SCNV.pCNVkitFix.depends = aBam2SCNV.pCNNDir, aBam2SCNV.pSampleInfo, aBam2SCNV.pCNVkitRef
aBam2SCNV.pCNVkitSeg.depends = aBam2SCNV.pCNVkitFix
aBam2SCNV.pCNVkitCall.depends = aBam2SCNV.pCNVkitSeg
aBam2SCNV.pCNVkit2Vcf.depends = aBam2SCNV.pCNVkitCall

# input
aBam2SCNV.pCNVkitPrepare.input = lambda ch_bamdir, ch_saminfo: [
    Channel.create(
        SampleInfo(ch_saminfo.get()).toChannel(ch_bamdir.get())
    ).unique().flatten()
]
aBam2SCNV.pCNVkitCov.input = lambda ch_bamdir, ch_saminfo, ch_target: \
    Channel.create(
        SampleInfo(ch_saminfo.get()).toChannel(ch_bamdir.get())
    ).unique().cbind(ch_target)
aBam2SCNV.pCNNDir.input = lambda ch: [ch.flatten()]
aBam2SCNV.pCNVkitRef.input = lambda ch_covs, ch_saminfo: [
    Channel.create(
        SampleInfo(ch_saminfo.get()).toChannel(ch_covs.get(), paired=True, raiseExc=False)
    ).colAt(1).unique().map(
        lambda x: (x[0].rpartition('.')[0] + '.target.cnn',
                   x[0].rpartition('.')[0] + '.antitarget.cnn')
    ).flatten()
]
aBam2SCNV.pCNVkitFix.input = lambda ch_covs, ch_saminfo, ch_ref: \
    Channel.create(
# ----------------------------------
# region Pipeline - Novobarcode

# endregion

# ----------------------------------
# region Pipeline - ShapeMapper

# ShapeMapper output folder path
shape_output_folder = os.path.join(configs.working_folder, 'shapemapper_results')

pShape = Proc(desc='Run ShapeMapper 2.x')
pShape.input = {
    "rna_item_json:var": Channel.create(rna_lib_item_object_json_list)
}
# Define the "output" channel - the "output folder"
pShape.output = "shape_output_folder:var: {}".format(shape_output_folder)
pShape.forks = 4
pShape.args.configs = configs_object_json
pShape.lang = 'python'
pShape.script = """
#!/usr/bin/env python

from neoRNA.library.shape_mapper.shape_runner import ShapeMapperRunner

ShapeMapperRunner.shape_mapper_v2({{args.configs | squote}}, {{in.rna_item_json | squote}})
"""

# endregion
from pyppl import PyPPL, Channel
# import predefined processes
from TCGAprocs import pBamToFastq, pAlignment, pBamSort, pBamMerge, pMarkDups

# Load the bam files
pBamToFastq.input = Channel.fromPattern('./data/*.bam')
# Align the reads to reference genome
pAlignment.depends = pBamToFastq
# Sort bam files
pBamSort.depends = pAlignment
# Merge bam files
pBamMerge.depends = pBamSort
# Mark duplicates
pMarkDups.depends = pBamMerge
# Export the results
pMarkDups.exdir = './export/realigned_Bams'
# Specify the start process and run the pipeline
PyPPL().start(pBamToFastq).flowchart().run({
    'forks': 2,
    '_log': {'shorten': 40}
})
from os import path
from pyppl import PyPPL, Proc, Channel
from pyparam import params

def fn(fpath):
    return path.basename(fpath).split('.')[0]

params.datadir.required = True
params.datadir.desc = 'The data directory containing the data files.'
params = params._parse()

pSort = Proc(desc='Sort files.')
pSort.input = {
    "infile:file": Channel.fromPattern(params['datadir'] + '/*.txt')
}
pSort.output = "outfile:file:{{i.infile | fn}}.sorted"
pSort.forks = 5
pSort.exdir = './export'
pSort.envs.fn = fn
pSort.script = """
sort -k1r {{i.infile}} > {{o.outfile}}
"""

PyPPL().start(pSort).run()
pBedIntersect.args.params.wb = True
pBedIntersect.args.params.wao = False

pSortInter = pSort.copy()
pSortInter.desc = 'Sort mutation and gene intersect file.'
pSortInter.depends = pBedIntersect
pSortInter.args.params.k = ['1,1', '2,2n']

pToChow = Proc(desc='Prepare files for Chow test')
pToChow.input = 'expfile:file, mutfile:file, interfile:file, covfile:file, genes, tfs'
pToChow.depends = pExpmat, pMsmat, pSortInter
pToChow.runner = params.runner
pToChow.input = lambda ch1, ch2, ch3: ch1.cbind(ch2, ch3).cbind(params.covfile).cbind(
    Channel([
        (','.join(sgenes),
         ';'.join(','.join(tf for tf in genes[sg]) for sg in sgenes))
        for sgenes in splits
    ]))
pToChow.output = [
    'outdata:file:job{{job.index + 1}}.chowdata.txt',
    'outgroup:file:job{{job.index + 1}}.chowgroup.txt',
    'outcase:file:job{{job.index + 1}}.chowcase.txt',
]
pToChow.lang = params.python
pToChow.script = 'file:scripts/ceQTL-pToChow.py'

pChow.depends = pToChow
pChow.runner = params.runner
pChow.args.plot = False
pChow.args.fdr = False
pChow.args.cov = params.covfile
pChow.args.pval = 1 if params.padj else params.pval