コード例 #1
0
	def dataProvider_testBuildInput(self):
		"""Yield the cases for the input-building test.

		Every case starts with `(proc, input, expected_input)`; some add an
		expected exception class and a message pattern, and some add a list of
		expected log messages after those.
		NOTE(review): the meaning of each position is inferred from the values
		yielded here -- confirm against the consuming test method.
		"""
		# Dependency whose channel is empty: the expected data lists are empty.
		pBuildInputDep = Proc()
		pBuildInputDep.props['channel'] = []

		pBuildInput = Proc()
		pBuildInput.depends = pBuildInputDep
		yield pBuildInput, {}, {}
		yield pBuildInput, 'a,b', {'a': {'data': [], 'type': 'var'}, 'b': {'data': [], 'type': 'var'}}
		yield pBuildInput, 'a:unknowntype', {}, ProcInputError, 'Unknown input type'

		# Two dependencies, one column each: keys a and b take one column apiece.
		pBuildInputDep1 = Proc()
		pBuildInputDep1.props['channel'] = Channel.create([1,2])
		pBuildInputDep2 = Proc()
		pBuildInputDep2.props['channel'] = Channel.create([3,4])
		pBuildInput1 = Proc()
		pBuildInput1.depends = pBuildInputDep1, pBuildInputDep2
		yield pBuildInput1, 'a,b', {'a': {'data': [1,2], 'type': 'var'}, 'b': {'data': [3,4], 'type': 'var'}}

		# Fewer keys than columns: a "wasted column" message is expected.
		pBuildInput2 = Proc()
		pBuildInput2.depends = pBuildInputDep1, pBuildInputDep2
		yield pBuildInput2, 'a', {'a': {'data': [1,2], 'type': 'var'}}, None, None, ['Not all data are used as input, 1 column(s) wasted.']

		# More keys than columns: key "c" is expected to be filled with empty strings.
		# This case uses its own process; the original yielded pBuildInput2
		# here and left pBuildInput3 unused.
		pBuildInput3 = Proc()
		pBuildInput3.depends = pBuildInputDep1, pBuildInputDep2
		yield pBuildInput3, {'a,b,c': lambda ch1, ch2: ch1.cbind(ch2)}, {'a': {'data': [1,2], 'type': 'var'}, 'b': {'data': [3,4], 'type': 'var'}, 'c': {'data': ['',''], 'type': 'var'}}, None, None, ['No data found for input key "c", use empty strings/lists instead.']

		# No dependencies, literal input data; per the expectation the shorter
		# columns are repeated so that every key has two rows.
		pBuildInput4 = Proc()
		yield pBuildInput4, {'a': [1], 'b': 2, 'c': [1,2], 'd:files':[[self.testdir, self.testdir]]}, {
			'a': {'data': [1,1], 'type': 'var'},
			'b': {'data': [2,2], 'type': 'var'},
			'c': {'data': [1,2], 'type': 'var'},
			'd': {'data': [[self.testdir,self.testdir], [self.testdir,self.testdir]], 'type': 'files'},
		}

		# Build the input and save the settings first, then switch the process
		# to 'skip+': the case passes an empty input but still expects the data
		# built above (presumably restored from the saved settings -- confirm).
		pBuildInput5 = Proc()
		pBuildInput5.ppldir = self.testdir
		pBuildInput5.input  = {'a': ['h"i\'nihao'], 'b': 2, 'c': [1,2], 'd:files':[[self.testdir, self.testdir]]}
		with helpers.log2str():
			pBuildInput5._buildInput()
			pBuildInput5._buildProps()
			pBuildInput5._saveSettings()
		pBuildInput5.props['resume'] = 'skip+'
		yield pBuildInput5, {}, {
			'a': {'data': ['h"i\'nihao','h"i\'nihao'], 'type': 'var'},
			'b': {'data': [2,2], 'type': 'var'},
			'c': {'data': [1,2], 'type': 'var'},
			'd': {'data': [[self.testdir,self.testdir], [self.testdir,self.testdir]], 'type': 'files'},
		}

		# 'skip+' without any saved settings: a ProcInputError is expected.
		# Raw string: the backslash escapes "+" for the regex match and is not
		# a valid Python string escape.
		pBuildInput6 = Proc()
		pBuildInput6.ppldir = self.testdir
		pBuildInput6.props['resume'] = 'skip+'
		yield pBuildInput6, {}, {}, ProcInputError, r'Cannot parse input for skip\+/resume process, no such file:'
コード例 #2
0
	def testBuildJobs(self, p, size, channel, chkeys, errs = []):
		"""Run `p._buildJobs()` and check the jobs, channel and logged messages.

		p:       the process under test
		size:    expected number of entries in `p.jobs`
		channel: data passed to `Channel.create` to build the expected channel
		chkeys:  attribute names on `p.channel`; the i-th one must equal
		         column i of the expected channel
		errs:    messages that must appear in the captured stderr, in this order
		"""
		with helpers.log2str(levels = 'all') as (_, errstream):
			p._buildJobs ()
		logged = errstream.getvalue()

		self.assertEqual(len(p.jobs), size)

		expected = Channel.create(channel)
		self.assertListEqual(p.channel, expected)
		for col, attr in enumerate(chkeys):
			self.assertListEqual(getattr(p.channel, attr), expected.colAt(col))

		for msg in errs:
			self.assertIn(msg, logged)
			# Keep only the text after this message, so the next one has to
			# show up later in the log.
			cut = logged.find(msg) + len(msg)
			logged = logged[cut:]
コード例 #3
0
# Parse the declared parameters; `params` is rebound to the result of parse().
params = params.parse()

# Filled by the input-type branches below (presumably the pipeline's start
# processes -- confirm where `starts` is consumed).
starts = []

# Sample information built from the `saminfo` parameter.
saminfo = SampleInfo(params.saminfo)

# Use bowtie2 as the pFastq2Sam tool and switch off the 'qc' part of aPrepareBam.
aPrepareBam.pFastq2Sam.args.tool    = 'bowtie2'
aPrepareBam.off('qc')

# pBamDir is a second name for pFiles2Dir (plain assignment, not a copy), so
# the settings applied through pBamDir land on pFiles2Dir itself.
pBamDir         = pFiles2Dir
pBamDir.runner  = 'local'
if params.intype == 'ebam':
	# Earlier variant kept for reference: take every *.bam under params.indir.
	#aPrepareBam.input = [Channel.fromPattern(path.join(params.indir, '*.bam'))]
	# Switch the 'ebam' part on and the 'fastq' part off.
	aPrepareBam.on('ebam')
	aPrepareBam.off('fastq')
	# Input is the de-duplicated channel built from the sample info and params.indir.
	aPrepareBam.input = [Channel.create(saminfo.toChannel(params.indir)).unique()]
	if params.compress:
		aPrepareBam.args.gz = True
		aPrepareBam.pFastq2Sam.args.outfmt = 'bam'

	# pBamDir receives the flattened output channel of aPrepareBam as a single input.
	pBamDir.depends = aPrepareBam
	pBamDir.input   = lambda ch: [ch.flatten()]

	starts.append(aPrepareBam)

elif params.intype == 'fq' or params.intype == 'fastq':
	# pair-end fastq files
	# *.fq, *.fq.gz *.fastq, *.fastq.gz
	# sample info should be:
	# +--------------+----------+---------+
	# | Sample	     | Patient  | Group   |
コード例 #4
0
from pyppl import PyPPL, Proc, Channel, params

# Register '--param-' as the prefix setting of the parameter parser.
params('prefix', '--param-')

# Declare `datadir` as a required parameter with a help description.
# The same can be written with attributes instead of chained setters:
#   params.datadir.required = True
#   params.datadir.desc     = 'The data directory containing the data files.'
(
    params.datadir
    .setRequired()
    .setDesc('The data directory containing the data files.')
)

# From here on `params` is the parsed result.
params = params.parse()

pSort = Proc(desc='Sort files.')
# One input per *.txt file found in the data directory.
pSort.input = {
    "infile:file": Channel.fromPattern(params.datadir + '/*.txt'),
}
pSort.output = "outfile:file:{{in.infile | fn}}.sorted"
pSort.forks = 5
pSort.exdir = './export'
pSort.script = """
  sort -k1r {{in.infile}} > {{out.outfile}}
"""

PyPPL().start(pSort).run()
コード例 #5
0
from pathlib import Path
from pyppl import PyPPL, Proc, Channel

# Directory holding the mock SGE commands: three levels above this file,
# then tests/mocks.
mockdir = Path(__file__).resolve().parent.parent.parent / 'tests' / 'mocks'

pSort = Proc(desc='Sort files.')
pSort.input = {
    "infile:file": Channel.fromPattern("./data/*.txt"),
}
pSort.output = "outfile:file:{{i.infile | fn}}.sorted"

# Run on the 'sge' runner, pointing each SGE command at the mock script of
# the same name.
pSort.runner = 'sge'
pSort.sgeRunner = {
    command: mockdir / command
    for command in ('qsub', 'qstat', 'qdel')
}
# Clear the mock queue by removing its job log.
pSort.preCmd = 'rm -f %s' % (mockdir / 'sge.jobs.log',)
pSort.forks = 5
pSort.exdir = './export'
pSort.script = """
  sort -k1r {{i.infile}} > {{o.outfile}}
"""

PyPPL().start(pSort).run()
コード例 #6
0
)
# Delegate 'args.nthread' to pCNVkitCov and pCNVkitSeg (presumably so that the
# thread count set on the aggregation reaches those two processes -- confirm
# against the delegate() documentation).
aBam2SCNV.delegate('args.nthread', 'pCNVkitCov, pCNVkitSeg')
# Dependency wiring: the aggregation starts at pBamDir and pSampleInfo and
# ends at pCNVkit2Vcf.
aBam2SCNV.starts = aBam2SCNV.pBamDir, aBam2SCNV.pSampleInfo
aBam2SCNV.ends = aBam2SCNV.pCNVkit2Vcf
aBam2SCNV.pCNVkitPrepare.depends = aBam2SCNV.pBamDir, aBam2SCNV.pSampleInfo
aBam2SCNV.pCNVkitCov.depends = aBam2SCNV.pBamDir, aBam2SCNV.pSampleInfo, aBam2SCNV.pCNVkitPrepare
aBam2SCNV.pCNNDir.depends = aBam2SCNV.pCNVkitCov
aBam2SCNV.pCNVkitRef.depends = aBam2SCNV.pCNNDir, aBam2SCNV.pSampleInfo
aBam2SCNV.pCNVkitFix.depends = aBam2SCNV.pCNNDir, aBam2SCNV.pSampleInfo, aBam2SCNV.pCNVkitRef
aBam2SCNV.pCNVkitSeg.depends = aBam2SCNV.pCNVkitFix
aBam2SCNV.pCNVkitCall.depends = aBam2SCNV.pCNVkitSeg
aBam2SCNV.pCNVkit2Vcf.depends = aBam2SCNV.pCNVkitCall
# Input callbacks: each lambda takes one channel per dependency, in the order
# of the `depends` assignment above.
# pCNVkitPrepare: channel built from the sample info and the bam directory,
# de-duplicated and flattened, passed as a single list element.
aBam2SCNV.pCNVkitPrepare.input = lambda ch_bamdir, ch_saminfo: [
    Channel.create(SampleInfo(ch_saminfo.get()).toChannel(ch_bamdir.get())).
    unique().flatten()
]
# pCNVkitCov: the same de-duplicated sample channel (not flattened), with the
# pCNVkitPrepare output channel bound on as extra column(s).
aBam2SCNV.pCNVkitCov.input     = lambda ch_bamdir, ch_saminfo, ch_target: \
 Channel.create(
    SampleInfo(ch_saminfo.get()).toChannel(ch_bamdir.get())
).unique().cbind(ch_target)
# pCNNDir: the flattened pCNVkitCov output as one list element.
aBam2SCNV.pCNNDir.input = lambda ch: [ch.flatten()]
# pCNVkitRef: take column 1 of the paired sample channel, de-duplicate it, and
# map each entry to its '<stem>.target.cnn' / '<stem>.antitarget.cnn' pair,
# where <stem> is the entry with its last '.'-suffix removed; the pairs are
# then flattened into one list element.
aBam2SCNV.pCNVkitRef.input = lambda ch_covs, ch_saminfo: [
    Channel.create(
        SampleInfo(ch_saminfo.get()).
        toChannel(ch_covs.get(), paired=True, raiseExc=False)).colAt(1).unique(
        ).map(lambda x: (x[0].rpartition('.')[0] + '.target.cnn', x[0].
                         rpartition('.')[0] + '.antitarget.cnn')).flatten()
]
aBam2SCNV.pCNVkitFix.input     = lambda ch_covs, ch_saminfo, ch_ref: \
 Channel.create(
コード例 #7
0
# ----------------------------------
# region Pipeline - Novobarcode

# endregion

# ----------------------------------
# region Pipeline - ShapeMapper

# Folder for the ShapeMapper results, located under the configured working folder.
shape_output_folder = os.path.join(
    configs.working_folder, 'shapemapper_results'
)

pShape = Proc(desc='Run ShapeMapper 2.x')
# One input item per entry of the RNA library item JSON list.
pShape.input = {
    "rna_item_json:var": Channel.create(rna_lib_item_object_json_list),
}
# The only output is a variable carrying the results folder path.
pShape.output = "shape_output_folder:var: {}".format(shape_output_folder)
pShape.forks = 4

# The configs JSON is exposed to the script template as `args.configs`.
pShape.args.configs = configs_object_json
pShape.lang = 'python'
pShape.script = """
#!/usr/bin/env python

from neoRNA.library.shape_mapper.shape_runner import ShapeMapperRunner
ShapeMapperRunner.shape_mapper_v2({{args.configs | squote}}, {{in.rna_item_json | squote}})
"""
# endregion
コード例 #8
0
from pyppl import PyPPL, Channel
# import predefined processes
from TCGAprocs import pBamToFastq, pAlignment, pBamSort, pBamMerge, pMarkDups

# Feed the first process with every BAM file matching ./data/*.bam.
pBamToFastq.input = Channel.fromPattern('./data/*.bam')

# Chain the predefined processes, each depending on the previous step:
#   pBamToFastq -> pAlignment -> pBamSort -> pBamMerge -> pMarkDups
# (convert to fastq, align to the reference genome, sort, merge, mark duplicates)
pAlignment.depends = pBamToFastq
pBamSort.depends = pAlignment
pBamMerge.depends = pBamSort
pMarkDups.depends = pBamMerge

# Export directory for the results of the last step.
pMarkDups.exdir = './export/realigned_Bams'

# Start at pBamToFastq, request the flowchart and run with the given settings.
PyPPL().start(pBamToFastq).flowchart().run(
    {'forks': 2, '_log': {'shorten': 40}}
)
コード例 #9
0
ファイル: useParams.py プロジェクト: makerwei/PyPPL
from os import path
from pyppl import PyPPL, Proc, Channel
from pyparam import params


def fn(fpath):
    """Return the base name of *fpath* up to (not including) its first dot."""
    basename = path.basename(fpath)
    stem, _, _ = basename.partition('.')
    return stem


# `datadir` is a required parameter; the description is its help text.
params.datadir.desc = 'The data directory containing the data files.'
params.datadir.required = True

# From here on `params` is the parsed result and is read by key.
params = params._parse()

pSort = Proc(desc='Sort files.')
# One input per *.txt file found in the data directory.
pSort.input = {"infile:file": Channel.fromPattern(params['datadir'] + '/*.txt')}
pSort.output = "outfile:file:{{i.infile | fn}}.sorted"
pSort.forks = 5
pSort.exdir = './export'
# Expose the module-level fn() to the templates under the name `fn`.
pSort.envs.fn = fn
pSort.script = """
  sort -k1r {{i.infile}} > {{o.outfile}}
"""

PyPPL().start(pSort).run()
コード例 #10
0
    # Options passed to the intersect tool: `wb` on, `wao` off.
    pBedIntersect.args.params.wb = True
    pBedIntersect.args.params.wao = False

    # A copy of pSort that consumes the pBedIntersect output; `k` carries the
    # two sort keys '1,1' and '2,2n'.
    pSortInter = pSort.copy()
    pSortInter.desc = 'Sort mutation and gene intersect file.'
    pSortInter.depends = pBedIntersect
    pSortInter.args.params.k = ['1,1', '2,2n']

    pToChow = Proc(desc='Prepare files for Chow test')
    # NOTE(review): `input` is assigned twice -- first the key string here,
    # then the data-producing lambda below. Presumably the Proc setter pairs
    # the keys with the later value instead of overwriting them; confirm
    # against the pyppl version in use.
    pToChow.input = 'expfile:file, mutfile:file, interfile:file, covfile:file, genes, tfs'
    pToChow.depends = pExpmat, pMsmat, pSortInter
    pToChow.runner = params.runner
    # The lambda receives one channel per dependency, binds them column-wise,
    # then adds params.covfile and a channel with one (genes, tfs) row per
    # entry of `splits`: genes are joined with ',', and each gene's TF list is
    # joined with ',' before the lists are joined with ';'.
    pToChow.input = lambda ch1, ch2, ch3: ch1.cbind(ch2, ch3).cbind(
        params.covfile).cbind(
            Channel([(','.join(sgenes), ';'.join(','.join(tf
                                                          for tf in genes[sg])
                                                 for sg in sgenes))
                     for sgenes in splits]))
    # Three output files per job, named after the 1-based job index.
    pToChow.output = [
        'outdata:file:job{{job.index + 1}}.chowdata.txt',
        'outgroup:file:job{{job.index + 1}}.chowgroup.txt',
        'outcase:file:job{{job.index + 1}}.chowcase.txt',
    ]
    # The script is loaded from a file and run with the interpreter in params.python.
    pToChow.lang = params.python
    pToChow.script = 'file:scripts/ceQTL-pToChow.py'

    pChow.depends = pToChow
    pChow.runner = params.runner
    pChow.args.plot = False
    pChow.args.fdr = False
    pChow.args.cov = params.covfile
    # args.pval is 1 when params.padj is truthy, otherwise params.pval.
    pChow.args.pval = 1 if params.padj else params.pval