def test_multideps(self):
    """Run a pipeline where one process depends on two start processes.

    ``pr1`` and ``pr2`` each echo their own input; ``pr3`` depends on both
    and echoes the two incoming values concatenated. The stdout file of
    each of the three ``pr3`` jobs is then checked for the expected pair.
    """
    ppl = pyppl()
    pr1 = proc("A")
    pr2 = proc("B")
    pr3 = proc("C")
    p1ch = [('a', ), ('b', ), ('c', )]
    p2ch = [(1, ), (2, ), (3, )]
    pr1.input = {'input': channel.create(p1ch)}
    pr2.input = {'input': channel.create(p2ch)}
    pr1.output = 'o:{{input}}'
    pr2.output = 'o:{{input}}'
    pr3.input = 'in1, in2'
    pr3.output = 'o:{{in1}}{{in2}}'
    pr3.depends = [pr1, pr2]
    pr1.script = "echo {{input}}"
    pr2.script = "echo {{input}}"
    pr3.script = "echo {{in1}}{{in2}}"
    ppl.starts(pr1, pr2).run()

    expected = ['a1', 'b2', 'c3']
    outfiles = [
        os.path.join(pr3.workdir, '%d/job.stdout' % index)
        for index in range(len(expected))
    ]
    # Check existence of every file first so a missing file is reported as
    # such rather than as an IOError from open().
    for outfile in outfiles:
        self.assertTrue(os.path.exists(outfile))
    for outfile, content in zip(outfiles, expected):
        # Context manager so the handle is closed even if the assert fails.
        with open(outfile) as handle:
            self.assertEqual(handle.read().strip(), content)
def testIscached(self):
    """Exercise ``proc._isCached`` under several configurations.

    Covers: caching switched off, a dependency flagged as not cached, and
    the case where only the first job has been cached.
    """
    p = proc('iscached')
    p.props['logger'] = self.logger
    p.script = "echo 1"

    # Caching disabled on the process.
    p.cache = False
    self.assertFalse(p._isCached())

    # Caching enabled, but the process it depends on is marked not cached.
    p.cache = True
    p2 = proc('iscached')
    p2.props['cached'] = False
    p.depends = p2
    self.assertFalse(p._isCached())

    # No dependencies, ten jobs, nothing cached yet.
    p.depends = []
    p.input = {'a': range(10)}
    p._tidyBeforeRun()
    self.assertFalse(p._isCached())
    self.assertEqual(p.ncjobids, range(10))

    # Cache only job 0: the process as a whole is still reported as not
    # cached, and job 0 is no longer listed in ncjobids.
    p.jobs[0].init()
    p.jobs[0].cache()
    self.assertTrue(p.jobs[0].isTrulyCached())
    self.assertFalse(p._isCached())
    self.assertEqual(p.ncjobids, range(1, 10))
def testInput(self):
    """Check how assigning ``input`` on an aggr reaches its start processes."""
    # Single start process: the data is attached to p1's input keys.
    p1 = proc('commp')
    p2 = proc('commp')
    p1.input = "i11, i12"
    p2.input = "i21, i22"
    a = aggr(p1, p2)
    a.input = [(1, 2)]
    self.assertEqual(p1.input, {"i11, i12": [(1, 2)]})

    # Multiple start processes.
    p3 = proc('commp')
    p4 = proc('commp')
    p5 = proc('commp')
    p3.input = "i31, i32"
    p4.input = "i41, i42"
    a2 = aggr(p3, p4, p5, False)
    p5.depends = [p3, p4]
    a2.starts = [p3, p4]
    self.assertEqual(a2.starts, [p3, p4])
    self.assertEqual(a2.ends, [p5])

    # With p3's input already a dict, assigning input on the aggr must fail.
    p3.input = {p3.input: []}
    self.assertIs(p3, a2.p3_commp)
    # (A 'Not enough data' check for a too-short tuple was disabled in the
    # original test and is not re-enabled here.)
    with self.assertRaisesRegexp(RuntimeError, r'Expect list or str for'):
        a2.input = [(1, 2, 3, 4)]

    # With plain key strings restored, the tuple is split across p3 and p4.
    a2.p3_commp.input = "i31, i32"
    a2.p4_commp.input = "i41, i42"
    a2.input = [(1, 2, 3, 4)]
    self.assertEqual(p3.input["i31, i32"], [(1, 2)])
    self.assertEqual(p4.input["i41, i42"], [(3, 4)])
def testSetattr(self):
    """Check attribute assignment on a proc: rejection, alias, input, depends."""
    p = proc('setattr')

    # Assigning the attribute 'a' is expected to raise ValueError.
    with self.assertRaises(ValueError):
        p.a = 1

    # A regular property is stored in config and recorded in `sets`.
    p.tag = 'setattr2'
    self.assertEqual(p.tag, 'setattr2')
    self.assertEqual(p.config['tag'], 'setattr2')
    self.assertIn('tag', p.sets)

    # alias: exdir -> exportdir
    p.exdir = "./"
    self.assertEqual(p.exportdir, "./")

    # input: kept in config, indata stays empty at this point
    p.input = {"a": [1]}
    self.assertEqual(p.indata, {})
    self.assertEqual(p.config['input'], {'a': [1]})

    # depends: both directions of the link are maintained ...
    p2 = proc('setattr')
    p.depends = p2
    p3 = proc('setattr')
    self.assertIn(p2, p.depends)
    self.assertIn(p, p2.nexts)

    # ... and re-assigning depends removes the previous link.
    p.depends = p3
    self.assertNotIn(p2, p.depends)
    self.assertNotIn(p, p2.nexts)
    self.assertIn(p3, p.depends)
    self.assertIn(p, p3.nexts)
def test_factory(self):
    """Run a two-step pipeline and check the files exported by the second step.

    ``p1`` writes one file per input value; ``p2`` depends on it, moves each
    file to ``<name>-2.txt`` and exports to ``./workdir``.
    """
    ppl = pyppl({'proc': {'ppldir': './workdir'}})

    p1 = proc('TAG')
    self.assertTrue(isinstance(p1, proc))
    self.assertEqual(p1.tag, 'TAG')

    inch = channel.create(['a', 'b', 'c'])
    p1.tag = 'CREATE_FILE'
    p1.input = {'input': inch}
    p1.script = "echo {{input}} > {{outfile}}"
    p1.output = "o:{{input}}, outfile:file:{{input}}.txt"
    p1.cache = False

    p2 = proc("MOVE_FILE")
    p2.input = "input, infile:file"
    p2.output = "outfile:file:{{infile | fn}}-2.txt"
    p2.script = "mv {{infile}} {{outfile}}; cp {{outfile}} {{infile}}"
    p2.depends = p1
    p2.exportdir = './workdir'
    p2.cache = False
    p2.forks = 3

    ppl.starts(p1)
    ppl.run()

    # One exported file per input value.
    for stem in ('a', 'b', 'c'):
        self.assertTrue(os.path.exists('./workdir/%s-2.txt' % stem))
def testCommprops(self):
    """Check that properties assigned on an aggr show up on its processes."""
    p1 = proc('commp')
    p2 = proc('commp')
    a = aggr(p1, p2)
    a.forks = 10
    a.exportdir = './'

    # forks is checked on every member process.
    for member in a.procs:
        self.assertEqual(member.forks, 10)
    # exportdir is only checked on p2, exactly as the original test did.
    self.assertEqual(p2.exportdir, './')
def testCopy(self):
    """Copy an aggr and check the ids/tags of the copy and its processes.

    The expected ids ('a', 'a2') equal the local variable names, so the
    variables below must keep their names.
    """
    p1 = proc('copy')
    p2 = proc('copy')
    a = aggr(p1, p2)
    a2 = a.copy()
    p3 = a2.procs[0]
    p4 = a2.procs[1]

    self.assertEqual(a.id, 'a')
    self.assertEqual(a2.id, 'a2')
    # Copied processes keep the source ids and carry the tag 'aggr'.
    self.assertEqual(p3.id + '.' + p3.tag, 'p1.aggr')
    self.assertEqual(p4.id + '.' + p4.tag, 'p2.aggr')
def testDepends(self):
    """Check dependency wiring into and out of an aggr."""
    p1 = proc('dep')
    p2 = proc('dep')
    p3 = proc('dep')
    p4 = proc('dep')
    a = aggr(p1, p2)

    # Dependencies assigned on the aggr end up on p1.
    a.depends = [p3, p4]
    self.assertEqual(p1.depends, [p3, p4])

    # A process depending on the aggr ends up depending on p2.
    p5 = proc('dep')
    p5.depends = a
    self.assertEqual(p5.depends, [p2])
def testConfig(self):
    """Run the same process under several config profiles and check its runner."""
    config = {
        'proc': {},
        'sge': {
            'runner': 'ssh',
            'sshRunner': {'servers': ['franklin01']},
        },
        'ssh2': {
            'runner': 'local',
        },
        'ssh': {
            'sshRunner': {'servers': ['franklin01']},
        },
    }
    p = proc()
    p.script = "echo 1"
    p.input = {'a': [1]}

    # No profile given.
    pyppl(config, '').starts(p).run()
    self.assertEqual(p.runner, 'local')

    # (profile passed to run, runner expected on the process afterwards)
    cases = (
        ('ssh', 'ssh'),
        ('ssh2', 'local'),
        ('sge', 'ssh'),
    )
    for profile, expected_runner in cases:
        pyppl(config, '').starts(p).run(profile)
        self.assertEqual(p.runner, expected_runner)
def testSuffix(self):
    """Rebuild the expected suffix by hand and compare it with ``p._suffix()``.

    The expected value is ``utils.uid`` of the pickled string form of a
    filtered copy of the process config.
    """
    p = proc('tag_unique')

    skipped = [
        'workdir', 'forks', 'cache', 'retcodes', 'echo', 'runner',
        'exportdir', 'exporthow', 'exportow', 'errorhow', 'errorntry'
    ]
    # Note the precedence: a key is kept when it is not in `skipped`, OR when
    # it ends with 'Runner'.
    config = {}
    for key, val in p.config.iteritems():
        if key not in skipped or key.endswith('Runner'):
            config[key] = val
    config['id'] = p.id
    config['tag'] = p.tag

    if 'callback' in config:
        config['callback'] = utils.funcsig(config['callback'])

    # proc is not picklable: replace dependencies by "<id>.<tag>" strings.
    if 'depends' in config:
        depends = config['depends']
        if isinstance(depends, proc):
            depends = [depends]
        elif isinstance(depends, aggr):
            depends = depends.procs
        config['depends'] = [
            depend.id + '.' + depend.tag for depend in depends
        ]

    if 'input' in config and isinstance(config['input'], dict):
        config['input'] = copy.deepcopy(config['input'])
        for key, val in config['input'].iteritems():
            # NOTE(review): spelled `funcSig` here but `funcsig` for the
            # callback above -- confirm which name `utils` actually exports.
            config['input'][key] = utils.funcSig(val) if callable(
                val) else val

    signature = pickle.dumps(str(config))
    self.assertEqual(p._suffix(), utils.uid(signature))
def test_sge(self):
    """Run a ten-job process with the 'sge' profile (no assertions)."""
    ppl = pyppl()
    p1 = proc()
    # ('a') is just the string 'a', so this is a list of ten 'a' strings.
    ten_inputs = [('a')] * 10
    p1.input = {"input": channel.create(ten_inputs)}
    p1.workdir = './workdir'
    p1.forks = 3
    # NOTE(review): single braces here, unlike the "{{...}}" placeholders
    # used by the other tests -- confirm this is intended.
    p1.script = "echo {input}"
    ppl.starts(p1).run('sge')
def testAggr(self):
    """Run an aggr followed by an end process and check the final channel.

    Every process appends its own id and tag to the incoming value; the
    run itself is expected to raise SystemExit.
    """
    pa = proc('aggr')
    pb = proc('aggr')
    pa.script = 'echo 1'
    pb.script = 'echo 2'
    a = aggr(pa, pb)
    pe = proc('end')
    pe.depends = a

    # Processes are reached through the aggr as "<id>_<tag>" attributes.
    a.pa_aggr.input = "input"
    a.pa_aggr.output = "out:{{input}}.{{proc.id}}.{{proc.tag}}"
    a.pb_aggr.input = "input"
    a.pb_aggr.output = "out:{{input}}.{{proc.id}}.{{proc.tag}}"
    a.input = ["AGGR"]
    pe.input = "input"
    pe.output = "out:{{input}}.{{proc.id}}.{{proc.tag}}"

    # Build the pipeline outside assertRaises so only run() is guarded.
    run_pipeline = pyppl().starts(a).run
    self.assertRaises(SystemExit, run_pipeline)
    self.assertEqual(pe.channel, [('AGGR.pa.aggr.pb.aggr.pe.end', )])
def testIgnore(self):
    """Run a job with a random exit code (0-3) under errhow='ignore'.

    There are no assertions; the test only requires the run to complete.
    """
    pIgnore = proc()
    pIgnore.input = {"input": [1]}
    pIgnore.output = "outfile:file:a.txt"
    pIgnore.script = "echo {{input}} > {{outfile}}; exit $(($RANDOM % 4))"
    pIgnore.errhow = "ignore"
    pIgnore.cache = False

    pipeline = pyppl().starts(pIgnore)
    pipeline.run()
def testIsRunning(self):
    """Run five sleeping jobs with the 'sge' runner and forks=5 (no assertions)."""
    pIsRunning = proc()
    pIsRunning.input = {"a": [1, 2, 3, 4, 5]}
    # takes time to start
    pIsRunning.script = "sleep 5"
    pIsRunning.runner = "sge"
    pIsRunning.forks = 5

    pipeline = pyppl().starts(pIsRunning)
    pipeline.run()
def testReadconfig(self):
    """Check that ``_readConfig`` does not override values set explicitly."""
    p = proc('tag')
    p.tag = 'notag'
    p.forks = 1

    incoming = {'tag': 'whatevertag', 'forks': 10}
    p._readConfig(incoming)

    # Both the attributes and the stored config keep the explicit values.
    self.assertEqual(p.tag, 'notag')
    self.assertEqual(p.forks, 1)
    self.assertEqual(p.config['forks'], 1)
def testError(self):
    """Run a job with a random exit code (0-2) under errhow='retry'.

    Up to ``errntry = 10`` retries are configured. There are no assertions;
    the test only requires the run to complete.
    """
    pError = proc()
    pError.input = {"input": [1]}
    pError.output = "outfile:file:a.txt"
    pError.script = "echo {{input}} > {{outfile}}; exit $(($RANDOM % 3))"
    pError.errhow = "retry"
    pError.errntry = 10
    pError.cache = False

    pipeline = pyppl().starts(pError)
    pipeline.run()
def testBuildJobs(self):
    """Check that ``_tidyBeforeRun`` builds one job per input and fills the channel."""
    p = proc('buildjobs')
    p.props['logger'] = self.logger
    p.input = {"a": range(10)}
    p.output = "x:{{a | lambda x: x*2}}"
    p._tidyBeforeRun()

    self.assertEqual(len(p.jobs), 10)

    # Channel values are converted to int before comparing with 0, 2, ..., 18.
    actual = p.channel.map(lambda x: (int(x[0]), ))
    expected = channel.create(xrange(0, 20, 2))
    self.assertEqual(actual, expected)
def testBuildProps(self):
    """Check the properties derived by ``_buildProps``.

    The expected ids ('p1', 'p2') equal the local variable names, so the
    variables below must keep their names.
    """
    p1 = proc('tag1')
    p2 = proc('tag2')
    p2.depends = p1
    p2.retcodes = "0, 1"
    p2._buildProps()

    self.assertEqual(p2.depends, [p1])
    # The comma-separated string is turned into a list of ints.
    self.assertEqual(p2.retcodes, [0, 1])

    # The working directory is named after the process name and suffix,
    # and exists after _buildProps.
    dirname = "PyPPL.%s.%s" % (p2._name(False), p2._suffix())
    self.assertEqual(p2.workdir, os.path.join(p2.ppldir, dirname))
    self.assertTrue(os.path.exists(p2.workdir))

    self.assertEqual(p1.nexts, [p2])
    self.assertEqual(p1.id, 'p1')
    self.assertEqual(p2.id, 'p2')
    self.assertEqual(p2.jobs, [])

    # A second process created with the same variable name and tag is
    # expected to make _buildProps raise.
    p2 = proc('tag2')
    self.assertRaises(Exception, p2._buildProps)
def testCopy(self):
    """Copy a proc with a new tag and check what the copy inherits.

    The expected id ('pCopy') equals the local variable name, so that
    variable must keep its name.
    """
    p = proc('copy')
    p.script = 'echo {#}'
    p.exportdir = rootdir
    pCopy = p.copy('procCopy')

    # New identity ...
    self.assertEqual(pCopy.id, 'pCopy')
    self.assertEqual(pCopy.tag, 'procCopy')
    # ... same settings as the source process.
    self.assertEqual(pCopy.exportdir, rootdir)
    self.assertEqual(pCopy.script, p.script)
def testCallback(self):
    """Run a process whose callback merges extra columns into its channel.

    ``p1`` takes its input from a temporarily replaced ``sys.argv``; its
    callback merges a two-column channel into ``p1.channel`` so that ``p2``
    can consume three inputs. There are no assertions; the test only
    requires the run to complete.
    """
    p1 = proc('callback')
    p2 = proc('callback')

    def callback2(s):
        ch = channel.create([('a1', 'b'), ('x', 'y')])
        s.channel.merge(ch)

    argv = sys.argv[:]
    sys.argv = ['0', '1', '2']
    try:
        p1.input = {"input": channel.fromArgv()}
        p1.output = "output:{{input}}2"
        p1.script = "echo {{output}}"
        p1.callback = callback2
        p2.depends = p1
        p2.input = "input, in1, in2"
        p2.script = "echo {{output}}"
        p2.output = "output:{{input}}.{{in1}}.{{in2}}"
        pyppl().starts(p1).run()
    finally:
        # Restore the real argv even when the run fails, so the fake
        # arguments do not leak into later tests.
        sys.argv = argv
def testFlushFile(self):
    """Run a slow job that prints 1..60 with ``echo`` enabled, on the 'sge' profile.

    There are no assertions; the test only requires the run to complete.
    """
    pFF = proc()
    pFF.input = {"input": [1]}
    # Each shell statement needs its own line: written on a single line,
    # `sleep 1 done` becomes arguments of `echo` and the `for` loop is never
    # closed.
    pFF.script = "\n".join([
        "",
        "for i in $(seq 1 60); do",
        "echo $i",
        "sleep 1",
        "done",
        "",
    ])
    pFF.echo = True
    pyppl().starts(pFF).run('sge')
def testRunnerWait(self):
    """Submit the first job through ``runner``, wait for it, and check its result."""
    p = proc('wait')
    p.ppldir = self.testdir
    p.script = "echo {{a}}"
    p.input = {'a': range(10)}
    p.props['logger'] = self.logger
    p._tidyBeforeRun()

    r = runner(p.jobs[0])
    r.submit()
    r.wait()

    self.assertEqual(r.job.rc(), 0)
    # Job 0 receives a=0, so its stdout file must contain '0'. Use a context
    # manager so the handle is closed even if the assertion fails.
    with open(r.job.outfile) as handle:
        self.assertEqual(handle.read().strip(), '0')
def testInit(self):
    """Check aggr construction with and without the trailing ``False`` argument.

    The expected aggr names ('a', 'a2') equal the local variable names, so
    the variables below must keep their names.
    """
    # Default construction: p2 is made to depend on p1.
    p1 = proc('aggr')
    p2 = proc('aggr')
    a = aggr(p1, p2)
    self.assertTrue(isinstance(a, aggr))
    self.assertEqual(p2.depends, [p1])
    self.assertEqual(p1.aggr, 'a')
    self.assertEqual(p2.aggr, 'a')
    self.assertEqual(a.procs, [p1, p2])
    self.assertEqual(a.starts, [p1])
    self.assertEqual(a.ends, [p2])

    # Trailing False: no dependency is set up between the processes.
    p3 = proc('aggr')
    p4 = proc('aggr')
    a2 = aggr(p3, p4, False)
    self.assertTrue(isinstance(a2, aggr))
    self.assertEqual(a2.procs, [p3, p4])
    self.assertEqual(p4.depends, [])
    self.assertEqual(p3.aggr, 'a2')
    self.assertEqual(p4.aggr, 'a2')
    self.assertEqual(a2.starts, [p3])
    self.assertEqual(a2.ends, [p4])
def testSshInit(self):
    """Check that building ``runner_ssh`` for a job yields a '<script>.ssh' file."""
    p = proc('sshinit')
    p.ppldir = self.testdir
    p.script = "sleep 3; echo {{a}}"
    p.input = {'a': range(4)}
    p.props['logger'] = self.logger
    p.sshRunner = {"servers": ['franklin01', 'franklin02']}
    p._tidyBeforeRun()

    for j in p.jobs:
        # The runner must be constructed before the wrapper file is checked.
        r = runner_ssh(j)
        wrapper = j.script + '.ssh'
        self.assertTrue(os.path.exists(wrapper))
        # The runner's command is the real path of that wrapper.
        expected_cmd = [os.path.realpath(j.script) + '.ssh']
        self.assertEqual(r.script, expected_cmd)
def testSgeInit(self):
    """Check that building ``runner_sge`` for a job yields a '<script>.sge' file."""
    p = proc('sgeinit')
    p.ppldir = self.testdir
    p.script = "sleep 3; echo {{a}}"
    p.input = {'a': range(4)}
    p.props['logger'] = self.logger
    p.sgeRunner = {"sge_q": '1-hour'}
    p._tidyBeforeRun()

    for j in p.jobs:
        # The runner must be constructed before the wrapper file is checked.
        r = runner_sge(j)
        wrapper = j.script + '.sge'
        self.assertTrue(os.path.exists(wrapper))
        # The runner's command is `qsub` followed by the wrapper's real path.
        expected_cmd = ['qsub', os.path.realpath(j.script) + '.sge']
        self.assertEqual(r.script, expected_cmd)
def testRunnerInit(self):
    """Check the initial state of a freshly constructed ``runner``."""
    p = proc('init')
    p.ppldir = self.testdir
    p.script = "echo 1"
    p.input = {'a': range(10)}
    p.props['logger'] = self.logger
    p._tidyBeforeRun()

    first_job = p.jobs[0]
    r = runner(first_job)

    # The runner wraps the job and its script ...
    self.assertEqual(first_job, r.job)
    self.assertEqual([first_job.script], r.script)
    self.assertEqual(first_job.script, r.cmd2run)
    # ... and starts with ntry == 0 and p set to None.
    self.assertEqual(0, r.ntry)
    self.assertEqual(None, r.p)
def testRunnerRetry(self):
    """Check retry accounting for a job whose script always fails.

    The script calls ``echo1`` (presumably a non-existent command, hence
    the expected return code 127) with ``errhow = 'retry'``.
    """
    p = proc('retry')
    p.ppldir = self.testdir
    p.script = "echo1 {{a}}"
    p.input = {'a': range(10)}
    p.errhow = 'retry'
    p.props['logger'] = self.logger
    p._tidyBeforeRun()

    r = runner(p.jobs[0])
    r.submit()
    r.wait()
    r.finish()

    # One more try than the configured errntry.
    expected_tries = p.errntry + 1
    self.assertEqual(r.ntry, expected_tries)
    self.assertEqual(r.job.rc(), 127)
def testRunnerIsRunning(self):
    """Check ``runner.isRunning`` before, during and after the job process.

    The job script is launched directly with ``Popen`` and its pid is
    registered on the job so that ``isRunning`` can be queried.
    """
    p = proc('isrunning')
    p.ppldir = self.testdir
    p.script = "sleep .5;echo {{a}}"
    p.input = {'a': range(10)}
    p.props['logger'] = self.logger
    p._tidyBeforeRun()

    r = runner(p.jobs[0])
    self.assertFalse(r.isRunning())

    # One managed devnull handle for both streams; it is closed on exit
    # instead of leaking two anonymous file objects.
    with open(os.devnull, 'w') as devnull:
        # Separate name so the proc `p` is not shadowed by the child process.
        child = Popen(r.script, stdout=devnull, stderr=devnull, shell=False)
        try:
            r.job.id(str(child.pid))
            self.assertTrue(r.isRunning())
        finally:
            # Always reap the child, even when the assertion above fails.
            child.wait()
    self.assertFalse(r.isRunning())
def testLocalIsRunning(self):
    """Check ``runner_local.isRunning`` before, during and after each job.

    Each job script is launched directly with ``Popen`` and its pid is
    registered on the job so that ``isRunning`` can be queried.
    """
    p = proc('localisrunning')
    p.ppldir = self.testdir
    p.script = "sleep 1; echo {{a}}"
    p.input = {'a': range(4)}
    p.forks = 4
    p.props['logger'] = self.logger
    p._tidyBeforeRun()

    # One managed devnull handle shared by all children; it is closed on
    # exit instead of leaking two anonymous file objects per job.
    with open(os.devnull, 'w') as devnull:
        for j in p.jobs:
            r = runner_local(j)
            self.assertFalse(r.isRunning())
            # Separate name so the proc `p` is not shadowed by the child.
            child = Popen(r.script, stdout=devnull, stderr=devnull)
            try:
                r.job.id(str(child.pid))
                self.assertTrue(r.isRunning())
            finally:
                # Always reap the child, even when the assertion fails.
                child.wait()
            self.assertFalse(r.isRunning())
def testSshIsRunning(self):
    """Check ``runner_ssh.isRunning`` before, during and after each job.

    Each runner command is launched directly with ``Popen`` and its pid is
    registered on the job so that ``isRunning`` can be queried.
    """
    p = proc('sshisrunning')
    p.ppldir = self.testdir
    p.script = "sleep 3; echo {{a}}"
    p.input = {'a': range(4)}
    p.props['logger'] = self.logger
    p.sshRunner = {"servers": ['franklin01', 'franklin02']}
    p._tidyBeforeRun()

    # One managed devnull handle shared by all children; it is closed on
    # exit instead of leaking two anonymous file objects per job.
    with open(os.devnull, 'w') as devnull:
        for j in p.jobs:
            r = runner_ssh(j)
            self.assertFalse(r.isRunning())
            # Separate name so the proc `p` is not shadowed by the child.
            child = Popen(r.script, stdout=devnull, stderr=devnull)
            try:
                r.job.id(str(child.pid))
                self.assertTrue(r.isRunning())
            finally:
                # Always reap the child, even when the assertion fails.
                child.wait()
            self.assertFalse(r.isRunning())