def testPandoc(self): '''Test action pandoc''' if not shutil.which('pandoc'): return script = SoS_Script(r''' [10] report: output='report.md' ## Some random figure Generated by matplotlib [100] # generate report output: 'myreport.html' pandoc(input='report.md', output=_output[0]) ''') wf = script.workflow() Base_Executor(wf).run() self.assertTrue(os.path.isfile('myreport.html')) # file_target('myreport.html').remove('both') # pandoc with specified input. script = SoS_Script(r''' [10] report: output='a.md' ## Some random figure Generated by matplotlib [100] # generate report output: 'myreport.html' pandoc(input='a.md', output=_output[0]) ''') wf = script.workflow() Base_Executor(wf).run() self.assertTrue(os.path.isfile('myreport.html')) file_target('myreport.html').remove() # # another case is no output script = SoS_Script(r''' [10] report: output='a.md' ## Some random figure Generated by matplotlib [100] # generate report pandoc(input='a.md') ''') wf = script.workflow() Base_Executor(wf).run() # test acceptance of a list of input filenames # script = SoS_Script(r''' [10] report: output='default_10.md' A_10 [20] report: output='default_20.md' A_20 [100] # generate report pandoc(input=['default_10.md', 'default_20.md'], output='output.html') ''') wf = script.workflow() Base_Executor(wf).run() for f in ['default_10.md', 'default_20.md', 'output.html']: self.assertTrue(file_target(f).target_exists()) file_target(f).remove()
def testSharedOption(self):
    '''Test shared option of task.

    Three workflows are run on the ``localhost`` queue:

    1. ``shared`` given as a mapping from a variable name to an expression.
    2. ``shared`` given as a sequence mixing a mapping and a plain name.
    3. ``shared`` used inside a ``for_each`` loop, where the step-level
       ``step_rng`` is expected to be a list whose last element equals the
       shared ``rng``.
    '''

    def discard(*names):
        # Remove leftovers of an earlier run so that the existence checks
        # below can only be satisfied by files created in this run.
        for name in names:
            if file_target(name).exists():
                file_target(name).unlink()

    discard('a.txt', 'a100.txt')
    script = SoS_Script('''
[10: shared = 'a']
output: 'a.txt'
task: shared={'a': 'int(open("a.txt").read())'}
run:
  echo 100 > a.txt

[20]
run: expand=True
    touch a{a}.txt
''')
    wf = script.workflow()
    Base_Executor(wf, config={
        'sig_mode': 'force',
        'default_queue': 'localhost'
    }).run()
    self.assertTrue(os.path.isfile("a100.txt"))

    # sequence of var or mapping; the file asserted below is a100_20.txt,
    # so it has to be part of the cleanup as well
    discard('a.txt', 'a100.txt', 'a100_20.txt')
    script = SoS_Script('''
[10: shared = ['a', 'b']]
output: 'a.txt'
task: shared=[{'a': 'int(open("a.txt").read())'}, 'b']
b = 20
run:
  echo 100 > a.txt

[20]
run: expand=True
    touch a{a}_{b}.txt
''')
    wf = script.workflow()
    Base_Executor(wf, config={
        'sig_mode': 'force',
        'default_queue': 'localhost'
    }).run()
    self.assertTrue(os.path.isfile("a100_20.txt"))

    script = SoS_Script('''
[10 (simulate): shared=['rng', 'step_rng']]
input: for_each={'i': range(5)}
task: shared='rng'
print(f"{i}")
import random
rng = random.randint(1, 1000)
''')
    wf = script.workflow()
    Base_Executor(wf, config={'default_queue': 'localhost'}).run()
    var = env.sos_dict['rng']
    self.assertIsInstance(var, int)
    self.assertIsInstance(env.sos_dict['step_rng'], list)
    self.assertEqual(env.sos_dict['step_rng'][-1], var)
def testNestedWorkflow(self):
    '''Test the creation and execution of a combined workflow.

    Step ``c`` calls ``sos_run('a+b')``; every step appends its name to the
    shared list ``executed``, which is compared (order-insensitively) with
    the full list of step names afterwards.
    '''
    self.touch(['a.txt', 'b.txt', 'b.begin'])
    nested = SoS_Script('''
if 'executed' not in locals():
    executed = []
if 'inputs' not in locals():
    inputs = []

[a_1: shared=['executed', 'inputs']]
executed.append(step_name)
inputs.append(_input)

[a_2: shared=['executed', 'inputs']]
executed.append(step_name)
inputs.append(_input)

[a_3: shared=['executed', 'inputs']]
executed.append(step_name)
inputs.append(_input)

[a_4: shared=['executed', 'inputs']]
executed.append(step_name)
output: 'a.done'
inputs.append(_input)
run: expand=True
    touch {_output}

[b_1: shared=['executed', 'inputs']]
executed.append(step_name)
input: 'b.begin'
inputs.append(_input)

[b_2: shared=['executed', 'inputs']]
executed.append(step_name)
inputs.append(_input)

[b_3: shared=['executed', 'inputs']]
executed.append(step_name)
inputs.append(_input)

[b_4: shared=['executed', 'inputs']]
executed.append(step_name)
output: 'b.txt'
inputs.append(_input)

[c: shared=['executed', 'inputs']]
executed.append(step_name)
input: 'a.txt'
output: 'b.txt'
inputs.append(_input)
sos_run('a+b', shared=['executed', 'inputs'])
''')
    env.config['sig_mode'] = 'ignore'
    Base_Executor(nested.workflow('c')).run()
    # order of execution is not guaranteed, so compare sorted lists
    expected_steps = [
        'c', 'a_1', 'a_2', 'a_3', 'a_4', 'b_1', 'b_2', 'b_3', 'b_4'
    ]
    recorded_steps = env.sos_dict['executed']
    self.assertEqual(sorted(recorded_steps), sorted(expected_steps))
    env.sos_dict.pop('executed', None)
def testLoopWiseSignature(self):
    '''Test partial signature.

    The same ``for_each`` step is run with a changing loop variable list.
    The value of ``__step_completed__`` and the modification times of the
    output files are checked after each run.
    '''
    # The three scripts only differ in the definition of ``tt``.
    template = r'''
parameter: gvar = 10

[10]
tt = %s
input: for_each='tt'
output: f"myfile_{_tt}.txt"
run: expand=True
    echo "DO {_tt}"
    echo {_tt} > {_output:q}
'''

    def steps_completed(workflow):
        outcome = Base_Executor(workflow).run()
        return outcome['__completed__']['__step_completed__']

    for idx in range(10, 12):
        stale = f'myfile_{idx}.txt'
        if file_target(stale).exists():
            file_target(stale).unlink()
    #
    wf = SoS_Script(template % '[gvar]').workflow()
    self.assertEqual(steps_completed(wf), 1)
    mtime_10 = os.path.getmtime('myfile_10.txt')
    #
    # now we modify the script
    wf = SoS_Script(template % '[gvar, gvar + 1]').workflow()
    self.assertEqual(steps_completed(wf), 0.5)
    # this file is not regenerated
    self.assertEqual(mtime_10, os.path.getmtime('myfile_10.txt'))
    mtime_11 = os.path.getmtime('myfile_11.txt')
    #
    # run it again, neither needs to be rerun
    self.assertEqual(steps_completed(wf), 0)
    self.assertEqual(mtime_10, os.path.getmtime('myfile_10.txt'))
    self.assertEqual(mtime_11, os.path.getmtime('myfile_11.txt'))
    #
    # change again, the second one is already there.
    wf = SoS_Script(template % '[gvar + 1]').workflow()
    self.assertEqual(steps_completed(wf), 0)
    self.assertEqual(mtime_11, os.path.getmtime('myfile_11.txt'))
    #
    # each output holds its loop value; remove the files afterwards
    for value in range(10, 12):
        produced = 'myfile_{}.txt'.format(value)
        with open(produced) as handle:
            self.assertEqual(handle.read().strip(), str(value))
        file_target(produced).unlink()
def testActiveActionOption(self):
    '''Test the active option of actions.

    For each value of ``active`` a five-iteration ``for_each`` loop is
    executed twice, once with the option on the ``run`` action and once on
    the ``task`` directive, and the files created under ``temp`` are
    compared with the expected ones.
    '''
    # disallow: an unparenthesised ``1,2`` must fail to parse
    self.assertRaises(
        ParsingError, SoS_Script, '''
[1]
rep = range(5)
input: for_each = 'rep'
# ff should change and be usable inside run
ff = f"{_rep}.txt"
run: expand=True, active=1,2
    echo {ff}
    touch temp/{ff}
''')
    #
    action_template = '''
[1]
rep = range(5)
input: for_each = 'rep'
# ff should change and be usable inside run
ff = f"{_rep}.txt"
run: expand=True, active=%s
    echo {ff}
    touch temp/{ff}
'''
    task_template = '''
[1]
rep = range(5)
input: for_each = 'rep'
# ff should change and be usable inside run
ff = f"{_rep}.txt"
task: active=%s
run: expand=True
    echo {ff}
    touch temp/{ff}
'''
    expectations = [
        ('0', ['temp/0.txt']),
        ('-1', ['temp/4.txt']),
        ('(1,2)', ['temp/1.txt', 'temp/2.txt']),
        ('[2,3]', ['temp/2.txt', 'temp/3.txt']),
        ('(0,2,4)', ['temp/0.txt', 'temp/2.txt', 'temp/4.txt']),
        ('slice(1,None)',
         ['temp/1.txt', 'temp/2.txt', 'temp/3.txt', 'temp/4.txt']),
        ('slice(1,-2)', ['temp/1.txt', 'temp/2.txt']),
        ('slice(None,None,2)', ['temp/0.txt', 'temp/2.txt', 'temp/4.txt']),
    ]
    for active, result in expectations:
        if os.path.isdir('temp'):
            shutil.rmtree('temp')
        # first the option of the action, then the option of the task; only
        # the second comparison carries a failure message
        variants = (
            (action_template, None),
            (task_template, 'With option {}'.format(active)),
        )
        for template, message in variants:
            os.mkdir('temp')
            script = SoS_Script((template % active).replace('/', os.sep))
            wf = script.workflow()
            env.config['sig_mode'] = 'force'
            env.config['wait_for_task'] = True
            Host.reset()
            Base_Executor(wf).run()
            created = list(glob.glob(os.path.join('temp', '*.txt')))
            wanted = [x.replace('/', os.sep) for x in result]
            self.assertEqual(sorted(created), sorted(wanted), message)
            shutil.rmtree('temp')
def testInputTarget(self):
    '''Test parameters declared as targets or paths.

    For ``file_target``/``sos_targets`` and ``path``/``paths``, and for the
    parameter names ``b`` and ``a_b`` (passed as ``--b`` and ``--a-b``),
    the parameter is declared either by type or by a default value, and
    the class name of the resulting ``env.sos_dict`` entry is checked
    after a dryrun.
    '''
    for single, multiple in (('file_target', 'sos_targets'),
                             ('path', 'paths')):
        for name in ('b', 'a_b'):
            option = '--' + name.replace('_', '-')
            # (declaration, command line values, expected class name)
            cases = (
                (f"{name} : {single}", ['aaa'], single),
                (f"{name} = {single}('file')", ['aaa'], single),
                (f"{name} : {multiple}", ['aaa', 'bbb'], multiple),
                (f"{name} = {multiple}('file')", ['aaa'], multiple),
            )
            for declaration, values, expected in cases:
                script = SoS_Script(f'''
parameter: {declaration}
[0]
''')
                wf = script.workflow()
                Base_Executor(wf, args=[option] + values).run(mode='dryrun')
                self.assertEqual(env.sos_dict[name].__class__.__name__,
                                 expected)
def test_analyze_section(self):
    '''Test analysis of sections (statically).

    Every section of workflow ``A`` is passed to ``analyze_section`` and
    the reported inputs, outputs, dependencies and variable sets are
    checked for steps 1, 2, 4 and 5.
    '''
    script = SoS_Script('''
g1 = 'a'
g2 = 1
parameter: p1 = 5
parameter: infiles = 'a.txt'

[A_1: shared='b']
b = p1 + 2
input: infiles
output: None
c = 5

[A_2]
b = [1, 2, 3]
input: for_each='b'
depends: 'some.txt', executable('ls')
import time
import random

r = random.randint(1, 5)
time.sleep(r)

[A_3]
input: None
print(p1)

[A_4]
input: None
task:
python: expand=True
   print(f'{output}')

[A_5]
task:
print(f'{_output}')
''')
    wf = script.workflow('A')
    # The executor is created but not run; the analysis below is static.
    Base_Executor(wf)
    for section in wf.sections:
        analysis = analyze_section(section)
        step_index = section.names[0][1]
        if step_index == '1':
            self.assertTrue(analysis['step_input'].undetermined())
            self.assertEqual(analysis['step_depends'], sos_targets())
            self.assertEqual(analysis['step_output'], sos_targets())
            self.assertEqual(analysis['environ_vars'],
                             {'b', 'p1', 'infiles'})
            self.assertEqual(analysis['signature_vars'], {'c'})
            self.assertEqual(analysis['changed_vars'], {'b'})
        elif step_index == '2':
            self.assertEqual(analysis['step_input'], sos_targets())
            self.assertEqual(analysis['step_depends'],
                             sos_targets('some.txt', executable('ls')))
            self.assertTrue(analysis['step_output'].unspecified())
            # for_each will not be used for DAG
            self.assertEqual(analysis['environ_vars'],
                             {'b', 'for_each', 'executable'})
            self.assertEqual(analysis['signature_vars'],
                             {'r', 'time', 'random'})
            self.assertEqual(analysis['changed_vars'], set())
        elif step_index == '4':
            self.assertTrue('output' in analysis['signature_vars'])
        elif step_index == '5':
            self.assertTrue('output' not in analysis['signature_vars'])
def testUndetermined(self):
    '''Test DAG with undetermined input.

    Step ``C_3`` takes ``dynamic('*.txt')`` as input; the DAG is expected
    to chain ``C_1 -> C_2 -> C_3 -> C_4`` both when ``C_3`` declares its
    output and when it does not.
    '''
    fixtures = ('a.txt', 'd.txt')
    for name in fixtures:
        with open(name, 'w') as handle:
            handle.write('hey')
    expected_dag = '''
strict digraph "" {
C_1;
C_4;
C_2;
C_3;
C_1 -> C_2;
C_2 -> C_3;
C_3 -> C_4;
}
'''
    # input of step 3 is undetermined so
    # it depends on all its previous steps.
    with_output = SoS_Script('''
[C_1]
input: 'a.txt'
output: 'b.txt'

[C_2]
input: 'b.txt'
output: 'c.txt'

[C_3]
input: dynamic('*.txt')
output: 'd.txt'

[C_4]
depends: 'd.txt'
output: 'e.txt'
''')
    dag = Base_Executor(with_output.workflow()).initialize_dag()
    dag.show_nodes()
    self.assertDAG(dag, expected_dag)
    #
    # same workflow, but step 3 no longer declares its output
    #
    without_output = SoS_Script('''
[C_1]
input: 'a.txt'
output: 'b.txt'

[C_2]
input: 'b.txt'
output: 'c.txt'

[C_3]
input: dynamic('*.txt')

[C_4]
depends: 'd.txt'
output: 'e.txt'
''')
    dag = Base_Executor(without_output.workflow()).initialize_dag()
    self.assertDAG(dag, expected_dag)
    for name in fixtures:
        os.remove(name)
def testLongChain(self):
    '''Test long make file style dependencies.

    The DAG built for the default workflow is compared with the expected
    graph, then the workflow is executed and every target is required to
    exist before it is removed again.
    '''
    all_targets = [
        'A1.txt', 'A2.txt', 'C2.txt', 'B2.txt', 'B1.txt', 'B3.txt',
        'C1.txt', 'C3.txt', 'C4.txt'
    ]
    for name in all_targets:
        file_target(name).remove('both')
    #
    #  A1 <- B1 <- B2 <- B3
    #   |
    #   |
    #  \/
    #  A2 <- B2 <- C1 <- C2 <- C4
    #                    C3
    #
    script = SoS_Script('''
[A_1]
input: 'B1.txt'
output: 'A1.txt'
run:
    touch A1.txt

[A_2]
depends: 'B2.txt'
output: 'A2.txt'
run:
    touch A2.txt

[B1: provides='B1.txt']
depends: 'B2.txt'
run:
    touch B1.txt

[B2: provides='B2.txt']
depends: 'B3.txt', 'C1.txt'
run:
    touch B2.txt

[B3: provides='B3.txt']
run:
    touch B3.txt

[C1: provides='C1.txt']
depends: 'C2.txt', 'C3.txt'
run:
    touch C1.txt

[C2: provides='C2.txt']
depends: 'C4.txt'
run:
    touch C2.txt

[C3: provides='C3.txt']
depends: 'C4.txt'
run:
    touch C3.txt

[C4: provides='C4.txt']
run:
    touch C4.txt

''')
    wf = script.workflow()
    graph = Base_Executor(wf).initialize_dag()
    self.assertDAG(
        graph, '''
strict digraph "" {
"C4 ['C4.txt']";
"B1 ['B1.txt']";
"C1 ['C1.txt']";
"C2 ['C2.txt']";
"C3 ['C3.txt']";
A_1;
"B2 ['B2.txt']";
"B3 ['B3.txt']";
A_2;
"C4 ['C4.txt']" -> "C2 ['C2.txt']";
"C4 ['C4.txt']" -> "C3 ['C3.txt']";
"B1 ['B1.txt']" -> A_1;
"C1 ['C1.txt']" -> "B2 ['B2.txt']";
"C2 ['C2.txt']" -> "C1 ['C1.txt']";
"C3 ['C3.txt']" -> "C1 ['C1.txt']";
A_1 -> A_2;
"B2 ['B2.txt']" -> "B1 ['B1.txt']";
"B2 ['B2.txt']" -> A_2;
"B3 ['B3.txt']" -> "B2 ['B2.txt']";
}
''')
    Base_Executor(wf).run()
    for name in all_targets:
        target = file_target(name)
        self.assertTrue(target.target_exists(), name + ' should exist')
        target.remove('both')
def testForEach(self):
    '''Test for_each option of input.

    Covers the string form (``for_each='c'``, loop variable ``_c``) and
    the dictionary form (``for_each={'c': ...}``, loop variable ``c``),
    with lists, nested lists, multi-key dictionaries, pandas DataFrame,
    Series and Index objects, and a plain iterable (``zip``).
    '''

    def execute(source, **run_options):
        # Parse the script and run its default workflow.
        Base_Executor(SoS_Script(source).workflow()).run(**run_options)

    self.touch(['a.txt', 'b.txt', 'a.pdf'])
    execute(r"""
[0: shared=['counter', 'all_names', 'all_loop']]
files = ['a.txt', 'b.txt']
names = ['a', 'b', 'c']
c = ['1', '2']
counter = 0
all_names = ''
all_loop = ''

input: 'a.pdf', files, group_by='single', paired_with='names', for_each='c'

all_names += str(_names[0]) + " "
all_loop += str(_c) + " "

counter = counter + 1
""")
    self.assertEqual(env.sos_dict['counter'], 6)
    self.assertEqual(env.sos_dict['all_names'], "a b c a b c ")
    self.assertEqual(env.sos_dict['all_loop'], "1 1 1 2 2 2 ")
    #
    # test same-level for loop and parameter with nested list
    execute(r"""
[0: shared=['processed']]
files = ['a.txt', 'b.txt']
par = [(1, 2), (1, 3), (2, 3)]
res = ['p1.txt', 'p2.txt', 'p3.txt']
processed = []

input: files, for_each='par,res'
output: res

processed.append((_par, _res))
""", mode='dryrun')
    self.assertEqual(env.sos_dict['processed'],
                     [((1, 2), 'p1.txt'), ((1, 3), 'p2.txt'),
                      ((2, 3), 'p3.txt')])
    #
    # test for each for pandas dataframe
    execute(r"""
[0: shared={'res':'output'}]
import pandas as pd
data = pd.DataFrame([(1, 2, 'Hello'), (2, 4, 'World')], columns=['A', 'B', 'C'])

input: for_each='data'
output: f"{_data['A']}_{_data['B']}_{_data['C']}.txt"
""", mode='dryrun')
    self.assertEqual(env.sos_dict['res'],
                     ['1_2_Hello.txt', '2_4_World.txt'])

    # test dictionary format of for_each
    self.touch(['a.txt', 'b.txt', 'a.pdf'])
    execute(r"""
[0: shared=['counter', 'all_names', 'all_loop']]
files = ['a.txt', 'b.txt']
names = ['a', 'b', 'c']
counter = 0
all_names = ''
all_loop = ''

input: 'a.pdf', files, group_by='single', paired_with='names', for_each={'c': ['1', '2']}

all_names += str(_names[0]) + " "
all_loop += c + " "

counter = counter + 1
""")
    self.assertEqual(env.sos_dict['counter'], 6)
    self.assertEqual(env.sos_dict['all_names'], "a b c a b c ")
    self.assertEqual(env.sos_dict['all_loop'], "1 1 1 2 2 2 ")
    #
    # test multi-key dictionary format of for_each
    self.touch(['a.txt'])
    execute(r"""
import itertools
[0: shared=['counter', 'all_names', 'all_loop']]
parameter: n = [300, 100]
parameter: p = [50, 200, 100]
parameter: outfile = ['1', '2', '3', '4', '5', '6']
counter = 0
all_names = ''
all_loop = ''
input: 'a.txt', group_by='single', for_each={'_n,_p': [(_n,_p) for _n,_p in itertools.product(n,p) if _n > _p]}

all_names += outfile[_index] + " "
all_loop += '{} {} '.format(_n, _p)
counter = counter + 1
""")
    self.assertEqual(env.sos_dict['counter'], 4)
    self.assertEqual(env.sos_dict['all_names'], "1 2 3 4 ")
    self.assertEqual(env.sos_dict['all_loop'],
                     "300 50 300 200 300 100 100 50 ")
    #
    # test same-level for loop and parameter with nested list
    execute(r"""
[0: shared=['processed']]
files = ['a.txt', 'b.txt']
processed = []

input: files, for_each={'par':[(1, 2), (1, 3), (2, 3)], 'res': ['p1.txt', 'p2.txt', 'p3.txt']}
output: res

processed.append((par, res))
""", mode='dryrun')
    self.assertEqual(env.sos_dict['processed'],
                     [((1, 2), 'p1.txt'), ((1, 3), 'p2.txt'),
                      ((2, 3), 'p3.txt')])
    #
    # test for each for pandas dataframe
    execute(r"""
[0: shared={'res':'output'}]
import pandas as pd
input: for_each={'data': pd.DataFrame([(1, 2, 'Hello'), (2, 4, 'World')], columns=['A', 'B', 'C'])}
output: f"{data['A']}_{data['B']}_{data['C']}.txt"
""", mode='dryrun')
    self.assertEqual(env.sos_dict['res'],
                     ['1_2_Hello.txt', '2_4_World.txt'])
    #
    # support for pandas Series and Index types
    execute(r"""
[0: shared={'res':'output'}]
import pandas as pd
data = pd.DataFrame([(1, 2, 'Hello'), (2, 4, 'World')], columns=['A', 'B', 'C'])
input: for_each={'A': data['A']}
output: f"a_{A}.txt"
""", mode='dryrun')
    self.assertEqual(env.sos_dict['res'], ['a_1.txt', 'a_2.txt'])
    #
    execute(r"""
[0: shared={'res':'output'}]
import pandas as pd
data = pd.DataFrame([(1, 2, 'Hello'), (2, 4, 'World')], columns=['A', 'B', 'C'])
data.set_index('C', inplace=True)
input: for_each={'A': data.index}
output: f"{A}.txt"
""", mode='dryrun')
    self.assertEqual(env.sos_dict['res'], ['Hello.txt', 'World.txt'])

    # test for each of Series
    execute(r"""
[0: shared={'res':'output'}]
import pandas as pd
data = pd.DataFrame([(0, 1, 'Ha'), (1, 2, 'Hello'), (2, 4, 'World')], columns=['A', 'B', 'C'])

data.set_index('A', inplace=True)
data = data.tail(2)
input: for_each={'A': data['B']}
output: f"{A}.txt"
""", mode='dryrun')
    self.assertEqual(env.sos_dict['res'], ['2.txt', '4.txt'])

    # test iterable
    execute(r"""
[0: shared={'res':'output'}]
import pandas as pd
data = pd.DataFrame([(0, 1, 'Ha'), (1, 2, 'Hello'), (2, 4, 'World')], columns=['A', 'B', 'C'])

data.set_index('A', inplace=True)
data = data.tail(2)
input: for_each={'A,B': zip(data['B'],data['C'])}
output: f"{A}.txt"
""", mode='dryrun')
    self.assertEqual(env.sos_dict['res'], ['2.txt', '4.txt'])
def testSoSStepMiniworkflow(self):
    '''Test the addition of mini forward workflows introduced by sos_step.

    The ``default`` step depends on ``sos_step('a')`` and
    ``sos_step('b')``, and ``b_20`` in turn depends on ``sos_step('c')``.
    The DAG written to ``test.dot`` is compared with the expected graph.
    '''
    miniworkflows = SoS_Script('''
[a_1]
print(step_name)

[a_2]
print(step_name)

[a_20]
print(step_name)

[b_1]
print(step_name)

[b_2]
print(step_name)

[b_20]
depends: sos_step('c')
print(step_name)

[c_1]
print(step_name)

[c_2]
print(step_name)

[c_20]
print(step_name)

[default]
depends: sos_step('a'), sos_step('b')
''')
    dot_file = 'test.dot'
    executor = Base_Executor(
        miniworkflows.workflow(), config={'output_dag': dot_file})
    executor.run()
    # each of a, b and c shows up as a forward chain of its three steps
    self.assertDAG(
        dot_file, '''
strict digraph "" {
default;
a_1;
a_2;
a_20;
b_1;
b_2;
b_20;
c_1;
c_2;
c_20;
a_1 -> a_2;
a_2 -> a_20;
a_20 -> default;
b_1 -> b_2;
b_2 -> b_20;
b_20 -> default;
c_1 -> c_2;
c_2 -> c_20;
c_20 -> b_20;
}
''')
def testInterpolation(self):
    '''Test string interpolation during execution.

    f-strings are evaluated in plain statements, inside a ``for`` loop,
    and in ``output`` expressions that use the variables extracted by the
    ``pattern`` option of ``input``.
    '''

    def shared_res(source, **run_options):
        # Run the default workflow of the script and return shared ``res``.
        Base_Executor(SoS_Script(source).workflow()).run(**run_options)
        return env.sos_dict['res']

    self.touch(['a_1.txt', 'b_2.txt', 'c_2.txt'])
    self.assertEqual(
        shared_res(r"""
[0: shared='res']
res = ''
b = 200
res += f"{b}"
"""), '200')
    #
    self.assertEqual(
        shared_res(r"""
[0: shared='res']
res = ''
for b in range(5):
    res += f"{b}"
"""), '01234')
    #
    self.assertEqual(
        shared_res(r"""
[0: shared={'res':'output'}]
input: 'a_1.txt', 'b_2.txt', 'c_2.txt', pattern='{name}_{model}.txt'
output: [f'{x}_{y}_processed.txt' for x,y in zip(name, model)]
""", mode='dryrun'),
        ['a_1_processed.txt', 'b_2_processed.txt', 'c_2_processed.txt'])
    #
    self.assertEqual(
        shared_res(r"""
[0: shared={'res':'output'}]
input: 'a_1.txt', 'b_2.txt', 'c_2.txt', pattern='{name}_{model}.txt'
output: [f"{x}_{y}_process.txt" for x,y in zip(name, model)]
""", mode='dryrun'),
        ['a_1_process.txt', 'b_2_process.txt', 'c_2_process.txt'])
    #
    self.assertEqual(
        shared_res(r"""
[0: shared={'res':'output'}]
def add_a(x):
    return ['a'+_x for _x in x]

input: 'a_1.txt', 'b_2.txt', 'c_2.txt', pattern='{name}_{model}.txt'
output: add_a([f"{x}_{y}_process.txt" for x,y in zip(name, model)])
""", mode='dryrun'),
        ['aa_1_process.txt', 'ab_2_process.txt', 'ac_2_process.txt'])
def testShared(self):
    '''Test option shared.

    The section option is exercised with no value, a single name, a tuple
    of names, a mapping of names to expressions, and a list mixing names
    and mappings; ``env.sos_dict`` is inspected after each run.
    '''

    def execute(source):
        # Parse the script and run its default workflow.
        Base_Executor(SoS_Script(source).workflow()).run()

    # without the option, assignments in steps do not reach the parameter
    execute(r"""
parameter: res = 1

[0]
res = 2

[1]
res = 3
""")
    self.assertEqual(env.sos_dict['res'], 1)
    #
    execute(r"""
parameter: res = 1

[0: shared='res']
res = 2

[1]
res = 3
""")
    self.assertEqual(env.sos_dict['res'], 2)
    #
    execute(r"""
parameter: res = 1
parameter: a = 30

[0: shared='a']
res = 2

[1: shared='res']
res = 3
a = 5
""")
    self.assertEqual(env.sos_dict['res'], 3)
    self.assertEqual(env.sos_dict['a'], 30)
    # test multiple vars
    execute(r"""
parameter: res = 1
parameter: a = 30

[1: shared=('res', 'a')]
res = 3
a = 5
""")
    self.assertEqual(env.sos_dict['res'], 3)
    self.assertEqual(env.sos_dict['a'], 5)
    #
    # test expression
    execute(r"""
parameter: res = 1
parameter: a = 30

[1: shared={'res': 'res + 6', 'c': 'a'}]
res = 3
a = 5
""")
    self.assertEqual(env.sos_dict['res'], 9)
    self.assertEqual(env.sos_dict['c'], 5)
    # test mixed vars and mapping
    execute(r"""
parameter: res = 1
parameter: a = 30

[1: shared=['res', {'c': 'a'}]]
res = 3
a = 5
""")
    self.assertEqual(env.sos_dict['res'], 3)
    self.assertEqual(env.sos_dict['c'], 5)
def testStopIf(self):
    '''Test action stop_if.

    Checks that ``stop_if`` ends the loop iterations in which its
    condition holds, and that output touched before the call is absent
    afterwards with ``no_output=True`` and present without that option.
    '''

    def execute(source):
        # Parse the script and run its default workflow.
        Base_Executor(SoS_Script(source).workflow()).run()

    def discard_outputs():
        # Remove outputs of an earlier run or of the previous case.
        for idx in range(2):
            leftover = f'test_stop_if_{idx}.txt'
            if os.path.isfile(leftover):
                os.remove(leftover)

    execute(r'''
[0: shared='result']
rep = range(20)
result = []
input: for_each='rep'

stop_if(_rep > 10)
result.append(_rep)
''')
    self.assertEqual(env.sos_dict['result'],
                     [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])

    # stop_if should not be treated as error; with no_output=True the
    # previously generated output file will be removed
    discard_outputs()
    execute(r'''
[10]
rep = range(2)
input: for_each='rep'
output: f'test_stop_if_{_rep}.txt'

_output.touch()
stop_if(_rep == 1, no_output=True)

[20]
assert(step_input.contains('test_stop_if_0.txt'))
assert(not step_input.contains('test_stop_if_1.txt'))
''')
    self.assertTrue(os.path.isfile('test_stop_if_0.txt'))
    self.assertFalse(os.path.isfile('test_stop_if_1.txt'))
    #
    # stop_if should not be treated as error; without no_output the
    # previously generated output file will not be removed
    discard_outputs()
    execute(r'''
[10]
rep = range(2)
input: for_each='rep'
output: f'test_stop_if_{_rep}.txt'

_output.touch()
stop_if(_rep == 1)

[20]
assert(step_input.contains('test_stop_if_0.txt'))
assert(step_input.contains('test_stop_if_1.txt'))
''')
    self.assertTrue(os.path.isfile('test_stop_if_0.txt'))
    self.assertTrue(os.path.isfile('test_stop_if_1.txt'))
def testReport(self):
    '''Test action report.

    Covers writing an expanded script to a file, the content left in the
    file after repeated runs and after two steps writing to it, and the
    ``input`` argument with a single file and with a list of files.
    '''
    report_file = 'report.txt'
    script = SoS_Script(r'''
[A]
parameter: num=5
report: output='report.txt', expand=True
    touch {num}.txt

''')
    # output to a file
    file_target(report_file).remove('both')
    wf = script.workflow()
    # run twice; the content checked below is that of the second run
    for number in ('7', '5'):
        Base_Executor(wf, args=['--num', number]).run()
    with open(report_file) as handle:
        self.assertEqual(handle.read(), 'touch 5.txt\n\n')
    # test overwrite
    file_target(report_file).remove('both')
    script = SoS_Script(r'''
[A]
report: output='report.txt', expand=True
    {step_name}

[A_10]
report: output='report.txt', expand=True
    {step_name}

''')
    wf = script.workflow()
    # run twice
    for _ in range(2):
        Base_Executor(wf).run()
    with open(report_file) as handle:
        self.assertEqual(handle.read(), 'A_10\n\n')
    #
    # test input from another file
    file_target(report_file).remove()
    script = SoS_Script(r'''
[A_1]
run: output='a.txt'
    echo something > a.txt
report(input='a.txt', output='out.txt')
''')
    Base_Executor(script.workflow()).run()
    for name in ('a.txt', 'out.txt'):
        with open(name) as handle:
            self.assertEqual(handle.read().strip(), 'something')
        file_target(name).remove()
    #
    # a list of input files
    script = SoS_Script(r'''
[A_1]
run: output='a.txt'
    echo something > a.txt

[A_2]
run: output='b.txt'
    echo something else > b.txt

[A_3]
report(input=['a.txt', 'b.txt'], output='out.txt')
''')
    Base_Executor(script.workflow()).run()
    for name in ('a.txt', 'b.txt', 'out.txt'):
        self.assertTrue(file_target(name).target_exists())
        file_target(name).remove()
def testTarget(self):
    '''Test executing only part of a workflow.

    The DAG is built, and the workflow executed, for three different sets
    of requested targets; after each run only the files needed for those
    targets are expected to exist.
    '''

    def check_files(absent, present):
        # ``absent`` must not exist; ``present`` must exist and is removed.
        for name in absent:
            self.assertFalse(file_target(name).target_exists())
        for name in present:
            target = file_target(name)
            self.assertTrue(target.target_exists())
            target.remove('both')

    for name in [
            'A1.txt', 'A2.txt', 'C2.txt', 'B2.txt', 'B1.txt', 'B3.txt',
            'C1.txt', 'C3.txt', 'C4.txt'
    ]:
        file_target(name).remove('both')
    #
    #  A1 <- B1 <- B2 <- B3
    #   |
    #   |
    #  \/
    #  A2 <- B2 <- C1 <- C2 <- C4
    #                    C3
    #
    script = SoS_Script('''
[A_1]
input: 'B1.txt'
output: 'A1.txt'
run:
    touch A1.txt

[A_2]
depends: 'B2.txt'
run:
    touch A2.txt

[B1: provides='B1.txt']
depends: 'B2.txt'
run:
    touch B1.txt

[B2: provides='B2.txt']
depends: 'B3.txt', 'C1.txt'
run:
    touch B2.txt

[B3: provides='B3.txt']
run:
    touch B3.txt

[C1: provides='C1.txt']
depends: 'C2.txt', 'C3.txt'
run:
    touch C1.txt

[C2: provides='C2.txt']
depends: 'C4.txt'
run:
    touch C2.txt

[C3: provides='C3.txt']
depends: 'C4.txt'
run:
    touch C3.txt

[C4: provides='C4.txt']
run:
    touch C4.txt

''')
    wf = script.workflow()
    #
    # test 1, we only need to generate target 'B1.txt'
    requested = ['B1.txt']
    dag = Base_Executor(wf).initialize_dag(targets=requested)
    # neither A_1 nor A_2 is part of the expected graph
    self.assertDAG(
        dag, '''
strict digraph "" {
"B3 ['B3.txt']";
"C4 ['C4.txt']";
"C2 ['C2.txt']";
"C1 ['C1.txt']";
"B1 ['B1.txt']";
"B2 ['B2.txt']";
"C3 ['C3.txt']";
"B3 ['B3.txt']" -> "B2 ['B2.txt']";
"C4 ['C4.txt']" -> "C3 ['C3.txt']";
"C4 ['C4.txt']" -> "C2 ['C2.txt']";
"C2 ['C2.txt']" -> "C1 ['C1.txt']";
"C1 ['C1.txt']" -> "B2 ['B2.txt']";
"B2 ['B2.txt']" -> "B1 ['B1.txt']";
"C3 ['C3.txt']" -> "C1 ['C1.txt']";
}
''')
    Base_Executor(wf).run(targets=requested)
    check_files(
        absent=['A1.txt', 'A2.txt'],
        present=[
            'C2.txt', 'B2.txt', 'B1.txt', 'B3.txt', 'C1.txt', 'C3.txt',
            'C4.txt'
        ])
    #
    # test 2, we would like to generate two files
    requested = ['B2.txt', 'C2.txt']
    dag = Base_Executor(wf).initialize_dag(targets=requested)
    self.assertDAG(
        dag, '''
strict digraph "" {
"C4 ['C4.txt']";
"B2 ['B2.txt']";
"C3 ['C3.txt']";
"B3 ['B3.txt']";
"C2 ['C2.txt']";
"C1 ['C1.txt']";
"C4 ['C4.txt']" -> "C2 ['C2.txt']";
"C4 ['C4.txt']" -> "C3 ['C3.txt']";
"C3 ['C3.txt']" -> "C1 ['C1.txt']";
"B3 ['B3.txt']" -> "B2 ['B2.txt']";
"C2 ['C2.txt']" -> "C1 ['C1.txt']";
"C1 ['C1.txt']" -> "B2 ['B2.txt']";
}
''')
    Base_Executor(wf).run(targets=requested)
    check_files(
        absent=['A1.txt', 'B1.txt', 'A2.txt'],
        present=[
            'C2.txt', 'B2.txt', 'B3.txt', 'C1.txt', 'C3.txt', 'C4.txt'
        ])
    #
    # test 3, generate two separate trees
    #
    requested = ['B3.txt', 'C2.txt']
    dag = Base_Executor(wf).initialize_dag(targets=requested)
    self.assertDAG(
        dag, '''
strict digraph "" {
"B3 ['B3.txt']";
"C2 ['C2.txt']";
"C4 ['C4.txt']";
"C4 ['C4.txt']" -> "C2 ['C2.txt']";
}
''')
    Base_Executor(wf).run(targets=requested)
    check_files(
        absent=['A1.txt', 'B1.txt', 'A2.txt', 'B2.txt', 'C1.txt', 'C3.txt'],
        present=['C2.txt', 'B3.txt', 'C4.txt'])
def testParameters(self):
    '''Test parameters section.

    Covers default values, command-line overrides, type-only declarations
    (int, str, list, bool), parameters derived from global variables,
    dash/underscore aliases, reserved names and multiple parameter
    statements.
    '''
    # The workflow is only built here; assertions about invalid arguments
    # (--not_exist, extra values for --par1) were disabled in the original.
    script = SoS_Script(section1_sos)
    wf = script.workflow('chapter:0')
    #
    # list-of-int default: command-line values are converted to int
    script = SoS_Script('''
parameter: a = [1, 2]
[0]
''')
    wf = script.workflow()
    self.assertEqual(list(wf.parameters().keys()), ['a'])
    Base_Executor(wf).run()
    self.assertEqual(env.sos_dict['a'], [1, 2])
    wf = script.workflow()
    Base_Executor(wf, args=['--a', '3']).run()
    self.assertEqual(env.sos_dict['a'], [3])
    wf = script.workflow()
    Base_Executor(wf, args=['--a', '3', '5']).run()
    self.assertEqual(env.sos_dict['a'], [3, 5])
    #
    # list-of-str default: command-line values stay strings
    script = SoS_Script('''
# comment

# comment
parameter: a = ['a.txt', 'b.txt']
[0]
''')
    wf = script.workflow()
    Base_Executor(wf).run()
    self.assertEqual(env.sos_dict['a'], ['a.txt', 'b.txt'])
    wf = script.workflow()
    Base_Executor(wf, args=['--a', '3']).run()
    self.assertEqual(env.sos_dict['a'], ['3'])
    wf = script.workflow()
    Base_Executor(wf, args=['--a', '3', '5']).run()
    self.assertEqual(env.sos_dict['a'], ['3', '5'])
    #
    # test parameter using global definition
    script = SoS_Script('''
a="100"

# comment

parameter: b=str(int(a)+1)
[0]
''')
    wf = script.workflow()
    self.assertEqual(list(wf.parameters().keys()), ['b'])
    Base_Executor(wf).run()
    self.assertEqual(env.sos_dict['b'], '101')
    #
    env.sos_dict.clear()
    script = SoS_Script('''
a=100

parameter: b=a+1
[0]
''')
    wf = script.workflow()
    Base_Executor(wf).run()
    self.assertEqual(env.sos_dict['b'], 101)
    #
    script = SoS_Script('''
a=100

parameter: b=a+1.
[0]
''')
    wf = script.workflow()
    Base_Executor(wf, args=['--b', '1000']).run()
    #
    self.assertEqual(env.sos_dict['b'], 1000)
    #
    # a parameter statement must have a value
    self.assertRaises(ParsingError, SoS_Script, '''
[0]
parameter: b=

''')
    # if it is a type, must provide value
    script = SoS_Script('''
# comment
parameter: b = int
[0]
''')
    wf = script.workflow()
    self.assertRaises(ArgumentError, Base_Executor(wf).run, mode='dryrun')
    #
    script = SoS_Script('''
parameter: b = list
[0]
''')
    # build the workflow from the *current* script before inspecting its
    # parameters; the original inspected the stale workflow of the
    # previous script, which only passed because it also declared `b`
    wf = script.workflow()
    self.assertEqual(list(wf.parameters().keys()), ['b'])
    self.assertRaises(ArgumentError, Base_Executor(wf).run, mode='dryrun')
    # also require the type
    script = SoS_Script('''
parameter: b = int
[0]
''')
    wf = script.workflow()
    self.assertRaises(ArgumentError,
                      Base_Executor(wf, args=['--b', 'file']).run,
                      mode='dryrun')
    #
    script = SoS_Script('''
parameter: b = int
[0]
''')
    wf = script.workflow()
    Base_Executor(wf, args=['--b', '5']).run(mode='dryrun')
    self.assertEqual(env.sos_dict['b'], 5)
    # string
    script = SoS_Script('''
parameter: b = str
[0]
''')
    wf = script.workflow()
    Base_Executor(wf, args=['--b', '5']).run(mode='dryrun')
    self.assertEqual(env.sos_dict['b'], '5')
    # list is ok
    script = SoS_Script('''
parameter: b = list
[0]
''')
    wf = script.workflow()
    Base_Executor(wf, args=['--b', '5']).run(mode='dryrun')
    self.assertEqual(env.sos_dict['b'], ['5'])
    # bool required
    script = SoS_Script('''
# comment
parameter: b = bool
[0]
''')
    wf = script.workflow()
    Base_Executor(wf, args=['--b']).run(mode='dryrun')
    self.assertEqual(env.sos_dict['b'], True)
    Base_Executor(wf, args=['--no-b']).run(mode='dryrun')
    self.assertEqual(env.sos_dict['b'], False)
    # bool with default True
    script = SoS_Script('''
parameter: b = True
[0]
''')
    wf = script.workflow()
    Base_Executor(wf, args=[]).run(mode='dryrun')
    self.assertEqual(env.sos_dict['b'], True)
    Base_Executor(wf, args=['--b']).run(mode='dryrun')
    self.assertEqual(env.sos_dict['b'], True)
    Base_Executor(wf, args=['--no-b']).run(mode='dryrun')
    self.assertEqual(env.sos_dict['b'], False)
    # bool with default False
    script = SoS_Script('''
parameter: b = False
[0]
''')
    wf = script.workflow()
    Base_Executor(wf, args=[]).run(mode='dryrun')
    self.assertEqual(env.sos_dict['b'], False)
    Base_Executor(wf, args=['--b']).run(mode='dryrun')
    self.assertEqual(env.sos_dict['b'], True)
    Base_Executor(wf, args=['--no-b']).run(mode='dryrun')
    self.assertEqual(env.sos_dict['b'], False)
    #
    # parameters cannot coincide with a readonly global variable
    # are masked by previous definition
    script = SoS_Script('''
a = 4
parameter: a = 5
[0]
''')
    wf = script.workflow()
    # NOTE(review): the value 7 is passed as an int, not a str like every
    # other args list in this test; confirm the expected exception comes
    # from the name clash and not from the non-string argument.
    self.assertRaises(Exception,
                      Base_Executor(wf, args=['--a', 7]).run,
                      mode='dryrun')
    #
    # test parameters with dash
    script = SoS_Script('''
parameter: a_b = 5
[0]
''')
    wf = script.workflow()
    Base_Executor(wf, args=['--a_b', '10']).run(mode='dryrun')
    self.assertEqual(env.sos_dict['a_b'], 10)
    Base_Executor(wf, args=['--a-b', '10']).run(mode='dryrun')
    self.assertEqual(env.sos_dict['a_b'], 10)
    #
    #
    script = SoS_Script('''
parameter: a_b = int
[0]
''')
    wf = script.workflow()
    Base_Executor(wf, args=['--a_b', '10']).run(mode='dryrun')
    self.assertEqual(env.sos_dict['a_b'], 10)
    Base_Executor(wf, args=['--a-b', '10']).run(mode='dryrun')
    self.assertEqual(env.sos_dict['a_b'], 10)
    #
    # parameter cannot be any keyword
    for key in ['input', 'output', '_input', 'with']:
        self.assertRaises(
            Exception, SoS_Script, '''
parameter: {} = int
[0]
'''.format(key))
    # multiple parameters
    script = SoS_Script('''
parameter: a_b = int
[0]
parameter: c_b = list
''')
    wf = script.workflow()
    self.assertEqual(sorted(list(wf.parameters().keys())), ['a_b', 'c_b'])
def testPatternReuse(self):
    '''Check that one pattern-based step can be reused to produce several files.'''
    artifacts = [
        'A1.txt', 'A2.txt', 'B1.txt', 'B1.txt.p', 'B2.txt', 'B2.txt.p'
    ]
    for name in artifacts:
        file_target(name).remove('both')
    #
    #  A1 <- P <- B1
    #  A1 <- P <- B2
    #  A2
    #
    workflow = SoS_Script('''
[A_1]
input: 'B1.txt.p', 'B2.txt.p'
output: 'A1.txt'
run:
    touch A1.txt

[A_2]
output: 'A2.txt'
run:
    touch A2.txt

[B1: provides='B1.txt']
run:
    touch B1.txt

[B2: provides='B2.txt']
run:
    touch B2.txt

[P: provides='{filename}.p']
input: filename
run: expand=True
    touch {_output}
''').workflow()
    # step P must appear once per requested '.p' file
    graph = Base_Executor(workflow).initialize_dag()
    self.assertDAG(
        graph, '''
strict digraph "" {
"P ['B2.txt.p']";
"B1 ['B1.txt']";
"B2 ['B2.txt']";
A_2;
A_1;
"P ['B1.txt.p']";
"P ['B2.txt.p']" -> A_1;
"B1 ['B1.txt']" -> "P ['B1.txt.p']";
"B2 ['B2.txt']" -> "P ['B2.txt.p']";
A_1 -> A_2;
"P ['B1.txt.p']" -> A_1;
}
''')
    Base_Executor(workflow).run()
    for name in artifacts:
        target = file_target(name)
        self.assertTrue(target.target_exists(), '{} should exist'.format(name))
        target.remove('both')
def testGroupBy(self):
    '''Exercise the group_by option of the input statement.'''

    def groups_from(text):
        '''Dry-run the script and return the shared `executed` list.'''
        Base_Executor(SoS_Script(text).workflow()).run(mode='dryrun')
        return env.sos_dict['executed']

    # input files a0.txt .. a10.txt
    self.touch(['a{}.txt'.format(x) for x in range(11)])

    # group_by = 'all'
    observed = groups_from('''
[0: shared='executed']

executed = []
input: [f'a{x}.txt' for x in range(1, 5)], group_by='all'

executed.append(_input)

''')
    self.assertEqual(observed,
                     [sos_targets('a1.txt', 'a2.txt', 'a3.txt', 'a4.txt')])

    # group_by = 'single'
    observed = groups_from('''
[0: shared='executed']

executed = []
input: ['a{}.txt'.format(x) for x in range(1, 5)], group_by='single'

executed.append(_input)

''')
    self.assertEqual(observed, [
        sos_targets('a1.txt'),
        sos_targets('a2.txt'),
        sos_targets('a3.txt'),
        sos_targets('a4.txt')
    ])

    # group_by = 'pairs'
    observed = groups_from('''
[0: shared='executed']

executed = []
input: [f'a{x}.txt' for x in range(1, 5)], group_by='pairs'

executed.append(_input)

''')
    self.assertEqual(
        observed,
        [sos_targets('a1.txt', 'a3.txt'),
         sos_targets('a2.txt', 'a4.txt')])

    # group_by = 'pairwise'
    observed = groups_from('''
[0: shared='executed']

executed = []
input: ['a{}.txt'.format(x) for x in range(1, 5)], group_by='pairwise'

executed.append(_input)

''')
    self.assertEqual(observed, [
        sos_targets('a1.txt', 'a2.txt'),
        sos_targets('a2.txt', 'a3.txt'),
        sos_targets('a3.txt', 'a4.txt')
    ])

    # group_by = 'combinations'
    observed = groups_from('''
[0: shared='executed']

executed = []
input: ['a{}.txt'.format(x) for x in range(1, 5)], group_by='combinations'

executed.append(_input)

''')
    self.assertEqual(observed, [
        sos_targets('a1.txt', 'a2.txt'),
        sos_targets('a1.txt', 'a3.txt'),
        sos_targets('a1.txt', 'a4.txt'),
        sos_targets('a2.txt', 'a3.txt'),
        sos_targets('a2.txt', 'a4.txt'),
        sos_targets('a3.txt', 'a4.txt')
    ])

    triples = [
        sos_targets('a1.txt', 'a2.txt', 'a3.txt'),
        sos_targets('a4.txt', 'a5.txt', 'a6.txt'),
        sos_targets('a7.txt', 'a8.txt', 'a9.txt')
    ]
    # chunk size given as an integer
    observed = groups_from('''
[0: shared='executed']

executed = []
input: ['a{}.txt'.format(x) for x in range(1, 10)], group_by=3

executed.append(_input)

''')
    self.assertEqual(observed, triples)

    # chunk size given as a string holding an integer
    observed = groups_from('''
[0: shared='executed']

executed = []
input: ['a{}.txt'.format(x) for x in range(1, 10)], group_by='3'

executed.append(_input)

''')
    self.assertEqual(observed, triples)

    # nine files with group_by=4: the dry run is only expected to complete
    self.touch(['a{}.txt'.format(x) for x in range(1, 10)])
    uneven = SoS_Script('''
[0]

executed = []
input: ['a{}.txt'.format(x) for x in range(1, 10)], group_by=4

executed.append(_input)

''')
    Base_Executor(uneven.workflow()).run(mode="dryrun")

    # an unrecognised group_by value must raise
    invalid = SoS_Script('''
[0]

executed = []
input: ['a{}.txt'.format(x) for x in range(1, 10)], group_by='something'

executed.append(_input)

''')
    self.assertRaises(Exception,
                      Base_Executor(invalid.workflow()).run,
                      mode="dryrun")
def testSimpleDAG(self):
    '''Test DAG with simple dependency.

    Creates the fixture files a.txt and a1.txt, builds the DAG of several
    small workflows and compares each against the expected dot graph.
    The fixture files are removed even when an assertion fails.
    '''
    fixtures = ('a.txt', 'a1.txt')

    def dag_of(text):
        '''Build the workflow described by ``text`` and return its DAG.'''
        return Base_Executor(SoS_Script(text).workflow()).initialize_dag()

    for filename in fixtures:
        with open(filename, 'w') as tmp:
            tmp.write('hey')
    try:
        # basic case: numerically ordered steps form a chain
        # 1 -> 2 -> 3 -> 4
        self.assertDAG(
            dag_of('''
[A_1]

[A_2]

[A_3]

[A_4]

'''), '''strict digraph "" {
A_2;
A_4;
A_1;
A_3;
A_2 -> A_3;
A_1 -> A_2;
A_3 -> A_4;
}
''')
        # A_3 declares its own input, which breaks the chain after A_2
        # 1 -> 2
        # 3 -> 4
        self.assertDAG(
            dag_of('''
[A_1]

[A_2]

[A_3]
input: 'a.txt'

[A_4]

'''), '''strict digraph "" {
A_2;
A_4;
A_1;
A_3;
A_1 -> A_2;
A_3 -> A_4;
}
''')
        #
        # 1 -> 2 -> 3 -> 4 (linked through input/output files)
        #
        self.assertDAG(
            dag_of('''
[A_1]
input: 'a.txt'
output: 'b.txt'

[A_2]
input: 'b.txt'
output: 'c.txt'

[A_3]
input: 'c.txt'
output: 'd.txt'

[A_4]
input: 'd.txt'
output: 'e.txt'

'''), '''strict digraph "" {
A_2;
A_4;
A_1;
A_3;
A_2 -> A_3;
A_1 -> A_2;
A_3 -> A_4;
}
''')
        #
        # 1 -> 2
        # 3 -> 4 (3 does not have any input)
        #
        self.assertDAG(
            dag_of('''
[B_1]
input: 'a.txt'
output: 'b.txt'

[B_2]
input: 'b.txt'
output: 'c.txt'

[B_3]
input: None
output: 'd.txt'

[B_4]
input: 'd.txt'
output: 'e.txt'

'''), '''strict digraph "" {
B_2;
B_4;
B_1;
B_3;
B_1 -> B_2;
B_3 -> B_4;
}
''')
        #
        # 1 -> 2
        # 3 -> 4 (3 depends on something else)
        #
        self.assertDAG(
            dag_of('''
[B_1]
input: 'a.txt'
output: 'b.txt'

[B_2]
input: 'b.txt'
output: 'c.txt'

[B_3]
input: 'a1.txt'
output: 'd.txt'

[B_4]
input: 'd.txt'
output: 'e.txt'

'''), '''strict digraph "" {
B_1;
B_4;
B_2;
B_3;
B_1 -> B_2;
B_3 -> B_4;
}
''')
        #
        # (1) -> 2
        # (1) -> 3 -> 4
        #
        # 2 and 3 depends on the output of 1
        self.assertDAG(
            dag_of('''
[C_1]
input: 'a.txt'
output: 'b.txt'

[C_2]
input: 'b.txt'
output: 'c.txt'

[C_3]
input: 'b.txt'
output: 'd.txt'

[C_4]
depends: 'd.txt'
output: 'e.txt'

'''), '''
strict digraph "" {
C_1;
C_4;
C_2;
C_3;
C_1 -> C_2;
C_1 -> C_3;
C_3 -> C_4;
}
''')
    finally:
        # always remove the fixtures so a failed assertion cannot leave
        # files behind that other tests might pick up
        for filename in fixtures:
            if os.path.isfile(filename):
                os.remove(filename)
def testSignatureWithParameter(self):
    '''Check that step signatures take parameter values into account.'''
    out_file = 'myfile.txt'
    script_text = r'''
parameter: gvar = 10

[10]
# generate a file
output: 'myfile.txt'
# additional comment
run: expand=True
    echo {gvar} > {_output:q}

'''

    def completed_steps(result):
        '''Number of steps actually executed according to the run result.'''
        return result['__completed__']['__step_completed__']

    def file_content():
        '''Stripped content of the output file.'''
        with open(out_file) as handle:
            return handle.read().strip()

    if file_target(out_file).exists():
        file_target(out_file).unlink()

    # ---- first pass -----------------------------------------------------
    script = SoS_Script(script_text)
    result = Base_Executor(script.workflow()).run()
    self.assertEqual(completed_steps(result), 1)
    self.assertEqual(file_content(), '10')
    # a different parameter value forces the step to run again
    result = Base_Executor(script.workflow(), args=['--gvar', '20']).run()
    self.assertEqual(completed_steps(result), 1)
    self.assertEqual(file_content(), '20')
    # same value again: the signature makes the step a no-op
    result = Base_Executor(script.workflow(), args=['--gvar', '20']).run()
    self.assertEqual(completed_steps(result), 0)
    self.assertEqual(file_content(), '20')

    # ---- second pass with a freshly parsed, identical script -------------
    script = SoS_Script(script_text)
    result = Base_Executor(script.workflow()).run()
    self.assertEqual(file_content(), '10')
    self.assertEqual(completed_steps(result), 1)
    # a different parameter value forces the step to run again
    result = Base_Executor(script.workflow(), args=['--gvar', '20']).run()
    self.assertEqual(file_content(), '20')
    self.assertEqual(completed_steps(result), 1)
    # same value again: the signature makes the step a no-op
    result = Base_Executor(script.workflow(), args=['--gvar', '20']).run()
    self.assertEqual(completed_steps(result), 0)
    self.assertEqual(file_content(), '20')

    file_target(out_file).unlink()
def testOutputOfDAG(self):
    '''Check the dot file written when the output_dag option is set.'''
    for name in [
            'A1.txt', 'A2.txt', 'C2.txt', 'B2.txt', 'B1.txt', 'B3.txt',
            'C1.txt', 'C3.txt', 'C4.txt'
    ]:
        file_target(name).remove('both')
    #
    #  A1 <- B1 <- B2 <- B3
    #   |
    #   |
    #  \/
    #  A2 <- B2 <- C1 <- C2 <- C4
    #                    C3
    #
    workflow = SoS_Script('''
[A_1]
input: 'B1.txt'
output: 'A1.txt'
run:
    touch A1.txt

[A_2]
depends: 'B2.txt'
run:
    touch A2.txt

[B1: provides='B1.txt']
depends: 'B2.txt'
run:
    touch B1.txt

[B2: provides='B2.txt']
depends: 'B3.txt', 'C1.txt'
run:
    touch B2.txt

[B3: provides='B3.txt']
run:
    touch B3.txt

[C1: provides='C1.txt']
depends: 'C2.txt', 'C3.txt'
run:
    touch C1.txt

[C2: provides='C2.txt']
depends: 'C4.txt'
run:
    touch C2.txt

[C3: provides='C3.txt']
depends: 'C4.txt'
run:
    touch C3.txt

[C4: provides='C4.txt']
run:
    touch C4.txt

''').workflow()

    def write_dag(targets):
        '''Initialize the DAG for ``targets`` with output_dag set to 'test'.'''
        Base_Executor(workflow, config={
            'output_dag': 'test'
        }).initialize_dag(targets=targets)

    # case 1: only 'B1.txt' is requested
    write_dag(['B1.txt'])
    self.assertDAG(
        'test.dot', '''
strict digraph "" {
"B3 ['B3.txt']";
"C4 ['C4.txt']";
"C2 ['C2.txt']";
"C1 ['C1.txt']";
"B1 ['B1.txt']";
"B2 ['B2.txt']";
"C3 ['C3.txt']";
"B3 ['B3.txt']" -> "B2 ['B2.txt']";
"C4 ['C4.txt']" -> "C3 ['C3.txt']";
"C4 ['C4.txt']" -> "C2 ['C2.txt']";
"C2 ['C2.txt']" -> "C1 ['C1.txt']";
"C1 ['C1.txt']" -> "B2 ['B2.txt']";
"B2 ['B2.txt']" -> "B1 ['B1.txt']";
"C3 ['C3.txt']" -> "C1 ['C1.txt']";
}
''')
    # case 2: two targets sharing dependencies
    write_dag(['B2.txt', 'C2.txt'])
    self.assertDAG(
        'test.dot', '''
strict digraph "" {
"C4 ['C4.txt']";
"B2 ['B2.txt']";
"C3 ['C3.txt']";
"B3 ['B3.txt']";
"C2 ['C2.txt']";
"C1 ['C1.txt']";
"C4 ['C4.txt']" -> "C2 ['C2.txt']";
"C4 ['C4.txt']" -> "C3 ['C3.txt']";
"C3 ['C3.txt']" -> "C1 ['C1.txt']";
"B3 ['B3.txt']" -> "B2 ['B2.txt']";
"C2 ['C2.txt']" -> "C1 ['C1.txt']";
"C1 ['C1.txt']" -> "B2 ['B2.txt']";
}
''')
    # case 3: two targets in separate trees
    write_dag(['B3.txt', 'C2.txt'])
    self.assertDAG(
        'test.dot', '''
strict digraph "" {
"B3 ['B3.txt']";
"C2 ['C2.txt']";
"C4 ['C4.txt']";
"C4 ['C4.txt']" -> "C2 ['C2.txt']";
}
''')
    for name in ['C2.txt', 'B3.txt', 'C4.txt', 'test.dot', 'test_2.dot']:
        file_target(name).remove('both')
def testShared(self):
    '''Exercise the section option `shared` in its different forms.'''

    def execute(text):
        '''Parse ``text`` as a SoS script and run its default workflow.'''
        Base_Executor(SoS_Script(text).workflow()).run()

    shared_vars = env.sos_dict

    # without `shared`, assignments inside steps do not leak out
    execute(r"""
parameter: res = 1

[0]
res = 2

[1]
res = 3
""")
    self.assertEqual(shared_vars['res'], 1)

    # only the step that declares shared='res' exports its value
    shared_vars.pop('res', None)
    execute(r"""
parameter: res = 1

[0: shared='res']
res = 2

[1]
res = 3
""")
    self.assertEqual(shared_vars['res'], 2)

    # each step exports just the variable it names
    shared_vars.pop('res', None)
    execute(r"""
parameter: res = 1

parameter: a = 30

[0: shared='a']
res = 2

[1: shared='res']
res = 3
a = 5

""")
    self.assertEqual(shared_vars['res'], 3)
    self.assertEqual(shared_vars['a'], 30)

    # several variables given as a tuple
    shared_vars.pop('res', None)
    execute(r"""
parameter: res = 1

parameter: a = 30

[1: shared=('res', 'a')]
res = 3
a = 5

""")
    self.assertEqual(shared_vars['res'], 3)
    self.assertEqual(shared_vars['a'], 5)

    # mapping of name -> expression
    shared_vars.pop('res', None)
    execute(r"""
parameter: res = 1

parameter: a = 30

[1: shared={'res': 'res + 6', 'c': 'a'}]
res = 3
a = 5

""")
    self.assertEqual(shared_vars['res'], 9)
    self.assertEqual(shared_vars['c'], 5)

    # list mixing a plain name and a mapping
    shared_vars.pop('res', None)
    execute(r"""
parameter: res = 1

parameter: a = 30

[1: shared=['res', {'c': 'a'}]]
res = 3
a = 5

""")
    self.assertEqual(shared_vars['res'], 3)
    self.assertEqual(shared_vars['c'], 5)

    # step_a collects the per-substep values of `a`
    execute(r"""
parameter: res = 1
parameter: a = 30

[1: shared=['res', {'c': 'sum(step_a)'}]]
input: for_each={'i': range(10)}
a = _index**2

""")
    self.assertEqual(shared_vars['c'], sum(x**2 for x in range(10)))
def testTrunkSizeOption(self):
    '''Test option trunk_size.

    Runs a ten-substep task workflow twice, with trunk_size=5 and with
    trunk_size=None, and checks that each run produces 0.txt .. 9.txt.
    The output files of both runs and the temporary script file are
    removed before the test returns.
    '''
    script_file = 'test_trunksize.sos'
    outputs = [f'{i}.txt' for i in range(10)]
    # executor configuration shared by both runs; copied per run so a run
    # cannot affect the next one through the dictionary
    config = {
        'sig_mode': 'force',
        'script': 'test_trunksize.sos',
        'max_running_jobs': 10,
        'bin_dirs': [],
        'workflow_args': [],
        'output_dag': '',
        'output_report': None,
        'targets': [],
        'worker_procs': ['4'],
        'default_queue': 'localhost',
        'workflow': 'default',
        'workdir': '.',
    }

    def run_script(text):
        '''Write ``text`` to the script file and execute its workflow.'''
        with open(script_file, 'w') as tt:
            tt.write(text)
        wf = SoS_Script(filename=script_file).workflow()
        # make sure stale outputs cannot satisfy the assertions below
        for name in outputs:
            if os.path.isfile(name):
                file_target(name).unlink()
        Base_Executor(wf, config=dict(config)).run()

    def check_and_remove_outputs():
        '''Assert every output exists, removing each one once checked.'''
        for name in outputs:
            self.assertTrue(os.path.isfile(name))
            os.remove(name)

    try:
        run_script('''
[10]
input: for_each={'I': range(10)}
task: trunk_size=5, cores=1, mem='1M', walltime='10m'
run: expand=True
    echo {I} > {I}.txt
    sleep 0.1
''')
        check_and_remove_outputs()
        # trunk size is None or 0, -1, interpreted as all tasks
        run_script('''
[10]
input: for_each={'I': range(10)}
task: trunk_size=None, cores=1, mem='1M', walltime='10m'
run: expand=True
    echo {I} > {I}.txt
    sleep 0.1
''')
        # the original only asserted here and left the ten files behind
        check_and_remove_outputs()
    finally:
        # clean up anything left over by a failed assertion, plus the
        # temporary script which the original never removed
        for name in outputs + [script_file]:
            if os.path.isfile(name):
                os.remove(name)
def testTaskTags(self):
    '''Check that tasks can be looked up by the tags given in the task statement.'''
    import random

    script_file = 'test_tags.sos'

    def new_tag():
        '''Return a random tag name.'''
        return "tag{}".format(random.randint(1, 100000))

    def submit(text):
        '''Write ``text`` to the script file and run its workflow.'''
        with open(script_file, 'w') as handle:
            handle.write(text)
        workflow = SoS_Script(filename=script_file).workflow()
        Base_Executor(workflow, config={
            'wait_for_task': False,
            'sig_mode': 'force',
            'script': 'test_trunkworker.sos',
            'max_running_jobs': 10,
            'bin_dirs': [],
            'workflow_args': [],
            'output_dag': '',
            'targets': [],
            'max_procs': 4,
            'default_queue': None,
            'workflow': 'default',
            'workdir': '.',
        }).run()

    def status_of(tag_name):
        '''Return the output of `sos status -t <tag_name>` as text.'''
        raw = subprocess.check_output('sos status -t {}'.format(tag_name),
                                      shell=True)
        return raw.decode()

    # single tag: ten substeps in trunks of two
    tag = new_tag()
    submit('''
[10]
input: for_each={{'i': range(10)}}
task: tags='{}', trunk_size=2
sh: expand=True
  echo {} {{i}}
'''.format(tag, tag))
    ret = status_of(tag)
    self.assertEqual(len(ret.splitlines()), 5, "Obtained {}".format(ret))

    # several tags: the tasks must be found through the second one
    tag1 = new_tag()
    tag2 = new_tag()
    submit('''
[10]
input: for_each={{'i': range(2)}}
task: tags=['{}', '{}']
sh: expand=True
  echo {} {{i}}
'''.format(tag1, tag2, tag1))
    ret = status_of(tag2)
    self.assertEqual(len(ret.splitlines()), 2, "Obtained {}".format(ret))
def testDownload(self):
    '''Exercise the download action: single files, archives and failures.'''
    dest = 'tmp'
    if not os.path.isdir(dest):
        os.makedirs(dest)
    # remove plain files left in the destination directory
    for entry in os.listdir(dest):
        candidate = os.path.join(dest, entry)
        if os.path.isfile(candidate):
            os.remove(candidate)

    def execute(text, **executor_options):
        '''Parse ``text`` and run its workflow with optional executor options.'''
        Base_Executor(SoS_Script(text).workflow(), **executor_options).run()

    # function-call form, decompressing a tar.gz archive
    execute(r'''
[0]
download(['ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre-8.41.tar.gz'],
    dest_dir='tmp', decompress=True)
''')
    self.assertTrue(os.path.isdir('tmp/pcre-8.41'))
    self.assertTrue(os.path.isfile('tmp/pcre-8.41/pcre_get.c'))

    # a single file with an explicit destination name
    execute(r'''
[0]
download: dest_file='tmp/pcre-8.41.zip.sig'
    ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre-8.41.zip.sig
''')
    self.assertTrue(os.path.isfile('tmp/pcre-8.41.zip.sig'))

    # option dest_dir
    execute(r'''
[0]
download: dest_dir='tmp'
    ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre-8.41.zip.sig
''')
    self.assertTrue(os.path.isfile('tmp/pcre-8.41.zip.sig'))

    # one URL does not exist, so the step fails although the rest download
    with_missing = SoS_Script(r'''
[0]
download: dest_dir='tmp', decompress=True
    ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/non-existing.gz
    ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre-8.41.zip
    ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre-8.41.zip.sig
    ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre-8.41.tar.gz
''')
    first_try = with_missing.workflow()
    self.assertRaises(ExecuteError, Base_Executor(first_try).run)
    self.assertTrue(os.path.isfile('tmp/pcre-8.41/pcre_get.c'))
    # running again still fails because of the missing URL
    second_try = with_missing.workflow()
    self.assertRaises(ExecuteError, Base_Executor(second_try).run)

    archives = r'''
[0]
download: dest_dir='tmp', decompress=True
    ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre-8.41.tar.gz
    ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre-8.41.zip
    ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre-8.41.tar.bz2
    ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre-8.41.zip.sig
'''
    # decompress tar.gz, .zip and .tar.bz2 archives
    execute(archives)
    # the same downloads with signature mode 'build'
    execute(archives, config={'sig_mode': 'build'})
    #
    shutil.rmtree(dest)
def testIncludedNestedWorkFlow(self):
    '''Run nested workflows that come from an included script via sos_run.'''
    self.touch(['a.txt', 'b.txt'])
    # the script to be included; it defines the two-step workflow A
    with open('inc.sos', 'w') as included:
        included.write('''
# test sos script

# global definition
GLB = 5
parameter: parB = 10

[A_1: shared='executed']
executed.append('t.' + step_name)
output: _input[0] + '.a1'
run: expand=True
    touch {output}

[A_2: shared='executed']
executed.append('t.' + step_name)
output: _input[0] + '.a2'
run: expand=True
    touch {output}

''')

    # include everything into the current namespace
    star_import = SoS_Script('''
%from inc include *

if 'executed' not in locals():
    executed = []

[b_1: skip=False, shared='executed']
executed.append(step_name)
input: 'a.txt', 'b.txt', group_by='single'
sos_run('A', shared='executed')
''')
    Base_Executor(star_import.workflow('b')).run()
    self.assertEqual(env.sos_dict['GLB'], 5)
    self.assertEqual(env.sos_dict['parB'], 10)
    self.assertEqual(env.sos_dict['executed'],
                     ['b_1', 't.A_1', 't.A_2', 't.A_1', 't.A_2'])
    #
    subprocess.call('sos remove -s', shell=True)
    for produced in ('a.txt.a1', 'a.txt.a1.a2', 'b.txt.a1', 'b.txt.a1.a2'):
        file_target(produced).remove('both')

    # include under the alias k; the nested workflow is then k.A
    aliased = SoS_Script('''
%include inc as k

if 'executed' not in locals():
    executed = []

[b_1: skip=False, shared='executed']
executed.append('g.' + step_name)
input: 'a.txt', 'b.txt', group_by='single'
sos_run('k.A', shared='executed')
''')
    Base_Executor(aliased.workflow('b')).run()
    self.assertEqual(env.sos_dict['k'].GLB, 5)
    self.assertEqual(env.sos_dict['k'].parB, 10)
    self.assertEqual(env.sos_dict['executed'],
                     ['g.b_1', 't.k.A_1', 't.k.A_2', 't.k.A_1', 't.k.A_2'])
    #
    os.remove('inc.sos')
def testOutputOfDAG(self):
    '''Test output of dag.

    Initializes the DAG of one workflow for three sets of targets, each
    time writing the graph to its own dot file through the output_dag
    option, and compares every file with the expected graph. The dot
    files written by this test are removed at the end.
    '''
    #
    #  A1 <- B1 <- B2 <- B3
    #   |
    #   |
    #  \/
    #  A2 <- B2 <- C1 <- C2 <- C4
    #                    C3
    #
    script = SoS_Script('''
[A_1]
input: 'B1.txt'
output: 'A1.txt'
run:
    touch A1.txt

[A_2]
depends: 'B2.txt'
run:
    touch A2.txt

[B1: provides='B1.txt']
depends: 'B2.txt'
run:
    touch B1.txt

[B2: provides='B2.txt']
depends: 'B3.txt', 'C1.txt'
run:
    touch B2.txt

[B3: provides='B3.txt']
run:
    touch B3.txt

[C1: provides='C1.txt']
depends: 'C2.txt', 'C3.txt'
run:
    touch C1.txt

[C2: provides='C2.txt']
depends: 'C4.txt'
run:
    touch C2.txt

[C3: provides='C3.txt']
depends: 'C4.txt'
run:
    touch C3.txt

[C4: provides='C4.txt']
run:
    touch C4.txt

''')
    wf = script.workflow(use_default=False)
    #
    # test 1, we only need to generate target 'B1.txt'
    Base_Executor(wf, config={
        'output_dag': 'test_outofdag1.dot',
        'trace_existing': True
    }).initialize_dag(targets=['B1.txt'])
    # note that A2 is no longer mentioned
    self.assertDAG(
        'test_outofdag1.dot', '''
strict digraph "" {
"B3 (B3.txt)";
"C4 (C4.txt)";
"C2 (C2.txt)";
"C1 (C1.txt)";
"B1 (B1.txt)";
"B2 (B2.txt)";
"C3 (C3.txt)";
"B3 (B3.txt)" -> "B2 (B2.txt)";
"C4 (C4.txt)" -> "C3 (C3.txt)";
"C4 (C4.txt)" -> "C2 (C2.txt)";
"C2 (C2.txt)" -> "C1 (C1.txt)";
"C1 (C1.txt)" -> "B2 (B2.txt)";
"B2 (B2.txt)" -> "B1 (B1.txt)";
"C3 (C3.txt)" -> "C1 (C1.txt)";
}
''')
    # test 2, we would like to generate two files
    Base_Executor(wf, config={
        'output_dag': 'test_outofdag2.dot',
        'trace_existing': True
    }).initialize_dag(targets=['B2.txt', 'C2.txt'])
    # note that A2 is no longer mentioned
    self.assertDAG(
        'test_outofdag2.dot', '''
strict digraph "" {
"C4 (C4.txt)";
"B2 (B2.txt)";
"C3 (C3.txt)";
"B3 (B3.txt)";
"C2 (C2.txt)";
"C1 (C1.txt)";
"C4 (C4.txt)" -> "C2 (C2.txt)";
"C4 (C4.txt)" -> "C3 (C3.txt)";
"C3 (C3.txt)" -> "C1 (C1.txt)";
"B3 (B3.txt)" -> "B2 (B2.txt)";
"C2 (C2.txt)" -> "C1 (C1.txt)";
"C1 (C1.txt)" -> "B2 (B2.txt)";
}
''')
    # test 3, generate two separate trees
    #
    Base_Executor(wf, config={
        'output_dag': 'test_outofdag3.dot',
        'trace_existing': True
    }).initialize_dag(targets=['B3.txt', 'C2.txt'])
    # note that A2 is no longer mentioned
    self.assertDAG(
        'test_outofdag3.dot', '''
strict digraph "" {
"B3 (B3.txt)";
"C2 (C2.txt)";
"C4 (C4.txt)";
"C4 (C4.txt)" -> "C2 (C2.txt)";
}
''')
    # Remove the dot files this test actually writes. The original list
    # named only 'test.dot' and 'test_2.dot', which this test never
    # creates; they are kept in the list so behaviour stays a superset.
    for f in [
            'C2.txt', 'B3.txt', 'C4.txt', 'test.dot', 'test_2.dot',
            'test_outofdag1.dot', 'test_outofdag2.dot', 'test_outofdag3.dot'
    ]:
        if file_target(f).exists():
            file_target(f).unlink()