def test_upstream(self):
    """Chain two simple_job runs, then check that the upstream-file
    lookup for the second job's output reports both of its inputs."""
    args = (simple_job, self.DIR / 'root', 'ATG', '/tmp/digit.txt')
    root_job = force_run(*args, verbose=0)
    print('[param]', spiper.rcParams)
    args = (simple_job, self.DIR / 'job2', 'ATG', self.DIR / 'root.simple_job.out_txt')
    second_job = force_run(*args, verbose=0)
    # node-level lookup is exercised but not asserted on yet
    nodes = spiper.graph.get_upstream_nodes([File('/tmp/digit.txt')], strict=0)
    print('''##### no test for get_upstream_nodes()''')
    got = spiper.graph.get_upstream_files(
        [File(second_job.output.out_txt)], strict=0, flat=1)[1:]
    expected = [
        InputFile('~/.temp/singular-pipe_test_build/root.simple_job.out_txt').expand(),
        InputFile('/tmp/digit.txt'),
    ]
    expected = [item.expand() for item in expected]
    assert sorted(expected) == sorted(got), json.dumps((got, expected), indent=2)
def test_loadable_subprocess(self):
    '''
    Make sure the input_json file can be loaded from other than project directory.
    '''
    # Run one job with the 'flat' layout so the input_json lands at a
    # known path under the build directory.
    tups = (simple_job, self.DIR / 'root', 'ATG', '/tmp/digit.txt')
    force_run(*tups, dir_layout='flat')
    # Spawn a fresh python3 in /tmp (i.e. NOT the project directory) and
    # round-trip the dumped caller through _loads/_dumps there.
    # NOTE(review): the subprocess only prints the comparison result; it
    # does not fail the test on mismatch -- confirm this is intentional.
    res = SafeShellCommand('''
set -e
cd /tmp/
python3 -<<EOF
import json
from path import Path
from spiper.runner import _loads,_dumps
fn = Path("~/.temp/singular-pipe_test_build/root.simple_job.input_json").expand()
d = json.load(open(fn,'r'))
x = _loads(d['ident'])
x = _loads(d['caller_dump'])
s = "_dumps(x) == d['caller_dump']"
print('#'*10 +' '+ s +' is not True, ')
print(s)
print(eval(s))
# print(x)
# print(x.job.__dict__)
EOF
cd $OLDPWD
''', 1, shell=True)
    print(res)
def test_graph_2(self):
    """Chain two prefix_job runs and (eventually) render the downstream
    dependency graph of /tmp/pjob.txt as an SVG.

    The graph rendering is still TBI -- the method returns before
    reaching it, so everything after ``return`` is intentionally dead.
    """
    # Seed input file for the prefix_job chain.
    # (was: `fn.touch() if not fn.isfile() else None` -- a conditional
    # expression used purely for its side effect; plain `if` is clearer.
    # Unused locals THREADS / DATA_DIR removed.)
    fn = File('/tmp/pjob.txt')
    if not fn.isfile():
        fn.touch()
    tups = (prefix_job, self.DIR / 'root', '/tmp/pjob',)
    job1 = job = force_run(*tups)
    tups = (prefix_job, self.DIR / 'root2', job.output.out_prefix,)
    job = force_run(*tups)
    fn = File(job1.output.out_prefix + '.1')
    fn = File('/tmp/pjob.txt')
    ##### dependency is to Prefix, not to File
    # fn = Prefix(fn[:-4])
    print('###### [TBI]test_graph_2 fix this')
    return
    # --- unreachable until the TBI above is resolved ---
    tree = spiper.graph.get_downstream_tree([fn], flat=0, strict=0)
    g = graph_from_tree(tree, last=fn)
    g.render(filename=self.DIR / 'graphs' / fn, format='svg')
    print(json.dumps(tree, indent=2, default=repr))
def get_tree1(self, http=0):
    """Build a three-node dimple_job chain (optionally seeded from an
    http_job2 cache when ``http`` is truthy) and return the chain's root
    input file, the final output file, and a placeholder tree (None)."""
    if http:
        args = (http_job2, self.DIR / 'root',)
        cached = cache_run(*args, verbose=0)
        args = (dimple_job, self.DIR / 'root', 'ATG', cached.output.cache, '/tmp/letter.txt')
    else:
        args = (dimple_job, self.DIR / 'root', 'ATG', '/tmp/digit.txt', '/tmp/letter.txt')
    first = force_run(*args, verbose=0)
    args = (dimple_job, self.DIR / 'job2', 'ATG', first.output.out_txt, '/tmp/digit.txt')
    second = force_run(*args, verbose=0)
    args = (dimple_job, self.DIR / 'job3', 'ATG', first.output.out_txt, second.output.out_txt)
    third = force_run(*args, verbose=0)
    root_file = File('/tmp/digit.txt')
    leaf_file = third.output.out_txt
    # tree construction is currently disabled; slot kept for callers
    tree = None
    return root_file, leaf_file, tree
def test_cacherun_use_cache(self):
    _ = '''
    if the input files / params to a simple node changed, then trigger a recalc
    '''
    # A force_run followed by an identical cache_run should be a cache hit.
    args = (simple_job, self.DIR / 'root', 'ATG', '/tmp/digit.txt')
    force_run(*args, verbose=0)
    cache_run(*args, verbose=0)
def test_caller_struct(self):
    """Placeholder: caller-structure checking is not implemented.
    Runs a single job, prints a notice, and returns early."""
    args = (simple_job, self.DIR / 'root', 'ATG', '/tmp/digit.txt')
    res = force_run(*args, verbose=0)
    print('#### test_caller_struct() not impled')
    return
    # unreachable scaffolding kept for the eventual implementation
    args = (simple_job, self.DIR / 'job2', 'ATG', res.output.out_txt)
    args = (simple_job, self.DIR / 'job2', 'ATG', self.DIR / 'root.simple_job.out_txt')
    force_run(*args, verbose=0)
def test_dag(self):
    """Smoke test: chain two simple_job runs; no assertions yet."""
    first = (simple_job, self.DIR / 'root', 'ATG', '/tmp/digit.txt')
    force_run(*first, verbose=0)
    second = (simple_job, self.DIR / 'job2', 'ATG', self.DIR / 'root.simple_job.out_txt')
    force_run(*second, verbose=0)
def test_downstream(self):
    """Run two chained simple_jobs under the 'clean' dir_layout and
    verify get_downstream_files reports every file derived from the
    root input (outputs plus cache_pk records)."""
    layout = 'clean'
    # start from a clean build directory
    (self.DIR / 'root').dirname().rmtree_p()
    force_run(simple_job, self.DIR / 'root', 'ATG', '/tmp/digit.txt',
              dir_layout=layout, verbose=0)
    force_run(simple_job, self.DIR / 'job2', 'ATG',
              self.DIR / 'root.simple_job.out_txt',
              dir_layout=layout, verbose=0)
    import spiper.graph
    # node-level lookup is exercised but not asserted on yet
    spiper.graph.get_downstream_nodes([File('/tmp/digit.txt')],
                                      strict=0, flat=0, dir_layout=layout)
    print('''##### no test for nodes in get_downstream_nodes()''')
    got = spiper.graph.get_downstream_files([File('/tmp/digit.txt')],
                                            strict=0, flat=1,
                                            dir_layout=layout, verbose=2)[1:]
    expected = [
        File('~/.temp/singular-pipe_test_build/root.simple_job.out_txt'),
        File('~/.temp/singular-pipe_test_build/_spiper/root.simple_job.cache_pk'),
        File('~/.temp/singular-pipe_test_build/job2.simple_job.out_txt'),
        File('~/.temp/singular-pipe_test_build/_spiper/job2.simple_job.cache_pk'),
    ]
    expected = [f.expand() for f in expected]
    assert sorted(expected) == sorted(got), json.dumps((got, expected), indent=2, default=repr)
def test_cacherun_output_prefix_change(self):
    _ = '''
    if the output files to a simple node changed trigger a recalc
    '''
    args = (prefix_job, self.DIR / 'root', '/tmp/digit',)
    node = force_run(*args, verbose=0)
    changed = cache_check_changed(*args, verbose=0, check_changed=1)[1]
    assert changed == 0
    # touch one of the prefix-expanded output files ...
    target = node.output.out_prefix + '.%d' % 0
    import time
    time.sleep(0.1)  # make sure the mtime actually advances
    Path(target).touch()
    # ... which must flip the output-changed flag
    changed = cache_check_changed(*args, verbose=0)[1]
    assert changed == 1, node
def test_cacherun_code_change(self):
    _ = '''
    the defition of a script change is ambiguous here we used a tuple to identify a function ( func_code.co_code func_code.co_consts )
    '''
    args = (simple_job, self.DIR / 'root', 'ATG', '/tmp/digit.txt')
    force_run(*args, verbose=0)
    # same job again: nothing changed
    changed = cache_check_changed(*args, verbose=0, check_changed=1)[0]
    assert changed == 0
    # a job with different bytecode/constants must register as changed
    args = (self.change_job(), self.DIR / 'root', 'ATG', '/tmp/digit.txt')
    changed = cache_check_changed(*args, verbose=0)[0]
    assert changed == 1
def test_downstream(self):
    # NOTE(review): a second `test_downstream` also exists in this file
    # (using dir_layout=/spiper.graph instead of config=/spiper.runner).
    # If both live in the same class, Python keeps only the later
    # definition and the earlier one is silently never run -- confirm
    # which version is intended and rename or delete the other.
    dir_layout = 'clean'
    tups = (simple_job, self.DIR / 'root', 'ATG', '/tmp/digit.txt',)
    force_run(*tups, config=dir_layout, verbose=0)
    tups = (simple_job, self.DIR / 'job2', 'ATG', self.DIR / 'root.simple_job.out_txt',)
    force_run(*tups, config=dir_layout, verbose=0)
    import spiper.runner
    # node-level lookup is exercised but not asserted on
    # s =
    res = spiper.runner.get_downstream_nodes(File('/tmp/digit.txt'), strict=0, flat=0, config=dir_layout)
    print('''##### no test for nodes in get_downstream_nodes()''')
    # print(res)
    # file-level lookup IS asserted: both jobs' outputs and cache records
    # must be reported as downstream of the root input
    res = spiper.runner.get_downstream_files(File('/tmp/digit.txt'), strict=0, flat=1, config=dir_layout)
    expect = [
        File('~/.temp/singular-pipe_test_build/root.simple_job.out_txt'),
        File(
            '~/.temp/singular-pipe_test_build/_spiper/root.simple_job.cache_pk'
        ),
        File('~/.temp/singular-pipe_test_build/job2.simple_job.out_txt'),
        File(
            '~/.temp/singular-pipe_test_build/_spiper/job2.simple_job.cache_pk'
        ),
    ]
    expect = [x.expand() for x in expect]
    assert sorted(expect) == sorted(res), json.dumps((res, expect), indent=2)
def test_cacherun_output_change(self):
    _ = '''
    if the output files to a simple node changed trigger a recalc
    '''
    args = (simple_job, self.DIR / 'root', 'ATG', '/tmp/digit.txt')
    force_run(*args, verbose=0)
    assert cache_check_changed(*args, verbose=0)[1] == 0
    # touching the job's output file must flip the output-changed flag
    out_file = self.DIR / 'root.simple_job.out_txt'
    import time
    time.sleep(0.1)  # make sure the mtime actually advances
    Path(out_file).touch()
    assert cache_check_changed(*args, verbose=0)[1] == 1
def test_cacherun_input_change(self):
    _ = '''
    if the input files / params to a simple node changed, then trigger a recalc
    '''
    args = (simple_job, self.DIR / 'root', 'ATG', '/tmp/digit.txt')
    force_run(*args, verbose=0)
    assert cache_check_changed(*args, verbose=0)[0] == 0
    # touching the input file must flip the input-changed flag
    import time
    time.sleep(0.1)
    Path(args[-1]).touch()
    assert cache_check_changed(*args, verbose=0)[0] == 1
    # re-running resets the cache ...
    args = (simple_job, self.DIR / 'root', 'ATG', '/tmp/digit.txt')
    force_run(*args, verbose=0)
    assert cache_check_changed(*args, verbose=0)[0] == 0
    # ... and changing a plain parameter is detected too
    args = (simple_job, self.DIR / 'root', 'ATGC', '/tmp/digit.txt')
    assert cache_check_changed(*args, verbose=0)[0] == 1
def main(force_run=force_run, prefix=None):
    """Demonstrate cache-invalidation semantics of File- vs Prefix-typed
    inputs: run gen_files + a tarball job twice for each flavour and
    print the expected cache_run log line next to the observed output.

    force_run : runner callable (injectable for testing)
    prefix    : output Prefix; defaults to /tmp/spiper.test_run/root
    Returns (res1, res2) from the last pair of runs.
    """
    # spiper.rcParams['dir_layout'] = dir_layout
    if prefix is None:
        prefix = Path('/tmp/spiper.test_run/root')
    # start from a clean output directory
    prefix.dirname().rmtree_p()
    # the `print(...) if __name__ == '__main__' else None` pattern keeps
    # the narration quiet when this module is imported rather than run
    print('\n---------------------Run1---\n## got') if __name__ == '__main__' else None
    res1 = force_run(gen_files, prefix,verbose=0)
    res2 = force_run(tarball_dangerous_cache, prefix, res1.prefix_named, verbose=1)
    res1 = force_run(gen_files, prefix,verbose=0)
    res2 = force_run(tarball_dangerous_cache, prefix, res1.prefix_named, verbose=1)
    s = '''
## expect
[cache_run] {"job_name"="tarball_dangerous_cache"_"input_ident_changed"=1_"output_ident_chanegd"=0}
* This change to input is ignored because tarball_dangerous_cache(input_prefix=File) would not expand to match the files during input validation The type specified in def line will be used for detecting a timestamp/filesize change
'''.strip()
    print(s) if __name__ == '__main__' else None
    print('---------------------Run2---\n## got') if __name__ == '__main__' else None
    res1 = force_run(gen_files, prefix)
    res2 = force_run(tarball_prefix_cache, prefix, res1.prefix_named, verbose=1)
    res1 = force_run(gen_files, prefix)
    res2 = force_run(tarball_prefix_cache, prefix, res1.prefix_named, verbose=1)
    s = '''
## expect
[cache_run] {"job_name"="tarball_prefix_cache"_"input_ident_changed"=1_"output_ident_chanegd"=0}
* Because tarball_prefix_cache(input_prefix=Prefix) is expanded into the appropriate files during input validation. * Note that the Prefix only expands into a shallow match and does not recurse into sub-directory during input validation
'''.strip()
    print(s) if __name__ == '__main__' else None
    # NOTE(review): this .format does attribute access
    # (spiper.rcParams.dir_layout); verify rcParams supports attribute
    # lookup and not only dict-style item access.
    print('------Output Directory. (dir_layout={spiper.rcParams.dir_layout})--------\n'.format(spiper=spiper),
        LoggedShellCommand(['echo [ls]',prefix,'&&','ls','-lhtr',prefix.dirname(),],'/dev/null'))
    return res1,res2
    # Tail of an enclosing definition whose `def` line is above this
    # chunk -- left untouched.  NOTE(review): presumably re-runs the
    # hand-patch step under the 'production' config; confirm upstream.
    curr = self.config_runner(tag='production')(patch_by_hand, prefix, curr.output.csv, hand_patch_csv)
    return self


if __name__ == '__main__':
    from spiper.runner import get_changed_files, cache_run
    from pprint import pprint
    import sys
    # Special mode: apply the hand patch in a scratch directory and link
    # the resulting CSV back as ./current.csv, then exit.
    if 'patch_by_hand' in sys.argv:
        from spiper.runner import force_run
        from path import Path
        # from main import patch_by_hand
        curr = force_run(patch_by_hand, '_temp', 'current.csv', 'root.hand_patch.csv')
        curr.output.csv.link(Path('current.csv').unlink_p())
        sys.exit(0)
    # Default mode: report which files changed; the pipeline itself only
    # runs when --run is passed explicitly.
    tups = (
        main,
        '$PWD/_build/root',
        '$PWD/metacsv_ath_rnaseq/root.dump_columns.csv',
        'NULL',
        '$PWD/root.hand_patch.csv',
    )
    runner = get_changed_files
    pprint(runner(*tups))
    runner = cache_run
    if '--run' in sys.argv:
        curr = runner(*tups)