def main():
    """Exercise change detection for ``test_import`` and ``simple_flow``.

    Wipes the parent directory of ``/tmp/test_import/root``, prints the
    changed/all file lists for ``test_import``, runs it, and then checks that
    ``simple_flow`` reports pending changes before it is executed and none
    afterwards.

    Raises:
        AssertionError: If ``simple_flow`` reports no changes before its run,
            or still reports changes after its run.
    """
    import spiper
    from path import Path
    from pprint import pprint
    from spiper.runner import cache_run, get_all_files, get_changed_files

    spiper.rcParams['dir_layout'] = 'flat'

    # Start from a clean slate: drop the directory that holds the prefix.
    root = Path('/tmp/test_import/root')
    root.dirname().rmtree_p()

    print('#### [Note] currently the package change is not recorded')
    pprint(get_changed_files(test_import, root))
    governed = get_all_files(test_import, root)
    pprint(governed)
    cache_run(test_import, root)
    # The all-files list computed before the run is printed a second time.
    pprint(governed)

    print('#### [Note] Remote workflow should detect file changes')
    pending = get_changed_files(simple_flow, root)
    pprint(pending)
    assert pending != []

    print('### run actual workflow')
    cache_run(simple_flow, root)
    pending = get_changed_files(simple_flow, root)
    pprint(pending)
    assert pending == []
def main():
    """Run the import example and check which files stay "changed".

    Same walkthrough as a plain change-detection demo: clean the parent
    directory of ``/tmp/test_import/root``, inspect and run ``test_import``,
    then inspect and run ``simple_flow``.  After the run, exactly two log
    files are expected to be reported as changed.

    Raises:
        AssertionError: If ``simple_flow`` reports nothing to do before its
            run, or if the post-run change list is not the two log files.
    """
    import spiper
    from path import Path
    from pprint import pprint
    from spiper.runner import cache_run, get_all_files, get_changed_files

    spiper.rcParams['dir_layout'] = 'flat'

    workdir_prefix = Path('/tmp/test_import/root')
    workdir_prefix.dirname().rmtree_p()

    print('#### [Note] currently the package change is not recorded')
    pprint(get_changed_files(test_import, workdir_prefix))
    listed = get_all_files(test_import, workdir_prefix)
    pprint(listed)
    cache_run(test_import, workdir_prefix)
    # Prints the list obtained before the run once more.
    pprint(listed)

    print('#### [Note] Remote workflow should detect file changes')
    before_run = get_changed_files(simple_flow, workdir_prefix)
    pprint(before_run)
    assert before_run != []

    print('### run actual workflow')
    cache_run(simple_flow, workdir_prefix)
    after_run = get_changed_files(simple_flow, workdir_prefix)
    pprint(after_run)

    # The Flow() execution is never skipped, so its own log files are always
    # reported as changed.
    always_rewritten = [
        File('/tmp/test_import/root.workflow.log'),
        File('/tmp/test_import/root_backup.output..log'),
    ]
    assert after_run == always_rewritten
def get_tree1(self, http=0):
    """Run a three-job ``dimple_job`` chain under ``self.DIR``.

    Args:
        http: When truthy, ``http_job2`` is run first (via ``cache_run``) and
            its ``output.cache`` replaces ``/tmp/digit.txt`` as the first
            input of the ``root`` job.

    Returns:
        A 3-tuple ``(File('/tmp/digit.txt'), <job3 output.out_txt>, None)``.
        The last slot is a placeholder for a downstream tree that is not
        currently computed.
    """
    digit_txt = '/tmp/digit.txt'
    letter_txt = '/tmp/letter.txt'

    if not http:
        first_input = digit_txt
    else:
        fetched = cache_run(http_job2, self.DIR / 'root', verbose=0)
        first_input = fetched.output.cache

    # job 'root' feeds 'job2'; 'job3' consumes the outputs of both.
    root_res = force_run(
        dimple_job, self.DIR / 'root', 'ATG', first_input, letter_txt,
        verbose=0)
    job2_res = force_run(
        dimple_job, self.DIR / 'job2', 'ATG', root_res.output.out_txt, digit_txt,
        verbose=0)
    job3_res = force_run(
        dimple_job, self.DIR / 'job3', 'ATG',
        root_res.output.out_txt, job2_res.output.out_txt,
        verbose=0)

    # Disabled: spiper.runner.get_downstream_tree(fn, flat=0,strict=0)
    return File(digit_txt), job3_res.output.out_txt, None
def test_cacherun_use_cache(self):
    """Force-run ``simple_job`` once, then call ``cache_run`` with the same arguments."""
    # NOTE(review): the text below talks about triggering a recalculation,
    # while the test name is about using the cache - confirm which is meant.
    _ = ''' if the input files / params to a simple node changed, then trigger a recalc '''

    job_args = (simple_job, self.DIR / 'root', 'ATG', '/tmp/digit.txt')
    quiet = {'verbose': 0}

    force_run(*job_args, **quiet)
    # NOTE(review): the outcome is captured but nothing is asserted on it.
    result = cache_run(*job_args, **quiet)
from path import Path

# Arguments shared by every call on ``some_node`` in this walkthrough.
NODE_PREFIX = '/tmp/some_node/root'
INPUT_FILE = '/tmp/input_file.txt'

# Remove the directory holding the prefix so the walkthrough starts clean.
Path(NODE_PREFIX).dirname().rmtree_p()

fs = get_all_files(some_node, NODE_PREFIX, INPUT_FILE)
print('##### all files governed by this node #######')
pprint(fs)

fs = get_changed_files(some_node, NODE_PREFIX, INPUT_FILE)
print('#### files changed in the next execution of this node #######')
pprint(fs)

print('#### write some input file ###')
with open(INPUT_FILE, 'w') as f:
    f.write('barbarfoo\n')

print('#### actual execution #####')
res = cache_run(some_node, NODE_PREFIX, INPUT_FILE)
print('#### the second execution is skipped ###')
res = cache_run(some_node, NODE_PREFIX, INPUT_FILE)
print()

# A skipped execution implies that running this node again would not change
# any file, so the change list must now be empty.
fs = get_changed_files(some_node, NODE_PREFIX, INPUT_FILE)
print('#### files changed in the next execution of this node #######')
pprint(fs)
assert fs == []
from path import Path
from spiper.runner import cache_run, force_run


def simplest_node(self, prefix, _output=[]):
    """Print the caller object and hand it back.

    Args:
        self: The object the runner passes in; it is printed with ``%r``.
        prefix: Accepted but not used in the body.
        _output: Left as an empty list.
            NOTE(review): presumably a runner convention for declaring
            outputs rather than an ordinary default - confirm before changing.

    Returns:
        ``self`` unchanged; returning the runtime caller as the output is
        beneficial.
    """
    print('Running node:%r' % self)
    return self


if __name__ == '__main__':
    print('\n### running')
    prefix = Path('/tmp/bulid_spiper/root')
    prefix.dirname().rmtree_p()

    # Two identical calls: the summary printed below records that the second
    # one was served from cache.
    for _attempt in range(2):
        cache_run(simplest_node, prefix)
    print()

    s = ''' ### comment expect: Running node:spiper.runner.Caller(dotname='__main__.simplest_node',prefix_named=File('/tmp/bulid_spiper/root.simplest_node')) Running node:spiper.runner.Caller(dotname='__main__.simplest_node',prefix_named=File('/tmp/bulid_spiper/root.simplest_node')) got: Running node:spiper.runner.Caller(dotname='__main__.simplest_node',prefix_named=File('/tmp/bulid_spiper/root.simplest_node')) The second run accessed cache '''
    print(s)
def main(self=None, prefix=None):
    """Walk through the mock-flow example and assert each reported file list.

    Steps, in order:

    1. preview the files ``workflow`` would change;
    2. ``mock_run`` the workflow and preview the files ``backup`` would change;
    3. preview ``run_and_backup`` with arguments ``(1, 100)`` and execute it;
    4. preview ``run_and_backup`` with ``(2, 200)``, list all of its files,
       and execute it.

    Every preview is pretty-printed and compared with an expected list of
    ``File`` objects.  The expected lists are derived from ``prefix`` and
    ``backup_prefix`` instead of a literal home directory, so the checks hold
    for whichever user ``~`` expands to.

    Args:
        self: Unused.
        prefix: Ignored.  It is overwritten with ``/tmp/spiper.symbolic/root``
            because the ``prefix is None`` guard is commented out.

    Raises:
        AssertionError: If any reported file list differs from the expected one.

    Note:
        The parent directories of both the workflow prefix and the backup
        prefix are removed (``rmtree_p``) before anything runs.
    """
    from spiper.runner import cache_run, mock_run, get_changed_files, get_all_files
    from spiper.shell import LoggedShellCommand
    from spiper.types import File, CacheFile
    from pprint import pprint

    spiper.rcParams['dir_layout'] = 'clean'

    # if prefix is None:
    prefix = Path('/tmp/spiper.symbolic/root')
    # backup_prefix = File('/home/user/.temp/backup_03_mock_flow/root')
    backup_prefix = File('~/.temp/backup_03_mock_flow/root').expand()
    prefix.dirname().rmtree_p()
    backup_prefix.dirname().rmtree_p()
    print('\n...[start]%r' % prefix)

    def expected(base, suffixes):
        # Every expected leaf file is "<prefix><suffix>".
        return [File(base + suffix) for suffix in suffixes]

    workflow_suffixes = [
        '.workflow.log',
        '.random_seq.seq',
        '.random_seq_const.seq',
        '.transcribe.fasta',
        '.mutate.fasta',
        '.source.py',
    ]
    backup_suffixes = [
        '.subflow.random_seq.output.seq',
        '.subflow.random_seq_const.output.seq',
        '.subflow.transcribe.output.fasta',
        '.subflow.mutate.output.fasta',
        '.output.log',
        '.source.py',
    ]
    graph_suffixes = [
        '.plot_graph.deptree_json',
        '.plot_graph.deptree_dot_txt',
    ]
    # Files expected to be reported unchanged once a run with other numeric
    # arguments has already been executed.
    preserved_workflow = {'.random_seq_const.seq', '.source.py'}
    preserved_backup = {'.subflow.random_seq_const.output.seq', '.source.py'}

    #### once a workflow is defined, we can view the proposed file changes
    fs = get_changed_files(workflow, prefix, 1, 100, verbose=0)
    pprint(fs)
    # The backup copy of source.py is not part of the plain workflow.
    assert fs == expected(prefix, workflow_suffixes)

    ### backup is conveniently defined as a workflow taking an executed
    ### workflow as an input.
    ### To check the proposed backup, mock_run() the workflow first.
    workflow_out = mock_run(workflow, prefix, 1, 100)
    fs = get_changed_files(backup, backup_prefix, workflow_out)
    pprint(fs)
    assert fs == expected(backup_prefix, backup_suffixes)

    ### a convenient Flow may be defined to execute the two in chain
    ### If there is certain change to the workflow,
    ### the backup can also be run
    every_file = (
        expected(prefix, workflow_suffixes)
        + expected(backup_prefix, backup_suffixes + graph_suffixes)
    )
    fs = get_changed_files(run_and_backup, prefix, 1, 100, backup_prefix, verbose=0)
    pprint(fs)
    assert fs == every_file

    ###### constants that are preserved between runs should be detected unchanged
    _ = cache_run(run_and_backup, prefix, 1, 100, backup_prefix, verbose=0)
    fs = get_changed_files(run_and_backup, prefix, 2, 200, backup_prefix, verbose=0)
    pprint(fs)
    assert fs == (
        expected(
            prefix,
            [s for s in workflow_suffixes if s not in preserved_workflow])
        + expected(
            backup_prefix,
            [s for s in backup_suffixes if s not in preserved_backup]
            + graph_suffixes)
    )

    ##### get_all_files() returns a leaf file regardless of whether it is changed
    fs = get_all_files(run_and_backup, prefix, 2, 200, backup_prefix, verbose=0)
    pprint(fs)
    assert fs == every_file

    _ = cache_run(run_and_backup, prefix, 2, 200, backup_prefix, verbose=0)
def runner(*a, **kw):
    """Forward every argument to ``cache_run`` with ``check_only=2`` added.

    Passing ``check_only`` in ``kw`` as well raises ``TypeError`` because the
    keyword would be supplied twice.

    Returns:
        Whatever ``cache_run`` returns.
    """
    # NOTE(review): the meaning of check_only=2 is defined by cache_run and is
    # not visible here - confirm against its documentation.
    return cache_run(*a, check_only=2, **kw)
from spiper.types import Node
from spiper.types import Flow
from spiper.types import RemotePythonObject as RPO

# Remote package spec: "<name>@<tarball url>" pinned to commit 8cd0f6b.
package_path = 'spiper_mock_flow@https://github.com/shouldsee/spiper_mock_flow/tarball/8cd0f6b'


@Flow
def simple_flow(
        self,
        prefix,
        _main=RPO(package_path, None, 'run_and_backup'),
        _output=[]):
    """Load ``run_and_backup`` from the remote package and run it as a subflow.

    The loaded callable is run through ``self.runner`` with ``prefix``, the
    constants ``1`` and ``20``, and ``prefix + '_backup'`` as arguments.

    Returns:
        ``self``.
    """
    remote_entry = _main.loaded()
    self.runner(remote_entry, prefix, 1, 20, prefix + '_backup')
    return self


if __name__ == '__main__':
    from path import Path
    from pprint import pprint
    from spiper.runner import get_changed_files, get_all_files, cache_run

    prefix = Path('/tmp/test_import/root')
    prefix.dirname().rmtree_p()

    # Preview the changes, execute the flow, then preview again.
    pprint(get_changed_files(simple_flow, prefix))
    cache_run(simple_flow, prefix)
    pprint(get_changed_files(simple_flow, prefix))
'gzip -d | cut -f2- >', self.output.genepred, ] LoggedShellCommand(CMD, self.output.cmd, mode='w') CMD = ['genePredToGtf', 'file', self.output.genepred, self.output.gtf] LoggedSingularityCommand(CMD, _IMAGE, self.output.cmd, mode='a') # return if __name__ == '__main__': from spiper.runner import force_run, cache_run data = {} data['fasta'] = cache_run( get_fasta, '~/.temp/0305', ) data['gtf'] = cache_run( get_genepred, '~/.temp/0305', ) data['flow1'] = cache_run( workflow, '~/.temp/0305.sample1', '~/.temp/hisat2/', data['fasta'].output.fasta, 'wuhan-ncov19', data['gtf'].output.gtf, './tests/data/test_R1_.fastq', './tests/data/test_R2_.fastq', 2,
_output=[]): curr = self.runner(get_fasta, prefix) curr = self.runner(get_genepred, prefix) curr = self.runner(workflow, prefix + '.sample1', prefix + '.hisat2/wuhan-ncov19', self.subflow['get_fasta'].output.fasta, self.subflow['get_genepred'].output.gtf, './test_data/test_R1_.fastq', './test_data/test_R2_.fastq', _THREADS) return self if __name__ == '__main__': from pprint import pprint from spiper.runner import force_run, cache_run cache_run(test_job, '_temp_build/root') 'python3 -m spiper run $PACKAGE TOPLEVEL:test_job _temp_build/root' ###### if 0: ################################### TBC afterwards ############################ def get_htseq(): _ = ''' #### htseq-count is too slow and not used htseq-count -s reverse -f bam 809_S1.bam /home/feng/ref/Arabidopsis_thaliana_TAIR10/annotation/genes.gtf -r pos -o 809_S1.htseq.sam >809_S1.htseq.count 2>809_S1.htseq.log '''