def main():
    """Walk through change detection for ``test_import`` and ``simple_flow``.

    The working directory ``/tmp/test_import`` is removed first, then the
    file listings reported by spiper are printed before and after running
    each workflow. After ``simple_flow`` has been run, no changed file is
    expected to be reported.

    Raises:
        AssertionError: if ``simple_flow`` reports no pending changes before
            it is run, or reports any change after it has been run.
    """
    from pprint import pprint

    import spiper
    from path import Path
    from spiper.runner import get_all_files, get_changed_files, cache_run

    spiper.rcParams['dir_layout'] = 'flat'

    # Start from a clean slate: drop the directory that holds the prefix.
    prefix = Path('/tmp/test_import/root')
    workdir = prefix.dirname()
    workdir.rmtree_p()

    print('#### [Note] currently the package change is not recorded')
    # First the changed files, then all files, for the same node.
    for collect in (get_changed_files, get_all_files):
        listing = collect(test_import, prefix)
        pprint(listing)
    cache_run(test_import, prefix)
    # Same listing as printed last (the get_all_files result); it is not
    # recomputed after the run.
    pprint(listing)

    print('#### [Note] Remote workflow should detect file changes')
    pending = get_changed_files(simple_flow, prefix)
    pprint(pending)
    assert pending != []

    print('### run actual workflow')
    cache_run(simple_flow, prefix)

    leftover = get_changed_files(simple_flow, prefix)
    pprint(leftover)
    assert leftover == []
Exemple #2
0
def main():
    """Walk through change detection for ``test_import`` and ``simple_flow``.

    Removes ``/tmp/test_import``, prints the file listings reported by
    spiper around each run, and checks that after ``simple_flow`` has been
    executed only its two log files are still reported as changed.

    Raises:
        AssertionError: if ``simple_flow`` reports no pending changes before
            it is run, or if the post-run listing is not exactly the two
            expected log files.
    """
    from pprint import pprint

    import spiper
    from path import Path
    from spiper.runner import get_all_files, get_changed_files, cache_run

    spiper.rcParams['dir_layout'] = 'flat'

    # Start from a clean slate: drop the directory that holds the prefix.
    prefix = Path('/tmp/test_import/root')
    workdir = prefix.dirname()
    workdir.rmtree_p()

    print('#### [Note] currently the package change is not recorded')
    # First the changed files, then all files, for the same node.
    for collect in (get_changed_files, get_all_files):
        listing = collect(test_import, prefix)
        pprint(listing)
    cache_run(test_import, prefix)
    # Same listing as printed last (the get_all_files result); it is not
    # recomputed after the run.
    pprint(listing)

    print('#### [Note] Remote workflow should detect file changes')
    pending = get_changed_files(simple_flow, prefix)
    pprint(pending)
    assert pending != []

    print('### run actual workflow')
    cache_run(simple_flow, prefix)

    leftover = get_changed_files(simple_flow, prefix)
    pprint(leftover)
    # The Flow() execution is never skipped, so its self.output.log is
    # always reported as changed.
    still_changing = [
        File('/tmp/test_import/root.workflow.log'),
        File('/tmp/test_import/root_backup.output..log'),
    ]
    assert leftover == still_changing
Exemple #3
0
    def get_tree1(self, http=0):
        """Run a three-job ``dimple_job`` chain under ``self.DIR``.

        Jobs are forced at the prefixes ``root``, ``job2`` and ``job3``;
        ``job2`` consumes the output of ``root`` and ``job3`` consumes the
        outputs of both earlier jobs.

        Args:
            http: when truthy, ``http_job2`` is cache-run first at ``root``
                and its ``output.cache`` replaces ``/tmp/digit.txt`` as the
                first input of the ``root`` job.

        Returns:
            A tuple ``(File('/tmp/digit.txt'), job3_out_txt, None)``. The
            third element is a placeholder; the downstream-tree lookup is
            currently disabled.
        """
        digit_txt = '/tmp/digit.txt'
        letter_txt = '/tmp/letter.txt'

        # Pick the first input of the root job.
        if http:
            fetched = cache_run(http_job2, self.DIR / 'root', verbose=0)
            first_input = fetched.output.cache
        else:
            first_input = digit_txt

        res1 = force_run(dimple_job, self.DIR / 'root', 'ATG',
                         first_input, letter_txt, verbose=0)
        res2 = force_run(dimple_job, self.DIR / 'job2', 'ATG',
                         res1.output.out_txt, digit_txt, verbose=0)
        res3 = force_run(dimple_job, self.DIR / 'job3', 'ATG',
                         res1.output.out_txt, res2.output.out_txt, verbose=0)

        # tree = spiper.runner.get_downstream_tree(fn, flat=0,strict=0)
        return File(digit_txt), res3.output.out_txt, None
Exemple #4
0
    def test_cacherun_use_cache(self):
        _ = '''
		if the input files / params to a simple node changed,
		then trigger a recalc
		'''
        tups = (simple_job, self.DIR / 'root', 'ATG', '/tmp/digit.txt')
        force_run(*tups, verbose=0)
        result = cache_run(*tups, verbose=0)
    from path import Path
    Path('/tmp/some_node/root').dirname().rmtree_p()

    fs = get_all_files(some_node, '/tmp/some_node/root', '/tmp/input_file.txt')
    print('##### all files governed by this node #######')
    pprint(fs)

    fs = get_changed_files(some_node, '/tmp/some_node/root',
                           '/tmp/input_file.txt')
    print('#### files changed in the next execution of this node #######')
    pprint(fs)

    print('#### write some input file ###')
    with open('/tmp/input_file.txt', 'w') as f:
        f.write('barbarfoo\n')

    print('#### actual execution #####')
    res = cache_run(some_node, '/tmp/some_node/root', '/tmp/input_file.txt')

    print('#### the second execution is skipped ###')
    res = cache_run(some_node, '/tmp/some_node/root', '/tmp/input_file.txt')
    print()

    #### the fact that the execution is skipped implies
    #### that executing this node would not change any file
    fs = get_changed_files(some_node, '/tmp/some_node/root',
                           '/tmp/input_file.txt')
    print('#### files changed in the next execution of this node #######')
    pprint(fs)
    assert fs == []
Exemple #6
0
from path import Path
from spiper.runner import cache_run, force_run


def simplest_node(self, prefix, _output=[]):
    """Print the calling node and hand it back unchanged.

    Args:
        self: the object passed as the node's caller; echoed with ``%r``.
        prefix: accepted but not used by this node.
        _output: not used in the body.
            NOTE(review): presumably spiper's declaration of output names
            rather than an accumulator; confirm before changing the default.

    Returns:
        ``self``, as received.
    """
    banner = 'Running node:%r' % self
    print(banner)
    # Returning the runtime caller as the output is beneficial.
    return self


if __name__ == '__main__':
    # Demo: run the same node twice from a clean directory. The note printed
    # at the end records that only one "Running node" line is observed.
    print('\n### running')
    prefix = Path('/tmp/bulid_spiper/root')
    prefix.dirname().rmtree_p()
    for _attempt in range(2):
        cache_run(simplest_node, prefix)
    print()

    print('''
	### comment
	expect: 
	Running node:spiper.runner.Caller(dotname='__main__.simplest_node',prefix_named=File('/tmp/bulid_spiper/root.simplest_node'))
	Running node:spiper.runner.Caller(dotname='__main__.simplest_node',prefix_named=File('/tmp/bulid_spiper/root.simplest_node'))
	
	got:
	Running node:spiper.runner.Caller(dotname='__main__.simplest_node',prefix_named=File('/tmp/bulid_spiper/root.simplest_node'))

	The second run accessed cache
	''')
Exemple #7
0
def main(self=None, prefix=None):
    """Check spiper's predicted file changes for a workflow and its backup.

    The function wipes the parent directories of the workflow prefix and of
    the backup prefix, then compares the lists returned by
    ``get_changed_files`` / ``get_all_files`` for ``workflow``, ``backup``
    and ``run_and_backup`` against the expected file names.

    The expected lists are derived from ``prefix`` and from the expanded
    ``backup_prefix``, so the checks do not depend on the home directory of
    the account running them.

    Args:
        self: not used.
        prefix: not used; it is unconditionally replaced by
            ``/tmp/spiper.symbolic/root``.

    Raises:
        AssertionError: if any reported file list differs from the expected
            one.
    """
    import spiper
    from path import Path
    from spiper.runner import cache_run, mock_run, get_changed_files, get_all_files
    from spiper.shell import LoggedShellCommand
    from spiper.types import File, CacheFile
    from pprint import pprint

    spiper.rcParams['dir_layout'] = 'clean'

    # NOTE: the caller-supplied prefix is overridden on purpose here; the
    # directory holding the prefix is deleted below, so honouring an
    # arbitrary caller path would delete the caller's directory.
    prefix = Path('/tmp/spiper.symbolic/root')
    backup_prefix = File('~/.temp/backup_03_mock_flow/root').expand()
    prefix.dirname().rmtree_p()
    backup_prefix.dirname().rmtree_p()

    print('\n...[start]%r'%prefix)

    # File-name suffixes, in the order the runner is expected to report them.
    workflow_suffixes = (
        '.workflow.log',
        '.random_seq.seq',
        '.random_seq_const.seq',
        '.transcribe.fasta',
        '.mutate.fasta',
        '.source.py',
    )
    backup_suffixes = (
        '.subflow.random_seq.output.seq',
        '.subflow.random_seq_const.output.seq',
        '.subflow.transcribe.output.fasta',
        '.subflow.mutate.output.fasta',
        '.output.log',
        '.source.py',
    )
    plot_suffixes = (
        '.plot_graph.deptree_json',
        '.plot_graph.deptree_dot_txt',
    )
    # Outputs expected to stay unchanged when only the run parameters change.
    constant_suffixes = frozenset((
        '.random_seq_const.seq',
        '.subflow.random_seq_const.output.seq',
        '.source.py',
    ))

    def expected(base, suffixes):
        """Return the File list obtained by appending each suffix to base."""
        return [File(base + suffix) for suffix in suffixes]

    def varying(suffixes):
        """Drop the suffixes of outputs that are constant between runs."""
        return [s for s in suffixes if s not in constant_suffixes]

    everything = (
        expected(prefix, workflow_suffixes)
        + expected(backup_prefix, backup_suffixes)
        + expected(backup_prefix, plot_suffixes)
    )

    #### once a workflow is defined, we can view the proposed file changes
    fs = get_changed_files(workflow, prefix, 1, 100, verbose=0)
    pprint(fs)
    assert fs == expected(prefix, workflow_suffixes)

    ### backup is conveniently defined as a workflow taking an executed
    ### workflow as an input. To check the proposed backup, mock_run() the
    ### workflow first.
    workflow_out = mock_run(workflow, prefix, 1, 100)
    fs = get_changed_files(backup, backup_prefix, workflow_out)
    pprint(fs)
    assert fs == expected(backup_prefix, backup_suffixes)

    ### a convenient Flow may be defined to execute the two in chain.
    ### If there is a certain change to the workflow, the backup can also
    ### be run.
    fs = get_changed_files(run_and_backup, prefix, 1, 100, backup_prefix, verbose=0)
    pprint(fs)
    assert fs == everything

    ###### constants that are preserved between runs should be detected unchanged
    _ = cache_run(run_and_backup, prefix, 1, 100, backup_prefix, verbose=0)
    fs = get_changed_files(run_and_backup, prefix, 2, 200, backup_prefix, verbose=0)
    pprint(fs)
    assert fs == (
        expected(prefix, varying(workflow_suffixes))
        + expected(backup_prefix, varying(backup_suffixes))
        + expected(backup_prefix, plot_suffixes)
    )

    ##### get_all_files() returns a leaf file regardless of whether it is changed
    fs = get_all_files(run_and_backup, prefix, 2, 200, backup_prefix, verbose=0)
    pprint(fs)
    assert fs == everything
    _ = cache_run(run_and_backup, prefix, 2, 200, backup_prefix, verbose=0)
Exemple #8
0
 def runner(*args, **kwargs):
     """Forward every argument to ``cache_run`` with ``check_only=2`` added."""
     return cache_run(*args, check_only=2, **kwargs)
Exemple #9
0
from spiper.types import Node,Flow
from spiper.types import RemotePythonObject as RPO

# Remote package spec handed to RemotePythonObject below.
package_path = 'spiper_mock_flow@https://github.com/shouldsee/spiper_mock_flow/tarball/8cd0f6b'


@Flow
def simple_flow(self, prefix,
                _main=RPO(package_path, None, 'run_and_backup'),
                _output=[]):
    """Run the object loaded from ``_main`` as a sub-step of this flow.

    Args:
        self: the flow's runtime caller; its ``runner`` is used to schedule
            the sub-step and it is returned as the result.
        prefix: output prefix of the flow; the sub-step also receives
            ``prefix + '_backup'`` as its last argument.
        _main: remote object reference whose ``loaded()`` result is executed.
        _output: not used in the body.
            NOTE(review): presumably spiper's declaration of output names;
            confirm before changing the default.

    Returns:
        ``self``.
    """
    remote_entry = _main.loaded()
    backup_prefix = prefix + '_backup'
    self.runner(remote_entry, prefix, 1, 20, backup_prefix)
    return self

if __name__ == '__main__':
    from pprint import pprint

    from path import Path
    from spiper.runner import get_changed_files,get_all_files,cache_run

    # Work from a clean directory.
    prefix = Path('/tmp/test_import/root')
    prefix.dirname().rmtree_p()

    # Changed files as reported before the flow has been run ...
    pprint(get_changed_files(simple_flow, prefix))

    cache_run(simple_flow, prefix)

    # ... and again once it has been executed.
    pprint(get_changed_files(simple_flow, prefix))
Exemple #10
0
        'gzip -d | cut -f2- >',
        self.output.genepred,
    ]

    LoggedShellCommand(CMD, self.output.cmd, mode='w')
    CMD = ['genePredToGtf', 'file', self.output.genepred, self.output.gtf]
    LoggedSingularityCommand(CMD, _IMAGE, self.output.cmd, mode='a')

    # return


if __name__ == '__main__':
    from spiper.runner import force_run, cache_run
    data = {}
    data['fasta'] = cache_run(
        get_fasta,
        '~/.temp/0305',
    )
    data['gtf'] = cache_run(
        get_genepred,
        '~/.temp/0305',
    )
    data['flow1'] = cache_run(
        workflow,
        '~/.temp/0305.sample1',
        '~/.temp/hisat2/',
        data['fasta'].output.fasta,
        'wuhan-ncov19',
        data['gtf'].output.gtf,
        './tests/data/test_R1_.fastq',
        './tests/data/test_R2_.fastq',
        2,
        _output=[]):
    curr = self.runner(get_fasta, prefix)
    curr = self.runner(get_genepred, prefix)
    curr = self.runner(workflow, prefix + '.sample1',
                       prefix + '.hisat2/wuhan-ncov19',
                       self.subflow['get_fasta'].output.fasta,
                       self.subflow['get_genepred'].output.gtf,
                       './test_data/test_R1_.fastq',
                       './test_data/test_R2_.fastq', _THREADS)
    return self


if __name__ == '__main__':
    from spiper.runner import force_run, cache_run
    from pprint import pprint

    # Execute test_job (cached) under the local build prefix.
    cache_run(test_job, '_temp_build/root')
    # The bare string below is a no-op expression; it appears to record the
    # equivalent command-line invocation.
    'python3 -m spiper run     $PACKAGE TOPLEVEL:test_job _temp_build/root'

######
if 0:
    ################################### TBC afterwards ############################

    def get_htseq():
        _ = '''
		#### htseq-count is too slow and not used
		htseq-count 
		-s reverse 
		-f bam 809_S1.bam 
		/home/feng/ref/Arabidopsis_thaliana_TAIR10/annotation/genes.gtf 
		-r pos -o 809_S1.htseq.sam >809_S1.htseq.count 2>809_S1.htseq.log
		'''