def test_map_naming_with_invalid_template_arg():
    files = ["a", "b", "c"]
    workflow = Workflow(working_dir="/some/dir")
    with pytest.raises(ValueError):
        workflow.map(42, files)
def test_target_list():
    def my_template(path):
        return AnonymousTarget(
            inputs={"path": path}, outputs={"path": path + ".new"}, options={}
        )

    files = ["a", "b", "c"]
    workflow = Workflow(working_dir="/some/dir")
    target_list = workflow.map(my_template, files)

    assert len(target_list) == 3

    assert len(target_list.outputs) == 3
    assert target_list.outputs == [
        {"path": "a.new"},
        {"path": "b.new"},
        {"path": "c.new"},
    ]

    assert len(target_list.inputs) == 3
    assert target_list.inputs == [{"path": "a"}, {"path": "b"}, {"path": "c"}]
def test_map_arg_passing_list_of_dicts(mocker, mock_template):
    files = [
        {"path": "a", "output_dir": "foo/"},
        {"path": "b", "output_dir": "foo/"},
        {"path": "c", "output_dir": "foo/"},
    ]

    workflow = Workflow(working_dir="/some/dir")
    workflow.map(mock_template, files)

    mock_template.assert_has_calls(
        [
            mocker.call(path="a", output_dir="foo/"),
            mocker.call(path="b", output_dir="foo/"),
            mocker.call(path="c", output_dir="foo/"),
        ],
        any_order=True,
    )
def test_including_workflow_path_dispatches_to_include_path(self):
    workflow = Workflow()
    with patch.object(workflow, 'include_path', autospec=True) as mock_include_path:
        workflow.include('/path/to/other_workflow.py')
        mock_include_path.assert_called_once_with(
            '/path/to/other_workflow.py', namespace=None)
def test_shell_calls_subprocess_with_same_working_dir_as_workflow_in_a_shell(
    mock_check_output,
):
    workflow = Workflow(working_dir="/some/path")
    workflow.shell("echo hello")
    mock_check_output.assert_called_once_with("echo hello", cwd="/some/path", shell=True)
def test_including_workflow_with_same_name_as_this_workflow_raises_an_exception(
    self
):
    workflow = Workflow(name="foo")
    other_workflow = Workflow(name="foo")
    with self.assertRaises(WorkflowError):
        workflow.include(other_workflow)
def test_iglob_with_absolute_path_does_not_search_relative_to_working_dir(
    self, iglob_mock
):
    workflow = Workflow(working_dir="/some/path")
    res = list(workflow.iglob("/other/path/*.fa"))
    self.assertEqual(res, ["/other/path/A.fa", "/other/path/B.fa"])
    iglob_mock.assert_called_once_with("/other/path/*.fa")
def test_target_from_template_returning_tuple():
    def template_returning_tuple():
        return [], [], {}, "this is the spec"

    workflow = Workflow(working_dir="/some/dir")
    workflow.target_from_template("TestTarget", template_returning_tuple())
    assert "TestTarget" in workflow.targets
def test_target_from_template_returning_tuple(self):
    def template_returning_tuple():
        return [], [], {}, "this is the spec"

    workflow = Workflow(working_dir="/some/dir")
    workflow.target_from_template("TestTarget", template_returning_tuple())
    assert "TestTarget" in workflow.targets
def test_including_workflow_path_dispatches_to_include_path():
    workflow = Workflow()
    with patch.object(workflow, "include_path", autospec=True) as mock_include_path:
        workflow.include("/path/to/other_workflow.py")
        mock_include_path.assert_called_once_with("/path/to/other_workflow.py", namespace=None)
def setUp(self):
    workflow = Workflow(working_dir='/some/dir')
    self.target1 = workflow.target(
        'TestTarget1', inputs=[], outputs=['test_output1.txt'])
    self.target2 = workflow.target(
        'TestTarget2', inputs=['test_output1.txt'], outputs=['test_output2.txt'])
    self.target3 = workflow.target(
        'TestTarget3', inputs=['test_output1.txt'], outputs=['test_output3.txt'])
    self.target4 = workflow.target(
        'TestTarget4',
        inputs=['test_output2.txt', 'test_output3.txt'],
        outputs=['final_output.txt'])

    self.graph = Graph(targets=workflow.targets)
def test_including_workflow_instance_dispatches_to_include_workflow(self):
    workflow = Workflow()
    other_workflow = Workflow()
    with patch.object(workflow, 'include_workflow', autospec=True) as mock_include_workflow:
        workflow.include(other_workflow)
        mock_include_workflow.assert_called_once_with(
            other_workflow, namespace=None)
def test_including_workflow_path_dispatches_to_include_path(self):
    workflow = Workflow()
    with patch.object(workflow, "include_path", autospec=True) as mock_include_path:
        workflow.include("/path/to/other_workflow.py")
        mock_include_path.assert_called_once_with(
            "/path/to/other_workflow.py", namespace=None
        )
def test_shell_calls_subprocess_with_same_working_dir_as_workflow_in_a_shell(
    self, mock_check_output
):
    workflow = Workflow(working_dir="/some/path")
    workflow.shell("echo hello")
    mock_check_output.assert_called_once_with(
        "echo hello", cwd="/some/path", shell=True
    )
def test_two_targets_producing_the_same_file_but_declared_with_rel_and_abs_path(
    mock_os_path_exists
):
    workflow = Workflow(working_dir="/some/dir")
    workflow.target("TestTarget1", inputs=[], outputs=["/some/dir/test_output.txt"])
    workflow.target("TestTarget2", inputs=[], outputs=["test_output.txt"])

    with pytest.raises(WorkflowError):
        Graph.from_targets(workflow.targets)
def test_raise_error_if_two_targets_in_different_namespaces_produce_the_same_file(self):
    w1 = Workflow(name='foo')
    w1.target('SayHello', inputs=[], outputs=['greeting.txt'])

    w2 = Workflow(name='bar')
    w2.target('SayHi', inputs=[], outputs=['greeting.txt'])
    w2.include(w1)

    with self.assertRaises(FileProvidedByMultipleTargetsError):
        g = Graph(targets=w2.targets)
def test_map_arg_passing_list_of_strings(mocker, mock_template):
    files = ["a", "b", "c"]
    workflow = Workflow(working_dir="/some/dir")
    workflow.map(mock_template, files)

    mock_template.assert_has_calls(
        [mocker.call("a"), mocker.call("b"), mocker.call("c")], any_order=True
    )
def main():
    gwf = Workflow()
    working_directory = '/home/andyb/CUP_classification/faststorage/Andrej'
    reference_genome = f'{working_directory}/inputs/hg38.fa'

    gwf.target_from_template(
        'create_index',
        create_bowtie2_index(reference_genome, cores=8, memory='16g'))
def test_target_from_invalid_template(self):
    def invalid_template():
        return [], []

    workflow = Workflow()

    with pytest.raises(TypeError):
        workflow.target_from_template("TestTarget", 50)

    with pytest.raises(TypeError):
        workflow.target_from_template("TestTarget", invalid_template())
def test_target_from_template_returning_anonymous_target_without_working_dir(self):
    def template_returning_anonymous_target_without_working_dir():
        return AnonymousTarget(
            inputs=["hello.txt"], outputs=[], options={}, spec="this is the spec"
        )

    workflow = Workflow(working_dir="/some/dir")
    workflow.target_from_template(
        "TestTarget", template_returning_anonymous_target_without_working_dir()
    )
    assert "TestTarget" in workflow.targets
def test_including_workflow_instance_dispatches_to_include_workflow(self):
    workflow = Workflow()
    other_workflow = Workflow()
    with patch.object(
        workflow, "include_workflow", autospec=True
    ) as mock_include_workflow:
        workflow.include(other_workflow)
        mock_include_workflow.assert_called_once_with(
            other_workflow, namespace=None
        )
def test_target_from_template_returning_anonymous_target_without_working_dir():
    def template_returning_anonymous_target_without_working_dir():
        return AnonymousTarget(
            inputs=["hello.txt"], outputs=[], options={}, spec="this is the spec"
        )

    workflow = Workflow(working_dir="/some/dir")
    workflow.target_from_template(
        "TestTarget", template_returning_anonymous_target_without_working_dir())
    assert "TestTarget" in workflow.targets
def test_including_workflow_object_should_extend_including_workflow(self):
    workflow = Workflow()
    workflow.target('TestTarget1', inputs=[], outputs=[])

    other_workflow = Workflow(name='foo')
    other_workflow.target('TestTarget2', inputs=[], outputs=[])
    other_workflow.target('TestTarget3', inputs=[], outputs=[])

    workflow.include_workflow(other_workflow)

    self.assertIn('TestTarget1', workflow.targets)
    self.assertIn('foo.TestTarget2', workflow.targets)
    self.assertIn('foo.TestTarget3', workflow.targets)
def test_including_workflow_module_gets_workflow_attribute_and_dispatches_to_include_workflow(self, mock_ismodule):
    workflow = Workflow(working_dir='/some/dir')
    other_workflow = Workflow(working_dir='/some/other/dir')

    mock_module = Mock()
    mock_module.gwf = other_workflow

    with patch.object(workflow, 'include_workflow', autospec=True) as mock_include_workflow:
        workflow.include(mock_module)
        mock_ismodule.assert_called_once_with(mock_module)
        mock_include_workflow.assert_called_once_with(
            other_workflow, namespace=None)
def test_including_workflow_object_should_extend_including_workflow():
    workflow = Workflow()
    workflow.target("TestTarget1", inputs=[], outputs=[])

    other_workflow = Workflow(name="foo")
    other_workflow.target("TestTarget2", inputs=[], outputs=[])
    other_workflow.target("TestTarget3", inputs=[], outputs=[])

    workflow.include_workflow(other_workflow)

    assert "TestTarget1" in workflow.targets
    assert "foo.TestTarget2" in workflow.targets
    assert "foo.TestTarget3" in workflow.targets
def test_map_with_custom_naming_string():
    def my_template(path):
        return AnonymousTarget(
            inputs={"path": path}, outputs={"path": path + ".new"}, options={}
        )

    files = ["a", "b", "c"]
    workflow = Workflow(working_dir="/some/dir")
    workflow.map(my_template, files, name="bar")

    assert len(workflow.targets) == 3
    assert "bar_0" in workflow.targets
    assert "bar_1" in workflow.targets
    assert "bar_2" in workflow.targets
def test_map_naming_with_template_function():
    def my_template(path):
        return AnonymousTarget(inputs=[path], outputs=[path + ".new"], options={})

    files = ["a", "b", "c"]
    workflow = Workflow(working_dir="/some/dir")
    workflow.map(my_template, files)

    assert len(workflow.targets) == 3
    assert "my_template_0" in workflow.targets
    assert "my_template_1" in workflow.targets
    assert "my_template_2" in workflow.targets
def test_target_should_not_run_if_it_is_a_source_and_all_outputs_exist(self):
    workflow = Workflow(working_dir="/some/dir")
    target = workflow.target(
        "TestTarget1", inputs=[], outputs=["test_output1.txt", "test_output2.txt"]
    )

    graph = Graph.from_targets(workflow.targets)
    scheduler = Scheduler(graph=graph, backend=DummyBackend())

    mock_file_cache = {
        "/some/dir/test_output1.txt": 1,
        "/some/dir/test_output2.txt": 2,
    }
    with patch.dict(scheduler._file_cache, mock_file_cache):
        self.assertFalse(scheduler.should_run(target))
def test_exception_if_input_file_is_not_provided_and_output_file_exists():
    workflow = Workflow(working_dir="/some/dir")
    target = workflow.target("TestTarget", inputs=["in.txt"], outputs=["out.txt"])

    graph = Graph.from_targets(workflow.targets)
    print(graph.unresolved)

    backend = DummyBackend()
    scheduler = Scheduler(
        graph=graph,
        backend=backend,
        file_cache={"/some/dir/in.txt": None, "/some/dir/out.txt": 1},
    )

    with pytest.raises(WorkflowError):
        scheduler.should_run(target)
def test_including_workflow_from_path(mock_load_workflow):
    workflow = Workflow()
    workflow.target("TestTarget1", inputs=[], outputs=[])

    other_workflow = Workflow()
    other_workflow.target("TestTarget2", inputs=[], outputs=[])
    other_workflow.target("TestTarget3", inputs=[], outputs=[])

    mock_load_workflow.return_value = other_workflow

    workflow.include_path("/path/to/other_workflow.py", namespace="other")
    assert workflow.targets.keys() == {
        "TestTarget1",
        "other.TestTarget2",
        "other.TestTarget3",
    }
def test_map_naming_with_template_class_instance():
    class MyTemplate:
        def __call__(self, path):
            return AnonymousTarget(inputs=[path], outputs=[path + ".new"], options={})

    files = ["a", "b", "c"]
    workflow = Workflow(working_dir="/some/dir")
    workflow.map(MyTemplate(), files)

    assert len(workflow.targets) == 3
    assert "MyTemplate_0" in workflow.targets
    assert "MyTemplate_1" in workflow.targets
    assert "MyTemplate_2" in workflow.targets
def test_workflow_computes_working_dir_when_not_initialized_with_working_dir(
        self, inspect_getfile_mock, sys_getframe_mock):
    workflow = Workflow()

    self.assertEqual(sys_getframe_mock.call_count, 1)
    self.assertEqual(inspect_getfile_mock.call_count, 1)
    self.assertEqual(workflow.working_dir, '/some/path')
def test_workflow_computes_working_dir_when_not_initialized_with_working_dir(
    inspect_getfile_mock, sys_getframe_mock
):
    workflow = Workflow()

    assert sys_getframe_mock.call_count == 1
    assert inspect_getfile_mock.call_count == 1
    assert workflow.working_dir == "/some/path"
def test_map_with_custom_naming_function():
    def my_template(path):
        return AnonymousTarget(
            inputs={"path": path}, outputs={"path": path + ".new"}, options={}
        )

    files = ["a", "b", "c"]
    workflow = Workflow(working_dir="/some/dir")
    workflow.map(
        my_template, files, name=lambda i, t: "foo_{}".format(t.inputs["path"])
    )

    assert len(workflow.targets) == 3
    assert "foo_a" in workflow.targets
    assert "foo_b" in workflow.targets
    assert "foo_c" in workflow.targets
def test_including_workflow_module_gets_workflow_attribute_and_dispatches_to_include_workflow(
    self, mock_ismodule
):
    workflow = Workflow(working_dir="/some/dir")
    other_workflow = Workflow(working_dir="/some/other/dir")

    mock_module = Mock()
    mock_module.gwf = other_workflow

    with patch.object(
        workflow, "include_workflow", autospec=True
    ) as mock_include_workflow:
        workflow.include(mock_module)
        mock_ismodule.assert_called_once_with(mock_module)
        mock_include_workflow.assert_called_once_with(
            other_workflow, namespace=None
        )
def test_target_should_not_run_if_it_is_a_source_and_all_outputs_exist(self):
    workflow = Workflow(working_dir='/some/dir')
    target = workflow.target(
        'TestTarget1',
        inputs=[],
        outputs=['test_output1.txt', 'test_output2.txt']
    )

    graph = Graph(targets=workflow.targets)

    mock_file_cache = {
        '/some/dir/test_output1.txt': 1,
        '/some/dir/test_output2.txt': 2
    }
    with patch.dict(graph.file_cache, mock_file_cache):
        self.assertFalse(
            graph.should_run(target)
        )
def test_including_workflow_from_path(self, mock_load_workflow):
    workflow = Workflow()
    target1 = workflow.target('TestTarget1', inputs=[], outputs=[])

    other_workflow = Workflow()
    target2 = other_workflow.target('TestTarget2', inputs=[], outputs=[])
    target3 = other_workflow.target('TestTarget3', inputs=[], outputs=[])

    mock_load_workflow.return_value = other_workflow

    workflow.include_path('/path/to/other_workflow.py', namespace='other')
    self.assertEqual(workflow.targets.keys(),
                     {'TestTarget1', 'other.TestTarget2', 'other.TestTarget3'})
def setUp(self): workflow = Workflow(working_dir="/some/dir") self.target1 = workflow.target( "TestTarget1", inputs=[], outputs=["test_output1.txt"] ) self.target2 = workflow.target( "TestTarget2", inputs=["test_output1.txt"], outputs=["test_output2.txt"] ) self.target3 = workflow.target( "TestTarget3", inputs=["test_output1.txt"], outputs=["test_output3.txt"] ) self.target4 = workflow.target( "TestTarget4", inputs=["test_output2.txt", "test_output3.txt"], outputs=["final_output.txt"], ) self.graph = Graph.from_targets(workflow.targets) self.backend = DummyBackend() self.scheduler = Scheduler(graph=self.graph, backend=self.backend)
def test_including_workflow_object_should_extend_including_workflow(self):
    workflow = Workflow()
    workflow.target("TestTarget1", inputs=[], outputs=[])

    other_workflow = Workflow(name="foo")
    other_workflow.target("TestTarget2", inputs=[], outputs=[])
    other_workflow.target("TestTarget3", inputs=[], outputs=[])

    workflow.include_workflow(other_workflow)

    self.assertIn("TestTarget1", workflow.targets)
    self.assertIn("foo.TestTarget2", workflow.targets)
    self.assertIn("foo.TestTarget3", workflow.targets)
def test_dependencies_correctly_resolved_for_named_workflow(self):
    workflow = Workflow(name='foo')
    target1 = workflow.target('TestTarget1', inputs=[], outputs=['test.txt'])
    target2 = workflow.target('TestTarget2', inputs=['test.txt'], outputs=[])

    other_workflow = Workflow(name='bar')
    other_workflow.include(workflow)
    other_target1 = other_workflow.target('TestTarget1', inputs=['test.txt'], outputs=[])

    graph = Graph(targets=other_workflow.targets)
    assert 'TestTarget1' in graph.targets
    assert 'foo.TestTarget2' in graph.targets
def test_two_targets_producing_the_same_file_but_declared_with_rel_and_abs_path(self, mock_os_path_exists):
    workflow = Workflow(working_dir='/some/dir')
    workflow.target('TestTarget1', inputs=[], outputs=['/some/dir/test_output.txt'])
    workflow.target('TestTarget2', inputs=[], outputs=['test_output.txt'])

    with self.assertRaises(FileProvidedByMultipleTargetsError):
        Graph(targets=workflow.targets)
def test_include_target_from_workflow_in_two_different_workflows_(self):
    w1 = Workflow()
    target = w1.target("MyTarget", inputs=[], outputs=[])

    w3 = Workflow()
    w3.include(w1, namespace="bar")

    w2 = Workflow()
    w2.include(w1, namespace="foo")

    self.assertEqual(target.name, "MyTarget")

    self.assertIn("bar.MyTarget", w3.targets)
    self.assertEqual(w3.targets["bar.MyTarget"].name, "bar.MyTarget")

    self.assertIn("foo.MyTarget", w2.targets)
    self.assertEqual(w2.targets["foo.MyTarget"].name, "foo.MyTarget")
def test_target_from_invalid_template():
    def invalid_template():
        return [], []

    workflow = Workflow()

    with pytest.raises(TypeError):
        workflow.target_from_template("TestTarget", 50)

    with pytest.raises(TypeError):
        workflow.target_from_template("TestTarget", invalid_template())
def test_including_workflow_from_path(self, mock_load_workflow):
    workflow = Workflow()
    target1 = workflow.target("TestTarget1", inputs=[], outputs=[])

    other_workflow = Workflow()
    target2 = other_workflow.target("TestTarget2", inputs=[], outputs=[])
    target3 = other_workflow.target("TestTarget3", inputs=[], outputs=[])

    mock_load_workflow.return_value = other_workflow

    workflow.include_path("/path/to/other_workflow.py", namespace="other")
    self.assertEqual(
        workflow.targets.keys(),
        {"TestTarget1", "other.TestTarget2", "other.TestTarget3"},
    )
"""This is an example workflow for read-mapping using bwa and samtools.""" from gwf import Workflow gwf = Workflow() def unzip(inputfile, outputfile): """A template for unzipping files.""" inputs = [inputfile] outputs = [outputfile] options = { 'cores': 1, 'memory': '2g', } spec = ''' gzcat {} > {} '''.format(inputfile, outputfile) return inputs, outputs, options, spec def bwa_index(ref_genome): """Template for indexing a genome with `bwa index`.""" inputs = ['{}.fa'.format(ref_genome)] outputs = ['{}.amb'.format(ref_genome), '{}.ann'.format(ref_genome), '{}.pac'.format(ref_genome), '{}.bwt'.format(ref_genome), '{}.sa'.format(ref_genome), ]
def test_adding_two_targets_with_the_same_names_should_raise_an_exception(self):
    workflow = Workflow()
    workflow.target("TestTarget", inputs=[], outputs=[])
    with self.assertRaises(WorkflowError):
        workflow.target("TestTarget", inputs=[], outputs=[])
from gwf import Workflow

gwf = Workflow()

gwf.include('other_workflow/workflow.py', namespace='other')

gwf.target('World', inputs=['other_workflow/a.txt'], outputs=['b.txt']) << """
cat other_workflow/a.txt > b.txt
echo world >> b.txt
"""
def test_adding_a_target_makes_it_available_to_the_workflow(self):
    workflow = Workflow()
    target = workflow.target("TestTarget", inputs=[], outputs=[])

    self.assertIn("TestTarget", workflow.targets)
    self.assertIn(target, workflow.targets.values())
def test_target_with_no_output_has_empty_outputs_attribute(self):
    workflow = Workflow()
    target = workflow.target("TestTarget", inputs=[], outputs=[])
    self.assertListEqual(target.outputs, [])
from gwf import Workflow

gwf = Workflow()

gwf.target('Target1', inputs=[], outputs=['a.txt']) << "echo hello world"
gwf.target('Target2', inputs=[], outputs=['b.txt']) << "echo world hello"
def test_iglob_with_relative_path_searches_relative_to_working_dir(
    self, iglob_mock
):
    workflow = Workflow(working_dir="/some/path")
    workflow.iglob("*.fa")
    iglob_mock.assert_called_once_with("/some/path/*.fa")
def test_including_workflow_with_no_name_raises_an_exception(self):
    workflow = Workflow()
    other_workflow = Workflow()
    with self.assertRaises(WorkflowError):
        workflow.include(other_workflow)
def test_targets_inherit_workflow_defaults(self):
    workflow = Workflow(defaults={"cores": 8, "memory": "8g"})
    target = workflow.target("TestTarget", inputs=[], outputs=[])
    self.assertEqual(target.options, {"cores": 8, "memory": "8g"})
def test_targets_inherit_workflow_working_dir_with_given_working_dir(self):
    workflow = Workflow(working_dir="/some/path")
    target = workflow.target("TestTarget", inputs=[], outputs=[])
    self.assertEqual(target.working_dir, "/some/path")
def test_including_non_module_str_and_object_value_raises_type_error(self):
    workflow = Workflow(working_dir="/some/dir")
    with self.assertRaises(TypeError):
        workflow.include(42)
from gwf import Workflow

gwf = Workflow()

gwf.target('World', inputs=[], outputs=['a.txt']) << """
echo hello > a.txt
"""
def test_target_options_override_defaults(self):
    workflow = Workflow(defaults={"cores": 8, "memory": "8g"})
    target = workflow.target("TestTarget", inputs=[], outputs=[], cores=16)
    self.assertEqual(target.options, {"cores": 16, "memory": "8g"})
'''
This workflow is for baboon data.
------------------------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------------------------
Author: Juraj Bergman
Date: 02/02/2021
------------------------------------------------------------------------------------------------------------------------
Later modifications by Erik Fogh Sørensen
19/03/2021
------------------------------------------------------------------------------------------------------------------------
'''

from gwf import Workflow
import os

gwf = Workflow()


########################################################################################################################
############################################### ---- GENOTYPE VCFs ---- ################################################
########################################################################################################################


def gt_gvcfLukasVersion(infile, outfile, ref, path):
    """Genotype individuals."""
    inputs = [path + infile]
    outputs = [path + outfile + "done"]
    options = {
        'cores': 4,
        'memory': "16g",
        'walltime': "06:00:00",
        "account": 'primatediversity'
shell_spec = """ sort -k1,1 -k2,2n -k3,3n --merge {input_files} -T /scratch/$GWF_JOBID | python ./scripts/bed_split.py {output_dir} {output_base_names} """.format(input_files=" ".join(input_files), output_dir=output_dir, output_base_names=" ".join(output_base_names)) return input_files, output_files, options, shell_spec ################################################################################# # Workflow components ################################################################################# gwf = Workflow(defaults={'account': 'simons'}) def split_file(input_file, split_files_dir, n_files=50): """ Split a file into n_files chunks. """ if not split_files_dir.exists(): os.makedirs(str(split_files_dir)) prefix = str(split_files_dir / input_file.with_suffix('').name) split_files = [ Path('{}.{}{}'.format(prefix, x, input_file.suffix)) for x in range(n_files) ] gwf.target('split_files', inputs=[str(input_file)],
from gwf import Workflow

gwf = Workflow()

gwf.target('SayHello', inputs=['name.txt'], outputs=['greeting.txt']) << """
echo -n "Hello " > greeting.txt
cat name.txt >> greeting.txt
"""

gwf.target("World", inputs=['greeting.txt'], outputs=['world.txt']) << """
cat greeting.txt > world.txt
echo "world" >> world.txt
"""

gwf.target("Universe", inputs=['greeting.txt'], outputs=['universe.txt']) << """
cat greeting.txt > universe.txt
echo "universe" >> universe.txt
"""

gwf.target("All", inputs=['world.txt', 'universe.txt'], outputs=['all.txt']) << """
cat world.txt > all.txt
cat universe.txt >> all.txt
"""