def test_prev_uploaded(self, upl):
    """A file recorded via add_uploaded() should be reused, not re-uploaded."""
    runner = arvados_cwl.ArvCwlRunner(self.api)
    # Prime the runner's upload cache with an existing keep mapping.
    cached = MapperEnt(
        resolved='keep:99999999999999999999999999999991+99/hw.py',
        target='',
        type='File')
    runner.add_uploaded('tests/hw.py', cached)
    upl.side_effect = upload_mock

    mapper = ArvPathMapper(
        runner,
        [{"class": "File", "location": "tests/hw.py"}],
        "", "/test/%s", "/test/%s/%s")

    expected = {
        'tests/hw.py': MapperEnt(
            resolved='keep:99999999999999999999999999999991+99/hw.py',
            target='/test/99999999999999999999999999999991+99/hw.py',
            type='File'),
    }
    self.assertEqual(expected, mapper._pathmap)
def test_upload(self, statfile, upl):
    """A local file not in Keep should be routed through the uploader."""
    runner = arvados_cwl.ArvCwlRunner(self.api)

    # Report every path as needing upload.
    def fake_statfile(prefix, fn, fnPattern="$(file %s/%s)",
                      dirPattern="$(dir %s/%s/)", raiseOSError=False):
        return arvados.commands.run.UploadFile("", "tests/hw.py")

    upl.side_effect = upload_mock
    statfile.side_effect = fake_statfile

    mapper = ArvPathMapper(
        runner,
        [{"class": "File", "location": "file:tests/hw.py"}],
        "", "/test/%s", "/test/%s/%s")

    expected = {
        'file:tests/hw.py': MapperEnt(
            resolved='keep:99999999999999999999999999999991+99/hw.py',
            target='/test/99999999999999999999999999999991+99/hw.py',
            type='File',
            staged=True),
    }
    self.assertEqual(expected, mapper._pathmap)
def test_statfile(self, statfile, upl):
    """ArvFile results (files already on a keep mount) map directly, no upload."""
    runner = arvados_cwl.ArvCwlRunner(self.api)

    # An ArvFile returned from arvados.commands.run.statfile means the path is
    # on a keep mount, so the mapper builds a keep: reference without uploading.
    def fake_statfile(prefix, fn, fnPattern="$(file %s/%s)",
                      dirPattern="$(dir %s/%s/)"):
        return arvados.commands.run.ArvFile(
            "", fnPattern % ("99999999999999999999999999999991+99", "hw.py"))

    upl.side_effect = upload_mock
    statfile.side_effect = fake_statfile

    mapper = ArvPathMapper(
        runner,
        [{"class": "File", "location": "tests/hw.py"}],
        "", "/test/%s", "/test/%s/%s")

    expected = {
        'tests/hw.py': MapperEnt(
            resolved='keep:99999999999999999999999999999991+99/hw.py',
            target='/test/99999999999999999999999999999991+99/hw.py',
            type='File'),
    }
    self.assertEqual(expected, mapper._pathmap)
def test_default_work_api(self):
    """When the jobs API is unavailable the runner should fall back to containers."""
    arvados_cwl.add_arv_hints()
    stub_api = mock.MagicMock()
    stub_api._rootDesc = copy.deepcopy(get_rootDesc())
    # Simulate a cluster without the jobs API by removing jobs.create
    # from the discovery document.
    jobs_methods = stub_api._rootDesc.get('resources')['jobs']['methods']
    del jobs_methods['create']
    runner = arvados_cwl.ArvCwlRunner(stub_api)
    self.assertEqual(runner.work_api, 'containers')
def test_make_output_collection(self, reader, col):
    """make_output_collection() should copy outputs into a new collection,
    rewrite cwl.output.json locations to be collection-relative, save the
    collection under the project with the requested storage classes, and
    create one tag link per comma-separated tag."""
    keep_client = mock.MagicMock()
    runner = arvados_cwl.ArvCwlRunner(self.api, keep_client=keep_client)
    runner.project_uuid = 'zzzzz-j7d0g-zzzzzzzzzzzzzzz'

    # "final" stands in for the output collection being built; "readermock"
    # for the source collections the outputs are copied from.
    final = mock.MagicMock()
    col.return_value = final
    readermock = mock.MagicMock()
    reader.return_value = readermock

    final_uuid = final.manifest_locator()
    num_retries = runner.num_retries

    # Capture what gets written to cwl.output.json inside the collection.
    cwlout = StringIO.StringIO()
    openmock = mock.MagicMock()
    final.open.return_value = openmock
    openmock.__enter__.return_value = cwlout

    _, runner.final_output_collection = runner.make_output_collection(
        "Test output", ["foo"], "tag0,tag1,tag2", {
            "foo": {
                "class": "File",
                "location": "keep:99999999999999999999999999999991+99/foo.txt",
                "size": 3,
                "basename": "foo.txt"
            },
            "bar": {
                "class": "File",
                "location": "keep:99999999999999999999999999999992+99/bar.txt",
                "basename": "baz.txt",
                "size": 4
            }
        })

    # Each output file is copied under its basename (note "bar" is renamed
    # to its basename "baz.txt").
    final.copy.assert_has_calls([mock.call('bar.txt', 'baz.txt', overwrite=False, source_collection=readermock)])
    final.copy.assert_has_calls([mock.call('foo.txt', 'foo.txt', overwrite=False, source_collection=readermock)])
    final.save_new.assert_has_calls([mock.call(ensure_unique_name=True, name='Test output', owner_uuid='zzzzz-j7d0g-zzzzzzzzzzzzzzz', storage_classes=['foo'])])

    # Locations in cwl.output.json are rewritten relative to the collection.
    self.assertEqual("""{ "bar": { "basename": "baz.txt", "class": "File", "location": "baz.txt", "size": 4 }, "foo": { "basename": "foo.txt", "class": "File", "location": "foo.txt", "size": 3 } }""", cwlout.getvalue())

    self.assertIs(final, runner.final_output_collection)
    self.assertIs(final_uuid, runner.final_output_collection.manifest_locator())

    # One tag link is created per entry in the comma-separated tag string.
    self.api.links().create.assert_has_calls([mock.call(body={"head_uuid": final_uuid, "link_class": "tag", "name": "tag0"}), mock.call().execute(num_retries=num_retries)])
    self.api.links().create.assert_has_calls([mock.call(body={"head_uuid": final_uuid, "link_class": "tag", "name": "tag1"}), mock.call().execute(num_retries=num_retries)])
    self.api.links().create.assert_has_calls([mock.call(body={"head_uuid": final_uuid, "link_class": "tag", "name": "tag2"}), mock.call().execute(num_retries=num_retries)])
def test_missing_file(self, stat):
    """A stat failure on a local reference should propagate as OSError."""
    runner = arvados_cwl.ArvCwlRunner(self.api)
    stat.side_effect = OSError(2, "No such file or directory")
    with self.assertRaises(OSError):
        ArvPathMapper(
            runner,
            [{"class": "File", "location": "file:tests/hw.py"}],
            "", "/test/%s", "/test/%s/%s")
def test_keepref(self):
    """A keep: URI should pass through unchanged and map to its mount path."""
    runner = arvados_cwl.ArvCwlRunner(self.api)
    location = "keep:99999999999999999999999999999991+99/hw.py"
    mapper = ArvPathMapper(
        runner,
        [{"class": "File", "location": location}],
        "", "/test/%s", "/test/%s/%s")
    expected_entry = MapperEnt(
        resolved=location,
        target='/test/99999999999999999999999999999991+99/hw.py',
        type='File',
        staged=True)
    self.assertEqual({location: expected_entry}, mapper._pathmap)
def test_upload(self, upl):
    """Local files should be uploaded and remapped to their keep location."""
    runner = arvados_cwl.ArvCwlRunner(mock.MagicMock())
    upl.side_effect = upload_mock
    mapper = ArvPathMapper(
        runner,
        [{"class": "File", "location": "tests/hw.py"}],
        "", "/test/%s", "/test/%s/%s")
    expected = {
        'tests/hw.py': MapperEnt(
            resolved='keep:99999999999999999999999999999991+99/hw.py',
            target='/test/99999999999999999999999999999991+99/hw.py',
            type='File'),
    }
    self.assertEqual(expected, mapper._pathmap)
def test_run(self, list_images_in_arv, mockcollection, mockcollectionreader):
    """Run a scatter workflow through the jobs API and verify the exact
    crunchrunner job request body, filters, and the files written into the
    intermediate collection (subworkflow + cwl.input.yml)."""
    arvados_cwl.add_arv_hints()
    api = mock.MagicMock()
    api._rootDesc = get_rootDesc()
    runner = arvados_cwl.ArvCwlRunner(api)
    self.assertEqual(runner.work_api, 'jobs')

    list_images_in_arv.return_value = [["zzzzz-4zz18-zzzzzzzzzzzzzzz"]]
    # BUG FIX: these two assignments used "return_vaulue" (typo), which only
    # set a stray attribute on the MagicMock and never stubbed the call result.
    runner.api.collections().get().execute.return_value = {
        "portable_data_hash": "99999999999999999999999999999993+99"}
    runner.api.collections().list().execute.return_value = {
        "items": [{"portable_data_hash": "99999999999999999999999999999993+99"}]}

    runner.project_uuid = "zzzzz-8i9sb-zzzzzzzzzzzzzzz"
    runner.ignore_docker_for_reuse = False
    runner.num_retries = 0

    document_loader, avsc_names, schema_metadata, metaschema_loader = cwltool.process.get_schema("v1.0")

    # Wire the loader to fetch documents through the (mocked) collection API.
    make_fs_access = functools.partial(
        arvados_cwl.CollectionFsAccess,
        collection_cache=arvados_cwl.CollectionCache(runner.api, None, 0))
    document_loader.fetcher_constructor = functools.partial(
        arvados_cwl.CollectionFetcher, api_client=api, fs_access=make_fs_access(""))
    document_loader.fetcher = document_loader.fetcher_constructor(
        document_loader.cache, document_loader.session)
    document_loader.fetch_text = document_loader.fetcher.fetch_text
    document_loader.check_exists = document_loader.fetcher.check_exists

    tool, metadata = document_loader.resolve_ref("tests/wf/scatter2.cwl")
    metadata["cwlVersion"] = tool["cwlVersion"]

    mockcollection().portable_data_hash.return_value = "99999999999999999999999999999999+118"

    arvtool = arvados_cwl.ArvadosWorkflow(runner, tool,
                                          work_api="jobs",
                                          avsc_names=avsc_names,
                                          basedir="",
                                          make_fs_access=make_fs_access,
                                          loader=document_loader,
                                          makeTool=runner.arv_make_tool,
                                          metadata=metadata)
    arvtool.formatgraph = None
    it = arvtool.job({}, mock.MagicMock(), basedir="", make_fs_access=make_fs_access)
    # First yielded runnable stages the subworkflow; the second submits it.
    it.next().run()
    it.next().run()

    with open("tests/wf/scatter2_subwf.cwl") as f:
        subwf = StripYAMLComments(f.read())

    runner.api.jobs().create.assert_called_with(
        body=JsonDiffMatcher({
            'minimum_script_version': 'a3f2cb186e437bfce0031b024b2157b73ed2717d',
            'repository': 'arvados',
            'script_version': 'master',
            'script': 'crunchrunner',
            'script_parameters': {
                'tasks': [{
                    'task.env': {
                        'HOME': '$(task.outdir)',
                        'TMPDIR': '$(task.tmpdir)'
                    },
                    'task.vwd': {
                        'workflow.cwl': '$(task.keep)/99999999999999999999999999999999+118/workflow.cwl',
                        'cwl.input.yml': '$(task.keep)/99999999999999999999999999999999+118/cwl.input.yml'
                    },
                    'command': [
                        u'cwltool', u'--no-container', u'--move-outputs',
                        u'--preserve-entire-environment', u'workflow.cwl#main',
                        u'cwl.input.yml'
                    ],
                    'task.stdout': 'cwl.output.json'
                }]
            },
            'runtime_constraints': {
                'min_scratch_mb_per_node': 2048,
                'min_cores_per_node': 1,
                'docker_image': 'arvados/jobs',
                'min_ram_mb_per_node': 1024
            },
            'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz'
        }),
        filters=[['repository', '=', 'arvados'],
                 ['script', '=', 'crunchrunner'],
                 ['script_version', 'in git', 'a3f2cb186e437bfce0031b024b2157b73ed2717d'],
                 ['docker_image_locator', 'in docker', 'arvados/jobs']],
        find_or_create=True)

    # The staged collection must contain the stripped subworkflow text and
    # the serialized input object.
    mockcollection().open().__enter__().write.assert_has_calls([mock.call(subwf)])
    mockcollection().open().__enter__().write.assert_has_calls([mock.call('''{ "fileblub": { "basename": "token.txt", "class": "File", "location": "/keep/99999999999999999999999999999999+118/token.txt" }, "sleeptime": 5 }''')])
def test_run(self, mockcollection):
    """Run the scatter workflow and verify the crunchrunner job request.

    BUG FIX: the original wrapped the whole body in a bare ``except:`` that
    only logged, so AssertionError (and everything else, including
    SystemExit/KeyboardInterrupt) was swallowed and the test could never
    fail.  Now the handler is narrowed to ``Exception`` and re-raises after
    logging the traceback.
    """
    try:
        arvados_cwl.add_arv_hints()
        runner = arvados_cwl.ArvCwlRunner(mock.MagicMock())
        runner.project_uuid = "zzzzz-8i9sb-zzzzzzzzzzzzzzz"
        runner.ignore_docker_for_reuse = False
        runner.num_retries = 0

        document_loader, avsc_names, schema_metadata, metaschema_loader = cwltool.process.get_schema("v1.0")

        tool, metadata = document_loader.resolve_ref("tests/wf/scatter2.cwl")
        metadata["cwlVersion"] = tool["cwlVersion"]

        mockcollection().portable_data_hash.return_value = "99999999999999999999999999999999+118"

        make_fs_access = functools.partial(arvados_cwl.CollectionFsAccess,
                                           api_client=runner.api)
        arvtool = arvados_cwl.ArvadosWorkflow(runner, tool,
                                              work_api="jobs",
                                              avsc_names=avsc_names,
                                              basedir="",
                                              make_fs_access=make_fs_access,
                                              loader=document_loader,
                                              makeTool=runner.arv_make_tool,
                                              metadata=metadata)
        arvtool.formatgraph = None
        it = arvtool.job({}, mock.MagicMock(), basedir="", make_fs_access=make_fs_access)
        # First yielded runnable stages the subworkflow; the second submits it.
        it.next().run()
        it.next().run()

        with open("tests/wf/scatter2_subwf.cwl") as f:
            subwf = f.read()

        # The staged collection must contain the subworkflow text and the
        # serialized input object.
        mockcollection().open().__enter__().write.assert_has_calls([mock.call(subwf)])
        mockcollection().open().__enter__().write.assert_has_calls([mock.call('{sleeptime: 5}')])

        runner.api.jobs().create.assert_called_with(
            body={
                'minimum_script_version': '9e5b98e8f5f4727856b53447191f9c06e3da2ba6',
                'repository': 'arvados',
                'script_version': 'master',
                'script': 'crunchrunner',
                'script_parameters': {
                    'tasks': [{
                        'task.env': {
                            'HOME': '$(task.outdir)',
                            'TMPDIR': '$(task.tmpdir)'
                        },
                        'task.vwd': {
                            'workflow.cwl': '$(task.keep)/99999999999999999999999999999999+118/workflow.cwl',
                            'cwl.input.yml': '$(task.keep)/99999999999999999999999999999999+118/cwl.input.yml'
                        },
                        'command': [
                            u'cwltool', u'--no-container', u'--move-outputs',
                            u'--preserve-entire-environment', u'workflow.cwl#main',
                            u'cwl.input.yml'
                        ],
                        'task.stdout': 'cwl.output.json'
                    }]
                },
                'runtime_constraints': {
                    'min_scratch_mb_per_node': 2048,
                    'min_cores_per_node': 1,
                    'docker_image': 'arvados/jobs',
                    'min_ram_mb_per_node': 1024
                },
                'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz'
            },
            filters=[['repository', '=', 'arvados'],
                     ['script', '=', 'crunchrunner'],
                     ['script_version', 'in git', '9e5b98e8f5f4727856b53447191f9c06e3da2ba6'],
                     ['docker_image_locator', 'in docker', 'arvados/jobs']],
            find_or_create=True)
    except Exception:
        # Keep the traceback in the test log, but let the failure propagate.
        logging.exception("")
        raise
def run():
    """Crunch-job entry point: reconstruct the CWL job order from the job's
    script_parameters, load the tool from the keep mount, and hand off to
    ArvCwlRunner.arv_executor.  On any failure, mark the current task as
    unsuccessful (recording partial output if available)."""
    # Timestamps are added by crunch-job, so don't print redundant timestamps.
    arvados.log_handler.setFormatter(logging.Formatter('%(name)s %(levelname)s: %(message)s'))

    # Print package versions
    logger.info(arvados_cwl.versionstring())

    api = arvados.api("v1")

    arvados_cwl.add_arv_hints()

    runner = None
    try:
        job_order_object = arvados.current_job()['script_parameters']
        toolpath = "file://%s/%s" % (os.environ['TASK_KEEPMOUNT'], job_order_object.pop("cwl:tool"))

        # Matches a bare portable data hash, optionally followed by a path.
        pdh_path = re.compile(r'^[0-9a-f]{32}\+\d+(/.+)?$')

        def keeppath(v):
            # Rewrite a bare PDH (or PDH/path) string into a keep: URI;
            # anything else passes through unchanged.
            if pdh_path.match(v):
                return "keep:%s" % v
            else:
                return v

        def keeppathObj(v):
            # In-place rewrite of a CWL File/Directory object's location.
            if "location" in v:
                v["location"] = keeppath(v["location"])

        # Promote bare keep locator strings in the job order to CWL File objects.
        for k,v in job_order_object.items():
            if isinstance(v, basestring) and arvados.util.keep_locator_pattern.match(v):
                job_order_object[k] = {
                    "class": "File",
                    "location": "keep:%s" % v
                }

        adjustFileObjs(job_order_object, keeppathObj)
        adjustDirObjs(job_order_object, keeppathObj)
        normalizeFilesDirs(job_order_object)

        output_name = None
        output_tags = None
        enable_reuse = True
        on_error = "continue"
        debug = False

        # Pop runner-control parameters ("arv:*") out of the job order so the
        # workflow itself never sees them as inputs.
        if "arv:output_name" in job_order_object:
            output_name = job_order_object["arv:output_name"]
            del job_order_object["arv:output_name"]

        if "arv:output_tags" in job_order_object:
            output_tags = job_order_object["arv:output_tags"]
            del job_order_object["arv:output_tags"]

        if "arv:enable_reuse" in job_order_object:
            enable_reuse = job_order_object["arv:enable_reuse"]
            del job_order_object["arv:enable_reuse"]

        if "arv:on_error" in job_order_object:
            on_error = job_order_object["arv:on_error"]
            del job_order_object["arv:on_error"]

        if "arv:debug" in job_order_object:
            debug = job_order_object["arv:debug"]
            del job_order_object["arv:debug"]

        runner = arvados_cwl.ArvCwlRunner(api_client=arvados.safeapi.ThreadSafeApiCache(
            api_params={"model": OrderedJsonModel()}, keep_params={"num_retries": 4}),
                                          output_name=output_name, output_tags=output_tags)

        make_fs_access = functools.partial(CollectionFsAccess,
                                 collection_cache=runner.collection_cache)

        # Load the tool via a fetcher that reads documents from collections.
        t = load_tool(toolpath, runner.arv_make_tool,
                      fetcher_constructor=functools.partial(CollectionFetcher,
                                                  api_client=runner.api,
                                                  fs_access=make_fs_access(""),
                                                  num_retries=runner.num_retries))

        if debug:
            logger.setLevel(logging.DEBUG)
            logging.getLogger('arvados').setLevel(logging.DEBUG)
            logging.getLogger("cwltool").setLevel(logging.DEBUG)

        args = argparse.Namespace()
        args.project_uuid = arvados.current_job()["owner_uuid"]
        args.enable_reuse = enable_reuse
        args.on_error = on_error
        args.submit = False
        args.debug = debug
        args.quiet = False
        args.ignore_docker_for_reuse = False
        args.basedir = os.getcwd()
        args.name = None
        args.cwl_runner_job={"uuid": arvados.current_job()["uuid"], "state": arvados.current_job()["state"]}
        args.make_fs_access = make_fs_access
        args.trash_intermediate = False
        args.intermediate_output_ttl = 0
        args.priority = arvados_cwl.DEFAULT_PRIORITY
        args.do_validate = True
        args.disable_js_validation = False

        runner.arv_executor(t, job_order_object, **vars(args))
    except Exception as e:
        if isinstance(e, WorkflowException):
            logging.info("Workflow error %s", e)
        else:
            logging.exception("Unhandled exception")
        # Record the failure (and any partial output) on the current task.
        if runner and runner.final_output_collection:
            outputCollection = runner.final_output_collection.portable_data_hash()
        else:
            outputCollection = None
        api.job_tasks().update(uuid=arvados.current_task()['uuid'],
                                             body={
                                                 'output': outputCollection,
                                                 'success': False,
                                                 'progress':1.0
                                             }).execute()
def test_overall_resource_singlecontainer(self, list_images_in_arv, mockcollection, mockcollectionreader):
    """Resource requirements of the subworkflow's steps should be aggregated
    into the runtime constraints of a single crunchrunner job."""
    arvados_cwl.add_arv_hints()
    api = mock.MagicMock()
    api._rootDesc = get_rootDesc()
    runner = arvados_cwl.ArvCwlRunner(api)
    self.assertEqual(runner.work_api, 'jobs')

    list_images_in_arv.return_value = [["zzzzz-4zz18-zzzzzzzzzzzzzzz"]]
    # BUG FIX: these two assignments used "return_vaulue" (typo), which only
    # set a stray attribute on the MagicMock and never stubbed the call result.
    runner.api.collections().get().execute.return_value = {"portable_data_hash": "99999999999999999999999999999993+99"}
    runner.api.collections().list().execute.return_value = {"items": [{"portable_data_hash": "99999999999999999999999999999993+99"}]}

    runner.project_uuid = "zzzzz-8i9sb-zzzzzzzzzzzzzzz"
    runner.ignore_docker_for_reuse = False
    runner.num_retries = 0

    loadingContext, runtimeContext = self.helper(runner)

    tool, metadata = loadingContext.loader.resolve_ref("tests/wf/echo-wf.cwl")
    metadata["cwlVersion"] = tool["cwlVersion"]

    mockcollection.side_effect = lambda *args, **kwargs: CollectionMock(mock.MagicMock(), *args, **kwargs)

    arvtool = arvados_cwl.ArvadosWorkflow(runner, tool, loadingContext)
    arvtool.formatgraph = None
    it = arvtool.job({}, mock.MagicMock(), runtimeContext)
    # First yielded runnable stages the subworkflow; the second submits it.
    it.next().run(runtimeContext)
    it.next().run(runtimeContext)

    with open("tests/wf/echo-subwf.cwl") as f:
        subwf = StripYAMLComments(f.read())

    runner.api.jobs().create.assert_called_with(
        body=JsonDiffMatcher({
            'minimum_script_version': 'a3f2cb186e437bfce0031b024b2157b73ed2717d',
            'repository': 'arvados',
            'script_version': 'master',
            'script': 'crunchrunner',
            'script_parameters': {
                'tasks': [{
                    'task.env': {
                        'HOME': '$(task.outdir)',
                        'TMPDIR': '$(task.tmpdir)'
                    },
                    'task.vwd': {
                        'workflow.cwl': '$(task.keep)/99999999999999999999999999999996+99/workflow.cwl',
                        'cwl.input.yml': '$(task.keep)/99999999999999999999999999999996+99/cwl.input.yml'
                    },
                    'command': [u'cwltool', u'--no-container', u'--move-outputs',
                                u'--preserve-entire-environment', u'workflow.cwl#main',
                                u'cwl.input.yml'],
                    'task.stdout': 'cwl.output.json'
                }]
            },
            # Aggregated over both subworkflow steps.
            'runtime_constraints': {
                'min_scratch_mb_per_node': 4096,
                'min_cores_per_node': 3,
                'docker_image': 'arvados/jobs',
                'min_ram_mb_per_node': 1024
            },
            'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz'}),
        filters=[['repository', '=', 'arvados'],
                 ['script', '=', 'crunchrunner'],
                 ['script_version', 'in git', 'a3f2cb186e437bfce0031b024b2157b73ed2717d'],
                 ['docker_image_locator', 'in docker', 'arvados/jobs']],
        find_or_create=True)
def run():
    """Crunch-job entry point (older variant): rewrite the job's
    script_parameters into a CWL job order, load the tool, and execute it via
    ArvCwlRunner.arv_executor.  On any failure, mark the current task as
    unsuccessful (recording partial output if available)."""
    # Print package versions
    logger.info(arvados_cwl.versionstring())

    api = arvados.api("v1")

    arvados_cwl.add_arv_hints()

    runner = None
    try:
        job_order_object = arvados.current_job()['script_parameters']

        # Matches a bare portable data hash, optionally followed by a path.
        pdh_path = re.compile(r'^[0-9a-f]{32}\+\d+(/.+)?$')

        def keeppath(v):
            # Rewrite a bare PDH (or PDH/path) string into a keep: URI;
            # anything else passes through unchanged.
            if pdh_path.match(v):
                return "keep:%s" % v
            else:
                return v

        def keeppathObj(v):
            # In-place rewrite of a CWL File/Directory object's location.
            v["location"] = keeppath(v["location"])

        # Resolve the tool path relative to the task's keep mount.
        job_order_object["cwl:tool"] = "file://%s/%s" % (
            os.environ['TASK_KEEPMOUNT'], job_order_object["cwl:tool"])

        # Promote bare keep locator strings in the job order to CWL File objects.
        for k, v in job_order_object.items():
            if isinstance(
                    v, basestring) and arvados.util.keep_locator_pattern.match(v):
                job_order_object[k] = {
                    "class": "File",
                    "location": "keep:%s" % v
                }

        adjustFileObjs(job_order_object, keeppathObj)
        adjustDirObjs(job_order_object, keeppathObj)
        normalizeFilesDirs(job_order_object)
        # Fill in directory listings so Directory objects are complete.
        adjustDirObjs(
            job_order_object,
            functools.partial(
                getListing,
                arvados_cwl.fsaccess.CollectionFsAccess("", api_client=api)))

        output_name = None
        output_tags = None
        enable_reuse = True

        # Pop runner-control parameters ("arv:*") out of the job order so the
        # workflow itself never sees them as inputs.
        if "arv:output_name" in job_order_object:
            output_name = job_order_object["arv:output_name"]
            del job_order_object["arv:output_name"]

        if "arv:output_tags" in job_order_object:
            output_tags = job_order_object["arv:output_tags"]
            del job_order_object["arv:output_tags"]

        if "arv:enable_reuse" in job_order_object:
            enable_reuse = job_order_object["arv:enable_reuse"]
            del job_order_object["arv:enable_reuse"]

        runner = arvados_cwl.ArvCwlRunner(api_client=arvados.api(
            'v1', model=OrderedJsonModel()),
                                          output_name=output_name,
                                          output_tags=output_tags)

        t = load_tool(job_order_object, runner.arv_make_tool)

        args = argparse.Namespace()
        args.project_uuid = arvados.current_job()["owner_uuid"]
        args.enable_reuse = enable_reuse
        args.submit = False
        args.debug = True
        args.quiet = False
        args.ignore_docker_for_reuse = False
        args.basedir = os.getcwd()
        args.cwl_runner_job = {
            "uuid": arvados.current_job()["uuid"],
            "state": arvados.current_job()["state"]
        }
        outputObj = runner.arv_executor(t, job_order_object, **vars(args))
    except Exception as e:
        if isinstance(e, WorkflowException):
            logging.info("Workflow error %s", e)
        else:
            logging.exception("Unhandled exception")
        # Record the failure (and any partial output) on the current task.
        if runner and runner.final_output_collection:
            outputCollection = runner.final_output_collection.portable_data_hash(
            )
        else:
            outputCollection = None
        api.job_tasks().update(uuid=arvados.current_task()['uuid'],
                               body={
                                   'output': outputCollection,
                                   'success': False,
                                   'progress': 1.0
                               }).execute()