    def test_fetcher(self):
        class TestFetcher(schema_salad.ref_resolver.Fetcher):
            def __init__(self, a, b):
                pass

            def fetch_text(self, url):    # type: (unicode) -> unicode
                if url == "baz:bar/foo.cwl":
                    return """
cwlVersion: v1.0
class: CommandLineTool
baseCommand: echo
inputs: []
outputs: []
"""
                else:
                    raise RuntimeError("Not foo.cwl")

            def check_exists(self, url):  # type: (unicode) -> bool
                if url == "baz:bar/foo.cwl":
                    return True
                else:
                    return False

        def test_resolver(d, a):
            return "baz:bar/" + a

        load_tool("foo.cwl", defaultMakeTool, resolver=test_resolver, fetcher_constructor=TestFetcher)

        self.assertEqual(0, main(["--print-pre", "--debug", "foo.cwl"], resolver=test_resolver, fetcher_constructor=TestFetcher))
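This first example stubs out schema_salad's Fetcher so that load_tool and main resolve and fetch "foo.cwl" entirely in memory: test_resolver rewrites the bare name to "baz:bar/foo.cwl", and the fetcher only answers for that URL. It uses the older cwltool calling convention in which resolver and fetcher_constructor are passed directly to load_tool and main; Example #2 shows the same test written against the LoadingContext-based API.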
Example #2
    def test_fetcher(self):
        class TestFetcher(schema_salad.ref_resolver.Fetcher):
            def __init__(self, a, b):
                pass

            def fetch_text(self, url):  # type: (unicode) -> unicode
                if url == "baz:bar/foo.cwl":
                    return """
cwlVersion: v1.0
class: CommandLineTool
baseCommand: echo
inputs: []
outputs: []
"""
                else:
                    raise RuntimeError("Not foo.cwl, was %s" % url)

            def check_exists(self, url):  # type: (unicode) -> bool
                if url == "baz:bar/foo.cwl":
                    return True
                else:
                    return False

            def urljoin(self, base, url):
                urlsp = urllib.parse.urlsplit(url)
                if urlsp.scheme:
                    return url
                basesp = urllib.parse.urlsplit(base)

                if basesp.scheme == "keep":
                    return base + "/" + url
                return urllib.parse.urljoin(base, url)

        def test_resolver(d, a):
            if a.startswith("baz:bar/"):
                return a
            else:
                return "baz:bar/" + a

        loadingContext = LoadingContext({"construct_tool_object": default_make_tool,
                                         "resolver": test_resolver,
                                         "fetcher_constructor": TestFetcher})

        load_tool("foo.cwl", loadingContext)

        self.assertEqual(0, main(["--print-pre", "--debug", "foo.cwl"], loadingContext=loadingContext))
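Compared with Example #1, the resolver here is idempotent (it leaves already-resolved "baz:bar/" references alone), and the fetcher additionally overrides urljoin so that references against a keep: base are joined by simple concatenation rather than urllib.parse.urljoin, which does not understand the custom scheme. The resolver, fetcher, and tool constructor are bundled into a LoadingContext that is handed to both load_tool and main.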
Example #3
def test_get_subgraph():
    loadingContext = LoadingContext({"construct_tool_object": default_make_tool})
    wf = norm(Path(get_data("tests/subgraph/count-lines1-wf.cwl")).as_uri())
    loadingContext.do_update = False
    tool = load_tool(wf, loadingContext)

    sg = norm(Path(get_data("tests/subgraph")).as_uri())

    def clean(val):
        if isinstance(val, string_types):
            if val.startswith(sg):
                return val[len(sg)+1:]
        if isinstance(val, dict):
            return {k: clean(v) for k,v in val.items()}
        if isinstance(val, list):
            return [clean(v) for v in val]
        return val

    for a in ("file1", "file2", "file3", "count_output",
              "output3", "output4", "output5",
              "step1", "step2", "step3", "step4", "step5"):
        extracted = get_subgraph([wf+"#"+a], tool)
        with open(get_data("tests/subgraph/extract_"+a+".json")) as f:
            assert json.load(f) == clean(convert_to_dict(extracted))
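test_get_subgraph loads tests/subgraph/count-lines1-wf.cwl with auto-updating disabled, extracts the subgraph reachable from each listed input, output, and step, and compares the result against the extract_<name>.json fixtures stored alongside the workflow. clean() strips the test-data base URI from all strings (recursing into dicts and lists) so the comparison does not depend on where the checkout lives.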
Example #4
def run():
    # Timestamps are added by crunch-job, so don't print redundant timestamps.
    arvados.log_handler.setFormatter(logging.Formatter('%(name)s %(levelname)s: %(message)s'))

    # Print package versions
    logger.info(arvados_cwl.versionstring())

    api = arvados.api("v1")

    arvados_cwl.add_arv_hints()

    runner = None
    try:
        job_order_object = arvados.current_job()['script_parameters']
        toolpath = "file://%s/%s" % (os.environ['TASK_KEEPMOUNT'], job_order_object.pop("cwl:tool"))

        pdh_path = re.compile(r'^[0-9a-f]{32}\+\d+(/.+)?$')

        def keeppath(v):
            if pdh_path.match(v):
                return "keep:%s" % v
            else:
                return v

        def keeppathObj(v):
            if "location" in v:
                v["location"] = keeppath(v["location"])

        for k,v in job_order_object.items():
            if isinstance(v, basestring) and arvados.util.keep_locator_pattern.match(v):
                job_order_object[k] = {
                    "class": "File",
                    "location": "keep:%s" % v
                }

        adjustFileObjs(job_order_object, keeppathObj)
        adjustDirObjs(job_order_object, keeppathObj)
        normalizeFilesDirs(job_order_object)

        output_name = None
        output_tags = None
        enable_reuse = True
        on_error = "continue"
        debug = False

        if "arv:output_name" in job_order_object:
            output_name = job_order_object["arv:output_name"]
            del job_order_object["arv:output_name"]

        if "arv:output_tags" in job_order_object:
            output_tags = job_order_object["arv:output_tags"]
            del job_order_object["arv:output_tags"]

        if "arv:enable_reuse" in job_order_object:
            enable_reuse = job_order_object["arv:enable_reuse"]
            del job_order_object["arv:enable_reuse"]

        if "arv:on_error" in job_order_object:
            on_error = job_order_object["arv:on_error"]
            del job_order_object["arv:on_error"]

        if "arv:debug" in job_order_object:
            debug = job_order_object["arv:debug"]
            del job_order_object["arv:debug"]

        arvargs = argparse.Namespace()
        arvargs.work_api = "jobs"
        arvargs.output_name = output_name
        arvargs.output_tags = output_tags
        arvargs.thread_count = 1

        runner = arvados_cwl.ArvCwlRunner(api_client=arvados.safeapi.ThreadSafeApiCache(
            api_params={"model": OrderedJsonModel()}, keep_params={"num_retries": 4}),
                                          arvargs=arvargs)

        make_fs_access = functools.partial(CollectionFsAccess,
                                 collection_cache=runner.collection_cache)

        t = load_tool(toolpath, runner.loadingContext)

        if debug:
            logger.setLevel(logging.DEBUG)
            logging.getLogger('arvados').setLevel(logging.DEBUG)
            logging.getLogger("cwltool").setLevel(logging.DEBUG)

        args = ArvRuntimeContext(vars(arvargs))
        args.project_uuid = arvados.current_job()["owner_uuid"]
        args.enable_reuse = enable_reuse
        args.on_error = on_error
        args.submit = False
        args.debug = debug
        args.quiet = False
        args.ignore_docker_for_reuse = False
        args.basedir = os.getcwd()
        args.name = None
        args.cwl_runner_job = {"uuid": arvados.current_job()["uuid"], "state": arvados.current_job()["state"]}
        args.make_fs_access = make_fs_access
        args.trash_intermediate = False
        args.intermediate_output_ttl = 0
        args.priority = arvados_cwl.DEFAULT_PRIORITY
        args.do_validate = True
        args.disable_js_validation = False
        args.tmp_outdir_prefix = "tmp"

        runner.arv_executor(t, job_order_object, args, logger=logger)
    except Exception as e:
        if isinstance(e, WorkflowException):
            logging.info("Workflow error %s", e)
        else:
            logging.exception("Unhandled exception")
        if runner and runner.final_output_collection:
            outputCollection = runner.final_output_collection.portable_data_hash()
        else:
            outputCollection = None
        api.job_tasks().update(uuid=arvados.current_task()['uuid'],
                                             body={
                                                 'output': outputCollection,
                                                 'success': False,
                                                 'progress':1.0
                                             }).execute()
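Examples #4 to #6 are variants of the same crunch-job entry point. run() reads the job order from the job's script_parameters, rewrites values that look like Keep portable data hashes (32 hex digits, '+', collection size, optional path) into keep: File locations, pops the arv:* control parameters, loads the tool from the task's Keep mount, and runs it through ArvCwlRunner.arv_executor with an ArvRuntimeContext; any failure marks the current job task as unsuccessful, attaching whatever partial output collection exists.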
Example #5
def run():
    # Timestamps are added by crunch-job, so don't print redundant timestamps.
    arvados.log_handler.setFormatter(logging.Formatter('%(name)s %(levelname)s: %(message)s'))

    # Print package versions
    logger.info(arvados_cwl.versionstring())

    api = arvados.api("v1")

    arvados_cwl.add_arv_hints()

    runner = None
    try:
        job_order_object = arvados.current_job()['script_parameters']
        toolpath = "file://%s/%s" % (os.environ['TASK_KEEPMOUNT'], job_order_object.pop("cwl:tool"))

        pdh_path = re.compile(r'^[0-9a-f]{32}\+\d+(/.+)?$')

        def keeppath(v):
            if pdh_path.match(v):
                return "keep:%s" % v
            else:
                return v

        def keeppathObj(v):
            if "location" in v:
                v["location"] = keeppath(v["location"])

        for k,v in job_order_object.items():
            if isinstance(v, basestring) and arvados.util.keep_locator_pattern.match(v):
                job_order_object[k] = {
                    "class": "File",
                    "location": "keep:%s" % v
                }

        adjustFileObjs(job_order_object, keeppathObj)
        adjustDirObjs(job_order_object, keeppathObj)
        normalizeFilesDirs(job_order_object)

        output_name = None
        output_tags = None
        enable_reuse = True
        on_error = "continue"
        if "arv:output_name" in job_order_object:
            output_name = job_order_object["arv:output_name"]
            del job_order_object["arv:output_name"]

        if "arv:output_tags" in job_order_object:
            output_tags = job_order_object["arv:output_tags"]
            del job_order_object["arv:output_tags"]

        if "arv:enable_reuse" in job_order_object:
            enable_reuse = job_order_object["arv:enable_reuse"]
            del job_order_object["arv:enable_reuse"]

        if "arv:on_error" in job_order_object:
            on_error = job_order_object["arv:on_error"]
            del job_order_object["arv:on_error"]

        runner = arvados_cwl.ArvCwlRunner(api_client=arvados.api('v1', model=OrderedJsonModel()),
                                          output_name=output_name, output_tags=output_tags)

        make_fs_access = functools.partial(CollectionFsAccess,
                                 collection_cache=runner.collection_cache)

        t = load_tool(toolpath, runner.arv_make_tool,
                      fetcher_constructor=functools.partial(CollectionFetcher,
                                                  api_client=runner.api,
                                                  fs_access=make_fs_access(""),
                                                  num_retries=runner.num_retries))

        args = argparse.Namespace()
        args.project_uuid = arvados.current_job()["owner_uuid"]
        args.enable_reuse = enable_reuse
        args.on_error = on_error
        args.submit = False
        args.debug = False
        args.quiet = False
        args.ignore_docker_for_reuse = False
        args.basedir = os.getcwd()
        args.name = None
        args.cwl_runner_job = {"uuid": arvados.current_job()["uuid"], "state": arvados.current_job()["state"]}
        args.make_fs_access = make_fs_access

        runner.arv_executor(t, job_order_object, **vars(args))
    except Exception as e:
        if isinstance(e, WorkflowException):
            logging.info("Workflow error %s", e)
        else:
            logging.exception("Unhandled exception")
        if runner and runner.final_output_collection:
            outputCollection = runner.final_output_collection.portable_data_hash()
        else:
            outputCollection = None
        api.job_tasks().update(uuid=arvados.current_task()['uuid'],
                                             body={
                                                 'output': outputCollection,
                                                 'success': False,
                                                 'progress':1.0
                                             }).execute()
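Example #5 is an earlier take on the same script: there is no arv:debug handling, the runner is built directly from arvados.api with output_name and output_tags keyword arguments, load_tool is given arv_make_tool plus an explicit CollectionFetcher constructor instead of the runner's loadingContext, and the options reach arv_executor as an expanded argparse.Namespace rather than an ArvRuntimeContext.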
Example #6
def run():
    # Timestamps are added by crunch-job, so don't print redundant timestamps.
    arvados.log_handler.setFormatter(
        logging.Formatter('%(name)s %(levelname)s: %(message)s'))

    # Print package versions
    logger.info(arvados_cwl.versionstring())

    api = arvados.api("v1")

    arvados_cwl.add_arv_hints()

    runner = None
    try:
        job_order_object = arvados.current_job()['script_parameters']
        toolpath = "file://%s/%s" % (os.environ['TASK_KEEPMOUNT'],
                                     job_order_object.pop("cwl:tool"))

        pdh_path = re.compile(r'^[0-9a-f]{32}\+\d+(/.+)?$')

        def keeppath(v):
            if pdh_path.match(v):
                return "keep:%s" % v
            else:
                return v

        def keeppathObj(v):
            if "location" in v:
                v["location"] = keeppath(v["location"])

        for k, v in viewitems(job_order_object):
            if isinstance(v, basestring) and arvados.util.keep_locator_pattern.match(v):
                job_order_object[k] = {
                    "class": "File",
                    "location": "keep:%s" % v
                }

        adjustFileObjs(job_order_object, keeppathObj)
        adjustDirObjs(job_order_object, keeppathObj)
        normalizeFilesDirs(job_order_object)

        output_name = None
        output_tags = None
        enable_reuse = True
        on_error = "continue"
        debug = False

        if "arv:output_name" in job_order_object:
            output_name = job_order_object["arv:output_name"]
            del job_order_object["arv:output_name"]

        if "arv:output_tags" in job_order_object:
            output_tags = job_order_object["arv:output_tags"]
            del job_order_object["arv:output_tags"]

        if "arv:enable_reuse" in job_order_object:
            enable_reuse = job_order_object["arv:enable_reuse"]
            del job_order_object["arv:enable_reuse"]

        if "arv:on_error" in job_order_object:
            on_error = job_order_object["arv:on_error"]
            del job_order_object["arv:on_error"]

        if "arv:debug" in job_order_object:
            debug = job_order_object["arv:debug"]
            del job_order_object["arv:debug"]

        arvargs = argparse.Namespace()
        arvargs.work_api = "jobs"
        arvargs.output_name = output_name
        arvargs.output_tags = output_tags
        arvargs.thread_count = 1
        arvargs.collection_cache_size = None

        runner = arvados_cwl.ArvCwlExecutor(
            api_client=arvados.safeapi.ThreadSafeApiCache(
                api_params={"model": OrderedJsonModel()},
                keep_params={"num_retries": 4}),
            arvargs=arvargs)

        make_fs_access = functools.partial(
            CollectionFsAccess, collection_cache=runner.collection_cache)

        t = load_tool(toolpath, runner.loadingContext)

        if debug:
            logger.setLevel(logging.DEBUG)
            logging.getLogger('arvados').setLevel(logging.DEBUG)
            logging.getLogger("cwltool").setLevel(logging.DEBUG)

        args = ArvRuntimeContext(vars(arvargs))
        args.project_uuid = arvados.current_job()["owner_uuid"]
        args.enable_reuse = enable_reuse
        args.on_error = on_error
        args.submit = False
        args.debug = debug
        args.quiet = False
        args.ignore_docker_for_reuse = False
        args.basedir = os.getcwd()
        args.name = None
        args.cwl_runner_job = {
            "uuid": arvados.current_job()["uuid"],
            "state": arvados.current_job()["state"]
        }
        args.make_fs_access = make_fs_access
        args.trash_intermediate = False
        args.intermediate_output_ttl = 0
        args.priority = arvados_cwl.DEFAULT_PRIORITY
        args.do_validate = True
        args.disable_js_validation = False
        args.tmp_outdir_prefix = "tmp"

        runner.arv_executor(t, job_order_object, args, logger=logger)
    except Exception as e:
        if isinstance(e, WorkflowException):
            logging.info("Workflow error %s", e)
        else:
            logging.exception("Unhandled exception")
        if runner and runner.final_output_collection:
            outputCollection = runner.final_output_collection.portable_data_hash()
        else:
            outputCollection = None
        api.job_tasks().update(uuid=arvados.current_task()['uuid'],
                               body={
                                   'output': outputCollection,
                                   'success': False,
                                   'progress': 1.0
                               }).execute()
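Example #6 is the same entry point brought up to date: viewitems() keeps the job-order loop working on both Python 2 and 3, the runner class is ArvCwlExecutor, and collection_cache_size is initialised explicitly; otherwise it mirrors Example #4.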
Example #7
def __load(location):
    return load_tool(
        location,
        setup_loadingContext(LoadingContext(default_cwl_args),
                             RuntimeContext(default_cwl_args),
                             argparse.Namespace(**default_cwl_args)))
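This small helper builds a LoadingContext and a RuntimeContext from the same default_cwl_args mapping, lets setup_loadingContext (presumably cwltool's helper of that name) combine them with an argparse.Namespace of the same defaults, and loads the document at location with the resulting context.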
Example #8
    def arv_executor(self, tool, job_order, runtimeContext, logger=None):
        self.debug = runtimeContext.debug

        tool.visit(self.check_features)

        self.project_uuid = runtimeContext.project_uuid
        self.pipeline = None
        self.fs_access = runtimeContext.make_fs_access(runtimeContext.basedir)
        self.secret_store = runtimeContext.secret_store

        self.trash_intermediate = runtimeContext.trash_intermediate
        if self.trash_intermediate and self.work_api != "containers":
            raise Exception(
                "--trash-intermediate is only supported with --api=containers."
            )

        self.intermediate_output_ttl = runtimeContext.intermediate_output_ttl
        if self.intermediate_output_ttl and self.work_api != "containers":
            raise Exception(
                "--intermediate-output-ttl is only supported with --api=containers."
            )
        if self.intermediate_output_ttl < 0:
            raise Exception(
                "Invalid value %d for --intermediate-output-ttl, cannot be less than zero"
                % self.intermediate_output_ttl)

        if runtimeContext.submit_request_uuid and self.work_api != "containers":
            raise Exception(
                "--submit-request-uuid requires containers API, but using '{}' api"
                .format(self.work_api))

        if not runtimeContext.name:
            runtimeContext.name = self.name = tool.tool.get(
                "label") or tool.metadata.get("label") or os.path.basename(
                    tool.tool["id"])

        # Upload local file references in the job order.
        job_order = upload_job_order(self, "%s input" % runtimeContext.name,
                                     tool, job_order)

        submitting = (runtimeContext.update_workflow
                      or runtimeContext.create_workflow or
                      (runtimeContext.submit
                       and not (tool.tool["class"] == "CommandLineTool"
                                and runtimeContext.wait
                                and not runtimeContext.always_submit_runner)))

        loadingContext = self.loadingContext.copy()
        loadingContext.do_validate = False
        loadingContext.do_update = False
        if submitting:
            # Document may have been auto-updated. Reload the original
            # document with updating disabled because we want to
            # submit the original document, not the auto-updated one.
            tool = load_tool(tool.tool["id"], loadingContext)

        # Upload direct dependencies of workflow steps, get back mapping of files to keep references.
        # Also uploads docker images.
        merged_map = upload_workflow_deps(self, tool)

        # Recreate process object (ArvadosWorkflow or
        # ArvadosCommandTool) because tool document may have been
        # updated by upload_workflow_deps in ways that modify
        # inheritance of hints or requirements.
        loadingContext.loader = tool.doc_loader
        loadingContext.avsc_names = tool.doc_schema
        loadingContext.metadata = tool.metadata
        tool = load_tool(tool.tool, loadingContext)

        existing_uuid = runtimeContext.update_workflow
        if existing_uuid or runtimeContext.create_workflow:
            # Create a pipeline template or workflow record and exit.
            if self.work_api == "containers":
                return (upload_workflow(
                    self,
                    tool,
                    job_order,
                    self.project_uuid,
                    uuid=existing_uuid,
                    submit_runner_ram=runtimeContext.submit_runner_ram,
                    name=runtimeContext.name,
                    merged_map=merged_map), "success")

        self.apply_reqs(job_order, tool)

        self.ignore_docker_for_reuse = runtimeContext.ignore_docker_for_reuse
        self.eval_timeout = runtimeContext.eval_timeout

        runtimeContext = runtimeContext.copy()
        runtimeContext.use_container = True
        runtimeContext.tmpdir_prefix = "tmp"
        runtimeContext.work_api = self.work_api

        if self.work_api == "containers":
            if self.ignore_docker_for_reuse:
                raise Exception(
                    "--ignore-docker-for-reuse not supported with containers API."
                )
            runtimeContext.outdir = "/var/spool/cwl"
            runtimeContext.docker_outdir = "/var/spool/cwl"
            runtimeContext.tmpdir = "/tmp"
            runtimeContext.docker_tmpdir = "/tmp"

        if runtimeContext.priority < 1 or runtimeContext.priority > 1000:
            raise Exception("--priority must be in the range 1..1000.")

        if self.should_estimate_cache_size:
            visited = set()
            estimated_size = [0]

            def estimate_collection_cache(obj):
                if obj.get("location", "").startswith("keep:"):
                    m = pdh_size.match(obj["location"][5:])
                    if m and m.group(1) not in visited:
                        visited.add(m.group(1))
                        estimated_size[0] += int(m.group(2))

            visit_class(job_order, ("File", "Directory"),
                        estimate_collection_cache)
            runtimeContext.collection_cache_size = max(
                ((estimated_size[0] * 192) // (1024 * 1024)) + 1, 256)
            self.collection_cache.set_cap(
                runtimeContext.collection_cache_size * 1024 * 1024)

        logger.info("Using collection cache size %s MiB",
                    runtimeContext.collection_cache_size)

        runnerjob = None
        if runtimeContext.submit:
            # Submit a runner job to run the workflow for us.
            if self.work_api == "containers":
                if (tool.tool["class"] == "CommandLineTool"
                        and runtimeContext.wait
                        and not runtimeContext.always_submit_runner):
                    runtimeContext.runnerjob = tool.tool["id"]
                else:
                    tool = RunnerContainer(
                        self,
                        tool,
                        loadingContext,
                        runtimeContext.enable_reuse,
                        self.output_name,
                        self.output_tags,
                        submit_runner_ram=runtimeContext.submit_runner_ram,
                        name=runtimeContext.name,
                        on_error=runtimeContext.on_error,
                        submit_runner_image=runtimeContext.submit_runner_image,
                        intermediate_output_ttl=runtimeContext.intermediate_output_ttl,
                        merged_map=merged_map,
                        priority=runtimeContext.priority,
                        secret_store=self.secret_store,
                        collection_cache_size=runtimeContext.collection_cache_size,
                        collection_cache_is_default=self.should_estimate_cache_size)

        if runtimeContext.cwl_runner_job is not None:
            self.uuid = runtimeContext.cwl_runner_job.get('uuid')

        jobiter = tool.job(job_order, self.output_callback, runtimeContext)

        if runtimeContext.submit and not runtimeContext.wait:
            runnerjob = next(jobiter)
            runnerjob.run(runtimeContext)
            return (runnerjob.uuid, "success")

        current_container = arvados_cwl.util.get_current_container(
            self.api, self.num_retries, logger)
        if current_container:
            logger.info("Running inside container %s",
                        current_container.get("uuid"))

        self.poll_api = arvados.api('v1', timeout=runtimeContext.http_timeout)
        self.polling_thread = threading.Thread(target=self.poll_states)
        self.polling_thread.start()

        self.task_queue = TaskQueue(self.workflow_eval_lock, self.thread_count)

        try:
            self.workflow_eval_lock.acquire()

            # Holds the lock while this code runs and releases it when
            # it is safe to do so in self.workflow_eval_lock.wait(),
            # at which point on_message can update job state and
            # process output callbacks.

            loopperf = Perf(metrics, "jobiter")
            loopperf.__enter__()
            for runnable in jobiter:
                loopperf.__exit__()

                if self.stop_polling.is_set():
                    break

                if self.task_queue.error is not None:
                    raise self.task_queue.error

                if runnable:
                    with Perf(metrics, "run"):
                        self.start_run(runnable, runtimeContext)
                else:
                    if (self.task_queue.in_flight + len(self.processes)) > 0:
                        self.workflow_eval_lock.wait(3)
                    else:
                        logger.error(
                            "Workflow is deadlocked, no runnable processes and not waiting on any pending processes."
                        )
                        break

                if self.stop_polling.is_set():
                    break

                loopperf.__enter__()
            loopperf.__exit__()

            while (self.task_queue.in_flight + len(self.processes)) > 0:
                if self.task_queue.error is not None:
                    raise self.task_queue.error
                self.workflow_eval_lock.wait(3)

        except UnsupportedRequirement:
            raise
        except:
            if (sys.exc_info()[0] is KeyboardInterrupt
                    or sys.exc_info()[0] is SystemExit):
                logger.error("Interrupted, workflow will be cancelled")
            elif isinstance(sys.exc_info()[1], WorkflowException):
                logger.error(
                    "Workflow execution failed:\n%s",
                    sys.exc_info()[1],
                    exc_info=(sys.exc_info()[1] if self.debug else False))
            else:
                logger.exception("Workflow execution failed")

            if self.pipeline:
                self.api.pipeline_instances().update(
                    uuid=self.pipeline["uuid"], body={
                        "state": "Failed"
                    }).execute(num_retries=self.num_retries)

            if self.work_api == "containers" and not current_container:
                # Not running in a crunch container, so cancel any outstanding processes.
                for p in self.processes:
                    try:
                        self.api.container_requests().update(
                            uuid=p, body={
                                "priority": "0"
                            }).execute(num_retries=self.num_retries)
                    except Exception:
                        pass
        finally:
            self.workflow_eval_lock.release()
            self.task_queue.drain()
            self.stop_polling.set()
            self.polling_thread.join()
            self.task_queue.join()

        if self.final_status == "UnsupportedRequirement":
            raise UnsupportedRequirement("Check log for details.")

        if self.final_output is None:
            raise WorkflowException("Workflow did not return a result.")

        if runtimeContext.submit and isinstance(tool, Runner):
            logger.info("Final output collection %s", tool.final_output)
        else:
            if self.output_name is None:
                self.output_name = "Output of %s" % (shortname(
                    tool.tool["id"]))
            if self.output_tags is None:
                self.output_tags = ""

            storage_classes = runtimeContext.storage_classes.strip().split(",")
            self.final_output, self.final_output_collection = self.make_output_collection(
                self.output_name, storage_classes, self.output_tags,
                self.final_output)
            self.set_crunch_output()

        if runtimeContext.compute_checksum:
            adjustDirObjs(self.final_output,
                          partial(get_listing, self.fs_access))
            adjustFileObjs(self.final_output,
                           partial(compute_checksums, self.fs_access))

        if self.trash_intermediate and self.final_status == "success":
            self.trash_intermediate_output()

        return (self.final_output, self.final_status)
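arv_executor is the orchestration core behind the runner scripts above. It validates the options that only make sense with the containers API, uploads the job order and workflow dependencies, and reloads the tool so hint and requirement inheritance reflects the uploads. Depending on the options it then registers the workflow, submits a RunnerContainer, or runs the job iterator locally while a polling thread and a TaskQueue drive execution, finally assembling the output collection and returning (final_output, final_status).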
Example #9
def load_cwl(cwl_file):
    load.loaders = {}
    loading_context = cwltool.context.LoadingContext(get_default_args())
    loading_context.construct_tool_object = default_make_tool
    loading_context.resolver = tool_resolver
    return load.load_tool(cwl_file, loading_context)
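load_cwl() resets the module-level loader cache, builds a LoadingContext from get_default_args() with default_make_tool and tool_resolver, and delegates to load.load_tool. A minimal usage sketch, assuming the example's imports are in place; "my-tool.cwl" is a placeholder path:

# Hypothetical usage of the helper above; "my-tool.cwl" is a placeholder path.
tool = load_cwl("my-tool.cwl")
print(tool.tool["class"])   # e.g. "CommandLineTool" or "Workflow"
print(tool.tool["id"])      # the resolved document URI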
Example #10
def run():
    # Print package versions
    logger.info(arvados_cwl.versionstring())

    api = arvados.api("v1")

    arvados_cwl.add_arv_hints()

    runner = None
    try:
        job_order_object = arvados.current_job()['script_parameters']

        pdh_path = re.compile(r'^[0-9a-f]{32}\+\d+(/.+)?$')

        def keeppath(v):
            if pdh_path.match(v):
                return "keep:%s" % v
            else:
                return v

        def keeppathObj(v):
            v["location"] = keeppath(v["location"])

        job_order_object["cwl:tool"] = "file://%s/%s" % (
            os.environ['TASK_KEEPMOUNT'], job_order_object["cwl:tool"])

        for k, v in job_order_object.items():
            if isinstance(v, basestring) and arvados.util.keep_locator_pattern.match(v):
                job_order_object[k] = {
                    "class": "File",
                    "location": "keep:%s" % v
                }

        adjustFileObjs(job_order_object, keeppathObj)
        adjustDirObjs(job_order_object, keeppathObj)
        normalizeFilesDirs(job_order_object)
        adjustDirObjs(
            job_order_object,
            functools.partial(
                getListing,
                arvados_cwl.fsaccess.CollectionFsAccess("", api_client=api)))

        output_name = None
        output_tags = None
        enable_reuse = True
        if "arv:output_name" in job_order_object:
            output_name = job_order_object["arv:output_name"]
            del job_order_object["arv:output_name"]

        if "arv:output_tags" in job_order_object:
            output_tags = job_order_object["arv:output_tags"]
            del job_order_object["arv:output_tags"]

        if "arv:enable_reuse" in job_order_object:
            enable_reuse = job_order_object["arv:enable_reuse"]
            del job_order_object["arv:enable_reuse"]

        runner = arvados_cwl.ArvCwlRunner(api_client=arvados.api(
            'v1', model=OrderedJsonModel()),
                                          output_name=output_name,
                                          output_tags=output_tags)

        t = load_tool(job_order_object, runner.arv_make_tool)

        args = argparse.Namespace()
        args.project_uuid = arvados.current_job()["owner_uuid"]
        args.enable_reuse = enable_reuse
        args.submit = False
        args.debug = True
        args.quiet = False
        args.ignore_docker_for_reuse = False
        args.basedir = os.getcwd()
        args.cwl_runner_job = {
            "uuid": arvados.current_job()["uuid"],
            "state": arvados.current_job()["state"]
        }
        outputObj = runner.arv_executor(t, job_order_object, **vars(args))
    except Exception as e:
        if isinstance(e, WorkflowException):
            logging.info("Workflow error %s", e)
        else:
            logging.exception("Unhandled exception")
        if runner and runner.final_output_collection:
            outputCollection = runner.final_output_collection.portable_data_hash()
        else:
            outputCollection = None
        api.job_tasks().update(uuid=arvados.current_task()['uuid'],
                               body={
                                   'output': outputCollection,
                                   'success': False,
                                   'progress': 1.0
                               }).execute()
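Example #10 is the oldest variant in this set: it rewrites job_order_object["cwl:tool"] in place instead of building a separate toolpath, expands directory listings up front with getListing over a CollectionFsAccess, has no arv:on_error or arv:debug handling, and passes the raw job order straight to load_tool with arv_make_tool.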
Example #11
def read_config_from_cwl_file(cwl_file):
    print_pref = "[read_cwl_file]:"
    configs = {}
    metadata = {
        "doc": "",
        "workflow_name": os.path.basename(cwl_file),
        "workflow_path": os.path.abspath(cwl_file),
        "workflow_type": "CWL"
    }
    # cwltool needs to be imported on demand since
    # repeatedly calling functions on a document
    # with the same name caused errors.
    from cwltool.context import LoadingContext
    from cwltool.load_tool import load_tool
    from cwltool.workflow import default_make_tool
    loadingContext = LoadingContext({"construct_tool_object": default_make_tool, "disable_js_validation": True})
    try:
        cwl_document = load_tool(cwl_file, loadingContext)
    except AssertionError as e:
        raise AssertionError( print_pref + "failed to read cwl file \"" + cwl_file + "\": does not exist or is invalid")
    inp_records = cwl_document.inputs_record_schema["fields"]
    if "doc" in cwl_document.tool:
        metadata["doc"] = cwl_document.tool["doc"]
    for inp_rec in inp_records:
        name = clean_string( inp_rec["name"] )
        is_array = False
        null_allowed = False
        null_items_allowed = False
        default_value = [""]
        allowed_selection = [""]
        # read type:
        try:
            type_, null_allowed, is_array, null_items_allowed, allowed_selection = read_inp_rec_type_field(inp_rec["type"])
        except Exception as e:
            raise AssertionError( print_pref + "E: reading type of param \"{}\": {}".format(name, str(e)))
        # get the default:
        if "default" in inp_rec:
            if is_basic_type_instance(inp_rec["default"]):
                default_value = [clean_string(inp_rec["default"])]
            else: 
                if is_array and isinstance(inp_rec["default"], list):
                    default_value = []
                    for entry in inp_rec["default"]:
                        if is_basic_type_instance(entry):
                            default_value.append(clean_string(entry))
                        else:
                            print(print_pref + "W: invalid default value for parameter " + name + 
                                ": will be ignored", file=sys.stderr)
                            default_value = [""]
                elif type_ == "File" and isinstance(inp_rec["default"], dict):
                    print(print_pref + "W: invalid default value for parameter " + name + 
                        ": defaults for File class are not supported yet; will be ignored", file=sys.stderr)
                    default_value = [""]
                else:
                    print(print_pref + "W: invalid default value for parameter " + name + 
                        ": will be ignored", file=sys.stderr)
                    default_value = [""]
        else:
            default_value = [""]
        # read secondary files:
        if type_ == "File" and "secondaryFiles" in inp_rec:
            if isinstance(inp_rec["secondaryFiles"], str):
                secondary_files = [ inp_rec["secondaryFiles"] ]
            elif isinstance(inp_rec["secondaryFiles"], CommentedMap) and \
                "pattern" in inp_rec["secondaryFiles"].keys():
                secondary_files = [ inp_rec["secondaryFiles"]["pattern"] ]
            elif isinstance(inp_rec["secondaryFiles"], CommentedSeq) or isinstance(inp_rec["secondaryFiles"], list):
                secondary_files = []
                for sec_file in inp_rec["secondaryFiles"]:
                    if isinstance(sec_file, CommentedMap) and "pattern" in sec_file:
                        secondary_files.append(sec_file["pattern"])
                    elif isinstance(sec_file, str):
                        secondary_files.append(sec_file)
                    else:
                        raise AssertionError(print_pref + "E: invalid secondaryFiles field for parameter " + name)
            else:
                raise AssertionError( print_pref + "E: invalid secondaryFiles field for parameter " + name )
        else:
            secondary_files = [ "" ]
        # read doc:
        if "doc" in inp_rec:
            doc = inp_rec["doc"]
        else:
            doc = ""
        # assemble config parameters:
        inp_configs = {
            "type": type_,
            "is_array": is_array,
            "null_allowed": null_allowed,
            "null_items_allowed": null_items_allowed,
            "secondary_files": secondary_files,
            "default_value": default_value,
            "allowed_selection": allowed_selection,
            "doc": doc
        }
        # add to configs dict:
        configs[ name ] = inp_configs
    return configs, metadata
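read_config_from_cwl_file loads the document once (importing cwltool lazily, per the comment, to avoid repeated-load errors), then flattens each entry of inputs_record_schema["fields"] into a parameter description (type, array/null flags, secondary file patterns, default, allowed values, doc) keyed by the cleaned parameter name, alongside workflow-level metadata. A sketch of how the result might be consumed; "workflow.cwl" is a placeholder path:

# Hypothetical usage; "workflow.cwl" is a placeholder path.
configs, metadata = read_config_from_cwl_file("workflow.cwl")
print(metadata["workflow_name"], metadata["workflow_type"])
for name, param in configs.items():
    print(name, param["type"], "array" if param["is_array"] else "scalar")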