예제 #1
0
    def test_basename_field_generation(self):
        """normalizeFilesDirs must derive basename/nameroot/nameext from location.

        Builds File objects with locations "/foo/<filename>" and checks the
        three derived name fields for representative filename shapes
        (plain, dotfile, trailing dot, multiple dots).
        """
        base_file = {
            "class": "File",
            "location": "/foo/"
        }
        # (filename, expected: (nameroot, nameext))
        testdata = [
            ("foo.bar",     ("foo",     ".bar")),
            ("foo",         ("foo",     '')),
            (".foo",        (".foo",    '')),
            ("foo.",        ("foo",     '.')),
            ("foo.bar.baz", ("foo.bar", ".baz"))
        ]

        for filename, (nameroot, nameext) in testdata:
            # Copy the template so each case starts fresh; avoid shadowing
            # the (Python 2) builtin `file`.
            file_obj = dict(base_file)
            file_obj["location"] = file_obj["location"] + filename

            expected = dict(file_obj)
            expected["basename"] = filename
            expected["nameroot"] = nameroot
            expected["nameext"] = nameext

            # normalizeFilesDirs mutates its argument in place.
            normalizeFilesDirs(file_obj)
            self.assertEqual(file_obj, expected)
예제 #2
0
 def test_strip_trailing(self):
     """A trailing slash on a Directory location is stripped and basename set."""
     directory = {"class": "Directory", "location": "/foo/bar/"}
     normalizeFilesDirs(directory)
     expected = {
         "class": "Directory",
         "location": "/foo/bar",
         "basename": "bar",
     }
     self.assertEqual(expected, directory)
예제 #3
0
파일: cwltoil.py 프로젝트: chapmanb/toil
 def importFiles(tool):
     """Normalize every File/Directory in *tool*, then upload them via Toil."""
     visit_class(tool, ("File", "Directory"), pathToLoc)
     normalizeFilesDirs(tool)
     # Expand directory listings recursively before uploading files.
     list_dirs = functools.partial(
         get_listing,
         cwltool.stdfsaccess.StdFsAccess(""),
         recursive=True)
     adjustDirObjs(tool, list_dirs)
     upload = functools.partial(
         uploadFile,
         toil.importFile,
         fileindex, existing, skip_broken=True)
     adjustFileObjs(tool, upload)
예제 #4
0
 def importFiles(tool):
     """Resolve paths and import every File/Directory referenced by *tool*."""
     # Convert raw "path" entries into "location" entries first.
     visit_class(tool, ("File", "Directory"), pathToLoc)
     normalizeFilesDirs(tool)
     fs_access = cwltool.stdfsaccess.StdFsAccess("")
     adjustDirObjs(
         tool,
         functools.partial(get_listing, fs_access, recursive=True))
     adjustFileObjs(
         tool,
         functools.partial(uploadFile, toil.importFile, fileindex,
                           existing, skip_broken=True))
예제 #5
0
파일: cwltoil.py 프로젝트: vallurumk/toil
 def import_files(tool):
     """Normalize, annotate and upload all File/Directory objects in *tool*."""
     visit_class(tool, ("File", "Directory"), path_to_loc)
     # Annotate File objects via add_sizes (presumably records file sizes
     # using fs_access — TODO confirm against its definition).
     visit_class(tool, ("File", ),
                 functools.partial(add_sizes, fs_access))
     normalizeFilesDirs(tool)
     adjustDirObjs(tool,
                   functools.partial(get_listing, fs_access, recursive=True))
     adjustFileObjs(tool,
                    functools.partial(uploadFile, toil.importFile,
                                      fileindex, existing, skip_broken=True))
예제 #6
0
 def test_strip_trailing(self):
     """normalizeFilesDirs drops the trailing '/' and fills in basename."""
     node = {"class": "Directory", "location": "/foo/bar/"}
     normalizeFilesDirs(node)
     self.assertEqual(
         {"class": "Directory", "location": "/foo/bar", "basename": "bar"},
         node)
예제 #7
0
def test_basename_field_generation(filename, expected):
    """Check basename/nameroot/nameext derived from a /foo/<filename> location."""
    nameroot, nameext = expected
    location = "/foo/" + filename

    result = {"class": "File", "location": location}
    normalizeFilesDirs(result)

    assert result == {
        "class": "File",
        "location": location,
        "basename": filename,
        "nameroot": nameroot,
        "nameext": nameext,
    }
예제 #8
0
def test_basename_field_generation(filename, expected):
    """File objects gain basename/nameroot/nameext after normalizeFilesDirs."""
    nameroot, nameext = expected

    file_obj = {'class': 'File', 'location': '/foo/' + filename}
    normalizeFilesDirs(file_obj)

    want = {
        'class': 'File',
        'location': '/foo/' + filename,
        'basename': filename,
        'nameroot': nameroot,
        'nameext': nameext,
    }
    assert file_obj == want
예제 #9
0
def test_basename_field_generation(filename, expected):
    """normalizeFilesDirs splits *filename* into name root and extension."""
    nameroot, nameext = expected
    file_obj = {'class': 'File',
                'location': '/foo/' + filename}

    normalizeFilesDirs(file_obj)

    # Full-dict equality also guarantees no unexpected extra keys appear.
    assert file_obj == {
        'class': 'File',
        'location': '/foo/' + filename,
        'basename': filename,
        'nameroot': nameroot,
        'nameext': nameext,
    }
    def test_normalizeFilesDirs(self):
        """All supported location schemes yield the same derived name fields."""
        locations = [
            "file1.txt",
            "file:///foo/file1.txt",
            "http://example.com/file1.txt",
        ]
        for location in locations:
            node = {"class": "File", "location": location}
            normalizeFilesDirs(node)
            self.assertEqual(
                node,
                {
                    "class": "File",
                    "location": location,
                    'basename': 'file1.txt',
                    'nameext': '.txt',
                    'nameroot': 'file1'
                })
예제 #11
0
    def test_normalizeFilesDirs(self):
        """Relative, file:// and http:// locations all produce identical name fields."""
        def check(location):
            # normalizeFilesDirs mutates the File object in place.
            node = {"class": "File", "location": location}
            normalizeFilesDirs(node)
            self.assertEqual(node, {
                "class": "File",
                "location": location,
                'basename': 'file1.txt',
                'nameext': '.txt',
                'nameroot': 'file1'
            })

        check("file1.txt")
        check("file:///foo/file1.txt")
        check("http://example.com/file1.txt")
예제 #12
0
def add_details_to_outputs(outputs):
    """Expand output locations to URIs, normalize them, then add checksums."""
    for adjust in (adjustFileObjs, adjustDirObjs):
        adjust(outputs, expand_to_file_uri)
    normalizeFilesDirs(outputs)
    checksummer = partial(compute_checksums, StdFsAccess(""))
    adjustFileObjs(outputs, checksummer)
예제 #13
0
def run():
    """Execute a CWL tool as an Arvados crunch job task.

    Reads the job order from the current job's script_parameters,
    rewrites bare Keep locators into ``keep:`` File objects, loads the
    tool from the task's Keep mount, and hands it to ArvCwlRunner.
    On any exception the current job task is marked unsuccessful,
    attaching the final output collection's PDH if one exists.
    """
    # Timestamps are added by crunch-job, so don't print redundant timestamps.
    arvados.log_handler.setFormatter(logging.Formatter('%(name)s %(levelname)s: %(message)s'))

    # Print package versions
    logger.info(arvados_cwl.versionstring())

    api = arvados.api("v1")

    arvados_cwl.add_arv_hints()

    runner = None
    try:
        job_order_object = arvados.current_job()['script_parameters']
        toolpath = "file://%s/%s" % (os.environ['TASK_KEEPMOUNT'], job_order_object.pop("cwl:tool"))

        # Matches a bare Keep portable data hash with an optional subpath.
        pdh_path = re.compile(r'^[0-9a-f]{32}\+\d+(/.+)?$')

        def keeppath(v):
            # Prefix bare portable-data-hash paths with the keep: scheme.
            if pdh_path.match(v):
                return "keep:%s" % v
            else:
                return v

        def keeppathObj(v):
            # Rewrite the location of a File/Directory object in place.
            if "location" in v:
                v["location"] = keeppath(v["location"])

        # Promote bare Keep locator strings to CWL File objects.
        # NOTE(review): `basestring` implies Python 2 — confirm target runtime.
        for k,v in job_order_object.items():
            if isinstance(v, basestring) and arvados.util.keep_locator_pattern.match(v):
                job_order_object[k] = {
                    "class": "File",
                    "location": "keep:%s" % v
                }

        adjustFileObjs(job_order_object, keeppathObj)
        adjustDirObjs(job_order_object, keeppathObj)
        normalizeFilesDirs(job_order_object)

        # Runner options, overridable via reserved "arv:*" job parameters
        # (each is removed from the job order after it is consumed).
        output_name = None
        output_tags = None
        enable_reuse = True
        on_error = "continue"
        debug = False

        if "arv:output_name" in job_order_object:
            output_name = job_order_object["arv:output_name"]
            del job_order_object["arv:output_name"]

        if "arv:output_tags" in job_order_object:
            output_tags = job_order_object["arv:output_tags"]
            del job_order_object["arv:output_tags"]

        if "arv:enable_reuse" in job_order_object:
            enable_reuse = job_order_object["arv:enable_reuse"]
            del job_order_object["arv:enable_reuse"]

        if "arv:on_error" in job_order_object:
            on_error = job_order_object["arv:on_error"]
            del job_order_object["arv:on_error"]

        if "arv:debug" in job_order_object:
            debug = job_order_object["arv:debug"]
            del job_order_object["arv:debug"]

        # Thread-safe API client; Keep requests retry up to 4 times.
        runner = arvados_cwl.ArvCwlRunner(api_client=arvados.safeapi.ThreadSafeApiCache(
            api_params={"model": OrderedJsonModel()}, keep_params={"num_retries": 4}),
                                          output_name=output_name, output_tags=output_tags)

        make_fs_access = functools.partial(CollectionFsAccess,
                                 collection_cache=runner.collection_cache)

        # Fetch the tool document (and anything it references) via Keep.
        t = load_tool(toolpath, runner.arv_make_tool,
                      fetcher_constructor=functools.partial(CollectionFetcher,
                                                  api_client=runner.api,
                                                  fs_access=make_fs_access(""),
                                                  num_retries=runner.num_retries))

        if debug:
            logger.setLevel(logging.DEBUG)
            logging.getLogger('arvados').setLevel(logging.DEBUG)
            logging.getLogger("cwltool").setLevel(logging.DEBUG)

        # Build the argument namespace normally produced by the CLI parser.
        args = argparse.Namespace()
        args.project_uuid = arvados.current_job()["owner_uuid"]
        args.enable_reuse = enable_reuse
        args.on_error = on_error
        args.submit = False
        args.debug = debug
        args.quiet = False
        args.ignore_docker_for_reuse = False
        args.basedir = os.getcwd()
        args.name = None
        args.cwl_runner_job={"uuid": arvados.current_job()["uuid"], "state": arvados.current_job()["state"]}
        args.make_fs_access = make_fs_access
        args.trash_intermediate = False
        args.intermediate_output_ttl = 0
        args.priority = arvados_cwl.DEFAULT_PRIORITY
        args.do_validate = True
        args.disable_js_validation = False

        runner.arv_executor(t, job_order_object, **vars(args))
    except Exception as e:
        # Workflow errors are expected failures; anything else gets a traceback.
        if isinstance(e, WorkflowException):
            logging.info("Workflow error %s", e)
        else:
            logging.exception("Unhandled exception")
        if runner and runner.final_output_collection:
            outputCollection = runner.final_output_collection.portable_data_hash()
        else:
            outputCollection = None
        # Mark the current task failed, attaching any partial output.
        api.job_tasks().update(uuid=arvados.current_task()['uuid'],
                                             body={
                                                 'output': outputCollection,
                                                 'success': False,
                                                 'progress':1.0
                                             }).execute()
예제 #14
0
def test_normalizeFilesDirs(name, file_dir, expected):
    """Parametrized: normalizeFilesDirs(file_dir) must equal *expected* (*name* labels the case)."""
    normalizeFilesDirs(file_dir)
    assert expected == file_dir, name
예제 #15
0
def run():
    """Execute a CWL tool as an Arvados crunch job task (RuntimeContext variant).

    Same flow as the jobs-API runner: read the job order from
    script_parameters, rewrite bare Keep locators into ``keep:`` File
    objects, load the tool, and invoke ArvCwlRunner with an
    ArvRuntimeContext.  Any exception marks the current task failed,
    attaching the final output collection's PDH if one exists.
    """
    # Timestamps are added by crunch-job, so don't print redundant timestamps.
    arvados.log_handler.setFormatter(logging.Formatter('%(name)s %(levelname)s: %(message)s'))

    # Print package versions
    logger.info(arvados_cwl.versionstring())

    api = arvados.api("v1")

    arvados_cwl.add_arv_hints()

    runner = None
    try:
        job_order_object = arvados.current_job()['script_parameters']
        toolpath = "file://%s/%s" % (os.environ['TASK_KEEPMOUNT'], job_order_object.pop("cwl:tool"))

        # Matches a bare Keep portable data hash with an optional subpath.
        pdh_path = re.compile(r'^[0-9a-f]{32}\+\d+(/.+)?$')

        def keeppath(v):
            # Prefix bare portable-data-hash paths with the keep: scheme.
            if pdh_path.match(v):
                return "keep:%s" % v
            else:
                return v

        def keeppathObj(v):
            # Rewrite the location of a File/Directory object in place.
            if "location" in v:
                v["location"] = keeppath(v["location"])

        # Promote bare Keep locator strings to CWL File objects.
        # NOTE(review): `basestring` implies Python 2 — confirm target runtime.
        for k,v in job_order_object.items():
            if isinstance(v, basestring) and arvados.util.keep_locator_pattern.match(v):
                job_order_object[k] = {
                    "class": "File",
                    "location": "keep:%s" % v
                }

        adjustFileObjs(job_order_object, keeppathObj)
        adjustDirObjs(job_order_object, keeppathObj)
        normalizeFilesDirs(job_order_object)

        # Runner options, overridable via reserved "arv:*" job parameters
        # (each is removed from the job order after it is consumed).
        output_name = None
        output_tags = None
        enable_reuse = True
        on_error = "continue"
        debug = False

        if "arv:output_name" in job_order_object:
            output_name = job_order_object["arv:output_name"]
            del job_order_object["arv:output_name"]

        if "arv:output_tags" in job_order_object:
            output_tags = job_order_object["arv:output_tags"]
            del job_order_object["arv:output_tags"]

        if "arv:enable_reuse" in job_order_object:
            enable_reuse = job_order_object["arv:enable_reuse"]
            del job_order_object["arv:enable_reuse"]

        if "arv:on_error" in job_order_object:
            on_error = job_order_object["arv:on_error"]
            del job_order_object["arv:on_error"]

        if "arv:debug" in job_order_object:
            debug = job_order_object["arv:debug"]
            del job_order_object["arv:debug"]

        # Minimal argument namespace for constructing the runner itself.
        arvargs = argparse.Namespace()
        arvargs.work_api = "jobs"
        arvargs.output_name = output_name
        arvargs.output_tags = output_tags
        arvargs.thread_count = 1

        # Thread-safe API client; Keep requests retry up to 4 times.
        runner = arvados_cwl.ArvCwlRunner(api_client=arvados.safeapi.ThreadSafeApiCache(
            api_params={"model": OrderedJsonModel()}, keep_params={"num_retries": 4}),
                                          arvargs=arvargs)

        make_fs_access = functools.partial(CollectionFsAccess,
                                 collection_cache=runner.collection_cache)

        t = load_tool(toolpath, runner.loadingContext)

        if debug:
            logger.setLevel(logging.DEBUG)
            logging.getLogger('arvados').setLevel(logging.DEBUG)
            logging.getLogger("cwltool").setLevel(logging.DEBUG)

        # Runtime context seeded from arvargs, then filled with per-task state.
        args = ArvRuntimeContext(vars(arvargs))
        args.project_uuid = arvados.current_job()["owner_uuid"]
        args.enable_reuse = enable_reuse
        args.on_error = on_error
        args.submit = False
        args.debug = debug
        args.quiet = False
        args.ignore_docker_for_reuse = False
        args.basedir = os.getcwd()
        args.name = None
        args.cwl_runner_job={"uuid": arvados.current_job()["uuid"], "state": arvados.current_job()["state"]}
        args.make_fs_access = make_fs_access
        args.trash_intermediate = False
        args.intermediate_output_ttl = 0
        args.priority = arvados_cwl.DEFAULT_PRIORITY
        args.do_validate = True
        args.disable_js_validation = False
        args.tmp_outdir_prefix = "tmp"

        runner.arv_executor(t, job_order_object, args, logger=logger)
    except Exception as e:
        # Workflow errors are expected failures; anything else gets a traceback.
        if isinstance(e, WorkflowException):
            logging.info("Workflow error %s", e)
        else:
            logging.exception("Unhandled exception")
        if runner and runner.final_output_collection:
            outputCollection = runner.final_output_collection.portable_data_hash()
        else:
            outputCollection = None
        # Mark the current task failed, attaching any partial output.
        api.job_tasks().update(uuid=arvados.current_task()['uuid'],
                                             body={
                                                 'output': outputCollection,
                                                 'success': False,
                                                 'progress':1.0
                                             }).execute()
예제 #16
0
def run():
    """Execute a CWL tool as an Arvados crunch job task (plain-API variant).

    Reads the job order from script_parameters, rewrites bare Keep
    locators into ``keep:`` File objects, loads the tool from the
    task's Keep mount, and hands it to ArvCwlRunner.  On any exception
    the current job task is marked unsuccessful, attaching the final
    output collection's PDH if one exists.
    """
    # Timestamps are added by crunch-job, so don't print redundant timestamps.
    arvados.log_handler.setFormatter(logging.Formatter('%(name)s %(levelname)s: %(message)s'))

    # Print package versions
    logger.info(arvados_cwl.versionstring())

    api = arvados.api("v1")

    arvados_cwl.add_arv_hints()

    runner = None
    try:
        job_order_object = arvados.current_job()['script_parameters']
        toolpath = "file://%s/%s" % (os.environ['TASK_KEEPMOUNT'], job_order_object.pop("cwl:tool"))

        # Matches a bare Keep portable data hash with an optional subpath.
        pdh_path = re.compile(r'^[0-9a-f]{32}\+\d+(/.+)?$')

        def keeppath(v):
            # Prefix bare portable-data-hash paths with the keep: scheme.
            if pdh_path.match(v):
                return "keep:%s" % v
            else:
                return v

        def keeppathObj(v):
            # Rewrite the location of a File/Directory object in place.
            if "location" in v:
                v["location"] = keeppath(v["location"])

        # Promote bare Keep locator strings to CWL File objects.
        # NOTE(review): `basestring` implies Python 2 — confirm target runtime.
        for k,v in job_order_object.items():
            if isinstance(v, basestring) and arvados.util.keep_locator_pattern.match(v):
                job_order_object[k] = {
                    "class": "File",
                    "location": "keep:%s" % v
                }

        adjustFileObjs(job_order_object, keeppathObj)
        adjustDirObjs(job_order_object, keeppathObj)
        normalizeFilesDirs(job_order_object)

        # Runner options, overridable via reserved "arv:*" job parameters
        # (each is removed from the job order after it is consumed).
        output_name = None
        output_tags = None
        enable_reuse = True
        on_error = "continue"
        if "arv:output_name" in job_order_object:
            output_name = job_order_object["arv:output_name"]
            del job_order_object["arv:output_name"]

        if "arv:output_tags" in job_order_object:
            output_tags = job_order_object["arv:output_tags"]
            del job_order_object["arv:output_tags"]

        if "arv:enable_reuse" in job_order_object:
            enable_reuse = job_order_object["arv:enable_reuse"]
            del job_order_object["arv:enable_reuse"]

        if "arv:on_error" in job_order_object:
            on_error = job_order_object["arv:on_error"]
            del job_order_object["arv:on_error"]

        runner = arvados_cwl.ArvCwlRunner(api_client=arvados.api('v1', model=OrderedJsonModel()),
                                          output_name=output_name, output_tags=output_tags)

        make_fs_access = functools.partial(CollectionFsAccess,
                                 collection_cache=runner.collection_cache)

        # Fetch the tool document (and anything it references) via Keep.
        t = load_tool(toolpath, runner.arv_make_tool,
                      fetcher_constructor=functools.partial(CollectionFetcher,
                                                  api_client=runner.api,
                                                  fs_access=make_fs_access(""),
                                                  num_retries=runner.num_retries))

        # Build the argument namespace normally produced by the CLI parser.
        args = argparse.Namespace()
        args.project_uuid = arvados.current_job()["owner_uuid"]
        args.enable_reuse = enable_reuse
        args.on_error = on_error
        args.submit = False
        args.debug = False
        args.quiet = False
        args.ignore_docker_for_reuse = False
        args.basedir = os.getcwd()
        args.name = None
        args.cwl_runner_job={"uuid": arvados.current_job()["uuid"], "state": arvados.current_job()["state"]}
        args.make_fs_access = make_fs_access

        runner.arv_executor(t, job_order_object, **vars(args))
    except Exception as e:
        # Workflow errors are expected failures; anything else gets a traceback.
        if isinstance(e, WorkflowException):
            logging.info("Workflow error %s", e)
        else:
            logging.exception("Unhandled exception")
        if runner and runner.final_output_collection:
            outputCollection = runner.final_output_collection.portable_data_hash()
        else:
            outputCollection = None
        # Mark the current task failed, attaching any partial output.
        api.job_tasks().update(uuid=arvados.current_task()['uuid'],
                                             body={
                                                 'output': outputCollection,
                                                 'success': False,
                                                 'progress':1.0
                                             }).execute()
예제 #17
0
def test_normalizeFilesDirs(name, file_dir, expected):
    """normalizeFilesDirs mutates *file_dir* in place; it must end up equal to *expected*."""
    result = file_dir
    normalizeFilesDirs(result)
    assert result == expected, name